Commit 6e4ef3b6 authored by Spencer Delcore

multi predictions for lost

parent 163e1cb7
# Copyright 2021 - Valeo Comfort and Driving Assistance - Oriane Siméoni @ valeo.ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import torch
import json
@@ -22,6 +22,98 @@ import skimage.io
from PIL import Image
from tqdm import tqdm
from torchvision import transforms as pth_transforms
import pickle
from os import listdir
from os.path import isfile, join
class GenericDataset:
    def __init__(self, data_arr, name):
        self.name = name
        self.data_arr = data_arr
        if self.name == 'KITTI':
            with open(r"/root/lost/Kitti2Coco/train/kitti_labels.pkl", "rb") as input_file:
                self.annots = pickle.load(input_file)
            # Keep only the images that actually have annotations
            self.data_arr = [p for p in self.data_arr
                             if p.split("/")[-1].split(".")[0] in self.annots]
        else:
            self.annots = None
        print(f"GenericDataset '{self.name}': {self.__len__()} images")
    def __getitem__(self, i):
        with open(self.data_arr[i], "rb") as f:
            img = Image.open(f)
            img = img.convert("RGB")
        im_name = self.data_arr[i].split("/")[-1].split(".")[0]
        # Record the PIL size before the transform turns the image into a tensor
        im_size = img.size
        img = transform(img)
        if self.annots is None:
            return [img, self.data_arr[i]]
        if self.name == 'KITTI':
            return [img, self.data_arr[i], self.annots[im_name]['annotations'],
                    im_size, self.annots[im_name]]
        return [img, self.data_arr[i], self.annots[im_name], im_size]
    def __len__(self):
        return len(self.data_arr)

    def extract_gt(self, targets, im_name):
        if self.annots is None:
            return None
        if self.name == 'KITTI':
            return None  # TODO: handle returning KITTI annotations
        im = self.annots[im_name]
        # Annotation layout: ['bbox_x1', 'bbox_y1', 'bbox_x2', 'bbox_y2', 'class', 'test']
        gt_bbxs = im[0:4]
        gt_clss = im[4]
        return np.asarray(gt_bbxs), gt_clss
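For orientation, a hypothetical usage sketch (not part of the commit; it assumes the hard-coded KITTI pickle path above exists):

# Build a GenericDataset over two KITTI frames and unpack one item.
paths = [
    "/root/kitti/training/image_2/000003.png",
    "/root/kitti/training/image_2/000011.png",
]
ds = GenericDataset(paths, "KITTI")
img, path, annotations, im_size, full_record = ds[0]
print(path, im_size, len(annotations))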
class ImageFolderDataset:
    def __init__(self, name, dir_path):
        self.dir_path = dir_path
        self.name = name
        self.dataloader_paths = []
        # Collect every file found in the directory
        print(f"Loading dataset {self.name}...")
        image_files = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
        for image_file in image_files:
            self.dataloader_paths.append(join(dir_path, image_file))
        self.dataloader = GenericDataset(self.dataloader_paths, self.name)
        print("Done initializing dataset.")

    def get_image_name(self, inp):
        return inp.split("/")[-1].split(".")[0]

    def load_image(self, im_name):
        if self.name == 'KITTI':
            ext = '.png'
        else:
            raise ValueError(f"Unknown image extension for dataset {self.name}")
        return skimage.io.imread(join(self.dir_path, im_name + ext))

    def extract_gt(self, targets, im_name):
        return self.dataloader.extract_gt(targets, im_name)
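A minimal usage sketch (again not part of the commit): GenericDataset implements __getitem__/__len__, so the wrapper's dataloader can be iterated directly.

dataset = ImageFolderDataset("KITTI", "/root/kitti/training/image_2/")
for inp in dataset.dataloader:
    img = inp[0]                               # transformed image tensor
    im_name = dataset.get_image_name(inp[1])   # e.g. "000003"
    break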
# Image transformation applied to all images
transform = pth_transforms.Compose(
@@ -279,7 +371,7 @@ def extract_gt_VOC(targets, remove_hards=False):
return np.asarray(gt_bbxs), gt_clss
-def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
+def _bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
# https://github.com/ultralytics/yolov5/blob/develop/utils/general.py
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
box2 = box2.T
@@ -317,7 +409,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=
+ (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
) / 4 # center distance squared
if DIoU:
-                return iou - rho2 / c2  # DIoU
+                return iou - rho2 / c2, inter, union  # DIoU
elif (
CIoU
): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
@@ -326,12 +418,16 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=
)
with torch.no_grad():
alpha = v / (v - iou + (1 + eps))
-                return iou - (rho2 / c2 + v * alpha)  # CIoU
+                return iou - (rho2 / c2 + v * alpha), inter, union  # CIoU
else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
c_area = cw * ch + eps # convex area
-            return iou - (c_area - union) / c_area  # GIoU
+            return iou - (c_area - union) / c_area, inter, union  # GIoU
else:
-        return iou  # IoU
+        return iou, inter, union  # IoU
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
return _bbox_iou(box1, box2, x1y1x2y2, GIoU, DIoU, CIoU, eps)[0]
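The rename keeps the public API stable: _bbox_iou now returns the intersection and union alongside the IoU (the multi-seed filtering in object_discovery.py needs them), while the bbox_iou wrapper preserves the original single-value contract for existing callers. A quick illustration (not in the commit):

import torch

box1 = torch.tensor([0., 0., 10., 10.])
boxes2 = torch.tensor([[5., 5., 15., 15.]])
iou, inter, union = _bbox_iou(box1, boxes2)
assert torch.allclose(iou, bbox_iou(box1, boxes2))
print(iou.item(), inter.item(), union.item())  # ~0.143, 25.0, ~175.0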
def select_coco_20k(sel_file, all_annotations_file):
print('Building COCO 20k dataset.')
......
@@ -25,7 +25,7 @@ from tqdm import tqdm
from PIL import Image
from networks import get_model
-from datasets import ImageDataset, Dataset, bbox_iou
+from datasets import ImageDataset, Dataset, ImageFolderDataset, bbox_iou
from visualizations import visualize_fms, visualize_predictions, visualize_seed_expansion
from object_discovery import lost, detect_box, dino_seg
@@ -54,7 +54,7 @@ if __name__ == "__main__":
"--dataset",
default="VOC07",
type=str,
-        choices=[None, "VOC07", "VOC12", "COCO20k"],
+        choices=[None, "VOC07", "VOC12", "COCO20k", "KITTI"],
help="Dataset name.",
)
parser.add_argument(
@@ -81,6 +81,7 @@ if __name__ == "__main__":
parser.add_argument("--no_hard", action="store_true", help="Only used in the case of the VOC_all setup (see the paper).")
parser.add_argument("--no_evaluation", action="store_true", help="Compute the evaluation.")
parser.add_argument("--save_predictions", default=True, type=bool, help="Save predicted bouding boxes.")
parser.add_argument("--num_init_seeds", default=1, type=int, help="Number of initial seeds to expand from.")
# Visualization
parser.add_argument(
@@ -126,6 +127,8 @@ if __name__ == "__main__":
# If an image_path is given, apply the method only to the image
if args.image_path is not None:
dataset = ImageDataset(args.image_path)
elif args.dataset == "KITTI":
        dataset = ImageFolderDataset("KITTI", "/root/kitti/training/image_2/")
else:
dataset = Dataset(args.dataset, args.set, args.no_hard)
@@ -164,12 +167,13 @@ if __name__ == "__main__":
# -------------------------------------------------------------------------------------------------------
# Loop over images
preds_dict = {}
gt_dict = {}
cnt = 0
corloc = np.zeros(len(dataset.dataloader))
pbar = tqdm(dataset.dataloader)
for im_id, inp in enumerate(pbar):
torch.cuda.empty_cache()
# ------------ IMAGE PROCESSING -------------------------------------------
img = inp[0]
init_image_size = img.shape
@@ -192,7 +196,9 @@ if __name__ == "__main__":
img = paded
# Move to gpu
-        img = img.cuda(non_blocking=True)
+        if device == torch.device("cuda"):
+            img = img.cuda(non_blocking=True)
# Size for transformers
w_featmap = img.shape[-2] // args.patch_size
h_featmap = img.shape[-1] // args.patch_size
@@ -283,19 +289,25 @@ if __name__ == "__main__":
# ------------ Apply LOST -------------------------------------------
if not args.dinoseg:
-        pred, A, scores, seed = lost(
-            feats,
-            [w_featmap, h_featmap],
-            scales,
-            init_image_size,
-            k_patches=args.k_patches,
-        )
+        preds, A, scores, seeds = lost(
+            feats,
+            [w_featmap, h_featmap],
+            scales,
+            init_image_size,
+            k_patches=args.k_patches,
+            num_init_seeds=args.num_init_seeds
+        )
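With multiple seeds, lost() now returns a batch of boxes rather than a single one. As a sketch of the expected shapes (assuming n predictions survive the filtering): preds is an np.ndarray of shape (n, 4) holding [x1, y1, x2, y2] boxes, and seeds is the list of the n seed patch indices that produced them.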
if not args.dinoseg:
# ------------ Visualizations -------------------------------------------
if args.visualize == "fms":
-            visualize_fms(A.clone().cpu().numpy(), seed, scores, [w_featmap, h_featmap], scales, vis_folder, im_name)
+            for i, (pred, seed) in enumerate(zip(preds, seeds)):
+                visualize_fms(A.clone().cpu().numpy(), seed, scores, [w_featmap, h_featmap], scales, vis_folder, im_name + '_' + str(i))
elif args.visualize == "seed_expansion":
+            for i, (pred, seed) in enumerate(zip(preds, seeds)):
image = dataset.load_image(im_name)
# Before expansion
@@ -306,11 +318,16 @@ if __name__ == "__main__":
scales=scales,
initial_im_size=init_image_size[1:],
)
-                visualize_seed_expansion(image, pred, seed, pred_seed, scales, [w_featmap, h_featmap], vis_folder, im_name)
+                visualize_seed_expansion(image, pred, seed, pred_seed, scales, [w_featmap, h_featmap], vis_folder, im_name + '_' + str(i))
elif args.visualize == "pred":
image = dataset.load_image(im_name)
-            visualize_predictions(image, pred, seed, scales, [w_featmap, h_featmap], vis_folder, im_name)
+            # Draw every box onto the same image; pass the image name only on the
+            # last iteration so visualize_predictions saves the file just once
+            for i, (pred, seed) in enumerate(zip(preds, seeds)):
+                image_name = im_name if i == len(preds) - 1 else None
+                visualize_predictions(image, pred, seed, scales, [w_featmap, h_featmap], vis_folder, image_name)
# Save the prediction
preds_dict[im_name] = pred
@@ -320,12 +337,20 @@ if __name__ == "__main__":
continue
# Compare prediction to GT boxes
-        ious = bbox_iou(torch.from_numpy(pred), torch.from_numpy(gt_bbxs))
-        if torch.any(ious >= 0.5):
-            corloc[im_id] = 1
-        cnt += 1
+        ious = bbox_iou(torch.from_numpy(pred), torch.from_numpy(np.asarray(gt_bbxs)))
+        if torch.any(ious >= 0.50):
+            # Count the matches at IoU >= 0.50 instead of setting a binary CorLoc flag
+            corloc[im_id] = 0
+            for i in ious:
+                if i >= 0.50:
+                    corloc[im_id] += 1
+        cnt += len(inp[2])  # count ground-truth boxes rather than images
if cnt % 50 == 0:
pbar.set_description(f"Found {int(np.sum(corloc))}/{cnt}")
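Note the change of meaning here: corloc[im_id] now holds a per-image count of IoU >= 0.50 matches and cnt accumulates ground-truth boxes, so the progress line reports matched/total boxes instead of the original per-image CorLoc. A toy check of the counting rule (not in the commit):

import torch
ious = torch.tensor([0.62, 0.31, 0.55])   # IoUs against the GT boxes of one image
print(int((ious >= 0.50).sum()))          # 2 matches contribute to corloc[im_id]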
......
@@ -12,15 +12,34 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
import scipy
import scipy.ndimage
import numpy as np

-from datasets import bbox_iou
+from datasets import _bbox_iou
def aspect_ratio(box):
    xdiff = abs(box[0] - box[2])  # absolute value to ignore negatives
    ydiff = abs(box[1] - box[3])
    return xdiff / ydiff

def box_area(box):
    xdiff = abs(box[0] - box[2])
    ydiff = abs(box[1] - box[3])
    return xdiff * ydiff

def IOU_2D(box1, box2):
    ious, inter, union = _bbox_iou(box1, box2)
    return ious
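The aspect-ratio and area helpers drive the new box filtering below. A quick check on an example box [x1, y1, x2, y2] (illustrative, not in the commit):

import numpy as np
b = np.array([10., 20., 60., 40.])
print(aspect_ratio(b))  # 50 / 20 = 2.5 -> passes the "wider than tall" (> 1.0) filter
print(box_area(b))      # 50 * 20 = 1000.0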
-def lost(feats, dims, scales, init_image_size, k_patches=100):
+def lost(feats, dims, scales, init_image_size, k_patches=100, num_init_seeds=1, iou_threshold=0.1, num_sliding_windows=1):
"""
Implementation of LOST method.
Inputs
@@ -38,23 +57,66 @@ def lost(feats, dims, scales, init_image_size, k_patches=100):
# Compute the similarity
A = (feats @ feats.transpose(1, 2)).squeeze()
    if num_init_seeds == 0:
        # 0 means: use every patch as an initial seed
        num_init_seeds = len(A)
# Compute the inverse degree centrality measure per patch
sorted_patches, scores = patch_scoring(A)
    # Select the initial seeds
-    seed = sorted_patches[0]
+    seeds = sorted_patches[0:num_init_seeds]

-    # Seed expansion
-    potentials = sorted_patches[:k_patches]
-    similars = potentials[A[seed, potentials] > 0.0]
-    M = torch.sum(A[similars, :], dim=0)
-
-    # Box extraction
-    pred, _ = detect_box(
-        M, seed, dims, scales=scales, initial_im_size=init_image_size[1:]
-    )
-
-    return np.asarray(pred), A, scores, seed
+    preds = []
+    filtered_seeds = []
    # Seed expansion: run LOST's expansion from each of the top seeds
    for i, seed in enumerate(seeds):
        potentials = sorted_patches[i:k_patches + i]
        # TODO: potentials should take the distance from the initial seed into account
        similars = potentials[A[seed, potentials] > 0.0]
        M = torch.sum(A[similars, :], dim=0)

        # Box extraction
        pred, _ = detect_box(
            M, seed, dims, scales=scales, initial_im_size=init_image_size[1:]
        )
        pred = np.asarray(pred)

        # Keep only boxes that are wider than they are tall
        add_pred = aspect_ratio(pred) > 1.0

        # TODO: if one bbox is completely inside another, keep the smaller one
        if len(preds) > 0 and add_pred:
            idx_to_remove = -1
            ious, inter, union = _bbox_iou(torch.from_numpy(pred), torch.from_numpy(np.asarray(preds)))
            for j, p in enumerate(preds):
                if box_area(pred) == union[j] or box_area(pred) > box_area(p):
                    # the new box encloses (or is larger than) a stored one: do not add it
                    #add_pred = False
                    break
                elif box_area(p) == union[j] or box_area(pred) < box_area(p):
                    # a stored box encloses (or is larger than) the new one: keep the new one
                    add_pred = True
                    #idx_to_remove = j
                    break
            if idx_to_remove >= 0:
                preds.pop(idx_to_remove)
                ious = ious[ious != ious[idx_to_remove]]

            # Reject the box if it overlaps an already kept prediction too much
            add_pred = add_pred and not any(ious >= iou_threshold)

        if add_pred:
            filtered_seeds.append(seed)
            preds.append(pred)

    return np.asarray(preds), A, scores, filtered_seeds
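The net effect is a greedy non-maximum suppression over LOST's per-seed boxes: a candidate survives only if it is landscape-shaped and its IoU with every kept box stays below iou_threshold. A toy illustration (not in the commit):

import numpy as np
import torch
kept = [np.array([0., 0., 100., 50.])]
candidate = np.array([10., 5., 110., 55.])  # wide box, heavily overlapping the kept one
ious, _, _ = _bbox_iou(torch.from_numpy(candidate), torch.from_numpy(np.asarray(kept)))
print(aspect_ratio(candidate) > 1.0 and not any(ious >= 0.1))  # False: IoU ~0.68 >= 0.1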
def patch_scoring(M, threshold=0.):
@@ -123,7 +185,6 @@ def detect_box(A, seed, dims, initial_im_size=None, scales=None):
def dino_seg(attn, dims, patch_size, head=0):
"""
Extraction of boxes based on the DINO segmentation method proposed in https://github.com/facebookresearch/dino.
Modified from https://github.com/facebookresearch/dino/blob/main/visualize_attention.py
"""
w_featmap, h_featmap = dims
nh = attn.shape[1]
......
OUTPUT_PATH=/root/kitti/lost_output
DINO_ARCH=vit_base
LOST_FEATURES=k
K_PATCHES=25
PATCH_SIZE=16
cd /root/lost/
rm -rf $OUTPUT_PATH
mkdir -p $OUTPUT_PATH
echo $OUTPUT_PATH
python main_lost.py \
--dataset KITTI \
--output_dir $OUTPUT_PATH \
--arch $DINO_ARCH \
--which_feature $LOST_FEATURES \
--k_patches $K_PATCHES \
--patch_size $PATCH_SIZE \
--visualize pred \
--num_init_seeds 1
exit
\ No newline at end of file
declare -a images=(
#"000011" "004540" "004541" "007256" "007259" "007267" "007265" "007271"
#"000188" "000085" "000038" "000056" "000093" "000263"
#"000028" "000048" "000066" "004459" "000435" "003333" "000291"
"000003"
)
DATASET_PATH=/root/kitti/training/image_2/
DINO_PATH=/root/lost/dino
DINO_ARCH=vit_base
LOST_FEATURES=k
K_PATCHES=10
PATCH_SIZE=16
OUTPUT_PATH=/root/lost/outputs/samples
rm -rf $OUTPUT_PATH
for i in "${images[@]}"
do
echo evaluating $i
#mkdir -p $OUTPUT_PATH/$i/dino
mkdir -p $OUTPUT_PATH/$i/lost
#mkdir -p $OUTPUT_PATH/$i/lost-dinoseg
#mkdir -p $OUTPUT_PATH/$i/images
cd /root/lost/
python main_lost.py \
--image_path $DATASET_PATH/$i.png \
--output_dir $OUTPUT_PATH/$i/lost \
--arch $DINO_ARCH \
--which_feature $LOST_FEATURES \
--k_patches $K_PATCHES \
--visualize pred \
--num_init_seeds 1
python main_lost.py \
--image_path $DATASET_PATH/$i.png \
--output_dir $OUTPUT_PATH/$i/lost \
--arch $DINO_ARCH \
--which_feature $LOST_FEATURES \
--k_patches $K_PATCHES \
--visualize fms \
--num_init_seeds 1
python main_lost.py \
--image_path $DATASET_PATH/$i.png \
--output_dir $OUTPUT_PATH/$i/lost \
--arch $DINO_ARCH \
--which_feature $LOST_FEATURES \
--k_patches $K_PATCHES \
--visualize seed_expansion \
--num_init_seeds 1
echo
done
\ No newline at end of file
@@ -18,23 +18,32 @@ import skimage.io
import numpy as np
import torch.nn as nn
from PIL import Image
from random import randint
import matplotlib.pyplot as plt
-def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name, plot_seed=False):
+def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name, plot_seed=False, is_gt=False):
"""
Visualization of the predicted box and the corresponding seed patch.
"""
w_featmap, h_featmap = dims
# Plot the box
-    cv2.rectangle(
-        image,
-        (int(pred[0]), int(pred[1])),
-        (int(pred[2]), int(pred[3])),
-        (255, 0, 0), 3,
-    )
+    if not is_gt:
+        # Predictions: fixed red channel, random green/blue so that multiple
+        # boxes drawn on the same image are distinguishable
+        cv2.rectangle(
+            image,
+            (int(pred[0]), int(pred[1])),
+            (int(pred[2]), int(pred[3])),
+            (255, randint(0, 255), randint(0, 255)), 3,
+        )
+    else:
+        # Ground-truth boxes are drawn in pure green
+        cv2.rectangle(
+            image,
+            (int(pred[0]), int(pred[1])),
+            (int(pred[2]), int(pred[3])),
+            (0, 255, 0), 3,
+        )
print("image.shape:",image.shape, "\npred_box: [x1,y1,x2,y2]", pred)
# Plot the seed
if plot_seed:
s_ = np.unravel_index(seed.cpu().numpy(), (w_featmap, h_featmap))
@@ -45,10 +54,10 @@ def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name,
(int(s_[1] * scales[1] + (size_[1] / 2)), int(s_[0] * scales[0] + (size_[0] / 2))),
(0, 255, 0), -1,
)
pltname = f"{vis_folder}/LOST_{im_name}.png"
Image.fromarray(image).save(pltname)
print(f"Predictions saved at {pltname}.")
if im_name is not None:
pltname = f"{vis_folder}/LOST_{im_name}.png"
Image.fromarray(image).save(pltname)
#print(f"Predictions saved at {pltname}.")
def visualize_fms(A, seed, scores, dims, scales, output_folder, im_name):
"""
......