From 6e4ef3b6efe95fe2add6ffa392810107e1781c8e Mon Sep 17 00:00:00 2001
From: Spencer Delcore <sdelcore@uwaterloo.ca>
Date: Fri, 17 Mar 2023 20:55:42 -0400
Subject: [PATCH] multi predictions for lost

---
 datasets.py                 | 134 +++++++++++++++++++++++++++++++-----
 main_lost.py                |  59 +++++++++++-----
 object_discovery.py         |  89 ++++++++++++++++++++----
 scripts/run-dataset.sh      |  24 +++++++
 scripts/run-single-image.sh |  57 +++++++++++++++
 visualizations.py           |  33 +++++----
 6 files changed, 334 insertions(+), 62 deletions(-)
 create mode 100644 scripts/run-dataset.sh
 create mode 100644 scripts/run-single-image.sh

diff --git a/datasets.py b/datasets.py
index 35646ac..0d4e239 100755
--- a/datasets.py
+++ b/datasets.py
@@ -1,17 +1,17 @@
-# Copyright 2021 - Valeo Comfort and Driving Assistance - Oriane Siméoni @ valeo.ai
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+# Copyright 2021 - Valeo Comfort and Driving Assistance - Oriane Siméoni @ valeo.ai 
+# 
+# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at 
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0 
+# 
+# Unless required by applicable law or agreed to in writing, software 
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and 
+# limitations under the License. 
+
 import os
 import torch
 import json
@@ -22,6 +22,98 @@ import skimage.io
 from PIL import Image
 from tqdm import tqdm
 from torchvision import transforms as pth_transforms
+import pickle
+
+
+from os import listdir
+from os.path import isfile, join
+
+class GenericDataset:
+    def __init__(self, data_arr, name):
+        self.name = name
+        self.data_arr = data_arr
+        if self.name == 'KITTI':
+            #with open(r"/root/lost/datasets/kitti_labels.pkl", "rb") as input_file:
+            with open(r"/root/lost/Kitti2Coco/train/kitti_labels.pkl", "rb") as input_file:
+                self.annots = pickle.load(input_file)
+            print(len(self.data_arr))
+
+            # Drop images that carry no annotations
+            keys = self.data_arr.copy()
+            for k in range(len(keys)-1, -1, -1):
+                im_name = self.data_arr[k].split("/")[-1].split(".")[0]
+                if im_name not in self.annots.keys():
+                    self.data_arr.remove(self.data_arr[k])
+        else:
+            self.annots = None
+
+        print(self.__len__())
+
+    def __getitem__(self, i):
+        with open(self.data_arr[i], "rb") as f:
+            img = Image.open(f)
+            img = img.convert("RGB")
+        im_name = self.data_arr[i].split("/")[-1].split(".")[0]
+
+        # Apply the shared transform before returning the item
+        img = transform(img)
+        if self.annots is None:
+            return [img, self.data_arr[i]]
+        if self.name == 'KITTI':
+            return [img, self.data_arr[i], self.annots[im_name]['annotations'], img.size, self.annots[im_name]]
+        return [img, self.data_arr[i], self.annots[im_name], img.size]
+
+    def __len__(self):
+        return len(self.data_arr)
+
+    def extract_gt(self, targets, im_name):
+        if self.annots is None:
+            return None
+
+        if self.name == 'KITTI':
+            return None  # TODO: handle returning KITTI annotations
+
+        im = self.annots[im_name]
+        # {"labels": ['bbox_x1','bbox_y1','bbox_x2','bbox_y2','class', 'test']}
+        gt_bbxs = im[0:4]
+        gt_clss = im[4]
+
+        return np.asarray(gt_bbxs), gt_clss
+
+
+class ImageFolderDataset:
+    def __init__(self, name, dir_path):
+
+        self.dir_path = dir_path
+        self.name = name
+        self.dataloader_paths = []
+
+        # Collect the image paths
+        print("Loading dataset...")
+        print(self.name)
+        image_files = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
+        for image_file in image_files:
+            self.dataloader_paths.append(join(dir_path, image_file))
+        self.dataloader = GenericDataset(self.dataloader_paths, self.name)
+
+        print("done initializing dataset.")
+
+    def get_image_name(self, inp):
+        return inp.split("/")[-1].split(".")[0]
+
+    def load_image(self, im_name):
+        # KITTI images are PNGs; other folders are assumed to hold JPEGs
+        ext = '.png' if self.name == 'KITTI' else '.jpg'
+        return skimage.io.imread(join(self.dir_path, im_name+ext))
+
+    def extract_gt(self, targets, im_name):
+        return self.dataloader.extract_gt(targets, im_name)
+
+
 
 # Image transformation applied to all images
 transform = pth_transforms.Compose(
@@ -279,7 +371,7 @@ def extract_gt_VOC(targets, remove_hards=False):
     return np.asarray(gt_bbxs), gt_clss
 
 
-def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
+def _bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
     # https://github.com/ultralytics/yolov5/blob/develop/utils/general.py
     # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
     box2 = box2.T
@@ -317,7 +409,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=
                 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
             ) / 4  # center distance squared
             if DIoU:
-                return iou - rho2 / c2  # DIoU
+                return iou - rho2 / c2, inter, union  # DIoU
             elif (
                 CIoU
             ):  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
@@ -326,12 +418,16 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=
                 )
                 with torch.no_grad():
                     alpha = v / (v - iou + (1 + eps))
-                return iou - (rho2 / c2 + v * alpha)  # CIoU
+                return iou - (rho2 / c2 + v * alpha), inter, union  # CIoU
         else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
             c_area = cw * ch + eps  # convex area
-            return iou - (c_area - union) / c_area  # GIoU
+            return iou - (c_area - union) / c_area, inter, union  # GIoU
     else:
-        return iou  # IoU
+        return iou, inter, union  # IoU
+
+def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
+    # Thin wrapper that preserves the original IoU-only return value
+    return _bbox_iou(box1, box2, x1y1x2y2, GIoU, DIoU, CIoU, eps)[0]
+
 
 def select_coco_20k(sel_file, all_annotations_file):
     print('Building COCO 20k dataset.')
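For reference, the new folder-based loader can be driven as below. This is a
minimal sketch, not part of the patch; it assumes the hard-coded KITTI paths
above (including the kitti_labels.pkl pickle) are in place:

    from datasets import ImageFolderDataset

    dataset = ImageFolderDataset("KITTI", "/root/kitti/training/image_2/")
    # KITTI items carry annotations: [img, path, annotations, size, raw_record]
    img, path, annots, size, raw = dataset.dataloader[0]
    print(dataset.get_image_name(path), img.shape)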
diff --git a/main_lost.py b/main_lost.py
index c4f2938..898ff9b 100755
--- a/main_lost.py
+++ b/main_lost.py
@@ -25,7 +25,7 @@ from tqdm import tqdm
 from PIL import Image
 
 from networks import get_model
-from datasets import ImageDataset, Dataset, bbox_iou
+from datasets import ImageDataset, Dataset, ImageFolderDataset, bbox_iou
 from visualizations import visualize_fms, visualize_predictions, visualize_seed_expansion
 from object_discovery import lost, detect_box, dino_seg
 
@@ -54,7 +54,7 @@ if __name__ == "__main__":
         "--dataset",
         default="VOC07",
         type=str,
-        choices=[None, "VOC07", "VOC12", "COCO20k"],
+        choices=[None, "VOC07", "VOC12", "COCO20k", "KITTI"],
         help="Dataset name.",
     )
     parser.add_argument(
@@ -81,6 +81,7 @@ if __name__ == "__main__":
     parser.add_argument("--no_hard", action="store_true", help="Only used in the case of the VOC_all setup (see the paper).")
     parser.add_argument("--no_evaluation", action="store_true", help="Compute the evaluation.")
     parser.add_argument("--save_predictions", default=True, type=bool, help="Save predicted bounding boxes.")
+    parser.add_argument("--num_init_seeds", default=1, type=int, help="Number of initial seeds to expand from.")
 
     # Visualization
     parser.add_argument(
@@ -126,6 +127,8 @@ if __name__ == "__main__":
     # If an image_path is given, apply the method only to the image
     if args.image_path is not None:
         dataset = ImageDataset(args.image_path)
+    elif args.dataset == "KITTI":
+        dataset = ImageFolderDataset(args.dataset, "/root/kitti/training/image_2/")
     else:
         dataset = Dataset(args.dataset, args.set, args.no_hard)
@@ -164,12 +167,13 @@ if __name__ == "__main__":
     # -------------------------------------------------------------------------------------------------------
     # Loop over images
     preds_dict = {}
+    gt_dict = {}
     cnt = 0
     corloc = np.zeros(len(dataset.dataloader))
 
     pbar = tqdm(dataset.dataloader)
     for im_id, inp in enumerate(pbar):
-
+        torch.cuda.empty_cache()
         # ------------ IMAGE PROCESSING -------------------------------------------
         img = inp[0]
         init_image_size = img.shape
@@ -192,7 +196,9 @@ if __name__ == "__main__":
             img = paded
 
         # Move to gpu
-        img = img.cuda(non_blocking=True)
+        if device == torch.device("cuda"):
+            img = img.cuda(non_blocking=True)
+
         # Size for transformers
         w_featmap = img.shape[-2] // args.patch_size
         h_featmap = img.shape[-1] // args.patch_size
@@ -283,19 +289,25 @@ if __name__ == "__main__":
         # ------------ Apply LOST -------------------------------------------
         if not args.dinoseg:
-            pred, A, scores, seed = lost(
-                feats,
-                [w_featmap, h_featmap],
-                scales,
-                init_image_size,
-                k_patches=args.k_patches,
+            preds, A, scores, seeds = lost(
+                feats,
+                [w_featmap, h_featmap],
+                scales,
+                init_image_size,
+                k_patches=args.k_patches,
+                num_init_seeds=args.num_init_seeds
             )
 
+        if not args.dinoseg:
             # ------------ Visualizations -------------------------------------------
             if args.visualize == "fms":
-                visualize_fms(A.clone().cpu().numpy(), seed, scores, [w_featmap, h_featmap], scales, vis_folder, im_name)
+                for i, (pred, seed) in enumerate(zip(preds, seeds)):
+                    visualize_fms(A.clone().cpu().numpy(), seed, scores, [w_featmap, h_featmap], scales, vis_folder, im_name+'_'+str(i))
             elif args.visualize == "seed_expansion":
+                for i, (pred, seed) in enumerate(zip(preds, seeds)):
                 image = dataset.load_image(im_name)
 
                 # Before expansion
@@ -306,11 +318,16 @@ if __name__ == "__main__":
                     scales=scales,
                     initial_im_size=init_image_size[1:],
                 )
-                visualize_seed_expansion(image, pred, seed, pred_seed, scales, [w_featmap, h_featmap], vis_folder, im_name)
+                    visualize_seed_expansion(image, pred, seed, pred_seed, scales, [w_featmap, h_featmap], vis_folder, im_name+'_'+str(i))
             elif args.visualize == "pred":
                 image = dataset.load_image(im_name)
-                visualize_predictions(image, pred, seed, scales, [w_featmap, h_featmap], vis_folder, im_name)
+                for i, (pred, seed) in enumerate(zip(preds, seeds)):
+                    # Write the output image only once, on the last prediction
+                    image_name = None
+                    if i == len(preds) - 1:
+                        image_name = im_name
+                    visualize_predictions(image, pred, seed, scales, [w_featmap, h_featmap], vis_folder, image_name)
 
         # Save the prediction
         preds_dict[im_name] = pred
@@ -320,12 +337,20 @@ if __name__ == "__main__":
             continue
 
         # Compare prediction to GT boxes
-        ious = bbox_iou(torch.from_numpy(pred), torch.from_numpy(gt_bbxs))
-
-        if torch.any(ious >= 0.5):
-            corloc[im_id] = 1
+        ious = bbox_iou(torch.from_numpy(pred), torch.from_numpy(np.asarray(gt_bbxs)))
+
+        # Count every GT box recovered at IoU >= 0.5 rather than the
+        # original binary per-image CorLoc hit
+        corloc[im_id] = int(torch.sum(ious >= 0.50))
 
-        cnt += 1
+        # Normalize by the number of annotated GT boxes
+        cnt += len(inp[2])
+
         if cnt % 50 == 0:
             pbar.set_description(f"Found {int(np.sum(corloc))}/{cnt}")
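The evaluation change above replaces CorLoc's binary per-image hit with a count
of ground-truth boxes recovered at IoU >= 0.5. A self-contained sketch of the
quantity now accumulated (array shapes are assumptions):

    import torch
    from datasets import bbox_iou

    def gt_boxes_found(pred, gt_bbxs):
        # pred: one [x1, y1, x2, y2] box; gt_bbxs: (M, 4) array of GT boxes
        ious = bbox_iou(torch.from_numpy(pred), torch.from_numpy(gt_bbxs))
        return int(torch.sum(ious >= 0.5))

    # corloc[im_id] = gt_boxes_found(pred, gt_bbxs); cnt += len(gt_bbxs),
    # so the reported ratio becomes matched-GT / total-GT, a recall-like score.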
diff --git a/object_discovery.py b/object_discovery.py
index 42da074..88342f3 100644
--- a/object_discovery.py
+++ b/object_discovery.py
@@ -12,15 +12,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 import torch
 import scipy
 import scipy.ndimage
 
 import numpy as np
-from datasets import bbox_iou
+from datasets import _bbox_iou
 
+def aspect_ratio(box):
+    # Width/height of an [x1, y1, x2, y2] box; absolute values
+    # guard against unordered corners
+    xdiff = abs(box[0] - box[2])
+    ydiff = abs(box[1] - box[3])
+    return xdiff / ydiff
 
-def lost(feats, dims, scales, init_image_size, k_patches=100):
+def box_area(box):
+    xdiff = abs(box[0] - box[2])
+    ydiff = abs(box[1] - box[3])
+    return xdiff * ydiff
+
+def IOU_2D(box1, box2):
+    # Debugging helper: prints box areas alongside the IoU components
+    ious, inter, union = _bbox_iou(box1, box2)
+    for b in box2:
+        print(box_area(b))
+    print(box_area(box1), ious, inter, union)
+
+    return ious
+
+def lost(feats, dims, scales, init_image_size, k_patches=100, num_init_seeds=1, iou_threshold=0.1, num_sliding_windows=1):
     """
     Implementation of LOST method.
     Inputs
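Both helpers read boxes as [x1, y1, x2, y2]; lost() below keeps only boxes with
aspect_ratio > 1.0, i.e. wider than tall, presumably because KITTI objects such
as cars fit that shape. A quick worked example (values are illustrative only):

    import numpy as np
    from object_discovery import aspect_ratio, box_area

    box = np.array([0.0, 0.0, 100.0, 40.0])   # 100 wide, 40 tall
    assert aspect_ratio(box) == 2.5            # 100 / 40, kept by the filter
    assert box_area(box) == 4000.0             # 100 * 40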
@@ -38,23 +57,66 @@ def lost(feats, dims, scales, init_image_size, k_patches=100):
     # Compute the similarity
     A = (feats @ feats.transpose(1, 2)).squeeze()
 
+    # num_init_seeds == 0 means "expand every patch"
+    if num_init_seeds == 0:
+        num_init_seeds = len(A)
+
     # Compute the inverse degree centrality measure per patch
     sorted_patches, scores = patch_scoring(A)
 
     # Select the initial seed
-    seed = sorted_patches[0]
+    seeds = sorted_patches[0:num_init_seeds]
 
-    # Seed expansion
-    potentials = sorted_patches[:k_patches]
-    similars = potentials[A[seed, potentials] > 0.0]
-    M = torch.sum(A[similars, :], dim=0)
-
-    # Box extraction
-    pred, _ = detect_box(
-        M, seed, dims, scales=scales, initial_im_size=init_image_size[1:]
-    )
+    preds = []
+    filtered_seeds = []
 
-    return np.asarray(pred), A, scores, seed
+    # Seed expansion
+    for i, seed in enumerate(seeds):
+        # TODO: potentials should take into consideration distance from the init seed
+        potentials = sorted_patches[i:k_patches+i]
+        similars = potentials[A[seed, potentials] > 0.0]
+        M = torch.sum(A[similars, :], dim=0)
+
+        # Box extraction
+        pred, _ = detect_box(
+            M, seed, dims, scales=scales, initial_im_size=init_image_size[1:]
+        )
+        pred = np.asarray(pred)
+
+        # Keep only landscape boxes (width > height)
+        add_pred = aspect_ratio(pred) > 1.0
+
+        # TODO: if one bbox is completely inside another, then pick the smaller one
+        if len(preds) > 0 and add_pred:
+            idx_to_remove = -1
+            ious, inter, union = _bbox_iou(torch.from_numpy(pred), torch.from_numpy(np.asarray(preds)))
+
+            for j, p in enumerate(preds):
+                if box_area(pred) == union[j] or box_area(pred) > box_area(p):
+                    # The new prediction encloses the stored box, so do not add it
+                    add_pred = False
+                    break
+                elif box_area(p) == union[j] or box_area(pred) < box_area(p):
+                    # The stored box is the larger one; keep the new prediction
+                    add_pred = True
+                    #idx_to_remove = j  # removal of the enclosing box is disabled for now
+                    break
+            if idx_to_remove >= 0:
+                preds.pop(idx_to_remove)
+                ious = ious[ious != ious[idx_to_remove]]
+
+            # Reject near-duplicates of boxes that were already kept
+            add_pred = add_pred and not any(ious >= iou_threshold)
+
+        if add_pred:
+            filtered_seeds.append(seed)
+            preds.append(pred)
+
+    return np.asarray(preds), A, scores, filtered_seeds
 
 
 def patch_scoring(M, threshold=0.):
@@ -123,7 +185,6 @@ def detect_box(A, seed, dims, initial_im_size=None, scales=None):
 def dino_seg(attn, dims, patch_size, head=0):
     """
     Extraction of boxes based on the DINO segmentation method proposed in https://github.com/facebookresearch/dino.
-    Modified from https://github.com/facebookresearch/dino/blob/main/visualize_attention.py
     """
     w_featmap, h_featmap = dims
     nh = attn.shape[1]
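With the extended signature, callers opt into multiple boxes per image
(num_sliding_windows is accepted but not yet used). A hedged usage sketch;
feature shapes follow main_lost.py and the numbers here are assumptions:

    # feats: (1, N, d) patch features from DINO, N = w_featmap * h_featmap
    preds, A, scores, seeds = lost(
        feats,
        [w_featmap, h_featmap],
        scales,
        init_image_size,
        k_patches=100,
        num_init_seeds=5,      # expand the 5 least-connected patches
        iou_threshold=0.1,     # near-duplicate rejection (see loop above)
    )
    # preds: (P, 4) array with P <= 5 after the aspect-ratio/IoU filtering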
diff --git a/scripts/run-dataset.sh b/scripts/run-dataset.sh
new file mode 100644
index 0000000..3566356
--- /dev/null
+++ b/scripts/run-dataset.sh
@@ -0,0 +1,24 @@
+
+OUTPUT_PATH=/root/kitti/lost_output
+
+DINO_ARCH=vit_base
+LOST_FEATURES=k
+K_PATCHES=25
+PATCH_SIZE=16
+
+cd /root/lost/
+rm -rf $OUTPUT_PATH
+mkdir -p $OUTPUT_PATH
+echo $OUTPUT_PATH
+
+python main_lost.py \
+    --dataset KITTI \
+    --output_dir $OUTPUT_PATH \
+    --arch $DINO_ARCH \
+    --which_feature $LOST_FEATURES \
+    --k_patches $K_PATCHES \
+    --patch_size $PATCH_SIZE \
+    --visualize pred \
+    --num_init_seeds 1
+
+exit
\ No newline at end of file
diff --git a/scripts/run-single-image.sh b/scripts/run-single-image.sh
new file mode 100644
index 0000000..720f909
--- /dev/null
+++ b/scripts/run-single-image.sh
@@ -0,0 +1,57 @@
+declare -a images=(
+    #"000011" "004540" "004541" "007256" "007259" "007267" "007265" "007271"
+    #"000188" "000085" "000038" "000056" "000093" "000263"
+    #"000028" "000048" "000066" "004459" "000435" "003333" "000291"
+    "000003"
+)
+DATASET_PATH=/root/kitti/training/image_2/
+DINO_PATH=/root/lost/dino
+
+DINO_ARCH=vit_base
+LOST_FEATURES=k
+K_PATCHES=10
+PATCH_SIZE=16
+
+OUTPUT_PATH=/root/lost/outputs/samples
+rm -rf $OUTPUT_PATH
+
+for i in "${images[@]}"
+do
+    echo evaluating $i
+
+    #mkdir -p $OUTPUT_PATH/$i/dino
+    mkdir -p $OUTPUT_PATH/$i/lost
+    #mkdir -p $OUTPUT_PATH/$i/lost-dinoseg
+    #mkdir -p $OUTPUT_PATH/$i/images
+
+    cd /root/lost/
+
+    python main_lost.py \
+        --image_path $DATASET_PATH/$i.png \
+        --output_dir $OUTPUT_PATH/$i/lost \
+        --arch $DINO_ARCH \
+        --which_feature $LOST_FEATURES \
+        --k_patches $K_PATCHES \
+        --visualize pred \
+        --num_init_seeds 1
+
+    python main_lost.py \
+        --image_path $DATASET_PATH/$i.png \
+        --output_dir $OUTPUT_PATH/$i/lost \
+        --arch $DINO_ARCH \
+        --which_feature $LOST_FEATURES \
+        --k_patches $K_PATCHES \
+        --visualize fms \
+        --num_init_seeds 1
+
+    python main_lost.py \
+        --image_path $DATASET_PATH/$i.png \
+        --output_dir $OUTPUT_PATH/$i/lost \
+        --arch $DINO_ARCH \
+        --which_feature $LOST_FEATURES \
+        --k_patches $K_PATCHES \
+        --visualize seed_expansion \
+        --num_init_seeds 1
+
+    echo
+done
\ No newline at end of file
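The same flags can be swept programmatically; a sketch in Python mirroring
run-dataset.sh (all paths and values are assumptions copied from that script):

    import subprocess

    for n in (1, 3, 5):
        subprocess.run([
            "python", "main_lost.py",
            "--dataset", "KITTI",
            "--output_dir", f"/root/kitti/lost_output_seeds{n}",
            "--arch", "vit_base",
            "--which_feature", "k",
            "--k_patches", "25",
            "--patch_size", "16",
            "--visualize", "pred",
            "--num_init_seeds", str(n),
        ], check=True)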
""" w_featmap, h_featmap = dims # Plot the box - cv2.rectangle( - image, - (int(pred[0]), int(pred[1])), - (int(pred[2]), int(pred[3])), - (255, 0, 0), 3, - ) - + if not is_gt: + cv2.rectangle( + image, + (int(pred[0]), int(pred[1])), + (int(pred[2]), int(pred[3])), + (255, randint(0,255), randint(0,255)), 3, + ) + else: + cv2.rectangle( + image, + (int(pred[0]), int(pred[1])), + (int(pred[2]), int(pred[3])), + (0, 255, 0), 3, + ) + print("image.shape:",image.shape, "\npred_box: [x1,y1,x2,y2]", pred) # Plot the seed if plot_seed: s_ = np.unravel_index(seed.cpu().numpy(), (w_featmap, h_featmap)) @@ -45,10 +54,10 @@ def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name, (int(s_[1] * scales[1] + (size_[1] / 2)), int(s_[0] * scales[0] + (size_[0] / 2))), (0, 255, 0), -1, ) - - pltname = f"{vis_folder}/LOST_{im_name}.png" - Image.fromarray(image).save(pltname) - print(f"Predictions saved at {pltname}.") + if im_name is not None: + pltname = f"{vis_folder}/LOST_{im_name}.png" + Image.fromarray(image).save(pltname) + #print(f"Predictions saved at {pltname}.") def visualize_fms(A, seed, scores, dims, scales, output_folder, im_name): """ -- GitLab