From 6e4ef3b6efe95fe2add6ffa392810107e1781c8e Mon Sep 17 00:00:00 2001
From: Spencer Delcore <sdelcore@uwaterloo.ca>
Date: Fri, 17 Mar 2023 20:55:42 -0400
Subject: [PATCH] Support multiple initial-seed predictions for LOST

---
 datasets.py                 | 134 +++++++++++++++++++++++++++++++-----
 main_lost.py                |  59 +++++++++++-----
 object_discovery.py         |  89 ++++++++++++++++++++----
 scripts/run-dataset.sh      |  24 +++++++
 scripts/run-single-image.sh |  57 +++++++++++++++
 visualizations.py           |  33 +++++----
 6 files changed, 334 insertions(+), 62 deletions(-)
 create mode 100644 scripts/run-dataset.sh
 create mode 100644 scripts/run-single-image.sh

diff --git a/datasets.py b/datasets.py
index 35646ac..0d4e239 100755
--- a/datasets.py
+++ b/datasets.py
@@ -1,17 +1,17 @@
-# Copyright 2021 - Valeo Comfort and Driving Assistance - Oriane Siméoni @ valeo.ai
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+# Copyright 2021 - Valeo Comfort and Driving Assistance - Oriane Siméoni @ valeo.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 import torch
 import json
@@ -22,6 +22,98 @@ import skimage.io
 from PIL import Image
 from tqdm import tqdm
 from torchvision import transforms as pth_transforms
+import pickle
+
+
+from os import listdir
+from os.path import isfile, join
+
+class GenericDataset:
+    def __init__(self, data_arr, name):
+        self.name = name
+        self.data_arr = data_arr
+        if self.name == 'KITTI':
+            with open(r"/root/lost/Kitti2Coco/train/kitti_labels.pkl", "rb") as input_file:
+                self.annots = pickle.load(input_file)
+
+            # Keep only images that actually have an annotation entry
+            for path in self.data_arr.copy():
+                im_name = path.split("/")[-1].split(".")[0]
+                if im_name not in self.annots:
+                    self.data_arr.remove(path)
+        else:
+            self.annots = None
+
+        print(f"{self.name}: {len(self)} images")
+
+    def __getitem__(self, i):
+        with open(self.data_arr[i], "rb") as f:
+            img = Image.open(f)
+            img = img.convert("RGB")
+            im_name = self.data_arr[i].split("/")[-1].split(".")[0]
+            # Record the PIL size before the tensor transform
+            size = img.size
+
+            img = transform(img)
+            if self.annots is None:
+                return [img, self.data_arr[i]]
+            if self.name == 'KITTI':
+                return [img, self.data_arr[i], self.annots[im_name]['annotations'], size, self.annots[im_name]]
+            return [img, self.data_arr[i], self.annots[im_name], size]
+
+    def __len__(self):
+        return len(self.data_arr)
+    
+    def extract_gt(self, targets, im_name):
+        if self.annots is None:
+            return None
+
+        if self.name == 'KITTI':
+            return None  # TODO: handle returning KITTI annotations
+
+        im = self.annots[im_name]
+        # {"labels": ['bbox_x1','bbox_y1','bbox_x2','bbox_y2','class', 'test']}
+        gt_bbxs = im[0:4]
+        gt_clss = im[4]
+
+        return np.asarray(gt_bbxs), gt_clss
+        
+
+class ImageFolderDataset:
+    def __init__(self, name, dir_path):
+        
+        self.dir_path = dir_path
+        self.name = name
+        self.dataloader_paths = []
+
+        # Read the image
+        print("Loading dataset..")
+        print(self.name)
+        image_files = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
+        for image_file in image_files:
+            image_dir = join(dir_path, image_file)
+            self.dataloader_paths.append(image_dir)
+        self.dataloader = GenericDataset(self.dataloader_paths, self.name)
+
+        print("done initializing dataset.")
+
+    def get_image_name(self, inp):
+        return inp.split("/")[-1].split(".")[0]
+
+    def load_image(self, im_name):
+        # Assumption: KITTI images are .png; default to .jpg for other folders
+        ext = '.png' if self.name == 'KITTI' else '.jpg'
+        return skimage.io.imread(join(self.dir_path, im_name + ext))
+
+    def extract_gt(self, targets, im_name):
+        return self.dataloader.extract_gt(targets, im_name)
+
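+# Example usage (a sketch; assumes the KITTI paths wired in above):
+#
+#   dataset = ImageFolderDataset("KITTI", "/root/kitti/training/image_2/")
+#   img, path, annots, size, meta = dataset.dataloader[0]
+#   image = dataset.load_image(dataset.get_image_name(path))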
 
 # Image transformation applied to all images
 transform = pth_transforms.Compose(
@@ -279,7 +371,7 @@ def extract_gt_VOC(targets, remove_hards=False):
     return np.asarray(gt_bbxs), gt_clss
 
 
-def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
+def _bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
     # https://github.com/ultralytics/yolov5/blob/develop/utils/general.py
     # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
     box2 = box2.T
@@ -317,7 +409,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=
                 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
             ) / 4  # center distance squared
             if DIoU:
-                return iou - rho2 / c2  # DIoU
+                return iou - rho2 / c2, inter, union  # DIoU
             elif (
                 CIoU
             ):  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
@@ -326,12 +418,16 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=
                 )
                 with torch.no_grad():
                     alpha = v / (v - iou + (1 + eps))
-                return iou - (rho2 / c2 + v * alpha)  # CIoU
+                return iou - (rho2 / c2 + v * alpha), inter, union  # CIoU
         else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
             c_area = cw * ch + eps  # convex area
-            return iou - (c_area - union) / c_area  # GIoU
+            return iou - (c_area - union) / c_area, inter, union  # GIoU
     else:
-        return iou  # IoU
+        return iou, inter, union  # IoU
+
+def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
+    # Backwards-compatible wrapper: callers that only need the IoU keep the old signature.
+    return _bbox_iou(box1, box2, x1y1x2y2, GIoU, DIoU, CIoU, eps)[0]
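+
+# Example (a sketch): box1 is a 4-vector, box2 an (n, 4) tensor.
+#   iou = bbox_iou(torch.tensor([0., 0., 10., 10.]),
+#                  torch.tensor([[5., 5., 10., 10.]]))   # -> tensor([0.2500])
+#   iou, inter, union = _bbox_iou(box1, box2)            # full variant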
+
 
 def select_coco_20k(sel_file, all_annotations_file):
     print('Building COCO 20k dataset.')
diff --git a/main_lost.py b/main_lost.py
index c4f2938..898ff9b 100755
--- a/main_lost.py
+++ b/main_lost.py
@@ -25,7 +25,7 @@ from tqdm import tqdm
 from PIL import Image
 
 from networks import get_model
-from datasets import ImageDataset, Dataset, bbox_iou
+from datasets import ImageDataset, Dataset, ImageFolderDataset, bbox_iou
 from visualizations import visualize_fms, visualize_predictions, visualize_seed_expansion
 from object_discovery import lost, detect_box, dino_seg
 
@@ -54,7 +54,7 @@ if __name__ == "__main__":
         "--dataset",
         default="VOC07",
         type=str,
-        choices=[None, "VOC07", "VOC12", "COCO20k"],
+        choices=[None, "VOC07", "VOC12", "COCO20k", "KITTI"],
         help="Dataset name.",
     )
     parser.add_argument(
@@ -81,6 +81,7 @@ if __name__ == "__main__":
     parser.add_argument("--no_hard", action="store_true", help="Only used in the case of the VOC_all setup (see the paper).")
     parser.add_argument("--no_evaluation", action="store_true", help="Compute the evaluation.")
     parser.add_argument("--save_predictions", default=True, type=bool, help="Save predicted bouding boxes.")
+    parser.add_argument("--num_init_seeds", default=1, type=int, help="Number of initial seeds to expand from.")
 
     # Visualization
     parser.add_argument(
@@ -126,6 +127,8 @@ if __name__ == "__main__":
     # If an image_path is given, apply the method only to the image
     if args.image_path is not None:
         dataset = ImageDataset(args.image_path)
+    elif args.dataset == "KITTI":
+        dataset = ImageFolderDataset("KITTI", "/root/kitti/training/image_2/")
     else:
         dataset = Dataset(args.dataset, args.set, args.no_hard)
 
@@ -164,12 +167,13 @@ if __name__ == "__main__":
     # -------------------------------------------------------------------------------------------------------
     # Loop over images
     preds_dict = {}
+    gt_dict = {}
     cnt = 0
     corloc = np.zeros(len(dataset.dataloader))
     
     pbar = tqdm(dataset.dataloader)
     for im_id, inp in enumerate(pbar):
-
+        # Release cached GPU memory between images (no-op when CUDA is uninitialized)
+        torch.cuda.empty_cache()
         # ------------ IMAGE PROCESSING -------------------------------------------
         img = inp[0]
         init_image_size = img.shape
@@ -192,7 +196,9 @@ if __name__ == "__main__":
         img = paded
 
         # Move to gpu
-        img = img.cuda(non_blocking=True)
+        if device == torch.device("cuda"):
+            img = img.cuda(non_blocking=True)
+        
         # Size for transformers
         w_featmap = img.shape[-2] // args.patch_size
         h_featmap = img.shape[-1] // args.patch_size
@@ -283,19 +289,25 @@ if __name__ == "__main__":
 
         # ------------ Apply LOST -------------------------------------------
         if not args.dinoseg:
-            pred, A, scores, seed = lost(
-                feats,
-                [w_featmap, h_featmap],
-                scales,
-                init_image_size,
-                k_patches=args.k_patches,
+            preds, A, scores, seeds = lost(
+                feats,
+                [w_featmap, h_featmap],
+                scales,
+                init_image_size,
+                k_patches=args.k_patches,
+                num_init_seeds=args.num_init_seeds,
             )
 
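+        # `lost` now returns one predicted box and one seed per kept initial
+        # seed (after the aspect-ratio and IoU filtering in object_discovery.py).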
+        if not args.dinoseg:
             # ------------ Visualizations -------------------------------------------
             if args.visualize == "fms":
-                visualize_fms(A.clone().cpu().numpy(), seed, scores, [w_featmap, h_featmap], scales, vis_folder, im_name)
+                for i, seed in enumerate(seeds):
+                    visualize_fms(A.clone().cpu().numpy(), seed, scores, [w_featmap, h_featmap], scales, vis_folder, im_name + '_' + str(i))
 
             elif args.visualize == "seed_expansion":
+                # NOTE: the expansion/visualization below stays outside this
+                # loop, so only the last (pred, seed) pair is expanded and saved.
+                for i, x in enumerate(zip(preds, seeds)):
+                    pred, seed = x
                 image = dataset.load_image(im_name)
 
                 # Before expansion
@@ -306,11 +318,16 @@ if __name__ == "__main__":
                     scales=scales,
                     initial_im_size=init_image_size[1:],
                 )
-                visualize_seed_expansion(image, pred, seed, pred_seed, scales, [w_featmap, h_featmap], vis_folder, im_name)
+                visualize_seed_expansion(image, pred, seed, pred_seed, scales, [w_featmap, h_featmap], vis_folder, im_name+'_'+str(i))
 
             elif args.visualize == "pred":
                 image = dataset.load_image(im_name)
-                visualize_predictions(image, pred, seed, scales, [w_featmap, h_featmap], vis_folder, im_name)
+                for i, x in enumerate(zip(preds, seeds)):
+                    pred, seed = x
+                    # Boxes are drawn onto `image` in place; pass the name only
+                    # on the last iteration so the fully annotated image is saved once.
+                    image_name = None
+                    if i == len(preds) - 1:
+                        image_name = im_name
+                    visualize_predictions(image, pred, seed, scales, [w_featmap, h_featmap], vis_folder, image_name)
 
         # Save the prediction
-        preds_dict[im_name] = pred
+        # With LOST there may be several boxes; dino-seg still yields one `pred`
+        boxes = preds if not args.dinoseg else [pred]
+        preds_dict[im_name] = boxes
@@ -320,12 +337,20 @@ if __name__ == "__main__":
             continue
 
         # Compare prediction to GT boxes
-        ious = bbox_iou(torch.from_numpy(pred), torch.from_numpy(gt_bbxs))
+        # Count the predictions matching a GT box with IoU >= 0.5
+        # (bbox_iou expects box1 of shape (4,) and box2 of shape (n, 4))
+        for p in boxes:
+            ious = bbox_iou(torch.from_numpy(np.asarray(p)), torch.from_numpy(np.asarray(gt_bbxs)))
+            if torch.any(ious >= 0.50):
+                corloc[im_id] += 1
 
-        if torch.any(ious >= 0.5):
-            corloc[im_id] = 1
 
-        cnt += 1
+        # cnt now counts GT boxes rather than images
+        cnt += len(inp[2])
+
         if cnt % 50 == 0:
             pbar.set_description(f"Found {int(np.sum(corloc))}/{cnt}")
 
diff --git a/object_discovery.py b/object_discovery.py
index 42da074..88342f3 100644
--- a/object_discovery.py
+++ b/object_discovery.py
@@ -12,15 +12,34 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 import torch
 import scipy
 import scipy.ndimage
 
 import numpy as np
-from datasets import bbox_iou
+from datasets import _bbox_iou
 
+def aspect_ratio(box):
+    # abs() so the [x1, y1, x2, y2] coordinate order does not matter
+    xdiff = abs(box[0] - box[2])
+    ydiff = abs(box[1] - box[3])
+    return xdiff / ydiff
 
-def lost(feats, dims, scales, init_image_size, k_patches=100):
+def box_area(box):
+    xdiff = abs(box[0] - box[2])
+    ydiff = abs(box[1] - box[3])
+    return xdiff * ydiff
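+
+# e.g. for box = [0, 0, 4, 2] (x1, y1, x2, y2): box_area(box) == 8, aspect_ratio(box) == 2.0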
+
+def IOU_2D(box1, box2):
+    # Thin wrapper kept for experimentation; returns only the IoU values
+    ious, inter, union = _bbox_iou(box1, box2)
+    return ious
+
+def lost(feats, dims, scales, init_image_size, k_patches=100, num_init_seeds=1, iou_threshold=0.1, num_sliding_windows=1):
     """
     Implementation of LOST method.
     Inputs
@@ -38,23 +57,66 @@ def lost(feats, dims, scales, init_image_size, k_patches=100):
     # Compute the similarity
     A = (feats @ feats.transpose(1, 2)).squeeze()
 
+    # num_init_seeds == 0 means: expand from every patch
+    if num_init_seeds == 0:
+        num_init_seeds = len(A)
+
     # Compute the inverse degree centrality measure per patch
     sorted_patches, scores = patch_scoring(A)
 
     # Select the initial seed
-    seed = sorted_patches[0]
+    seeds = sorted_patches[0:num_init_seeds]
 
-    # Seed expansion
-    potentials = sorted_patches[:k_patches]
-    similars = potentials[A[seed, potentials] > 0.0]
-    M = torch.sum(A[similars, :], dim=0)
-
-    # Box extraction
-    pred, _ = detect_box(
-        M, seed, dims, scales=scales, initial_im_size=init_image_size[1:]
-    )
+    preds = []
+    filtered_seeds = []
 
-    return np.asarray(pred), A, scores, seed
+    # Seed expansion
+    for i, seed in enumerate(seeds):
+        # Slide the window of candidate patches along with the seed rank
+        # TODO: potentials should take the distance from the initial seed into account
+        potentials = sorted_patches[i:k_patches + i]
+        similars = potentials[A[seed, potentials] > 0.0]
+        M = torch.sum(A[similars, :], dim=0)
+
+        # Box extraction
+        pred, _ = detect_box(
+            M, seed, dims, scales=scales, initial_im_size=init_image_size[1:]
+        )
+        pred = np.asarray(pred)
+
+        # Heuristic filter: keep only boxes wider than they are tall
+        add_pred = aspect_ratio(pred) > 1.0
+        ious = 0
+
+        # TODO: pick by IoU quality; if one bbox is completely inside
+        # another, keep the smaller one
+        if len(preds) > 0 and add_pred:
+            idx_to_remove = -1  # currently never set; pending the TODO above
+            ious, inter, union = _bbox_iou(torch.from_numpy(pred), torch.from_numpy(np.asarray(preds)))
+
+            # use `j` so the outer seed index `i` is not shadowed
+            for j, p in enumerate(preds):
+                if box_area(pred) == union[j] or box_area(pred) > box_area(p):
+                    # the new box encloses (or is larger than) a stored one; do not add it
+                    break
+                elif box_area(p) == union[j] or box_area(pred) < box_area(p):
+                    # the stored box is larger; the new box is a candidate replacement
+                    add_pred = True
+                    break
+            if idx_to_remove >= 0:
+                preds.pop(idx_to_remove)
+                ious = ious[ious != ious[idx_to_remove]]
+
+            # reject any box overlapping an already kept prediction
+            add_pred = add_pred and not any(ious >= iou_threshold)
+
+        if add_pred:
+            filtered_seeds.append(seed)
+            preds.append(pred)
+
+    #print("Generated", len(preds), "predictions")
+    return np.asarray(preds), A, scores, filtered_seeds
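+
+# Shape sketch: with num_init_seeds=3, `preds` is an (m, 4) array with m <= 3
+# after filtering, and `filtered_seeds` holds the m matching patch indices.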
 
 
 def patch_scoring(M, threshold=0.):
@@ -123,7 +185,6 @@ def detect_box(A, seed, dims, initial_im_size=None, scales=None):
 def dino_seg(attn, dims, patch_size, head=0):
     """
     Extraction of boxes based on the DINO segmentation method proposed in https://github.com/facebookresearch/dino. 
-    Modified from https://github.com/facebookresearch/dino/blob/main/visualize_attention.py
     """
     w_featmap, h_featmap = dims
     nh = attn.shape[1]
diff --git a/scripts/run-dataset.sh b/scripts/run-dataset.sh
new file mode 100644
index 0000000..3566356
--- /dev/null
+++ b/scripts/run-dataset.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+OUTPUT_PATH=/root/kitti/lost_output
+
+DINO_ARCH=vit_base
+LOST_FEATURES=k
+K_PATCHES=25
+PATCH_SIZE=16
+
+cd /root/lost/
+rm -rf $OUTPUT_PATH
+mkdir -p $OUTPUT_PATH
+echo $OUTPUT_PATH
+
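+# --num_init_seeds 0 would expand from every patch (see object_discovery.lost);
+# a single seed reproduces the original LOST behaviour.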
+python main_lost.py \
+    --dataset KITTI \
+    --output_dir $OUTPUT_PATH \
+    --arch $DINO_ARCH \
+    --which_feature $LOST_FEATURES \
+    --k_patches $K_PATCHES \
+    --patch_size $PATCH_SIZE \
+    --visualize pred \
+    --num_init_seeds 1
+
+exit
\ No newline at end of file
diff --git a/scripts/run-single-image.sh b/scripts/run-single-image.sh
new file mode 100644
index 0000000..720f909
--- /dev/null
+++ b/scripts/run-single-image.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+declare -a images=(
+    #"000011" "004540" "004541" "007256" "007259" "007267" "007265" "007271"
+    #"000188" "000085" "000038" "000056" "000093" "000263"
+    #"000028" "000048" "000066" "004459" "000435" "003333" "000291"
+    "000003"
+)
+DATASET_PATH=/root/kitti/training/image_2/
+DINO_PATH=/root/lost/dino
+
+DINO_ARCH=vit_base
+LOST_FEATURES=k
+K_PATCHES=10
+PATCH_SIZE=16
+
+OUTPUT_PATH=/root/lost/outputs/samples
+rm -rf $OUTPUT_PATH
+
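+# Each image is processed three times, once per visualization mode
+# (pred, fms, seed_expansion); all runs share the same output folder.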
+for i in "${images[@]}"
+do
+    echo evaluating $i
+
+    #mkdir -p $OUTPUT_PATH/$i/dino
+    mkdir -p $OUTPUT_PATH/$i/lost
+    #mkdir -p $OUTPUT_PATH/$i/lost-dinoseg
+    #mkdir -p $OUTPUT_PATH/$i/images
+
+    cd /root/lost/
+    
+    python main_lost.py \
+        --image_path $DATASET_PATH/$i.png \
+        --output_dir $OUTPUT_PATH/$i/lost \
+        --arch $DINO_ARCH \
+        --which_feature $LOST_FEATURES \
+        --k_patches $K_PATCHES \
+        --visualize pred \
+        --num_init_seeds 1
+
+    python main_lost.py \
+        --image_path $DATASET_PATH/$i.png \
+        --output_dir $OUTPUT_PATH/$i/lost \
+        --arch $DINO_ARCH \
+        --which_feature $LOST_FEATURES \
+        --k_patches $K_PATCHES \
+        --visualize fms \
+        --num_init_seeds 1
+
+    python main_lost.py \
+        --image_path $DATASET_PATH/$i.png \
+        --output_dir $OUTPUT_PATH/$i/lost \
+        --arch $DINO_ARCH \
+        --which_feature $LOST_FEATURES \
+        --k_patches $K_PATCHES \
+        --visualize seed_expansion \
+        --num_init_seeds 1
+
+    echo 
+done
\ No newline at end of file
diff --git a/visualizations.py b/visualizations.py
index 867b311..a4ec699 100755
--- a/visualizations.py
+++ b/visualizations.py
@@ -18,23 +18,32 @@ import skimage.io
 import numpy as np
 import torch.nn as nn
 from PIL import Image
+from random import randint
 
 import matplotlib.pyplot as plt
 
-def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name, plot_seed=False):
+def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name, plot_seed=False, is_gt=False):
     """
     Visualization of the predicted box and the corresponding seed patch.
     """
     w_featmap, h_featmap = dims
 
     # Plot the box
-    cv2.rectangle(
-        image,
-        (int(pred[0]), int(pred[1])),
-        (int(pred[2]), int(pred[3])),
-        (255, 0, 0), 3,
-    )
-
+    if not is_gt:
+        # Random G/B components so multiple predictions are distinguishable
+        cv2.rectangle(
+            image,
+            (int(pred[0]), int(pred[1])),
+            (int(pred[2]), int(pred[3])),
+            (255, randint(0, 255), randint(0, 255)), 3,
+        )
+    else:
+        # Ground-truth boxes are drawn in green
+        cv2.rectangle(
+            image,
+            (int(pred[0]), int(pred[1])),
+            (int(pred[2]), int(pred[3])),
+            (0, 255, 0), 3,
+        )
+    print("image.shape:", image.shape, "\npred_box [x1, y1, x2, y2]:", pred)
+
     # Plot the seed
     if plot_seed:
         s_ = np.unravel_index(seed.cpu().numpy(), (w_featmap, h_featmap))
@@ -45,10 +54,10 @@ def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name,
             (int(s_[1] * scales[1] + (size_[1] / 2)), int(s_[0] * scales[0] + (size_[0] / 2))),
             (0, 255, 0), -1,
         )
-
-    pltname = f"{vis_folder}/LOST_{im_name}.png"
-    Image.fromarray(image).save(pltname)
-    print(f"Predictions saved at {pltname}.")
+    if im_name is not None:
+        pltname = f"{vis_folder}/LOST_{im_name}.png"
+        Image.fromarray(image).save(pltname)
+        print(f"Predictions saved at {pltname}.")
 
 def visualize_fms(A, seed, scores, dims, scales, output_folder, im_name):
     """
-- 
GitLab