Commit 6e4ef3b6 authored by Spencer Delcore

multi predictions for lost

parent 163e1cb7
# Copyright 2021 - Valeo Comfort and Driving Assistance - Oriane Siméoni @ valeo.ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import torch
import json
@@ -22,6 +22,98 @@ import skimage.io
from PIL import Image
from tqdm import tqdm
from torchvision import transforms as pth_transforms
import pickle
from os import listdir
from os.path import isfile, join
class GenericDataset:
    def __init__(self, data_arr, name):
        self.name = name
        self.data_arr = data_arr
        if self.name == 'KITTI':
            with open(r"/root/lost/Kitti2Coco/train/kitti_labels.pkl", "rb") as input_file:
                self.annots = pickle.load(input_file)
            # Keep only the images that actually have annotations
            self.data_arr = [p for p in self.data_arr
                             if p.split("/")[-1].split(".")[0] in self.annots]
        else:
            self.annots = None
        print(f"GenericDataset '{self.name}': {self.__len__()} images")
    def __getitem__(self, i):
        with open(self.data_arr[i], "rb") as f:
            img = Image.open(f)
            img = img.convert("RGB")
        im_name = self.data_arr[i].split("/")[-1].split(".")[0]
        # Record the PIL size before the transform turns the image into a tensor
        im_size = img.size
        img = transform(img)
        if self.annots is None:
            return [img, self.data_arr[i]]
        if self.name == 'KITTI':
            return [img, self.data_arr[i], self.annots[im_name]['annotations'],
                    im_size, self.annots[im_name]]
        return [img, self.data_arr[i], self.annots[im_name], im_size]
    def __len__(self):
        return len(self.data_arr)

    def extract_gt(self, targets, im_name):
        if self.annots is None:
            return None
        if self.name == 'KITTI':
            return None  # TODO: handle returning KITTI annotations
        im = self.annots[im_name]
        # Annotation layout: ['bbox_x1', 'bbox_y1', 'bbox_x2', 'bbox_y2', 'class', 'test']
        gt_bbxs = im[0:4]
        gt_clss = im[4]
        return np.asarray(gt_bbxs), gt_clss
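For orientation, a hypothetical usage sketch (not part of the commit; it assumes the hard-coded KITTI pickle path above exists):

# Build a GenericDataset over two KITTI frames and unpack one item.
paths = [
    "/root/kitti/training/image_2/000003.png",
    "/root/kitti/training/image_2/000011.png",
]
ds = GenericDataset(paths, "KITTI")
img, path, annotations, im_size, full_record = ds[0]
print(path, im_size, len(annotations))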
class ImageFolderDataset:
    def __init__(self, name, dir_path):
        self.dir_path = dir_path
        self.name = name
        self.dataloader_paths = []
        # Collect every file found in the directory
        print(f"Loading dataset {self.name}...")
        image_files = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
        for image_file in image_files:
            self.dataloader_paths.append(join(dir_path, image_file))
        self.dataloader = GenericDataset(self.dataloader_paths, self.name)
        print("Done initializing dataset.")

    def get_image_name(self, inp):
        return inp.split("/")[-1].split(".")[0]

    def load_image(self, im_name):
        if self.name == 'KITTI':
            ext = '.png'
        else:
            raise ValueError(f"Unknown image extension for dataset {self.name}")
        return skimage.io.imread(join(self.dir_path, im_name + ext))

    def extract_gt(self, targets, im_name):
        return self.dataloader.extract_gt(targets, im_name)
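A minimal usage sketch (again not part of the commit): GenericDataset implements __getitem__/__len__, so the wrapper's dataloader can be iterated directly.

dataset = ImageFolderDataset("KITTI", "/root/kitti/training/image_2/")
for inp in dataset.dataloader:
    img = inp[0]                               # transformed image tensor
    im_name = dataset.get_image_name(inp[1])   # e.g. "000003"
    break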
# Image transformation applied to all images
transform = pth_transforms.Compose(
@@ -279,7 +371,7 @@ def extract_gt_VOC(targets, remove_hards=False):
return np.asarray(gt_bbxs), gt_clss
-def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
+def _bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
# https://github.com/ultralytics/yolov5/blob/develop/utils/general.py
# Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
box2 = box2.T
@@ -317,7 +409,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=
+ (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2
) / 4 # center distance squared
if DIoU:
-                return iou - rho2 / c2  # DIoU
+                return iou - rho2 / c2, inter, union  # DIoU
elif (
CIoU
): # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
@@ -326,12 +418,16 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=
)
with torch.no_grad():
alpha = v / (v - iou + (1 + eps))
-                return iou - (rho2 / c2 + v * alpha)  # CIoU
+                return iou - (rho2 / c2 + v * alpha), inter, union  # CIoU
else: # GIoU https://arxiv.org/pdf/1902.09630.pdf
c_area = cw * ch + eps # convex area
-            return iou - (c_area - union) / c_area  # GIoU
+            return iou - (c_area - union) / c_area, inter, union  # GIoU
else:
-        return iou  # IoU
+        return iou, inter, union  # IoU
def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
return _bbox_iou(box1, box2, x1y1x2y2, GIoU, DIoU, CIoU, eps)[0]
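The rename keeps the public API stable: _bbox_iou now returns the intersection and union alongside the IoU (the multi-seed filtering in object_discovery.py needs them), while the bbox_iou wrapper preserves the original single-value contract for existing callers. A quick illustration (not in the commit):

import torch

box1 = torch.tensor([0., 0., 10., 10.])
boxes2 = torch.tensor([[5., 5., 15., 15.]])
iou, inter, union = _bbox_iou(box1, boxes2)
assert torch.allclose(iou, bbox_iou(box1, boxes2))
print(iou.item(), inter.item(), union.item())  # ~0.143, 25.0, ~175.0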
def select_coco_20k(sel_file, all_annotations_file):
print('Building COCO 20k dataset.')
......
@@ -25,7 +25,7 @@ from tqdm import tqdm
from PIL import Image
from networks import get_model
-from datasets import ImageDataset, Dataset, bbox_iou
+from datasets import ImageDataset, Dataset, ImageFolderDataset, bbox_iou
from visualizations import visualize_fms, visualize_predictions, visualize_seed_expansion
from object_discovery import lost, detect_box, dino_seg
@@ -54,7 +54,7 @@ if __name__ == "__main__":
"--dataset",
default="VOC07",
type=str,
-        choices=[None, "VOC07", "VOC12", "COCO20k"],
+        choices=[None, "VOC07", "VOC12", "COCO20k", "KITTI"],
help="Dataset name.",
)
parser.add_argument(
@@ -81,6 +81,7 @@ if __name__ == "__main__":
parser.add_argument("--no_hard", action="store_true", help="Only used in the case of the VOC_all setup (see the paper).")
parser.add_argument("--no_evaluation", action="store_true", help="Compute the evaluation.")
parser.add_argument("--save_predictions", default=True, type=bool, help="Save predicted bouding boxes.")
parser.add_argument("--num_init_seeds", default=1, type=int, help="Number of initial seeds to expand from.")
# Visualization
parser.add_argument(
@@ -126,6 +127,8 @@ if __name__ == "__main__":
# If an image_path is given, apply the method only to the image
if args.image_path is not None:
dataset = ImageDataset(args.image_path)
elif args.dataset == "KITTI":
        dataset = ImageFolderDataset("KITTI", "/root/kitti/training/image_2/")
else:
dataset = Dataset(args.dataset, args.set, args.no_hard)
@@ -164,12 +167,13 @@ if __name__ == "__main__":
# -------------------------------------------------------------------------------------------------------
# Loop over images
preds_dict = {}
gt_dict = {}
cnt = 0
corloc = np.zeros(len(dataset.dataloader))
pbar = tqdm(dataset.dataloader)
for im_id, inp in enumerate(pbar):
torch.cuda.empty_cache()
# ------------ IMAGE PROCESSING -------------------------------------------
img = inp[0]
init_image_size = img.shape
@@ -192,7 +196,9 @@ if __name__ == "__main__":
img = paded
# Move to gpu
-        img = img.cuda(non_blocking=True)
+        if device == torch.device("cuda"):
+            img = img.cuda(non_blocking=True)
# Size for transformers
w_featmap = img.shape[-2] // args.patch_size
h_featmap = img.shape[-1] // args.patch_size
@@ -283,19 +289,25 @@ if __name__ == "__main__":
# ------------ Apply LOST -------------------------------------------
if not args.dinoseg:
-        pred, A, scores, seed = lost(
-            feats,
-            [w_featmap, h_featmap],
-            scales,
-            init_image_size,
-            k_patches=args.k_patches,
-        )
+        preds, A, scores, seeds = lost(
+            feats,
+            [w_featmap, h_featmap],
+            scales,
+            init_image_size,
+            k_patches=args.k_patches,
+            num_init_seeds=args.num_init_seeds
+        )
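With multiple seeds, lost() now returns a batch of boxes rather than a single one. As a sketch of the expected shapes (assuming n predictions survive the filtering): preds is an np.ndarray of shape (n, 4) holding [x1, y1, x2, y2] boxes, and seeds is the list of the n seed patch indices that produced them.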
if not args.dinoseg:
# ------------ Visualizations -------------------------------------------
if args.visualize == "fms":
-            visualize_fms(A.clone().cpu().numpy(), seed, scores, [w_featmap, h_featmap], scales, vis_folder, im_name)
+            for i, (pred, seed) in enumerate(zip(preds, seeds)):
+                visualize_fms(A.clone().cpu().numpy(), seed, scores, [w_featmap, h_featmap], scales, vis_folder, im_name + '_' + str(i))
elif args.visualize == "seed_expansion":
+            for i, (pred, seed) in enumerate(zip(preds, seeds)):
image = dataset.load_image(im_name)
# Before expansion
@@ -306,11 +318,16 @@ if __name__ == "__main__":
scales=scales,
initial_im_size=init_image_size[1:],
)
-                visualize_seed_expansion(image, pred, seed, pred_seed, scales, [w_featmap, h_featmap], vis_folder, im_name)
+                visualize_seed_expansion(image, pred, seed, pred_seed, scales, [w_featmap, h_featmap], vis_folder, im_name + '_' + str(i))
elif args.visualize == "pred":
image = dataset.load_image(im_name)
-            visualize_predictions(image, pred, seed, scales, [w_featmap, h_featmap], vis_folder, im_name)
+            # Draw every box onto the same image; pass the image name only on the
+            # last iteration so visualize_predictions saves the file just once
+            for i, (pred, seed) in enumerate(zip(preds, seeds)):
+                image_name = im_name if i == len(preds) - 1 else None
+                visualize_predictions(image, pred, seed, scales, [w_featmap, h_featmap], vis_folder, image_name)
# Save the prediction
preds_dict[im_name] = pred
@@ -320,12 +337,20 @@ if __name__ == "__main__":
continue
# Compare prediction to GT boxes
-        ious = bbox_iou(torch.from_numpy(pred), torch.from_numpy(gt_bbxs))
-        if torch.any(ious >= 0.5):
-            corloc[im_id] = 1
-        cnt += 1
+        ious = bbox_iou(torch.from_numpy(pred), torch.from_numpy(np.asarray(gt_bbxs)))
+        if torch.any(ious >= 0.50):
+            # Count the matches at IoU >= 0.50 instead of setting a binary CorLoc flag
+            corloc[im_id] = 0
+            for i in ious:
+                if i >= 0.50:
+                    corloc[im_id] += 1
+        cnt += len(inp[2])  # count ground-truth boxes rather than images
if cnt % 50 == 0:
pbar.set_description(f"Found {int(np.sum(corloc))}/{cnt}")
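Note the change of meaning here: corloc[im_id] now holds a per-image count of IoU >= 0.50 matches and cnt accumulates ground-truth boxes, so the progress line reports matched/total boxes instead of the original per-image CorLoc. A toy check of the counting rule (not in the commit):

import torch
ious = torch.tensor([0.62, 0.31, 0.55])   # IoUs against the GT boxes of one image
print(int((ious >= 0.50).sum()))          # 2 matches contribute to corloc[im_id]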
......
@@ -12,15 +12,34 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
import scipy
import scipy.ndimage
import numpy as np

-from datasets import bbox_iou
+from datasets import _bbox_iou
def aspect_ratio(box):
    xdiff = abs(box[0] - box[2])  # absolute value to ignore negatives
    ydiff = abs(box[1] - box[3])
    return xdiff / ydiff

def box_area(box):
    xdiff = abs(box[0] - box[2])
    ydiff = abs(box[1] - box[3])
    return xdiff * ydiff

def IOU_2D(box1, box2):
    ious, inter, union = _bbox_iou(box1, box2)
    return ious
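The aspect-ratio and area helpers drive the new box filtering below. A quick check on an example box [x1, y1, x2, y2] (illustrative, not in the commit):

import numpy as np
b = np.array([10., 20., 60., 40.])
print(aspect_ratio(b))  # 50 / 20 = 2.5 -> passes the "wider than tall" (> 1.0) filter
print(box_area(b))      # 50 * 20 = 1000.0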
-def lost(feats, dims, scales, init_image_size, k_patches=100):
+def lost(feats, dims, scales, init_image_size, k_patches=100, num_init_seeds=1, iou_threshold=0.1, num_sliding_windows=1):
"""
Implementation of LOST method.
Inputs
@@ -38,23 +57,66 @@ def lost(feats, dims, scales, init_image_size, k_patches=100):
# Compute the similarity
A = (feats @ feats.transpose(1, 2)).squeeze()
    if num_init_seeds == 0:
        # 0 means: use every patch as an initial seed
        num_init_seeds = len(A)
# Compute the inverse degree centrality measure per patch
sorted_patches, scores = patch_scoring(A)
    # Select the initial seeds
-    seed = sorted_patches[0]
+    seeds = sorted_patches[0:num_init_seeds]

-    # Seed expansion
-    potentials = sorted_patches[:k_patches]
-    similars = potentials[A[seed, potentials] > 0.0]
-    M = torch.sum(A[similars, :], dim=0)
-
-    # Box extraction
-    pred, _ = detect_box(
-        M, seed, dims, scales=scales, initial_im_size=init_image_size[1:]
-    )
-
-    return np.asarray(pred), A, scores, seed
+    preds = []
+    filtered_seeds = []
    # Seed expansion: run LOST's expansion from each of the top seeds
    for i, seed in enumerate(seeds):
        potentials = sorted_patches[i:k_patches + i]
        # TODO: potentials should take the distance from the initial seed into account
        similars = potentials[A[seed, potentials] > 0.0]
        M = torch.sum(A[similars, :], dim=0)

        # Box extraction
        pred, _ = detect_box(
            M, seed, dims, scales=scales, initial_im_size=init_image_size[1:]
        )
        pred = np.asarray(pred)

        # Keep only boxes that are wider than they are tall
        add_pred = aspect_ratio(pred) > 1.0

        # TODO: if one bbox is completely inside another, keep the smaller one
        if len(preds) > 0 and add_pred:
            idx_to_remove = -1
            ious, inter, union = _bbox_iou(torch.from_numpy(pred), torch.from_numpy(np.asarray(preds)))
            for j, p in enumerate(preds):
                if box_area(pred) == union[j] or box_area(pred) > box_area(p):
                    # the new box encloses (or is larger than) a stored one: do not add it
                    #add_pred = False
                    break
                elif box_area(p) == union[j] or box_area(pred) < box_area(p):
                    # a stored box encloses (or is larger than) the new one: keep the new one
                    add_pred = True
                    #idx_to_remove = j
                    break
            if idx_to_remove >= 0:
                preds.pop(idx_to_remove)
                ious = ious[ious != ious[idx_to_remove]]

            # Reject the box if it overlaps an already kept prediction too much
            add_pred = add_pred and not any(ious >= iou_threshold)

        if add_pred:
            filtered_seeds.append(seed)
            preds.append(pred)

    return np.asarray(preds), A, scores, filtered_seeds
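The net effect is a greedy non-maximum suppression over LOST's per-seed boxes: a candidate survives only if it is landscape-shaped and its IoU with every kept box stays below iou_threshold. A toy illustration (not in the commit):

import numpy as np
import torch
kept = [np.array([0., 0., 100., 50.])]
candidate = np.array([10., 5., 110., 55.])  # wide box, heavily overlapping the kept one
ious, _, _ = _bbox_iou(torch.from_numpy(candidate), torch.from_numpy(np.asarray(kept)))
print(aspect_ratio(candidate) > 1.0 and not any(ious >= 0.1))  # False: IoU ~0.68 >= 0.1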
def patch_scoring(M, threshold=0.):
@@ -123,7 +185,6 @@ def detect_box(A, seed, dims, initial_im_size=None, scales=None):
def dino_seg(attn, dims, patch_size, head=0):
"""
Extraction of boxes based on the DINO segmentation method proposed in https://github.com/facebookresearch/dino.
Modified from https://github.com/facebookresearch/dino/blob/main/visualize_attention.py
"""
w_featmap, h_featmap = dims
nh = attn.shape[1]
......
OUTPUT_PATH=/root/kitti/lost_output
DINO_ARCH=vit_base
LOST_FEATURES=k
K_PATCHES=25
PATCH_SIZE=16
cd /root/lost/
rm -rf $OUTPUT_PATH
mkdir -p $OUTPUT_PATH
echo $OUTPUT_PATH
python main_lost.py \
--dataset KITTI \
--output_dir $OUTPUT_PATH \
--arch $DINO_ARCH \
--which_feature $LOST_FEATURES \
--k_patches $K_PATCHES \
--patch_size $PATCH_SIZE \
--visualize pred \
--num_init_seeds 1
exit
\ No newline at end of file
declare -a images=(
#"000011" "004540" "004541" "007256" "007259" "007267" "007265" "007271"
#"000188" "000085" "000038" "000056" "000093" "000263"
#"000028" "000048" "000066" "004459" "000435" "003333" "000291"
"000003"
)
DATASET_PATH=/root/kitti/training/image_2/
DINO_PATH=/root/lost/dino
DINO_ARCH=vit_base
LOST_FEATURES=k
K_PATCHES=10
PATCH_SIZE=16
OUTPUT_PATH=/root/lost/outputs/samples
rm -rf $OUTPUT_PATH
for i in "${images[@]}"
do
echo evaluating $i
#mkdir -p $OUTPUT_PATH/$i/dino
mkdir -p $OUTPUT_PATH/$i/lost
#mkdir -p $OUTPUT_PATH/$i/lost-dinoseg
#mkdir -p $OUTPUT_PATH/$i/images
cd /root/lost/
python main_lost.py \
--image_path $DATASET_PATH/$i.png \
--output_dir $OUTPUT_PATH/$i/lost \
--arch $DINO_ARCH \
--which_feature $LOST_FEATURES \
--k_patches $K_PATCHES \
--visualize pred \
--num_init_seeds 1
python main_lost.py \
--image_path $DATASET_PATH/$i.png \
--output_dir $OUTPUT_PATH/$i/lost \
--arch $DINO_ARCH \
--which_feature $LOST_FEATURES \
--k_patches $K_PATCHES \
--visualize fms \
--num_init_seeds 1
python main_lost.py \
--image_path $DATASET_PATH/$i.png \
--output_dir $OUTPUT_PATH/$i/lost \
--arch $DINO_ARCH \
--which_feature $LOST_FEATURES \
--k_patches $K_PATCHES \
--visualize seed_expansion \
--num_init_seeds 1
echo
done
\ No newline at end of file
@@ -18,23 +18,32 @@ import skimage.io
import numpy as np
import torch.nn as nn
from PIL import Image
from random import randint
import matplotlib.pyplot as plt
-def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name, plot_seed=False):
+def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name, plot_seed=False, is_gt=False):
"""
Visualization of the predicted box and the corresponding seed patch.
"""
w_featmap, h_featmap = dims
# Plot the box
-    cv2.rectangle(
-        image,
-        (int(pred[0]), int(pred[1])),
-        (int(pred[2]), int(pred[3])),
-        (255, 0, 0), 3,
-    )
+    if not is_gt:
+        # Predictions: fixed red channel, random green/blue so that multiple
+        # boxes drawn on the same image are distinguishable
+        cv2.rectangle(
+            image,
+            (int(pred[0]), int(pred[1])),
+            (int(pred[2]), int(pred[3])),
+            (255, randint(0, 255), randint(0, 255)), 3,
+        )
+    else:
+        # Ground-truth boxes are drawn in pure green
+        cv2.rectangle(
+            image,
+            (int(pred[0]), int(pred[1])),
+            (int(pred[2]), int(pred[3])),
+            (0, 255, 0), 3,
+        )
print("image.shape:",image.shape, "\npred_box: [x1,y1,x2,y2]", pred)
# Plot the seed
if plot_seed:
s_ = np.unravel_index(seed.cpu().numpy(), (w_featmap, h_featmap))
@@ -45,10 +54,10 @@ def visualize_predictions(image, pred, seed, scales, dims, vis_folder, im_name,
(int(s_[1] * scales[1] + (size_[1] / 2)), int(s_[0] * scales[0] + (size_[0] / 2))),
(0, 255, 0), -1,
)
pltname = f"{vis_folder}/LOST_{im_name}.png"
Image.fromarray(image).save(pltname)
print(f"Predictions saved at {pltname}.")
if im_name is not None:
pltname = f"{vis_folder}/LOST_{im_name}.png"
Image.fromarray(image).save(pltname)
#print(f"Predictions saved at {pltname}.")
def visualize_fms(A, seed, scores, dims, scales, output_folder, im_name):
"""
......