diff --git a/README.md b/README.md
index 40a5f8743e71a1c23f80cde5679f863eefc0bccf..cd46abcf8718f687203c3edeab44313e10f2e786 100644
--- a/README.md
+++ b/README.md
@@ -134,13 +134,48 @@ python main_lost.py --dataset VOC07 --set trainval --arch resnet50_imagenet #Res
 ```
 
 ## Towards unsupervised object detection
-In this work, we additionally use LOST predictions to train object detection models without any human supervision. We explore two scenarios: class-agnostic (CA) and (pseudo) class-aware training of object detectors (OD).
+In this work, we additionally use LOST predictions to train object detection models without any human supervision. We explore two scenarios: class-agnostic (CAD) and (pseudo) class-aware training of object detectors (OD). The next sections present the different steps to reproduce our results.
+
+### Installation
+We use the [detectron2](https://github.com/facebookresearch/detectron2) framework to train a Faster R-CNN model with LOST predictions as pseudo-gt. In order to reproduce our results, please install the framework using the following commands.
+```bash
+git clone https://github.com/facebookresearch/detectron2.git
+python -m pip install -e detectron2
+```
+
+Then copy the LOST-specific files into the detectron2 framework:
+```bash
+cp tools/*.py detectron2/.
+mkdir detectron2/configs/LOST
+cp tools/configs/* detectron2/configs/LOST/.
+```
+
+### Training a Class-Agnostic Detector (CAD) with LOST pseudo-annotations
+
+* Before launching a training, the data must be formatted to fit the detectron2 and COCO styles. The following command lines perform this formatting for boxes predicted with LOST.
+```bash
+cd detectron2;
+
+# Format pseudo-boxes data to fit detectron2
+python prepare_voc_LOST_CAD_pseudo_boxes_in_detectron2_format.py --year 2007 --pboxes ../outputs/VOC07_trainval/LOST-vit_small16_k/preds.pkl # for VOC07
+python prepare_voc_LOST_CAD_pseudo_boxes_in_detectron2_format.py --year 2012 --pboxes ../outputs/VOC12_trainval/LOST-vit_small16_k/preds.pkl # for VOC12
 
-### Evaluating LOST+CA (corloc results)
-The predictions of the class-agnostic Faster R-CNN model trained using LOST boxes as pseudo-gt are stored in the folder `data/CAD_predictions`. In order to launch the corloc evaluation, please launch the following scripts. It is to be noted that in this evaluation, only the box with the highest confidence score is considered per image.
+# Format VOC data to fit COCO style
+python prepare_voc_data_in_coco_style.py --is_CAD --voc07_dir ../datasets/VOC2007 --voc12_dir ../datasets/VOC2012
+```
+* The following command line launches a CAD training with 4 GPUs on the VOC2007 dataset. The batch size is set to 16; depending on your machine, 4 to 8 GPUs may be needed. Please make sure to change the argument value `MODEL.WEIGHTS` to the correct path of the DINO weights.
+```bash
+python tools/train_net_for_LOST_CAD.py --num-gpus 4 --config-file ./configs/LOST/RN50_DINO_FRCNN_VOC07_CAD.yaml DATALOADER.NUM_WORKERS 8 OUTPUT_DIR ./outputs/RN50_DINO_FRCNN_VOC07_CAD MODEL.WEIGHTS /path/to/DINO/WEIGHTS
+```
+
+Inference results of the model will be stored in `$OUTPUT_DIR/inference`.
+
+### Evaluating LOST+CAD (corloc results)
+
+We provide the predictions of a class-agnostic Faster R-CNN model trained using LOST boxes as pseudo-gt; they are stored in the folder `data/CAD_predictions`. In order to launch the corloc evaluation, please run the following scripts. Note that in this evaluation, only the box with the highest confidence score is considered per image.
+
+```bash
+python main_corloc_evaluation.py --dataset VOC07 --set trainval --type_pred detectron --pred_file data/CAD_predictions/LOST_plus_CAD_VOC07.json
+python main_corloc_evaluation.py --dataset VOC12 --set trainval --type_pred detectron --pred_file data/CAD_predictions/LOST_plus_CAD_VOC12.json
+python main_corloc_evaluation.py --dataset COCO20k --set train --type_pred detectron --pred_file data/CAD_predictions/LOST_plus_CAD_COCO20k.json
+```
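+
+An image is counted as correctly localized when this highest-confidence box reaches an intersection-over-union (IoU) of at least 0.5 with one of the ground-truth boxes of the image. For reference, below is a minimal sketch of this criterion (an illustration only, not code taken from `main_corloc_evaluation.py`):
+```python
+def box_iou(box_a, box_b):
+    # boxes given as [xmin, ymin, xmax, ymax]
+    inter_w = max(0.0, min(box_a[2], box_b[2]) - max(box_a[0], box_b[0]))
+    inter_h = max(0.0, min(box_a[3], box_b[3]) - max(box_a[1], box_b[1]))
+    inter = inter_w * inter_h
+    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
+    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
+    union = area_a + area_b - inter
+    return inter / union if union > 0 else 0.0
+
+def is_correctly_localized(best_box, gt_boxes, iou_thresh=0.5):
+    # corloc criterion for one image: the single highest-scoring
+    # prediction must cover some ground-truth box with IoU >= iou_thresh
+    return any(box_iou(best_box, gt) >= iou_thresh for gt in gt_boxes)
+```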
@@ -173,8 +208,8 @@ The following table presents the obtained corloc results.
 <tr>
 </table>
 
-### Training the models
-We use the [detectron2](https://github.com/facebookresearch/detectron2) framework to train a Faster R-CNN model with LOST predictions as pseudo-gt. In order to reproduce our results, please install the framework.
+
+### Details
 We use the `R50-C4` model of Detectron2 with ResNet50 pre-trained with DINO self-supervision [model](https://dl.fbaipublicfiles.com/dino/dino_resnet50_pretrain/dino_resnet50_pretrain.pth).
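+
+Note that the DINO checkpoint linked above is a torchvision-style `.pth` file, while `MODEL.WEIGHTS` in the provided configs points to a `.pkl` file; the checkpoint presumably needs to be converted to detectron2's format first, e.g. with detectron2's stock converter script (the output file name below is a placeholder):
+```bash
+python detectron2/tools/convert-torchvision-to-d2.py dino_resnet50_pretrain.pth dino_RN50_pretrain_d2_format.pkl
+```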
diff --git a/tools/configs/RN50_DINO_FRCNN_VOC07_CAD.yaml b/tools/configs/RN50_DINO_FRCNN_VOC07_CAD.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..fa90043205db2c7f18cf569613357d844b4bfb37
--- /dev/null
+++ b/tools/configs/RN50_DINO_FRCNN_VOC07_CAD.yaml
@@ -0,0 +1,38 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  RPN:
+    PRE_NMS_TOPK_TEST: 6000
+    POST_NMS_TOPK_TEST: 1000
+  WEIGHTS: "/path/to/dino/weights.pkl"
+  MASK_ON: False
+  RESNETS:
+    DEPTH: 50
+    STRIDE_IN_1X1: False
+    NORM: "SyncBN"
+  ROI_HEADS:
+    NAME: "Res5ROIHeadsExtraNorm"
+    NUM_CLASSES: 1
+  BACKBONE:
+    FREEZE_AT: 2
+  ROI_BOX_HEAD:
+    NORM: "SyncBN"
+  # RGB Mean and Std
+  PIXEL_MEAN: [123.675, 116.280, 103.530]
+  PIXEL_STD: [58.395, 57.120, 57.375]
+INPUT:
+  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
+  MIN_SIZE_TEST: 800
+  FORMAT: "RGB"
+DATASETS:
+  TRAIN: ('voc_2007_trainval_LOST_CAD', )
+  TEST: ('voc_2007_test_CAD_coco_style', )
+TEST:
+  EVAL_PERIOD: 5000
+  PRECISE_BN:
+    ENABLED: True
+SOLVER:
+  STEPS: (18000, 22000)
+  MAX_ITER: 24000
+  WARMUP_ITERS: 100 # Maybe needs tuning.
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02 # Maybe it will need some tuning. MoCo used 0.02.
+OUTPUT_DIR: "./outputs/RN50_DINO_FRCNN_VOC07_CAD"
diff --git a/tools/configs/RN50_DINO_FRCNN_VOC12_CAD.yaml b/tools/configs/RN50_DINO_FRCNN_VOC12_CAD.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..c397c2076a68cb6e41f0de20db1d76313ca812e4
--- /dev/null
+++ b/tools/configs/RN50_DINO_FRCNN_VOC12_CAD.yaml
@@ -0,0 +1,38 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  RPN:
+    PRE_NMS_TOPK_TEST: 6000
+    POST_NMS_TOPK_TEST: 1000
+  WEIGHTS: "/path/to/dino/weights.pkl"
+  MASK_ON: False
+  RESNETS:
+    DEPTH: 50
+    STRIDE_IN_1X1: False
+    NORM: "SyncBN"
+  ROI_HEADS:
+    NAME: "Res5ROIHeadsExtraNorm"
+    NUM_CLASSES: 1
+  BACKBONE:
+    FREEZE_AT: 2
+  ROI_BOX_HEAD:
+    NORM: "SyncBN"
+  # RGB Mean and Std
+  PIXEL_MEAN: [123.675, 116.280, 103.530]
+  PIXEL_STD: [58.395, 57.120, 57.375]
+INPUT:
+  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
+  MIN_SIZE_TEST: 800
+  FORMAT: "RGB"
+DATASETS:
+  TRAIN: ('voc_2012_trainval_LOST_CAD', )
+  TEST: ('voc_2007_test_CAD_coco_style', )
+TEST:
+  EVAL_PERIOD: 5000
+  PRECISE_BN:
+    ENABLED: True
+SOLVER:
+  STEPS: (18000, 22000)
+  MAX_ITER: 24000
+  WARMUP_ITERS: 100 # Maybe needs tuning.
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02 # Maybe it will need some tuning. MoCo used 0.02.
+OUTPUT_DIR: "./outputs/RN50_DINO_FRCNN_VOC12_CAD"
diff --git a/tools/prepare_voc_LOST_CAD_pseudo_boxes_in_detectron2_format.py b/tools/prepare_voc_LOST_CAD_pseudo_boxes_in_detectron2_format.py
new file mode 100755
index 0000000000000000000000000000000000000000..4bc51839c00ab8faf7d0972a0fb9f515e4bcad4e
--- /dev/null
+++ b/tools/prepare_voc_LOST_CAD_pseudo_boxes_in_detectron2_format.py
@@ -0,0 +1,93 @@
+import argparse
+
+import xml.etree.ElementTree as ET
+import pathlib
+import pickle
+import json
+
+import detectron2.data
+from detectron2.structures import BoxMode
+
+
+def get_img_size(ann_file):
+    # Get the width and height from the annotation file.
+    tree = ET.parse(ann_file)
+    root = tree.getroot()
+    size = root.find('size')
+    width = int(size.find('width').text)
+    height = int(size.find('height').text)
+    return width, height
+
+
+def prepare_annotation_data(loc_object):
+    # Accept either a single box or a list of boxes, each as [xmin, ymin, xmax, ymax].
+    if not isinstance(loc_object[0], (list, tuple)):
+        loc_object = [loc_object,]
+
+    annotations = []
+    for obj in loc_object:
+        xmin, ymin, xmax, ymax = [float(x) for x in obj]
+        annotations.append({
+            "iscrowd": 0,
+            "bbox": [xmin, ymin, xmax, ymax],
+            "category_id": 0,  # class-agnostic: everything is a single "object" class
+            "bbox_mode": BoxMode.XYXY_ABS})
+
+    return annotations
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="Prepares the LOST pseudo-boxes from a VOC "
+                    "dataset in the data format expected by detectron2.")
+    parser.add_argument("--voc_dir", type=str, default='../datasets/VOC',
+                        help="Path to where the VOC dataset is.")
+    parser.add_argument("--year", type=str, default='2007', help="Year of VOC dataset.")
+    parser.add_argument("--pboxes", type=str, default='../outputs/VOC07_trainval/LOST-vit_small16_k/preds.pkl',
+                        help="Path to where the LOST CAD pseudo-boxes for the VOC{year} trainval data are.")
+    args = parser.parse_args()
+
+    # Dataset directory
+    voc_dir = f"{args.voc_dir}{args.year}"
+
+    # Load the boxes
+    with open(args.pboxes, 'rb') as handle:
+        LOST_pseudo_boxes = pickle.load(handle)
+
+    data = []
+    cnt = 0
+    for image_name in LOST_pseudo_boxes:
+        image_id = image_name[:-len('.jpg')]
+        full_img_path = pathlib.Path(voc_dir) / "JPEGImages" / image_name
+        full_ann_path = pathlib.Path(voc_dir) / "Annotations" / f"{image_id}.xml"
+        width, height = get_img_size(full_ann_path)
+        assert full_img_path.is_file()
+        data.append({
+            "file_name": str(full_img_path),
+            "image_id": image_id,
+            "height": height, "width": width,
+            "annotations": prepare_annotation_data(LOST_pseudo_boxes[image_name]),
+        })
+        cnt += 1
+    print(f'Number of images saved: {cnt}')
+
+    dataset_name = f"voc_{args.year}_trainval_LOST_CAD"
+    json_data = {
+        "dataset": data,
+        "meta_data": {
+            "dirname": voc_dir,
+            "evaluator_type": "pascal_voc",
+            "name": dataset_name,
+            "split": "trainval",
+            "year": args.year,
+            "thing_classes": detectron2.data.MetadataCatalog.get(f"voc_{args.year}_trainval").thing_classes,
+        }}
+
+    dst_file = f'./datasets/{dataset_name}.json'
+    print(f"The pseudo-boxes at {args.pboxes} will be transformed into "
+          f"a detectron2-compatible dataset format at {dst_file}")
+    with open(dst_file, 'w') as outfile:
+        json.dump(json_data, outfile)
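+
+# Illustrative note: the --pboxes pickle is expected to map image file names to a
+# single [xmin, ymin, xmax, ymax] box or to a list of such boxes, e.g.
+#   {"000005.jpg": [10.0, 24.0, 333.0, 225.0], ...}
+# (the example entry above is made up).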
diff --git a/tools/prepare_voc_data_in_coco_style.py b/tools/prepare_voc_data_in_coco_style.py
new file mode 100755
index 0000000000000000000000000000000000000000..1b3aa2b6827252bf64ca646747ef564f05f837f9
--- /dev/null
+++ b/tools/prepare_voc_data_in_coco_style.py
@@ -0,0 +1,106 @@
+import argparse
+
+import xml.etree.ElementTree as ET
+import pathlib
+import json
+
+from detectron2.structures import BoxMode
+
+
+CLASSES = [
+    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
+    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
+    "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
+
+
+def get_img_size(ann_file):
+    # Get the width and height from the annotation file.
+    tree = ET.parse(ann_file)
+    root = tree.getroot()
+    size = root.find('size')
+    width = int(size.find('width').text)
+    height = int(size.find('height').text)
+    return width, height
+
+
+def prepare_annotation_data(ann_file, class_agnostic=False):
+    tree = ET.parse(ann_file)
+    root = tree.getroot()
+
+    annotations = []
+    for obj in root.iter('object'):
+        difficult = int(obj.find('difficult').text)
+
+        cls = obj.find('name').text
+        if cls not in CLASSES or difficult == 1:
+            continue
+
+        cls_id = 0 if class_agnostic else CLASSES.index(cls)
+
+        bbox = obj.find("bndbox")
+        bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
+        # Original annotations are integers in the range [1, W or H].
+        # Assuming they mean 1-based pixel indices (inclusive),
+        # a box with annotation (xmin=1, xmax=W) covers the whole image.
+        # In coordinate space this is represented by (xmin=0, xmax=W).
+        bbox[0] -= 1.0
+        bbox[1] -= 1.0
+        annotations.append({
+            "iscrowd": 0,  # difficult objects are skipped above rather than marked as crowd
+            "bbox": bbox,
+            "category_id": cls_id,
+            "bbox_mode": BoxMode.XYXY_ABS})
+    return annotations
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--voc07_dir", type=str, default='../datasets/VOC2007',
+                        help="Path where the VOC2007 data are.")
+    parser.add_argument("--voc12_dir", type=str, default='../datasets/VOC2012',
+                        help="Path where the VOC2012 data are.")
+    parser.add_argument("--is_CAD", action='store_true',
+                        help="Whether to produce class-agnostic (single-class) annotations.")
+    args = parser.parse_args()
+
+    year2dir = {"2007": args.voc07_dir, "2012": args.voc12_dir}
+    sets = [('2012', 'trainval'), ('2007', 'trainval'), ('2007', 'test'),]
+
+    CAD_name = "_CAD" if args.is_CAD else ""
+
+    for year, image_set in sets:
+        image_ids = open(f'{year2dir[year]}/ImageSets/Main/{image_set}.txt').read().strip().split()
+        print(f"==> Year: {year}, ImageSet: {image_set}, Number of images: {len(image_ids)}")
+        data = []
+        for image_id in image_ids:
+            full_img_path = pathlib.Path(year2dir[year]) / "JPEGImages" / f"{image_id}.jpg"
+            full_ann_path = pathlib.Path(year2dir[year]) / "Annotations" / f"{image_id}.xml"
+            width, height = get_img_size(full_ann_path)
+            assert full_img_path.is_file()
+            data.append({
+                "file_name": str(full_img_path),
+                "image_id": image_id,
+                "height": height, "width": width,
+                "annotations": prepare_annotation_data(full_ann_path, args.is_CAD),
+            })
+
+        json_data = {
+            "dataset": data,
+            "meta_data": {
+                "dirname": f"datasets/VOC{year}",
+                "evaluator_type": "coco",
+                "name": f"voc_{year}_{image_set}{CAD_name}_coco_style",
+                "split": image_set,
+                "year": int(year),
+            }}
+
+        dst_file = f'./datasets/voc_objects_{year}_{image_set}{CAD_name}_coco_style.json'
+        print(f"Saving the coco-style voc data at {dst_file}")
+        with open(dst_file, 'w') as outfile:
+            json.dump(json_data, outfile)
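+
+# Quick sanity check (illustrative, path assumes the VOC07 trainval CAD output):
+#   with open('./datasets/voc_objects_2007_trainval_CAD_coco_style.json') as f:
+#       d = json.load(f)
+#   print(len(d["dataset"]), sum(len(e["annotations"]) for e in d["dataset"]))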
diff --git a/tools/train_net_for_LOST_CAD.py b/tools/train_net_for_LOST_CAD.py
new file mode 100755
index 0000000000000000000000000000000000000000..2dcd0882cbcb95f1c3c25cc29f60799bb9185416
--- /dev/null
+++ b/tools/train_net_for_LOST_CAD.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python
+# Copyright (c) Facebook, Inc. and its affiliates.
+"""
+Detection Training Script.
+
+This script reads a given config file and runs the training or evaluation.
+It is an entry point that is made to train standard models in detectron2.
+
+In order to let one script support training of many models,
+this script contains logic that is specific to these built-in models and therefore
+may not be suitable for your own project.
+For example, your research project perhaps only needs a single "evaluator".
+
+Therefore, we recommend you to use detectron2 as a library and take
+this file as an example of how to use the library.
+You may want to write your own script with your datasets and other customizations.
+"""
+
+import logging
+import os
+from collections import OrderedDict
+import torch
+
+import detectron2.utils.comm as comm
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import get_cfg
+from detectron2.data import MetadataCatalog
+from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch
+from detectron2.evaluation import (
+    CityscapesInstanceEvaluator,
+    CityscapesSemSegEvaluator,
+    COCOEvaluator,
+    COCOPanopticEvaluator,
+    DatasetEvaluators,
+    LVISEvaluator,
+    PascalVOCDetectionEvaluator,
+    SemSegEvaluator,
+    verify_results,
+)
+from detectron2.modeling import GeneralizedRCNNWithTTA
+from detectron2.layers import get_norm
+from detectron2.modeling.roi_heads import ROI_HEADS_REGISTRY, Res5ROIHeads
+
+#*******************************************************************************
+#********************** REGISTERING THE NECESSARY DATASETS *********************
+import json
+import detectron2.data
+
+
+def register_voc_in_coco_style(
+        voc2007_trainval_json_path="./datasets/voc_objects_2007_trainval_CAD_coco_style.json",
+        voc2007_test_json_path="./datasets/voc_objects_2007_test_CAD_coco_style.json",
+        voc2012_trainval_json_path="./datasets/voc_objects_2012_trainval_CAD_coco_style.json"):
+    # NOTE: the default paths above match the file names produced by
+    # prepare_voc_data_in_coco_style.py (voc_objects_{year}_{split}_CAD_coco_style.json).
+
+    def _register_from_json(json_path, new_dataset_name, builtin_dataset_name):
+        # Register the json-backed dataset and copy over the metadata of the
+        # corresponding builtin VOC dataset.
+        print(f"Registering the '{new_dataset_name}' dataset from the json file {json_path}")
+
+        def _dataset_function():
+            with open(json_path) as infile:
+                json_data = json.load(infile)
+            return json_data["dataset"]
+
+        detectron2.data.DatasetCatalog.register(new_dataset_name, _dataset_function)
+        builtin_metadata = detectron2.data.MetadataCatalog.get(builtin_dataset_name)
+        metadata = detectron2.data.MetadataCatalog.get(new_dataset_name)
+        metadata.thing_classes = builtin_metadata.thing_classes
+        metadata.evaluator_type = "coco"
+        metadata.split = builtin_metadata.split
+        metadata.year = builtin_metadata.year
+
+    _register_from_json(voc2007_trainval_json_path, "voc_2007_trainval_CAD_coco_style", "voc_2007_trainval")
+    _register_from_json(voc2007_test_json_path, "voc_2007_test_CAD_coco_style", "voc_2007_test")
+    _register_from_json(voc2012_trainval_json_path, "voc_2012_trainval_CAD_coco_style", "voc_2012_trainval")
+
+
+def register_CAD_LOST_pseudo_boxes_for_the_voc2007_trainval_dataset(
+        voc2007_json_path="./datasets/voc_2007_trainval_LOST_CAD.json",
+        voc2007_dataset_name="voc_2007_trainval_LOST_CAD"):
+
+    print(f"Registering the '{voc2007_dataset_name}' dataset from the json file {voc2007_json_path}")
+
+    def voc_2007_trainval_dataset_function():
+        with open(voc2007_json_path) as infile:
+            json_data = json.load(infile)
+        return json_data["dataset"]
+
+    detectron2.data.DatasetCatalog.register(
+        voc2007_dataset_name, voc_2007_trainval_dataset_function)
+    detectron2.data.MetadataCatalog.get(voc2007_dataset_name).thing_classes = (
+        detectron2.data.MetadataCatalog.get("voc_2007_trainval").thing_classes)
+    detectron2.data.MetadataCatalog.get(voc2007_dataset_name).evaluator_type = "coco"
+
+
+register_voc_in_coco_style()
+register_CAD_LOST_pseudo_boxes_for_the_voc2007_trainval_dataset()
+#*******************************************************************************
+#*******************************************************************************
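+
+# Illustrative check (assuming the json files above have been generated): a
+# registered dataset can be inspected with, e.g.,
+#   sample = detectron2.data.DatasetCatalog.get("voc_2007_trainval_LOST_CAD")[0]
+#   print(sample["file_name"], len(sample["annotations"]))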
+
+
+@ROI_HEADS_REGISTRY.register()
+class Res5ROIHeadsExtraNorm(Res5ROIHeads):
+    """
+    As described in the MoCo paper, there is an extra BN layer
+    following the res5 stage.
+    """
+    def _build_res5_block(self, cfg):
+        seq, out_channels = super()._build_res5_block(cfg)
+        norm = cfg.MODEL.RESNETS.NORM
+        norm = get_norm(norm, out_channels)
+        seq.add_module("norm", norm)
+        return seq, out_channels
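+
+# Note: this head is selected by the provided configs via
+#   MODEL:
+#     ROI_HEADS:
+#       NAME: "Res5ROIHeadsExtraNorm"
+# and the extra norm layer is built from MODEL.RESNETS.NORM ("SyncBN" here).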
+
+
+class Trainer(DefaultTrainer):
+    """
+    We use the "DefaultTrainer" which contains pre-defined default logic for
+    the standard training workflow. It may not work for you, especially if you
+    are working on a new research project. In that case you can write your
+    own training loop. You can use "tools/plain_train_net.py" as an example.
+    """
+
+    @classmethod
+    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
+        """
+        Create evaluator(s) for a given dataset.
+        This uses the special metadata "evaluator_type" associated with each builtin dataset.
+        For your own dataset, you can simply create an evaluator manually in your
+        script and do not have to worry about the hacky if-else logic here.
+        """
+        if output_folder is None:
+            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
+        evaluator_list = []
+        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
+        if evaluator_type in ["sem_seg", "coco_panoptic_seg"]:
+            evaluator_list.append(
+                SemSegEvaluator(
+                    dataset_name,
+                    distributed=True,
+                    output_dir=output_folder,
+                )
+            )
+        if evaluator_type in ["coco", "coco_panoptic_seg"]:
+            evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))
+        if evaluator_type == "coco_panoptic_seg":
+            evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
+        if evaluator_type == "cityscapes_instance":
+            assert (
+                torch.cuda.device_count() >= comm.get_rank()
+            ), "CityscapesEvaluator currently does not work with multiple machines."
+            return CityscapesInstanceEvaluator(dataset_name)
+        if evaluator_type == "cityscapes_sem_seg":
+            assert (
+                torch.cuda.device_count() >= comm.get_rank()
+            ), "CityscapesEvaluator currently does not work with multiple machines."
+            return CityscapesSemSegEvaluator(dataset_name)
+        elif evaluator_type == "pascal_voc":
+            return PascalVOCDetectionEvaluator(dataset_name)
+        elif evaluator_type == "lvis":
+            return LVISEvaluator(dataset_name, output_dir=output_folder)
+        if len(evaluator_list) == 0:
+            raise NotImplementedError(
+                "no Evaluator for the dataset {} with the type {}".format(
+                    dataset_name, evaluator_type
+                )
+            )
+        elif len(evaluator_list) == 1:
+            return evaluator_list[0]
+        return DatasetEvaluators(evaluator_list)
+
+    @classmethod
+    def test_with_TTA(cls, cfg, model):
+        logger = logging.getLogger("detectron2.trainer")
+        # At the end of training, run an evaluation with TTA.
+        # Only supports some R-CNN models.
+        logger.info("Running inference with test-time augmentation ...")
+        model = GeneralizedRCNNWithTTA(cfg, model)
+        evaluators = [
+            cls.build_evaluator(
+                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
+            )
+            for name in cfg.DATASETS.TEST
+        ]
+        res = cls.test(cfg, model, evaluators)
+        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
+        return res
+
+
+def setup(args):
+    """
+    Create configs and perform basic setups.
+    """
+    cfg = get_cfg()
+    cfg.merge_from_file(args.config_file)
+    cfg.merge_from_list(args.opts)
+    cfg.freeze()
+    default_setup(cfg, args)
+    return cfg
+
+
+def main(args):
+    cfg = setup(args)
+
+    if args.eval_only:
+        model = Trainer.build_model(cfg)
+        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
+            cfg.MODEL.WEIGHTS, resume=args.resume
+        )
+        res = Trainer.test(cfg, model)
+        if cfg.TEST.AUG.ENABLED:
+            res.update(Trainer.test_with_TTA(cfg, model))
+        if comm.is_main_process():
+            verify_results(cfg, res)
+        return res
+
+    """
+    If you'd like to do anything fancier than the standard training logic,
+    consider writing your own training loop (see plain_train_net.py) or
+    subclassing the trainer.
+    """
+ """ + trainer = Trainer(cfg) + trainer.resume_or_load(resume=args.resume) + if cfg.TEST.AUG.ENABLED: + trainer.register_hooks( + [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] + ) + return trainer.train() + + +if __name__ == "__main__": + args = default_argument_parser().parse_args() + + print("Command Line Args:", args) + launch( + main, + args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + args=(args,), + )