From 95d604cb18bc104873ff1592f1f983d588cf7a15 Mon Sep 17 00:00:00 2001
From: Jitesh Jain
Date: Sat, 10 Jun 2023 15:02:37 +0530
Subject: [PATCH] :zap: Update Readme

---
 README.md                                     |  3 +-
 ...ern_image_huge_bs16_160k_896x896_1024.yaml |  7 ++---
 ...ormer_intern_image_huge_bs16_90k_1024.yaml | 31 ++++++++++++++++---
 ...mer_intern_image_huge_bs16_100ep_1024.yaml |  5 ++-
 demo/predictor.py                             |  6 ++--
 5 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 21a37e8..2de87f0 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ Equal Contribution
 
 
 
-[[`Project Page`](https://praeclarumjj3.github.io/oneformer/)] [[`arXiv`](https://arxiv.org/abs/2211.06220)] [[`pdf`](https://arxiv.org/pdf/2211.06220.pdf)] [[`BibTeX`](#4citation)]
+[[`Project Page`](https://praeclarumjj3.github.io/oneformer/)] [[`arXiv`](https://arxiv.org/abs/2211.06220)] [[`pdf`](https://openaccess.thecvf.com/content/CVPR2023/papers/Jain_OneFormer_One_Transformer_To_Rule_Universal_Image_Segmentation_CVPR_2023_paper.pdf)] [[`Slides`](https://drive.google.com/file/d/12XhiOXD08_LwzBwosoLVk7i8D45V8YfW/view?usp=sharing)] [[`Poster`](https://drive.google.com/file/d/1-U3hCYVNVht26NM-zbE87p1V4idc5bCt/view?usp=sharing)] [[`BibTeX`](#4citation)]
 
 This repo contains the code for our paper **OneFormer: One Transformer to Rule Universal Image Segmentation**.
 
@@ -38,6 +38,7 @@ This repo contains the code for our paper **OneFormer: One Transformer to Rule U
 
 ## News
 
+- **[June 10, 2023]**: OneFormer achieves SOTA performance on ADE20K panoptic segmentation with **54.5 PQ** and on Cityscapes instance segmentation with **50.6 AP**. We publicly release the corresponding models with an InternImage-H backbone!
 - **[February 27, 2023]**: OneFormer is accepted to CVPR 2023!
 - **[January 26, 2023]**: OneFormer sets new SOTA performance on the Mapillary Vistas val (both panoptic & semantic segmentation) and Cityscapes test (panoptic segmentation) sets. We’ve released the checkpoints too!
 - **[January 19, 2023]**: OneFormer is now available as a part of the 🤗 **HuggingFace [transformers](https://huggingface.co/docs/transformers/main/en/model_doc/oneformer) library** and **[model hub](https://huggingface.co/models?filter=oneformer)**! 🚀
diff --git a/configs/ade20k/intern_image/oneformer_intern_image_huge_bs16_160k_896x896_1024.yaml b/configs/ade20k/intern_image/oneformer_intern_image_huge_bs16_160k_896x896_1024.yaml
index 1c1317c..50d4e4a 100644
--- a/configs/ade20k/intern_image/oneformer_intern_image_huge_bs16_160k_896x896_1024.yaml
+++ b/configs/ade20k/intern_image/oneformer_intern_image_huge_bs16_160k_896x896_1024.yaml
@@ -3,10 +3,6 @@ MODEL:
   BACKBONE:
     NAME: "D2InternImage"
   SEM_SEG_HEAD:
-    NAME: "OneFormerHead"
-    IGNORE_VALUE: 255
-    NUM_CLASSES: 150
-    LOSS_WEIGHT: 1.0
     CONVS_DIM: 1024
     MASK_DIM: 1024
   INTERNIMAGE:
@@ -51,3 +47,6 @@ TEST:
     MIN_SIZES: [448, 678, 896, 1120, 1344, 1568]
     MAX_SIZE: 6272
     FLIP: True
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.00002
diff --git a/configs/cityscapes/intern_image/oneformer_intern_image_huge_bs16_90k_1024.yaml b/configs/cityscapes/intern_image/oneformer_intern_image_huge_bs16_90k_1024.yaml
index 66013af..fe55ab5 100644
--- a/configs/cityscapes/intern_image/oneformer_intern_image_huge_bs16_90k_1024.yaml
+++ b/configs/cityscapes/intern_image/oneformer_intern_image_huge_bs16_90k_1024.yaml
@@ -3,10 +3,6 @@ MODEL:
   BACKBONE:
     NAME: "D2InternImage"
   SEM_SEG_HEAD:
-    NAME: "OneFormerHead"
-    IGNORE_VALUE: 255
-    NUM_CLASSES: 150
-    LOSS_WEIGHT: 1.0
     CONVS_DIM: 1024
     MASK_DIM: 1024
   INTERNIMAGE:
@@ -30,4 +26,29 @@ MODEL:
     CONTEXT_LENGTH: 77
     N_CTX: 16
 TEST:
-  DETECTIONS_PER_IMAGE: 250
\ No newline at end of file
+  DETECTIONS_PER_IMAGE: 250
+INPUT:
+  MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 896) for x in range(5, 21)]"]
+  MIN_SIZE_TRAIN_SAMPLING: "choice"
+  MIN_SIZE_TEST: 896
+  MAX_SIZE_TRAIN: 3584
+  MAX_SIZE_TEST: 3584
+  CROP:
+    ENABLED: True
+    TYPE: "absolute"
+    SIZE: (896, 896)
+    SINGLE_CATEGORY_MAX_AREA: 1.0
+  COLOR_AUG_SSD: True
+  SIZE_DIVISIBILITY: 896 # used in dataset mapper
+  FORMAT: "RGB"
+TEST:
+  DETECTIONS_PER_IMAGE: 250
+  EVAL_PERIOD: 5000
+  AUG:
+    ENABLED: False
+    MIN_SIZES: [448, 678, 896, 1120, 1344, 1568]
+    MAX_SIZE: 6272
+    FLIP: True
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.00002
\ No newline at end of file
diff --git a/configs/coco/intern_image/oneformer_intern_image_huge_bs16_100ep_1024.yaml b/configs/coco/intern_image/oneformer_intern_image_huge_bs16_100ep_1024.yaml
index e6c9ba1..748d71f 100644
--- a/configs/coco/intern_image/oneformer_intern_image_huge_bs16_100ep_1024.yaml
+++ b/configs/coco/intern_image/oneformer_intern_image_huge_bs16_100ep_1024.yaml
@@ -4,9 +4,6 @@ MODEL:
     NAME: "D2InternImage"
   SEM_SEG_HEAD:
     NAME: "OneFormerHead"
-    IGNORE_VALUE: 255
-    NUM_CLASSES: 150
-    LOSS_WEIGHT: 1.0
     CONVS_DIM: 1024
     MASK_DIM: 1024
   INTERNIMAGE:
@@ -30,6 +27,8 @@ MODEL:
     CONTEXT_LENGTH: 77
     N_CTX: 16
 SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.00002
   STEPS: (655556, 735184)
   MAX_ITER: 737500
   AMP:
diff --git a/demo/predictor.py b/demo/predictor.py
index 76e32bb..f012155 100644
--- a/demo/predictor.py
+++ b/demo/predictor.py
@@ -52,6 +52,8 @@ def run_on_image(self, image, task):
         # Convert image from OpenCV BGR format to Matplotlib RGB format.
         image = image[:, :, ::-1]
         vis_output = {}
+
+        assert task in ['panoptic', 'semantic', 'instance'], "task should be one of 'panoptic', 'semantic', 'instance'"
 
         if task == 'panoptic':
             visualizer = Visualizer(image, metadata=self.metadata, instance_mode=ColorMode.IMAGE)
@@ -61,14 +63,14 @@ def run_on_image(self, image, task):
                 panoptic_seg.to(self.cpu_device), segments_info, alpha=0.7
             )
 
-        if task == 'panoptic' or task == 'semantic':
+        if task == 'semantic':
             visualizer = Visualizer(image, metadata=self.metadata, instance_mode=ColorMode.IMAGE_BW)
             predictions = self.predictor(image, task)
             vis_output['semantic_inference'] = visualizer.draw_sem_seg(
                 predictions["sem_seg"].argmax(dim=0).to(self.cpu_device), alpha=0.7
             )
 
-        if task == 'panoptic' or task == 'instance':
+        if task == 'instance':
             visualizer = Visualizer(image, metadata=self.metadata, instance_mode=ColorMode.IMAGE_BW)
             predictions = self.predictor(image, task)
             instances = predictions["instances"].to(self.cpu_device)
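
Note (not part of the patch): with the demo/predictor.py change above, run_on_image now produces exactly one visualization per call, keyed by the requested task, and the new assert rejects anything other than 'panoptic', 'semantic', or 'instance'. Below is a minimal usage sketch. It assumes a VisualizationDemo wrapper class in demo/predictor.py (suggested by self.predictor and self.metadata but not shown in this diff), that run_on_image returns (predictions, vis_output) in detectron2-demo style, that the instance task fills an 'instance_inference' key by analogy with the keys visible above, and that cfg has been prepared the way the demo script prepares it. All of these are assumptions, not guarantees from the patch.

    # Hypothetical sketch; see the assumptions stated above.
    import cv2
    from predictor import VisualizationDemo  # assumed wrapper class in demo/predictor.py

    def visualize_all_tasks(cfg, image_path):
        """Run the patched run_on_image once per task and save each visualization."""
        demo = VisualizationDemo(cfg)          # cfg assumed to be built as in the demo script
        image = cv2.imread(image_path)         # OpenCV returns BGR; run_on_image flips it to RGB
        for task in ("panoptic", "semantic", "instance"):
            predictions, vis_output = demo.run_on_image(image, task)
            # After this patch, vis_output holds only the '<task>_inference' entry for the
            # requested task; an unsupported task string trips the new assert instead.
            vis_output[f"{task}_inference"].save(f"{task}_out.png")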