From 95d604cb18bc104873ff1592f1f983d588cf7a15 Mon Sep 17 00:00:00 2001
From: Jitesh Jain
Date: Sat, 10 Jun 2023 15:02:37 +0530
Subject: [PATCH] :zap: Update Readme

---
 README.md                                     |  3 +-
 ...ern_image_huge_bs16_160k_896x896_1024.yaml |  7 ++---
 ...ormer_intern_image_huge_bs16_90k_1024.yaml | 31 ++++++++++++++++---
 ...mer_intern_image_huge_bs16_100ep_1024.yaml |  5 ++-
 demo/predictor.py                             |  6 ++--
 5 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 21a37e8..2de87f0 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ Equal Contribution
 
 
 
-[[`Project Page`](https://praeclarumjj3.github.io/oneformer/)] [[`arXiv`](https://arxiv.org/abs/2211.06220)] [[`pdf`](https://arxiv.org/pdf/2211.06220.pdf)] [[`BibTeX`](#4citation)]
+[[`Project Page`](https://praeclarumjj3.github.io/oneformer/)] [[`arXiv`](https://arxiv.org/abs/2211.06220)] [[`pdf`](https://openaccess.thecvf.com/content/CVPR2023/papers/Jain_OneFormer_One_Transformer_To_Rule_Universal_Image_Segmentation_CVPR_2023_paper.pdf)] [[`Slides`](https://drive.google.com/file/d/12XhiOXD08_LwzBwosoLVk7i8D45V8YfW/view?usp=sharing)] [[`Poster`](https://drive.google.com/file/d/1-U3hCYVNVht26NM-zbE87p1V4idc5bCt/view?usp=sharing)] [[`BibTeX`](#4citation)]
 
 This repo contains the code for our paper **OneFormer: One Transformer to Rule Universal Image Segmentation**.
 
@@ -38,6 +38,7 @@ This repo contains the code for our paper **OneFormer: One Transformer to Rule U
 
 ## News
 
+- **[June 10, 2023]**: OneFormer achieves SOTA performance on ADE20K panoptic segmentation with **54.5 PQ** and on Cityscapes instance segmentation with **50.6 AP**. We publicly release the corresponding models with an InternImage-H backbone!
 - **[February 27, 2023]**: OneFormer is accepted to CVPR 2023!
 - **[January 26, 2023]**: OneFormer sets new SOTA performance on the Mapillary Vistas val (both panoptic & semantic segmentation) and Cityscapes test (panoptic segmentation) sets. We’ve released the checkpoints too!
 - **[January 19, 2023]**: OneFormer is now available as a part of the 🤗 **HuggingFace [transformers](https://huggingface.co/docs/transformers/main/en/model_doc/oneformer) library** and **[model hub](https://huggingface.co/models?filter=oneformer)**! 🚀
diff --git a/configs/ade20k/intern_image/oneformer_intern_image_huge_bs16_160k_896x896_1024.yaml b/configs/ade20k/intern_image/oneformer_intern_image_huge_bs16_160k_896x896_1024.yaml
index 1c1317c..50d4e4a 100644
--- a/configs/ade20k/intern_image/oneformer_intern_image_huge_bs16_160k_896x896_1024.yaml
+++ b/configs/ade20k/intern_image/oneformer_intern_image_huge_bs16_160k_896x896_1024.yaml
@@ -3,10 +3,6 @@ MODEL:
   BACKBONE:
     NAME: "D2InternImage"
   SEM_SEG_HEAD:
-    NAME: "OneFormerHead"
-    IGNORE_VALUE: 255
-    NUM_CLASSES: 150
-    LOSS_WEIGHT: 1.0
     CONVS_DIM: 1024
     MASK_DIM: 1024
   INTERNIMAGE:
@@ -51,3 +47,6 @@ TEST:
     MIN_SIZES: [448, 678, 896, 1120, 1344, 1568]
     MAX_SIZE: 6272
     FLIP: True
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.00002
diff --git a/configs/cityscapes/intern_image/oneformer_intern_image_huge_bs16_90k_1024.yaml b/configs/cityscapes/intern_image/oneformer_intern_image_huge_bs16_90k_1024.yaml
index 66013af..fe55ab5 100644
--- a/configs/cityscapes/intern_image/oneformer_intern_image_huge_bs16_90k_1024.yaml
+++ b/configs/cityscapes/intern_image/oneformer_intern_image_huge_bs16_90k_1024.yaml
@@ -3,10 +3,6 @@ MODEL:
   BACKBONE:
     NAME: "D2InternImage"
   SEM_SEG_HEAD:
-    NAME: "OneFormerHead"
-    IGNORE_VALUE: 255
-    NUM_CLASSES: 150
-    LOSS_WEIGHT: 1.0
     CONVS_DIM: 1024
     MASK_DIM: 1024
   INTERNIMAGE:
@@ -30,4 +26,29 @@ MODEL:
     CONTEXT_LENGTH: 77
     N_CTX: 16
 TEST:
-  DETECTIONS_PER_IMAGE: 250
\ No newline at end of file
+  DETECTIONS_PER_IMAGE: 250
+INPUT:
+  MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 896) for x in range(5, 21)]"]
+  MIN_SIZE_TRAIN_SAMPLING: "choice"
+  MIN_SIZE_TEST: 896
+  MAX_SIZE_TRAIN: 3584
+  MAX_SIZE_TEST: 3584
+  CROP:
+    ENABLED: True
+    TYPE: "absolute"
+    SIZE: (896, 896)
+    SINGLE_CATEGORY_MAX_AREA: 1.0
+  COLOR_AUG_SSD: True
+  SIZE_DIVISIBILITY: 896 # used in dataset mapper
+  FORMAT: "RGB"
+TEST:
+  DETECTIONS_PER_IMAGE: 250
+  EVAL_PERIOD: 5000
+  AUG:
+    ENABLED: False
+    MIN_SIZES: [448, 678, 896, 1120, 1344, 1568]
+    MAX_SIZE: 6272
+    FLIP: True
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.00002
\ No newline at end of file
diff --git a/configs/coco/intern_image/oneformer_intern_image_huge_bs16_100ep_1024.yaml b/configs/coco/intern_image/oneformer_intern_image_huge_bs16_100ep_1024.yaml
index e6c9ba1..748d71f 100644
--- a/configs/coco/intern_image/oneformer_intern_image_huge_bs16_100ep_1024.yaml
+++ b/configs/coco/intern_image/oneformer_intern_image_huge_bs16_100ep_1024.yaml
@@ -4,9 +4,6 @@ MODEL:
     NAME: "D2InternImage"
   SEM_SEG_HEAD:
     NAME: "OneFormerHead"
-    IGNORE_VALUE: 255
-    NUM_CLASSES: 150
-    LOSS_WEIGHT: 1.0
     CONVS_DIM: 1024
     MASK_DIM: 1024
   INTERNIMAGE:
@@ -30,6 +27,8 @@ MODEL:
     CONTEXT_LENGTH: 77
     N_CTX: 16
 SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.00002
   STEPS: (655556, 735184)
   MAX_ITER: 737500
   AMP:
diff --git a/demo/predictor.py b/demo/predictor.py
index 76e32bb..f012155 100644
--- a/demo/predictor.py
+++ b/demo/predictor.py
@@ -52,6 +52,8 @@ def run_on_image(self, image, task):
         # Convert image from OpenCV BGR format to Matplotlib RGB format.
         image = image[:, :, ::-1]
         vis_output = {}
+
+        assert task in ['panoptic', 'semantic', 'instance'], "task should be one of 'panoptic', 'semantic', 'instance'"
 
         if task == 'panoptic':
             visualizer = Visualizer(image, metadata=self.metadata, instance_mode=ColorMode.IMAGE)
@@ -61,14 +63,14 @@ def run_on_image(self, image, task):
                 panoptic_seg.to(self.cpu_device), segments_info, alpha=0.7
             )
 
-        if task == 'panoptic' or task == 'semantic':
+        if task == 'semantic':
             visualizer = Visualizer(image, metadata=self.metadata, instance_mode=ColorMode.IMAGE_BW)
             predictions = self.predictor(image, task)
             vis_output['semantic_inference'] = visualizer.draw_sem_seg(
                 predictions["sem_seg"].argmax(dim=0).to(self.cpu_device), alpha=0.7
             )
 
-        if task == 'panoptic' or task == 'instance':
+        if task == 'instance':
             visualizer = Visualizer(image, metadata=self.metadata, instance_mode=ColorMode.IMAGE_BW)
             predictions = self.predictor(image, task)
             instances = predictions["instances"].to(self.cpu_device)
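
Note (not part of the patch): with the demo/predictor.py change above, run_on_image now produces exactly one visualization per call, keyed by the requested task, and the new assert rejects anything other than 'panoptic', 'semantic', or 'instance'. Below is a minimal usage sketch. It assumes a VisualizationDemo wrapper class in demo/predictor.py (suggested by self.predictor and self.metadata but not shown in this diff), that run_on_image returns (predictions, vis_output) in detectron2-demo style, that the instance task fills an 'instance_inference' key by analogy with the keys visible above, and that cfg has been prepared the way the demo script prepares it. All of these are assumptions, not guarantees from the patch.

    # Hypothetical sketch; see the assumptions stated above.
    import cv2
    from predictor import VisualizationDemo  # assumed wrapper class in demo/predictor.py

    def visualize_all_tasks(cfg, image_path):
        """Run the patched run_on_image once per task and save each visualization."""
        demo = VisualizationDemo(cfg)          # cfg assumed to be built as in the demo script
        image = cv2.imread(image_path)         # OpenCV returns BGR; run_on_image flips it to RGB
        for task in ("panoptic", "semantic", "instance"):
            predictions, vis_output = demo.run_on_image(image, task)
            # After this patch, vis_output holds only the '<task>_inference' entry for the
            # requested task; an unsupported task string trips the new assert instead.
            vis_output[f"{task}_inference"].save(f"{task}_out.png")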