
⚡ Update Readme
praeclarumjj3 committed Jun 10, 2023
1 parent 97ff452 commit 4de62ea
Showing 5 changed files with 36 additions and 14 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -38,6 +38,7 @@ This repo contains the code for our paper **OneFormer: One Transformer to Rule Universal Image Segmentation**

## News

- **[June 10, 2023]**: OneFormer achieves SOTA performance on ADE20K panoptic segmentation with **54.5 PQ** and on Cityscapes instance segmentation with **50.6 AP**. We publicly release the corresponding models with the InternImage-H backbone!
- **[February 27, 2023]**: OneFormer is accepted to CVPR 2023!
- **[January 26, 2023]**: OneFormer sets new SOTA performance on the Mapillary Vistas val (both panoptic & semantic segmentation) and Cityscapes test (panoptic segmentation) sets. We’ve released the checkpoints too!
- **[January 19, 2023]**: OneFormer is now available as a part of the 🤗 **HuggingFace [transformers](https://huggingface.co/docs/transformers/main/en/model_doc/oneformer) library** and **[model hub](https://huggingface.co/models?filter=oneformer)**! 🚀 (A minimal usage sketch follows below.)
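As a pointer for the HuggingFace integration mentioned in the News entry above, here is a minimal inference sketch using the `transformers` API. The checkpoint id is just one example from the model hub (not something added by this commit), and the calls follow the library's documented OneFormer usage:

```python
# Minimal sketch of running OneFormer through the HuggingFace transformers API.
# The checkpoint id below is an example from the model hub, not part of this commit.
import requests
from PIL import Image
from transformers import OneFormerProcessor, OneFormerForUniversalSegmentation

ckpt = "shi-labs/oneformer_ade20k_swin_tiny"
processor = OneFormerProcessor.from_pretrained(ckpt)
model = OneFormerForUniversalSegmentation.from_pretrained(ckpt)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# The task token ("panoptic", "semantic", or "instance") selects the inference mode.
inputs = processor(images=image, task_inputs=["panoptic"], return_tensors="pt")
outputs = model(**inputs)
panoptic = processor.post_process_panoptic_segmentation(
    outputs, target_sizes=[image.size[::-1]]
)[0]
print(panoptic["segmentation"].shape, len(panoptic["segments_info"]))
```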
@@ -3,10 +3,6 @@ MODEL:
  BACKBONE:
    NAME: "D2InternImage"
  SEM_SEG_HEAD:
    NAME: "OneFormerHead"
    IGNORE_VALUE: 255
    NUM_CLASSES: 150
    LOSS_WEIGHT: 1.0
    CONVS_DIM: 1024
    MASK_DIM: 1024
  INTERNIMAGE:
@@ -51,3 +47,6 @@ TEST:
    MIN_SIZES: [448, 678, 896, 1120, 1344, 1568]
    MAX_SIZE: 6272
    FLIP: True
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.00002
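The `TEST.AUG` keys in this config control multi-scale, flipped test-time augmentation at evaluation. The sketch below is only an illustration of what those settings mean, not the repo's actual TTA wrapper; `model` is a stand-in for any network that maps an image tensor to per-pixel class logits:

```python
# Illustrative only: a generic multi-scale + horizontal-flip TTA loop showing what the
# TEST.AUG keys above control. OneFormer/Detectron2 implement this via their own TTA
# wrapper; `model` and `image` here are stand-in names, not the repo's API.
import torch
import torch.nn.functional as F

@torch.no_grad()
def tta_semantic_logits(model, image, min_sizes, max_size, flip=True):
    """image: (1, 3, H, W) tensor; returns logits averaged over scales (and flips)."""
    _, _, h, w = image.shape
    acc = None
    for short in min_sizes:
        # Resize so the short side matches `short`, capping the long side at `max_size`.
        scale = min(short / min(h, w), max_size / max(h, w))
        new_hw = (int(h * scale), int(w * scale))
        resized = F.interpolate(image, size=new_hw, mode="bilinear", align_corners=False)
        views = [resized, torch.flip(resized, dims=[3])] if flip else [resized]
        for i, view in enumerate(views):
            logits = model(view)                       # (1, C, h', w') assumed
            if i == 1:
                logits = torch.flip(logits, dims=[3])  # undo the horizontal flip
            logits = F.interpolate(logits, size=(h, w), mode="bilinear", align_corners=False)
            acc = logits if acc is None else acc + logits
    return acc / (len(min_sizes) * (2 if flip else 1))
```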
@@ -3,10 +3,6 @@ MODEL:
  BACKBONE:
    NAME: "D2InternImage"
  SEM_SEG_HEAD:
    NAME: "OneFormerHead"
    IGNORE_VALUE: 255
    NUM_CLASSES: 150
    LOSS_WEIGHT: 1.0
    CONVS_DIM: 1024
    MASK_DIM: 1024
  INTERNIMAGE:
@@ -30,4 +26,29 @@ MODEL:
    CONTEXT_LENGTH: 77
    N_CTX: 16
TEST:
  DETECTIONS_PER_IMAGE: 250
  DETECTIONS_PER_IMAGE: 250
INPUT:
  MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 896) for x in range(5, 21)]"]
  MIN_SIZE_TRAIN_SAMPLING: "choice"
  MIN_SIZE_TEST: 896
  MAX_SIZE_TRAIN: 3584
  MAX_SIZE_TEST: 3584
  CROP:
    ENABLED: True
    TYPE: "absolute"
    SIZE: (896, 896)
    SINGLE_CATEGORY_MAX_AREA: 1.0
  COLOR_AUG_SSD: True
  SIZE_DIVISIBILITY: 896 # used in dataset mapper
  FORMAT: "RGB"
TEST:
  DETECTIONS_PER_IMAGE: 250
  EVAL_PERIOD: 5000
  AUG:
    ENABLED: False
    MIN_SIZES: [448, 678, 896, 1120, 1344, 1568]
    MAX_SIZE: 6272
    FLIP: True
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.00002
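The `MIN_SIZE_TRAIN` entry above uses Detectron2's `!!python/object/apply:eval` YAML hook, so the list of training scales is built when the config is loaded. A plain-Python sketch of what that expression evaluates to:

```python
# What the MIN_SIZE_TRAIN expression above evaluates to: shortest-edge sizes from
# 0.5x to 2.0x of the 896 base resolution, sampled with "choice" during training.
min_size_train = [int(x * 0.1 * 896) for x in range(5, 21)]
print(min_size_train[:4], "...", min_size_train[-1])  # [448, 537, 627, 716] ... 1792
```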
@@ -4,9 +4,6 @@ MODEL:
NAME: "D2InternImage"
SEM_SEG_HEAD:
NAME: "OneFormerHead"
IGNORE_VALUE: 255
NUM_CLASSES: 150
LOSS_WEIGHT: 1.0
CONVS_DIM: 1024
MASK_DIM: 1024
INTERNIMAGE:
@@ -30,6 +27,8 @@ MODEL:
    CONTEXT_LENGTH: 77
    N_CTX: 16
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.00002
  STEPS: (655556, 735184)
  MAX_ITER: 737500
  AMP:
6 changes: 4 additions & 2 deletions demo/predictor.py
@@ -52,6 +52,8 @@ def run_on_image(self, image, task):
    # Convert image from OpenCV BGR format to Matplotlib RGB format.
    image = image[:, :, ::-1]
    vis_output = {}

    assert task in ['panoptic', 'semantic', 'instance'], "task should be one of 'panoptic', 'semantic', 'instance'"

    if task == 'panoptic':
        visualizer = Visualizer(image, metadata=self.metadata, instance_mode=ColorMode.IMAGE)
@@ -61,14 +63,14 @@ def run_on_image(self, image, task):
            panoptic_seg.to(self.cpu_device), segments_info, alpha=0.7
        )

    if task == 'panoptic' or task == 'semantic':
    if task == 'semantic':
        visualizer = Visualizer(image, metadata=self.metadata, instance_mode=ColorMode.IMAGE_BW)
        predictions = self.predictor(image, task)
        vis_output['semantic_inference'] = visualizer.draw_sem_seg(
            predictions["sem_seg"].argmax(dim=0).to(self.cpu_device), alpha=0.7
        )

    if task == 'panoptic' or task == 'instance':
    if task == 'instance':
        visualizer = Visualizer(image, metadata=self.metadata, instance_mode=ColorMode.IMAGE_BW)
        predictions = self.predictor(image, task)
        instances = predictions["instances"].to(self.cpu_device)
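After this change, `run_on_image` visualizes exactly one task per call and the new assert rejects anything outside the three supported strings. The sketch below shows roughly how the updated method would be driven; the `VisualizationDemo` construction and the `(predictions, vis_output)` return value are assumptions based on the surrounding demo code, not guarantees from this diff:

```python
# Rough usage sketch only: the VisualizationDemo constructor arguments and the
# (predictions, vis_output) return value are assumptions based on the demo code.
import cv2
from predictor import VisualizationDemo  # as imported in demo/demo.py

cfg = ...  # build the config as demo/demo.py does (omitted here)
demo = VisualizationDemo(cfg)

image = cv2.imread("input.jpg")  # BGR, which run_on_image converts to RGB itself

# 'task' is now required and must be 'panoptic', 'semantic', or 'instance';
# only the visualization for that single task is produced per call.
predictions, vis_output = demo.run_on_image(image, task="panoptic")
for name, vis in vis_output.items():   # e.g. 'panoptic_inference'
    vis.save(f"{name}.png")            # detectron2 VisImage objects have a .save() method
```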
