diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..7448793
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,3 @@
+work_dirs/
+artifacts/
+wandb/
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..c00c1f5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,42 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: '[Bug]'
+labels: bug
+assignees: ''
+---
+
+### Describe the bug
+
+A clear and concise description of what the bug is.
+
+\[here\]
+
+### To Reproduce
+
+The command you executed.
+
+```shell
+[here]
+```
+
+### Post related information
+
+1. The output of `pip list | grep "mmcv\|mmdet\|^torch"`
+ \[here\]
+2. Your config file if you modified it or created a new one.
+
+```python
+[here]
+```
+
+3. Your training log file if the problem occurs during training.
+ \[here\]
+4. Other code you modified in the `dethub` folder.
+ \[here\]
+
+### Additional context
+
+Add any other context about the problem here.
+
+\[here\]
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000..23b7c09
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,32 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: '[Feature]'
+labels: enhancement
+assignees: ''
+---
+
+### Describe the feature
+
+\[here\]
+
+### Motivation
+
+A clear and concise description of the motivation of the feature.
+Ex1. It is inconvenient when \[....\].
+Ex2. There is a recent paper \[....\], which is very helpful for \[....\].
+
+\[here\]
+
+### Related resources
+
+If there is an official code release or a third-party implementation, please provide the information here; it is very helpful.
+
+\[here\]
+
+### Additional context
+
+Add any other context or screenshots about the feature request here.
+If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
+
+\[here\]
diff --git a/.github/ISSUE_TEMPLATE/general-questions.md b/.github/ISSUE_TEMPLATE/general-questions.md
new file mode 100644
index 0000000..42d5fb2
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/general-questions.md
@@ -0,0 +1,31 @@
+---
+name: General questions
+about: Ask general questions to get help
+title: ''
+labels: help wanted
+assignees: ''
+---
+
+### Checklist
+
+- I have searched related issues but cannot get the expected help.
+- I have read related documents and don't know what to do.
+
+### Describe your question
+
+\[here\]
+
+### Post related information
+
+1. The output of `pip list | grep "mmcv\|mmdet\|^torch"`
+ \[here\]
+2. Your config file if you modified it or created a new one.
+
+```python
+[here]
+```
+
+3. Your training log file if the problem occurs during training.
+ \[here\]
+4. Other code you modified in the `dethub` folder.
+ \[here\]
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000..3916457
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,28 @@
+Thanks for your contribution; we appreciate it a lot. Following the instructions below will make your pull request healthier and easier to review. If you do not understand some items, don't worry: just create the pull request and seek help from the maintainers.
+
+## Motivation
+
+Please describe the motivation for this PR and the goal you want to achieve through it.
+
+## Modification
+
+Please briefly describe the modifications made in this PR.
+
+## Results (Optional)
+
+| Dataset | Model | box AP |
+| :-----: | :---: | :----: |
+| | | |
+
+## Use cases (Optional)
+
+If this PR introduces a new feature, it is better to list some use cases here and update the documentation.
+
+## Checklist
+
+**Before PR**:
+
+- [ ] Pre-commit or other linting tools are used to fix the potential lint issues.
+- [ ] Bug fixes are fully covered by unit tests, and the case that caused the bug is included in them.
+- [ ] The modification is covered by complete unit tests. If not, please add more unit tests to ensure correctness.
+- [ ] The documentation has been modified accordingly, like docstring or example tutorials.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..ea40c59
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,66 @@
+name: build
+
+on: [pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+      - name: Install pre-commit hook
+        run: |
+          pip install pre-commit
+          pre-commit install
+      - name: Linting
+        run: pre-commit run --all-files
+      - name: Check docstring coverage
+        run: |
+          pip install interrogate
+          interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 0 dethub
+
+  build_cuda:
+    runs-on: ubuntu-18.04
+    env:
+      NVIDIA_DRIVER_CAPABILITIES: 'utility,compute'
+      DEBIAN_FRONTEND: noninteractive
+      FORCE_CUDA: '1'
+    container:
+      image: nvcr.io/nvidia/pytorch:22.07-py3
+    strategy:
+      matrix:
+        include:
+          - mmcv: 1.6.0
+            mmcv_link: "cu115/torch1.11.0"
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install utils
+        run: pip install psutil
+      - name: Install system dependencies
+        run: |
+          apt-get update && apt-get install -y vim libgl1-mesa-dev
+          apt-get clean
+          rm -rf /var/lib/apt/lists/*
+      - name: Install mmdet dependencies
+        run: |
+          pip install setuptools
+          pip install --no-cache-dir openmim==0.3.0
+          pip install -r requirements/requirements.txt
+          pip install -r requirements/tests.txt
+          pip uninstall -y opencv-python && pip install opencv-python==4.5.1.48
+          MMCV_WITH_OPS=1 pip install mmcv==2.0.0rc1
+          pip install 'git+https://github.com/facebookresearch/detectron2.git'
+          python -c "import mmcv"
+      - name: Build and install
+        run: rm -rf .eggs && pip install -e .
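+      # Expose site-packages at a stable path; the project configs reference
+      # /opt/site-packages/mmdet/.mim.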
+      - name: Soft link
+        run: ln -s /opt/conda/lib/python3.8/site-packages /opt/site-packages
+      - name: Run unittests and generate coverage report
+        run: |
+          coverage run --branch --source=dethub -m pytest tests/
+          coverage xml
+          coverage report -m
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4fa2e23
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,173 @@
+### https://raw.github.com/github/gitignore/218a941be92679ce67d0484547e3e142b2f5f6f0/Global/macOS.gitignore
+
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+
+### https://raw.github.com/github/gitignore/218a941be92679ce67d0484547e3e142b2f5f6f0/Python.gitignore
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+.idea
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..13a2d96
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,45 @@
+repos:
+  - repo: https://github.com/PyCQA/flake8
+    rev: 4.0.1
+    hooks:
+      - id: flake8
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
+  - repo: https://github.com/pre-commit/mirrors-yapf
+    rev: v0.30.0
+    hooks:
+      - id: yapf
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.1.0
+    hooks:
+      - id: trailing-whitespace
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: requirements-txt-fixer
+      - id: double-quote-string-fixer
+      - id: check-merge-conflict
+      - id: fix-encoding-pragma
+        args: ["--remove"]
+      - id: mixed-line-ending
+        args: ["--fix=lf"]
+  - repo: https://github.com/executablebooks/mdformat
+    rev: 0.7.9
+    hooks:
+      - id: mdformat
+        args: ["--number", "--table-width", "200"]
+        additional_dependencies:
+          - mdformat-openmmlab
+          - mdformat_frontmatter
+          - linkify-it-py
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.1.0
+    hooks:
+      - id: codespell
+        args: ["--skip", "custom_det/core/evaluation/ot_cost.py"]
+  - repo: https://github.com/myint/docformatter
+    rev: v1.3.1
+    hooks:
+      - id: docformatter
+        args: ["--in-place", "--wrap-descriptions", "79"]
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..db8102a
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,27 @@
+FROM nvcr.io/nvidia/pytorch:22.07-py3
+
+RUN apt-get update && apt-get install -y \
+    git \
+    vim \
+    libgl1-mesa-dev
+ENV FORCE_CUDA="1"
+
+# Install python package.
+WORKDIR /dethub
+COPY ./ /dethub
+RUN pip install --upgrade pip && \
+ pip install --no-cache-dir openmim==0.3.1 && \
+ pip install . && \
+ pip uninstall -y opencv-python && pip install opencv-python==4.5.1.48 && \
+ MMCV_WITH_OPS=1 pip install mmcv==2.0.0rc1 && \
+ pip install 'git+https://github.com/facebookresearch/detectron2.git'
+
+# Language settings
+ENV LANG C.UTF-8
+ENV LANGUAGE en_US
+
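+# The project configs and README commands reference mmdet via
+# /opt/site-packages/mmdet/.mim, hence this stable path.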
+RUN ln -s /opt/conda/lib/python3.8/site-packages /opt/site-packages
+RUN git config --global --add safe.directory /workspace
+
+WORKDIR /workspace
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..390c37d
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,4 @@
+include requirements/*.txt
+include dethub/VERSION
+recursive-include dethub/.mim/configs *.py *.yml
+recursive-include dethub/.mim/tools *.sh *.py
diff --git a/README.md b/README.md
index f68aff5..8ddf386 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,64 @@
-# open-detection-experiments
\ No newline at end of file
+# DetHub
+
+[![build](https://github.com/okotaku/dethub/actions/workflows/build.yml/badge.svg)](https://github.com/okotaku/dethub/actions/workflows/build.yml)
+[![license](https://img.shields.io/github/license/okotaku/dethub.svg)](https://github.com/okotaku/dethub/blob/main/LICENSE)
+
+## Introduction
+
+DetHub is an open source hub for object detection and instance segmentation experiments. Our main contribution is to support detection datasets and share baselines.
+
+- Support more and more datasets
+- Provide reproducible baseline configs for these datasets
+- Provide pretrained models, results and inference codes for these datasets
+
+Documentation: [docs](docs)
+
+## Supported Datasets
+
+- [x] [TensorFlow - Help Protect the Great Barrier Reef (Kaggle)](configs/projects/gbr_cots/)
+- [x] [LIVECell dataset](configs/projects/livecell/)
+- [x] [Sartorius - Cell Instance Segmentation (Kaggle)](configs/projects/sartorius_cellseg/)
+
+## Get Started
+
+Please refer to [get_started.md](docs/source/get_started.md) to get started.
+Other tutorials:
+
+- [run](docs/source/run.md)
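+
+For a quick look at a pretrained model, the minimal sketch below uses the standard mmdet inference API with the GBR COTS weights released for this repo; it assumes you are inside the dethub container described in [get_started.md](docs/source/get_started.md). The documented route is `tools/image_demo.py` in each project README.
+
+```python
+from mmdet.apis import inference_detector, init_detector
+
+config = 'configs/projects/gbr_cots/yolox/yolox_s_gbr_cots.py'
+checkpoint = ('https://github.com/okotaku/dethub-weights/releases/'
+              'download/v0.0.1/yolox_s_gbr_cots-cefaa435.pth')
+
+# Build the model and load the pretrained weights.
+model = init_detector(config, checkpoint, device='cuda:0')
+
+# Run inference on one of the bundled demo images.
+result = inference_detector(model, 'configs/projects/gbr_cots/demo/5756.jpg')
+print(result)
+```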
+
+## Contributing
+
+We appreciate all contributions to improve DetHub. Please refer to [CONTRIBUTING.md](https://github.com/open-mmlab/mmcv/blob/master/CONTRIBUTING.md) for the contributing guideline.
+
+## License
+
+This project is released under the [Apache 2.0 license](LICENSE).
+
+## Acknowledgement
+
+This repo borrows the architecture design and part of the code from [mmdetection](https://github.com/open-mmlab/mmdetection).
+
+Also, please check the following OpenMMLab projects and their documentation.
+
+- [OpenMMLab](https://openmmlab.com/)
+- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision.
+- [MIM](https://github.com/open-mmlab/mim): MIM Installs OpenMMLab Packages.
+- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
+
+## Citation
+
+```latex
+@article{mmdetection,
+ title = {{MMDetection}: Open MMLab Detection Toolbox and Benchmark},
+ author = {Chen, Kai and Wang, Jiaqi and Pang, Jiangmiao and Cao, Yuhang and
+ Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and
+ Liu, Ziwei and Xu, Jiarui and Zhang, Zheng and Cheng, Dazhi and
+ Zhu, Chenchen and Cheng, Tianheng and Zhao, Qijie and Li, Buyu and
+ Lu, Xin and Zhu, Rui and Wu, Yue and Dai, Jifeng and Wang, Jingdong
+ and Shi, Jianping and Ouyang, Wanli and Loy, Chen Change and Lin, Dahua},
+ journal= {arXiv preprint arXiv:1906.07155},
+ year={2019}
+}
+```
diff --git a/configs/_base_/models/yolox_s.py b/configs/_base_/models/yolox_s.py
new file mode 100644
index 0000000..a74e857
--- /dev/null
+++ b/configs/_base_/models/yolox_s.py
@@ -0,0 +1,62 @@
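+# YOLOX-S model settings. deepen_factor and widen_factor scale the depth
+# and width of the CSPDarknet backbone; 0.33/0.5 correspond to the "s" size.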
+model = dict(
+ type='YOLOX',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='BatchSyncRandomResize',
+ random_size_range=(480, 800),
+ size_divisor=32,
+ interval=10)
+ ]),
+ backbone=dict(
+ type='CSPDarknet',
+ deepen_factor=0.33,
+ widen_factor=0.5,
+ out_indices=(2, 3, 4),
+ use_depthwise=False,
+ spp_kernal_sizes=(5, 9, 13),
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+ act_cfg=dict(type='Swish'),
+ ),
+ neck=dict(
+ type='YOLOXPAFPN',
+ in_channels=[128, 256, 512],
+ out_channels=128,
+ num_csp_blocks=1,
+ use_depthwise=False,
+ upsample_cfg=dict(scale_factor=2, mode='nearest'),
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+ act_cfg=dict(type='Swish')),
+ bbox_head=dict(
+ type='YOLOXHead',
+ num_classes=80,
+ in_channels=128,
+ feat_channels=128,
+ stacked_convs=2,
+ strides=(8, 16, 32),
+ use_depthwise=False,
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+ act_cfg=dict(type='Swish'),
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ reduction='sum',
+ loss_weight=1.0),
+ loss_bbox=dict(
+ type='IoULoss',
+ mode='square',
+ eps=1e-16,
+ reduction='sum',
+ loss_weight=5.0),
+ loss_obj=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ reduction='sum',
+ loss_weight=1.0),
+ loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
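+ # SimOTA is YOLOX's dynamic label assignment strategy.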
+ train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
+ # To align with the official implementation, the score threshold is
+ # 0.01 in the val phase and 0.001 in the test phase.
+ test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
diff --git a/configs/_base_/models/yolox_x.py b/configs/_base_/models/yolox_x.py
new file mode 100644
index 0000000..ed5deda
--- /dev/null
+++ b/configs/_base_/models/yolox_x.py
@@ -0,0 +1,62 @@
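+# YOLOX-X model settings. deepen_factor and widen_factor scale the depth
+# and width of the CSPDarknet backbone; 1.33/1.25 correspond to the "x" size.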
+model = dict(
+ type='YOLOX',
+ data_preprocessor=dict(
+ type='DetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='BatchSyncRandomResize',
+ random_size_range=(480, 800),
+ size_divisor=32,
+ interval=10)
+ ]),
+ backbone=dict(
+ type='CSPDarknet',
+ deepen_factor=1.33,
+ widen_factor=1.25,
+ out_indices=(2, 3, 4),
+ use_depthwise=False,
+ spp_kernal_sizes=(5, 9, 13),
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+ act_cfg=dict(type='Swish'),
+ ),
+ neck=dict(
+ type='YOLOXPAFPN',
+ in_channels=[320, 640, 1280],
+ out_channels=320,
+ num_csp_blocks=4,
+ use_depthwise=False,
+ upsample_cfg=dict(scale_factor=2, mode='nearest'),
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+ act_cfg=dict(type='Swish')),
+ bbox_head=dict(
+ type='YOLOXHead',
+ num_classes=80,
+ in_channels=320,
+ feat_channels=320,
+ stacked_convs=2,
+ strides=(8, 16, 32),
+ use_depthwise=False,
+ norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+ act_cfg=dict(type='Swish'),
+ loss_cls=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ reduction='sum',
+ loss_weight=1.0),
+ loss_bbox=dict(
+ type='IoULoss',
+ mode='square',
+ eps=1e-16,
+ reduction='sum',
+ loss_weight=5.0),
+ loss_obj=dict(
+ type='CrossEntropyLoss',
+ use_sigmoid=True,
+ reduction='sum',
+ loss_weight=1.0),
+ loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
+ train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
+ # To align with the official implementation, the score threshold is
+ # 0.01 in the val phase and 0.001 in the test phase.
+ test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
diff --git a/configs/projects/gbr_cots/README.md b/configs/projects/gbr_cots/README.md
new file mode 100644
index 0000000..fb65997
--- /dev/null
+++ b/configs/projects/gbr_cots/README.md
@@ -0,0 +1,60 @@
+# TensorFlow - Help Protect the Great Barrier Reef (Kaggle)
+
+Kaggle [TensorFlow - Help Protect the Great Barrier Reef](https://www.kaggle.com/competitions/tensorflow-great-barrier-reef)
+
+## Run demo
+
+```
+$ docker compose exec dethub python tools/image_demo.py configs/projects/gbr_cots/demo/5756.jpg configs/projects/gbr_cots/yolox/yolox_s_gbr_cots.py https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_s_gbr_cots-cefaa435.pth --out-file configs/projects/gbr_cots/demo/5756_demo.jpg
+```
+
+![plot](demo/5756_demo.jpg)
+
+## Prepare datasets
+
+1. Download competition data from Kaggle
+
+```
+kaggle competitions download -c tensorflow-great-barrier-reef
+```
+
+2. Download coco format json.
+
+```
+kaggle datasets download -d takuok/gbrcotscocoformat
+```
+
+\*We prepared the COCO format files with [this script](../../../tools/dataset_converters/prepare_gbr_cots.py).
+
+3. Unzip the files as follows
+
+```
+data/gbr_cots
+├── train_images
+├── train.csv
+├── dtrain_g0.json
+└── dval_g0.json
+```
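+
+To sanity check the prepared annotations, an optional snippet using pycocotools (installed as an mmdet dependency):
+
+```python
+from pycocotools.coco import COCO
+
+# Load the prepared COCO-format annotations and count entries.
+coco = COCO('data/gbr_cots/dtrain_g0.json')
+print(len(coco.imgs), 'images /', len(coco.anns), 'annotations')
+```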
+
+## Run train
+
+Set environment variables
+
+```
+$ export DATA_DIR=/path/to/data
+```
+
+Start a docker container
+
+```
+$ docker compose up -d dethub
+```
+
+Run train
+
+```
+# single gpu
+$ docker compose exec dethub python /opt/site-packages/mmdet/.mim/tools/train.py configs/projects/gbr_cots/yolox/yolox_s_gbr_cots.py
+# multi gpus
+$ docker compose exec dethub python -m torch.distributed.launch --nproc_per_node=2 /opt/site-packages/mmdet/.mim/tools/train.py configs/projects/gbr_cots/yolox/yolox_s_gbr_cots.py --launcher pytorch
+```
diff --git a/configs/projects/gbr_cots/demo/5756.jpg b/configs/projects/gbr_cots/demo/5756.jpg
new file mode 100644
index 0000000..a66df4a
Binary files /dev/null and b/configs/projects/gbr_cots/demo/5756.jpg differ
diff --git a/configs/projects/gbr_cots/demo/5756_demo.jpg b/configs/projects/gbr_cots/demo/5756_demo.jpg
new file mode 100644
index 0000000..697100f
Binary files /dev/null and b/configs/projects/gbr_cots/demo/5756_demo.jpg differ
diff --git a/configs/projects/gbr_cots/yolox/README.md b/configs/projects/gbr_cots/yolox/README.md
new file mode 100644
index 0000000..2a59ceb
--- /dev/null
+++ b/configs/projects/gbr_cots/yolox/README.md
@@ -0,0 +1,31 @@
+# YOLOX
+
+> [YOLOX: Exceeding YOLO Series in 2021](https://arxiv.org/abs/2107.08430)
+
+
+
+## Abstract
+
+In this report, we present some experienced improvements to YOLO series, forming a new high-performance detector -- YOLOX. We switch the YOLO detector to an anchor-free manner and conduct other advanced detection techniques, i.e., a decoupled head and the leading label assignment strategy SimOTA to achieve state-of-the-art results across a large scale range of models: For YOLO-Nano with only 0.91M parameters and 1.08G FLOPs, we get 25.3% AP on COCO, surpassing NanoDet by 1.8% AP; for YOLOv3, one of the most widely used detectors in industry, we boost it to 47.3% AP on COCO, outperforming the current best practice by 3.0% AP; for YOLOX-L with roughly the same amount of parameters as YOLOv4-CSP, YOLOv5-L, we achieve 50.0% AP on COCO at a speed of 68.9 FPS on Tesla V100, exceeding YOLOv5-L by 1.8% AP. Further, we won the 1st Place on Streaming Perception Challenge (Workshop on Autonomous Driving at CVPR 2021) using a single YOLOX-L model. We hope this report can provide useful experience for developers and researchers in practical scenes, and we also provide deploy versions with ONNX, TensorRT, NCNN, and Openvino supported.
+
+
+
+
+
+## Results and Models
+
+| Backbone | box AP | Config | Download |
+| :--------------: | :----: | :---------------------------: | :-------------------------------------------------------------------------------------------------------: |
+| yolox_s_gbr_cots | 40.3 | [config](yolox_s_gbr_cots.py) | [model](https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_s_gbr_cots-cefaa435.pth) |
+| yolox_x_gbr_cots | 44.1 | [config](yolox_x_gbr_cots.py) | [model](https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_x_gbr_cots-b8614c53.pth) |
+
+## Citation
+
+```latex
+@article{yolox2021,
+ title={{YOLOX}: Exceeding YOLO Series in 2021},
+ author={Ge, Zheng and Liu, Songtao and Wang, Feng and Li, Zeming and Sun, Jian},
+ journal={arXiv preprint arXiv:2107.08430},
+ year={2021}
+}
+```
diff --git a/configs/projects/gbr_cots/yolox/yolox_s_gbr_cots.py b/configs/projects/gbr_cots/yolox/yolox_s_gbr_cots.py
new file mode 100644
index 0000000..2e5d712
--- /dev/null
+++ b/configs/projects/gbr_cots/yolox/yolox_s_gbr_cots.py
@@ -0,0 +1,217 @@
+_base_ = [
+ '/opt/site-packages/mmdet/.mim/configs/_base_/default_runtime.py',
+ '../../../_base_/models/yolox_s.py'
+]
+custom_imports = dict(imports=['dethub'], allow_failed_imports=False)
+
+img_scale = (1536, 1536) # height, width
+img_scale_test = (1536 * 3, 1536 * 3) # height, width
+
+# model settings
+num_classes = 1
+model = dict(
+ data_preprocessor=dict(
+ _delete_=True,
+ type='DetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='BatchSyncRandomResize',
+ random_size_range=(1024, 2048),
+ size_divisor=32,
+ interval=10)
+ ]),
+ bbox_head=dict(num_classes=num_classes))
+
+# dataset settings
+data_root = 'data/gbr_cots/'
+dataset_type = 'CocoDataset'
+file_client_args = dict(backend='disk')
+
+metainfo = dict(CLASSES=['gbr'], PALETTE=[(220, 20, 60)])
+train_pipeline = [
+ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
+ dict(
+ type='RandomAffine',
+ scaling_ratio_range=(0.1, 2),
+ border=(-img_scale[0] // 2, -img_scale[1] // 2)),
+ dict(
+ type='MixUp',
+ img_scale=img_scale,
+ ratio_range=(0.5, 1.5),
+ pad_val=114.0),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='DumpImage', max_imgs=100, dump_dir='dump'),
+ # Multi-scale training is handled by the BatchSyncRandomResize batch
+ # augmentation in the data preprocessor rather than in this pipeline.
+ # Resize and Pad take effect in the last num_last_epochs epochs, after
+ # YOLOXModeSwitchHook disables Mosaic, RandomAffine, and MixUp.
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ # If the image is three-channel, the pad value needs
+ # to be set separately for each channel.
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='PackDetInputs')
+]
+
+train_dataset = dict(
+ # use MultiImageMixDataset wrapper to support mosaic and mixup
+ type='MultiImageMixDataset',
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dtrain_g0.json',
+ data_prefix=dict(img=''),
+ pipeline=[
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', scale=img_scale_test, keep_ratio=True),
+ dict(
+ type='RandomCrop',
+ crop_type='absolute_range',
+ crop_size=(1024, 2048),
+ recompute_bbox=True,
+ allow_negative_crop=True),
+ ],
+ filter_cfg=dict(filter_empty_gt=False)),
+ pipeline=train_pipeline)
+
+test_pipeline = [
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='Resize', scale=img_scale_test, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+
+train_dataloader = dict(
+ batch_size=4,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=train_dataset)
+val_dataloader = dict(
+ batch_size=4,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dval_g0.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ filter_cfg=dict(filter_empty_gt=False),
+ pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoMetric', ann_file=data_root + 'dval_g0.json', metric='bbox')
+test_evaluator = val_evaluator
+
+# training settings
+max_epochs = 20
+num_last_epochs = 10
+interval = 5
+
+train_cfg = dict(
+ type='EpochBasedTrainLoop',
+ max_epochs=max_epochs,
+ val_interval=interval,
+ dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# optimizer
+base_lr = 0.01
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(
+ type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
+ nesterov=True),
+ paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
+
+# learning rate
+param_scheduler = [
+ dict(
+ # use quadratic formula to warm up 3 epochs
+ # and lr is updated by iteration
+ # TODO: fix default scope in get function
+ type='mmdet.QuadraticWarmupLR',
+ by_epoch=True,
+ begin=0,
+ end=3,
+ convert_to_iter_based=True),
+ dict(
+ # use cosine lr from epoch 3 to epoch max_epochs - 5
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=3,
+ T_max=max_epochs - 5,
+ end=max_epochs - 5,
+ by_epoch=True,
+ convert_to_iter_based=True),
+ dict(
+ # use fixed lr during the last 5 epochs
+ type='ConstantLR',
+ by_epoch=True,
+ factor=1,
+ begin=max_epochs - 5,
+ end=max_epochs,
+ )
+]
+
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='auto',
+ interval=interval,
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
+ ),
+ visualization=dict(draw=False, interval=1))
+custom_hooks = [
+ dict(
+ type='YOLOXModeSwitchHook',
+ num_last_epochs=num_last_epochs,
+ priority=48),
+ dict(type='SyncNormHook', priority=48),
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ strict_load=False,
+ priority=49)
+]
+vis_backends = [
+ dict(type='LocalVisBackend'),
+ dict(
+ type='WandbVisBackend',
+ init_kwargs=dict(project='gbr_cots', name='yolox_s_gbr_cots'),
+ define_metric_cfg={'coco/bbox_mAP': 'max'})
+]
+visualizer = dict(vis_backends=vis_backends)
+
+load_from = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth' # noqa
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(enable=True, base_batch_size=64)
diff --git a/configs/projects/gbr_cots/yolox/yolox_x_gbr_cots.py b/configs/projects/gbr_cots/yolox/yolox_x_gbr_cots.py
new file mode 100644
index 0000000..dc2e24a
--- /dev/null
+++ b/configs/projects/gbr_cots/yolox/yolox_x_gbr_cots.py
@@ -0,0 +1,218 @@
+_base_ = [
+ '/opt/site-packages/mmdet/.mim/configs/_base_/default_runtime.py',
+ '../../../_base_/models/yolox_x.py'
+]
+custom_imports = dict(imports=['dethub'], allow_failed_imports=False)
+
+fp16 = dict(loss_scale='dynamic')
+img_scale = (1536, 1536) # height, width
+img_scale_test = (1536 * 3, 1536 * 3) # height, width
+
+# model settings
+num_classes = 1
+model = dict(
+ data_preprocessor=dict(
+ _delete_=True,
+ type='DetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='BatchSyncRandomResize',
+ random_size_range=(1280, 1792),
+ size_divisor=32,
+ interval=10)
+ ]),
+ bbox_head=dict(num_classes=num_classes))
+
+# dataset settings
+data_root = 'data/gbr_cots/'
+dataset_type = 'CocoDataset'
+file_client_args = dict(backend='disk')
+
+metainfo = dict(CLASSES=['gbr'], PALETTE=[(220, 20, 60)])
+train_pipeline = [
+ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
+ dict(
+ type='RandomAffine',
+ scaling_ratio_range=(0.1, 2),
+ border=(-img_scale[0] // 2, -img_scale[1] // 2)),
+ dict(
+ type='MixUp',
+ img_scale=img_scale,
+ ratio_range=(0.5, 1.5),
+ pad_val=114.0),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='DumpImage', max_imgs=100, dump_dir='dump'),
+ # Multi-scale training is handled by the BatchSyncRandomResize batch
+ # augmentation in the data preprocessor rather than in this pipeline.
+ # Resize and Pad take effect in the last num_last_epochs epochs, after
+ # YOLOXModeSwitchHook disables Mosaic, RandomAffine, and MixUp.
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ # If the image is three-channel, the pad value needs
+ # to be set separately for each channel.
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='PackDetInputs')
+]
+
+train_dataset = dict(
+ # use MultiImageMixDataset wrapper to support mosaic and mixup
+ type='MultiImageMixDataset',
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dtrain_g0.json',
+ data_prefix=dict(img=''),
+ pipeline=[
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(type='Resize', scale=img_scale_test, keep_ratio=True),
+ dict(
+ type='RandomCrop',
+ crop_type='absolute_range',
+ crop_size=(1024, 2048),
+ recompute_bbox=True,
+ allow_negative_crop=True),
+ ],
+ filter_cfg=dict(filter_empty_gt=False)),
+ pipeline=train_pipeline)
+
+test_pipeline = [
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='Resize', scale=img_scale_test, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+
+train_dataloader = dict(
+ batch_size=2,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=train_dataset)
+val_dataloader = dict(
+ batch_size=2,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dval_g0.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ filter_cfg=dict(filter_empty_gt=False),
+ pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoMetric', ann_file=data_root + 'dval_g0.json', metric='bbox')
+test_evaluator = val_evaluator
+
+# training settings
+max_epochs = 20
+num_last_epochs = 10
+interval = 5
+
+train_cfg = dict(
+ type='EpochBasedTrainLoop',
+ max_epochs=max_epochs,
+ val_interval=interval,
+ dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# optimizer
+base_lr = 0.005
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(
+ type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
+ nesterov=True),
+ paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
+
+# learning rate
+param_scheduler = [
+ dict(
+ # use quadratic formula to warm up 3 epochs
+ # and lr is updated by iteration
+ # TODO: fix default scope in get function
+ type='mmdet.QuadraticWarmupLR',
+ by_epoch=True,
+ begin=0,
+ end=3,
+ convert_to_iter_based=True),
+ dict(
+ # use cosine lr from epoch 3 to epoch max_epochs - 5
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=3,
+ T_max=max_epochs - 5,
+ end=max_epochs - 5,
+ by_epoch=True,
+ convert_to_iter_based=True),
+ dict(
+ # use fixed lr during the last 5 epochs
+ type='ConstantLR',
+ by_epoch=True,
+ factor=1,
+ begin=max_epochs - 5,
+ end=max_epochs,
+ )
+]
+
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='auto',
+ interval=interval,
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
+ ),
+ visualization=dict(draw=False, interval=1))
+custom_hooks = [
+ dict(
+ type='YOLOXModeSwitchHook',
+ num_last_epochs=num_last_epochs,
+ priority=48),
+ dict(type='SyncNormHook', priority=48),
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ strict_load=False,
+ priority=49)
+]
+vis_backends = [
+ dict(type='LocalVisBackend'),
+ dict(
+ type='WandbVisBackend',
+ init_kwargs=dict(project='gbr_cots', name='yolox_x_gbr_cots'),
+ define_metric_cfg={'coco/bbox_mAP': 'max'})
+]
+visualizer = dict(vis_backends=vis_backends)
+
+load_from = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth' # noqa
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(enable=True, base_batch_size=64)
diff --git a/configs/projects/livecell/README.md b/configs/projects/livecell/README.md
new file mode 100644
index 0000000..8a8af5a
--- /dev/null
+++ b/configs/projects/livecell/README.md
@@ -0,0 +1,82 @@
+# LIVECell dataset
+
+[LIVECell dataset](https://sartorius-research.github.io/LIVECell/)
+
+## Abstract
+
+Light microscopy combined with well-established protocols of two-dimensional cell culture facilitates high-throughput quantitative imaging to study biological phenomena. Accurate segmentation of individual cells in images enables exploration of complex biological questions, but can require sophisticated imaging processing pipelines in cases of low contrast and high object density. Deep learning-based methods are considered state-of-the-art for image segmentation but typically require vast amounts of annotated data, for which there is no suitable resource available in the field of label-free cellular imaging. Here, we present LIVECell, a large, high-quality, manually annotated and expert-validated dataset of phase-contrast images, consisting of over 1.6 million cells from a diverse set of cell morphologies and culture densities. To further demonstrate its use, we train convolutional neural network-based models using LIVECell and evaluate model segmentation accuracy with a proposed suite of benchmarks.
+
+## Run demo
+
+```
+$ docker compose exec dethub python tools/image_demo.py configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1.tif configs/projects/livecell/yolox/yolox_s_livecell.py https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_s_livecell-b3f4347f.pth --out-file configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1_demo.jpg
+```
+
+![plot](demo/A172_Phase_A7_1_00d00h00m_1_demo.jpg)
+
+## Prepare datasets
+
+1. Download the LIVECell dataset from https://github.com/sartorius-research/LIVECell
+
+```
+wget http://livecell-dataset.s3.eu-central-1.amazonaws.com/LIVECell_dataset_2021/images.zip
+```
+
+2. Download coco format json.
+
+```
+kaggle datasets download -d takuok/livecellcocoformat
+```
+
+\*We prepared the COCO format files with [this script](../../../tools/dataset_converters/prepare_livecell.py).
+The original annotation files can be downloaded from https://github.com/sartorius-research/LIVECell:
+
+```
+wget http://livecell-dataset.s3.eu-central-1.amazonaws.com/LIVECell_dataset_2021/annotations/LIVECell/livecell_coco_train.json
+wget http://livecell-dataset.s3.eu-central-1.amazonaws.com/LIVECell_dataset_2021/annotations/LIVECell/livecell_coco_val.json
+wget http://livecell-dataset.s3.eu-central-1.amazonaws.com/LIVECell_dataset_2021/annotations/LIVECell/livecell_coco_test.json
+```
+
+3. Unzip the files as follows
+
+```
+data/livecell
+├── images
+├── livecell_coco_train_8class.json
+├── livecell_coco_val_8class.json
+└── livecell_coco_test_8class.json
+```
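+
+To verify that the 8 class names line up with `metainfo` in the configs, a quick optional check with pycocotools:
+
+```python
+from pycocotools.coco import COCO
+
+coco = COCO('data/livecell/livecell_coco_train_8class.json')
+# Expect: shsy5y, a172, bt474, bv2, huh7, mcf7, skov3, skbr3
+print([cat['name'] for cat in coco.loadCats(coco.getCatIds())])
+```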
+
+## Run train
+
+Set environment variables
+
+```
+$ export DATA_DIR=/path/to/data
+```
+
+Start a docker container
+
+```
+$ docker compose up -d dethub
+```
+
+Run train
+
+```
+# single gpu
+$ docker compose exec dethub python /opt/site-packages/mmdet/.mim/tools/train.py configs/projects/livecell/yolox/yolox_s_livecell.py
+# multi gpus
+$ docker compose exec dethub python -m torch.distributed.launch --nproc_per_node=2 /opt/site-packages/mmdet/.mim/tools/train.py configs/projects/livecell/yolox/yolox_s_livecell.py --launcher pytorch
+```
+
+## Acknowledgement
+
+[Kaggle Sartorius - Cell Instance Segmentation 1st place solution](https://github.com/tascj/kaggle-sartorius-cell-instance-segmentation-solution)
diff --git a/configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1.tif b/configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1.tif
new file mode 100644
index 0000000..cc6e2d6
Binary files /dev/null and b/configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1.tif differ
diff --git a/configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1_demo.jpg b/configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1_demo.jpg
new file mode 100644
index 0000000..62517e8
Binary files /dev/null and b/configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1_demo.jpg differ
diff --git a/configs/projects/livecell/yolox/README.md b/configs/projects/livecell/yolox/README.md
new file mode 100644
index 0000000..a9a5d9b
--- /dev/null
+++ b/configs/projects/livecell/yolox/README.md
@@ -0,0 +1,33 @@
+# YOLOX
+
+> [YOLOX: Exceeding YOLO Series in 2021](https://arxiv.org/abs/2107.08430)
+
+
+
+## Abstract
+
+In this report, we present some experienced improvements to YOLO series, forming a new high-performance detector -- YOLOX. We switch the YOLO detector to an anchor-free manner and conduct other advanced detection techniques, i.e., a decoupled head and the leading label assignment strategy SimOTA to achieve state-of-the-art results across a large scale range of models: For YOLO-Nano with only 0.91M parameters and 1.08G FLOPs, we get 25.3% AP on COCO, surpassing NanoDet by 1.8% AP; for YOLOv3, one of the most widely used detectors in industry, we boost it to 47.3% AP on COCO, outperforming the current best practice by 3.0% AP; for YOLOX-L with roughly the same amount of parameters as YOLOv4-CSP, YOLOv5-L, we achieve 50.0% AP on COCO at a speed of 68.9 FPS on Tesla V100, exceeding YOLOv5-L by 1.8% AP. Further, we won the 1st Place on Streaming Perception Challenge (Workshop on Autonomous Driving at CVPR 2021) using a single YOLOX-L model. We hope this report can provide useful experience for developers and researchers in practical scenes, and we also provide deploy versions with ONNX, TensorRT, NCNN, and Openvino supported.
+
+
+
+
+
+## Results and Models
+
+### Box Results
+
+| Backbone | box AP | Config | Download |
+| :--------------: | :----: | :---------------------------: | :-------------------------------------------------------------------------------------------------------: |
+| yolox_s_livecell | 53.4 | [config](yolox_s_livecell.py) | [model](https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_s_livecell-b3f4347f.pth) |
+| yolox_x_livecell | 55.1 | [config](yolox_x_livecell.py) | [model](https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_x_livecell-b1fb7170.pth) |
+
+## Citation
+
+```latex
+@article{yolox2021,
+ title={{YOLOX}: Exceeding YOLO Series in 2021},
+ author={Ge, Zheng and Liu, Songtao and Wang, Feng and Li, Zeming and Sun, Jian},
+ journal={arXiv preprint arXiv:2107.08430},
+ year={2021}
+}
+```
diff --git a/configs/projects/livecell/yolox/yolox_s_livecell.py b/configs/projects/livecell/yolox/yolox_s_livecell.py
new file mode 100644
index 0000000..6ce880c
--- /dev/null
+++ b/configs/projects/livecell/yolox/yolox_s_livecell.py
@@ -0,0 +1,218 @@
+_base_ = [
+ '/opt/site-packages/mmdet/.mim/configs/_base_/default_runtime.py',
+ '../../../_base_/models/yolox_s.py'
+]
+custom_imports = dict(imports=['dethub'], allow_failed_imports=False)
+fp16 = dict(loss_scale=512.)
+
+img_scale = (1536, 1536) # height, width
+
+# model settings
+num_classes = 8
+model = dict(
+ data_preprocessor=dict(
+ _delete_=True,
+ type='DetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='BatchSyncRandomResize',
+ random_size_range=(1024, 2048),
+ size_divisor=32,
+ interval=10)
+ ]),
+ bbox_head=dict(num_classes=num_classes))
+
+# dataset settings
+data_root = 'data/livecell/'
+dataset_type = 'CocoDataset'
+file_client_args = dict(backend='disk')
+
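+# Class names and palette must match the categories in the *_8class.json
+# annotation files.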
+metainfo = dict(
+ CLASSES=[
+ 'shsy5y', 'a172', 'bt474', 'bv2', 'huh7', 'mcf7', 'skov3', 'skbr3'
+ ],
+ PALETTE=[(220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230),
+ (106, 0, 228), (0, 60, 100), (0, 80, 100), (0, 0, 70)])
+train_pipeline = [
+ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
+ dict(
+ type='RandomAffine',
+ scaling_ratio_range=(0.1, 2),
+ border=(-img_scale[0] // 2, -img_scale[1] // 2)),
+ dict(
+ type='MixUp',
+ img_scale=img_scale,
+ ratio_range=(0.5, 1.5),
+ pad_val=114.0),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='DumpImage', max_imgs=100, dump_dir='dump'),
+ # Multi-scale training is handled by the BatchSyncRandomResize batch
+ # augmentation in the data preprocessor rather than in this pipeline.
+ # Resize and Pad take effect in the last num_last_epochs epochs, after
+ # YOLOXModeSwitchHook disables Mosaic, RandomAffine, and MixUp.
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ # If the image is three-channel, the pad value needs
+ # to be set separately for each channel.
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
+ dict(type='PackDetInputs')
+]
+
+train_dataset = dict(
+ # use MultiImageMixDataset wrapper to support mosaic and mixup
+ type='MultiImageMixDataset',
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='livecell_coco_train_8class.json',
+ data_prefix=dict(img='images/livecell_train_val_images/'),
+ pipeline=[
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='LoadAnnotations', with_bbox=True)
+ ],
+ filter_cfg=dict(filter_empty_gt=False)),
+ pipeline=train_pipeline)
+
+test_pipeline = [
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+
+train_dataloader = dict(
+ batch_size=4,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=train_dataset)
+val_dataloader = dict(
+ batch_size=4,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='livecell_coco_val_8class.json',
+ data_prefix=dict(img='images/livecell_train_val_images/'),
+ test_mode=True,
+ pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoFastMetric',
+ ann_file=data_root + 'livecell_coco_val_8class.json',
+ metric='bbox',
+ proposal_nums=(100, 300, 3000))
+test_evaluator = val_evaluator
+
+# training settings
+max_epochs = 50
+num_last_epochs = 10
+interval = 10
+
+train_cfg = dict(
+ type='EpochBasedTrainLoop',
+ max_epochs=max_epochs,
+ val_interval=interval,
+ dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# optimizer
+base_lr = 0.05
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(
+ type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
+ nesterov=True),
+ paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
+
+# learning rate
+param_scheduler = [
+ dict(
+ # use quadratic formula to warm up 5 epochs
+ # and lr is updated by iteration
+ # TODO: fix default scope in get function
+ type='mmdet.QuadraticWarmupLR',
+ by_epoch=True,
+ begin=0,
+ end=5,
+ convert_to_iter_based=True),
+ dict(
+ # use cosine lr from epoch 5 to epoch max_epochs - num_last_epochs
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=5,
+ T_max=max_epochs - num_last_epochs,
+ end=max_epochs - num_last_epochs,
+ by_epoch=True,
+ convert_to_iter_based=True),
+ dict(
+ # use fixed lr during the last num_last_epochs epochs
+ type='ConstantLR',
+ by_epoch=True,
+ factor=1,
+ begin=max_epochs - num_last_epochs,
+ end=max_epochs,
+ )
+]
+
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='auto',
+ interval=interval,
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
+ ),
+ visualization=dict(draw=False, interval=5))
+custom_hooks = [
+ dict(
+ type='YOLOXModeSwitchHook',
+ num_last_epochs=num_last_epochs,
+ priority=48),
+ dict(type='SyncNormHook', priority=48),
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ strict_load=False,
+ priority=49)
+]
+vis_backends = [
+ dict(type='LocalVisBackend'),
+ dict(
+ type='WandbVisBackend',
+ init_kwargs=dict(project='livecell', name='yolox_s_livecell'),
+ define_metric_cfg={'coco/bbox_mAP': 'max'})
+]
+visualizer = dict(vis_backends=vis_backends)
+
+load_from = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth' # noqa
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(enable=True, base_batch_size=64)
diff --git a/configs/projects/livecell/yolox/yolox_x_livecell.py b/configs/projects/livecell/yolox/yolox_x_livecell.py
new file mode 100644
index 0000000..191f10e
--- /dev/null
+++ b/configs/projects/livecell/yolox/yolox_x_livecell.py
@@ -0,0 +1,218 @@
+_base_ = [
+ '/opt/site-packages/mmdet/.mim/configs/_base_/default_runtime.py',
+ '../../../_base_/models/yolox_x.py'
+]
+custom_imports = dict(imports=['dethub'], allow_failed_imports=False)
+fp16 = dict(loss_scale=512.)
+
+img_scale = (1536, 1536) # height, width
+
+# model settings
+num_classes = 8
+model = dict(
+ data_preprocessor=dict(
+ _delete_=True,
+ type='DetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='BatchSyncRandomResize',
+ random_size_range=(1280, 1792),
+ size_divisor=32,
+ interval=10)
+ ]),
+ bbox_head=dict(num_classes=num_classes))
+
+# dataset settings
+data_root = 'data/livecell/'
+dataset_type = 'CocoDataset'
+file_client_args = dict(backend='disk')
+
+metainfo = dict(
+ CLASSES=[
+ 'shsy5y', 'a172', 'bt474', 'bv2', 'huh7', 'mcf7', 'skov3', 'skbr3'
+ ],
+ PALETTE=[(220, 20, 60), (119, 11, 32), (0, 0, 142), (0, 0, 230),
+ (106, 0, 228), (0, 60, 100), (0, 80, 100), (0, 0, 70)])
+train_pipeline = [
+ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
+ dict(
+ type='RandomAffine',
+ scaling_ratio_range=(0.1, 2),
+ border=(-img_scale[0] // 2, -img_scale[1] // 2)),
+ dict(
+ type='MixUp',
+ img_scale=img_scale,
+ ratio_range=(0.5, 1.5),
+ pad_val=114.0),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='DumpImage', max_imgs=100, dump_dir='dump'),
+ # Multi-scale training is handled by the BatchSyncRandomResize batch
+ # augmentation in the data preprocessor rather than in this pipeline.
+ # Resize and Pad take effect in the last num_last_epochs epochs, after
+ # YOLOXModeSwitchHook disables Mosaic, RandomAffine, and MixUp.
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ # If the image is three-channel, the pad value needs
+ # to be set separately for each channel.
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
+ dict(type='PackDetInputs')
+]
+
+train_dataset = dict(
+ # use MultiImageMixDataset wrapper to support mosaic and mixup
+ type='MultiImageMixDataset',
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='livecell_coco_train_8class.json',
+ data_prefix=dict(img='images/livecell_train_val_images/'),
+ pipeline=[
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='LoadAnnotations', with_bbox=True)
+ ],
+ filter_cfg=dict(filter_empty_gt=False)),
+ pipeline=train_pipeline)
+
+test_pipeline = [
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+
+train_dataloader = dict(
+ batch_size=2,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=train_dataset)
+val_dataloader = dict(
+ batch_size=2,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='livecell_coco_val_8class.json',
+ data_prefix=dict(img='images/livecell_train_val_images/'),
+ test_mode=True,
+ pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoFastMetric',
+ ann_file=data_root + 'livecell_coco_val_8class.json',
+ metric='bbox',
+ proposal_nums=(100, 300, 3000))
+test_evaluator = val_evaluator
+
+# training settings
+max_epochs = 50
+num_last_epochs = 10
+interval = 10
+
+train_cfg = dict(
+ type='EpochBasedTrainLoop',
+ max_epochs=max_epochs,
+ val_interval=interval,
+ dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# optimizer
+base_lr = 0.01
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(
+ type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
+ nesterov=True),
+ paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
+
+# learning rate
+param_scheduler = [
+ dict(
+ # use quadratic formula to warm up 5 epochs
+ # and lr is updated by iteration
+ # TODO: fix default scope in get function
+ type='mmdet.QuadraticWarmupLR',
+ by_epoch=True,
+ begin=0,
+ end=5,
+ convert_to_iter_based=True),
+ dict(
+ # use cosine lr from epoch 5 to epoch max_epochs - num_last_epochs
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=5,
+ T_max=max_epochs - num_last_epochs,
+ end=max_epochs - num_last_epochs,
+ by_epoch=True,
+ convert_to_iter_based=True),
+ dict(
+ # use fixed lr during the last num_last_epochs epochs
+ type='ConstantLR',
+ by_epoch=True,
+ factor=1,
+ begin=max_epochs - num_last_epochs,
+ end=max_epochs,
+ )
+]
+
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='auto',
+ interval=interval,
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
+ ),
+ visualization=dict(draw=False, interval=5))
+custom_hooks = [
+ dict(
+ type='YOLOXModeSwitchHook',
+ num_last_epochs=num_last_epochs,
+ priority=48),
+ dict(type='SyncNormHook', priority=48),
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ strict_load=False,
+ priority=49)
+]
+vis_backends = [
+ dict(type='LocalVisBackend'),
+ dict(
+ type='WandbVisBackend',
+ init_kwargs=dict(project='livecell', name='yolox_x_livecell'),
+ define_metric_cfg={'coco/bbox_mAP': 'max'})
+]
+visualizer = dict(vis_backends=vis_backends)
+
+load_from = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth' # noqa
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(enable=True, base_batch_size=64)
diff --git a/configs/projects/sartorius_cellseg/README.md b/configs/projects/sartorius_cellseg/README.md
new file mode 100644
index 0000000..3e3ff6e
--- /dev/null
+++ b/configs/projects/sartorius_cellseg/README.md
@@ -0,0 +1,65 @@
+# Sartorius - Cell Instance Segmentation (Kaggle)
+
+Kaggle [Sartorius - Cell Instance Segmentation](https://www.kaggle.com/c/sartorius-cell-instance-segmentation)
+
+## Run demo
+
+```
+$ docker compose exec dethub python tools/image_demo.py configs/projects/sartorius_cellseg/demo/0030fd0e6378.png configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg.py https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_s_sartorius_cellseg-3d3d487f.pth --out-file configs/projects/sartorius_cellseg/demo/0030fd0e6378_demo.jpg
+```
+
+![plot](demo/0030fd0e6378_demo.jpg)
+
+## Prepare datasets
+
+1. Download competition data from Kaggle
+
+```
+kaggle competitions download -c sartorius-cell-instance-segmentation
+```
+
+2. Download coco format json.
+
+```
+kaggle datasets download -d takuok/sartorius-cocoformat
+```
+
+\*We prepared the COCO format files with [this script](../../../tools/dataset_converters/prepare_sartorius_cellseg.py).
+
+3. Unzip the files as follows
+
+```
+data/sartorius_cellseg
+├── train
+├── train_semi_supervised
+├── train.csv
+├── dtrain.json
+└── dval.json
+```
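+
+The project configs are plain mmengine configs, so they can be loaded and tweaked from Python before a run; a minimal sketch (the batch-size override is illustrative only, and resolving the `_base_` files requires the dethub container):
+
+```python
+from mmengine.config import Config
+
+cfg = Config.fromfile(
+    'configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg.py')
+print(cfg.model.bbox_head.num_classes)  # 3 cell types
+
+# Illustrative override, e.g. to fit a smaller GPU.
+cfg.train_dataloader.batch_size = 2
+cfg.dump('yolox_s_sartorius_cellseg_custom.py')
+```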
+
+## Run train
+
+Set environment variables
+
+```
+$ export DATA_DIR=/path/to/data
+```
+
+Start a docker container
+
+```
+$ docker compose up -d dethub
+```
+
+Run train
+
+```
+# single gpu
+$ docker compose exec dethub python /opt/site-packages/mmdet/.mim/tools/train.py configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg.py
+# multi gpus
+$ docker compose exec dethub python -m torch.distributed.launch --nproc_per_node=2 /opt/site-packages/mmdet/.mim/tools/train.py configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg.py --launcher pytorch
+```
+
+## Acknowledgement
+
+[Kaggle Sartorius - Cell Instance Segmentation 1st place solution](https://github.com/tascj/kaggle-sartorius-cell-instance-segmentation-solution)
diff --git a/configs/projects/sartorius_cellseg/demo/0030fd0e6378.png b/configs/projects/sartorius_cellseg/demo/0030fd0e6378.png
new file mode 100644
index 0000000..506b65a
Binary files /dev/null and b/configs/projects/sartorius_cellseg/demo/0030fd0e6378.png differ
diff --git a/configs/projects/sartorius_cellseg/demo/0030fd0e6378_demo.jpg b/configs/projects/sartorius_cellseg/demo/0030fd0e6378_demo.jpg
new file mode 100644
index 0000000..19186c6
Binary files /dev/null and b/configs/projects/sartorius_cellseg/demo/0030fd0e6378_demo.jpg differ
diff --git a/configs/projects/sartorius_cellseg/yolox/README.md b/configs/projects/sartorius_cellseg/yolox/README.md
new file mode 100644
index 0000000..2aafab2
--- /dev/null
+++ b/configs/projects/sartorius_cellseg/yolox/README.md
@@ -0,0 +1,34 @@
+# YOLOX
+
+> [YOLOX: Exceeding YOLO Series in 2021](https://arxiv.org/abs/2107.08430)
+
+## Abstract
+
+In this report, we present some experienced improvements to YOLO series, forming a new high-performance detector -- YOLOX. We switch the YOLO detector to an anchor-free manner and conduct other advanced detection techniques, i.e., a decoupled head and the leading label assignment strategy SimOTA to achieve state-of-the-art results across a large scale range of models: For YOLO-Nano with only 0.91M parameters and 1.08G FLOPs, we get 25.3% AP on COCO, surpassing NanoDet by 1.8% AP; for YOLOv3, one of the most widely used detectors in industry, we boost it to 47.3% AP on COCO, outperforming the current best practice by 3.0% AP; for YOLOX-L with roughly the same amount of parameters as YOLOv4-CSP, YOLOv5-L, we achieve 50.0% AP on COCO at a speed of 68.9 FPS on Tesla V100, exceeding YOLOv5-L by 1.8% AP. Further, we won the 1st Place on Streaming Perception Challenge (Workshop on Autonomous Driving at CVPR 2021) using a single YOLOX-L model. We hope this report can provide useful experience for developers and researchers in practical scenes, and we also provide deploy versions with ONNX, TensorRT, NCNN, and Openvino supported.
+
+## Results and Models
+
+### Box Results
+
+| Backbone | box AP | Config | Download |
+| :------------------------------------: | :----: | :-------------------------------------------------: | :-------------------------------------------------------------------------------------------------------: |
+| yolox_s_sartorius_cellseg_cocopretrain | 35.7 | [config](yolox_s_sartorius_cellseg_cocopretrain.py) | [model](https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_s_sartorius_cellseg_cocopretrain-58d3885a.pth) |
+| yolox_s_sartorius_cellseg | 36.7 | [config](yolox_s_sartorius_cellseg.py) | [model](https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_s_sartorius_cellseg-3d3d487f.pth) |
+| yolox_x_sartorius_cellseg | 38.0 | [config](yolox_x_sartorius_cellseg.py) | [model](https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_x_sartorius_cellseg-9776898b.pth) |
+
+## Citation
+
+```latex
+@article{yolox2021,
+ title={{YOLOX}: Exceeding YOLO Series in 2021},
+ author={Ge, Zheng and Liu, Songtao and Wang, Feng and Li, Zeming and Sun, Jian},
+ journal={arXiv preprint arXiv:2107.08430},
+ year={2021}
+}
+```
diff --git a/configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg.py b/configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg.py
new file mode 100644
index 0000000..7d107c7
--- /dev/null
+++ b/configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg.py
@@ -0,0 +1,215 @@
+_base_ = [
+ '/opt/site-packages/mmdet/.mim/configs/_base_/default_runtime.py',
+ '../../../_base_/models/yolox_s.py'
+]
+custom_imports = dict(imports=['dethub'], allow_failed_imports=False)
+
+img_scale = (1536, 1536) # height, width
+
+# model settings
+num_classes = 3
+model = dict(
+ data_preprocessor=dict(
+ _delete_=True,
+ type='DetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='BatchSyncRandomResize',
+ random_size_range=(1024, 2048),
+ size_divisor=32,
+ interval=10)
+ ]),
+ bbox_head=dict(num_classes=num_classes))
+
+# dataset settings
+data_root = 'data/sartorius_cellseg/'
+dataset_type = 'CocoDataset'
+file_client_args = dict(backend='disk')
+
+metainfo = dict(
+ CLASSES=['shsy5y', 'astro', 'cort'],
+ PALETTE=[(220, 20, 60), (119, 11, 32), (0, 0, 142)])
+train_pipeline = [
+ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
+ dict(
+ type='RandomAffine',
+ scaling_ratio_range=(0.1, 2),
+ border=(-img_scale[0] // 2, -img_scale[1] // 2)),
+ dict(
+ type='MixUp',
+ img_scale=img_scale,
+ ratio_range=(0.5, 1.5),
+ pad_val=114.0),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='DumpImage', max_imgs=100, dump_dir='dump'),
+ # According to the official implementation, multi-scale
+ # training is not considered here but in the
+ # 'mmdet/models/detectors/yolox.py'.
+    # Resize and Pad are for the last `num_last_epochs` epochs, when
+    # Mosaic, RandomAffine, and MixUp are disabled by YOLOXModeSwitchHook.
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ # If the image is three-channel, the pad value needs
+ # to be set separately for each channel.
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
+ dict(type='PackDetInputs')
+]
+
+train_dataset = dict(
+ # use MultiImageMixDataset wrapper to support mosaic and mixup
+ type='MultiImageMixDataset',
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dtrain.json',
+ data_prefix=dict(img=''),
+ pipeline=[
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='LoadAnnotations', with_bbox=True)
+ ],
+ filter_cfg=dict(filter_empty_gt=False)),
+ pipeline=train_pipeline)
+
+test_pipeline = [
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+
+train_dataloader = dict(
+ batch_size=4,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=train_dataset)
+val_dataloader = dict(
+ batch_size=4,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dval.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoFastMetric',
+ ann_file=data_root + 'dval.json',
+ metric='bbox',
+ proposal_nums=(100, 300, 3000))
+test_evaluator = val_evaluator
+
+# training settings
+max_epochs = 100
+num_last_epochs = 10
+interval = 10
+
+train_cfg = dict(
+ type='EpochBasedTrainLoop',
+ max_epochs=max_epochs,
+ val_interval=interval,
+ dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# optimizer
+base_lr = 0.01
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(
+ type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
+ nesterov=True),
+ paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
+
+# learning rate
+param_scheduler = [
+ dict(
+        # use the quadratic formula to warm up for 5 epochs;
+        # the lr is updated by iteration
+ # TODO: fix default scope in get function
+ type='mmdet.QuadraticWarmupLR',
+ by_epoch=True,
+ begin=0,
+ end=5,
+ convert_to_iter_based=True),
+ dict(
+        # use cosine lr from epoch 5 to epoch (max_epochs - num_last_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=5,
+ T_max=max_epochs - num_last_epochs,
+ end=max_epochs - num_last_epochs,
+ by_epoch=True,
+ convert_to_iter_based=True),
+ dict(
+        # use a fixed lr during the last num_last_epochs epochs
+ type='ConstantLR',
+ by_epoch=True,
+ factor=1,
+ begin=max_epochs - num_last_epochs,
+ end=max_epochs,
+ )
+]
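+# With max_epochs=100 and num_last_epochs=10, the schedule above runs a
+# quadratic warmup over epochs 0-5, cosine annealing from epoch 5 to 90
+# (lr 0.01 -> eta_min = 0.01 * 0.05 = 5e-4), and a constant lr for the
+# final 10 epochs.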
+
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='auto',
+ interval=interval,
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
+ ),
+ visualization=dict(draw=False, interval=1))
+custom_hooks = [
+ dict(
+ type='YOLOXModeSwitchHook',
+ num_last_epochs=num_last_epochs,
+ priority=48),
+ dict(type='SyncNormHook', priority=48),
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ strict_load=False,
+ priority=49)
+]
+vis_backends = [
+ dict(type='LocalVisBackend'),
+ dict(
+ type='WandbVisBackend',
+ init_kwargs=dict(
+ project='sartorius_cellseg', name='yolox_s_sartorius_cellseg'),
+ define_metric_cfg={'coco/bbox_mAP': 'max'})
+]
+visualizer = dict(vis_backends=vis_backends)
+
+load_from = 'https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_s_livecell-b3f4347f.pth' # noqa
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(enable=True, base_batch_size=64)
diff --git a/configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg_cocopretrain.py b/configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg_cocopretrain.py
new file mode 100644
index 0000000..c285325
--- /dev/null
+++ b/configs/projects/sartorius_cellseg/yolox/yolox_s_sartorius_cellseg_cocopretrain.py
@@ -0,0 +1,216 @@
+_base_ = [
+ '/opt/site-packages/mmdet/.mim/configs/_base_/default_runtime.py',
+ '../../../_base_/models/yolox_s.py'
+]
+custom_imports = dict(imports=['dethub'], allow_failed_imports=False)
+
+img_scale = (1536, 1536) # height, width
+
+# model settings
+num_classes = 3
+model = dict(
+ data_preprocessor=dict(
+ _delete_=True,
+ type='DetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='BatchSyncRandomResize',
+ random_size_range=(1024, 2048),
+ size_divisor=32,
+ interval=10)
+ ]),
+ bbox_head=dict(num_classes=num_classes))
+
+# dataset settings
+data_root = 'data/sartorius_cellseg/'
+dataset_type = 'CocoDataset'
+file_client_args = dict(backend='disk')
+
+metainfo = dict(
+ CLASSES=['shsy5y', 'astro', 'cort'],
+ PALETTE=[(220, 20, 60), (119, 11, 32), (0, 0, 142)])
+train_pipeline = [
+ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
+ dict(
+ type='RandomAffine',
+ scaling_ratio_range=(0.1, 2),
+ border=(-img_scale[0] // 2, -img_scale[1] // 2)),
+ dict(
+ type='MixUp',
+ img_scale=img_scale,
+ ratio_range=(0.5, 1.5),
+ pad_val=114.0),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='DumpImage', max_imgs=100, dump_dir='dump'),
+ # According to the official implementation, multi-scale
+ # training is not considered here but in the
+ # 'mmdet/models/detectors/yolox.py'.
+    # Resize and Pad are for the last `num_last_epochs` epochs, when
+    # Mosaic, RandomAffine, and MixUp are disabled by YOLOXModeSwitchHook.
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ # If the image is three-channel, the pad value needs
+ # to be set separately for each channel.
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
+ dict(type='PackDetInputs')
+]
+
+train_dataset = dict(
+ # use MultiImageMixDataset wrapper to support mosaic and mixup
+ type='MultiImageMixDataset',
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dtrain.json',
+ data_prefix=dict(img=''),
+ pipeline=[
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='LoadAnnotations', with_bbox=True)
+ ],
+ filter_cfg=dict(filter_empty_gt=False)),
+ pipeline=train_pipeline)
+
+test_pipeline = [
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+
+train_dataloader = dict(
+ batch_size=4,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=train_dataset)
+val_dataloader = dict(
+ batch_size=4,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dval.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoFastMetric',
+ ann_file=data_root + 'dval.json',
+ metric='bbox',
+ proposal_nums=(100, 300, 3000))
+test_evaluator = val_evaluator
+
+# training settings
+max_epochs = 100
+num_last_epochs = 10
+interval = 10
+
+train_cfg = dict(
+ type='EpochBasedTrainLoop',
+ max_epochs=max_epochs,
+ val_interval=interval,
+ dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# optimizer
+base_lr = 0.01
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(
+ type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
+ nesterov=True),
+ paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
+
+# learning rate
+param_scheduler = [
+ dict(
+        # use the quadratic formula to warm up for 5 epochs;
+        # the lr is updated by iteration
+ # TODO: fix default scope in get function
+ type='mmdet.QuadraticWarmupLR',
+ by_epoch=True,
+ begin=0,
+ end=5,
+ convert_to_iter_based=True),
+ dict(
+        # use cosine lr from epoch 5 to epoch (max_epochs - num_last_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=5,
+ T_max=max_epochs - num_last_epochs,
+ end=max_epochs - num_last_epochs,
+ by_epoch=True,
+ convert_to_iter_based=True),
+ dict(
+        # use a fixed lr during the last num_last_epochs epochs
+ type='ConstantLR',
+ by_epoch=True,
+ factor=1,
+ begin=max_epochs - num_last_epochs,
+ end=max_epochs,
+ )
+]
+
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='auto',
+ interval=interval,
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
+ ),
+ visualization=dict(draw=False, interval=1))
+custom_hooks = [
+ dict(
+ type='YOLOXModeSwitchHook',
+ num_last_epochs=num_last_epochs,
+ priority=48),
+ dict(type='SyncNormHook', priority=48),
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ strict_load=False,
+ priority=49)
+]
+vis_backends = [
+ dict(type='LocalVisBackend'),
+ dict(
+ type='WandbVisBackend',
+ init_kwargs=dict(
+ project='sartorius_cellseg',
+ name='yolox_s_sartorius_cellseg_cocopretrain'),
+ define_metric_cfg={'coco/bbox_mAP': 'max'})
+]
+visualizer = dict(vis_backends=vis_backends)
+
+load_from = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth' # noqa
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(enable=True, base_batch_size=64)
diff --git a/configs/projects/sartorius_cellseg/yolox/yolox_x_sartorius_cellseg.py b/configs/projects/sartorius_cellseg/yolox/yolox_x_sartorius_cellseg.py
new file mode 100644
index 0000000..217f27e
--- /dev/null
+++ b/configs/projects/sartorius_cellseg/yolox/yolox_x_sartorius_cellseg.py
@@ -0,0 +1,215 @@
+_base_ = [
+ '/opt/site-packages/mmdet/.mim/configs/_base_/default_runtime.py',
+ '../../../_base_/models/yolox_x.py'
+]
+custom_imports = dict(imports=['dethub'], allow_failed_imports=False)
+
+img_scale = (1536, 1536) # height, width
+
+# model settings
+num_classes = 3
+model = dict(
+ data_preprocessor=dict(
+ _delete_=True,
+ type='DetDataPreprocessor',
+ pad_size_divisor=32,
+ batch_augments=[
+ dict(
+ type='BatchSyncRandomResize',
+ random_size_range=(1280, 1792),
+ size_divisor=32,
+ interval=10)
+ ]),
+ bbox_head=dict(num_classes=num_classes))
+
+# dataset settings
+data_root = 'data/sartorius_cellseg/'
+dataset_type = 'CocoDataset'
+file_client_args = dict(backend='disk')
+
+metainfo = dict(
+ CLASSES=['shsy5y', 'astro', 'cort'],
+ PALETTE=[(220, 20, 60), (119, 11, 32), (0, 0, 142)])
+train_pipeline = [
+ dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
+ dict(
+ type='RandomAffine',
+ scaling_ratio_range=(0.1, 2),
+ border=(-img_scale[0] // 2, -img_scale[1] // 2)),
+ dict(
+ type='MixUp',
+ img_scale=img_scale,
+ ratio_range=(0.5, 1.5),
+ pad_val=114.0),
+ dict(
+ type='PhotoMetricDistortion',
+ brightness_delta=32,
+ contrast_range=(0.5, 1.5),
+ saturation_range=(0.5, 1.5),
+ hue_delta=18),
+ dict(type='RandomFlip', prob=0.5),
+ dict(type='DumpImage', max_imgs=100, dump_dir='dump'),
+ # According to the official implementation, multi-scale
+ # training is not considered here but in the
+ # 'mmdet/models/detectors/yolox.py'.
+    # Resize and Pad are for the last `num_last_epochs` epochs, when
+    # Mosaic, RandomAffine, and MixUp are disabled by YOLOXModeSwitchHook.
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ # If the image is three-channel, the pad value needs
+ # to be set separately for each channel.
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
+ dict(type='PackDetInputs')
+]
+
+train_dataset = dict(
+ # use MultiImageMixDataset wrapper to support mosaic and mixup
+ type='MultiImageMixDataset',
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dtrain.json',
+ data_prefix=dict(img=''),
+ pipeline=[
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='LoadAnnotations', with_bbox=True)
+ ],
+ filter_cfg=dict(filter_empty_gt=False)),
+ pipeline=train_pipeline)
+
+test_pipeline = [
+ dict(type='LoadImageFromFile', file_client_args=file_client_args),
+ dict(type='Resize', scale=img_scale, keep_ratio=True),
+ dict(
+ type='Pad',
+ pad_to_square=True,
+ pad_val=dict(img=(114.0, 114.0, 114.0))),
+ dict(type='LoadAnnotations', with_bbox=True),
+ dict(
+ type='PackDetInputs',
+ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+ 'scale_factor'))
+]
+
+train_dataloader = dict(
+ batch_size=2,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=train_dataset)
+val_dataloader = dict(
+ batch_size=2,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ dataset=dict(
+ type=dataset_type,
+ metainfo=metainfo,
+ data_root=data_root,
+ ann_file='dval.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoFastMetric',
+ ann_file=data_root + 'dval.json',
+ metric='bbox',
+ proposal_nums=(100, 300, 3000))
+test_evaluator = val_evaluator
+
+# training settings
+max_epochs = 70
+num_last_epochs = 10
+interval = 10
+
+train_cfg = dict(
+ type='EpochBasedTrainLoop',
+ max_epochs=max_epochs,
+ val_interval=interval,
+ dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# optimizer
+base_lr = 0.001
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(
+ type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
+ nesterov=True),
+ paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
+
+# learning rate
+param_scheduler = [
+ dict(
+        # use the quadratic formula to warm up for 5 epochs;
+        # the lr is updated by iteration
+ # TODO: fix default scope in get function
+ type='mmdet.QuadraticWarmupLR',
+ by_epoch=True,
+ begin=0,
+ end=5,
+ convert_to_iter_based=True),
+ dict(
+        # use cosine lr from epoch 5 to epoch (max_epochs - num_last_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=5,
+ T_max=max_epochs - num_last_epochs,
+ end=max_epochs - num_last_epochs,
+ by_epoch=True,
+ convert_to_iter_based=True),
+ dict(
+        # use a fixed lr during the last num_last_epochs epochs
+ type='ConstantLR',
+ by_epoch=True,
+ factor=1,
+ begin=max_epochs - num_last_epochs,
+ end=max_epochs,
+ )
+]
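+# With max_epochs=70 and num_last_epochs=10, the schedule above runs a
+# quadratic warmup over epochs 0-5, cosine annealing from epoch 5 to 60
+# (lr 0.001 -> eta_min = 0.001 * 0.05 = 5e-5), and a constant lr for the
+# final 10 epochs.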
+
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='auto',
+ interval=interval,
+ max_keep_ckpts=3 # only keep latest 3 checkpoints
+ ),
+ visualization=dict(draw=False, interval=1))
+custom_hooks = [
+ dict(
+ type='YOLOXModeSwitchHook',
+ num_last_epochs=num_last_epochs,
+ priority=48),
+ dict(type='SyncNormHook', priority=48),
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ strict_load=False,
+ priority=49)
+]
+vis_backends = [
+ dict(type='LocalVisBackend'),
+ dict(
+ type='WandbVisBackend',
+ init_kwargs=dict(
+ project='sartorius_cellseg', name='yolox_x_sartorius_cellseg'),
+ define_metric_cfg={'coco/bbox_mAP': 'max'})
+]
+visualizer = dict(vis_backends=vis_backends)
+
+load_from = 'https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_x_livecell-b1fb7170.pth' # noqa
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# USER SHOULD NOT CHANGE ITS VALUES.
+# base_batch_size = (8 GPUs) x (8 samples per GPU)
+auto_scale_lr = dict(enable=True, base_batch_size=64)
diff --git a/dethub/__init__.py b/dethub/__init__.py
new file mode 100644
index 0000000..3ac507f
--- /dev/null
+++ b/dethub/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from . import datasets, engine, evaluation, models, visualization
diff --git a/dethub/datasets/__init__.py b/dethub/datasets/__init__.py
new file mode 100644
index 0000000..a6902ef
--- /dev/null
+++ b/dethub/datasets/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from . import transforms
diff --git a/dethub/datasets/transforms/__init__.py b/dethub/datasets/transforms/__init__.py
new file mode 100644
index 0000000..1d9cdd3
--- /dev/null
+++ b/dethub/datasets/transforms/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from .dump_image import DumpImage
diff --git a/dethub/datasets/transforms/dump_image.py b/dethub/datasets/transforms/dump_image.py
new file mode 100644
index 0000000..d97274a
--- /dev/null
+++ b/dethub/datasets/transforms/dump_image.py
@@ -0,0 +1,73 @@
+from os import path as osp
+
+import mmcv
+import mmengine
+import numpy as np
+import torch
+from mmengine.structures import InstanceData
+from torch.multiprocessing import Value
+
+from mmdet.registry import TRANSFORMS
+from mmdet.structures import DetDataSample
+from mmdet.visualization import DetLocalVisualizer
+
+
+@TRANSFORMS.register_module()
+class DumpImage:
+ """Dump the image processed by the pipeline.
+
+ Args:
+        max_imgs (int): Maximum number of images to dump.
+ dump_dir (str): Dump output directory.
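+
+    Example config (mirrors the train pipelines in this repo):
+        dict(type='DumpImage', max_imgs=100, dump_dir='dump')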
+ """
+
+ def __init__(self, max_imgs, dump_dir):
+ self.max_imgs = max_imgs
+ self.dump_dir = dump_dir
+ mmengine.mkdir_or_exist(self.dump_dir)
+ self.num_dumped_imgs = Value('i', 0)
+ self.det_local_visualizer = DetLocalVisualizer()
+
+ def __call__(self, results):
+ """Dump the input image to the specified directory.
+
+ No changes will be
+ made.
+ Args:
+ results (dict): Result dict from loading pipeline.
+ Returns:
+ results (dict): Result dict from loading pipeline. (same as input)
+ """
+
+ enable_dump = False
+ with self.num_dumped_imgs.get_lock():
+ if self.num_dumped_imgs.value < self.max_imgs:
+ self.num_dumped_imgs.value += 1
+ enable_dump = True
+ dump_id = self.num_dumped_imgs.value
+
+ if enable_dump:
+ img = results['img']
+ out_file = osp.join(self.dump_dir, f'dump_{dump_id}.png')
+ mmcv.imwrite(img.astype(np.uint8), out_file)
+
+ gt_instances = InstanceData()
+ gt_instances.bboxes = torch.Tensor(results['gt_bboxes'].tensor)
+ gt_instances.labels = torch.Tensor(
+ results['gt_bboxes_labels']).long()
+ if 'gt_masks' in results:
+ gt_instances.masks = results['gt_masks'].masks
+
+ gt_det_data_sample = DetDataSample()
+ gt_det_data_sample.gt_instances = gt_instances
+ out_file = osp.join(self.dump_dir, f'dump_{dump_id}_withgt.png')
+ self.det_local_visualizer.add_datasample(
+ 'image', img, gt_det_data_sample, out_file=out_file)
+
+ return results
+
+ def __repr__(self):
+ repr_str = self.__class__.__name__ + (f'(max_imgs={self.max_imgs}, '
+ f'dump_dir="{self.dump_dir}")')
+
+ return repr_str
diff --git a/dethub/engine/__init__.py b/dethub/engine/__init__.py
new file mode 100644
index 0000000..0437369
--- /dev/null
+++ b/dethub/engine/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from . import hooks
diff --git a/dethub/engine/hooks/__init__.py b/dethub/engine/hooks/__init__.py
new file mode 100644
index 0000000..bf3d194
--- /dev/null
+++ b/dethub/engine/hooks/__init__.py
@@ -0,0 +1,3 @@
+# flake8: noqa:F401
+from .ema_hook import EMAHook
+from .visualization_hook import DetVisualizationHook
diff --git a/dethub/engine/hooks/ema_hook.py b/dethub/engine/hooks/ema_hook.py
new file mode 100644
index 0000000..ec35745
--- /dev/null
+++ b/dethub/engine/hooks/ema_hook.py
@@ -0,0 +1,54 @@
+import copy
+import logging
+
+from mmengine.hooks.ema_hook import EMAHook as Base
+from mmengine.logging import print_log
+from mmengine.registry import HOOKS
+from mmengine.runner.checkpoint import _load_checkpoint_to_model
+
+
+@HOOKS.register_module(force=True)
+class EMAHook(Base):
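+    """EMAHook that tolerates checkpoints without a matching EMA state.
+
+    With ``strict_load=False`` the ``ema_state_dict`` is loaded
+    non-strictly, and a checkpoint that has no ``ema_state_dict`` at all
+    initializes the EMA weights from a copy of its ``state_dict``.
+    """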
+
+ def __init__(self,
+ ema_type: str = 'ExponentialMovingAverage',
+ strict_load: bool = True,
+ begin_iter: int = 0,
+ begin_epoch: int = 0,
+ **kwargs):
+ self.strict_load = strict_load
+ self.ema_cfg = dict(type=ema_type, **kwargs)
+        assert not (begin_iter != 0 and begin_epoch != 0), (
+            '`begin_iter` and `begin_epoch` should not both be set.')
+        assert begin_iter >= 0, (
+            f'`begin_iter` must not be less than 0, but got {begin_iter}')
+        assert begin_epoch >= 0, (
+            f'`begin_epoch` must not be less than 0, but got {begin_epoch}')
+ self.begin_iter = begin_iter
+ self.begin_epoch = begin_epoch
+ # If `begin_epoch` and `begin_iter` are not set, `EMAHook` will be
+ # enabled at 0 iteration.
+ self.enabled_by_epoch = self.begin_epoch > 0
+
+ def after_load_checkpoint(self, runner, checkpoint: dict) -> None:
+ """Resume ema parameters from checkpoint.
+
+ Args:
+            runner (Runner): The runner of the training or testing process.
+            checkpoint (dict): The loaded checkpoint.
+ """
+ if 'ema_state_dict' in checkpoint and runner._resume:
+            # The original model parameters are actually saved in the ema
+            # field. Swap the weights back to resume the ema state.
+ self._swap_ema_state_dict(checkpoint)
+ self.ema_model.load_state_dict(
+ checkpoint['ema_state_dict'], strict=self.strict_load)
+
+ # Support load checkpoint without ema state dict.
+ else:
+ print_log(
+ 'There is no `ema_state_dict` in checkpoint. '
+ '`EMAHook` will make a copy of `state_dict` as the '
+ 'initial `ema_state_dict`', 'current', logging.WARNING)
+ _load_checkpoint_to_model(self.ema_model.module,
+ copy.deepcopy(checkpoint['state_dict']),
+ False)
diff --git a/dethub/engine/hooks/visualization_hook.py b/dethub/engine/hooks/visualization_hook.py
new file mode 100644
index 0000000..2843c67
--- /dev/null
+++ b/dethub/engine/hooks/visualization_hook.py
@@ -0,0 +1,51 @@
+import os.path as osp
+from typing import Sequence
+
+import mmcv
+from mmengine.fileio import FileClient
+from mmengine.runner import Runner
+
+from mmdet.engine.hooks.visualization_hook import DetVisualizationHook as Base
+from mmdet.registry import HOOKS
+from mmdet.structures import DetDataSample
+
+
+@HOOKS.register_module(force=True)
+class DetVisualizationHook(Base):
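+    """DetVisualizationHook that forwards the global step to the visualizer.
+
+    Only the first sample of a batch is drawn, every ``self.interval``
+    validation iterations, and ``step=total_curr_iter`` is passed so
+    backends such as wandb index the images by iteration.
+    """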
+
+ def after_val_iter(self, runner: Runner, batch_idx: int, data_batch: dict,
+ outputs: Sequence[DetDataSample]) -> None:
+ """Run after every ``self.interval`` validation iterations.
+
+ Args:
+ runner (:obj:`Runner`): The runner of the validation process.
+ batch_idx (int): The index of the current batch in the val loop.
+ data_batch (dict): Data from dataloader.
+            outputs (Sequence[:obj:`DetDataSample`]): A batch of data samples
+ that contain annotations and predictions.
+ """
+ if self.draw is False:
+ return
+
+ if self.file_client is None:
+ self.file_client = FileClient(**self.file_client_args)
+
+ # There is no guarantee that the same batch of images
+ # is visualized for each evaluation.
+ total_curr_iter = runner.iter + batch_idx
+
+ # Visualize only the first data
+ img_path = outputs[0].img_path
+ img_bytes = self.file_client.get(img_path)
+ img = mmcv.imfrombytes(img_bytes, channel_order='rgb')
+
+ if total_curr_iter % self.interval == 0:
+ self._visualizer.add_datasample(
+ osp.basename(img_path) if self.show else
+ (f'iter{runner.iter}/{osp.basename(img_path)}'),
+ img,
+ data_sample=outputs[0],
+ show=self.show,
+ wait_time=self.wait_time,
+ pred_score_thr=self.score_thr,
+ step=total_curr_iter)
diff --git a/dethub/evaluation/__init__.py b/dethub/evaluation/__init__.py
new file mode 100644
index 0000000..9ee87ce
--- /dev/null
+++ b/dethub/evaluation/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from . import metrics
diff --git a/dethub/evaluation/metrics/__init__.py b/dethub/evaluation/metrics/__init__.py
new file mode 100644
index 0000000..7c66884
--- /dev/null
+++ b/dethub/evaluation/metrics/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from .coco_fast_metric import CocoFastMetric
diff --git a/dethub/evaluation/metrics/coco_fast_metric.py b/dethub/evaluation/metrics/coco_fast_metric.py
new file mode 100644
index 0000000..e68e45d
--- /dev/null
+++ b/dethub/evaluation/metrics/coco_fast_metric.py
@@ -0,0 +1,292 @@
+import itertools
+import os.path as osp
+import tempfile
+from collections import OrderedDict
+from typing import Dict
+
+import numpy as np
+from detectron2.evaluation.fast_eval_api import COCOeval_opt
+from mmengine.fileio import load
+from mmengine.logging import MMLogger
+from terminaltables import AsciiTable
+
+from mmdet.datasets.api_wrappers import COCO
+from mmdet.evaluation.metrics.coco_metric import CocoMetric
+from mmdet.registry import METRICS
+
+
+class COCOeval_optfix(COCOeval_opt):
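+    """COCOeval_opt whose ``summarize`` respects custom ``maxDets``.
+
+    The stock summarization computes the primary AP with the default
+    maxDets=100; this variant uses ``self.params.maxDets[2]`` throughout,
+    so larger proposal numbers (e.g. 3000) are summarized consistently.
+    """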
+
+ def summarize(self):
+ """Compute and display summary metrics for evaluation results.
+
+ Note this function can *only* be applied on the default parameter
+ setting
+ """
+
+ def _summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
+ p = self.params
+ iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | ' \
+ 'maxDets={:>3d} ] = {:0.3f}'
+ titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
+ typeStr = '(AP)' if ap == 1 else '(AR)'
+ iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
+ if iouThr is None else '{:0.2f}'.format(iouThr)
+
+ aind = [
+ i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng
+ ]
+ mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+ if ap == 1:
+ # dimension of precision: [TxRxKxAxM]
+ s = self.eval['precision']
+ # IoU
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ s = s[:, :, :, aind, mind]
+ else:
+ # dimension of recall: [TxKxAxM]
+ s = self.eval['recall']
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ s = s[:, :, aind, mind]
+ if len(s[s > -1]) == 0:
+ mean_s = -1
+ else:
+ mean_s = np.mean(s[s > -1])
+ print(
+ iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets,
+ mean_s))
+ return mean_s
+
+ def _summarizeDets():
+ stats = np.zeros((12, ))
+ # fix for more proposals
+ stats[0] = _summarize(1, maxDets=self.params.maxDets[2])
+ stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
+ stats[2] = _summarize(
+ 1, iouThr=.75, maxDets=self.params.maxDets[2])
+ stats[3] = _summarize(
+ 1, areaRng='small', maxDets=self.params.maxDets[2])
+ stats[4] = _summarize(
+ 1, areaRng='medium', maxDets=self.params.maxDets[2])
+ stats[5] = _summarize(
+ 1, areaRng='large', maxDets=self.params.maxDets[2])
+ stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
+ stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
+ stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
+ stats[9] = _summarize(
+ 0, areaRng='small', maxDets=self.params.maxDets[2])
+ stats[10] = _summarize(
+ 0, areaRng='medium', maxDets=self.params.maxDets[2])
+ stats[11] = _summarize(
+ 0, areaRng='large', maxDets=self.params.maxDets[2])
+ return stats
+
+ def _summarizeKps():
+ stats = np.zeros((10, ))
+ stats[0] = _summarize(1, maxDets=20)
+ stats[1] = _summarize(1, maxDets=20, iouThr=.5)
+ stats[2] = _summarize(1, maxDets=20, iouThr=.75)
+ stats[3] = _summarize(1, maxDets=20, areaRng='medium')
+ stats[4] = _summarize(1, maxDets=20, areaRng='large')
+ stats[5] = _summarize(0, maxDets=20)
+ stats[6] = _summarize(0, maxDets=20, iouThr=.5)
+ stats[7] = _summarize(0, maxDets=20, iouThr=.75)
+ stats[8] = _summarize(0, maxDets=20, areaRng='medium')
+ stats[9] = _summarize(0, maxDets=20, areaRng='large')
+ return stats
+
+ if not self.eval:
+ raise Exception('Please run accumulate() first')
+ iouType = self.params.iouType
+ if iouType == 'segm' or iouType == 'bbox':
+ summarize = _summarizeDets
+ elif iouType == 'keypoints':
+ summarize = _summarizeKps
+ self.stats = summarize()
+
+
+@METRICS.register_module()
+class CocoFastMetric(CocoMetric):
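+    """CocoMetric that evaluates with detectron2's ``COCOeval_opt``.
+
+    ``COCOeval_opt`` runs the optimized C++ evaluation from detectron2's
+    fast_eval_api, which is much faster than pycocotools' pure-Python
+    ``COCOeval`` while producing the same metrics.
+    """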
+
+ def compute_metrics(self, results: list) -> Dict[str, float]:
+ """Compute the metrics from processed results.
+
+ Args:
+ results (list): The processed results of each batch.
+ Returns:
+ Dict[str, float]: The computed metrics. The keys are the names of
+ the metrics, and the values are corresponding results.
+ """
+ logger: MMLogger = MMLogger.get_current_instance()
+
+ # split gt and prediction list
+ gts, preds = zip(*results)
+
+ tmp_dir = None
+ if self.outfile_prefix is None:
+ tmp_dir = tempfile.TemporaryDirectory()
+ outfile_prefix = osp.join(tmp_dir.name, 'results')
+ else:
+ outfile_prefix = self.outfile_prefix
+
+ if self._coco_api is None:
+ # use converted gt json file to initialize coco api
+ logger.info('Converting ground truth to coco format...')
+ coco_json_path = self.gt_to_coco_json(
+ gt_dicts=gts, outfile_prefix=outfile_prefix)
+ self._coco_api = COCO(coco_json_path)
+
+ # handle lazy init
+ if self.cat_ids is None:
+ self.cat_ids = self._coco_api.get_cat_ids(
+ cat_names=self.dataset_meta['CLASSES'])
+ if self.img_ids is None:
+ self.img_ids = self._coco_api.get_img_ids()
+
+ # convert predictions to coco format and dump to json file
+ result_files = self.results2json(preds, outfile_prefix)
+
+ eval_results = OrderedDict()
+ if self.format_only:
+ logger.info('results are saved in '
+ f'{osp.dirname(outfile_prefix)}')
+ return eval_results
+
+ for metric in self.metrics:
+ logger.info(f'Evaluating {metric}...')
+
+ # TODO: May refactor fast_eval_recall to an independent metric?
+ # fast eval recall
+ if metric == 'proposal_fast':
+ ar = self.fast_eval_recall(
+ preds, self.proposal_nums, self.iou_thrs, logger=logger)
+ log_msg = []
+ for i, num in enumerate(self.proposal_nums):
+ eval_results[f'AR@{num}'] = ar[i]
+ log_msg.append(f'\nAR@{num}\t{ar[i]:.4f}')
+ log_msg = ''.join(log_msg)
+ logger.info(log_msg)
+ continue
+
+ # evaluate proposal, bbox and segm
+ iou_type = 'bbox' if metric == 'proposal' else metric
+ if metric not in result_files:
+ raise KeyError(f'{metric} is not in results')
+ try:
+ predictions = load(result_files[metric])
+ if iou_type == 'segm':
+ # Refer to https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L331 # noqa
+ # When evaluating mask AP, if the results contain bbox,
+ # cocoapi will use the box area instead of the mask area
+ # for calculating the instance area. Though the overall AP
+ # is not affected, this leads to different
+ # small/medium/large mask AP results.
+ for x in predictions:
+ x.pop('bbox')
+ coco_dt = self._coco_api.loadRes(predictions)
+
+ except IndexError:
+ logger.error(
+                    'The testing results of the whole dataset are empty.')
+ break
+
+ coco_eval = COCOeval_optfix(self._coco_api, coco_dt, iou_type)
+
+ coco_eval.params.catIds = self.cat_ids
+ coco_eval.params.imgIds = self.img_ids
+ coco_eval.params.maxDets = list(self.proposal_nums)
+ coco_eval.params.iouThrs = self.iou_thrs
+
+ # mapping of cocoEval.stats
+ coco_metric_names = {
+ 'mAP': 0,
+ 'mAP_50': 1,
+ 'mAP_75': 2,
+ 'mAP_s': 3,
+ 'mAP_m': 4,
+ 'mAP_l': 5,
+ 'AR@100': 6,
+ 'AR@300': 7,
+ 'AR@1000': 8,
+ 'AR_s@1000': 9,
+ 'AR_m@1000': 10,
+ 'AR_l@1000': 11
+ }
+ metric_items = self.metric_items
+ if metric_items is not None:
+ for metric_item in metric_items:
+ if metric_item not in coco_metric_names:
+ raise KeyError(
+ f'metric item "{metric_item}" is not supported')
+
+ if metric == 'proposal':
+ coco_eval.params.useCats = 0
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+ if metric_items is None:
+ metric_items = [
+ 'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000',
+ 'AR_m@1000', 'AR_l@1000'
+ ]
+
+ for item in metric_items:
+ val = float(
+ f'{coco_eval.stats[coco_metric_names[item]]:.3f}')
+ eval_results[item] = val
+ else:
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+ if self.classwise: # Compute per-category AP
+                # from https://github.com/facebookresearch/detectron2/
+ precisions = coco_eval.eval['precision']
+ # precision: (iou, recall, cls, area range, max dets)
+ assert len(self.cat_ids) == precisions.shape[2]
+
+ results_per_category = []
+ for idx, cat_id in enumerate(self.cat_ids):
+ # area range index 0: all area ranges
+                        # max dets index -1: the largest maxDets value
+ nm = self._coco_api.loadCats(cat_id)[0]
+ precision = precisions[:, :, idx, 0, -1]
+ precision = precision[precision > -1]
+ if precision.size:
+ ap = np.mean(precision)
+ else:
+ ap = float('nan')
+ results_per_category.append(
+ (f'{nm["name"]}', f'{round(ap, 3)}'))
+ eval_results[f'{nm["name"]}_precision'] = round(ap, 3)
+
+ num_columns = min(6, len(results_per_category) * 2)
+ results_flatten = list(
+ itertools.chain(*results_per_category))
+ headers = ['category', 'AP'] * (num_columns // 2)
+ results_2d = itertools.zip_longest(*[
+ results_flatten[i::num_columns]
+ for i in range(num_columns)
+ ])
+ table_data = [headers]
+ table_data += [result for result in results_2d]
+ table = AsciiTable(table_data)
+ logger.info('\n' + table.table)
+
+ if metric_items is None:
+ metric_items = [
+ 'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'
+ ]
+
+ for metric_item in metric_items:
+ key = f'{metric}_{metric_item}'
+ val = coco_eval.stats[coco_metric_names[metric_item]]
+ eval_results[key] = float(f'{round(val, 3)}')
+
+ if tmp_dir is not None:
+ tmp_dir.cleanup()
+ return eval_results
diff --git a/dethub/models/__init__.py b/dethub/models/__init__.py
new file mode 100644
index 0000000..e8b6774
--- /dev/null
+++ b/dethub/models/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from . import task_modules
diff --git a/dethub/models/task_modules/__init__.py b/dethub/models/task_modules/__init__.py
new file mode 100644
index 0000000..220890b
--- /dev/null
+++ b/dethub/models/task_modules/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from . import assigners
diff --git a/dethub/models/task_modules/assigners/__init__.py b/dethub/models/task_modules/assigners/__init__.py
new file mode 100644
index 0000000..ad459bb
--- /dev/null
+++ b/dethub/models/task_modules/assigners/__init__.py
@@ -0,0 +1,3 @@
+# flake8: noqa:F401
+from .iou2d_calculator import BboxOverlaps2D
+from .sim_ota_assigner import SimOTAAssigner
diff --git a/dethub/models/task_modules/assigners/iou2d_calculator.py b/dethub/models/task_modules/assigners/iou2d_calculator.py
new file mode 100644
index 0000000..35c22ac
--- /dev/null
+++ b/dethub/models/task_modules/assigners/iou2d_calculator.py
@@ -0,0 +1,61 @@
+import torch
+from mmcv.ops import bbox_overlaps as cudaext_bbox_overlaps
+
+from mmdet.models.task_modules.assigners import BboxOverlaps2D as Base
+from mmdet.registry import TASK_UTILS
+from mmdet.structures.bbox import bbox_overlaps as torch_bbox_overlaps
+
+
+def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
+ if bboxes1.is_cuda and mode in ('iou', 'iof'):
+ return cudaext_bbox_overlaps(bboxes1, bboxes2, mode, is_aligned)
+ else:
+ return torch_bbox_overlaps(bboxes1, bboxes2, mode, is_aligned, eps)
+
+
+def cast_tensor_type(x, scale=1., dtype=None):
+ if dtype == 'fp16':
+ # scale is for preventing overflows
+ x = (x / scale).half()
+ return x
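+
+# Illustrative assigner-config usage (valid per BboxOverlaps2D(scale, dtype)):
+#   iou_calculator=dict(type='BboxOverlaps2D', scale=512., dtype='fp16')
+# Boxes are divided by ``scale`` before the half cast to avoid fp16 overflow.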
+
+
+@TASK_UTILS.register_module(force=True)
+class BboxOverlaps2D(Base):
+
+ def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
+ """Calculate IoU between 2D bboxes.
+
+ Args:
+ bboxes1 (Tensor): bboxes have shape (m, 4) in
+ format, or shape (m, 5) in format.
+ bboxes2 (Tensor): bboxes have shape (m, 4) in
+ format, shape (m, 5) in format, or be
+ empty. If ``is_aligned `` is ``True``, then m and n must be
+ equal.
+ mode (str): "iou" (intersection over union), "iof" (intersection
+ over foreground), or "giou" (generalized intersection over
+ union).
+ is_aligned (bool, optional): If True, then m and n must be equal.
+ Default False.
+ Returns:
+ Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)
+ """
+ assert bboxes1.size(-1) in [0, 4, 5]
+ assert bboxes2.size(-1) in [0, 4, 5]
+ if bboxes2.size(-1) == 5:
+ bboxes2 = bboxes2[..., :4]
+ if bboxes1.size(-1) == 5:
+ bboxes1 = bboxes1[..., :4]
+
+ if self.dtype == 'fp16':
+ # change tensor type to save cpu and cuda memory and keep speed
+ bboxes1 = cast_tensor_type(bboxes1, self.scale, self.dtype)
+ bboxes2 = cast_tensor_type(bboxes2, self.scale, self.dtype)
+ overlaps = bbox_overlaps(bboxes1, bboxes2, mode, is_aligned)
+ if not overlaps.is_cuda and overlaps.dtype == torch.float16:
+ # resume cpu float32
+ overlaps = overlaps.float()
+ return overlaps
+
+ return bbox_overlaps(bboxes1, bboxes2, mode, is_aligned)
diff --git a/dethub/models/task_modules/assigners/sim_ota_assigner.py b/dethub/models/task_modules/assigners/sim_ota_assigner.py
new file mode 100644
index 0000000..de140c6
--- /dev/null
+++ b/dethub/models/task_modules/assigners/sim_ota_assigner.py
@@ -0,0 +1,262 @@
+from typing import Optional, Tuple
+
+import torch
+import torch.nn.functional as F
+from mmengine.structures import InstanceData
+from torch import Tensor
+
+from mmdet.models.task_modules.assigners.assign_result import AssignResult
+from mmdet.models.task_modules.assigners.base_assigner import BaseAssigner
+from mmdet.registry import TASK_UTILS
+from mmdet.utils import ConfigType
+
+try:
+ import simota_cuda_ops
+except ModuleNotFoundError:
+ raise ModuleNotFoundError(
+ 'Please compile SimOTA CUDA ops following README.')
+
+INF = 100000.0
+EPS = 1.0e-7
+
+
+def get_in_gt_and_in_center_info(priors, gt_bboxes, center_radius):
+ """Use CUDA extension to avoid unnecessary memory allocation."""
+ if priors.is_cuda:
+ num_prior = priors.size(0)
+ num_gt = gt_bboxes.size(0)
+ is_in_gts = torch.empty(
+ num_prior, num_gt, device=priors.device, dtype=torch.bool)
+ is_in_cts = torch.empty(
+ num_prior, num_gt, device=priors.device, dtype=torch.bool)
+ simota_cuda_ops.check_prior_in_gt(priors, gt_bboxes, is_in_gts,
+ is_in_cts, center_radius)
+
+ is_in_gts_all = is_in_gts.any(dim=1)
+ is_in_cts_all = is_in_cts.any(dim=1)
+ else:
+ num_gt = gt_bboxes.size(0)
+
+ repeated_x = priors[:, 0].unsqueeze(1).expand(-1, num_gt)
+ repeated_y = priors[:, 1].unsqueeze(1).expand(-1, num_gt)
+ repeated_stride_x = priors[:, 2].unsqueeze(1).expand(-1, num_gt)
+ repeated_stride_y = priors[:, 3].unsqueeze(1).expand(-1, num_gt)
+
+ # is prior centers in gt bboxes, shape: [n_prior, n_gt]
+ l_ = repeated_x - gt_bboxes[:, 0]
+ t_ = repeated_y - gt_bboxes[:, 1]
+ r_ = gt_bboxes[:, 2] - repeated_x
+ b_ = gt_bboxes[:, 3] - repeated_y
+
+ deltas = torch.stack([l_, t_, r_, b_], dim=1)
+ is_in_gts = deltas.min(dim=1).values > 0
+ is_in_gts_all = is_in_gts.sum(dim=1) > 0
+
+ # is prior centers in gt centers
+ gt_cxs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
+ gt_cys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
+ ct_box_l = gt_cxs - center_radius * repeated_stride_x
+ ct_box_t = gt_cys - center_radius * repeated_stride_y
+ ct_box_r = gt_cxs + center_radius * repeated_stride_x
+ ct_box_b = gt_cys + center_radius * repeated_stride_y
+
+ cl_ = repeated_x - ct_box_l
+ ct_ = repeated_y - ct_box_t
+ cr_ = ct_box_r - repeated_x
+ cb_ = ct_box_b - repeated_y
+
+ ct_deltas = torch.stack([cl_, ct_, cr_, cb_], dim=1)
+ is_in_cts = ct_deltas.min(dim=1).values > 0
+ is_in_cts_all = is_in_cts.sum(dim=1) > 0
+
+ # in boxes or in centers, shape: [num_priors]
+ is_in_gts_or_centers = is_in_gts_all | is_in_cts_all
+
+ # both in boxes and centers, shape: [num_fg, num_gt]
+ is_in_boxes_and_centers = (
+ is_in_gts[is_in_gts_or_centers, :]
+ & is_in_cts[is_in_gts_or_centers, :])
+ return is_in_gts_or_centers, is_in_boxes_and_centers
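+
+# A minimal consistency sketch (assumption: run by hand on a CUDA machine)
+# comparing the CUDA extension with the pure-PyTorch fallback above:
+#   priors = torch.rand(16, 4, device='cuda') * 100  # cx, cy, stride_x/y
+#   gts = torch.rand(4, 4, device='cuda') * 100
+#   gts[:, 2:] += gts[:, :2]                         # ensure x2 > x1, y2 > y1
+#   a = get_in_gt_and_in_center_info(priors, gts, 2.5)
+#   b = get_in_gt_and_in_center_info(priors.cpu(), gts.cpu(), 2.5)
+#   assert all(torch.equal(x.cpu(), y) for x, y in zip(a, b))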
+
+
+def get_cls_cost(valid_pred_scores, gt_labels):
+ """(large input_size, large num_gt) -> large num_valid (large num_valid,
+ large num_gt) -> high F.binary_cross_entropy(..., reduction='none') memory
+ cost Use CUDA extension to reduce the cost (to 1/num_classes)"""
+ num_valid = valid_pred_scores.size(0)
+ num_gt = gt_labels.size(0)
+ if valid_pred_scores.is_cuda:
+ cls_cost = torch.empty(
+ num_valid,
+ num_gt,
+ device=valid_pred_scores.device,
+ dtype=valid_pred_scores.dtype)
+ gt_onehot_labels = F.one_hot(
+ gt_labels.to(torch.int64), valid_pred_scores.shape[-1]).float()
+ simota_cuda_ops.binary_cross_entropy_cost(valid_pred_scores.sqrt_(),
+ gt_onehot_labels, cls_cost)
+ else:
+ gt_onehot_label = (
+ F.one_hot(gt_labels.to(torch.int64),
+ valid_pred_scores.shape[-1]).float().unsqueeze(0).expand(
+ num_valid, -1, -1))
+
+ valid_pred_scores = valid_pred_scores.sqrt_().unsqueeze(1).expand(
+ -1, num_gt, -1)
+ cls_cost = F.binary_cross_entropy(
+ valid_pred_scores, gt_onehot_label, reduction='none').sum(-1)
+ return cls_cost
+
+
+@TASK_UTILS.register_module(force=True)
+class SimOTAAssigner(BaseAssigner):
+ """Computes matching between predictions and ground truth.
+
+ Args:
+ center_radius (float): Ground truth center size
+ to judge whether a prior is in center. Defaults to 2.5.
+        candidate_topk (int): The candidate top-k that is used to select
+            the top-k ious for the dynamic-k calculation. Defaults to 10.
+ iou_weight (float): The scale factor for regression
+ iou cost. Defaults to 3.0.
+ cls_weight (float): The scale factor for classification
+ cost. Defaults to 1.0.
+        iou_calculator (ConfigType): Config of the overlaps calculator.
+            Defaults to dict(type='BboxOverlaps2D').
+ """
+
+ def __init__(self,
+ center_radius: float = 2.5,
+ candidate_topk: int = 10,
+ iou_weight: float = 3.0,
+ cls_weight: float = 1.0,
+ iou_calculator: ConfigType = dict(type='BboxOverlaps2D')):
+ self.center_radius = center_radius
+ self.candidate_topk = candidate_topk
+ self.iou_weight = iou_weight
+ self.cls_weight = cls_weight
+ self.iou_calculator = TASK_UTILS.build(iou_calculator)
+
+ def assign(self,
+ pred_instances: InstanceData,
+ gt_instances: InstanceData,
+ gt_instances_ignore: Optional[InstanceData] = None,
+ **kwargs) -> AssignResult:
+ """Assign gt to priors using SimOTA.
+
+ Args:
+ pred_instances (:obj:`InstanceData`): Instances of model
+ predictions. It includes ``priors``, and the priors can
+ be anchors or points, or the bboxes predicted by the
+ previous stage, has shape (n, 4). The bboxes predicted by
+ the current model or stage will be named ``bboxes``,
+ ``labels``, and ``scores``, the same as the ``InstanceData``
+ in other places.
+ gt_instances (:obj:`InstanceData`): Ground truth of instance
+ annotations. It usually includes ``bboxes``, with shape (k, 4),
+ and ``labels``, with shape (k, ).
+ gt_instances_ignore (:obj:`InstanceData`, optional): Instances
+ to be ignored during training. It includes ``bboxes``
+ attribute data that is ignored during training and testing.
+ Defaults to None.
+ Returns:
+ obj:`AssignResult`: The assigned result.
+ """
+ gt_bboxes = gt_instances.bboxes
+ gt_labels = gt_instances.labels
+ num_gt = gt_bboxes.size(0)
+
+ decoded_bboxes = pred_instances.bboxes
+ pred_scores = pred_instances.scores
+ priors = pred_instances.priors
+ num_bboxes = decoded_bboxes.size(0)
+
+ # assign 0 by default
+ assigned_gt_inds = decoded_bboxes.new_full((num_bboxes, ),
+ 0,
+ dtype=torch.long)
+ if num_gt == 0 or num_bboxes == 0:
+ # No ground truth or boxes, return empty assignment
+ max_overlaps = decoded_bboxes.new_zeros((num_bboxes, ))
+ assigned_labels = decoded_bboxes.new_full((num_bboxes, ),
+ -1,
+ dtype=torch.long)
+ return AssignResult(
+ num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
+
+ valid_mask, is_in_boxes_and_center = self.get_in_gt_and_in_center_info(
+ priors, gt_bboxes)
+ valid_decoded_bbox = decoded_bboxes[valid_mask]
+ valid_pred_scores = pred_scores[valid_mask]
+ num_valid = valid_decoded_bbox.size(0)
+ if num_valid == 0:
+ # No valid bboxes, return empty assignment
+ max_overlaps = decoded_bboxes.new_zeros((num_bboxes, ))
+ assigned_labels = decoded_bboxes.new_full((num_bboxes, ),
+ -1,
+ dtype=torch.long)
+ return AssignResult(
+ num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
+
+ pairwise_ious = self.iou_calculator(valid_decoded_bbox, gt_bboxes)
+ iou_cost = -torch.log(pairwise_ious + EPS)
+
+ cls_cost = get_cls_cost(valid_pred_scores, gt_labels)
+ cost_matrix = cls_cost
+ cost_matrix *= self.cls_weight
+ iou_cost *= self.iou_weight
+ cost_matrix += iou_cost
+ cost_matrix[~is_in_boxes_and_center] = INF
+
+ matched_pred_ious, matched_gt_inds = \
+ self.dynamic_k_matching(
+ cost_matrix, pairwise_ious, num_gt, valid_mask)
+
+ # convert to AssignResult format
+ assigned_gt_inds[valid_mask] = matched_gt_inds + 1
+ assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)
+ assigned_labels[valid_mask] = gt_labels[matched_gt_inds].long()
+ max_overlaps = assigned_gt_inds.new_full((num_bboxes, ),
+ -INF,
+ dtype=torch.float32)
+ max_overlaps[valid_mask] = matched_pred_ious
+ return AssignResult(
+ num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)
+
+ def get_in_gt_and_in_center_info(
+ self, priors: Tensor, gt_bboxes: Tensor) -> Tuple[Tensor, Tensor]:
+ return get_in_gt_and_in_center_info(priors, gt_bboxes,
+ self.center_radius)
+
+ def dynamic_k_matching(self, cost: Tensor, pairwise_ious: Tensor,
+ num_gt: int,
+ valid_mask: Tensor) -> Tuple[Tensor, Tensor]:
+ """Use IoU and matching cost to calculate the dynamic top-k positive
+ targets."""
+ matching_matrix = torch.zeros_like(cost, dtype=torch.uint8)
+ # select candidate topk ious for dynamic-k calculation
+ candidate_topk = min(self.candidate_topk, pairwise_ious.size(0))
+ topk_ious, _ = torch.topk(pairwise_ious, candidate_topk, dim=0)
+ # calculate dynamic k for each gt
+ dynamic_ks = torch.clamp(topk_ious.sum(0).int(), min=1)
+ for gt_idx in range(num_gt):
+ _, pos_idx = torch.topk(
+ cost[:, gt_idx], k=dynamic_ks[gt_idx], largest=False)
+ matching_matrix[:, gt_idx][pos_idx] = 1
+
+ del topk_ious, dynamic_ks, pos_idx
+
+ prior_match_gt_mask = matching_matrix.sum(1) > 1
+ if prior_match_gt_mask.sum() > 0:
+ cost_min, cost_argmin = torch.min(
+ cost[prior_match_gt_mask, :], dim=1)
+ matching_matrix[prior_match_gt_mask, :] *= 0
+ matching_matrix[prior_match_gt_mask, cost_argmin] = 1
+ # get foreground mask inside box and center prior
+ fg_mask_inboxes = matching_matrix.sum(1) > 0
+ valid_mask[valid_mask.clone()] = fg_mask_inboxes
+
+ matched_gt_inds = matching_matrix[fg_mask_inboxes, :].argmax(1)
+ matched_pred_ious = (matching_matrix *
+ pairwise_ious).sum(1)[fg_mask_inboxes]
+ return matched_pred_ious, matched_gt_inds
diff --git a/dethub/simota_cuda_ops/binary_cross_entropy_cost.cpp b/dethub/simota_cuda_ops/binary_cross_entropy_cost.cpp
new file mode 100644
index 0000000..80c0077
--- /dev/null
+++ b/dethub/simota_cuda_ops/binary_cross_entropy_cost.cpp
@@ -0,0 +1,15 @@
+#include "pytorch_cpp_helper.hpp"
+
+// C++ interface
+
+void BinaryCrossEntropyCostLauncher(Tensor valid_pred_scores,
+ Tensor gt_onehot_label, Tensor cost_matrix);
+
+void binary_cross_entropy_cost(Tensor valid_pred_scores, Tensor gt_onehot_label,
+ Tensor cost_matrix) {
+ CHECK_CUDA_INPUT(valid_pred_scores);
+ CHECK_CUDA_INPUT(gt_onehot_label);
+
+ BinaryCrossEntropyCostLauncher(valid_pred_scores, gt_onehot_label,
+ cost_matrix);
+}
diff --git a/dethub/simota_cuda_ops/binary_cross_entropy_cost_kernel.cu b/dethub/simota_cuda_ops/binary_cross_entropy_cost_kernel.cu
new file mode 100644
index 0000000..1db1c5a
--- /dev/null
+++ b/dethub/simota_cuda_ops/binary_cross_entropy_cost_kernel.cu
@@ -0,0 +1,60 @@
+#include "pytorch_cuda_helper.hpp"
+
+template <typename T>
+__global__ void binary_cross_entropy_cost_cuda_kernel(
+ const T *valid_pred_scores, const T *gt_onehot_label, T *cost_matrix,
+ const int num_pred_scores, const int num_gt, const int num_classes) {
+
+ CUDA_1D_KERNEL_LOOP(index, num_pred_scores * num_gt) {
+ int b1 = index / num_gt;
+ int b2 = index % num_gt;
+
+ int base1 = b1 * num_classes;
+ int base2 = b2 * num_classes;
+
+ // https://github.com/pytorch/pytorch/blob/v1.10.0/aten/src/ATen/native/cuda/Loss.cu#L100-L112
+ const T zero = 0;
+ const T one = 1;
+ const T neg_100 = -100;
+ T cost = 0;
+ for (int offset = 0; offset < num_classes; offset++) {
+ T input_val = valid_pred_scores[base1 + offset];
+ T target_val = gt_onehot_label[base2 + offset];
+
+      CUDA_KERNEL_ASSERT(input_val >= zero && input_val <= one);
+
+ T log_input_val = std::log(input_val);
+ T log_1_minus_input_val = std::log(one - input_val);
+
+ log_input_val = std::max(log_input_val, neg_100);
+ log_1_minus_input_val = std::max(log_1_minus_input_val, neg_100);
+
+ cost += ((target_val - one) * log_1_minus_input_val -
+ (target_val * log_input_val));
+ }
+ cost_matrix[index] = cost;
+ }
+}
+
+void BinaryCrossEntropyCostLauncher(Tensor valid_pred_scores,
+ Tensor gt_onehot_label,
+ Tensor cost_matrix) {
+ int num_pred_scores = valid_pred_scores.size(0);
+ int num_gt = gt_onehot_label.size(0);
+ int num_classes = valid_pred_scores.size(1);
+ int output_size = cost_matrix.numel();
+
+ at::cuda::CUDAGuard device_guard(valid_pred_scores.device());
+ cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ valid_pred_scores.scalar_type(), "binary_cross_entropy_cost_cuda_kernel",
+ ([&] {
+      binary_cross_entropy_cost_cuda_kernel<scalar_t>
+          <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
+              valid_pred_scores.data_ptr<scalar_t>(),
+              gt_onehot_label.data_ptr<scalar_t>(),
+              cost_matrix.data_ptr<scalar_t>(), num_pred_scores, num_gt,
+              num_classes);
+ }));
+ AT_CUDA_CHECK(cudaGetLastError());
+}
diff --git a/dethub/simota_cuda_ops/check_prior_in_gt.cpp b/dethub/simota_cuda_ops/check_prior_in_gt.cpp
new file mode 100644
index 0000000..7352ebb
--- /dev/null
+++ b/dethub/simota_cuda_ops/check_prior_in_gt.cpp
@@ -0,0 +1,15 @@
+#include "pytorch_cpp_helper.hpp"
+
+void CheckPriorsInGtLauncher(Tensor priors, Tensor gt_bboxes, Tensor is_in_gts,
+ Tensor is_in_cts, float center_radius);
+
+void check_prior_in_gt(Tensor priors, Tensor gt_bboxes, Tensor is_in_gts,
+ Tensor is_in_cts, float center_radius) {
+ CHECK_CUDA_INPUT(priors);
+ CHECK_CUDA_INPUT(gt_bboxes);
+ CHECK_CUDA_INPUT(is_in_gts);
+ CHECK_CUDA_INPUT(is_in_cts);
+
+ return CheckPriorsInGtLauncher(priors, gt_bboxes, is_in_gts, is_in_cts,
+ center_radius);
+}
diff --git a/dethub/simota_cuda_ops/check_prior_in_gt_kernel.cu b/dethub/simota_cuda_ops/check_prior_in_gt_kernel.cu
new file mode 100644
index 0000000..0ea2ab1
--- /dev/null
+++ b/dethub/simota_cuda_ops/check_prior_in_gt_kernel.cu
@@ -0,0 +1,58 @@
+#include "pytorch_cuda_helper.hpp"
+
+template <typename T>
+__global__ void check_prior_in_gt_cuda_kernel(
+ const T *priors, const T *gt_bboxes, bool *is_in_gts, bool *is_in_cts,
+ const int num_prior, const int num_gt, const float center_radius) {
+
+ CUDA_1D_KERNEL_LOOP(index, num_prior * num_gt) {
+ int b1 = index / num_gt;
+ int b2 = index % num_gt;
+
+ int base1 = b1 * 4;
+ T prior_x = priors[base1];
+ T prior_y = priors[base1 + 1];
+ T prior_x_stride = priors[base1 + 2];
+ T prior_y_stride = priors[base1 + 3];
+
+ int base2 = b2 * 4;
+ T gt_x1 = gt_bboxes[base2];
+ T gt_y1 = gt_bboxes[base2 + 1];
+ T gt_x2 = gt_bboxes[base2 + 2];
+ T gt_y2 = gt_bboxes[base2 + 3];
+
+ T ct_x = 0.5 * (gt_x1 + gt_x2);
+ T ct_y = 0.5 * (gt_y1 + gt_y2);
+ T ct_x1 = ct_x - center_radius * prior_x_stride;
+ T ct_x2 = ct_x + center_radius * prior_x_stride;
+ T ct_y1 = ct_y - center_radius * prior_y_stride;
+ T ct_y2 = ct_y + center_radius * prior_y_stride;
+
+ bool is_in_gt = (prior_x > gt_x1) & (prior_x < gt_x2) & (prior_y > gt_y1) &
+ (prior_y < gt_y2);
+ bool is_in_ct = (prior_x > ct_x1) & (prior_x < ct_x2) & (prior_y > ct_y1) &
+ (prior_y < ct_y2);
+
+ is_in_gts[index] = is_in_gt;
+ is_in_cts[index] = is_in_ct;
+ }
+}
+
+void CheckPriorsInGtLauncher(Tensor priors, Tensor gt_bboxes, Tensor is_in_gts,
+ Tensor is_in_cts, float center_radius) {
+ int output_size = is_in_gts.numel();
+ int num_prior = priors.size(0);
+ int num_gt = gt_bboxes.size(0);
+
+ at::cuda::CUDAGuard device_guard(priors.device());
+ cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+ AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+ priors.scalar_type(), "check_prior_in_gt_cuda_kernel", ([&] {
+        check_prior_in_gt_cuda_kernel<scalar_t>
+            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
+                priors.data_ptr<scalar_t>(), gt_bboxes.data_ptr<scalar_t>(),
+                is_in_gts.data_ptr<bool>(), is_in_cts.data_ptr<bool>(),
+                num_prior, num_gt, center_radius);
+ }));
+ AT_CUDA_CHECK(cudaGetLastError());
+}
diff --git a/dethub/simota_cuda_ops/pybind.cpp b/dethub/simota_cuda_ops/pybind.cpp
new file mode 100644
index 0000000..64c589b
--- /dev/null
+++ b/dethub/simota_cuda_ops/pybind.cpp
@@ -0,0 +1,15 @@
+#include "pytorch_cpp_helper.hpp"
+
+void check_prior_in_gt(Tensor priors, Tensor gt_bboxes, Tensor is_in_gts,
+ Tensor is_in_cts, float center_radius);
+
+void binary_cross_entropy_cost(Tensor valid_pred_scores, Tensor gt_onehot_label,
+ Tensor cost_matrix);
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("check_prior_in_gt", &check_prior_in_gt,
+ "check if priors in gt_bboxes or gt center regions");
+
+ m.def("binary_cross_entropy_cost", &binary_cross_entropy_cost,
+ "classification cost of SimOTA using binary_cross_entropy loss");
+}
diff --git a/dethub/simota_cuda_ops/pytorch_cpp_helper.hpp b/dethub/simota_cuda_ops/pytorch_cpp_helper.hpp
new file mode 100644
index 0000000..64a32cc
--- /dev/null
+++ b/dethub/simota_cuda_ops/pytorch_cpp_helper.hpp
@@ -0,0 +1,26 @@
+// https://github.com/open-mmlab/mmcv/blob/v1.4.0/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp
+
+#ifndef PYTORCH_CPP_HELPER
+#define PYTORCH_CPP_HELPER
+#include <torch/extension.h>
+
+#include <vector>
+
+using namespace at;
+
+#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
+
+#define CHECK_CUDA(x) \
+ TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor")
+#define CHECK_CPU(x) \
+ TORCH_CHECK(!x.device().is_cuda(), #x " must be a CPU tensor")
+#define CHECK_CONTIGUOUS(x) \
+ TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
+#define CHECK_CUDA_INPUT(x) \
+ CHECK_CUDA(x); \
+ CHECK_CONTIGUOUS(x)
+#define CHECK_CPU_INPUT(x) \
+ CHECK_CPU(x); \
+ CHECK_CONTIGUOUS(x)
+
+#endif // PYTORCH_CPP_HELPER
diff --git a/dethub/simota_cuda_ops/pytorch_cuda_helper.hpp b/dethub/simota_cuda_ops/pytorch_cuda_helper.hpp
new file mode 100644
index 0000000..84e1dd7
--- /dev/null
+++ b/dethub/simota_cuda_ops/pytorch_cuda_helper.hpp
@@ -0,0 +1,33 @@
+// https://github.com/open-mmlab/mmcv/blob/v1.4.0/mmcv/ops/csrc/common/pytorch_cuda_helper.hpp
+// https://github.com/open-mmlab/mmcv/blob/v1.4.0/mmcv/ops/csrc/common/cuda/common_cuda_helper.hpp
+
+#ifndef PYTORCH_CUDA_HELPER
+#define PYTORCH_CUDA_HELPER
+
+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+
+#include <THC/THCAtomics.cuh>
+#include <torch/extension.h>
+
+#include <cuda.h>
+
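+// Grid-stride loop: each thread starts at its global index and advances by
+// the total number of launched threads, so any n is covered even when the
+// grid size is capped by GET_BLOCKS.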
+#define CUDA_1D_KERNEL_LOOP(i, n) \
+ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+ i += blockDim.x * gridDim.x)
+
+#define THREADS_PER_BLOCK 512
+
+inline int GET_BLOCKS(const int N) {
+ int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
+ int max_block_num = 4096;
+ return min(optimal_block_num, max_block_num);
+}
+
+using at::Half;
+using at::Tensor;
+using phalf = at::Half;
+
+#define __PHALF(x) (x)
+#endif // PYTORCH_CUDA_HELPER
diff --git a/dethub/simota_cuda_ops/setup.py b/dethub/simota_cuda_ops/setup.py
new file mode 100644
index 0000000..aa5f723
--- /dev/null
+++ b/dethub/simota_cuda_ops/setup.py
@@ -0,0 +1,19 @@
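+# Standalone build for the SimOTA CUDA ops; scripts/install.sh runs
+# `python setup.py install` from this directory.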
+from setuptools import setup
+
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+setup(
+ name='simota_cuda_ops',
+ ext_modules=[
+ CUDAExtension(
+ name='simota_cuda_ops',
+ sources=[
+ 'pybind.cpp',
+ 'check_prior_in_gt_kernel.cu',
+ 'check_prior_in_gt.cpp',
+ 'binary_cross_entropy_cost_kernel.cu',
+ 'binary_cross_entropy_cost.cpp',
+ ],
+ extra_compile_args={'nvcc': ['-O3']})
+ ],
+ cmdclass={'build_ext': BuildExtension})
diff --git a/dethub/testing/__init__.py b/dethub/testing/__init__.py
new file mode 100644
index 0000000..f0265ff
--- /dev/null
+++ b/dethub/testing/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from ._utils import get_detector_cfg
diff --git a/dethub/testing/_utils.py b/dethub/testing/_utils.py
new file mode 100644
index 0000000..9977651
--- /dev/null
+++ b/dethub/testing/_utils.py
@@ -0,0 +1,38 @@
+import copy
+from os.path import dirname, exists, join
+
+from mmengine.config import Config
+
+
+def _get_config_directory():
+ """Find the predefined detector config directory."""
+ try:
+        # Assume we are running in the source dethub repo
+ repo_dpath = dirname(dirname(__file__))
+ except NameError:
+ # For IPython development when this __file__ is not defined
+ import dethub
+ repo_dpath = dirname(dethub.__file__)
+ config_dpath = join(repo_dpath, '.mim/configs')
+ if not exists(config_dpath):
+ raise Exception('Cannot find config path')
+ return config_dpath
+
+
+def _get_config_module(fname):
+ """Load a configuration as a python module."""
+ config_dpath = _get_config_directory()
+ config_fpath = join(config_dpath, fname)
+ config_mod = Config.fromfile(config_fpath)
+ return config_mod
+
+
+def get_detector_cfg(fname):
+ """Grab configs necessary to create a detector.
+
+ These are deep copied to allow for safe modification of parameters without
+ influencing other tests.
+ """
+ config = _get_config_module(fname)
+ model = copy.deepcopy(config.model)
+ return model
diff --git a/dethub/version.py b/dethub/version.py
new file mode 100644
index 0000000..3ada1aa
--- /dev/null
+++ b/dethub/version.py
@@ -0,0 +1,3 @@
+__version__ = '0.1.0'
+
+__all__ = ['__version__']
diff --git a/dethub/visualization/__init__.py b/dethub/visualization/__init__.py
new file mode 100644
index 0000000..b0db95c
--- /dev/null
+++ b/dethub/visualization/__init__.py
@@ -0,0 +1,2 @@
+# flake8: noqa:F401
+from .vis_backend import WandbVisBackend
diff --git a/dethub/visualization/vis_backend.py b/dethub/visualization/vis_backend.py
new file mode 100644
index 0000000..bfb76a6
--- /dev/null
+++ b/dethub/visualization/vis_backend.py
@@ -0,0 +1,59 @@
+import os
+from typing import Optional
+
+import numpy as np
+from mmengine.registry import VISBACKENDS
+from mmengine.visualization import WandbVisBackend as Base
+from mmengine.visualization.vis_backend import force_init_env
+
+
+@VISBACKENDS.register_module(force=True)
+class WandbVisBackend(Base):
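+    """Wandb visualization backend with per-image commit control.
+
+    A thin extension of mmengine's ``WandbVisBackend``: the ``commit`` flag
+    passed here is forwarded to ``wandb.log`` when logging images.
+    """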
+
+ def __init__(self,
+ save_dir: str,
+ init_kwargs: Optional[dict] = None,
+ define_metric_cfg: Optional[dict] = None,
+ commit: Optional[bool] = True):
+ super(Base, self).__init__(save_dir)
+ self._init_kwargs = init_kwargs
+ self._define_metric_cfg = define_metric_cfg
+ self._commit = commit
+
+ def _init_env(self):
+ """Setup env for wandb."""
+ if not os.path.exists(self._save_dir):
+ os.makedirs(self._save_dir, exist_ok=True) # type: ignore
+ if self._init_kwargs is None:
+ self._init_kwargs = {'dir': self._save_dir}
+ else:
+ self._init_kwargs.setdefault('dir', self._save_dir)
+ try:
+ import wandb
+ except ImportError:
+ raise ImportError(
+ 'Please run "pip install wandb" to install wandb')
+
+ wandb.init(**self._init_kwargs)
+ if self._define_metric_cfg is not None:
+ for metric, summary in self._define_metric_cfg.items():
+ wandb.define_metric(metric, summary=summary)
+ self._wandb = wandb
+
+ @force_init_env
+ def add_image(self,
+ name: str,
+ image: np.ndarray,
+ step: int = 0,
+ **kwargs) -> None:
+ """Record the image to wandb.
+
+ Args:
+ name (str): The image identifier.
+ image (np.ndarray): The image to be saved. The format
+ should be RGB.
+            step (int): Unused; wandb tracks its own step internally.
+                Defaults to 0.
+ """
+ image = self._wandb.Image(image)
+ self._wandb.log({name: image}, commit=self._commit)
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..e74eee5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,16 @@
+version: "3.8"
+services:
+ dethub:
+ build: ./
+ runtime: nvidia
+ environment:
+ - NVIDIA_VISIBLE_DEVICES=all
+ - NVIDIA_DRIVER_CAPABILITIES=all
+ - PYTHONPATH=/workspace:$PYTHONPATH
+ container_name: dethub
+ tty: true
+ volumes:
+ - $DATA_DIR:/workspace/data
+ - $PWD:/workspace
+ working_dir: /workspace
+ ipc: host
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d0c3cbf
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = source
+BUILDDIR = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/source/api.rst b/docs/source/api.rst
new file mode 100644
index 0000000..c6b463b
--- /dev/null
+++ b/docs/source/api.rst
@@ -0,0 +1,41 @@
+dethub.datasets
+---------------
+
+transforms
+^^^^^^^^^^^^
+.. automodule:: dethub.datasets.transforms
+ :members:
+
+dethub.engine
+--------------
+hooks
+^^^^^^^^^^
+.. automodule:: dethub.engine.hooks
+ :members:
+
+dethub.evaluation
+-----------------
+
+metrics
+^^^^^^^^^^
+.. automodule:: dethub.evaluation.metrics
+ :members:
+
+
+dethub.models
+--------------
+
+detectors
+^^^^^^^^^^
+.. automodule:: dethub.models.detectors
+ :members:
+
+task_modules
+^^^^^^^^^^^^
+.. automodule:: dethub.models.task_modules
+ :members:
+
+dethub.visualization
+--------------------
+.. automodule:: dethub.visualization
+   :members:
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..075d190
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,72 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+
+import sphinx_rtd_theme
+
+sys.path.insert(
+ 0,
+ os.path.dirname(
+ os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+# -- Project information -----------------------------------------------------
+
+project = 'dethub'
+copyright = '2022, dethub'
+author = 'dethub Contributors'
+
+# The full version, including alpha/beta/rc tags
+release = '2022'
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx_autodoc_typehints',
+ 'sphinx.ext.viewcode'
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = 'en'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The master toctree document.
+master_doc = 'index'
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
diff --git a/docs/source/get_started.md b/docs/source/get_started.md
new file mode 100644
index 0000000..2b23857
--- /dev/null
+++ b/docs/source/get_started.md
@@ -0,0 +1,41 @@
+# Data Preparation
+
+Prepare the datasets in your data directory. The expected format of each dataset is described in [each project's README](../../configs/projects).
+
+```
+/path/to/data
+├── coco
+├── gbr_cots
+├── livecell
+└── sartorius_cellseg
+```
+
+# Environment setup
+
+Clone the repository
+
+```
+$ git clone https://github.com/okotaku/dethub
+```
+
+Set environment variables (`DATA_DIR` is mounted to `/workspace/data` in `docker-compose.yml`)
+
+```
+$ export DATA_DIR=/path/to/data
+```
+
+Start a Docker container
+
+```
+$ docker compose up -d dethub
+# optional install
+$ docker compose exec dethub pip install -r docker/dev.txt
+```
+
+Run demo
+
+```
+$ docker compose exec dethub python tools/image_demo.py ${IMG} ${CONFIG_FILE} ${CHECKPOINT_FILE} ${OUT_FILE}
+# Example
+$ docker compose exec dethub python tools/image_demo.py configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1.tif configs/projects/livecell/yolox/yolox_s_livecell.py https://github.com/okotaku/dethub-weights/releases/download/v0.0.1/yolox_s_livecell-b3f4347f.pth --out-file configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1_demo.jpg
+```
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..0fbdd90
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,26 @@
+Welcome to dethub's documentation!
+=======================================
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Get Started
+
+ get_started.md
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Quick Run
+
+ run.md
+
+.. toctree::
+ :maxdepth: 1
+ :caption: API Reference
+
+ api.rst
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/docs/source/run.md b/docs/source/run.md
new file mode 100644
index 0000000..99863df
--- /dev/null
+++ b/docs/source/run.md
@@ -0,0 +1,36 @@
+# Prepare configs
+
+For basic usage of configs, see [MMDetection Tutorial 1: Learn about Configs](https://mmdetection.readthedocs.io/en/stable/tutorials/config.html)
+
+# Train a model
+
+```
+# single-gpu
+$ docker compose exec dethub python /opt/site-packages/mmdet/.mim/tools/train.py ${CONFIG_FILE}
+# Example
+$ docker compose exec dethub python /opt/site-packages/mmdet/.mim/tools/train.py configs/projects/livecell/yolox/yolox_s_livecell.py
+
+# multiple-gpu
+$ docker compose exec dethub python -m torch.distributed.launch --nproc_per_node=${GPUS} /opt/site-packages/mmdet/.mim/tools/train.py ${CONFIG_FILE} --launcher pytorch
+```
+
+# Test a dataset
+
+```
+# single-gpu
+$ docker compose exec dethub python /opt/site-packages/mmdet/.mim/tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE}
+# Example
+$ docker compose exec dethub python /opt/site-packages/mmdet/.mim/tools/test.py configs/projects/livecell/yolox/yolox_s_livecell.py work_dirs/yolox_s_livecell/epoch_100.pth
+```
+
+# Run demo
+
+```
+$ docker compose exec dethub python tools/image_demo.py ${IMG} ${CONFIG_FILE} ${CHECKPOINT_FILE} ${OUT_FILE}
+# Example
+$ docker compose exec dethub python tools/image_demo.py configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1.tif configs/projects/livecell/yolox/yolox_s_livecell.py work_dirs/yolox_s_livecell/epoch_100.pth --out-file configs/projects/livecell/demo/A172_Phase_A7_1_00d00h00m_1_demo.jpg
+```
+
+# More details
+
+See [MMDetection Docs](https://mmdetection.readthedocs.io/en/stable/)
diff --git a/requirements/dev.txt b/requirements/dev.txt
new file mode 100644
index 0000000..553490f
--- /dev/null
+++ b/requirements/dev.txt
@@ -0,0 +1,2 @@
+scikit-learn
+tqdm
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
new file mode 100644
index 0000000..225b746
--- /dev/null
+++ b/requirements/requirements.txt
@@ -0,0 +1,8 @@
+albumentations
+imgaug
+mmcls==v1.0.0rc0
+mmdet==v3.0.0rc0
+mmengine
+timm
+ujson
+wandb
diff --git a/requirements/tests.txt b/requirements/tests.txt
new file mode 100644
index 0000000..e079f8a
--- /dev/null
+++ b/requirements/tests.txt
@@ -0,0 +1 @@
+pytest
diff --git a/scripts/install.sh b/scripts/install.sh
new file mode 100644
index 0000000..8312236
--- /dev/null
+++ b/scripts/install.sh
@@ -0,0 +1,3 @@
+cd dethub/simota_cuda_ops
+python setup.py install
+cd ../..
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..56407a1
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,21 @@
+[isort]
+line_length = 79
+multi_line_output = 0
+extra_standard_library = setuptools
+known_first_party = mmdet
+known_third_party = PIL,asynctest,cityscapesscripts,cv2,gather_models,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,pytorch_sphinx_theme,requests,scipy,seaborn,six,terminaltables,torch,ts,yaml
+no_lines_before = STDLIB,LOCALFOLDER
+default_section = THIRDPARTY
+
+[yapf]
+BASED_ON_STYLE = pep8
+BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
+SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
+
+# ignore-words-list needs to be lowercase format. For example, if we want to
+# ignore word "BA", then we need to append "ba" to ignore-words-list rather
+# than "BA"
+[codespell]
+skip = *.ipynb
+quiet-level = 3
+ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids,TOOD,tood,ba
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..af6e9d3
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,191 @@
+import os
+import os.path as osp
+import shutil
+import sys
+import warnings
+from setuptools import find_packages as _find_packages
+from setuptools import setup
+
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+
+
+def readme():
+ with open('README.md', encoding='utf-8') as f:
+ content = f.read()
+ return content
+
+
+def get_version():
+ version_file = 'dethub/version.py'
+ with open(version_file, 'r', encoding='utf-8') as f:
+ exec(compile(f.read(), version_file, 'exec'))
+ return locals()['__version__']
+
+
+def parse_requirements(fname='requirements/requirements.txt',
+ with_version=True):
+ """Parse the package dependencies listed in a requirements file but strips
+ specific versioning information.
+
+ Args:
+ fname (str): path to requirements file
+ with_version (bool, default=False): if True include version specs
+ Returns:
+ List[str]: list of requirements items
+ CommandLine:
+ python -c "import setup; print(setup.parse_requirements())"
+ """
+ import re
+ import sys
+ from os.path import exists
+ require_fpath = fname
+
+ def parse_line(line):
+ """Parse information from a line in a requirements text file."""
+ if line.startswith('-r '):
+ # Allow specifying requirements in other files
+ target = line.split(' ')[1]
+ for info in parse_require_file(target):
+ yield info
+ else:
+ info = {'line': line}
+ if line.startswith('-e '):
+ info['package'] = line.split('#egg=')[1]
+ else:
+ # Remove versioning from the package
+ pat = '(' + '|'.join(['>=', '==', '>']) + ')'
+ parts = re.split(pat, line, maxsplit=1)
+ parts = [p.strip() for p in parts]
+
+ info['package'] = parts[0]
+ if len(parts) > 1:
+ op, rest = parts[1:]
+ if ';' in rest:
+ # Handle platform specific dependencies
+ # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
+ version, platform_deps = map(str.strip,
+ rest.split(';'))
+ info['platform_deps'] = platform_deps
+ else:
+ version = rest # NOQA
+ if '--' in version:
+ # the `extras_require` doesn't accept options.
+ version = version.split('--')[0].strip()
+ info['version'] = (op, version)
+ yield info
+
+ def parse_require_file(fpath):
+ with open(fpath, 'r') as f:
+ for line in f.readlines():
+ line = line.strip()
+ if line and not line.startswith('#'):
+ for info in parse_line(line):
+ yield info
+
+ def gen_packages_items():
+ if exists(require_fpath):
+ for info in parse_require_file(require_fpath):
+ parts = [info['package']]
+ if with_version and 'version' in info:
+ parts.extend(info['version'])
+ if not sys.version.startswith('3.4'):
+ # apparently package_deps are broken in 3.4
+ platform_deps = info.get('platform_deps')
+ if platform_deps is not None:
+ parts.append(';' + platform_deps)
+ item = ''.join(parts)
+ yield item
+
+ packages = list(gen_packages_items())
+ return packages
+
+
+def add_mim_extension():
+ """Add extra files that are required to support MIM into the package.
+
+ These files will be added by creating a symlink to the originals if the
+ package is installed in `editable` mode (e.g. pip install -e .), or by
+ copying from the originals otherwise.
+ """
+
+ # parse installment mode
+ if 'develop' in sys.argv:
+ # installed by `pip install -e .`
+ mode = 'symlink'
+ elif 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:
+ # installed by `pip install .`
+ # or create source distribution by `python setup.py sdist`
+ mode = 'copy'
+ else:
+ return
+
+ filenames = ['tools', 'configs', 'model-index.yml']
+ repo_path = osp.dirname(__file__)
+ mim_path = osp.join(repo_path, 'dethub', '.mim')
+ os.makedirs(mim_path, exist_ok=True)
+
+ for filename in filenames:
+ if osp.exists(filename):
+ src_path = osp.join(repo_path, filename)
+ tar_path = osp.join(mim_path, filename)
+
+ if osp.isfile(tar_path) or osp.islink(tar_path):
+ os.remove(tar_path)
+ elif osp.isdir(tar_path):
+ shutil.rmtree(tar_path)
+
+ if mode == 'symlink':
+ src_relpath = osp.relpath(src_path, osp.dirname(tar_path))
+ try:
+ os.symlink(src_relpath, tar_path)
+ except OSError:
+ # Creating a symbolic link on windows may raise an
+ # `OSError: [WinError 1314]` due to privilege. If
+ # the error happens, the src file will be copied
+ mode = 'copy'
+ warnings.warn(
+ f'Failed to create a symbolic link for {src_relpath}, '
+ f'and it will be copied to {tar_path}')
+ else:
+ continue
+
+ if mode == 'copy':
+ if osp.isfile(src_path):
+ shutil.copyfile(src_path, tar_path)
+ elif osp.isdir(src_path):
+ shutil.copytree(src_path, tar_path)
+ else:
+ warnings.warn(f'Cannot copy file {src_path}.')
+ else:
+ raise ValueError(f'Invalid mode {mode}')
+
+
+if __name__ == '__main__':
+ add_mim_extension()
+ setup(
+ name='dethub',
+ version=get_version(),
+ description='dethub',
+ long_description=readme(),
+ long_description_content_type='text/markdown',
+ keywords='computer vision, object detection',
+ packages=_find_packages(),
+ include_package_data=True,
+ url='',
+ author='dethub Contributors',
+ author_email='',
+ install_requires=parse_requirements('requirements/requirements.txt'),
+ ext_modules=[
+ CUDAExtension(
+ name='simota_cuda_ops',
+ sources=[
+ 'dethub/simota_cuda_ops/pybind.cpp',
+ 'dethub/simota_cuda_ops/check_prior_in_gt_kernel.cu',
+ 'dethub/simota_cuda_ops/check_prior_in_gt.cpp',
+ 'dethub/simota_cuda_ops/binary_cross_entropy_cost_kernel.cu', # noqa
+ 'dethub/simota_cuda_ops/binary_cross_entropy_cost.cpp',
+ ],
+ extra_compile_args={'nvcc': ['-O3']})
+ ],
+ zip_safe=False,
+ cmdclass={'build_ext': BuildExtension})
diff --git a/tests/data/color.jpg b/tests/data/color.jpg
new file mode 100644
index 0000000..05d62b8
Binary files /dev/null and b/tests/data/color.jpg differ
diff --git a/tests/test_engine/test_hooks/test_ema_hook.py b/tests/test_engine/test_hooks/test_ema_hook.py
new file mode 100644
index 0000000..1657eab
--- /dev/null
+++ b/tests/test_engine/test_hooks/test_ema_hook.py
@@ -0,0 +1,300 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+from unittest import TestCase
+from unittest.mock import Mock
+
+import torch
+import torch.nn as nn
+from mmengine.evaluator import Evaluator
+from mmengine.model import BaseModel, ExponentialMovingAverage
+from mmengine.optim import OptimWrapper
+from mmengine.registry import DATASETS, MODEL_WRAPPERS
+from mmengine.runner import Runner
+from mmengine.testing import assert_allclose
+from torch.utils.data import Dataset
+
+from dethub.engine.hooks import EMAHook
+
+
+class ToyModel(nn.Module):
+
+ def __init__(self):
+ super().__init__()
+ self.linear = nn.Linear(2, 1)
+
+ def forward(self, inputs, data_sample, mode='tensor'):
+ labels = torch.stack(data_sample)
+ inputs = torch.stack(inputs)
+ outputs = self.linear(inputs)
+ if mode == 'tensor':
+ return outputs
+ elif mode == 'loss':
+ loss = (labels - outputs).sum()
+ outputs = dict(loss=loss)
+ return outputs
+ else:
+ return outputs
+
+
+class ToyModel1(BaseModel, ToyModel):
+
+ def __init__(self):
+ super().__init__()
+
+ def forward(self, *args, **kwargs):
+ return super(BaseModel, self).forward(*args, **kwargs)
+
+
+class ToyModel2(BaseModel, ToyModel):
+
+ def __init__(self):
+ super().__init__()
+ self.linear1 = nn.Linear(2, 1)
+
+ def forward(self, *args, **kwargs):
+ return super(BaseModel, self).forward(*args, **kwargs)
+
+
+class ToyModel3(BaseModel, ToyModel):
+
+ def __init__(self):
+ super().__init__()
+ self.linear = nn.Linear(2, 2)
+
+ def forward(self, *args, **kwargs):
+ return super(BaseModel, self).forward(*args, **kwargs)
+
+
+@DATASETS.register_module()
+class DummyDataset(Dataset):
+ METAINFO = dict() # type: ignore
+ data = torch.randn(12, 2)
+ label = torch.ones(12)
+
+ @property
+ def metainfo(self):
+ return self.METAINFO
+
+ def __len__(self):
+ return self.data.size(0)
+
+ def __getitem__(self, index):
+ return dict(inputs=self.data[index], data_sample=self.label[index])
+
+
+class TestEMAHook(TestCase):
+
+ def setUp(self):
+ self.temp_dir = tempfile.TemporaryDirectory()
+
+ def tearDown(self):
+ self.temp_dir.cleanup()
+
+ def test_ema_hook(self):
+ device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+ model = ToyModel1().to(device)
+ evaluator = Evaluator([])
+ evaluator.evaluate = Mock(return_value=dict(acc=0.5))
+ runner = Runner(
+ model=model,
+ train_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_size=3,
+ num_workers=0),
+ val_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ batch_size=3,
+ num_workers=0),
+ val_evaluator=evaluator,
+ work_dir=self.temp_dir.name,
+ optim_wrapper=OptimWrapper(
+ torch.optim.Adam(ToyModel().parameters())),
+ train_cfg=dict(by_epoch=True, max_epochs=2, val_interval=1),
+ val_cfg=dict(),
+ default_hooks=dict(logger=None),
+ custom_hooks=[dict(type='EMAHook', )],
+ experiment_name='test1')
+ runner.train()
+ for hook in runner.hooks:
+ if isinstance(hook, EMAHook):
+ self.assertTrue(
+ isinstance(hook.ema_model, ExponentialMovingAverage))
+
+ self.assertTrue(
+ osp.exists(osp.join(self.temp_dir.name, 'epoch_2.pth')))
+ checkpoint = torch.load(osp.join(self.temp_dir.name, 'epoch_2.pth'))
+ self.assertTrue('ema_state_dict' in checkpoint)
+ self.assertTrue(checkpoint['ema_state_dict']['steps'] == 8)
+
+ # load and testing
+ runner = Runner(
+ model=model,
+ test_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_size=3,
+ num_workers=0),
+ test_evaluator=evaluator,
+ test_cfg=dict(),
+ work_dir=self.temp_dir.name,
+ load_from=osp.join(self.temp_dir.name, 'epoch_2.pth'),
+ default_hooks=dict(logger=None),
+ custom_hooks=[dict(type='EMAHook')],
+ experiment_name='test2')
+ runner.test()
+
+ @MODEL_WRAPPERS.register_module()
+ class DummyWrapper(BaseModel):
+
+ def __init__(self, model):
+ super().__init__()
+ self.module = model
+
+ def forward(self, *args, **kwargs):
+ return self.module(*args, **kwargs)
+
+ # with model wrapper
+ runner = Runner(
+ model=DummyWrapper(ToyModel()),
+ test_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_size=3,
+ num_workers=0),
+ test_evaluator=evaluator,
+ test_cfg=dict(),
+ work_dir=self.temp_dir.name,
+ load_from=osp.join(self.temp_dir.name, 'epoch_2.pth'),
+ default_hooks=dict(logger=None),
+ custom_hooks=[dict(type='EMAHook')],
+ experiment_name='test3')
+ runner.test()
+
+ # Test load checkpoint without ema_state_dict
+ ckpt = torch.load(osp.join(self.temp_dir.name, 'epoch_2.pth'))
+ ckpt.pop('ema_state_dict')
+ torch.save(ckpt,
+ osp.join(self.temp_dir.name, 'without_ema_state_dict.pth'))
+ runner = Runner(
+ model=DummyWrapper(ToyModel()),
+ test_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_size=3,
+ num_workers=0),
+ test_evaluator=evaluator,
+ test_cfg=dict(),
+ work_dir=self.temp_dir.name,
+ load_from=osp.join(self.temp_dir.name,
+ 'without_ema_state_dict.pth'),
+ default_hooks=dict(logger=None),
+ custom_hooks=[dict(type='EMAHook')],
+ experiment_name='test4')
+ runner.test()
+
+        # Test non-strict loading (strict_load=False) when the model has
+        # extra parameters that are missing from the checkpoint.
+ runner = Runner(
+ model=ToyModel2(),
+ test_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_size=3,
+ num_workers=0),
+ test_evaluator=evaluator,
+ test_cfg=dict(),
+ work_dir=self.temp_dir.name,
+ load_from=osp.join(self.temp_dir.name, 'epoch_2.pth'),
+ default_hooks=dict(logger=None),
+ custom_hooks=[dict(type='EMAHook', strict_load=False)],
+ experiment_name='test5')
+ runner.test()
+
+        # Test non-strict loading (strict_load=False) of a checkpoint
+        # without ema_state_dict when the model has a different-sized head.
+ runner = Runner(
+ model=ToyModel3(),
+ test_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_size=3,
+ num_workers=0),
+ test_evaluator=evaluator,
+ test_cfg=dict(),
+ work_dir=self.temp_dir.name,
+ load_from=osp.join(self.temp_dir.name,
+ 'without_ema_state_dict.pth'),
+ default_hooks=dict(logger=None),
+ custom_hooks=[dict(type='EMAHook', strict_load=False)],
+ experiment_name='test5.1')
+ runner.test()
+
+        # Test enabling EMA from epoch 5.
+ runner = Runner(
+ model=model,
+ train_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_size=3,
+ num_workers=0),
+ val_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ batch_size=3,
+ num_workers=0),
+ val_evaluator=evaluator,
+ work_dir=self.temp_dir.name,
+ optim_wrapper=OptimWrapper(
+ torch.optim.Adam(ToyModel().parameters())),
+ train_cfg=dict(by_epoch=True, max_epochs=10, val_interval=1),
+ val_cfg=dict(),
+ default_hooks=dict(logger=None),
+ custom_hooks=[dict(type='EMAHook', begin_epoch=5)],
+ experiment_name='test6')
+ runner.train()
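+        # Before begin_epoch, the EMA parameters should exactly track the
+        # source model, so epoch_4's state_dict and ema_state_dict match.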
+ state_dict = torch.load(
+ osp.join(self.temp_dir.name, 'epoch_4.pth'), map_location='cpu')
+ self.assertIn('ema_state_dict', state_dict)
+ for k, v in state_dict['state_dict'].items():
+ assert_allclose(v, state_dict['ema_state_dict']['module.' + k])
+ state_dict = torch.load(
+ osp.join(self.temp_dir.name, 'epoch_5.pth'), map_location='cpu')
+ self.assertIn('ema_state_dict', state_dict)
+
+        # Test enabling EMA from iteration 5.
+ runner = Runner(
+ model=model,
+ train_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ batch_size=3,
+ num_workers=0),
+ val_dataloader=dict(
+ dataset=dict(type='DummyDataset'),
+ sampler=dict(type='DefaultSampler', shuffle=False),
+ batch_size=3,
+ num_workers=0),
+ val_evaluator=evaluator,
+ work_dir=self.temp_dir.name,
+ optim_wrapper=OptimWrapper(
+ torch.optim.Adam(ToyModel().parameters())),
+ train_cfg=dict(by_epoch=False, max_iters=10, val_interval=1),
+ val_cfg=dict(),
+ default_hooks=dict(
+ checkpoint=dict(
+ type='CheckpointHook', interval=1, by_epoch=False)),
+ custom_hooks=[dict(type='EMAHook', begin_iter=5)],
+ experiment_name='test7')
+ runner.train()
+ state_dict = torch.load(
+ osp.join(self.temp_dir.name, 'iter_4.pth'), map_location='cpu')
+ self.assertIn('ema_state_dict', state_dict)
+ for k, v in state_dict['state_dict'].items():
+ assert_allclose(v, state_dict['ema_state_dict']['module.' + k])
+ state_dict = torch.load(
+ osp.join(self.temp_dir.name, 'iter_5.pth'), map_location='cpu')
+ self.assertIn('ema_state_dict', state_dict)
diff --git a/tests/test_engine/test_hooks/test_visualization_hook.py b/tests/test_engine/test_hooks/test_visualization_hook.py
new file mode 100644
index 0000000..fc78732
--- /dev/null
+++ b/tests/test_engine/test_hooks/test_visualization_hook.py
@@ -0,0 +1,69 @@
+import os.path as osp
+import shutil
+import time
+from unittest import TestCase
+from unittest.mock import Mock
+
+import torch
+from mmengine.structures import InstanceData
+
+from dethub.engine.hooks import DetVisualizationHook
+from mmdet.structures import DetDataSample
+from mmdet.visualization import DetLocalVisualizer
+
+
+def _rand_bboxes(num_boxes, h, w):
+ cx, cy, bw, bh = torch.rand(num_boxes, 4).T
+
+ tl_x = ((cx * w) - (w * bw / 2)).clamp(0, w)
+ tl_y = ((cy * h) - (h * bh / 2)).clamp(0, h)
+ br_x = ((cx * w) + (w * bw / 2)).clamp(0, w)
+ br_y = ((cy * h) + (h * bh / 2)).clamp(0, h)
+
+ bboxes = torch.stack([tl_x, tl_y, br_x, br_y], dim=0).T
+ return bboxes
+
+
+class TestVisualizationHook(TestCase):
+
+ def setUp(self) -> None:
+ DetLocalVisualizer.get_instance('visualizer')
+
+ pred_instances = InstanceData()
+ pred_instances.bboxes = _rand_bboxes(5, 10, 12)
+ pred_instances.labels = torch.randint(0, 2, (5, ))
+ pred_instances.scores = torch.rand((5, ))
+ pred_det_data_sample = DetDataSample()
+ pred_det_data_sample.set_metainfo({
+ 'img_path':
+ osp.join(osp.dirname(__file__), '../../data/color.jpg')
+ })
+ pred_det_data_sample.pred_instances = pred_instances
+ self.outputs = [pred_det_data_sample] * 2
+
+ def test_after_val_iter(self):
+ runner = Mock()
+ runner.iter = 1
+ hook = DetVisualizationHook()
+ hook.after_val_iter(runner, 1, {}, self.outputs)
+
+ def test_after_test_iter(self):
+ runner = Mock()
+ runner.iter = 1
+ hook = DetVisualizationHook(draw=True)
+ hook.after_test_iter(runner, 1, {}, self.outputs)
+ self.assertEqual(hook._test_index, 2)
+
+        # test writing visualizations to test_out_dir
+ timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
+ test_out_dir = timestamp + '1'
+ runner.work_dir = timestamp
+ runner.timestamp = '1'
+ hook = DetVisualizationHook(draw=False, test_out_dir=test_out_dir)
+ hook.after_test_iter(runner, 1, {}, self.outputs)
+ self.assertTrue(not osp.exists(f'{timestamp}/1/{test_out_dir}'))
+
+ hook = DetVisualizationHook(draw=True, test_out_dir=test_out_dir)
+ hook.after_test_iter(runner, 1, {}, self.outputs)
+ self.assertTrue(osp.exists(f'{timestamp}/1/{test_out_dir}'))
+ shutil.rmtree(f'{timestamp}')
diff --git a/tests/test_evaluation/test_coco_metric.py b/tests/test_evaluation/test_coco_metric.py
new file mode 100644
index 0000000..a9db93a
--- /dev/null
+++ b/tests/test_evaluation/test_coco_metric.py
@@ -0,0 +1,400 @@
+import os.path as osp
+import tempfile
+from unittest import TestCase
+
+import numpy as np
+import pycocotools.mask as mask_util
+import torch
+from mmengine.fileio import dump
+
+from dethub.evaluation.metrics import CocoFastMetric
+
+
+class TestCocoMetric(TestCase):
+
+ def _create_dummy_coco_json(self, json_name):
+ dummy_mask = np.zeros((10, 10), order='F', dtype=np.uint8)
+ dummy_mask[:5, :5] = 1
+ rle_mask = mask_util.encode(dummy_mask)
+ rle_mask['counts'] = rle_mask['counts'].decode('utf-8')
+ image = {
+ 'id': 0,
+ 'width': 640,
+ 'height': 640,
+ 'file_name': 'fake_name.jpg',
+ }
+
+ annotation_1 = {
+ 'id': 1,
+ 'image_id': 0,
+ 'category_id': 0,
+ 'area': 400,
+ 'bbox': [50, 60, 20, 20],
+ 'iscrowd': 0,
+ 'segmentation': rle_mask,
+ }
+
+ annotation_2 = {
+ 'id': 2,
+ 'image_id': 0,
+ 'category_id': 0,
+ 'area': 900,
+ 'bbox': [100, 120, 30, 30],
+ 'iscrowd': 0,
+ 'segmentation': rle_mask,
+ }
+
+ annotation_3 = {
+ 'id': 3,
+ 'image_id': 0,
+ 'category_id': 1,
+ 'area': 1600,
+ 'bbox': [150, 160, 40, 40],
+ 'iscrowd': 0,
+ 'segmentation': rle_mask,
+ }
+
+ annotation_4 = {
+ 'id': 4,
+ 'image_id': 0,
+ 'category_id': 0,
+ 'area': 10000,
+ 'bbox': [250, 260, 100, 100],
+ 'iscrowd': 0,
+ 'segmentation': rle_mask,
+ }
+
+ categories = [
+ {
+ 'id': 0,
+ 'name': 'car',
+ 'supercategory': 'car',
+ },
+ {
+ 'id': 1,
+ 'name': 'bicycle',
+ 'supercategory': 'bicycle',
+ },
+ ]
+
+ fake_json = {
+ 'images': [image],
+ 'annotations':
+ [annotation_1, annotation_2, annotation_3, annotation_4],
+ 'categories': categories
+ }
+
+ dump(fake_json, json_name)
+
+ def _create_dummy_results(self):
+ bboxes = np.array([[50, 60, 70, 80], [100, 120, 130, 150],
+ [150, 160, 190, 200], [250, 260, 350, 360]])
+ scores = np.array([1.0, 0.98, 0.96, 0.95])
+ labels = np.array([0, 0, 1, 0])
+ dummy_mask = np.zeros((4, 10, 10), dtype=np.uint8)
+ dummy_mask[:, :5, :5] = 1
+ return dict(
+ bboxes=torch.from_numpy(bboxes),
+ scores=torch.from_numpy(scores),
+ labels=torch.from_numpy(labels),
+ masks=torch.from_numpy(dummy_mask))
+
+ def setUp(self):
+ self.tmp_dir = tempfile.TemporaryDirectory()
+
+ def tearDown(self):
+ self.tmp_dir.cleanup()
+
+ def test_init(self):
+ fake_json_file = osp.join(self.tmp_dir.name, 'fake_data.json')
+ self._create_dummy_coco_json(fake_json_file)
+ with self.assertRaisesRegex(KeyError, 'metric should be one of'):
+ CocoFastMetric(ann_file=fake_json_file, metric='unknown')
+
+ def test_evaluate(self):
+ # create dummy data
+ fake_json_file = osp.join(self.tmp_dir.name, 'fake_data.json')
+ self._create_dummy_coco_json(fake_json_file)
+ dummy_pred = self._create_dummy_results()
+
+ # test single coco dataset evaluation
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file,
+ classwise=False,
+ outfile_prefix=f'{self.tmp_dir.name}/test')
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process(
+ {},
+ [dict(pred_instances=dummy_pred, img_id=0, ori_shape=(640, 640))])
+ eval_results = coco_metric.evaluate(size=1)
+ target = {
+ 'coco/bbox_mAP': 1.0,
+ 'coco/bbox_mAP_50': 1.0,
+ 'coco/bbox_mAP_75': 1.0,
+ 'coco/bbox_mAP_s': 1.0,
+ 'coco/bbox_mAP_m': 1.0,
+ 'coco/bbox_mAP_l': 1.0,
+ }
+ self.assertDictEqual(eval_results, target)
+ self.assertTrue(
+ osp.isfile(osp.join(self.tmp_dir.name, 'test.bbox.json')))
+
+ # test box and segm coco dataset evaluation
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file,
+ metric=['bbox', 'segm'],
+ classwise=False,
+ outfile_prefix=f'{self.tmp_dir.name}/test')
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process(
+ {},
+ [dict(pred_instances=dummy_pred, img_id=0, ori_shape=(640, 640))])
+ eval_results = coco_metric.evaluate(size=1)
+ target = {
+ 'coco/bbox_mAP': 1.0,
+ 'coco/bbox_mAP_50': 1.0,
+ 'coco/bbox_mAP_75': 1.0,
+ 'coco/bbox_mAP_s': 1.0,
+ 'coco/bbox_mAP_m': 1.0,
+ 'coco/bbox_mAP_l': 1.0,
+ 'coco/segm_mAP': 1.0,
+ 'coco/segm_mAP_50': 1.0,
+ 'coco/segm_mAP_75': 1.0,
+ 'coco/segm_mAP_s': 1.0,
+ 'coco/segm_mAP_m': 1.0,
+ 'coco/segm_mAP_l': 1.0,
+ }
+ self.assertDictEqual(eval_results, target)
+ self.assertTrue(
+ osp.isfile(osp.join(self.tmp_dir.name, 'test.bbox.json')))
+ self.assertTrue(
+ osp.isfile(osp.join(self.tmp_dir.name, 'test.segm.json')))
+
+ # test invalid custom metric_items
+ with self.assertRaisesRegex(KeyError,
+ 'metric item "invalid" is not supported'):
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file, metric_items=['invalid'])
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process({}, [
+ dict(
+ pred_instances=dummy_pred, img_id=0, ori_shape=(640, 640))
+ ])
+ coco_metric.evaluate(size=1)
+
+ # test custom metric_items
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file, metric_items=['mAP_m'])
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process(
+ {},
+ [dict(pred_instances=dummy_pred, img_id=0, ori_shape=(640, 640))])
+ eval_results = coco_metric.evaluate(size=1)
+ target = {
+ 'coco/bbox_mAP_m': 1.0,
+ }
+ self.assertDictEqual(eval_results, target)
+
+ def test_classwise_evaluate(self):
+ # create dummy data
+ fake_json_file = osp.join(self.tmp_dir.name, 'fake_data.json')
+ self._create_dummy_coco_json(fake_json_file)
+ dummy_pred = self._create_dummy_results()
+
+ # test single coco dataset evaluation
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file, metric='bbox', classwise=True)
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process(
+ {},
+ [dict(pred_instances=dummy_pred, img_id=0, ori_shape=(640, 640))])
+ eval_results = coco_metric.evaluate(size=1)
+ target = {
+ 'coco/bbox_mAP': 1.0,
+ 'coco/bbox_mAP_50': 1.0,
+ 'coco/bbox_mAP_75': 1.0,
+ 'coco/bbox_mAP_s': 1.0,
+ 'coco/bbox_mAP_m': 1.0,
+ 'coco/bbox_mAP_l': 1.0,
+ 'coco/car_precision': 1.0,
+ 'coco/bicycle_precision': 1.0,
+ }
+ self.assertDictEqual(eval_results, target)
+
+ def test_manually_set_iou_thrs(self):
+ # create dummy data
+ fake_json_file = osp.join(self.tmp_dir.name, 'fake_data.json')
+ self._create_dummy_coco_json(fake_json_file)
+
+ # test single coco dataset evaluation
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file, metric='bbox', iou_thrs=[0.3, 0.6])
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ self.assertEqual(coco_metric.iou_thrs, [0.3, 0.6])
+
+ def test_fast_eval_recall(self):
+ # create dummy data
+ fake_json_file = osp.join(self.tmp_dir.name, 'fake_data.json')
+ self._create_dummy_coco_json(fake_json_file)
+ dummy_pred = self._create_dummy_results()
+
+ # test default proposal nums
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file, metric='proposal_fast')
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process(
+ {},
+ [dict(pred_instances=dummy_pred, img_id=0, ori_shape=(640, 640))])
+ eval_results = coco_metric.evaluate(size=1)
+ target = {'coco/AR@100': 1.0, 'coco/AR@300': 1.0, 'coco/AR@1000': 1.0}
+ self.assertDictEqual(eval_results, target)
+
+ # test manually set proposal nums
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file,
+ metric='proposal_fast',
+ proposal_nums=(2, 4))
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process(
+ {},
+ [dict(pred_instances=dummy_pred, img_id=0, ori_shape=(640, 640))])
+ eval_results = coco_metric.evaluate(size=1)
+ target = {'coco/AR@2': 0.5, 'coco/AR@4': 1.0}
+ self.assertDictEqual(eval_results, target)
+
+ def test_evaluate_proposal(self):
+ # create dummy data
+ fake_json_file = osp.join(self.tmp_dir.name, 'fake_data.json')
+ self._create_dummy_coco_json(fake_json_file)
+ dummy_pred = self._create_dummy_results()
+
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file, metric='proposal')
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process(
+ {},
+ [dict(pred_instances=dummy_pred, img_id=0, ori_shape=(640, 640))])
+ eval_results = coco_metric.evaluate(size=1)
+ print(eval_results)
+ target = {
+ 'coco/AR@100': 1,
+ 'coco/AR@300': 1.0,
+ 'coco/AR@1000': 1.0,
+ 'coco/AR_s@1000': 1.0,
+ 'coco/AR_m@1000': 1.0,
+ 'coco/AR_l@1000': 1.0
+ }
+ self.assertDictEqual(eval_results, target)
+
+ def test_empty_results(self):
+ # create dummy data
+ fake_json_file = osp.join(self.tmp_dir.name, 'fake_data.json')
+ self._create_dummy_coco_json(fake_json_file)
+ coco_metric = CocoFastMetric(ann_file=fake_json_file, metric='bbox')
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ bboxes = np.zeros((0, 4))
+ labels = np.array([])
+ scores = np.array([])
+ dummy_mask = np.zeros((0, 10, 10), dtype=np.uint8)
+ empty_pred = dict(
+ bboxes=torch.from_numpy(bboxes),
+ scores=torch.from_numpy(scores),
+ labels=torch.from_numpy(labels),
+ masks=torch.from_numpy(dummy_mask))
+ coco_metric.process(
+ {},
+ [dict(pred_instances=empty_pred, img_id=0, ori_shape=(640, 640))])
+ # coco api Index error will be caught
+ coco_metric.evaluate(size=1)
+
+ def test_evaluate_without_json(self):
+ dummy_pred = self._create_dummy_results()
+
+ dummy_mask = np.zeros((10, 10), order='F', dtype=np.uint8)
+ dummy_mask[:5, :5] = 1
+ rle_mask = mask_util.encode(dummy_mask)
+ rle_mask['counts'] = rle_mask['counts'].decode('utf-8')
+ instances = [{
+ 'bbox_label': 0,
+ 'bbox': [50, 60, 70, 80],
+ 'ignore_flag': 0,
+ 'mask': rle_mask,
+ }, {
+ 'bbox_label': 0,
+ 'bbox': [100, 120, 130, 150],
+ 'ignore_flag': 0,
+ 'mask': rle_mask,
+ }, {
+ 'bbox_label': 1,
+ 'bbox': [150, 160, 190, 200],
+ 'ignore_flag': 0,
+ 'mask': rle_mask,
+ }, {
+ 'bbox_label': 0,
+ 'bbox': [250, 260, 350, 360],
+ 'ignore_flag': 0,
+ 'mask': rle_mask,
+ }]
+ coco_metric = CocoFastMetric(
+ ann_file=None,
+ metric=['bbox', 'segm'],
+ classwise=False,
+ outfile_prefix=f'{self.tmp_dir.name}/test')
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process({}, [
+ dict(
+ pred_instances=dummy_pred,
+ img_id=0,
+ ori_shape=(640, 640),
+ instances=instances)
+ ])
+ eval_results = coco_metric.evaluate(size=1)
+ print(eval_results)
+ target = {
+ 'coco/bbox_mAP': 1.0,
+ 'coco/bbox_mAP_50': 1.0,
+ 'coco/bbox_mAP_75': 1.0,
+ 'coco/bbox_mAP_s': 1.0,
+ 'coco/bbox_mAP_m': 1.0,
+ 'coco/bbox_mAP_l': 1.0,
+ 'coco/segm_mAP': 1.0,
+ 'coco/segm_mAP_50': 1.0,
+ 'coco/segm_mAP_75': 1.0,
+ 'coco/segm_mAP_s': 1.0,
+ 'coco/segm_mAP_m': 1.0,
+ 'coco/segm_mAP_l': 1.0,
+ }
+ self.assertDictEqual(eval_results, target)
+ self.assertTrue(
+ osp.isfile(osp.join(self.tmp_dir.name, 'test.bbox.json')))
+ self.assertTrue(
+ osp.isfile(osp.join(self.tmp_dir.name, 'test.segm.json')))
+ self.assertTrue(
+ osp.isfile(osp.join(self.tmp_dir.name, 'test.gt.json')))
+
+ def test_format_only(self):
+ # create dummy data
+ fake_json_file = osp.join(self.tmp_dir.name, 'fake_data.json')
+ self._create_dummy_coco_json(fake_json_file)
+ dummy_pred = self._create_dummy_results()
+
+ with self.assertRaises(AssertionError):
+ CocoFastMetric(
+ ann_file=fake_json_file,
+ classwise=False,
+ format_only=True,
+ outfile_prefix=None)
+
+ coco_metric = CocoFastMetric(
+ ann_file=fake_json_file,
+ metric='bbox',
+ classwise=False,
+ format_only=True,
+ outfile_prefix=f'{self.tmp_dir.name}/test')
+ coco_metric.dataset_meta = dict(CLASSES=['car', 'bicycle'])
+ coco_metric.process(
+ {},
+ [dict(pred_instances=dummy_pred, img_id=0, ori_shape=(640, 640))])
+ eval_results = coco_metric.evaluate(size=1)
+ self.assertDictEqual(eval_results, dict())
+ self.assertTrue(osp.exists(f'{self.tmp_dir.name}/test.bbox.json'))
diff --git a/tests/test_models/test_detectors/test_single_stage.py b/tests/test_models/test_detectors/test_single_stage.py
new file mode 100644
index 0000000..aa15842
--- /dev/null
+++ b/tests/test_models/test_detectors/test_single_stage.py
@@ -0,0 +1,110 @@
+import time
+import unittest
+from unittest import TestCase
+
+import torch
+from mmengine.logging import MessageHub
+from parameterized import parameterized
+
+from dethub.testing import get_detector_cfg
+from mmdet.structures import DetDataSample
+from mmdet.testing import demo_mm_inputs
+from mmdet.utils import register_all_modules
+
+
+class TestSingleStageDetector(TestCase):
+
+ def setUp(self):
+ register_all_modules()
+
+ @parameterized.expand([
+ 'projects/livecell/yolox/yolox_s_livecell.py',
+ ])
+ def test_init(self, cfg_file):
+ model = get_detector_cfg(cfg_file)
+ if 'backbone' in model:
+ model.backbone.init_cfg = None
+
+ from mmdet.models import build_detector
+ detector = build_detector(model)
+ self.assertTrue(detector.backbone)
+ self.assertTrue(detector.neck)
+ self.assertTrue(detector.bbox_head)
+
+ @parameterized.expand([('projects/livecell/yolox/yolox_s_livecell.py',
+ ('cuda', ))])
+ def test_single_stage_forward_loss_mode(self, cfg_file, devices):
+ message_hub = MessageHub.get_instance(
+ f'test_single_stage_forward_loss_mode-{time.time()}')
+ message_hub.update_info('iter', 0)
+ message_hub.update_info('epoch', 0)
+ model = get_detector_cfg(cfg_file)
+ if 'backbone' in model:
+ model.backbone.init_cfg = None
+
+ from mmdet.models import build_detector
+ assert all([device in ['cpu', 'cuda'] for device in devices])
+
+ for device in devices:
+ detector = build_detector(model)
+ detector.init_weights()
+
+ if device == 'cuda':
+ if not torch.cuda.is_available():
+ return unittest.skip('test requires GPU and torch+cuda')
+ detector = detector.cuda()
+
+ packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]])
+ data = detector.data_preprocessor(packed_inputs, True)
+ losses = detector.forward(**data, mode='loss')
+ self.assertIsInstance(losses, dict)
+
+ @parameterized.expand([('projects/livecell/yolox/yolox_s_livecell.py',
+ ('cuda', ))])
+ def test_single_stage_forward_predict_mode(self, cfg_file, devices):
+ model = get_detector_cfg(cfg_file)
+ if 'backbone' in model:
+ model.backbone.init_cfg = None
+
+ from mmdet.models import build_detector
+ assert all([device in ['cpu', 'cuda'] for device in devices])
+
+ for device in devices:
+ detector = build_detector(model)
+
+ if device == 'cuda':
+ if not torch.cuda.is_available():
+ return unittest.skip('test requires GPU and torch+cuda')
+ detector = detector.cuda()
+
+ packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]])
+ data = detector.data_preprocessor(packed_inputs, False)
+ # Test forward test
+ detector.eval()
+ with torch.no_grad():
+ batch_results = detector.forward(**data, mode='predict')
+ self.assertEqual(len(batch_results), 2)
+ self.assertIsInstance(batch_results[0], DetDataSample)
+
+ @parameterized.expand([('projects/livecell/yolox/yolox_s_livecell.py',
+ ('cuda', ))])
+ def test_single_stage_forward_tensor_mode(self, cfg_file, devices):
+ model = get_detector_cfg(cfg_file)
+ if 'backbone' in model:
+ model.backbone.init_cfg = None
+
+ from mmdet.models import build_detector
+ assert all([device in ['cpu', 'cuda'] for device in devices])
+
+ for device in devices:
+ detector = build_detector(model)
+
+ if device == 'cuda':
+ if not torch.cuda.is_available():
+ return unittest.skip('test requires GPU and torch+cuda')
+ detector = detector.cuda()
+
+ packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]])
+ data = detector.data_preprocessor(packed_inputs, False)
+ batch_results = detector.forward(**data, mode='tensor')
+ self.assertIsInstance(batch_results, tuple)
diff --git a/tests/test_models/test_task_modules/test_assigners/test_simota_assigner.py b/tests/test_models/test_task_modules/test_assigners/test_simota_assigner.py
new file mode 100644
index 0000000..88aec90
--- /dev/null
+++ b/tests/test_models/test_task_modules/test_assigners/test_simota_assigner.py
@@ -0,0 +1,65 @@
+from unittest import TestCase
+
+import torch
+from mmengine.structures import InstanceData
+from mmengine.testing import assert_allclose
+
+from dethub.models.task_modules.assigners import SimOTAAssigner
+
+
+class TestSimOTAAssigner(TestCase):
+
+ def test_assign(self):
+ assigner = SimOTAAssigner(
+ center_radius=2.5,
+ candidate_topk=1,
+ iou_weight=3.0,
+ cls_weight=1.0)
+ pred_instances = InstanceData(
+ bboxes=torch.Tensor([[23, 23, 43, 43], [4, 5, 6, 7]]),
+ scores=torch.FloatTensor([[0.2], [0.8]]),
+ priors=torch.Tensor([[30, 30, 8, 8], [4, 5, 6, 7]]))
+ gt_instances = InstanceData(
+ bboxes=torch.Tensor([[23, 23, 43, 43]]),
+ labels=torch.LongTensor([0]))
+ assign_result = assigner.assign(
+ pred_instances=pred_instances, gt_instances=gt_instances)
+
+ expected_gt_inds = torch.LongTensor([1, 0])
+ assert_allclose(assign_result.gt_inds, expected_gt_inds)
+
+ def test_assign_with_no_valid_bboxes(self):
+ assigner = SimOTAAssigner(
+ center_radius=2.5,
+ candidate_topk=1,
+ iou_weight=3.0,
+ cls_weight=1.0)
+ pred_instances = InstanceData(
+ bboxes=torch.Tensor([[123, 123, 143, 143], [114, 151, 161, 171]]),
+ scores=torch.FloatTensor([[0.2], [0.8]]),
+ priors=torch.Tensor([[30, 30, 8, 8], [55, 55, 8, 8]]))
+ gt_instances = InstanceData(
+ bboxes=torch.Tensor([[0, 0, 1, 1]]), labels=torch.LongTensor([0]))
+ assign_result = assigner.assign(
+ pred_instances=pred_instances, gt_instances=gt_instances)
+
+ expected_gt_inds = torch.LongTensor([0, 0])
+ assert_allclose(assign_result.gt_inds, expected_gt_inds)
+
+ def test_assign_with_empty_gt(self):
+ assigner = SimOTAAssigner(
+ center_radius=2.5,
+ candidate_topk=1,
+ iou_weight=3.0,
+ cls_weight=1.0)
+ pred_instances = InstanceData(
+ bboxes=torch.Tensor([[[30, 40, 50, 60]], [[4, 5, 6, 7]]]),
+ scores=torch.FloatTensor([[0.2], [0.8]]),
+ priors=torch.Tensor([[0, 12, 23, 34], [4, 5, 6, 7]]))
+ gt_instances = InstanceData(
+ bboxes=torch.empty(0, 4), labels=torch.empty(0))
+
+ assign_result = assigner.assign(
+ pred_instances=pred_instances, gt_instances=gt_instances)
+ expected_gt_inds = torch.LongTensor([0, 0])
+ assert_allclose(assign_result.gt_inds, expected_gt_inds)
diff --git a/tools/dataset_converters/prepare_gbr_cots.py b/tools/dataset_converters/prepare_gbr_cots.py
new file mode 100644
index 0000000..cf335cb
--- /dev/null
+++ b/tools/dataset_converters/prepare_gbr_cots.py
@@ -0,0 +1,90 @@
+import ast
+
+import cv2
+import mmengine
+import pandas as pd
+from sklearn.model_selection import StratifiedGroupKFold
+from tqdm import tqdm
+
+CATEGORIES = ['gbr']
+CAT2IDX = {cat: idx for idx, cat in enumerate(CATEGORIES)}
+ID_COL = 'image_id'
+
+
+def init_coco():
+ return {
+ 'info': {},
+ 'categories': [{
+ 'id': idx,
+ 'name': cat,
+ } for cat, idx in CAT2IDX.items()]
+ }
+
+
+def df2coco(df):
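+    """Convert the COTS training dataframe to a COCO-format dict."""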
+ img_infos = []
+ ann_infos = []
+ img_id = 0
+ ann_id = 0
+ for _, row in tqdm(df.iterrows()):
+ img_path = f'video_{str(row["video_id"])}/' \
+ f'{str(row["video_frame"])}.jpg'
+ img = cv2.imread(f'data/train_images/{img_path}')
+ img_info = dict(
+ id=img_id,
+ width=img.shape[1],
+ height=img.shape[0],
+ file_name=img_path,
+ )
+ if len(row['annotations']) != 0:
+ for ann in row['annotations']:
+ b_width = ann['width']
+ b_height = ann['height']
+
+ # some boxes in COTS are outside the image height and width
+ if (ann['x'] + b_width > 1280):
+ b_width = 1280 - ann['x']
+ if (ann['y'] + b_height > 720):
+ b_height = 720 - ann['y']
+
+ ann_info = dict(
+ id=ann_id,
+ image_id=img_id,
+ category_id=0,
+ iscrowd=0,
+                    area=b_width * b_height,  # area of the clipped box
+ bbox=[ann['x'], ann['y'], b_width, b_height],
+ segmentation=[])
+ ann_infos.append(ann_info)
+ ann_id += 1
+ img_infos.append(img_info)
+ img_id += 1
+
+ coco = init_coco()
+ coco['images'] = img_infos
+ coco['annotations'] = ann_infos
+ return coco
+
+
+def main():
+ df = pd.read_csv('data/train.csv')
+    df['annotations'] = df['annotations'].apply(ast.literal_eval)
+ df['len_ann'] = df['annotations'].map(lambda x: len(x))
+ df['has_label'] = (df['len_ann'] > 0) * 1
+
+ df = df.sample(frac=1, random_state=0).reset_index(drop=True)
+ kf = StratifiedGroupKFold(n_splits=5)
+ splits = list(
+ kf.split(df['has_label'], df['has_label'], df['sequence'].values))
+
+ for fold, (train_inds, val_inds) in enumerate(splits):
+ train_df = df.iloc[train_inds]
+ train_coco = df2coco(train_df)
+        mmengine.dump(train_coco, f'dtrain_g{fold}.json')
+
+ val_df = df.iloc[val_inds]
+ val_coco = df2coco(val_df)
+        mmengine.dump(val_coco, f'dval_g{fold}.json')
+        break  # only the first fold is dumped
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/dataset_converters/prepare_livecell.py b/tools/dataset_converters/prepare_livecell.py
new file mode 100644
index 0000000..6502642
--- /dev/null
+++ b/tools/dataset_converters/prepare_livecell.py
@@ -0,0 +1,63 @@
+import copy
+
+import mmengine
+from pycocotools.coco import COCO
+from tqdm import tqdm
+
+CATEGORIES = ('shsy5y', 'a172', 'bt474', 'bv2', 'huh7', 'mcf7', 'skov3',
+ 'skbr3')
+CAT2IDX = {cat: idx for idx, cat in enumerate(CATEGORIES)}
+
+
+def init_coco():
+ return {
+ 'info': {},
+ 'categories': [{
+ 'id': idx,
+ 'name': cat,
+ } for cat, idx in CAT2IDX.items()]
+ }
+
+
+def to_multiclass(ann_file):
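+    """Split LIVECell's single-class annotations into 8 cell-type classes.
+
+    The cell type is taken from the file-name prefix (e.g. ``A172_...``)
+    and written back as ``category_id``.
+    """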
+ img_infos = []
+ ann_infos = []
+ img_id = 0
+ ann_id = 0
+ coco = COCO(ann_file)
+ for _id in tqdm(coco.getImgIds()):
+ img_info = copy.deepcopy(coco.loadImgs(_id)[0])
+ del img_info['original_filename']
+ del img_info['url']
+ img_info['id'] = img_id
+ filename = img_info['file_name']
+ subdir = filename.split('_')[0]
+ img_info['file_name'] = filename
+ cat = subdir.lower()
+
+ img_infos.append(img_info)
+ for ann_info in coco.loadAnns(coco.getAnnIds(_id)):
+ ann_info = copy.deepcopy(ann_info)
+ ann_info['image_id'] = img_id
+ ann_info['id'] = ann_id
+ ann_info['category_id'] = CAT2IDX[cat]
+ ann_infos.append(ann_info)
+ ann_id += 1
+ img_id += 1
+
+ coco = init_coco()
+ coco['images'] = img_infos
+ coco['annotations'] = ann_infos
+ return coco
+
+
+if __name__ == '__main__':
+ mmengine.dump(
+ to_multiclass('data/livecell_coco_train.json'),
+ 'data/livecell_coco_train_8class.json')
+ mmengine.dump(
+ to_multiclass('data/livecell_coco_val.json'),
+ 'data/livecell_coco_val_8class.json')
+ mmengine.dump(
+ to_multiclass('data/livecell_coco_test.json'),
+ 'data/livecell_coco_test_8class.json')
diff --git a/tools/dataset_converters/prepare_sartorius_cellseg.py b/tools/dataset_converters/prepare_sartorius_cellseg.py
new file mode 100644
index 0000000..3cb505a
--- /dev/null
+++ b/tools/dataset_converters/prepare_sartorius_cellseg.py
@@ -0,0 +1,99 @@
+import mmengine
+import numpy as np
+import pandas as pd
+import pycocotools.mask as mask_utils
+from sklearn.model_selection import StratifiedKFold
+from tqdm import tqdm
+
+CATEGORIES = ('shsy5y', 'astro', 'cort')
+CAT2IDX = {cat: idx for idx, cat in enumerate(CATEGORIES)}
+IMG_HEIGHT = 520
+IMG_WIDTH = 704
+
+
+def init_coco():
+ return {
+ 'info': {},
+ 'categories': [{
+ 'id': idx,
+ 'name': cat,
+ } for cat, idx in CAT2IDX.items()]
+ }
+
+
+def krle2mask(rle, height, width):
+ s = rle.split()
+ starts, lengths = [
+ np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])
+ ]
+ starts -= 1
+ ends = starts + lengths
+ img = np.zeros(height * width, dtype=np.uint8)
+ for lo, hi in zip(starts, ends):
+ img[lo:hi] = 1
+    return img.reshape(height, width)  # C-order: RLE indexes pixels row-wise
+
+
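+def _demo_krle2mask():
+    # Illustration with made-up values: Kaggle RLE pairs are 1-indexed
+    # '<start> <length>' runs over the flattened image, decoded by
+    # krle2mask in C (row-major) order.
+    mask = krle2mask('1 3 12 2', 4, 5)
+    assert mask.sum() == 5 and mask[0, :3].all()
+
+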
+def df2coco(df):
+ img_infos = []
+ ann_infos = []
+ img_id = 0
+ ann_id = 0
+ for img_name, img_df in tqdm(df.groupby('id'), total=df['id'].nunique()):
+ img_info = dict(
+ id=img_id,
+ width=IMG_WIDTH,
+ height=IMG_HEIGHT,
+ file_name=f'{img_name}.png',
+ )
+ for kaggle_rle, cell_type in zip(img_df['annotation'],
+ img_df['cell_type']):
+ mask = krle2mask(kaggle_rle, IMG_HEIGHT, IMG_WIDTH)
+ mask = np.asfortranarray(mask)
+ rle = mask_utils.encode(mask)
+            rle['counts'] = rle['counts'].decode()  # bytes -> str for JSON
+ bbox = mask_utils.toBbox(rle).tolist()
+ ann_info = dict(
+ id=ann_id,
+ image_id=img_id,
+ category_id=CAT2IDX[cell_type],
+ iscrowd=0,
+ segmentation=rle,
+                area=bbox[2] * bbox[3],  # bbox area, not exact mask area
+ bbox=bbox,
+ )
+ ann_infos.append(ann_info)
+ ann_id += 1
+ img_infos.append(img_info)
+ img_id += 1
+
+ coco = init_coco()
+ coco['images'] = img_infos
+ coco['annotations'] = ann_infos
+ return coco
+
+
+def main():
+ df = pd.read_csv('data/train.csv')
+
+ all_samples = np.array(sorted(set(df['sample_id'])))
+ sample2celltype = dict(zip(df['sample_id'], df['cell_type']))
+    cell_types = [sample2celltype[s] for s in all_samples]
+ skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
+ splits = list(skf.split(all_samples, cell_types))
+ for fold, (train_inds, val_inds) in enumerate(splits):
+ train_samples = all_samples[train_inds]
+ train_df = df[df['sample_id'].isin(train_samples)]
+ train_coco = df2coco(train_df)
+ mmcv.dump(train_coco, 'data/dtrain.json')
+
+ val_samples = all_samples[val_inds]
+ val_df = df[df['sample_id'].isin(val_samples)]
+ val_coco = df2coco(val_df)
+ mmcv.dump(val_coco, 'data/dval.json')
+
+        break  # only write fold 0
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/image_demo.py b/tools/image_demo.py
new file mode 100644
index 0000000..dd1470a
--- /dev/null
+++ b/tools/image_demo.py
@@ -0,0 +1,106 @@
+# Copyright (c) OpenMMLab. All rights reserved.
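+"""Demo script: run a detector on one image and visualize the result.
+
+Example (paths are placeholders):
+    python tools/image_demo.py demo.jpg cfg.py ckpt.pth --out-file out.jpg
+"""
+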
+import asyncio
+from argparse import ArgumentParser
+
+import mmcv
+
+from mmdet.apis import (async_inference_detector, inference_detector,
+ init_detector)
+from mmdet.registry import VISUALIZERS
+from mmdet.utils import register_all_modules
+
+
+def parse_args():
+ parser = ArgumentParser()
+ parser.add_argument('img', help='Image file')
+ parser.add_argument('config', help='Config file')
+ parser.add_argument('checkpoint', help='Checkpoint file')
+ parser.add_argument('--out-file', default=None, help='Path to output file')
+ parser.add_argument(
+ '--device', default='cuda:0', help='Device used for inference')
+ parser.add_argument(
+ '--palette',
+ default='coco',
+ choices=['coco', 'voc', 'citys', 'random'],
+ help='Color palette used for visualization')
+ parser.add_argument(
+ '--score-thr', type=float, default=0.3, help='bbox score threshold')
+ parser.add_argument(
+ '--async-test',
+ action='store_true',
+ help='whether to set async options for async inference.')
+ args = parser.parse_args()
+ return args
+
+
+def main(args):
+ # register all modules in mmdet into the registries
+ register_all_modules()
+
+    # TODO: support inference on a directory of images
+ # build the model from a config file and a checkpoint file
+ model = init_detector(
+ args.config, args.checkpoint, palette=args.palette, device=args.device)
+
+ # init visualizer
+ visualizer = VISUALIZERS.build(model.cfg.visualizer)
+    # the dataset_meta is loaded from the checkpoint and
+    # then passed to the model in init_detector
+ visualizer.dataset_meta = model.dataset_meta
+
+ # test a single image
+ result = inference_detector(model, args.img)
+
+ # show the results
+ img = mmcv.imread(args.img)
+ img = mmcv.imconvert(img, 'bgr', 'rgb')
+ visualizer.add_datasample(
+ 'result',
+ img,
+ data_sample=result,
+ draw_gt=False,
+ show=args.out_file is None,
+ wait_time=0,
+ out_file=args.out_file,
+ pred_score_thr=args.score_thr)
+
+
+async def async_main(args):
+ # build the model from a config file and a checkpoint file
+ model = init_detector(args.config, args.checkpoint, device=args.device)
+
+ # init visualizer
+ visualizer = VISUALIZERS.build(model.cfg.visualizer)
+ visualizer.dataset_meta = model.dataset_meta
+
+ # test a single image
+    task = asyncio.create_task(async_inference_detector(model, args.img))
+    result = await asyncio.gather(task)
+ # show the results
+ img = mmcv.imread(args.img)
+ img = mmcv.imconvert(img, 'bgr', 'rgb')
+ visualizer.add_datasample(
+ 'result',
+ img,
+        data_sample=result[0],
+ show=args.out_file is None,
+ wait_time=0,
+ out_file=args.out_file,
+ pred_score_thr=args.score_thr)
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    # async inference is not supported yet, so the assert below keeps the
+    # async branch unreachable for now
+    assert not args.async_test, 'async inference is not supported yet.'
+    if args.async_test:
+        asyncio.run(async_main(args))
+    else:
+        main(args)