diff --git a/configs/_base_/datasets/crowdhuman/crowdhuman_dino.py b/configs/_base_/datasets/crowdhuman/crowdhuman_dino.py new file mode 100644 index 0000000..866ed22 --- /dev/null +++ b/configs/_base_/datasets/crowdhuman/crowdhuman_dino.py @@ -0,0 +1,94 @@ +# dataset settings +dataset_type = 'CrowdHumanDataset' +data_root = 'data/CrowdHuman/' + +file_client_args = dict(backend='disk') + +long_size = 1400 + +train_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='RandomFlip', prob=0.5), + dict( + type='RandomChoice', + transforms=[ + [ + dict( + type='RandomChoiceResize', + scales=[ + (480, long_size), (512, long_size), (544, long_size), + (576, long_size), (608, long_size), (640, long_size), + (672, long_size), (704, long_size), (736, long_size), + (768, long_size), (800, long_size) + ], + keep_ratio=True) + ], + [ + dict( + type='RandomChoiceResize', + # The aspect ratio of all training images is < 7; + # this follows the original implementation + scales=[(400, 5000), (500, 5000), (600, 5000)], + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 720), + allow_negative_crop=True), + dict( + type='RandomChoiceResize', + scales=[ + (480, long_size), (512, long_size), (544, long_size), + (576, long_size), (608, long_size), (640, long_size), + (672, long_size), (704, long_size), (736, long_size), + (768, long_size), (800, long_size) + ], + keep_ratio=True) + ] + ]), + dict(type='PackDetInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='Resize', scale=(long_size, 800), keep_ratio=True), + # If you don't have a gt annotation, delete the pipeline + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +train_dataloader = dict( + batch_size=4, + num_workers=4, + 
persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + batch_sampler=dict(type='AspectRatioBatchSampler'), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotation_train.odgt', + data_prefix=dict(img='Images/'), + filter_cfg=dict(filter_empty_gt=True, min_size=32), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=1, + num_workers=2, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='annotation_val.odgt', + data_prefix=dict(img='Images/'), + test_mode=True, + pipeline=test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='CrowdHumanMetric', + ann_file=data_root + 'annotation_val.odgt', + metric=['AP', 'MR', 'JI']) +test_evaluator = val_evaluator diff --git a/configs/projects/crowdhuman/dino/README.md b/configs/projects/crowdhuman/dino/README.md new file mode 100644 index 0000000..99649ac --- /dev/null +++ b/configs/projects/crowdhuman/dino/README.md @@ -0,0 +1,12 @@ +# DINO + +[model page](https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/dino/README.md) + +## Results and Models + +#### Box Results + +| Backbone | box AP | Config | Download | +| :---------------------------------: | :----: | :----------------------------------------------: | :-------------------------------------------------------------------------------------------------------------: | +| dino-4scale_r50_crowdhuman | 92.9 | [config](dino-4scale_r50_crowdhuman.py) | [model](https://github.com/okotaku/dethub-weights/releases/download/v0.1.1dino-crowdhuman/dino-4scale_r50_crowdhuman-ebfc9dc7.pth) | +| dino-4scale_r50-pre-lvis_crowdhuman | 92.2 | [config](dino-4scale_r50-pre-lvis_crowdhuman.py) | [model](https://github.com/okotaku/dethub-weights/releases/download/v0.1.1dino-crowdhuman/dino-4scale_r50-pre-lvis_crowdhuman-d54735f1.pth) | diff --git 
a/configs/projects/crowdhuman/dino/dino-4scale_r50-pre-lvis_crowdhuman.py b/configs/projects/crowdhuman/dino/dino-4scale_r50-pre-lvis_crowdhuman.py new file mode 100644 index 0000000..6541053 --- /dev/null +++ b/configs/projects/crowdhuman/dino/dino-4scale_r50-pre-lvis_crowdhuman.py @@ -0,0 +1,14 @@ +_base_ = [ + 'mmdet::_base_/default_runtime.py', + '../../../_base_/models/dino/dino-4scale_r50.py', + '../../../_base_/datasets/crowdhuman/crowdhuman_dino.py', + '../../../_base_/schedules/dino/dino_12e.py' +] +custom_imports = dict(imports=['dethub'], allow_failed_imports=False) +fp16 = dict(loss_scale=512.) + +# model settings +num_classes = 1 +model = dict(bbox_head=dict(num_classes=num_classes)) + +load_from = 'https://github.com/okotaku/dethub-weights/releases/download/v0.1.1dino/dino-4scale_r50_lvis-ea80fe74.pth' # noqa diff --git a/configs/projects/crowdhuman/dino/dino-4scale_r50_crowdhuman.py b/configs/projects/crowdhuman/dino/dino-4scale_r50_crowdhuman.py new file mode 100644 index 0000000..570e92f --- /dev/null +++ b/configs/projects/crowdhuman/dino/dino-4scale_r50_crowdhuman.py @@ -0,0 +1,14 @@ +_base_ = [ + 'mmdet::_base_/default_runtime.py', + '../../../_base_/models/dino/dino-4scale_r50.py', + '../../../_base_/datasets/crowdhuman/crowdhuman_dino.py', + '../../../_base_/schedules/dino/dino_12e.py' +] +custom_imports = dict(imports=['dethub'], allow_failed_imports=False) +fp16 = dict(loss_scale=512.) 
+ +# model settings +num_classes = 1 +model = dict(bbox_head=dict(num_classes=num_classes)) + +load_from = 'https://download.openmmlab.com/mmdetection/v3.0/dino/dino-4scale_r50_8xb2-12e_coco/dino-4scale_r50_8xb2-12e_coco_20221202_182705-55b2bba2.pth' # noqa diff --git a/configs/projects/crowdhuman/dino/metafile.yml b/configs/projects/crowdhuman/dino/metafile.yml new file mode 100644 index 0000000..4bcf0df --- /dev/null +++ b/configs/projects/crowdhuman/dino/metafile.yml @@ -0,0 +1,22 @@ +Collections: + - Name: DINO + +Models: + - Name: dino-4scale_r50_crowdhuman + In Collection: DINO + Config: configs/projects/crowdhuman/dino/dino-4scale_r50_crowdhuman.py + Results: + - Task: Object Detection + Dataset: CrowdHuman + Metrics: + box AP: 92.9 + Weights: https://github.com/okotaku/dethub-weights/releases/download/v0.1.1dino-crowdhuman/dino-4scale_r50_crowdhuman-ebfc9dc7.pth + - Name: dino-4scale_r50-pre-lvis_crowdhuman + In Collection: DINO + Config: configs/projects/crowdhuman/dino/dino-4scale_r50-pre-lvis_crowdhuman.py + Results: + - Task: Object Detection + Dataset: CrowdHuman + Metrics: + box AP: 92.2 + Weights: https://github.com/okotaku/dethub-weights/releases/download/v0.1.1dino-crowdhuman/dino-4scale_r50-pre-lvis_crowdhuman-d54735f1.pth diff --git a/model-index.yml b/model-index.yml index 680c437..6aac1b8 100644 --- a/model-index.yml +++ b/model-index.yml @@ -5,6 +5,7 @@ Import: - configs/projects/coco/yolox/metafile.yml - configs/projects/coco/rpn/metafile.yml - configs/projects/coco1class/yolox/metafile.yml + - configs/projects/crowdhuman/dino/metafile.yml - configs/projects/crowdhuman/yolox/metafile.yml - configs/projects/findfallenpeople/yolox/metafile.yml - configs/projects/gbr_cots/yolox/metafile.yml