CLIPResNet 训练中,突然上升nan。 #1918
zhaoguoqing12
started this conversation in
General
Replies: 3 comments 1 reply
-
We recommend using English, or both English and Chinese, for issues so that we can have a broader discussion.
Beta Was this translation helpful? Give feedback.
0 replies
-
In practice, CLIPResNet is very sensitive to the learning rate. Besides using a small learning rate, it is worth trying a more conservative learning-rate warmup/decay strategy.
Beta Was this translation helpful? Give feedback.
1 reply
-
你好,请问你解决这个问题了吗?最终可以达到 mmocr 中的 hmean 分数了吗?
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
Prerequisite
Task
I have modified the scripts/configs, or I'm working on my own tasks/models/datasets.
Branch
main branch https://github.com/open-mmlab/mmocr
Environment
无
Reproduces the problem - code sample
# Detector definition: a DBNet assembled from a CLIPResNet backbone, an FPNC
# neck and a DBHead detection head, plus the input data preprocessor.
model = dict(
    type='DBNet',
    backbone=dict(
        type='CLIPResNet',
        # init_cfg of type 'Pretrained' pointing at the oCLIP ResNet-50 URL.
        init_cfg=dict(
            type='Pretrained',
            checkpoint=(
                'https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth'
            ),
        ),
    ),
    neck=dict(
        type='FPNC',
        in_channels=[256, 512, 1024, 2048],
        lateral_channels=256,
        asf_cfg=dict(attention_type='ScaleChannelSpatial'),
    ),
    det_head=dict(
        type='DBHead',
        in_channels=256,
        module_loss=dict(type='DBModuleLoss'),
        postprocessor=dict(
            type='DBPostprocessor',
            text_repr_type='quad',
            epsilon_ratio=0.002,
        ),
    ),
    data_preprocessor=dict(
        type='TextDetDataPreprocessor',
        # Per-channel normalisation statistics.
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32,
    ),
)
# Training-time transforms, applied in list order: load image and
# annotations, colour jitter, imgaug geometric augmentation, random crop,
# resize/pad to 640x640, then pack the inputs.
train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadOCRAnnotations',
        with_bbox=True,
        with_polygon=True,
        with_label=True,
    ),
    dict(
        type='TorchVisionWrapper',
        op='ColorJitter',
        brightness=0.12549019607843137,  # approximately 32 / 255
        saturation=0.5,
    ),
    dict(
        type='ImgAugWrapper',
        args=[
            ['Fliplr', 0.5],
            {'cls': 'Affine', 'rotate': [-10, 10]},
            ['Resize', [0.5, 3.0]],
        ],
    ),
    dict(type='RandomCrop', min_side_ratio=0.1),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640)),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape'),
    ),
]
# Test-time transforms: load the image, resize with keep_ratio to the
# (4068, 1024) scale, load annotations, then pack inputs together with the
# meta keys listed below.
test_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
    dict(
        type='LoadOCRAnnotations',
        with_polygon=True,
        with_bbox=True,
        with_label=True,
    ),
    dict(
        type='PackTextDetInputs',
        meta_keys=(
            'img_path',
            'ori_shape',
            'img_shape',
            'scale_factor',
            'instances',
        ),
    ),
]
# ---- Runtime settings ----
default_scope = 'mmocr'

# Process environment: cuDNN benchmark off, fork start method, NCCL backend.
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)
randomness = dict(seed=None)

# Hooks: logging every 5 intervals, checkpointing every 10, and a
# visualization hook that is present but disabled (enable=False).
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=5),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=10),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    sync_buffer=dict(type='SyncBuffersHook'),
    visualization=dict(
        type='VisualizationHook',
        interval=1,
        enable=False,
        show=False,
        draw_gt=False,
        draw_pred=False,
    ),
)

log_level = 'INFO'
log_processor = dict(type='LogProcessor', window_size=10, by_epoch=True)

# Checkpoint path passed as load_from; resume is off.
load_from = './resnet50-oclip-7ba0c533.pth'
resume = False

# The same Hmean-IoU metric is used for validation and testing.
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = dict(type='HmeanIOUMetric')

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='TextDetLocalVisualizer',
    name='visualizer',
    vis_backends=[dict(type='LocalVisBackend')],
)
# Dataset definitions. Both splits read from the same data root and differ
# only in annotation file and in filter_cfg (train) versus test_mode (test).
icdar2015_textdet_data_root = '/data/guoqing/hand_writting_data/'

icdar2015_textdet_train = dict(
    type='OCRDataset',
    data_root=icdar2015_textdet_data_root,
    ann_file='textdet_train.json',
    filter_cfg=dict(filter_empty_gt=True, min_size=32),
    pipeline=None,
)

icdar2015_textdet_test = dict(
    type='OCRDataset',
    data_root=icdar2015_textdet_data_root,
    ann_file='textdet_test.json',
    test_mode=True,
    pipeline=None,
)
# Optimisation: Adam with a learning rate of 1e-4, wrapped in OptimWrapper.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='Adam', lr=0.0001),
)

# Epoch-based training loop capped at 500 epochs, with val_interval=10.
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=500,
    val_interval=10,
)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# Lists of dataset configs for training and testing; each holds one
# OCRDataset entry.
train_list = [
    dict(
        type='OCRDataset',
        data_root='/data/guoqing/hand_writting_data/',
        ann_file='textdet_train.json',
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=None,
    ),
]

test_list = [
    dict(
        type='OCRDataset',
        data_root='/data/guoqing/hand_writting_data/',
        ann_file='textdet_test.json',
        test_mode=True,
        pipeline=None,
    ),
]
# Training dataloader: batch size 16, 16 workers, shuffled DefaultSampler,
# over a ConcatDataset that wraps a single OCRDataset.
train_dataloader = dict(
    batch_size=16,
    num_workers=16,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='ConcatDataset',
        datasets=[
            dict(
                type='OCRDataset',
                data_root='/data/guoqing/hand_writting_data/',
                ann_file='textdet_train.json',
                filter_cfg=dict(filter_empty_gt=True, min_size=32),
                # The pipeline is given on the ConcatDataset instead.
                pipeline=None,
            ),
        ],
        pipeline=[
            dict(
                type='LoadImageFromFile',
                color_type='color_ignore_orientation',
            ),
            dict(
                type='LoadOCRAnnotations',
                with_bbox=True,
                with_polygon=True,
                with_label=True,
            ),
            dict(
                type='TorchVisionWrapper',
                op='ColorJitter',
                brightness=0.12549019607843137,  # approximately 32 / 255
                saturation=0.5,
            ),
            dict(
                type='ImgAugWrapper',
                args=[
                    ['Fliplr', 0.5],
                    {'cls': 'Affine', 'rotate': [-10, 10]},
                    ['Resize', [0.5, 3.0]],
                ],
            ),
            dict(type='RandomCrop', min_side_ratio=0.1),
            dict(type='Resize', scale=(640, 640), keep_ratio=True),
            dict(type='Pad', size=(640, 640)),
            dict(
                type='PackTextDetInputs',
                meta_keys=('img_path', 'ori_shape', 'img_shape'),
            ),
        ],
    ),
)
# Validation dataloader: batch size 16, 8 workers, unshuffled
# DefaultSampler, over a ConcatDataset wrapping the test-split OCRDataset.
val_dataloader = dict(
    batch_size=16,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='ConcatDataset',
        datasets=[
            dict(
                type='OCRDataset',
                data_root='/data/guoqing/hand_writting_data/',
                ann_file='textdet_test.json',
                test_mode=True,
                # The pipeline is given on the ConcatDataset instead.
                pipeline=None,
            ),
        ],
        pipeline=[
            dict(
                type='LoadImageFromFile',
                color_type='color_ignore_orientation',
            ),
            dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
            dict(
                type='LoadOCRAnnotations',
                with_polygon=True,
                with_bbox=True,
                with_label=True,
            ),
            dict(
                type='PackTextDetInputs',
                meta_keys=(
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                    'instances',
                ),
            ),
        ],
    ),
)
# Test dataloader: batch size 16, 8 workers, unshuffled DefaultSampler,
# over a ConcatDataset wrapping the test-split OCRDataset.
test_dataloader = dict(
    batch_size=16,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='ConcatDataset',
        datasets=[
            dict(
                type='OCRDataset',
                data_root='/data/guoqing/hand_writting_data/',
                ann_file='textdet_test.json',
                test_mode=True,
                # The pipeline is given on the ConcatDataset instead.
                pipeline=None,
            ),
        ],
        pipeline=[
            dict(
                type='LoadImageFromFile',
                color_type='color_ignore_orientation',
            ),
            dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
            dict(
                type='LoadOCRAnnotations',
                with_polygon=True,
                with_bbox=True,
                with_label=True,
            ),
            dict(
                type='PackTextDetInputs',
                meta_keys=(
                    'img_path',
                    'ori_shape',
                    'img_shape',
                    'scale_factor',
                    'instances',
                ),
            ),
        ],
    ),
)
auto_scale_lr = dict(base_batch_size=16)

# Learning-rate schedule: LinearLR starting at a factor of 0.001 and ending
# at 100, followed by PolyLR (power 0.9, floor 1e-7) from 100 to 500.
# NOTE(review): begin/end are presumably counted in epochs - confirm.
param_scheduler = [
    dict(type='LinearLR', end=100, start_factor=0.001),
    dict(
        type='PolyLR',
        power=0.9,
        eta_min=1e-07,
        begin=100,
        end=500,
    ),
]

launcher = 'pytorch'
work_dir = './work_dirs/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015'
Reproduces the problem - command or script
# Run tools/dist_train.sh in the background under nohup, with
# CUDA_VISIBLE_DEVICES=1,2 and PORT=29500 set for that process.
# NOTE(review): the trailing "2" is presumably the GPU/process count -
# confirm against dist_train.sh.
CUDA_VISIBLE_DEVICES=1,2 PORT=29500 nohup \
    ./tools/dist_train.sh \
    configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py \
    2 &
Reproduces the problem - error message
已降低学习率,依然无法收敛,loss nan。
Additional information
自己数据集,已验证不是数据集问题,学习率多次降低调整
Beta Was this translation helpful? Give feedback.
All reactions