From a97891471f011002dd26f571e2fd6098af69e88f Mon Sep 17 00:00:00 2001 From: vasilenk Date: Tue, 5 Jul 2022 11:17:01 -0700 Subject: [PATCH] S-2022.06-RC1 --- ...ileNetSSD_deploy-1066x300_updated.prototxt | 1819 +++++++++++++ .../mtcnn_v1/caffe_model/convert-mtcnn.py | 10 +- .../mtcnn-transp-pnet_optimized.caffemodel | 3 + .../mtcnn-transp-pnet_optimized.prototxt | 796 ++++++ .../ResNet-50-deploy-1280x720.prototxt | 2324 +++++++++++++++++ 5 files changed, 4950 insertions(+), 2 deletions(-) create mode 100644 caffe_models/mobilenet_ssd/caffe_model/MobileNetSSD_deploy-1066x300_updated.prototxt create mode 100644 caffe_models/mtcnn_v1/caffe_model/mtcnn-transp-pnet_optimized.caffemodel create mode 100644 caffe_models/mtcnn_v1/caffe_model/mtcnn-transp-pnet_optimized.prototxt create mode 100644 caffe_models/resnet_50/caffe_model/ResNet-50-deploy-1280x720.prototxt diff --git a/caffe_models/mobilenet_ssd/caffe_model/MobileNetSSD_deploy-1066x300_updated.prototxt b/caffe_models/mobilenet_ssd/caffe_model/MobileNetSSD_deploy-1066x300_updated.prototxt new file mode 100644 index 0000000..d849fa7 --- /dev/null +++ b/caffe_models/mobilenet_ssd/caffe_model/MobileNetSSD_deploy-1066x300_updated.prototxt @@ -0,0 +1,1819 @@ +name: "MobileNet-SSD" +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 300 + dim: 1066 +} +layer { + name: "conv0" + type: "Convolution" + bottom: "data" + top: "conv0" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 32 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv0/relu" + type: "ReLU" + bottom: "conv0" + top: "conv0" +} +layer { + name: "conv1/dw" + type: "Convolution" + bottom: "conv0" + top: "conv1/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 32 + pad: 1 + kernel_size: 3 + group: 32 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv1/dw/relu" + type: "ReLU" + bottom: "conv1/dw" + top: "conv1/dw" +} +layer { + name: "conv1" + type: "Convolution" + bottom: "conv1/dw" + top: "conv1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv1/relu" + type: "ReLU" + bottom: "conv1" + top: "conv1" +} +layer { + name: "conv2/dw" + type: "Convolution" + bottom: "conv1" + top: "conv2/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + group: 64 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv2/dw/relu" + type: "ReLU" + bottom: "conv2/dw" + top: "conv2/dw" +} +layer { + name: "conv2" + type: "Convolution" + bottom: "conv2/dw" + top: "conv2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv2/relu" + type: "ReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "conv3/dw" + type: "Convolution" + bottom: "conv2" + top: "conv3/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + group: 128 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv3/dw/relu" + type: "ReLU" + bottom: "conv3/dw" + top: "conv3/dw" +} +layer { + name: "conv3" + type: "Convolution" + bottom: "conv3/dw" + top: "conv3" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv3/relu" + type: "ReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "conv4/dw" + type: "Convolution" + bottom: "conv3" + top: "conv4/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + group: 128 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv4/dw/relu" + type: "ReLU" + bottom: "conv4/dw" + top: "conv4/dw" +} +layer { + name: "conv4" + type: "Convolution" + bottom: "conv4/dw" + top: "conv4" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 256 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv4/relu" + type: "ReLU" + bottom: "conv4" + top: "conv4" +} +layer { + name: "conv5/dw" + type: "Convolution" + bottom: "conv4" + top: "conv5/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + group: 256 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv5/dw/relu" + type: "ReLU" + bottom: "conv5/dw" + top: "conv5/dw" +} +layer { + name: "conv5" + type: "Convolution" + bottom: "conv5/dw" + top: "conv5" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 256 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv5/relu" + type: "ReLU" + bottom: "conv5" + top: "conv5" +} +layer { + name: "conv6/dw" + type: "Convolution" + bottom: "conv5" + top: "conv6/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + group: 256 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv6/dw/relu" + type: "ReLU" + bottom: "conv6/dw" + top: "conv6/dw" +} +layer { + name: "conv6" + type: "Convolution" + bottom: "conv6/dw" + top: "conv6" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv6/relu" + type: "ReLU" + bottom: "conv6" + top: "conv6" +} +layer { + name: "conv7/dw" + type: "Convolution" + bottom: "conv6" + top: "conv7/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + group: 512 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv7/dw/relu" + type: "ReLU" + bottom: "conv7/dw" + top: "conv7/dw" +} +layer { + name: "conv7" + type: "Convolution" + bottom: "conv7/dw" + top: "conv7" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv7/relu" + type: "ReLU" + bottom: "conv7" + top: "conv7" +} +layer { + name: "conv8/dw" + type: "Convolution" + bottom: "conv7" + top: "conv8/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + group: 512 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv8/dw/relu" + type: "ReLU" + bottom: "conv8/dw" + top: "conv8/dw" +} +layer { + name: "conv8" + type: "Convolution" + bottom: "conv8/dw" + top: "conv8" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv8/relu" + type: "ReLU" + bottom: "conv8" + top: "conv8" +} +layer { + name: "conv9/dw" + type: "Convolution" + bottom: "conv8" + top: "conv9/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + group: 512 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv9/dw/relu" + type: "ReLU" + bottom: "conv9/dw" + top: "conv9/dw" +} +layer { + name: "conv9" + type: "Convolution" + bottom: "conv9/dw" + top: "conv9" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv9/relu" + type: "ReLU" + bottom: "conv9" + top: "conv9" +} +layer { + name: "conv10/dw" + type: "Convolution" + bottom: "conv9" + top: "conv10/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + group: 512 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv10/dw/relu" + type: "ReLU" + bottom: "conv10/dw" + top: "conv10/dw" +} +layer { + name: "conv10" + type: "Convolution" + bottom: "conv10/dw" + top: "conv10" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv10/relu" + type: "ReLU" + bottom: "conv10" + top: "conv10" +} +layer { + name: "conv11/dw" + type: "Convolution" + bottom: "conv10" + top: "conv11/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + group: 512 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv11/dw/relu" + type: "ReLU" + bottom: "conv11/dw" + top: "conv11/dw" +} +layer { + name: "conv11" + type: "Convolution" + bottom: "conv11/dw" + top: "conv11" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv11/relu" + type: "ReLU" + bottom: "conv11" + top: "conv11" +} +layer { + name: "conv12/dw" + type: "Convolution" + bottom: "conv11" + top: "conv12/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + group: 512 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv12/dw/relu" + type: "ReLU" + bottom: "conv12/dw" + top: "conv12/dw" +} +layer { + name: "conv12" + type: "Convolution" + bottom: "conv12/dw" + top: "conv12" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 1024 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv12/relu" + type: "ReLU" + bottom: "conv12" + top: "conv12" +} +layer { + name: "conv13/dw" + type: "Convolution" + bottom: "conv12" + top: "conv13/dw" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 1024 + pad: 1 + kernel_size: 3 + group: 1024 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + engine: CAFFE + } +} +layer { + name: "conv13/dw/relu" + type: "ReLU" + bottom: "conv13/dw" + top: "conv13/dw" +} +layer { + name: "conv13" + type: "Convolution" + bottom: "conv13/dw" + top: "conv13" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 1024 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv13/relu" + type: "ReLU" + bottom: "conv13" + top: "conv13" +} +layer { + name: "conv14_1" + type: "Convolution" + bottom: "conv13" + top: "conv14_1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 256 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv14_1/relu" + type: "ReLU" + bottom: "conv14_1" + top: "conv14_1" +} +layer { + name: "conv14_2" + type: "Convolution" + bottom: "conv14_1" + top: "conv14_2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv14_2/relu" + type: "ReLU" + bottom: "conv14_2" + top: "conv14_2" +} +layer { + name: "conv15_1" + type: "Convolution" + bottom: "conv14_2" + top: "conv15_1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv15_1/relu" + type: "ReLU" + bottom: "conv15_1" + top: "conv15_1" +} +layer { + name: "conv15_2" + type: "Convolution" + bottom: "conv15_1" + top: "conv15_2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv15_2/relu" + type: "ReLU" + bottom: "conv15_2" + top: "conv15_2" +} +layer { + name: "conv16_1" + type: "Convolution" + bottom: "conv15_2" + top: "conv16_1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 128 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv16_1/relu" + type: "ReLU" + bottom: "conv16_1" + top: "conv16_1" +} +layer { + name: "conv16_2" + type: "Convolution" + bottom: "conv16_1" + top: "conv16_2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv16_2/relu" + type: "ReLU" + bottom: "conv16_2" + top: "conv16_2" +} +layer { + name: "conv17_1" + type: "Convolution" + bottom: "conv16_2" + top: "conv17_1" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 64 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv17_1/relu" + type: "ReLU" + bottom: "conv17_1" + top: "conv17_1" +} +layer { + name: "conv17_2" + type: "Convolution" + bottom: "conv17_1" + top: "conv17_2" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + stride: 2 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv17_2/relu" + type: "ReLU" + bottom: "conv17_2" + top: "conv17_2" +} +layer { + name: "conv11_mbox_loc" + type: "Convolution" + bottom: "conv11" + top: "conv11_mbox_loc" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 12 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv11_mbox_conf" + type: "Convolution" + bottom: "conv11" + top: "conv11_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 63 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv11_mbox_priorbox" + type: "PriorBox" + bottom: "conv11" + bottom: "data" + top: "conv11_mbox_priorbox" + prior_box_param { + min_size: 60.0 + aspect_ratio: 2.0 + flip: true + clip: false + variance: 0.10000000149011612 + variance: 0.10000000149011612 + variance: 0.20000000298023224 + variance: 0.20000000298023224 + offset: 0.5 + } +} +layer { + name: "conv13_mbox_loc" + type: "Convolution" + bottom: "conv13" + top: "conv13_mbox_loc" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv13_mbox_conf" + type: "Convolution" + bottom: "conv13" + top: "conv13_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv13_mbox_priorbox" + type: "PriorBox" + bottom: "conv13" + bottom: "data" + top: "conv13_mbox_priorbox" + prior_box_param { + min_size: 105.0 + max_size: 150.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.10000000149011612 + variance: 0.10000000149011612 + variance: 0.20000000298023224 + variance: 0.20000000298023224 + offset: 0.5 + } +} +layer { + name: "conv14_2_mbox_loc" + type: "Convolution" + bottom: "conv14_2" + top: "conv14_2_mbox_loc" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv14_2_mbox_conf" + type: "Convolution" + bottom: "conv14_2" + top: "conv14_2_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv14_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv14_2" + bottom: "data" + top: "conv14_2_mbox_priorbox" + prior_box_param { + min_size: 150.0 + max_size: 195.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.10000000149011612 + variance: 0.10000000149011612 + variance: 0.20000000298023224 + variance: 0.20000000298023224 + offset: 0.5 + } +} +layer { + name: "conv15_2_mbox_loc" + type: "Convolution" + bottom: "conv15_2" + top: "conv15_2_mbox_loc" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv15_2_mbox_conf" + type: "Convolution" + bottom: "conv15_2" + top: "conv15_2_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv15_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv15_2" + bottom: "data" + top: "conv15_2_mbox_priorbox" + prior_box_param { + min_size: 195.0 + max_size: 240.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.10000000149011612 + variance: 0.10000000149011612 + variance: 0.20000000298023224 + variance: 0.20000000298023224 + offset: 0.5 + } +} +layer { + name: "conv16_2_mbox_loc" + type: "Convolution" + bottom: "conv16_2" + top: "conv16_2_mbox_loc" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv16_2_mbox_conf" + type: "Convolution" + bottom: "conv16_2" + top: "conv16_2_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv16_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv16_2" + bottom: "data" + top: "conv16_2_mbox_priorbox" + prior_box_param { + min_size: 240.0 + max_size: 285.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.10000000149011612 + variance: 0.10000000149011612 + variance: 0.20000000298023224 + variance: 0.20000000298023224 + offset: 0.5 + } +} +layer { + name: "conv17_2_mbox_loc" + type: "Convolution" + bottom: "conv17_2" + top: "conv17_2_mbox_loc" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 24 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv17_2_mbox_conf" + type: "Convolution" + bottom: "conv17_2" + top: "conv17_2_mbox_conf" + param { + lr_mult: 1.0 + decay_mult: 1.0 + } + param { + lr_mult: 2.0 + decay_mult: 0.0 + } + convolution_param { + num_output: 126 + kernel_size: 1 + weight_filler { + type: "msra" + } + bias_filler { + type: "constant" + value: 0.0 + } + } +} +layer { + name: "conv17_2_mbox_priorbox" + type: "PriorBox" + bottom: "conv17_2" + bottom: "data" + top: "conv17_2_mbox_priorbox" + prior_box_param { + min_size: 285.0 + max_size: 300.0 + aspect_ratio: 2.0 + aspect_ratio: 3.0 + flip: true + clip: false + variance: 0.10000000149011612 + variance: 0.10000000149011612 + variance: 0.20000000298023224 + variance: 0.20000000298023224 + offset: 0.5 + } +} +layer { + name: "mbox_priorbox" + type: "Concat" + bottom: "conv11_mbox_priorbox" + bottom: "conv13_mbox_priorbox" + bottom: "conv14_2_mbox_priorbox" + bottom: "conv15_2_mbox_priorbox" + bottom: "conv16_2_mbox_priorbox" + bottom: "conv17_2_mbox_priorbox" + top: "mbox_priorbox" + concat_param { + axis: 2 + } +} +layer { + name: "conv11_mbox_conf_reshape" + type: "Reshape" + bottom: "conv11_mbox_conf" + top: "conv11_mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: 3 + dim: 21 + dim: -1 + } + } +} +layer { + name: "conv11_mbox_conf_perm" + type: "Permute" + bottom: "conv11_mbox_conf_reshape" + top: "conv11_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 1 + order: 3 + } +} +layer { + name: "conv11_mbox_conf_softmax" + type: "Softmax" + bottom: "conv11_mbox_conf_perm" + top: "conv11_mbox_conf_softmax" + softmax_param { + axis: 1 + } +} +layer { + name: "conv13_mbox_conf_reshape" + type: "Reshape" + bottom: "conv13_mbox_conf" + top: "conv13_mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: 6 + dim: 21 + dim: -1 + } + } +} +layer { + name: "conv13_mbox_conf_perm" + type: "Permute" + bottom: "conv13_mbox_conf_reshape" + top: "conv13_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 1 + order: 3 + } +} +layer { + name: "conv13_mbox_conf_softmax" + type: "Softmax" + bottom: "conv13_mbox_conf_perm" + top: "conv13_mbox_conf_softmax" + softmax_param { + axis: 1 + } +} +layer { + name: "conv14_2_mbox_conf_reshape" + type: "Reshape" + bottom: "conv14_2_mbox_conf" + top: "conv14_2_mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: 6 + dim: 21 + dim: -1 + } + } +} +layer { + name: "conv14_2_mbox_conf_perm" + type: "Permute" + bottom: "conv14_2_mbox_conf_reshape" + top: "conv14_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 1 + order: 3 + } +} +layer { + name: "conv14_2_mbox_conf_softmax" + type: "Softmax" + bottom: "conv14_2_mbox_conf_perm" + top: "conv14_2_mbox_conf_softmax" + softmax_param { + axis: 1 + } +} +layer { + name: "conv15_2_mbox_conf_reshape" + type: "Reshape" + bottom: "conv15_2_mbox_conf" + top: "conv15_2_mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: 6 + dim: 21 + dim: -1 + } + } +} +layer { + name: "conv15_2_mbox_conf_perm" + type: "Permute" + bottom: "conv15_2_mbox_conf_reshape" + top: "conv15_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 1 + order: 3 + } +} +layer { + name: "conv15_2_mbox_conf_softmax" + type: "Softmax" + bottom: "conv15_2_mbox_conf_perm" + top: "conv15_2_mbox_conf_softmax" + softmax_param { + axis: 1 + } +} +layer { + name: "conv16_2_mbox_conf_reshape" + type: "Reshape" + bottom: "conv16_2_mbox_conf" + top: "conv16_2_mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: 6 + dim: 21 + dim: -1 + } + } +} +layer { + name: "conv16_2_mbox_conf_perm" + type: "Permute" + bottom: "conv16_2_mbox_conf_reshape" + top: "conv16_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 1 + order: 3 + } +} +layer { + name: "conv16_2_mbox_conf_softmax" + type: "Softmax" + bottom: "conv16_2_mbox_conf_perm" + top: "conv16_2_mbox_conf_softmax" + softmax_param { + axis: 1 + } +} +layer { + name: "conv17_2_mbox_conf_reshape" + type: "Reshape" + bottom: "conv17_2_mbox_conf" + top: "conv17_2_mbox_conf_reshape" + reshape_param { + shape { + dim: 0 + dim: 6 + dim: 21 + dim: -1 + } + } +} +layer { + name: "conv17_2_mbox_conf_perm" + type: "Permute" + bottom: "conv17_2_mbox_conf_reshape" + top: "conv17_2_mbox_conf_perm" + permute_param { + order: 0 + order: 2 + order: 1 + order: 3 + } +} +layer { + name: "conv17_2_mbox_conf_softmax" + type: "Softmax" + bottom: "conv17_2_mbox_conf_perm" + top: "conv17_2_mbox_conf_softmax" + softmax_param { + axis: 1 + } +} +layer { + name: "detection_out" + type: "DetectionOutput" + bottom: "conv11_mbox_conf_softmax" + bottom: "conv13_mbox_conf_softmax" + bottom: "conv14_2_mbox_conf_softmax" + bottom: "conv15_2_mbox_conf_softmax" + bottom: "conv16_2_mbox_conf_softmax" + bottom: "conv17_2_mbox_conf_softmax" + bottom: "conv11_mbox_loc" + bottom: "conv13_mbox_loc" + bottom: "conv14_2_mbox_loc" + bottom: "conv15_2_mbox_loc" + bottom: "conv16_2_mbox_loc" + bottom: "conv17_2_mbox_loc" + bottom: "mbox_priorbox" + top: "detection_out" + detection_output_param { + num_classes: 21 + share_location: true + background_label_id: 0 + nms_param { + nms_threshold: 0.44999998807907104 + top_k: 100 + } + code_type: CENTER_SIZE + keep_top_k: 100 + confidence_threshold: 0.25 + no_permute: true + } +} diff --git a/caffe_models/mtcnn_v1/caffe_model/convert-mtcnn.py b/caffe_models/mtcnn_v1/caffe_model/convert-mtcnn.py index 42528cc..acc4777 100755 --- a/caffe_models/mtcnn_v1/caffe_model/convert-mtcnn.py +++ b/caffe_models/mtcnn_v1/caffe_model/convert-mtcnn.py @@ -10,6 +10,7 @@ parser = argparse.ArgumentParser(description='Conversion options') parser.add_argument('-relu', action='store_true', help='Use ReLU instead of PReLU') parser.add_argument('-transp', action='store_true', help='Transpose convolution coefficients') +parser.add_argument('-pnet_optimized', action='store_true', help='Keep one instance of PNet') args = parser.parse_args() baseName = "mtcnn" @@ -19,12 +20,17 @@ if args.transp: baseName += "-transp" +if args.pnet_optimized: + baseName += "-pnet_optimized" + netPatched = caffe.Net("{}.prototxt".format(baseName), caffe.TEST) def convertDet1(): net = caffe.Net('det1.prototxt', 'det1.caffemodel', caffe.TEST) - - for pnet in ['pnet1', 'pnet2', 'pnet3', 'pnet4', 'pnet5', 'pnet6', 'pnet7', 'pnet8', 'pnet9']: + + pnet_list = ['pnet1', ] if args.pnet_optimized else ['pnet1', 'pnet2', 'pnet3', 'pnet4', 'pnet5', 'pnet6', 'pnet7', 'pnet8', 'pnet9'] + + for pnet in pnet_list: for layer in ['conv1', 'conv2', 'conv3']: if args.transp: netPatched.params[pnet+'-'+layer][0].data[...] = np.transpose(net.params[layer][0].data, (0,1,3,2)) diff --git a/caffe_models/mtcnn_v1/caffe_model/mtcnn-transp-pnet_optimized.caffemodel b/caffe_models/mtcnn_v1/caffe_model/mtcnn-transp-pnet_optimized.caffemodel new file mode 100644 index 0000000..7a949c8 --- /dev/null +++ b/caffe_models/mtcnn_v1/caffe_model/mtcnn-transp-pnet_optimized.caffemodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1015bbc4b3a4c5e72c96593c730e7253a5a3946ff2e722cb8e233162f65eecc7 +size 1986783 diff --git a/caffe_models/mtcnn_v1/caffe_model/mtcnn-transp-pnet_optimized.prototxt b/caffe_models/mtcnn_v1/caffe_model/mtcnn-transp-pnet_optimized.prototxt new file mode 100644 index 0000000..3fb214d --- /dev/null +++ b/caffe_models/mtcnn_v1/caffe_model/mtcnn-transp-pnet_optimized.prototxt @@ -0,0 +1,796 @@ +name: "MTCNN" + +# +# PNet1: 320x240 +# + +input: "pnet1-data" +input_dim: 1 +input_dim: 3 # C +input_dim: 240 # H +input_dim: 320 # W + +layer { + name: "pnet1-conv1" + type: "Convolution" + bottom: "pnet1-data" + top: "pnet1-conv1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 10 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } + } +layer { + name: "pnet1-PReLU1" + type: "PReLU" + bottom: "pnet1-conv1" + top: "pnet1-PReLU1" + } +layer { + name: "pnet1-pool1" + type: "Pooling" + bottom: "pnet1-PReLU1" + top: "pnet1-pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } + } + +layer { + name: "pnet1-conv2" + type: "Convolution" + bottom: "pnet1-pool1" + top: "pnet1-conv2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 16 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } + } +layer { + name: "pnet1-PReLU2" + type: "PReLU" + bottom: "pnet1-conv2" + top: "pnet1-PReLU2" + } + +layer { + name: "pnet1-conv3" + type: "Convolution" + bottom: "pnet1-PReLU2" + top: "pnet1-conv3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 32 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } + } +layer { + name: "pnet1-PReLU3" + type: "PReLU" + bottom: "pnet1-conv3" + top: "pnet1-PReLU3" + } + + +#layer { +# name: "pnet1-conv4-1" +# type: "Convolution" +# bottom: "pnet1-conv3" +# top: "pnet1-conv4-1" +# param { +# lr_mult: 1 +# decay_mult: 1 +# } +# param { +# lr_mult: 2 +# decay_mult: 0 +# } +# convolution_param { +# num_output: 2 +# kernel_size: 1 +# stride: 1 +# weight_filler { +# type: "xavier" +# } +# bias_filler { +# type: "constant" +# value: 0 +# } +# } +# } +# +#layer { +# name: "pnet1-conv4-2" +# type: "Convolution" +# bottom: "pnet1-conv3" +# top: "pnet1-conv4-2" +# param { +# lr_mult: 1 +# decay_mult: 1 +# } +# param { +# lr_mult: 2 +# decay_mult: 0 +# } +# convolution_param { +# num_output: 4 +# kernel_size: 1 +# stride: 1 +# weight_filler { +# type: "xavier" +# } +# bias_filler { +# type: "constant" +# value: 0 +# } +# } +# } + +layer { + name: "pnet1-conv4" + type: "Convolution" + bottom: "pnet1-PReLU3" + top: "pnet1-conv4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 6 + kernel_size: 1 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } + } + +#layer { +# name: "pnet1-prob1" +# type: "Softmax" +# bottom: "pnet1-conv4-1" +# top: "pnet1-prob1" +# } + +# +# RNet +# + +input: "rnet-data" +input_dim: 1 +input_dim: 3 +input_dim: 24 +input_dim: 24 + + +########################## +###################### +layer { + name: "rnet-conv1" + type: "Convolution" + bottom: "rnet-data" + top: "rnet-conv1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 28 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } + } +layer { + name: "rnet-prelu1" + type: "PReLU" + bottom: "rnet-conv1" + top: "rnet-prelu1" + propagate_down: true + } +layer { + name: "rnet-pool1" + type: "Pooling" + bottom: "rnet-prelu1" + top: "rnet-pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } + } + +layer { + name: "rnet-conv2" + type: "Convolution" + bottom: "rnet-pool1" + top: "rnet-conv2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 48 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } + } +layer { + name: "rnet-prelu2" + type: "PReLU" + bottom: "rnet-conv2" + top: "rnet-prelu2" + propagate_down: true + } +layer { + name: "rnet-pool2" + type: "Pooling" + bottom: "rnet-prelu2" + top: "rnet-pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } + } +#################################### + +################################## +layer { + name: "rnet-conv3" + type: "Convolution" + bottom: "rnet-pool2" + top: "rnet-conv3" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + kernel_size: 2 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } + } +layer { + name: "rnet-prelu3" + type: "PReLU" + bottom: "rnet-conv3" + top: "rnet-prelu3" + propagate_down: true + } +############################### + +############################### + +layer { + name: "rnet-conv4" + type: "InnerProduct" + bottom: "rnet-prelu3" + top: "rnet-conv4" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + inner_product_param { + num_output: 128 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } + } +layer { + name: "rnet-prelu4" + type: "PReLU" + bottom: "rnet-conv4" + top: "rnet-prelu4" + } + +#layer { +# name: "rnet-conv5-1" +# type: "InnerProduct" +# bottom: "rnet-conv4" +# top: "rnet-conv5-1" +# param { +# lr_mult: 0 +# decay_mult: 0 +# } +# param { +# lr_mult: 0 +# decay_mult: 0 +# } +# inner_product_param { +# num_output: 2 +# #kernel_size: 1 +# #stride: 1 +# weight_filler { +# type: "xavier" +# } +# bias_filler { +# type: "constant" +# value: 0 +# } +# } +# } +#layer { +# name: "rnet-conv5-2" +# type: "InnerProduct" +# bottom: "rnet-conv4" +# top: "rnet-conv5-2" +# param { +# lr_mult: 1 +# decay_mult: 1 +# } +# param { +# lr_mult: 2 +# decay_mult: 1 +# } +# inner_product_param { +# num_output: 4 +# #kernel_size: 1 +# #stride: 1 +# weight_filler { +# type: "xavier" +# } +# bias_filler { +# type: "constant" +# value: 0 +# } +# } +# } + +layer { + name: "rnet-conv5" + type: "InnerProduct" + bottom: "rnet-prelu4" + top: "rnet-conv5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 1 + } + inner_product_param { + num_output: 6 + #kernel_size: 1 + #stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } + } + +#layer { +# name: "rnet-prob1" +# type: "Softmax" +# bottom: "rnet-conv5-1" +# top: "rnet-prob1" +# } + +# +# ONet +# + +input: "onet-data" +input_dim: 1 +input_dim: 3 +input_dim: 48 +input_dim: 48 +################################## +layer { + name: "onet-conv1" + type: "Convolution" + bottom: "onet-data" + top: "onet-conv1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 1 + } + convolution_param { + num_output: 32 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "onet-prelu1" + type: "PReLU" + bottom: "onet-conv1" + top: "onet-prelu1" +} +layer { + name: "onet-pool1" + type: "Pooling" + bottom: "onet-prelu1" + top: "onet-pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "onet-conv2" + type: "Convolution" + bottom: "onet-pool1" + top: "onet-conv2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 1 + } + convolution_param { + num_output: 64 + kernel_size: 3 + stride: 1 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} + +layer { + name: "onet-prelu2" + type: "PReLU" + bottom: "onet-conv2" + top: "onet-prelu2" +} +layer { + name: "onet-pool2" + type: "Pooling" + bottom: "onet-prelu2" + top: "onet-pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} + +layer { + name: "onet-conv3" + type: "Convolution" + bottom: "onet-pool2" + top: "onet-conv3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 1 + } + convolution_param { + num_output: 64 + kernel_size: 3 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "onet-prelu3" + type: "PReLU" + bottom: "onet-conv3" + top: "onet-prelu3" +} +layer { + name: "onet-pool3" + type: "Pooling" + bottom: "onet-prelu3" + top: "onet-pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "onet-conv4" + type: "Convolution" + bottom: "onet-pool3" + top: "onet-conv4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 1 + } + convolution_param { + num_output: 128 + kernel_size: 2 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "onet-prelu4" + type: "PReLU" + bottom: "onet-conv4" + top: "onet-prelu4" +} + + +layer { + name: "onet-conv5" + type: "InnerProduct" + bottom: "onet-prelu4" + top: "onet-conv5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 1 + } + inner_product_param { + #kernel_size: 3 + num_output: 256 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} + +layer { + name: "onet-drop5" + type: "Dropout" + bottom: "onet-conv5" + top: "onet-conv5" + dropout_param { + dropout_ratio: 0.25 + } +} +layer { + name: "onet-prelu5" + type: "PReLU" + bottom: "onet-conv5" + top: "onet-prelu5" +} + + +#layer { +# name: "onet-conv6-1" +# type: "InnerProduct" +# bottom: "onet-conv5" +# top: "onet-conv6-1" +# param { +# lr_mult: 1 +# decay_mult: 1 +# } +# param { +# lr_mult: 2 +# decay_mult: 1 +# } +# inner_product_param { +# #kernel_size: 1 +# num_output: 2 +# weight_filler { +# type: "xavier" +# } +# bias_filler { +# type: "constant" +# value: 0 +# } +# } +#} +#layer { +# name: "onet-conv6-2" +# type: "InnerProduct" +# bottom: "onet-conv5" +# top: "onet-conv6-2" +# param { +# lr_mult: 1 +# decay_mult: 1 +# } +# param { +# lr_mult: 2 +# decay_mult: 1 +# } +# inner_product_param { +# #kernel_size: 1 +# num_output: 4 +# weight_filler { +# type: "xavier" +# } +# bias_filler { +# type: "constant" +# value: 0 +# } +# } +#} +#layer { +# name: "onet-conv6-3" +# type: "InnerProduct" +# bottom: "onet-conv5" +# top: "onet-conv6-3" +# param { +# lr_mult: 1 +# decay_mult: 1 +# } +# param { +# lr_mult: 2 +# decay_mult: 1 +# } +# inner_product_param { +# #kernel_size: 1 +# num_output: 10 +# weight_filler { +# type: "xavier" +# } +# bias_filler { +# type: "constant" +# value: 0 +# } +# } +#} + +layer { + name: "onet-conv6" + type: "InnerProduct" + bottom: "onet-prelu5" + top: "onet-conv6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 1 + } + inner_product_param { + #kernel_size: 1 + num_output: 16 + weight_filler { + type: "xavier" + } + bias_filler { + type: "constant" + value: 0 + } + } +} + +#layer { +# name: "onet-prob1" +# type: "Softmax" +# bottom: "onet-conv6-1" +# top: "onet-prob1" +#} + diff --git a/caffe_models/resnet_50/caffe_model/ResNet-50-deploy-1280x720.prototxt b/caffe_models/resnet_50/caffe_model/ResNet-50-deploy-1280x720.prototxt new file mode 100644 index 0000000..183fbc5 --- /dev/null +++ b/caffe_models/resnet_50/caffe_model/ResNet-50-deploy-1280x720.prototxt @@ -0,0 +1,2324 @@ +name: "ResNet-50" +input: "data" +input_dim: 1 +input_dim: 3 +input_dim: 720 +input_dim: 1280 + +layer { + bottom: "data" + top: "conv1" + name: "conv1" + type: "Convolution" + convolution_param { + num_output: 64 + kernel_size: 7 + pad: 3 + stride: 2 + } +} + +layer { + bottom: "conv1" + top: "conv1" + name: "bn_conv1" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "conv1" + top: "conv1" + name: "scale_conv1" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "conv1" + top: "conv1" + name: "conv1_relu" + type: "ReLU" +} + +layer { + bottom: "conv1" + top: "pool1" + name: "pool1" + type: "Pooling" + pooling_param { + kernel_size: 3 + stride: 2 + pool: MAX + } +} + +layer { + bottom: "pool1" + top: "res2a_branch1" + name: "res2a_branch1" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2a_branch1" + top: "res2a_branch1" + name: "bn2a_branch1" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2a_branch1" + top: "res2a_branch1" + name: "scale2a_branch1" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "pool1" + top: "res2a_branch2a" + name: "res2a_branch2a" + type: "Convolution" + convolution_param { + num_output: 64 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2a_branch2a" + top: "res2a_branch2a" + name: "bn2a_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2a_branch2a" + top: "res2a_branch2a" + name: "scale2a_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2a_branch2a" + top: "res2a_branch2a" + name: "res2a_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res2a_branch2a" + top: "res2a_branch2b" + name: "res2a_branch2b" + type: "Convolution" + convolution_param { + num_output: 64 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2a_branch2b" + top: "res2a_branch2b" + name: "bn2a_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2a_branch2b" + top: "res2a_branch2b" + name: "scale2a_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2a_branch2b" + top: "res2a_branch2b" + name: "res2a_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res2a_branch2b" + top: "res2a_branch2c" + name: "res2a_branch2c" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2a_branch2c" + top: "res2a_branch2c" + name: "bn2a_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2a_branch2c" + top: "res2a_branch2c" + name: "scale2a_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2a_branch1" + bottom: "res2a_branch2c" + top: "res2a" + name: "res2a" + type: "Eltwise" +} + +layer { + bottom: "res2a" + top: "res2a" + name: "res2a_relu" + type: "ReLU" +} + +layer { + bottom: "res2a" + top: "res2b_branch2a" + name: "res2b_branch2a" + type: "Convolution" + convolution_param { + num_output: 64 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2b_branch2a" + top: "res2b_branch2a" + name: "bn2b_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2b_branch2a" + top: "res2b_branch2a" + name: "scale2b_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2b_branch2a" + top: "res2b_branch2a" + name: "res2b_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res2b_branch2a" + top: "res2b_branch2b" + name: "res2b_branch2b" + type: "Convolution" + convolution_param { + num_output: 64 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2b_branch2b" + top: "res2b_branch2b" + name: "bn2b_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2b_branch2b" + top: "res2b_branch2b" + name: "scale2b_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2b_branch2b" + top: "res2b_branch2b" + name: "res2b_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res2b_branch2b" + top: "res2b_branch2c" + name: "res2b_branch2c" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2b_branch2c" + top: "res2b_branch2c" + name: "bn2b_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2b_branch2c" + top: "res2b_branch2c" + name: "scale2b_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2a" + bottom: "res2b_branch2c" + top: "res2b" + name: "res2b" + type: "Eltwise" +} + +layer { + bottom: "res2b" + top: "res2b" + name: "res2b_relu" + type: "ReLU" +} + +layer { + bottom: "res2b" + top: "res2c_branch2a" + name: "res2c_branch2a" + type: "Convolution" + convolution_param { + num_output: 64 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2c_branch2a" + top: "res2c_branch2a" + name: "bn2c_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2c_branch2a" + top: "res2c_branch2a" + name: "scale2c_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2c_branch2a" + top: "res2c_branch2a" + name: "res2c_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res2c_branch2a" + top: "res2c_branch2b" + name: "res2c_branch2b" + type: "Convolution" + convolution_param { + num_output: 64 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2c_branch2b" + top: "res2c_branch2b" + name: "bn2c_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2c_branch2b" + top: "res2c_branch2b" + name: "scale2c_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2c_branch2b" + top: "res2c_branch2b" + name: "res2c_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res2c_branch2b" + top: "res2c_branch2c" + name: "res2c_branch2c" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res2c_branch2c" + top: "res2c_branch2c" + name: "bn2c_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res2c_branch2c" + top: "res2c_branch2c" + name: "scale2c_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2b" + bottom: "res2c_branch2c" + top: "res2c" + name: "res2c" + type: "Eltwise" +} + +layer { + bottom: "res2c" + top: "res2c" + name: "res2c_relu" + type: "ReLU" +} + +layer { + bottom: "res2c" + top: "res3a_branch1" + name: "res3a_branch1" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 1 + pad: 0 + stride: 2 + bias_term: false + } +} + +layer { + bottom: "res3a_branch1" + top: "res3a_branch1" + name: "bn3a_branch1" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3a_branch1" + top: "res3a_branch1" + name: "scale3a_branch1" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res2c" + top: "res3a_branch2a" + name: "res3a_branch2a" + type: "Convolution" + convolution_param { + num_output: 128 + kernel_size: 1 + pad: 0 + stride: 2 + bias_term: false + } +} + +layer { + bottom: "res3a_branch2a" + top: "res3a_branch2a" + name: "bn3a_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3a_branch2a" + top: "res3a_branch2a" + name: "scale3a_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3a_branch2a" + top: "res3a_branch2a" + name: "res3a_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res3a_branch2a" + top: "res3a_branch2b" + name: "res3a_branch2b" + type: "Convolution" + convolution_param { + num_output: 128 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3a_branch2b" + top: "res3a_branch2b" + name: "bn3a_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3a_branch2b" + top: "res3a_branch2b" + name: "scale3a_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3a_branch2b" + top: "res3a_branch2b" + name: "res3a_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res3a_branch2b" + top: "res3a_branch2c" + name: "res3a_branch2c" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3a_branch2c" + top: "res3a_branch2c" + name: "bn3a_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3a_branch2c" + top: "res3a_branch2c" + name: "scale3a_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3a_branch1" + bottom: "res3a_branch2c" + top: "res3a" + name: "res3a" + type: "Eltwise" +} + +layer { + bottom: "res3a" + top: "res3a" + name: "res3a_relu" + type: "ReLU" +} + +layer { + bottom: "res3a" + top: "res3b_branch2a" + name: "res3b_branch2a" + type: "Convolution" + convolution_param { + num_output: 128 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3b_branch2a" + top: "res3b_branch2a" + name: "bn3b_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3b_branch2a" + top: "res3b_branch2a" + name: "scale3b_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3b_branch2a" + top: "res3b_branch2a" + name: "res3b_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res3b_branch2a" + top: "res3b_branch2b" + name: "res3b_branch2b" + type: "Convolution" + convolution_param { + num_output: 128 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3b_branch2b" + top: "res3b_branch2b" + name: "bn3b_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3b_branch2b" + top: "res3b_branch2b" + name: "scale3b_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3b_branch2b" + top: "res3b_branch2b" + name: "res3b_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res3b_branch2b" + top: "res3b_branch2c" + name: "res3b_branch2c" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3b_branch2c" + top: "res3b_branch2c" + name: "bn3b_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3b_branch2c" + top: "res3b_branch2c" + name: "scale3b_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3a" + bottom: "res3b_branch2c" + top: "res3b" + name: "res3b" + type: "Eltwise" +} + +layer { + bottom: "res3b" + top: "res3b" + name: "res3b_relu" + type: "ReLU" +} + +layer { + bottom: "res3b" + top: "res3c_branch2a" + name: "res3c_branch2a" + type: "Convolution" + convolution_param { + num_output: 128 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3c_branch2a" + top: "res3c_branch2a" + name: "bn3c_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3c_branch2a" + top: "res3c_branch2a" + name: "scale3c_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3c_branch2a" + top: "res3c_branch2a" + name: "res3c_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res3c_branch2a" + top: "res3c_branch2b" + name: "res3c_branch2b" + type: "Convolution" + convolution_param { + num_output: 128 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3c_branch2b" + top: "res3c_branch2b" + name: "bn3c_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3c_branch2b" + top: "res3c_branch2b" + name: "scale3c_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3c_branch2b" + top: "res3c_branch2b" + name: "res3c_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res3c_branch2b" + top: "res3c_branch2c" + name: "res3c_branch2c" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3c_branch2c" + top: "res3c_branch2c" + name: "bn3c_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3c_branch2c" + top: "res3c_branch2c" + name: "scale3c_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3b" + bottom: "res3c_branch2c" + top: "res3c" + name: "res3c" + type: "Eltwise" +} + +layer { + bottom: "res3c" + top: "res3c" + name: "res3c_relu" + type: "ReLU" +} + +layer { + bottom: "res3c" + top: "res3d_branch2a" + name: "res3d_branch2a" + type: "Convolution" + convolution_param { + num_output: 128 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3d_branch2a" + top: "res3d_branch2a" + name: "bn3d_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3d_branch2a" + top: "res3d_branch2a" + name: "scale3d_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3d_branch2a" + top: "res3d_branch2a" + name: "res3d_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res3d_branch2a" + top: "res3d_branch2b" + name: "res3d_branch2b" + type: "Convolution" + convolution_param { + num_output: 128 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3d_branch2b" + top: "res3d_branch2b" + name: "bn3d_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3d_branch2b" + top: "res3d_branch2b" + name: "scale3d_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3d_branch2b" + top: "res3d_branch2b" + name: "res3d_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res3d_branch2b" + top: "res3d_branch2c" + name: "res3d_branch2c" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res3d_branch2c" + top: "res3d_branch2c" + name: "bn3d_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res3d_branch2c" + top: "res3d_branch2c" + name: "scale3d_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3c" + bottom: "res3d_branch2c" + top: "res3d" + name: "res3d" + type: "Eltwise" +} + +layer { + bottom: "res3d" + top: "res3d" + name: "res3d_relu" + type: "ReLU" +} + +layer { + bottom: "res3d" + top: "res4a_branch1" + name: "res4a_branch1" + type: "Convolution" + convolution_param { + num_output: 1024 + kernel_size: 1 + pad: 0 + stride: 2 + bias_term: false + } +} + +layer { + bottom: "res4a_branch1" + top: "res4a_branch1" + name: "bn4a_branch1" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4a_branch1" + top: "res4a_branch1" + name: "scale4a_branch1" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res3d" + top: "res4a_branch2a" + name: "res4a_branch2a" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 2 + bias_term: false + } +} + +layer { + bottom: "res4a_branch2a" + top: "res4a_branch2a" + name: "bn4a_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4a_branch2a" + top: "res4a_branch2a" + name: "scale4a_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4a_branch2a" + top: "res4a_branch2a" + name: "res4a_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res4a_branch2a" + top: "res4a_branch2b" + name: "res4a_branch2b" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4a_branch2b" + top: "res4a_branch2b" + name: "bn4a_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4a_branch2b" + top: "res4a_branch2b" + name: "scale4a_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4a_branch2b" + top: "res4a_branch2b" + name: "res4a_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res4a_branch2b" + top: "res4a_branch2c" + name: "res4a_branch2c" + type: "Convolution" + convolution_param { + num_output: 1024 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4a_branch2c" + top: "res4a_branch2c" + name: "bn4a_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4a_branch2c" + top: "res4a_branch2c" + name: "scale4a_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4a_branch1" + bottom: "res4a_branch2c" + top: "res4a" + name: "res4a" + type: "Eltwise" +} + +layer { + bottom: "res4a" + top: "res4a" + name: "res4a_relu" + type: "ReLU" +} + +layer { + bottom: "res4a" + top: "res4b_branch2a" + name: "res4b_branch2a" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4b_branch2a" + top: "res4b_branch2a" + name: "bn4b_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4b_branch2a" + top: "res4b_branch2a" + name: "scale4b_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4b_branch2a" + top: "res4b_branch2a" + name: "res4b_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res4b_branch2a" + top: "res4b_branch2b" + name: "res4b_branch2b" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4b_branch2b" + top: "res4b_branch2b" + name: "bn4b_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4b_branch2b" + top: "res4b_branch2b" + name: "scale4b_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4b_branch2b" + top: "res4b_branch2b" + name: "res4b_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res4b_branch2b" + top: "res4b_branch2c" + name: "res4b_branch2c" + type: "Convolution" + convolution_param { + num_output: 1024 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4b_branch2c" + top: "res4b_branch2c" + name: "bn4b_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4b_branch2c" + top: "res4b_branch2c" + name: "scale4b_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4a" + bottom: "res4b_branch2c" + top: "res4b" + name: "res4b" + type: "Eltwise" +} + +layer { + bottom: "res4b" + top: "res4b" + name: "res4b_relu" + type: "ReLU" +} + +layer { + bottom: "res4b" + top: "res4c_branch2a" + name: "res4c_branch2a" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4c_branch2a" + top: "res4c_branch2a" + name: "bn4c_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4c_branch2a" + top: "res4c_branch2a" + name: "scale4c_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4c_branch2a" + top: "res4c_branch2a" + name: "res4c_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res4c_branch2a" + top: "res4c_branch2b" + name: "res4c_branch2b" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4c_branch2b" + top: "res4c_branch2b" + name: "bn4c_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4c_branch2b" + top: "res4c_branch2b" + name: "scale4c_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4c_branch2b" + top: "res4c_branch2b" + name: "res4c_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res4c_branch2b" + top: "res4c_branch2c" + name: "res4c_branch2c" + type: "Convolution" + convolution_param { + num_output: 1024 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4c_branch2c" + top: "res4c_branch2c" + name: "bn4c_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4c_branch2c" + top: "res4c_branch2c" + name: "scale4c_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4b" + bottom: "res4c_branch2c" + top: "res4c" + name: "res4c" + type: "Eltwise" +} + +layer { + bottom: "res4c" + top: "res4c" + name: "res4c_relu" + type: "ReLU" +} + +layer { + bottom: "res4c" + top: "res4d_branch2a" + name: "res4d_branch2a" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4d_branch2a" + top: "res4d_branch2a" + name: "bn4d_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4d_branch2a" + top: "res4d_branch2a" + name: "scale4d_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4d_branch2a" + top: "res4d_branch2a" + name: "res4d_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res4d_branch2a" + top: "res4d_branch2b" + name: "res4d_branch2b" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4d_branch2b" + top: "res4d_branch2b" + name: "bn4d_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4d_branch2b" + top: "res4d_branch2b" + name: "scale4d_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4d_branch2b" + top: "res4d_branch2b" + name: "res4d_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res4d_branch2b" + top: "res4d_branch2c" + name: "res4d_branch2c" + type: "Convolution" + convolution_param { + num_output: 1024 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4d_branch2c" + top: "res4d_branch2c" + name: "bn4d_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4d_branch2c" + top: "res4d_branch2c" + name: "scale4d_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4c" + bottom: "res4d_branch2c" + top: "res4d" + name: "res4d" + type: "Eltwise" +} + +layer { + bottom: "res4d" + top: "res4d" + name: "res4d_relu" + type: "ReLU" +} + +layer { + bottom: "res4d" + top: "res4e_branch2a" + name: "res4e_branch2a" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4e_branch2a" + top: "res4e_branch2a" + name: "bn4e_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4e_branch2a" + top: "res4e_branch2a" + name: "scale4e_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4e_branch2a" + top: "res4e_branch2a" + name: "res4e_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res4e_branch2a" + top: "res4e_branch2b" + name: "res4e_branch2b" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4e_branch2b" + top: "res4e_branch2b" + name: "bn4e_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4e_branch2b" + top: "res4e_branch2b" + name: "scale4e_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4e_branch2b" + top: "res4e_branch2b" + name: "res4e_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res4e_branch2b" + top: "res4e_branch2c" + name: "res4e_branch2c" + type: "Convolution" + convolution_param { + num_output: 1024 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4e_branch2c" + top: "res4e_branch2c" + name: "bn4e_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4e_branch2c" + top: "res4e_branch2c" + name: "scale4e_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4d" + bottom: "res4e_branch2c" + top: "res4e" + name: "res4e" + type: "Eltwise" +} + +layer { + bottom: "res4e" + top: "res4e" + name: "res4e_relu" + type: "ReLU" +} + +layer { + bottom: "res4e" + top: "res4f_branch2a" + name: "res4f_branch2a" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4f_branch2a" + top: "res4f_branch2a" + name: "bn4f_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4f_branch2a" + top: "res4f_branch2a" + name: "scale4f_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4f_branch2a" + top: "res4f_branch2a" + name: "res4f_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res4f_branch2a" + top: "res4f_branch2b" + name: "res4f_branch2b" + type: "Convolution" + convolution_param { + num_output: 256 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4f_branch2b" + top: "res4f_branch2b" + name: "bn4f_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4f_branch2b" + top: "res4f_branch2b" + name: "scale4f_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4f_branch2b" + top: "res4f_branch2b" + name: "res4f_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res4f_branch2b" + top: "res4f_branch2c" + name: "res4f_branch2c" + type: "Convolution" + convolution_param { + num_output: 1024 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res4f_branch2c" + top: "res4f_branch2c" + name: "bn4f_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res4f_branch2c" + top: "res4f_branch2c" + name: "scale4f_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4e" + bottom: "res4f_branch2c" + top: "res4f" + name: "res4f" + type: "Eltwise" +} + +layer { + bottom: "res4f" + top: "res4f" + name: "res4f_relu" + type: "ReLU" +} + +layer { + bottom: "res4f" + top: "res5a_branch1" + name: "res5a_branch1" + type: "Convolution" + convolution_param { + num_output: 2048 + kernel_size: 1 + pad: 0 + stride: 2 + bias_term: false + } +} + +layer { + bottom: "res5a_branch1" + top: "res5a_branch1" + name: "bn5a_branch1" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5a_branch1" + top: "res5a_branch1" + name: "scale5a_branch1" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res4f" + top: "res5a_branch2a" + name: "res5a_branch2a" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 1 + pad: 0 + stride: 2 + bias_term: false + } +} + +layer { + bottom: "res5a_branch2a" + top: "res5a_branch2a" + name: "bn5a_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5a_branch2a" + top: "res5a_branch2a" + name: "scale5a_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res5a_branch2a" + top: "res5a_branch2a" + name: "res5a_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res5a_branch2a" + top: "res5a_branch2b" + name: "res5a_branch2b" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res5a_branch2b" + top: "res5a_branch2b" + name: "bn5a_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5a_branch2b" + top: "res5a_branch2b" + name: "scale5a_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res5a_branch2b" + top: "res5a_branch2b" + name: "res5a_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res5a_branch2b" + top: "res5a_branch2c" + name: "res5a_branch2c" + type: "Convolution" + convolution_param { + num_output: 2048 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res5a_branch2c" + top: "res5a_branch2c" + name: "bn5a_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5a_branch2c" + top: "res5a_branch2c" + name: "scale5a_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res5a_branch1" + bottom: "res5a_branch2c" + top: "res5a" + name: "res5a" + type: "Eltwise" +} + +layer { + bottom: "res5a" + top: "res5a" + name: "res5a_relu" + type: "ReLU" +} + +layer { + bottom: "res5a" + top: "res5b_branch2a" + name: "res5b_branch2a" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res5b_branch2a" + top: "res5b_branch2a" + name: "bn5b_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5b_branch2a" + top: "res5b_branch2a" + name: "scale5b_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res5b_branch2a" + top: "res5b_branch2a" + name: "res5b_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res5b_branch2a" + top: "res5b_branch2b" + name: "res5b_branch2b" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res5b_branch2b" + top: "res5b_branch2b" + name: "bn5b_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5b_branch2b" + top: "res5b_branch2b" + name: "scale5b_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res5b_branch2b" + top: "res5b_branch2b" + name: "res5b_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res5b_branch2b" + top: "res5b_branch2c" + name: "res5b_branch2c" + type: "Convolution" + convolution_param { + num_output: 2048 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res5b_branch2c" + top: "res5b_branch2c" + name: "bn5b_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5b_branch2c" + top: "res5b_branch2c" + name: "scale5b_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res5a" + bottom: "res5b_branch2c" + top: "res5b" + name: "res5b" + type: "Eltwise" +} + +layer { + bottom: "res5b" + top: "res5b" + name: "res5b_relu" + type: "ReLU" +} + +layer { + bottom: "res5b" + top: "res5c_branch2a" + name: "res5c_branch2a" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res5c_branch2a" + top: "res5c_branch2a" + name: "bn5c_branch2a" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5c_branch2a" + top: "res5c_branch2a" + name: "scale5c_branch2a" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res5c_branch2a" + top: "res5c_branch2a" + name: "res5c_branch2a_relu" + type: "ReLU" +} + +layer { + bottom: "res5c_branch2a" + top: "res5c_branch2b" + name: "res5c_branch2b" + type: "Convolution" + convolution_param { + num_output: 512 + kernel_size: 3 + pad: 1 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res5c_branch2b" + top: "res5c_branch2b" + name: "bn5c_branch2b" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5c_branch2b" + top: "res5c_branch2b" + name: "scale5c_branch2b" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res5c_branch2b" + top: "res5c_branch2b" + name: "res5c_branch2b_relu" + type: "ReLU" +} + +layer { + bottom: "res5c_branch2b" + top: "res5c_branch2c" + name: "res5c_branch2c" + type: "Convolution" + convolution_param { + num_output: 2048 + kernel_size: 1 + pad: 0 + stride: 1 + bias_term: false + } +} + +layer { + bottom: "res5c_branch2c" + top: "res5c_branch2c" + name: "bn5c_branch2c" + type: "BatchNorm" + batch_norm_param { + use_global_stats: true + } +} + +layer { + bottom: "res5c_branch2c" + top: "res5c_branch2c" + name: "scale5c_branch2c" + type: "Scale" + scale_param { + bias_term: true + } +} + +layer { + bottom: "res5b" + bottom: "res5c_branch2c" + top: "res5c" + name: "res5c" + type: "Eltwise" +} + +layer { + bottom: "res5c" + top: "res5c" + name: "res5c_relu" + type: "ReLU" +} + +layer { + bottom: "res5c" + top: "pool5" + name: "pool5" + type: "Pooling" + pooling_param { + kernel_h: 23 + kernel_w: 40 + stride: 1 + pool: AVE + } +} + +layer { + bottom: "pool5" + top: "fc1000" + name: "fc1000" + type: "InnerProduct" + inner_product_param { + num_output: 1000 + weight_filler { + type: "msra" + } + } +} + +layer { + bottom: "fc1000" + top: "prob" + name: "prob" + type: "Softmax" +} +