From 06d317e2e538d49fa3c11ed51895d5ead265f164 Mon Sep 17 00:00:00 2001 From: Shunta Saito Date: Tue, 10 Jul 2018 00:50:01 +0900 Subject: [PATCH 1/8] Fix the URL of tr_plus_indices.mat --- datasets/download.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets/download.sh b/datasets/download.sh index 863df918..078c064c 100644 --- a/datasets/download.sh +++ b/datasets/download.sh @@ -11,7 +11,7 @@ wget http://vision.grasp.upenn.edu/video/FLIC-full.zip unzip FLIC-full.zip rm -rf FLIC-full.zip cd FLIC-full -wget http://cims.nyu.edu/~tompson/data/tr_plus_indices.mat +wget https://cims.nyu.edu/~tompson/data/tr_plus_indices.mat cd .. # Get LSP Extended Training Dataset From e5cb7fc3f214e6a7eb09737a9adc485a7f8d8b9c Mon Sep 17 00:00:00 2001 From: Shunta Saito Date: Tue, 17 Jul 2018 16:41:11 +0900 Subject: [PATCH 2/8] Update download.sh --- datasets/download.sh | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/datasets/download.sh b/datasets/download.sh index 078c064c..bdd19f9e 100644 --- a/datasets/download.sh +++ b/datasets/download.sh @@ -7,27 +7,33 @@ fi cd data # get FLIC-full dataset and FLIC-Plus annotations -wget http://vision.grasp.upenn.edu/video/FLIC-full.zip -unzip FLIC-full.zip -rm -rf FLIC-full.zip -cd FLIC-full -wget https://cims.nyu.edu/~tompson/data/tr_plus_indices.mat -cd .. +if [ ! -f FLIC-full/tr_plus_indices.mat ]; then + wget http://vision.grasp.upenn.edu/video/FLIC-full.zip + unzip FLIC-full.zip + rm -rf FLIC-full.zip + cd FLIC-full + wget https://cims.nyu.edu/~tompson/data/tr_plus_indices.mat + cd .. +fi # Get LSP Extended Training Dataset -wget http://www.comp.leeds.ac.uk/mat4saj/lspet_dataset.zip -unzip lspet_dataset.zip -rm -rf lspet_dataset.zip -mkdir lspet_dataset -mv images lspet_dataset/ -mv joints.mat lspet_dataset/ -mv README.txt lspet_dataset/ +if [ ! 
-d lspet_dataset ]; then + wget http://www.comp.leeds.ac.uk/mat4saj/lspet_dataset.zip + unzip lspet_dataset.zip + rm -rf lspet_dataset.zip + mkdir lspet_dataset + mv images lspet_dataset/ + mv joints.mat lspet_dataset/ + mv README.txt lspet_dataset/ +fi # Get Annotations -wget http://datasets.d2.mpi-inf.mpg.de/leonid14cvpr/mpii_human_pose_v1_u12_1.tar.gz -tar zxvf mpii_human_pose_v1_u12_1.tar.gz -rm -rf mpii_human_pose_v1_u12_1.tar.gz -mv mpii_human_pose_v1_u12_1 mpii +if [ ! -d mpii ]; then + wget http://datasets.d2.mpi-inf.mpg.de/leonid14cvpr/mpii_human_pose_v1_u12_1.tar.gz + tar zxvf mpii_human_pose_v1_u12_1.tar.gz + rm -rf mpii_human_pose_v1_u12_1.tar.gz + mv mpii_human_pose_v1_u12_1 mpii +fi # Get Images cd mpii From a1f1e90ff75de6dc380a7ce8bc50b7ba1b202f90 Mon Sep 17 00:00:00 2001 From: Shunta Saito Date: Tue, 17 Jul 2018 17:05:15 +0900 Subject: [PATCH 3/8] Fix download.sh --- README.md | 46 +++++++++----------------------------------- datasets/download.sh | 17 +--------------- models/AlexNet.py | 25 ++++++++++++------------ 3 files changed, 22 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 190de4aa..cb686287 100644 --- a/README.md +++ b/README.md @@ -5,48 +5,20 @@ NOTE: This is not official implementation. Original paper is [DeepPose: Human Po # Requirements - Python 3.5.1+ - - [Chainer 1.13.0+](https://github.com/pfnet/chainer) - - numpy 1.9+ - - scikit-image 0.11.3+ - - OpenCV 3.1.0+ + - [Chainer](https://chainer.org/) 4.2.0 + - [CuPy](https://cupy.chainer.org/) 4.2.0 + - [ChainerCV](http://chainercv.readthedocs.io/en/stable/index.html) 0.10.0 + - [NumPy](http://numpy.org/) 1.14.5 + - [opencv-python](https://pypi.org/project/opencv-python/) 3.4.1.15 -I strongly recommend to use Anaconda environment. This repo may be able to be used in Python 2.7 environment, but I haven't tested. 
+# Download Datasets -## Installation of dependencies - -``` -pip install chainer -pip install numpy -pip install scikit-image -# for python3 -conda install -c https://conda.binstar.org/menpo opencv3 -# for python2 -conda install opencv ``` - -# Dataset preparation - +bash download.sh ``` -bash datasets/download.sh -python datasets/flic_dataset.py -python datasets/lsp_dataset.py -python datasets/mpii_dataset.py -``` - -- [FLIC-full dataset](http://vision.grasp.upenn.edu/cgi-bin/index.php?n=VideoLearning.FLIC) -- [LSP Extended dataset](http://www.comp.leeds.ac.uk/mat4saj/lspet_dataset.zip) -- **MPII dataset** - - [Annotation](http://datasets.d2.mpi-inf.mpg.de/leonid14cvpr/mpii_human_pose_v1_u12_1.tar.gz) - - [Images](http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1.tar.gz) - -## MPII Dataset -- [MPII Human Pose Dataset](http://human-pose.mpi-inf.mpg.de/#download) -- training images: 18079, test images: 6908 - - test images don't have any annotations - - so we split trining imges into training/test joint set - - each joint set has -- training joint set: 17928, test joint set: 1991 +- [FLIC-full dataset](https://bensapp.github.io/flic-dataset.html) +- [LSP Extended dataset](http://sam.johnson.io/research/lspet.html) # Start training diff --git a/datasets/download.sh b/datasets/download.sh index bdd19f9e..a14712ca 100644 --- a/datasets/download.sh +++ b/datasets/download.sh @@ -18,7 +18,7 @@ fi # Get LSP Extended Training Dataset if [ ! -d lspet_dataset ]; then - wget http://www.comp.leeds.ac.uk/mat4saj/lspet_dataset.zip + wget http://sam.johnson.io/research/lspet_dataset.zip unzip lspet_dataset.zip rm -rf lspet_dataset.zip mkdir lspet_dataset @@ -27,18 +27,3 @@ if [ ! -d lspet_dataset ]; then mv README.txt lspet_dataset/ fi -# Get Annotations -if [ ! 
-d mpii ]; then - wget http://datasets.d2.mpi-inf.mpg.de/leonid14cvpr/mpii_human_pose_v1_u12_1.tar.gz - tar zxvf mpii_human_pose_v1_u12_1.tar.gz - rm -rf mpii_human_pose_v1_u12_1.tar.gz - mv mpii_human_pose_v1_u12_1 mpii -fi - -# Get Images -cd mpii -wget http://datasets.d2.mpi-inf.mpg.de/andriluka14cvpr/mpii_human_pose_v1.tar.gz -tar zxvf mpii_human_pose_v1.tar.gz -rm -rf mpii_human_pose_v1.tar.gz - -cd .. diff --git a/models/AlexNet.py b/models/AlexNet.py index 563b4284..6f447207 100644 --- a/models/AlexNet.py +++ b/models/AlexNet.py @@ -15,17 +15,16 @@ class AlexNet(chainer.Chain): def __init__(self, n_joints): - super(AlexNet, self).__init__( - conv1=L.Convolution2D(3, 96, 11, stride=4, pad=1), - conv2=L.Convolution2D(96, 256, 5, stride=1, pad=2), - conv3=L.Convolution2D(256, 384, 3, stride=1, pad=1), - conv4=L.Convolution2D(384, 384, 3, stride=1, pad=1), - conv5=L.Convolution2D(384, 256, 3, stride=1, pad=1), - fc6=L.Linear(9216, 4096), - fc7=L.Linear(4096, 4096), - fc8=L.Linear(4096, n_joints * 2) - ) - self.train = True + super(AlexNet, self).__init__() + with self.init_scope(): + self.conv1 = L.Convolution2D(3, 96, 11, stride=4, pad=1) + self.conv2 = L.Convolution2D(96, 256, 5, stride=1, pad=2) + self.conv3 = L.Convolution2D(256, 384, 3, stride=1, pad=1) + self.conv4 = L.Convolution2D(384, 384, 3, stride=1, pad=1) + self.conv5 = L.Convolution2D(384, 256, 3, stride=1, pad=1) + self.fc6 = L.Linear(9216, 4096) + self.fc7 = L.Linear(4096, 4096) + self.fc8 = L.Linear(4096, n_joints * 2) def __call__(self, x): h = F.relu(self.conv1(x)) @@ -41,7 +40,7 @@ def __call__(self, x): h = F.relu(self.conv5(h)) h = F.max_pooling_2d(h, 3, stride=2) - h = F.dropout(F.relu(self.fc6(h)), train=self.train, ratio=0.6) - h = F.dropout(F.relu(self.fc7(h)), train=self.train, ratio=0.6) + h = F.dropout(F.relu(self.fc6(h)), ratio=0.6) + h = F.dropout(F.relu(self.fc7(h)), ratio=0.6) return self.fc8(h) From adeec685b1d5a9a26223c3038a7bbed90823364a Mon Sep 17 00:00:00 2001 From: Shunta 
Saito Date: Fri, 23 Nov 2018 04:32:53 +0900 Subject: [PATCH 4/8] Add Dockerfile --- docker/Dockerfile | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docker/Dockerfile diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 00000000..e69de29b From c98c1143e012fec32332075cae17add2309a85f5 Mon Sep 17 00:00:00 2001 From: Shunta Saito Date: Tue, 16 Jul 2019 12:06:30 +0000 Subject: [PATCH 5/8] Update for the latest Chainer --- .gitignore | 7 +- README.md | 32 +- datasets/download.sh | 29 -- datasets/flic_dataset.py | 81 ---- deeppose/__init__.py | 3 + deeppose/datasets/__init__.py | 0 deeppose/datasets/flic_dataset.py | 79 ++++ .../datasets}/lsp_dataset.py | 0 .../datasets}/mpii_dataset.py | 0 .../loss.py => deeppose/functions/l2_loss.py | 8 +- deeppose/models/AlexNet.py | 47 +++ deeppose/models/__init__.py | 1 + deeppose/models/alexnet.py | 50 +++ deeppose/models/deeppose.py | 17 + deeppose/utils/__init__.py | 0 deeppose/utils/common.py | 43 ++ deeppose/utils/flic_utils.py | 82 ++++ models/AlexNet.py | 46 --- models/ResNet50.py | 107 ----- models/VGG_flic.py | 84 ---- scripts/cmd_options.py | 134 ------- {shells => scripts}/create_anime.sh | 0 scripts/dataset.py | 189 --------- scripts/download.sh | 22 ++ scripts/draw_loss.py | 81 ---- scripts/evaluate_flic.py | 284 -------------- scripts/logger.py | 45 --- scripts/train.py | 369 +++++++----------- shells/train_flic.sh | 31 -- shells/train_lsp.sh | 31 -- shells/train_mpii.sh | 31 -- tests/test_alexnet.py | 28 ++ tests/test_dataset.py | 246 ------------ tests/test_flic_dataset.py | 47 +++ 34 files changed, 586 insertions(+), 1668 deletions(-) delete mode 100644 datasets/download.sh delete mode 100644 datasets/flic_dataset.py create mode 100644 deeppose/__init__.py create mode 100644 deeppose/datasets/__init__.py create mode 100644 deeppose/datasets/flic_dataset.py rename {datasets => deeppose/datasets}/lsp_dataset.py (100%) rename {datasets => 
deeppose/datasets}/mpii_dataset.py (100%) rename scripts/loss.py => deeppose/functions/l2_loss.py (88%) create mode 100644 deeppose/models/AlexNet.py create mode 100644 deeppose/models/__init__.py create mode 100644 deeppose/models/alexnet.py create mode 100644 deeppose/models/deeppose.py create mode 100644 deeppose/utils/__init__.py create mode 100644 deeppose/utils/common.py create mode 100644 deeppose/utils/flic_utils.py delete mode 100644 models/AlexNet.py delete mode 100644 models/ResNet50.py delete mode 100644 models/VGG_flic.py delete mode 100644 scripts/cmd_options.py rename {shells => scripts}/create_anime.sh (100%) delete mode 100644 scripts/dataset.py create mode 100644 scripts/download.sh delete mode 100644 scripts/draw_loss.py delete mode 100644 scripts/evaluate_flic.py delete mode 100644 scripts/logger.py delete mode 100644 shells/train_flic.sh delete mode 100644 shells/train_lsp.sh delete mode 100644 shells/train_mpii.sh create mode 100644 tests/test_alexnet.py delete mode 100644 tests/test_dataset.py create mode 100644 tests/test_flic_dataset.py diff --git a/.gitignore b/.gitignore index d3f1bab0..6c6b7e68 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,9 @@ -# OSX specific -.DS_Store - # dirs data results -ompose + +# OSX specific +.DS_Store # sftp settings sftp-config.json diff --git a/README.md b/README.md index cb686287..479d5661 100644 --- a/README.md +++ b/README.md @@ -1,45 +1,35 @@ # DeepPose -NOTE: This is not official implementation. Original paper is [DeepPose: Human Pose Estimation via Deep Neural Networks](http://arxiv.org/abs/1312.4659). +**NOTE: This is NOT the official implementation.** + +This is an unofficial implementation of [DeepPose: Human Pose Estimation via Deep Neural Networks](http://arxiv.org/abs/1312.4659). 
# Requirements - Python 3.5.1+ - - [Chainer](https://chainer.org/) 4.2.0 - - [CuPy](https://cupy.chainer.org/) 4.2.0 - - [ChainerCV](http://chainercv.readthedocs.io/en/stable/index.html) 0.10.0 - - [NumPy](http://numpy.org/) 1.14.5 - - [opencv-python](https://pypi.org/project/opencv-python/) 3.4.1.15 + - [Chainer](https://chainer.org/)>=4.2.0 + - [CuPy](https://cupy.chainer.org/)>=4.2.0 + - [ChainerCV](http://chainercv.readthedocs.io/en/stable/index.html)>=0.10.0 + - [NumPy](http://numpy.org/)>=1.14.5 + - [opencv-python](https://pypi.org/project/opencv-python/)==3.4.5.20 # Download Datasets ``` -bash download.sh +bash datasets/download.sh ``` - [FLIC-full dataset](https://bensapp.github.io/flic-dataset.html) - [LSP Extended dataset](http://sam.johnson.io/research/lspet.html) -# Start training +# How to start training Starting with the prepared shells is the easiest way. If you want to run `train.py` with your own settings, please check the options first by `python scripts/train.py --help` and modify one of the following shells to customize training settings. ## For FLIC Dataset ``` -bash shells/train_flic.sh -``` - -## For LSP Dataset - -``` -bash shells/train_lsp.sh -``` - -## For MPII Dataset - -``` -bash shells/train_mpii.sh +python scripts/train.py -o results/$(date "+%Y-%m-%d_%H-%M-%S") ``` ### GPU memory requirement diff --git a/datasets/download.sh b/datasets/download.sh deleted file mode 100644 index a14712ca..00000000 --- a/datasets/download.sh +++ /dev/null @@ -1,29 +0,0 @@ -#! /bin/bash -# Copyright (c) 2016 Shunta Saito - -if [ ! -d data ]; then - mkdir data -fi -cd data - -# get FLIC-full dataset and FLIC-Plus annotations -if [ ! -f FLIC-full/tr_plus_indices.mat ]; then - wget http://vision.grasp.upenn.edu/video/FLIC-full.zip - unzip FLIC-full.zip - rm -rf FLIC-full.zip - cd FLIC-full - wget https://cims.nyu.edu/~tompson/data/tr_plus_indices.mat - cd .. -fi - -# Get LSP Extended Training Dataset -if [ ! 
-d lspet_dataset ]; then - wget http://sam.johnson.io/research/lspet_dataset.zip - unzip lspet_dataset.zip - rm -rf lspet_dataset.zip - mkdir lspet_dataset - mv images lspet_dataset/ - mv joints.mat lspet_dataset/ - mv README.txt lspet_dataset/ -fi - diff --git a/datasets/flic_dataset.py b/datasets/flic_dataset.py deleted file mode 100644 index 8e1cded3..00000000 --- a/datasets/flic_dataset.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from scipy.io import loadmat - -import numpy as np - -crop_sizes = { - '12-oc': (0, 0), - 'along': (0, 0), - 'batma': (0, 0), - 'bend-': (0, 0), - 'ten-c': (0, 0), - 'giant': (42, 396), - 'princ': (10, 464), - 'schin': (6, 461), - 'others': (56, 364) -} - - -def get_joint_list(joints): - head = np.asarray(joints['reye']) + \ - np.asarray(joints['leye']) + \ - np.asarray(joints['nose']) - head /= 3 - del joints['reye'] - del joints['leye'] - del joints['nose'] - joints['head'] = head.tolist() - joint_pos = [joints['lwri']] - joint_pos.append(joints['lelb']) - joint_pos.append(joints['lsho']) - joint_pos.append(joints['head']) - joint_pos.append(joints['rsho']) - joint_pos.append(joints['relb']) - joint_pos.append(joints['rwri']) - - return np.array(joint_pos).flatten() - - -def save_crop_images_and_joints(): - training_indices = loadmat('data/FLIC-full/tr_plus_indices.mat') - training_indices = training_indices['tr_plus_indices'].flatten() - - examples = loadmat('data/FLIC-full/examples.mat') - examples = examples['examples'][0] - joint_ids = ['lsho', 'lelb', 'lwri', 'rsho', 'relb', 'rwri', 'lhip', - 'lkne', 'lank', 'rhip', 'rkne', 'rank', 'leye', 'reye', - 'lear', 'rear', 'nose', 'msho', 'mhip', 'mear', 'mtorso', - 'mluarm', 'mruarm', 'mllarm', 'mrlarm', 'mluleg', 'mruleg', - 'mllleg', 'mrlleg'] - - available = 
joint_ids[:8] - available.extend(joint_ids[12:14]) - available.extend([joint_ids[16]]) - - target_joints = ['lsho', 'lelb', 'lwri', - 'leye', 'reye', 'nose', - 'rsho', 'relb', 'rwri'] - - fp_train = open('data/FLIC-full/train_joints.csv', 'w') - fp_test = open('data/FLIC-full/test_joints.csv', 'w') - for i, example in enumerate(examples): - joint = example[2].T - joint = dict(zip(joint_ids, joint)) - fname = example[3][0] - joint = get_joint_list(joint) - msg = '{},{}'.format(fname, ','.join([str(j) for j in joint.tolist()])) - if i in training_indices: - print(msg, file=fp_train) - else: - print(msg, file=fp_test) - - -if __name__ == '__main__': - save_crop_images_and_joints() diff --git a/deeppose/__init__.py b/deeppose/__init__.py new file mode 100644 index 00000000..f60df55a --- /dev/null +++ b/deeppose/__init__.py @@ -0,0 +1,3 @@ +from deeppose import datasets +from deeppose import models +from deeppose import utils \ No newline at end of file diff --git a/deeppose/datasets/__init__.py b/deeppose/datasets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/deeppose/datasets/flic_dataset.py b/deeppose/datasets/flic_dataset.py new file mode 100644 index 00000000..54f3e4d1 --- /dev/null +++ b/deeppose/datasets/flic_dataset.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Shunta Saito + +import os +import io +import zipfile + +import cv2 +import numpy as np +from chainercv.chainer_experimental.datasets import sliceable +from scipy.io import loadmat +from deeppose.utils import flic_utils +import threading + + +class FLICDataset(sliceable.GetterDataset): + + def __init__(self, split='train', dataset_zip_path='data/FLIC.zip'): + super().__init__() + self.dataset_zip_path = dataset_zip_path + self.zf = zipfile.ZipFile(self.dataset_zip_path) + self.zf_pid = os.getpid() + self.img_paths = [fn for fn in self.zf.namelist() if fn.endswith('.jpg')] + + examples = 
loadmat(io.BytesIO(self.zf.read('FLIC/examples.mat')))['examples'][0]
+        if split == 'train':
+            self.examples = [e for e in examples if e['istrain'][0][0] == 1]
+        elif split == 'test':
+            self.examples = [e for e in examples if e['istest'][0][0] == 1]
+        else:
+            raise ValueError('\'split\' argument should be either \'train\' or \'test\'.')
+
+        joint_names = flic_utils.flic_joint_names
+        available_joints = flic_utils.flic_available_joints
+        self.available_joint_ids = [joint_names.index(a) for a in available_joints]
+
+        self.add_getter('img', self._get_image)
+        self.add_getter('point', self._get_point)
+        self.lock = threading.Lock()
+
+    def __len__(self):
+        return len(self.examples)
+
+    def __getstate__(self):
+        d = self.__dict__.copy()
+        d['zf'] = None
+        d['lock'] = None
+        return d
+
+    def __setstate__(self, state):
+        self.__dict__ = state
+        self.lock = threading.Lock()
+
+    def _get_image(self, i):
+        """Extract image from the zipfile.
+
+        Returns:
+            img (ndarray): The shape is (C, H, W) and the channel follows RGB order (NOT BGR!). 
+ """ + with self.lock: + if self.zf is None or self.zf_pid != os.getpid(): + self.zf_pid = os.getpid() + self.zf = zipfile.ZipFile(self.dataset_zip_path) + image_data = self.zf.read('FLIC/images/{}'.format(self.examples[i][3][0])) + + image_file = np.frombuffer(image_data, np.uint8) + img = cv2.imdecode(image_file, cv2.IMREAD_COLOR) + assert len(img.shape) == 3 and img.shape[2] == 3, "The image has wrong shape: {}".format(img.shape) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.asarray(img, dtype=np.float32) + img = img.transpose((2, 0, 1)) + + return img + + def _get_point(self, i): + point = self.examples[i][2].T[self.available_joint_ids].astype(np.float32) + return point[:, ::-1] # (x, y) -> (y, x) + diff --git a/datasets/lsp_dataset.py b/deeppose/datasets/lsp_dataset.py similarity index 100% rename from datasets/lsp_dataset.py rename to deeppose/datasets/lsp_dataset.py diff --git a/datasets/mpii_dataset.py b/deeppose/datasets/mpii_dataset.py similarity index 100% rename from datasets/mpii_dataset.py rename to deeppose/datasets/mpii_dataset.py diff --git a/scripts/loss.py b/deeppose/functions/l2_loss.py similarity index 88% rename from scripts/loss.py rename to deeppose/functions/l2_loss.py index ffe2b7c2..df21283a 100644 --- a/scripts/loss.py +++ b/deeppose/functions/l2_loss.py @@ -2,18 +2,14 @@ # -*- coding: utf-8 -*- # Copyright (c) 2016 Shunta Saito -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals from chainer import reporter import chainer -class MeanSquaredError(chainer.Function): +class L2Loss(chainer.FunctionNode): - """Mean squared error (a.k.a. Euclidean loss) function. + """L2 loss function. 
In forward method, it calculates mean squared error between two variables
 with ignoring all elements that the value of ignore_joints at the same
diff --git a/deeppose/models/AlexNet.py b/deeppose/models/AlexNet.py
new file mode 100644
index 00000000..bcf7505a
--- /dev/null
+++ b/deeppose/models/AlexNet.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (c) 2016 Shunta Saito
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import chainer
+import chainer.functions as F
+import chainer.links as L
+
+
+class AlexNet(chainer.Chain):
+
+    """Single-GPU AlexNet without partition toward the channel axis."""
+
+    insize = 227
+
+    def __init__(self):
+        super(AlexNet, self).__init__()
+        with self.init_scope():
+            self.conv1 = L.Convolution2D(None, 96, 11, stride=4)
+            self.conv2 = L.Convolution2D(None, 256, 5, pad=2)
+            self.conv3 = L.Convolution2D(None, 384, 3, pad=1)
+            self.conv4 = L.Convolution2D(None, 384, 3, pad=1)
+            self.conv5 = L.Convolution2D(None, 256, 3, pad=1)
+            self.fc6 = L.Linear(None, 4096)
+            self.fc7 = L.Linear(None, 4096)
+            self.fc8 = L.Linear(None, 1000)
+
+    def forward(self, x, t):
+        h = F.max_pooling_2d(F.local_response_normalization(
+            F.relu(self.conv1(x))), 3, stride=2)
+        h = F.max_pooling_2d(F.local_response_normalization(
+            F.relu(self.conv2(h))), 3, stride=2)
+        h = F.relu(self.conv3(h))
+        h = F.relu(self.conv4(h))
+        h = F.max_pooling_2d(F.relu(self.conv5(h)), 3, stride=2)
+        h = F.dropout(F.relu(self.fc6(h)))
+        h = F.dropout(F.relu(self.fc7(h)))
+        h = self.fc8(h)
+
+        loss = F.softmax_cross_entropy(h, t)
+        chainer.report({'loss': loss, 'accuracy': F.accuracy(h, t)}, self)
+        return loss
\ No newline at end of file
diff --git a/deeppose/models/__init__.py b/deeppose/models/__init__.py
new file mode 100644
index 00000000..9bba3727
--- /dev/null
+++ b/deeppose/models/__init__.py
@@ -0,0 +1 @@
+from deeppose.models.alexnet import 
AlexNet \ No newline at end of file diff --git a/deeppose/models/alexnet.py b/deeppose/models/alexnet.py new file mode 100644 index 00000000..cb73eccc --- /dev/null +++ b/deeppose/models/alexnet.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Shunta Saito + +import chainer +import chainer.functions as F +import chainer.links as L +from chainercv.links import Conv2DActiv +from chainercv.links import PickableSequentialChain + + +class AlexNet(PickableSequentialChain): + + """Single-GPU AlexNet without partition toward the channel axis.""" + + insize = 220 + + def __init__(self, n_class=22): + super().__init__() + with self.init_scope(): + self.conv1 = Conv2DActiv(None, 96, 11, 4) + self.lrn1 = _local_responce_normalization + self.pool1 = _max_pooling_2d + + self.conv2 = Conv2DActiv(None, 256, 5, 2) + self.lrn2 = _local_responce_normalization + self.pool2 = _max_pooling_2d + + self.conv3 = Conv2DActiv(None, 384, 3, pad=1) + self.conv4 = Conv2DActiv(None, 384, 3, pad=1) + self.conv5 = Conv2DActiv(None, 256, 3, pad=1) + self.pool5 = _max_pooling_2d + + self.fc6 = L.Linear(None, 4096) + self.dropout1 = _dropout + self.fc7 = L.Linear(None, 4096) + self.dropout2 = _dropout + self.fc8 = L.Linear(None, n_class) + + +def _max_pooling_2d(x): + return F.max_pooling_2d(x, ksize=3, stride=2) + + +def _local_responce_normalization(x): + return F.local_response_normalization(x) + + +def _dropout(x): + return F.dropout(x, ratio=0.6) \ No newline at end of file diff --git a/deeppose/models/deeppose.py b/deeppose/models/deeppose.py new file mode 100644 index 00000000..2560876d --- /dev/null +++ b/deeppose/models/deeppose.py @@ -0,0 +1,17 @@ +import chainer +import chainer.links as L +import numpy as np +from deeppose import models + + +class DeepPose(chainer.Chain): + + def __init__(self, extractor, n_point=22): + super().__init__() + with self.init_scope(): + self.extractor = extractor + self.fc = L.Linear(None, n_point) + + def forward(self, 
x):
+        feat = self.extractor(x)
+        return self.fc(feat)
diff --git a/deeppose/utils/__init__.py b/deeppose/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/deeppose/utils/common.py b/deeppose/utils/common.py
new file mode 100644
index 00000000..000f9609
--- /dev/null
+++ b/deeppose/utils/common.py
@@ -0,0 +1,43 @@
+import numpy as np
+from chainercv import transforms
+
+
+def crop_with_joints(img, point, scale_h=1.5, sacle_w=1.2):
+    min_y, min_x = point.min(axis=0)
+    max_y, max_x = point.max(axis=0)
+
+    width = max_x - min_x
+    height = max_y - min_y
+
+    new_width = width * sacle_w
+    new_height = height * scale_h
+
+    center_x = (max_x + min_x) / 2
+    center_y = (max_y + min_y) / 2
+
+    _, img_height, img_width = img.shape
+    new_min_x = int(np.clip(center_x - new_width / 2, 0, img_width))
+    new_max_x = int(np.clip(new_min_x + new_width, 0, img_width))
+    new_min_y = int(np.clip(center_y - new_height / 2, 0, img_height))
+    new_max_y = int(np.clip(new_min_y + new_height, 0, img_height))
+
+    crop = img[:, new_min_y:new_max_y, new_min_x:new_max_x]
+    point = point - np.array([new_min_y, new_min_x])
+
+    return crop, point
+
+
+def to_square(img, point, size=(220, 220)):
+    in_size = img.shape[1:]  # (H, W)
+    img = transforms.resize(img, size)
+    point = transforms.resize_point([point], in_size, size)
+
+    return img, point[0]
+
+
+def lr_flip(img, point):
+    _, height, width = img.shape
+    img = transforms.flip(img, x_flip=True)
+    point = transforms.flip_point([point], (height, width), x_flip=True)[0]
+
+    return img, point
diff --git a/deeppose/utils/flic_utils.py b/deeppose/utils/flic_utils.py
new file mode 100644
index 00000000..9b0ab417
--- /dev/null
+++ b/deeppose/utils/flic_utils.py
@@ -0,0 +1,82 @@
+import cv2
+import numpy as np
+
+
+flic_available_joints = [
+    'L_Shoulder',
+    'L_Elbow',
+    'L_Wrist',
+    'R_Shoulder',
+    'R_Elbow',
+    'R_Wrist',
+    'L_Hip',
+    'R_Hip',
+    'L_Eye',
+    'R_Eye',
+    'Nose',
+]
+
+flic_joint_names = [
+    # Body
+    'L_Shoulder',
+    
'L_Elbow', + 'L_Wrist', + 'R_Shoulder', + 'R_Elbow', + 'R_Wrist', + 'L_Hip', + 'L_Knee', + 'L_Ankle', + 'R_Hip', + 'R_Knee', + 'R_Ankle', + # Face + 'L_Eye', + 'R_Eye', + 'L_Ear', + 'R_Ear', + 'Nose', + # ? + 'M_Shoulder', + 'M_Hip', + 'M_Ear', + 'M_Torso', + 'M_LUpperArm', + 'M_RUpperArm', + 'M_LLowerArm', + 'M_RLowerArm', + 'M_LUpperLeg', + 'M_RUpperLeg', + 'M_LLowerLeg', + 'M_RLowerLeg', +] + +flic_joint_pairs = [ + (0, 1), + (1, 2), + (3, 4), + (4, 5), + (6, 0), + (7, 3), + (8, 10), + (9, 10), + (0, 3), + (6, 7) +] + + +def draw_joints(img, point): + img = img.transpose(1, 2, 0) # (C, H, W) -> (H, W, C) + img = img[:, :, ::-1] # RGB -> BGR + img = img.astype(np.uint8) + + for start_i, end_i in flic_joint_pairs: + st = tuple(int(v) for v in point[start_i, ::-1]) + en = tuple(int(v) for v in point[end_i, ::-1]) + cv2.line(img, st, en, (0, 0, 255), 2, cv2.LINE_AA) + + for y, x in point: + cv2.circle(img, (int(x), int(y)), 3, (0, 0, 0), -1, cv2.LINE_AA) + cv2.circle(img, (int(x), int(y)), 2, (255, 255, 255), -1, cv2.LINE_AA) + + return img[:, :, ::-1] diff --git a/models/AlexNet.py b/models/AlexNet.py deleted file mode 100644 index 6f447207..00000000 --- a/models/AlexNet.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import chainer -import chainer.functions as F -import chainer.links as L - - -class AlexNet(chainer.Chain): - - def __init__(self, n_joints): - super(AlexNet, self).__init__() - with self.init_scope(): - self.conv1 = L.Convolution2D(3, 96, 11, stride=4, pad=1) - self.conv2 = L.Convolution2D(96, 256, 5, stride=1, pad=2) - self.conv3 = L.Convolution2D(256, 384, 3, stride=1, pad=1) - self.conv4 = L.Convolution2D(384, 384, 3, stride=1, pad=1) - self.conv5 = L.Convolution2D(384, 256, 3, stride=1, pad=1) - self.fc6 = L.Linear(9216, 
4096) - self.fc7 = L.Linear(4096, 4096) - self.fc8 = L.Linear(4096, n_joints * 2) - - def __call__(self, x): - h = F.relu(self.conv1(x)) - h = F.max_pooling_2d(h, 3, stride=2) - h = F.local_response_normalization(h) - - h = F.relu(self.conv2(h)) - h = F.max_pooling_2d(h, 3, stride=2) - h = F.local_response_normalization(h) - - h = F.relu(self.conv3(h)) - h = F.relu(self.conv4(h)) - h = F.relu(self.conv5(h)) - h = F.max_pooling_2d(h, 3, stride=2) - - h = F.dropout(F.relu(self.fc6(h)), ratio=0.6) - h = F.dropout(F.relu(self.fc7(h)), ratio=0.6) - - return self.fc8(h) diff --git a/models/ResNet50.py b/models/ResNet50.py deleted file mode 100644 index 8a4dfac7..00000000 --- a/models/ResNet50.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import chainer -import chainer.functions as F -import chainer.links as L -import math -import numpy as np - - -class BottleNeckA(chainer.Chain): - - def __init__(self, in_size, ch, out_size, stride=2): - w = math.sqrt(2) - super(BottleNeckA, self).__init__( - conv1=L.Convolution2D(in_size, ch, 1, stride, 0, w, nobias=True), - bn1=L.BatchNormalization(ch), - conv2=L.Convolution2D(ch, ch, 3, 1, 1, w, nobias=True), - bn2=L.BatchNormalization(ch), - conv3=L.Convolution2D(ch, out_size, 1, 1, 0, w, nobias=True), - bn3=L.BatchNormalization(out_size), - conv4=L.Convolution2D( - in_size, out_size, 1, stride, 0, w, nobias=True), - bn4=L.BatchNormalization(out_size), - ) - - def __call__(self, x, train): - h1 = F.relu(self.bn1(self.conv1(x), test=not train)) - h1 = F.relu(self.bn2(self.conv2(h1), test=not train)) - h1 = self.bn3(self.conv3(h1), test=not train) - h2 = self.bn4(self.conv4(x), test=not train) - - return F.relu(h1 + h2) - - -class BottleNeckB(chainer.Chain): - - def __init__(self, in_size, ch): - w = 
math.sqrt(2) - super(BottleNeckB, self).__init__( - conv1=L.Convolution2D(in_size, ch, 1, 1, 0, w, nobias=True), - bn1=L.BatchNormalization(ch), - conv2=L.Convolution2D(ch, ch, 3, 1, 1, w, nobias=True), - bn2=L.BatchNormalization(ch), - conv3=L.Convolution2D(ch, in_size, 1, 1, 0, w, nobias=True), - bn3=L.BatchNormalization(in_size), - ) - - def __call__(self, x, train): - h = F.relu(self.bn1(self.conv1(x), test=not train)) - h = F.relu(self.bn2(self.conv2(h), test=not train)) - h = self.bn3(self.conv3(h), test=not train) - - return F.relu(h + x) - - -class Block(chainer.Chain): - - def __init__(self, layer, in_size, ch, out_size, stride=2): - super(Block, self).__init__() - links = [('a', BottleNeckA(in_size, ch, out_size, stride))] - for i in range(layer - 1): - links += [('b_{}'.format(i + 1), BottleNeckB(out_size, ch))] - - for link in links: - self.add_link(*link) - self.forward = links - - def __call__(self, x, train): - for name, _ in self.forward: - f = getattr(self, name) - x = f(x, train) - - return x - - -class ResNet50(chainer.Chain): - - def __init__(self, n_joints): - self.train = True - w = math.sqrt(2) - super(ResNet50, self).__init__() - links = [('conv1', L.Convolution2D(3, 64, 7, 2, 3, w, nobias=True))] - links += [('bn1', L.BatchNormalization(64))] - links += [('res2', Block(3, 64, 64, 256, 1))] - links += [('res3', Block(4, 256, 128, 512))] - links += [('res4', Block(6, 512, 256, 1024))] - links += [('res5', Block(3, 1024, 512, 2048))] - links += [('out_fc', L.Linear(None, n_joints * 2))] - for link in links: - self.add_link(*link) - - def __call__(self, x): - h = self.bn1(self.conv1(x), test=not self.train) - h = F.max_pooling_2d(F.relu(h), 3, stride=2) - h = self.res2(h, self.train) - h = self.res3(h, self.train) - h = self.res4(h, self.train) - h = self.res5(h, self.train) - h = F.average_pooling_2d(h, h.data.shape[2], stride=1) - return self.out_fc(h) diff --git a/models/VGG_flic.py b/models/VGG_flic.py deleted file mode 100644 index 
1a5f00d8..00000000 --- a/models/VGG_flic.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import chainer -import chainer.functions as F -import chainer.links as L -import numpy as np - - -class VGG_BN(chainer.Chain): - - def __init__(self, n_joints): - self.train = True - - super(VGG_BN, self).__init__() - links = [('conv1_1', L.Convolution2D(3, 64, 3, stride=1, pad=1))] - links += [('bn1_1', L.BatchNormalization(64))] - links += [('conv1_2', L.Convolution2D(64, 64, 3, stride=1, pad=1))] - links += [('bn1_2', L.BatchNormalization(64))] - - links += [('conv2_1', L.Convolution2D(64, 128, 3, stride=1, pad=1))] - links += [('bn2_1', L.BatchNormalization(128))] - links += [('conv2_2', L.Convolution2D(128, 128, 3, stride=1, pad=1))] - links += [('bn2_2', L.BatchNormalization(128))] - - links += [('conv3_1', L.Convolution2D(128, 256, 3, stride=1, pad=1))] - links += [('bn3_1', L.BatchNormalization(256))] - links += [('conv3_2', L.Convolution2D(256, 256, 3, stride=1, pad=1))] - links += [('bn3_2', L.BatchNormalization(256))] - links += [('conv3_3', L.Convolution2D(256, 256, 3, stride=1, pad=1))] - links += [('bn3_3', L.BatchNormalization(256))] - - links += [('conv4_1', L.Convolution2D(256, 512, 3, stride=1, pad=1))] - links += [('bn4_1', L.BatchNormalization(512))] - links += [('conv4_2', L.Convolution2D(512, 512, 3, stride=1, pad=1))] - links += [('bn4_2', L.BatchNormalization(512))] - links += [('conv4_3', L.Convolution2D(512, 512, 3, stride=1, pad=1))] - links += [('bn4_3', L.BatchNormalization(512))] - - links += [('conv5_1', L.Convolution2D(512, 512, 3, stride=1, pad=1))] - links += [('bn5_1', L.BatchNormalization(512))] - links += [('conv5_2', L.Convolution2D(512, 512, 3, stride=1, pad=1))] - links += [('bn5_2', L.BatchNormalization(512))] - 
links += [('conv5_3', L.Convolution2D(512, 512, 3, stride=1, pad=1))] - links += [('bn5_3', L.BatchNormalization(512))] - links += [('fc6', L.Linear(None, 4096))] - links += [('fc7', L.Linear(4096, 4096))] - links += [('fc8', L.Linear(4096, out_size ** 2))] - for link in links: - self.add_link(*link) - - def __call__(self, x): - h = F.relu(self.bn1_1(self.conv1_1(x), test=not self.train)) - h = F.relu(self.bn1_2(self.conv1_2(h), test=not self.train)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.relu(self.bn2_1(self.conv2_1(h), test=not self.train)) - h = F.relu(self.bn2_2(self.conv2_2(h), test=not self.train)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.relu(self.bn3_1(self.conv3_1(h), test=not self.train)) - h = F.relu(self.bn3_2(self.conv3_2(h), test=not self.train)) - h = F.relu(self.bn3_3(self.conv3_3(h), test=not self.train)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.relu(self.bn4_1(self.conv4_1(h), test=not self.train)) - h = F.relu(self.bn4_2(self.conv4_2(h), test=not self.train)) - h = F.relu(self.bn4_3(self.conv4_3(h), test=not self.train)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.relu(self.bn5_1(self.conv5_1(h), test=not self.train)) - h = F.relu(self.bn5_2(self.conv5_2(h), test=not self.train)) - h = F.relu(self.bn5_3(self.conv5_3(h), test=not self.train)) - h = F.max_pooling_2d(h, 2, stride=2) - - h = F.relu(self.fc6(h)) - h = F.relu(self.fc7(h)) - return self.fc8(h) diff --git a/scripts/cmd_options.py b/scripts/cmd_options.py deleted file mode 100644 index fd83e5dc..00000000 --- a/scripts/cmd_options.py +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import argparse - - -def get_arguments(): - parser = argparse.ArgumentParser() - - # Basic parameters - parser.add_argument('--epoch', type=int, default=100) - 
parser.add_argument('--batchsize', type=int, default=2) - parser.add_argument('--snapshot', type=int, default=10) - parser.add_argument('--channel', type=int, default=3) - parser.add_argument('--test_freq', type=int, default=10) - parser.add_argument('--seed', type=int, default=1701) - parser.add_argument('--ignore_label', type=float, default=-1) - parser.add_argument( - '--model', type=str, default='models/AlexNet_flic.py', - help='Model definition file in models dir') - parser.add_argument( - '--gpus', type=str, default='0', - help='Comma-separated GPU IDs (e.g., "0,1,2")') - parser.add_argument( - '--train_csv_fn', type=str, - default='data/FLIC-full/train_joints.csv') - parser.add_argument( - '--test_csv_fn', type=str, - default='data/FLIC-full/test_joints.csv') - parser.add_argument( - '--img_dir', type=str, - default='data/FLIC-full/images') - parser.add_argument( - '--valid_freq', type=int, default=5, - help='Perform test every this epoch (0 means no test)') - parser.add_argument( - '--show_log_iter', type=int, default=10, - help='Show loss value per this iterations') - - # Data argumentation settings - parser.add_argument( - '--im_size', type=int, default=220, - help='Resize input image into this big') - parser.add_argument( - '--fliplr', action='store_true', default=False, - help=('Flip image\'s left and right for data augmentation')) - parser.add_argument( - '--rotate', action='store_true', default=False, - help=('Randomly rotate images for data augmentation')) - parser.add_argument( - '--rotate_range', type=int, default=10, - help=('The max angle(degree) of rotation for data augmentation')) - parser.add_argument( - '--zoom', action='store_true', default=False, - help=('Randomly zoom out/in images for data augmentation')) - parser.add_argument( - '--base_zoom', type=float, default=1.5, - help=('How big is the input image region comapred to bbox of joints')) - parser.add_argument( - '--zoom_range', type=float, default=0.2, - help=('The max zooming amount 
for data augmentation')) - parser.add_argument( - '--translate', action='store_true', default=False, - help=('Randomly translate images for data augmentation')) - parser.add_argument( - '--translate_range', type=int, default=5, - help=('The max size of random translation for data augmentation')) - parser.add_argument( - '--min_dim', type=int, default=0, - help='Minimum dimension of a person') - parser.add_argument( - '--coord_normalize', action='store_true', default=False, - help=('Perform normalization to all joint coordinates')) - parser.add_argument( - '--gcn', action='store_true', default=False, - help=('Perform global contrast normalization for each input image')) - - # Data configuration - parser.add_argument('--n_joints', type=int, default=7) - parser.add_argument( - '--fname_index', type=int, default=0, - help='the index of image file name in a csv line') - parser.add_argument( - '--joint_index', type=int, default=1, - help='the start index of joint values in a csv line') - parser.add_argument( - '--symmetric_joints', type=str, default='[[2, 4], [1, 5], [0, 6]]', - help='Symmetric joint ids in JSON format') - # flic_swap_joints = [(2, 4), (1, 5), (0, 6)] - # lsp_swap_joints = [(8, 9), (7, 10), (6, 11), (2, 3), (1, 4), (0, 5)] - # mpii_swap_joints = [(12, 13), (11, 14), (10, 15), (2, 3), (1, 4), (0, 5)] - - # Optimization settings - parser.add_argument( - '--opt', type=str, default='Adam', - choices=['MomentumSGD', 'Adam', 'AdaGrad', 'RMSprop'], - help='Optimization method') - parser.add_argument('--weight_decay', type=float, default=0.0005) - parser.add_argument('--adam_alpha', type=float, default=0.001) - parser.add_argument('--adam_beta1', type=float, default=0.9) - parser.add_argument('--adam_beta2', type=float, default=0.999) - parser.add_argument('--adam_eps', type=float, default=1e-8) - parser.add_argument('--lr', type=float, default=0.01) - parser.add_argument( - '--lr_decay_freq', type=int, default=10, - help='The learning rate will be decreased 
every this epoch') - parser.add_argument( - '--lr_decay_ratio', type=float, default=0.1, - help='When the learning rate is decreased, this number will be' - 'multiplied') - - # Resuming - parser.add_argument( - '--resume_model', type=str, default=None, - help='Load model definition file to use for resuming training') - parser.add_argument( - '--resume_param', type=str, default=None, - help='Load learnt model parameters from this file (it\'s necessary' - 'when you resume a training)') - parser.add_argument( - '--resume_opt', type=str, default=None, - help='Load optimization states from this file (it\'s necessary' - 'when you resume a training)') - - args = parser.parse_args() - args.epoch += 1 - - return args diff --git a/shells/create_anime.sh b/scripts/create_anime.sh similarity index 100% rename from shells/create_anime.sh rename to scripts/create_anime.sh diff --git a/scripts/dataset.py b/scripts/dataset.py deleted file mode 100644 index f3971954..00000000 --- a/scripts/dataset.py +++ /dev/null @@ -1,189 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from chainer.dataset import dataset_mixin -from skimage import transform - -import csv -import cv2 as cv -import json -import logging -import numpy as np -import os - - -class PoseDataset(dataset_mixin.DatasetMixin): - - def __init__(self, csv_fn, img_dir, im_size, fliplr, rotate, rotate_range, - zoom, base_zoom, zoom_range, translate, translate_range, - min_dim, coord_normalize, gcn, joint_num, fname_index, - joint_index, symmetric_joints, ignore_label): - for key, val in locals().items(): - setattr(self, key, val) - self.symmetric_joints = json.loads(symmetric_joints) - self.load_images() - logging.info('{} is ready'.format(csv_fn)) - - def get_available_joints(self, joints, ignore_joints): - _joints = [] - for i, 
joint in enumerate(joints): - if ignore_joints is not None \ - and (ignore_joints[i][0] == 0 or ignore_joints[i][1] == 0): - continue - _joints.append(joint) - return np.array(_joints) - - def calc_joint_center(self, joints): - x_center = (np.min(joints[:, 0]) + np.max(joints[:, 0])) / 2 - y_center = (np.min(joints[:, 1]) + np.max(joints[:, 1])) / 2 - return [x_center, y_center] - - def calc_joint_bbox_size(self, joints): - lt = np.min(joints, axis=0) - rb = np.max(joints, axis=0) - return rb[0] - lt[0], rb[1] - lt[1] - - def load_images(self): - self.images = {} - self.joints = [] - self.info = [] - for line in csv.reader(open(self.csv_fn)): - image_id = line[self.fname_index] - if image_id in self.images: - image = self.images[image_id] - else: - img_fn = '{}/{}'.format(self.img_dir, image_id) - assert os.path.exists(img_fn), \ - 'File not found: {}'.format(img_fn) - image = cv.imread(img_fn) - self.images[image_id] = image - - coords = [float(c) for c in line[self.joint_index:]] - joints = np.array(list(zip(coords[0::2], coords[1::2]))) - - # Ignore small label regions smaller than min_dim - ig = [0 if v == self.ignore_label else 1 for v in joints.flatten()] - ig = np.array(list(zip(ig[0::2], ig[1::2]))) - available_joints = self.get_available_joints(joints, ig) - bbox_w, bbox_h = self.calc_joint_bbox_size(available_joints) - if bbox_w < self.min_dim or bbox_h < self.min_dim: - continue - - self.joints.append((image_id, joints)) - center_x, center_y = self.calc_joint_center(available_joints) - self.info.append((ig, bbox_w, bbox_h, center_x, center_y)) - - def __len__(self): - return len(self.joints) - - def apply_fliplr(self, image, joints): - image = cv.flip(image, 1) - joints[:, 0] = (image.shape[1] - 1) - joints[:, 0] - for i, j in self.symmetric_joints: - joints[i], joints[j] = joints[j].copy(), joints[i].copy() - return image, joints - - def apply_zoom(self, image, joints, center_x, center_y, fx=None, fy=None): - joint_vecs = joints - np.array([center_x, 
center_y]) - if fx is None and fy is None: - zoom = 1.0 + np.random.uniform(-self.zoom_range, self.zoom_range) - fx, fy = zoom, zoom - image = cv.resize(image, None, fx=fx, fy=fy) - joint_vecs *= np.array([fx, fy]) - center_x, center_y = center_x * fx, center_y * fy - joints = joint_vecs + np.array([center_x, center_y]) - return image, joints, center_x, center_y - - def apply_translate(self, image, joints): - dx = np.random.randint(-self.translate_range, self.translate_range) - dy = np.random.randint(-self.translate_range, self.translate_range) - if dx > 0: - tmp = np.zeros_like(image) - tmp[:, dx:] = image[:, :image.shape[1] - dx] - image = tmp - else: - tmp = np.zeros_like(image) - tmp[:, :image.shape[1] + dx] = image[:, -dx:] - image = tmp - if dy > 0: - tmp = np.zeros_like(image) - tmp[dy:, :] = image[:image.shape[0] - dy, :] - image = tmp - else: - tmp = np.zeros_like(image) - tmp[:image.shape[0] + dy, :] = image[-dy:, :] - image = tmp - joints += np.array([dx, dy]) - return image, joints - - def apply_rotate(self, image, joints, ignore_joints): - available_joints = self.get_available_joints(joints, ignore_joints) - joint_center = self.calc_joint_center(available_joints) - angle = np.random.randint(0, self.rotate_range) - image = transform.rotate(image, angle, center=joint_center) - image = (image * 255).astype(np.uint8) - theta = -np.radians(angle) - c, s = np.cos(theta), np.sin(theta) - rot_mat = np.matrix([[c, -s], [s, c]]) - joints = rot_mat.dot((joints - joint_center).T).T + joint_center - return image, np.array(joints.tolist()) - - def crop_reshape(self, image, joints, bbox_w, bbox_h, center_x, center_y): - bbox_h, bbox_w = bbox_h * self.base_zoom, bbox_w * self.base_zoom - y_min = int(np.clip(center_y - bbox_h / 2, 0, image.shape[0])) - y_max = int(np.clip(center_y + bbox_h / 2, 0, image.shape[0])) - x_min = int(np.clip(center_x - bbox_w / 2, 0, image.shape[1])) - x_max = int(np.clip(center_x + bbox_w / 2, 0, image.shape[1])) - image = 
image[y_min:y_max, x_min:x_max] - joints -= np.array([x_min, y_min]) - fx, fy = self.im_size / image.shape[1], self.im_size / image.shape[0] - cx, cy = image.shape[1] // 2, image.shape[0] // 2 - image, joints = self.apply_zoom(image, joints, cx, cy, fx, fy)[:2] - return image, joints - - def apply_coord_normalize(self, image, joints): - h, w = image.shape[:2] - center_x, center_y = w // 2, h // 2 - joints -= np.array([center_x, center_y]) - joints[:, 0] /= w - joints[:, 1] /= h - return image, joints - - def apply_gcn(self, image, joints): - image = image.astype(np.float) - image -= image.reshape(-1, 3).mean(axis=0) - image /= image.reshape(-1, 3).std(axis=0) + 1e-5 - return image, joints - - def get_example(self, i): - img_id, joints = self.joints[i] - image = self.images[img_id] - ignore_joints, bbox_w, bbox_h, cx, cy = self.info[i] - - if self.rotate: - image, joints = self.apply_rotate(image, joints, ignore_joints) - if self.translate: - image, joitns = self.apply_translate(image, joints) - if self.zoom: - image, joints, cx, cy = self.apply_zoom(image, joints, cx, cy) - - image, joints = self.crop_reshape( - image, joints, bbox_w, bbox_h, cx, cy) - - if self.fliplr and np.random.randint(0, 2) == 1: - image, joints = self.apply_fliplr(image, joints) - if self.coord_normalize: - image, joints = self.apply_coord_normalize(image, joints) - if self.gcn: - image, joints = self.apply_gcn(image, joints) - - image = image.astype(np.float32).transpose(2, 0, 1) - joints = joints.astype(np.float32).flatten() - ignore_joints = np.array(ignore_joints, dtype=np.int32).flatten() - - return image, joints, ignore_joints diff --git a/scripts/download.sh b/scripts/download.sh new file mode 100644 index 00000000..1d2c9f74 --- /dev/null +++ b/scripts/download.sh @@ -0,0 +1,22 @@ +#! /bin/bash +# Copyright (c) 2016 Shunta Saito + +if [ ! -d data ]; then + mkdir data +fi +cd data + +# get FLIC-full dataset and FLIC-Plus annotations +if [ ! 
-f FLIC-full.zip ]; then + wget http://vision.grasp.upenn.edu/video/FLIC-full.zip +fi + +if [ ! -f tr_plus_indices.mat ]; then + wget +fi + +# Get LSP Extended Training Dataset +if [ ! -d lspet_dataset.zip ]; then + wget http://sam.johnson.io/research/lspet_dataset.zip +fi + diff --git a/scripts/draw_loss.py b/scripts/draw_loss.py deleted file mode 100644 index bae2e286..00000000 --- a/scripts/draw_loss.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import argparse -import numpy as np -import re -import sys - -if sys.platform in ['linux', 'linux2']: - import matplotlib - matplotlib.use('Agg') - import matplotlib.pyplot as plt -else: - import matplotlib - import matplotlib.pyplot as plt - - -def draw_loss_curve(logfile, outfile): - try: - train_loss = [] - test_loss = [] - for line in open(logfile): - line = line.strip() - if 'epoch:' not in line: - continue - epoch = int(re.search('epoch:\s*([0-9]+)', line).groups()[0]) - if 'training' in line and 'inf' not in line: - print(line) - tr_l = float(re.search('loss:\s*([0-9\.]+)', line).groups()[0]) - train_loss.append([epoch, tr_l]) - if 'test' in line and 'inf' not in line: - te_l = float(re.search('loss:\s*([0-9\.]+)', line).groups()[0]) - test_loss.append([epoch, te_l]) - - train_loss = np.asarray(train_loss)[1:] - test_loss = np.asarray(test_loss)[1:] - - if not len(train_loss) > 1: - return - - print(train_loss) - print(test_loss) - - plt.clf() - fig, ax1 = plt.subplots() - ax1.plot(train_loss[:, 0], train_loss[:, 1], - label='training loss', c='r') - ax1.set_xlim([2, len(train_loss)]) - ax1.set_xlabel('epoch') - ax1.set_ylabel('training loss') - ax1.legend(bbox_to_anchor=(0.25, -0.1), loc=9) - - if len(test_loss) > 1: - ax2 = ax1.twinx() - ax2.plot(test_loss[:, 0], test_loss[:, 1], 
label='test loss', - c='b') - ax2.set_ylabel('test loss') - - ax2.legend(bbox_to_anchor=(0.75, -0.1), loc=9) - # ax2.set_ylim(ax1.get_ylim()) - - plt.savefig(outfile, bbox_inches='tight') - - except Exception as e: - print(str(type(e)), e, line) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--logfile', type=str, default='log.txt') - parser.add_argument('--outfile', type=str, default='log.png') - args = parser.parse_args() - print(args) - - draw_loss_curve(args.logfile, args.outfile) diff --git a/scripts/evaluate_flic.py b/scripts/evaluate_flic.py deleted file mode 100644 index 59177606..00000000 --- a/scripts/evaluate_flic.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from chainer import cuda -from chainer import serializers -from chainer import Variable -from transform import Transform - -import argparse -import cv2 as cv -import glob -import imp -import numpy as np -import os -import re -import sys - - -def cropping(img, joints, min_dim): - # image cropping - _joints = joints.reshape((len(joints) // 2, 2)) - posi_joints = [(j[0], j[1]) for j in _joints if j[0] > 0 and j[1] > 0] - x, y, w, h = cv.boundingRect(np.asarray([posi_joints])) - if w < min_dim: - w = min_dim - if h < min_dim: - h = min_dim - - # bounding rect extending - x -= (w * 1.5 - w) / 2 - y -= (h * 1.5 - h) / 2 - w *= 1.5 - h *= 1.5 - - # clipping - x, y, w, h = [int(z) for z in [x, y, w, h]] - x = np.clip(x, 0, img.shape[1] - 1) - y = np.clip(y, 0, img.shape[0] - 1) - w = np.clip(w, 1, img.shape[1] - (x + 1)) - h = np.clip(h, 1, img.shape[0] - (y + 1)) - img = img[y:y + h, x:x + w] - - # joint shifting - _joints = np.asarray([(j[0] - x, j[1] - y) for j in _joints]) - joints = _joints.flatten() - - return img, joints - - -def 
resize(img, joints, size): - orig_h, orig_w = img.shape[:2] - joints[0::2] = joints[0::2] / float(orig_w) * size - joints[1::2] = joints[1::2] / float(orig_h) * size - img = cv.resize(img, (size, size), interpolation=cv.INTER_NEAREST) - - return img, joints - - -def contrast(img): - if not img.dtype == np.float32: - img = img.astype(np.float32) - # global contrast normalization - img -= img.reshape(-1, 3).mean(axis=0) - img -= img.reshape(-1, 3).std(axis=0) + 1e-5 - - return img - - -def input_transform(datum, datadir, fname_index, joint_index, min_dim, gcn): - img_fn = '%s/images/%s' % (datadir, datum[fname_index]) - if not os.path.exists(img_fn): - raise IOError('%s is not exist' % img_fn) - - img = cv.imread(img_fn) - joints = np.asarray([int(float(p)) for p in datum[joint_index:]]) - img, joints = cropping(img, joints, min_dim) - img, joints = resize(img, joints, size) - if gcn: - img = contrast(img) - else: - img /= 255.0 - - return img, joints - - -def load_model(args): - model_fn = os.path.basename(args.model) - model_name = model_fn.split('.')[0] - model = imp.load_source(model_name, args.model) - model = getattr(model, model_name) - model = model(args.joint_num) - serializers.load_npz(args.param, model) - model.train = False - - return model - - -def load_data(trans, args, x): - c = args.channel - s = args.size - d = args.joint_num * 2 - - # data augmentation - input_data = np.zeros((len(x), c, s, s)) - label = np.zeros((len(x), d)) - - for i, line in enumerate(x): - d, t = trans.transform(line.split(','), args.datadir, - args.fname_index, args.joint_index) - input_data[i] = d.transpose((2, 0, 1)) - label[i] = t - - return input_data, label - - -def create_tiled_image(perm, out_dir, result_dir, epoch, suffix, N=25): - fnames = np.array(sorted(glob.glob('%s/*%s.jpg' % (out_dir, suffix)))) - tile_fnames = fnames[perm[:N]] - - h, w, pad = 220, 220, 2 - side = int(np.ceil(np.sqrt(len(tile_fnames)))) - canvas = np.zeros((side * h + pad * (side + 1), - side * w 
+ pad * (side + 1), 3)) - - for i, fname in enumerate(tile_fnames): - img = cv.imread(fname) - x = w * (i % side) + pad * (i % side + 1) - y = h * (i // side) + pad * (i // side + 1) - canvas[y:y + h, x:x + w, :] = img - - if args.resize > 0: - canvas = cv.resize(canvas, (args.resize, args.resize)) - cv.imwrite('%s/test_%d_tiled_%s.jpg' % (result_dir, epoch, suffix), canvas) - - -def test(args): - # test data - test_fn = '%s/test_joints.csv' % args.datadir - test_dl = np.array([l.strip() for l in open(test_fn).readlines()]) - - # load model - if args.gpu >= 0: - cuda.get_device(args.gpu).use() - model = load_model(args) - if args.gpu >= 0: - model.to_gpu(args.gpu) - - # create output dir - epoch = int(re.search('epoch-([0-9]+)', args.param).groups()[0]) - result_dir = os.path.dirname(args.param) - out_dir = '%s/test_%d' % (result_dir, epoch) - if not os.path.exists(out_dir): - os.makedirs(out_dir) - out_log = '%s.log' % out_dir - fp = open(out_log, 'w') - - mean_error = 0.0 - N = len(test_dl) - for i in range(0, N, args.batchsize): - lines = test_dl[i:i + args.batchsize] - input_data, labels = load_data(trans, args, lines) - - if args.gpu >= 0: - input_data = cuda.to_gpu(input_data.astype(np.float32)) - labels = cuda.to_gpu(labels.astype(np.float32)) - else: - input_data = input_data.astype(np.float32) - labels = labels.astype(np.float32) - - x = Variable(input_data, volatile=True) - t = Variable(labels, volatile=True) - model(x, t) - - if args.gpu >= 0: - preds = cuda.to_cpu(model.pred.data) - input_data = cuda.to_cpu(input_data) - labels = cuda.to_cpu(labels) - else: - preds = model.pred.data - - for n, line in enumerate(lines): - img_fn = line.split(',')[args.fname_index] - img = input_data[n].transpose((1, 2, 0)) - pred = preds[n] - img_pred, pred = trans.revert(img, pred) - - # turn label data into image coordinates - label = labels[n] - img_label, label = trans.revert(img, label) - - # calc mean_error - error = np.linalg.norm(pred - label) / len(pred) - 
mean_error += error - - # create pred, label tuples - img_pred = np.array(img_pred.copy()) - img_label = np.array(img_label.copy()) - pred = [tuple(p) for p in pred] - label = [tuple(p) for p in label] - - # all limbs - img_label = draw_joints( - img_label, label, args.draw_limb, args.text_scale) - img_pred = draw_joints( - img_pred, pred, args.draw_limb, args.text_scale) - - msg = '{:5}/{:5} {}\terror:{}\tmean_error:{}'.format( - i + n, N, img_fn, error, mean_error / (i + n + 1)) - print(msg, file=fp) - print(msg) - - fn, ext = os.path.splitext(img_fn) - tr_fn = '%s/%d-%d_%s_pred%s' % (out_dir, i, n, fn, ext) - la_fn = '%s/%d-%d_%s_label%s' % (out_dir, i, n, fn, ext) - cv.imwrite(tr_fn, img_pred) - cv.imwrite(la_fn, img_label) - - -def tile(args): - # create output dir - epoch = int(re.search('epoch-([0-9]+)', args.param).groups()[0]) - result_dir = os.path.dirname(args.param) - out_dir = '%s/test_%d' % (result_dir, epoch) - if not os.path.exists(out_dir): - raise Exception('%s is not exist' % out_dir) - - # save tiled image of randomly chosen results and labels - n_img = len(glob.glob('%s/*pred*' % (out_dir))) - perm = np.random.permutation(n_img) - create_tiled_image(perm, out_dir, result_dir, epoch, 'pred', args.n_imgs) - create_tiled_image(perm, out_dir, result_dir, epoch, 'label', args.n_imgs) - - -if __name__ == '__main__': - sys.path.append('tests') - sys.path.append('models') - - from test_flic_dataset import draw_joints - - parser = argparse.ArgumentParser() - parser.add_argument('--model', type=str, - help='model definition file in models dir') - parser.add_argument('--param', type=str, - help='trained parameters file in result dir') - parser.add_argument('--batchsize', type=int, default=128) - parser.add_argument('--gpu', type=int, default=0) - parser.add_argument('--datadir', type=str, default='data/FLIC-full') - parser.add_argument('--mode', type=str, default='test', - choices=['test', 'tile'], - help='test or create tiled image') - 
parser.add_argument('--n_imgs', type=int, default=9, - help='how many images will be tiled') - parser.add_argument('--resize', type=int, default=-1, - help='resize the results of tiling') - parser.add_argument('--seed', type=int, default=9, - help='random seed to select images to be tiled') - parser.add_argument('--draw_limb', type=bool, default=True, - help='whether draw limb line to visualize') - parser.add_argument('--text_scale', type=float, default=1.0, - help='text scale when drawing indices of joints') - args = parser.parse_args() - - result_dir = os.path.dirname(args.param) - log_fn = grep.grep('{}/log.txt'.format(result_dir))[0] - for line in open(log_fn): - if 'Namespace' in line: - args.joint_num = int( - re.search('joint_num=([0-9]+)', line).groups()[0]) - args.fname_index = int( - re.search('fname_index=([0-9]+)', line).groups()[0]) - args.joint_index = int( - re.search('joint_index=([0-9]+)', line).groups()[0]) - break - - if args.mode == 'test': - test(args) - elif args.mode == 'tile': - np.random.seed(args.seed) - tile(args) diff --git a/scripts/logger.py b/scripts/logger.py deleted file mode 100644 index a38feb2a..00000000 --- a/scripts/logger.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from chainer.training import extensions -from chainer.training.extensions import log_report as log_report_module - -import logging -import sys - - -class LogPrinter(extensions.PrintReport): - - def __init__(self, entries, log_report=str('LogReport'), out=sys.stdout): - self._entries = entries - self._log_report = log_report - self._log_len = 0 - - def __call__(self, trainer): - log_report = self._log_report - if isinstance(log_report, str): - log_report = trainer.get_extension(log_report) - elif isinstance(log_report, 
log_report_module.LogReport): - log_report(trainer) # update the log report - else: - raise TypeError('log report has a wrong type %s' % - type(log_report)) - - log = log_report.log - log_len = self._log_len - while len(log) > log_len: - self._print(log[log_len]) - log_len += 1 - self._log_len = log_len - - def _print(self, observation): - msg = '' - for i, entry in enumerate(self._entries): - if entry in observation: - msg += '{}:{}, '.format(entry, observation[entry]) - logging.info(msg) diff --git a/scripts/train.py b/scripts/train.py index 0e2adf8a..33cf30ab 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -1,229 +1,162 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from chainer import iterators -from chainer import optimizers -from chainer import serializers + +import argparse +import random + +import chainer +import chainer.functions as F +import chainerx +import numpy as np +from chainer import dataset +from chainercv.chainer_experimental.datasets.sliceable import TransformDataset from chainer import training from chainer.training import extensions +from chainercv import transforms +from deeppose.datasets import flic_dataset +from deeppose.models import alexnet +from deeppose.models import deeppose +from PIL import Image +from deeppose.utils import common +import random -import chainer -import cmd_options -import dataset -import imp -import logger -import logging -import loss -import os -import shutil -import sys -import tempfile -import time - - -def create_result_dir(model_path, resume_model): - if not os.path.exists('results'): - os.mkdir('results') - if resume_model is None: - prefix = '{}_{}'.format( - os.path.splitext(os.path.basename(model_path))[0], - time.strftime('%Y-%m-%d_%H-%M-%S')) - result_dir = tempfile.mkdtemp(prefix=prefix, dir='results') - if 
not os.path.exists(result_dir): - os.makedirs(result_dir) - else: - result_dir = os.path.dirname(resume_model) - - return result_dir - - -def create_logger(args, result_dir): - logging.basicConfig(filename='{}/log.txt'.format(result_dir)) - root = logging.getLogger() - root.setLevel(logging.DEBUG) - ch = logging.StreamHandler(sys.stdout) - ch.setLevel(logging.DEBUG) - msg_format = '%(asctime)s [%(levelname)s] %(message)s' - formatter = logging.Formatter(msg_format) - ch.setFormatter(formatter) - root.addHandler(ch) - logging.info(sys.version_info) - logging.info('chainer version: {}'.format(chainer.__version__)) - logging.info('cuda: {}, cudnn: {}'.format( - chainer.cuda.available, chainer.cuda.cudnn_enabled)) - logging.info(args) - - -def get_model(model_path, n_joints, result_dir, resume_model): - model_fn = os.path.basename(model_path) - model_name = model_fn.split('.')[0] - model = imp.load_source(model_name, model_path) - model = getattr(model, model_name) - - # Initialize - model = model(n_joints) - - # Copy files - dst = '{}/{}'.format(result_dir, model_fn) - if not os.path.exists(dst): - shutil.copy(model_path, dst) - - # load model - if resume_model is not None: - serializers.load_npz(resume_model, model) - - return model - - -def get_optimizer(model, opt, lr, adam_alpha=None, adam_beta1=None, - adam_beta2=None, adam_eps=None, weight_decay=None, - resume_opt=None): - if opt == 'MomentumSGD': - optimizer = optimizers.MomentumSGD(lr=lr, momentum=0.9) - elif opt == 'Adam': - optimizer = optimizers.Adam( - alpha=adam_alpha, beta1=adam_beta1, - beta2=adam_beta2, eps=adam_eps) - elif opt == 'AdaGrad': - optimizer = optimizers.AdaGrad(lr=lr) - elif opt == 'RMSprop': - optimizer = optimizers.RMSprop(lr=lr) - else: - raise Exception('No optimizer is selected') - - # The first model as the master model +class DeepPoseTrainChain(chainer.Chain): + + def __init__(self, n_point=22): + super().__init__() + extractor = alexnet.AlexNet() + extractor.pick = 'dropout2' # 
Extract output from this layer + extractor.remove_unused() # Remove subsequent layers + with self.init_scope(): + self.model = deeppose.DeepPose(extractor=extractor, n_point=n_point) + + def encode(self, point, height, width): + """Encode joint coordinates into normalized ones for loss calculation.""" + xp = chainer.backend.get_array_module(point) + b, n_point = point.shape[:2] + if point.ndim == 2: + point = F.reshape(point, (b, n_point // 2, 2)) + center = xp.asarray([height / 2, width / 2], dtype=point.dtype) + + return (point - center) / xp.asarray([height, width], dtype=point.dtype) + + def decode(self, point, img_shape): + pass + + def forward(self, x, y): + _, _, height, width = x.shape + assert height == self.model.extractor.insize + assert width == self.model.extractor.insize + + pred = self.model(x) + norm_pred = self.encode(pred, height, width) + norm_y = self.encode(y, height, width) + loss = F.mean_squared_error(norm_pred, norm_y) + + chainer.reporter.report({'loss': loss}, self) + + return loss + + +class TrainTransform(object): + + def __init__(self, insize=220): + self.insize = insize + self.scale_h = 1.5 + self.scale_w = 1.2 + + def __call__(self, x): + img, point = x + + img, point = common.crop_with_joints(img, point, self.scale_h, self.scale_w) + img, point = common.to_square(img, point, (self.insize, self.insize)) + + if random.randint(0, 1) == 1: + img, point = common.lr_flip(img, point) + + return img, point.astype(np.float32) + + +class ValidTransform(object): + + def __init__(self, insize=220): + self.insize = insize + self.scale_h = 1.5 + self.scale_w = 1.2 + + def __call__(self, x): + img, point = x + + img, point = common.crop_with_joints(img, point, self.scale_h, self.scale_w) + img, point = common.to_square(img, point, (self.insize, self.insize)) + + return img, point.astype(np.float32) + + +def main(): + parser = argparse.ArgumentParser(description='Training of DeepPose on the FLIC dataset') + parser.add_argument('--batchsize', '-B', 
type=int, default=128, help='Learning minibatch size') + parser.add_argument('--epoch', '-E', type=int, default=1000, help='Number of epochs to train') + parser.add_argument('--resume', '-r', default='', help='Initialize the trainer from given file') + parser.add_argument('--out', '-o', default='results', help='Output directory') + parser.add_argument('--val_batchsize', '-b', type=int, default=250, help='Validation minibatch size') + parser.add_argument('--device', '-d', type=str, default='cuda:0', help='GPU ID (negative value indicates CPU)') + parser.add_argument('--dataset-zip-path', '-D', type=str, default='data/FLIC.zip') + parser.add_argument('--loaderjob', '-j', type=int, default=8) + parser.add_argument('--test', action='store_true', default=False) + args = parser.parse_args() + + device = chainer.get_device(args.device) + + # Initialize the model to train + model = DeepPoseTrainChain(n_point=22) + model.to_device(device) + device.use() + + # Load the dataset files + dataset = flic_dataset.FLICDataset(split='train', dataset_zip_path=args.dataset_zip_path) + + # Split dataset into train and valid + np.random.seed(0) + idx = int(np.random.randint(len(dataset) * 0.8)) + train, valid = dataset.slice[:idx], dataset.slice[idx:] + + # Apply data augmentation + train = TransformDataset(train, ('img', 'point'), TrainTransform(model.model.extractor.insize)) + valid = TransformDataset(valid, ('img', 'point'), ValidTransform(model.model.extractor.insize)) + + # These iterators load the images with subprocesses running in parallel to the training/validation. 
+ train_iter = chainer.iterators.MultiprocessIterator( + train, args.batchsize, n_processes=args.loaderjob) + val_iter = chainer.iterators.MultiprocessIterator( + valid, args.val_batchsize, repeat=False, n_processes=args.loaderjob) + + # Set up an optimizer + optimizer = chainer.optimizers.AdaGrad(lr=0.0005) optimizer.setup(model) - if opt == 'MomentumSGD': - optimizer.add_hook( - chainer.optimizer.WeightDecay(weight_decay)) - - if resume_opt is not None: - serializers.load_npz(resume_opt, optimizer) - - return optimizer - - -def transform(args, x_queue, datadir, fname_index, joint_index, o_queue): - trans = Transform(args) - while True: - x = x_queue.get() - if x is None: - break - x, t = trans.transform(x.split(','), datadir, fname_index, joint_index) - o_queue.put((x.transpose((2, 0, 1)), t)) - - -def load_data(args, input_q, minibatch_q): - c = args.channel - s = args.size - d = args.joint_num * 2 - - input_data_base = Array(ctypes.c_float, args.batchsize * c * s * s) - input_data = np.ctypeslib.as_array(input_data_base.get_obj()) - input_data = input_data.reshape((args.batchsize, c, s, s)) - - label_base = Array(ctypes.c_float, args.batchsize * d) - label = np.ctypeslib.as_array(label_base.get_obj()) - label = label.reshape((args.batchsize, d)) - - x_queue, o_queue = Queue(), Queue() - workers = [Process(target=transform, - args=(args, x_queue, args.datadir, args.fname_index, - args.joint_index, o_queue)) - for _ in range(args.batchsize)] - for w in workers: - w.start() - - while True: - x_batch = input_q.get() - if x_batch is None: - break - - # data augmentation - for x in x_batch: - x_queue.put(x) - j = 0 - while j != len(x_batch): - a, b = o_queue.get() - input_data[j] = a - label[j] = b - j += 1 - minibatch_q.put([input_data, label]) - - for _ in range(args.batchsize): - x_queue.put(None) - for w in workers: - w.join() + # Set up a trainer + updater = training.updaters.StandardUpdater(train_iter, optimizer, device=device) + trainer = 
training.Trainer(updater, (args.epoch, 'epoch'), args.out) + val_interval = ((1, 'iteration') if args.test else (1, 'epoch')) + log_interval = ((1, 'iteration') if args.test else (1, 'epoch')) -if __name__ == '__main__': - args = cmd_options.get_arguments() - result_dir = create_result_dir(args.model, args.resume_model) - create_logger(args, result_dir) - model = get_model(args.model, args.n_joints, result_dir, args.resume_model) - model = loss.PoseEstimationError(model) - opt = get_optimizer(model, args.opt, args.lr, adam_alpha=args.adam_alpha, - adam_beta1=args.adam_beta1, adam_beta2=args.adam_beta2, - adam_eps=args.adam_eps, weight_decay=args.weight_decay, - resume_opt=args.resume_opt) - train_dataset = dataset.PoseDataset( - args.train_csv_fn, args.img_dir, args.im_size, args.fliplr, - args.rotate, args.rotate_range, args.zoom, args.base_zoom, - args.zoom_range, args.translate, args.translate_range, args.min_dim, - args.coord_normalize, args.gcn, args.n_joints, args.fname_index, - args.joint_index, args.symmetric_joints, args.ignore_label - ) - test_dataset = dataset.PoseDataset( - args.test_csv_fn, args.img_dir, args.im_size, args.fliplr, - args.rotate, args.rotate_range, args.zoom, args.base_zoom, - args.zoom_range, args.translate, args.translate_range, args.min_dim, - args.coord_normalize, args.gcn, args.n_joints, args.fname_index, - args.joint_index, args.symmetric_joints, args.ignore_label - ) - - train_iter = iterators.MultiprocessIterator(train_dataset, args.batchsize) - test_iter = iterators.MultiprocessIterator( - test_dataset, args.batchsize, repeat=False, shuffle=False) - - gpus = [int(i) for i in args.gpus.split(',')] - devices = {'main': gpus[0]} - if len(gpus) > 2: - for gid in gpus[1:]: - devices.update({'gpu{}'.format(gid): gid}) - updater = training.ParallelUpdater(train_iter, opt, devices=devices) - - interval = (args.snapshot, 'epoch') - trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=result_dir) - 
trainer.extend(extensions.dump_graph('main/loss')) - - # Save parameters and optimization state - trainer.extend(extensions.snapshot_object( - model, 'epoch-{.updater.epoch}.model'), trigger=interval) - trainer.extend(extensions.snapshot_object( - opt, 'epoch-{.updater.epoch}.state'), trigger=interval) - trainer.extend(extensions.snapshot(), trigger=interval) - - if args.opt == 'MomentumSGD' or args.opt == 'AdaGrad': - trainer.reporter.add_observer('lr', opt.lr) - trainer.extend(IntervalShift( - 'lr', args.lr, args.lr_decay_freq, args.lr_decay_ratio)) - - # Show log - trainer.extend( - extensions.LogReport(trigger=(args.show_log_iter, 'iteration'))) - trainer.extend(logger.LogPrinter( - ['epoch', 'main/loss', 'validation/main/loss', 'lr'])) - - eval_model = model.copy() - eval_model.predictor.train = False - trainer.extend( - extensions.Evaluator(test_iter, eval_model, device=gpus[0]), - trigger=(args.valid_freq, 'epoch')) + trainer.extend(extensions.Evaluator(val_iter, model, device=device), trigger=val_interval) + trainer.extend(extensions.snapshot(), trigger=val_interval) + trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'), trigger=val_interval) + trainer.extend(extensions.LogReport(trigger=log_interval)) + trainer.extend(extensions.observe_lr(), trigger=log_interval) + trainer.extend(extensions.PrintReport( + ['epoch', 'iteration', 'main/loss', 'validation/main/loss', 'lr']), trigger=log_interval) + trainer.extend(extensions.ProgressBar(update_interval=10)) + trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch')) + + if args.resume: + chainer.serializers.load_npz(args.resume, trainer) trainer.run() + + +if __name__ == '__main__': + main() diff --git a/shells/train_flic.sh b/shells/train_flic.sh deleted file mode 100644 index d799bf38..00000000 --- a/shells/train_flic.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 Shunta Saito - -CHAINER_TYPE_CHECK=0 \ -python scripts/train.py 
\ ---model models/AlexNet.py \ ---gpus 8 \ ---epoch 100 \ ---batchsize 128 \ ---snapshot 10 \ ---valid_freq 5 \ ---train_csv_fn data/FLIC-full/train_joints.csv \ ---test_csv_fn data/FLIC-full/test_joints.csv \ ---img_dir data/FLIC-full/images \ ---test_freq 10 \ ---seed 1701 \ ---im_size 220 \ ---fliplr \ ---rotate \ ---rotate_range 10 \ ---zoom \ ---zoom_range 0.2 \ ---translate \ ---translate_range 5 \ ---coord_normalize \ ---gcn \ ---n_joints 7 \ ---fname_index 0 \ ---joint_index 1 \ ---symmetric_joints "[[2, 4], [1, 5], [0, 6]]" \ ---opt Adam diff --git a/shells/train_lsp.sh b/shells/train_lsp.sh deleted file mode 100644 index 3f35c9a1..00000000 --- a/shells/train_lsp.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 Shunta Saito - -CHAINER_TYPE_CHECK=0 \ -python scripts/train.py \ ---model models/AlexNet.py \ ---gpus 7 \ ---epoch 100 \ ---batchsize 128 \ ---snapshot 10 \ ---valid_freq 5 \ ---train_csv_fn data/lspet_dataset/train_joints.csv \ ---test_csv_fn data/lspet_dataset/test_joints.csv \ ---img_dir data/lspet_dataset/images \ ---test_freq 10 \ ---seed 1701 \ ---im_size 220 \ ---fliplr \ ---rotate \ ---rotate_range 10 \ ---zoom \ ---zoom_range 0.2 \ ---translate \ ---translate_range 5 \ ---coord_normalize \ ---gcn \ ---n_joints 14 \ ---fname_index 0 \ ---joint_index 1 \ ---symmetric_joints "[[8, 9], [7, 10], [6, 11], [2, 3], [1, 4], [0, 5]]" \ ---opt Adam diff --git a/shells/train_mpii.sh b/shells/train_mpii.sh deleted file mode 100644 index 418881d3..00000000 --- a/shells/train_mpii.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 Shunta Saito - -CHAINER_TYPE_CHECK=0 \ -python scripts/train.py \ ---model models/AlexNet.py \ ---gpus 6 \ ---epoch 100 \ ---batchsize 128 \ ---snapshot 10 \ ---valid_freq 5 \ ---train_csv_fn data/mpii/train_joints.csv \ ---test_csv_fn data/mpii/test_joints.csv \ ---img_dir data/mpii/images \ ---test_freq 10 \ ---seed 1701 \ ---im_size 220 \ ---fliplr \ ---rotate \ ---rotate_range 10 \ 
---zoom \ ---zoom_range 0.2 \ ---translate \ ---translate_range 5 \ ---coord_normalize \ ---gcn \ ---n_joints 14 \ ---fname_index 0 \ ---joint_index 1 \ ---symmetric_joints "[[12, 13], [11, 14], [10, 15], [2, 3], [1, 4], [0, 5]]" \ ---opt Adam \ diff --git a/tests/test_alexnet.py b/tests/test_alexnet.py new file mode 100644 index 00000000..6f8d93fe --- /dev/null +++ b/tests/test_alexnet.py @@ -0,0 +1,28 @@ +import os +import unittest +from deeppose.models import alexnet +import numpy as np +from chainer import computational_graph + +class TestAlexNet(unittest.TestCase): + + def setUp(self): + self.model = alexnet.AlexNet(n_class=22) + self.model.pick = 'fc7' + self.model.remove_unused() + + insize = self.model.insize + self.x = np.random.rand(1, 3, insize, insize).astype(np.float32) + + def test_forward(self): + y = self.model(self.x) + assert y.shape == (1, 4096) + + def test_graph(self): + y = self.model(self.x) + g = computational_graph.build_computational_graph(y) + + outdir = 'data/test_models' + os.makedirs(outdir, exist_ok=True) + with open(os.path.join(outdir, 'alexnet.dot'), 'w') as o: + o.write(g.dump()) \ No newline at end of file diff --git a/tests/test_dataset.py b/tests/test_dataset.py deleted file mode 100644 index 46ba3d42..00000000 --- a/tests/test_dataset.py +++ /dev/null @@ -1,246 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2016 Shunta Saito - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import cv2 as cv -import numpy as np -import os -import sys -import tempfile -import unittest - - -class TestPoseDataset(unittest.TestCase): - - def setUp(self): - np.random.seed(1) - self.joints = np.array([ - [300.20, 220.20], - [280.80, 240.40], - [300.20, 260.60], - [320.30, 240.40] - ]) - self.line = '*,' - self.line += ','.join([str(v) for v in self.joints.flatten().tolist()]) - self.n_test = 2 - - # Prepare for FLIC 
dataset - fd, self.flic_csv = tempfile.mkstemp() - with os.fdopen(fd, 'w') as f: - lines = open('data/FLIC-full/test_joints.csv').readlines() - np.random.shuffle(lines) - for line in lines[:self.n_test]: - print(line.strip(), file=f) - self.dataset = self.create_dataset(csv_fn=self.flic_csv) - - # Prepare for LSP dataset - fd, self.lsp_csv = tempfile.mkstemp() - with os.fdopen(fd, 'w') as f: - lines = open('data/lspet_dataset/test_joints.csv').readlines() - np.random.shuffle(lines) - for line in lines[:self.n_test]: - print(line.strip(), file=f) - - # Prepare for MPII dataset - fd, self.mpii_csv = tempfile.mkstemp() - with os.fdopen(fd, 'w') as f: - lines = open('data/mpii/test_joints.csv').readlines() - np.random.shuffle(lines) - for line in lines[:self.n_test]: - print(line.strip(), file=f) - - def create_dataset(self, - csv_fn='data/FLIC-full/test_joints.csv', - img_dir='data/FLIC-full/images', - symmetric_joints='[[2, 4], [1, 5], [0, 6]]', - im_size=220, - fliplr=False, - rotate=False, - rotate_range=10, - zoom=False, - base_zoom=1.5, - zoom_range=0.2, - translate=False, - translate_range=5, - min_dim=0, - coord_normalize=False, - gcn=False, - joint_num=7, - fname_index=0, - joint_index=1, - ignore_label=-1): - sys.path.insert(0, 'scripts') - from dataset import PoseDataset - dataset = PoseDataset( - csv_fn, img_dir, im_size, fliplr, rotate, rotate_range, zoom, - base_zoom, zoom_range, translate, translate_range, min_dim, - coord_normalize, gcn, joint_num, fname_index, joint_index, - symmetric_joints, ignore_label - ) - return dataset - - def test_calc_joint_center(self): - center = self.dataset.calc_joint_center(self.joints) - np.testing.assert_array_equal(center, [300.55, 240.4]) - - def test_calc_joint_bbox_size(self): - bbox_w, bbox_h = self.dataset.calc_joint_bbox_size(self.joints) - self.assertEqual(bbox_w, (320.30 - 280.80)) - self.assertEqual(bbox_h, (260.60 - 220.20)) - - def draw_joints(self, image, joints, prefix, ignore_joints): - if image.shape[2] 
!= 3: - _image = image.transpose(1, 2, 0).copy() - else: - _image = image.copy() - if joints.ndim == 1: - joints = np.array(list(zip(joints[0::2], joints[1::2]))) - if ignore_joints.ndim == 1: - ignore_joints = np.array( - list(zip(ignore_joints[0::2], ignore_joints[1::2]))) - for i, (x, y) in enumerate(joints): - if ignore_joints is not None \ - and (ignore_joints[i][0] == 0 or ignore_joints[i][1] == 0): - continue - cv.circle(_image, (int(x), int(y)), 2, (0, 0, 255), -1) - cv.putText( - _image, str(i), (int(x), int(y)), cv.FONT_HERSHEY_SIMPLEX, - 1.0, (255, 255, 255), 3) - cv.putText( - _image, str(i), (int(x), int(y)), cv.FONT_HERSHEY_SIMPLEX, - 1.0, (0, 0, 0), 1) - _, fn_img = tempfile.mkstemp() - basename = os.path.basename(fn_img) - fn_img = fn_img.replace(basename, prefix + basename) - fn_img = fn_img + '.png' - cv.imwrite(fn_img, _image) - - def test_apply_fliplr(self): - for i, (img_id, joints) in enumerate(self.dataset.joints): - image = self.dataset.images[img_id] - ig, bbox_w, bbox_h, center_x, center_y = self.dataset.info[i] - self.draw_joints(image, joints, 'fliplr_{}_before_'.format(i), ig) - image, joints = self.dataset.apply_fliplr(image, joints) - self.draw_joints(image, joints, 'fliplr_{}_after'.format(i), ig) - - def test_apply_zoom(self): - for i, (img_id, joints) in enumerate(self.dataset.joints): - image = self.dataset.images[img_id] - ig, bbox_w, bbox_h, cx, cy = self.dataset.info[i] - self.draw_joints(image, joints, 'zoom_{}_before_'.format(i), ig) - image, joints = self.dataset.apply_zoom(image, joints, cx, cy)[:2] - self.draw_joints(image, joints, 'zoom_{}_after_'.format(i), ig) - - def test_apply_translate(self): - for i, (img_id, joints) in enumerate(self.dataset.joints): - image = self.dataset.images[img_id] - ig, bbox_w, bbox_h, center_x, center_y = self.dataset.info[i] - self.draw_joints(image, joints, 'trans_{}_before_'.format(i), ig) - image, joints = self.dataset.apply_translate(image, joints) - self.draw_joints(image, joints, 
'trans_{}_after_'.format(i), ig) - - def test_apply_rotate(self): - for i, (img_id, joints) in enumerate(self.dataset.joints): - image = self.dataset.images[img_id] - ig, bbox_w, bbox_h, center_x, center_y = self.dataset.info[i] - self.draw_joints(image, joints, 'rotate_{}_before_'.format(i), ig) - image, joints = self.dataset.apply_rotate(image, joints, ig) - self.draw_joints(image, joints, 'rotate_{}_after_'.format(i), ig) - - def test_apply_coord_normalize(self): - for image_id, joints in self.dataset.joints: - image = self.dataset.images[image_id] - image, joints = self.dataset.apply_coord_normalize(image, joints) - - def test_apply_gcn(self): - for image_id, joints in self.dataset.joints: - image = self.dataset.images[image_id] - image, joints = self.dataset.apply_gcn(image, joints) - np.testing.assert_allclose( - image.reshape(-1, 3).mean(axis=0), [0, 0, 0], atol=1e-5) - np.testing.assert_allclose( - image.reshape(-1, 3).std(axis=0), [1., 1., 1.], atol=1e-5) - - def test_flic(self): - img_dir = 'data/FLIC-full/images' - symmetric_joints = '[[2, 4], [1, 5], [0, 6]]' - np.random.rand(3) - dataset = self.create_dataset( - self.flic_csv, - img_dir=img_dir, - symmetric_joints=symmetric_joints, - fliplr=True, - rotate=True, - rotate_range=10, - zoom=True, - base_zoom=1.5, - zoom_range=0.2, - translate=True, - translate_range=5, - coord_normalize=False, - gcn=False, - ) - self.assertEqual(len(dataset), self.n_test) - for i in range(len(dataset)): - image, joints, ignore_joints = dataset.get_example(i) - image = image.astype(np.uint8) - self.draw_joints( - image, joints, 'flic_{}_'.format(i), ignore_joints) - - def test_lsp(self): - img_dir = 'data/lspet_dataset/images' - symmetric_joints = '[[8, 9], [7, 10], [6, 11], [2, 3], [1, 4], [0, 5]]' - np.random.rand(3) - dataset = self.create_dataset( - self.lsp_csv, - img_dir=img_dir, - symmetric_joints=symmetric_joints, - fliplr=True, - rotate=True, - rotate_range=10, - zoom=True, - base_zoom=1.5, - zoom_range=0.2, - 
translate=True, - translate_range=5, - coord_normalize=False, - gcn=False, - ) - self.assertEqual(len(dataset), self.n_test) - for i in range(len(dataset)): - image, joints, ignore_joints = dataset.get_example(i) - image = image.astype(np.uint8) - self.draw_joints( - image, joints, 'lsp_{}_'.format(i), ignore_joints) - - def test_mpii(self): - img_dir = 'data/mpii/images' - symmetric_joints = \ - '[[12, 13], [11, 14], [10, 15], [2, 3], [1, 4], [0, 5]]' - np.random.rand(3) - dataset = self.create_dataset( - self.mpii_csv, - img_dir=img_dir, - symmetric_joints=symmetric_joints, - fliplr=True, - rotate=True, - rotate_range=10, - zoom=True, - base_zoom=1.5, - zoom_range=0.2, - translate=True, - translate_range=5, - coord_normalize=False, - gcn=False, - ) - self.assertEqual(len(dataset), self.n_test) - for i in range(len(dataset)): - image, joints, ignore_joints = dataset.get_example(i) - image = image.astype(np.uint8) - self.draw_joints( - image, joints, 'mpii_{}_'.format(i), ignore_joints) diff --git a/tests/test_flic_dataset.py b/tests/test_flic_dataset.py new file mode 100644 index 00000000..1963fe9b --- /dev/null +++ b/tests/test_flic_dataset.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Copyright (c) 2016 Shunta Saito + +import os +import sys +import tempfile +import unittest + +import cv2 +import numpy as np +from chainercv.utils import write_image +from deeppose.datasets import flic_dataset +from deeppose.utils import flic_utils +from deeppose.utils import common + +class TestFLICDataset(unittest.TestCase): + + def setUp(self): + self.ds = flic_dataset.FLICDataset() + self.outdir = 'data/flic_test' + os.makedirs(self.outdir, exist_ok=True) + + def test_get_example(self): + np.random.seed(0) + for i in range(10): + j = np.random.randint(len(self.ds)) + img, point = self.ds[j] + + assert img.shape == (3, 480, 720) + assert point.shape == (11, 2) + + img = flic_utils.draw_joints(img, point)[:, :, ::-1] + 
cv2.imwrite('{}/flic_test_{:02d}.png'.format(self.outdir, i), img) + + img, point = self.ds[j] + img, point = common.crop_with_joints(img, point) + vis = flic_utils.draw_joints(img, point)[:, :, ::-1] + cv2.imwrite('{}/flic_test_crop_{:02d}.png'.format(self.outdir, i), vis) + + img, point = common.to_square(img, point) + vis = flic_utils.draw_joints(img, point)[:, :, ::-1] + cv2.imwrite('{}/flic_test_resize_{:02d}.png'.format(self.outdir, i), vis) + + img, point = common.lr_flip(img, point) + vis = flic_utils.draw_joints(img, point)[:, :, ::-1] + cv2.imwrite('{}/flic_test_flip_{:02d}.png'.format(self.outdir, i), vis) From 01907773eb53f1d71ef18899c84d4231eab28916 Mon Sep 17 00:00:00 2001 From: Shunta Saito Date: Tue, 16 Jul 2019 12:34:09 +0000 Subject: [PATCH 6/8] Add translation augmentation --- deeppose/utils/common.py | 13 ++++++++++++- scripts/train.py | 7 +++++-- tests/test_flic_dataset.py | 5 +++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/deeppose/utils/common.py b/deeppose/utils/common.py index 000f9609..a5335f58 100644 --- a/deeppose/utils/common.py +++ b/deeppose/utils/common.py @@ -2,7 +2,7 @@ from chainercv import transforms -def crop_with_joints(img, point, scale_h=1.5, sacle_w=1.2): +def crop_with_joints(img, point, scale_h=1.5, sacle_w=1.2, random_offset_ratio_y=0, random_offset_ratio_x=0): min_y, min_x = point.min(axis=0) max_y, max_x = point.max(axis=0) @@ -21,6 +21,15 @@ def crop_with_joints(img, point, scale_h=1.5, sacle_w=1.2): new_min_y = int(np.clip(center_y - new_height / 2, 0, img_height)) new_max_y = int(np.clip(new_min_y + new_height, 0, img_width)) + offset_y = random_offset_ratio_y * new_height / 2 + offset_y = np.random.uniform(-offset_y, offset_y) + offset_x = random_offset_ratio_x * new_width / 2 + offset_x = np.random.uniform(-offset_x, offset_x) + new_min_x = int(np.clip(new_min_x + offset_x, 0, min_x)) + new_max_x = int(np.clip(new_max_x + offset_x, max_x, img_width)) + new_min_y = int(np.clip(new_min_y + 
offset_y, 0, min_y)) + new_max_y = int(np.clip(new_max_y + offset_y, max_y, img_width)) + crop = img[:, new_min_y:new_max_y, new_min_x:new_max_x] point = point - np.array([new_min_y, new_min_x]) @@ -41,3 +50,5 @@ def lr_flip(img, point): point = transforms.flip_point([point], (height, width), x_flip=True)[0] return img, point + + diff --git a/scripts/train.py b/scripts/train.py index 33cf30ab..038bac73 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -64,11 +64,14 @@ def __init__(self, insize=220): self.insize = insize self.scale_h = 1.5 self.scale_w = 1.2 + self.random_offset_ratio_y = 0.2 + self.random_offset_ratio_x = 0.2 def __call__(self, x): img, point = x - img, point = common.crop_with_joints(img, point, self.scale_h, self.scale_w) + img, point = common.crop_with_joints( + img, point, self.scale_h, self.scale_w, self.random_offset_ratio_y, self.random_offset_ratio_x) img, point = common.to_square(img, point, (self.insize, self.insize)) if random.randint(0, 1) == 1: @@ -144,7 +147,7 @@ def main(): trainer.extend(extensions.Evaluator(val_iter, model, device=device), trigger=val_interval) trainer.extend(extensions.snapshot(), trigger=val_interval) - trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'), trigger=val_interval) + trainer.extend(extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'), trigger=val_interval) trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.observe_lr(), trigger=log_interval) trainer.extend(extensions.PrintReport( diff --git a/tests/test_flic_dataset.py b/tests/test_flic_dataset.py index 1963fe9b..b4555243 100644 --- a/tests/test_flic_dataset.py +++ b/tests/test_flic_dataset.py @@ -38,6 +38,11 @@ def test_get_example(self): vis = flic_utils.draw_joints(img, point)[:, :, ::-1] cv2.imwrite('{}/flic_test_crop_{:02d}.png'.format(self.outdir, i), vis) + img, point = self.ds[j] + img, point = common.crop_with_joints(img, point, random_offset_ratio_y=0.2, 
random_offset_ratio_x=0.2) + vis = flic_utils.draw_joints(img, point)[:, :, ::-1] + cv2.imwrite('{}/flic_test_offset_{:02d}.png'.format(self.outdir, i), vis) + img, point = common.to_square(img, point) vis = flic_utils.draw_joints(img, point)[:, :, ::-1] cv2.imwrite('{}/flic_test_resize_{:02d}.png'.format(self.outdir, i), vis) From 80c4201cbb4433e6c4a1844fbdbf62821fa3e750 Mon Sep 17 00:00:00 2001 From: Shunta Saito Date: Tue, 16 Jul 2019 12:53:59 +0000 Subject: [PATCH 7/8] Fix bugs in crop_with_joints --- deeppose/utils/common.py | 22 ++++++++++++++++++---- scripts/download.sh | 4 ---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/deeppose/utils/common.py b/deeppose/utils/common.py index a5335f58..bdb74884 100644 --- a/deeppose/utils/common.py +++ b/deeppose/utils/common.py @@ -1,3 +1,5 @@ +import math + import numpy as np from chainercv import transforms @@ -5,6 +7,21 @@ def crop_with_joints(img, point, scale_h=1.5, sacle_w=1.2, random_offset_ratio_y=0, random_offset_ratio_x=0): min_y, min_x = point.min(axis=0) max_y, max_x = point.max(axis=0) + _, img_height, img_width = img.shape + + # Zero-padding + if min_y < 0: + np.pad(img, ((0, 0), (math.ceil(-min_y), 0), (0, 0)), 'constant') + min_y = 0 + if min_x < 0: + np.pad(img, ((0, 0), (0, 0), (math.ceil(-min_x), 0)), 'constant') + min_x = 0 + if max_y > img_height: + np.pad(img, ((0, 0), (0, math.ceil(max_y - img_height)), (0, 0)), 'constant') + max_y = img_height - 1 + if max_x > img_width: + np.pad(img, ((0, 0), (0, 0), (0, math.ceil(max_x - img_width))), 'constant') + max_x = img_width - 1 width = max_x - min_x height = max_y - min_y @@ -15,7 +32,6 @@ def crop_with_joints(img, point, scale_h=1.5, sacle_w=1.2, random_offset_ratio_y center_x = (max_x + min_x) / 2 center_y = (max_y + min_y) / 2 - _, img_height, img_width = img.shape new_min_x = int(np.clip(center_x - new_width / 2, 0, img_width)) new_max_x = int(np.clip(new_min_x + new_width, 0, img_width)) new_min_y = int(np.clip(center_y - 
new_height / 2, 0, img_height)) @@ -28,7 +44,7 @@ def crop_with_joints(img, point, scale_h=1.5, sacle_w=1.2, random_offset_ratio_y new_min_x = int(np.clip(new_min_x + offset_x, 0, min_x)) new_max_x = int(np.clip(new_max_x + offset_x, max_x, img_width)) new_min_y = int(np.clip(new_min_y + offset_y, 0, min_y)) - new_max_y = int(np.clip(new_max_y + offset_y, max_y, img_width)) + new_max_y = int(np.clip(new_max_y + offset_y, max_y, img_height)) crop = img[:, new_min_y:new_max_y, new_min_x:new_max_x] point = point - np.array([new_min_y, new_min_x]) @@ -50,5 +66,3 @@ def lr_flip(img, point): point = transforms.flip_point([point], (height, width), x_flip=True)[0] return img, point - - diff --git a/scripts/download.sh b/scripts/download.sh index 1d2c9f74..da99024e 100644 --- a/scripts/download.sh +++ b/scripts/download.sh @@ -11,10 +11,6 @@ if [ ! -f FLIC-full.zip ]; then wget http://vision.grasp.upenn.edu/video/FLIC-full.zip fi -if [ ! -f tr_plus_indices.mat ]; then - wget -fi - # Get LSP Extended Training Dataset if [ ! 
-d lspet_dataset.zip ]; then wget http://sam.johnson.io/research/lspet_dataset.zip From 78baca0ac89b41982ec4f1221f780ba8741495c5 Mon Sep 17 00:00:00 2001 From: Shunta Saito Date: Tue, 6 Aug 2019 13:21:11 +0900 Subject: [PATCH 8/8] Add calc_pcp --- deeppose/utils/calc_pcp.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 deeppose/utils/calc_pcp.py diff --git a/deeppose/utils/calc_pcp.py b/deeppose/utils/calc_pcp.py new file mode 100644 index 00000000..d44d3d60 --- /dev/null +++ b/deeppose/utils/calc_pcp.py @@ -0,0 +1,23 @@ +import numpy as np + + +def calc_pcp(pred, target, parts): + detected = [] + for start_i, end_i in parts: + correct_len = np.sqrt((target[start_i] - target[end_i]) ** 2) + correct_area_radius = correct_len / 2 + + start_dis = np.sqrt((target[start_i] - pred[start_i]) ** 2) + end_dis = np.sqrt((target[end_i] - pred[end_i]) ** 2) + + if start_dis <= correct_area_radius and end_dis <= correct_area_radius: + detected.append(True) + else: + detected.append(False) + + return np.asarray(detected) + + +def calc_pdj(pred, target, parts): + +