From 8392fe9c5e39faf5bb72b2ef91f56952919ce023 Mon Sep 17 00:00:00 2001 From: Stefan Helmert Date: Sat, 27 Feb 2016 01:04:52 +0100 Subject: [PATCH 1/8] added opportunity to disable RGB->BGR conversion in plot_pose_stickmodel_cv2mat() --- src/utils/visualization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils/visualization.py b/src/utils/visualization.py index 410dea5..003e346 100644 --- a/src/utils/visualization.py +++ b/src/utils/visualization.py @@ -9,15 +9,15 @@ import matplotlib.pyplot as plt import cv2 -def plot_pose_stickmodel_cv2mat(im, kpts, lw=3): +def plot_pose_stickmodel_cv2mat(im, kpts, lw=3, isRGB=True): ''' im : image kpts: key points 2 x N, where N is the number of keypoints (x,y) format lw : line width ''' - - im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR) + if isRGB: + im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR) #Plot the keypoints - this works for MPII style keypoints #Right leg From 3228df97c85aa3dc11ea9518d6e61e9582690ce2 Mon Sep 17 00:00:00 2001 From: Stefan Helmert Date: Thu, 3 Mar 2016 16:21:40 +0100 Subject: [PATCH 2/8] bugfix: major_ver has to be converted to int, because it is string --- src/pose_video_demo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pose_video_demo.py b/src/pose_video_demo.py index 7b82d96..8b3abfa 100755 --- a/src/pose_video_demo.py +++ b/src/pose_video_demo.py @@ -36,7 +36,7 @@ def posevideo(input_video_name, output_video_name=None, output_csv_name=None, is frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) if(output_video_name is not None and '' != output_video_name): if(False == outv.isOpened()): - if(major_ver<3): + if int(major_ver) < 3: fps = cap.get(cv2.cv.CV_CAP_PROP_FPS) outv.open(output_video_name, cv2.cv.CV_FOURCC('A', 'P', '4', '1'), fps, (np.size(frame, 1), np.size(frame, 0)), True) #, frame.shape, True) else: From 85a8d3b20e1937f30dd6cf2b428aca9bb25a0b41 Mon Sep 17 00:00:00 2001 From: Stefan Helmert Date: Fri, 11 Mar 2016 22:23:14 +0100 Subject: [PATCH
3/8] bugfix: arguments of pose_video_demo can not be parsed before definition --- src/__init__.py | 2 ++ src/pose_video_demo.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/__init__.py b/src/__init__.py index ac5e7c8..6a049da 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -5,3 +5,5 @@ # Written by Joao Carreira, Pulkit Agrawal and Katerina Fragkiadki # -------------------------------------------------------- from . import config +import _init_paths + diff --git a/src/pose_video_demo.py b/src/pose_video_demo.py index 8b3abfa..e1d3095 100755 --- a/src/pose_video_demo.py +++ b/src/pose_video_demo.py @@ -7,6 +7,7 @@ ************************************************************************/ """ +import _init_paths import cv2 import test_demo as td import scipy.misc as scm @@ -62,9 +63,8 @@ def parse_args(): parser.add_argument('--isGPU', dest='isGPU', help='Boolean value that specifies if a GPU should be used for detection - isGPU=False means the network runs on CPU', default=True, type=bool) parser.add_argument('--deviceId', dest='deviceId', help='Natural value that specifies the number of the GPU which should be used. 
It starts with 0.', default='0', type=int) parser.add_argument('--input_video', dest='input_video_name', help='The name of the video which should be analyzed.', default='video/demo.avi', type=str) - default_output_name = (parser.parse_args().input_video_name).rsplit('.', 1)[0] - parser.add_argument('--output_video', dest='output_video_name', help='The name of the video to be newly created containing the stick model.', default=default_output_name+'_PoseIEF.avi', type=str) - parser.add_argument('--output_csv', dest='output_csv_name', help='The name of the csv file to be newly created containing the joint postions.', default=default_output_name+'_PoseIEF.csv', type=str) + parser.add_argument('--output_video', dest='output_video_name', help='The name of the video to be newly created containing the stick model.', default='?', type=str) + parser.add_argument('--output_csv', dest='output_csv_name', help='The name of the csv file to be newly created containing the joint postions.', default='?', type=str) parser.add_argument('--x_bodyPt', dest='x_bodyPt', help='Natural value that represents the x-coordinate of the pointer telling which human should be analyzed.', default=600, type=int) parser.add_argument('--y_bodyPt', dest='y_bodyPt', help='Natural value that represents the y-coordinate of the pointer telling which human should be analyzed.', default=400, type=int) if len(sys.argv) == 1: @@ -72,6 +72,12 @@ def parse_args(): sys.exit(1) args = parser.parse_args() + default_output_name = args.input_video_name.rsplit('.', 1)[0] + if '?' == args.output_video_name: + args.output_video_name = default_output_name+'_PoseIEF.avi' + if '?' 
== args.output_csv_name: + args.output_csv_name = default_output_name+'_PoseIEF.csv' + return args if __name__ == '__main__': From 3cdc131bb2a1d485babed1a08c64be6998012cf9 Mon Sep 17 00:00:00 2001 From: Stefan Helmert Date: Wed, 16 Mar 2016 19:19:12 +0100 Subject: [PATCH 4/8] prediction options more flexible: number of iterations, optional use of pose from last prediction to initialize next prediction, optional fixed scale (disable netScale); current Problems: runaway --- src/pose_video_demo.py | 5 +- src/test_demo.py | 117 +++++++++++++++++++++++++++-------------- 2 files changed, 81 insertions(+), 41 deletions(-) diff --git a/src/pose_video_demo.py b/src/pose_video_demo.py index e1d3095..60bd2cb 100755 --- a/src/pose_video_demo.py +++ b/src/pose_video_demo.py @@ -43,7 +43,10 @@ def posevideo(input_video_name, output_video_name=None, output_csv_name=None, is else: fps = cap.get(cv2.CAP_PROP_FPS) outv.open(output_video_name, cv2.VideoWriter_fourcc('A', 'P', '4', '1'), fps, (np.size(frame, 1), np.size(frame, 0)), True) #, frame.shape, True) - pose,_ = ief.predict(frame, bodyPt) + try: + pose, currPose = ief.predict(frame, bodyPt, False, 1, True, 0, True, currPose) + except: + pose, currPose = ief.predict(frame, bodyPt, False, 1, True, 0) cnt += 1 print('Frame number: '+str(cnt)) if(output_csv_name is not None and '' != output_csv_name): diff --git a/src/test_demo.py b/src/test_demo.py index 2de1539..2c98c83 100644 --- a/src/test_demo.py +++ b/src/test_demo.py @@ -22,6 +22,8 @@ import scipy.io as sio import scipy.misc as scm import pdb +import time +import cv2 LIST_SCALES = cfg.SCALE_LAMBDA @@ -51,6 +53,7 @@ def get_pose_net(isGPU=True, deviceId=0): metaData = pickle.load(open(metaFile, 'r')) return net, metaData + ## # Predicting Poses class PoseIEF(object): @@ -68,73 +71,107 @@ def __init__(self, netScale=None, netPose=None, metaPose=None, cropSz=256, poseI self.cropSz_ = cropSz self.poseImSz_ = poseImSz - ## - #Predict pose - def predict(self, 
imName='./test_images/mpii-test-079555750.jpg', - bodyPt=(249,249), returnIm=False): - ''' - imName : image file name for which the pose needs to be predicted - bodyPt : A point on the body of the person (torso) for whom the pose - is to be predicted - returnIm: If True, return the image also - ''' - cropSz, poseImSz = self.cropSz_, self.poseImSz_ - #Read the image - if(isinstance(imName, str)): - im = scm.imread(imName) - else: - im = imName - + def proc_fixedScale(self, im, cropSz, poseImSz, bodyPt, scaleIdx): + imScale = np.zeros((cropSz, cropSz, 3)) + oScale = np.zeros((2)) + oPos = np.zeros((2)) + scale = LIST_SCALES[scaleIdx] + imScale, scs, crpPos = imu.centered_crop(cropSz, copy.deepcopy(im), bodyPt, scale, returnScale=True) + oScale = np.array(scs).reshape(1,2) + oPos = np.array(crpPos).reshape(1,2) + xSt, ySt = (cropSz - poseImSz)/2, (cropSz - poseImSz)/2 + xEn, yEn = xSt + poseImSz, ySt + poseImSz + imScale = imScale[ySt:yEn, xSt:xEn,:].reshape((1,poseImSz,poseImSz,3)) + return imScale, xSt, ySt, oPos, oScale, scaleIdx + + + + def proc_netScale(self, im, cropSz, poseImSz, bodyPt): #Crop the image at different scales + t = time.time() imData = np.zeros((len(LIST_SCALES), cropSz, cropSz, 3)) scData = np.zeros((len(LIST_SCALES), 2)) posData = np.zeros((len(LIST_SCALES), 2)) for i,s in enumerate(LIST_SCALES): - imData[i], scs, crpPos = imu.centered_crop(cropSz, copy.deepcopy(im), bodyPt, s, - returnScale=True) + imData[i], scs, crpPos = imu.centered_crop(cropSz, copy.deepcopy(im), bodyPt, s, returnScale=True) scData[i] = np.array(scs).reshape(1,2) posData[i] = np.array(crpPos).reshape(1,2) + print('crop time: {:.3f}s').format(time.time() - t) #Use the scale net to find the best scale - scaleOp = self.netScale_.forward(blobs=['fc-op'], data=imData) + t = time.time() + scaleOp = self.netScale_.forward(blobs=['fc-op'], data=imData) + print('netScale time: {:.3f}s').format(time.time() - t) scaleIdx = scaleOp['fc-op'].squeeze().argmax() scale = 
LIST_SCALES[scaleIdx] #Scale to use to return the image in the original space oScale = scData[scaleIdx] #Original location of the cropped image oPos = posData[scaleIdx] - #Prepare image for pose prediction imScale = imData[scaleIdx] + print(scaleIdx) + print(len(imData)) xSt, ySt = (cropSz - poseImSz)/2, (cropSz - poseImSz)/2 xEn, yEn = xSt + poseImSz, ySt + poseImSz imScale = imScale[ySt:yEn, xSt:xEn,:].reshape((1,poseImSz,poseImSz,3)) + return imScale, xSt, ySt, oPos, oScale, scaleIdx - #Seed pose - currPose = np.zeros((1,17,2,1)).astype(np.float32) - for i in range(16): - currPose[0,i,0] = copy.deepcopy(self.seedPose_[0,i] - xSt) - currPose[0,i,1] = copy.deepcopy(self.seedPose_[1,i] - ySt) - #The marking point is the center of the image - currPose[0, 16, 0] = poseImSz / 2 - currPose[0, 16, 1] = poseImSz / 2 - + def proc_netPose(self, imScale, currPose): + t = time.time() #Dummy labels labels = np.zeros((1,16,2,1)).astype(np.float32) + poseOp = self.netPose_.forward(blobs=['cls3_fc'], image=imScale, kp_pos=copy.deepcopy(currPose), label=labels) + print('netPose time: {:.3f}s').format(time.time() - t) + kPred = copy.deepcopy(poseOp['cls3_fc'].squeeze()) + for i in range(16): + dx, dy = kPred[i], kPred[16 + i] + #print(dx, dy) + currPose[0,i,0] = currPose[0,i,0] + self.mxStepSz_ * dx + currPose[0,i,1] = currPose[0,i,1] + self.mxStepSz_ * dy + return currPose + ## + #Predict pose + def predict(self, imName='./test_images/mpii-test-079555750.jpg', bodyPt=(249,249), returnIm=False, noIterations=4, fixedScale=False, scaleIdx=None, initialPose=False, currPose=None): + ''' + imName : image file name for which the pose needs to be predicted + bodyPt : A point on the body of the person (torso) for whom the pose + is to be predicted + returnIm: If True, return the image also + ''' + tt = time.time() + cropSz, poseImSz = self.cropSz_, self.poseImSz_ + #Read the image + if(isinstance(imName, str)): + im = scm.imread(imName) + else: + im = imName + + if True == fixedScale: + 
imScale, xSt, ySt, oPos, oScale, scaleIdx = self.proc_fixedScale(im, cropSz, poseImSz, bodyPt, scaleIdx) + else: + imScale, xSt, ySt, oPos, oScale, scaleIdx = self.proc_netScale(im, cropSz, poseImSz, bodyPt) + + if False == initialPose: + #Seed pose + currPose = np.zeros((1,17,2,1)).astype(np.float32) + for i in range(16): + currPose[0,i,0] = copy.deepcopy(self.seedPose_[0,i] - xSt) + currPose[0,i,1] = copy.deepcopy(self.seedPose_[1,i] - ySt) + #The marking point is the center of the image + currPose[0, 16, 0] = poseImSz / 2 + currPose[0, 16, 1] = poseImSz / 2 + cv2.imshow('imScale', imScale[0]) + cv2.waitKey(1) + currPose = np.divide(currPose,1.002) #Predict Pose - for step in range(4): - poseOp = self.netPose_.forward(blobs=['cls3_fc'], image=imScale, - kp_pos=copy.deepcopy(currPose), label=labels) - kPred = copy.deepcopy(poseOp['cls3_fc'].squeeze()) - for i in range(16): - dx, dy = kPred[i], kPred[16 + i] - currPose[0,i,0] = currPose[0,i,0] + self.mxStepSz_ * dx - currPose[0,i,1] = currPose[0,i,1] + self.mxStepSz_ * dy - + for step in range(noIterations): + currPose = self.proc_netPose(imScale, currPose) #Convert the pose in the original image coordinated origPose = (currPose.squeeze() + np.array([xSt, ySt]).reshape(1,2)) * oScale + oPos - + + print('predict time: {:.3f}s').format(time.time() - tt) if returnIm: #return origPose, copy.deepcopy(currPose), imScale[0] return origPose, im From 3211c84c271b8da557ebbdeabe083d7291df932f Mon Sep 17 00:00:00 2001 From: Stefan Helmert Date: Thu, 17 Mar 2016 00:11:20 +0100 Subject: [PATCH 5/8] prepared for test with initialisation of each prediction with previous pose; initialisation is weighted with loopfactor --- src/pose_video_demo.py | 4 ++-- src/test_demo.py | 25 +++++++++++++------------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/pose_video_demo.py b/src/pose_video_demo.py index 60bd2cb..4ef0663 100755 --- a/src/pose_video_demo.py +++ b/src/pose_video_demo.py @@ -44,9 +44,9 @@ def 
posevideo(input_video_name, output_video_name=None, output_csv_name=None, is fps = cap.get(cv2.CAP_PROP_FPS) outv.open(output_video_name, cv2.VideoWriter_fourcc('A', 'P', '4', '1'), fps, (np.size(frame, 1), np.size(frame, 0)), True) #, frame.shape, True) try: - pose, currPose = ief.predict(frame, bodyPt, False, 1, True, 0, True, currPose) + pose, currPose = ief.predict(frame, bodyPt, False, 2, True, 0, True, currPose, 0.0) except: - pose, currPose = ief.predict(frame, bodyPt, False, 1, True, 0) + pose, currPose = ief.predict(frame, bodyPt, False, 4, True, 0) cnt += 1 print('Frame number: '+str(cnt)) if(output_csv_name is not None and '' != output_csv_name): diff --git a/src/test_demo.py b/src/test_demo.py index 2c98c83..78cfeba 100644 --- a/src/test_demo.py +++ b/src/test_demo.py @@ -132,7 +132,7 @@ def proc_netPose(self, imScale, currPose): return currPose ## #Predict pose - def predict(self, imName='./test_images/mpii-test-079555750.jpg', bodyPt=(249,249), returnIm=False, noIterations=4, fixedScale=False, scaleIdx=None, initialPose=False, currPose=None): + def predict(self, imName='./test_images/mpii-test-079555750.jpg', bodyPt=(249,249), returnIm=False, noIterations=4, fixedScale=False, scaleIdx=None, initialPose=False, currPose=None, loopfactor=1.0): ''' imName : image file name for which the pose needs to be predicted bodyPt : A point on the body of the person (torso) for whom the pose @@ -152,19 +152,20 @@ def predict(self, imName='./test_images/mpii-test-079555750.jpg', bodyPt=(249,24 else: imScale, xSt, ySt, oPos, oScale, scaleIdx = self.proc_netScale(im, cropSz, poseImSz, bodyPt) - if False == initialPose: #Seed pose - currPose = np.zeros((1,17,2,1)).astype(np.float32) - for i in range(16): - currPose[0,i,0] = copy.deepcopy(self.seedPose_[0,i] - xSt) - currPose[0,i,1] = copy.deepcopy(self.seedPose_[1,i] - ySt) - #The marking point is the center of the image - currPose[0, 16, 0] = poseImSz / 2 - currPose[0, 16, 1] = poseImSz / 2 + currPose_ = 
np.zeros((1,17,2,1)).astype(np.float32) + for i in range(16): + currPose_[0,i,0] = copy.deepcopy(self.seedPose_[0,i] - xSt) + currPose_[0,i,1] = copy.deepcopy(self.seedPose_[1,i] - ySt) + #The marking point is the center of the image + currPose_[0, 16, 0] = poseImSz / 2 + currPose_[0, 16, 1] = poseImSz / 2 - cv2.imshow('imScale', imScale[0]) - cv2.waitKey(1) - currPose = np.divide(currPose,1.002) + if False == initialPose: + currPose = currPose_ + #cv2.imshow('imScale', imScale[0]) + #cv2.waitKey(1) + currPose = np.add(np.multiply(currPose, loopfactor), np.multiply(currPose_, 1.0-loopfactor)) #Predict Pose for step in range(noIterations): currPose = self.proc_netPose(imScale, currPose) From 56879b6d1d47072b57393151e4b9a20db9a48cad Mon Sep 17 00:00:00 2001 From: Stefan Helmert Date: Thu, 24 Mar 2016 18:35:55 +0100 Subject: [PATCH 6/8] added human detection; multi-person operation; tracking for currPose reuse for same person and csv person number; added frame number to csv; parameters about human detection --- src/pose_video_demo.py | 141 +++++++++++++++++++++++++++++++++++++---- src/test_demo.py | 9 +++ 2 files changed, 137 insertions(+), 13 deletions(-) diff --git a/src/pose_video_demo.py b/src/pose_video_demo.py index 4ef0663..b861d80 100755 --- a/src/pose_video_demo.py +++ b/src/pose_video_demo.py @@ -15,10 +15,71 @@ import csv import time, os, sys import argparse +try: + import _init_py_faster_rcnn_paths + import detectcore +except: + print('No person detector found! - Person detection not useable.
Please specify the coordinates where humans appear that should be analyzed.') +import collections +import copy +#try: +#except: +# print('py-faster-rcnn not available - no automatic human detection') +class rectangle_c: + def __init__(self): + self.x_center = 0 + self.y_center = 0 + self.x_range = 0 + self.y_range = 0 -def posevideo(input_video_name, output_video_name=None, output_csv_name=None, isGPU=True, deviceId=0, bodyPt=[600, 400]): +def humdet(frame, threshold=0.5): + global gnet + cls_vec, dets_vec = detectcore.detect_object(gnet, frame) + person = dets_vec[cls_vec.index("person")] + human_vec = [] + score_vec = [] + detection_vec = [] + det_world_vec = [] + dets = person + thresh = threshold + inds = np.where(dets[:, -1] >= thresh)[0] + for i in inds: + bbox = dets[i, :4] + score = dets[i, -1] + score_vec.append(score) + detection = rectangle_c() + detection.x_center = (bbox[0] + bbox[2])/2 + detection.y_center = (bbox[1] + bbox[3])/2 + detection.x_range = (bbox[2] - bbox[0]) + detection.y_range = (bbox[3] - bbox[1]) + detection_vec.append(detection) + return detection_vec + + + +def sameorder(objs, objs_old): + objs_new = copy.deepcopy(objs_old) + while len(objs_new) < len(objs): + objs_new.append(rectangle_c()) + objs_set = np.zeros(len(objs_new)) + while np.sum(objs_set) < len(objs) and np.sum(objs_set) < len(objs_set): + for i, obj in enumerate(objs): + dist_min = 10000000 + idx = i + for j, obj_old in enumerate(objs_old): + if 0 == objs_set[j]: + dist = np.sqrt(np.power(obj_old.x_center - obj.x_center, 2) + np.power(obj_old.y_center - obj.y_center, 2)) + if dist < dist_min: + dist_min = dist + idx = j + objs_new[idx] = copy.deepcopy(obj) + objs_set[idx] = 1 + return objs_new + +def posevideo(input_video_name, output_video_name=None, output_csv_name=None, isGPU=True, deviceId=0, bodyPt=[600, 400], iterations=4, fixedScale=False, scaleIdx=0, fbfactor=0.0, thresh=0.9, detinterv=10, bodyPts= [600, 400]): """ processing the video """ # Find OpenCV version 
+ global gnet (major_ver, minor_ver, subminor_ver) = (cv2.__version__).split('.') ief = td.PoseIEF(isGPU=isGPU, deviceId=deviceId) @@ -28,8 +89,12 @@ def posevideo(input_video_name, output_video_name=None, output_csv_name=None, is if(output_csv_name is not None and '' != output_csv_name): pose_csv_file = open(output_csv_name, 'w') pose_csv = csv.writer(pose_csv_file) - pose_csv.writerows([['x_rft', 'y_rft', 'x_rkn', 'y_rkn', 'x_rhp', 'y_rhp', 'x_lhp', 'y_lhp', 'x_lkn', 'y_lkn', 'x_lft', 'y_lft', 'x_plv', 'y_plv', 'x_trx', 'y_trx', 'x_un', 'y_un', 'x_hd', 'y_hd', 'x_rhn', 'y_rhn', 'x_rlb', 'y_rlb', 'x_rsh', 'y_rsh', 'x_lsh', 'y_lsh', 'x_llb', 'y_llb', 'x_lhn', 'y_lhn', 'x_hum', 'y_hum']]) + pose_csv.writerows([['no_frm', 'no_prs', 'x_rft', 'y_rft', 'x_rkn', 'y_rkn', 'x_rhp', 'y_rhp', 'x_lhp', 'y_lhp', 'x_lkn', 'y_lkn', 'x_lft', 'y_lft', 'x_plv', 'y_plv', 'x_trx', 'y_trx', 'x_un', 'y_un', 'x_hd', 'y_hd', 'x_rhn', 'y_rhn', 'x_rlb', 'y_rlb', 'x_rsh', 'y_rsh', 'x_lsh', 'y_lsh', 'x_llb', 'y_llb', 'x_lhn', 'y_lhn', 'x_hum', 'y_hum']]) cnt = 0 + + humans_old = [] + currPoses = [] + read_bodyPts = True while(True): ret, frame = cap.read() if ret is False: @@ -43,17 +108,50 @@ def posevideo(input_video_name, output_video_name=None, output_csv_name=None, is else: fps = cap.get(cv2.CAP_PROP_FPS) outv.open(output_video_name, cv2.VideoWriter_fourcc('A', 'P', '4', '1'), fps, (np.size(frame, 1), np.size(frame, 0)), True) #, frame.shape, True) - try: - pose, currPose = ief.predict(frame, bodyPt, False, 2, True, 0, True, currPose, 0.0) - except: - pose, currPose = ief.predict(frame, bodyPt, False, 4, True, 0) - cnt += 1 - print('Frame number: '+str(cnt)) - if(output_csv_name is not None and '' != output_csv_name): - pose_arr = np.append(pose,[]) - pose_csv.writerows([pose_arr]) + poses = [] + if 0 < detinterv: + if 0 == cnt % detinterv: + humans = humdet(frame, thresh) + else: + humans = [] + if read_bodyPts: + human = rectangle_c() + for i, val in enumerate(bodyPts): + if 0 == i % 
2: + human.x_center = val + else: + human.y_center = val + humans.append(copy.deepcopy(human)) + read_bodyPts = False + + humans = sameorder(humans, humans_old) + humans_old = humans + for i, human in enumerate(humans): + bodyPt = [human.x_center, human.y_center] + try: + currPose = currPoses[i] + pose, currPose = ief.predict(frame, bodyPt, False, iterations, fixedScale, scaleIdx, True, currPose, fbfactor) + except: + pose, currPose = ief.predict(frame, bodyPt, False, iterations, fixedScale, scaleIdx) + humans_old[i].x_center = pose[7][0] + humans_old[i].y_center = pose[7][1] + try: + currPoses[i] = currPose + except: + currPoses.append(currPose) + cnt += 1 + print('Frame number: '+str(cnt)) + poses.append(pose) + if(output_csv_name is not None and '' != output_csv_name): + pose_arr = np.append([cnt ,i], pose) + pose_csv.writerows([pose_arr]) + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if(output_video_name is not None and '' != output_video_name): - frame = td.vis.plot_pose_stickmodel_cv2mat(frame, pose.squeeze().transpose((1,0))) + for i, pose in enumerate(poses): + frame = td.vis.plot_pose_stickmodel_cv2mat(frame, pose.squeeze().transpose((1,0)), 2, False) + cv2.putText(frame, str(i), (int(humans[i].x_center), int(humans[i].y_center)), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0)) + cv2.imshow('stickmodel', frame) + cv2.waitKey(1) outv.write(frame) if(output_video_name is not None and '' != output_video_name): outv.close() @@ -70,6 +168,17 @@ def parse_args(): parser.add_argument('--output_csv', dest='output_csv_name', help='The name of the csv file to be newly created containing the joint postions.', default='?', type=str) parser.add_argument('--x_bodyPt', dest='x_bodyPt', help='Natural value that represents the x-coordinate of the pointer telling which human should be analyzed.', default=600, type=int) parser.add_argument('--y_bodyPt', dest='y_bodyPt', help='Natural value that represents the y-coordinate of the pointer telling which human should be analyzed.', 
default=400, type=int) + parser.add_argument('--iterations', dest='iterations', help='Natural value that specifies how much IEF iterations per image should be done.', default=4, type=int) + parser.add_argument('--scaleIdx', dest='scaleIdx', help='Natural value that specifies the IEF scaleIdx if fixed scale flag is set.', default=0, type=int) + parser.add_argument('--fbfactor', dest='fbfactor', help='Fractional value between 0.0 and 1.0 that specifies the weight of the last pose of the last image for the current image.', default=0.0, type=float) + parser.add_argument('--fixedScale', dest='fixedScale', help='Boolean value that deactivates the autoscale network netScale. The scale index scaleIdx has to be specified manualy (default=0)', default=False, type=bool) + parser.add_argument('--bodyPts', dest='bodyPts', help='Natural value coordinates of human appearance/starting points for pose detection. Order: x_person_1 y_person_1 x_person_2 ...', default=[600, 400], type=int, nargs='+') + # arguments for human detection + parser.add_argument('--thresh', dest='thresh', help='Fractional value between 0.0 and 1.0 that specifies the selectivity of human detection.', default=0.9, type=float) + parser.add_argument('--detinterv', dest='detinterv', help='Natural value that specifies every how much images human detection is done.', default=10, type=int) + parser.add_argument('--net', dest='net', help='The name of network used for human detection.', default='vgg16', type=str) + + if len(sys.argv) == 1: parser.print_help() sys.exit(1) @@ -84,8 +193,14 @@ def parse_args(): return args if __name__ == '__main__': + global gnet args = parse_args() print('Called with args:') print(args) - posevideo(args.input_video_name, args.output_video_name, args.output_csv_name, isGPU=args.isGPU, deviceId=args.deviceId, bodyPt=[args.x_bodyPt, args.y_bodyPt]) + frargs = collections.namedtuple('args', 'demo_net cpu_mode gpu_id') + frargs.cpu_mode = not args.isGPU + frargs.demo_net = args.net + 
frargs.gpu_id = args.deviceId + gnet = detectcore.init(frargs) + posevideo(args.input_video_name, args.output_video_name, args.output_csv_name, isGPU=args.isGPU, deviceId=args.deviceId, bodyPt=[args.x_bodyPt, args.y_bodyPt], iterations=args.iterations, fixedScale=args.fixedScale, scaleIdx=args.scaleIdx, fbfactor=args.fbfactor, thresh=args.thresh, detinterv=args.detinterv, bodyPts=args.bodyPts) diff --git a/src/test_demo.py b/src/test_demo.py index 78cfeba..0b23a02 100644 --- a/src/test_demo.py +++ b/src/test_demo.py @@ -71,6 +71,15 @@ def __init__(self, netScale=None, netPose=None, metaPose=None, cropSz=256, poseI self.cropSz_ = cropSz self.poseImSz_ = poseImSz + + def calc_scaleIdx_from_bbox(self, width, height): + cropSz = self.cropSz_ + hscale = cropSz / height + wscale = cropSz / width + for i,s in enumerate(LIST_SCALES): + if s < hscale or s < wscale: + return s + def proc_fixedScale(self, im, cropSz, poseImSz, bodyPt, scaleIdx): imScale = np.zeros((cropSz, cropSz, 3)) oScale = np.zeros((2)) From e020c98664a41c1c4eaf8f1799b58a8babc8e33c Mon Sep 17 00:00:00 2001 From: Stefan Helmert Date: Fri, 1 Apr 2016 15:11:43 +0200 Subject: [PATCH 7/8] argument for maximum number of tracked humans --- src/pose_video_demo.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pose_video_demo.py b/src/pose_video_demo.py index b861d80..5fd5e5e 100755 --- a/src/pose_video_demo.py +++ b/src/pose_video_demo.py @@ -76,7 +76,7 @@ def sameorder(objs, objs_old): objs_set[idx] = 1 return objs_new -def posevideo(input_video_name, output_video_name=None, output_csv_name=None, isGPU=True, deviceId=0, bodyPt=[600, 400], iterations=4, fixedScale=False, scaleIdx=0, fbfactor=0.0, thresh=0.9, detinterv=10, bodyPts= [600, 400]): +def posevideo(input_video_name, output_video_name=None, output_csv_name=None, isGPU=True, deviceId=0, bodyPt=[600, 400], iterations=4, fixedScale=False, scaleIdx=0, fbfactor=0.0, thresh=0.9, detinterv=10, bodyPts= [600, 400], maxhumans=4): """ 
processing the video """ # Find OpenCV version global gnet @@ -125,6 +125,7 @@ def posevideo(input_video_name, output_video_name=None, output_csv_name=None, is read_bodyPts = False humans = sameorder(humans, humans_old) + humans = humans[0:maxhumans] humans_old = humans for i, human in enumerate(humans): bodyPt = [human.x_center, human.y_center] @@ -177,6 +178,7 @@ def parse_args(): parser.add_argument('--thresh', dest='thresh', help='Fractional value between 0.0 and 1.0 that specifies the selectivity of human detection.', default=0.9, type=float) parser.add_argument('--detinterv', dest='detinterv', help='Natural value that specifies every how much images human detection is done.', default=10, type=int) parser.add_argument('--net', dest='net', help='The name of network used for human detection.', default='vgg16', type=str) + parser.add_argument('--maxhumans', dest='maxhumans', help='Natural value that specifies maximum number of tracked humans.', default=4, type=int) if len(sys.argv) == 1: @@ -202,5 +204,5 @@ def parse_args(): frargs.demo_net = args.net frargs.gpu_id = args.deviceId gnet = detectcore.init(frargs) - posevideo(args.input_video_name, args.output_video_name, args.output_csv_name, isGPU=args.isGPU, deviceId=args.deviceId, bodyPt=[args.x_bodyPt, args.y_bodyPt], iterations=args.iterations, fixedScale=args.fixedScale, scaleIdx=args.scaleIdx, fbfactor=args.fbfactor, thresh=args.thresh, detinterv=args.detinterv, bodyPts=args.bodyPts) + posevideo(args.input_video_name, args.output_video_name, args.output_csv_name, isGPU=args.isGPU, deviceId=args.deviceId, bodyPt=[args.x_bodyPt, args.y_bodyPt], iterations=args.iterations, fixedScale=args.fixedScale, scaleIdx=args.scaleIdx, fbfactor=args.fbfactor, thresh=args.thresh, detinterv=args.detinterv, bodyPts=args.bodyPts, maxhumans=args.maxhumans) From 0e17a4a6827f15ecf28f71c19818d9b80bc11a78 Mon Sep 17 00:00:00 2001 From: Stefan Helmert Date: Thu, 13 Oct 2016 15:57:42 +0200 Subject: [PATCH 8/8] minor changes --- 
src/pose_video_demo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pose_video_demo.py b/src/pose_video_demo.py index 5fd5e5e..a2a39f0 100755 --- a/src/pose_video_demo.py +++ b/src/pose_video_demo.py @@ -1,4 +1,5 @@ #!/usr/bin/env python2 +# vim: set shiftwidth=1 """ /************************************************************************ Copyright (c) 2016, Stefan Helmert @@ -127,6 +128,8 @@ def posevideo(input_video_name, output_video_name=None, output_csv_name=None, is humans = sameorder(humans, humans_old) humans = humans[0:maxhumans] humans_old = humans + cnt += 1 + print('Frame number: '+str(cnt)) for i, human in enumerate(humans): bodyPt = [human.x_center, human.y_center] try: @@ -140,8 +143,6 @@ def posevideo(input_video_name, output_video_name=None, output_csv_name=None, is currPoses[i] = currPose except: currPoses.append(currPose) - cnt += 1 - print('Frame number: '+str(cnt)) poses.append(pose) if(output_csv_name is not None and '' != output_csv_name): pose_arr = np.append([cnt ,i], pose)