# Tutorial https://learnopencv.com/create-snapchat-instagram-filters-using-mediapipe/
# Filter image from https://lovepik.com/image-401708795/green-peking-opera-facebook.html
import cv2
import mediapipe as mp
import numpy as np
import csv
import faceBlendCommon as fbc
import math
import time
# from concurrent.futures import ThreadPoolExecutor
VISUALIZE_FACE_POINTS = False
filters_config = {
'jing':
[{'path': "filters/jing.png",
'anno_path': "filters/labels_jing_new.csv",
'morph': True, 'animated': False, 'has_alpha': True}]
}
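# Each entry in filters_config maps a filter name to a list of dicts: 'path' is
# the overlay image, 'anno_path' its landmark annotations, 'morph' selects
# per-triangle warping, 'animated' marks video filters, and 'has_alpha' says the
# image carries an alpha channel. A second filter could be registered as in the
# hypothetical sketch below (the paths are placeholders, not bundled files):
#
# filters_config['my_filter'] = [{
#     'path': "filters/my_filter.png",              # placeholder image path
#     'anno_path': "filters/labels_my_filter.csv",  # placeholder annotation path
#     'morph': True, 'animated': False, 'has_alpha': True}]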
# This could also be implemented with the landmark_pb2 model from the MediaPipe
# framework, but the MediaPipe Solutions API used below works as well.
# mp_face_landmarker.py shows how to use the MediaPipe framework directly.
def get_landmarks_from_mesh(image):
mp_face_mesh = mp.solutions.face_mesh
    # 75 pre-selected indices into MediaPipe's 468-point face mesh, matching
    # the 75-point annotation scheme used by the filter CSV
selected_landmarks = [127, 93, 58, 136, 150, 149, 176, 148, 152, 377, 400, 378, 379, 365, 288, 323, 356, 70, 63, 105, 66, 55,
285, 296, 334, 293, 300, 168, 6, 195, 4, 64, 60, 94, 290, 439, 33, 160, 158, 173, 153, 144, 398, 385,
387, 466, 373, 380, 61, 40, 39, 0, 269, 270, 291, 321, 405, 17, 181, 91, 78, 81, 13, 311, 306, 402, 14,
178, 162, 54, 67, 10, 297, 284, 389]
height, width = image.shape[:-1]
with mp_face_mesh.FaceMesh(max_num_faces=1, static_image_mode=True, min_detection_confidence=0.5) as face_mesh:
results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
if not results.multi_face_landmarks:
print("No face detected.")
return
for face_landmarks in results.multi_face_landmarks:
values = np.array(face_landmarks.landmark)
face_coordinates = np.zeros((len(values), 2))
# Extract the x, y coordinates of the face landmarks
for idx,value in enumerate(values):
face_coordinates[idx][0] = value.x
face_coordinates[idx][1] = value.y
# Convert normalized points to image coordinates
face_coordinates = face_coordinates * (width, height)
face_coordinates = face_coordinates.astype("int")
relevant_coordinates = []
for i in selected_landmarks:
relevant_coordinates.append(face_coordinates[i])
return relevant_coordinates
return
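# A minimal usage sketch for get_landmarks_from_mesh ("face.jpg" is a
# placeholder path, not a bundled file):
#
# image = cv2.imread("face.jpg")  # BGR; the function converts to RGB itself
# landmarks = get_landmarks_from_mesh(image)
# if landmarks is not None:
#     print(len(landmarks))  # 75 (x, y) pixel coordinates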
def load_filter_image(image_path, has_alpha):
    # Read the image, keeping the alpha channel if present (IMREAD_UNCHANGED)
image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
alpha = None
if has_alpha:
b, g, r, alpha = cv2.split(image)
image = cv2.merge((b, g, r))
return image, alpha
def load_landmarks(annotation_file):
with open(annotation_file) as csv_file:
csv_reader = csv.reader(csv_file, delimiter=",")
points = {}
for i, row in enumerate(csv_reader):
            # Skip the header or empty/malformed rows if present
try:
x, y = int(row[1]), int(row[2])
points[row[0]] = (x, y)
except ValueError:
continue
return points
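# The annotation CSV is expected to hold one landmark per row as "name,x,y",
# where the name is the stringified point index that find_convex_hull looks up
# below; header or malformed rows are skipped by the ValueError guard.
# Illustrative rows (the coordinates are made up):
#
# 0,155,412
# 1,160,470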
# Instead of finding contours (e.g., with gradients), we can use the convex
# hull to get the face outline
def find_convex_hull(points):
hull = []
    # Indices of the filter annotation points that will be mapped to the face landmarks
hullIndex = cv2.convexHull(np.array(list(points.values())), clockwise=False, returnPoints=False)
addPoints = [
[48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], # Outer lips
[60], [61], [62], [63], [64], [65], [66], [67], # Inner lips
[27], [28], [29], [30], [31], [32], [33], [34], [35], # Nose
[36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], # Eyes
[17], [18], [19], [20], [21], [22], [23], [24], [25], [26] # Eyebrows
]
# print(len(hullIndex))
hullIndex = np.concatenate((hullIndex, addPoints))
for i in range(0, len(hullIndex)):
# print(i," ",points[str(hullIndex[i][0])])
hull.append(points[str(hullIndex[i][0])])
return hull, hullIndex
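# cv2.convexHull with returnPoints=False returns indices into the input array
# rather than the points themselves; a toy sketch with made-up coordinates:
#
# pts = np.array([[0, 0], [10, 0], [10, 10], [0, 10], [5, 5]], dtype=np.int32)
# idx = cv2.convexHull(pts, clockwise=False, returnPoints=False)
# # idx holds the indices of the four corners; the interior point [5, 5] is
# # dropped, which is why the facial-feature indices are appended manually above.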
# Load the requested filter (only 'jing' is bundled)
def load_filter(filter_name="jing"):
    filters = filters_config[filter_name]
    multi_filter_runtime = []
    for filter_config in filters:
        runtime = {}
        image1, image1_alpha = load_filter_image(filter_config['path'], filter_config['has_alpha'])
        runtime['image'] = image1
        runtime['image_a'] = image1_alpha
        points = load_landmarks(filter_config['anno_path'])
        runtime['points'] = points
        # Use the fbc helper module to find the convex hull and the Delaunay triangulation
        if filter_config['morph']:
            hull, hullIndex = find_convex_hull(points)
            sizeimage1 = image1.shape
            rect = (0, 0, sizeimage1[1], sizeimage1[0])
            dt = fbc.calculateDelaunayTriangles(rect, hull)
            runtime['hull'] = hull
            runtime['hullIndex'] = hullIndex
            runtime['dt'] = dt
            if len(dt) == 0:
                continue
        # Animated filters are not available at the moment
        if filter_config['animated']:
            filter_cap = cv2.VideoCapture(filter_config['path'])
            runtime['cap'] = filter_cap
        multi_filter_runtime.append(runtime)
    return filters, multi_filter_runtime
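# Usage sketch for the bundled 'jing' filter (requires the files referenced in
# filters_config to be present on disk):
#
# filters, runtimes = load_filter("jing")
# print(runtimes[0].keys())  # image, image_a, points, hull, hullIndex, dt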
def main():
# Input from webcam
cap = cv2.VideoCapture(0)
# cap = cv2.VideoCapture("dataset9_480p_65.mp4")
    # State variables for the tracking loop
count = 0
isFirstFrame = True
sigma = 50
    # Load the initial filter; only one is configured, so the iterator is just for extensibility
iter_filter_keys = iter(filters_config.keys())
filters, multi_filter_runtime = load_filter(next(iter_filter_keys))
# The main loop
while True:
starttime = time.time()
ret, frame = cap.read()
if not ret:
break
else:
            points2 = get_landmarks_from_mesh(frame)  # the function converts BGR to RGB itself
interval1 = time.time()
# print("Time for get_landmarks_from_mesh: ", interval1 - starttime)
            # Skip the frame if the face is missing or only partially detected
if not points2 or (len(points2) != 75):
continue
# Optical Flow and Stabilization Code
# 0.05s per frame, the bottleneck is here
image2Gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
if isFirstFrame:
points2Prev = np.array(points2, np.float32)
image2GrayPrev = np.copy(image2Gray)
isFirstFrame = False
            # Tuning the optical-flow parameters doesn't help; it can't remove the bottleneck
# lk_params = dict(winSize=(51, 101), maxLevel=10,
lk_params = dict(winSize=(101, 101), maxLevel=15,
criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 20, 0.001))
points2Next, st, err = cv2.calcOpticalFlowPyrLK(image2GrayPrev, image2Gray, points2Prev,
np.array(points2, np.float32),
**lk_params)
# Final landmark points are a weighted average of detected landmarks and tracked landmarks (that's why we need optical flow)
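            # Per point: p = (1 - alpha) * detected + alpha * tracked, with
            # alpha = exp(-d^2 / sigma) and d = |detected - tracked|. Small
            # motion gives alpha near 1 (trust tracking, suppress jitter);
            # large motion gives alpha near 0 (trust the fresh detection).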
for k in range(0, len(points2)):
d = cv2.norm(np.array(points2[k]) - points2Next[k])
alpha = math.exp(-d * d / sigma)
points2[k] = (1 - alpha) * np.array(points2[k]) + alpha * points2Next[k]
points2[k] = fbc.constrainPoint(points2[k], frame.shape[1], frame.shape[0])
points2[k] = (int(points2[k][0]), int(points2[k][1]))
# Update variables for next pass
points2Prev = np.array(points2, np.float32)
image2GrayPrev = image2Gray
# End of Optical Flow and Stabilization Code
interval2 = time.time()
# print("Time for Optical Flow: ", interval2 - interval1)
if VISUALIZE_FACE_POINTS:
for idx, point in enumerate(points2):
cv2.circle(frame, point, 2, (255, 0, 0), -1)
cv2.putText(frame, str(idx), point, cv2.FONT_HERSHEY_SIMPLEX, .3, (255, 255, 255), 1)
cv2.imshow("landmarks", frame)
# Filter the frame
            for idx, filter_config in enumerate(filters):
filter_runtime = multi_filter_runtime[idx]
image1 = filter_runtime['image']
points1 = filter_runtime['points']
image1_alpha = filter_runtime['image_a']
                if filter_config['morph']:
hullIndex = filter_runtime['hullIndex']
dt = filter_runtime['dt']
hull1 = filter_runtime['hull']
# create copy of frame
warped_image = np.copy(frame)
# Find convex hull
hull2 = []
for i in range(0, len(hullIndex)):
hull2.append(points2[hullIndex[i][0]])
mask1 = np.zeros((warped_image.shape[0], warped_image.shape[1]), dtype=np.float32)
mask1 = cv2.merge((mask1, mask1, mask1))
image1_alpha_mask = cv2.merge((image1_alpha, image1_alpha, image1_alpha))
# Warp the triangles
for i in range(0, len(dt)):
t1 = []
t2 = []
for j in range(0, 3):
t1.append(hull1[dt[i][j]])
t2.append(hull2[dt[i][j]])
fbc.warpTriangle(image1, warped_image, t1, t2)
fbc.warpTriangle(image1_alpha_mask, mask1, t1, t2)
# Blur the mask before blending
mask1 = cv2.GaussianBlur(mask1, (3, 3), 10)
mask2 = (255.0, 255.0, 255.0) - mask1
# Perform alpha blending of the two images
tmp1 = np.multiply(warped_image, (mask1 * (1.0 / 255)))
tmp2 = np.multiply(frame, (mask2 * (1.0 / 255)))
output = tmp1 + tmp2
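                    # i.e. output = warped * (mask1 / 255) + frame * (1 - mask1 / 255),
                    # a per-pixel convex combination weighted by the warped alpha mask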
interval3 = time.time()
# print("Time for morph filter: ", interval3 - interval2)
else:
dst_points = [points2[int(list(points1.keys())[0])], points2[int(list(points1.keys())[1])]]
tform = fbc.similarityTransform(list(points1.values()), dst_points)
# Apply similarity transform to input image
trans_image = cv2.warpAffine(image1, tform, (frame.shape[1], frame.shape[0]))
trans_alpha = cv2.warpAffine(image1_alpha, tform, (frame.shape[1], frame.shape[0]))
mask1 = cv2.merge((trans_alpha, trans_alpha, trans_alpha))
# Blur the mask before blending
mask1 = cv2.GaussianBlur(mask1, (3, 3), 10)
mask2 = (255.0, 255.0, 255.0) - mask1
# Perform alpha blending of the two images
tmp1 = np.multiply(trans_image, (mask1 * (1.0 / 255)))
tmp2 = np.multiply(frame, (mask2 * (1.0 / 255)))
output = tmp1 + tmp2
frame = output = np.uint8(output)
cv2.imshow("Face Filter", output)
keypressed = cv2.waitKey(1) & 0xFF
if keypressed == 27:
break
count += 1
# Takes a frame as input and returns the filtered frame
def filtering(frame):
# startTime = time.time()
# interval1, interval2 = time.time(), None
count = 0
# isFirstFrame = True
sigma = 50
    # Resizing the frame to 320x240 doesn't help much; the bottleneck is the optical flow
# frame = cv2.resize(frame, (320, 240))
    # Load the initial filter; only one is configured, so the iterator is just for extensibility
iter_filter_keys = iter(filters_config.keys())
filters, multi_filter_runtime = load_filter(next(iter_filter_keys))
    # Single pass of the pipeline (same steps as the loop body in main)
    points2 = get_landmarks_from_mesh(frame)  # the function converts BGR to RGB itself
if not points2 or (len(points2) != 75):
return frame
# Optical Flow and Stabilization Code, 0.05s per frame, the bottleneck is here
image2Gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
points2Prev = np.array(points2, np.float32)
image2GrayPrev = np.copy(image2Gray)
    # Tuning the optical-flow parameters doesn't help
# lk_params = dict(winSize=(51, 51), maxLevel=10,
lk_params = dict(winSize=(101, 101), maxLevel=15,
criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 20, 0.001))
points2Next, st, err = cv2.calcOpticalFlowPyrLK(image2GrayPrev, image2Gray, points2Prev,
np.array(points2, np.float32),
**lk_params)
# interval2 = time.time()
# print("Time for Optical Flow: ", interval2 - interval1)
# Final landmark points are a weighted average of detected landmarks and tracked landmarks
for k in range(0, len(points2)):
d = cv2.norm(np.array(points2[k]) - points2Next[k])
alpha = math.exp(-d * d / sigma)
points2[k] = (1 - alpha) * np.array(points2[k]) + alpha * points2Next[k]
points2[k] = fbc.constrainPoint(points2[k], frame.shape[1], frame.shape[0])
points2[k] = (int(points2[k][0]), int(points2[k][1]))
# Update variables for next pass
points2Prev = np.array(points2, np.float32)
image2GrayPrev = image2Gray
# End of Optical Flow and Stabilization Code
# if VISUALIZE_FACE_POINTS:
# for idx, point in enumerate(points2):
# cv2.circle(frame, point, 2, (255, 0, 0), -1)
# cv2.putText(frame, str(idx), point, cv2.FONT_HERSHEY_SIMPLEX, .3, (255, 255, 255), 1)
# cv2.imshow("landmarks", frame)
    for idx, filter_config in enumerate(filters):
filter_runtime = multi_filter_runtime[idx]
image1 = filter_runtime['image']
points1 = filter_runtime['points']
image1_alpha = filter_runtime['image_a']
        if filter_config['morph']:
hullIndex = filter_runtime['hullIndex']
dt = filter_runtime['dt']
hull1 = filter_runtime['hull']
# create copy of frame
warped_image = np.copy(frame)
# Find convex hull
hull2 = []
for i in range(0, len(hullIndex)):
hull2.append(points2[hullIndex[i][0]])
mask1 = np.zeros((warped_image.shape[0], warped_image.shape[1]), dtype=np.float32)
mask1 = cv2.merge((mask1, mask1, mask1))
image1_alpha_mask = cv2.merge((image1_alpha, image1_alpha, image1_alpha))
# Warp the triangles
for i in range(0, len(dt)):
t1 = []
t2 = []
for j in range(0, 3):
t1.append(hull1[dt[i][j]])
t2.append(hull2[dt[i][j]])
fbc.warpTriangle(image1, warped_image, t1, t2)
fbc.warpTriangle(image1_alpha_mask, mask1, t1, t2)
            # This is a simple filter, so the triangle warps are not the bottleneck
# def warp_triangle(i):
# t1 = [hull1[dt[i][j]] for j in range(3)]
# t2 = [hull2[dt[i][j]] for j in range(3)]
# fbc.warpTriangle(image1, warped_image, t1, t2)
# fbc.warpTriangle(image1_alpha_mask, mask1, t1, t2)
# with ThreadPoolExecutor() as executor:
# executor.map(warp_triangle, range(len(dt)))
# Blur the mask before blending
mask1 = cv2.GaussianBlur(mask1, (3, 3), 10)
mask2 = (255.0, 255.0, 255.0) - mask1
# Perform alpha blending of the two images
tmp1 = np.multiply(warped_image, (mask1 * (1.0 / 255)))
tmp2 = np.multiply(frame, (mask2 * (1.0 / 255)))
output = tmp1 + tmp2
else:
dst_points = [points2[int(list(points1.keys())[0])], points2[int(list(points1.keys())[1])]]
tform = fbc.similarityTransform(list(points1.values()), dst_points)
# Apply similarity transform to input image
trans_image = cv2.warpAffine(image1, tform, (frame.shape[1], frame.shape[0]))
trans_alpha = cv2.warpAffine(image1_alpha, tform, (frame.shape[1], frame.shape[0]))
mask1 = cv2.merge((trans_alpha, trans_alpha, trans_alpha))
# Blur the mask before blending
mask1 = cv2.GaussianBlur(mask1, (3, 3), 10)
mask2 = (255.0, 255.0, 255.0) - mask1
# Perform alpha blending of the two images
            tmp1 = np.multiply(trans_image, (mask1 * (1.0 / 255)))
tmp2 = np.multiply(frame, (mask2 * (1.0 / 255)))
output = tmp1 + tmp2
frame = output = np.uint8(output)
return frame
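# filtering() reloads the filter assets on every call, so callers processing
# video may want to hoist load_filter() out of the per-frame path. A usage
# sketch ("face.jpg" / "face_filtered.jpg" are placeholder paths):
#
# img = cv2.imread("face.jpg")
# out = filtering(img)
# cv2.imwrite("face_filtered.jpg", out)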
if __name__ == "__main__":
main()