test_app.py
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences
import platform
# Load the trained model
model = tf.keras.models.load_model('asl_model_tuned.keras')
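# (The reshape in preprocess_landmarks below implies the model expects input
# of shape (1, 63, 3, 1), i.e. 63 landmark rows of (x, y, z) values.)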
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.3)
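# static_image_mode=False treats input as a video stream, so landmarks are
# tracked across frames; the 0.3 detection threshold favors recall over precision.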
# Label encoder to convert numerical labels back to original labels
label_encoder = LabelEncoder()
label_encoder.fit(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space'])
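# Note: LabelEncoder sorts its classes alphabetically, so the model's output
# indices are assumed to follow this same sorted 29-class order from training.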
# Function to preprocess hand landmarks for prediction
def preprocess_landmarks(landmarks, fixed_length=63):
    data_aux = []
    for hand_landmarks in landmarks:
        for landmark in hand_landmarks.landmark:
            data_aux.extend([landmark.x, landmark.y, landmark.z])
    # Pad or trim the sequence to the fixed length
    if len(data_aux) > fixed_length * 3:
        data_aux = data_aux[:fixed_length * 3]
    data_padded = pad_sequences([data_aux], maxlen=fixed_length * 3, padding='post', dtype='float32')[0]
    # Reshape to the correct shape for the model
    reshaped_data = np.array(data_padded).reshape((1, fixed_length, 3, 1))
    return reshaped_data
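# Example (assuming a single detected hand: 21 landmarks x 3 coords = 63 values,
# zero-padded to the 189 values the model consumes):
#   preprocess_landmarks(results.multi_hand_landmarks).shape  # -> (1, 63, 3, 1)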
if platform.system() == 'Darwin':
    # Capture video from webcam (macOS users; AVFoundation backend)
    cap = cv2.VideoCapture(1, cv2.CAP_AVFOUNDATION)
else:
    # Capture video from webcam (Windows/Linux users)
    cap = cv2.VideoCapture(0)
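# Optional fail-fast check (a minimal sketch; the device indices above are
# machine-dependent and may need adjusting):
if not cap.isOpened():
    raise RuntimeError('Could not open webcam; try a different device index.')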
# Loop to capture frames from the webcam
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Convert the BGR image to RGB
    img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Process the frame to detect hands
    results = hands.process(img_rgb)
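    # results.multi_hand_landmarks is None when no hands are detected,
    # so the guard below skips prediction on empty frames.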
    if results.multi_hand_landmarks:
        # Preprocess landmarks for prediction
        landmarks = preprocess_landmarks(results.multi_hand_landmarks, fixed_length=63)
        # Predict the sign (verbose=0 keeps Keras from logging every frame)
        prediction = model.predict(landmarks, verbose=0)
        class_idx = np.argmax(prediction)
        class_label = label_encoder.inverse_transform([class_idx])[0]
        # Draw the landmarks and label on the frame
        for hand_landmarks in results.multi_hand_landmarks:
            mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        # Display the label
        cv2.putText(frame, class_label, (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 0), 3, cv2.LINE_AA)
    # Display the frame
    cv2.imshow('ASL Sign Detection', frame)
    # Break the loop on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# Release the webcam, close windows, and free MediaPipe resources
cap.release()
cv2.destroyAllWindows()
hands.close()