From bf4d7d6d1b4fc511adaefbb8a2da73e138d18c7a Mon Sep 17 00:00:00 2001
From: Pavlo Molchanov
Date: Thu, 2 Jan 2020 20:27:39 -0800
Subject: [PATCH] Create the correct inputs for the network and use the
 angular gaze loss function referenced from src.

---
 source_for_demo/4_test_faze.py | 43 ++++++++++------------------------
 1 file changed, 12 insertions(+), 31 deletions(-)

diff --git a/source_for_demo/4_test_faze.py b/source_for_demo/4_test_faze.py
index 43a26f0..fe36ba2 100755
--- a/source_for_demo/4_test_faze.py
+++ b/source_for_demo/4_test_faze.py
@@ -106,39 +106,17 @@ def R_y(phi):
         [-sin_, 0., cos_]
     ]).astype(np.float32)
 
-def vector_to_pitchyaw(vectors):
-    n = vectors.shape[0]
-    out = np.empty((n, 2))
-    vectors = np.divide(vectors, np.linalg.norm(vectors, axis=1).reshape(n, 1))
-    out[:, 0] = np.arcsin(vectors[:, 1])  # theta
-    out[:, 1] = np.arctan2(vectors[:, 0], vectors[:, 2])  # phi
-    return out
-
-def pitchyaw_to_vector(pitchyaws):
-    n = pitchyaws.shape[0]
-    sin = np.sin(pitchyaws)
-    cos = np.cos(pitchyaws)
-    out = np.empty((n, 3))
-    out[:, 0] = np.multiply(cos[:, 0], sin[:, 1])
-    out[:, 1] = sin[:, 0]
-    out[:, 2] = np.multiply(cos[:, 0], cos[:, 1])
-    return out
-
 def calculate_rotation_matrix(e):
     return np.matmul(R_y(e[1]), R_x(e[0]))
 
-
-g_a = pitchyaw_to_vector(data['labels'][:, :2])
-h_a = pitchyaw_to_vector(data['labels'][:, 2:4])
-
 n, h, w, c = data['pixels'].shape
 img = np.zeros((n, c, h, w))
 R_gaze_a = np.zeros((n, 3, 3))
 R_head_a = np.zeros((n, 3, 3))
 for i in range(data['pixels'].shape[0]):
     img[i, :, :, :] = preprocess_image(data['pixels'][i, :, :, :])
-    R_gaze_a[i, :, :] = calculate_rotation_matrix(g_a[i, :])
-    R_head_a[i, :, :] = calculate_rotation_matrix(h_a[i, :])
+    R_gaze_a[i, :, :] = calculate_rotation_matrix(data['labels'][i, :2])
+    R_head_a[i, :, :] = calculate_rotation_matrix(data['labels'][i, 2:4])
 
 # reduce the number of validation samples if
 # you have less GPU memory
@@ -148,16 +126,16 @@ def calculate_rotation_matrix(e):
 
 input_dict_train = {
     'image_a': img[train_indices, :, :, :],
-    'gaze_a': g_a[train_indices, :],
-    'head_a': h_a[train_indices, :],
+    'gaze_a': data['labels'][train_indices, :2],
+    'head_a': data['labels'][train_indices, 2:4],
     'R_gaze_a': R_gaze_a[train_indices, :, :],
     'R_head_a': R_head_a[train_indices, :, :],
 }
 
 input_dict_valid = {
     'image_a': img[valid_indices, :, :, :],
-    'gaze_a': g_a[valid_indices, :],
-    'head_a': h_a[valid_indices, :],
+    'gaze_a': data['labels'][valid_indices, :2],
+    'head_a': data['labels'][valid_indices, 2:4],
     'R_gaze_a': R_gaze_a[valid_indices, :, :],
     'R_head_a': R_head_a[valid_indices, :, :],
 }
@@ -169,6 +147,9 @@ def calculate_rotation_matrix(e):
 
 #############
 # Finetuning
+from losses import GazeAngularLoss
+loss = GazeAngularLoss()
+
 def nn_angular_distance(a, b):
     sim = F.cosine_similarity(a, b, eps=1e-6)
     sim = F.hardtanh(sim, 1e-6, 1.0 - 1e-6)
@@ -182,7 +163,7 @@ def nn_angular_distance(a, b):
 
 network.eval()
 output_dict = network(input_dict_valid)
-valid_loss = nn_angular_distance(output_dict['gaze_a_hat'], input_dict_valid['gaze_a'])
+valid_loss = loss(input_dict_valid, output_dict).cpu()
 print('%04d> , Validation: %.2f' % (0, valid_loss.item()))
 
 for i in range(num_finetuning_steps):
@@ -192,13 +173,13 @@ def nn_angular_distance(a, b):
 
     # forward + backward + optimize
     output_dict = network(input_dict_train)
-    train_loss = nn_angular_distance(output_dict['gaze_a_hat'], input_dict_train['gaze_a'])
+    train_loss = loss(input_dict_train, output_dict)
     train_loss.backward()
     optimizer.step()
 
     if i % 100 == 99:
         network.eval()
         output_dict = network(input_dict_valid)
-        valid_loss = nn_angular_distance(output_dict['gaze_a_hat'], input_dict_valid['gaze_a'])
+        valid_loss = loss(input_dict_valid, output_dict).cpu()
         print('%04d> Train: %.2f, Validation: %.2f' % (i+1, train_loss.item(), valid_loss.item()))
 
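
Note on the loss swap: GazeAngularLoss (imported from losses, i.e. the src
implementation) is expected to compute the same quantity the hand-rolled
nn_angular_distance call produced, namely the mean angular error in degrees
between predicted and ground-truth gaze directions. Below is a minimal
sketch of an equivalent loss, assuming the (pitch, yaw) convention of the
pitchyaw_to_vector helper removed above and torch tensors in the input
dictionaries; GazeAngularLossSketch and pitchyaw_to_vector_t are hypothetical
names, not the actual src API.

    import numpy as np
    import torch
    import torch.nn.functional as F

    def pitchyaw_to_vector_t(pitchyaws):
        # Torch port of the removed numpy helper: rows are (pitch, yaw) in radians.
        sin, cos = torch.sin(pitchyaws), torch.cos(pitchyaws)
        return torch.stack([cos[:, 0] * sin[:, 1],   # x
                            sin[:, 0],               # y
                            cos[:, 0] * cos[:, 1]],  # z
                           dim=1)

    class GazeAngularLossSketch(object):
        # Hypothetical stand-in for losses.GazeAngularLoss, matching the call
        # signature used in the patch: loss(input_dict, output_dict).
        def __call__(self, input_dict, output_dict):
            y = pitchyaw_to_vector_t(input_dict['gaze_a']).detach()
            y_hat = pitchyaw_to_vector_t(output_dict['gaze_a_hat'])
            sim = F.cosine_similarity(y, y_hat, dim=1, eps=1e-6)
            sim = F.hardtanh(sim, -1.0 + 1e-6, 1.0 - 1e-6)  # keep acos finite
            return torch.mean(torch.acos(sim) * 180.0 / np.pi)  # degrees

Because the loss consumes the whole input/output dictionaries rather than a
pair of tensors, the patched call sites no longer need the g_a/h_a vectors,
which is why the pitchyaw conversions could be deleted from the setup code.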