luxonis · HonzaCuhel · Aug 30, 2024 · Aug 28, 2024
diff --git a/tools/modules/heads.py b/tools/modules/heads.py
@@ -16,9 +16,10 @@ def make_anchors(feats, strides, grid_cell_offset=0.5):
         sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset  # shift x
         sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
         sy, sx = torch.meshgrid(sy, sx, indexing="ij")
-        anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
-        stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
-    return torch.cat(anchor_points), torch.cat(stride_tensor)
+        anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2).transpose(0, 1))
+        stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device).transpose(0, 1))
+    return anchor_points, stride_tensor
+    # NOTE: returns per-level lists of (2, h*w) anchors and (1, h*w) strides; previously torch.cat(anchor_points), torch.cat(stride_tensor).
 
 
 class DetectV5(nn.Module):
@@ -432,26 +433,27 @@ def forward(self, x):
         """Perform forward pass through YOLO model and return predictions."""
         bs = x[0].shape[0]  # batch size
         if self.shape != bs:
-            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
+            self.anchors, self.strides = make_anchors(x, self.stride, 0.5)
             self.shape = bs
 
         # Detection part
         outputs = super().forward(x)
 
         # Pose part
-        kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
-        pred_kpt = self.kpts_decode(bs, kpt)
-        outputs.append(pred_kpt)
+        for i in range(self.nl):
+            kpt = self.cv4[i](x[i]).view(bs, self.nk, -1)
+            outputs.append(self.kpts_decode(bs, kpt, i))
 
         return outputs
 
-    def kpts_decode(self, bs, kpts):
+    def kpts_decode(self, bs, kpts, i):
         """Decodes keypoints."""
         ndim = self.kpt_shape[1]
         y = kpts.view(bs, *self.kpt_shape, -1)
-        a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
+        a = (y[:, :, :2] * 2.0 + (self.anchors[i] - 0.5)) * self.strides[i]
         if ndim == 3:
-            a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
+            # NOTE: third keypoint channel is appended raw (no sigmoid here); disabled alternative: torch.cat((a, y[:, :, 2:3].sigmoid()*10), 2)
+            a = torch.cat((a, y[:, :, 2:3]), 2)
         return a.view(bs, self.nk, -1)
 
 

diff --git a/tools/yolo/yolov8_exporter.py b/tools/yolo/yolov8_exporter.py
@@ -44,7 +44,7 @@ def get_output_names(mode: int) -> List[str]:
     elif mode == OBB_MODE:
         return ["output1_yolov8", "output2_yolov8", "output3_yolov8", "angle_output"]
     elif mode == POSE_MODE:
-        return ["output1_yolov8", "output2_yolov8", "output3_yolov8", "kpt_output"]
+        return ["output1_yolov8", "output2_yolov8", "output3_yolov8", "kpt_output1", "kpt_output2", "kpt_output3"]
     return ["output"]