RVC4 support for parsers v2 (#14)

* fix: YuNetParser dequantization
* fix: YuNetParser input size estimation
* fix: ImageOutputParser dequantization
* style: pre-commit formatting
* fix: MonocularDepthParser dequantization
* fix: ensure depth map is a 2D array
* fix: MonocularDepthParser output reshaping due to dequantization
* Precommit fix.
* Support for 4D tensors - RVC2

---------

Co-authored-by: Jaša Kerec <[email protected]>
luxonis · Aug 29, 2024 · 9b4aff5 · 9b4aff5
1 parent 1043366
commit 9b4aff5
Show file tree

Hide file tree

Showing 4 changed files with 42 additions and 21 deletions.
diff --git a/depthai_nodes/ml/messages/creators/depth.py b/depthai_nodes/ml/messages/creators/depth.py
@@ -11,32 +11,22 @@ def create_depth_message(
 ) -> dai.ImgFrame:
     """Create a DepthAI message for a depth map.
 
-    @param depth_map: A NumPy array representing the depth map with shape (CHW or HWC).
+    @param depth_map: A NumPy array representing the depth map with shape (HW).
     @type depth_map: np.array
     @param depth_type: A string indicating the type of depth map. It can either be
         'relative' or 'metric'.
     @type depth_type: Literal['relative', 'metric']
     @return: An ImgFrame object containing the depth information.
     @rtype: dai.ImgFrame
     @raise ValueError: If the depth map is not a NumPy array.
-    @raise ValueError: If the depth map is not 3D.
-    @raise ValueError: If the depth map shape is not CHW or HWC.
+    @raise ValueError: If the depth map is not 2D.
     @raise ValueError: If the depth type is not 'relative' or 'metric'.
     """
 
     if not isinstance(depth_map, np.ndarray):
         raise ValueError(f"Expected numpy array, got {type(depth_map)}.")
-    if len(depth_map.shape) != 3:
-        raise ValueError(f"Expected 3D input, got {len(depth_map.shape)}D input.")
-
-    if depth_map.shape[0] == 1:
-        depth_map = depth_map[0, :, :]  # CHW to HW
-    elif depth_map.shape[2] == 1:
-        depth_map = depth_map[:, :, 0]  # HWC to HW
-    else:
-        raise ValueError(
-            "Unexpected image shape. Expected CHW or HWC, got", depth_map.shape
-        )
+    if len(depth_map.shape) != 2:
+        raise ValueError(f"Expected 2D input, got {len(depth_map.shape)}D input.")
 
     if depth_type == "relative":
         data_type = dai.ImgFrame.Type.RAW16

diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py
@@ -58,6 +58,7 @@ def run(self):
                 raise ValueError(
                     f"Expected 1 output layer, got {len(output_layer_names)}."
                 )
+
             output_image = output.getTensor(output_layer_names[0], dequantize=True)
 
             if len(output_image.shape) == 4:

diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py
@@ -58,9 +58,22 @@ def run(self):
                 raise ValueError(
                     f"Expected 1 output layer, got {len(output_layer_names)}."
                 )
-            depth_map = output.getTensor(output_layer_names[0])
 
-            depth_map = depth_map[0]
+            output_map = output.getTensor(output_layer_names[0], dequantize=True)
+
+            if len(output_map.shape) == 3:
+                if output_map.shape[0] == 1:
+                    depth_map = output_map[0]
+                elif output_map.shape[2] == 1:
+                    depth_map = output_map[:, :, 0]
+            elif len(output_map.shape) == 2:
+                depth_map = output_map
+            elif len(output_map.shape) == 4:
+                depth_map = output_map[0][0]
+            else:
+                raise ValueError(
+                    f"Expected 3- or 2-dimensional output, got {len(output_map.shape)}-dimensional",
+                )
 
             depth_message = create_depth_message(
                 depth_map=depth_map,

diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py
@@ -98,18 +98,35 @@ def run(self):
 
             # get input_size
             stride0 = strides[0]
-            _, spatial_positions0, _ = output.getTensor(f"cls_{stride0}").shape
+            cls_stride0_shape = output.getTensor(
+                f"cls_{stride0}", dequantize=True
+            ).shape
+            if len(cls_stride0_shape) == 3:
+                _, spatial_positions0, _ = cls_stride0_shape
+            elif len(cls_stride0_shape) == 2:
+                spatial_positions0, _ = cls_stride0_shape
             input_width = input_height = int(
                 math.sqrt(spatial_positions0) * stride0
             )  # TODO: We assume a square input size. How to get input size when height and width are not equal?
             input_size = (input_width, input_height)
 
             detections = []
             for stride in strides:
-                cls = output.getTensor(f"cls_{stride}").squeeze(0)
-                obj = output.getTensor(f"obj_{stride}").flatten()
-                bbox = output.getTensor(f"bbox_{stride}").squeeze(0)
-                kps = output.getTensor(f"kps_{stride}").squeeze(0)
+                cls = output.getTensor(f"cls_{stride}", dequantize=True)
+                cls = cls.astype(np.float32)
+                cls = cls.squeeze(0) if cls.shape[0] == 1 else cls
+
+                obj = output.getTensor(f"obj_{stride}", dequantize=True).flatten()
+                obj = obj.astype(np.float32)
+
+                bbox = output.getTensor(f"bbox_{stride}", dequantize=True)
+                bbox = bbox.astype(np.float32)
+                bbox = bbox.squeeze(0) if bbox.shape[0] == 1 else bbox
+
+                kps = output.getTensor(f"kps_{stride}", dequantize=True)
+                kps = kps.astype(np.float32)
+                kps = kps.squeeze(0) if kps.shape[0] == 1 else kps
+
                 detections += decode_detections(
                     input_size,
                     stride,