From 9b4aff5b0ba9a1548dd3ca59c988397a0b716549 Mon Sep 17 00:00:00 2001 From: jkbmrz <74824974+jkbmrz@users.noreply.github.com> Date: Thu, 29 Aug 2024 08:53:09 +0200 Subject: [PATCH] RVC4 support for parsers v2 (#14) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: YuNetParser dequantization * fix: YuNetParser input size estimation * fix: ImageOutputParser dequantization * style: pre-commit formatting * fix: MonocularDepthParser dequantization * fix: ensure depth map is a 2D array * fix: MonocularDepthParser output reshaping due to dequantization * Precommit fix. * Support for 4D tensors - RVC2 --------- Co-authored-by: Jaša Kerec <61207502+kkeroo@users.noreply.github.com> --- depthai_nodes/ml/messages/creators/depth.py | 18 +++----------- depthai_nodes/ml/parsers/image_output.py | 1 + depthai_nodes/ml/parsers/monocular_depth.py | 17 +++++++++++-- depthai_nodes/ml/parsers/yunet.py | 27 +++++++++++++++++---- 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/depthai_nodes/ml/messages/creators/depth.py b/depthai_nodes/ml/messages/creators/depth.py index dabb44f9..69ab60ee 100644 --- a/depthai_nodes/ml/messages/creators/depth.py +++ b/depthai_nodes/ml/messages/creators/depth.py @@ -11,7 +11,7 @@ def create_depth_message( ) -> dai.ImgFrame: """Create a DepthAI message for a depth map. - @param depth_map: A NumPy array representing the depth map with shape (CHW or HWC). + @param depth_map: A NumPy array representing the depth map with shape (HW). @type depth_map: np.array @param depth_type: A string indicating the type of depth map. It can either be 'relative' or 'metric'. @@ -19,24 +19,14 @@ def create_depth_message( @return: An ImgFrame object containing the depth information. @rtype: dai.ImgFrame @raise ValueError: If the depth map is not a NumPy array. - @raise ValueError: If the depth map is not 3D. - @raise ValueError: If the depth map shape is not CHW or HWC. + @raise ValueError: If the depth map is not 2D. @raise ValueError: If the depth type is not 'relative' or 'metric'. """ if not isinstance(depth_map, np.ndarray): raise ValueError(f"Expected numpy array, got {type(depth_map)}.") - if len(depth_map.shape) != 3: - raise ValueError(f"Expected 3D input, got {len(depth_map.shape)}D input.") - - if depth_map.shape[0] == 1: - depth_map = depth_map[0, :, :] # CHW to HW - elif depth_map.shape[2] == 1: - depth_map = depth_map[:, :, 0] # HWC to HW - else: - raise ValueError( - "Unexpected image shape. Expected CHW or HWC, got", depth_map.shape - ) + if len(depth_map.shape) != 2: + raise ValueError(f"Expected 2D input, got {len(depth_map.shape)}D input.") if depth_type == "relative": data_type = dai.ImgFrame.Type.RAW16 diff --git a/depthai_nodes/ml/parsers/image_output.py b/depthai_nodes/ml/parsers/image_output.py index b159303d..4ae4e50d 100644 --- a/depthai_nodes/ml/parsers/image_output.py +++ b/depthai_nodes/ml/parsers/image_output.py @@ -58,6 +58,7 @@ def run(self): raise ValueError( f"Expected 1 output layer, got {len(output_layer_names)}." ) + output_image = output.getTensor(output_layer_names[0], dequantize=True) if len(output_image.shape) == 4: diff --git a/depthai_nodes/ml/parsers/monocular_depth.py b/depthai_nodes/ml/parsers/monocular_depth.py index c76a8b6d..5e47b686 100644 --- a/depthai_nodes/ml/parsers/monocular_depth.py +++ b/depthai_nodes/ml/parsers/monocular_depth.py @@ -58,9 +58,22 @@ def run(self): raise ValueError( f"Expected 1 output layer, got {len(output_layer_names)}." ) - depth_map = output.getTensor(output_layer_names[0]) - depth_map = depth_map[0] + output_map = output.getTensor(output_layer_names[0], dequantize=True) + + if len(output_map.shape) == 3: + if output_map.shape[0] == 1: + depth_map = output_map[0] + elif output_map.shape[2] == 1: + depth_map = output_map[:, :, 0] + elif len(output_map.shape) == 2: + depth_map = output_map + elif len(output_map.shape) == 4: + depth_map = output_map[0][0] + else: + raise ValueError( + f"Expected 3- or 2-dimensional output, got {len(output_map.shape)}-dimensional", + ) depth_message = create_depth_message( depth_map=depth_map, diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py index 4db4ca92..c8ee782f 100644 --- a/depthai_nodes/ml/parsers/yunet.py +++ b/depthai_nodes/ml/parsers/yunet.py @@ -98,7 +98,13 @@ def run(self): # get input_size stride0 = strides[0] - _, spatial_positions0, _ = output.getTensor(f"cls_{stride0}").shape + cls_stride0_shape = output.getTensor( + f"cls_{stride0}", dequantize=True + ).shape + if len(cls_stride0_shape) == 3: + _, spatial_positions0, _ = cls_stride0_shape + elif len(cls_stride0_shape) == 2: + spatial_positions0, _ = cls_stride0_shape input_width = input_height = int( math.sqrt(spatial_positions0) * stride0 ) # TODO: We assume a square input size. How to get input size when height and width are not equal? @@ -106,10 +112,21 @@ def run(self): detections = [] for stride in strides: - cls = output.getTensor(f"cls_{stride}").squeeze(0) - obj = output.getTensor(f"obj_{stride}").flatten() - bbox = output.getTensor(f"bbox_{stride}").squeeze(0) - kps = output.getTensor(f"kps_{stride}").squeeze(0) + cls = output.getTensor(f"cls_{stride}", dequantize=True) + cls = cls.astype(np.float32) + cls = cls.squeeze(0) if cls.shape[0] == 1 else cls + + obj = output.getTensor(f"obj_{stride}", dequantize=True).flatten() + obj = obj.astype(np.float32) + + bbox = output.getTensor(f"bbox_{stride}", dequantize=True) + bbox = bbox.astype(np.float32) + bbox = bbox.squeeze(0) if bbox.shape[0] == 1 else bbox + + kps = output.getTensor(f"kps_{stride}", dequantize=True) + kps = kps.astype(np.float32) + kps = kps.squeeze(0) if kps.shape[0] == 1 else kps + detections += decode_detections( input_size, stride,