Skip to content

Commit

Permalink
RVC4 support for parsers v2 (#14)
Browse files Browse the repository at this point in the history
* fix: YuNetParser dequantization

* fix: YuNetParser input size estimation

* fix: ImageOutputParser dequantization

* style: pre-commit formatting

* fix: MonocularDepthParser dequantization

* fix: ensure depth map is a 2D array

* fix: MonocularDepthParser output reshaping due to dequantization

* Pre-commit fix.

* Support for 4D tensors - RVC2

---------

Co-authored-by: Jaša Kerec <[email protected]>
  • Loading branch information
jkbmrz and kkeroo authored Aug 29, 2024
1 parent 1043366 commit 9b4aff5
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 21 deletions.
18 changes: 4 additions & 14 deletions depthai_nodes/ml/messages/creators/depth.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,32 +11,22 @@ def create_depth_message(
) -> dai.ImgFrame:
"""Create a DepthAI message for a depth map.
@param depth_map: A NumPy array representing the depth map with shape (CHW or HWC).
@param depth_map: A NumPy array representing the depth map with shape (HW).
@type depth_map: np.array
@param depth_type: A string indicating the type of depth map. It can either be
'relative' or 'metric'.
@type depth_type: Literal['relative', 'metric']
@return: An ImgFrame object containing the depth information.
@rtype: dai.ImgFrame
@raise ValueError: If the depth map is not a NumPy array.
@raise ValueError: If the depth map is not 3D.
@raise ValueError: If the depth map shape is not CHW or HWC.
@raise ValueError: If the depth map is not 2D.
@raise ValueError: If the depth type is not 'relative' or 'metric'.
"""

if not isinstance(depth_map, np.ndarray):
raise ValueError(f"Expected numpy array, got {type(depth_map)}.")
if len(depth_map.shape) != 3:
raise ValueError(f"Expected 3D input, got {len(depth_map.shape)}D input.")

if depth_map.shape[0] == 1:
depth_map = depth_map[0, :, :] # CHW to HW
elif depth_map.shape[2] == 1:
depth_map = depth_map[:, :, 0] # HWC to HW
else:
raise ValueError(
"Unexpected image shape. Expected CHW or HWC, got", depth_map.shape
)
if len(depth_map.shape) != 2:
raise ValueError(f"Expected 2D input, got {len(depth_map.shape)}D input.")

if depth_type == "relative":
data_type = dai.ImgFrame.Type.RAW16
Expand Down
1 change: 1 addition & 0 deletions depthai_nodes/ml/parsers/image_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def run(self):
raise ValueError(
f"Expected 1 output layer, got {len(output_layer_names)}."
)

output_image = output.getTensor(output_layer_names[0], dequantize=True)

if len(output_image.shape) == 4:
Expand Down
17 changes: 15 additions & 2 deletions depthai_nodes/ml/parsers/monocular_depth.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,22 @@ def run(self):
raise ValueError(
f"Expected 1 output layer, got {len(output_layer_names)}."
)
depth_map = output.getTensor(output_layer_names[0])

depth_map = depth_map[0]
output_map = output.getTensor(output_layer_names[0], dequantize=True)

if len(output_map.shape) == 3:
if output_map.shape[0] == 1:
depth_map = output_map[0]
elif output_map.shape[2] == 1:
depth_map = output_map[:, :, 0]
elif len(output_map.shape) == 2:
depth_map = output_map
elif len(output_map.shape) == 4:
depth_map = output_map[0][0]
else:
raise ValueError(
f"Expected 3- or 2-dimensional output, got {len(output_map.shape)}-dimensional",
)

depth_message = create_depth_message(
depth_map=depth_map,
Expand Down
27 changes: 22 additions & 5 deletions depthai_nodes/ml/parsers/yunet.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,35 @@ def run(self):

# get input_size
stride0 = strides[0]
_, spatial_positions0, _ = output.getTensor(f"cls_{stride0}").shape
cls_stride0_shape = output.getTensor(
f"cls_{stride0}", dequantize=True
).shape
if len(cls_stride0_shape) == 3:
_, spatial_positions0, _ = cls_stride0_shape
elif len(cls_stride0_shape) == 2:
spatial_positions0, _ = cls_stride0_shape
input_width = input_height = int(
math.sqrt(spatial_positions0) * stride0
) # TODO: We assume a square input size. How to get input size when height and width are not equal?
input_size = (input_width, input_height)

detections = []
for stride in strides:
cls = output.getTensor(f"cls_{stride}").squeeze(0)
obj = output.getTensor(f"obj_{stride}").flatten()
bbox = output.getTensor(f"bbox_{stride}").squeeze(0)
kps = output.getTensor(f"kps_{stride}").squeeze(0)
cls = output.getTensor(f"cls_{stride}", dequantize=True)
cls = cls.astype(np.float32)
cls = cls.squeeze(0) if cls.shape[0] == 1 else cls

obj = output.getTensor(f"obj_{stride}", dequantize=True).flatten()
obj = obj.astype(np.float32)

bbox = output.getTensor(f"bbox_{stride}", dequantize=True)
bbox = bbox.astype(np.float32)
bbox = bbox.squeeze(0) if bbox.shape[0] == 1 else bbox

kps = output.getTensor(f"kps_{stride}", dequantize=True)
kps = kps.astype(np.float32)
kps = kps.squeeze(0) if kps.shape[0] == 1 else kps

detections += decode_detections(
input_size,
stride,
Expand Down

0 comments on commit 9b4aff5

Please sign in to comment.