Face Detection Depth Fix (#130)
* Ignore mouth points no farther away than the fork tip

* Using depth_octomap to filter out the fork/food and fitting a plane to the face

* Formatting cleanup

* Addressed bugs & verified both face detection methods work on the real robot

---------

Co-authored-by: taylorkf <[email protected]>
Co-authored-by: Amal Nanavati <[email protected]>
3 people authored Nov 14, 2023
1 parent 7295169 commit 48aeced
Showing 4 changed files with 164 additions and 37 deletions.
6 changes: 5 additions & 1 deletion .pylintrc
@@ -193,7 +193,11 @@ function-naming-style=snake_case
#function-rgx=

# Good variable names which should always be accepted, separated by a comma.
good-names=i,
good-names=a,
b,
c,
d,
i,
j,
k,
x,
2 changes: 1 addition & 1 deletion ada_feeding/README.md
@@ -6,7 +6,7 @@ The `ada_feeding` package contains the overarching code to run the robot-assiste

This code has been developed and tested with the Kinova JACO Gen2 Arm, on computers running Ubuntu 22.04. To get started:
- Install [ROS2 Humble](https://docs.ros.org/en/humble/Installation.html)
- Install Python dependencies: `python3 -m pip install overrides pyrealsense2 pyyaml py_trees pymongo tornado trimesh`
- Install Python dependencies: `python3 -m pip install overrides pyrealsense2 pyyaml py_trees pymongo tornado trimesh scikit-spatial`
- NOTE: [`pyrealsense2` is not released for ARM systems](https://github.com/IntelRealSense/librealsense/issues/6449#issuecomment-650784066), so ARM users will have to [build from source](https://github.com/IntelRealSense/librealsense/blob/master/wrappers/python/readme.md#building-from-source). When running `sudo make install`, pay close attention to which path `pyrealsense2` is installed to and add *that path* to the `PYTHONPATH` -- it should be `/usr/local/lib` but may be `/usr/local/OFF`.
- Install the code to command the real robot ([instructions here](https://github.com/personalrobotics/ada_ros2/blob/main/README.md))
- Git clone the [PRL fork of pymoveit2 (branch: `amaln/move_collision`)](https://github.com/personalrobotics/pymoveit2/tree/amaln/move_collision), the [PRL fork of py_trees_ros (branch: `amaln/service_client`)](https://github.com/personalrobotics/py_trees_ros/tree/amaln/service_client), and [ros2_numpy (branch: `humble`)](https://github.com/Box-Robotics/ros2_numpy) into your ROS2 workspace's `src` folder.
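- As a quick, optional sanity check that the Python dependencies above installed into the active environment (a minimal sketch; adjust to your setup): `python3 -c "import pyrealsense2, skspatial, py_trees, pymongo, tornado, trimesh, overrides; print('dependencies OK')"`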
187 changes: 153 additions & 34 deletions ada_feeding_perception/ada_feeding_perception/face_detection.py
@@ -7,9 +7,10 @@

# Standard Imports
import collections
from enum import Enum
import os
import threading
from typing import Tuple, Union
from typing import List, Tuple, Union

# Third-party imports
import cv2
@@ -24,8 +25,10 @@
from rclpy.executors import MultiThreadedExecutor
from rcl_interfaces.msg import ParameterDescriptor, ParameterType
from sensor_msgs.msg import CompressedImage, CameraInfo, Image
from skspatial.objects import Plane, Points
from std_srvs.srv import SetBool


# Local imports
from ada_feeding_msgs.msg import FaceDetection
from ada_feeding_perception.helpers import (
Expand All @@ -35,6 +38,15 @@
)


class DepthComputationMethod(Enum):
"""
Enum for the different methods of computing the depth of the mouth.
"""

AVERAGE_MOUTH_POINTS = "average_mouth_points"
FACIAL_PLANE = "facial_plane"


class FaceDetectionNode(Node):
"""
This node publishes a 3d PointStamped location
@@ -337,7 +349,7 @@ def detect_largest_face(
is_face_detected: Whether a face was detected in the image.
mouth_center: The (u,v) coordinates of the somion of the largest face
detected in the image.
mouth_points: A list of (u,v) coordinates of the facial landmarks of the mouth.
face_points: A list of (u,v) coordinates of the landmarks of the face.
face_bbox: The bounding box of the largest face detected in the image,
in the form (x, y, w, h).
"""
@@ -354,7 +366,7 @@ def detect_largest_face(
faces = self.detector.detectMultiScale(image_gray)
is_face_detected = len(faces) > 0
img_mouth_center = (0, 0)
img_mouth_points = []
img_face_points = []
face_bbox = (0, 0, 0, 0)

if is_face_detected:
@@ -377,30 +389,138 @@
# Find stomion (mouth center) in image. See below for the landmark indices:
# https://pyimagesearch.com/2017/04/03/facial-landmarks-dlib-opencv-python/
img_mouth_center = landmarks[largest_face[1]][0][66]
img_mouth_points = landmarks[largest_face[1]][0][48:68]
img_face_points = landmarks[largest_face[1]][0]

return is_face_detected, img_mouth_center, img_face_points, face_bbox

def depth_method_average_mouth_points(
self,
mouth_points: npt.NDArray,
image_depth: npt.NDArray,
threshold: float = 0.5,
) -> Tuple[bool, float]:
"""
Compute the depth of the mouth stomion by averaging the depth of all
viable mouth points.
Parameters
----------
mouth_points: A list of (u,v) coordinates of all facial landmarks.
image_depth: The depth image corresponding to the RGB image that the
face was detected in.
threshold: The minimum fraction of mouth points that must be valid for
the depth to be computed.
Returns
-------
detected: whether the mouth was detected in the depth image.
depth_mm: The depth of the mouth in mm.
"""
# pylint: disable=too-many-locals
# This function is not too complex, but it does have a lot of local variables.

# Retrieve the depth value averaged over all viable mouth coordinates
depth_sum = 0
num_valid_points = 0
for point in mouth_points:
u, v = int(point[0]), int(point[1])
# Ensure that point is contained within the depth image frame
if 0 <= u < image_depth.shape[1] and 0 <= v < image_depth.shape[0]:
if image_depth[v][u] != 0:
num_valid_points += 1
depth_sum += image_depth[v][u]
if num_valid_points >= threshold * len(mouth_points):
# If at least a `threshold` fraction of the mouth points are valid,
# use the average depth of those points
depth_mm = depth_sum / float(num_valid_points)
return True, depth_mm

self.get_logger().warn(
"The majority of mouth points were invalid (outside the frame or depth not detected). "
"Unable to compute mouth depth by averaging mouth points."
)
return False, 0
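
# A worked toy example (editor's sketch, not repo code), assuming a
# 480x640 uint16 depth image in millimeters:
#   depth = np.zeros((480, 640), dtype=np.uint16)
#   depth[100:120, 200:240] = 450  # mouth region ~450 mm away
#   pts = np.array([[210, 105], [225, 110], [235, 118]])
#   # All three (u, v) points are in frame with nonzero depth, so
#   # depth_method_average_mouth_points(pts, depth) returns (True, 450.0)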

return is_face_detected, img_mouth_center, img_mouth_points, face_bbox
def depth_method_facial_plane(
self, face_points: npt.NDArray, image_depth: npt.NDArray, threshold: float = 0.5
) -> Tuple[bool, float]:
"""
Compute the depth of the mouth stomion by fitting a plane to the
internal points of the face and calculating the depth of the stomion
from the plane.
Parameters
----------
face_points: A list of (u,v) coordinates of all facial landmarks.
image_depth: The depth image corresponding to the RGB image that the
face was detected in.
threshold: The minimum fraction of face points that must be valid for
the plane to be fit.
Returns
-------
detected: whether the mouth was detected in the depth image.
depth_mm: The depth of the mouth in mm.
"""
# pylint: disable=too-many-locals
# This function is not too complex, but it does have a lot of local variables.

internal_face_points = face_points[18:] # Ignore points along the face outline
face_points_3d = []
for point in internal_face_points:
u, v = int(point[0]), int(point[1])
# Ensure that point is contained within the depth image frame
if 0 <= u < image_depth.shape[1] and 0 <= v < image_depth.shape[0]:
if image_depth[v][u] != 0:
z = image_depth[v][u]
face_points_3d.append([u, v, z])
if len(face_points_3d) >= threshold * len(internal_face_points):
# Fit a plane to detected face points
plane = Plane.best_fit(Points(face_points_3d))
a, b, c, d = plane.cartesian()

# Locate u,v of stomion. See below for the landmark indices:
# https://pyimagesearch.com/2017/04/03/facial-landmarks-dlib-opencv-python/
stomion_u, stomion_v = face_points[66]
# Calculate depth of stomion u,v from fit plane
depth_mm = (d + (a * stomion_u) + (b * stomion_v)) / (-c)
return True, depth_mm
self.get_logger().warn(
"The majority of internal face points were invalid (outside the frame or depth not detected). "
"Unable to compute mouth depth from facial plane."
)
return False, 0
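
# Editor's sketch (not repo code): checking the plane-depth formula with
# scikit-spatial on four synthetic landmarks from a flat "face" 500 mm away:
#   from skspatial.objects import Plane, Points
#   plane = Plane.best_fit(
#       Points([[0, 0, 500], [10, 0, 500], [0, 10, 500], [10, 10, 500]])
#   )
#   a, b, c, d = plane.cartesian()  # ~(0, 0, 1, -500), up to sign
#   u, v = 5, 5
#   depth_mm = (d + a * u + b * v) / (-c)  # = 500.0, as expected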

def get_mouth_depth(
self, rgb_msg: Union[CompressedImage, Image], face_points: np.ndarray
self,
rgb_msg: Union[CompressedImage, Image],
face_points: npt.NDArray,
methods: List[DepthComputationMethod] = [
DepthComputationMethod.AVERAGE_MOUTH_POINTS,
DepthComputationMethod.FACIAL_PLANE,
],
) -> Tuple[bool, float]:
"""
Get the depth of the mouth of the largest detected face in the image.
Parameters
----------
rgb_msg: The RGB image that the face was detected in.
face_points: A list of (u,v) coordinates of the facial landmarks whose depth
should be averaged to get the predicted depth of the mouth.
face_points: A list of (u,v) coordinates of all facial landmarks.
Returns
-------
detected: whether the mouth was detected in the depth image.
depth_mm: The depth of the mouth in mm.
"""

# pylint: disable=too-many-locals
# pylint: disable=too-many-locals, dangerous-default-value
# This function is not too complex, but it does have a lot of local variables.
# In this case, a list as the default value is okay since we don't change it.

# Pull out the (u,v) coordinates of the mouth landmarks, which can be
# averaged to approximate the mouth center
mouth_points = face_points[48:68]

# Find depth image closest in time to RGB image that face was in
with self.depth_buffer_lock:
@@ -432,25 +552,24 @@ def get_mouth_depth(
desired_encoding="passthrough",
)

# Retrieve the depth value averaged over all mouth coordinates
depth_sum = 0
num_points_in_frame = 0
for point in face_points:
x, y = int(point[0]), int(point[1])
if 0 <= x < image_depth.shape[1] and 0 <= y < image_depth.shape[0]:
# Points with depth 0mm are invalid (typically out-of-range)
if image_depth[y][x] != 0:
num_points_in_frame += 1
depth_sum += image_depth[y][x]
if num_points_in_frame < 0.5 * len(face_points):
self.get_logger().warn(
"Detected face in the RGB image, but majority of mouth points "
"were invalid. Ignoring this face."
)
return False, 0
# Compute the depth of the mouth
for method in methods:
if method == DepthComputationMethod.AVERAGE_MOUTH_POINTS:
detected, depth_mm = self.depth_method_average_mouth_points(
mouth_points, image_depth
)
elif method == DepthComputationMethod.FACIAL_PLANE:
detected, depth_mm = self.depth_method_facial_plane(
face_points, image_depth
)
else:
self.get_logger().warn(
f"Invalid depth computation method: {method}. Skipping."
)
# Skip the `detected` check below, since no method ran
continue
if detected:
return detected, depth_mm

depth_mm = depth_sum / float(num_points_in_frame)
return True, depth_mm
return False, 0
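
# The default order tries averaging first and falls back to the plane
# fit; a hypothetical caller that trusts only the plane fit could pass
# methods=[DepthComputationMethod.FACIAL_PLANE] instead.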

def get_stomion_point(self, u: int, v: int, depth_mm: float) -> Point:
"""
@@ -522,14 +641,14 @@ def run(self) -> None:
(
is_face_detected,
img_mouth_center,
img_mouth_points,
img_face_points,
face_bbox,
) = self.detect_largest_face(image_bgr)

if is_face_detected:
# Get the depth of the mouth
is_face_detected_depth, depth_mm = self.get_mouth_depth(
rgb_msg, img_mouth_points
rgb_msg, img_face_points
)
if is_face_detected_depth:
# Get the 3d location of the mouth
@@ -544,7 +663,7 @@

# Annotate the image with the face
if is_face_detected:
self.annotate_image(image_bgr, img_mouth_points, face_bbox)
self.annotate_image(image_bgr, img_face_points, face_bbox)

# Publish the face detection image
self.publisher_image.publish(
@@ -558,16 +677,16 @@
def annotate_image(
self,
image_bgr: npt.NDArray,
img_mouth_points: npt.NDArray,
img_face_points: npt.NDArray,
face_bbox: Tuple[int, int, int, int],
) -> None:
"""
Annotate the image with the face and mouth center.
Annotate the image with the face bounding box and facial landmarks.
Parameters
----------
image_bgr: The OpenCV image to annotate.
img_mouth_points: A list of (u,v) coordinates of the facial landmarks of the mouth.
img_face_points: A list of (u,v) coordinates of the facial landmarks.
face_bbox: The bounding box of the largest face detected in the image,
in the form (x, y, w, h).
"""
Expand All @@ -577,7 +696,7 @@ def annotate_image(

# Annotate the image with the face.
cv2.rectangle(image_bgr, (x, y), (x + w, y + h), (255, 255, 255), 2)
for landmark_x, landmark_y in img_mouth_points:
for landmark_x, landmark_y in img_face_points:
cv2.circle(
image_bgr,
(int(landmark_x), int(landmark_y)),
@@ -84,7 +84,11 @@ def generate_launch_description():
(
"~/aligned_depth",
PythonExpression(
expression=["'", prefix, "/camera/aligned_depth_to_color/image_raw'"]
expression=[
"'",
prefix,
"/camera/aligned_depth_to_color/image_raw'",
]
),
),
]
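
# The reformatted expression is behaviorally identical: e.g., with a
# hypothetical prefix "/local", it still evaluates to
# '/local/camera/aligned_depth_to_color/image_raw'.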
