Skip to content

Commit

Permalink
generate initial points outside cuda, make initial_points_number conf…
Browse files Browse the repository at this point in the history
…igurable
  • Loading branch information
true-real-michael committed Mar 28, 2024
1 parent cd403fb commit f9e7f80
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 115 deletions.
8 changes: 0 additions & 8 deletions octreelib/grid/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,16 +154,8 @@ def map_leaf_points_cuda_ransac(
for i in range(0, len(self.__pose_voxel_coordinates), poses_per_batch)
]

# find the maximum number of leaf voxels across all batches
# this is needed to initialize the random number generators on the GPU
max_leaf_voxels = max(
[
sum([self.n_leaves(pose_number) for pose_number in batch_pose_numbers])
for batch_pose_numbers in batches
]
)
ransac = CudaRansac(
max_blocks_number=max_leaf_voxels,
hypotheses_number=hypotheses_number,
threshold=threshold,
)
Expand Down
129 changes: 62 additions & 67 deletions octreelib/ransac/cuda_ransac.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,11 @@
import numpy.typing as npt
import numba as nb
from numba import cuda
from numba.cuda.random import create_xoroshiro128p_states

from octreelib.internal import PointCloud
from octreelib.ransac.util import (
generate_random_indices,
get_plane_from_points,
measure_distance,
INITIAL_POINTS_NUMBER,
)

__all__ = ["CudaRansac"]
Expand All @@ -25,22 +22,22 @@ class CudaRansac:

def __init__(
self,
max_blocks_number: int,
threshold: float = 0.01,
hypotheses_number: int = CUDA_THREADS,
initial_points_number: int = 6,
):
"""
Initialize the RANSAC parameters.
:param threshold: Distance threshold.
:param hypotheses_number: Number of RANSAC hypotheses. (<= 1024)
:param max_blocks_number: Maximum number of blocks.
:param initial_points_number: Number of initial points to use in RANSAC.
"""

self.__threshold: float = threshold
self.__threads_per_block = min(hypotheses_number, CUDA_THREADS)
# create random number generator states
self.__rng_states = create_xoroshiro128p_states(
self.__threads_per_block * max_blocks_number, seed=0
self.__kernel = self.__get_kernel(initial_points_number)
self.__random_hypotheses_cuda = cuda.to_device(
np.random.random((self.__threads_per_block, initial_points_number))
)

def evaluate(
Expand Down Expand Up @@ -78,12 +75,12 @@ def evaluate(
mask_mutex = cuda.to_device(np.zeros(blocks_number, dtype=np.int32))

# call the kernel
self.__ransac_kernel[blocks_number, self.__threads_per_block](
self.__kernel[blocks_number, self.__threads_per_block](
point_cloud_cuda,
block_sizes_cuda,
block_start_indices_cuda,
self.__random_hypotheses_cuda,
self.__threshold,
self.__rng_states,
result_mask_cuda,
max_inliers_number_cuda,
mask_mutex,
Expand All @@ -94,65 +91,63 @@ def evaluate(
return result_mask

@staticmethod
@cuda.jit
def __ransac_kernel(
point_cloud: PointCloud,
block_sizes: npt.NDArray,
block_start_indices: npt.NDArray,
threshold: float,
rng_states,
result_mask: npt.NDArray,
max_inliers_number: npt.NDArray,
mask_mutex: npt.NDArray,
):
thread_id, block_id = cuda.threadIdx.x, cuda.blockIdx.x
def __get_kernel(initial_points_number):
@cuda.jit
def kernel(
point_cloud: PointCloud,
block_sizes: npt.NDArray,
block_start_indices: npt.NDArray,
random_hypotheses: npt.NDArray,
threshold: float,
result_mask: npt.NDArray,
max_inliers_number: npt.NDArray,
mask_mutex: npt.NDArray,
):
thread_id, block_id = cuda.threadIdx.x, cuda.blockIdx.x

if block_sizes[block_id] < INITIAL_POINTS_NUMBER:
return
if block_sizes[block_id] < initial_points_number:
return

# select random points as inliers
initial_point_indices = cuda.local.array(
shape=INITIAL_POINTS_NUMBER, dtype=nb.size_t
)
initial_point_indices = generate_random_indices(
initial_point_indices,
rng_states,
block_sizes[block_id],
INITIAL_POINTS_NUMBER,
)
for i in range(INITIAL_POINTS_NUMBER):
initial_point_indices[i] = (
block_start_indices[block_id] + initial_point_indices[i]
# select random points as inliers
initial_point_indices = cuda.local.array(
shape=initial_points_number, dtype=nb.size_t
)
for i in range(initial_points_number):
initial_point_indices[i] = nb.int32(
random_hypotheses[thread_id][i] * block_sizes[block_id]
+ block_start_indices[block_id]
)

# calculate the plane coefficients
plane = cuda.local.array(shape=4, dtype=nb.float32)
plane[0], plane[1], plane[2], plane[3] = get_plane_from_points(
point_cloud, initial_point_indices
)

# calculate the plane coefficients
plane = cuda.local.array(shape=4, dtype=nb.float32)
plane[0], plane[1], plane[2], plane[3] = get_plane_from_points(
point_cloud, initial_point_indices
)

# for each point in the block check if it is an inlier
inliers_number_local = 0
for i in range(block_sizes[block_id]):
point = point_cloud[block_start_indices[block_id] + i]
distance = measure_distance(plane, point)
if distance < threshold:
inliers_number_local += 1

# replace the maximum number of inliers if the current number is greater
cuda.atomic.max(max_inliers_number, block_id, inliers_number_local)
cuda.syncthreads()
# set the best mask index for this block
# if this thread has the maximum number of inliers
if (
inliers_number_local == max_inliers_number[block_id]
and cuda.atomic.cas(mask_mutex, block_id, 0, 1) == 0
):
# for each point in the block check if it is an inlier
inliers_number_local = 0
for i in range(block_sizes[block_id]):
if (
measure_distance(
plane, point_cloud[block_start_indices[block_id] + i]
)
< threshold
):
result_mask[block_start_indices[block_id] + i] = True
point = point_cloud[block_start_indices[block_id] + i]
distance = measure_distance(plane, point)
if distance < threshold:
inliers_number_local += 1

# replace the maximum number of inliers if the current number is greater
cuda.atomic.max(max_inliers_number, block_id, inliers_number_local)
cuda.syncthreads()
# set the best mask index for this block
# if this thread has the maximum number of inliers
if (
inliers_number_local == max_inliers_number[block_id]
and cuda.atomic.cas(mask_mutex, block_id, 0, 1) == 0
):
for i in range(block_sizes[block_id]):
if (
measure_distance(
plane, point_cloud[block_start_indices[block_id] + i]
)
< threshold
):
result_mask[block_start_indices[block_id] + i] = True

return kernel
43 changes: 3 additions & 40 deletions octreelib/ransac/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,11 @@
These functions are auxiliary functions for the RANSAC algorithm.
They cannot be defined inside the `CudaRansac` class because
`CudaRansac.__ransac_kernel` would not be able to access them.
This file also contains the `INITIAL_POINTS_NUMBER` constant which
can be used to configure the number of initial points to be used in the RANSAC algorithm.
"""

import math
import numba as nb

from numba import cuda
from numba.cuda.random import xoroshiro128p_uniform_float32

# This constant configures the number of initial points to be used in the RANSAC algorithm.
INITIAL_POINTS_NUMBER = 6


@cuda.jit(
Expand All @@ -34,36 +27,6 @@ def measure_distance(plane, point):
)


@cuda.jit(device=True, inline=True)
def generate_random_int(rng_states, lower_bound, upper_bound):
"""
Generate a random number between a and b.
:param rng_states: Random number generator states.
:param lower_bound: Lower bound.
:param upper_bound: Upper bound.
"""
thread_id = cuda.grid(1)
x = xoroshiro128p_uniform_float32(rng_states, thread_id)
return nb.int32(x * (upper_bound - lower_bound) + lower_bound)


@cuda.jit(device=True, inline=True)
def generate_random_indices(
initial_point_indices, rng_states, block_size, points_number
):
"""
Generate random points from the given block.
:param initial_point_indices: Array to store the initial point indices.
:param rng_states: Random number generator states.
:param block_size: Size of the block.
:param points_number: Number of points to generate.
"""

for i in range(points_number):
initial_point_indices[i] = generate_random_int(rng_states, 0, block_size)
return initial_point_indices


@cuda.jit(device=True, inline=True)
def get_plane_from_points(points, initial_point_indices):
"""
Expand All @@ -79,9 +42,9 @@ def get_plane_from_points(points, initial_point_indices):
centroid_y += points[idx][1]
centroid_z += points[idx][2]

centroid_x /= INITIAL_POINTS_NUMBER
centroid_y /= INITIAL_POINTS_NUMBER
centroid_z /= INITIAL_POINTS_NUMBER
centroid_x /= initial_point_indices.shape[0]
centroid_y /= initial_point_indices.shape[0]
centroid_z /= initial_point_indices.shape[0]

xx, xy, xz, yy, yz, zz = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

Expand Down

0 comments on commit f9e7f80

Please sign in to comment.