Skip to content

Commit

Permalink
New Benchmark Tests for two alt. implementations
Browse files Browse the repository at this point in the history
The chunked_parallel_special_metric function has been provided with two alternative implementations: one including an IF clause that adjusts the loop bounds for symmetrical matrices (thus skipping useless computations), and another implementing the full iteration regardless.

Benchmarks have been added using the pytest-benchmark plugin (which will be added to CI in the next commit)
  • Loading branch information
leriomaggio committed Feb 17, 2021
1 parent b4b47de commit 7c67081
Showing 1 changed file with 208 additions and 0 deletions.
208 changes: 208 additions & 0 deletions umap/tests/test_chunked_parallel_spatial_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
from numpy.testing import assert_array_equal
from umap import distances as dist

# --------
# Fixtures
# --------


@pytest.fixture(scope="function")
def stashed_chunked_implementation():
Expand Down Expand Up @@ -55,6 +59,114 @@ def stashed_chunked_parallel_special_metric(
return stashed_chunked_parallel_special_metric


@pytest.fixture(scope="function")
def chunked_parallel_if_clause():
    """Provide a chunked pairwise-distance kernel that mirrors symmetric results.

    The returned numba-compiled function fills a ``(row_size, col_size)``
    float32 matrix with ``metric(X[i], XX[j])``, walking the matrix in
    ``chunk_size`` x ``chunk_size`` tiles with the row chunks distributed
    through ``numba.prange``.

    When ``Y`` is None the matrix is treated as symmetrical: only pairs with
    ``j > i`` are evaluated, and every value is written to both ``[i, j]``
    and ``[j, i]``. The diagonal keeps its initial value of zero.
    """

    @numba.njit(parallel=True, nogil=True)
    def chunked_parallel_special_metric(
        X, Y=None, metric=dist.named_distances["hellinger"], chunk_size=16
    ):
        if Y is None:
            XX = X
            row_size = col_size = X.shape[0]
            symmetrical = True
        else:
            XX = Y
            row_size = X.shape[0]
            col_size = Y.shape[0]
            symmetrical = False

        result = np.zeros((row_size, col_size), dtype=np.float32)
        # The "+ 1" covers a trailing partial chunk; when row_size is an exact
        # multiple of chunk_size the last chunk is simply empty.
        n_row_chunks = (row_size // chunk_size) + 1
        for chunk_idx in numba.prange(n_row_chunks):
            n = chunk_idx * chunk_size
            chunk_end_n = min(n + chunk_size, row_size)
            # Symmetrical case: column chunks left of the diagonal chunk are
            # filled by mirroring, so start at the row chunk's own offset.
            m_start = 0 if not symmetrical else n
            for m in range(m_start, col_size, chunk_size):
                chunk_end_m = min(m + chunk_size, col_size)
                for i in range(n, chunk_end_n):
                    # Clamp to the current column chunk. Starting at i + 1
                    # for every chunk would re-evaluate the columns already
                    # handled by the previous column chunks of this row.
                    j_start = m if not symmetrical else max(m, i + 1)
                    for j in range(j_start, chunk_end_m):
                        d = metric(X[i], XX[j])
                        result[i, j] = d
                        if symmetrical:
                            result[j, i] = d
        return result

    return chunked_parallel_special_metric


@pytest.fixture(scope="function")
def chunked_parallel_full_iterations():
    """Provide a chunked pairwise-distance kernel that evaluates every pair.

    The returned numba-compiled function fills a ``(row_size, col_size)``
    float32 matrix with ``metric(X[i], XX[j])`` for every ``(i, j)``,
    walking the matrix in ``chunk_size`` x ``chunk_size`` tiles with the row
    chunks distributed through ``numba.prange``. When ``Y`` is None, ``X`` is
    compared against itself.

    No symmetry shortcut is taken: all column chunks are visited for every
    row chunk, so the whole matrix is populated whatever the input size.
    """

    @numba.njit(parallel=True, nogil=True)
    def chunked_parallel_special_metric(
        X, Y=None, metric=dist.named_distances["hellinger"], chunk_size=16
    ):
        if Y is None:
            XX = X
            row_size = col_size = X.shape[0]
        else:
            XX = Y
            row_size, col_size = X.shape[0], Y.shape[0]

        result = np.zeros((row_size, col_size), dtype=np.float32)
        # The "+ 1" covers a trailing partial chunk; when row_size is an exact
        # multiple of chunk_size the last chunk is simply empty.
        n_row_chunks = (row_size // chunk_size) + 1
        for chunk_idx in numba.prange(n_row_chunks):
            n = chunk_idx * chunk_size
            chunk_end_n = min(n + chunk_size, row_size)
            # Always start from column 0. Starting at the row chunk's offset
            # for self-comparisons would leave the lower block-triangle at
            # zero, because this variant never mirrors values.
            for m in range(0, col_size, chunk_size):
                chunk_end_m = min(m + chunk_size, col_size)
                for i in range(n, chunk_end_n):
                    for j in range(m, chunk_end_m):
                        d = metric(X[i], XX[j])
                        result[i, j] = d
        return result

    return chunked_parallel_special_metric


@pytest.fixture(scope="function")
def benchmark_data(request):
    """Return a non-negative float32 random array of the parametrized shape.

    The shape is read from ``request.param``. Values are drawn with
    ``np.random.randn``, cast to float32, and made non-negative by taking
    their absolute value.
    """
    dims = request.param
    samples = np.random.randn(*dims)
    return np.abs(samples.astype(np.float32))


# ---------------------------------------------------------------

# Uncomment this to skip the tests
# @pytest.mark.skip(reason="Focus on benchmark for now. This passes!")
def test_chunked_parallel_alternative_implementations(
    spatial_data, chunked_parallel_if_clause, chunked_parallel_full_iterations
):
    """Check that both alternative kernels produce identical matrices.

    Each kernel is called once with a single argument and once with the
    data passed as both ``X`` and ``Y``. The input is the absolute value of
    ``spatial_data`` without its last two rows.
    """
    mismatch_msg = "Distances don't match for metric hellinger"

    def make_input():
        # A fresh array per argument, exactly as many as the calls need.
        return np.abs(spatial_data[:-2])

    # Base tests that must pass!
    skipping_x = chunked_parallel_if_clause(make_input())
    skipping_xy = chunked_parallel_if_clause(make_input(), make_input())

    full_x = chunked_parallel_full_iterations(make_input())
    full_xy = chunked_parallel_full_iterations(make_input(), make_input())

    comparisons = (
        (full_x, skipping_x),
        (full_xy, skipping_xy),
    )
    for full_matrix, skipping_matrix in comparisons:
        assert_array_equal(full_matrix, skipping_matrix, err_msg=mismatch_msg)


# Uncomment this to skip the tests
# @pytest.mark.skip(reason="Focus on benchmark for now. This passes!")
def test_chunked_parallel_special_metric_implementation_hellinger(
spatial_data, stashed_chunked_implementation
):
Expand Down Expand Up @@ -118,3 +230,99 @@ def test_chunked_parallel_special_metric_implementation_hellinger(
dist_chunked_diff_pair,
err_msg="Distances don't match between stashed and current chunked_parallel implementations",
)


@pytest.mark.benchmark(group="benchmark_single_param")
@pytest.mark.parametrize(
    "benchmark_data",
    [(10 * s, 10 * s) for s in range(1, 101, 10)],
    indirect=["benchmark_data"],
)
def test_benchmark_full_iteration_no_symmetrical_skips_x_only(
    benchmark,
    benchmark_data,
    chunked_parallel_full_iterations,
):
    """Benchmark the full-iteration kernel called with ``X`` only.

    ``Y`` is passed explicitly as None. Timing uses pedantic mode with
    5 warm-up rounds, then 10 rounds of 10 iterations each.
    """
    call_kwargs = dict(X=benchmark_data, Y=None)
    benchmark.pedantic(
        chunked_parallel_full_iterations,
        kwargs=call_kwargs,
        rounds=10,
        iterations=10,
        warmup_rounds=5,
    )


@pytest.mark.benchmark(group="benchmark_single_param")
@pytest.mark.parametrize(
    "benchmark_data",
    [(10 * s, 10 * s) for s in range(1, 101, 10)],
    indirect=["benchmark_data"],
)
def test_benchmark_check_symmetrical_and_skips_x_only(
    benchmark,
    benchmark_data,
    chunked_parallel_if_clause,
):
    """Benchmark the symmetry-aware kernel called with ``X`` only.

    ``Y`` is passed explicitly as None. Timing uses pedantic mode with
    5 warm-up rounds, then 10 rounds of 10 iterations each.
    """
    call_kwargs = dict(X=benchmark_data, Y=None)
    benchmark.pedantic(
        chunked_parallel_if_clause,
        kwargs=call_kwargs,
        rounds=10,
        iterations=10,
        warmup_rounds=5,
    )


@pytest.mark.benchmark(group="benchmark_X_Y_params")
@pytest.mark.parametrize(
    "benchmark_data",
    [(10 * s, 10 * s) for s in range(1, 101, 10)],
    indirect=["benchmark_data"],
)
def test_benchmark_full_iteration_no_symmetrical_skips_x_y(
    benchmark,
    benchmark_data,
    chunked_parallel_full_iterations,
):
    """Benchmark the full-iteration kernel called with both ``X`` and ``Y``.

    The same array is supplied for both arguments. Timing uses pedantic
    mode with 5 warm-up rounds, then 10 rounds of 10 iterations each.
    """
    # Two arguments: the same data serves as X and as Y.
    call_kwargs = dict(X=benchmark_data, Y=benchmark_data)
    benchmark.pedantic(
        chunked_parallel_full_iterations,
        kwargs=call_kwargs,
        rounds=10,
        iterations=10,
        warmup_rounds=5,
    )


@pytest.mark.benchmark(group="benchmark_X_Y_params")
@pytest.mark.parametrize(
    "benchmark_data",
    [(10 * s, 10 * s) for s in range(1, 101, 10)],
    indirect=["benchmark_data"],
)
def test_benchmark_check_symmetrical_and_skips_x_y(
    benchmark,
    benchmark_data,
    chunked_parallel_if_clause,
):
    """Benchmark the symmetry-aware kernel called with both ``X`` and ``Y``.

    The same array is supplied for both arguments. Timing uses pedantic
    mode with 5 warm-up rounds, then 10 rounds of 10 iterations each.
    """
    # Two arguments: the same data serves as X and as Y.
    call_kwargs = dict(X=benchmark_data, Y=benchmark_data)
    benchmark.pedantic(
        chunked_parallel_if_clause,
        kwargs=call_kwargs,
        rounds=10,
        iterations=10,
        warmup_rounds=5,
    )

0 comments on commit 7c67081

Please sign in to comment.