Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optionally enforce exact distances #667

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion umap/umap_.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1611,6 +1611,7 @@ def __init__(
transform_seed=42,
transform_mode="embedding",
force_approximation_algorithm=False,
force_exact_distances=False,
verbose=False,
unique=False,
densmap=False,
Expand Down Expand Up @@ -1648,6 +1649,7 @@ def __init__(
self.transform_seed = transform_seed
self.transform_mode = transform_mode
self.force_approximation_algorithm = force_approximation_algorithm
self.force_exact_distances = force_exact_distances
self.verbose = verbose
self.unique = unique

Expand Down Expand Up @@ -1842,6 +1844,10 @@ def _dist_only(x, y, *kwds):
if self.n_jobs < -1 or self.n_jobs == 0:
raise ValueError("n_jobs must be a postive integer, or -1 (for all cores)")

if self.force_approximation_algorithm and self.force_exact_distances:
raise ValueError("enforcing both exact distances and an approximation "
"contradict each other")

if self.dens_lambda < 0.0:
raise ValueError("dens_lambda cannot be negative")
if self.dens_frac < 0.0 or self.dens_frac > 1.0:
Expand Down Expand Up @@ -1930,6 +1936,9 @@ def _populate_combined_params(self, *models):
self.force_approximation_algorithm = flattened(
[m.force_approximation_algorithm for m in models]
)
self.force_exact_distances = flattened(
[m.force_exact_distances for m in models]
)
self.verbose = flattened([m.verbose for m in models])
self.unique = flattened([m.unique for m in models])

Expand Down Expand Up @@ -2332,7 +2341,9 @@ def fit(self, X, y=None):
verbose=self.verbose,
)
# Handle small cases efficiently by computing all distances
elif X[index].shape[0] < 4096 and not self.force_approximation_algorithm:
elif self.force_exact_distances or (
X[index].shape[0] < 4096 and not self.force_approximation_algorithm
):
self._small_data = True
try:
# sklearn pairwise_distances fails for callable metric on sparse data
Expand Down