Skip to content

Commit

Permalink
Correct sparse correlation distance per issue #71
Browse files: browse the repository at this point in the history
  • Loading branch information
lmcinnes committed May 24, 2018
1 parent c86884d commit d80f313
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions umap/sparse.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -696,18 +696,31 @@ def sparse_correlation(ind1, data1, ind2, data2, n_features):
for i in range(data2.shape[0]):
shifted_data2[i] = data2[i] - mu_y

norm1 = norm(shifted_data1)
norm2 = norm(shifted_data2)
norm1 = np.sqrt(norm(shifted_data1) ** 2 + (n_features - ind1.shape[0]) * mu_x ** 2)
norm2 = np.sqrt(norm(shifted_data2) ** 2 + (n_features - ind2.shape[0]) * mu_y ** 2)

dot_prod_inds, dot_prod_data = sparse_mul(ind1, shifted_data1,
ind2, shifted_data2)

if dot_prod_data.shape[0] == 0:
return 1.0

common_indices = set(dot_prod_inds)

for i in range(dot_prod_data.shape[0]):
dot_product += dot_prod_data[i]

for i in range(ind1.shape[0]):
if ind1[i] not in common_indices:
dot_product -= data1[i] * (mu_y)

for i in range(ind2.shape[0]):
if ind2[i] not in common_indices:
dot_product -= data2[i] * (mu_x)

all_indices = arr_union(ind1, ind2)
dot_product += mu_x * mu_y * all_indices.shape[0]

if dot_product == 0.0:
return 1.0
else:
Expand Down

0 comments on commit d80f313

Please sign in to comment.