Skip to content

Commit

Permalink
Merge pull request #147 from e-koch/cramer_norm_again
Browse files Browse the repository at this point in the history
Cramer Normalization (again)
  • Loading branch information
e-koch authored Mar 8, 2017
2 parents 4486a98 + e180d82 commit 460e68c
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 27 deletions.
1 change: 1 addition & 0 deletions CHANGE.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

Version 1.0 (unreleased)
------------------------
* #147 - Changed Cramer normalization to the spectral norm.
* #146 - Allow weights to be passed in `StatsMoments`.
* #144 - Fix the MVC: subtract mean velocity dispersion, not the array of dispersions.
* #143 - Set periodic boundaries for dendrograms.
Expand Down
8 changes: 8 additions & 0 deletions turbustat/statistics/cramer/cramer.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,14 @@ def cramer_statistic(self, n_jobs=1):
larger, smaller,
metric="euclidean", n_jobs=n_jobs)

# Take the square root of each pairwise distance matrix.
# We default to the Cramer kernel from Baringhaus & Franz (2004):
# \phi(dist) = sqrt(dist) / 2.
# The normalization values below reflect this choice of kernel.
pairdist11 = np.sqrt(pairdist11)
pairdist12 = np.sqrt(pairdist12)
pairdist22 = np.sqrt(pairdist22)

term1 = 0.0
term2 = 0.0
term3 = 0.0
Expand Down
9 changes: 3 additions & 6 deletions turbustat/statistics/threeD_to_twoD.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np


def intensity_data(cube, p=0.1, noise_lim=0.1, norm=True):
def intensity_data(cube, p=0.2, noise_lim=-np.inf, norm=True):
'''
Clips off channels below the given noise limit and keeps the
upper percentile specified.
Expand Down Expand Up @@ -61,7 +61,7 @@ def intensity_data(cube, p=0.1, noise_lim=0.1, norm=True):


def _format_data(cube, data_format='intensity', num_spec=1000,
noise_lim=0.0, p=0.1, normalize=True):
noise_lim=-np.inf, p=0.2, normalize=True):
'''
Rearrange data into a 2D object using the given format.
'''
Expand Down Expand Up @@ -97,10 +97,7 @@ def _format_data(cube, data_format='intensity', num_spec=1000,
# Normalize by dividing the data matrix by its spectral norm (its largest
# singular value). This replaces the earlier rescaling of the nonzero
# values to the interval between 0 and 1.
if normalize:
nonzero = np.nonzero(data_matrix)
ptp = np.ptp(data_matrix[nonzero])
data_matrix[nonzero] = \
(data_matrix[nonzero] - data_matrix[nonzero].min()) / ptp
data_matrix /= np.linalg.norm(data_matrix, ord=2)

return data_matrix

Expand Down
Binary file modified turbustat/tests/data/checkVals.npz
Binary file not shown.
Binary file modified turbustat/tests/data/computed_distances.npz
Binary file not shown.
38 changes: 17 additions & 21 deletions turbustat/tests/test_cramer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,37 +5,33 @@
Test functions for Cramer
'''

from unittest import TestCase

import numpy as np
import numpy.testing as npt

from ..statistics import Cramer_Distance
from ._testing_data import \
dataset1, dataset2, computed_data, computed_distances


class testCramer(TestCase):
def test_cramer():
tester = \
Cramer_Distance(dataset1["cube"],
dataset2["cube"],
noise_value1=0.1,
noise_value2=0.1).distance_metric(normalize=False)

def test_cramer(self):
self.tester = \
Cramer_Distance(dataset1["cube"],
dataset2["cube"],
noise_value1=0.1,
noise_value2=0.1).distance_metric(normalize=False)
npt.assert_allclose(tester.data_matrix1,
computed_data["cramer_val"])
npt.assert_almost_equal(tester.distance,
computed_distances['cramer_distance'])

npt.assert_allclose(self.tester.data_matrix1,
computed_data["cramer_val"])
npt.assert_almost_equal(self.tester.distance,
computed_distances['cramer_distance'])

def test_cramer_spatial_diff(self):
def test_cramer_spatial_diff():

small_data = dataset1["cube"][0][:, :26, :26]
small_data = dataset1["cube"][0][:, :26, :26]

self.tester2 = Cramer_Distance(small_data, dataset2["cube"])
self.tester2.distance_metric(normalize=False)
self.tester3 = Cramer_Distance(dataset2["cube"], small_data)
self.tester3.distance_metric(normalize=False)
tester2 = Cramer_Distance(small_data, dataset2["cube"])
tester2.distance_metric(normalize=False)
tester3 = Cramer_Distance(dataset2["cube"], small_data)
tester3.distance_metric(normalize=False)

npt.assert_almost_equal(self.tester2.distance, self.tester3.distance)
npt.assert_almost_equal(tester2.distance, tester3.distance)

0 comments on commit 460e68c

Please sign in to comment.