Skip to content

Commit

Permalink
Add 'volume_stable' invariant measure to NormalizedMI and corresponding unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
marko-tuononen committed Oct 13, 2024
1 parent acb35a0 commit fda09e2
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/normi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
__all__ = ['NormalizedMI']

NORMS = {'joint', 'geometric', 'arithmetic', 'min', 'max'}
INVMEASURES = {'radius', 'volume', 'kraskov'}
INVMEASURES = {'radius', 'volume', 'volume_stable', 'kraskov'}

from ._estimators import NormalizedMI

Expand Down
16 changes: 14 additions & 2 deletions src/normi/_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,8 @@ def _scale_nearest_neighbor_distance(
invariant_measure : str, default='radius'
- `'radius'` normalizing by mean k-nn radius<br/>
- `'volume'` normalizing by mean k-nn volume<br/>
- `'kraskov'` no normalization
- `'volume_stable'` more stable calculation of mean k-nn volume [2]<br/>
- `'kraskov'` no normalization [1]
n_dims : int
Dimensionality of the embedding space used to estimate the radii.
radii : ndarray, shape (n_samples, )
Expand All @@ -336,12 +337,23 @@ def _scale_nearest_neighbor_distance(
----------
.. [1] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
information". Phys. Rev. E 69, 2004.
.. [2] M. Tuononen, V. Hautamaki, "Improving Numerical Stability of
Normalized Mutual Information Estimator on High Dimensions",
arXiv:2410.07642v1 [cs.IT], 10 Oct 2024.
"""
if invariant_measure == 'radius':
return radii / np.mean(radii)
elif invariant_measure == 'volume':
return radii / (np.mean(radii**n_dims) ** (1 / n_dims))
elif invariant_measure == 'volume_stable':
n_samples = len(radii)
radii_max = np.max(radii)
denominator_in_log_domain = -np.log(n_samples) / n_dims
denominator_in_log_domain += np.log(radii_max)
denominator_in_log_domain += (
(1 / n_dims) * np.log(np.sum((radii / radii_max)**n_dims))
)
return radii / np.exp(denominator_in_log_domain)
elif invariant_measure == 'kraskov':
return radii
# This should never be reached
Expand Down
43 changes: 43 additions & 0 deletions tests/test__estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@ def X1_result(method, measure):
'arithmetic': 0.6268627,
'geometric': 0.6269047,
},
'volume_stable': {
'joint': 0.4565186,
'max': 0.6196869,
'min': 0.6342067,
'arithmetic': 0.6268627,
'geometric': 0.6269047,
},
}[measure][method]


Expand All @@ -56,6 +63,14 @@ def X1_result(method, measure):
[0, 0.40824829, 0.81649658, 1.22474487, 1.63299316],
None,
),
('volume_stable', 1, np.arange(5), np.arange(5) / 2, None),
(
'volume_stable',
2,
np.arange(5),
[0, 0.40824829, 0.81649658, 1.22474487, 1.63299316],
None,
),
('kraskov', 1, np.arange(5), np.arange(5), None),
('none', 1, np.arange(5), np.arange(5), BeartypeException),
],
Expand Down Expand Up @@ -190,6 +205,34 @@ def test__reset(normalize_method, X, kwargs):
X1_result('geometric', 'volume'),
None,
),
(
X1(),
{'normalize_method': 'arithmetic', 'invariant_measure': 'volume_stable'},
X1_result('arithmetic', 'volume_stable'),
None,
),
(
X1(),
{'normalize_method': 'geometric', 'invariant_measure': 'volume_stable'},
X1_result('geometric', 'volume_stable'),
None,
),
(
X1(),
{'normalize_method': 'joint', 'invariant_measure': 'volume_stable'},
X1_result('joint', 'volume_stable'),
None,
),
(
X1(),
{
'n_dims': np.array([1, 1]),
'normalize_method': 'geometric',
'invariant_measure': 'volume_stable',
},
X1_result('geometric', 'volume_stable'),
None,
),
],
)
def test_NormalizedMI(X, kwargs, result, error):
Expand Down

0 comments on commit fda09e2

Please sign in to comment.