Skip to content

Commit

Permalink
Update default IPPM hyperparameters (#385)
Browse files Browse the repository at this point in the history
* Update default IPPM hyperparameters

* Update getting-started.md

---------

Co-authored-by: Anirudh Lakra <[email protected]>
Co-authored-by: neukym <[email protected]>
  • Loading branch information
3 people authored Sep 29, 2024
1 parent 95d3fc3 commit a51eb12
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 36 deletions.
2 changes: 1 addition & 1 deletion docs/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ You see `ModuleNotFoundError: No module named 'kymata'`
# Allow the CBU poetry to communicate with pip
export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring

$VENV_PATH/bin/poetry install
$VENV_PATH/bin/poetry install
```

- Now (within the Apptainer) you can run it using `poetry`, e.g.:
Expand Down
38 changes: 19 additions & 19 deletions kymata/ippm/denoising_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,15 +294,15 @@ class MaxPoolingStrategy(DenoisingStrategy):
def __init__(
self,
hemi: str,
should_normalise: bool = False,
should_normalise: bool = True,
should_cluster_only_latency: bool = False,
should_max_pool: bool = False,
normal_dist_threshold: float = 5,
should_merge_hemis: bool = False,
should_merge_hemis: bool = True,
should_exclude_insignificant: bool = True,
should_shuffle: bool = True,
bin_significance_threshold: int = 15,
bin_size: int = 25,
bin_significance_threshold: int = 1,
bin_size: int = 1,
):
super().__init__(
hemi,
Expand All @@ -321,14 +321,14 @@ class AdaptiveMaxPoolingStrategy(DenoisingStrategy):
def __init__(
self,
hemi: str,
should_normalise: bool = False,
should_normalise: bool = True,
should_cluster_only_latency: bool = False,
should_max_pool: bool = False,
normal_dist_threshold: float = 5,
should_merge_hemis: bool = False,
should_merge_hemis: bool = True,
should_exclude_insignificant: bool = True,
bin_significance_threshold: int = 5,
base_bin_size: int = 10,
bin_significance_threshold: int = 1,
base_bin_size: int = 1,
):
super().__init__(
hemi,
Expand All @@ -347,20 +347,20 @@ class GMMStrategy(DenoisingStrategy):
def __init__(
self,
hemi: str,
should_normalise: bool = False,
should_cluster_only_latency: bool = False,
should_normalise: bool = True,
should_cluster_only_latency: bool = True,
should_max_pool: bool = False,
normal_dist_threshold: float = 5,
should_merge_hemis: bool = False,
should_merge_hemis: bool = True,
should_exclude_insignificant: bool = True,
should_shuffle: bool = True,
number_of_clusters_upper_bound: int = 5,
number_of_clusters_upper_bound: int = 2,
covariance_type: str = "full",
max_iter: int = 1000,
n_init: int = 8,
n_init: int = 5,
init_params: str = "kmeans",
random_state: Optional[int] = None,
should_evaluate_using_AIC: bool = False,
should_evaluate_using_AIC: bool = True,
):
super().__init__(
hemi,
Expand Down Expand Up @@ -427,17 +427,17 @@ class MeanShiftStrategy(DenoisingStrategy):
def __init__(
self,
hemi: str,
should_normalise: bool = False,
should_cluster_only_latency: bool = False,
should_normalise: bool = True,
should_cluster_only_latency: bool = True,
should_max_pool: bool = False,
normal_dist_threshold: float = 5,
should_merge_hemis: bool = False,
should_merge_hemis: bool = True,
should_exclude_insignificant: bool = True,
should_shuffle: bool = True,
cluster_all: bool = False,
bandwidth: float = 30,
bandwidth: float = 0.5,
seeds: Optional[int] = None,
min_bin_freq: int = 2,
min_bin_freq: int = 1,
n_jobs: int = -1,
):
super().__init__(
Expand Down
28 changes: 12 additions & 16 deletions tests/test_ippm_denoising_strategies_integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,18 +101,15 @@ def test_MaxPoolingStrategy_AllTrue_Fit_Successfully():
)


def test_MaxPoolingStrategy_AllFalse_Fit_Successfully():
def test_MaxPoolingStrategy_AllDefault_Fit_Successfully():
expected_denoised = deepcopy(noisy_test_hexels)
expected_denoised["func1"].right_best_pairings = [
(-100, 1e-50),
(-75, 1e-75),
(30, 1e-100),
(199, 1e-90),
(211, 1e-55),
]
expected_denoised["func2"].right_best_pairings = [(30, 1e-99), (130, 1e-81)]
expected_denoised["func2"].right_best_pairings = [(30, 1e-99)]

strategy = MaxPoolingStrategy(HEMI_RIGHT, bin_significance_threshold=2)
strategy = MaxPoolingStrategy(HEMI_RIGHT)
actual_denoised = strategy.denoise(noisy_test_hexels)

assert (
Expand Down Expand Up @@ -148,7 +145,7 @@ def test_AdaptiveMaxPoolingStrategy_AllTrue_Fit_Successfully():
)


def test_AdaptiveMaxPoolingStrategy_AllFalse_Fit_Successfully():
def test_AdaptiveMaxPoolingStrategy_AllDefault_Fit_Successfully():
expected_denoised = deepcopy(noisy_test_hexels)
expected_denoised["func1"].right_best_pairings = [
(-75, 1e-75),
Expand All @@ -158,7 +155,7 @@ def test_AdaptiveMaxPoolingStrategy_AllFalse_Fit_Successfully():
expected_denoised["func2"].right_best_pairings = [(30, 1e-99), (130, 1e-81)]

strategy = AdaptiveMaxPoolingStrategy(
HEMI_RIGHT, bin_significance_threshold=2, base_bin_size=25
HEMI_RIGHT, bin_significance_threshold=2, base_bin_size=0.025
)
actual_denoised = strategy.denoise(noisy_test_hexels)

Expand Down Expand Up @@ -187,6 +184,7 @@ def test_GMMStrategy_AllTrue_Fit_Successfully():
HEMI_RIGHT,
should_normalise=True,
should_cluster_only_latency=True,
number_of_clusters_upper_bound=5,
random_state=random_seed,
)
actual_denoised = strategy.denoise(noisy_test_hexels)
Expand All @@ -201,7 +199,7 @@ def test_GMMStrategy_AllTrue_Fit_Successfully():
)


def test_GMMStrategy_AllFalse_Fit_Successfully():
def test_GMMStrategy_AllDefault_Fit_Successfully():
random_seed = 40
expected_denoised = deepcopy(noisy_test_hexels)
expected_denoised["func1"].right_best_pairings = [
Expand All @@ -213,11 +211,9 @@ def test_GMMStrategy_AllFalse_Fit_Successfully():
expected_denoised["func2"].right_best_pairings = [
(30, 1e-99),
(130, 1e-81),
(23, 1e-44),
(26, 1e-59),
]

strategy = GMMStrategy(HEMI_RIGHT, random_state=random_seed)
strategy = GMMStrategy(HEMI_RIGHT, number_of_clusters_upper_bound=5, random_state=random_seed, should_evaluate_using_AIC=False)
actual_denoised = strategy.denoise(noisy_test_hexels)

assert (
Expand Down Expand Up @@ -257,7 +253,7 @@ def test_DBSCANStrategy_AllTrue_Fit_Successfully():
)


def test_DBSCANStrategy_AllFalse_Fit_Successfully():
def test_DBSCANStrategy_AllDefault_Fit_Successfully():
expected_denoised = deepcopy(noisy_test_hexels)
expected_denoised["func1"].right_best_pairings = [
(-100, 1e-50),
Expand Down Expand Up @@ -290,7 +286,7 @@ def test_MeanShiftStrategy_AllTrue_Fit_Successfully():
expected_denoised["func2"].right_best_pairings = [(130, 1e-81), (30, 1e-99)]

strategy = MeanShiftStrategy(
HEMI_RIGHT, should_normalise=False, should_cluster_only_latency=True
HEMI_RIGHT, bandwidth=0.03, min_bin_freq=2
)
actual_denoised = strategy.denoise(noisy_test_hexels)

Expand All @@ -304,7 +300,7 @@ def test_MeanShiftStrategy_AllTrue_Fit_Successfully():
)


def test_MeanShiftStrategy_AllFalse_Fit_Successfully():
def test_MeanShiftStrategy_AllDefault_Fit_Successfully():
expected_denoised = deepcopy(noisy_test_hexels)
expected_denoised["func1"].right_best_pairings = [
(199, 1e-90),
Expand All @@ -313,7 +309,7 @@ def test_MeanShiftStrategy_AllFalse_Fit_Successfully():
]
expected_denoised["func2"].right_best_pairings = [(130, 1e-81), (30, 1e-99)]

strategy = MeanShiftStrategy(HEMI_RIGHT)
strategy = MeanShiftStrategy(HEMI_RIGHT, bandwidth=0.03, min_bin_freq=2)
actual_denoised = strategy.denoise(noisy_test_hexels)

assert (
Expand Down

0 comments on commit a51eb12

Please sign in to comment.