kymata-atlas · neukym · Jan 27, 2024 · Jan 15, 2024 · Jan 15, 2024 · Jan 15, 2024
diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py
@@ -1,8 +1,11 @@
 from pathlib import Path
 import argparse
+
 from kymata.gridsearch.plain import do_gridsearch
 from kymata.io.functions import load_function
 from kymata.io.mne import load_emeg_pack
+from kymata.io.nkg import save_expression_set
+from kymata.plot.plot import expression_plot
 
 
 def main():
@@ -20,6 +23,11 @@ def main():
                         help='data path after base dir')
     parser.add_argument('--function_path', type=str, default="predicted_function_contours/GMSloudness/stimulisig",
                         help='snr')
+    parser.add_argument('--save-expression-set', type=Path, default="gridsearch.nkg",
+                        help="Save the results of the gridsearch into an ExpressionSet .nkg file")
+    parser.add_argument('--save-plot', type=Path, default="gridsearch.png",
+                        help="Save an expression plot file")
+    parser.add_argument('--overwrite', action="store_true", help="Silently overwrite existing files.")
     parser.add_argument('--function_name', type=str, default="d_IL2",
                         help='function name in stimulisig')
     parser.add_argument('--emeg_file', type=str, default="participant_01-ave",
@@ -61,23 +69,27 @@ def main():
     # emeg_paths = [Path(emeg_dir, p + r) for p in participants[:2] for r in reps[-1:]]
 
     inverse_operator = Path(args.base_dir, args.inverse_operator, f"{participants[0]}_ico5-3L-loose02-cps-nodepth.fif")
+    inverse_operator = None  # None -> sensor space; remove this override to keep the path built above and run in source space
 
     # Load data
-    emeg, ch_names = load_emeg_pack(emeg_paths,
-                                    need_names=False,
-                                    ave_mode=args.ave_mode,
-                                    inverse_operator=None, #inverse_operator, # set to None/inverse_operator if you want to run on sensor space/source space
-                                    p_tshift=None,
-                                    snr=args.snr)
+    emeg_values, ch_names = load_emeg_pack(emeg_paths,
+                                           need_names=True,
+                                           ave_mode=args.ave_mode,
+                                           inverse_operator=inverse_operator,
+                                           p_tshift=None,
+                                           snr=args.snr)
 
     func = load_function(Path(args.base_dir, args.function_path),
                          func_name=args.function_name,
                          bruce_neurons=(5, 10))
     func = func.downsampled(args.downsample_rate)
 
+    channel_space = "source" if inverse_operator is not None else "sensor"
+
     es = do_gridsearch(
-        emeg_values=emeg,
-        sensor_names=ch_names,
+        emeg_values=emeg_values,
+        channel_names=ch_names,
+        channel_space=channel_space,
         function=func,
         seconds_per_split=args.seconds_per_split,
         n_derangements=args.n_derangements,
@@ -89,7 +101,11 @@ def main():
         ave_mode=args.ave_mode,
     )
 
-    # expression_plot(es)
+    if args.save_expression_set is not None:
+        save_expression_set(es, args.save_expression_set, overwrite=args.overwrite)
+
+    expression_plot(es, paired_axes=channel_space == "source", save_to=args.save_plot, overwrite=args.overwrite)
+
 
 if __name__ == '__main__':
     main()
diff --git a/kymata/entities/expression.py b/kymata/entities/expression.py
@@ -92,8 +92,8 @@ def __init__(self,
                 data = data[i]
                 data = self._init_prep_data(data)
                 # Check validity of input data dimensions
-                assert len(channels) == data.shape[0], f"{channel_coord_name} mismatch for {f}"
-                assert len(latencies) == data.shape[1], f"Latencies mismatch for {f}"
+                assert len(channels) == data.shape[0], f"{channel_coord_name} mismatch for {f}: {len(channels)} {channel_coord_name} versus data shape {data.shape}"
+                assert len(latencies) == data.shape[1], f"Latencies mismatch for {f}: {len(latencies)} latencies versus data shape {data.shape}"
                 dataset_dict[layer] = DataArray(
                     data=data,
                     dims=self._dims,
@@ -403,6 +403,9 @@ def best_functions(self) -> DataFrame:
         return super()._best_functions_for_layer(LAYER_SCALP)
 
 
+log_base = 10
+
+
 def p_to_logp(arraylike: ArrayLike) -> ArrayLike:
     """The one-stop-shop for converting from p-values to log p-values."""
     return log10(arraylike)

diff --git a/kymata/gridsearch/plain.py b/kymata/gridsearch/plain.py
@@ -5,14 +5,14 @@
 from kymata.entities.functions import Function
 from kymata.math.combinatorics import generate_derangement
 from kymata.math.vector import normalize, get_stds
-#from kymata.entities.expression import SensorExpressionSet, p_to_logp
-import matplotlib.pyplot as plt
+from kymata.entities.expression import ExpressionSet, SensorExpressionSet, HexelExpressionSet, p_to_logp, log_base
 
 
 def do_gridsearch(
         emeg_values: NDArray,  # chan x time
         function: Function,
-        sensor_names: list[str],
+        channel_names: list,
+        channel_space: str,
         start_latency: float,   # ms
         emeg_t_start: float,    # ms
         emeg_sample_rate: int = 1000,  # Hertz
@@ -21,13 +21,15 @@ def do_gridsearch(
         seconds_per_split: float = 0.5,
         n_splits: int = 800,
         ave_mode: str = 'ave',  # either ave or add, for averaging over input files or adding in as extra evidence
-        add_autocorr: bool = True,
-        plot_name: str = 'example'
-        ):
+) -> ExpressionSet:
     """
     Do the Kymata gridsearch over all hexels for all latencies.
     """
 
+    channel_space = channel_space.lower()
+    if channel_space not in {"sensor", "source"}:
+        raise NotImplementedError(channel_space)
+
     # We'll need to downsample the EMEG to match the function's sample rate
     downsample_rate: int = int(emeg_sample_rate / function.sample_rate)
 
@@ -47,8 +49,10 @@ def do_gridsearch(
     n_channels = emeg_values.shape[0]
 
     # Reshape EMEG into splits of `seconds_per_split` s
-    split_initial_timesteps = [int(start_latency + round(i * 1000 * seconds_per_split * (1 + audio_shift_correction)) - emeg_t_start)
-        for i in range(n_splits)]
+    split_initial_timesteps = [
+        int(start_latency + round(i * 1000 * seconds_per_split * (1 + audio_shift_correction)) - emeg_t_start)
+        for i in range(n_splits)
+    ]
 
     emeg_reshaped = np.zeros((n_channels, n_splits * n_reps, n_samples_per_split))
     for j in range(n_reps):
@@ -75,75 +79,29 @@ def do_gridsearch(
         deranged_emeg = emeg_reshaped[:, derangement, :]
         corrs[:, der_i] = np.fft.irfft(deranged_emeg * F_func)[:, :, :n_samples_per_split//2] / emeg_stds[:, derangement]
 
-    if add_autocorr:
-        auto_corrs = np.zeros((n_splits, n_samples_per_split//2))
-        noise = normalize(np.random.randn(func.shape[0], func.shape[1])) * 0
-        noisy_func = normalize(np.copy(func)) + noise
-        nn = n_samples_per_split // 2
-
-        F_noisy_func = np.fft.rfft(normalize(noisy_func), n=nn, axis=-1)
-        F_func = np.conj(np.fft.rfft(normalize(func), n=nn, axis=-1))
-
-        auto_corrs = np.fft.irfft(F_noisy_func * F_func)
-
     del F_func, deranged_emeg, emeg_reshaped
 
     log_pvalues = _ttest(corrs)
 
-    latencies = np.linspace(start_latency, start_latency + (seconds_per_split * 1000), n_samples_per_split // 2 + 1)[:-1]
-
-    if plot_name:
-        plt.figure(1)
-        corr_avrs = np.mean(corrs[:, 0]**2, axis=-2)
-        maxs = np.max(corr_avrs, axis=1)
-        n_amaxs = 5
-        amaxs = np.argpartition(maxs, -n_amaxs)[-n_amaxs:]
-        amax = np.argmax(corr_avrs) // (n_samples_per_split // 2)
-        amaxs = [i for i in amaxs if i != amax] # + [209]
-
-        plt.plot(latencies, np.mean(corrs[amax, 0], axis=-2).T, 'r-', label=amax)
-        plt.plot(latencies, np.mean(corrs[amaxs, 0], axis=-2).T, label=amaxs)
-        std_null = np.mean(np.std(corrs[:, 1], axis=-2), axis=0).T * 3 / np.sqrt(n_reps * n_splits) # 3 pop std.s
-        std_real = np.std(corrs[amax, 0], axis=-2).T * 3  / np.sqrt(n_reps * n_splits)
-        av_real = np.mean(corrs[amax, 0], axis=-2).T
-        #print(std_null)
-        plt.fill_between(latencies, -std_null, std_null, alpha=0.5, color='grey')
-        plt.fill_between(latencies, av_real - std_real, av_real + std_real, alpha=0.25, color='red')
-
-        if add_autocorr:
-            peak_lat_ind = np.argmax(corr_avrs) % (n_samples_per_split // 2)
-            peak_lat = latencies[peak_lat_ind]
-            peak_corr = np.mean(corrs[amax, 0], axis=-2)[peak_lat_ind]
-            print(f'{function.name}: peak lat, peak corr, ind:', peak_lat, peak_corr, amax)
-
-            auto_corrs = np.mean(auto_corrs, axis=0)
-            plt.plot(latencies, np.roll(auto_corrs, peak_lat_ind) * peak_corr / np.max(auto_corrs), 'k--', label='func auto-corr')
-
-        plt.axvline(0, color='k')
-        plt.legend()
-        plt.xlabel('latencies (ms)')
-        plt.ylabel('Corr coef.')
-        plt.savefig(f'{plot_name}_1.png')
-        plt.clf()
-
-        plt.figure(2)
-        plt.plot(latencies, -log_pvalues[amax].T, 'r-', label=amax)
-        plt.plot(latencies, -log_pvalues[amaxs].T, label=amaxs)
-        plt.axvline(0, color='k')
-        plt.legend()
-        plt.xlabel('latencies (ms)')
-        plt.ylabel('p-values')
-        plt.savefig(f'{plot_name}_2.png')
-        plt.clf()
-
-    return
-
-    """es = SensorExpressionSet(
-        functions=function.name,
-        latencies=latencies / 1000,
-        sensors=sensor_names,
-        data=log_pvalues,
-    )"""
+    latencies_ms = np.linspace(start_latency, start_latency + (seconds_per_split * 1000), n_samples_per_split // 2 + 1)[:-1]
+
+    if channel_space == "sensor":
+        es = SensorExpressionSet(
+            functions=function.name,
+            latencies=latencies_ms / 1000,  # seconds
+            sensors=channel_names,
+            data=log_pvalues,
+        )
+    elif channel_space == "source":
+        es = HexelExpressionSet(
+            functions=f"{function.name}_mirrored-lh",  # TODO: revert to just `function.name` when we have both hemispheres in place
+            latencies=latencies_ms / 1000,  # seconds
+            hexels=channel_names,
+            data_lh=log_pvalues,
+            data_rh=log_pvalues,  # placeholder: same values as data_lh. TODO: distribute data correctly when we have both hemispheres in place
+        )
+    else:
+        raise NotImplementedError(channel_space)
 
     return es
 
@@ -183,9 +141,11 @@ def _ttest(corrs: NDArray, use_all_lats: bool = True):
     t_stat = numerator / denominator
 
     if np.min(df) <= 300:
-        log_p = np.log(stats.t.sf(np.abs(t_stat), df) * 2)  # two-tailed p-value
+        p = stats.t.sf(np.abs(t_stat), df) * 2  # two-tailed p-value
+        log_p = p_to_logp(p)
     else:
         # norm v good approx for this, (logsf for t not implemented in logspace)
-        log_p = stats.norm.logsf(np.abs(t_stat)) + np.log(2) 
+        log_p = stats.norm.logsf(np.abs(t_stat)) + np.log(2)
+        log_p /= np.log(log_base)  # log base correction
 
-    return log_p / np.log(10)  # log base correction
+    return log_p
diff --git a/kymata/io/mne.py b/kymata/io/mne.py
@@ -1,13 +1,8 @@
-from pathlib import Path
-
 from mne import read_evokeds, minimum_norm, set_eeg_reference
 import numpy as np
 from numpy.typing import NDArray
 from os.path import isfile
 
-from kymata.io.file import path_type
-
-
 
 def load_single_emeg(emeg_path, need_names=False, inverse_operator=None, snr=4):
     emeg_path_npy = f"{emeg_path}.npy"
@@ -19,16 +14,13 @@ def load_single_emeg(emeg_path, need_names=False, inverse_operator=None, snr=4):
         evoked = read_evokeds(emeg_path_fif, verbose=False)  # should be len 1 list
         if inverse_operator is not None:
             lh_emeg, rh_emeg, ch_names = inverse_operate(evoked[0], inverse_operator, snr)
-            # TODO: I think ch_names here is the wrong thing 
-
-            emeg = None #np.concatenate((lh_emeg, rh_emeg), axis=0)
+            # TODO: I think ch_names here is the wrong thing
 
             # TODO: currently this goes OOM (node-h04 atleast):
             #       looks like this will be faster when split up anyway
             #       note, don't run the inv_op twice for rh and lh!
             # TODO: move inverse operator to run after EMEG channel combination
-
-            emeg = lh_emeg
+            emeg = lh_emeg  # left hemisphere only for now; was: np.concatenate((lh_emeg, rh_emeg), axis=0)
             del lh_emeg, rh_emeg
         else:
             emeg = evoked[0].get_data()  # numpy array shape (sensor_num, N) = (370, 403_001)
@@ -39,13 +31,15 @@ def load_single_emeg(emeg_path, need_names=False, inverse_operator=None, snr=4):
         del evoked
     return emeg, ch_names
 
+
 def inverse_operate(evoked, inverse_operator, snr=4):
     lambda2 = 1.0 / snr ** 2
     inverse_operator = minimum_norm.read_inverse_operator(inverse_operator, verbose=False)
     set_eeg_reference(evoked, projection=True, verbose=False)
     stc = minimum_norm.apply_inverse(evoked, inverse_operator, lambda2, 'MNE', pick_ori='normal', verbose=False)
     return stc.lh_data, stc.rh_data, evoked.ch_names
 
+
 def load_emeg_pack(emeg_paths, need_names=False, ave_mode=None, inverse_operator=None, p_tshift=None, snr=4):  # TODO: FIX PRE-AVE-NORMALISATION
     if p_tshift is None:
         p_tshift = [0]*len(emeg_paths)

diff --git a/kymata/plot/plot.py b/kymata/plot/plot.py
@@ -75,6 +75,7 @@ def expression_plot(
         hidden_functions_in_legend: bool = True,
         # I/O args
         save_to: Optional[Path] = None,
+        overwrite: bool = True,
 ):
     """
     Generates an expression plot
@@ -274,7 +275,12 @@ def expression_plot(
 
     if save_to is not None:
         pyplot.rcParams['savefig.dpi'] = 300
-        pyplot.savefig(Path(save_to), bbox_inches='tight')
+        save_to = Path(save_to)
+
+        # Refuse to clobber an existing file unless the caller opted in
+        if not overwrite and save_to.exists():
+            raise FileExistsError(save_to)
+        pyplot.savefig(save_to, bbox_inches='tight')
 
     pyplot.show()
     pyplot.close()