diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py
new file mode 100644
index 00000000..62727751
--- /dev/null
+++ b/invokers/run_gridsearch.py
@@ -0,0 +1,95 @@
+from pathlib import Path
+import argparse
+from kymata.gridsearch.gridsearch import do_gridsearch
+from kymata.io.functions import load_function
+from kymata.io.mne import load_emeg_pack
+
+
+def main():
+
+    parser = argparse.ArgumentParser(description='Gridsearch Params')
+    parser.add_argument('--emeg_sample_rate', type=int, default=1000,
+                        help='sampling rate of the emeg machine (not implemented yet)')
+    parser.add_argument('--snr', type=float, default=3,
+                        help='inverse solution snr')
+    parser.add_argument('--downsample_rate', type=int, default=5,
+                        help='downsample_rate')
+    parser.add_argument('--base_dir', type=str, default="/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/",
+                        help='base data directory')
+    parser.add_argument('--data_path', type=str, default="intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data",
+                        help='data path after base dir')
+    parser.add_argument('--function_path', type=str, default="predicted_function_contours/GMSloudness/stimulisig",
+                        help='snr')
+    parser.add_argument('--function_name', type=str, default="d_IL2",
+                        help='function name in stimulisig')
+    parser.add_argument('--emeg_file', type=str, default="participant_01-ave",
+                        help='emeg_file_name')
+    parser.add_argument('--ave_mode', type=str, default="ave",
+                        help='either ave or add, either average over the list of repetitions or treat them as extra data')
+    parser.add_argument('--inverse_operator', type=str, default="intrim_preprocessing_files/4_hexel_current_reconstruction/inverse-operators",
+                        help='inverse solution path')
+    parser.add_argument('--seconds_per_split', type=float, default=0.5,
+                        help='seconds in each split of the recording, also maximum range of latencies being checked')
+    parser.add_argument('--n_splits', type=int, default=800,
+                        help='number of splits to split the recording into, (set to 400/seconds_per_split for full file)')
+    parser.add_argument('--n_derangements', type=int, default=1,
+                        help='inverse solution snr')
+    parser.add_argument('--start_latency', type=float, default=-100,
+                        help='earliest latency to check in cross correlation')
+    parser.add_argument('--emeg_t_start', type=float, default=-200,
+                        help='start of the emeg evoked files relative to the start of the function')
+    parser.add_argument('--audio_shift_correction', type=float, default=0.000_537_5,
+                        help='audio shift correction, for every second of function, add this number of seconds (to the start of the emeg split) per seconds of emeg seen')
+    args = parser.parse_args()
+    args.base_dir = Path(args.base_dir)
+
+
+    emeg_dir = Path(args.base_dir, args.data_path)
+    emeg_paths = [Path(emeg_dir, args.emeg_file)]
+
+    participants = ['participant_01',
+                    'participant_01b',
+                    'participant_02',
+                    'participant_03',
+                    'participant_04',
+                    'participant_05',
+                    'pilot_01',
+                    'pilot_02']
+
+    reps = [f'_rep{i}' for i in range(8)] + ['-ave']
+
+    # emeg_paths = [Path(emeg_dir, p + r) for p in participants[:2] for r in reps[-1:]]
+
+    inverse_operator = Path(args.base_dir, args.inverse_operator, f"{participants[0]}_ico5-3L-loose02-cps-nodepth.fif")
+
+    # Load data
+    emeg, ch_names = load_emeg_pack(emeg_paths,
+                                    need_names=False,
+                                    ave_mode=args.ave_mode,
+                                    inverse_operator=None, #inverse_operator, # set to None/inverse_operator if you want to run on sensor space/source space
+                                    p_tshift=None,
+                                    snr=args.snr)
+
+    func = load_function(Path(args.base_dir, args.function_path),
+                         func_name=args.function_name,
+                         bruce_neurons=(5, 10))
+    func = func.downsampled(args.downsample_rate)
+
+    es = do_gridsearch(
+        emeg_values=emeg,
+        sensor_names=ch_names,
+        function=func,
+        seconds_per_split=args.seconds_per_split,
+        n_derangements=args.n_derangements,
+        n_splits=args.n_splits,
+        start_latency=args.start_latency,
+        emeg_t_start=args.emeg_t_start,
+        emeg_sample_rate=args.emeg_sample_rate,
+        audio_shift_correction=args.audio_shift_correction,
+        ave_mode=args.ave_mode,
+    )
+
+    # expression_plot(es)
+
+if __name__ == '__main__':
+    main()
diff --git a/invokers/run_preprocessing_dataset4.py b/invokers/run_preprocessing_dataset4.py
new file mode 100644
index 00000000..bd342134
--- /dev/null
+++ b/invokers/run_preprocessing_dataset4.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+import sys
+sys.path.append('/imaging/projects/cbu/kymata/analyses/ollie/kymata-toolbox')
+
+from kymata.io.yaml import load_config
+from kymata.preproc.data_cleansing import run_preprocessing, create_trials, create_trialwise_data
+
+
+# noinspection DuplicatedCode
+def main():
+    config = load_config(str(Path(Path(__file__).parent.parent, "kymata", "config", "dataset4.yaml")))
+
+    create_trialwise_data(
+        dataset_directory_name=config['dataset_directory_name'],
+        list_of_participants=config['list_of_participants'],
+        repetitions_per_runs=config['repetitions_per_runs'],
+        number_of_runs=config['number_of_runs'],
+        number_of_trials=config['number_of_trials'],
+        input_streams=config['input_streams'],
+        eeg_thresh=float(config['eeg_thresh']),
+        grad_thresh=float(config['grad_thresh']),
+        mag_thresh=float(config['mag_thresh']),
+        visual_delivery_latency=config['visual_delivery_latency'],
+        audio_delivery_latency=config['audio_delivery_latency'],
+        audio_delivery_shift_correction=config['audio_delivery_shift_correction'],
+        tmin=config['tmin'],
+        tmax=config['tmax'],
+    )
+
+
+if __name__ == '__main__':
+    main()
diff --git a/kymata/config/dataset3.yaml b/kymata/config/dataset3.yaml
index 6baada0a..13486e19 100644
--- a/kymata/config/dataset3.yaml
+++ b/kymata/config/dataset3.yaml
@@ -45,9 +45,9 @@ supress_excessive_plots_and_prompts: True
 audio_delivery_latency:  26        # in milliseconds
 visual_delivery_latency: 17        # in milliseconds
 tactile_delivery_latency: 0        # in milliseconds
-audio_delivery_shift_correction: 0 # in milliseconds per second
-tmin: -0.2
-tmax: 1.8
+audio_delivery_shift_correction: 0 # in seconds per second
+tmin: -0.2 # seconds
+tmax: 1.8  # seconds
 
 eeg_thresh: 1 #200e-6
 grad_thresh: 1 #4000e-13
diff --git a/kymata/config/dataset4.yaml b/kymata/config/dataset4.yaml
old mode 100644
new mode 100755
index 26bf33ae..842f9f41
--- a/kymata/config/dataset4.yaml
+++ b/kymata/config/dataset4.yaml
@@ -36,9 +36,9 @@ meg: True
 audio_delivery_latency:  26        # in milliseconds
 visual_delivery_latency: 17        # in milliseconds
 tactile_delivery_latency: 0        # in milliseconds
-audio_delivery_shift_correction: 0.5375   # in milliseconds per second
-tmin: -0.2
-tmax: 1.8
+audio_delivery_shift_correction: 0.0005375   # in seconds per second
+tmin: -0.2  # seconds
+tmax: 1.8   # seconds
 
 eeg_thresh: 1 #200e-6
 grad_thresh: 1 #4000e-13
diff --git a/kymata/entities/functions.py b/kymata/entities/functions.py
new file mode 100644
index 00000000..cba27b7b
--- /dev/null
+++ b/kymata/entities/functions.py
@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+
+from numpy.typing import NDArray
+
+
+@dataclass
+class Function:
+    name: str
+    values: NDArray
+    sample_rate: float  # Hertz
+
+    def downsampled(self, rate: int):
+        return Function(
+            name=self.name,
+            values=self.values[::rate],
+            sample_rate=self.sample_rate / rate,
+        )
+
+    @property
+    def time_step(self) -> float:
+        return 1 / self.sample_rate
diff --git a/kymata/entities/iterables.py b/kymata/entities/iterables.py
index 5faeb602..c2a1c910 100644
--- a/kymata/entities/iterables.py
+++ b/kymata/entities/iterables.py
@@ -6,6 +6,7 @@
 from typing import Sequence
 
 from numpy import ndarray
+from numpy.typing import NDArray
 
 
 def all_equal(sequence: Sequence) -> bool:
@@ -20,10 +21,10 @@ def all_equal(sequence: Sequence) -> bool:
         # numpy arrays deal with equality weirdly
 
         # Check first two items are equal, and equal to the rest
-        first: ndarray = sequence[0]
+        first: NDArray = sequence[0]
         if not isinstance(sequence[1], ndarray):
             return False
-        second: ndarray = sequence[1]
+        second: NDArray = sequence[1]
         try:
             # noinspection PyUnresolvedReferences
             return (first == second).all() and all_equal(sequence[1:])
diff --git a/kymata/entities/sparse_data.py b/kymata/entities/sparse_data.py
old mode 100644
new mode 100755
diff --git a/kymata/gridsearch/__init__.py b/kymata/gridsearch/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py
new file mode 100644
index 00000000..defb95da
--- /dev/null
+++ b/kymata/gridsearch/gridsearch.py
@@ -0,0 +1,190 @@
+import numpy as np
+from numpy.typing import NDArray
+from scipy import stats
+
+from kymata.entities.functions import Function
+from kymata.math.combinatorics import generate_derangement
+from kymata.math.vector import normalize, get_stds
+#from kymata.entities.expression import SensorExpressionSet, p_to_logp
+import matplotlib.pyplot as plt
+
+def do_gridsearch(
+        emeg_values: NDArray,  # chan x time
+        function: Function,
+        sensor_names: list[str],
+        start_latency: float,   # ms
+        emeg_t_start: float,    # ms
+        emeg_sample_rate: int = 1000,  # Hertz
+        audio_shift_correction: float = 0.000_537_5,  # seconds/second  # TODO: describe in which direction?
+        n_derangements: int = 1,
+        seconds_per_split: float = 0.5,
+        n_splits: int = 800,
+        ave_mode: str = 'ave', # either ave or add, for averaging over input files or adding in as extra evidence
+        add_autocorr: bool = True,
+        plot_name: str = 'example'
+        ):
+    """
+    Do the Kymata gridsearch over all hexels for all latencies.
+    """
+
+    # We'll need to downsample the EMEG to match the function's sample rate
+    downsample_rate: int = int(emeg_sample_rate / function.sample_rate)
+
+    n_samples_per_split = int(seconds_per_split * emeg_sample_rate * 2 // downsample_rate)
+
+    if ave_mode == 'add':
+        n_reps = len(EMEG_paths)
+    else:
+        n_reps = 1
+
+    func_length = n_splits * n_samples_per_split // 2
+    if func_length < function.values.shape[0]:
+        func = function.values[:func_length].reshape(n_splits, n_samples_per_split // 2)
+        print(f'WARNING: not using full 400s of the file (only using {round(n_splits * seconds_per_split, 2)}s)')
+    else:
+        func = function.values.reshape(n_splits, n_samples_per_split // 2)
+    n_channels = emeg_values.shape[0]
+
+    # Reshape EMEG into splits of `seconds_per_split` s
+    split_initial_timesteps = [int(start_latency + round(i * 1000 * seconds_per_split * (1 + audio_shift_correction)) - emeg_t_start)
+        for i in range(n_splits)]
+
+    emeg_reshaped = np.zeros((n_channels, n_splits * n_reps, n_samples_per_split))
+    for j in range(n_reps):
+        for split_i in range(n_splits):
+            split_start = split_initial_timesteps[split_i]
+            split_stop = split_start + int(2 * emeg_sample_rate * seconds_per_split)
+            emeg_reshaped[:, split_i + (j * n_splits), :] = emeg_values[:, j, split_start:split_stop:downsample_rate]
+
+    del emeg_values
+
+    # Derangements for null distribution
+    derangements = np.zeros((n_derangements, n_splits * n_reps), dtype=int)
+    for der_i in range(n_derangements):
+        derangements[der_i, :] = generate_derangement(n_splits * n_reps, n_splits)
+    derangements = np.vstack((np.arange(n_splits * n_reps), derangements))  # Include the identity on top
+
+    # Fast cross-correlation using FFT
+    emeg_reshaped = normalize(emeg_reshaped)
+    emeg_stds = get_stds(emeg_reshaped, n_samples_per_split // 2)
+    emeg_reshaped = np.fft.rfft(emeg_reshaped, n=n_samples_per_split, axis=-1)
+    F_func = np.conj(np.fft.rfft(normalize(func), n=n_samples_per_split, axis=-1))
+    corrs = np.zeros((n_channels, n_derangements + 1, n_splits * n_reps, n_samples_per_split // 2))
+    for der_i, derangement in enumerate(derangements):
+        deranged_emeg = emeg_reshaped[:, derangement, :]
+        corrs[:, der_i] = np.fft.irfft(deranged_emeg * F_func)[:, :, :n_samples_per_split//2] / emeg_stds[:, derangement]
+
+    if add_autocorr:
+        auto_corrs = np.zeros((n_splits, n_samples_per_split//2))
+        noise = normalize(np.random.randn(func.shape[0], func.shape[1])) * 0
+        noisy_func = normalize(np.copy(func)) + noise
+        nn = n_samples_per_split // 2
+
+        F_noisy_func = np.fft.rfft(normalize(noisy_func), n=nn, axis=-1)
+        F_func = np.conj(np.fft.rfft(normalize(func), n=nn, axis=-1))
+
+        auto_corrs = np.fft.irfft(F_noisy_func * F_func)
+
+    del F_func, deranged_emeg, emeg_reshaped
+
+    log_pvalues = _ttest(corrs)
+
+    latencies = np.linspace(start_latency, start_latency + (seconds_per_split * 1000), n_samples_per_split // 2 + 1)[:-1]
+
+    if plot_name:
+        plt.figure(1)
+        corr_avrs = np.mean(corrs[:, 0]**2, axis=-2)
+        maxs = np.max(corr_avrs, axis=1)
+        n_amaxs = 5
+        amaxs = np.argpartition(maxs, -n_amaxs)[-n_amaxs:]
+        amax = np.argmax(corr_avrs) // (n_samples_per_split // 2)
+        amaxs = [i for i in amaxs if i != amax] # + [209]
+
+        plt.plot(latencies, np.mean(corrs[amax, 0], axis=-2).T, 'r-', label=amax)
+        plt.plot(latencies, np.mean(corrs[amaxs, 0], axis=-2).T, label=amaxs)
+        std_null = np.mean(np.std(corrs[:, 1], axis=-2), axis=0).T * 3 / np.sqrt(n_reps * n_splits) # 3 pop std.s
+        std_real = np.std(corrs[amax, 0], axis=-2).T * 3  / np.sqrt(n_reps * n_splits)
+        av_real = np.mean(corrs[amax, 0], axis=-2).T
+        #print(std_null)
+        plt.fill_between(latencies, -std_null, std_null, alpha=0.5, color='grey')
+        plt.fill_between(latencies, av_real - std_real, av_real + std_real, alpha=0.25, color='red')
+
+        if add_autocorr:
+            peak_lat_ind = np.argmax(corr_avrs) % (n_samples_per_split // 2)
+            peak_lat = latencies[peak_lat_ind]
+            peak_corr = np.mean(corrs[amax, 0], axis=-2)[peak_lat_ind]
+            print(f'{function.name}: peak lat, peak corr, ind:', peak_lat, peak_corr, amax)
+
+            auto_corrs = np.mean(auto_corrs, axis=0)
+            plt.plot(latencies, np.roll(auto_corrs, peak_lat_ind) * peak_corr / np.max(auto_corrs), 'k--', label='func auto-corr')
+
+        plt.axvline(0, color='k')
+        plt.legend()
+        plt.xlabel('latencies (ms)')
+        plt.ylabel('Corr coef.')
+        plt.savefig(f'{plot_name}_1.png')
+        plt.clf()
+
+        plt.figure(2)
+        plt.plot(latencies, -log_pvalues[amax].T, 'r-', label=amax)
+        plt.plot(latencies, -log_pvalues[amaxs].T, label=amaxs)
+        plt.axvline(0, color='k')
+        plt.legend()
+        plt.xlabel('latencies (ms)')
+        plt.ylabel('p-values')
+        plt.savefig(f'{plot_name}_2.png')
+        plt.clf()
+
+    return
+
+    """es = SensorExpressionSet(
+        functions=function.name,
+        latencies=latencies / 1000,
+        sensors=sensor_names,
+        data=log_pvalues,
+    )"""
+
+    return es
+
+
+def _ttest(corrs: NDArray, use_all_lats: bool = True):
+    """
+    Vectorised Welch's t-test.
+    """
+    n_channels, n_derangements, n_splits, t_steps = corrs.shape
+
+    # Fisher Z-Transformation
+    corrs_z = 0.5 * np.log((1 + corrs) / (1 - corrs))
+
+    # Non-deranged values are on top
+    true_mean = np.mean(corrs_z[:, 0, :, :], axis=1)
+    true_var = np.var(corrs_z[:, 0, :, :], axis=1, ddof=1)
+    true_n = n_splits
+
+    # Recompute mean and var for null correlations
+    # TODO: why looking at only 1 in the n_derangements dimension?
+    if use_all_lats:
+        rand_mean = np.mean(corrs_z[:, 1:, :, :].reshape(n_channels, -1), axis=1).reshape(n_channels, 1)
+        rand_var = np.var(corrs_z[:, 1:, :, :].reshape(n_channels, -1), axis=1, ddof=1).reshape(n_channels, 1)
+        rand_n = n_splits * n_derangements * t_steps
+    else:
+        rand_mean = np.mean(corrs_z[:, 1:, :, :].reshape(n_channels, -1, t_steps), axis=1)
+        rand_var = np.var(corrs_z[:, 1:, :, :].reshape(n_channels, -1, t_steps), axis=1, ddof=1)
+        rand_n = n_splits * n_derangements
+
+    # Vectorized two-sample t-tests for all channels and time steps
+    numerator = true_mean - rand_mean
+    denominator = np.sqrt(true_var / true_n + rand_var / rand_n)
+    df = ((true_var / true_n + rand_var / rand_n) ** 2 /
+          ((true_var / true_n) ** 2 / (true_n - 1) +
+           (rand_var / rand_n) ** 2 / (rand_n - 1)))
+
+    t_stat = numerator / denominator
+
+    if np.min(df) <= 300:
+        log_p = np.log(stats.t.sf(np.abs(t_stat), df) * 2)  # two-tailed p-value
+    else:
+        # norm v good approx for this, (logsf for t not implemented in logspace)
+        log_p = stats.norm.logsf(np.abs(t_stat)) + np.log(2) 
+
+    return log_p / np.log(10)  # log base correction
diff --git a/kymata/io/functions.py b/kymata/io/functions.py
new file mode 100644
index 00000000..92f99e1c
--- /dev/null
+++ b/kymata/io/functions.py
@@ -0,0 +1,70 @@
+from pathlib import Path
+
+from numpy import array, float16, convolve, mean
+import numpy as np
+from numpy.typing import NDArray
+from scipy.io import loadmat
+
+from kymata.entities.functions import Function
+from kymata.io.file import path_type
+
+
+def load_function(function_path_without_suffix: path_type, func_name: str, n_derivatives: int = 0, bruce_neurons: tuple = (0, 10)) -> Function:
+    function_path_without_suffix = Path(function_path_without_suffix)
+    func: NDArray
+    if 'neurogram' in func_name:
+        print('USING BRUCE MODEL')
+        if function_path_without_suffix.with_suffix(".npz").exists():
+            func_dict = np.load(function_path_without_suffix.with_suffix(".npz"))
+            func = func_dict[func_name]
+            func = np.mean(func[bruce_neurons[0]:bruce_neurons[1]], axis=0)
+        else:
+            mat = loadmat(function_path_without_suffix.with_suffix('.mat'))['neurogramResults']
+            func = np.array(mat[func_name][0][0])
+            # func = np.mean(func[bruce_neurons[0]:bruce_neurons[1]], axis=0)
+            tt = np.array(mat['t_'+func_name[-2:]][0][0])
+            n_chans = func.shape[0]
+            new_mr_arr = np.zeros((n_chans, 400_000))
+            new_ft_arr = np.zeros((n_chans, 400_000))
+            T_end = tt[0, -1]
+            if func_name == 'neurogram_mr':
+                for i in range(n_chans):
+                    new_mr_arr[i] = np.interp(np.linspace(0, 400, 400_000 + 1)[:-1], tt[0], func[i])
+            elif func_name == 'neurogram_ft':
+                func = np.cumsum(func * (tt[0, 1] - tt[0, 0]), axis=-1)
+                for i in range(n_chans):
+                    func_interp = np.interp(np.linspace(0, 400, 400_000 + 1), tt[0], func[i])
+                    new_ft_arr[i] = np.diff(func_interp, axis=-1)
+
+            func_dict = {'neurogram_mr': new_mr_arr,
+                         'neurogram_ft': new_ft_arr}
+            np.savez(str(function_path_without_suffix.with_suffix(".npz")), **func_dict)
+
+            func = func_dict[func_name]
+            func = np.mean(func[bruce_neurons[0]:bruce_neurons[1]], axis=0)
+
+    else:
+        if function_path_without_suffix.with_suffix(".npz").exists():
+            func_dict = np.load(str(function_path_without_suffix.with_suffix(".npz")))
+            func = np.array(func_dict[func_name])
+        else:
+            mat = loadmat(str(function_path_without_suffix.with_suffix(".mat")))['stimulisig']
+            func_dict = {}
+            for key in mat.dtype.names:
+                if key == 'name':
+                    continue
+                func_dict[key] = np.array(mat[key][0, 0], dtype=np.float16)
+            np.savez(str(function_path_without_suffix.with_suffix(".npz")), **func_dict)
+            func = np.array(mat[func_name][0][0])
+
+        if func_name in ('STL', 'IL', 'LTL'):
+            func = func.T
+
+    for _ in range(n_derivatives):
+        func = np.convolve(func, [-1, 1], 'same')  # derivative
+
+    return Function(
+        name=func_name,
+        values=func.flatten().squeeze(),
+        sample_rate=1000,
+    )
diff --git a/kymata/io/mne.py b/kymata/io/mne.py
new file mode 100644
index 00000000..bd4e1525
--- /dev/null
+++ b/kymata/io/mne.py
@@ -0,0 +1,67 @@
+from pathlib import Path
+
+from mne import read_evokeds, minimum_norm, set_eeg_reference
+import numpy as np
+from numpy.typing import NDArray
+from os.path import isfile
+
+from kymata.io.file import path_type
+
+
+
+def load_single_emeg(emeg_path, need_names=False, inverse_operator=None, snr=4):
+    emeg_path_npy = f"{emeg_path}.npy"
+    emeg_path_fif = f"{emeg_path}.fif"
+    if isfile(emeg_path_npy) and (not need_names) and (inverse_operator is None):
+        ch_names: list[str] = []  # TODO: we'll need these
+        emeg: NDArray = np.load(emeg_path_npy)
+    else:
+        evoked = read_evokeds(emeg_path_fif, verbose=False)  # should be len 1 list
+        if inverse_operator is not None:
+            lh_emeg, rh_emeg, ch_names = inverse_operate(evoked[0], inverse_operator, snr)
+            # TODO: I think ch_names here is the wrong thing 
+            
+            emeg = None #np.concatenate((lh_emeg, rh_emeg), axis=0)
+
+            # TODO: currently this goes OOM (node-h04 atleast):
+            #       looks like this will be faster when split up anyway
+            #       note, don't run the inv_op twice for rh and lh!
+            # TODO: move inverse operator to run after EMEG channel combination
+
+            emeg = lh_emeg
+            del lh_emeg, rh_emeg
+        else:
+            emeg = evoked[0].get_data()  # numpy array shape (sensor_num, N) = (370, 403_001)
+            ch_names = evoked[0].ch_names
+            emeg /= np.max(emeg, axis=1, keepdims=True)
+            if not isfile(emeg_path_npy):
+                np.save(emeg_path_npy, np.array(emeg, dtype=np.float16))
+        del evoked
+    return emeg, ch_names
+
+def inverse_operate(evoked, inverse_operator, snr=4):
+    lambda2 = 1.0 / snr ** 2
+    inverse_operator = minimum_norm.read_inverse_operator(inverse_operator, verbose=False)
+    set_eeg_reference(evoked, projection=True, verbose=False)
+    stc = minimum_norm.apply_inverse(evoked, inverse_operator, lambda2, 'MNE', pick_ori='normal', verbose=False)
+    return stc.lh_data, stc.rh_data, evoked.ch_names
+
+def load_emeg_pack(emeg_paths, need_names=False, ave_mode=None, inverse_operator=None, p_tshift=None, snr=4):  # TODO: FIX PRE-AVE-NORMALISATION
+    if p_tshift is None:
+        p_tshift = [0]*len(emeg_paths)
+    emeg, emeg_names = load_single_emeg(emeg_paths[0], need_names, inverse_operator, snr)
+    emeg = emeg[:,p_tshift[0]:402001 + p_tshift[0]]
+    emeg = np.expand_dims(emeg, 1)
+    if ave_mode == 'add':
+        for i in range(1, len(emeg_paths)):
+            t_shift = p_tshift[i]
+            new_emeg = load_single_emeg(emeg_paths[i], need_names, inverse_operator, snr)[0][:,t_shift:402001 + t_shift]
+            emeg = np.concatenate((emeg, np.expand_dims(new_emeg, 1)), axis=1)
+    elif ave_mode == 'ave':
+        for i in range(1, len(emeg_paths)):
+            t_shift = p_tshift[i]
+            emeg += np.expand_dims(load_single_emeg(emeg_paths[i], need_names, inverse_operator, snr)[0][:,t_shift:402001 + t_shift], 1)
+    elif len(emeg_paths) > 1:
+        raise NotImplementedError(f'ave_mode "{ave_mode}" not known')
+    return emeg, emeg_names
+
diff --git a/kymata/math/__init__.py b/kymata/math/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/kymata/math/combinatorics.py b/kymata/math/combinatorics.py
new file mode 100644
index 00000000..22343425
--- /dev/null
+++ b/kymata/math/combinatorics.py
@@ -0,0 +1,15 @@
+from numpy import arange
+from numpy.random import randint
+
+
+def generate_derangement(n, mod=int(1e9)):  # approx 3ms runtime for n=400
+    while True:
+        v = arange(n)
+        for j in range(n - 1, -1, -1):
+            p = randint(0, j + 1)
+            if v[p] % mod == j % mod:
+                break
+            else:
+                v[j], v[p] = v[p], v[j]
+        else:
+            return v
diff --git a/kymata/math/vector.py b/kymata/math/vector.py
new file mode 100644
index 00000000..88b18faf
--- /dev/null
+++ b/kymata/math/vector.py
@@ -0,0 +1,23 @@
+import numpy as np
+from numpy.typing import NDArray
+
+
+def normalize(x: NDArray) -> NDArray:
+    """
+    Remove the mean and divide by the Euclidean magnitude.
+    """
+    x -= np.mean(x, axis=-1, keepdims=True)
+    x /= np.sqrt(np.sum(x**2, axis=-1, keepdims=True))
+    return x
+
+
+def get_stds(x, n):
+    """
+    Get the stds (times sqrt(n)) to correct for normalisation difference between each set of n_samples
+    """
+    d0, d1, d2 = x.shape
+    y = np.concatenate((np.zeros((d0, d1, 1)), np.cumsum(x**2, axis=-1)), axis=-1)
+    z = np.concatenate((np.zeros((d0, d1, 1)), np.cumsum(x, axis=-1)), axis=-1)
+    y = y[:,:,-n-1:-1] - y[:,:,:n]
+    z = z[:,:,-n-1:-1] - z[:,:,:n]
+    return (y - ((z**2) / n)) ** 0.5
diff --git a/kymata/preproc/data_cleansing.py b/kymata/preproc/data_cleansing.py
old mode 100644
new mode 100755
index ca7c2321..75654835
--- a/kymata/preproc/data_cleansing.py
+++ b/kymata/preproc/data_cleansing.py
@@ -1,6 +1,6 @@
 import os.path
 
-from colorama import Fore
+from colorama import Fore, Style
 import mne
 from numpy import zeros, nanmin
 from pandas import DataFrame, Index
@@ -283,7 +283,6 @@ def _remove_ecg(filt_raw, ica):
     # heartbeats
     ica.plot_overlay(filt_raw, exclude=ica.exclude, picks='mag')
 
-
 def _remove_ecg_eog(filt_raw, ica):
     """
     Note: mutates `ica`.
@@ -298,6 +297,122 @@ def _remove_ecg_eog(filt_raw, ica):
     ica.plot_scores(eog_scores)
 
 
+def create_trialwise_data(dataset_directory_name: str,
+                        list_of_participants: list[str],
+                        repetitions_per_runs: int,
+                        number_of_runs: int,
+                        number_of_trials: int,
+                        input_streams: list[str],
+                        eeg_thresh: float,
+                        grad_thresh: float,
+                        mag_thresh: float,
+                        visual_delivery_latency: float,
+                        audio_delivery_latency: float,
+                        audio_delivery_shift_correction: float,  # seconds
+                        tmin: float,
+                        tmax: float,
+                        ):
+    """Create trials objects from the raw data files (still in sensor space)"""
+
+    global_droplog = []
+
+    data_path = '/imaging/projects/cbu/kymata/data/' + dataset_directory_name
+    save_folder = '3_trialwise_sensorspace'
+
+    print(f"{Fore.GREEN}{Style.BRIGHT}Starting trials and {Style.RESET_ALL}")
+
+    for p in list_of_participants:
+
+        print(f"{Fore.GREEN}{Style.BRIGHT}...Concatenating trials{Style.RESET_ALL}")
+
+        cleaned_raws = []
+
+        for run in range(1, number_of_runs + 1):
+            raw_fname = f'{data_path}/intrim_preprocessing_files/2_cleaned/{p}_run{run}_cleaned_raw.fif.gz'
+            if os.path.isfile(raw_fname):
+                raw = mne.io.Raw(raw_fname, preload=True)
+                events_ = mne.find_events(raw, stim_channel='STI101', shortest_event=1)
+                #print([i[0] for i in mne.pick_events(events_, include=2)])
+                #print(mne.pick_events(events_, include=3))
+                cleaned_raws.append(raw)
+
+        raw = mne.io.concatenate_raws(raws=cleaned_raws, preload=True)
+        raw_events = mne.find_events(raw, stim_channel='STI101', shortest_event=1) #, uint_cast=True)
+
+        print(f"{Fore.GREEN}{Style.BRIGHT}...finding visual events{Style.RESET_ALL}")
+
+        #	Extract visual events
+        visual_events = mne.pick_events(raw_events, include=[2, 3])
+
+        #	Correct for visual equiptment latency error
+        visual_events = mne.event.shift_time_events(visual_events, [2, 3], visual_delivery_latency, 1)
+
+        # trigger_name = 1
+        # for trigger in visual_events:
+        #    trigger[2] = trigger_name  # rename as '1,2,3 ...400' etc
+        #    if trigger_name == 400:
+        #        trigger_name = 1
+        #    else:
+        #        trigger_name = trigger_name + 1
+        #
+        # #  Test there are the correct number of events
+        # assert visual_events.shape[0] == repetitions_per_runs * number_of_runs * number_of_trials
+
+        print(f"{Fore.GREEN}{Style.BRIGHT}...finding audio events{Style.RESET_ALL}")
+
+        #	Extract audio events
+        audio_events_raw = mne.pick_events(raw_events, include=3)
+
+        #	Correct for audio latency error
+        audio_events_raw = mne.event.shift_time_events(audio_events_raw, [3], audio_delivery_latency, 1)
+
+        audio_events = zeros((len(audio_events_raw) * 400, 3), dtype=int)
+        for run, item in enumerate(audio_events_raw):
+            print('item: ', item)
+            audio_events_raw[run][2] = run # rename the audio events raw to pick apart later
+            for trial in range(number_of_trials):
+                audio_events[(trial + (number_of_trials * run))][0] = item[0] + round(
+                    trial * (1000 + audio_delivery_shift_correction))
+                audio_events[(trial + (number_of_trials * run))][1] = 0
+                audio_events[(trial + (number_of_trials * run))][2] = trial
+
+        #  Test there are the correct number of events
+        assert audio_events.shape[0] == repetitions_per_runs * number_of_runs * number_of_trials # TODO handle overlapping visual/audio triggers
+        
+        print(f'\n Runs found: {audio_events.shape[0]} \n')
+
+        #	Denote picks
+        include = [];  # ['MISC006']  # MISC05, trigger channels etc, if needed
+        picks = mne.pick_types(raw.info, meg=True, eeg=True, stim=False, exclude='bads', include=include)
+
+        print(f"{Fore.GREEN}{Style.BRIGHT}... extract and save evoked data{Style.RESET_ALL}")
+
+        for input_stream in input_streams: # TODO n.b. not setup for visual/tactile stream yet
+            if input_stream == 'auditory':
+                events = audio_events
+            #else:
+            #    events = visual_events
+
+            #	Extract trial instances ('epochs')
+
+            _tmin = -0.2
+            _tmax = 400 + 0.8 + 2 # Extra space to account for audio file latency correction
+            epochs = mne.Epochs(raw, audio_events_raw, None, _tmin, _tmax, picks=picks,
+                                baseline=(None, None), reject=dict(eeg=eeg_thresh, grad=grad_thresh, mag=mag_thresh),
+                                preload=True)
+
+            # 	Log which channels are worst
+            dropfig = epochs.plot_drop_log(subject=p)
+            dropfig.savefig(f'{data_path}/intrim_preprocessing_files/{save_folder}/logs/f{input_stream}_drop-log_{p}.jpg')
+
+            global_droplog.append(f'[{input_stream}]{p}:{epochs.drop_log_stats(epochs.drop_log)}')
+
+            for i in range(len(audio_events_raw)):
+                evoked = epochs[str(i)].average()
+                evoked.save(f'{data_path}/intrim_preprocessing_files/{save_folder}/evoked_data/{p}_rep{i}.fif', overwrite=True)
+
+            evoked = epochs.average()
+            evoked.save(f'{data_path}/intrim_preprocessing_files/{save_folder}/evoked_data/{p}-ave.fif', overwrite=True)
 
 
 def create_trials(dataset_directory_name: str,
@@ -311,7 +426,7 @@ def create_trials(dataset_directory_name: str,
                   mag_thresh: float,
                   visual_delivery_latency: float,
                   audio_delivery_latency: float,
-                  audio_delivery_shift_correction: float,
+                  audio_delivery_shift_correction: float,  # seconds
                   tmin: float,
                   tmax: float,
                   ):
@@ -367,7 +482,7 @@ def create_trials(dataset_directory_name: str,
         for run, item in enumerate(audio_events_raw):
             for trial in range(1, number_of_trials + 1):
                 audio_events[(trial + (number_of_trials * run)) - 1][0] = item[0] + round(
-                    (trial - 1) * (1000 + audio_delivery_shift_correction))
+                    (trial - 1) * 1000 * (1 + audio_delivery_shift_correction))
                 audio_events[(trial + (number_of_trials * run)) - 1][1] = 0
                 audio_events[(trial + (number_of_trials * run)) - 1][2] = trial
 
diff --git a/submit_gridsearch.sh b/submit_gridsearch.sh
new file mode 100755
index 00000000..41763242
--- /dev/null
+++ b/submit_gridsearch.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+###
+# To run gridsearch on the queue at the CBU, run the following command in command line:
+#   sbatch submit_gridsearch.sh
+###
+
+
+#SBATCH --job-name=gridsearch
+#SBATCH --output=slurm_log.txt
+#SBATCH --error=slurm_log.txt
+#SBATCH --ntasks=1
+#SBATCH --time=05:00:00
+#SBATCH --mem=1000
+#SBATCH --array=1-1
+#SBATCH --exclusive
+
+conda activate mne_venv
+
+args=(5) # 2 3 4 5 6 7 8 9 10)
+ARG=${args[$SLURM_ARRAY_TASK_ID - 1]}
+
+python invokers/run_gridsearch.py
+    # --snr $ARG # >> result3.txt
+
+conda deactivate