From 98659de7d14f9de71ec7945515dfac5c0c734b3c Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Thu, 7 Dec 2023 12:56:37 +0000 Subject: [PATCH 01/25] gridsearch working --- scripts/gridsearch.py | 370 +++++++++++++++++++----------------------- 1 file changed, 163 insertions(+), 207 deletions(-) diff --git a/scripts/gridsearch.py b/scripts/gridsearch.py index a044fcee..9916217a 100644 --- a/scripts/gridsearch.py +++ b/scripts/gridsearch.py @@ -1,209 +1,165 @@ -from colorama import Fore -from colorama import Style - - -def do_gridsearch(neurophysiology_data_file_directory, - predicted_function_outputs_data, - hexel_expression_master, - functions_to_apply_gridsearch): +import numpy as np +import matplotlib.pyplot as plt +import mne +import time +import os.path +import h5py +from scipy.stats import ttest_ind +import scipy.stats as stats + +def norm(x): + x -= np.mean(x, axis=-1, keepdims=True) + x /= np.sqrt(np.sum(x**2, axis=-1, keepdims=True)) + return x + +def get_EMEG_data(EMEG_path, need_names=False): + if not os.path.isfile(f'{EMEG_path}.npy') or need_names: + evoked = mne.read_evokeds(f'{EMEG_path}.fif', verbose=False) # should be len 1 list + EMEG = evoked[0].get_data() # numpy array shape (sensor_num, N) = (370, 406_001) + EMEG /= np.max(EMEG, axis=1, keepdims=True) + np.save(f'{EMEG_path}.npy', np.array(EMEG, dtype=np.float16)) # nb fif is float32 + return EMEG, evoked[0].ch_names + else: + return np.load(f'{EMEG_path}.npy'), None + +def load_function(function_path, func_name, downsample_rate): + if not os.path.isfile(function_path + '.h5'): + import scipy.io + mat = scipy.io.loadmat(function_path + '.mat')['stimulisig'] + with h5py.File(function_path + '.h5', 'w') as f: + for key in mat.dtype.names: + if key != 'name': + f.create_dataset(key, data=np.array(mat[key][0,0], dtype=np.float16)) + func = np.array(mat[func_name][0][0])[:,::downsample_rate] # shape = (400, 1000) + else: + with h5py.File(function_path + '.h5', 'r') as f: + func = f[func_name][:,::downsample_rate] + return func.flatten() + +def generate_derangement(n): # approx 3ms runtime for n=400 + while True: + v = np.arange(n) + for j in range(n - 1, -1, -1): + p = np.random.randint(0, j + 1) + if v[p] == j: + break + else: + v[j], v[p] = v[p], v[j] + else: + return v + +def ttest(corrs, f_alpha=0.001, use_all_lats=True): + # Vectorised Welch's t-test + # Fisher Z-Transformation + corrs = 0.5 * np.log((1 + corrs) / (1 - corrs)) + n_channels, nDerangements, n_splits, T_steps = corrs.shape + + true_mean = np.mean(corrs[:, 0, :, :], axis=1) + true_var = np.var(corrs[:, 0, :, :], axis=1, ddof=1) + true_n = n_splits + if use_all_lats: + rand_mean = np.mean(corrs[:, 1:, :, :].reshape(n_channels, -1), axis=1).reshape(n_channels, 1) + rand_var = np.var(corrs[:, 1:, :, :].reshape(n_channels, -1), axis=1, ddof=1).reshape(n_channels, 1) + rand_n = n_splits * nDerangements * T_steps + else: + rand_mean = np.mean(corrs[:, 1:, :, :].reshape(n_channels, -1, T_steps), axis=1) + rand_var = np.var(corrs[:, 1:, :, :].reshape(n_channels, -1, T_steps), axis=1, ddof=1) + rand_n = n_splits * nDerangements + + # Vectorized two-sample t-tests for all channels and time steps + numerator = true_mean - rand_mean + denominator = np.sqrt(true_var / true_n + rand_var / rand_n) + df = ((true_var / true_n + rand_var / rand_n) ** 2 / + ((true_var / true_n) ** 2 / (true_n - 1) + + (rand_var / rand_n) ** 2 / (rand_n - 1))) + + t_stat = numerator / denominator + p = stats.t.sf(np.abs(t_stat), df) * 2 # two-tailed p-value + + # Adjust p-values for multiple 
comparisons (Bonferroni correction) [NOT SURE ABOUT THIS] + pvalues_adj = p #np.minimum(1, p * T_steps * n_channels / (1 - f_alpha)) + + return pvalues_adj + +def do_gridsearch(data_root='/imaging/projects/cbu/kymata/data', + data_path='/dataset_4-english-narratives/intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data', + function_root='/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/predicted_function_contours', + function_path='/GMSloudness/stimulisig', + func_name='d_IL2', + audio_shift_correction=0.5375, + seconds_per_split=0.5, + n_splits=800, + downsample_rate=5, + nDerangements=1, + rep='-ave', + save_pvalues_path=None, + p='participant_01', + start_lat=-100): '''Do the Kymata gridsearch over all hexels for all latencies''' - print(f"{Fore.GREEN}{Style.BRIGHT}Starting gridsearch{Style.RESET_ALL}") - - # Load variables - functionname = 'TVL2002 Overall instantaneous loudness' - functionname = 'xxxx' - inputstream = 'audio'; - - experimentName = ['DATASET_3-01_visual-and-auditory']; - itemlistFilename = [rootDataSetPath, experimentName, '/items.txt']; - - pre_stimulus_window = 200 # in milliseconds - post_stimulus_window = 800 # in milliseconds - - latency_step = 5 # in milliseconds - latencies = [-200:latency_step:800] - number_of_iterations = 5 - downsample_rate = 4 - cuttoff = 1000 # in milliseconds - - print(f"...for Function: {xxxxx}") - - Check if the master expression already contains it - - Print out similar, and check. - - for hemi in ['left', 'right']: - - print(f"...for {hemi}") - - # Polarity represents a special case of the function search - that - # where the waveform is flipped in it's polarity. The default action - # in Kymata is to check both, and then select the best, as we don't - # care about the polarity. - for polaritiy in ['positive', 'negitive'] - - print(f" ...for {polaritiy}") - - do gridsearch_for_polarity() #??? do we need this at all and just use the two tailness? - - print(f" ...merging polarities") - - merge polarities - - - - print(f"{Fore.GREEN}{Style.BRIGHT}Ending Gridsearch. Saving in master expression file. {Style.RESET_ALL}") - - return xx - - -def do_gridsearch_for_polarity(polarity:String) -> STC file: - '''Do the gridsearch for a given polarity''' - - print(f" ...XYZ") - - dentritic current_data_array = new array() - prediction_waveform = load_data.load_prediction_waveform() - - if polarity = 'negative' - prediction_waveform = prediction_waveform * -1 - - vecotrise and use Numba/CyPY/CUDA or NUMBa and ... 
- # cut to trial length - wordsignalstrue = wordsignalstrue(:, 1: cutoff)'; - wordsignalstrue = repmat(wordsignalstrue, [1 1 nVertices]); - wordsignalstrue = permute(wordsignalstrue, [3 1 2]); - wordsignalstrue = single(wordsignalstrue); - % wordsignalstrue(wordsignalstrue == 0) = nan; - - # create permorder - [junk permorder] = sort(rand(400, nWords), 2); - permorder = unique([1:nWords; permorder], 'rows'); - permorder(any(bsxfun( @ minus, permorder, 1: nWords) == 0, 2),:) = []; permorder = [1:nWords; permorder]; - permorder = permorder(1:numberOfIterations + 1,:); - - # Convert MEG - signals true, in same order as wordlist - - allMEGdata = single(zeros(nVertices, nTimePoints, nWords)); - - for w in 1:numel(wordlist): - thisword = char(wordlist(w)); - - inputfilename = [rootCodeOutputPath version '/' experimentName, '/5-averaged-by-trial-data/', inputfolder, '/', thisword - '-' leftright '.stc']; - - disp(num2str(w)); - - sensordata = mne_read_stc_file(inputfilename); - allMEGdata(:,:, w) = sensordata.data(:,:) - - clear('-regexp', '^grand'); - clear other as well - - # Start Matching / Mismatching proceedure for each time point - for latency in latencies: - - print(f" ...Latency ', num2str(q) ' out of ', num2str(length(latencies))]") - - MEGdata = allMEGdata(:, (pre_stimulus_window + latency + 1): (pre_stimulus_window + cutoff + latency),:); - - # downsample - MEGdataDownsampled = MEGdata(:, 1: downsample_rate:end,:); - clear MEGdata; - wordsignalstrueDownsampled = wordsignalstrue(:, 1: downsample_rate:end,:); - - if debug: - scatter(MEGdata(1,:, 1), wordsignalstrue(1,:, 1)); - scatter(MEGdataDownsampled(1,:, 1), wordsignalstrueDownsampled(1,:, 1)); - - # ------------------------------- - # Matched / Mismatched - # ------------------------------- - - allcorrs = zeros(nVertices, numberOfIterations, nWords); - - for i in 1:numberOfIterations: - - disp(['Iteration ', num2str(i) ' out of ', num2str(numberOfIterations)]); - - shuffledMEGdata = permute(MEGdataDownsampled, [3 2 1]) - downsampledtimespan = size(shuffledMEGdata, 2) - shuffledMEGdata = reshape(shuffledMEGdata, nWords, nVertices * downsampledtimespan) - shuffledMEGdata = shuffledMEGdata(permorder(i,:),:) - shuffledMEGdata = reshape(shuffledMEGdata, nWords, downsampledtimespan, nVertices) - shuffledMEGdata = permute(shuffledMEGdata, [3 2 1]) - - # Confirm signals are not shorter than cutt-off - assert XYZ - - # Do correlation - allcorrs(:, i,:) = nansum(bsxfun( @ minus, shuffledMEGdata, - nanmean(shuffledMEGdata, 2)). * bsxfun( @ minus, wordsignalstrueDownsampled, nanmean( - wordsignalstrueDownsampled, 2)), 2)./ (sqrt( - nansum((bsxfun( @ minus, shuffledMEGdata, nanmean(shuffledMEGdata, 2))). ^ 2, 2)). * sqrt( - nansum((bsxfun( @ minus, wordsignalstrueDownsampled, nanmean(wordsignalstrueDownsampled, 2))). ^ 2, 2))); - - - clear shuffledMEGdata; - - # Transform populations with fisher-z - - # First eliviate rounding of - 0.999999 causing problems with log() in fisher-z transform. - allcorrs(allcorrs < -0.999999) = -0.999999; - allcorrs(allcorrs > 0.999999) = 0.999999; - - # Transform fisher-z - allcorrs = 0.5 * log((1 + allcorrs). 
/ (1 - allcorrs)); - - truewordsCorr = reshape(allcorrs(:, 1,:), nVertices, nWords, 1); - randwordsCorr = reshape(allcorrs(:, 2: end,:), nVertices, (nWords * numberOfIterations) - nWords, 1); - - if debug: - # lillefor test for Guassianism - vertexnumber = 2444; - h = lillietest(randwordsCorr(vertexnumber,:)); - histfit(randwordsCorr(vertexnumber,:), 40); # plot - histfit(truewordsCorr(vertexnumber,:), 40); - h = findobj(gca, 'Type', 'patch'); - display(h); - set(h(1), 'FaceColor', [0.982 0.909 0.721], 'EdgeColor', 'k'); - set(h(2), 'FaceColor', [0.819 0.565 0.438], 'EdgeColor', 'k'); - And do the little graph disribution box plots below as well. - - # Do population test on signals - - pvalues = zeros(1, nVertices); - for vertex = 1:nVertices: - truepopulation = truewordsCorr(vertex,:); - randpopulation = randwordsCorr(vertex,:); - - # 2-sample t-test - - - Can we do 2 tails to do polarities? - - [h, p, ci, stats] = ttest2(truepopulation, randpopulation, - 1 - ((1 - f_alpha) ^ (1 / (2 * length(latencies) * nVertices))), 'right', 'unequal'); - pvalues(1, vertex) = p - - # Save at correct latency in STC - outputSTC.data((latency) + (pre_stimulus_window + 1),:) = pvalues - - clear MEGdata wordsignalstrueDownsampled MEGdataDownsampled R randwordsGuassian truewordsGuassian truewords randwords; - - - -def do_ad_hoc_power_calculation_on_XYZ(n_samples, alpha, power_val) - 'This works out the power needed on ZYX' - - n_samples = 15000 - h number of samples per group - alpha = 0.0000005 # significance level - power_val = 0.8 # desired power - standard_deviation = xyz - ratio = n_samples1/n_samples2 - - # calculate the minimum detectable effect size - meff = power.tt_ind_solve_power(alpha=alpha, power=power_val, nobs1=n_samples, ratio=ratio, alternative='larger', sd = sd) - - print(f"Minimum detectable effect size: {meff:.3f}") \ No newline at end of file + EMEG_path = f'{data_root}/{data_path}/{p}{rep}' + t0 = time.time() + + # Load data + T_steps = int((2000 * seconds_per_split) // downsample_rate) + EMEG, EMEG_names = get_EMEG_data(EMEG_path, save_pvalues_path) + func = load_function(function_root + function_path, + func_name=func_name, + downsample_rate=downsample_rate) + func = func.reshape(n_splits, T_steps // 2) + n_channels = EMEG.shape[0] + + t1 = time.time(); print(f'Load time: {round(t1-t0, 2)}s') + + # Reshape EMEG into splits of 'seconds_per_split' s + second_start_points = [start_lat + 200 + round((1000 + audio_shift_correction) * seconds_per_split * i) for i in range(n_splits)] # correcting for audio shift in delivery + R_EMEG = np.zeros((n_channels, n_splits, T_steps)) + for i in range(n_splits): + R_EMEG[:,i,:] = EMEG[:, second_start_points[i]:second_start_points[i] + int(2000 * seconds_per_split):downsample_rate] + + # Get derangement for null dist: + permorder = np.zeros((nDerangements, n_splits), dtype=int) + for i in range(nDerangements): + permorder[i, :] = generate_derangement(n_splits) + permorder = np.vstack((np.arange(n_splits), permorder)) + + # FFT cross-corr + R_EMEG = np.fft.rfft(norm(R_EMEG), n=T_steps, axis=-1) + F_func = np.conj(np.fft.rfft(norm(func), n=T_steps, axis=-1)) + corrs = np.zeros((n_channels, nDerangements+1, n_splits, T_steps//2)) + for i, order in enumerate(permorder): + deranged_EMEG = R_EMEG[:, order, :] + corrs[:, i] = np.fft.irfft(deranged_EMEG * F_func)[:, :, :T_steps//2] + + t2 = time.time(); print(f'Corr time: {round(t2-t1, 2)}s') + + plt.figure(1) + plt.plot(np.linspace(start_lat, start_lat + 1000 * seconds_per_split, T_steps//2), 
np.mean(corrs[[207, 210, 5, 10], 0], axis=-2).T) + plt.plot(np.linspace(start_lat, start_lat + 1000 * seconds_per_split, T_steps//2), np.mean(corrs[0:300:15, 1], axis=-2).T, 'cyan') + plt.axvline(0, color='k') + plt.savefig('testing_savefig.png') + plt.clf() + + t2 = time.time() + + pvalues = ttest(corrs) + + t3 = time.time(); print(f'T-Test time: {round(t3-t2,2)}s' ) + + plt.figure(2) + plt.plot(np.linspace(start_lat, start_lat + 1000 * seconds_per_split, T_steps//2), -np.log(pvalues[[207,210,5,10]].T)) + plt.axvline(0, color='k') + plt.savefig('testing_fig2.png') + plt.clf() + + if save_pvalues_path: + with h5py.File(save_pvalues_path + '.h5', 'w') as f: + f.create_dataset('pvalues', data=pvalues) + f.create_dataset('latencies', data=np.linspace(start_lat, int(start_lat + (1000 * seconds_per_split)), 1000//downsample_rate)) + f.create_dataset('EMEG_names', data=EMEG_names) + + return + + +if __name__ == "__main__": + do_gridsearch() From fb2eba8632435034f39cc077e1def113cad395df Mon Sep 17 00:00:00 2001 From: Cai Wingfield Date: Wed, 13 Dec 2023 14:27:37 +0000 Subject: [PATCH 02/25] Move gridsearch.py into place --- kymata/gridsearch/__init__.py | 0 {scripts => kymata/gridsearch}/gridsearch.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 kymata/gridsearch/__init__.py rename {scripts => kymata/gridsearch}/gridsearch.py (100%) diff --git a/kymata/gridsearch/__init__.py b/kymata/gridsearch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/gridsearch.py b/kymata/gridsearch/gridsearch.py similarity index 100% rename from scripts/gridsearch.py rename to kymata/gridsearch/gridsearch.py From 1d56f6bff3d47d79799193a63dc6218e614d456a Mon Sep 17 00:00:00 2001 From: Cai Wingfield Date: Wed, 13 Dec 2023 16:40:22 +0000 Subject: [PATCH 03/25] Refactor WIP: Breaking functionality and default values into separate functions and a demo script --- demos/demo_gridsearch.py | 43 ++++++ kymata/entities/functions.py | 17 +++ kymata/entities/iterables.py | 5 +- kymata/entities/sparse_data.py | 5 +- kymata/gridsearch/gridsearch.py | 228 ++++++++++++-------------------- kymata/io/functions.py | 27 ++++ kymata/io/mne.py | 18 +++ kymata/math/__init__.py | 0 kymata/math/combinatorics.py | 18 +++ kymata/math/vector.py | 11 ++ 10 files changed, 226 insertions(+), 146 deletions(-) create mode 100644 demos/demo_gridsearch.py create mode 100644 kymata/entities/functions.py create mode 100644 kymata/io/functions.py create mode 100644 kymata/io/mne.py create mode 100644 kymata/math/__init__.py create mode 100644 kymata/math/combinatorics.py create mode 100644 kymata/math/vector.py diff --git a/demos/demo_gridsearch.py b/demos/demo_gridsearch.py new file mode 100644 index 00000000..66b2475d --- /dev/null +++ b/demos/demo_gridsearch.py @@ -0,0 +1,43 @@ +from pathlib import Path + +import numpy as np +from numpy.typing import NDArray + +from kymata.gridsearch.gridsearch import do_gridsearch +from kymata.io.functions import load_function +from kymata.io.mne import get_emeg_data +from kymata.plot.plotting import expression_plot + +downsample_rate = 5 +function_name = 'd_IL2' + +func = load_function(str(Path('/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/predicted_function_contours', '/GMSloudness/stimulisig')), + func_name=function_name) +func = func.downsampled(downsample_rate) + +emeg_dir = Path(f"/imaging/projects/cbu/kymata/data//dataset_4-english-narratives/intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data") +emeg_filename = 
'participant_01-ave' + +# Load data +emeg_path_npy = Path(emeg_dir, f"{emeg_filename}.npy") +emeg_path_fif = Path(emeg_dir, f"{emeg_filename}.fif") +try: + ch_names: list[str] = [] # TODO: we'll need these + emeg: NDArray = np.load(emeg_path_npy) +except FileNotFoundError: + emeg, ch_names = get_emeg_data(emeg_path_fif) + np.save(emeg_path_npy, np.array(emeg, dtype=np.float16)) + +es = do_gridsearch( + emeg_values=emeg, + sensor_names=ch_names, + function_name=function_name, + function=func, + downsample_rate=downsample_rate, + seconds_per_split=0.5, + n_derangements=1, # TODO: 1? + n_splits=800, + start_latency_ms=-100, +) + +expression_plot(es) diff --git a/kymata/entities/functions.py b/kymata/entities/functions.py new file mode 100644 index 00000000..19291b4d --- /dev/null +++ b/kymata/entities/functions.py @@ -0,0 +1,17 @@ +from dataclasses import dataclass + +from numpy.typing import NDArray + + +@dataclass +class Function: + name: str + values: NDArray + tstep: float + + def downsampled(self, rate: int): + return Function( + name=self.name, + values=self.values[:, ::rate], + tstep=self.tstep * rate + ) diff --git a/kymata/entities/iterables.py b/kymata/entities/iterables.py index 5faeb602..c2a1c910 100644 --- a/kymata/entities/iterables.py +++ b/kymata/entities/iterables.py @@ -6,6 +6,7 @@ from typing import Sequence from numpy import ndarray +from numpy.typing import NDArray def all_equal(sequence: Sequence) -> bool: @@ -20,10 +21,10 @@ def all_equal(sequence: Sequence) -> bool: # numpy arrays deal with equality weirdly # Check first two items are equal, and equal to the rest - first: ndarray = sequence[0] + first: NDArray = sequence[0] if not isinstance(sequence[1], ndarray): return False - second: ndarray = sequence[1] + second: NDArray = sequence[1] try: # noinspection PyUnresolvedReferences return (first == second).all() and all_equal(sequence[1:]) diff --git a/kymata/entities/sparse_data.py b/kymata/entities/sparse_data.py index 4c39a220..838054cf 100644 --- a/kymata/entities/sparse_data.py +++ b/kymata/entities/sparse_data.py @@ -1,6 +1,7 @@ from __future__ import annotations -from numpy import ndarray, nanmin, greater +from numpy import nanmin, greater +from numpy.typing import NDArray import sparse from xarray import Dataset @@ -27,7 +28,7 @@ def expand_dims(x: sparse.COO, axis=-1) -> sparse.COO: return x -def minimise_pmatrix(pmatrix: ndarray) -> sparse.COO: +def minimise_pmatrix(pmatrix: NDArray) -> sparse.COO: """ Converts a hexel-x-latency data matrix containing p-values into a sparse matrix only storing the minimum (over latencies) p-value for each hexel. 
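The gridsearch refactor that follows keeps the FFT-based cross-correlation at the heart of the original script: each EMEG split is held at double the length of a function split, so a single `irfft(rfft(emeg) * conj(rfft(func)))` product yields the sliding dot product of the function against the EMEG at every candidate latency, with no circular wraparound in the first half. A minimal self-contained sketch of that identity (toy single-channel data; the sizes mirror the defaults here, but none of these names are toolbox API):

```python
import numpy as np

rng = np.random.default_rng(0)
n_func = 100                            # samples in one downsampled function split
emeg = rng.standard_normal(2 * n_func)  # matching EMEG split, double length
func = rng.standard_normal(n_func)

# One spectral product evaluates the sliding dot product at every lag at once;
# zero-padding func to 2 * n_func keeps the first n_func lags free of wraparound.
corr_fft = np.fft.irfft(
    np.fft.rfft(emeg) * np.conj(np.fft.rfft(func, n=2 * n_func))
)[:n_func]

# The same quantity computed directly, one lag at a time
corr_direct = np.array(
    [np.dot(emeg[lag:lag + n_func], func) for lag in range(n_func)]
)
assert np.allclose(corr_fft, corr_direct)
```

In the patch both inputs pass through `normalize` (mean-centred, unit length) first, so these sliding dot products act as correlation coefficients for the t-test that follows.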
diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index 9916217a..6315d8fb 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -1,58 +1,85 @@ import numpy as np -import matplotlib.pyplot as plt -import mne -import time -import os.path -import h5py -from scipy.stats import ttest_ind -import scipy.stats as stats - -def norm(x): - x -= np.mean(x, axis=-1, keepdims=True) - x /= np.sqrt(np.sum(x**2, axis=-1, keepdims=True)) - return x - -def get_EMEG_data(EMEG_path, need_names=False): - if not os.path.isfile(f'{EMEG_path}.npy') or need_names: - evoked = mne.read_evokeds(f'{EMEG_path}.fif', verbose=False) # should be len 1 list - EMEG = evoked[0].get_data() # numpy array shape (sensor_num, N) = (370, 406_001) - EMEG /= np.max(EMEG, axis=1, keepdims=True) - np.save(f'{EMEG_path}.npy', np.array(EMEG, dtype=np.float16)) # nb fif is float32 - return EMEG, evoked[0].ch_names - else: - return np.load(f'{EMEG_path}.npy'), None - -def load_function(function_path, func_name, downsample_rate): - if not os.path.isfile(function_path + '.h5'): - import scipy.io - mat = scipy.io.loadmat(function_path + '.mat')['stimulisig'] - with h5py.File(function_path + '.h5', 'w') as f: - for key in mat.dtype.names: - if key != 'name': - f.create_dataset(key, data=np.array(mat[key][0,0], dtype=np.float16)) - func = np.array(mat[func_name][0][0])[:,::downsample_rate] # shape = (400, 1000) - else: - with h5py.File(function_path + '.h5', 'r') as f: - func = f[func_name][:,::downsample_rate] - return func.flatten() - -def generate_derangement(n): # approx 3ms runtime for n=400 - while True: - v = np.arange(n) - for j in range(n - 1, -1, -1): - p = np.random.randint(0, j + 1) - if v[p] == j: - break - else: - v[j], v[p] = v[p], v[j] - else: - return v - -def ttest(corrs, f_alpha=0.001, use_all_lats=True): - # Vectorised Welch's t-test +from numpy.typing import NDArray +from scipy import stats + +from kymata.entities.functions import Function +from kymata.math.combinatorics import generate_derangement +from kymata.math.vector import normalize +from kymata.entities.expression import SensorExpressionSet + + +def do_gridsearch(function: Function, + function_name: str, + sensor_names: list[str], + emeg_values: NDArray, + n_derangements: int, + downsample_rate: int, + seconds_per_split: float, + n_splits: int, + start_latency_ms: float, + audio_shift_correction: float = 0.5375, + ) -> SensorExpressionSet: + """ + Do the Kymata gridsearch over all hexels for all latencies + """ + + # TODO: 2000? + n_timesteps = int((2000 * seconds_per_split) // downsample_rate) + + func = function.values.reshape(n_splits, n_timesteps // 2) + n_channels = emeg_values.shape[0] + + # Reshape EMEG into splits of 'seconds_per_split' s + second_start_points = [ + # TODO: 200? 
+ start_latency_ms + 200 + round((1000 + audio_shift_correction) # correcting for audio shift in delivery + * seconds_per_split * i) + for i in range(n_splits) + ] + r_emeg = np.zeros((n_channels, n_splits, n_timesteps)) + for i in range(n_splits): + r_emeg[:, i, :] = emeg_values[:, second_start_points[i] + :second_start_points[i] + + int(2000 * seconds_per_split) + :downsample_rate] + + # Get derangement for null dist: + derangements = np.zeros((n_derangements, n_splits), dtype=int) + for i in range(n_derangements): + derangements[i, :] = generate_derangement(n_splits) + # Include the identity on top + derangements = np.vstack((np.arange(n_splits), derangements)) + + # FFT cross-corr + r_emeg = np.fft.rfft(normalize(r_emeg), n=n_timesteps, axis=-1) + f_func = np.conj(np.fft.rfft(normalize(func), n=n_timesteps, axis=-1)) + corrs = np.zeros((n_channels, n_derangements + 1, n_splits, n_timesteps // 2)) + for i, order in enumerate(derangements): + deranged_emeg = r_emeg[:, order, :] + corrs[:, i] = np.fft.irfft(deranged_emeg * f_func)[:, :, :n_timesteps//2] + + p_values = _ttest(corrs) + + latencies = np.linspace(start_latency_ms, start_latency_ms + 1000 * seconds_per_split, n_timesteps // 2) / 1000 + + es = SensorExpressionSet( + functions=function_name, + latencies=latencies, + sensors=sensor_names, + data=p_values, + ) + + return es + + +def _ttest(corrs: NDArray, f_alpha: float = 0.001, use_all_lats: bool = True): + """ + Vectorised Welch's t-test. + """ + # Fisher Z-Transformation corrs = 0.5 * np.log((1 + corrs) / (1 - corrs)) - n_channels, nDerangements, n_splits, T_steps = corrs.shape + n_channels, n_derangements, n_splits, t_steps = corrs.shape true_mean = np.mean(corrs[:, 0, :, :], axis=1) true_var = np.var(corrs[:, 0, :, :], axis=1, ddof=1) @@ -60,106 +87,23 @@ def ttest(corrs, f_alpha=0.001, use_all_lats=True): if use_all_lats: rand_mean = np.mean(corrs[:, 1:, :, :].reshape(n_channels, -1), axis=1).reshape(n_channels, 1) rand_var = np.var(corrs[:, 1:, :, :].reshape(n_channels, -1), axis=1, ddof=1).reshape(n_channels, 1) - rand_n = n_splits * nDerangements * T_steps + rand_n = n_splits * n_derangements * t_steps else: - rand_mean = np.mean(corrs[:, 1:, :, :].reshape(n_channels, -1, T_steps), axis=1) - rand_var = np.var(corrs[:, 1:, :, :].reshape(n_channels, -1, T_steps), axis=1, ddof=1) - rand_n = n_splits * nDerangements + rand_mean = np.mean(corrs[:, 1:, :, :].reshape(n_channels, -1, t_steps), axis=1) + rand_var = np.var(corrs[:, 1:, :, :].reshape(n_channels, -1, t_steps), axis=1, ddof=1) + rand_n = n_splits * n_derangements # Vectorized two-sample t-tests for all channels and time steps numerator = true_mean - rand_mean denominator = np.sqrt(true_var / true_n + rand_var / rand_n) df = ((true_var / true_n + rand_var / rand_n) ** 2 / - ((true_var / true_n) ** 2 / (true_n - 1) + - (rand_var / rand_n) ** 2 / (rand_n - 1))) + ((true_var / true_n) ** 2 / (true_n - 1) + + (rand_var / rand_n) ** 2 / (rand_n - 1))) t_stat = numerator / denominator p = stats.t.sf(np.abs(t_stat), df) * 2 # two-tailed p-value # Adjust p-values for multiple comparisons (Bonferroni correction) [NOT SURE ABOUT THIS] - pvalues_adj = p #np.minimum(1, p * T_steps * n_channels / (1 - f_alpha)) + pvalues_adj = p #np.minimum(1, p * t_steps * n_channels / (1 - f_alpha)) return pvalues_adj - -def do_gridsearch(data_root='/imaging/projects/cbu/kymata/data', - data_path='/dataset_4-english-narratives/intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data', - 
function_root='/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/predicted_function_contours', - function_path='/GMSloudness/stimulisig', - func_name='d_IL2', - audio_shift_correction=0.5375, - seconds_per_split=0.5, - n_splits=800, - downsample_rate=5, - nDerangements=1, - rep='-ave', - save_pvalues_path=None, - p='participant_01', - start_lat=-100): - '''Do the Kymata gridsearch over all hexels for all latencies''' - - EMEG_path = f'{data_root}/{data_path}/{p}{rep}' - t0 = time.time() - - # Load data - T_steps = int((2000 * seconds_per_split) // downsample_rate) - EMEG, EMEG_names = get_EMEG_data(EMEG_path, save_pvalues_path) - func = load_function(function_root + function_path, - func_name=func_name, - downsample_rate=downsample_rate) - func = func.reshape(n_splits, T_steps // 2) - n_channels = EMEG.shape[0] - - t1 = time.time(); print(f'Load time: {round(t1-t0, 2)}s') - - # Reshape EMEG into splits of 'seconds_per_split' s - second_start_points = [start_lat + 200 + round((1000 + audio_shift_correction) * seconds_per_split * i) for i in range(n_splits)] # correcting for audio shift in delivery - R_EMEG = np.zeros((n_channels, n_splits, T_steps)) - for i in range(n_splits): - R_EMEG[:,i,:] = EMEG[:, second_start_points[i]:second_start_points[i] + int(2000 * seconds_per_split):downsample_rate] - - # Get derangement for null dist: - permorder = np.zeros((nDerangements, n_splits), dtype=int) - for i in range(nDerangements): - permorder[i, :] = generate_derangement(n_splits) - permorder = np.vstack((np.arange(n_splits), permorder)) - - # FFT cross-corr - R_EMEG = np.fft.rfft(norm(R_EMEG), n=T_steps, axis=-1) - F_func = np.conj(np.fft.rfft(norm(func), n=T_steps, axis=-1)) - corrs = np.zeros((n_channels, nDerangements+1, n_splits, T_steps//2)) - for i, order in enumerate(permorder): - deranged_EMEG = R_EMEG[:, order, :] - corrs[:, i] = np.fft.irfft(deranged_EMEG * F_func)[:, :, :T_steps//2] - - t2 = time.time(); print(f'Corr time: {round(t2-t1, 2)}s') - - plt.figure(1) - plt.plot(np.linspace(start_lat, start_lat + 1000 * seconds_per_split, T_steps//2), np.mean(corrs[[207, 210, 5, 10], 0], axis=-2).T) - plt.plot(np.linspace(start_lat, start_lat + 1000 * seconds_per_split, T_steps//2), np.mean(corrs[0:300:15, 1], axis=-2).T, 'cyan') - plt.axvline(0, color='k') - plt.savefig('testing_savefig.png') - plt.clf() - - t2 = time.time() - - pvalues = ttest(corrs) - - t3 = time.time(); print(f'T-Test time: {round(t3-t2,2)}s' ) - - plt.figure(2) - plt.plot(np.linspace(start_lat, start_lat + 1000 * seconds_per_split, T_steps//2), -np.log(pvalues[[207,210,5,10]].T)) - plt.axvline(0, color='k') - plt.savefig('testing_fig2.png') - plt.clf() - - if save_pvalues_path: - with h5py.File(save_pvalues_path + '.h5', 'w') as f: - f.create_dataset('pvalues', data=pvalues) - f.create_dataset('latencies', data=np.linspace(start_lat, int(start_lat + (1000 * seconds_per_split)), 1000//downsample_rate)) - f.create_dataset('EMEG_names', data=EMEG_names) - - return - - -if __name__ == "__main__": - do_gridsearch() diff --git a/kymata/io/functions.py b/kymata/io/functions.py new file mode 100644 index 00000000..3db5b237 --- /dev/null +++ b/kymata/io/functions.py @@ -0,0 +1,27 @@ +from os import path + +from numpy import array, float16 +from numpy.typing import NDArray +from h5py import File +from scipy.io import loadmat + +from kymata.entities.functions import Function + + +def load_function(function_path: str, func_name: str) -> Function: + if not path.isfile(function_path + '.h5'): + mat = loadmat(function_path 
+ '.mat')['stimulisig'] + with File(function_path + '.h5', 'w') as f: + for key in mat.dtype.names: + if key != 'name': + f.create_dataset(key, data=array(mat[key][0, 0], dtype=float16)) + func: NDArray = array(mat[func_name][0][0]) + else: + with File(function_path + '.h5', 'r') as f: + func: NDArray = f[func_name] + + return Function( + name=func_name, + values=func.flatten().squeeze(), + tstep=0.001, + ) diff --git a/kymata/io/mne.py b/kymata/io/mne.py new file mode 100644 index 00000000..577f9e84 --- /dev/null +++ b/kymata/io/mne.py @@ -0,0 +1,18 @@ +from pathlib import Path + +from mne import read_evokeds +from numpy import max +from numpy.typing import NDArray + +from kymata.io.file import path_type + + +def get_emeg_data(emeg_path: path_type) -> tuple[NDArray, list[str]]: + """Gets EMEG data from MNE format.""" + emeg_path = Path(emeg_path) + + evoked = read_evokeds(emeg_path, verbose=False) # should be len 1 list + emeg = evoked[0].get_data() # numpy array shape (sensor_num, N) = (370, 406_001) + # Normalise + emeg /= max(emeg, axis=1, keepdims=True) + return emeg, evoked[0].ch_names diff --git a/kymata/math/__init__.py b/kymata/math/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/kymata/math/combinatorics.py b/kymata/math/combinatorics.py new file mode 100644 index 00000000..256640fd --- /dev/null +++ b/kymata/math/combinatorics.py @@ -0,0 +1,18 @@ +from numpy import arange +from numpy.random import randint + + +def generate_derangement(n): # approx 3ms runtime for n=400 + """ + Generates a derangement (permutation with no fixed point) of size `n`. + """ + while True: + v = arange(n) + for j in range(n - 1, -1, -1): + p = randint(0, j + 1) + if v[p] == j: + break + else: + v[j], v[p] = v[p], v[j] + else: + return v diff --git a/kymata/math/vector.py b/kymata/math/vector.py new file mode 100644 index 00000000..177e2cc2 --- /dev/null +++ b/kymata/math/vector.py @@ -0,0 +1,11 @@ +from numpy import mean, sum, sqrt +from numpy.typing import NDArray + + +def normalize(x: NDArray) -> NDArray: + """ + Remove the mean and divide by the Euclidean magnitude. + """ + x -= mean(x, axis=-1, keepdims=True) + x /= sqrt(sum(x**2, axis=-1, keepdims=True)) + return x From 3802645b50ebd2eab3afc90329301c9cd6111664 Mon Sep 17 00:00:00 2001 From: Cai Wingfield Date: Fri, 15 Dec 2023 09:56:43 +0000 Subject: [PATCH 04/25] Commenting code and moving everything to seconds --- demos/demo_gridsearch.py | 7 ++- kymata/entities/functions.py | 8 ++- kymata/gridsearch/gridsearch.py | 92 +++++++++++++++++++-------------- 3 files changed, 61 insertions(+), 46 deletions(-) diff --git a/demos/demo_gridsearch.py b/demos/demo_gridsearch.py index 66b2475d..f622eb25 100644 --- a/demos/demo_gridsearch.py +++ b/demos/demo_gridsearch.py @@ -31,13 +31,12 @@ es = do_gridsearch( emeg_values=emeg, sensor_names=ch_names, - function_name=function_name, function=func, - downsample_rate=downsample_rate, seconds_per_split=0.5, - n_derangements=1, # TODO: 1? 
+ n_derangements=1, n_splits=800, - start_latency_ms=-100, + start_latency=-100, + emeg_t_start=-200, ) expression_plot(es) diff --git a/kymata/entities/functions.py b/kymata/entities/functions.py index 19291b4d..47edb775 100644 --- a/kymata/entities/functions.py +++ b/kymata/entities/functions.py @@ -7,11 +7,15 @@ class Function: name: str values: NDArray - tstep: float + sample_rate: float # Hertz def downsampled(self, rate: int): return Function( name=self.name, values=self.values[:, ::rate], - tstep=self.tstep * rate + sample_rate=self.sample_rate / rate, ) + + @property + def time_step(self) -> float: + return 1 / self.sample_rate diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index 6315d8fb..b197bd13 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -8,62 +8,74 @@ from kymata.entities.expression import SensorExpressionSet -def do_gridsearch(function: Function, - function_name: str, - sensor_names: list[str], - emeg_values: NDArray, - n_derangements: int, - downsample_rate: int, - seconds_per_split: float, - n_splits: int, - start_latency_ms: float, - audio_shift_correction: float = 0.5375, - ) -> SensorExpressionSet: +def do_gridsearch( + emeg_values: NDArray, + function: Function, + sensor_names: list[str], + start_latency: float, # seconds + emeg_t_start: float, + emeg_sample_rate: int = 1000, # Hertz + # TODO: what are good default values? + n_derangements: int = 20, + seconds_per_split: float = 0.5, + n_splits: int = 800, + audio_shift_correction: float = 0.0005375, # second/second # TODO: describe in which direction? + ) -> SensorExpressionSet: """ - Do the Kymata gridsearch over all hexels for all latencies + Do the Kymata gridsearch over all hexels for all latencies. """ - # TODO: 2000? - n_timesteps = int((2000 * seconds_per_split) // downsample_rate) + # We'll need to downsample the EMEG to match the function's sample rate + downsample_rate = emeg_sample_rate / function.sample_rate - func = function.values.reshape(n_splits, n_timesteps // 2) + n_samples_per_split = int( + (seconds_per_split * emeg_sample_rate + * 2) # We need double the length so we can do the full cross-correlation overlap + # TODO: does this need to be integer div if we're also coercing to int? + # Now that downsample_rate is computed (and is a float), this div will + # result in a float anyway. + // downsample_rate) + + func = function.values.reshape(n_splits, n_samples_per_split // 2) n_channels = emeg_values.shape[0] - # Reshape EMEG into splits of 'seconds_per_split' s - second_start_points = [ - # TODO: 200? 
- start_latency_ms + 200 + round((1000 + audio_shift_correction) # correcting for audio shift in delivery - * seconds_per_split * i) + # Reshape EMEG into splits of `seconds_per_split` s + split_initial_timesteps = [ + start_latency + + round(i * seconds_per_split + # Correct for audio drift in delivery equipment + * (1 + audio_shift_correction) + ) + - emeg_t_start for i in range(n_splits) ] - r_emeg = np.zeros((n_channels, n_splits, n_timesteps)) - for i in range(n_splits): - r_emeg[:, i, :] = emeg_values[:, second_start_points[i] - :second_start_points[i] - + int(2000 * seconds_per_split) - :downsample_rate] - - # Get derangement for null dist: + emeg_reshaped = np.zeros((n_channels, n_splits, n_samples_per_split)) + for split_i in range(n_splits): + emeg_reshaped[:, split_i, :] = emeg_values[ + :, split_initial_timesteps[split_i] + :split_initial_timesteps[split_i] + int(2 * emeg_sample_rate * seconds_per_split) + :downsample_rate] + + # Derangements for null distribution derangements = np.zeros((n_derangements, n_splits), dtype=int) - for i in range(n_derangements): - derangements[i, :] = generate_derangement(n_splits) - # Include the identity on top - derangements = np.vstack((np.arange(n_splits), derangements)) + for der_i in range(n_derangements): + derangements[der_i, :] = generate_derangement(n_splits) + derangements = np.vstack((np.arange(n_splits), derangements)) # Include the identity on top # FFT cross-corr - r_emeg = np.fft.rfft(normalize(r_emeg), n=n_timesteps, axis=-1) - f_func = np.conj(np.fft.rfft(normalize(func), n=n_timesteps, axis=-1)) - corrs = np.zeros((n_channels, n_derangements + 1, n_splits, n_timesteps // 2)) - for i, order in enumerate(derangements): - deranged_emeg = r_emeg[:, order, :] - corrs[:, i] = np.fft.irfft(deranged_emeg * f_func)[:, :, :n_timesteps//2] + emeg_reshaped = np.fft.rfft(normalize(emeg_reshaped), n=n_samples_per_split, axis=-1) + f_func = np.conj(np.fft.rfft(normalize(func), n=n_samples_per_split, axis=-1)) + corrs = np.zeros((n_channels, n_derangements + 1, n_splits, n_samples_per_split // 2)) + for der_i, derangement in enumerate(derangements): + deranged_emeg = emeg_reshaped[:, derangement, :] + corrs[:, der_i] = np.fft.irfft(deranged_emeg * f_func)[:, :, :n_samples_per_split//2] p_values = _ttest(corrs) - latencies = np.linspace(start_latency_ms, start_latency_ms + 1000 * seconds_per_split, n_timesteps // 2) / 1000 + latencies = np.linspace(start_latency, start_latency + seconds_per_split, n_samples_per_split // 2) / 1000 es = SensorExpressionSet( - functions=function_name, + functions=function.name, latencies=latencies, sensors=sensor_names, data=p_values, From d0d9a365f77f8c13fb37ca4a75880f83041df121 Mon Sep 17 00:00:00 2001 From: Cai Wingfield Date: Fri, 15 Dec 2023 14:19:35 +0000 Subject: [PATCH 05/25] This makes a good invoker --- demos/demo_gridsearch.py | 42 --------------------------------- invokers/run_gridsearch.py | 48 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 42 deletions(-) delete mode 100644 demos/demo_gridsearch.py create mode 100644 invokers/run_gridsearch.py diff --git a/demos/demo_gridsearch.py b/demos/demo_gridsearch.py deleted file mode 100644 index f622eb25..00000000 --- a/demos/demo_gridsearch.py +++ /dev/null @@ -1,42 +0,0 @@ -from pathlib import Path - -import numpy as np -from numpy.typing import NDArray - -from kymata.gridsearch.gridsearch import do_gridsearch -from kymata.io.functions import load_function -from kymata.io.mne import get_emeg_data -from kymata.plot.plotting import 
expression_plot - -downsample_rate = 5 -function_name = 'd_IL2' - -func = load_function(str(Path('/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/predicted_function_contours', '/GMSloudness/stimulisig')), - func_name=function_name) -func = func.downsampled(downsample_rate) - -emeg_dir = Path(f"/imaging/projects/cbu/kymata/data//dataset_4-english-narratives/intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data") -emeg_filename = 'participant_01-ave' - -# Load data -emeg_path_npy = Path(emeg_dir, f"{emeg_filename}.npy") -emeg_path_fif = Path(emeg_dir, f"{emeg_filename}.fif") -try: - ch_names: list[str] = [] # TODO: we'll need these - emeg: NDArray = np.load(emeg_path_npy) -except FileNotFoundError: - emeg, ch_names = get_emeg_data(emeg_path_fif) - np.save(emeg_path_npy, np.array(emeg, dtype=np.float16)) - -es = do_gridsearch( - emeg_values=emeg, - sensor_names=ch_names, - function=func, - seconds_per_split=0.5, - n_derangements=1, - n_splits=800, - start_latency=-100, - emeg_t_start=-200, -) - -expression_plot(es) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py new file mode 100644 index 00000000..6b35d222 --- /dev/null +++ b/invokers/run_gridsearch.py @@ -0,0 +1,48 @@ +from pathlib import Path + +import numpy as np +from numpy.typing import NDArray + +from kymata.gridsearch.gridsearch import do_gridsearch +from kymata.io.functions import load_function +from kymata.io.mne import get_emeg_data +from kymata.plot.plotting import expression_plot + + +def main(): + downsample_rate = 5 + function_name = 'd_IL2' + + func = load_function(str(Path('/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/predicted_function_contours', '/GMSloudness/stimulisig')), + func_name=function_name) + func = func.downsampled(downsample_rate) + + emeg_dir = Path(f"/imaging/projects/cbu/kymata/data//dataset_4-english-narratives/intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data") + emeg_filename = 'participant_01-ave' + + # Load data + emeg_path_npy = Path(emeg_dir, f"{emeg_filename}.npy") + emeg_path_fif = Path(emeg_dir, f"{emeg_filename}.fif") + try: + ch_names: list[str] = [] # TODO: we'll need these + emeg: NDArray = np.load(emeg_path_npy) + except FileNotFoundError: + emeg, ch_names = get_emeg_data(emeg_path_fif) + np.save(emeg_path_npy, np.array(emeg, dtype=np.float16)) + + es = do_gridsearch( + emeg_values=emeg, + sensor_names=ch_names, + function=func, + seconds_per_split=0.5, + n_derangements=1, + n_splits=800, + start_latency=-100, + emeg_t_start=-200, + ) + + expression_plot(es) + + +if __name__ == '__main__': + main() From f5c62bbe6e65fbd3783e9a2cbaa8ef13ac50005c Mon Sep 17 00:00:00 2001 From: Cai Wingfield Date: Fri, 15 Dec 2023 14:23:18 +0000 Subject: [PATCH 06/25] Audio shift in seconds --- kymata/config/dataset3.yaml | 6 +++--- kymata/config/dataset4.yaml | 6 +++--- kymata/preproc/pipeline.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/kymata/config/dataset3.yaml b/kymata/config/dataset3.yaml index 43fb41ba..9f93fbbf 100644 --- a/kymata/config/dataset3.yaml +++ b/kymata/config/dataset3.yaml @@ -42,9 +42,9 @@ automatic_bad_channel_detection_requested: False audio_delivery_latency: 26 # in milliseconds visual_delivery_latency: 17 # in milliseconds tactile_delivery_latency: 0 # in milliseconds -audio_delivery_shift_correction: 0 # in milliseconds per second -tmin: -0.2 -tmax: 1.8 +audio_delivery_shift_correction: 0 # in seconds per second +tmin: -0.2 # seconds +tmax: 1.8 # seconds eeg_thresh: 1 #200e-6 
grad_thresh: 1 #4000e-13 diff --git a/kymata/config/dataset4.yaml b/kymata/config/dataset4.yaml index 97c7b43a..5f83b93a 100644 --- a/kymata/config/dataset4.yaml +++ b/kymata/config/dataset4.yaml @@ -29,9 +29,9 @@ automatic_bad_channel_detection_requested: False audio_delivery_latency: 26 # in milliseconds visual_delivery_latency: 17 # in milliseconds tactile_delivery_latency: 0 # in milliseconds -audio_delivery_shift_correction: 0.5375 # in milliseconds per second -tmin: -0.2 -tmax: 1.8 +audio_delivery_shift_correction: 0.0005375 # in seconds per second +tmin: -0.2 # seconds +tmax: 1.8 # seconds eeg_thresh: 1 #200e-6 grad_thresh: 1 #4000e-13 diff --git a/kymata/preproc/pipeline.py b/kymata/preproc/pipeline.py index 5027874e..6f18142a 100644 --- a/kymata/preproc/pipeline.py +++ b/kymata/preproc/pipeline.py @@ -244,7 +244,7 @@ def create_trials(dataset_directory_name: str, mag_thresh: float, visual_delivery_latency: float, audio_delivery_latency: float, - audio_delivery_shift_correction: float, + audio_delivery_shift_correction: float, # seconds tmin: float, tmax: float, ): @@ -300,7 +300,7 @@ def create_trials(dataset_directory_name: str, for run, item in enumerate(audio_events_raw): for trial in range(1, number_of_trials + 1): audio_events[(trial + (number_of_trials * run)) - 1][0] = item[0] + round( - (trial - 1) * (1000 + audio_delivery_shift_correction)) + (trial - 1) * 1000 * (1 + audio_delivery_shift_correction)) audio_events[(trial + (number_of_trials * run)) - 1][1] = 0 audio_events[(trial + (number_of_trials * run)) - 1][2] = trial From 25659f7691bbe950895f7425807eea058ce90722 Mon Sep 17 00:00:00 2001 From: Cai Wingfield Date: Fri, 15 Dec 2023 15:03:54 +0000 Subject: [PATCH 07/25] Tidy --- invokers/run_gridsearch.py | 11 +++++++++-- kymata/gridsearch/gridsearch.py | 12 ++++++------ kymata/io/functions.py | 28 ++++++++++++++++------------ 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py index 6b35d222..12b82a65 100644 --- a/invokers/run_gridsearch.py +++ b/invokers/run_gridsearch.py @@ -13,11 +13,18 @@ def main(): downsample_rate = 5 function_name = 'd_IL2' - func = load_function(str(Path('/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/predicted_function_contours', '/GMSloudness/stimulisig')), + base_dir = Path("/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/") + + func = load_function(Path(base_dir, + 'predicted_function_contours', + 'GMSloudness/stimulisig'), func_name=function_name) func = func.downsampled(downsample_rate) - emeg_dir = Path(f"/imaging/projects/cbu/kymata/data//dataset_4-english-narratives/intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data") + emeg_dir = Path(base_dir, + "intrim_preprocessing_files", + "3_trialwise_sensorspace", + "evoked_data") emeg_filename = 'participant_01-ave' # Load data diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index b197bd13..17bd8548 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -9,17 +9,17 @@ def do_gridsearch( - emeg_values: NDArray, + emeg_values: NDArray, # chan x time function: Function, sensor_names: list[str], start_latency: float, # seconds emeg_t_start: float, emeg_sample_rate: int = 1000, # Hertz + audio_shift_correction: float = 0.000_537_5, # seconds/second # TODO: describe in which direction? # TODO: what are good default values? 
n_derangements: int = 20, seconds_per_split: float = 0.5, n_splits: int = 800, - audio_shift_correction: float = 0.0005375, # second/second # TODO: describe in which direction? ) -> SensorExpressionSet: """ Do the Kymata gridsearch over all hexels for all latencies. @@ -51,10 +51,10 @@ def do_gridsearch( ] emeg_reshaped = np.zeros((n_channels, n_splits, n_samples_per_split)) for split_i in range(n_splits): - emeg_reshaped[:, split_i, :] = emeg_values[ - :, split_initial_timesteps[split_i] - :split_initial_timesteps[split_i] + int(2 * emeg_sample_rate * seconds_per_split) - :downsample_rate] + # Split indexes + split_start = split_initial_timesteps[split_i] + split_stop = split_start + int(2 * emeg_sample_rate * seconds_per_split) + emeg_reshaped[:, split_i, :] = emeg_values[:, split_start:split_stop:downsample_rate] # Derangements for null distribution derangements = np.zeros((n_derangements, n_splits), dtype=int) diff --git a/kymata/io/functions.py b/kymata/io/functions.py index 3db5b237..f1959a2a 100644 --- a/kymata/io/functions.py +++ b/kymata/io/functions.py @@ -1,4 +1,4 @@ -from os import path +from pathlib import Path from numpy import array, float16 from numpy.typing import NDArray @@ -6,22 +6,26 @@ from scipy.io import loadmat from kymata.entities.functions import Function +from kymata.io.file import path_type -def load_function(function_path: str, func_name: str) -> Function: - if not path.isfile(function_path + '.h5'): - mat = loadmat(function_path + '.mat')['stimulisig'] - with File(function_path + '.h5', 'w') as f: - for key in mat.dtype.names: - if key != 'name': - f.create_dataset(key, data=array(mat[key][0, 0], dtype=float16)) - func: NDArray = array(mat[func_name][0][0]) +def load_function(function_path_without_suffix: path_type, func_name: str) -> Function: + function_path_without_suffix = Path(function_path_without_suffix) + func: NDArray + if function_path_without_suffix.with_suffix(".h5").exists(): + with File(function_path_without_suffix.with_suffix(".h5"), "r") as f: + func = f[func_name] else: - with File(function_path + '.h5', 'r') as f: - func: NDArray = f[func_name] + mat = loadmat(str(function_path_without_suffix.with_suffix(".mat")))['stimulisig'] + with File(function_path_without_suffix.with_suffix(".h5"), 'w') as f: + for key in mat.dtype.names: + if key == 'name': + continue + f.create_dataset(key, data=array(mat[key][0, 0], dtype=float16)) + func = array(mat[func_name][0][0]) return Function( name=func_name, values=func.flatten().squeeze(), - tstep=0.001, + sample_rate=1000, ) From dc1238e9e9af094b7f8098faa72800de37297163 Mon Sep 17 00:00:00 2001 From: Cai Wingfield Date: Fri, 15 Dec 2023 16:09:31 +0000 Subject: [PATCH 08/25] Clarity --- kymata/gridsearch/gridsearch.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index 17bd8548..da084965 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -26,14 +26,11 @@ def do_gridsearch( """ # We'll need to downsample the EMEG to match the function's sample rate - downsample_rate = emeg_sample_rate / function.sample_rate + downsample_rate: int = int(emeg_sample_rate / function.sample_rate) n_samples_per_split = int( - (seconds_per_split * emeg_sample_rate - * 2) # We need double the length so we can do the full cross-correlation overlap - # TODO: does this need to be integer div if we're also coercing to int? 
- # Now that downsample_rate is computed (and is a float), this div will - # result in a float anyway. + seconds_per_split * emeg_sample_rate + * 2 # We need double the length so we can do the full cross-correlation overlap // downsample_rate) func = function.values.reshape(n_splits, n_samples_per_split // 2) @@ -62,7 +59,7 @@ def do_gridsearch( derangements[der_i, :] = generate_derangement(n_splits) derangements = np.vstack((np.arange(n_splits), derangements)) # Include the identity on top - # FFT cross-corr + # Fast cross-correlation using FFT emeg_reshaped = np.fft.rfft(normalize(emeg_reshaped), n=n_samples_per_split, axis=-1) f_func = np.conj(np.fft.rfft(normalize(func), n=n_samples_per_split, axis=-1)) corrs = np.zeros((n_channels, n_derangements + 1, n_splits, n_samples_per_split // 2)) @@ -84,25 +81,29 @@ def do_gridsearch( return es -def _ttest(corrs: NDArray, f_alpha: float = 0.001, use_all_lats: bool = True): +def _ttest(corrs: NDArray, use_all_lats: bool = True): """ Vectorised Welch's t-test. """ + n_channels, n_derangements, n_splits, t_steps = corrs.shape # Fisher Z-Transformation - corrs = 0.5 * np.log((1 + corrs) / (1 - corrs)) - n_channels, n_derangements, n_splits, t_steps = corrs.shape + corrs_z = 0.5 * np.log((1 + corrs) / (1 - corrs)) - true_mean = np.mean(corrs[:, 0, :, :], axis=1) - true_var = np.var(corrs[:, 0, :, :], axis=1, ddof=1) + # Non-deranged values are on top + true_mean = np.mean(corrs_z[:, 0, :, :], axis=1) + true_var = np.var(corrs_z[:, 0, :, :], axis=1, ddof=1) true_n = n_splits + + # Recompute mean and var for null correlations + # TODO: why looking at only 1 in the n_derangements dimension? if use_all_lats: - rand_mean = np.mean(corrs[:, 1:, :, :].reshape(n_channels, -1), axis=1).reshape(n_channels, 1) - rand_var = np.var(corrs[:, 1:, :, :].reshape(n_channels, -1), axis=1, ddof=1).reshape(n_channels, 1) + rand_mean = np.mean(corrs_z[:, 1:, :, :].reshape(n_channels, -1), axis=1).reshape(n_channels, 1) + rand_var = np.var(corrs_z[:, 1:, :, :].reshape(n_channels, -1), axis=1, ddof=1).reshape(n_channels, 1) rand_n = n_splits * n_derangements * t_steps else: - rand_mean = np.mean(corrs[:, 1:, :, :].reshape(n_channels, -1, t_steps), axis=1) - rand_var = np.var(corrs[:, 1:, :, :].reshape(n_channels, -1, t_steps), axis=1, ddof=1) + rand_mean = np.mean(corrs_z[:, 1:, :, :].reshape(n_channels, -1, t_steps), axis=1) + rand_var = np.var(corrs_z[:, 1:, :, :].reshape(n_channels, -1, t_steps), axis=1, ddof=1) rand_n = n_splits * n_derangements # Vectorized two-sample t-tests for all channels and time steps From fecaff5700ab6a7b6be00d9b207e4456795141a7 Mon Sep 17 00:00:00 2001 From: Cai Wingfield Date: Fri, 22 Dec 2023 18:12:54 +0000 Subject: [PATCH 09/25] Create this with log-p values --- kymata/gridsearch/gridsearch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index da084965..9ab0987f 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -5,7 +5,7 @@ from kymata.entities.functions import Function from kymata.math.combinatorics import generate_derangement from kymata.math.vector import normalize -from kymata.entities.expression import SensorExpressionSet +from kymata.entities.expression import SensorExpressionSet, p_to_logp def do_gridsearch( @@ -75,7 +75,7 @@ def do_gridsearch( functions=function.name, latencies=latencies, sensors=sensor_names, - data=p_values, + data=p_to_logp(p_values), ) return es From 
17f96dc5c68c8e92f2c3176ae1ca24b4b93f0b14 Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Wed, 3 Jan 2024 14:12:48 +0000 Subject: [PATCH 10/25] initial commit [NOT READY FOR MERGE] --- invokers/run_gridsearch.py | 6 +- invokers/run_preprocessing_dataset4.py | 23 ++++- kymata/io/mne.py | 2 +- kymata/preproc/pipeline.py | 116 +++++++++++++++++++++++++ 4 files changed, 142 insertions(+), 5 deletions(-) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py index 12b82a65..e10fbfa7 100644 --- a/invokers/run_gridsearch.py +++ b/invokers/run_gridsearch.py @@ -3,6 +3,9 @@ import numpy as np from numpy.typing import NDArray +import sys +sys.path.append('/imaging/projects/cbu/kymata/analyses/ollie/kymata-toolbox') + from kymata.gridsearch.gridsearch import do_gridsearch from kymata.io.functions import load_function from kymata.io.mne import get_emeg_data @@ -17,7 +20,8 @@ def main(): func = load_function(Path(base_dir, 'predicted_function_contours', - 'GMSloudness/stimulisig'), + 'GMSloudness', + 'stimulisig'), func_name=function_name) func = func.downsampled(downsample_rate) diff --git a/invokers/run_preprocessing_dataset4.py b/invokers/run_preprocessing_dataset4.py index 69d3f964..40f23ec2 100644 --- a/invokers/run_preprocessing_dataset4.py +++ b/invokers/run_preprocessing_dataset4.py @@ -1,14 +1,14 @@ from pathlib import Path from kymata.io.yaml import load_config -from kymata.preproc.pipeline import run_preprocessing, create_trials +from kymata.preproc.pipeline import run_preprocessing, create_trials, create_trialwise_data # noinspection DuplicatedCode def main(): config = load_config(str(Path(Path(__file__).parent, "kymata", "config", "dataset4.yaml"))) - create_trials( + """create_trials( dataset_directory_name=config['dataset_directory_name'], list_of_participants=config['list_of_participants'], repetitions_per_runs=config['repetitions_per_runs'], @@ -23,7 +23,7 @@ def main(): audio_delivery_shift_correction=config['audio_delivery_shift_correction'], tmin=config['tmin'], tmax=config['tmax'], - ) + )""" run_preprocessing( list_of_participants=config['list_of_participants'], @@ -36,6 +36,23 @@ def main(): automatic_bad_channel_detection_requested=config['automatic_bad_channel_detection_requested'], ) + create_trialwise_data( + dataset_directory_name=config['dataset_directory_name'], + list_of_participants=config['list_of_participants'], + repetitions_per_runs=config['repetitions_per_runs'], + number_of_runs=config['number_of_runs'], + number_of_trials=config['number_of_trials'], + input_streams=config['input_streams'], + eeg_thresh=float(config['eeg_thresh']), + grad_thresh=float(config['grad_thresh']), + mag_thresh=float(config['mag_thresh']), + visual_delivery_latency=config['visual_delivery_latency'], + audio_delivery_latency=config['audio_delivery_latency'], + audio_delivery_shift_correction=config['audio_delivery_shift_correction'], + tmin=config['tmin'], + tmax=config['tmax'], + ) + if __name__ == '__main__': main() diff --git a/kymata/io/mne.py b/kymata/io/mne.py index 577f9e84..c3f2f6f1 100644 --- a/kymata/io/mne.py +++ b/kymata/io/mne.py @@ -12,7 +12,7 @@ def get_emeg_data(emeg_path: path_type) -> tuple[NDArray, list[str]]: emeg_path = Path(emeg_path) evoked = read_evokeds(emeg_path, verbose=False) # should be len 1 list - emeg = evoked[0].get_data() # numpy array shape (sensor_num, N) = (370, 406_001) + emeg = evoked[0].get_data() # numpy array shape (sensor_num, N) = (370, 403_001) # Normalise emeg /= max(emeg, axis=1, keepdims=True) return emeg, evoked[0].ch_names 
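It is worth pinning down the statistic these patches keep reworking: `_ttest` computes Welch's t-test by hand, vectorised over all channels at once, and its per-channel arithmetic can be cross-checked against `scipy.stats.ttest_ind` with `equal_var=False`, which implements the same two-tailed Welch test. A quick sketch with made-up Fisher-z samples (illustrative numbers only; not part of the patch series):

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
true_z = rng.normal(0.10, 0.05, size=800)    # Fisher-z correlations, matched splits
null_z = rng.normal(0.00, 0.05, size=8_000)  # pooled correlations from derangements

# Welch's statistic and degrees of freedom, as written in _ttest
m1, v1, n1 = true_z.mean(), true_z.var(ddof=1), true_z.size
m2, v2, n2 = null_z.mean(), null_z.var(ddof=1), null_z.size
t_stat = (m1 - m2) / np.sqrt(v1 / n1 + v2 / n2)
df = (v1 / n1 + v2 / n2) ** 2 / (
    (v1 / n1) ** 2 / (n1 - 1) + (v2 / n2) ** 2 / (n2 - 1)
)
p_manual = 2 * stats.t.sf(abs(t_stat), df)

# scipy's Welch implementation should agree to numerical precision
result = stats.ttest_ind(true_z, null_z, equal_var=False)
assert np.isclose(p_manual, result.pvalue)
```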
diff --git a/kymata/preproc/pipeline.py b/kymata/preproc/pipeline.py
index 6f18142a..550ae617 100644
--- a/kymata/preproc/pipeline.py
+++ b/kymata/preproc/pipeline.py
@@ -233,6 +233,122 @@ def _remove_ecg(filt_raw, ica):
     ica.plot_overlay(filt_raw, exclude=ica.exclude, picks='mag')
 
 
+def create_trialwise_data(dataset_directory_name: str,
+                          list_of_participants: list[str],
+                          repetitions_per_runs: int,
+                          number_of_runs: int,
+                          number_of_trials: int,
+                          input_streams: list[str],
+                          eeg_thresh: float,
+                          grad_thresh: float,
+                          mag_thresh: float,
+                          visual_delivery_latency: float,
+                          audio_delivery_latency: float,
+                          audio_delivery_shift_correction: float,  # seconds
+                          tmin: float,
+                          tmax: float,
+                          ):
+    """Create trials objects from the raw data files (still in sensor space)"""
+
+    global_droplog = []
+
+    print(f"{Fore.GREEN}{Style.BRIGHT}Starting trialwise data creation{Style.RESET_ALL}")
+
+    for p in list_of_participants:
+
+        print(f"{Fore.GREEN}{Style.BRIGHT}...Concatenating trials{Style.RESET_ALL}")
+
+        cleaned_raws = []
+
+        for run in range(1, number_of_runs + 1):
+            raw_fname = f'{data_path}/intrim_preprocessing_files/2_cleaned/{p}_run{run}_cleaned_raw.fif.gz'
+            if os.path.isfile(raw_fname):
+                raw = mne.io.Raw(raw_fname, preload=True)
+                events_ = mne.find_events(raw, stim_channel='STI101', shortest_event=1)
+                #print([i[0] for i in mne.pick_events(events_, include=2)])
+                #print(mne.pick_events(events_, include=3))
+                cleaned_raws.append(raw)
+
+        raw = mne.io.concatenate_raws(raws=cleaned_raws, preload=True)
+        raw_events = mne.find_events(raw, stim_channel='STI101', shortest_event=1)  #, uint_cast=True)
+
+        """print(f"{Fore.GREEN}{Style.BRIGHT}...finding visual events{Style.RESET_ALL}")
+
+        # Extract visual events
+        visual_events = mne.pick_events(raw_events, include=[2, 3])
+
+        # Correct for visual equipment latency error
+        visual_events = mne.event.shift_time_events(visual_events, [2, 3], visual_delivery_latency, 1)
+
+        trigger_name = 1
+        for trigger in visual_events:
+            trigger[2] = trigger_name  # rename as '1,2,3 ...400' etc
+            if trigger_name == 400:
+                trigger_name = 1
+            else:
+                trigger_name = trigger_name + 1
+
+        # Test there are the correct number of events
+        assert visual_events.shape[0] == repetitions_per_runs * number_of_runs * number_of_trials"""
+
+        print(f"{Fore.GREEN}{Style.BRIGHT}...finding audio events{Style.RESET_ALL}")
+
+        # Extract audio events
+        audio_events_raw = mne.pick_events(raw_events, include=3)
+
+        # Correct for audio latency error
+        audio_events_raw = mne.event.shift_time_events(audio_events_raw, [3], audio_delivery_latency, 1)
+
+        audio_events = np.zeros((len(audio_events_raw) * 400, 3), dtype=int)
+        for run, item in enumerate(audio_events_raw):
+            print('item: ', item)
+            audio_events_raw[run][2] = run  # rename the audio events raw to pick apart later
+            for trial in range(number_of_trials):
+                audio_events[(trial + (number_of_trials * run))][0] = item[0] + round(
+                    trial * 1000 * (1 + audio_delivery_shift_correction))
+                audio_events[(trial + (number_of_trials * run))][1] = 0
+                audio_events[(trial + (number_of_trials * run))][2] = trial
+
+        # Test there are the correct number of events
+        assert audio_events.shape[0] == repetitions_per_runs * number_of_runs * number_of_trials  # TODO handle overlapping visual/audio triggers
+
+        print(f'\n Trials found: {audio_events.shape[0]} \n')
+
+        # Denote picks
+        include = []  # ['MISC006']  # MISC05, trigger channels etc, if needed
+        picks = mne.pick_types(raw.info, meg=True, eeg=True, stim=False, exclude='bads', include=include)
+
print(f"{Fore.GREEN}{Style.BRIGHT}... extract and save evoked data{Style.RESET_ALL}") + + for input_stream in input_streams: # TODO n.b. not setup for visual/tactile stream yet + if input_stream == 'auditory': + events = audio_events + else: + events = visual_events + + # Extract trial instances ('epochs') + + _tmin = -0.2 + _tmax = 400 + 0.8 + 2 # Extra space to account for audio file latency correction + epochs = mne.Epochs(raw, audio_events_raw, None, _tmin, _tmax, picks=picks, + baseline=(None, None), reject=dict(eeg=eeg_thresh, grad=grad_thresh, mag=mag_thresh), + preload=True) + + # Log which channels are worst + dropfig = epochs.plot_drop_log(subject=p) + dropfig.savefig(f'{data_path}/intrim_preprocessing_files/{save_folder}/logs/f{input_stream}_drop-log_{p}.jpg') + + global_droplog.append(f'[{input_stream}]{p}:{epochs.drop_log_stats(epochs.drop_log)}') + + for i in range(len(audio_events_raw)): + evoked = epochs[str(i)].average() + evoked.save(f'{data_path}/intrim_preprocessing_files/{save_folder}/evoked_data/{p}_rep{i}.fif', overwrite=True) + + evoked = epochs.average() + evoked.save(f'{data_path}/intrim_preprocessing_files/{save_folder}/evoked_data/{p}-ave.fif', overwrite=True) + + + def create_trials(dataset_directory_name: str, list_of_participants: list[str], repetitions_per_runs: int, From 39a19bfed7202fcf7a6c53983627d527df0ab1c4 Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Thu, 4 Jan 2024 13:48:24 +0000 Subject: [PATCH 11/25] MVP --- invokers/run_gridsearch.py | 105 +++++++++++++++------ kymata/entities/functions.py | 2 +- kymata/gridsearch/gridsearch.py | 156 +++++++++++++++++++++++--------- kymata/io/functions.py | 12 ++- kymata/io/mne.py | 61 +++++++++++-- kymata/math/combinatorics.py | 7 +- 6 files changed, 250 insertions(+), 93 deletions(-) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py index 73ffb87e..7f79bf63 100644 --- a/invokers/run_gridsearch.py +++ b/invokers/run_gridsearch.py @@ -1,55 +1,100 @@ from pathlib import Path +import sys +sys.path.append('/imaging/projects/cbu/kymata/analyses/ollie/kymata-toolbox') + import numpy as np +import argparse from numpy.typing import NDArray from kymata.gridsearch.gridsearch import do_gridsearch from kymata.io.functions import load_function -from kymata.io.mne import get_emeg_data -from kymata.plot.plotting import expression_plot +from kymata.io.mne import load_emeg_pack +# from kymata.plot.plotting import expression_plot def main(): - downsample_rate = 5 - function_name = 'd_IL2' - base_dir = Path("/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/") + parser = argparse.ArgumentParser(description='Gridsearch Params') + parser.add_argument('--emeg_sample_rate', type=int, default=1000, + help='sampling rate of the emeg machine (always 1000 I thought?)') + parser.add_argument('--snr', type=float, default=3, + help='inverse solution snr') + parser.add_argument('--downsample_rate', type=int, default=5, + help='downsample_rate') + parser.add_argument('--base_dir', type=str, default="/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/", + help='base data directory') + parser.add_argument('--data_path', type=str, default="intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data", + help='data path after base dir') + parser.add_argument('--function_path', type=str, default="predicted_function_contours/GMSloudness/stimulisig", + help='snr') + parser.add_argument('--function_name', type=str, default="d_IL2", + help='function name in stimulisig') + 
parser.add_argument('--emeg_file', type=str, default="participant_01-ave", + help='emeg file name') + parser.add_argument('--ave_mode', type=str, default="ave", + help='either "ave" or "add": average over the listed repetitions, or treat them as extra data') + parser.add_argument('--inverse_operator', type=str, default="intrim_preprocessing_files/4_hexel_current_reconstruction/inverse-operators", + help='inverse solution path') + parser.add_argument('--seconds_per_split', type=float, default=0.5, + help='seconds in each split of the recording, also maximum range of latencies being checked') + parser.add_argument('--n_splits', type=int, default=800, + help='number of splits to split the recording into (set to 400/seconds_per_split for the full file)') + parser.add_argument('--n_derangements', type=int, default=1, + help='number of derangements for the null distribution') + parser.add_argument('--start_latency', type=float, default=-100, + help='earliest latency to check in cross correlation') + parser.add_argument('--emeg_t_start', type=float, default=-200, + help='start of the emeg evoked files relative to the start of the function') + parser.add_argument('--audio_shift_correction', type=float, default=0.000_537_5, + help='audio drift correction: seconds to add to the start of each emeg split per second of emeg elapsed') + args = parser.parse_args() + args.base_dir = Path(args.base_dir) + + - func = load_function(Path(base_dir, - 'predicted_function_contours', - 'GMSloudness', - 'stimulisig'), + emeg_dir = Path(args.base_dir, args.data_path) + emeg_paths = [Path(emeg_dir, args.emeg_file)] - func_name=function_name) - func = func.downsampled(downsample_rate) + """participants = ['participant_01', + 'participant_01b', + 'participant_02', + 'participant_03', + 'participant_04', + 'participant_05', + 'pilot_01', + 'pilot_02'] - emeg_dir = Path(base_dir, - "intrim_preprocessing_files", - "3_trialwise_sensorspace", - "evoked_data") - emeg_filename = 'participant_01-ave' + reps = [f'_rep{i}' for i in range(8)] + ['-ave']""" + + emeg_paths = [Path(emeg_dir, p + r) for p in participants[:1] for r in reps[:2]] # Load data - emeg_path_npy = Path(emeg_dir, f"{emeg_filename}.npy") - emeg_path_fif = Path(emeg_dir, f"{emeg_filename}.fif") - try: - ch_names: list[str] = [] # TODO: we'll need these - emeg: NDArray = np.load(emeg_path_npy) - except FileNotFoundError: - emeg, ch_names = get_emeg_data(emeg_path_fif) - np.save(emeg_path_npy, np.array(emeg, dtype=np.float16)) + emeg, ch_names = load_emeg_pack(emeg_paths, + need_names=False, + ave_mode=args.ave_mode, + inverse_operator=None, #args.inverse_operator, + p_tshift=None, + snr=args.snr) + + func = load_function(Path(args.base_dir, args.function_path), + func_name=args.function_name) + func = func.downsampled(args.downsample_rate) es = do_gridsearch( emeg_values=emeg, sensor_names=ch_names, function=func, - seconds_per_split=0.5, - n_derangements=1, - n_splits=800, - start_latency=-100, - emeg_t_start=-200, + seconds_per_split=args.seconds_per_split, + n_derangements=args.n_derangements, + n_splits=args.n_splits, + start_latency=args.start_latency, + emeg_t_start=args.emeg_t_start, + emeg_sample_rate=args.emeg_sample_rate, + audio_shift_correction=args.audio_shift_correction, + ave_mode=args.ave_mode, ) - expression_plot(es) + # expression_plot(es) if __name__ == '__main__': diff --git a/kymata/entities/functions.py b/kymata/entities/functions.py index 47edb775..cba27b7b 100644 --- a/kymata/entities/functions.py +++
b/kymata/entities/functions.py @@ -12,7 +12,7 @@ class Function: def downsampled(self, rate: int): return Function( name=self.name, - values=self.values[:, ::rate], + values=self.values[::rate], sample_rate=self.sample_rate / rate, ) diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index 9ab0987f..8ef0858a 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -5,22 +5,24 @@ from kymata.entities.functions import Function from kymata.math.combinatorics import generate_derangement from kymata.math.vector import normalize -from kymata.entities.expression import SensorExpressionSet, p_to_logp - +#from kymata.entities.expression import SensorExpressionSet, p_to_logp +import matplotlib.pyplot as plt def do_gridsearch( emeg_values: NDArray, # chan x time function: Function, sensor_names: list[str], - start_latency: float, # seconds - emeg_t_start: float, + start_latency: float, # ms + emeg_t_start: float, # ms emeg_sample_rate: int = 1000, # Hertz audio_shift_correction: float = 0.000_537_5, # seconds/second # TODO: describe in which direction? - # TODO: what are good default values? - n_derangements: int = 20, + n_derangements: int = 1, seconds_per_split: float = 0.5, n_splits: int = 800, - ) -> SensorExpressionSet: + ave_mode: str = 'ave', # either ave or add, for averaging over input files or adding in as extra evidence + add_autocorr: bool = True, + plot_name: str = 'example' + ): """ Do the Kymata gridsearch over all hexels for all latencies. """ @@ -28,55 +30,119 @@ def do_gridsearch( # We'll need to downsample the EMEG to match the function's sample rate downsample_rate: int = int(emeg_sample_rate / function.sample_rate) - n_samples_per_split = int( - seconds_per_split * emeg_sample_rate - * 2 # We need double the length so we can do the full cross-correlation overlap - // downsample_rate) + n_samples_per_split = int(seconds_per_split * emeg_sample_rate * 2 // downsample_rate) + + if ave_mode == 'add': + n_reps = len(EMEG_paths) + else: + n_reps = 1 - func = function.values.reshape(n_splits, n_samples_per_split // 2) + func_length = n_splits * n_samples_per_split // 2 + if func_length < function.values.shape[0]: + func = function.values[:func_length].reshape(n_splits, n_samples_per_split // 2) + print(f'WARNING: not using full 400s of the file (only using {round(n_splits * seconds_per_split, 2)}s)') + else: + func = function.values.reshape(n_splits, n_samples_per_split // 2) n_channels = emeg_values.shape[0] # Reshape EMEG into splits of `seconds_per_split` s - split_initial_timesteps = [ - start_latency - + round(i * seconds_per_split - # Correct for audio drift in delivery equipment - * (1 + audio_shift_correction) - ) - - emeg_t_start - for i in range(n_splits) - ] - emeg_reshaped = np.zeros((n_channels, n_splits, n_samples_per_split)) - for split_i in range(n_splits): - # Split indexes - split_start = split_initial_timesteps[split_i] - split_stop = split_start + int(2 * emeg_sample_rate * seconds_per_split) - emeg_reshaped[:, split_i, :] = emeg_values[:, split_start:split_stop:downsample_rate] + split_initial_timesteps = [start_latency + round(i * 1000 * seconds_per_split * (1 + audio_shift_correction)) - emeg_t_start + for i in range(n_splits)] + + emeg_reshaped = np.zeros((n_channels, n_splits * n_reps, n_samples_per_split)) + for j in range(n_reps): + for split_i in range(n_splits): + split_start = split_initial_timesteps[split_i] + split_stop = split_start + int(2 * emeg_sample_rate * seconds_per_split) + emeg_reshaped[:, 
split_i + (j * n_splits), :] = emeg_values[:, j, split_start:split_stop:downsample_rate] + + del emeg_values # Derangements for null distribution - derangements = np.zeros((n_derangements, n_splits), dtype=int) + derangements = np.zeros((n_derangements, n_splits * n_reps), dtype=int) for der_i in range(n_derangements): - derangements[der_i, :] = generate_derangement(n_splits) - derangements = np.vstack((np.arange(n_splits), derangements)) # Include the identity on top + derangements[der_i, :] = generate_derangement(n_splits * n_reps, n_splits) + derangements = np.vstack((np.arange(n_splits * n_reps), derangements)) # Include the identity on top # Fast cross-correlation using FFT emeg_reshaped = np.fft.rfft(normalize(emeg_reshaped), n=n_samples_per_split, axis=-1) - f_func = np.conj(np.fft.rfft(normalize(func), n=n_samples_per_split, axis=-1)) - corrs = np.zeros((n_channels, n_derangements + 1, n_splits, n_samples_per_split // 2)) + F_func = np.conj(np.fft.rfft(normalize(func), n=n_samples_per_split, axis=-1)) + corrs = np.zeros((n_channels, n_derangements + 1, n_splits * n_reps, n_samples_per_split // 2)) for der_i, derangement in enumerate(derangements): deranged_emeg = emeg_reshaped[:, derangement, :] - corrs[:, der_i] = np.fft.irfft(deranged_emeg * f_func)[:, :, :n_samples_per_split//2] - - p_values = _ttest(corrs) - - latencies = np.linspace(start_latency, start_latency + seconds_per_split, n_samples_per_split // 2) / 1000 - - es = SensorExpressionSet( + corrs[:, der_i] = np.fft.irfft(deranged_emeg * F_func)[:, :, :n_samples_per_split//2] + + + if add_autocorr: + auto_corrs = np.zeros((n_splits, n_samples_per_split//2)) + noise = normalize(np.random.randn(func.shape[0], func.shape[1])) * 0 + noisy_func = normalize(np.copy(func)) + noise + nn = n_samples_per_split // 2 + + F_noisy_func = np.fft.rfft(normalize(noisy_func), n=nn, axis=-1) + F_func = np.conj(np.fft.rfft(normalize(func), n=nn, axis=-1)) + + auto_corrs = np.fft.irfft(F_noisy_func * F_func) + + del F_func, deranged_emeg, emeg_reshaped + + log_pvalues = _ttest(corrs) + + latencies = np.linspace(start_latency, start_latency + seconds_per_split * 1000, n_samples_per_split // 2) / 1000 + + if plot_name: + plt.figure(1) + maxs = np.max(-log_pvalues[:, :], axis=1) + n_amaxs = 5 + amaxs = np.argpartition(maxs, -n_amaxs)[-n_amaxs:] + amax = np.argmax(-log_pvalues) // (n_samples_per_split // 2) + amaxs = [i for i in amaxs if i != amax] + [206] + + plt.plot(latencies, np.mean(corrs[amax, 0], axis=-2).T, 'r-', label=amax) + plt.plot(latencies, np.mean(corrs[amaxs, 0], axis=-2).T, label=amaxs) + std_null = np.mean(np.std(corrs[:, 1], axis=-2), axis=0).T * 3 / np.sqrt(n_reps * n_splits) # 3 pop std.s + std_real = np.std(corrs[amax, 0], axis=-2).T * 3 / np.sqrt(n_reps * n_splits) + av_real = np.mean(corrs[amax, 0], axis=-2).T + #print(std_null) + plt.fill_between(latencies, -std_null, std_null, alpha=0.5, color='grey') + plt.fill_between(latencies, av_real - std_real, av_real + std_real, alpha=0.25, color='red') + + if add_autocorr: + peak_lat_ind = np.argmax(-log_pvalues) % (n_samples_per_split // 2) + peak_lat = latencies[peak_lat_ind] + peak_corr = np.mean(corrs[amax, 0], axis=-2)[peak_lat_ind] + print('peak lat, peak corr:', peak_lat, peak_corr) + + auto_corrs = np.mean(auto_corrs, axis=0) + plt.plot(latencies, np.roll(auto_corrs, peak_lat_ind) * peak_corr / np.max(auto_corrs), 'k--', label='func auto-corr') + + + #plt.plot(latencies, np.mean(corrs[0:300:15, 1], axis=-2).T, 'cyan') + plt.axvline(0, color='k') + plt.legend() + 
plt.xlabel('latencies (ms)') + plt.ylabel('Corr coef.') + plt.savefig(f'{plot_name}_1.png') + plt.clf() + + plt.figure(2) + plt.plot(latencies, -log_pvalues[amax].T / np.log(10), 'r-', label=amax) + plt.plot(latencies, -log_pvalues[amaxs].T / np.log(10), label=amaxs) + plt.axvline(0, color='k') + plt.legend() + plt.xlabel('latencies (ms)') + plt.ylabel('p-values') + plt.savefig(f'{plot_name}_2.png') + plt.clf() + + return + + """es = SensorExpressionSet( functions=function.name, latencies=latencies, sensors=sensor_names, data=p_to_logp(p_values), - ) + )""" return es @@ -114,9 +180,11 @@ def _ttest(corrs: NDArray, use_all_lats: bool = True): (rand_var / rand_n) ** 2 / (rand_n - 1))) t_stat = numerator / denominator - p = stats.t.sf(np.abs(t_stat), df) * 2 # two-tailed p-value - # Adjust p-values for multiple comparisons (Bonferroni correction) [NOT SURE ABOUT THIS] - pvalues_adj = p #np.minimum(1, p * t_steps * n_channels / (1 - f_alpha)) + if np.min(df) <= 300: + log_p = np.log(stats.t.sf(np.abs(t_stat), df) * 2) # two-tailed p-value + else: + # norm v good approx for this, (logsf for t not implemented in logspace) + log_p = stats.norm.logsf(np.abs(t_stat)) + np.log(2) - return pvalues_adj + return log_p / np.log(10) # log base correction diff --git a/kymata/io/functions.py b/kymata/io/functions.py index f1959a2a..549232e1 100644 --- a/kymata/io/functions.py +++ b/kymata/io/functions.py @@ -1,6 +1,6 @@ from pathlib import Path -from numpy import array, float16 +from numpy import array, float16, convolve from numpy.typing import NDArray from h5py import File from scipy.io import loadmat @@ -9,12 +9,12 @@ from kymata.io.file import path_type -def load_function(function_path_without_suffix: path_type, func_name: str) -> Function: +def load_function(function_path_without_suffix: path_type, func_name: str, n_derivatives: int = 0) -> Function: function_path_without_suffix = Path(function_path_without_suffix) func: NDArray if function_path_without_suffix.with_suffix(".h5").exists(): with File(function_path_without_suffix.with_suffix(".h5"), "r") as f: - func = f[func_name] + func = array(f[func_name]) else: mat = loadmat(str(function_path_without_suffix.with_suffix(".mat")))['stimulisig'] with File(function_path_without_suffix.with_suffix(".h5"), 'w') as f: @@ -24,6 +24,12 @@ def load_function(function_path_without_suffix: path_type, func_name: str) -> Fu f.create_dataset(key, data=array(mat[key][0, 0], dtype=float16)) func = array(mat[func_name][0][0]) + if func_name in ('STL', 'IL', 'LTL'): + func = func.T + + for _ in range(n_derivatives): + func = convolve(func, [-1, 1], 'same') # derivative + return Function( name=func_name, values=func.flatten().squeeze(), diff --git a/kymata/io/mne.py b/kymata/io/mne.py index f3fcff1c..8862363f 100644 --- a/kymata/io/mne.py +++ b/kymata/io/mne.py @@ -1,19 +1,60 @@ from pathlib import Path -from mne import read_evokeds -from numpy import max +from mne import read_evokeds, minimum_norm, set_eeg_reference +import numpy as np from numpy.typing import NDArray +from os.path import isfile from kymata.io.file import path_type -def get_emeg_data(emeg_path: path_type) -> tuple[NDArray, list[str]]: - """Gets EMEG data from MNE format.""" - emeg_path = Path(emeg_path) - evoked = read_evokeds(emeg_path, verbose=False) # should be len 1 list - emeg = evoked[0].get_data() # numpy array shape (sensor_num, N) = (370, 403_001) +def load_single_emeg(emeg_path, need_names=False, inverse_operator=None, snr=4): + emeg_path_npy = f"{emeg_path}.npy" + emeg_path_fif = 
f"{emeg_path}.fif" + if isfile(emeg_path_npy) and (not need_names) and (inverse_operator is None): + ch_names: list[str] = [] # TODO: we'll need these + emeg: NDArray = np.load(emeg_path_npy) + else: + evoked = read_evokeds(emeg_path_fif, verbose=False) # should be len 1 list + if inverse_operator is not None: + lh_emeg, rh_emeg, ch_names = inverse_operate(evoked, inverse_operator, snr) + # TODO: I think ch_names here is the wrong thing + emeg = np.concatenate((lh_emeg, rh_emeg), axis=0) + # TODO: test this, also OOM potentially + del lh_emeg, rh_emeg + else: + emeg = evoked[0].get_data() # numpy array shape (sensor_num, N) = (370, 403_001) + ch_names = evoked[0].ch_names + emeg /= np.max(emeg, axis=1, keepdims=True) + if not isfile(emeg_path_npy): + np.save(emeg_path_npy, np.array(emeg, dtype=np.float16)) + del evoked + return emeg, ch_names + +def inverse_operate(evoked, inverse_operator, snr=4): + lambda2 = 1.0 / snr ** 2 + inverse_operator = minimum_norm.read_inverse_operator(inverse_operator) #, verbose=False) + set_eeg_reference(evoked, projection=True) #, verbose=False) + stc = minimum_norm.apply_inverse(evoked, inverse_operator, lambda2, 'MNE', pick_ori='normal') #, verbose=False) + return np.expand_dims(stc.lh_data, 1), np.expand_dims(stc.rh_data, 1), evoked.ch_names + +def load_emeg_pack(emeg_paths, need_names=False, ave_mode=None, inverse_operator=None, p_tshift=None, snr=4): # TODO: FIX PRE-AVE-NORMALISATION + if p_tshift is None: + p_tshift = [0]*len(emeg_paths) + emeg, emeg_names = load_single_emeg(emeg_paths[0], need_names, inverse_operator, snr) + emeg = emeg[:,p_tshift[0]:402001 + p_tshift[0]] + emeg = np.expand_dims(emeg, 1) + if ave_mode == 'add': + for i in range(1, len(emeg_paths)): + t_shift = p_tshift[i] + new_emeg = load_single_emeg(emeg_paths[i], need_names, inverse_operator, snr)[0][:,t_shift:402001 + t_shift] + emeg = np.concatenate((emeg, np.expand_dims(new_emeg, 1)), axis=1) + elif ave_mode == 'ave': + for i in range(1, len(emeg_paths)): + t_shift = p_tshift[i] + emeg += np.expand_dims(load_single_emeg(emeg_paths[i], need_names, inverse_operator, snr)[0][:,t_shift:402001 + t_shift], 1) + elif len(emeg_paths) > 1: + raise NotImplementedError(f'ave_mode "{ave_mode}" not known') + return emeg, emeg_names - # Normalise - emeg /= max(emeg, axis=1, keepdims=True) - return emeg, evoked[0].ch_names diff --git a/kymata/math/combinatorics.py b/kymata/math/combinatorics.py index 256640fd..22343425 100644 --- a/kymata/math/combinatorics.py +++ b/kymata/math/combinatorics.py @@ -2,15 +2,12 @@ from numpy.random import randint -def generate_derangement(n): # approx 3ms runtime for n=400 - """ - Generates a derangement (permutation with no fixed point) of size `n`. 
- """ +def generate_derangement(n, mod=int(1e9)): # approx 3ms runtime for n=400 while True: v = arange(n) for j in range(n - 1, -1, -1): p = randint(0, j + 1) - if v[p] == j: + if v[p] % mod == j % mod: break else: v[j], v[p] = v[p], v[j] From d813bbc7929a338ef6988416380b673db3e2635f Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Thu, 4 Jan 2024 16:22:38 +0000 Subject: [PATCH 12/25] inv operator now almost working, also added slurm bash script for running on the queue (better when using downsample=1 or inv_op) --- kymata/gridsearch/gridsearch.py | 12 ++++++------ kymata/io/mne.py | 15 +++++++++------ submit_gridsearch.sh | 26 ++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 12 deletions(-) create mode 100755 submit_gridsearch.sh diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index 8ef0858a..22ec17ba 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -88,7 +88,7 @@ def do_gridsearch( log_pvalues = _ttest(corrs) - latencies = np.linspace(start_latency, start_latency + seconds_per_split * 1000, n_samples_per_split // 2) / 1000 + latencies = np.linspace(start_latency, start_latency + (seconds_per_split * 1000), n_samples_per_split // 2 + 1)[:-1] if plot_name: plt.figure(1) @@ -120,17 +120,17 @@ def do_gridsearch( #plt.plot(latencies, np.mean(corrs[0:300:15, 1], axis=-2).T, 'cyan') plt.axvline(0, color='k') plt.legend() - plt.xlabel('latencies (ms)') + plt.xlabel('latencies (s)') plt.ylabel('Corr coef.') plt.savefig(f'{plot_name}_1.png') plt.clf() plt.figure(2) - plt.plot(latencies, -log_pvalues[amax].T / np.log(10), 'r-', label=amax) - plt.plot(latencies, -log_pvalues[amaxs].T / np.log(10), label=amaxs) + plt.plot(latencies, -log_pvalues[amax].T, 'r-', label=amax) + plt.plot(latencies, -log_pvalues[amaxs].T, label=amaxs) plt.axvline(0, color='k') plt.legend() - plt.xlabel('latencies (ms)') + plt.xlabel('latencies (s)') plt.ylabel('p-values') plt.savefig(f'{plot_name}_2.png') plt.clf() @@ -139,7 +139,7 @@ def do_gridsearch( """es = SensorExpressionSet( functions=function.name, - latencies=latencies, + latencies=latencies / 1000, sensors=sensor_names, data=p_to_logp(p_values), )""" diff --git a/kymata/io/mne.py b/kymata/io/mne.py index 8862363f..e157ae21 100644 --- a/kymata/io/mne.py +++ b/kymata/io/mne.py @@ -18,10 +18,13 @@ def load_single_emeg(emeg_path, need_names=False, inverse_operator=None, snr=4): else: evoked = read_evokeds(emeg_path_fif, verbose=False) # should be len 1 list if inverse_operator is not None: - lh_emeg, rh_emeg, ch_names = inverse_operate(evoked, inverse_operator, snr) + lh_emeg, rh_emeg, ch_names = inverse_operate(evoked[0], inverse_operator, snr) # TODO: I think ch_names here is the wrong thing emeg = np.concatenate((lh_emeg, rh_emeg), axis=0) - # TODO: test this, also OOM potentially + # TODO: currently this goes OOM (node-h04 atleast): + # looks like this will be faster when split up anyway + # note, don't run the inv_op twice for rh and lh! 
+ # TODO: move inverse operator to run after EMEG channel combination del lh_emeg, rh_emeg else: emeg = evoked[0].get_data() # numpy array shape (sensor_num, N) = (370, 403_001) @@ -34,10 +37,10 @@ def load_single_emeg(emeg_path, need_names=False, inverse_operator=None, snr=4): def inverse_operate(evoked, inverse_operator, snr=4): lambda2 = 1.0 / snr ** 2 - inverse_operator = minimum_norm.read_inverse_operator(inverse_operator) #, verbose=False) - set_eeg_reference(evoked, projection=True) #, verbose=False) - stc = minimum_norm.apply_inverse(evoked, inverse_operator, lambda2, 'MNE', pick_ori='normal') #, verbose=False) - return np.expand_dims(stc.lh_data, 1), np.expand_dims(stc.rh_data, 1), evoked.ch_names + inverse_operator = minimum_norm.read_inverse_operator(inverse_operator, verbose=False) + set_eeg_reference(evoked, projection=True, verbose=False) + stc = minimum_norm.apply_inverse(evoked, inverse_operator, lambda2, 'MNE', pick_ori='normal', verbose=False) + return stc.lh_data, stc.rh_data, evoked.ch_names def load_emeg_pack(emeg_paths, need_names=False, ave_mode=None, inverse_operator=None, p_tshift=None, snr=4): # TODO: FIX PRE-AVE-NORMALISATION if p_tshift is None: diff --git a/submit_gridsearch.sh b/submit_gridsearch.sh new file mode 100755 index 00000000..41763242 --- /dev/null +++ b/submit_gridsearch.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +### +# To run gridsearch on the queue at the CBU, run the following command in command line: +# sbatch submit_gridsearch.sh +### + + +#SBATCH --job-name=gridsearch +#SBATCH --output=slurm_log.txt +#SBATCH --error=slurm_log.txt +#SBATCH --ntasks=1 +#SBATCH --time=05:00:00 +#SBATCH --mem=1000 +#SBATCH --array=1-1 +#SBATCH --exclusive + +conda activate mne_venv + +args=(5) # 2 3 4 5 6 7 8 9 10) +ARG=${args[$SLURM_ARRAY_TASK_ID - 1]} + +python invokers/run_gridsearch.py + # --snr $ARG # >> result3.txt + +conda deactivate From 3b25a1f4dc2f36605ae7e760394c5e57712e0263 Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Thu, 4 Jan 2024 16:42:20 +0000 Subject: [PATCH 13/25] inv_op works without going OOM (on the queue) --- kymata/io/mne.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kymata/io/mne.py b/kymata/io/mne.py index e157ae21..bd4e1525 100644 --- a/kymata/io/mne.py +++ b/kymata/io/mne.py @@ -20,11 +20,15 @@ def load_single_emeg(emeg_path, need_names=False, inverse_operator=None, snr=4): if inverse_operator is not None: lh_emeg, rh_emeg, ch_names = inverse_operate(evoked[0], inverse_operator, snr) # TODO: I think ch_names here is the wrong thing - emeg = np.concatenate((lh_emeg, rh_emeg), axis=0) + + emeg = None #np.concatenate((lh_emeg, rh_emeg), axis=0) + # TODO: currently this goes OOM (node-h04 atleast): # looks like this will be faster when split up anyway # note, don't run the inv_op twice for rh and lh! 
# TODO: move inverse operator to run after EMEG channel combination + + emeg = lh_emeg del lh_emeg, rh_emeg else: emeg = evoked[0].get_data() # numpy array shape (sensor_num, N) = (370, 403_001) From b0e5e5a56bb5d242c39e545c91dbb39745f2e99c Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Thu, 4 Jan 2024 16:51:14 +0000 Subject: [PATCH 14/25] run_gridsearch works immediately --- invokers/run_gridsearch.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py index 7f79bf63..54ab205f 100644 --- a/invokers/run_gridsearch.py +++ b/invokers/run_gridsearch.py @@ -64,15 +64,17 @@ def main(): 'pilot_01', 'pilot_02'] - reps = [f'_rep{i}' for i in range(8)] + ['-ave']""" + reps = [f'_rep{i}' for i in range(8)] + ['-ave'] - emeg_paths = [Path(emeg_dir, p + r) for p in participants[:1] for r in reps[:2]] + emeg_paths = [Path(emeg_dir, p + r) for p in participants[1:2] for r in reps[:1]]""" + + inverse_operator = Path(args.base_dir, args.inverse_operator, f"{participants[0]}_ico5-3L-loose02-cps-nodepth.fif") # Load data emeg, ch_names = load_emeg_pack(emeg_paths, need_names=False, ave_mode=args.ave_mode, - inverse_operator=None, #args.inverse_operator, + inverse_operator=None, #inverse_operator, # set to None/inverse_operator if you want to run on sensor space/source space p_tshift=None, snr=args.snr) From be6d0792f88e375f89e15214aaf0055cca39eda0 Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Thu, 4 Jan 2024 16:57:15 +0000 Subject: [PATCH 15/25] run_gridsearch works immediately --- invokers/run_gridsearch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py index 54ab205f..310ffdf4 100644 --- a/invokers/run_gridsearch.py +++ b/invokers/run_gridsearch.py @@ -55,7 +55,7 @@ def main(): emeg_dir = Path(args.base_dir, args.data_path) emeg_paths = [Path(emeg_dir, args.emeg_file)] - """participants = ['participant_01', + participants = ['participant_01', 'participant_01b', 'participant_02', 'participant_03', @@ -66,7 +66,7 @@ def main(): reps = [f'_rep{i}' for i in range(8)] + ['-ave'] - emeg_paths = [Path(emeg_dir, p + r) for p in participants[1:2] for r in reps[:1]]""" + # emeg_paths = [Path(emeg_dir, p + r) for p in participants[1:2] for r in reps[:1]] inverse_operator = Path(args.base_dir, args.inverse_operator, f"{participants[0]}_ico5-3L-loose02-cps-nodepth.fif") From 92301d0a9dd18abe0a33904055054f17c2516c6a Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Thu, 4 Jan 2024 17:12:51 +0000 Subject: [PATCH 16/25] cosmetic changes --- kymata/gridsearch/gridsearch.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index 22ec17ba..1f6bb492 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -46,7 +46,7 @@ def do_gridsearch( n_channels = emeg_values.shape[0] # Reshape EMEG into splits of `seconds_per_split` s - split_initial_timesteps = [start_latency + round(i * 1000 * seconds_per_split * (1 + audio_shift_correction)) - emeg_t_start + split_initial_timesteps = [int(start_latency + round(i * 1000 * seconds_per_split * (1 + audio_shift_correction)) - emeg_t_start) for i in range(n_splits)] emeg_reshaped = np.zeros((n_channels, n_splits * n_reps, n_samples_per_split)) @@ -116,8 +116,6 @@ def do_gridsearch( auto_corrs = np.mean(auto_corrs, axis=0) plt.plot(latencies, np.roll(auto_corrs, peak_lat_ind) * peak_corr / np.max(auto_corrs), 
'k--', label='func auto-corr') - - #plt.plot(latencies, np.mean(corrs[0:300:15, 1], axis=-2).T, 'cyan') plt.axvline(0, color='k') plt.legend() plt.xlabel('latencies (s)') From 049ba7bc9e2f98d7c74de9819c4436d6b2ab99e0 Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Fri, 5 Jan 2024 13:04:44 +0000 Subject: [PATCH 17/25] create_trialwise_sensorspace working now --- invokers/run_preprocessing_dataset4.py | 9 ++++++--- kymata/gridsearch/gridsearch.py | 2 +- kymata/preproc/pipeline.py | 7 +++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/invokers/run_preprocessing_dataset4.py b/invokers/run_preprocessing_dataset4.py index 40f23ec2..ab594a77 100644 --- a/invokers/run_preprocessing_dataset4.py +++ b/invokers/run_preprocessing_dataset4.py @@ -1,12 +1,15 @@ from pathlib import Path +import sys +sys.path.append('/imaging/projects/cbu/kymata/analyses/ollie/kymata-toolbox') + from kymata.io.yaml import load_config from kymata.preproc.pipeline import run_preprocessing, create_trials, create_trialwise_data # noinspection DuplicatedCode def main(): - config = load_config(str(Path(Path(__file__).parent, "kymata", "config", "dataset4.yaml"))) + config = load_config(str(Path(Path(__file__).parent.parent, "kymata", "config", "dataset4.yaml"))) """create_trials( dataset_directory_name=config['dataset_directory_name'], @@ -28,7 +31,7 @@ def main(): tmax=config['tmax'], )""" - run_preprocessing( + """run_preprocessing( list_of_participants=config['list_of_participants'], dataset_directory_name=config['dataset_directory_name'], n_runs=config['number_of_runs'], @@ -36,7 +39,7 @@ def main(): skip_maxfilter_if_previous_runs_exist=config['skip_maxfilter_if_previous_runs_exist'], remove_veoh_and_heog=config['remove_VEOH_and_HEOG'], automatic_bad_channel_detection_requested=config['automatic_bad_channel_detection_requested'], - ) + )""" create_trialwise_data( dataset_directory_name=config['dataset_directory_name'], diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index 1f6bb492..7d75de25 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -111,7 +111,7 @@ def do_gridsearch( peak_lat_ind = np.argmax(-log_pvalues) % (n_samples_per_split // 2) peak_lat = latencies[peak_lat_ind] peak_corr = np.mean(corrs[amax, 0], axis=-2)[peak_lat_ind] - print('peak lat, peak corr:', peak_lat, peak_corr) + print(f'{function.name}: peak lat, peak corr:', peak_lat, peak_corr) auto_corrs = np.mean(auto_corrs, axis=0) plt.plot(latencies, np.roll(auto_corrs, peak_lat_ind) * peak_corr / np.max(auto_corrs), 'k--', label='func auto-corr') diff --git a/kymata/preproc/pipeline.py b/kymata/preproc/pipeline.py index 5167d6d1..5e3a1dd1 100755 --- a/kymata/preproc/pipeline.py +++ b/kymata/preproc/pipeline.py @@ -1,6 +1,6 @@ import os.path -from colorama import Fore +from colorama import Fore, Style import mne from numpy import zeros @@ -259,6 +259,9 @@ def create_trialwise_data(dataset_directory_name: str, global_droplog = [] + data_path = '/imaging/projects/cbu/kymata/data/' + dataset_directory_name + save_folder = '3_trialwise_sensorspace' + print(f"{Fore.GREEN}{Style.BRIGHT}Starting trialwise data creation{Style.RESET_ALL}") for p in list_of_participants: @@ -306,7 +309,7 @@ def create_trialwise_data(dataset_directory_name: str, # Correct for audio latency error audio_events_raw = mne.event.shift_time_events(audio_events_raw, [3], audio_delivery_latency, 1) - audio_events = np.zeros((len(audio_events_raw) * 400, 3), dtype=int) + audio_events = zeros((len(audio_events_raw) *
400, 3), dtype=int) for run, item in enumerate(audio_events_raw): print('item: ', item) audio_events_raw[run][2] = run # rename the audio events raw to pick apart later From 7f7207179ac65635826bfa3538f8cedfafaad2cf Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Tue, 9 Jan 2024 19:23:30 +0000 Subject: [PATCH 18/25] fixed a normalisation error in corr calculation, added bruce model as possible test function --- invokers/run_gridsearch.py | 14 ++++++---- kymata/gridsearch/gridsearch.py | 26 +++++++++--------- kymata/io/functions.py | 47 ++++++++++++++++++++++----------- kymata/math/vector.py | 18 ++++++++++--- 4 files changed, 70 insertions(+), 35 deletions(-) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py index 310ffdf4..f465a575 100644 --- a/invokers/run_gridsearch.py +++ b/invokers/run_gridsearch.py @@ -16,7 +16,7 @@ def main(): parser = argparse.ArgumentParser(description='Gridsearch Params') parser.add_argument('--emeg_sample_rate', type=int, default=1000, - help='sampling rate of the emeg machine (always 1000 I thought?)') + help='sampling rate of the emeg machine (not implemented yet)') parser.add_argument('--snr', type=float, default=3, help='inverse solution snr') parser.add_argument('--downsample_rate', type=int, default=5, @@ -51,7 +51,6 @@ def main(): args.base_dir = Path(args.base_dir) - emeg_dir = Path(args.base_dir, args.data_path) emeg_paths = [Path(emeg_dir, args.emeg_file)] @@ -66,10 +65,15 @@ def main(): reps = [f'_rep{i}' for i in range(8)] + ['-ave'] - # emeg_paths = [Path(emeg_dir, p + r) for p in participants[1:2] for r in reps[:1]] + # emeg_paths = [Path(emeg_dir, p + r) for p in participants[:2] for r in reps[-1:]] inverse_operator = Path(args.base_dir, args.inverse_operator, f"{participants[0]}_ico5-3L-loose02-cps-nodepth.fif") + # args.function_path = 'predicted_function_contours/Bruce_model/neurogramResults' + # args.function_name = 'neurogram_mr' + #args.n_splits = 800 + #args.seconds_per_split = 0.5 + # Load data emeg, ch_names = load_emeg_pack(emeg_paths, need_names=False, @@ -79,7 +83,8 @@ def main(): snr=args.snr) func = load_function(Path(args.base_dir, args.function_path), - func_name=args.function_name) + func_name=args.function_name, + bruce_neurons=(5, 10)) func = func.downsampled(args.downsample_rate) es = do_gridsearch( @@ -98,6 +103,5 @@ def main(): # expression_plot(es) - if __name__ == '__main__': main() diff --git a/kymata/gridsearch/gridsearch.py b/kymata/gridsearch/gridsearch.py index 7d75de25..defb95da 100644 --- a/kymata/gridsearch/gridsearch.py +++ b/kymata/gridsearch/gridsearch.py @@ -4,7 +4,7 @@ from kymata.entities.functions import Function from kymata.math.combinatorics import generate_derangement -from kymata.math.vector import normalize +from kymata.math.vector import normalize, get_stds #from kymata.entities.expression import SensorExpressionSet, p_to_logp import matplotlib.pyplot as plt @@ -65,13 +65,14 @@ def do_gridsearch( derangements = np.vstack((np.arange(n_splits * n_reps), derangements)) # Include the identity on top # Fast cross-correlation using FFT - emeg_reshaped = np.fft.rfft(normalize(emeg_reshaped), n=n_samples_per_split, axis=-1) + emeg_reshaped = normalize(emeg_reshaped) + emeg_stds = get_stds(emeg_reshaped, n_samples_per_split // 2) + emeg_reshaped = np.fft.rfft(emeg_reshaped, n=n_samples_per_split, axis=-1) F_func = np.conj(np.fft.rfft(normalize(func), n=n_samples_per_split, axis=-1)) corrs = np.zeros((n_channels, n_derangements + 1, n_splits * n_reps, n_samples_per_split // 2)) for der_i, 
derangement in enumerate(derangements): deranged_emeg = emeg_reshaped[:, derangement, :] - corrs[:, der_i] = np.fft.irfft(deranged_emeg * F_func)[:, :, :n_samples_per_split//2] - + corrs[:, der_i] = np.fft.irfft(deranged_emeg * F_func)[:, :, :n_samples_per_split//2] / emeg_stds[:, derangement] if add_autocorr: auto_corrs = np.zeros((n_splits, n_samples_per_split//2)) @@ -92,11 +93,12 @@ def do_gridsearch( if plot_name: plt.figure(1) - maxs = np.max(-log_pvalues[:, :], axis=1) + corr_avrs = np.mean(corrs[:, 0]**2, axis=-2) + maxs = np.max(corr_avrs, axis=1) n_amaxs = 5 amaxs = np.argpartition(maxs, -n_amaxs)[-n_amaxs:] - amax = np.argmax(-log_pvalues) // (n_samples_per_split // 2) - amaxs = [i for i in amaxs if i != amax] + [206] + amax = np.argmax(corr_avrs) // (n_samples_per_split // 2) + amaxs = [i for i in amaxs if i != amax] # + [209] plt.plot(latencies, np.mean(corrs[amax, 0], axis=-2).T, 'r-', label=amax) plt.plot(latencies, np.mean(corrs[amaxs, 0], axis=-2).T, label=amaxs) @@ -108,17 +110,17 @@ def do_gridsearch( plt.fill_between(latencies, av_real - std_real, av_real + std_real, alpha=0.25, color='red') if add_autocorr: - peak_lat_ind = np.argmax(-log_pvalues) % (n_samples_per_split // 2) + peak_lat_ind = np.argmax(corr_avrs) % (n_samples_per_split // 2) peak_lat = latencies[peak_lat_ind] peak_corr = np.mean(corrs[amax, 0], axis=-2)[peak_lat_ind] - print(f'{function.name}: peak lat, peak corr:', peak_lat, peak_corr) + print(f'{function.name}: peak lat, peak corr, ind:', peak_lat, peak_corr, amax) auto_corrs = np.mean(auto_corrs, axis=0) plt.plot(latencies, np.roll(auto_corrs, peak_lat_ind) * peak_corr / np.max(auto_corrs), 'k--', label='func auto-corr') plt.axvline(0, color='k') plt.legend() - plt.xlabel('latencies (s)') + plt.xlabel('latencies (ms)') plt.ylabel('Corr coef.') plt.savefig(f'{plot_name}_1.png') plt.clf() @@ -128,7 +130,7 @@ def do_gridsearch( plt.plot(latencies, -log_pvalues[amaxs].T, label=amaxs) plt.axvline(0, color='k') plt.legend() - plt.xlabel('latencies (s)') + plt.xlabel('latencies (ms)') plt.ylabel('p-values') plt.savefig(f'{plot_name}_2.png') plt.clf() @@ -139,7 +141,7 @@ def do_gridsearch( functions=function.name, latencies=latencies / 1000, sensors=sensor_names, - data=p_to_logp(p_values), + data=log_pvalues, )""" return es diff --git a/kymata/io/functions.py b/kymata/io/functions.py index 549232e1..6a9f84c5 100644 --- a/kymata/io/functions.py +++ b/kymata/io/functions.py @@ -1,6 +1,7 @@ from pathlib import Path -from numpy import array, float16, convolve +from numpy import array, float16, convolve, mean +import numpy as np from numpy.typing import NDArray from h5py import File from scipy.io import loadmat @@ -9,26 +10,42 @@ from kymata.io.file import path_type -def load_function(function_path_without_suffix: path_type, func_name: str, n_derivatives: int = 0) -> Function: +def load_function(function_path_without_suffix: path_type, func_name: str, n_derivatives: int = 0, bruce_neurons: tuple = (0, 10)) -> Function: function_path_without_suffix = Path(function_path_without_suffix) func: NDArray - if function_path_without_suffix.with_suffix(".h5").exists(): - with File(function_path_without_suffix.with_suffix(".h5"), "r") as f: - func = array(f[func_name]) + if 'neurogram' in func_name: + print('USING BRUCE MODEL') + mat = loadmat(function_path_without_suffix.with_suffix('.mat'))['neurogramResults'] + func = np.array(mat[func_name][0][0]) + func = np.mean(func[bruce_neurons[0]:bruce_neurons[1]], axis=0) + tt = np.array(mat['t_'+func_name[-2:]][0][0]) + + 
T_end = tt[0, -1] + if func_name == 'neurogram_mr': + func = np.interp(np.linspace(0, 400, 400_000 + 1)[:-1], tt[0], func, ) + elif func_name == 'neurogram_ft': + func = np.cumsum(func * (tt[0, 1] - tt[0, 0]), axis=-1) + func = np.interp(np.linspace(0, 400, 400_000 + 1)[:], tt[0], func) + func = np.diff(func, axis=-1) + else: - mat = loadmat(str(function_path_without_suffix.with_suffix(".mat")))['stimulisig'] - with File(function_path_without_suffix.with_suffix(".h5"), 'w') as f: - for key in mat.dtype.names: - if key == 'name': - continue - f.create_dataset(key, data=array(mat[key][0, 0], dtype=float16)) - func = array(mat[func_name][0][0]) + if function_path_without_suffix.with_suffix(".h5").exists(): + with File(function_path_without_suffix.with_suffix(".h5"), "r") as f: + func = np.array(f[func_name]) + else: + mat = loadmat(str(function_path_without_suffix.with_suffix(".mat")))['stimulisig'] + with File(function_path_without_suffix.with_suffix(".h5"), 'w') as f: + for key in mat.dtype.names: + if key == 'name': + continue + f.create_dataset(key, data=np.array(mat[key][0, 0], dtype=np.float16)) + func = np.array(mat[func_name][0][0]) - if func_name in ('STL', 'IL', 'LTL'): - func = func.T + if func_name in ('STL', 'IL', 'LTL'): + func = func.T for _ in range(n_derivatives): - func = convolve(func, [-1, 1], 'same') # derivative + func = np.convolve(func, [-1, 1], 'same') # derivative return Function( name=func_name, diff --git a/kymata/math/vector.py b/kymata/math/vector.py index 177e2cc2..88b18faf 100644 --- a/kymata/math/vector.py +++ b/kymata/math/vector.py @@ -1,4 +1,4 @@ -from numpy import mean, sum, sqrt +import numpy as np from numpy.typing import NDArray @@ -6,6 +6,18 @@ def normalize(x: NDArray) -> NDArray: """ Remove the mean and divide by the Euclidean magnitude. 
""" - x -= mean(x, axis=-1, keepdims=True) - x /= sqrt(sum(x**2, axis=-1, keepdims=True)) + x -= np.mean(x, axis=-1, keepdims=True) + x /= np.sqrt(np.sum(x**2, axis=-1, keepdims=True)) return x + + +def get_stds(x, n): + """ + Get the stds (times sqrt(n)) to correct for normalisation difference between each set of n_samples + """ + d0, d1, d2 = x.shape + y = np.concatenate((np.zeros((d0, d1, 1)), np.cumsum(x**2, axis=-1)), axis=-1) + z = np.concatenate((np.zeros((d0, d1, 1)), np.cumsum(x, axis=-1)), axis=-1) + y = y[:,:,-n-1:-1] - y[:,:,:n] + z = z[:,:,-n-1:-1] - z[:,:,:n] + return (y - ((z**2) / n)) ** 0.5 From f9c319e2e7d82d4e8cbe0560f85632f7e52c46bd Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Tue, 9 Jan 2024 20:11:44 +0000 Subject: [PATCH 19/25] moved over to npz flies (no more h5) --- kymata/io/functions.py | 57 +++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/kymata/io/functions.py b/kymata/io/functions.py index 6a9f84c5..b5dc013e 100644 --- a/kymata/io/functions.py +++ b/kymata/io/functions.py @@ -15,30 +15,47 @@ def load_function(function_path_without_suffix: path_type, func_name: str, n_der func: NDArray if 'neurogram' in func_name: print('USING BRUCE MODEL') - mat = loadmat(function_path_without_suffix.with_suffix('.mat'))['neurogramResults'] - func = np.array(mat[func_name][0][0]) - func = np.mean(func[bruce_neurons[0]:bruce_neurons[1]], axis=0) - tt = np.array(mat['t_'+func_name[-2:]][0][0]) - - T_end = tt[0, -1] - if func_name == 'neurogram_mr': - func = np.interp(np.linspace(0, 400, 400_000 + 1)[:-1], tt[0], func, ) - elif func_name == 'neurogram_ft': - func = np.cumsum(func * (tt[0, 1] - tt[0, 0]), axis=-1) - func = np.interp(np.linspace(0, 400, 400_000 + 1)[:], tt[0], func) - func = np.diff(func, axis=-1) + if function_path_without_suffix.with_suffix(".npz").exists(): + func_dict = np.load(function_path_without_suffix.with_suffix(".npz")) + func = func_dict[func_name] + func = np.mean(func[bruce_neurons[0]:bruce_neurons[1]], axis=0) + else: + mat = loadmat(function_path_without_suffix.with_suffix('.mat'))['neurogramResults'] + func = np.array(mat[func_name][0][0]) + # func = np.mean(func[bruce_neurons[0]:bruce_neurons[1]], axis=0) + tt = np.array(mat['t_'+func_name[-2:]][0][0]) + n_chans = func.shape[0] + new_mr_arr = np.zeros((n_chans, 400_000)) + new_ft_arr = np.zeros((n_chans, 400_000)) + T_end = tt[0, -1] + if func_name == 'neurogram_mr': + for i in range(n_chans): + new_mr_arr[i] = np.interp(np.linspace(0, 400, 400_000 + 1)[:-1], tt[0], func[i]) + elif func_name == 'neurogram_ft': + func = np.cumsum(func * (tt[0, 1] - tt[0, 0]), axis=-1) + for i in range(n_chans): + func_interp = np.interp(np.linspace(0, 400, 400_000 + 1), tt[0], func[i]) + new_ft_arr[i] = np.diff(func_interp, axis=-1) + + func_dict = {'neurogram_mr': new_mr_arr, + 'neurogram_ft': new_ft_arr} + np.savez(str(function_path_without_suffix.with_suffix(".npz")), **func_dict) + + func = func_dict[func_name] + func = np.mean(func[bruce_neurons[0]:bruce_neurons[1]], axis=0) else: - if function_path_without_suffix.with_suffix(".h5").exists(): - with File(function_path_without_suffix.with_suffix(".h5"), "r") as f: - func = np.array(f[func_name]) + if function_path_without_suffix.with_suffix(".npz").exists(): + func_dict = np.load(str(function_path_without_suffix.with_suffix(".npz"))) + func = np.array(func_dict[func_name]) else: mat = loadmat(str(function_path_without_suffix.with_suffix(".mat")))['stimulisig'] - with 
File(function_path_without_suffix.with_suffix(".h5"), 'w') as f: - for key in mat.dtype.names: - if key == 'name': - continue - f.create_dataset(key, data=np.array(mat[key][0, 0], dtype=np.float16)) + func_dict = {} + for key in mat.dtype.names: + if key == 'name': + continue + func_dict[key] = np.array(mat[key][0, 0], dtype=np.float16) + np.savez(str(function_path_without_suffix.with_suffix(".npz")), **func_dict) func = np.array(mat[func_name][0][0]) if func_name in ('STL', 'IL', 'LTL'): From 88f36fba0f52612b1c7c4bdabc0df3f8f5a23771 Mon Sep 17 00:00:00 2001 From: Oliver Parish Date: Tue, 9 Jan 2024 20:12:28 +0000 Subject: [PATCH 20/25] removed h5 import --- kymata/io/functions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kymata/io/functions.py b/kymata/io/functions.py index b5dc013e..92f99e1c 100644 --- a/kymata/io/functions.py +++ b/kymata/io/functions.py @@ -3,7 +3,6 @@ from numpy import array, float16, convolve, mean import numpy as np from numpy.typing import NDArray -from h5py import File from scipy.io import loadmat from kymata.entities.functions import Function From 019451aa056055e097604edac29bb98f93e60be2 Mon Sep 17 00:00:00 2001 From: Cai Wingfield Date: Fri, 12 Jan 2024 15:46:52 +0000 Subject: [PATCH 21/25] Rename preproc/pipeline.py --- invokers/run_preprocessing_dataset4.py | 2 +- kymata/preproc/{pipeline.py => data_cleansing.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename kymata/preproc/{pipeline.py => data_cleansing.py} (100%) diff --git a/invokers/run_preprocessing_dataset4.py b/invokers/run_preprocessing_dataset4.py index ab594a77..4b84c297 100644 --- a/invokers/run_preprocessing_dataset4.py +++ b/invokers/run_preprocessing_dataset4.py @@ -4,7 +4,7 @@ sys.path.append('/imaging/projects/cbu/kymata/analyses/ollie/kymata-toolbox') from kymata.io.yaml import load_config -from kymata.preproc.pipeline import run_preprocessing, create_trials, create_trialwise_data +from kymata.preproc.data_cleansing import run_preprocessing, create_trials, create_trialwise_data # noinspection DuplicatedCode diff --git a/kymata/preproc/pipeline.py b/kymata/preproc/data_cleansing.py similarity index 100% rename from kymata/preproc/pipeline.py rename to kymata/preproc/data_cleansing.py From 8d878a276077a8e026b5667fbd8286d2df95a58f Mon Sep 17 00:00:00 2001 From: neukym Date: Fri, 12 Jan 2024 16:26:03 +0000 Subject: [PATCH 22/25] Update kymata/entities/sparse_data.py Co-authored-by: Cai --- kymata/entities/sparse_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kymata/entities/sparse_data.py b/kymata/entities/sparse_data.py index 67eb6117..006116fd 100755 --- a/kymata/entities/sparse_data.py +++ b/kymata/entities/sparse_data.py @@ -1,6 +1,6 @@ from __future__ import annotations -from numpy import ndarray, nanmin, greater +from numpy import nanmin, greater from numpy.typing import NDArray import sparse from xarray import Dataset From 6318e8905d7fc74e2c9ceeec31d212aa90610c33 Mon Sep 17 00:00:00 2001 From: neukym Date: Fri, 12 Jan 2024 16:32:05 +0000 Subject: [PATCH 23/25] Update invokers/run_gridsearch.py Co-authored-by: Cai --- invokers/run_gridsearch.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py index f465a575..2d2c7546 100644 --- a/invokers/run_gridsearch.py +++ b/invokers/run_gridsearch.py @@ -3,13 +3,10 @@ import sys sys.path.append('/imaging/projects/cbu/kymata/analyses/ollie/kymata-toolbox') -import numpy as np import argparse -from numpy.typing import NDArray from 
kymata.gridsearch.gridsearch import do_gridsearch from kymata.io.functions import load_function from kymata.io.mne import load_emeg_pack -# from kymata.plot.plotting import expression_plot def main(): From 987cd21ce7844c18a9fb58e2563a57b7a46fa56e Mon Sep 17 00:00:00 2001 From: neukym Date: Fri, 12 Jan 2024 16:36:03 +0000 Subject: [PATCH 24/25] Update invokers/run_gridsearch.py Co-authored-by: Cai --- invokers/run_gridsearch.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py index 2d2c7546..18cdde49 100644 --- a/invokers/run_gridsearch.py +++ b/invokers/run_gridsearch.py @@ -66,11 +66,6 @@ def main(): inverse_operator = Path(args.base_dir, args.inverse_operator, f"{participants[0]}_ico5-3L-loose02-cps-nodepth.fif") - # args.function_path = 'predicted_function_contours/Bruce_model/neurogramResults' - # args.function_name = 'neurogram_mr' - #args.n_splits = 800 - #args.seconds_per_split = 0.5 - # Load data emeg, ch_names = load_emeg_pack(emeg_paths, need_names=False, From e9b404cebd783153f2dce97e616c2d59309ca7de Mon Sep 17 00:00:00 2001 From: neukym Date: Fri, 12 Jan 2024 16:49:02 +0000 Subject: [PATCH 25/25] Changes following PR review. --- invokers/run_gridsearch.py | 4 ---- invokers/run_preprocessing_dataset4.py | 28 -------------------------- kymata/preproc/data_cleansing.py | 26 ++++++++++++------------ 3 files changed, 13 insertions(+), 45 deletions(-) diff --git a/invokers/run_gridsearch.py b/invokers/run_gridsearch.py index 2d2c7546..23362ab5 100644 --- a/invokers/run_gridsearch.py +++ b/invokers/run_gridsearch.py @@ -1,8 +1,4 @@ from pathlib import Path - -import sys -sys.path.append('/imaging/projects/cbu/kymata/analyses/ollie/kymata-toolbox') - import argparse from kymata.gridsearch.gridsearch import do_gridsearch from kymata.io.functions import load_function diff --git a/invokers/run_preprocessing_dataset4.py b/invokers/run_preprocessing_dataset4.py index 4b84c297..bd342134 100644 --- a/invokers/run_preprocessing_dataset4.py +++ b/invokers/run_preprocessing_dataset4.py @@ -11,34 +11,6 @@ def main(): config = load_config(str(Path(Path(__file__).parent.parent, "kymata", "config", "dataset4.yaml"))) - """create_trials( - dataset_directory_name=config['dataset_directory_name'], - list_of_participants=config['list_of_participants'], - repetitions_per_runs=config['repetitions_per_runs'], - number_of_runs=config['number_of_runs'], - number_of_trials=config['number_of_trials'], - input_streams=config['input_streams'], - eeg_thresh=float(config['eeg_thresh']), - grad_thresh=float(config['grad_thresh']), - mag_thresh=float(config['mag_thresh']), - visual_delivery_latency=config['visual_delivery_latency'], - audio_delivery_latency=config['audio_delivery_latency'], - audio_delivery_shift_correction=config['audio_delivery_shift_correction'], - tmin=config['tmin'], - tmax=config['tmax'], - )""" - - """run_preprocessing( - list_of_participants=config['list_of_participants'], - dataset_directory_name=config['dataset_directory_name'], - n_runs=config['number_of_runs'], - emeg_machine_used_to_record_data=config['EMEG_machine_used_to_record_data'], - remove_ecg=config['remove_ECG'], - skip_maxfilter_if_previous_runs_exist=config['skip_maxfilter_if_previous_runs_exist'], - remove_veoh_and_heog=config['remove_VEOH_and_HEOG'], - automatic_bad_channel_detection_requested=config['automatic_bad_channel_detection_requested'], - )""" - create_trialwise_data( dataset_directory_name=config['dataset_directory_name'], 
list_of_participants=config['list_of_participants'], diff --git a/kymata/preproc/data_cleansing.py b/kymata/preproc/data_cleansing.py index c6af03c9..75654835 100755 --- a/kymata/preproc/data_cleansing.py +++ b/kymata/preproc/data_cleansing.py @@ -339,7 +339,7 @@ def create_trialwise_data(dataset_directory_name: str, raw = mne.io.concatenate_raws(raws=cleaned_raws, preload=True) raw_events = mne.find_events(raw, stim_channel='STI101', shortest_event=1) #, uint_cast=True) - """print(f"{Fore.GREEN}{Style.BRIGHT}...finding visual events{Style.RESET_ALL}") + print(f"{Fore.GREEN}{Style.BRIGHT}...finding visual events{Style.RESET_ALL}") # Extract visual events visual_events = mne.pick_events(raw_events, include=[2, 3]) @@ -347,16 +347,16 @@ def create_trialwise_data(dataset_directory_name: str, # Correct for visual equipment latency error visual_events = mne.event.shift_time_events(visual_events, [2, 3], visual_delivery_latency, 1) - trigger_name = 1 - for trigger in visual_events: - trigger[2] = trigger_name # rename as '1,2,3 ...400' etc - if trigger_name == 400: - trigger_name = 1 - else: - trigger_name = trigger_name + 1 - - # Test there are the correct number of events - assert visual_events.shape[0] == repetitions_per_runs * number_of_runs * number_of_trials""" + # trigger_name = 1 + # for trigger in visual_events: + # trigger[2] = trigger_name # rename as '1,2,3 ...400' etc + # if trigger_name == 400: + # trigger_name = 1 + # else: + # trigger_name = trigger_name + 1 + # + # # Test there are the correct number of events + # assert visual_events.shape[0] == repetitions_per_runs * number_of_runs * number_of_trials print(f"{Fore.GREEN}{Style.BRIGHT}...finding audio events{Style.RESET_ALL}") @@ -390,8 +390,8 @@ def create_trialwise_data(dataset_directory_name: str, for input_stream in input_streams: # TODO n.b. not set up for visual/tactile stream yet if input_stream == 'auditory': events = audio_events - else: - events = visual_events + #else: + # events = visual_events # Extract trial instances ('epochs')
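A closing note on the numerical core of the series: the get_stds helper added to kymata/math/vector.py in patch 18 returns sqrt(n) times the standard deviation of every length-n sliding window, computed from two cumulative sums rather than an explicit loop, which is what lets the FFT cross-correlations be renormalised per latency cheaply. A small self-check of that identity (the input shape and the brute-force reference here are illustrative):

import numpy as np


def get_stds(x, n):
    # sqrt(n) * std of every length-n sliding window, via cumulative sums
    d0, d1, _ = x.shape
    y = np.concatenate((np.zeros((d0, d1, 1)), np.cumsum(x**2, axis=-1)), axis=-1)
    z = np.concatenate((np.zeros((d0, d1, 1)), np.cumsum(x, axis=-1)), axis=-1)
    y = y[:, :, -n-1:-1] - y[:, :, :n]  # windowed sums of x**2
    z = z[:, :, -n-1:-1] - z[:, :, :n]  # windowed sums of x
    return (y - (z**2) / n) ** 0.5


x = np.random.randn(2, 3, 10)  # last axis is 2 * n, as in the gridsearch splits
n = 5
ref = np.stack([x[..., k:k + n].std(axis=-1) * np.sqrt(n) for k in range(n)], axis=-1)
assert np.allclose(get_stds(x, n), ref)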