Gridsearch nkg #127

Merged
42 commits merged on Jan 27, 2024
Changes from all commits (42 commits)
8071bf7
Style
caiw Jan 15, 2024
77906f2
Gridsearch saves nkg files and uses standard expression-plotting
caiw Jan 15, 2024
8e311fc
Better error logging
caiw Jan 15, 2024
0340f30
Clean up style
caiw Jan 15, 2024
025cfe1
Try force getting names
caiw Jan 15, 2024
20e48be
Unpair axes on sensor gridsearch
caiw Jan 15, 2024
bbf013c
Optionally overwrite existing files
caiw Jan 15, 2024
d76b735
Fixed a bug which caused erroneous FileExistsErrors when saving figures
caiw Jan 15, 2024
0dc42fb
Review fix: Move default output location to 'output' dir
caiw Jan 15, 2024
729c475
Review fix: move Ollie's paths out of argument defaults and into the …
caiw Jan 15, 2024
887748e
More usual to have hyphens rather than underscores in cli args
caiw Jan 15, 2024
8ad3a1e
Review fix: add reality check plots back in
caiw Jan 15, 2024
8784099
sample invoker arguments in a script
caiw Jan 15, 2024
a4d58ca
Save reality-check plots in output location
caiw Jan 15, 2024
de0f5e0
Bugfix
caiw Jan 15, 2024
2c2c85c
Review fix: create `output` dir if it doesn't already exist
caiw Jan 15, 2024
cc2eb95
Add inverse operator into invoker script
caiw Jan 15, 2024
679e561
Progress bar code (currently unused)
caiw Jan 15, 2024
9d08231
Comment update
caiw Jan 15, 2024
5f9636d
Default sample data location needs to be created if it doesn't alread…
caiw Jan 15, 2024
0bc50f9
Reorder lines for clarity
caiw Jan 15, 2024
0218aec
Move all plotting to plot.py (#130)
neukym Jan 15, 2024
5e0fa44
Bug fix for gridsearch figure
neukym Jan 20, 2024
220fd5c
Update plot.py
neukym Jan 20, 2024
d23c778
Update mne.py
neukym Jan 20, 2024
feac249
Update run_gridsearch.py
neukym Jan 20, 2024
ea6d6ef
Update mne.py
neukym Jan 20, 2024
5e6b70c
Update mne.py
neukym Jan 20, 2024
813d5e1
Fix hexel number bug
neukym Jan 21, 2024
1eabf8b
Update run_gridsearch.py
neukym Jan 21, 2024
68262ed
add function name to beginning of outputs
neukym Jan 22, 2024
991f2e1
Got poetry to work with CBU slurm setup
neukym Jan 22, 2024
819ea81
Adds 'time taken' to output
neukym Jan 22, 2024
0ab9c75
Minor changes
neukym Jan 24, 2024
087ec5f
couple of super minor changes
Jan 24, 2024
16064c7
a couple more changes, added in inverse_operator_name
Jan 24, 2024
d3d6dec
Removes "requirements" from invoker
neukym Jan 26, 2024
1229f89
Adds ability to do both hemispheres when inverse operator is selected
neukym Jan 26, 2024
67a23ec
Remove comment from plain
neukym Jan 26, 2024
f71c148
Remove hard-coded path to Andy's toolbox install
caiw Jan 26, 2024
dc0433d
bugfix
neukym Jan 27, 2024
346a4d1
Update submit_gridsearch.sh
neukym Jan 27, 2024
113 changes: 74 additions & 39 deletions invokers/run_gridsearch.py
@@ -1,95 +1,130 @@
from pathlib import Path
import argparse
import time

from kymata.datasets.data_root import data_root_path
from kymata.gridsearch.plain import do_gridsearch
from kymata.io.functions import load_function
from kymata.io.mne import load_emeg_pack
from kymata.io.nkg import save_expression_set
from kymata.plot.plot import expression_plot

_default_output_dir = Path(data_root_path(), "output")


def main():

_default_output_dir.mkdir(exist_ok=True, parents=False)

parser = argparse.ArgumentParser(description='Gridsearch Params')
parser.add_argument('--emeg_sample_rate', type=int, default=1000,
parser.add_argument('--emeg-sample-rate', type=int, default=1000,
help='sampling rate of the emeg machine (not implemented yet)')
parser.add_argument('--snr', type=float, default=3,
help='inverse solution snr')
parser.add_argument('--downsample_rate', type=int, default=5,
help='downsample_rate')
parser.add_argument('--base_dir', type=str, default="/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/",
help='base data directory')
parser.add_argument('--data_path', type=str, default="intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data",
help='data path after base dir')
parser.add_argument('--function_path', type=str, default="predicted_function_contours/GMSloudness/stimulisig",
help='snr')
parser.add_argument('--function_name', type=str, default="d_IL2",
help='function name in stimulisig')
parser.add_argument('--emeg_file', type=str, default="participant_01-ave",
help='emeg_file_name')
parser.add_argument('--ave_mode', type=str, default="ave",
parser.add_argument('--snr', type=float, default=3, help='inverse solution snr')
parser.add_argument('--downsample-rate', type=int, default=5, help='downsample_rate')
parser.add_argument('--base-dir', type=str, default='/imaging/projects/cbu/kymata/data/dataset_4-english-narratives/', help='base data directory')
parser.add_argument('--data-path', type=str, default='intrim_preprocessing_files/3_trialwise_sensorspace/evoked_data', help='data path after base dir')
parser.add_argument('--function-path', type=str, default='predicted_function_contours/GMSloudness/stimulisig', help='location of function stimulisig')
parser.add_argument('--save-expression-set-location', type=Path, default=Path(_default_output_dir),
help="Save the results of the gridsearch into an ExpressionSet .nkg file")
parser.add_argument('--save-plot-location', type=Path, default=Path(_default_output_dir),
help="Save an expression plots, and other plots, in this location")
parser.add_argument('--overwrite', action="store_true", help="Silently overwrite existing files.")
parser.add_argument('--function-name', type=str, default="IL", help='function name in stimulisig')
parser.add_argument('--emeg-file', type=str, default="participant_01-ave", help='emeg_file_name')
parser.add_argument('--ave-mode', type=str, default="ave",
help='either ave or add, either average over the list of repetitions or treat them as extra data')
parser.add_argument('--inverse_operator', type=str, default="intrim_preprocessing_files/4_hexel_current_reconstruction/inverse-operators",
help='inverse solution path')
parser.add_argument('--seconds_per_split', type=float, default=0.5,
parser.add_argument('--inverse-operator-dir', type=str, default=None, help='inverse solution path')
parser.add_argument('--inverse-operator-name', type=str, default="participant_01_ico5-3L-loose02-cps-nodepth-fusion.fif",
help='inverse solution name')
parser.add_argument('--seconds-per-split', type=float, default=0.5,
help='seconds in each split of the recording, also maximum range of latencies being checked')
parser.add_argument('--n_splits', type=int, default=800,
parser.add_argument('--n-splits', type=int, default=800,
help='number of splits to split the recording into, (set to 400/seconds_per_split for full file)')
parser.add_argument('--n_derangements', type=int, default=1,
help='inverse solution snr')
parser.add_argument('--start_latency', type=float, default=-100,
parser.add_argument('--n-derangements', type=int, default=1,
help='number of derangements for the null distribution')
parser.add_argument('--start-latency', type=float, default=-100,
help='earliest latency to check in cross correlation')
parser.add_argument('--emeg_t_start', type=float, default=-200,
parser.add_argument('--emeg-t-start', type=float, default=-200,
help='start of the emeg evoked files relative to the start of the function')
parser.add_argument('--audio_shift_correction', type=float, default=0.000_537_5,
parser.add_argument('--audio-shift-correction', type=float, default=0.000_537_5,
help='audio shift correction, for every second of function, add this number of seconds (to the start of the emeg split) per seconds of emeg seen')
args = parser.parse_args()
args.base_dir = Path(args.base_dir)


emeg_dir = Path(args.base_dir, args.data_path)
emeg_paths = [Path(emeg_dir, args.emeg_file)]

participants = ['participant_01',
participants = ['pilot_01',
'pilot_02',
'participant_01',
'participant_01b',
'participant_02',
'participant_03',
'participant_04',
'participant_05',
'pilot_01',
'pilot_02']
'participant_07',
'participant_08',
'participant_09',
'participant_10',
'participant_11',
'participant_12',
'participant_13',
'participant_14',
'participant_15',
'participant_16',
'participant_17'
]

reps = [f'_rep{i}' for i in range(8)] + ['-ave']

# emeg_paths = [Path(emeg_dir, p + r) for p in participants[:2] for r in reps[-1:]]

inverse_operator = Path(args.base_dir, args.inverse_operator, f"{participants[0]}_ico5-3L-loose02-cps-nodepth.fif")
start = time.time()

if args.inverse_operator_dir is None:
inverse_operator = None
else:
inverse_operator = Path(args.base_dir, args.inverse_operator_dir, args.inverse_operator_name)

# Load data
emeg, ch_names = load_emeg_pack(emeg_paths,
need_names=False,
ave_mode=args.ave_mode,
inverse_operator=None, #inverse_operator, # set to None/inverse_operator if you want to run on sensor space/source space
p_tshift=None,
snr=args.snr)
emeg_values, ch_names = load_emeg_pack(emeg_paths,
need_names=True,
ave_mode=args.ave_mode,
inverse_operator=inverse_operator,
p_tshift=None,
snr=args.snr)

func = load_function(Path(args.base_dir, args.function_path),
func_name=args.function_name,
bruce_neurons=(5, 10))
func = func.downsampled(args.downsample_rate)

channel_space = "source" if inverse_operator is not None else "sensor"

es = do_gridsearch(
emeg_values=emeg,
sensor_names=ch_names,
emeg_values=emeg_values,
channel_names=ch_names,
channel_space=channel_space,
function=func,
seconds_per_split=args.seconds_per_split,
n_derangements=args.n_derangements,
n_splits=args.n_splits,
start_latency=args.start_latency,
plot_location=args.save_plot_location,
emeg_t_start=args.emeg_t_start,
emeg_sample_rate=args.emeg_sample_rate,
audio_shift_correction=args.audio_shift_correction,
ave_mode=args.ave_mode,
overwrite=args.overwrite,
)

# expression_plot(es)
if args.save_expression_set_location is not None:
save_expression_set(es, to_path_or_file = Path(args.save_expression_set_location, args.function_name + '_gridsearch.nkg'), overwrite=args.overwrite)

expression_plot(es, paired_axes=channel_space == "source", save_to=Path(args.save_plot_location, args.function_name + '_gridsearch.png'), overwrite=args.overwrite)

print(f'Time taken for code to run: {time.time() - start:.4f}')


if __name__ == '__main__':
main()
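
For context, a minimal sketch of how the revised invoker might be called with the new hyphenated flags and the output/overwrite options added in this PR; the output directory and working directory used here are placeholder assumptions, not part of the changeset:

import subprocess
from pathlib import Path

# Hypothetical output location; the invoker itself defaults to <data root>/output.
output_dir = Path("output")
output_dir.mkdir(exist_ok=True)

# Run the gridsearch in sensor space (no --inverse-operator-dir supplied),
# saving the ExpressionSet .nkg file and the expression plot into output_dir.
subprocess.run(
    [
        "python", "invokers/run_gridsearch.py",
        "--function-name", "IL",
        "--emeg-file", "participant_01-ave",
        "--save-expression-set-location", str(output_dir),
        "--save-plot-location", str(output_dir),
        "--overwrite",
    ],
    check=True,
)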
1 change: 1 addition & 0 deletions kymata/config/dataset4.yaml
@@ -31,6 +31,7 @@ supress_excessive_plots_and_prompts: True
# Inverse operator
eeg: True
meg: True
inverse_operator: "intrim_preprocessing_files/4_hexel_current_reconstruction/inverse-operators"

# Method to estimate noise covariance matrix
cov_method: 'grand_ave' # grand_ave | empty_room | run_start
73 changes: 73 additions & 0 deletions kymata/datasets/data_root.py
@@ -0,0 +1,73 @@
from os import getcwd, getenv
from pathlib import Path
from typing import Optional

from kymata.io.file import path_type


_DATA_PATH_ENVIRONMENT_VAR_NAME = "KYMATA_DATA_ROOT"
DATA_DIR_NAME = "kymata-toolbox-data"

# Places downloaded datasets could go, in order of preference
_preferred_default_data_locations = [
Path(Path(__file__).parent.parent.parent), # kymata/../data_dir (next to kymata dir)
Path(getcwd()), # <cwd>/data_dir
Path(Path.home(), "Documents"), # ~/Documents/data_dir
Path(Path.home()), # ~/data_dir
]


def data_root_path(data_root: Optional[path_type] = None) -> Path:

# Check if the data root has been specified

# Might be in an environmental variable
if data_root is None:
data_root: path_type | None = getenv(_DATA_PATH_ENVIRONMENT_VAR_NAME, default=None)

# Might have been supplied as an argument
if data_root is not None:
if isinstance(data_root, str):
data_root = Path(data_root)
# Data root specified
if not data_root.exists():
raise FileNotFoundError(f"data_root {str(data_root)} specified but does not exist")
if not data_root.is_dir():
raise NotADirectoryError(f"Please specify a directory ({str(data_root)} is not a directory)")

return data_root

else:
# Data root not specified

# Check if the data root already exists
for loc in _preferred_default_data_locations:
if (here := Path(loc, DATA_DIR_NAME)).exists():
data_root = here
break

# If not, attempt to create it
if data_root is None:
here: Path | None = None
for loc in _preferred_default_data_locations:
here = Path(loc, DATA_DIR_NAME)
try:
here.mkdir()
break
# If it fails for sensible reasons, no sweat, we'll fall through to the next option
except (FileNotFoundError, OSError):
# Parent didn't exist, not writeable, etc
pass
# Did we make it?
if here is not None and here.exists():
data_root = here
else:
raise FileNotFoundError("Failed to create data root directory")

# Data root location has been derived, rather than prespecified, so feed that back to the user to avoid a
# different location somehow being derived next time
print(f"Data root set at {str(data_root)}.")
print(f"Consider setting this as environmental variable {_DATA_PATH_ENVIRONMENT_VAR_NAME} to ensure it's reused"
f" next time.")
print(f"Hint: $> {_DATA_PATH_ENVIRONMENT_VAR_NAME}=\"{str(data_root)}\"")
return data_root
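
As an informal illustration of the resolution order implemented above (the path used here is a placeholder, not a real install location):

from os import environ
from kymata.datasets.data_root import data_root_path

# 1. An explicit argument wins, and must name an existing directory.
root = data_root_path("/path/to/kymata-toolbox-data")

# 2. With no argument, the KYMATA_DATA_ROOT environment variable is consulted.
environ["KYMATA_DATA_ROOT"] = "/path/to/kymata-toolbox-data"
root = data_root_path()

# 3. Failing both, a kymata-toolbox-data directory is found or created next to
#    the package, in the current working directory, in ~/Documents, or in ~.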
76 changes: 8 additions & 68 deletions kymata/datasets/sample.py
@@ -1,23 +1,15 @@
from abc import ABC, abstractmethod
from os import getenv, getcwd, remove, rmdir
from os import remove, rmdir
from pathlib import Path
from typing import Optional
from urllib import request

from kymata.datasets.data_root import data_root_path
from kymata.entities.expression import HexelExpressionSet, SensorExpressionSet
from kymata.io.file import path_type
from kymata.io.nkg import load_expression_set

_DATA_PATH_ENVIRONMENT_VAR_NAME = "KYMATA_DATA_ROOT"
_DATA_DIR_NAME = "kymata-toolbox-data/tutorial_nkg_data"

# Places downloaded datasets could go, in order of preference
_preferred_default_data_locations = [
Path(Path(__file__).parent.parent.parent), # kymata/../data_dir (next to kymata dir)
Path(getcwd()), # <cwd>/data_dir
Path(Path.home(), "Documents"), # ~/Documents/data_dir
Path(Path.home()), # ~/data_dir
]
_SAMPLE_DATA_DIR_NAME = "tutorial_nkg_data"


class SampleDataset(ABC):
@@ -36,9 +28,13 @@ def __init__(self,
download: bool):
self.name: str = name
self.filenames: list[str] = filenames
self.data_root: Path = data_root_path(data_root)
self.data_root: Path = Path(data_root_path(data_root), _SAMPLE_DATA_DIR_NAME)
self.remote_root: str = remote_root

# Create the default location, if it's being used
if data_root is None:
self.data_root.mkdir(exist_ok=True)

if download:
self.download()

@@ -141,62 +137,6 @@ def to_expressionset(self) -> SensorExpressionSet:
return es


def data_root_path(data_root: Optional[path_type] = None) -> Path:

# Check if the data root has been specified

# Might be in an environmental variable
if data_root is None:
data_root: path_type | None = getenv(_DATA_PATH_ENVIRONMENT_VAR_NAME, default=None)

# Might have been supplied as an argument
if data_root is not None:
if isinstance(data_root, str):
data_root = Path(data_root)
# Data root specified
if not data_root.exists():
raise FileNotFoundError(f"data_root {str(data_root)} specified but does not exist")
if not data_root.is_dir():
raise NotADirectoryError(f"Please specify a directory ({str(data_root)} is not a directory)")

return data_root

else:
# Data root not specified

# Check if the data root already exists
for loc in _preferred_default_data_locations:
if (here := Path(loc, _DATA_DIR_NAME)).exists():
data_root = here
break

# If not, attempt to create it
if data_root is None:
here: Path | None = None
for loc in _preferred_default_data_locations:
here = Path(loc, _DATA_DIR_NAME)
try:
here.mkdir()
break
# If it fails for sensible reasons, no sweat, we'll fall through to the next option
except (FileNotFoundError, OSError):
# Parent didn't exist, not writeable, etc
pass
# Did we make it?
if here is not None and here.exists():
data_root = here
else:
raise FileNotFoundError("Failed to create data root directory")

# Data root location has been derived, rather than prespecified, so feed that back to the user to avoid a
# different location somehow being derived next time
print(f"Data root set at {str(data_root)}.")
print(f"Consider setting this as environmental variable {_DATA_PATH_ENVIRONMENT_VAR_NAME} to ensure it's reused"
f" next time.")
print(f"Hint: $> {_DATA_PATH_ENVIRONMENT_VAR_NAME}=\"{str(data_root)}\"")
return data_root


def delete_dataset(local_dataset: SampleDataset):
# Make sure it's not silent
print(f"Deleting dataset {local_dataset.name}")
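
A small sketch of the practical effect of this refactor: sample/tutorial data now lives in a tutorial_nkg_data subdirectory of the shared data root rather than in its own hard-coded root (the print is illustrative only):

from pathlib import Path
from kymata.datasets.data_root import data_root_path

# SampleDataset with no explicit data_root resolves to, and creates on demand,
# <data root>/tutorial_nkg_data.
sample_dir = Path(data_root_path(), "tutorial_nkg_data")
print(sample_dir)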
7 changes: 5 additions & 2 deletions kymata/entities/expression.py
@@ -92,8 +92,8 @@ def __init__(self,
data = data[i]
data = self._init_prep_data(data)
# Check validity of input data dimensions
assert len(channels) == data.shape[0], f"{channel_coord_name} mismatch for {f}"
assert len(latencies) == data.shape[1], f"Latencies mismatch for {f}"
assert len(channels) == data.shape[0], f"{channel_coord_name} mismatch for {f}: {len(channels)} {channel_coord_name} versus data shape {data.shape}"
assert len(latencies) == data.shape[1], f"Latencies mismatch for {f}: {len(latencies)} latencies versus data shape {data.shape}"
dataset_dict[layer] = DataArray(
data=data,
dims=self._dims,
@@ -403,6 +403,9 @@ def best_functions(self) -> DataFrame:
return super()._best_functions_for_layer(LAYER_SCALP)


log_base = 10


def p_to_logp(arraylike: ArrayLike) -> ArrayLike:
"""The one-stop-shop for converting from p-values to log p-values."""
return log10(arraylike)
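
To make the new module-level log_base constant and the p_to_logp helper concrete, a brief usage sketch (values in the comments are approximate):

import numpy as np
from kymata.entities.expression import p_to_logp, log_base

p_values = np.array([0.05, 1e-10, 1e-50])
logps = p_to_logp(p_values)  # log10 p-values: approx [-1.3, -10.0, -50.0]
assert log_base == 10        # base used for log p-values throughout the module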