Skip to content

Commit

Permalink
Merge pull request #103 from kymata-atlas/big-preprocessing-refactor
Browse files Browse the repository at this point in the history
Refactor of preprocessing to improve readability
  • Loading branch information
neukym authored Jan 12, 2024
2 parents f1915fb + 8277912 commit 175c308
Show file tree
Hide file tree
Showing 25 changed files with 783 additions and 719 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Possible default location for downloaded data
kymata_data/
kymata-toolbox-data/
kymata-toolbox-data/tutorial_nkg_data/
kymata-toolbox-data/emeg_study_data/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
40 changes: 23 additions & 17 deletions demos/demo_plotting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-12-22T16:30:49.472955Z",
"start_time": "2023-12-22T16:30:47.863690Z"
"end_time": "2023-12-28T13:08:05.862533Z",
"start_time": "2023-12-28T13:08:04.737184Z"
}
},
"outputs": [],
Expand All @@ -30,21 +30,24 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data root set at /Users/administration/Github/kymata-toolbox/kymata-toolbox-data/tutorial_nkg_data.\n",
"Consider setting this as environmental variable KYMATA_DATA_ROOT to ensure it's reused next time.\n",
"Hint: $> KYMATA_DATA_ROOT=\"/Users/administration/Github/kymata-toolbox/kymata-toolbox-data/tutorial_nkg_data\"\n",
"Downloading dataset: kymata_mirror_Q3_2023\n",
"Local file already exists: /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/kymata_mirror_Q3_2023/kymata_mirror_Q3_2023_expression_endtable.nkg\n"
"Local file already exists: /Users/administration/Github/kymata-toolbox/kymata-toolbox-data/tutorial_nkg_data/kymata_mirror_Q3_2023/kymata_mirror_Q3_2023_expression_endtable.nkg\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/kymata/io/nkg.py:135: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
"/Users/administration/Github/kymata-toolbox/kymata/io/nkg.py:134: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
" warn(\"This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\")\n"
]
},
Expand Down Expand Up @@ -110,8 +113,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-22T16:30:52.740123Z",
"start_time": "2023-12-22T16:30:49.474267Z"
"end_time": "2023-12-28T13:08:10.250272Z",
"start_time": "2023-12-28T13:08:08.080442Z"
}
},
"id": "4023a711ee8a1675"
Expand All @@ -136,8 +139,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-22T16:31:01.921789Z",
"start_time": "2023-12-22T16:30:52.737613Z"
"end_time": "2023-12-28T12:57:34.406897Z",
"start_time": "2023-12-28T12:57:30.396543Z"
}
},
"id": "d466d1d098417e8"
Expand Down Expand Up @@ -168,8 +171,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-22T16:31:03.879649Z",
"start_time": "2023-12-22T16:31:01.920104Z"
"end_time": "2023-12-28T12:57:37.252960Z",
"start_time": "2023-12-28T12:57:35.973644Z"
}
},
"id": "d77e82f5aaaa4e74"
Expand Down Expand Up @@ -287,21 +290,24 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Data root set at /Users/administration/Github/kymata-toolbox/kymata-toolbox-data/demo_nkg_data.\n",
"Consider setting this as environmental variable KYMATA_DATA_ROOT to ensure it's reused next time.\n",
"Hint: $> KYMATA_DATA_ROOT=\"/Users/administration/Github/kymata-toolbox/kymata-toolbox-data/demo_nkg_data\"\n",
"Downloading dataset: TVL_2020_delta_ins_tontop_chan1_loudness_only_sensors\n",
"Local file already exists: /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/TVL_2020_delta_ins_tontop_chan1_loudness_only_sensors/TVL_2020_delta_ins_tontop_chan1_loudness_only_sensors.nkg\n"
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//TVL_2020_delta_ins_tontop_chan1_loudness_only_sensors.nkg to /Users/administration/Github/kymata-toolbox/kymata-toolbox-data/demo_nkg_data/TVL_2020_delta_ins_tontop_chan1_loudness_only_sensors/TVL_2020_delta_ins_tontop_chan1_loudness_only_sensors.nkg\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/kymata/io/nkg.py:135: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
"/Users/administration/Github/kymata-toolbox/kymata/io/nkg.py:134: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
" warn(\"This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\")\n"
]
},
Expand All @@ -321,8 +327,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-22T16:31:11.066324Z",
"start_time": "2023-12-22T16:31:10.397819Z"
"end_time": "2023-12-28T13:05:33.163591Z",
"start_time": "2023-12-28T13:05:32.853865Z"
}
},
"id": "b3b78bf3b02b4d62"
Expand Down
38 changes: 19 additions & 19 deletions demos/demo_save_load.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"outputs": [],
"source": [
"from os import path\n",
Expand All @@ -15,27 +15,27 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-22T15:33:23.949940Z",
"start_time": "2023-12-22T15:33:23.014875Z"
"end_time": "2023-12-28T13:00:48.773941Z",
"start_time": "2023-12-28T13:00:48.771379Z"
}
},
"id": "initial_id"
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading dataset: kymata_mirror_Q3_2023\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//kymata_mirror_Q3_2023_expression_endtable.nkg to /Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/kymata-toolbox-data/kymata_mirror_Q3_2023/kymata_mirror_Q3_2023_expression_endtable.nkg\n",
"Local file already exists: /Users/administration/Github/kymata-toolbox/kymata-toolbox-data/tutorial_nkg_data/kymata_mirror_Q3_2023/kymata_mirror_Q3_2023_expression_endtable.nkg\n",
"kymata_mirror_Q3_2023_expression_endtable.nkg\n",
"Downloading dataset: TVL_2020_ins_loudness_only\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//TVL_2020_ins_loudness_only.nkg to /Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/kymata-toolbox-data/TVL_2020_ins_loudness_only/TVL_2020_ins_loudness_only.nkg\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//TVL_2020_ins_loudness_only.nkg to /Users/administration/Github/kymata-toolbox/kymata-toolbox-data/tutorial_nkg_data/TVL_2020_ins_loudness_only/TVL_2020_ins_loudness_only.nkg\n",
"Downloading dataset: TVL_2020_delta_ins_tontop_chan1_loudness_only\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//TVL_2020_delta_ins_tontop_chan1_loudness_only.nkg to /Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/kymata-toolbox-data/TVL_2020_delta_ins_tontop_chan1_loudness_only/TVL_2020_delta_ins_tontop_chan1_loudness_only.nkg\n"
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//TVL_2020_delta_ins_tontop_chan1_loudness_only.nkg to /Users/administration/Github/kymata-toolbox/kymata-toolbox-data/tutorial_nkg_data/TVL_2020_delta_ins_tontop_chan1_loudness_only/TVL_2020_delta_ins_tontop_chan1_loudness_only.nkg\n"
]
}
],
Expand All @@ -44,7 +44,7 @@
"## data from a gridsearch.\n",
"\n",
"# set location of tutorial data\n",
"sample_data_dir = Path(Path(path.abspath(\"\")).parent, \"kymata-toolbox-data\")\n",
"sample_data_dir = Path(Path(path.abspath(\"\")).parent, \"kymata-toolbox-data\", \"tutorial_nkg_data\")\n",
"sample_data_dir.mkdir(exist_ok=True)\n",
"\n",
"# First we'll download a sample .nkg file which loads a range of functions,\n",
Expand All @@ -66,21 +66,21 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-22T15:33:24.935416Z",
"start_time": "2023-12-22T15:33:23.954497Z"
"end_time": "2023-12-28T13:04:38.941620Z",
"start_time": "2023-12-28T13:04:38.360906Z"
}
},
"id": "1f36e2e91b52522f"
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 7,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/kymata/io/nkg.py:135: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
"/Users/administration/Github/kymata-toolbox/kymata/io/nkg.py:134: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
" warn(\"This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\")\n"
]
}
Expand All @@ -92,23 +92,23 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-22T15:33:26.934443Z",
"start_time": "2023-12-22T15:33:24.936282Z"
"end_time": "2023-12-28T13:04:48.370724Z",
"start_time": "2023-12-28T13:04:46.202660Z"
}
},
"id": "4df17a3a727b7c02"
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/kymata/io/nkg.py:135: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
"/Users/administration/Github/kymata-toolbox/kymata/io/nkg.py:134: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
" warn(\"This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\")\n",
"/Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/kymata/io/nkg.py:135: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
"/Users/administration/Github/kymata-toolbox/kymata/io/nkg.py:134: UserWarning: This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\n",
" warn(\"This file uses an old format. Please consider re-saving the data to avoid future incompatibility.\")\n"
]
}
Expand All @@ -122,8 +122,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-12-22T15:33:27.438884Z",
"start_time": "2023-12-22T15:33:26.934318Z"
"end_time": "2023-12-28T13:04:51.930974Z",
"start_time": "2023-12-28T13:04:51.269543Z"
}
},
"id": "681594ea282bf0f"
Expand Down
71 changes: 0 additions & 71 deletions invokers/invoker_preproc_pipeline.py

This file was deleted.

56 changes: 56 additions & 0 deletions invokers/invoker_run_data_cleansing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from pathlib import Path
from colorama import Fore

from kymata.io.yaml import load_config
from kymata.io.cli import print_with_color
from kymata.preproc.data_cleansing import run_first_pass_cleansing_and_maxwell_filtering, run_second_pass_cleansing_and_EOG_removal


# noinspection DuplicatedCode
def main():
    """Run both data-cleansing passes for the dataset described in ``dataset4.yaml``.

    Loads ``kymata/config/dataset4.yaml`` (resolved relative to the parent of
    this file's directory), chooses the data root directory from the config's
    ``data_location`` entry, then calls
    ``run_first_pass_cleansing_and_maxwell_filtering`` followed by
    ``run_second_pass_cleansing_and_EOG_removal`` with settings read from the
    same config.

    Raises:
        ValueError: If ``data_location`` is not one of ``'local'``, ``'cbu'``
            or ``'cbu-local'``.
    """
    repo_root = Path(__file__).parent.parent
    config = load_config(str(Path(repo_root, "kymata", "config", "dataset4.yaml")))

    data_location = config['data_location']
    if data_location == "local":
        # Trailing "/" is appended so this root has the same shape as the
        # hard-coded roots below (presumably the callees concatenate onto it).
        data_root_dir = str(Path(repo_root, "kymata-toolbox-data", "emeg_study_data")) + "/"
    elif data_location == "cbu":
        data_root_dir = '/imaging/projects/cbu/kymata/data/'
    elif data_location == "cbu-local":
        data_root_dir = '//cbsu/data/imaging/projects/cbu/kymata/data/'
    else:
        # List every accepted value; the previous message omitted 'cbu-local'.
        raise ValueError(
            "The 'data_location' parameter in the config file must be one of "
            f"'local', 'cbu' or 'cbu-local' (got {data_location!r})."
        )

    # Settings shared by both cleansing passes.
    list_of_participants = config['list_of_participants']
    dataset_directory_name = config['dataset_directory_name']
    n_runs = config['number_of_runs']
    supress_excessive_plots_and_prompts = config['supress_excessive_plots_and_prompts']

    run_first_pass_cleansing_and_maxwell_filtering(
        data_root_dir=data_root_dir,
        list_of_participants=list_of_participants,
        dataset_directory_name=dataset_directory_name,
        n_runs=n_runs,
        emeg_machine_used_to_record_data=config['EMEG_machine_used_to_record_data'],
        skip_maxfilter_if_previous_runs_exist=config['skip_maxfilter_if_previous_runs_exist'],
        automatic_bad_channel_detection_requested=config['automatic_bad_channel_detection_requested'],
        supress_excessive_plots_and_prompts=supress_excessive_plots_and_prompts,
    )

    run_second_pass_cleansing_and_EOG_removal(
        data_root_dir=data_root_dir,
        list_of_participants=list_of_participants,
        dataset_directory_name=dataset_directory_name,
        n_runs=n_runs,
        remove_ecg=config['remove_ECG'],
        remove_veoh_and_heog=config['remove_VEOH_and_HEOG'],
        skip_ica_if_previous_runs_exist=config['skip_ica_if_previous_runs_exist'],
        supress_excessive_plots_and_prompts=supress_excessive_plots_and_prompts,
    )

def _display_welcome_message_to_terminal():
    """Print the pipeline's welcome banner to the terminal in blue."""
    rule = "-----------------------------------------------"
    banner_lines = (
        rule,
        " Kymata Preprocessing and Analysis Pipeline ",
        rule,
        "",  # trailing empty line after the banner
    )
    for banner_line in banner_lines:
        print_with_color(banner_line, Fore.BLUE)

def _run_cleanup():
    """Print a green success message; no other cleanup is performed here."""
    exit_message = "Exited successfully."
    print_with_color(exit_message, Fore.GREEN)

# Script entry point: run the cleansing pipeline only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
Loading

0 comments on commit 175c308

Please sign in to comment.