Skip to content

Commit

Permalink
Merge pull request #58 from kymata-atlas/simplify-loading-sample-dataset
Browse files Browse the repository at this point in the history
Simplify loading ExpressionSet from SampleDataset
  • Loading branch information
caiw authored Nov 24, 2023
2 parents 5ee1460 + 38bf8b1 commit 1625f36
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 174 deletions.
47 changes: 26 additions & 21 deletions demos/demo_ippm.ipynb

Large diffs are not rendered by default.

46 changes: 22 additions & 24 deletions demos/demo_plotting.ipynb

Large diffs are not rendered by default.

109 changes: 62 additions & 47 deletions demos/demo_save_load.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-11-10T20:02:56.737736Z",
"start_time": "2023-11-10T20:02:56.215552Z"
"end_time": "2023-11-24T15:03:28.701768Z",
"start_time": "2023-11-24T15:03:27.973888Z"
}
},
"outputs": [],
"source": [
"from os import path\n",
"from pathlib import Path\n",
"from tempfile import NamedTemporaryFile\n",
"\n",
"from kymata.datasets.sample import get_dataset_kymata_mirror_q3_2023, get_dataset_gm_loudness, \\\n",
" get_dataset_d_ins_loudness_tonop_chan1\n",
"from kymata.entities.expression import ExpressionSet\n",
"from kymata.io.matlab import load_matab_expression_files"
"from kymata.datasets.sample import KymataMirror2023Q3Dataset, TVLInsLoudnessOnlyDataset, TVLDeltaInsTC1LoudnessOnlyDataset\n",
"from kymata.entities.expression import ExpressionSet"
]
},
{
Expand All @@ -31,70 +30,86 @@
"output_type": "stream",
"text": [
"Downloading dataset: kymata_mirror_Q3_2023\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//kymata_mirror_Q3_2023_expression_endtable.nkg to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/kymata_mirror_Q3_2023/kymata_mirror_Q3_2023_expression_endtable.nkg\n"
"Local file already exists: /Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/data/sample-data/kymata_mirror_Q3_2023/kymata_mirror_Q3_2023_expression_endtable.nkg\n",
"kymata_mirror_Q3_2023_expression_endtable.nkg\n",
"Downloading dataset: TVL_2020_ins_loudness_only\n",
"Local file already exists: /Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/data/sample-data/TVL_2020_ins_loudness_only/TVL_2020_ins_loudness_only.nkg\n",
"Downloading dataset: TVL_2020_delta_ins_tontop_chan1_loudness_only\n",
"Local file already exists: /Users/cai/Dox/Work/Kymata lab/Code/kymata-toolbox/data/sample-data/TVL_2020_delta_ins_tontop_chan1_loudness_only/TVL_2020_delta_ins_tontop_chan1_loudness_only.nkg\n"
]
}
],
"source": [
"# Start with sample dataset\n",
"dataset_q3_2023 = get_dataset_kymata_mirror_q3_2023()\n",
"expression_data_kymata_mirror = ExpressionSet.load(from_path_or_file=Path(dataset_q3_2023.path, dataset_q3_2023.filenames[0]))"
"## Download sample data. This cell can be ignored if you wish to load your own\n",
"## data from a gridsearch.\n",
"\n",
"# set location of tutorial data\n",
"sample_data_dir = Path(Path(path.abspath(\"\")).parent, \"data\", \"sample-data\")\n",
"\n",
"# First we'll download a sample .nkg file from the Kymata Research Group,\n",
"# which contains a range of functions. .nkg files contain both left-hand and\n",
"# right-hand data for a set of functions.\n",
"\n",
"sample_dataset = KymataMirror2023Q3Dataset(data_root=sample_data_dir, download=True)\n",
"nkg_path = Path(sample_dataset.path, sample_dataset.filenames[0])\n",
"print(nkg_path.name)\n",
"\n",
"# Second we will download two .nkg files which only contain one\n",
"# function each - 'ins_loudness' and 'd_ins_tc1_loudness':\n",
"ins_loudness_only_dataset = TVLInsLoudnessOnlyDataset(data_root=sample_data_dir, download=True)\n",
"ins_loudness_path = Path(ins_loudness_only_dataset.path, ins_loudness_only_dataset.filenames[0])\n",
"\n",
"d_ins_tc1_loudness_only_dataset = TVLDeltaInsTC1LoudnessOnlyDataset(data_root=sample_data_dir, download=True)\n",
"d_ins_tc1_loudness_path = Path(d_ins_tc1_loudness_only_dataset.path, d_ins_tc1_loudness_only_dataset.filenames[0])"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-10T20:03:00.508482Z",
"start_time": "2023-11-10T20:02:56.738612Z"
"end_time": "2023-11-24T15:03:28.707981Z",
"start_time": "2023-11-24T15:03:28.704356Z"
}
},
"id": "4df17a3a727b7c02"
"id": "1f36e2e91b52522f"
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading dataset: GMLoudness\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//GMLoudness_lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/GMLoudness/GMLoudness_lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//GMLoudness_rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/GMLoudness/GMLoudness_rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\n",
"Downloading dataset: GMLoudness\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/GMLoudness/GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/GMLoudness/GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\n"
]
}
"outputs": [],
"source": [
"# Let's load the KymataMirror2023Q3 .nkg file. This contains around 30 functions.\n",
"expression_data_kymata_mirror = ExpressionSet.load(from_path_or_file=nkg_path)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-24T15:03:32.308893Z",
"start_time": "2023-11-24T15:03:28.707787Z"
}
},
"id": "4df17a3a727b7c02"
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"# Get a second sample dataset\n",
"\n",
"# Create new expression set object for the new results (or you can just add to an existing expressionSet directly using '+=' ).\n",
"dataset_gm_loudness = get_dataset_gm_loudness()\n",
"expression_data_new_results = load_matab_expression_files(\n",
" function_name=\"ins_loudness_2020\",\n",
" lh_file=Path(dataset_gm_loudness.path, dataset_gm_loudness.filenames[0]),\n",
" rh_file=Path(dataset_gm_loudness.path, dataset_gm_loudness.filenames[1]),\n",
")\n",
"dataset_d_ins_loudness_tonop_chan1 = get_dataset_d_ins_loudness_tonop_chan1()\n",
"expression_data_new_results += load_matab_expression_files(\n",
" function_name=\"delta_ins_loudness_tonotop_chan1_2020\",\n",
" lh_file=Path(dataset_d_ins_loudness_tonop_chan1.path, dataset_d_ins_loudness_tonop_chan1.filenames[0]),\n",
" rh_file=Path(dataset_d_ins_loudness_tonop_chan1.path, dataset_d_ins_loudness_tonop_chan1.filenames[1]),\n",
")"
"# Let's separately load the 'ins_loudness' .nkg file, and then load and add the\n",
"# d_ins_tc1_loudness to it using '+='. 'expression_data_new_results' now contains two functions.\n",
"expression_data_new_results = ExpressionSet.load(from_path_or_file=ins_loudness_path)\n",
"expression_data_new_results += ExpressionSet.load(from_path_or_file=d_ins_tc1_loudness_path)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-10T20:03:11.535225Z",
"start_time": "2023-11-10T20:03:00.507022Z"
"end_time": "2023-11-24T15:03:33.159669Z",
"start_time": "2023-11-24T15:03:32.309852Z"
}
},
"id": "681594ea282bf0f"
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"outputs": [
{
"name": "stderr",
Expand All @@ -106,7 +121,7 @@
}
],
"source": [
"# You can add two ExpressionSets together\n",
"# We can add these two ExpressionSets together with '+' or \"+=\"\n",
"expression_data_extended = expression_data_kymata_mirror + expression_data_new_results\n",
"\n",
"# Save new expressionSet for use again in the future.\n",
Expand All @@ -116,8 +131,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-10T20:03:18.279668Z",
"start_time": "2023-11-10T20:03:11.536073Z"
"end_time": "2023-11-24T15:03:44.793458Z",
"start_time": "2023-11-24T15:03:33.162168Z"
}
},
"id": "77c6c3617357edbb"
Expand Down
162 changes: 88 additions & 74 deletions kymata/datasets/sample.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from dataclasses import dataclass
from abc import ABC, abstractmethod
from os import getenv, getcwd, remove, rmdir
from pathlib import Path
from typing import Optional
from urllib import request

from kymata.entities.expression import ExpressionSet
from kymata.io.file import path_type


_DATA_PATH_ENVIRONMENT_VAR_NAME = "KYMATA_DATA_ROOT"
_DATA_DIR_NAME = "kymata-toolbox-data"

Expand All @@ -19,21 +19,100 @@
]


class SampleDataset(ABC):
    """
    Info required to retrieve a dataset stored locally.

    Names in `.filenames` refer to local files, which (if `remote_root` is
    specified) are paired with identically named remote files.

    The dataset's files live in the directory `<data_root>/<name>`; see `path`.
    """

    def __init__(self,
                 name: str,
                 filenames: list[str],
                 data_root: Optional[path_type],
                 remote_root: Optional[str],
                 download: bool):
        """
        :param name: Dataset name; also the name of its directory under the data root.
        :param filenames: Names of the files which make up the dataset.
        :param data_root: Handed to `data_root_path` to resolve the data root directory.
        :param remote_root: URL prefix under which the identically named remote files
            are found, or None if the dataset has no remote copy.
        :param download: If True, `download()` is called as part of construction.
        """
        self.name: str = name
        self.filenames: list[str] = filenames
        self.data_root: Path = data_root_path(data_root)
        # None is a legal value here: `download()` checks for it explicitly.
        self.remote_root: Optional[str] = remote_root

        if download:
            self.download()

    @property
    def path(self) -> Path:
        """Local directory holding this dataset's files."""
        return Path(self.data_root, self.name)

    def download(self):
        """
        Fetch every file in `.filenames` which is not already present locally.

        :raises ValueError: if no `remote_root` was provided.
        """
        print(f"Downloading dataset: {self.name}")
        if self.remote_root is None:
            raise ValueError("No remote root provided")
        self.path.mkdir(exist_ok=True)
        # Drop trailing slashes so that a remote root ending in "/" does not
        # produce a "//" in the joined URL.
        remote_root = self.remote_root.rstrip("/")
        for filename in self.filenames:
            remote = remote_root + "/" + filename
            local = Path(self.path, filename)
            if local.exists():
                print(f"Local file already exists: {local}")
            else:
                print(f"Downloading {remote} to {local}")
                request.urlretrieve(remote, local)

    @abstractmethod
    def to_expressionset(self) -> ExpressionSet:
        """Return the dataset's contents as an ExpressionSet; implemented by subclasses."""
        raise NotImplementedError()


class KymataMirror2023Q3Dataset(SampleDataset):
    """Sample dataset "kymata_mirror_Q3_2023", made up of a single .nkg file."""

    def __init__(self, data_root: Optional[path_type] = None, download: bool = True):
        dataset_name = "kymata_mirror_Q3_2023"
        nkg_filenames = ["kymata_mirror_Q3_2023_expression_endtable.nkg"]
        remote_url = "https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data/"
        super().__init__(
            name=dataset_name,
            filenames=nkg_filenames,
            data_root=data_root,
            remote_root=remote_url,
            download=download,
        )

    def to_expressionset(self) -> ExpressionSet:
        """Load the dataset's first (and only) listed file as an ExpressionSet."""
        nkg_file = self.path / self.filenames[0]
        return ExpressionSet.load(from_path_or_file=nkg_file)


class TVLInsLoudnessOnlyDataset(SampleDataset):
    """Sample dataset "TVL_2020_ins_loudness_only", made up of a single .nkg file."""

    def __init__(self, data_root: Optional[path_type] = None, download: bool = True):
        dataset_name = "TVL_2020_ins_loudness_only"
        nkg_filenames = ["TVL_2020_ins_loudness_only.nkg"]
        remote_url = "https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data/"
        super().__init__(
            name=dataset_name,
            filenames=nkg_filenames,
            data_root=data_root,
            remote_root=remote_url,
            download=download,
        )

    def to_expressionset(self) -> ExpressionSet:
        """Load the dataset's first (and only) listed file as an ExpressionSet."""
        nkg_file = self.path / self.filenames[0]
        return ExpressionSet.load(from_path_or_file=nkg_file)


class TVLDeltaInsTC1LoudnessOnlyDataset(SampleDataset):
    """Sample dataset "TVL_2020_delta_ins_tontop_chan1_loudness_only", made up of a single .nkg file."""

    def __init__(self, data_root: Optional[path_type] = None, download: bool = True):
        dataset_name = "TVL_2020_delta_ins_tontop_chan1_loudness_only"
        nkg_filenames = ["TVL_2020_delta_ins_tontop_chan1_loudness_only.nkg"]
        remote_url = "https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data/"
        super().__init__(
            name=dataset_name,
            filenames=nkg_filenames,
            data_root=data_root,
            remote_root=remote_url,
            download=download,
        )

    def to_expressionset(self) -> ExpressionSet:
        """Load the dataset's first (and only) listed file as an ExpressionSet."""
        nkg_file = self.path / self.filenames[0]
        return ExpressionSet.load(from_path_or_file=nkg_file)


def data_root_path(data_root: Optional[path_type] = None) -> Path:
Expand Down Expand Up @@ -92,71 +171,6 @@ def data_root_path(data_root: Optional[path_type] = None) -> Path:
return data_root


def _download_dataset(local_dataset):
    """
    Fetch every file of `local_dataset` which is not already present locally.

    Reads `.name`, `.remote_root`, `.path` and `.filenames` from `local_dataset`.
    The directory `local_dataset.path` is created if it does not yet exist (its
    parent must already exist).

    :param local_dataset: The dataset whose files should be downloaded.
    :raises ValueError: if `local_dataset.remote_root` is None.
    """
    print(f"Downloading dataset: {local_dataset.name}")
    if local_dataset.remote_root is None:
        raise ValueError("No remote root provided")
    local_dataset.path.mkdir(exist_ok=True)
    # Drop trailing slashes so that a remote root ending in "/" does not
    # produce a "//" in the joined URL.
    remote_root = local_dataset.remote_root.rstrip("/")
    for filename in local_dataset.filenames:
        remote = remote_root + "/" + filename
        local = Path(local_dataset.path, filename)
        if local.exists():
            print(f"Local file already exists: {local}")
        else:
            print(f"Downloading {remote} to {local}")
            request.urlretrieve(remote, local)


def get_dataset_kymata_mirror_q3_2023(download: bool = True, data_root: Optional[path_type] = None) -> SampleDataset:
    """
    Build the `SampleDataset` describing "kymata_mirror_Q3_2023".

    :param download: If True, the dataset's `download()` is called before returning.
    :param data_root: Handed to `data_root_path` to resolve the data root directory.
    :return: The dataset description.
    """
    name = "kymata_mirror_Q3_2023"
    dataset_dir = Path(data_root_path(data_root=data_root), name)
    nkg_filenames = ["kymata_mirror_Q3_2023_expression_endtable.nkg"]

    local_dataset = SampleDataset(
        name=name,
        path=dataset_dir,
        filenames=nkg_filenames,
        remote_root="https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data/",
    )
    if not download:
        return local_dataset
    local_dataset.download()
    return local_dataset


def get_dataset_gm_loudness(download: bool = True, data_root: Optional[path_type] = None) -> SampleDataset:
    """
    Build the `SampleDataset` describing "GMLoudness" (one lh and one rh .mat file).

    :param download: If True, the dataset's `download()` is called before returning.
    :param data_root: Handed to `data_root_path` to resolve the data root directory.
    :return: The dataset description.
    """
    name = "GMLoudness"
    dataset_dir = Path(data_root_path(data_root=data_root), name)
    mat_filenames = [
        "GMLoudness_lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat",
        "GMLoudness_rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat",
    ]

    local_dataset = SampleDataset(
        name=name,
        path=dataset_dir,
        filenames=mat_filenames,
        remote_root="https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data/",
    )
    if not download:
        return local_dataset
    local_dataset.download()
    return local_dataset


def get_dataset_d_ins_loudness_tonop_chan1(download: bool = True, data_root: Optional[path_type] = None) -> SampleDataset:
    """
    Build the `SampleDataset` describing "GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1"
    (one lh and one rh .mat file).

    :param download: If True, the dataset's `download()` is called before returning.
    :param data_root: Handed to `data_root_path` to resolve the data root directory.
    :return: The dataset description.
    """
    name = "GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1"
    dataset_dir = Path(data_root_path(data_root=data_root), name)
    mat_filenames = [
        "GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat",
        "GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat",
    ]

    local_dataset = SampleDataset(
        name=name,
        path=dataset_dir,
        filenames=mat_filenames,
        remote_root="https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data/",
    )
    if not download:
        return local_dataset
    local_dataset.download()
    return local_dataset


def delete_dataset(local_dataset: SampleDataset):
# Make sure it's not silent
print(f"Deleting dataset {local_dataset.name}")
Expand Down
8 changes: 3 additions & 5 deletions kymata/plot/plotting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from os import path
from pathlib import Path
from itertools import cycle
from typing import Optional, Sequence, Dict
Expand Down Expand Up @@ -203,10 +202,9 @@ def _get_yticks(ylim):


if __name__ == '__main__':
    # Manual smoke run: load the sample dataset and write one expression plot.
    from kymata.datasets.sample import KymataMirror2023Q3Dataset

    # Load the sample expression data through the dataset's own loader, so it
    # is read exactly once.
    expression_data_kymata_mirror = KymataMirror2023Q3Dataset().to_expressionset()

    expression_plot(expression_data_kymata_mirror, show_only=expression_data_kymata_mirror.functions[1:], save_to=Path("/Users/cai/Desktop/temp.png"))
Loading

0 comments on commit 1625f36

Please sign in to comment.