Skip to content

Commit

Permalink
Merge branch 'main' into final-adjustments-to-plot_expression_plot-ylim
Browse files Browse the repository at this point in the history
# Conflicts:
#	demos/demo_plotting.ipynb
  • Loading branch information
caiw committed Nov 16, 2023
2 parents e339d20 + c682850 commit 644b7bd
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 36 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

data/output-graphs/*
# Possible default location for downloaded data
kymata_data/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
41 changes: 23 additions & 18 deletions demos/demo_plotting.ipynb

Large diffs are not rendered by default.

61 changes: 45 additions & 16 deletions demos/demo_save_load.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-11-03T11:14:59.760748Z",
"start_time": "2023-11-03T11:14:58.864754Z"
"end_time": "2023-11-10T20:02:56.737736Z",
"start_time": "2023-11-10T20:02:56.215552Z"
}
},
"outputs": [],
Expand All @@ -17,52 +17,81 @@
"from pathlib import Path\n",
"from tempfile import NamedTemporaryFile\n",
"\n",
"from kymata.datasets.sample import get_dataset_kymata_mirror_q3_2023, get_dataset_gm_loudness, \\\n",
" get_dataset_d_ins_loudness_tonop_chan1\n",
"from kymata.entities.expression import ExpressionSet\n",
"from kymata.io.matlab import load_matab_expression_files"
]
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading dataset: kymata_mirror_Q3_2023\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//kymata_mirror_Q3_2023_expression_endtable.nkg to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/kymata_mirror_Q3_2023/kymata_mirror_Q3_2023_expression_endtable.nkg\n"
]
}
],
"source": [
"# set location of tutorial data\n",
"sample_data_dir = Path(Path(path.abspath(\"\")).parent, \"data\", \"sample-data\")\n",
"\n",
"# Load in an existing expression set object\n",
"expression_data_kymata_mirror = ExpressionSet.load(from_path_or_file=Path(sample_data_dir, \"kymata_mirror_Q3_2023_expression_endtable.nkg\"))\n"
"# Start with sample dataset\n",
"dataset_q3_2023 = get_dataset_kymata_mirror_q3_2023()\n",
"expression_data_kymata_mirror = ExpressionSet.load(from_path_or_file=Path(dataset_q3_2023.path, dataset_q3_2023.filenames[0]))\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-03T11:15:02.415275Z",
"start_time": "2023-11-03T11:14:59.762100Z"
"end_time": "2023-11-10T20:03:00.508482Z",
"start_time": "2023-11-10T20:02:56.738612Z"
}
},
"id": "4df17a3a727b7c02"
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading dataset: GMLoudness\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//GMLoudness_lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/GMLoudness/GMLoudness_lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//GMLoudness_rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/GMLoudness/GMLoudness_rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\n",
"Downloading dataset: GMLoudness\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/GMLoudness/GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\n",
"Downloading https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data//GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat to /Users/cai/Dox/Work/Kymata lab/Data/kymata_data/GMLoudness/GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\n"
]
}
],
"source": [
"# Get a second sample dataset\n",
"\n",
"# Create new expression set object for the new results (or you can just add to an existing expressionSet directly using '+=' ).\n",
"dataset_gm_loudness = get_dataset_gm_loudness()\n",
"expression_data_new_results = load_matab_expression_files(\n",
" function_name=\"ins_loudness_2020\",\n",
" lh_file=Path(sample_data_dir, \"GMloudness_lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\"),\n",
" rh_file=Path(sample_data_dir, \"GMloudness_rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\"),\n",
" lh_file=Path(dataset_gm_loudness.path, dataset_gm_loudness.filenames[0]),\n",
" rh_file=Path(dataset_gm_loudness.path, dataset_gm_loudness.filenames[1]),\n",
")\n",
"dataset_d_ins_loudness_tonop_chan1 = get_dataset_d_ins_loudness_tonop_chan1()\n",
"expression_data_new_results += load_matab_expression_files(\n",
" function_name=\"delta_ins_loudness_tonotop_chan1_2020\",\n",
" lh_file=Path(sample_data_dir, \"GMloudness_tonotop_82dB__d_ins_loudness_tonop_chan1__lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\"),\n",
" rh_file=Path(sample_data_dir, \"GMloudness_tonotop_82dB__d_ins_loudness_tonop_chan1__rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat\"),\n",
" lh_file=Path(dataset_d_ins_loudness_tonop_chan1.path, dataset_d_ins_loudness_tonop_chan1.filenames[0]),\n",
" rh_file=Path(dataset_d_ins_loudness_tonop_chan1.path, dataset_d_ins_loudness_tonop_chan1.filenames[1]),\n",
")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-03T11:15:08.725199Z",
"start_time": "2023-11-03T11:15:02.416078Z"
"end_time": "2023-11-10T20:03:11.535225Z",
"start_time": "2023-11-10T20:03:00.507022Z"
}
},
"id": "681594ea282bf0f"
Expand Down Expand Up @@ -91,8 +120,8 @@
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-11-03T11:15:09.899937Z",
"start_time": "2023-11-03T11:15:08.725716Z"
"end_time": "2023-11-10T20:03:18.279668Z",
"start_time": "2023-11-10T20:03:11.536073Z"
}
},
"id": "77c6c3617357edbb"
Expand Down
Empty file added kymata/datasets/__init__.py
Empty file.
175 changes: 175 additions & 0 deletions kymata/datasets/sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
from dataclasses import dataclass
from os import getenv, getcwd, remove, rmdir
from pathlib import Path
from typing import Optional
from urllib import request

from kymata.io.file import path_type


# Environment variable consulted for an explicit data root, and the name of the
# directory looked for (or created) under a default location otherwise.
_DATA_PATH_ENVIRONMENT_VAR_NAME = "KYMATA_DATA_ROOT"
_DATA_DIR_NAME = "kymata_data"

# Candidate parent directories for the data directory, most preferred first
_preferred_default_data_locations = [
    Path(__file__).parent.parent.parent,  # kymata/../data_dir (next to kymata dir)
    Path(getcwd()),                       # <cwd>/data_dir
    Path.home() / "Documents",            # ~/Documents/data_dir
    Path.home(),                          # ~/data_dir
]


@dataclass
class SampleDataset:
    """
    Describes a sample dataset kept in a local directory.

    Every entry of `.filenames` names a file inside `.path`. When `remote_root` is given, each local file is
    paired with a remote file of the same name located under `remote_root`.
    """
    # Label for the dataset (printed when downloading or deleting)
    name: str
    # Local directory holding the dataset's files
    path: Path
    # Names of the files making up the dataset, relative to `path`
    filenames: list[str]
    # Base URL the files can be fetched from; None if there is no remote copy
    remote_root: Optional[str] = None

    def download(self):
        """Fetch this dataset's files by handing the dataset to `_download_dataset`."""
        _download_dataset(self)


def data_root_path(data_root: Optional[path_type] = None) -> Path:
    """
    Resolve the directory under which downloaded datasets are stored.

    The root is looked for in this order:

    1. the `data_root` argument, if supplied;
    2. the environment variable named by `_DATA_PATH_ENVIRONMENT_VAR_NAME`, if set;
    3. an existing `_DATA_DIR_NAME` directory under one of `_preferred_default_data_locations`;
    4. a newly created `_DATA_DIR_NAME` directory under the first of those locations where creation succeeds.

    In cases 3 and 4 the chosen location is printed, together with a hint to set the environment variable, so
    that the same location can be pinned for later runs.

    :param data_root: explicit data root (string or Path); None to fall back to the environment variable
                      and then to the default locations
    :return: the data root directory
    :raises FileNotFoundError: if an explicitly specified root does not exist, or if no default root exists
                               and none could be created
    :raises NotADirectoryError: if an explicitly specified root exists but is not a directory
    """
    # No argument supplied: the root might be in an environmental variable
    if data_root is None:
        data_root = getenv(_DATA_PATH_ENVIRONMENT_VAR_NAME, default=None)

    if data_root is not None:
        # Data root specified (by argument or environment): validate it, never create it
        if isinstance(data_root, str):
            data_root = Path(data_root)
        if not data_root.exists():
            raise FileNotFoundError(f"data_root {str(data_root)} specified but does not exist")
        if not data_root.is_dir():
            raise NotADirectoryError(f"Please specify a directory ({str(data_root)} is not a directory)")
        return data_root

    # Data root not specified: derive it from the preferred default locations
    candidates = [Path(loc, _DATA_DIR_NAME) for loc in _preferred_default_data_locations]

    # Reuse an existing data directory if there is one. `is_dir()` rather than `exists()`, so that a regular
    # file which happens to be called `_DATA_DIR_NAME` is not mistaken for the data root.
    data_root = next((candidate for candidate in candidates if candidate.is_dir()), None)

    # If not, attempt to create it in the first location that allows it
    if data_root is None:
        for candidate in candidates:
            try:
                candidate.mkdir()
            except OSError:
                # Parent didn't exist, not writeable, name taken by a file, etc.
                # No sweat, fall through to the next option.
                continue
            data_root = candidate
            break

    if data_root is None:
        raise FileNotFoundError("Failed to create data root directory")

    # Data root location has been derived, rather than prespecified, so feed that back to the user to avoid a
    # different location somehow being derived next time
    print(f"Data root set at {str(data_root)}.")
    print(f"Consider setting this as environmental variable {_DATA_PATH_ENVIRONMENT_VAR_NAME} to ensure it's reused"
          f" next time.")
    print(f"Hint: $> {_DATA_PATH_ENVIRONMENT_VAR_NAME}=\"{str(data_root)}\"")
    return data_root


def _download_dataset(local_dataset):
    """
    Download every file of `local_dataset` which is not already present locally.

    Each name in `local_dataset.filenames` is fetched from `<remote_root>/<filename>` and saved as
    `<local_dataset.path>/<filename>`. Files which already exist locally are left untouched. Progress is
    reported with `print`.

    :param local_dataset: object providing `name`, `path`, `filenames` and `remote_root`
                          (in this module, a `SampleDataset`)
    :raises ValueError: if `local_dataset.remote_root` is None
    """
    print(f"Downloading dataset: {local_dataset.name}")
    if local_dataset.remote_root is None:
        raise ValueError("No remote root provided")

    # A remote root may or may not be written with a trailing "/". Normalise it so the joined URL never
    # contains a doubled slash before the filename.
    remote_root = local_dataset.remote_root.rstrip("/")

    # parents=True so that a dataset directory nested below a not-yet-existing directory can still be created
    local_dataset.path.mkdir(parents=True, exist_ok=True)

    for filename in local_dataset.filenames:
        remote = f"{remote_root}/{filename}"
        local = Path(local_dataset.path, filename)
        if local.exists():
            print(f"Local file already exists: {local}")
            continue
        print(f"Downloading {remote} to {local}")
        request.urlretrieve(remote, local)


def get_dataset_kymata_mirror_q3_2023(download: bool = True, data_root: Optional[path_type] = None) -> SampleDataset:
    """
    Describe (and by default download) the "kymata_mirror_Q3_2023" sample dataset.

    The dataset consists of a single `.nkg` file, stored in a directory named after the dataset inside the
    data root.

    :param download: when True, download the dataset's files before returning
    :param data_root: passed on to `data_root_path` to locate the data root
    :return: the dataset description
    """
    dataset_name = "kymata_mirror_Q3_2023"
    dataset_dir = Path(data_root_path(data_root=data_root), dataset_name)
    dataset_files = ["kymata_mirror_Q3_2023_expression_endtable.nkg"]

    dataset = SampleDataset(
        name=dataset_name,
        path=dataset_dir,
        filenames=dataset_files,
        remote_root="https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data/",
    )
    if download:
        dataset.download()
    return dataset


def get_dataset_gm_loudness(download: bool = True, data_root: Optional[path_type] = None) -> SampleDataset:
    """
    Describe (and by default download) the "GMLoudness" sample dataset.

    The dataset consists of two `.mat` files, listed with the `_lh_` file first and the `_rh_` file second,
    stored in a directory named after the dataset inside the data root.

    :param download: when True, download the dataset's files before returning
    :param data_root: passed on to `data_root_path` to locate the data root
    :return: the dataset description
    """
    dataset_name = "GMLoudness"
    dataset_dir = Path(data_root_path(data_root=data_root), dataset_name)
    dataset_files = [
        "GMLoudness_lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat",
        "GMLoudness_rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat",
    ]

    dataset = SampleDataset(
        name=dataset_name,
        path=dataset_dir,
        filenames=dataset_files,
        remote_root="https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data/",
    )
    if download:
        dataset.download()
    return dataset


def get_dataset_d_ins_loudness_tonop_chan1(download: bool = True, data_root: Optional[path_type] = None) -> SampleDataset:
    """
    Describe (and by default download) the "GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1" sample dataset.

    The dataset consists of two `.mat` files, listed with the `__lh_` file first and the `__rh_` file second,
    stored in a directory named after the dataset inside the data root.

    :param download: when True, download the dataset's files before returning
    :param data_root: passed on to `data_root_path` to locate the data root
    :return: the dataset description
    """
    dataset_name = "GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1"
    dataset_dir = Path(data_root_path(data_root=data_root), dataset_name)
    dataset_files = [
        "GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__lh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat",
        "GMloudness_tonotop_82dB_d_ins_loudness_tonop_chan1__rh_10242verts_-200-800ms_cuttoff1000_5perms_ttestpval.mat",
    ]

    dataset = SampleDataset(
        name=dataset_name,
        path=dataset_dir,
        filenames=dataset_files,
        remote_root="https://kymata.org/assets_kymata_toolbox_tutorial_data/gridsearch-result-data/",
    )
    if download:
        dataset.download()
    return dataset


def delete_dataset(local_dataset: SampleDataset):
    """
    Delete a dataset's local files, then its directory.

    Only the files listed in `local_dataset.filenames` are removed. Listed files which are not present (for
    example because a download never completed) are reported and skipped. The directory itself is removed
    with `rmdir`, which only succeeds if nothing else is left inside it.

    Deletion is refused unless the dataset directory lies inside the data root. The data root is obtained by
    calling `data_root_path()` without arguments, so only a root given by the environment variable or found
    in a default location is recognised here.

    :param local_dataset: the dataset to delete
    :raises AssertionError: if the dataset directory is not inside the data root
    """
    # Make sure it's not silent
    print(f"Deleting dataset {local_dataset.name}")

    # Only allow deletion if the specified path is within the data dir.
    # Raised explicitly instead of using an `assert` statement, so that this safety check is still performed
    # when Python runs with -O (which strips asserts). The exception type is kept for existing callers.
    if data_root_path() not in local_dataset.path.parents:
        raise AssertionError("Cannot delete dataset outside of data root directory")

    if not local_dataset.path.exists():
        # Nothing to delete
        print(f"{str(local_dataset.path)} doesn't exist")
        return

    for file in local_dataset.filenames:
        to_delete = Path(local_dataset.path, file)
        if not to_delete.exists():
            # Keep going, so that cleanup after a partial download still removes whatever did arrive
            print(f"File {str(to_delete)} doesn't exist, skipping")
            continue
        print(f"Deleting file {str(to_delete)}")
        remove(to_delete)
    print(f"Deleting directory {str(local_dataset.path)}")
    rmdir(local_dataset.path)
27 changes: 27 additions & 0 deletions tests/test_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from pathlib import Path

from kymata.datasets.sample import get_dataset_kymata_mirror_q3_2023, delete_dataset, get_dataset_gm_loudness


def test_download_and_delete_q3_2023_data_files():
    """Downloading the Q3 2023 dataset creates all of its files; deleting it removes them all again."""
    dataset = get_dataset_kymata_mirror_q3_2023(download=False)
    try:
        dataset.download()
        missing = [name for name in dataset.filenames if not Path(dataset.path, name).exists()]
        assert not missing
    finally:
        # Clean up whether or not the download checks passed
        delete_dataset(dataset)
        remaining = [name for name in dataset.filenames if Path(dataset.path, name).exists()]
        assert not remaining


def test_download_and_delete_gm_loudness3_data_files():
    """Downloading the GMLoudness dataset creates all of its files; deleting it removes them all again."""
    dataset = get_dataset_gm_loudness(download=False)
    try:
        dataset.download()
        missing = [name for name in dataset.filenames if not Path(dataset.path, name).exists()]
        assert not missing
    finally:
        # Clean up whether or not the download checks passed
        delete_dataset(dataset)
        remaining = [name for name in dataset.filenames if Path(dataset.path, name).exists()]
        assert not remaining

0 comments on commit 644b7bd

Please sign in to comment.