Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nimads module with implementations of NIMADS objects #628

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions nimare/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,43 @@
}


def convert_nimads_to_dataset(studyset, annotation=None):
    """Convert a NIMADS Studyset object into a NiMARE Dataset.

    Parameters
    ----------
    studyset : Studyset
        Object exposing ``studies``. Each study must provide ``id``, ``name``, ``authors``,
        ``publication`` and ``analyses``; each analysis must provide ``id`` and ``points``.
    annotation : Annotation or None, optional
        If provided, its ``notes`` (an iterable of dicts with ``"analysis"`` and ``"note"``
        keys) are used to attach labels to the matching analyses. Default is None.

    Returns
    -------
    Dataset
        Dataset built from a ``{study_id: {"metadata": ..., "contrasts": ...}}`` dictionary.
    """

    def _analysis_to_dict(study, analysis, annotation=None):
        """Build the contrast-level dictionary for one analysis."""
        result = {
            "metadata": {
                # Previously ``study.name``, which duplicated the title in the authors field.
                "authors": study.authors,
                "journal": study.publication,
                "title": study.name,
            },
        }

        points = analysis.points
        if points:
            result["coords"] = {
                # The first point's space is taken as the space of the whole analysis.
                "space": points[0].space,
                "x": [p.x for p in points],
                "y": [p.y for p in points],
                "z": [p.z for p in points],
            }
        # Analyses without points get no "coords" entry instead of failing on points[0].
        # NOTE(review): assumes Dataset tolerates contrasts without "coords" -- confirm.

        if annotation:
            # Use a default so an analysis without a note does not raise StopIteration.
            note = next((n for n in annotation.notes if n["analysis"] == analysis.id), None)
            if note is not None:
                result["labels"] = note["note"]

        return result

    def _study_to_dict(study, annotation=None):
        """Build the study-level dictionary, including all of its contrasts."""
        return {
            "metadata": {
                "authors": study.authors,
                "journal": study.publication,
                "title": study.name,
            },
            "contrasts": {a.id: _analysis_to_dict(study, a, annotation) for a in study.analyses},
        }

    return Dataset({s.id: _study_to_dict(s, annotation=annotation) for s in studyset.studies})


def convert_neurosynth_to_dict(
coordinates_file,
metadata_file,
Expand Down
301 changes: 301 additions & 0 deletions nimare/nimads.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
"""NIMADS-related classes for NiMARE."""
import json


class Studyset:
    """A collection of studies for meta-analysis.

    This is the primary target for Estimators and Transformers in NiMARE.

    Parameters
    ----------
    source : dict
        NIMADS-style dictionary with ``"id"``, ``"name"`` and ``"studies"`` keys.
    target_space : optional
        Currently unused.
    mask : optional
        Currently unused.

    Attributes
    ----------
    id
        Identifier taken from ``source["id"]``.
    name : str
        Name of the Studyset; an empty string when the source name is null/empty.
    studies : list of Study objects
        The Study objects comprising the Studyset.
    """

    def __init__(self, source, target_space=None, mask=None):
        self.id = source["id"]
        self.name = source["name"] or ""
        self.studies = [Study(s) for s in source["studies"]]
        self._annotations = []

    def __repr__(self):
        """My Simple representation."""
        # Format the id instead of concatenating so non-string ids do not raise TypeError.
        return repr(f"Studyset: {self.id}")

    def __str__(self):
        """Give useful information about the Studyset."""
        return " ".join(["Studyset:", self.name, "::", f"studies: {len(self.studies)}"])

    @property
    def annotations(self):
        """Return existing Annotations."""
        return self._annotations

    @annotations.setter
    def annotations(self, annotation):
        # Assignment appends to the stored list rather than replacing it.
        # TODO: validate that the annotation's analysis ids match the Studyset's, e.g.
        # ss_analysis_ids = set([a.id for s in self.studies for a in s.analyses])
        # annot_analysis_ids = set([n['analysis'] for n in annotation.notes])
        self._annotations.append(annotation)

    @annotations.deleter
    def annotations(self, annotation_id=None):
        # ``del studyset.annotations`` invokes the deleter with ``self`` only, so
        # ``annotation_id`` stays None on that path and every annotation is dropped.
        if annotation_id:
            self._annotations = [a for a in self._annotations if a.id != annotation_id]
        else:
            self._annotations = []

    @classmethod
    def from_nimads(cls, filename):
        """Create a Studyset from a NIMADS JSON file.

        Parameters
        ----------
        filename : str
            Path to the NIMADS JSON file.

        Returns
        -------
        Studyset
            A new instance built from the parsed JSON.
        """
        # Read-only mode: "r+" would require write permission on a file we only read.
        with open(filename, "r", encoding="utf-8") as fo:
            nimads = json.load(fo)

        return cls(nimads)

    def to_nimads(self, filename):
        """Write the Studyset to a NIMADS JSON file."""
        ...

    def load(self, filename):
        """Load a Studyset from a pickled file."""
        ...

    def save(self, filename):
        """Write the Studyset to a pickled file."""
        ...

    def copy(self):
        """Create a copy of the Studyset."""
        ...

    def slice(self, analyses):
        """Create a new Studyset with only requested Analyses."""
        ...

    def merge(self, right):
        """Merge a separate Studyset into the current one."""
        ...

    def update_image_path(self, new_path):
        """Point to a new location for image files on the local filesystem."""
        ...

    # NOTE(review, tsalo, 2022-04-28): The get* methods can be removed, since they'll be in
    # DatasetSearcher in the future (post #677).
    def get_analyses_by_coordinates(self, xyz, r=None, n=None):
        """Extract a list of Analyses with at least one Point near the requested coordinates."""
        ...

    def get_analyses_by_mask(self, img):
        """Extract a list of Analyses with at least one Point in the specified mask."""
        ...

    def get_analyses_by_annotations(self):
        """Extract a list of Analyses with a given label/annotation."""
        ...

    def get_analyses_by_texts(self):
        """Extract a list of Analyses with a given text."""
        ...

    def get_analyses_by_images(self):
        """Extract a list of Analyses with a given image."""
        ...

    def get_analyses_by_metadata(self):
        """Extract a list of Analyses with a metadata field/value."""
        ...

    def get_points(self, analyses):
        """Collect Points associated with specified Analyses."""
        ...

    def get_annotations(self, analyses):
        """Collect Annotations associated with specified Analyses."""
        ...

    def get_texts(self, analyses):
        """Collect texts associated with specified Analyses."""
        ...

    def get_images(self, analyses):
        """Collect image files associated with specified Analyses."""
        ...

    def get_metadata(self, analyses):
        """Collect metadata associated with specified Analyses."""
        ...


class Study:
    """A group of Analyses reported in a single paper.

    Parameters
    ----------
    source : dict
        Dictionary with ``"id"``, ``"name"``, ``"authors"``, ``"publication"`` and
        ``"analyses"`` keys.

    Attributes
    ----------
    id
        Identifier taken from ``source["id"]``.
    name, authors, publication
        Taken from the matching source keys; falsy values are replaced by ``""``.
    analyses : list of Analysis objects
        One Analysis per entry of ``source["analyses"]``.
    """

    def __init__(self, source):
        self.id = source["id"]
        # Null/empty text fields are stored as empty strings.
        for field in ("name", "authors", "publication"):
            setattr(self, field, source[field] or "")
        built = []
        for raw_analysis in source["analyses"]:
            built.append(Analysis(raw_analysis))
        self.analyses = built

    def __repr__(self):
        """Return the repr of the Study id."""
        return f"{self.id!r}"

    def __str__(self):
        """Return the Study name followed by its number of analyses."""
        n_analyses = len(self.analyses)
        return " ".join((self.name, f"analyses: {n_analyses}"))

    def get_analyses(self):
        """Collect Analyses from the Study (not implemented yet).

        Notes
        -----
        What filters, if any, should we support in this method?
        """
        ...


class Analysis:
    """A single statistical analysis from a Study.

    Parameters
    ----------
    source : dict
        Dictionary with ``"id"``, ``"name"``, ``"conditions"``, ``"weights"``, ``"images"``
        and ``"points"`` keys.

    Attributes
    ----------
    id
        Identifier taken from ``source["id"]``.
    name : str
        Name of the Analysis; an empty string when the source name is null/empty.
    conditions : list of Condition objects
        The Conditions in the Analysis, each paired with its weight.
    images : list of Image objects
        Any Images available for the Analysis.
    points : list of Point objects
        Any significant Points from the Analysis.

    Notes
    -----
    Should the conditions be linked to the annotations, images, and points at all?
    """

    def __init__(self, source):
        self.id = source["id"]
        # Normalise a null name like Study/Studyset do, so __str__ cannot fail on None.
        self.name = source["name"] or ""
        # zip() pairs conditions with weights positionally and stops at the shorter list.
        self.conditions = [
            Condition(c, w) for c, w in zip(source["conditions"], source["weights"])
        ]
        self.images = [Image(i) for i in source["images"]]
        self.points = [Point(p) for p in source["points"]]

    def __repr__(self):
        """My Simple representation."""
        return repr(self.id)

    def __str__(self):
        """Return the name followed by the number of images and points."""
        return " ".join(
            [self.name, f"images: {len(self.images)}", f"points: {len(self.points)}"]
        )


class Condition:
    """One condition of an Analysis, together with its weight.

    Parameters
    ----------
    condition : dict
        Dictionary with ``"name"`` and ``"description"`` keys.
    weight
        Weight associated with the condition; stored as given.

    Attributes
    ----------
    name
    description
    weight

    Notes
    -----
    Condition-level Annotations, like condition-wise trial counts, are stored in the parent
    Analysis's Annotations, preferably with names that make it clear that they correspond to a
    specific Condition.
    """

    def __init__(self, condition, weight):
        # Copy the two descriptive fields straight from the source dictionary.
        for field in ("name", "description"):
            setattr(self, field, condition[field])
        self.weight = weight


class Annotation:
    """A set of notes about analyses, loaded from a NIMADS annotation dictionary.

    Parameters
    ----------
    source : dict
        Dictionary with ``"name"``, ``"id"`` and ``"notes"`` keys.

    Attributes
    ----------
    name
        Taken from ``source["name"]``.
    id
        Taken from ``source["id"]``.
    notes
        Taken from ``source["notes"]`` without modification.

    Notes
    -----
    Where would p(term|topic) and p(voxel|topic) arrays/DataFrames go? Having one Annotation per
    Analysis (for each Annotator), and storing these arrays in the Annotation, would make for
    *a lot* of duplication.
    The same goes for metadata/provenance, but that will generally be much lighter on memory than
    the arrays.

    Could be a dictionary with analysis objects as keys?
    (need to define __hash__ and __eq__ for Analysis)
    Or could use Analysis.id as key.
    """

    def __init__(self, source):
        annotation_name = source["name"]
        annotation_id = source["id"]
        annotation_notes = source["notes"]

        self.name = annotation_name
        self.id = annotation_id
        self.notes = annotation_notes


class Image:
    """A single statistical map from an Analysis.

    Parameters
    ----------
    source : dict
        Dictionary with ``"url"``, ``"filename"``, ``"space"`` and ``"value_type"`` keys.

    Attributes
    ----------
    url
    filename
    space
    value_type

    Notes
    -----
    Should we support remote paths, with some kind of fetching method?
    """

    def __init__(self, source):
        # Each attribute mirrors the source key of the same name.
        for field in ("url", "filename", "space", "value_type"):
            setattr(self, field, source[field])


class Point:
    """A single peak coordinate from an Analysis.

    Parameters
    ----------
    source : dict
        Dictionary with a ``"space"`` key and a ``"coordinates"`` entry whose first three
        items are read as x, y and z.

    Attributes
    ----------
    space
    x
    y
    z
    """

    def __init__(self, source):
        self.space = source["space"]
        coords = source["coordinates"]
        # Only the first three entries are read; any further entries are ignored.
        self.x, self.y, self.z = coords[0], coords[1], coords[2]
17 changes: 17 additions & 0 deletions nimare/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,30 @@
import os

import pytest
import requests

import nimare
from nimare import io
from nimare.tests.utils import get_test_data_path
from nimare.utils import get_template


def test_convert_nimads_to_dataset():
    """Check that a NIMADS studyset plus annotation converts to a NiMARE Dataset.

    The studyset and the annotation are both downloaded from the NeuroStore API.
    """
    studyset_url = "https://neurostore.xyz/api/studysets/78rWEjjjuC65?nested=true"
    annotation_url = "https://neurostore.xyz/api/annotations/4aLPSznu6jJa"

    studyset_json = requests.get(studyset_url).json()
    studyset = nimare.nimads.Studyset(studyset_json)

    annotation_json = requests.get(annotation_url).json()
    annotation = nimare.nimads.Annotation(annotation_json)

    converted = io.convert_nimads_to_dataset(studyset, annotation)

    assert isinstance(converted, nimare.dataset.Dataset)


def test_convert_sleuth_to_dataset_smoke():
"""Smoke test for Sleuth text file conversion."""
sleuth_file = os.path.join(get_test_data_path(), "test_sleuth_file.txt")
Expand Down
17 changes: 17 additions & 0 deletions nimare/tests/test_nimads.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import requests

from nimare import nimads


def test_load_nimads():
    """Check that a Studyset loads from NeuroStore JSON and accepts an Annotation."""
    studyset_url = "https://neurostore.xyz/api/studysets/78rWEjjjuC65?nested=true"
    annotation_url = "https://neurostore.xyz/api/annotations/4aLPSznu6jJa"

    studyset_json = requests.get(studyset_url).json()
    studyset = nimads.Studyset(studyset_json)
    assert isinstance(studyset, nimads.Studyset)

    annotation_json = requests.get(annotation_url).json()
    annotation = nimads.Annotation(annotation_json)

    # Assigning to ``annotations`` goes through the property setter on Studyset.
    studyset.annotations = annotation
Loading