diff --git a/nimare/io.py b/nimare/io.py
index 9155f2f16..1976ce957 100644
--- a/nimare/io.py
+++ b/nimare/io.py
@@ -26,6 +26,49 @@
 }
 
 
+def _create_name(resource):
+    """Take a study or analysis object and create a DataFrame-friendly, readable name."""
+    return "_".join(resource.name.split()) if resource.name else resource.id
+
+
+def convert_nimads_to_dataset(studyset, annotation=None):
+    """Convert a NIMADS Studyset object to a NiMARE Dataset."""
+    def _analysis_to_dict(study, analysis, annotation=None):
+        result = {
+            "metadata": {
+                "authors": study.authors,
+                "journal": study.publication,
+                "title": study.name,
+                "sample_sizes": [study.metadata.get("sample_size")],
+            },
+            "coords": {
+                "space": analysis.points[0].space,
+                "x": [p.x for p in analysis.points],
+                "y": [p.y for p in analysis.points],
+                "z": [p.z for p in analysis.points],
+            },
+        }
+
+        if annotation:
+            # Use a default to avoid StopIteration when an analysis has no note.
+            notes = next((n for n in annotation.notes if n["analysis"] == analysis.id), None)
+            if notes is not None:
+                result["labels"] = notes["note"]
+
+        return result
+
+    def _study_to_dict(study, annotation=None):
+        return {
+            "metadata": {
+                "authors": study.authors,
+                "journal": study.publication,
+                "title": study.name,
+            },
+            "contrasts": {
+                _create_name(a): _analysis_to_dict(study, a, annotation)
+                for a in study.analyses
+            },
+        }
+
+    return Dataset(
+        {_create_name(s): _study_to_dict(s, annotation=annotation) for s in studyset.studies}
+    )
+
+
 def convert_neurosynth_to_dict(
     coordinates_file, metadata_file,
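For reviewers, a minimal sketch of how the new converter might be called. The file names are hypothetical; `Studyset.from_nimads` and `Annotation` come from the new `nimare/nimads.py` module below, and the annotation JSON is assumed to carry the `name`/`id`/`notes` keys that `Annotation.__init__` reads.

```python
import json

from nimare import nimads
from nimare.io import convert_nimads_to_dataset

# Load a NIMADS-format studyset from disk (hypothetical filename).
studyset = nimads.Studyset.from_nimads("studyset.json")

# Optionally attach labels from an annotation (hypothetical filename).
with open("annotation.json") as fo:
    annotation = nimads.Annotation(json.load(fo))

dset = convert_nimads_to_dataset(studyset, annotation=annotation)
```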
diff --git a/nimare/nimads.py b/nimare/nimads.py
new file mode 100644
index 000000000..ec7070f75
--- /dev/null
+++ b/nimare/nimads.py
@@ -0,0 +1,318 @@
+"""NIMADS-related classes for NiMARE."""
+import json
+
+
+class Studyset:
+    """A collection of studies for meta-analysis.
+
+    This is the primary target for Estimators and Transformers in NiMARE.
+
+    Attributes
+    ----------
+    studies : list of Study objects
+        The Study objects comprising the Studyset.
+    """
+
+    def __init__(self, source, target_space=None, mask=None):
+        self.id = source["id"]
+        self.name = source["name"] or ""
+        self.studies = [Study(s) for s in source["studies"]]
+        self._annotations = []
+
+    def __repr__(self):
+        """Return a concise string representation of the Studyset."""
+        return repr("Studyset: " + self.id)
+
+    def __str__(self):
+        """Give useful information about the Studyset."""
+        return f"Studyset: {self.name} :: studies: {len(self.studies)}"
+
+    @property
+    def annotations(self):
+        """Return existing Annotations."""
+        return self._annotations
+
+    @annotations.setter
+    def annotations(self, annotation):
+        # TODO: validate that the Annotation's analysis IDs match the Studyset's, e.g.:
+        # ss_analysis_ids = set([a.id for s in self.studies for a in s.analyses])
+        # annot_analysis_ids = set([n['analysis'] for n in annotation.notes])
+        self._annotations.append(annotation)
+
+    @annotations.deleter
+    def annotations(self, annotation_id=None):
+        # NOTE: ``del studyset.annotations`` can only clear all Annotations, since
+        # ``del`` cannot pass annotation_id; the id-based branch requires calling
+        # the deleter function directly.
+        if annotation_id:
+            self._annotations = [a for a in self._annotations if a.id != annotation_id]
+        else:
+            self._annotations = []
+
+    @classmethod
+    def from_nimads(cls, filename):
+        """Create a Studyset from a NIMADS JSON file."""
+        with open(filename, "r") as fn:
+            nimads = json.load(fn)
+
+        return cls(nimads)
+
+    def to_nimads(self, filename):
+        """Write the Studyset to a NIMADS JSON file."""
+        ...
+
+    def load(self, filename):
+        """Load a Studyset from a pickled file."""
+        ...
+
+    def save(self, filename):
+        """Write the Studyset to a pickled file."""
+        ...
+
+    def copy(self):
+        """Create a copy of the Studyset."""
+        ...
+
+    def slice(self, analyses):
+        """Create a new Studyset with only requested Analyses."""
+        ...
+
+    def merge(self, right):
+        """Merge a separate Studyset into the current one."""
+        ...
+
+    def update_image_path(self, new_path):
+        """Point to a new location for image files on the local filesystem."""
+        ...
+
+    def get_analyses_by_coordinates(self, xyz, r=None, n=None):
+        """Extract a list of Analyses with at least one Point near the requested coordinates."""
+        ...
+
+    def get_analyses_by_mask(self, img):
+        """Extract a list of Analyses with at least one Point in the specified mask."""
+        ...
+
+    def get_analyses_by_annotations(self):
+        """Extract a list of Analyses with a given label/annotation."""
+        ...
+
+    def get_analyses_by_texts(self):
+        """Extract a list of Analyses with a given text."""
+        ...
+
+    def get_analyses_by_images(self):
+        """Extract a list of Analyses with a given image."""
+        ...
+
+    def get_analyses_by_metadata(self):
+        """Extract a list of Analyses with a metadata field/value."""
+        ...
+
+    def get_points(self, analyses):
+        """Collect Points associated with specified Analyses."""
+        ...
+
+    def get_annotations(self, analyses):
+        """Collect Annotations associated with specified Analyses."""
+        ...
+
+    def get_texts(self, analyses):
+        """Collect texts associated with specified Analyses."""
+        ...
+
+    def get_images(self, analyses):
+        """Collect image files associated with specified Analyses."""
+        ...
+
+    def get_metadata(self, analyses):
+        """Collect metadata associated with specified Analyses."""
+        ...
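As a usage sketch, the `Studyset` above can also be built directly from a NeuroStore response, mirroring the tests added below; note that the `annotations` setter appends to the existing list rather than replacing it.

```python
import requests

from nimare import nimads

# Endpoints taken from the tests in this PR; network access is required.
data = requests.get("https://neurostore.xyz/api/studysets/78rWEjjjuC65?nested=true").json()
studyset = nimads.Studyset(data)
print(studyset)  # e.g., "Studyset: <name> :: studies: <N>"

annot_data = requests.get("https://neurostore.xyz/api/annotations/4aLPSznu6jJa").json()
studyset.annotations = nimads.Annotation(annot_data)  # appends to the list
```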
+
+
+class Study:
+    """A collection of Analyses from the same paper.
+
+    Attributes
+    ----------
+    id : str
+        A unique identifier for the Study.
+    analyses : list of Analysis objects
+        The Analysis objects comprising the Study.
+    """
+
+    def __init__(self, source):
+        self.id = source["id"]
+        self.name = source["name"] or ""
+        self.authors = source["authors"] or ""
+        self.publication = source["publication"] or ""
+        self.metadata = source.get("metadata", {})
+        self.analyses = [Analysis(a) for a in source["analyses"]]
+
+    def __repr__(self):
+        """Return a concise string representation of the Study."""
+        return repr(self.id)
+
+    def __str__(self):
+        """Give useful information about the Study."""
+        return str(" ".join([self.name, f"analyses: {len(self.analyses)}"]))
+
+    def get_analyses(self):
+        """Collect Analyses from the Study.
+
+        Notes
+        -----
+        What filters, if any, should we support in this method?
+        """
+        ...
+
+
+class Analysis:
+    """A single statistical analysis from a Study.
+
+    Attributes
+    ----------
+    id : str
+        A unique identifier for the Analysis.
+    conditions : list of Condition objects
+        The Conditions in the Analysis.
+    annotations : list of Annotation objects
+        Any Annotations available for the Analysis.
+        Each Annotation should come from the same Annotator.
+    texts : dict
+        A dictionary of source: text pairs.
+    images : dict of Image objects
+        A dictionary of type: Image pairs.
+    points : list of Point objects
+        Any significant Points from the Analysis.
+
+    Notes
+    -----
+    Should the images attribute be a list instead, if the Images contain type information?
+
+    Should the conditions be linked to the annotations, images, and points at all?
+    """
+
+    def __init__(self, source):
+        self.id = source["id"]
+        self.name = source["name"]
+        self.conditions = [
+            Condition(c, w) for c, w in zip(source["conditions"], source["weights"])
+        ]
+        self.images = [Image(i) for i in source["images"]]
+        self.points = [Point(p) for p in source["points"]]
+
+    def __repr__(self):
+        """Return a concise string representation of the Analysis."""
+        return repr(self.id)
+
+    def __str__(self):
+        """Give useful information about the Analysis."""
+        return str(
+            " ".join([self.name, f"images: {len(self.images)}", f"points: {len(self.points)}"])
+        )
+
+
+class Condition:
+    """A condition within an Analysis.
+
+    Attributes
+    ----------
+    name : str
+        Name of the Condition.
+    description : str
+        Description of the Condition.
+    weight : float
+        Weight of the Condition within the parent Analysis.
+
+    Notes
+    -----
+    Condition-level Annotations, like condition-wise trial counts, are stored in the parent
+    Analysis's Annotations, preferably with names that make it clear that they correspond to a
+    specific Condition.
+    """
+
+    def __init__(self, condition, weight):
+        self.name = condition["name"]
+        self.description = condition["description"]
+        self.weight = weight
+
+
+class Annotation:
+    """A collection of labels and associated weights from the same Annotator.
+
+    Attributes
+    ----------
+    name : str
+        Name of the Annotation.
+    id : str
+        A unique identifier for the Annotation.
+    notes : list of dict
+        The analysis-wise labels and weights comprising the Annotation.
+        This is the main attribute of interest for NeuroStore.
+
+    Notes
+    -----
+    Where would p(term|topic) and p(voxel|topic) arrays/DataFrames go? Having one Annotation per
+    Analysis (for each Annotator), and storing these arrays in the Annotation, would make for
+    *a lot* of duplication.
+    The same goes for metadata/provenance, but that will generally be much lighter on memory than
+    the arrays.
+
+    Could be a dictionary with analysis objects as keys?
+    (need to define __hash__ and __eq__ for Analysis)
+    Or could use Analysis.id as key.
+    """
+
+    def __init__(self, source):
+        self.name = source["name"]
+        self.id = source["id"]
+        self.notes = source["notes"]
+
+
+class Note:
+    """A Note within an Annotation.
+
+    Attributes
+    ----------
+    analysis : Analysis object
+        The Analysis the Note is associated with.
+    note : dict
+        The attributes pertaining to the Analysis.
+    """
+
+    def __init__(self, analysis, note):
+        self.analysis = analysis
+        self.note = note
+
+
+class Image:
+    """A single statistical map from an Analysis.
+
+    Attributes
+    ----------
+    url : str
+        Remote location of the image file.
+    filename : str
+        Name of the image file.
+    space : str
+        Coordinate space of the Image.
+    value_type : str
+        Type of values in the Image (e.g., statistic type).
+
+    Notes
+    -----
+    Should we support remote paths, with some kind of fetching method?
+    """
+
+    def __init__(self, source):
+        self.url = source["url"]
+        self.filename = source["filename"]
+        self.space = source["space"]
+        self.value_type = source["value_type"]
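For clarity, the `notes` stored on an `Annotation` are plain dicts rather than `Note` objects. Based on how `convert_nimads_to_dataset` above and `filter_analyses` below consume them, one entry looks roughly like the following; the IDs and label names are hypothetical.

```python
note_entry = {
    "study": "abc123",       # hypothetical Study ID
    "analysis": "def456",    # hypothetical Analysis ID
    "note": {"include": True, "sample_size": 23},  # free-form label: value pairs
}
```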
+
+
+class Point:
+    """A single peak coordinate from an Analysis.
+
+    Attributes
+    ----------
+    x : float
+        The x-coordinate of the Point.
+    y : float
+        The y-coordinate of the Point.
+    z : float
+        The z-coordinate of the Point.
+    space : str
+        The coordinate space of the Point.
+    """
+
+    def __init__(self, source):
+        self.space = source["space"]
+        self.x = source["coordinates"][0]
+        self.y = source["coordinates"][1]
+        self.z = source["coordinates"][2]
diff --git a/nimare/tests/test_io.py b/nimare/tests/test_io.py
index c65be81ae..44bdaa327 100644
--- a/nimare/tests/test_io.py
+++ b/nimare/tests/test_io.py
@@ -2,6 +2,7 @@
 import os
 
 import pytest
+import requests
 
 import nimare
 from nimare import io
@@ -9,6 +10,22 @@
 from nimare.utils import get_template
 
 
+def test_convert_nimads_to_dataset():
+    """Test conversion of a NIMADS JSON studyset to a NiMARE Dataset."""
+    nimads_data = requests.get(
+        "https://neurostore.xyz/api/studysets/78rWEjjjuC65?nested=true"
+    ).json()
+    studyset = nimare.nimads.Studyset(nimads_data)
+
+    annotation_data = requests.get("https://neurostore.xyz/api/annotations/4aLPSznu6jJa").json()
+    annotation = nimare.nimads.Annotation(annotation_data)
+
+    dset = io.convert_nimads_to_dataset(studyset, annotation)
+
+    assert isinstance(dset, nimare.dataset.Dataset)
+
+
 def test_convert_sleuth_to_dataset_smoke():
     """Smoke test for Sleuth text file conversion."""
     sleuth_file = os.path.join(get_test_data_path(), "test_sleuth_file.txt")
diff --git a/nimare/tests/test_nimads.py b/nimare/tests/test_nimads.py
new file mode 100644
index 000000000..d5d6f49aa
--- /dev/null
+++ b/nimare/tests/test_nimads.py
@@ -0,0 +1,17 @@
+"""Tests for NIMADS-related classes."""
+import requests
+
+from nimare import nimads
+
+
+def test_load_nimads():
+    """Test loading a NIMADS studyset and attaching an Annotation."""
+    nimads_data = requests.get(
+        "https://neurostore.xyz/api/studysets/78rWEjjjuC65?nested=true"
+    ).json()
+    studyset = nimads.Studyset(nimads_data)
+    assert isinstance(studyset, nimads.Studyset)
+
+    annotation_data = requests.get("https://neurostore.xyz/api/annotations/4aLPSznu6jJa").json()
+    annotation = nimads.Annotation(annotation_data)
+
+    studyset.annotations = annotation
+    assert annotation in studyset.annotations
diff --git a/nimare/tests/test_workflows.py b/nimare/tests/test_workflows.py
index 98de585a6..c43c01d5d 100644
--- a/nimare/tests/test_workflows.py
+++ b/nimare/tests/test_workflows.py
@@ -5,6 +5,11 @@
 from nimare.tests.utils import get_test_data_path
 
 
+def test_neurosynth_compose_workflow():
+    """Smoke test for the neurosynth-compose workflow."""
+    workflows.compose_workflow("6YA5FqTPNJEq")
+
+
 def test_ale_workflow_function_smoke(tmp_path_factory):
     """Run smoke test for Sleuth ALE workflow."""
     tmpdir = tmp_path_factory.mktemp("test_ale_workflow_function_smoke")
diff --git a/nimare/workflows/__init__.py b/nimare/workflows/__init__.py
index ecc0c2ab6..7ef8b348a 100644
--- a/nimare/workflows/__init__.py
+++ b/nimare/workflows/__init__.py
@@ -2,5 +2,6 @@
 
 from .ale import ale_sleuth_workflow
 from .macm import macm_workflow
+from .neurosynth_compose import compose_workflow
 
-__all__ = ["ale_sleuth_workflow", "macm_workflow"]
+__all__ = ["ale_sleuth_workflow", "macm_workflow", "compose_workflow"]
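With the export above, the new workflow is reachable from the package namespace. A brief sketch, using the meta-analysis ID from the test; running it requires network access to the compose and NeuroStore services.

```python
from nimare import workflows

results = workflows.compose_workflow("6YA5FqTPNJEq")
```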
diff --git a/nimare/workflows/neurosynth_compose.py b/nimare/workflows/neurosynth_compose.py
new file mode 100644
index 000000000..ab0ea6211
--- /dev/null
+++ b/nimare/workflows/neurosynth_compose.py
@@ -0,0 +1,93 @@
+"""Perform neurosynth-compose meta-analyses."""
+from importlib import import_module
+
+import requests
+
+from ..io import convert_nimads_to_dataset
+from ..nimads import Annotation, Studyset
+
+COMPOSE_URL = "https://compose.neurosynth.org"
+STORE_URL = "https://neurostore.org"
+
+
+def compose_workflow(meta_id):
+    """Run a neurosynth-compose meta-analysis.
+
+    Parameters
+    ----------
+    meta_id : str
+        ID of the meta-analysis on neurosynth-compose.
+    """
+    data = requests.get(f"{COMPOSE_URL}/api/meta-analyses/{meta_id}?nested=true").json()
+    dset = load_meta_analysis(data["studyset"], data.get("annotation"))
+    workflow = load_specification(data["specification"])
+
+    return workflow(dset)
+
+
+def load_meta_analysis(studyset, annotation=None):
+    """Download requisite data and load it into a NiMARE Dataset."""
+    if not studyset["snapshot"]:
+        ss = Studyset(
+            requests.get(
+                f"{STORE_URL}/api/studysets/{studyset['neurostore_id']}?nested=true"
+            ).json()
+        )
+    else:
+        ss = Studyset(studyset["snapshot"])
+
+    if annotation:
+        if not annotation["snapshot"]:
+            annot = Annotation(
+                requests.get(f"{STORE_URL}/api/annotations/{annotation['neurostore_id']}").json()
+            )
+        else:
+            annot = Annotation(annotation["snapshot"])
+    else:
+        annot = None
+
+    return convert_nimads_to_dataset(ss, annotation=annot)
+
+
+def load_specification(spec):
+    """Return a function that runs the specified analysis on a Dataset."""
+    est_mod = import_module(".".join(["nimare", "meta", spec["type"].lower()]))
+    estimator = getattr(est_mod, spec["estimator"]["type"])
+    if spec["estimator"].get("args"):
+        # Flatten any nested **kwargs into the top-level argument dictionary.
+        if spec["estimator"]["args"].get("**kwargs") is not None:
+            for k, v in spec["estimator"]["args"]["**kwargs"].items():
+                spec["estimator"]["args"][k] = v
+            spec["estimator"]["args"].pop("**kwargs")
+        estimator_init = estimator(**spec["estimator"]["args"])
+    else:
+        estimator_init = estimator()
+
+    if spec.get("corrector"):
+        cor_mod = import_module(".".join(["nimare", "correct"]))
+        corrector = getattr(cor_mod, spec["corrector"]["type"])
+        corrector_args = spec["corrector"].get("args")
+        if corrector_args:
+            # Flatten any nested **kwargs, as above.
+            if corrector_args.get("**kwargs") is not None:
+                for k, v in corrector_args["**kwargs"].items():
+                    corrector_args[k] = v
+                corrector_args.pop("**kwargs")
+            corrector_init = corrector(**corrector_args)
+        else:
+            corrector_init = corrector()
+    else:
+        corrector_init = None
+
+    if corrector_init is not None:
+        return lambda dset: corrector_init.transform(estimator_init.fit(dset))
+    else:
+        return lambda dset: estimator_init.fit(dset)
+
+
+def filter_analyses(specification, annotation):
+    """Determine which analyses should be included in this meta-analysis."""
+    column = specification["filter"]
+    keep_ids = []
+    for annot in annotation["notes"]:
+        if annot["note"].get(column):
+            keep_ids.append(f"{annot['study']}-{annot['analysis']}")
+    return keep_ids
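Finally, a hedged sketch of the specification structure that `load_specification` expects, inferred from the keys the function reads; the overall shape is an assumption about the compose API, though `ALE` and `FDRCorrector` are real NiMARE classes and `kernel__fwhm`/`alpha` are real parameters.

```python
from nimare.workflows.neurosynth_compose import load_specification

# Hypothetical specification, shaped to match the keys load_specification reads.
spec = {
    "type": "CBMA",  # lowercased to locate the nimare.meta.cbma module
    "estimator": {"type": "ALE", "args": {"kernel__fwhm": 10.0}},
    "corrector": {"type": "FDRCorrector", "args": {"alpha": 0.05}},
}
workflow = load_specification(spec)
# result = workflow(dset)  # dset from convert_nimads_to_dataset
```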