diff --git a/dandi/dandiapi.py b/dandi/dandiapi.py
index 63712dfae..1e9fb2df7 100644
--- a/dandi/dandiapi.py
+++ b/dandi/dandiapi.py
@@ -63,6 +63,13 @@
 
 T = TypeVar("T")
 
+# Short names of data standards accepted by has_data_standard(), mapped to
+# the RRID that identifies each standard in a Dandiset's metadata
+DATA_STANDARD_MAP = {
+    "NWB": "RRID:SCR_015242",
+    "BIDS": "RRID:SCR_016124",
+}
+
 
 class AssetType(Enum):
     """
@@ -1223,6 +1230,48 @@ def iter_upload_raw_asset(
             self, metadata=asset_metadata, jobs=jobs, replacing=replace_asset
         )
 
+    def has_data_standard(self, data_standard: str) -> bool:
+        """
+        Returns True if the Dandiset contains one or more files of the indicated
+        standard.  Otherwise, returns False.  This is determined by checking for
+        the RRID of the standard in the "dataStandard" field of the assetsSummary
+        of the Dandiset's raw metadata.
+
+        :param data_standard:
+            can be "NWB", "BIDS", or an RRID of a standard (any string starting
+            with "RRID:" is accepted)
+        :type data_standard: str
+        :raises ValueError:
+            if ``data_standard`` is neither a key of `DATA_STANDARD_MAP` nor a
+            string starting with "RRID:"
+        """
+        # Imported here so that the method does not rely on the module's
+        # top-level imports providing `warnings`
+        import warnings
+
+        if data_standard in DATA_STANDARD_MAP:
+            rrid = DATA_STANDARD_MAP[data_standard]
+        elif data_standard.startswith("RRID:"):
+            rrid = data_standard
+        else:
+            raise ValueError(
+                "'data_standard' must be an RRID (of form 'RRID:XXX_NNNNNNN') or one "
+                f"of the following values: {', '.join(DATA_STANDARD_MAP)}"
+            )
+        assets_summary = self.get_raw_metadata().get("assetsSummary")
+        if assets_summary is None:
+            warnings.warn(
+                f"The raw metadata of RemoteDandiset {self.identifier} does not"
+                " contain 'assetsSummary'. "
+                f"Assuming that it does not contain {data_standard}.",
+                stacklevel=2,
+            )
+            return False
+        # A missing or null "dataStandard" is treated as "no standards listed";
+        # entries without an "identifier" key can never match and are skipped.
+        standards = assets_summary.get("dataStandard") or ()
+        return any(std.get("identifier") == rrid for std in standards)
+
 
 class BaseRemoteAsset(ABC, APIBase):
     """
diff --git a/dandi/tests/test_dandiapi.py b/dandi/tests/test_dandiapi.py
index 09717d7b9..8d1c140b2 100644
--- a/dandi/tests/test_dandiapi.py
+++ b/dandi/tests/test_dandiapi.py
@@ -701,3 +701,26 @@ def test_rename_type_mismatch(text_dandiset: SampleDandiset, dest: str) -> None:
     assert asset1a.get_raw_metadata()["path"] == "file.txt"
     with pytest.raises(NotFoundError):
         text_dandiset.dandiset.get_asset_by_path(dest)
+
+
+def test_dandiset_has_data_standard() -> None:
+    # Relies on the metadata of Dandiset 000003, version 0.210812.1448,
+    # listing NWB (and not BIDS) under "dataStandard"
+    with DandiAPIClient() as client:
+        dandiset = client.get_dandiset("000003", version_id="0.210812.1448")
+        assert dandiset.has_data_standard("NWB")
+        assert dandiset.has_data_standard("RRID:SCR_015242")
+        assert not dandiset.has_data_standard("RRID:XXX_000000")
+        assert not dandiset.has_data_standard("BIDS")
+
+
+def test_dandiset_has_data_standard_incorrect_arg() -> None:
+    with DandiAPIClient() as client:
+        dandiset = client.get_dandiset("000003", version_id="0.210812.1448")
+        with pytest.raises(ValueError) as exc_info:
+            dandiset.has_data_standard("NWC")
+        assert (
+            str(exc_info.value)
+            == "'data_standard' must be an RRID (of form 'RRID:XXX_NNNNNNN') or one of the "
+            "following values: NWB, BIDS"
+        )