diff --git a/cumulus_library/apis/umls.py b/cumulus_library/apis/umls.py index b6592b6e..74f778ec 100644 --- a/cumulus_library/apis/umls.py +++ b/cumulus_library/apis/umls.py @@ -57,7 +57,11 @@ def __init__(self, api_key: str | None = None, validator_key: str | None = None) self.session.auth = requests.auth.HTTPBasicAuth("apikey", api_key) def get_vsac_valuesets( - self, url: str | None = None, oid: str | None = None, action: str = "expansion" + self, + url: str | None = None, + oid: str | None = None, + action: str = "expansion", + offset: int = 0, ) -> list[dict]: """Gets a valueset, and any nested valuesets, from the VSAC API @@ -82,7 +86,7 @@ def get_vsac_valuesets( if oid: url = f"{url}/{oid}" # TODO: Do we need to support the remaining FHIR operators? - if action == "expansion": + if action == "expansion" and "/$expand" not in url: url = url + "/$expand" # If we're inspecting url references in a VSAC response, they come back # specifying a url that does not align with the actual implemented rest @@ -91,16 +95,32 @@ def get_vsac_valuesets( url = url.replace("http:", "https:") if "/res/" not in url: url = url.replace("/fhir/", "/fhir/res/") - response = self.session.get(url) + match action: + case "definition": + response = self.session.get(url) + case "expansion": + response = self.session.get(url + f"?offset={offset}") if response.status_code == 404: raise errors.ApiError(f"Url not found: {url}") all_responses = [response.json()] + if action == "expansion": + # Do we need to fetch the next page? + # Note: the VSAC API hard codes the page size to 1000 + # and ignores the count param in the FHIR ValueSet expand API + # (https://www.hl7.org/fhir/ValueSet-operation-expand.html), + # so we just hardcode an expansion of 1000 and hope it + # doesn't change + if all_responses[0]["expansion"]["total"] > offset + 1000: + all_responses += self.get_vsac_valuesets( + action=action, url=url, offset=offset + 1000 + ) if action == "definition": included_records = all_responses[0].get("compose", {}).get("include", []) for record in included_records: if "valueSet" in record: valueset = self.get_vsac_valuesets(action=action, url=record["valueSet"][0]) all_responses.append(valueset[0]) + return all_responses def get_latest_umls_file_release(self, target: str): diff --git a/tests/test_data/apis/umls/expansion_valueset.json b/tests/test_data/apis/umls/expansion_valueset.json index f17321c6..d6731e33 100644 --- a/tests/test_data/apis/umls/expansion_valueset.json +++ b/tests/test_data/apis/umls/expansion_valueset.json @@ -25,7 +25,7 @@ "expansion": { "identifier": "urn:uuid:a0298877-7908-4db0-a19f-449c857b3c44", "timestamp": "2024-07-25T14:55:42-04:00", - "total": 836, + "total": 1001, "offset": 0, "parameter": [ { "name": "count", diff --git a/tests/test_umls_api.py b/tests/test_umls_api.py index e458d148..99c8a30f 100644 --- a/tests/test_umls_api.py +++ b/tests/test_umls_api.py @@ -116,7 +116,7 @@ def get_valueset_data(file_name): "expansion", VALUESET_URL + "/" + EXPANSION_VALUESET_OID, None, - [EXPANSION_VALUESET_OID], + [EXPANSION_VALUESET_OID, EXPANSION_VALUESET_OID], does_not_raise(), ), ("definition", None, None, [], pytest.raises(errors.ApiError)), @@ -142,7 +142,13 @@ def test_get_valueset(action, url, oid, expected_oids, raises): ) responses.add( responses.GET, - VALUESET_URL + "/" + EXPANSION_VALUESET_OID + "/$expand", + VALUESET_URL + "/" + EXPANSION_VALUESET_OID + "/$expand?offset=0", + body=get_valueset_data("expansion_valueset.json"), + status=200, + ) + responses.add( + responses.GET, + VALUESET_URL + "/" + EXPANSION_VALUESET_OID + "/$expand?offset=1000", body=get_valueset_data("expansion_valueset.json"), status=200, )