Skip to content

Commit

Permalink
UMLS API (#223)
Browse files Browse the repository at this point in the history
* UMLS API

* PR feedback

* changed expected files post download
  • Loading branch information
dogversioning authored Apr 25, 2024
1 parent b6a79fe commit ad9d082
Show file tree
Hide file tree
Showing 11 changed files with 496 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ data_export/
cumulus_library_columns.json
output.sql
*generated.md
MRCONSO.RRF
*.zip

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
Empty file.
148 changes: 148 additions & 0 deletions cumulus_library/apis/umls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Class for communicating with the umls API"""
import os
import pathlib

import requests

from cumulus_library import base_utils, errors

# Release types accepted by UmlsApi.download_umls_files; any other target is
# rejected with an ApiError before a request is made. See
# https://documentation.uts.nlm.nih.gov/automating-downloads.html
VALID_UMLS_DOWNLOADS = [
    "rxnorm-full-monthly-release",
    "rxnorm-weekly-updates",
    "rxnorm-prescribable-content-monthly-release",
    "rxnorm-prescribable-content-weekly-updates",
    "rxnav-in-a-box",
    "snomed-ct-us-edition",
    "snomed-ct-us-edition-transitive-closure-resources",
    "snomed-ct-international-edition",
    "snomed-ct-core-problem-list-subset",
    "snomed-ct-to-icd-10-cm-mapping-resources",
    "snomed-ct-spanish-edition",
    "umls-metathesaurus-full-subset",
    "umls-metathesaurus-mrconso-file",
    "umls-full-release",
]


class UmlsApi:
def __init__(self, api_key: str | None = None, validator_key: str | None = None):
"""Creates a requests session for future calls, and validates the API key
:keyword api_key: A UMLS API key (will check for an env var named UMLS_API_KEY
if None)
:keyword validator_key: A UMLS API key for the calling application. Will be
set to the value of api_key if None (which is the current expected
behavior, since we don't want to be distributing UMLS keys)
"""

if api_key is None:
api_key = os.environ.get("UMLS_API_KEY")
if api_key is None:
raise errors.ApiError("No UMLS API key provided")
self.api_key = api_key
self.validator_key = validator_key or api_key

auth_payload = {"validatorApiKey": self.validator_key, "apiKey": self.api_key}
self.session = requests.Session()
response = self.session.get(
"https://utslogin.nlm.nih.gov/validateUser", params=auth_payload
)
if response.status_code == 401:
raise errors.ApiError("Invalid UMLS API validator key")
if response.text != "true":
raise errors.ApiError("Invalid UMLS API key")
self.session.auth = requests.auth.HTTPBasicAuth("apikey", api_key)

def get_vsac_valuesets(
self, url: str | None = None, oid: str | None = None
) -> list[dict]:
"""Gets a valueset, and any nested valuesets, from the VSAC API
:keyword url: an URL to target for a valueset (typically expected)
:keyword oid: A valuset OID
:returns: A list, containing the valueset and any referenced
valuesets.
Documentation on this API is available at
https://www.nlm.nih.gov/vsac/support/usingvsac/vsacfhirapi.html
TODO: do we need to support the FHIR operators?
TODO: do we need to support the v2 API?
https://www.nlm.nih.gov/vsac/support/usingvsac/vsacsvsapiv2.html
"""
if url is None:
url = "https://cts.nlm.nih.gov/fhir/res/ValueSet"
if oid:
url = f"{url}/{oid}"

# If we're inspecting url references in a VSAC response, they come back
# specifying a url that does not align with the actual implemented rest
# APIs, so we do some massaging
if "http:" in url:
url = url.replace("http:", "https:")
if "/res/" not in url:
url = url.replace("/fhir/", "/fhir/res/")
response = self.session.get(url)
if response.status_code == 404:
raise errors.ApiError(f"Url not found: {url}")
all_responses = [response.json()]
included_records = all_responses[0].get("compose", {}).get("include", [])
for record in included_records:
if "valueSet" in record:
valueset = self.get_vsac_valuesets(url=record["valueSet"][0])
all_responses.append(valueset[0])
return all_responses

def download_umls_files(
self,
target: str = "umls-metathesaurus-mrconso-file",
path: pathlib.Path | None = None,
):
"""Downloads an available file from the UMLS Download API and unzips it
target: the UMLS resource to download (default: the MRCONSO.RRF file)
path: the path on disk to write to
See https://documentation.uts.nlm.nih.gov/automating-downloads.html for more
info about the available downloads
"""
if target not in VALID_UMLS_DOWNLOADS:
raise errors.ApiError(
f"'{target}' is not a valid umls download type.\n\n"
f"Expected values: {','.join(VALID_UMLS_DOWNLOADS)}"
)
if path is None:
path = pathlib.Path.cwd()
release_payload = {"releaseType": target, "current": "true"}
file_meta = self.session.get(
"https://uts-ws.nlm.nih.gov/releases", params=release_payload
).json()[0]

# This particular endpoint requires the API key as a param rather than a
# basic auth header ¯\_(ツ)_/¯.
download_payload = {
"url": file_meta["downloadUrl"],
"apiKey": self.api_key,
}
download_res = requests.get(
"https://uts-ws.nlm.nih.gov/download", params=download_payload, stream=True
)

with open(path / file_meta["fileName"], "wb") as f:
chunks_read = 0
with base_utils.get_progress_bar() as progress:
task = progress.add_task(
f"Downloading {file_meta['fileName']}", total=None
)
for chunk in download_res.iter_content(chunk_size=1024):
f.write(chunk)
chunks_read += 1
progress.update(
task,
description=(
f"Downloading {file_meta['fileName']}: "
f"{chunks_read/1000} MB"
),
)
base_utils.unzip_file(path / file_meta["fileName"], path)
(path / file_meta["fileName"]).unlink()
8 changes: 8 additions & 0 deletions cumulus_library/base_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import datetime
import json
import os
import pathlib
import shutil
import zipfile
from contextlib import contextmanager
Expand Down Expand Up @@ -99,3 +100,10 @@ def zip_dir(read_path, write_path, archive_name):
f.write(file, file.relative_to(read_path))
file.unlink()
shutil.rmtree(read_path)


def unzip_file(file_path: pathlib.Path, write_path: pathlib.Path):
    """Extracts every member of the zip archive at file_path into write_path"""
    archive = zipfile.ZipFile(file_path, mode="r")
    with archive:
        archive.extractall(write_path)
4 changes: 4 additions & 0 deletions cumulus_library/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,7 @@ class StudyManifestParsingError(Exception):

class StudyManifestQueryError(Exception):
"""Errors related to data queries from StudyManifestParser"""


class ApiError(Exception):
    """Errors from external API calls (e.g. a rejected key or a missing resource)"""
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dev = [
test = [
"freezegun",
"pytest",
"requests-mock",
"responses"
]

[project.urls]
Expand Down
23 changes: 12 additions & 11 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from unittest import mock

import pytest
import requests_mock
import responses
import toml

from cumulus_library import cli, errors
Expand Down Expand Up @@ -522,22 +522,23 @@ def test_cli_stats_rebuild(tmp_path):
),
],
)
@responses.activate
def test_cli_upload_studies(mock_glob, args, status, login_error, raises):
mock_glob.side_effect = [
[Path(__file__)],
[Path(str(Path(__file__).parent) + "/test_data/count_synthea_patient.parquet")],
]
with raises:
with requests_mock.Mocker() as r:
if login_error:
r.post("https://upload.url.test/upload/", status_code=401)
else:
r.post(
"https://upload.url.test/upload/",
json={"url": "https://presigned.url.test", "fields": {"a": "b"}},
)
r.post("https://presigned.url.test", status_code=status)
cli.main(cli_args=[*args, "--url", "https://upload.url.test/upload/"])
if login_error:
responses.add(responses.POST, "https://upload.url.test/upload/", status=401)
else:
responses.add(
responses.POST,
"https://upload.url.test/upload/",
json={"url": "https://presigned.url.test", "fields": {"a": "b"}},
)
responses.add(responses.POST, "https://presigned.url.test", status=status)
cli.main(cli_args=[*args, "--url", "https://upload.url.test/upload/"])


@pytest.mark.parametrize(
Expand Down
62 changes: 62 additions & 0 deletions tests/test_data/apis/umls/include_valueset.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"--comment--": "this file is an example of a VSAC valueset response which includes references to other value sets. It is part of the public COVID data set, and is otherwise unmodified except for this comment, and changing the included valueset references to point at the single_valueset.json",
"resourceType": "ValueSet",
"id": "2.16.840.1.113883.3.3616.200.110.102.7001",
"meta": {
"versionId": "17",
"lastUpdated": "2023-12-21T17:43:03.000-05:00",
"profile": [
"http://hl7.org/fhir/StructureDefinition/shareablevalueset",
"http://hl7.org/fhir/us/cqfmeasures/StructureDefinition/computable-valueset-cqfm",
"http://hl7.org/fhir/us/cqfmeasures/StructureDefinition/publishable-valueset-cqfm"
]
},
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/valueset-author",
"valueString": "Clinical Architecture"
},
{
"url": "http://hl7.org/fhir/StructureDefinition/resource-lastReviewDate",
"valueDate": "2023-06-21"
},
{
"url": "http://hl7.org/fhir/StructureDefinition/valueset-effectiveDate",
"valueDate": "2022-12-13"
}
],
"url": "http://cts.nlm.nih.gov/fhir/ValueSet/2.16.840.1.113883.3.3616.200.110.102.7001",
"identifier": [
{
"system": "urn:ietf:rfc:3986",
"value": "urn:oid:2.16.840.1.113883.3.3616.200.110.102.7001"
}
],
"version": "20221213",
"name": "COVID19ICD10CMValueSets",
"title": "COVID19 ICD10CM Value Sets",
"status": "retired",
"date": "2023-06-21T18:16:37-04:00",
"publisher": "Clinical Architecture",
"description": "This is a collection of value sets for signs, symptoms, diagnoses, administrative data, and other concepts related to COVID-19. These value sets are curated by Clinical Architecture on behalf of CareEvolution, MITRE, SNOMED, and other COVID-19 Interoperability Alliance (https://covid19ia.org) collaborators.",
"jurisdiction": [
{
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason",
"valueCode": "unknown"
}
]
}
],
"purpose": "(Clinical Focus: ),(Data Element Scope: ),(Inclusion Criteria: ),(Exclusion Criteria: )",
"compose": {
"include": [
{
"valueSet": [
"http://cts.nlm.nih.gov/fhir/ValueSet/2.16.840.1.113883.3.3616.200.110.102.3186"
]
}
]
}
}
86 changes: 86 additions & 0 deletions tests/test_data/apis/umls/single_valueset.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{
"--comment--": "this file is an example of a VSAC valueset response. It is part of the public COVID data set, and is otherwise unmodified except for this comment",
"resourceType": "ValueSet",
"id": "2.16.840.1.113883.3.3616.200.110.102.3186",
"meta": {
"versionId": "5",
"lastUpdated": "2023-12-21T17:43:03.000-05:00",
"profile": [
"http://hl7.org/fhir/StructureDefinition/shareablevalueset",
"http://hl7.org/fhir/us/cqfmeasures/StructureDefinition/computable-valueset-cqfm",
"http://hl7.org/fhir/us/cqfmeasures/StructureDefinition/publishable-valueset-cqfm"
]
},
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/valueset-author",
"valueString": "Clinical Architecture"
},
{
"url": "http://hl7.org/fhir/StructureDefinition/resource-lastReviewDate",
"valueDate": "2023-06-21"
},
{
"url": "http://hl7.org/fhir/StructureDefinition/valueset-effectiveDate",
"valueDate": "2020-07-14"
}
],
"url": "http://cts.nlm.nih.gov/fhir/ValueSet/2.16.840.1.113883.3.3616.200.110.102.3186",
"identifier": [
{
"system": "urn:ietf:rfc:3986",
"value": "urn:oid:2.16.840.1.113883.3.3616.200.110.102.3186"
}
],
"version": "20200714",
"name": "COVID19ICD10CMValueSetForAorticValveStenosis",
"title": "COVID19 ICD10CM Value Set for Aortic Valve Stenosis",
"status": "retired",
"experimental": false,
"date": "2023-06-21T18:16:37-04:00",
"publisher": "Clinical Architecture",
"jurisdiction": [
{
"extension": [
{
"url": "http://hl7.org/fhir/StructureDefinition/data-absent-reason",
"valueCode": "unknown"
}
]
}
],
"purpose": "(Clinical Focus: This set of values contains terms related to aortic valve stenosis.),(Data Element Scope: ),(Inclusion Criteria: Includes aortic valve stenosis),(Exclusion Criteria: )",
"compose": {
"include": [
{
"system": "http://hl7.org/fhir/sid/icd-10-cm",
"concept": [
{
"code": "I06.0",
"display": "Rheumatic aortic stenosis"
},
{
"code": "I06.2",
"display": "Rheumatic aortic stenosis with insufficiency"
},
{
"code": "I35.0",
"display": "Nonrheumatic aortic (valve) stenosis"
},
{
"code": "I35.2",
"display": "Nonrheumatic aortic (valve) stenosis with insufficiency"
},
{
"code": "Q23.0",
"display": "Congenital stenosis of aortic valve"
},
{
"code": "Q25.3",
"display": "Supravalvular aortic stenosis"
}
]
}
]
}
}
Binary file added tests/test_data/apis/umls/umls.zip
Binary file not shown.
Loading

0 comments on commit ad9d082

Please sign in to comment.