Skip to content

Commit

Permalink
feat: support input database without patient extensions
Browse files Browse the repository at this point in the history
Features:
- The core__patient no longer requires the presence of race/ethnicity
  extensions to build (it now uses the deep-schema checking code)

Bug Fixes:
- If there are both detailed and ombCategory values for an extension,
  no longer report both - just prefer the ombCategory version.
  • Loading branch information
mikix committed Apr 23, 2024
1 parent c578855 commit 0d0b56e
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 53 deletions.
60 changes: 49 additions & 11 deletions cumulus_library/studies/core/builder_patient.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,52 @@
class PatientBuilder(BaseTableBuilder):
display_text = "Creating Patient tables..."

@staticmethod
def make_extension_query(
schema: str,
cursor: databases.DatabaseCursor,
parser: databases.DatabaseParser,
name: str,
url: str,
) -> str:
has_extensions = sql_utils.is_field_present(
schema=schema,
cursor=cursor,
parser=parser,
source_table="patient",
source_col="extension",
expected={
"extension": {
"url": {},
"valueCoding": [
"code",
"system",
],
},
"url": {},
},
)
if has_extensions:
config = sql_utils.ExtensionConfig(
source_table="patient",
source_id="id",
target_table=f"core__patient_ext_{name}",
target_col_prefix=name,
fhir_extension=url,
ext_systems=["ombCategory", "detailed"],
is_array=True,
)
return base_templates.get_extension_denormalize_query(config)
else:
return base_templates.get_ctas_empty_query(
schema_name=schema,
table_name=f"core__patient_ext_{name}",
table_cols=["id", "system", f"{name}_code", f"{name}_display"],
)

def prepare_queries(
self,
cursor: object,
cursor: databases.DatabaseCursor,
schema: str,
*args,
parser: databases.DatabaseParser = None,
Expand All @@ -41,18 +84,13 @@ def prepare_queries(
"fhirpath": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity",
},
]

for extension in extension_types:
config = sql_utils.ExtensionConfig(
source_table="patient",
source_id="id",
target_table=f"core__patient_ext_{extension['name']}",
target_col_prefix=extension["name"],
fhir_extension=extension["fhirpath"],
ext_systems=["ombCategory", "detailed", "text"],
is_array=True,
self.queries.append(
self.make_extension_query(
schema, cursor, parser, extension["name"], extension["fhirpath"]
)
)
self.queries.append(base_templates.get_extension_denormalize_query(config))

validated_schema = sql_utils.validate_schema(
cursor, schema, expected_table_cols, parser
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ CREATE TABLE {{ target_table }} AS (
{%- endif %}
ROW_NUMBER()
OVER (
PARTITION BY id, system
PARTITION BY id
ORDER BY priority ASC
) AS available_priority
FROM union_table
Expand Down
8 changes: 4 additions & 4 deletions cumulus_library/template_sql/sql_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def is_field_populated(
parser: databases.DatabaseParser,
source_table: str,
hierarchy: list[tuple],
expected: list | None = None,
expected: list | dict | None = None,
) -> bool:
"""Traverses a complex field and determines if it exists and has data
Expand All @@ -217,7 +217,7 @@ def is_field_populated(
If none, we assume it is a CodeableConcept.
:returns: a boolean indicating if valid data is present.
"""
if not _check_schema_if_exists(
if not is_field_present(
schema=schema,
cursor=cursor,
parser=parser,
Expand Down Expand Up @@ -256,14 +256,14 @@ def is_field_populated(
return True


def _check_schema_if_exists(
def is_field_present(
*,
schema: str,
cursor: databases.DatabaseCursor,
parser: databases.DatabaseParser,
source_table: str,
source_col: str,
expected: str | None = None,
expected: list | dict | None = None,
) -> bool:
"""Validation check for a column existing, and having the expected schema
Expand Down
20 changes: 0 additions & 20 deletions tests/core/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,6 @@ def test_core_medication_query(medication_datasources, contains, omits):
assert item not in query


# Patient schemas aren't fully pre-examined yet (we currently assume extensions exist).
# So we expect this to fail at the moment.
@pytest.mark.xfail
def test_core_empty_database(tmp_path):
"""Verify that we can still generate core tables with no data filled in"""
testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False)
Expand All @@ -193,23 +190,6 @@ def test_core_tiny_database(tmp_path):
assert {e[0] for e in encounters} == {"EncA"}


def test_core_multiple_patient_addresses(tmp_path):
"""Verify that a patient with multiple addresses resolves to a single entry"""
testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False)
testbed.add_patient("None")
testbed.add_patient(
"Multi",
address=[
{"city": "Boston"}, # null postal code - should not be picked up
{"postalCode": "12345"},
{"postalCode": "00000"},
],
)
con = testbed.build()
patients = con.sql("SELECT id, postalCode_3 FROM core__patient").fetchall()
assert {("None", "cumulus__none"), ("Multi", "123")} == set(patients)


def test_core_multiple_doc_encounters(tmp_path):
"""Verify that a DocRef with multiple encounters resolves to multiple entries"""
testbed = testbed_utils.LocalTestbed(tmp_path)
Expand Down
136 changes: 136 additions & 0 deletions tests/core/test_core_patient.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
"""Tests for core__patient"""

import pytest

from tests import testbed_utils


@pytest.mark.parametrize(
"addresses,expected",
[
(None, "cumulus__none"), # no address
([{"city": "Boston"}], "cumulus__none"), # partial, but useless address
( # multiple addresses
[
{"city": "Boston"}, # null postal code - should not be picked up
{"postalCode": "12345"},
{"postalCode": "00000"},
],
"123",
),
],
)
def test_core_patient_addresses(tmp_path, addresses, expected):
"""Verify that addresses are parsed out"""
testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False)
testbed.add_patient("A", address=addresses)
con = testbed.build()
codes = con.sql("SELECT postalCode_3 FROM core__patient").fetchall()
assert [(expected,)] == codes


@pytest.mark.parametrize(
"extensions,expected_ethnicity,expected_race",
[
(None, "unknown", "unknown"), # no extension
( # basic ombCategory
[
{
"url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity",
"extension": [
{
"url": "detailed", # ignored in favor of ombCategory
"valueCoding": {"display": "EthDetailed"},
},
{
"url": "ombCategory",
"valueCoding": {"display": "EthA"},
},
],
},
{
"url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race",
"extension": [
{
"url": "ombCategory",
"valueCoding": {"display": "RaceA"},
},
{
"url": "detailed", # ignored in favor of ombCategory
"valueCoding": {"display": "RaceDetailed"},
},
],
},
],
"etha",
"racea",
),
( # will use detailed if we must
[
{
"url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity",
"extension": [
{
"url": "detailed",
"valueCoding": {"display": "EthDetailed"},
}
],
},
{
"url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race",
"extension": [
{
"url": "detailed",
"valueCoding": {"display": "RaceDetailed"},
}
],
},
],
"ethdetailed",
"racedetailed",
),
( # multiples get joined
[
{
"url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity",
"extension": [
{
"url": "detailed",
"valueCoding": {"display": "EthB"},
},
{
"url": "detailed",
"valueCoding": {"display": "EthA"},
},
],
},
{
"url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race",
"extension": [
{
"url": "ombCategory",
"valueCoding": {"display": "RaceA"},
},
{
"url": "ombCategory",
"valueCoding": {"display": "RaceB"},
},
],
},
],
"etha; ethb",
"racea; raceb",
),
],
)
def test_core_patient_extensions(
tmp_path, extensions, expected_ethnicity, expected_race
):
"""Verify that we grab race & ethnicity correctly"""
testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False)
testbed.add_patient("A", extension=extensions)
con = testbed.build()
displays = con.sql(
"SELECT ethnicity_display, race_display FROM core__patient"
).fetchall()
assert [(expected_ethnicity, expected_race)] == displays
17 changes: 0 additions & 17 deletions tests/testbed_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,23 +163,6 @@ def add_patient(
"id": row_id,
"birthDate": birth_date,
"gender": gender,
# TODO: fix the core SQL to check for extensions in the schema
# before querying them. In the meantime, we can just ensure
# those fields exist, ready to be queried.
"extension": [
{
"url": "",
"extension": [
{
"url": "",
"valueCoding": {
"code": "",
"display": "",
},
}
],
}
],
**kwargs,
},
)
Expand Down

0 comments on commit 0d0b56e

Please sign in to comment.