From 0d0b56ed0f2b4446a212a3ee84f73725bd1ad598 Mon Sep 17 00:00:00 2001 From: Michael Terry Date: Tue, 23 Apr 2024 15:44:43 -0400 Subject: [PATCH] feat: support input database without patient extensions Features: - The core__patient no longer requires the presence of race/ethnicity extensions to build (it now uses the deep-schema checking code) Bug Fixes: - If there are both detailed and ombCategory values for an extension, no longer report both - just prefer the ombCategory version. --- .../studies/core/builder_patient.py | 60 ++++++-- .../extension_denormalize.sql.jinja | 2 +- cumulus_library/template_sql/sql_utils.py | 8 +- tests/core/test_core.py | 20 --- tests/core/test_core_patient.py | 136 ++++++++++++++++++ tests/testbed_utils.py | 17 --- 6 files changed, 190 insertions(+), 53 deletions(-) create mode 100644 tests/core/test_core_patient.py diff --git a/cumulus_library/studies/core/builder_patient.py b/cumulus_library/studies/core/builder_patient.py index 253515df..0b0e28d6 100644 --- a/cumulus_library/studies/core/builder_patient.py +++ b/cumulus_library/studies/core/builder_patient.py @@ -18,9 +18,52 @@ class PatientBuilder(BaseTableBuilder): display_text = "Creating Patient tables..." + @staticmethod + def make_extension_query( + schema: str, + cursor: databases.DatabaseCursor, + parser: databases.DatabaseParser, + name: str, + url: str, + ) -> str: + has_extensions = sql_utils.is_field_present( + schema=schema, + cursor=cursor, + parser=parser, + source_table="patient", + source_col="extension", + expected={ + "extension": { + "url": {}, + "valueCoding": [ + "code", + "system", + ], + }, + "url": {}, + }, + ) + if has_extensions: + config = sql_utils.ExtensionConfig( + source_table="patient", + source_id="id", + target_table=f"core__patient_ext_{name}", + target_col_prefix=name, + fhir_extension=url, + ext_systems=["ombCategory", "detailed"], + is_array=True, + ) + return base_templates.get_extension_denormalize_query(config) + else: + return base_templates.get_ctas_empty_query( + schema_name=schema, + table_name=f"core__patient_ext_{name}", + table_cols=["id", "system", f"{name}_code", f"{name}_display"], + ) + def prepare_queries( self, - cursor: object, + cursor: databases.DatabaseCursor, schema: str, *args, parser: databases.DatabaseParser = None, @@ -41,18 +84,13 @@ def prepare_queries( "fhirpath": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", }, ] - for extension in extension_types: - config = sql_utils.ExtensionConfig( - source_table="patient", - source_id="id", - target_table=f"core__patient_ext_{extension['name']}", - target_col_prefix=extension["name"], - fhir_extension=extension["fhirpath"], - ext_systems=["ombCategory", "detailed", "text"], - is_array=True, + self.queries.append( + self.make_extension_query( + schema, cursor, parser, extension["name"], extension["fhirpath"] + ) ) - self.queries.append(base_templates.get_extension_denormalize_query(config)) + validated_schema = sql_utils.validate_schema( cursor, schema, expected_table_cols, parser ) diff --git a/cumulus_library/template_sql/extension_denormalize.sql.jinja b/cumulus_library/template_sql/extension_denormalize.sql.jinja index 6ed53ec6..f664fd16 100644 --- a/cumulus_library/template_sql/extension_denormalize.sql.jinja +++ b/cumulus_library/template_sql/extension_denormalize.sql.jinja @@ -72,7 +72,7 @@ CREATE TABLE {{ target_table }} AS ( {%- endif %} ROW_NUMBER() OVER ( - PARTITION BY id, system + PARTITION BY id ORDER BY priority ASC ) AS available_priority FROM union_table diff --git a/cumulus_library/template_sql/sql_utils.py b/cumulus_library/template_sql/sql_utils.py index 7493f8a1..4e4bf787 100644 --- a/cumulus_library/template_sql/sql_utils.py +++ b/cumulus_library/template_sql/sql_utils.py @@ -203,7 +203,7 @@ def is_field_populated( parser: databases.DatabaseParser, source_table: str, hierarchy: list[tuple], - expected: list | None = None, + expected: list | dict | None = None, ) -> bool: """Traverses a complex field and determines if it exists and has data @@ -217,7 +217,7 @@ def is_field_populated( If none, we assume it is a CodeableConcept. :returns: a boolean indicating if valid data is present. """ - if not _check_schema_if_exists( + if not is_field_present( schema=schema, cursor=cursor, parser=parser, @@ -256,14 +256,14 @@ def is_field_populated( return True -def _check_schema_if_exists( +def is_field_present( *, schema: str, cursor: databases.DatabaseCursor, parser: databases.DatabaseParser, source_table: str, source_col: str, - expected: str | None = None, + expected: list | dict | None = None, ) -> bool: """Validation check for a column existing, and having the expected schema diff --git a/tests/core/test_core.py b/tests/core/test_core.py index dc9035c3..8a2beeae 100644 --- a/tests/core/test_core.py +++ b/tests/core/test_core.py @@ -169,9 +169,6 @@ def test_core_medication_query(medication_datasources, contains, omits): assert item not in query -# Patient schemas aren't fully pre-examined yet (we currently assume extensions exist). -# So we expect this to fail at the moment. -@pytest.mark.xfail def test_core_empty_database(tmp_path): """Verify that we can still generate core tables with no data filled in""" testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False) @@ -193,23 +190,6 @@ def test_core_tiny_database(tmp_path): assert {e[0] for e in encounters} == {"EncA"} -def test_core_multiple_patient_addresses(tmp_path): - """Verify that a patient with multiple addresses resolves to a single entry""" - testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False) - testbed.add_patient("None") - testbed.add_patient( - "Multi", - address=[ - {"city": "Boston"}, # null postal code - should not be picked up - {"postalCode": "12345"}, - {"postalCode": "00000"}, - ], - ) - con = testbed.build() - patients = con.sql("SELECT id, postalCode_3 FROM core__patient").fetchall() - assert {("None", "cumulus__none"), ("Multi", "123")} == set(patients) - - def test_core_multiple_doc_encounters(tmp_path): """Verify that a DocRef with multiple encounters resolves to multiple entries""" testbed = testbed_utils.LocalTestbed(tmp_path) diff --git a/tests/core/test_core_patient.py b/tests/core/test_core_patient.py new file mode 100644 index 00000000..87e61123 --- /dev/null +++ b/tests/core/test_core_patient.py @@ -0,0 +1,136 @@ +"""Tests for core__patient""" + +import pytest + +from tests import testbed_utils + + +@pytest.mark.parametrize( + "addresses,expected", + [ + (None, "cumulus__none"), # no address + ([{"city": "Boston"}], "cumulus__none"), # partial, but useless address + ( # multiple addresses + [ + {"city": "Boston"}, # null postal code - should not be picked up + {"postalCode": "12345"}, + {"postalCode": "00000"}, + ], + "123", + ), + ], +) +def test_core_patient_addresses(tmp_path, addresses, expected): + """Verify that addresses are parsed out""" + testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False) + testbed.add_patient("A", address=addresses) + con = testbed.build() + codes = con.sql("SELECT postalCode_3 FROM core__patient").fetchall() + assert [(expected,)] == codes + + +@pytest.mark.parametrize( + "extensions,expected_ethnicity,expected_race", + [ + (None, "unknown", "unknown"), # no extension + ( # basic ombCategory + [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "detailed", # ignored in favor of ombCategory + "valueCoding": {"display": "EthDetailed"}, + }, + { + "url": "ombCategory", + "valueCoding": {"display": "EthA"}, + }, + ], + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": {"display": "RaceA"}, + }, + { + "url": "detailed", # ignored in favor of ombCategory + "valueCoding": {"display": "RaceDetailed"}, + }, + ], + }, + ], + "etha", + "racea", + ), + ( # will use detailed if we must + [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "detailed", + "valueCoding": {"display": "EthDetailed"}, + } + ], + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "detailed", + "valueCoding": {"display": "RaceDetailed"}, + } + ], + }, + ], + "ethdetailed", + "racedetailed", + ), + ( # multiples get joined + [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "detailed", + "valueCoding": {"display": "EthB"}, + }, + { + "url": "detailed", + "valueCoding": {"display": "EthA"}, + }, + ], + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": {"display": "RaceA"}, + }, + { + "url": "ombCategory", + "valueCoding": {"display": "RaceB"}, + }, + ], + }, + ], + "etha; ethb", + "racea; raceb", + ), + ], +) +def test_core_patient_extensions( + tmp_path, extensions, expected_ethnicity, expected_race +): + """Verify that we grab race & ethnicity correctly""" + testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False) + testbed.add_patient("A", extension=extensions) + con = testbed.build() + displays = con.sql( + "SELECT ethnicity_display, race_display FROM core__patient" + ).fetchall() + assert [(expected_ethnicity, expected_race)] == displays diff --git a/tests/testbed_utils.py b/tests/testbed_utils.py index f8c6ae80..f68f5692 100644 --- a/tests/testbed_utils.py +++ b/tests/testbed_utils.py @@ -163,23 +163,6 @@ def add_patient( "id": row_id, "birthDate": birth_date, "gender": gender, - # TODO: fix the core SQL to check for extensions in the schema - # before querying them. In the meantime, we can just ensure - # those fields exist, ready to be queried. - "extension": [ - { - "url": "", - "extension": [ - { - "url": "", - "valueCoding": { - "code": "", - "display": "", - }, - } - ], - } - ], **kwargs, }, )