diff --git a/cumulus_library/studies/core/builder_patient.py b/cumulus_library/studies/core/builder_patient.py index 253515df..425e9d37 100644 --- a/cumulus_library/studies/core/builder_patient.py +++ b/cumulus_library/studies/core/builder_patient.py @@ -18,9 +18,49 @@ class PatientBuilder(BaseTableBuilder): display_text = "Creating Patient tables..." + @staticmethod + def make_extension_query( + schema: str, + cursor: databases.DatabaseCursor, + parser: databases.DatabaseParser, + name: str, + url: str, + ) -> str: + has_extensions = sql_utils.is_field_present( + schema=schema, + cursor=cursor, + parser=parser, + source_table="patient", + source_col="extension", + expected={ + "extension": { + "url": {}, + "valueCoding": ["code", "display", "system"], + }, + "url": {}, + }, + ) + if has_extensions: + config = sql_utils.ExtensionConfig( + source_table="patient", + source_id="id", + target_table=f"core__patient_ext_{name}", + target_col_prefix=name, + fhir_extension=url, + ext_systems=["ombCategory", "detailed"], + is_array=True, + ) + return base_templates.get_extension_denormalize_query(config) + else: + return base_templates.get_ctas_empty_query( + schema_name=schema, + table_name=f"core__patient_ext_{name}", + table_cols=["id", "system", f"{name}_code", f"{name}_display"], + ) + def prepare_queries( self, - cursor: object, + cursor: databases.DatabaseCursor, schema: str, *args, parser: databases.DatabaseParser = None, @@ -41,18 +81,13 @@ def prepare_queries( "fhirpath": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", }, ] - for extension in extension_types: - config = sql_utils.ExtensionConfig( - source_table="patient", - source_id="id", - target_table=f"core__patient_ext_{extension['name']}", - target_col_prefix=extension["name"], - fhir_extension=extension["fhirpath"], - ext_systems=["ombCategory", "detailed", "text"], - is_array=True, + self.queries.append( + self.make_extension_query( + schema, cursor, parser, extension["name"], extension["fhirpath"] + ) ) - self.queries.append(base_templates.get_extension_denormalize_query(config)) + validated_schema = sql_utils.validate_schema( cursor, schema, expected_table_cols, parser ) diff --git a/cumulus_library/studies/core/reference_sql/builder_condition.sql b/cumulus_library/studies/core/reference_sql/builder_condition.sql index fcb6b80f..e0c2740e 100644 --- a/cumulus_library/studies/core/reference_sql/builder_condition.sql +++ b/cumulus_library/studies/core/reference_sql/builder_condition.sql @@ -419,7 +419,6 @@ CREATE TABLE core__condition AS WITH temp_condition AS ( SELECT c.id, - c.category, c.subject.reference AS subject_ref, c.encounter.reference AS encounter_ref, cca.code, @@ -431,9 +430,7 @@ WITH temp_condition AS ( date_trunc('month', date(from_iso8601_timestamp(c."recordedDate"))) AS recordedDate_month, date_trunc('year', date(from_iso8601_timestamp(c."recordedDate"))) - AS recordedDate_year, - c.verificationStatus, - c.clinicalStatus + AS recordedDate_year FROM condition AS c LEFT JOIN core__condition_codable_concepts_all AS cca ON c.id = cca.id ) diff --git a/cumulus_library/studies/core/reference_sql/builder_documentreference.sql b/cumulus_library/studies/core/reference_sql/builder_documentreference.sql index afeb8ca8..e8fdb4b5 100644 --- a/cumulus_library/studies/core/reference_sql/builder_documentreference.sql +++ b/cumulus_library/studies/core/reference_sql/builder_documentreference.sql @@ -153,7 +153,6 @@ CREATE TABLE core__documentreference AS WITH temp_documentreference AS ( SELECT DISTINCT dr.id, - dr.type, dr.status, dr.docStatus, dr.context, diff --git a/cumulus_library/studies/core/reference_sql/builder_encounter.sql b/cumulus_library/studies/core/reference_sql/builder_encounter.sql index 8ae6ba58..0c6180b0 100644 --- a/cumulus_library/studies/core/reference_sql/builder_encounter.sql +++ b/cumulus_library/studies/core/reference_sql/builder_encounter.sql @@ -273,9 +273,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__encounter_dn_servicetype" AS ( SELECT * FROM ( VALUES - (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) + (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) ) - AS t ("id","code","code_system","display") + AS t ("id","row","code","code_system","display") WHERE 1 = 0 -- ensure empty table ); @@ -285,9 +285,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__encounter_dn_priority" AS ( SELECT * FROM ( VALUES - (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) + (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) ) - AS t ("id","code","code_system","display") + AS t ("id","row","code","code_system","display") WHERE 1 = 0 -- ensure empty table ); @@ -571,9 +571,9 @@ temp_encounter_nullable AS ( SELECT DISTINCT e.id, e.status, - e.class, + e.class.code AS class_code, + e.class.system AS class_code_system, e.subject.reference AS subject_ref, - e.period, date(from_iso8601_timestamp(e.period.start)) AS period_start, date_trunc('day', date(from_iso8601_timestamp(e."period"."end"))) AS period_end_day, @@ -598,7 +598,8 @@ temp_encounter AS ( SELECT DISTINCT e.id, e.status, - e.class, + e.class_code, + e.class_code_system, e.subject_ref, e.period_start, e.period_start_day, @@ -664,7 +665,7 @@ SELECT DISTINCT concat('Encounter/', e.id) AS encounter_ref FROM temp_encounter AS e LEFT JOIN core__fhir_mapping_expected_act_encounter_code_v3 AS eac - ON e.class.code = eac.found + ON e.class_code = eac.found LEFT JOIN core__fhir_act_encounter_code_v3 AS ac ON eac.expected = ac.code INNER JOIN core__patient AS p ON e.subject_ref = p.subject_ref WHERE diff --git a/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql b/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql index 52ee94bc..3b2b0213 100644 --- a/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql +++ b/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql @@ -100,7 +100,6 @@ WITH temp_mr AS ( date(from_iso8601_timestamp(mr.authoredOn)) AS authoredOn, date_trunc('month', date(from_iso8601_timestamp(mr."authoredOn"))) AS authoredOn_month, - cast(NULL as varchar) AS display, mr.reportedBoolean, mr.dosageInstruction, mr.subject.reference AS subject_ref, diff --git a/cumulus_library/studies/core/reference_sql/builder_observation.sql b/cumulus_library/studies/core/reference_sql/builder_observation.sql index d9f7a226..0a24dd33 100644 --- a/cumulus_library/studies/core/reference_sql/builder_observation.sql +++ b/cumulus_library/studies/core/reference_sql/builder_observation.sql @@ -244,9 +244,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__observation_component_valuecodeableconc AS ( SELECT * FROM ( VALUES - (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) + (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) ) - AS t ("id","code","code_system","display") + AS t ("id","row","code","code_system","display") WHERE 1 = 0 -- ensure empty table ); @@ -256,9 +256,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__observation_dn_interpretation" AS ( SELECT * FROM ( VALUES - (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) + (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) ) - AS t ("id","code","code_system","display") + AS t ("id","row","code","code_system","display") WHERE 1 = 0 -- ensure empty table ); @@ -304,9 +304,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__observation_dn_dataabsentreason" AS ( SELECT * FROM ( VALUES - (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) + (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) ) - AS t ("id","code","code_system","display") + AS t ("id","row","code","code_system","display") WHERE 1 = 0 -- ensure empty table ); @@ -319,6 +319,8 @@ WITH temp_observation AS ( SELECT o.id, o.status, + o.encounter.reference AS encounter_ref, + o.subject.reference AS subject_ref, o.valueString, o.valueQuantity.value AS valueQuantity_value, o.valueQuantity.comparator AS valueQuantity_comparator, @@ -345,9 +347,7 @@ WITH temp_observation AS ( odvcc.display AS valueCodeableConcept_display, odda.code AS dataAbsentReason_code, odda.code_system AS dataAbsentReason_code_system, - odda.display AS dataAbsentReason_display, - o.subject.reference AS subject_ref, - o.encounter.reference AS encounter_ref + odda.display AS dataAbsentReason_display FROM observation AS o LEFT JOIN core__observation_dn_category AS odcat ON o.id = odcat.id LEFT JOIN core__observation_dn_code AS odc ON o.id = odc.id @@ -375,7 +375,7 @@ SELECT valueCodeableConcept_display, valueQuantity_value, valueQuantity_comparator, - valueQuantity_unit,valueQuantity_code_system AS valueQuantity_system, -- old alias + valueQuantity_unit, valueQuantity_code_system, valueQuantity_code, valueString, diff --git a/cumulus_library/studies/core/reference_sql/builder_patient.sql b/cumulus_library/studies/core/reference_sql/builder_patient.sql index 968144e2..4e8bcbc8 100644 --- a/cumulus_library/studies/core/reference_sql/builder_patient.sql +++ b/cumulus_library/studies/core/reference_sql/builder_patient.sql @@ -43,23 +43,6 @@ CREATE TABLE core__patient_ext_race AS ( AND ext_child.ext.valuecoding.display != '' ), - system_text AS ( - SELECT DISTINCT - s.id AS id, - '2' AS priority, - 'text' AS system, -- noqa: RF04 - ext_child.ext.valuecoding.code AS race_code, - ext_child.ext.valuecoding.display AS race_display - FROM - patient AS s, - UNNEST(s.extension) AS ext_parent (ext), - UNNEST(ext_parent.ext.extension) AS ext_child (ext) - WHERE - ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race' - AND ext_child.ext.url = 'text' - AND ext_child.ext.valuecoding.display != '' - ), - union_table AS ( SELECT id, @@ -76,14 +59,6 @@ CREATE TABLE core__patient_ext_race AS ( race_code, race_display FROM system_detailed - UNION - SELECT - id, - priority, - system, - race_code, - race_display - FROM system_text ORDER BY id, priority ) @@ -118,7 +93,7 @@ CREATE TABLE core__patient_ext_race AS ( ) AS race_display, ROW_NUMBER() OVER ( - PARTITION BY id, system + PARTITION BY id ORDER BY priority ASC ) AS available_priority FROM union_table @@ -166,23 +141,6 @@ CREATE TABLE core__patient_ext_ethnicity AS ( AND ext_child.ext.valuecoding.display != '' ), - system_text AS ( - SELECT DISTINCT - s.id AS id, - '2' AS priority, - 'text' AS system, -- noqa: RF04 - ext_child.ext.valuecoding.code AS ethnicity_code, - ext_child.ext.valuecoding.display AS ethnicity_display - FROM - patient AS s, - UNNEST(s.extension) AS ext_parent (ext), - UNNEST(ext_parent.ext.extension) AS ext_child (ext) - WHERE - ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity' - AND ext_child.ext.url = 'text' - AND ext_child.ext.valuecoding.display != '' - ), - union_table AS ( SELECT id, @@ -199,14 +157,6 @@ CREATE TABLE core__patient_ext_ethnicity AS ( ethnicity_code, ethnicity_display FROM system_detailed - UNION - SELECT - id, - priority, - system, - ethnicity_code, - ethnicity_display - FROM system_text ORDER BY id, priority ) @@ -241,7 +191,7 @@ CREATE TABLE core__patient_ext_ethnicity AS ( ) AS ethnicity_display, ROW_NUMBER() OVER ( - PARTITION BY id, system + PARTITION BY id ORDER BY priority ASC ) AS available_priority FROM union_table diff --git a/cumulus_library/template_sql/extension_denormalize.sql.jinja b/cumulus_library/template_sql/extension_denormalize.sql.jinja index 6ed53ec6..f664fd16 100644 --- a/cumulus_library/template_sql/extension_denormalize.sql.jinja +++ b/cumulus_library/template_sql/extension_denormalize.sql.jinja @@ -72,7 +72,7 @@ CREATE TABLE {{ target_table }} AS ( {%- endif %} ROW_NUMBER() OVER ( - PARTITION BY id, system + PARTITION BY id ORDER BY priority ASC ) AS available_priority FROM union_table diff --git a/cumulus_library/template_sql/sql_utils.py b/cumulus_library/template_sql/sql_utils.py index 7493f8a1..4e4bf787 100644 --- a/cumulus_library/template_sql/sql_utils.py +++ b/cumulus_library/template_sql/sql_utils.py @@ -203,7 +203,7 @@ def is_field_populated( parser: databases.DatabaseParser, source_table: str, hierarchy: list[tuple], - expected: list | None = None, + expected: list | dict | None = None, ) -> bool: """Traverses a complex field and determines if it exists and has data @@ -217,7 +217,7 @@ def is_field_populated( If none, we assume it is a CodeableConcept. :returns: a boolean indicating if valid data is present. """ - if not _check_schema_if_exists( + if not is_field_present( schema=schema, cursor=cursor, parser=parser, @@ -256,14 +256,14 @@ def is_field_populated( return True -def _check_schema_if_exists( +def is_field_present( *, schema: str, cursor: databases.DatabaseCursor, parser: databases.DatabaseParser, source_table: str, source_col: str, - expected: str | None = None, + expected: list | dict | None = None, ) -> bool: """Validation check for a column existing, and having the expected schema diff --git a/tests/core/test_core.py b/tests/core/test_core.py index dc9035c3..8a2beeae 100644 --- a/tests/core/test_core.py +++ b/tests/core/test_core.py @@ -169,9 +169,6 @@ def test_core_medication_query(medication_datasources, contains, omits): assert item not in query -# Patient schemas aren't fully pre-examined yet (we currently assume extensions exist). -# So we expect this to fail at the moment. -@pytest.mark.xfail def test_core_empty_database(tmp_path): """Verify that we can still generate core tables with no data filled in""" testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False) @@ -193,23 +190,6 @@ def test_core_tiny_database(tmp_path): assert {e[0] for e in encounters} == {"EncA"} -def test_core_multiple_patient_addresses(tmp_path): - """Verify that a patient with multiple addresses resolves to a single entry""" - testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False) - testbed.add_patient("None") - testbed.add_patient( - "Multi", - address=[ - {"city": "Boston"}, # null postal code - should not be picked up - {"postalCode": "12345"}, - {"postalCode": "00000"}, - ], - ) - con = testbed.build() - patients = con.sql("SELECT id, postalCode_3 FROM core__patient").fetchall() - assert {("None", "cumulus__none"), ("Multi", "123")} == set(patients) - - def test_core_multiple_doc_encounters(tmp_path): """Verify that a DocRef with multiple encounters resolves to multiple entries""" testbed = testbed_utils.LocalTestbed(tmp_path) diff --git a/tests/core/test_core_patient.py b/tests/core/test_core_patient.py new file mode 100644 index 00000000..f0fbb03b --- /dev/null +++ b/tests/core/test_core_patient.py @@ -0,0 +1,170 @@ +"""Tests for core__patient""" + +import pytest + +from tests import testbed_utils + + +@pytest.mark.parametrize( + "addresses,expected", + [ + (None, "cumulus__none"), # no address + ([{"city": "Boston"}], "cumulus__none"), # partial, but useless address + ( # multiple addresses + [ + {"city": "Boston"}, # null postal code - should not be picked up + {"postalCode": "12345"}, + {"postalCode": "00000"}, + ], + "123", + ), + ], +) +def test_core_patient_addresses(tmp_path, addresses, expected): + """Verify that addresses are parsed out""" + testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False) + testbed.add_patient("A", address=addresses) + con = testbed.build() + codes = con.sql("SELECT postalCode_3 FROM core__patient").fetchall() + assert [(expected,)] == codes + + +@pytest.mark.parametrize( + "extensions,expected_ethnicity,expected_race", + [ + (None, "unknown", "unknown"), # no extension + ( # basic ombCategory + [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "detailed", # ignored in favor of ombCategory + "valueCoding": {"display": "EthDetailed"}, + }, + { + "url": "ombCategory", + "valueCoding": {"display": "EthA"}, + }, + ], + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": {"display": "RaceA"}, + }, + { + "url": "detailed", # ignored in favor of ombCategory + "valueCoding": {"display": "RaceDetailed"}, + }, + ], + }, + ], + "etha", + "racea", + ), + ( # will use detailed if we must + [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "detailed", + "valueCoding": {"display": "EthDetailed"}, + } + ], + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "detailed", + "valueCoding": {"display": "RaceDetailed"}, + } + ], + }, + ], + "ethdetailed", + "racedetailed", + ), + ( # will ignore entries without a display + [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "ombCategory", + "valueCoding": { + "display": "" + }, # empty string (instead of null) + }, + { + "url": "ombCategory", + "valueCoding": {"display": "EthB"}, + }, + ], + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": {"code": "just-a-code"}, + }, + { + "url": "detailed", + "valueCoding": {"display": "RaceDetailed"}, + }, + ], + }, + ], + "ethb", + "racedetailed", + ), + ( # multiples get joined + [ + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity", + "extension": [ + { + "url": "detailed", + "valueCoding": {"display": "EthB"}, + }, + { + "url": "detailed", + "valueCoding": {"display": "EthA"}, + }, + ], + }, + { + "url": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-race", + "extension": [ + { + "url": "ombCategory", + "valueCoding": {"display": "RaceA"}, + }, + { + "url": "ombCategory", + "valueCoding": {"display": "RaceB"}, + }, + ], + }, + ], + "etha; ethb", + "racea; raceb", + ), + ], +) +def test_core_patient_extensions( + tmp_path, extensions, expected_ethnicity, expected_race +): + """Verify that we grab race & ethnicity correctly""" + testbed = testbed_utils.LocalTestbed(tmp_path, with_patient=False) + testbed.add_patient("A", extension=extensions) + con = testbed.build() + displays = con.sql( + "SELECT ethnicity_display, race_display FROM core__patient" + ).fetchall() + assert [(expected_ethnicity, expected_race)] == displays diff --git a/tests/test_templates.py b/tests/test_templates.py index 2b61e240..146d09fd 100644 --- a/tests/test_templates.py +++ b/tests/test_templates.py @@ -362,7 +362,7 @@ def test_extension_denormalize_creation(): ) AS prefix_display, ROW_NUMBER() OVER ( - PARTITION BY id, system + PARTITION BY id ORDER BY priority ASC ) AS available_priority FROM union_table diff --git a/tests/testbed_utils.py b/tests/testbed_utils.py index f8c6ae80..f68f5692 100644 --- a/tests/testbed_utils.py +++ b/tests/testbed_utils.py @@ -163,23 +163,6 @@ def add_patient( "id": row_id, "birthDate": birth_date, "gender": gender, - # TODO: fix the core SQL to check for extensions in the schema - # before querying them. In the meantime, we can just ensure - # those fields exist, ready to be queried. - "extension": [ - { - "url": "", - "extension": [ - { - "url": "", - "valueCoding": { - "code": "", - "display": "", - }, - } - ], - } - ], **kwargs, }, )