From 69a194c7db49d50a6057c056096a20c39acafcd7 Mon Sep 17 00:00:00 2001 From: Michael Terry Date: Mon, 26 Aug 2024 12:50:21 -0400 Subject: [PATCH] Add AllergyIntolerance support to core study - Adds core__allergyintolerance - Adds core__count_allergyintolerance_month --- cumulus_library/.sqlfluff | 14 + cumulus_library/__init__.py | 2 +- cumulus_library/statistics/counts.py | 32 +- .../statistics_templates/count.sql.jinja | 21 +- .../statistics_templates/counts_templates.py | 3 + .../core/builder_allergyintolerance.py | 66 ++++ .../allergyintolerance.sql.jinja | 138 +++++++ cumulus_library/studies/core/count_core.py | 12 + cumulus_library/studies/core/manifest.toml | 10 +- .../builder_allergyintolerance.sql | 340 ++++++++++++++++++ .../core/reference_sql/builder_condition.sql | 9 +- .../builder_documentreference.sql | 39 +- .../core/reference_sql/builder_encounter.sql | 137 ++++++- .../builder_medicationrequest.sql | 4 +- .../reference_sql/builder_observation.sql | 215 ++++++++--- .../core/reference_sql/builder_patient.sql | 200 ++++++++++- .../studies/core/reference_sql/count_core.sql | 99 +++++ .../codeable_concept_denormalize.sql.jinja | 3 +- docs/core-study-details.md | 41 +++ tests/conftest.py | 11 + tests/core/test_core.py | 5 + tests/core/test_core_allergy.py | 239 ++++++++++++ tests/core/test_core_meds.py | 8 +- .../core__count_allergyintolerance_month.csv | 33 ++ ...re__count_allergyintolerance_month.parquet | Bin 0 -> 2401 bytes tests/test_base_templates.py | 3 +- tests/test_cli.py | 2 +- tests/test_counts_templates.py | 9 + .../core/core__allergyintolerance.txt | 35 ++ .../core__count_allergyintolerance_month.txt | 2 + .../duckdb_data/allergyintolerance.ndjson | 20 ++ .../core__count_allergyintolerance_month.csv | 3 + tests/testbed_utils.py | 12 + 33 files changed, 1634 insertions(+), 133 deletions(-) create mode 100644 cumulus_library/studies/core/builder_allergyintolerance.py create mode 100644 cumulus_library/studies/core/core_templates/allergyintolerance.sql.jinja create mode 100644 cumulus_library/studies/core/reference_sql/builder_allergyintolerance.sql create mode 100644 tests/core/test_core_allergy.py create mode 100644 tests/regression/reference/core__count_allergyintolerance_month.csv create mode 100644 tests/regression/reference/core__count_allergyintolerance_month.parquet create mode 100644 tests/test_data/core/core__allergyintolerance.txt create mode 100644 tests/test_data/core/core__count_allergyintolerance_month.txt create mode 100644 tests/test_data/duckdb_data/allergyintolerance.ndjson create mode 100644 tests/test_data/duckdb_data/expected_export/core/core__count_allergyintolerance_month.csv diff --git a/cumulus_library/.sqlfluff b/cumulus_library/.sqlfluff index 091e6ccc..476f0807 100644 --- a/cumulus_library/.sqlfluff +++ b/cumulus_library/.sqlfluff @@ -105,6 +105,20 @@ remote_location = s3://bucket/study/data/ schema_name = test_schema schema = { + 'allergyintolerance': { + 'category': True, + 'criticality': True, + 'encounter': { + 'reference': True, + }, + 'id': True, + 'patient': { + 'reference': True, + }, + 'reaction': { + 'severity': True, + }, + }, 'condition': { 'category': { 'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True diff --git a/cumulus_library/__init__.py b/cumulus_library/__init__.py index 6fdf222f..d2692184 100644 --- a/cumulus_library/__init__.py +++ b/cumulus_library/__init__.py @@ -6,4 +6,4 @@ from .study_manifest import StudyManifest __all__ = ["BaseTableBuilder", "CountsBuilder", "StudyConfig", "StudyManifest"] -__version__ = "3.0.0" +__version__ = "3.1.0" diff --git a/cumulus_library/statistics/counts.py b/cumulus_library/statistics/counts.py index 48d6bbe2..b4cf5d96 100644 --- a/cumulus_library/statistics/counts.py +++ b/cumulus_library/statistics/counts.py @@ -79,6 +79,7 @@ def get_count_query( "where_clauses", "fhir_resource", "filter_resource", + "patient_link", ]: raise errors.CountsBuilderError(f"count_query received unexpected key: {key}") return counts_templates.get_count_query(table_name, source_table, table_cols, **kwargs) @@ -88,6 +89,33 @@ def get_count_query( # We're not trying to be overly clever about this to persist the docstrings as the # primary interface that study authors would see when using these functions. + def count_allergyintolerance( + self, + table_name: str, + source_table: str, + table_cols: list, + where_clauses: list | None = None, + min_subject: int = 10, + ) -> str: + """wrapper method for constructing allergyintolerance counts tables + + :param table_name: The name of the table to create. Must start with study prefix + :param source_table: The table to create counts data from + :param table_cols: The columns from the source table to add to the count table + :param where_clauses: An array of where clauses to use for filtering the data + :param min_subject: An integer setting the minimum bin size for inclusion + (default: 10) + """ + return self.get_count_query( + table_name, + source_table, + table_cols, + where_clauses=where_clauses, + min_subject=min_subject, + fhir_resource="allergyintolerance", + patient_link="patient_ref", + ) + def count_condition( self, table_name: str, @@ -112,7 +140,7 @@ def count_condition( where_clauses=where_clauses, min_subject=min_subject, fhir_resource="condition", - filter_resource="encounter", + filter_resource=True, ) def count_documentreference( @@ -139,7 +167,7 @@ def count_documentreference( where_clauses=where_clauses, min_subject=min_subject, fhir_resource="documentreference", - filter_resource="encounter", + filter_resource=True, ) def count_encounter( diff --git a/cumulus_library/statistics/statistics_templates/count.sql.jinja b/cumulus_library/statistics/statistics_templates/count.sql.jinja index de7413c8..063b83a6 100644 --- a/cumulus_library/statistics/statistics_templates/count.sql.jinja +++ b/cumulus_library/statistics/statistics_templates/count.sql.jinja @@ -2,12 +2,11 @@ {#- TODO: move secondary/tertiary resource declaration upstream to statistics/counts.py level -#} {%- macro secondary_resource(fhir_resource) -%} -{%- if fhir_resource in ('encounter', 'condition') -%} +{%- if fhir_resource in ('allergyintolerance', 'documentreference', 'encounter', 'observation') -%} +{{ fhir_resource }}_ref +{%- elif fhir_resource == 'condition' -%} +{#- count condition by encounter, because they are often duplicated for billing purposes -#} encounter_ref -{%- elif fhir_resource == 'documentreference' -%} -documentreference_ref -{%- elif fhir_resource == 'observation'-%} -observation_ref {%- else -%} {#- implicit null -#} {%- endif -%} @@ -65,7 +64,7 @@ CREATE TABLE {{ table_name }} AS ( {%- if filter_resource %} filtered_table AS ( SELECT - s.subject_ref, + s.{{ patient_link }}, {%- if secondary %} s.{{ secondary }}, {%- endif -%} @@ -102,19 +101,19 @@ CREATE TABLE {{ table_name }} AS ( {%- endif -%} {#- resource specific filtering conditions -#} - {%- if fhir_resource == 'documentreference' and filter_resource %} + {%- if fhir_resource == 'documentreference' %} WHERE (s.status = 'current') AND (s.docStatus IS null OR s.docStatus IN ('final', 'amended')) - {%- elif fhir_resource == 'encounter' and filter_resource %} + {%- elif fhir_resource == 'encounter' %} WHERE s.status = 'finished' - {%- elif fhir_resource == 'observation' and filter_resource %} + {%- elif fhir_resource == 'observation' %} WHERE (s.status = 'final' OR s.status= 'amended') {%- endif %} ), {% endif %} null_replacement AS ( SELECT - subject_ref, + {{ patient_link }}, {%- if secondary %} {{ secondary }}, {%- endif -%} @@ -177,7 +176,7 @@ CREATE TABLE {{ table_name }} AS ( powerset AS ( SELECT - count(DISTINCT subject_ref) AS cnt_subject_ref, + count(DISTINCT {{ patient_link }}) AS cnt_subject_ref, {{-cols_delineated_list(table_cols, fhir_resource)}}, concat_ws( '-', diff --git a/cumulus_library/statistics/statistics_templates/counts_templates.py b/cumulus_library/statistics/statistics_templates/counts_templates.py index 7e1320d6..55d10d87 100644 --- a/cumulus_library/statistics/statistics_templates/counts_templates.py +++ b/cumulus_library/statistics/statistics_templates/counts_templates.py @@ -15,6 +15,7 @@ class CountableFhirResource(Enum): ids. """ + ALLERGYINTOLERANCE = "allergyintolerance" CONDITION = "condition" DOCUMENTREFERENCE = "documentreference" ENCOUNTER = "encounter" @@ -40,6 +41,7 @@ def get_count_query( where_clauses: list | None = None, fhir_resource: str | None = None, filter_resource: bool | None = True, + patient_link: str = "subject_ref", ) -> str: """Generates count tables for generating study outputs""" path = Path(__file__).parent @@ -67,6 +69,7 @@ def get_count_query( where_clauses=where_clauses, fhir_resource=fhir_resource, filter_resource=filter_resource, + patient_link=patient_link, ) # workaround for conflicting sqlfluff enforcement return query.replace("-- noqa: disable=LT02\n", "") diff --git a/cumulus_library/studies/core/builder_allergyintolerance.py b/cumulus_library/studies/core/builder_allergyintolerance.py new file mode 100644 index 00000000..9ccbe12d --- /dev/null +++ b/cumulus_library/studies/core/builder_allergyintolerance.py @@ -0,0 +1,66 @@ +import cumulus_library +from cumulus_library.studies.core.core_templates import core_templates +from cumulus_library.template_sql import sql_utils + +expected_table_cols = { + "allergyintolerance": { + "id": [], + "type": [], + "category": [], + "criticality": [], + "recordedDate": [], + "patient": sql_utils.REFERENCE, + "encounter": sql_utils.REFERENCE, + "reaction": ["severity"], + } +} + + +class CoreAllergyIntoleranceBuilder(cumulus_library.BaseTableBuilder): + display_text = "Creating AllergyIntolerance tables..." + + def prepare_queries(self, *args, config: cumulus_library.StudyConfig, **kwargs): + code_sources = [ + sql_utils.CodeableConceptConfig( + source_table="allergyintolerance", + column_hierarchy=[("clinicalStatus", dict)], + target_table="core__allergyintolerance_dn_clinical_status", + filter_priority=True, + code_systems=[ + # Restrict to just this required binding system + "http://terminology.hl7.org/CodeSystem/allergyintolerance-clinical", + ], + ), + sql_utils.CodeableConceptConfig( + source_table="allergyintolerance", + column_hierarchy=[("verificationStatus", dict)], + target_table="core__allergyintolerance_dn_verification_status", + filter_priority=True, + code_systems=[ + # Restrict to just this required binding system + "http://terminology.hl7.org/CodeSystem/allergyintolerance-verification", + ], + ), + sql_utils.CodeableConceptConfig( + source_table="allergyintolerance", + column_hierarchy=[("code", dict)], + target_table="core__allergyintolerance_dn_code", + ), + sql_utils.CodeableConceptConfig( + source_table="allergyintolerance", + column_hierarchy=[("reaction", list), ("substance", dict)], + target_table="core__allergyintolerance_dn_reaction_substance", + expected={"substance": sql_utils.CODEABLE_CONCEPT}, + ), + sql_utils.CodeableConceptConfig( + source_table="allergyintolerance", + column_hierarchy=[("reaction", list), ("manifestation", list)], + target_table="core__allergyintolerance_dn_reaction_manifestation", + expected={"manifestation": sql_utils.CODEABLE_CONCEPT}, + ), + ] + self.queries += sql_utils.denormalize_complex_objects(config.db, code_sources) + validated_schema = sql_utils.validate_schema(config.db, expected_table_cols) + self.queries.append( + core_templates.get_core_template("allergyintolerance", validated_schema) + ) diff --git a/cumulus_library/studies/core/core_templates/allergyintolerance.sql.jinja b/cumulus_library/studies/core/core_templates/allergyintolerance.sql.jinja new file mode 100644 index 00000000..3f706cf6 --- /dev/null +++ b/cumulus_library/studies/core/core_templates/allergyintolerance.sql.jinja @@ -0,0 +1,138 @@ +{% import 'core_utils.jinja' as utils %} +{% import 'unnest_utils.jinja' as unnest_utils %} + +CREATE TABLE core__allergyintolerance AS +WITH temp_allergyintolerance AS ( + SELECT + {{- + utils.basic_cols( + 'allergyintolerance', + 'a', + [ + 'id', + ] + ) + }}, + {{- + utils.nullable_cols( + 'allergyintolerance', + 'a', + [ + 'type', + 'category', + 'criticality', + ('patient', 'reference', 'patient_ref'), + ('encounter', 'reference', 'encounter_ref'), + ], + schema + ) + }}, + {{- + utils.date_cols_from_str( + 'allergyintolerance', + 'a', + ['recordedDate'], + schema + ) + }}, + {{- + utils.truncate_date_cols( + 'allergyintolerance', + 'a', + [ + ('recordedDate', 'week'), + ('recordedDate', 'month'), + ('recordedDate', 'year'), + ], + schema + ) + }} + FROM allergyintolerance AS a +), + +temp_category AS ( + SELECT + a.id, + t.category + FROM + allergyintolerance AS a, + UNNEST(a.category) AS t (category) +), + +flattened_reaction AS ({{ unnest_utils.flatten('allergyintolerance', 'reaction') }}), + +temp_reaction AS ( + SELECT + r.id, + r.row, + dn_subs.code AS substance_code, + dn_subs.system AS substance_system, + dn_subs.display AS substance_display, + dn_man.code AS manifestation_code, + dn_man.system AS manifestation_system, + dn_man.display AS manifestation_display, + {{- + utils.nullable_cols( + 'allergyintolerance', + 'r', + [ + ('reaction', 'severity', 'severity'), + ], + schema + ) + }} + FROM flattened_reaction AS r + LEFT JOIN core__allergyintolerance_dn_reaction_substance AS dn_subs + ON r.id = dn_subs.id AND r.row = dn_subs.row + LEFT JOIN core__allergyintolerance_dn_reaction_manifestation AS dn_man + ON r.id = dn_man.id AND r.row = dn_man.row +) + +SELECT + ta.id, + CONCAT('AllergyIntolerance/', ta.id) AS allergyintolerance_ref, + + {#- We don't expose system for these next two since we filter + down to a single system in the denormalization table #} + dn_cstat.code AS clinicalStatus_code, + dn_vstat.code AS verificationStatus_code, + + {#- type, category, and criticality are not in US Core. + But they are useful for clinical interpretation. #} + ta.type, + tcat.category, + ta.criticality, + + dn_code.code AS code_code, + dn_code.system AS code_system, + dn_code.display AS code_display, + + ta.patient_ref, + ta.encounter_ref, + + {#- recordedDate is not in US Core. + But it's useful for looking at only a study period of data. #} + ta.recordedDate, + ta.recordedDate_week, + ta.recordedDate_month, + ta.recordedDate_year, + + {#- reaction.substance and reaction.severity are not in US Core. + But they are useful for clinical interpretation. #} + tr.row AS reaction_row, + tr.substance_code AS reaction_substance_code, + tr.substance_system AS reaction_substance_system, + tr.substance_display AS reaction_substance_display, + tr.manifestation_code AS reaction_manifestation_code, + tr.manifestation_system AS reaction_manifestation_system, + tr.manifestation_display AS reaction_manifestation_display, + tr.severity AS reaction_severity + +FROM temp_allergyintolerance AS ta +LEFT JOIN temp_reaction AS tr ON ta.id = tr.id +LEFT JOIN core__allergyintolerance_dn_code AS dn_code ON ta.id = dn_code.id +LEFT JOIN core__allergyintolerance_dn_clinical_status AS dn_cstat ON ta.id = dn_cstat.id +LEFT JOIN core__allergyintolerance_dn_verification_status AS dn_vstat + ON ta.id = dn_vstat.id +LEFT JOIN temp_category AS tcat ON ta.id = tcat.id +WHERE ta.recordedDate BETWEEN DATE('2016-01-01') AND CURRENT_DATE; diff --git a/cumulus_library/studies/core/count_core.py b/cumulus_library/studies/core/count_core.py index 7471451f..213d5eb1 100644 --- a/cumulus_library/studies/core/count_core.py +++ b/cumulus_library/studies/core/count_core.py @@ -6,6 +6,17 @@ class CoreCountsBuilder(cumulus_library.CountsBuilder): display_text = "Creating core counts..." + def count_core_allergyintolerance(self, duration: str = "month"): + table_name = self.get_table_name("count_allergyintolerance", duration=duration) + from_table = self.get_table_name("allergyintolerance") + cols = [ + ["category", "varchar", None], + [f"recordedDate_{duration}", "date", None], + ["code_display", "varchar", None], + ["reaction_manifestation_display", "varchar", None], + ] + return self.count_allergyintolerance(table_name, from_table, cols) + def count_core_condition(self, duration: str = "month"): table_name = self.get_table_name("count_condition", duration=duration) from_table = self.get_table_name("condition") @@ -105,6 +116,7 @@ def count_core_patient(self): def prepare_queries(self, *args, **kwargs): self.queries = [ + self.count_core_allergyintolerance(duration="month"), self.count_core_condition(duration="month"), self.count_core_documentreference(duration="month"), self.count_core_encounter(duration="month"), diff --git a/cumulus_library/studies/core/manifest.toml b/cumulus_library/studies/core/manifest.toml index 929d39fc..6826ef62 100644 --- a/cumulus_library/studies/core/manifest.toml +++ b/cumulus_library/studies/core/manifest.toml @@ -3,6 +3,7 @@ study_prefix = "core" [table_builder_config] file_names = [ "builder_prereq_tables.py", + "builder_allergyintolerance.py", "builder_condition.py", "builder_patient.py", "builder_encounter.py", @@ -24,17 +25,18 @@ file_names = [ [export_config] export_list = [ - "core__count_patient", + "core__count_allergyintolerance_month", + "core__count_condition_month", + "core__count_documentreference_month", "core__count_encounter_month", "core__count_encounter_all_types", "core__count_encounter_all_types_month", "core__count_encounter_service_month", "core__count_encounter_priority_month", "core__count_encounter_type_month", - "core__count_documentreference_month", - "core__count_observation_lab_month", - "core__count_condition_month", "core__count_medicationrequest_month", + "core__count_observation_lab_month", + "core__count_patient", "core__meta_date", "core__meta_version", ] diff --git a/cumulus_library/studies/core/reference_sql/builder_allergyintolerance.sql b/cumulus_library/studies/core/reference_sql/builder_allergyintolerance.sql new file mode 100644 index 00000000..30abf6c4 --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/builder_allergyintolerance.sql @@ -0,0 +1,340 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library +-- CLI. Its format is tied to the specific database it was run against, +-- and it may not be correct for all databases. Use the CLI's build +-- option to derive the best SQL for your dataset. + +-- ########################################################### + +CREATE TABLE core__allergyintolerance_dn_clinical_status AS ( + WITH + + system_clinicalStatus_0 AS ( + SELECT DISTINCT + s.id AS id, + 0 AS row, + '0' AS priority, + u.coding.code, + u.coding.display, + u.coding.system, + u.coding.userSelected + FROM + allergyintolerance AS s, + UNNEST(s.clinicalStatus.coding) AS u (coding) + WHERE + REGEXP_LIKE(u.coding.system, '^http://terminology\.hl7\.org/CodeSystem/allergyintolerance-clinical$') + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + row, + priority, + system, + code, + display, + userSelected + FROM system_clinicalStatus_0 + + ), + + partitioned_table AS ( + SELECT + id, + row, + code, + system, + display, + userSelected, + priority, + ROW_NUMBER() + OVER ( + PARTITION BY id + ORDER BY priority ASC, code ASC + ) AS available_priority + FROM union_table + GROUP BY + id, row, priority, system, code, display, userSelected + ) + + SELECT + id, + code, + system, + display, + userSelected + FROM partitioned_table + WHERE available_priority = 1 +); + + +-- ########################################################### + +CREATE TABLE core__allergyintolerance_dn_verification_status AS ( + WITH + + system_verificationStatus_0 AS ( + SELECT DISTINCT + s.id AS id, + 0 AS row, + '0' AS priority, + u.coding.code, + u.coding.display, + u.coding.system, + u.coding.userSelected + FROM + allergyintolerance AS s, + UNNEST(s.verificationStatus.coding) AS u (coding) + WHERE + REGEXP_LIKE(u.coding.system, '^http://terminology\.hl7\.org/CodeSystem/allergyintolerance-verification$') + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + row, + priority, + system, + code, + display, + userSelected + FROM system_verificationStatus_0 + + ), + + partitioned_table AS ( + SELECT + id, + row, + code, + system, + display, + userSelected, + priority, + ROW_NUMBER() + OVER ( + PARTITION BY id + ORDER BY priority ASC, code ASC + ) AS available_priority + FROM union_table + GROUP BY + id, row, priority, system, code, display, userSelected + ) + + SELECT + id, + code, + system, + display, + userSelected + FROM partitioned_table + WHERE available_priority = 1 +); + + +-- ########################################################### + +CREATE TABLE core__allergyintolerance_dn_code AS ( + WITH + + system_code_0 AS ( + SELECT DISTINCT + s.id AS id, + 0 AS row, + u.coding.code, + u.coding.display, + u.coding.system, + u.coding.userSelected + FROM + allergyintolerance AS s, + UNNEST(s.code.coding) AS u (coding) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + row, + system, + code, + display, + userSelected + FROM system_code_0 + + ) + SELECT + id, + code, + system, + display, + userSelected + FROM union_table +); + + +-- ########################################################### + +CREATE TABLE IF NOT EXISTS "main"."core__allergyintolerance_dn_reaction_substance" +AS ( + SELECT * FROM ( + VALUES + (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS boolean)) + ) + AS t ("id","row","code","system","display","userSelected") + WHERE 1 = 0 -- ensure empty table +); + +-- ########################################################### + +CREATE TABLE core__allergyintolerance_dn_reaction_manifestation AS ( + WITH + + flattened_rows AS ( + SELECT DISTINCT + t.id AS id, + ROW_NUMBER() OVER (PARTITION BY id) AS row, + r."manifestation" + FROM + allergyintolerance AS t, + UNNEST(t."reaction") AS parent (r) + ), + + child_flattened_rows AS ( + SELECT DISTINCT + s.id, + s.row, -- keep the parent row number + u."manifestation" + FROM + flattened_rows AS s, + UNNEST(s.manifestation) AS u ("manifestation") + ), + + system_manifestation_0 AS ( + SELECT DISTINCT + s.id AS id, + s.row, + u.coding.code, + u.coding.display, + u.coding.system, + u.coding.userSelected + FROM + child_flattened_rows AS s, + UNNEST(s.manifestation.coding) AS u (coding) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + row, + system, + code, + display, + userSelected + FROM system_manifestation_0 + + ) + SELECT + id, + row, + code, + system, + display, + userSelected + FROM union_table +); + + +-- ########################################################### + + + + +CREATE TABLE core__allergyintolerance AS +WITH temp_allergyintolerance AS ( + SELECT + a.id, + a.type, + a.category, + a.criticality, + a.patient.reference AS patient_ref, + a.encounter.reference AS encounter_ref, + date(from_iso8601_timestamp(a.recordedDate)) AS recordedDate, + date_trunc('week', date(from_iso8601_timestamp(a."recordedDate"))) + AS recordedDate_week, + date_trunc('month', date(from_iso8601_timestamp(a."recordedDate"))) + AS recordedDate_month, + date_trunc('year', date(from_iso8601_timestamp(a."recordedDate"))) + AS recordedDate_year + FROM allergyintolerance AS a +), + +temp_category AS ( + SELECT + a.id, + t.category + FROM + allergyintolerance AS a, + UNNEST(a.category) AS t (category) +), + +flattened_reaction AS (SELECT DISTINCT + t.id AS id, + ROW_NUMBER() OVER (PARTITION BY id) AS row, + r."reaction" + FROM + allergyintolerance AS t, + UNNEST(t."reaction") AS r ("reaction")), + +temp_reaction AS ( + SELECT + r.id, + r.row, + dn_subs.code AS substance_code, + dn_subs.system AS substance_system, + dn_subs.display AS substance_display, + dn_man.code AS manifestation_code, + dn_man.system AS manifestation_system, + dn_man.display AS manifestation_display, + r.reaction.severity AS severity + FROM flattened_reaction AS r + LEFT JOIN core__allergyintolerance_dn_reaction_substance AS dn_subs + ON r.id = dn_subs.id AND r.row = dn_subs.row + LEFT JOIN core__allergyintolerance_dn_reaction_manifestation AS dn_man + ON r.id = dn_man.id AND r.row = dn_man.row +) + +SELECT + ta.id, + CONCAT('AllergyIntolerance/', ta.id) AS allergyintolerance_ref, + dn_cstat.code AS clinicalStatus_code, + dn_vstat.code AS verificationStatus_code, + ta.type, + tcat.category, + ta.criticality, + + dn_code.code AS code_code, + dn_code.system AS code_system, + dn_code.display AS code_display, + + ta.patient_ref, + ta.encounter_ref, + ta.recordedDate, + ta.recordedDate_week, + ta.recordedDate_month, + ta.recordedDate_year, + tr.row AS reaction_row, + tr.substance_code AS reaction_substance_code, + tr.substance_system AS reaction_substance_system, + tr.substance_display AS reaction_substance_display, + tr.manifestation_code AS reaction_manifestation_code, + tr.manifestation_system AS reaction_manifestation_system, + tr.manifestation_display AS reaction_manifestation_display, + tr.severity AS reaction_severity + +FROM temp_allergyintolerance AS ta +LEFT JOIN temp_reaction AS tr ON ta.id = tr.id +LEFT JOIN core__allergyintolerance_dn_code AS dn_code ON ta.id = dn_code.id +LEFT JOIN core__allergyintolerance_dn_clinical_status AS dn_cstat ON ta.id = dn_cstat.id +LEFT JOIN core__allergyintolerance_dn_verification_status AS dn_vstat + ON ta.id = dn_vstat.id +LEFT JOIN temp_category AS tcat ON ta.id = tcat.id +WHERE ta.recordedDate BETWEEN DATE('2016-01-01') AND CURRENT_DATE; diff --git a/cumulus_library/studies/core/reference_sql/builder_condition.sql b/cumulus_library/studies/core/reference_sql/builder_condition.sql index 492481fb..4d6aa56d 100644 --- a/cumulus_library/studies/core/reference_sql/builder_condition.sql +++ b/cumulus_library/studies/core/reference_sql/builder_condition.sql @@ -100,12 +100,11 @@ CREATE TABLE core__condition_dn_clinical_status AS ( ROW_NUMBER() OVER ( PARTITION BY id - ORDER BY priority ASC + ORDER BY priority ASC, code ASC ) AS available_priority FROM union_table GROUP BY id, row, priority, system, code, display, userSelected - ORDER BY priority ASC ) SELECT @@ -321,12 +320,11 @@ CREATE TABLE core__condition_codable_concepts_display AS ( ROW_NUMBER() OVER ( PARTITION BY id - ORDER BY priority ASC + ORDER BY priority ASC, code ASC ) AS available_priority FROM union_table GROUP BY id, row, priority, system, code, display, userSelected - ORDER BY priority ASC ) SELECT @@ -425,12 +423,11 @@ CREATE TABLE core__condition_dn_verification_status AS ( ROW_NUMBER() OVER ( PARTITION BY id - ORDER BY priority ASC + ORDER BY priority ASC, code ASC ) AS available_priority FROM union_table GROUP BY id, row, priority, system, code, display, userSelected - ORDER BY priority ASC ) SELECT diff --git a/cumulus_library/studies/core/reference_sql/builder_documentreference.sql b/cumulus_library/studies/core/reference_sql/builder_documentreference.sql index 74ed6738..f2f651de 100644 --- a/cumulus_library/studies/core/reference_sql/builder_documentreference.sql +++ b/cumulus_library/studies/core/reference_sql/builder_documentreference.sql @@ -99,12 +99,11 @@ CREATE TABLE core__documentreference_dn_category AS ( ROW_NUMBER() OVER ( PARTITION BY id - ORDER BY priority ASC + ORDER BY priority ASC, code ASC ) AS available_priority FROM union_table GROUP BY id, row, priority, system, code, display, userSelected - ORDER BY priority ASC ) SELECT @@ -121,16 +120,38 @@ CREATE TABLE core__documentreference_dn_category AS ( -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__documentreference_dn_format" -AS ( - SELECT * FROM ( - VALUES - (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) +CREATE TABLE core__documentreference_dn_format AS ( + WITH + + system_format_0 AS ( + SELECT DISTINCT + s.id AS id, + u.parent_col.format.code, + u.parent_col.format.display, + u.parent_col.format.system + FROM + documentreference AS s, + UNNEST(s.content) AS u (parent_col) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + system, + code, + display + FROM system_format_0 + ) - AS t ("id","code","system","display") - WHERE 1 = 0 -- ensure empty table + SELECT + id, + code, + system, + display + FROM union_table ); + -- ########################################################### diff --git a/cumulus_library/studies/core/reference_sql/builder_encounter.sql b/cumulus_library/studies/core/reference_sql/builder_encounter.sql index 339b6468..1f10b883 100644 --- a/cumulus_library/studies/core/reference_sql/builder_encounter.sql +++ b/cumulus_library/studies/core/reference_sql/builder_encounter.sql @@ -268,12 +268,11 @@ CREATE TABLE core__encounter_dn_type AS ( ROW_NUMBER() OVER ( PARTITION BY id - ORDER BY priority ASC + ORDER BY priority ASC, code ASC ) AS available_priority FROM union_table GROUP BY id, row, priority, system, code, display, userSelected - ORDER BY priority ASC ) SELECT @@ -290,7 +289,7 @@ CREATE TABLE core__encounter_dn_type AS ( -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__encounter_dn_servicetype" +CREATE TABLE IF NOT EXISTS "main"."core__encounter_dn_servicetype" AS ( SELECT * FROM ( VALUES @@ -302,7 +301,7 @@ AS ( -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__encounter_dn_priority" +CREATE TABLE IF NOT EXISTS "main"."core__encounter_dn_priority" AS ( SELECT * FROM ( VALUES @@ -498,12 +497,11 @@ CREATE TABLE core__encounter_dn_reasoncode AS ( ROW_NUMBER() OVER ( PARTITION BY id - ORDER BY priority ASC + ORDER BY priority ASC, code ASC ) AS available_priority FROM union_table GROUP BY id, row, priority, system, code, display, userSelected - ORDER BY priority ASC ) SELECT @@ -520,23 +518,91 @@ CREATE TABLE core__encounter_dn_reasoncode AS ( -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__encounter_dn_dischargedisposition" -AS ( - SELECT * FROM ( - VALUES - (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS boolean)) +CREATE TABLE core__encounter_dn_dischargedisposition AS ( + WITH + + system_dischargedisposition_0 AS ( + SELECT DISTINCT + s.id AS id, + 0 AS row, + u.coding.code, + u.coding.display, + u.coding.system, + u.coding.userSelected + FROM + encounter AS s, + UNNEST(s.hospitalization.dischargedisposition.coding) AS u (coding) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + row, + system, + code, + display, + userSelected + FROM system_dischargedisposition_0 + ) - AS t ("id","row","code","system","display","userSelected") - WHERE 1 = 0 -- ensure empty table + SELECT + id, + code, + system, + display, + userSelected + FROM union_table ); + -- ########################################################### CREATE TABLE core__encounter AS WITH -temp_encounter_completion AS (SELECT cast('' AS varchar) AS id, FALSE AS is_complete WHERE 1=0), +temp_encounter_completion AS ( + WITH + -- Start by grabbing group names and exports times for each Encounter. + temp_completion_times AS ( + SELECT + ece.encounter_id, + -- note that we don't chop the export time down to a DATE, + -- as is typical in the core study + min(from_iso8601_timestamp(ece.export_time)) AS earliest_export + FROM etl__completion_encounters AS ece + GROUP BY ece.encounter_id + ), + + -- Then examine all tables that are at least as recently loaded as the + -- Encounter. (This is meant to detect Conditions that maybe aren't + -- loaded into Athena yet for the Encounter.) + -- Make sure that we have all the tables we care about. + temp_completed_tables AS ( + SELECT + ece.encounter_id, + ( + BOOL_OR(ec.table_name = 'condition') + AND BOOL_OR(ec.table_name = 'documentreference') + AND BOOL_OR(ec.table_name = 'medicationrequest') + AND BOOL_OR(ec.table_name = 'observation') + ) AS is_complete + FROM etl__completion_encounters AS ece + INNER JOIN temp_completion_times AS tct ON tct.encounter_id = ece.encounter_id + INNER JOIN etl__completion AS ec ON ec.group_name = ece.group_name + WHERE tct.earliest_export <= from_iso8601_timestamp(ec.export_time) + GROUP BY ece.encounter_id + ) + + -- Left join back with main completion_encounters table, + -- to catch rows that are completion-tracked but not in + -- temp_completed_tables. + SELECT + ece.encounter_id AS id, + (is_complete IS NOT NULL AND is_complete) AS is_complete + FROM etl__completion_encounters AS ece + LEFT JOIN temp_completed_tables AS tct ON tct.encounter_id = ece.encounter_id +), temp_encounter_nullable AS ( SELECT DISTINCT @@ -647,7 +713,48 @@ WHERE CREATE TABLE core__incomplete_encounter AS WITH -temp_encounter_completion AS (SELECT cast('' AS varchar) AS id, FALSE AS is_complete WHERE 1=0) +temp_encounter_completion AS ( + WITH + -- Start by grabbing group names and exports times for each Encounter. + temp_completion_times AS ( + SELECT + ece.encounter_id, + -- note that we don't chop the export time down to a DATE, + -- as is typical in the core study + min(from_iso8601_timestamp(ece.export_time)) AS earliest_export + FROM etl__completion_encounters AS ece + GROUP BY ece.encounter_id + ), + + -- Then examine all tables that are at least as recently loaded as the + -- Encounter. (This is meant to detect Conditions that maybe aren't + -- loaded into Athena yet for the Encounter.) + -- Make sure that we have all the tables we care about. + temp_completed_tables AS ( + SELECT + ece.encounter_id, + ( + BOOL_OR(ec.table_name = 'condition') + AND BOOL_OR(ec.table_name = 'documentreference') + AND BOOL_OR(ec.table_name = 'medicationrequest') + AND BOOL_OR(ec.table_name = 'observation') + ) AS is_complete + FROM etl__completion_encounters AS ece + INNER JOIN temp_completion_times AS tct ON tct.encounter_id = ece.encounter_id + INNER JOIN etl__completion AS ec ON ec.group_name = ece.group_name + WHERE tct.earliest_export <= from_iso8601_timestamp(ec.export_time) + GROUP BY ece.encounter_id + ) + + -- Left join back with main completion_encounters table, + -- to catch rows that are completion-tracked but not in + -- temp_completed_tables. + SELECT + ece.encounter_id AS id, + (is_complete IS NOT NULL AND is_complete) AS is_complete + FROM etl__completion_encounters AS ece + LEFT JOIN temp_completed_tables AS tct ON tct.encounter_id = ece.encounter_id +) SELECT DISTINCT tec.id FROM temp_encounter_completion AS tec diff --git a/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql b/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql index 401d68e3..d48a9e06 100644 --- a/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql +++ b/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql @@ -6,7 +6,7 @@ -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__medication_dn_code" +CREATE TABLE IF NOT EXISTS "main"."core__medication_dn_code" AS ( SELECT * FROM ( VALUES @@ -57,7 +57,7 @@ CREATE TABLE core__medicationrequest_dn_inline_code AS ( -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__medicationrequest_dn_contained_code" +CREATE TABLE IF NOT EXISTS "main"."core__medicationrequest_dn_contained_code" AS ( SELECT * FROM ( VALUES diff --git a/cumulus_library/studies/core/reference_sql/builder_observation.sql b/cumulus_library/studies/core/reference_sql/builder_observation.sql index 0883c588..3692d8e0 100644 --- a/cumulus_library/studies/core/reference_sql/builder_observation.sql +++ b/cumulus_library/studies/core/reference_sql/builder_observation.sql @@ -95,43 +95,167 @@ CREATE TABLE core__observation_dn_code AS ( -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__observation_component_code" -AS ( - SELECT * FROM ( - VALUES - (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS boolean)) +CREATE TABLE core__observation_component_code AS ( + WITH + + flattened_rows AS ( + SELECT DISTINCT + t.id AS id, + ROW_NUMBER() OVER (PARTITION BY id) AS row, + r."code" + FROM + observation AS t, + UNNEST(t."component") AS parent (r) + ), + + system_code_0 AS ( + SELECT DISTINCT + s.id AS id, + s.row, + u.coding.code, + u.coding.display, + u.coding.system, + u.coding.userSelected + FROM + flattened_rows AS s, + UNNEST(s.code.coding) AS u (coding) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + row, + system, + code, + display, + userSelected + FROM system_code_0 + ) - AS t ("id","row","code","system","display","userSelected") - WHERE 1 = 0 -- ensure empty table + SELECT + id, + row, + code, + system, + display, + userSelected + FROM union_table ); + -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__observation_component_dataabsentreason" -AS ( - SELECT * FROM ( - VALUES - (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS boolean)) +CREATE TABLE core__observation_component_dataabsentreason AS ( + WITH + + flattened_rows AS ( + SELECT DISTINCT + t.id AS id, + ROW_NUMBER() OVER (PARTITION BY id) AS row, + r."dataabsentreason" + FROM + observation AS t, + UNNEST(t."component") AS parent (r) + ), + + system_dataabsentreason_0 AS ( + SELECT DISTINCT + s.id AS id, + s.row, + u.coding.code, + u.coding.display, + u.coding.system, + u.coding.userSelected + FROM + flattened_rows AS s, + UNNEST(s.dataabsentreason.coding) AS u (coding) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + row, + system, + code, + display, + userSelected + FROM system_dataabsentreason_0 + ) - AS t ("id","row","code","system","display","userSelected") - WHERE 1 = 0 -- ensure empty table + SELECT + id, + row, + code, + system, + display, + userSelected + FROM union_table ); + -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__observation_component_interpretation" -AS ( - SELECT * FROM ( - VALUES - (cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS boolean)) +CREATE TABLE core__observation_component_interpretation AS ( + WITH + + flattened_rows AS ( + SELECT DISTINCT + t.id AS id, + ROW_NUMBER() OVER (PARTITION BY id) AS row, + r."interpretation" + FROM + observation AS t, + UNNEST(t."component") AS parent (r) + ), + + child_flattened_rows AS ( + SELECT DISTINCT + s.id, + s.row, -- keep the parent row number + u."interpretation" + FROM + flattened_rows AS s, + UNNEST(s.interpretation) AS u ("interpretation") + ), + + system_interpretation_0 AS ( + SELECT DISTINCT + s.id AS id, + s.row, + u.coding.code, + u.coding.display, + u.coding.system, + u.coding.userSelected + FROM + child_flattened_rows AS s, + UNNEST(s.interpretation.coding) AS u (coding) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + row, + system, + code, + display, + userSelected + FROM system_interpretation_0 + ) - AS t ("id","row","code","system","display","userSelected") - WHERE 1 = 0 -- ensure empty table + SELECT + id, + row, + code, + system, + display, + userSelected + FROM union_table ); + -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__observation_component_valuecodeableconcept" +CREATE TABLE IF NOT EXISTS "main"."core__observation_component_valuecodeableconcept" AS ( SELECT * FROM ( VALUES @@ -143,7 +267,7 @@ AS ( -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__observation_dn_interpretation" +CREATE TABLE IF NOT EXISTS "main"."core__observation_dn_interpretation" AS ( SELECT * FROM ( VALUES @@ -194,7 +318,7 @@ CREATE TABLE core__observation_dn_valuecodeableconcept AS ( -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__observation_dn_dataabsentreason" +CREATE TABLE IF NOT EXISTS "main"."core__observation_dn_dataabsentreason" AS ( SELECT * FROM ( VALUES @@ -291,40 +415,13 @@ WHERE CREATE TABLE core__observation_component_valuequantity AS ( - WITH - - flattened_rows AS ( - SELECT DISTINCT - t.id AS id, - ROW_NUMBER() OVER (PARTITION BY id) AS row, - r."component" - FROM - observation AS t, - UNNEST(t."component") AS r ("component") - ), - - flattened_quantities AS ( - SELECT - f.id, - f.row, - f.component.valueQuantity.value AS value, - cast(NULL as varchar) AS comparator, - f.component.valueQuantity.unit AS unit, - f.component.valueQuantity.system AS system, - f.component.valueQuantity.code AS code - FROM flattened_rows AS f - WHERE f.component.valueQuantity IS NOT NULL - ) - SELECT - f.id, - f.row, - -- We ensure value is a double, because nullable_cols() above will cast - -- as varchar if value isn't in the schema. - CAST(f.value AS DOUBLE) AS value, -- noqa: disable=L029 - f.comparator, - f.unit, - f.system, - f.code - FROM flattened_quantities AS f + 'x' AS id, + CAST(NULL AS BIGINT) AS row, + CAST(NULL AS DOUBLE) AS value, -- noqa: disable=L029 + 'x' AS comparator, + 'x' AS unit, + 'x' AS system, + 'x' AS code + WHERE 1=0 -- empty table ); diff --git a/cumulus_library/studies/core/reference_sql/builder_patient.sql b/cumulus_library/studies/core/reference_sql/builder_patient.sql index 1afbeed1..4e8bcbc8 100644 --- a/cumulus_library/studies/core/reference_sql/builder_patient.sql +++ b/cumulus_library/studies/core/reference_sql/builder_patient.sql @@ -6,26 +6,198 @@ -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__patient_ext_race" -AS ( - SELECT * FROM ( - VALUES - (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) +CREATE TABLE core__patient_ext_race AS ( + WITH + + system_ombCategory AS ( + SELECT DISTINCT + s.id AS id, + '0' AS priority, + 'ombCategory' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS race_code, + ext_child.ext.valuecoding.display AS race_display + FROM + patient AS s, + UNNEST(s.extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race' + AND ext_child.ext.url = 'ombCategory' + AND ext_child.ext.valuecoding.display != '' + ), + + system_detailed AS ( + SELECT DISTINCT + s.id AS id, + '1' AS priority, + 'detailed' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS race_code, + ext_child.ext.valuecoding.display AS race_display + FROM + patient AS s, + UNNEST(s.extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race' + AND ext_child.ext.url = 'detailed' + AND ext_child.ext.valuecoding.display != '' + ), + + union_table AS ( + SELECT + id, + priority, + system, + race_code, + race_display + FROM system_ombCategory + UNION + SELECT + id, + priority, + system, + race_code, + race_display + FROM system_detailed + + ORDER BY id, priority + ) + + SELECT + id, + system, + race_code, + race_display + FROM ( + SELECT + id, + system, + LOWER( + ARRAY_JOIN( + ARRAY_SORT( + ARRAY_AGG( + race_code + ) + ), '; ' + ) + ) + AS race_code, + LOWER( + ARRAY_JOIN( + ARRAY_SORT( + ARRAY_AGG( + race_display + ) + ), '; ' + ) + ) AS race_display, + ROW_NUMBER() + OVER ( + PARTITION BY id + ORDER BY priority ASC + ) AS available_priority + FROM union_table + GROUP BY id, system, priority ) - AS t ("id","system","race_code","race_display") - WHERE 1 = 0 -- ensure empty table + WHERE available_priority = 1 ); -- ########################################################### -CREATE TABLE IF NOT EXISTS "cumulus_mhg_dev_db"."core__patient_ext_ethnicity" -AS ( - SELECT * FROM ( - VALUES - (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) +CREATE TABLE core__patient_ext_ethnicity AS ( + WITH + + system_ombCategory AS ( + SELECT DISTINCT + s.id AS id, + '0' AS priority, + 'ombCategory' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS ethnicity_code, + ext_child.ext.valuecoding.display AS ethnicity_display + FROM + patient AS s, + UNNEST(s.extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity' + AND ext_child.ext.url = 'ombCategory' + AND ext_child.ext.valuecoding.display != '' + ), + + system_detailed AS ( + SELECT DISTINCT + s.id AS id, + '1' AS priority, + 'detailed' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS ethnicity_code, + ext_child.ext.valuecoding.display AS ethnicity_display + FROM + patient AS s, + UNNEST(s.extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity' + AND ext_child.ext.url = 'detailed' + AND ext_child.ext.valuecoding.display != '' + ), + + union_table AS ( + SELECT + id, + priority, + system, + ethnicity_code, + ethnicity_display + FROM system_ombCategory + UNION + SELECT + id, + priority, + system, + ethnicity_code, + ethnicity_display + FROM system_detailed + + ORDER BY id, priority + ) + + SELECT + id, + system, + ethnicity_code, + ethnicity_display + FROM ( + SELECT + id, + system, + LOWER( + ARRAY_JOIN( + ARRAY_SORT( + ARRAY_AGG( + ethnicity_code + ) + ), '; ' + ) + ) + AS ethnicity_code, + LOWER( + ARRAY_JOIN( + ARRAY_SORT( + ARRAY_AGG( + ethnicity_display + ) + ), '; ' + ) + ) AS ethnicity_display, + ROW_NUMBER() + OVER ( + PARTITION BY id + ORDER BY priority ASC + ) AS available_priority + FROM union_table + GROUP BY id, system, priority ) - AS t ("id","system","ethnicity_code","ethnicity_display") - WHERE 1 = 0 -- ensure empty table + WHERE available_priority = 1 ); -- ########################################################### diff --git a/cumulus_library/studies/core/reference_sql/count_core.sql b/cumulus_library/studies/core/reference_sql/count_core.sql index 1ac0e58b..96d2f614 100644 --- a/cumulus_library/studies/core/reference_sql/count_core.sql +++ b/cumulus_library/studies/core/reference_sql/count_core.sql @@ -6,6 +6,105 @@ -- ########################################################### +CREATE TABLE core__count_allergyintolerance_month AS ( + WITH + filtered_table AS ( + SELECT + s.patient_ref, + s.allergyintolerance_ref, + --noqa: disable=RF03, AL02 + s."category", + s."recordedDate_month", + s."code_display", + s."reaction_manifestation_display" + --noqa: enable=RF03, AL02 + FROM core__allergyintolerance AS s + ), + + null_replacement AS ( + SELECT + patient_ref, + allergyintolerance_ref, + coalesce( + cast(category AS varchar), + 'cumulus__none' + ) AS category, + coalesce( + cast(recordedDate_month AS varchar), + 'cumulus__none' + ) AS recordedDate_month, + coalesce( + cast(code_display AS varchar), + 'cumulus__none' + ) AS code_display, + coalesce( + cast(reaction_manifestation_display AS varchar), + 'cumulus__none' + ) AS reaction_manifestation_display + FROM filtered_table + ), + secondary_powerset AS ( + SELECT + count(DISTINCT allergyintolerance_ref) AS cnt_allergyintolerance_ref, + "category", + "recordedDate_month", + "code_display", + "reaction_manifestation_display", + concat_ws( + '-', + COALESCE("category",''), + COALESCE("recordedDate_month",''), + COALESCE("code_display",''), + COALESCE("reaction_manifestation_display",'') + ) AS id + FROM null_replacement + GROUP BY + cube( + "category", + "recordedDate_month", + "code_display", + "reaction_manifestation_display" + ) + ), + + powerset AS ( + SELECT + count(DISTINCT patient_ref) AS cnt_subject_ref, + "category", + "recordedDate_month", + "code_display", + "reaction_manifestation_display", + concat_ws( + '-', + COALESCE("category",''), + COALESCE("recordedDate_month",''), + COALESCE("code_display",''), + COALESCE("reaction_manifestation_display",'') + ) AS id + FROM null_replacement + GROUP BY + cube( + "category", + "recordedDate_month", + "code_display", + "reaction_manifestation_display" + ) + ) + + SELECT + s.cnt_allergyintolerance_ref AS cnt, + p."category", + p."recordedDate_month", + p."code_display", + p."reaction_manifestation_display" + FROM powerset AS p + JOIN secondary_powerset AS s on s.id = p.id + WHERE + cnt_subject_ref >= 10 +); + +-- ########################################################### + CREATE TABLE core__count_condition_month AS ( WITH filtered_table AS ( diff --git a/cumulus_library/template_sql/codeable_concept_denormalize.sql.jinja b/cumulus_library/template_sql/codeable_concept_denormalize.sql.jinja index 9a9ac8f1..bf14b81c 100644 --- a/cumulus_library/template_sql/codeable_concept_denormalize.sql.jinja +++ b/cumulus_library/template_sql/codeable_concept_denormalize.sql.jinja @@ -120,13 +120,12 @@ CREATE TABLE {{ target_table }} AS ( ROW_NUMBER() OVER ( PARTITION BY id - ORDER BY priority ASC + ORDER BY priority ASC, code ASC ) AS available_priority FROM union_table GROUP BY id, row, priority, system, code, display, userSelected {%- for extra_field in extra_fields %}, {{ extra_field[1] }}{% endfor %} - ORDER BY priority ASC ) SELECT diff --git a/docs/core-study-details.md b/docs/core-study-details.md index 9bf79279..c8ca14d8 100644 --- a/docs/core-study-details.md +++ b/docs/core-study-details.md @@ -93,6 +93,17 @@ vital signs) instead. ## core count tables +### core__count_allergyintolerance_month + +| Column | Type |Description| +|------------------------------|-------|-----------| +|cnt |bigint | | +|category |varchar| | +|recordeddate_month |varchar| | +|code_display |varchar| | +|reaction_manifestation_display|varchar| | + + ### core__count_condition_month | Column | Type |Description| @@ -213,6 +224,36 @@ vital signs) instead. ## core base tables +### core__allergyintolerance + +| Column | Type |Description| +|------------------------------|-------|-----------| +|id |varchar| | +|allergyintolerance_ref |varchar| | +|clinicalstatus_code |varchar| | +|verificationstatus_code |varchar| | +|type |varchar| | +|category |varchar| | +|criticality |varchar| | +|code_code |varchar| | +|code_system |varchar| | +|code_display |varchar| | +|patient_ref |varchar| | +|encounter_ref |varchar| | +|recordeddate |date | | +|recordeddate_week |date | | +|recordeddate_month |date | | +|recordeddate_year |date | | +|reaction_row |bigint | | +|reaction_substance_code |varchar| | +|reaction_substance_system |varchar| | +|reaction_substance_display |varchar| | +|reaction_manifestation_code |varchar| | +|reaction_manifestation_system |varchar| | +|reaction_manifestation_display|varchar| | +|reaction_severity |varchar| | + + ### core__condition | Column | Type |Description| diff --git a/tests/conftest.py b/tests/conftest.py index f2eb60c7..4e4b06d7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ """pytest mocks and testing utility classes/methods""" import copy +import datetime import json from pathlib import Path @@ -81,6 +82,16 @@ def duckdb_args(args: list, tmp_path, stats=False): return [*args, "--db-type", "duckdb", "--database", f"{tmp_path}/duck.db"] +def date_to_epoch(year: int, month: int, day: int) -> int: + """Convert a date to a seconds-since-epoch count. + + The round trip from duckdb to pandas seems to do a timestamp conversion and when + comparing against values pulled from duckdb/pandas, you can use this to get the right + number of seconds. + """ + return int(datetime.datetime(year, month, day, tzinfo=datetime.UTC).timestamp()) + + def ndjson_data_generator(source_dir: Path, target_dir: Path, iterations: int): """Uses the test data as a template to create large datasets diff --git a/tests/core/test_core.py b/tests/core/test_core.py index a7fc7ea7..7d37cc39 100644 --- a/tests/core/test_core.py +++ b/tests/core/test_core.py @@ -11,6 +11,7 @@ @pytest.mark.parametrize( "table", [ + ("core__allergyintolerance"), ("core__condition"), ("core__documentreference"), ("core__encounter"), @@ -19,6 +20,7 @@ ("core__observation_lab"), ("core__observation_vital_signs"), ("core__patient"), + ("core__count_allergyintolerance_month"), ("core__count_condition_month"), ("core__count_documentreference_month"), ("core__count_encounter_month"), @@ -125,12 +127,15 @@ def test_core_tiny_database(tmp_path): """Verify that we can generate core tables with some minimal data filled in""" testbed = testbed_utils.LocalTestbed(tmp_path) # Just add bare resources, with minimal data + testbed.add_allergy_intolerance("AllA") testbed.add_condition("ConA") testbed.add_encounter("EncA") testbed.add_medication_request("MedReqA") con = testbed.build() patients = con.sql("SELECT id FROM core__patient").fetchall() assert {e[0] for e in patients} == {"A"} + rows = con.sql("SELECT id FROM core__allergyintolerance").fetchall() + assert {r[0] for r in rows} == {"AllA"} conditions = con.sql("SELECT id FROM core__condition").fetchall() assert {c[0] for c in conditions} == {"ConA"} encounters = con.sql("SELECT id FROM core__encounter").fetchall() diff --git a/tests/core/test_core_allergy.py b/tests/core/test_core_allergy.py new file mode 100644 index 00000000..5cbe1d87 --- /dev/null +++ b/tests/core/test_core_allergy.py @@ -0,0 +1,239 @@ +"""Tests for core__allergyintolerance""" + +import functools +import itertools +import json + +from tests import conftest, testbed_utils + + +def combine_dictionaries(*combos: list[dict]) -> list[dict]: + return [ + functools.reduce(lambda x, y: x | y, tuple_of_dicts) + for tuple_of_dicts in itertools.product(*combos) + ] + + +def dict_set_from_list(rows: list[dict]) -> set[tuple]: + return {tuple(sorted(row.items())) for row in rows} + + +def test_core_allergy_many_cases(tmp_path): + """Verify that we multiply rows as needed when multiple options appear""" + testbed = testbed_utils.LocalTestbed(tmp_path) + testbed.add_allergy_intolerance("Nothing") + testbed.add_allergy_intolerance( + "Multiple Rows", + clinicalStatus={ + "coding": [ + { + "code": "inactive", + "system": "http://terminology.hl7.org/CodeSystem/allergyintolerance-clinical", + "display": "Inactive", + }, + { + "code": "resolved", + "system": "http://terminology.hl7.org/CodeSystem/allergyintolerance-clinical", + "display": "Resolved", + }, + {"code": "extra", "system": "http://example.com/", "display": "Extra"}, + ], + }, + verificationStatus={ + "coding": [ + { + "code": "unconfirmed", + "system": "http://terminology.hl7.org/CodeSystem/allergyintolerance-verification", + "display": "Unconfirmed", + }, + { + "code": "presumed", + "system": "http://terminology.hl7.org/CodeSystem/allergyintolerance-verification", + "display": "Presumed", + }, + {"code": "extra", "system": "http://example.com/", "display": "Extra"}, + ], + }, + type="allergy", + category=["food", "environment"], + criticality="low", + code={ + "coding": [ + { + "code": "10156", + "system": "http://www.nlm.nih.gov/research/umls/rxnorm", + "display": "sucralfate", + }, + {"code": "extra", "system": "http://example.com/", "display": "Extra"}, + ], + }, + patient={"reference": "Patient/P1"}, + encounter={"reference": "Encounter/E1"}, + recordedDate="2019-12-11T10:10:10+05:00", + reaction=[ + { + "manifestation": [ + { + "coding": [ + {"code": "0", "system": "http://example.com/", "display": "Zero"}, + ] + }, + ], + }, + { + "substance": { + "coding": [ + {"code": "1", "system": "http://example.com/", "display": "One"}, + {"code": "2", "system": "http://example.com/", "display": "Two"}, + ] + }, + "manifestation": [ + { + "coding": [ + {"code": "3", "system": "http://example.com/", "display": "Three"}, + {"code": "4", "system": "http://example.com/", "display": "Four"}, + ] + }, + { + "coding": [ + {"code": "5", "system": "http://example.com/", "display": "Five"}, + ] + }, + ], + "severity": "mild", + }, + ], + ) + + con = testbed.build() + df = con.sql("SELECT * FROM core__allergyintolerance").df() + rows = json.loads(df.to_json(orient="records")) + + assert 29 == len(rows) + + nothing = { + "id": "Nothing", + "allergyintolerance_ref": "AllergyIntolerance/Nothing", + "clinicalStatus_code": None, + "verificationStatus_code": None, + "type": None, + "category": None, + "criticality": None, + "code_code": None, + "code_system": None, + "code_display": None, + "patient_ref": None, + "encounter_ref": None, + "recordedDate": conftest.date_to_epoch(2020, 1, 1), + "recordedDate_week": conftest.date_to_epoch(2019, 12, 30), + "recordedDate_month": conftest.date_to_epoch(2020, 1, 1), + "recordedDate_year": conftest.date_to_epoch(2020, 1, 1), + "reaction_row": None, + "reaction_substance_code": None, + "reaction_substance_system": None, + "reaction_substance_display": None, + "reaction_manifestation_code": None, + "reaction_manifestation_system": None, + "reaction_manifestation_display": None, + "reaction_severity": None, + } + combos = combine_dictionaries( + # Start with a list of size one - all the consistent elements across all rows + [ + { + "id": "Multiple Rows", + "allergyintolerance_ref": "AllergyIntolerance/Multiple Rows", + # These next two only have one value in the results, despite having two matching + # code systems. This is because the builder code filters out extra codes and only + # keeps one (which is fine for this use case). + "clinicalStatus_code": "inactive", + "verificationStatus_code": "presumed", + "type": "allergy", + "criticality": "low", + "patient_ref": "Patient/P1", + "encounter_ref": "Encounter/E1", + "recordedDate": conftest.date_to_epoch(2019, 12, 11), + "recordedDate_week": conftest.date_to_epoch(2019, 12, 9), + "recordedDate_month": conftest.date_to_epoch(2019, 12, 1), + "recordedDate_year": conftest.date_to_epoch(2019, 1, 1), + }, + ], + [ + {"category": "food"}, + {"category": "environment"}, + ], + [ + { + "code_code": "10156", + "code_system": "http://www.nlm.nih.gov/research/umls/rxnorm", + "code_display": "sucralfate", + }, + {"code_code": "extra", "code_system": "http://example.com/", "code_display": "Extra"}, + ], + [ + { + "reaction_row": 1.0, + "reaction_substance_code": None, + "reaction_substance_system": None, + "reaction_substance_display": None, + "reaction_manifestation_code": "0", + "reaction_manifestation_system": "http://example.com/", + "reaction_manifestation_display": "Zero", + "reaction_severity": None, + }, + *combine_dictionaries( + [ + # consistent elements + { + "reaction_row": 2.0, + "reaction_severity": "mild", + }, + ], + [ + { + "reaction_substance_code": "1", + "reaction_substance_system": "http://example.com/", + "reaction_substance_display": "One", + }, + { + "reaction_substance_code": "2", + "reaction_substance_system": "http://example.com/", + "reaction_substance_display": "Two", + }, + ], + [ + { + "reaction_manifestation_code": "3", + "reaction_manifestation_system": "http://example.com/", + "reaction_manifestation_display": "Three", + }, + { + "reaction_manifestation_code": "4", + "reaction_manifestation_system": "http://example.com/", + "reaction_manifestation_display": "Four", + }, + { + "reaction_manifestation_code": "5", + "reaction_manifestation_system": "http://example.com/", + "reaction_manifestation_display": "Five", + }, + ], + ), + ], + ) + assert 28 == len(combos) # sanity check our product math + + expected_set = dict_set_from_list(combos) + expected_set |= dict_set_from_list([nothing]) + assert expected_set == dict_set_from_list(rows) + + +def test_core_allergy_date_cutoff(tmp_path): + """Verify that we ignore rows before 2016""" + testbed = testbed_utils.LocalTestbed(tmp_path) + testbed.add_allergy_intolerance("Old", recorded="2015") + testbed.add_allergy_intolerance("New", recorded="2016") + + con = testbed.build() + df = con.sql("SELECT id FROM core__allergyintolerance").df() + assert ["New"] == list(df.id) diff --git a/tests/core/test_core_meds.py b/tests/core/test_core_meds.py index 66666afa..5f00a469 100644 --- a/tests/core/test_core_meds.py +++ b/tests/core/test_core_meds.py @@ -1,9 +1,8 @@ """Tests for core__medicationrequest""" -import datetime import json -from tests import testbed_utils +from tests import conftest, testbed_utils def test_core_med_all_types(tmp_path): @@ -54,9 +53,8 @@ def test_core_med_all_types(tmp_path): "reported_ref": "Patient/Q", "subject_ref": "Patient/P", "encounter_ref": "Encounter/E", - # The round trip from duckdb to pandas seems to do a timestamp conversion on these - "authoredOn": int(datetime.datetime(2021, 10, 16, tzinfo=datetime.UTC).timestamp()), - "authoredOn_month": int(datetime.datetime(2021, 10, 1, tzinfo=datetime.UTC).timestamp()), + "authoredOn": conftest.date_to_epoch(2021, 10, 16), + "authoredOn_month": conftest.date_to_epoch(2021, 10, 1), "medication_code": "c", "medication_system": "letters", "medication_display": "C", diff --git a/tests/regression/reference/core__count_allergyintolerance_month.csv b/tests/regression/reference/core__count_allergyintolerance_month.csv new file mode 100644 index 00000000..eb698243 --- /dev/null +++ b/tests/regression/reference/core__count_allergyintolerance_month.csv @@ -0,0 +1,33 @@ +"cnt","category","recordeddate_month","code_display","reaction_manifestation_display" +119,,,, +80,"environment",,, +62,,,,"cumulus__none" +52,"environment",,,"cumulus__none" +32,,,,"Eruption of skin (disorder)" +32,"food",,, +24,,,,"Wheal (finding)" +19,"food",,,"Eruption of skin (disorder)" +16,,,"Animal dander (substance)", +16,"environment",,"Animal dander (substance)", +15,,,,"Rhinoconjunctivitis (disorder)" +15,,,"Mold (organism)", +15,,,"House dust mite (organism)", +15,,,"House dust mite (organism)","cumulus__none" +15,"environment",,"Mold (organism)", +15,"environment",,"House dust mite (organism)", +15,"environment",,"House dust mite (organism)","cumulus__none" +13,,,,"Cough (finding)" +12,,,"Tree pollen (substance)", +12,,,"Tree pollen (substance)","cumulus__none" +12,,,"Grass pollen (substance)", +12,,,"Grass pollen (substance)","cumulus__none" +12,"environment",,,"Rhinoconjunctivitis (disorder)" +12,"environment",,,"Eruption of skin (disorder)" +12,"environment",,"Tree pollen (substance)", +12,"environment",,"Tree pollen (substance)","cumulus__none" +12,"environment",,"Grass pollen (substance)", +12,"environment",,"Grass pollen (substance)","cumulus__none" +11,,,"Animal dander (substance)","Rhinoconjunctivitis (disorder)" +11,,,"Animal dander (substance)","Eruption of skin (disorder)" +11,"environment",,"Animal dander (substance)","Rhinoconjunctivitis (disorder)" +11,"environment",,"Animal dander (substance)","Eruption of skin (disorder)" diff --git a/tests/regression/reference/core__count_allergyintolerance_month.parquet b/tests/regression/reference/core__count_allergyintolerance_month.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4cd9fb19b752af0263692e08daf859672d192e09 GIT binary patch literal 2401 zcmc&$O>7%Q6rQ!6*h%chgwAH&uA)8Al_S!SxJi^CKprQtL+h5rB(W1BrR&{|y|F!O z_9t$8tU!vmA#p$maX@hCkpmJcstQ6qRqBlc;=rK?1QN;_F1(qzv0E8cpaLV$o0<3j zzBfDaDk)6k5WXwmYk4eTG=k98PzwzRXmubQLa)UlXa=SNQ-b+C%s9+pn6X&ol_)wM zL+uqKJcVZ_a2m&PKkk>MNUJT8B9C-LlL-F4`8#nE6HqV00R$ut3qo+7#Hl=EL;0g6mZ(V& zEXSq|lX@XI?%B2h`Z88JwaqEX5aUZjUCN+)YNik9z^!k;c_0L%aireslX0}2d_>ng zvtm2fc?Dkz;vGT#KBOLoB=e%%&^3fL`c08?!+E(Ox<=2 z(=Z^1-G)uQD$inF+*WUf)rbA^TpINZ_+9}2Ams5N5BaWelubYoLal~YR}76BrlTZX zzwCM%)y?r?5MH+HhLW_MibgHBF@6SEE4J^Nis8GS(y%;}Nn>O4qaay#OjBvv^}0zX z^GK0^m2))Ly(qKtCh$~Oc~Ww;OT9a+l=#bxz@fMv1{z0wf4mwOg5mUMD`&rYA2{mG zXa}LFZX4#VVY$t^cHE(#gW}gg_3og0FCx$Vv)DZmJreN46GX2ay%mF0bid)(eRp@4 z+SEJ?jAh4fdX`NUdrxs|7FCjv8tb6LF)mh3z(2WXQNyB@@iDL{R4r=jHr@BB?pX(x zXSqr;HH+pKZNF0O=mM-sJkbT~nPlv-@Y3)T7U2AJo?q-AU-`L9+$sc{8;C^1y8A{ttzGya$@J-eWsrAJMt`^hX3Q^`S0(W z_;FNBs}b>wq0XA74M|r{GI$q)3JkvIdilON6vYrb4oWRLEk5^D>}t zne7${d7Z6rezd}A1Lo2U<9m#(w18wf^2HkvWFC0S*<~hQVubK5-({F-=K6LqyKm&` z2j%*Lw^f{NZZ|e>7`g0}R$TB)sX|k~ka4$4HSoU#{uW%8Iy1}^xEG!&E2KSmzFVn< z+V None: # # All other args can be specified as a kwarg, like add() itself does. + def add_allergy_intolerance(self, row_id: str, recorded: str = "2020", **kwargs) -> None: + """Adds a Condition with all the SQL-required fields filled out""" + self.add( + "allergyintolerance", + { + "resourceType": "AllergyIntolerance", + "id": row_id, + "recordedDate": recorded, + **kwargs, + }, + ) + def add_condition(self, row_id: str, recorded: str = "2020", **kwargs) -> None: """Adds a Condition with all the SQL-required fields filled out""" self.add(