diff --git a/cumulus_library/.sqlfluff b/cumulus_library/.sqlfluff index 100251c6..afd494f5 100644 --- a/cumulus_library/.sqlfluff +++ b/cumulus_library/.sqlfluff @@ -33,36 +33,29 @@ code_system_tables = [ { table_name":"hasarray", - "column_name":"acol", - "is_bare_coding":False, - "is_array":True, + "column_hierarchy":[("acol",list),("bcol",dict)], "has_data": True }, { "table_name":"noarray", - "column_name":"col", - "is_bare_coding":False, - "is_array":False, + "column_hierarchy":[("acol.bcol", list)], "has_data": True }, { "table_name":"bare", - "column_name":"bcol", - "is_bare_coding":True, - "is_array":False, + "column_hierarchy":[("bcol", dict)], "has_data": True }, { "table_name":"empty", - "column_name":"empty", - "is_bare_coding":False, - "is_array":False, + "column_hierarchy":[("empty",dict)], "has_data": False } ] column_name = 'bar' column_names = ['foo', 'bar'] conditions = ["1 > 0", "1 < 2"] +column_hierarchy = [('a', list),('b',dict)] config = { "medication_datasources" : { diff --git a/cumulus_library/studies/discovery/code_definitions.py b/cumulus_library/studies/discovery/code_definitions.py index 8f76d9c7..68905be1 100644 --- a/cumulus_library/studies/discovery/code_definitions.py +++ b/cumulus_library/studies/discovery/code_definitions.py @@ -1,74 +1,87 @@ # A collection of codes & codeableConcepts to extract available codes from. 
-# Two optional booleans are available for use: -# - is_array: the field in question is an array of CodeableConcepts -# - is_bare_coding: the field in question is a Coding not wrapped in concepts -# - otherwise, it is assumed to be a 0..1 or 1..1 CodeableConcept -# TODO: if another state is needed, move to an Enum + code_list = [ # Condition - {"table_name": "condition", "column_name": "category", "is_array": True}, { "table_name": "condition", - "column_name": "code", + "column_hierarchy": [("category", list), ("coding", list)], + }, + { + "table_name": "condition", + "column_hierarchy": [("code", dict), ("coding", list)], }, # DocumentReference { "table_name": "documentreference", - "column_name": "type", + "column_hierarchy": [("type", dict), ("coding", list)], + }, + { + "table_name": "documentreference", + "column_hierarchy": [("category", list), ("coding", list)], }, - {"table_name": "documentreference", "column_name": "category", "is_array": True}, # Encounter { "table_name": "encounter", - "column_name": "class", - "is_bare_coding": True, + "column_hierarchy": [("class", dict)], + }, + { + "table_name": "encounter", + "column_hierarchy": [("type", list), ("coding", list)], }, { "table_name": "encounter", - "column_name": "type", - "is_array": True, + "column_hierarchy": [("servicetype", dict), ("coding", list)], }, { "table_name": "encounter", - "column_name": "servicetype", + "column_hierarchy": [("priority", dict), ("coding", list)], }, { "table_name": "encounter", - "column_name": "priority", + "column_hierarchy": [("reasoncode", list), ("coding", list)], }, - {"table_name": "encounter", "column_name": "reasoncode", "is_array": True}, { "table_name": "encounter", - "column_name": "hospitalization.dischargedisposition", + "column_hierarchy": [ + ("hospitalization", dict), + ("dischargedisposition", dict), + ("coding", list), + ], }, # Medication { "table_name": "medication", - "column_name": "codecodeableconcept ", + "column_hierarchy": 
[("codecodeableconcept", dict), ("coding", list)], }, { "table_name": "medication", - "column_name": "medicationcode", + "column_hierarchy": [("medicationcode", dict), ("coding", list)], }, # Observation - {"table_name": "observation", "column_name": "category", "is_array": True}, { "table_name": "observation", - "column_name": "code", + "column_hierarchy": [("category", list), ("coding", list)], + }, + { + "table_name": "observation", + "column_hierarchy": [("code", dict), ("coding", list)], + }, + { + "table_name": "observation", + "column_hierarchy": [("interpretation", list), ("coding", list)], }, - {"table_name": "observation", "column_name": "interpretation", "is_array": True}, { "table_name": "observation", - "column_name": "valuecodeableconcept", + "column_hierarchy": [("valuecodeableconcept", dict), ("coding", list)], }, { "table_name": "observation", - "column_name": "dataabsentreason", + "column_hierarchy": [("dataabsentreason", dict), ("coding", list)], }, # Patient { "table_name": "patient", - "column_name": "maritalstatus", + "column_hierarchy": [("maritalstatus", dict), ("coding", list)], }, ] diff --git a/cumulus_library/studies/discovery/code_detection.py b/cumulus_library/studies/discovery/code_detection.py index 4c5a4951..2ce96c91 100644 --- a/cumulus_library/studies/discovery/code_detection.py +++ b/cumulus_library/studies/discovery/code_detection.py @@ -2,7 +2,8 @@ from cumulus_library import base_table_builder, base_utils from cumulus_library.studies.discovery import code_definitions -from cumulus_library.template_sql import base_templates, sql_utils +from cumulus_library.studies.discovery.discovery_templates import discovery_templates +from cumulus_library.template_sql import sql_utils class CodeDetectionBuilder(base_table_builder.BaseTableBuilder): @@ -11,27 +12,11 @@ class CodeDetectionBuilder(base_table_builder.BaseTableBuilder): def _check_coding_against_db(self, code_source, schema, cursor): """selects the appropriate DB query to run""" 
- if code_source["is_array"]: - return sql_utils.is_field_populated( - schema=schema, - source_table=code_source["table_name"], - hierarchy=[(code_source["column_name"], list)], - expected=sql_utils.CODEABLE_CONCEPT, - cursor=cursor, - ) - elif code_source["is_bare_coding"]: - return sql_utils.is_field_populated( - schema=schema, - source_table=code_source["table_name"], - hierarchy=[(code_source["column_name"], dict)], - expected=sql_utils.CODING, - cursor=cursor, - ) return sql_utils.is_field_populated( schema=schema, source_table=code_source["table_name"], - hierarchy=[(code_source["column_name"], dict)], - expected=sql_utils.CODEABLE_CONCEPT, + hierarchy=code_source["column_hierarchy"], + expected=sql_utils.CODING, cursor=cursor, ) @@ -59,24 +44,21 @@ def prepare_queries(self, cursor: object, schema: str, *args, **kwargs): """ code_sources = [] + required_keys = {"table_name", "column_hierarchy"} for code_definition in code_definitions.code_list: - if any( - x not in code_definition.keys() for x in ["table_name", "column_name"] - ): + if not required_keys.issubset(code_definition): raise KeyError( - "Expected table_name and column_name keys in " + "Expected table_name and column_hierarchy keys in " f"{code_definition!s}" ) code_source = { - "is_bare_coding": False, - "is_array": False, "has_data": False, } for key in code_definition.keys(): code_source[key] = code_definition[key] code_sources.append(code_source) code_sources = self._check_codes_in_fields(code_sources, schema, cursor) - query = base_templates.get_code_system_pairs( + query = discovery_templates.get_code_system_pairs( "discovery__code_sources", code_sources ) self.queries.append(query) diff --git a/cumulus_library/studies/discovery/code_detection.sql b/cumulus_library/studies/discovery/code_detection.sql deleted file mode 100644 index 8d29bcb6..00000000 --- a/cumulus_library/studies/discovery/code_detection.sql +++ /dev/null @@ -1,126 +0,0 @@ --- noqa: disable=all -/* -This is a reference output 
of the SQL generated by builder_code_detection.py -that is used by the core__encounter_type table, against the synthea dataset. -It is provided as a form of documentation only and will not be invoked directly. -*/ - -CREATE TABLE discovery__code_sources AS -SELECT DISTINCT - 'condition' AS table_name, - 'category' AS column_name, - t2.row2.code, - t2.row2.display, - t2.row2.system -FROM condition, -UNNEST(category) AS t1 (row1), -UNNEST(t1.row1.coding) AS t2 (row2) -UNION -SELECT DISTINCT - 'condition' AS table_name, - 'code' AS column_name, - t.row.code, - t.row.display, - t.row.system -FROM condition, -UNNEST(code.coding) AS t (row) -UNION -SELECT DISTINCT - 'documentreference' AS table_name, - 'type' AS column_name, - t.row.code, - t.row.display, - t.row.system -FROM documentreference, -UNNEST(type.coding) AS t (row) -UNION -SELECT DISTINCT - 'documentreference' AS table_name, - 'category' AS column_name, - t2.row2.code, - t2.row2.display, - t2.row2.system -FROM documentreference, -UNNEST(category) AS t1 (row1), -UNNEST(t1.row1.coding) AS t2 (row2) -UNION -SELECT * - FROM ( - VALUES ( - ('encounter','class', '', '', '') - ) - ) -AS t ( table_name, column_name, code, display, system ) -- noqa: L025 -UNION -SELECT DISTINCT - 'encounter' AS table_name, - 'type' AS column_name, - t2.row2.code, - t2.row2.display, - t2.row2.system -FROM encounter, -UNNEST(type) AS t1 (row1), -UNNEST(t1.row1.coding) AS t2 (row2) -UNION -SELECT * - FROM ( - VALUES ( - ('encounter','servicetype', '', '', '') - ) - ) -AS t ( table_name, column_name, code, display, system ) -- noqa: L025 -UNION -SELECT * - FROM ( - VALUES ( - ('encounter','priority', '', '', '') - ) - ) -AS t ( table_name, column_name, code, display, system ) -- noqa: L025 -UNION -SELECT DISTINCT - 'encounter' AS table_name, - 'reasoncode' AS column_name, - t2.row2.code, - t2.row2.display, - t2.row2.system -FROM encounter, -UNNEST(reasoncode) AS t1 (row1), -UNNEST(t1.row1.coding) AS t2 (row2) -UNION -SELECT DISTINCT - 
'medication' AS table_name, - 'code' AS column_name, - t.row.code, - t.row.display, - t.row.system -FROM medication, -UNNEST(code.coding) AS t (row) -UNION -SELECT DISTINCT - 'observation' AS table_name, - 'category' AS column_name, - t2.row2.code, - t2.row2.display, - t2.row2.system -FROM observation, -UNNEST(category) AS t1 (row1), -UNNEST(t1.row1.coding) AS t2 (row2) -UNION -SELECT DISTINCT - 'observation' AS table_name, - 'code' AS column_name, - t.row.code, - t.row.display, - t.row.system -FROM observation, -UNNEST(code.coding) AS t (row) -UNION -SELECT DISTINCT - 'patient' AS table_name, - 'maritalstatus' AS column_name, - t.row.code, - t.row.display, - t.row.system -FROM patient, -UNNEST(maritalstatus.coding) AS t (row) diff --git a/cumulus_library/studies/discovery/discovery_templates/code_system_pairs.sql.jinja b/cumulus_library/studies/discovery/discovery_templates/code_system_pairs.sql.jinja new file mode 100644 index 00000000..29a0aa7d --- /dev/null +++ b/cumulus_library/studies/discovery/discovery_templates/code_system_pairs.sql.jinja @@ -0,0 +1,52 @@ +{%- import 'syntax.sql.jinja' as syntax -%} + +{%- macro get_source_col(column_hierarchy) -%} +{%- if column_hierarchy|length >1 or column_hierarchy[0][1].__name__ == 'list' -%} +table_{{ column_hierarchy|length }}.col_{{ column_hierarchy|length }} +{%- else -%} +{{ column_hierarchy[0][0] }} +{%- endif %} +{%- endmacro -%} +{#- Implementation note: since iterating/combining complex objects in jinja is a fool's +errand, this query assumes that the column_hierarchy has been preprocessed before +this template is called. + +discovery_templates.py is combining all dict types with the next deepest list in the +hierarchy, since those should be accessed with . separators rather than being unnested. 
+ +So we can make the assumption here that the elements of the hierarchy are arrays, and +only the deepest element in that should be of a variable type.-#} +CREATE TABLE {{ output_table_name }} AS +{%- for table in code_system_tables %} +{%- if table.has_data %} +SELECT DISTINCT + '{{ table.table_name }}' AS table_name, + '{{ table.column_display_name }}' AS column_name, + {{ get_source_col(table.column_hierarchy) }}.code, + {{ get_source_col(table.column_hierarchy) }}.display, + {{ get_source_col(table.column_hierarchy) }}.system +FROM {{ table.table_name }} +{%- if table.column_hierarchy|length >1 or table.column_hierarchy[0][1].__name__ == 'list' -%}, +UNNEST({{ table.column_hierarchy[0][0] }}) AS table_1 (col_1) +{%- endif %} +{%- for index in range(1,table.column_hierarchy|length) %} +{%- if table.column_hierarchy[index][1].__name__ == 'list' -%}, +UNNEST(col_{{ index }}.{{ table.column_hierarchy[index][0].split('.')[0] }}) as table_{{ index +1 }} (col_{{ index +1 }}) +{%- endif %} +{%- endfor %} +{%- else %} +SELECT * +FROM ( + VALUES ( + '{{ table.table_name }}', + '{{ table.column_display_name }}', + '', + '', + '' + ) +) + AS t (table_name, column_name, code, display, system) +{%- endif %} + +{{ syntax.union_delineate(loop) }} +{% endfor %} diff --git a/cumulus_library/studies/discovery/discovery_templates/discovery_templates.py b/cumulus_library/studies/discovery/discovery_templates/discovery_templates.py new file mode 100644 index 00000000..05445db6 --- /dev/null +++ b/cumulus_library/studies/discovery/discovery_templates/discovery_templates.py @@ -0,0 +1,37 @@ +import pathlib + +from cumulus_library.template_sql import base_templates + + +def get_code_system_pairs(output_table_name: str, code_system_tables: list) -> str: + """Extracts code system details as a standalone table""" + + # Since it's easier to wrangle data before SQL, this code block does + # the following: given a datastructure like: + # [('a',list),('b', dict),('c', dict),('d', list), 
('e', dict)] + # Since data access in sql by nested dicts is just joining operators, + # it will flatten columns together by combining dicts up to the next + # list instance, i.e. when the next unnest would be needed: + # [('a',list),('b.c.d', list), ('e', dict)] + # It also creates a display name along the way + for table in code_system_tables: + unnest_layer = "" + squashed_hierarchy = [] + display_col = "" + for column in table["column_hierarchy"]: + unnest_layer = ".".join(x for x in [unnest_layer, column[0]] if x) + display_col = ".".join(x for x in [display_col, column[0]] if x) + if column[1] == list: + squashed_hierarchy.append((unnest_layer, list)) + unnest_layer = "" + if unnest_layer != "": + squashed_hierarchy.append((unnest_layer, dict)) + table["column_hierarchy"] = squashed_hierarchy + table["column_display_name"] = display_col.removesuffix(".coding") + + return base_templates.get_base_template( + "code_system_pairs", + pathlib.Path(__file__).parent, + output_table_name=output_table_name, + code_system_tables=code_system_tables, + ) diff --git a/cumulus_library/studies/discovery/reference_sql/code_detection.sql b/cumulus_library/studies/discovery/reference_sql/code_detection.sql new file mode 100644 index 00000000..cfa8dc42 --- /dev/null +++ b/cumulus_library/studies/discovery/reference_sql/code_detection.sql @@ -0,0 +1,233 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library +-- CLI. Its format is tied to the specific database it was run against, +-- and it may not be correct for all databases. Use the CLI's build +-- option to derive the best SQL for your dataset. 
+ +-- ########################################################### + +CREATE TABLE discovery__code_sources AS +SELECT DISTINCT + 'condition' AS table_name, + 'category' AS column_name, + table_2.col_2.code, + table_2.col_2.display, + table_2.col_2.system +FROM condition, +UNNEST(category) AS table_1 (col_1), +UNNEST(col_1.coding) as table_2 (col_2) + +UNION + +SELECT DISTINCT + 'condition' AS table_name, + 'code' AS column_name, + table_1.col_1.code, + table_1.col_1.display, + table_1.col_1.system +FROM condition, +UNNEST(code.coding) AS table_1 (col_1) + +UNION + +SELECT DISTINCT + 'documentreference' AS table_name, + 'type' AS column_name, + table_1.col_1.code, + table_1.col_1.display, + table_1.col_1.system +FROM documentreference, +UNNEST(type.coding) AS table_1 (col_1) + +UNION + +SELECT DISTINCT + 'documentreference' AS table_name, + 'category' AS column_name, + table_2.col_2.code, + table_2.col_2.display, + table_2.col_2.system +FROM documentreference, +UNNEST(category) AS table_1 (col_1), +UNNEST(col_1.coding) as table_2 (col_2) + +UNION + +SELECT * +FROM ( + VALUES ( + 'encounter', + 'class', + '', + '', + '' + ) +) + AS t (table_name, column_name, code, display, system) + +UNION + +SELECT DISTINCT + 'encounter' AS table_name, + 'type' AS column_name, + table_2.col_2.code, + table_2.col_2.display, + table_2.col_2.system +FROM encounter, +UNNEST(type) AS table_1 (col_1), +UNNEST(col_1.coding) as table_2 (col_2) + +UNION + +SELECT * +FROM ( + VALUES ( + 'encounter', + 'servicetype', + '', + '', + '' + ) +) + AS t (table_name, column_name, code, display, system) + +UNION + +SELECT * +FROM ( + VALUES ( + 'encounter', + 'priority', + '', + '', + '' + ) +) + AS t (table_name, column_name, code, display, system) + +UNION + +SELECT DISTINCT + 'encounter' AS table_name, + 'reasoncode' AS column_name, + table_2.col_2.code, + table_2.col_2.display, + table_2.col_2.system +FROM encounter, +UNNEST(reasoncode) AS table_1 (col_1), +UNNEST(col_1.coding) as table_2 (col_2) 
+ +UNION + +SELECT DISTINCT + 'encounter' AS table_name, + 'hospitalization.dischargedisposition' AS column_name, + table_1.col_1.code, + table_1.col_1.display, + table_1.col_1.system +FROM encounter, +UNNEST(hospitalization.dischargedisposition.coding) AS table_1 (col_1) + +UNION + +SELECT * +FROM ( + VALUES ( + 'medication', + 'codecodeableconcept', + '', + '', + '' + ) +) + AS t (table_name, column_name, code, display, system) + +UNION + +SELECT * +FROM ( + VALUES ( + 'medication', + 'medicationcode', + '', + '', + '' + ) +) + AS t (table_name, column_name, code, display, system) + +UNION + +SELECT DISTINCT + 'observation' AS table_name, + 'category' AS column_name, + table_2.col_2.code, + table_2.col_2.display, + table_2.col_2.system +FROM observation, +UNNEST(category) AS table_1 (col_1), +UNNEST(col_1.coding) as table_2 (col_2) + +UNION + +SELECT DISTINCT + 'observation' AS table_name, + 'code' AS column_name, + table_1.col_1.code, + table_1.col_1.display, + table_1.col_1.system +FROM observation, +UNNEST(code.coding) AS table_1 (col_1) + +UNION + +SELECT * +FROM ( + VALUES ( + 'observation', + 'interpretation', + '', + '', + '' + ) +) + AS t (table_name, column_name, code, display, system) + +UNION + +SELECT DISTINCT + 'observation' AS table_name, + 'valuecodeableconcept' AS column_name, + table_1.col_1.code, + table_1.col_1.display, + table_1.col_1.system +FROM observation, +UNNEST(valuecodeableconcept.coding) AS table_1 (col_1) + +UNION + +SELECT * +FROM ( + VALUES ( + 'observation', + 'dataabsentreason', + '', + '', + '' + ) +) + AS t (table_name, column_name, code, display, system) + +UNION + +SELECT DISTINCT + 'patient' AS table_name, + 'maritalstatus' AS column_name, + table_1.col_1.code, + table_1.col_1.display, + table_1.col_1.system +FROM patient, +UNNEST(maritalstatus.coding) AS table_1 (col_1) + + + diff --git a/cumulus_library/template_sql/base_templates.py b/cumulus_library/template_sql/base_templates.py index 85e19926..552d227b 100644 --- 
a/cumulus_library/template_sql/base_templates.py +++ b/cumulus_library/template_sql/base_templates.py @@ -50,15 +50,6 @@ def get_alias_table_query(source_table: str, target_table: str): ) -def get_code_system_pairs(output_table_name: str, code_system_tables: list) -> str: - """Extracts code system details as a standalone table""" - return get_base_template( - "code_system_pairs", - output_table_name=output_table_name, - code_system_tables=code_system_tables, - ) - - def get_codeable_concept_denormalize_query( config: sql_utils.CodeableConceptConfig, ) -> str: diff --git a/cumulus_library/template_sql/code_system_pairs.sql.jinja b/cumulus_library/template_sql/code_system_pairs.sql.jinja deleted file mode 100644 index 8d08f9d3..00000000 --- a/cumulus_library/template_sql/code_system_pairs.sql.jinja +++ /dev/null @@ -1,48 +0,0 @@ -CREATE TABLE {{ output_table_name }} AS -{%- for source in code_system_tables %} -{%- if source.has_data %} -{%- if source.is_bare_coding %} -SELECT DISTINCT - '{{ source.table_name }}' AS table_name, - '{{ source.column_name }}' AS column_name, - {{ source.column_name }}.code, - {{ source.column_name }}.display, - {{ source.column_name }}.system -FROM {{ source.table_name }} -{%- elif source.is_array %} -SELECT DISTINCT - '{{ source.table_name }}' AS table_name, - '{{ source.column_name }}' AS column_name, - t2.row2.code, - t2.row2.display, - t2.row2.system -FROM {{ source.table_name }}, -UNNEST({{ source.column_name }}) AS t1 (row1), -UNNEST(t1.row1.coding) AS t2 (row2) -{%- else %} -SELECT DISTINCT - '{{ source.table_name }}' AS table_name, - '{{ source.column_name }}' AS column_name, - t.row.code, - t.row.display, - t.row.system -FROM {{ source.table_name }}, -UNNEST({{ source.column_name }}.coding) AS t (row) -{%- endif %} -{%- else %} -SELECT * -FROM ( - VALUES ( - '{{ source.table_name }}', - '{{ source.column_name }}', - '', - '', - '' - ) -) - AS t (table_name, column_name, code, display, system) -{%- endif -%} -{%- if not loop.last 
%} -UNION -{%- endif -%} -{% endfor %} diff --git a/cumulus_library/template_sql/shared_macros/syntax.sql.jinja b/cumulus_library/template_sql/shared_macros/syntax.sql.jinja index 853af262..59e26c96 100644 --- a/cumulus_library/template_sql/shared_macros/syntax.sql.jinja +++ b/cumulus_library/template_sql/shared_macros/syntax.sql.jinja @@ -6,6 +6,12 @@ {%- endif -%} {%- endmacro -%} +{%- macro period_delineate(loop) -%} +{%- if not loop.last -%} +. +{%- endif -%} +{%- endmacro -%} + {%- macro union_delineate(loop) -%} {%- if not loop.last -%} UNION diff --git a/tests/test_data/discovery/discovery__code_sources.txt b/tests/test_data/discovery/discovery__code_sources.txt index b96c1885..9b304149 100644 --- a/tests/test_data/discovery/discovery__code_sources.txt +++ b/tests/test_data/discovery/discovery__code_sources.txt @@ -15,7 +15,7 @@ documentreference,type,34111-5,Emergency department note,http://loinc.org documentreference,type,51847-2,Evaluation + Plan note,http://loinc.org encounter,class,AMB,None,http://terminology.hl7.org/CodeSystem/v3-ActCode encounter,class,EMER,None,http://terminology.hl7.org/CodeSystem/v3-ActCode -encounter,hospitalization.dischargedisposition,,, +encounter,hospitalization.dischargedisposition,01,Discharged to home care or self care (routine discharge),http://www.nubc.org/patient-discharge encounter,priority,,, encounter,reasoncode,10509002,Acute bronchitis (disorder),http://snomed.info/sct encounter,reasoncode,161665007,History of renal transplant (situation),http://snomed.info/sct @@ -42,7 +42,7 @@ encounter,type,448337001,Telemedicine consultation with patient,http://snomed.in encounter,type,50849002,Emergency room admission (procedure),http://snomed.info/sct encounter,type,698314001,Consultation for treatment,http://snomed.info/sct encounter,type,702927004,Urgent care clinic (environment),http://snomed.info/sct -medication,codecodeableconcept ,,, +medication,codecodeableconcept,,, medication,medicationcode,,, 
observation,category,laboratory,Laboratory,http://terminology.hl7.org/CodeSystem/observation-category observation,code,34533-0,Odor of Urine,http://loinc.org diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 244fc8ed..c87a64cb 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -3,6 +3,7 @@ from unittest import mock from cumulus_library import cli, databases +from cumulus_library.studies.discovery.discovery_templates import discovery_templates from tests import conftest @@ -62,3 +63,80 @@ def test_discovery(tmp_path): if ref_row[pos] == "None": ref_row[pos] = None assert tuple(ref_row) in table_rows + + +def test_get_code_system_pairs(): + expected = """CREATE TABLE output_table AS +SELECT DISTINCT + 'arrays' AS table_name, + 'acol' AS column_name, + table_2.col_2.code, + table_2.col_2.display, + table_2.col_2.system +FROM arrays, +UNNEST(acol) AS table_1 (col_1), +UNNEST(col_1.coding) as table_2 (col_2) + +UNION + +SELECT DISTINCT + 'dictarray' AS table_name, + 'col' AS column_name, + table_1.col_1.code, + table_1.col_1.display, + table_1.col_1.system +FROM dictarray, +UNNEST(col.coding) AS table_1 (col_1) + +UNION + +SELECT DISTINCT + 'bare' AS table_name, + 'bcol' AS column_name, + bcol.coding.code, + bcol.coding.display, + bcol.coding.system +FROM bare + +UNION + +SELECT * +FROM ( + VALUES ( + 'empty', + 'empty', + '', + '', + '' + ) +) + AS t (table_name, column_name, code, display, system) + + +""" + query = discovery_templates.get_code_system_pairs( + "output_table", + [ + { + "table_name": "arrays", + "column_hierarchy": [("acol", list), ("coding", list)], + "has_data": True, + }, + { + "table_name": "dictarray", + "column_hierarchy": [("col", dict), ("coding", list)], + "has_data": True, + }, + { + "table_name": "bare", + "column_hierarchy": [("bcol", dict), ("coding", dict)], + "has_data": True, + }, + { + "table_name": "empty", + "column_hierarchy": [("empty", dict), ("coding", dict)], + "has_data": False, + }, + ], + 
) + assert query == expected diff --git a/tests/test_templates.py b/tests/test_templates.py index e1f2e0fb..e04f4c2e 100644 --- a/tests/test_templates.py +++ b/tests/test_templates.py @@ -467,79 +467,3 @@ def test_is_table_not_empty(): conditions=["field_name LIKE 's%'", "field_name IS NOT NULL"], ) assert query == expected - - -def test_get_code_system_pairs(): - expected = """CREATE TABLE output_table AS -SELECT DISTINCT - 'hasarray' AS table_name, - 'acol' AS column_name, - t2.row2.code, - t2.row2.display, - t2.row2.system -FROM hasarray, -UNNEST(acol) AS t1 (row1), -UNNEST(t1.row1.coding) AS t2 (row2) -UNION -SELECT DISTINCT - 'noarray' AS table_name, - 'col' AS column_name, - t.row.code, - t.row.display, - t.row.system -FROM noarray, -UNNEST(col.coding) AS t (row) -UNION -SELECT DISTINCT - 'bare' AS table_name, - 'bcol' AS column_name, - bcol.code, - bcol.display, - bcol.system -FROM bare -UNION -SELECT * -FROM ( - VALUES ( - 'empty', - 'empty', - '', - '', - '' - ) -) - AS t (table_name, column_name, code, display, system)""" - query = base_templates.get_code_system_pairs( - "output_table", - [ - { - "table_name": "hasarray", - "column_name": "acol", - "is_bare_coding": False, - "is_array": True, - "has_data": True, - }, - { - "table_name": "noarray", - "column_name": "col", - "is_bare_coding": False, - "is_array": False, - "has_data": True, - }, - { - "table_name": "bare", - "column_name": "bcol", - "is_bare_coding": True, - "is_array": False, - "has_data": True, - }, - { - "table_name": "empty", - "column_name": "empty", - "is_bare_coding": False, - "is_array": False, - "has_data": False, - }, - ], - ) - assert query == expected