diff --git a/cumulus_library/databases.py b/cumulus_library/databases.py index 789d415d..368de1a1 100644 --- a/cumulus_library/databases.py +++ b/cumulus_library/databases.py @@ -44,7 +44,9 @@ def fetchall(self) -> list[list] | None: class DatabaseParser(abc.ABC): """Parses information_schema results from a database""" - def _parse_found_schema(self, expected: dict[dict[list]], schema: list[list]): + def _parse_found_schema( + self, expected: dict[dict[list]], schema: dict[list] + ) -> dict: """Checks for presence of field for each column in a table :param expected: A nested dict describing the expected data format of @@ -64,36 +66,33 @@ def _parse_found_schema(self, expected: dict[dict[list]], schema: list[list]): is a first pass, ignoring complexities of differing database variable types, just iterating through looking for column names. - - TODO: on a per database instance, consider a more nuanced approach - if needed + TODO: on a per database instance, consider a more nuanced approach if needed + (compared to just checking if the schema contains the field name) """ output = {} - for column, _ in expected.items(): - output[column] = {} - if col_schema := schema[column.lower()]: - # is this an object column? - if len(expected[column]) > 0: - for field in expected[column]: - col_schema = col_schema.split(field, 1) - if len(col_schema) != 2: - output[column][field] = False - col_schema = col_schema[0] - else: - output[column][field] = True - col_schema = col_schema[1] - # otherwise this is a primitive col - else: - output[column] = True + + for column, fields in expected.items(): + column_lower = column.lower() + + # is this an object column? (like: "subject": ["reference"]) + if fields: + col_schema = schema.get(column_lower, "").lower() + output[column] = { + # TODO: make this check more robust + field: field.lower() in col_schema + for field in fields + } + + # otherwise this is a primitive col (like: "recordedDate": None) else: - for field in expected[column]: - output[column][field] = False + output[column] = column_lower in schema + return output @abc.abstractmethod def validate_table_schema( - self, expected: dict[dict[list]], schema: list[tuple] - ) -> dict[bool]: + self, expected: dict[str, list[str]], schema: list[tuple] + ) -> dict: """Public interface for investigating if fields are in a table schema. This method should lightly format results and pass them to @@ -182,7 +181,7 @@ def close(self) -> None: class AthenaParser(DatabaseParser): def validate_table_schema( self, expected: dict[dict[list]], schema: list[list] - ) -> bool: + ) -> dict: schema = dict(schema) return self._parse_found_schema(expected, schema) @@ -291,6 +290,8 @@ def _compat_from_iso8601_timestamp( else: return datetime.datetime(int(pieces[0]), int(pieces[1]), 1) + # TODO: return timezone-aware datetimes, like Athena does + # (this currently generates naive datetimes, in UTC local time) return datetime.datetime.fromisoformat(value) def cursor(self) -> duckdb.DuckDBPyConnection: diff --git a/cumulus_library/studies/core/core_templates/core_utils.jinja b/cumulus_library/studies/core/core_templates/core_utils.jinja index 5ba06746..5b711286 100644 --- a/cumulus_library/studies/core/core_templates/core_utils.jinja +++ b/cumulus_library/studies/core/core_templates/core_utils.jinja @@ -90,7 +90,7 @@ targets is assumed to be a list of tuples of one of the following format: {%- for col in targets %} {%- if col is not string and col|length ==4%} {%- if schema[table][col[0]][col[1]] %} - date_trunc('{{ col[3] }}', date(from_iso8601_timestamp({{ alias }}.{{ col[0] }}.{{ col[1] }}))) + date_trunc('{{ col[3] }}', date(from_iso8601_timestamp({{ alias }}."{{ col[0] }}"."{{ col[1] }}"))) AS {{ col[2] }} {%- else %} cast(NULL AS date) AS {{col[2]}} @@ -98,14 +98,14 @@ targets is assumed to be a list of tuples of one of the following format: {#- depth two nested column -#} {%- elif col is not string and col|length ==5%} {%- if schema[table][col[0]][col[2]] %} - date_trunc('{{ col[4] }}', date(from_iso8601_timestamp({{ alias }}.{{ col[0] }}.{{ col[1] }}.{{col[2]}}))) + date_trunc('{{ col[4] }}', date(from_iso8601_timestamp({{ alias }}."{{ col[0] }}"."{{ col[1] }}"."{{col[2]}}"))) AS {{ col[3] }} {%- else %} cast(NULL AS date) AS {{col[3]}} {%- endif %} {#- SQL primitive column column-#} {%- elif schema[table][col[0]] %} - date_trunc('{{ col[1] }}', date(from_iso8601_timestamp({{ alias }}.{{ col[0] }}))) + date_trunc('{{ col[1] }}', date(from_iso8601_timestamp({{ alias }}."{{ col[0] }}"))) AS {{ col[0] }}_{{ col[1] }} {%- else %} cast(NULL AS date) AS {{ col[0] }}_{{ col[1] }} diff --git a/cumulus_library/template_sql/column_datatype.sql.jinja b/cumulus_library/template_sql/column_datatype.sql.jinja index 334def33..b3c568aa 100644 --- a/cumulus_library/template_sql/column_datatype.sql.jinja +++ b/cumulus_library/template_sql/column_datatype.sql.jinja @@ -4,5 +4,5 @@ SELECT FROM information_schema.columns WHERE table_schema = '{{ schema_name }}' - AND table_name = '{{ table_name }}' - AND LOWER(column_name) IN ('{{ column_names|join("', '") }}') --noqa: LT05 + AND table_name = '{{ table_name|lower }}' + AND LOWER(column_name) IN ('{{ column_names|join("', '")|lower }}') --noqa: LT05 diff --git a/tests/test_data/core/core__encounter.txt b/tests/test_data/core/core__encounter.txt index a2c34dec..a4871e67 100644 --- a/tests/test_data/core/core__encounter.txt +++ b/tests/test_data/core/core__encounter.txt @@ -21,7 +21,7 @@ ('75312bd2-d5ac-c62e-c9df-0004783725c7', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, None, None, 11, datetime.date(2018, 7, 2), datetime.date(2018, 7, 2), datetime.date(2018, 7, 2), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/3ae095ec-8fe0-133b-36d4-8785a6ad8df3', 'Encounter/75312bd2-d5ac-c62e-c9df-0004783725c7', 'female', 'white', 'not hispanic or latino', '662') ('75b68644-535d-bdc1-4c31-aa9fe7e1822f', {'id': None, 'code': 'EMER', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'EMER', 'emergency', 'finished', None, None, None, None, None, None, '55680006', 'http://snomed.info/sct', 25, datetime.date(2018, 6, 30), datetime.date(2018, 6, 30), datetime.date(2018, 6, 25), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/82b8a670-4700-30e8-24a0-b83efa3c5e0a', 'Encounter/75b68644-535d-bdc1-4c31-aa9fe7e1822f', 'female', 'white', 'not hispanic or latino', '672') ('79d8f213-7847-646b-8a66-5da208cc1c27', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, '431857002', 'http://snomed.info/sct', 77, datetime.date(2018, 7, 14), datetime.date(2018, 7, 14), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/26a3984f-b2a8-e67f-7abc-ff147a0e6e35', 'Encounter/79d8f213-7847-646b-8a66-5da208cc1c27', 'male', 'white', 'not hispanic or latino', '674') -('83d0d564-3bbf-48eb-7445-bd2b81130671', {'id': None, 'code': 'EMER', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'EMER', 'emergency', 'finished', None, None, None, None, None, None, '55680006', 'http://snomed.info/sct', 69, datetime.date(2018, 6, 16), datetime.date(2018, 6, 16), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/16be855b-ece2-8b96-1ef9-a4d93adf3289', 'Encounter/83d0d564-3bbf-48eb-7445-bd2b81130671', 'male', 'white', 'not hispanic or latino', '660') +('83d0d564-3bbf-48eb-7445-bd2b81130671', {'id': None, 'code': 'EMER', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'EMER', 'emergency', 'finished', None, None, None, None, None, None, '55680006', 'http://snomed.info/sct', 69, datetime.date(2018, 6, 16), datetime.date(2018, 6, 17), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/16be855b-ece2-8b96-1ef9-a4d93adf3289', 'Encounter/83d0d564-3bbf-48eb-7445-bd2b81130671', 'male', 'white', 'not hispanic or latino', '660') ('8ff1dc01-5a28-b2d8-3b42-4b7a7d539970', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, None, None, 55, datetime.date(2018, 7, 15), datetime.date(2018, 7, 15), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/c1bfec36-dc2c-afc8-c767-3d35ed2bf6f0', 'Encounter/8ff1dc01-5a28-b2d8-3b42-4b7a7d539970', 'female', 'white', 'not hispanic or latino', '662') ('91f94a9d-69a7-e30a-cd1a-68c52dc01e70', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, None, None, 22, datetime.date(2018, 6, 29), datetime.date(2018, 6, 29), datetime.date(2018, 6, 25), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/a28be3e1-a6bd-7df4-fc81-1140848f8453', 'Encounter/91f94a9d-69a7-e30a-cd1a-68c52dc01e70', 'female', 'white', 'not hispanic or latino', '662') ('98d4bd14-d78e-debb-e7dc-2df7786aedf3', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, None, None, 89, datetime.date(2018, 7, 9), datetime.date(2018, 7, 9), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/e455ca3f-fc16-6ffc-297a-adc27e2db183', 'Encounter/98d4bd14-d78e-debb-e7dc-2df7786aedf3', 'male', 'white', 'hispanic or latino', '667') @@ -45,6 +45,6 @@ ('e613f29d-7505-6f2e-a1f5-bfbec300752d', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, '88805009', 'http://snomed.info/sct', 84, datetime.date(2018, 6, 29), datetime.date(2018, 6, 29), datetime.date(2018, 6, 25), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/1c498b42-61fd-6341-69c3-053f6e4fe404', 'Encounter/e613f29d-7505-6f2e-a1f5-bfbec300752d', 'female', 'white', 'not hispanic or latino', '667') ('e922a884-7039-a171-a65e-78051fe7afe6', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, None, None, 1, datetime.date(2018, 6, 13), datetime.date(2018, 6, 13), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/8022fbbe-aaa4-056c-d0f5-ec074bf0656c', 'Encounter/e922a884-7039-a171-a65e-78051fe7afe6', 'male', 'white', 'not hispanic or latino', '665') ('ed151e04-3dd6-8cb7-a3e5-777c8a8667f1', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, '195662009', 'http://snomed.info/sct', 57, datetime.date(2018, 6, 6), datetime.date(2018, 6, 6), datetime.date(2018, 6, 4), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/19158de4-66a2-f70f-e3bf-4396b312c8f1', 'Encounter/ed151e04-3dd6-8cb7-a3e5-777c8a8667f1', 'female', 'white', 'not hispanic or latino', '000') -('f2752dd7-1bf1-739d-dd8c-40122d0b63bc', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, '161665007', 'http://snomed.info/sct', 77, datetime.date(2018, 6, 1), datetime.date(2018, 6, 1), datetime.date(2018, 5, 28), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/9c8d8539-0b1e-73e2-b64f-83f1ea601fa4', 'Encounter/f2752dd7-1bf1-739d-dd8c-40122d0b63bc', 'male', 'white', 'not hispanic or latino', '674') +('f2752dd7-1bf1-739d-dd8c-40122d0b63bc', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, '161665007', 'http://snomed.info/sct', 77, datetime.date(2018, 6, 1), datetime.date(2018, 6, 2), datetime.date(2018, 5, 28), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/9c8d8539-0b1e-73e2-b64f-83f1ea601fa4', 'Encounter/f2752dd7-1bf1-739d-dd8c-40122d0b63bc', 'male', 'white', 'not hispanic or latino', '674') ('f964be66-3fcd-95c8-0021-71c7d24c91b7', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, '431857002', 'http://snomed.info/sct', 63, datetime.date(2018, 6, 5), datetime.date(2018, 6, 5), datetime.date(2018, 6, 4), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/a5bc08ea-9462-c4f5-1bd2-ff342598ac99', 'Encounter/f964be66-3fcd-95c8-0021-71c7d24c91b7', 'female', 'white', 'not hispanic or latino', '661') ('fd0754a4-e96d-cba7-b3c0-77697a09c86e', {'id': None, 'code': 'AMB', 'display': None, 'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode', 'userSelected': None, 'version': None}, 'AMB', 'ambulatory', 'finished', None, None, None, None, None, None, None, None, 100, datetime.date(2018, 7, 10), datetime.date(2018, 7, 10), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/6385ddd7-2639-6505-3789-0521b8f66c8b', 'Encounter/fd0754a4-e96d-cba7-b3c0-77697a09c86e', 'female', 'white', 'not hispanic or latino', '672') diff --git a/tests/test_data/core/core__encounter_type.txt b/tests/test_data/core/core__encounter_type.txt index 308fda57..732a4af7 100644 --- a/tests/test_data/core/core__encounter_type.txt +++ b/tests/test_data/core/core__encounter_type.txt @@ -21,7 +21,7 @@ ('75312bd2-d5ac-c62e-c9df-0004783725c7', 'AMB', 'ambulatory', 'http://snomed.info/sct', '410620009', 'Well child visit (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'finished', 11, datetime.date(2018, 7, 2), datetime.date(2018, 7, 2), datetime.date(2018, 7, 2), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/3ae095ec-8fe0-133b-36d4-8785a6ad8df3', 'Encounter/75312bd2-d5ac-c62e-c9df-0004783725c7', 'female', 'white', 'not hispanic or latino', '662') ('75b68644-535d-bdc1-4c31-aa9fe7e1822f', 'EMER', 'emergency', 'http://snomed.info/sct', '50849002', 'Emergency room admission (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', '55680006', 'Drug overdose', 'finished', 25, datetime.date(2018, 6, 30), datetime.date(2018, 6, 30), datetime.date(2018, 6, 25), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/82b8a670-4700-30e8-24a0-b83efa3c5e0a', 'Encounter/75b68644-535d-bdc1-4c31-aa9fe7e1822f', 'female', 'white', 'not hispanic or latino', '672') ('79d8f213-7847-646b-8a66-5da208cc1c27', 'AMB', 'ambulatory', 'http://snomed.info/sct', '185347001', 'Encounter for problem (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', '431857002', 'Chronic kidney disease stage 4 (disorder)', 'finished', 77, datetime.date(2018, 7, 14), datetime.date(2018, 7, 14), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/26a3984f-b2a8-e67f-7abc-ff147a0e6e35', 'Encounter/79d8f213-7847-646b-8a66-5da208cc1c27', 'male', 'white', 'not hispanic or latino', '674') -('83d0d564-3bbf-48eb-7445-bd2b81130671', 'EMER', 'emergency', 'http://snomed.info/sct', '50849002', 'Emergency room admission (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', '55680006', 'Drug overdose', 'finished', 69, datetime.date(2018, 6, 16), datetime.date(2018, 6, 16), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/16be855b-ece2-8b96-1ef9-a4d93adf3289', 'Encounter/83d0d564-3bbf-48eb-7445-bd2b81130671', 'male', 'white', 'not hispanic or latino', '660') +('83d0d564-3bbf-48eb-7445-bd2b81130671', 'EMER', 'emergency', 'http://snomed.info/sct', '50849002', 'Emergency room admission (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', '55680006', 'Drug overdose', 'finished', 69, datetime.date(2018, 6, 16), datetime.date(2018, 6, 17), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/16be855b-ece2-8b96-1ef9-a4d93adf3289', 'Encounter/83d0d564-3bbf-48eb-7445-bd2b81130671', 'male', 'white', 'not hispanic or latino', '660') ('8ff1dc01-5a28-b2d8-3b42-4b7a7d539970', 'AMB', 'ambulatory', 'http://snomed.info/sct', '185349003', 'Encounter for check up (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'finished', 55, datetime.date(2018, 7, 15), datetime.date(2018, 7, 15), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/c1bfec36-dc2c-afc8-c767-3d35ed2bf6f0', 'Encounter/8ff1dc01-5a28-b2d8-3b42-4b7a7d539970', 'female', 'white', 'not hispanic or latino', '662') ('91f94a9d-69a7-e30a-cd1a-68c52dc01e70', 'AMB', 'ambulatory', 'http://snomed.info/sct', '308335008', 'Patient encounter procedure', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'finished', 22, datetime.date(2018, 6, 29), datetime.date(2018, 6, 29), datetime.date(2018, 6, 25), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/a28be3e1-a6bd-7df4-fc81-1140848f8453', 'Encounter/91f94a9d-69a7-e30a-cd1a-68c52dc01e70', 'female', 'white', 'not hispanic or latino', '662') ('98d4bd14-d78e-debb-e7dc-2df7786aedf3', 'AMB', 'ambulatory', 'http://snomed.info/sct', '185349003', 'Encounter for check up (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'finished', 89, datetime.date(2018, 7, 9), datetime.date(2018, 7, 9), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/e455ca3f-fc16-6ffc-297a-adc27e2db183', 'Encounter/98d4bd14-d78e-debb-e7dc-2df7786aedf3', 'male', 'white', 'hispanic or latino', '667') @@ -45,6 +45,6 @@ ('e613f29d-7505-6f2e-a1f5-bfbec300752d', 'AMB', 'ambulatory', 'http://snomed.info/sct', '448337001', 'Telemedicine consultation with patient', 'None', 'None', 'None', 'None', 'None', 'None', '88805009', 'Chronic congestive heart failure (disorder)', 'finished', 84, datetime.date(2018, 6, 29), datetime.date(2018, 6, 29), datetime.date(2018, 6, 25), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/1c498b42-61fd-6341-69c3-053f6e4fe404', 'Encounter/e613f29d-7505-6f2e-a1f5-bfbec300752d', 'female', 'white', 'not hispanic or latino', '667') ('e922a884-7039-a171-a65e-78051fe7afe6', 'AMB', 'ambulatory', 'http://snomed.info/sct', '410620009', 'Well child visit (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'finished', 1, datetime.date(2018, 6, 13), datetime.date(2018, 6, 13), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/8022fbbe-aaa4-056c-d0f5-ec074bf0656c', 'Encounter/e922a884-7039-a171-a65e-78051fe7afe6', 'male', 'white', 'not hispanic or latino', '665') ('ed151e04-3dd6-8cb7-a3e5-777c8a8667f1', 'AMB', 'ambulatory', 'http://snomed.info/sct', '185345009', 'Encounter for symptom', 'None', 'None', 'None', 'None', 'None', 'None', '195662009', 'Acute viral pharyngitis (disorder)', 'finished', 57, datetime.date(2018, 6, 6), datetime.date(2018, 6, 6), datetime.date(2018, 6, 4), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/19158de4-66a2-f70f-e3bf-4396b312c8f1', 'Encounter/ed151e04-3dd6-8cb7-a3e5-777c8a8667f1', 'female', 'white', 'not hispanic or latino', '000') -('f2752dd7-1bf1-739d-dd8c-40122d0b63bc', 'AMB', 'ambulatory', 'http://snomed.info/sct', '439740005', 'Postoperative follow-up visit (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', '161665007', 'History of renal transplant (situation)', 'finished', 77, datetime.date(2018, 6, 1), datetime.date(2018, 6, 1), datetime.date(2018, 5, 28), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/9c8d8539-0b1e-73e2-b64f-83f1ea601fa4', 'Encounter/f2752dd7-1bf1-739d-dd8c-40122d0b63bc', 'male', 'white', 'not hispanic or latino', '674') +('f2752dd7-1bf1-739d-dd8c-40122d0b63bc', 'AMB', 'ambulatory', 'http://snomed.info/sct', '439740005', 'Postoperative follow-up visit (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', '161665007', 'History of renal transplant (situation)', 'finished', 77, datetime.date(2018, 6, 1), datetime.date(2018, 6, 2), datetime.date(2018, 5, 28), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/9c8d8539-0b1e-73e2-b64f-83f1ea601fa4', 'Encounter/f2752dd7-1bf1-739d-dd8c-40122d0b63bc', 'male', 'white', 'not hispanic or latino', '674') ('f964be66-3fcd-95c8-0021-71c7d24c91b7', 'AMB', 'ambulatory', 'http://snomed.info/sct', '185347001', 'Encounter for problem (procedure)', 'None', 'None', 'None', 'None', 'None', 'None', '431857002', 'Chronic kidney disease stage 4 (disorder)', 'finished', 63, datetime.date(2018, 6, 5), datetime.date(2018, 6, 5), datetime.date(2018, 6, 4), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/a5bc08ea-9462-c4f5-1bd2-ff342598ac99', 'Encounter/f964be66-3fcd-95c8-0021-71c7d24c91b7', 'female', 'white', 'not hispanic or latino', '661') ('fd0754a4-e96d-cba7-b3c0-77697a09c86e', 'AMB', 'ambulatory', 'http://snomed.info/sct', '702927004', 'Urgent care clinic (environment)', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'None', 'finished', 100, datetime.date(2018, 7, 10), datetime.date(2018, 7, 10), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/6385ddd7-2639-6505-3789-0521b8f66c8b', 'Encounter/fd0754a4-e96d-cba7-b3c0-77697a09c86e', 'female', 'white', 'not hispanic or latino', '672') diff --git a/tests/test_duckdb.py b/tests/test_duckdb.py index 517463ac..2db351c7 100644 --- a/tests/test_duckdb.py +++ b/tests/test_duckdb.py @@ -1,15 +1,17 @@ """ tests for duckdb backend support """ import glob +import json import os import tempfile -from datetime import datetime, timedelta, timezone +from datetime import datetime from pathlib import Path from unittest import mock import pytest from cumulus_library import cli, databases +from cumulus_library.template_sql import base_templates @mock.patch.dict( @@ -55,12 +57,12 @@ def test_duckdb_core_build_and_export(): @pytest.mark.parametrize( "timestamp,expected", [ - ("2021", datetime(2021, 1, 1, tzinfo=timezone.utc)), - ("2019-10", datetime(2019, 10, 1, tzinfo=timezone.utc)), - ("1923-01-23", datetime(1923, 1, 23, tzinfo=timezone.utc)), + ("2021", datetime(2021, 1, 1)), + ("2019-10", datetime(2019, 10, 1)), + ("1923-01-23", datetime(1923, 1, 23)), ( "2023-01-16T07:55:25-05:00", - datetime(2023, 1, 16, 7, 55, 25, tzinfo=timezone(timedelta(hours=-5))), + datetime(2023, 1, 16, 12, 55, 25), ), ], ) @@ -71,4 +73,57 @@ def test_duckdb_from_iso8601_timestamp(timestamp, expected): .execute(f"select from_iso8601_timestamp('{timestamp}')") .fetchone()[0] ) - assert parsed, expected + assert parsed == expected + + +def test_duckdb_table_schema(): + """Verify we can detect schemas correctly, even for nested camel case fields""" + db = databases.DuckDatabaseBackend(":memory:") + + with tempfile.TemporaryDirectory() as tmpdir: + os.mkdir(f"{tmpdir}/observation") + with open(f"{tmpdir}/observation/test.ndjson", "w", encoding="utf8") as ndjson: + json.dump( + { + "id": "test", + "component": [{"dataAbsentReason": {"text": "Dunno"}}], + "valueBoolean": False, + }, + ndjson, + ) + + db.insert_tables(databases.read_ndjson_dir(tmpdir)) + + # Look for a mix of camel-cased and lower-cased fields. Both should work. + target_schema = { + "bodySite": [], + "CoMpOnEnT": ["dataabsentreason", "valueQuantity"], + "not_a_real_field": [], + "valueboolean": [], + } + + # Query database for what exists right now as a schema + query = base_templates.get_column_datatype_query( + # Use a mixed-case table name + db.schema_name, + "Observation", + list(target_schema.keys()), + ) + actual_schema = db.cursor().execute(query).fetchall() + + # Validate that schema against what we were looking for + validated_schema = db.parser().validate_table_schema( + target_schema, actual_schema + ) + # Note the all mixed-case results. + # These are guaranteed to be the same case as the expected/target schema. + expected_schema = { + "bodySite": True, # real toplevel fields are guaranteed to be in schema + "CoMpOnEnT": { + "dataabsentreason": True, + "valueQuantity": False, + }, + "not_a_real_field": False, + "valueboolean": True, + } + assert validated_schema == expected_schema diff --git a/tests/test_templates.py b/tests/test_templates.py index ab89b3ec..8c5d85bb 100644 --- a/tests/test_templates.py +++ b/tests/test_templates.py @@ -168,8 +168,8 @@ def test_get_column_datatype_query(): query = base_templates.get_column_datatype_query( schema_name="schema_name", - table_name="table_name", - column_names=["foo", "bar"], + table_name="TABLE_NAME", + column_names=["foo", "BAR"], ) assert query == expected