diff --git a/.gitignore b/.gitignore index f91bced3..a7e4d2bc 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ data_export/ .DS_Store cumulus_library_columns.json output.sql +*generated.md # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/cumulus_library/.sqlfluff b/cumulus_library/.sqlfluff index 460d1cc2..676cdc68 100644 --- a/cumulus_library/.sqlfluff +++ b/cumulus_library/.sqlfluff @@ -55,6 +55,7 @@ source_id = source_id table_cols = ["a","b"] table_cols_types = ["varchar", "varchar"] table_name = test_table +table_names = ["test_table"] table_suffix = 2024_01_01_11_11_11 target_col_prefix = prefix target_table = target_table diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py index 35fcd330..3d8d908d 100755 --- a/cumulus_library/cli.py +++ b/cumulus_library/cli.py @@ -226,6 +226,22 @@ def generate_study_sql( parser=self.db.parser(), ) + def generate_study_markdown( + self, + target: pathlib.Path, + ) -> None: + """Generates markdown tables for study documentation + + :param target: A path to the study directory + """ + studyparser = study_parser.StudyManifestParser(target) + studyparser.run_generate_markdown( + self.cursor, + self.schema_name, + verbose=self.verbose, + parser=self.db.parser(), + ) + def get_abs_posix_path(path: str) -> pathlib.Path: """Convenience method for handling abs vs rel paths""" @@ -373,6 +389,10 @@ def run_cli(args: dict): elif args["action"] == "generate-sql": for target in args["target"]: runner.generate_study_sql(study_dict[target]) + + elif args["action"] == "generate-md": + for target in args["target"]: + runner.generate_study_markdown(study_dict[target]) finally: db_backend.close() diff --git a/cumulus_library/cli_parser.py b/cumulus_library/cli_parser.py index d367294c..8f310f98 100644 --- a/cumulus_library/cli_parser.py +++ b/cumulus_library/cli_parser.py @@ -229,4 +229,15 @@ def get_parser() -> argparse.ArgumentParser: add_target_argument(generate) add_study_dir_argument(generate) add_db_config(generate) 
+ + # Generate markdown tables for documentation + markdown = actions.add_parser( + "generate-md", help="Generates markdown tables for study documentation" + ) + add_target_argument(markdown) + add_study_dir_argument(markdown) + add_data_path_argument(markdown) + add_db_config(markdown) + add_verbose_argument(markdown) + return parser diff --git a/cumulus_library/studies/core/core_templates/observation.sql.jinja b/cumulus_library/studies/core/core_templates/observation.sql.jinja index 4ca97e8d..cbf694c4 100644 --- a/cumulus_library/studies/core/core_templates/observation.sql.jinja +++ b/cumulus_library/studies/core/core_templates/observation.sql.jinja @@ -36,7 +36,6 @@ WITH temp_observation AS ( odvcc.code AS valuecodeableconcept_code, odvcc.code_system AS valuecodeableconcept_code_system, odvcc.display AS valuecodeableconcept_display, - o.component, o.referencerange, o.valuequantity, o.subject.reference AS subject_ref, @@ -52,15 +51,12 @@ SELECT id, category_code, category_code_system, - component, status, observation_code, observation_code_system, interpretation_code, interpretation_code_system, interpretation_display, - referencerange, - valuequantity, valuecodeableconcept_code, valuecodeableconcept_code_system, valuecodeableconcept_display, diff --git a/cumulus_library/studies/core/observation_type.sql b/cumulus_library/studies/core/observation_type.sql index 12ccb132..8e1a1e30 100644 --- a/cumulus_library/studies/core/observation_type.sql +++ b/cumulus_library/studies/core/observation_type.sql @@ -29,13 +29,10 @@ SELECT co.valuecodeableconcept_code, co.valuecodeableconcept_code_system, co.valuecodeableconcept_display, - co.component, co.status, co.interpretation_code, co.interpretation_code_system, co.interpretation_display, - co.referencerange, - co.valuequantity, co.obs_date, co.obs_week, co.obs_month, diff --git a/cumulus_library/study_parser.py b/cumulus_library/study_parser.py index c7daa87c..f45b1f00 100644 --- a/cumulus_library/study_parser.py +++ 
b/cumulus_library/study_parser.py @@ -5,7 +5,10 @@ import inspect import pathlib import sys +import typing +import pandas +import pytablewriter import toml from rich.progress import Progress, TaskID, track @@ -552,6 +555,56 @@ def run_generate_sql( verbose=verbose, ) + def run_generate_markdown( + self, + cursor: databases.DatabaseCursor, + schema: str, + parser: databases.DatabaseParser = None, + verbose: bool = False, + ) -> None: + """Generates markdown tables describing the tables in this study + + :param cursor: A DatabaseCursor object + :param schema: The name of the schema to look up the study's tables in + :param verbose: toggle from progress bar to query output + """ + + query = base_templates.get_show_tables( + schema_name=schema, prefix=f"{self.get_study_prefix()}__" + ) + + tables = [x[0] for x in cursor.execute(query).fetchall()] + query = base_templates.get_column_datatype_query( + schema_name=schema, table_names=tables, include_table_names=True + ) + study_df = pandas.DataFrame( + cursor.execute(query).fetchall(), columns=["Column", "Type", "Table"] + ) + with open( + self._study_path / f"{self.get_study_prefix()}_generated.md", "w" + ) as f: + table_list = sorted(study_df["Table"].unique()) + count_tables = [t for t in table_list if "__count_" in t] + base_tables = [t for t in table_list if "__count_" not in t] + if len(count_tables) > 0: + f.write(f"## {self.get_study_prefix()} count tables\n\n") + for table in count_tables: + self._write_md_table(table, study_df, f) + if len(base_tables) > 0: + f.write(f"## {self.get_study_prefix()} base tables\n\n") + for table in base_tables: + self._write_md_table(table, study_df, f) + + def _write_md_table(self, name: str, df: pandas.DataFrame, file: typing.IO): + table_df = df[df["Table"] == name].drop("Table", axis=1) + table_df = table_df.assign(Description="") + writer = pytablewriter.MarkdownTableWriter(dataframe=table_df) + writer.table_name = f"{name}\n" + writer.set_indent_level(2) + writer.stream = file + 
writer.write_table() + file.write("\n\n") + def build_study( self, cursor: databases.DatabaseCursor, diff --git a/cumulus_library/template_sql/base_templates.py b/cumulus_library/template_sql/base_templates.py index 7314e890..331ce42c 100644 --- a/cumulus_library/template_sql/base_templates.py +++ b/cumulus_library/template_sql/base_templates.py @@ -90,13 +90,21 @@ def get_codeable_concept_denormalize_query( ) -def get_column_datatype_query(schema_name: str, table_name: str, column_names: list): +def get_column_datatype_query( + schema_name: str, + table_names: str | list, + column_names: list | None = None, + include_table_names: bool | None = False, +): """Gets the in-database data representation of a given column""" + if isinstance(table_names, str): + table_names = [table_names] return get_base_template( "column_datatype", schema_name=schema_name, - table_name=table_name, + table_names=table_names, column_names=column_names, + include_table_names=include_table_names, ) diff --git a/cumulus_library/template_sql/column_datatype.sql.jinja b/cumulus_library/template_sql/column_datatype.sql.jinja index b3c568aa..00f5b7a1 100644 --- a/cumulus_library/template_sql/column_datatype.sql.jinja +++ b/cumulus_library/template_sql/column_datatype.sql.jinja @@ -1,8 +1,13 @@ SELECT column_name, data_type + {%- if include_table_names -%}, + table_name --noqa: LT02 + {%- endif %} FROM information_schema.columns WHERE table_schema = '{{ schema_name }}' - AND table_name = '{{ table_name|lower }}' - AND LOWER(column_name) IN ('{{ column_names|join("', '")|lower }}') --noqa: LT05 + AND table_name IN ('{{ table_names|join("', '")|lower }}') +{%- if column_names %} + AND LOWER(column_name) IN ('{{ column_names|join("', '")|lower }}') --noqa: LT02,LT05 +{%- endif %} diff --git a/docs/core-study-details.md b/docs/core-study-details.md index b577bc93..9cbd3958 100644 --- a/docs/core-study-details.md +++ b/docs/core-study-details.md @@ -8,98 +8,596 @@ nav_order: 5 # Core study details 
-The core study calculates the **patient count** for every patient group using the [SQL CUBE function](https://prestodb.io/docs/current/sql/select.html#group-by-clause). -*THRESHOLDS* are applied to ensure *no patient group has < 10 patients*. - -Patient count can be the -- number of **unique patients** -- number of **unique patient encounters** -- number of **unique patient encounters with documented medical note** -- Other types of counts are also possible, these are the most common. +The core study aims to provide the following features: -Example +- **FHIR US Core V4 Profile data** - For resources that are supported by Cumulus, +we attempt to extract all required/extensible fields from a FHIR dataset, if present +- **Flattened FHIR resources** - The core study provides flattened tables from nested +FHIR resources, making queries easier to construct without having to worry about the +nuances of conditional unnesting against potentially missing data +- **Summary Exports** - The core study will provide some basic count data across the +entire cohort that you've processed via ETL, which can be useful as a verification step +for validating the integrity of the data you're extracting from your EHR system - #count (total) = - #count patients age 9 = - #count patients age 9 and rtPCR POS = - #count patients age 9 and rtPCR NEG = - #count rtPCR POS = - #count rtPCR NEG = +If you are authoring a study, and are focused only on clinical analysis (i.e. you +aren't working on data quality/data governance issues), we **strongly** recommend you +use the core study as the starting point for your own work. See +[Creating Studies](./creating-studies.md) +for more information. -[SQL CUBE](https://prestodb.io/docs/current/sql/select.html#group-by-clause) produces a "[Mathematical Power Set](http://en.wikipedia.org/wiki/Power_set)" for every patient subgroup. 
-These numbers are useful inputs for maths that leverage [Joint Probability Distributions](https://en.wikipedia.org/wiki/Joint_probability_distribution). +# Table format -Examples: +## core count tables -- [Odds Ratio](https://en.wikipedia.org/wiki/Odds_ratio) of patient group A vs B -- [Relative Risk Ratio](https://en.wikipedia.org/wiki/Relative_risk) of patient group A vs B -- [Chi-Squared Test](https://en.wikipedia.org/wiki/Chi-squared_test) significance of difference between patient groups -- [Entropy and Mutual Information](https://en.wikipedia.org/wiki/Mutual_information) (core information theory measures) -- [Decision Tree](https://en.wikipedia.org/wiki/Decision_tree) sorts patients into different predicted classes, with visualized tree -- [Naive Bayes Classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier) very fast probabilistic classifier -- others -## Core study exportable counts tables +### core__count_condition_month -### count_core_condition_icd10_month +| Column | Type |Description| +|------------------|-------|-----------| +|cnt |bigint |count | +|cond_category_code|varchar|Encounter Code (Healthcare Setting)| +|cond_month |varchar|Month condition recorded| +|cond_code_display |varchar|Condition code| -| Variable | Description | -|:------------------|:------------------------------------| -| cnt | Count | -| cond_month | Month condition recorded | -| cond_code_display | Condition code | -| enc_class_code | Encounter Code (Healthcare Setting) | +### core__count_documentreference_month -### count_core_documentreference_month +| Column | Type |Description| +|-----------------|-------|-----------| +|cnt |bigint |Count | +|doc_type_display |varchar|Type of Document (display)| +|author_month |varchar|Month document was authored| +|enc_class_display|varchar|Encounter Code (Healthcare Setting)| -| Variable | Description | -|:-----------------|:------------------------------------| -| cnt | Count | -| author_month | Month document was authored | -| 
enc_class_code | Encounter Code (Healthcare Setting) | -| doc_type_display | Type of Document (display) | +### core__count_encounter_enc_type_month -### count_core_encounter_day +| Column | Type |Description| +|-----------------|-------|-----------| +|cnt |bigint |Count | +|enc_class_display|varchar|Encounter Code (Healthcare Setting)| +|enc_type_display |varchar|Encounter Type| +|start_month |varchar|Month encounter recorded| -| Variable | Description | -|:---------------|:------------------------------------| -| cnt | Count | -| enc_class_code | Encounter Code (Healthcare Setting) | -| start_date | Day patient encounter started | +### core__count_encounter_month -### count_core_encounter_month +| Column | Type |Description| +|-----------------|-------|-----------| +|cnt |bigint |Count | +|start_month |varchar|Month encounter recorded| +|enc_class_display|varchar|Encounter Code (Healthcare Setting)| +|age_at_visit |varchar|Patient Age at Encounter| +|gender |varchar|Biological sex at birth| +|race_display |varchar|Patient reported race| +|ethnicity_display|varchar|Patient reported ethnicity| -| Variable | Description | -|:---------------|:------------------------------------| -| cnt | Count | -| enc_class_code | Encounter Code (Healthcare Setting) | -| start_month | Month patient encounter started | -| age_at_visit | Patient Age at Encounter | -| gender | Biological sex at birth | -| race_display | Patient reported race | -| postalcode3 | Patient 3 digit zip | +### core__count_encounter_priority_month -### count_core_observation_lab_month +| Column | Type |Description| +|--------------------|-------|-----------| +|cnt |bigint |Count | +|enc_class_display |varchar|Encounter Code (Healthcare Setting)| +|enc_priority_display|varchar|Encounter Priority| +|start_month |varchar|Month encounter recorded| -| Variable | Description | -|:-------------------|:------------------------------------| -| cnt | Count | -| lab_month | Month of lab result | -| lab_code | Laboratory 
Code | -| lab_result_display | Laboratory result | -| enc_class_code | Encounter Code (Healthcare Setting) | +### core__count_encounter_service_month -### count_core_patient +| Column | Type |Description| +|-------------------|-------|-----------| +|cnt |bigint |Count | +|enc_class_display |varchar|Encounter Code (Healthcare Setting)| +|enc_service_display|varchar|Encounter Service| +|start_month |varchar|Month encounter recorded| -| Variable | Description | -|:-------------|:----------------------------------| -| cnt | Count | -| gender | Biological sex at birth | -| age | Age in years calculated since DOB | -| race_display | Patient reported race | -| postalcode3 | Patient 3 digit zip | + +### core__count_encounter_type + +| Column | Type |Description| +|--------------------|-------|-----------| +|cnt |bigint |Count | +|enc_class_display |varchar|Encounter Code (Healthcare Setting)| +|enc_type_display |varchar|Encounter Type| +|enc_service_display |varchar|Encounter Service| +|enc_priority_display|varchar|Encounter Priority| + + +### core__count_encounter_type_month + +| Column | Type |Description| +|--------------------|-------|-----------| +|cnt |bigint |Count | +|enc_class_display |varchar|Encounter Code (Healthcare Setting)| +|enc_type_display |varchar|Encounter Type| +|enc_service_display |varchar|Encounter Service| +|enc_priority_display|varchar|Encounter Priority| +|start_month |varchar|Month encounter recorded| + + +### core__count_medicationrequest_month + +| Column | Type |Description| +|----------------|-------|-----------| +|cnt |bigint |Count | +|status |varchar|Prescribing event state| +|intent |varchar|Medication order kind| +|authoredon_month|varchar|Month medication request issued| +|rx_display |varchar|Medication Name| + + +### core__count_observation_lab_month + +| Column | Type |Description| +|------------------|-------|-----------| +|cnt |bigint |Count | +|lab_month |varchar|Month of lab result| +|lab_code |varchar|Lab result coding| 
+|lab_result_display|varchar|Lab result display text| +|enc_class_display |varchar|Encounter Code (Healthcare Setting)| + + +### core__count_patient + +| Column | Type |Description| +|-----------------|-------|-----------| +|cnt |bigint |Count | +|gender |varchar|Biological sex at birth| +|race_display |varchar|Patient reported race| +|ethnicity_display|varchar|Patient reported ethnicity| + +## core base tables + + +### core__condition + +| Column | Type |Description| +|----------------|-------|-----------| +|id |varchar| | +|category_code |varchar| | +|category_display|varchar| | +|code |varchar| | +|code_system |varchar| | +|code_display |varchar| | +|subject_ref |varchar| | +|encounter_ref |varchar| | +|condition_ref |varchar| | +|recordeddate |date | | +|recorded_week |date | | +|recorded_month |date | | +|recorded_year |date | | + + +### core__condition_codable_concepts_all + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__condition_codable_concepts_display + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__documentreference + +| Column | Type |Description| +|--------------------|-------|-----------| +|id |varchar| | +|doc_type_code |varchar| | +|doc_type_code_system|varchar| | +|doc_type_display |varchar| | +|status |varchar| | +|docstatus |varchar| | +|encounter_ref |varchar| | +|author_date |date | | +|author_week |date | | +|author_month |date | | +|author_year |date | | +|subject_ref |varchar| | +|doc_ref |varchar| | + + +### core__documentreference_dn_type + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__ed_note + +| Column | Type |Description| +|-----------|-----------|-----------| +|from_system|varchar(3) | | 
+|from_code |varchar(14)| | +|analyte |varchar(28)| | +|code_system|varchar(16)| | +|code |varchar(7) | | +|display |varchar(33)| | + + +### core__encounter + +| Column | Type |Description| +|----------------------|-----------|-----------| +|id |varchar | | +|enc_class_code |varchar(6) | | +|enc_class_display |varchar(21)| | +|status |varchar | | +|type_code |varchar | | +|type_code_system |varchar | | +|sevicetype_code |varchar | | +|sevicetype_code_system|varchar | | +|priority_code |varchar | | +|priority_code_system |varchar | | +|reasoncode_code |varchar | | +|reasoncode_code_system|varchar | | +|age_at_visit |bigint | | +|start_date |date | | +|end_date |date | | +|start_week |date | | +|start_month |date | | +|start_year |date | | +|subject_ref |varchar | | +|encounter_ref |varchar | | +|gender |varchar | | +|race_display |varchar | | +|ethnicity_display |varchar | | +|postalcode3 |varchar | | + + +### core__encounter_dn_priority + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__encounter_dn_reasoncode + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__encounter_dn_servicetype + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__encounter_dn_type + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__encounter_type + +| Column | Type |Description| +|----------------------|-----------|-----------| +|id |varchar | | +|enc_class_code |varchar(6) | | +|enc_class_display |varchar(21)| | +|enc_type_system |varchar | | +|enc_type_code |varchar | | +|enc_type_display |varchar | | +|enc_service_system |varchar | | 
+|enc_service_code |varchar | | +|enc_service_display |varchar | | +|enc_priority_system |varchar | | +|enc_priority_code |varchar | | +|enc_priority_display |varchar | | +|enc_reasoncode_code |varchar | | +|enc_reasoncode_display|varchar | | +|status |varchar | | +|age_at_visit |bigint | | +|start_date |date | | +|end_date |date | | +|start_week |date | | +|start_month |date | | +|start_year |date | | +|subject_ref |varchar | | +|encounter_ref |varchar | | +|gender |varchar | | +|race_display |varchar | | +|ethnicity_display |varchar | | +|postalcode3 |varchar | | + + +### core__fhir_act_encounter_code_v3 + +|Column | Type |Description| +|-------|-----------|-----------| +|code |varchar(6) | | +|display|varchar(21)| | + + +### core__fhir_mapping_code_system_uri + +| Column | Type |Description| +|-----------|-----------|-----------| +|code_system|varchar(6) | | +|uri |varchar(37)| | + + +### core__fhir_mapping_expected_act_encounter_code_v3 + +| Column | Type |Description| +|--------|----------|-----------| +|expected|varchar(5)| | +|found |varchar(5)| | + + +### core__fhir_mapping_resource_uri + +| Column | Type |Description| +|--------|-----------|-----------| +|resource|varchar(25)| | +|uri |varchar(73)| | + + +### core__lib_transactions + +| Column | Type |Description| +|---------------|------------|-----------| +|study_name |varchar | | +|library_version|varchar | | +|status |varchar | | +|event_time |timestamp(3)| | + + +### core__medication + +| Column | Type |Description| +|-------------|-------|-----------| +|id |varchar| | +|encounter_ref|varchar| | +|patient_ref |varchar| | +|code |varchar| | +|display |varchar| | +|code_system |varchar| | +|userselected |boolean| | + + +### core__medicationrequest + +| Column | Type |Description| +|--------------------|-------|-----------| +|id |varchar| | +|status |varchar| | +|intent |varchar| | +|authoredon |date | | +|authoredon_month |date | | +|category_code |varchar| | +|category_code_system|varchar| | 
+|rx_code_system |varchar| | +|rx_code |varchar| | +|rx_display |varchar| | +|subject_ref |varchar| | + + +### core__medicationrequest_dn_category + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__meta_date + +| Column |Type|Description| +|--------|----|-----------| +|min_date|date| | +|max_date|date| | + + +### core__meta_version + +| Column | Type |Description| +|--------------------|-------|-----------| +|data_package_version|integer| | + + +### core__observation + +| Column | Type |Description| +|--------------------------------|-------|-----------| +|id |varchar| | +|category_code |varchar| | +|category_code_system |varchar| | +|status |varchar| | +|observation_code |varchar| | +|observation_code_system |varchar| | +|interpretation_code |varchar| | +|interpretation_code_system |varchar| | +|interpretation_display |varchar| | +|valuecodeableconcept_code |varchar| | +|valuecodeableconcept_code_system|varchar| | +|valuecodeableconcept_display |varchar| | +|obs_date |date | | +|obs_week |date | | +|obs_month |date | | +|obs_year |date | | +|subject_ref |varchar| | +|encounter_ref |varchar| | +|observation_ref |varchar| | + + +### core__observation_dn_category + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__observation_dn_code + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__observation_dn_interpretation + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | +|code_system|varchar| | +|display |varchar| | + + +### core__observation_dn_valuecodeableconcept + +| Column | Type |Description| +|-----------|-------|-----------| +|id |varchar| | +|code |varchar| | 
+|code_system|varchar| | +|display |varchar| | + + +### core__observation_lab + +| Column | Type |Description| +|----------------------|-------|-----------| +|id |varchar| | +|lab_code |varchar| | +|lab_code_system |varchar| | +|category_code |varchar| | +|category_code_system |varchar| | +|lab_result_code |varchar| | +|lab_result_code_system|varchar| | +|lab_result_display |varchar| | +|lab_date |date | | +|lab_week |date | | +|lab_month |date | | +|lab_year |date | | +|status |varchar| | +|subject_ref |varchar| | +|encounter_ref |varchar| | +|observation_ref |varchar| | + + +### core__observation_vital_signs + +| Column | Type |Description| +|--------------------------------|-------|-----------| +|id |varchar| | +|observation_code |varchar| | +|observation_code_system |varchar| | +|category_code |varchar| | +|category_code_system |varchar| | +|valuecodeableconcept_code |varchar| | +|valuecodeableconcept_code_system|varchar| | +|valuecodeableconcept_display |varchar| | +|status |varchar| | +|interpretation_code |varchar| | +|interpretation_code_system |varchar| | +|interpretation_display |varchar| | +|obs_date |date | | +|obs_week |date | | +|obs_month |date | | +|obs_year |date | | +|subject_ref |varchar| | +|encounter_ref |varchar| | +|observation_ref |varchar| | + + +### core__patient + +| Column | Type |Description| +|-----------------|-------|-----------| +|id |varchar| | +|gender |varchar| | +|birthdate |date | | +|postalcode3 |varchar| | +|subject_ref |varchar| | +|race_display |varchar| | +|ethnicity_display|varchar| | + + +### core__patient_ext_ethnicity + +| Column | Type |Description| +|-----------------|-----------|-----------| +|id |varchar | | +|system |varchar(11)| | +|ethnicity_code |varchar | | +|ethnicity_display|varchar | | + + +### core__patient_ext_race + +| Column | Type |Description| +|------------|-----------|-----------| +|id |varchar | | +|system |varchar(11)| | +|race_code |varchar | | +|race_display|varchar | | + + +### 
core__study_period + +| Column | Type |Description| +|------------------|-----------|-----------| +|start_date |date | | +|start_week |date | | +|start_month |date | | +|end_date |date | | +|age_at_visit |bigint | | +|author_date |date | | +|author_week |date | | +|author_month |date | | +|author_year |date | | +|gender |varchar | | +|race_display |varchar | | +|ethnicity_display |varchar | | +|subject_ref |varchar | | +|encounter_ref |varchar | | +|status |varchar | | +|doc_ref |varchar | | +|diff_enc_note_days|bigint | | +|enc_class_code |varchar(6) | | +|enc_class_display |varchar(21)| | +|doc_type_code |varchar | | +|doc_type_display |varchar | | +|ed_note |boolean | | diff --git a/docs/creating-sql-with-python.md b/docs/creating-sql-with-python.md index 918549dd..005ba13a 100644 --- a/docs/creating-sql-with-python.md +++ b/docs/creating-sql-with-python.md @@ -10,7 +10,9 @@ nav_order: 4 Before jumping into this doc, take a look at [Creating Studies](creating-studies.md). -If you're just working with `core` tables related to the US Core FHIR profiles, you +If you're just working with the +[Core study tables](core-study-details.md) +related to the US Core FHIR profiles, you may not be interested in this, or only need to look at the [Working with TableBuilders](#working-with-tablebuilders) and the @@ -22,7 +24,9 @@ sections. There are three main reasons why you would need to use Python to generate SQL: - You would like to make use of the [helper class we've built](#generating-counts-tables) -for ease of creating count tables in a structured manner. +for ease of creating count tables in a structured manner, or one of the +[statistics packages](statistics.md) we provide for automating common numerical +tasks. - You have a dataset you'd like to [load into a table from a static file](#adding-a-static-dataset), separate from the ETL tables. @@ -44,6 +48,17 @@ You'll see examples of all three cases in this guide. 
There are two main bits of infrastructure we use for programmatic tables: The `TableBuilder` class, and the collection of template SQL. +If you include a table builder in your study, and you want to see what the +query being executed looks like, you can use the `generate-sql` command +in the Cumulus library CLI to write out example queries. They will go into +a folder inside your study called `reference_sql`. + +To document your study structure, you can use the `generate-md` command +to create markdown tables you can copy into your study docs. Note that, +as of this writing, you'll need to supply a description for each field by +hand. This output will be generated inside your study, in a file named +`{study name}_generated.md`. + ### Working with TableBuilders We have a base @@ -85,7 +100,10 @@ templates - instead, using the [template function library](https://github.com/smart-on-fhir/cumulus-library/blob/main/cumulus_library/template_sql/base_templates.py) you can provide arguments to these templates that will allow you to generate standard types of SQL tables, as well as using templates targeted for -bespoke operations. +bespoke operations. But you _can_ write study specific templates if you have +a complex use case. The Core study has +[study specific templates](https://github.com/smart-on-fhir/cumulus-library/tree/main/cumulus_library/studies/core/core_templates) +to generate flat tables from nested FHIR tables, as an example. When you're thinking about a query that you'd need to create, first check the template function library to see if something already exists. Basic creation and inspection @@ -96,7 +114,7 @@ queries should be covered, as well as unnestings for some common FHIR objects. ### Generating counts tables A thing we do over and over as part of studies is generate powerset counts tables against a filtered resource to get data about a certain kind of clinical population. 
-Since this is so common, we created a class just for this, and we're using it in all +Since this is so common we created a class just for this, and we're using it in studies the Cumulus team is directly authoring. The [CountsBuilder class](https://github.com/smart-on-fhir/cumulus-library/blob/main/cumulus_library/statistics/counts.py) @@ -117,13 +135,6 @@ for filtering, or can change the minimum bin size used to include data case we're not covering, you can use this interface directly. We'd love to hear about it - we'd consider covering it and/or take PRs for new features -As a convenience, if you include a `if __name__ == "__main__":` clause like you -see in `count_core.py`, you can invoke the builder's output by invoking it with -python, which is a nice way to get example SQL output for inclusion in github. -This is where the -[count core sql output](https://github.com/smart-on-fhir/cumulus-library/blob/main/cumulus_library/studies/core/reference_sql/count_core.sql) -originated from. - Add your count generator file to the `counts_builder_config` section of your `manifest.toml` to include it in your build invocations. diff --git a/docs/creating-studies.md b/docs/creating-studies.md index b02ceb0e..7b44c648 100644 --- a/docs/creating-studies.md +++ b/docs/creating-studies.md @@ -24,64 +24,90 @@ to any build/export call to tell it where to look for your work. ## Creating a new study -There are two ways to get started with a new study: - -1. Use `cumulus-library` to create a manifest for you. You can do this with by running: -```bash -cumulus-library create ./path/to/your/study/dir -``` -We'll create that folder if it doesn't already exist. - -2. Fork the [ -Cumulus Library template repo](https://github.com/smart-on-fhir/cumulus-library-template), -renaming your fork, and cloning it directly from github. - -We recommend you use a name relevant to your study (we'll use `my_study` for this -document). 
This folder name is what you will pass as a `--target` to -`cumulus-library` when you run your study's queries. - -Once you've made a new study, -the `manifest.toml` file is where you can change your study's configuration. -The initial manifest has comments describing all the possible configuration parameters -you can supply, but for most studies you'll have something that looks like this: - -``` +If you're authoring a study, you just need to do two things to get started: + +- Make a new directory inside the directory you're keeping studies in. The name of this +directory will be the name you use to run it using the `cumulus-library` cli command. +In this document, we're calling this directory `my_study` as an example. +- Make a new file, `manifest.toml`. A +[toml file](https://toml.io/en/) +is a config file format - you don't need to worry too much about the details of this +format, as we'll show you in this document how the library uses these files to run your +study. You can copy the following template as an example, which has comments describing +what each section does: + +```toml +# 'study_prefix' should be a string at the start of each table. We'll use this +# to clean up queries, so it should be unique. Name your tables in the following +# format: [study_prefix]__[table_name]. It should probably, but not necessarily, +# be the same name as the folder the study definition is in. study_prefix = "my_study" +# For most use cases, this should not be required, but if you need to programmatically +# build tables, you can provide a list of files implementing BaseTableBuilder. +# See vocab and core studies for examples of this pattern. These run before +# any SQL execution +# [table_builder_config] +# file_names = [ +# "my_table_builder.py", +# ] + +# The following section describes all tables that should be generated directly +# from SQL files. [sql_config] +# 'file_names' defines a list of sql files to execute, in order, in this folder. 
+# Recommended order: Any ancillary config (like a list of condition codes), +# tables/views selecting subsets of data from FHIR data, tables/views creating +# summary statistics. file_names = [ - "my_setup.sql", - "my_cross_tables.sql", - "my_counts.sql", + "setup.sql", + "lab_observations.sql", + "counts.sql", + "date_range.sql" ] + +# The following section defines parameters related to exporting study data from +# your athena database [export_config] +# The following tables will be output to disk when an export is run. In most cases, +# only count tables should be output in this way. export_list = [ - "my_study__counts_month", + "template__count_influenza_test_month", ] -``` -Talking about what these three sections do: - - `study_prefix` is the expected prefix you will be adding to all tables your - study creates. We'll autocheck this to make sure in several places - this helps - to guarantee another researcher doesn't have a study artifact that collides - with yours. - - `sql_config.file_names` is the list of sql files that your study will run (in order). - We recommend having one sql file per topic. They should all be in the same - folder as your manifest file. - - `export_config.export_list` is the list of tables that will be downloaded - when `cumulus-library export` is run. - Cumulus is designed with the idea of shipping around aggregate - counts to reduce exposure of limited datasets, and so we recommend only exporting - "count" tables. +# For generating counts tables in a more standardized manner, we have a class in the +# main library you can extend that will handle most of the logic of assembling +# queries for you. We use this pattern for generating the core tables, as well as +# other studies authored inside BCH. These will always be run after any other +# SQL queries have been generated +# [counts_builder_config] +# file_names = [ +# "count.py" +# ] + +# For more specialized statistics, we provide a toml-based config entrypoint.
The +# details of these configs will vary, depending on which statistical method you're +# invoking. For more details, see the statistics section of the docs for a list of +# supported approaches. +# These will run last, so all the data in your study will exist by the time these +# are invoked. +# [statistics_config] +# file_names = +# [ +# "psm_config.toml" +# ] + +``` There are other hooks you can use in the manifest for more advanced control over -how you can generate SQL. See [Creating SQL with python](creating-sql-with-python.md) +how you can generate sql - these are commented out in the above template, and you can +delete them if you don't need them. See +[Creating SQL with python](creating-sql-with-python.md) for more information. -We recommend creating a git repo per study, to help version your study data, which -you can do in the same directory as the manifest file. If you've forked your study from -the template, you've already checked this step off. +If you're familiar with git workflows, we recommend creating a git repo for your study, to +help version your study in case of changes. ### Writing SQL queries @@ -95,6 +121,9 @@ Most users have a workflow that looks like this: the `medicationrequest` raw resource tables, but the `core__medication` hides that complexity and is always available, regardless of the specific EHR approach. + You can look at the + [Core study documentation](core-study-details.md) + for details about that study's contents. If you _do_ need some data that is not available in the `core` tables, make sure you look at the [Creating SQL with python](creating-sql-with-python.md) @@ -102,15 +131,46 @@ Most users have a workflow that looks like this: - Move queries to a file as you finalize them - Build your study with the CLI to make sure your queries load correctly. 
+__Important detail on FHIR arrays__: When we flatten a FHIR element that +is specified as being potentially an array (like many instances of +CodeableConcept, for example), we create a separate table from that +field. It can be joined back to the table it was extracted from by the +id field present in both tables. + +However - in your study design, you will need to handle cases where +multiple items may exist in these tables. It is common for multiple +code systems to be used for a single record. + +As an example, the Condition resource has a base level CodeableConcept +that _should_ contain a SNOMED code, but often has only an ICD9/10 code, +or an EHR vendor-specific code. We handle this case in two ways: + - The __core__condition_codable_concepts_display__ table contains one + record per resource, where we specify a priority order and take the + first valid code we find, which is ok for cases where you aren't + very concerned about a specific coding and are just looking to get + an idea of what data you have + - The __core__condition_codable_concepts_all__ table contains + every code for every system found. This is useful when you are specifically + looking for data associated with a given clinical coding system, but + if you are not careful, you can cause a condition to be counted twice + by not specifying a coding system when joining this table with the + base condition table. + +Your approach to handling this is going to be dictated by the specific +clinical context you're working with. In cases where we don't specify +two table types for an array resource, you should assume that we are +following the second pattern and account for that in your queries. + #### sqlfluff We use [sqlfluff](https://github.com/sqlfluff/sqlfluff) to help maintain a consistent -style across many different SQL query authors. We recommend using `sqlfluff` as you +style across many different SQL query authors.
We recommend using sqlfluff as you are developing your queries to ensure your SQL is matching the style of other -authors. We copy over our `sqlfluff` rules when you use `cumulus-library` to create -a study, so no additional configuration should be needed. +authors, but it is not required. You can copy our +[sqlfluff config](https://github.com/smart-on-fhir/cumulus-library/blob/main/cumulus_library/.sqlfluff) +into your study if you'd like to use the same style we are. -There are two commands you will want to run inside your study's directory: +There are two commands you can run inside your study's directory to check formatting: - `sqlfluff lint` will show you any variations from the expected styling rules - `sqlfluff fix` will try to make your autocorrect your queries to match the expected style @@ -130,11 +190,12 @@ styling. For example, `my_study__nlp_counts` would cause an error, but `my_study__counts_nlp` would be fine. + #### Requirements for accepting PRs - - **Count tables must use the CUBE function** to create powersets of data. See the - [CUBE section of the Presto docs](https://prestodb.io/docs/current/sql/select.html#group-by-clause) - for more information about this `GROUP BY` type. - The `core` and `template` projects contain examples. + - **Count tables must use the CUBE operator** to create powersets of data. See the + [Trino docs](https://trino.io/docs/current/sql/select.html#cube) + for more information about its syntax. The core study, and other studies produced + by the core Cumulus team, provide examples of its usage. - For PHI reverse identification protection, **exclude rows from count tables if they have a small number of members**, e.g. less than 10. @@ -223,7 +284,7 @@ Not only is this faster than talking to Athena, but you can edit the local ndjson to add interest edge cases that you want your SQL to be able to handle. -We use this feature in some of our studies to even add automated unit tests. 
+We use this feature in the library and our studies for automated unit testing. ## Sharing studies @@ -233,3 +294,11 @@ we can talk more about what makes sense for your use case. If you write a paper using the Cumulus library, please [cite the project](https://smarthealthit.org/cumulus/) + +## Snapshotting/archiving studies + +If you need to freeze a study at a specific point in time (like if you're working +on a publication), you can create an archive of that study using the `archive` +command in the Cumulus library CLI. Just be aware that this archive may contain +sensitive data, so make sure you store the archive someplace that complies +with your organization's security policies. diff --git a/pyproject.toml b/pyproject.toml index 53bd53cb..ea02cac3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "psmpy <1, >=0.3.13", "pyarrow >= 11.0", "pyathena >= 2.23", + "pytablewriter >= 1.2", "requests >= 2.28", "rich >= 13.2", "sqlfluff >= 2.3.4", diff --git a/tests/test_cli.py b/tests/test_cli.py index 37356982..899e75c8 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,7 @@ """ tests for the cli interface to studies """ import builtins +import filecmp import glob import io import os @@ -193,6 +194,55 @@ def test_generate_sql(mock_path, tmp_path): assert "CREATE TABLE IF NOT EXISTS study_python_valid__table" in query +@mock.patch.dict( + os.environ, + clear=True, +) +@mock.patch("sysconfig.get_path") +def test_generate_md(mock_path, tmp_path): + mock_path.return_value = f"{tmp_path}/study_python_valid/" + with does_not_raise(): + shutil.copytree( + f"{Path(__file__).resolve().parents[0]}/test_data/study_python_valid", + f"{tmp_path}/study_python_valid/", + ) + args = duckdb_args( + [ + "build", + "-t", + "study_python_valid", + "-s", +
f"{tmp_path}", + "--database", + "test", + ], + tmp_path, + ) + cli.main(cli_args=args) + test_file = f"{tmp_path}/study_python_valid/study_python_valid_generated.md" + with open(test_file) as f: + print(f.read()) + ref_file = ( + pathlib.Path(__file__).resolve().parent + / "test_data/study_python_valid_generated.md" + ) + assert filecmp.cmp(test_file, ref_file, shallow=True) + + @mock.patch.dict( os.environ, clear=True, diff --git a/tests/test_data/core/core__observation.txt b/tests/test_data/core/core__observation.txt index f3d1267e..e8d545e4 100644 --- a/tests/test_data/core/core__observation.txt +++ b/tests/test_data/core/core__observation.txt @@ -1,20 +1,20 @@ -('05e6184c-f3ef-017b-ce21-599d375020b3', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 6, 2), datetime.date(2018, 5, 28), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/8877ef1f-7cd7-3242-d7f0-73cf3f7165f4', 'Encounter/299b6495-3fe7-8db3-c494-6e1ce8b7986d', 'Observation/05e6184c-f3ef-017b-ce21-599d375020b3') -('0935feb5-36bc-6a6c-7969-841bacbabdfe', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 11), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/5ce2e599-fb6e-9b4d-3c2e-87310619b957', 'Encounter/4b03a408-6694-88e3-0e63-3ee464ecd6cd', 'Observation/0935feb5-36bc-6a6c-7969-841bacbabdfe') -('0d35c92c-8982-9737-2bae-79789514fde8', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', 
datetime.date(2018, 7, 2), datetime.date(2018, 7, 2), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/17fde357-dcc9-af8b-a8d3-4bd213afeb22', 'Encounter/32d0ae2d-1be8-9e90-a4da-4c222abd88a9', 'Observation/0d35c92c-8982-9737-2bae-79789514fde8') -('29a5c64c-9973-93d2-24a2-b2bcea3bdf4b', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 13), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/9eaa056b-1efc-0cc8-70ff-62c8f704cc13', 'Encounter/5c994000-aa78-2be5-e6cf-99f230d50c2f', 'Observation/29a5c64c-9973-93d2-24a2-b2bcea3bdf4b') -('34347a5f-a90a-f723-8121-919ca5364b7c', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 6, 14), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/267fc42d-cd9e-8527-1f9e-887fe7776147', 'Encounter/4c4d0730-201f-5b75-c657-8d0de09cc28f', 'Observation/34347a5f-a90a-f723-8121-919ca5364b7c') -('5ba3f97b-8ecc-0b87-bee1-abeffa7580b7', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 6, 14), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/267fc42d-cd9e-8527-1f9e-887fe7776147', 'Encounter/4c4d0730-201f-5b75-c657-8d0de09cc28f', 'Observation/5ba3f97b-8ecc-0b87-bee1-abeffa7580b7') -('701e9048-be5f-5b42-b582-97d1ff465438', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 
'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 15), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/c1bfec36-dc2c-afc8-c767-3d35ed2bf6f0', 'Encounter/8ff1dc01-5a28-b2d8-3b42-4b7a7d539970', 'Observation/701e9048-be5f-5b42-b582-97d1ff465438') -('84d65045-3a6e-5968-c35b-001d125b26d2', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 6, 13), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/3cf7af45-2bee-aa9c-d524-40b487149d60', 'Encounter/d2782687-6885-037c-957d-579fbd681d2a', 'Observation/84d65045-3a6e-5968-c35b-001d125b26d2') -('8930d451-d73c-5360-4b65-a8f9fdb43eae', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 6, 2), datetime.date(2018, 5, 28), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/8877ef1f-7cd7-3242-d7f0-73cf3f7165f4', 'Encounter/299b6495-3fe7-8db3-c494-6e1ce8b7986d', 'Observation/8930d451-d73c-5360-4b65-a8f9fdb43eae') -('8cc39753-79b9-2be9-77a7-157ca24ad5cf', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 6, 13), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/3cf7af45-2bee-aa9c-d524-40b487149d60', 'Encounter/d2782687-6885-037c-957d-579fbd681d2a', 'Observation/8cc39753-79b9-2be9-77a7-157ca24ad5cf') 
-('916b97ca-fb5f-71ce-0cff-70383a1aa668', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 9), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/e455ca3f-fc16-6ffc-297a-adc27e2db183', 'Encounter/98d4bd14-d78e-debb-e7dc-2df7786aedf3', 'Observation/916b97ca-fb5f-71ce-0cff-70383a1aa668') -('93b68c85-7046-025d-19a1-bffd42469601', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 7), datetime.date(2018, 7, 2), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/ac91b90d-97e4-4fc5-41cd-036bac49e6e8', 'Encounter/dc5ed645-3979-e765-3e03-6ba2173027c3', 'Observation/93b68c85-7046-025d-19a1-bffd42469601') -('b1467052-96e1-a697-fd94-feffb6e1453b', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 10), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/6385ddd7-2639-6505-3789-0521b8f66c8b', 'Encounter/fd0754a4-e96d-cba7-b3c0-77697a09c86e', 'Observation/b1467052-96e1-a697-fd94-feffb6e1453b') -('bc4dafcf-05ad-9b01-049b-f9c75bd8d53f', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 7), datetime.date(2018, 7, 2), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/ac91b90d-97e4-4fc5-41cd-036bac49e6e8', 
'Encounter/dc5ed645-3979-e765-3e03-6ba2173027c3', 'Observation/bc4dafcf-05ad-9b01-049b-f9c75bd8d53f') -('bdcea012-7a9a-193f-eb45-80dbceca6095', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 9), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/e455ca3f-fc16-6ffc-297a-adc27e2db183', 'Encounter/98d4bd14-d78e-debb-e7dc-2df7786aedf3', 'Observation/bdcea012-7a9a-193f-eb45-80dbceca6095') -('c62885ce-5f40-32a4-d8f6-ae945824b172', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 15), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/c1bfec36-dc2c-afc8-c767-3d35ed2bf6f0', 'Encounter/8ff1dc01-5a28-b2d8-3b42-4b7a7d539970', 'Observation/c62885ce-5f40-32a4-d8f6-ae945824b172') -('c802cfaf-6b9e-36fb-c7b1-1836cbc4f653', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 10), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/6385ddd7-2639-6505-3789-0521b8f66c8b', 'Encounter/fd0754a4-e96d-cba7-b3c0-77697a09c86e', 'Observation/c802cfaf-6b9e-36fb-c7b1-1836cbc4f653') -('d9871961-3f7a-7cf7-0ced-460368db48e0', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 2), datetime.date(2018, 7, 2), 
datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/17fde357-dcc9-af8b-a8d3-4bd213afeb22', 'Encounter/32d0ae2d-1be8-9e90-a4da-4c222abd88a9', 'Observation/d9871961-3f7a-7cf7-0ced-460368db48e0') -('e47346a6-9fb7-bc9b-b8bb-006bfeebbd02', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '5778-6', 'http://loinc.org', None, None, None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 13), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/9eaa056b-1efc-0cc8-70ff-62c8f704cc13', 'Encounter/5c994000-aa78-2be5-e6cf-99f230d50c2f', 'Observation/e47346a6-9fb7-bc9b-b8bb-006bfeebbd02') -('efe12fc4-932e-90ab-05d5-05169bd815b1', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', None, 'final', '34533-0', 'http://loinc.org', None, None, None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 11), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/5ce2e599-fb6e-9b4d-3c2e-87310619b957', 'Encounter/4b03a408-6694-88e3-0e63-3ee464ecd6cd', 'Observation/efe12fc4-932e-90ab-05d5-05169bd815b1') +('05e6184c-f3ef-017b-ce21-599d375020b3', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 6, 2), datetime.date(2018, 5, 28), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/8877ef1f-7cd7-3242-d7f0-73cf3f7165f4', 'Encounter/299b6495-3fe7-8db3-c494-6e1ce8b7986d', 'Observation/05e6184c-f3ef-017b-ce21-599d375020b3') +('0935feb5-36bc-6a6c-7969-841bacbabdfe', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color 
(qualifier value)', datetime.date(2018, 7, 11), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/5ce2e599-fb6e-9b4d-3c2e-87310619b957', 'Encounter/4b03a408-6694-88e3-0e63-3ee464ecd6cd', 'Observation/0935feb5-36bc-6a6c-7969-841bacbabdfe') +('0d35c92c-8982-9737-2bae-79789514fde8', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 2), datetime.date(2018, 7, 2), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/17fde357-dcc9-af8b-a8d3-4bd213afeb22', 'Encounter/32d0ae2d-1be8-9e90-a4da-4c222abd88a9', 'Observation/0d35c92c-8982-9737-2bae-79789514fde8') +('29a5c64c-9973-93d2-24a2-b2bcea3bdf4b', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 13), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/9eaa056b-1efc-0cc8-70ff-62c8f704cc13', 'Encounter/5c994000-aa78-2be5-e6cf-99f230d50c2f', 'Observation/29a5c64c-9973-93d2-24a2-b2bcea3bdf4b') +('34347a5f-a90a-f723-8121-919ca5364b7c', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 6, 14), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/267fc42d-cd9e-8527-1f9e-887fe7776147', 'Encounter/4c4d0730-201f-5b75-c657-8d0de09cc28f', 'Observation/34347a5f-a90a-f723-8121-919ca5364b7c') +('5ba3f97b-8ecc-0b87-bee1-abeffa7580b7', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 
'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 6, 14), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/267fc42d-cd9e-8527-1f9e-887fe7776147', 'Encounter/4c4d0730-201f-5b75-c657-8d0de09cc28f', 'Observation/5ba3f97b-8ecc-0b87-bee1-abeffa7580b7') +('701e9048-be5f-5b42-b582-97d1ff465438', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 15), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/c1bfec36-dc2c-afc8-c767-3d35ed2bf6f0', 'Encounter/8ff1dc01-5a28-b2d8-3b42-4b7a7d539970', 'Observation/701e9048-be5f-5b42-b582-97d1ff465438') +('84d65045-3a6e-5968-c35b-001d125b26d2', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 6, 13), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/3cf7af45-2bee-aa9c-d524-40b487149d60', 'Encounter/d2782687-6885-037c-957d-579fbd681d2a', 'Observation/84d65045-3a6e-5968-c35b-001d125b26d2') +('8930d451-d73c-5360-4b65-a8f9fdb43eae', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 6, 2), datetime.date(2018, 5, 28), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/8877ef1f-7cd7-3242-d7f0-73cf3f7165f4', 'Encounter/299b6495-3fe7-8db3-c494-6e1ce8b7986d', 'Observation/8930d451-d73c-5360-4b65-a8f9fdb43eae') +('8cc39753-79b9-2be9-77a7-157ca24ad5cf', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', 
None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 6, 13), datetime.date(2018, 6, 11), datetime.date(2018, 6, 1), datetime.date(2018, 1, 1), 'Patient/3cf7af45-2bee-aa9c-d524-40b487149d60', 'Encounter/d2782687-6885-037c-957d-579fbd681d2a', 'Observation/8cc39753-79b9-2be9-77a7-157ca24ad5cf') +('916b97ca-fb5f-71ce-0cff-70383a1aa668', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 9), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/e455ca3f-fc16-6ffc-297a-adc27e2db183', 'Encounter/98d4bd14-d78e-debb-e7dc-2df7786aedf3', 'Observation/916b97ca-fb5f-71ce-0cff-70383a1aa668') +('93b68c85-7046-025d-19a1-bffd42469601', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 7), datetime.date(2018, 7, 2), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/ac91b90d-97e4-4fc5-41cd-036bac49e6e8', 'Encounter/dc5ed645-3979-e765-3e03-6ba2173027c3', 'Observation/93b68c85-7046-025d-19a1-bffd42469601') +('b1467052-96e1-a697-fd94-feffb6e1453b', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 10), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/6385ddd7-2639-6505-3789-0521b8f66c8b', 'Encounter/fd0754a4-e96d-cba7-b3c0-77697a09c86e', 'Observation/b1467052-96e1-a697-fd94-feffb6e1453b') +('bc4dafcf-05ad-9b01-049b-f9c75bd8d53f', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 
'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 7), datetime.date(2018, 7, 2), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/ac91b90d-97e4-4fc5-41cd-036bac49e6e8', 'Encounter/dc5ed645-3979-e765-3e03-6ba2173027c3', 'Observation/bc4dafcf-05ad-9b01-049b-f9c75bd8d53f') +('bdcea012-7a9a-193f-eb45-80dbceca6095', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 9), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/e455ca3f-fc16-6ffc-297a-adc27e2db183', 'Encounter/98d4bd14-d78e-debb-e7dc-2df7786aedf3', 'Observation/bdcea012-7a9a-193f-eb45-80dbceca6095') +('c62885ce-5f40-32a4-d8f6-ae945824b172', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 15), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/c1bfec36-dc2c-afc8-c767-3d35ed2bf6f0', 'Encounter/8ff1dc01-5a28-b2d8-3b42-4b7a7d539970', 'Observation/c62885ce-5f40-32a4-d8f6-ae945824b172') +('c802cfaf-6b9e-36fb-c7b1-1836cbc4f653', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 10), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/6385ddd7-2639-6505-3789-0521b8f66c8b', 'Encounter/fd0754a4-e96d-cba7-b3c0-77697a09c86e', 'Observation/c802cfaf-6b9e-36fb-c7b1-1836cbc4f653') +('d9871961-3f7a-7cf7-0ced-460368db48e0', 'laboratory', 
'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 2), datetime.date(2018, 7, 2), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/17fde357-dcc9-af8b-a8d3-4bd213afeb22', 'Encounter/32d0ae2d-1be8-9e90-a4da-4c222abd88a9', 'Observation/d9871961-3f7a-7cf7-0ced-460368db48e0') +('e47346a6-9fb7-bc9b-b8bb-006bfeebbd02', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '5778-6', 'http://loinc.org', None, None, None, '371254008', 'http://snomed.info/sct', 'Brown color (qualifier value)', datetime.date(2018, 7, 13), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/9eaa056b-1efc-0cc8-70ff-62c8f704cc13', 'Encounter/5c994000-aa78-2be5-e6cf-99f230d50c2f', 'Observation/e47346a6-9fb7-bc9b-b8bb-006bfeebbd02') +('efe12fc4-932e-90ab-05d5-05169bd815b1', 'laboratory', 'http://terminology.hl7.org/CodeSystem/observation-category', 'final', '34533-0', 'http://loinc.org', None, None, None, '167248002', 'http://snomed.info/sct', 'Urine smell ammoniacal (finding)', datetime.date(2018, 7, 11), datetime.date(2018, 7, 9), datetime.date(2018, 7, 1), datetime.date(2018, 1, 1), 'Patient/5ce2e599-fb6e-9b4d-3c2e-87310619b957', 'Encounter/4b03a408-6694-88e3-0e63-3ee464ecd6cd', 'Observation/efe12fc4-932e-90ab-05d5-05169bd815b1') diff --git a/tests/test_data/study_python_valid_generated.md b/tests/test_data/study_python_valid_generated.md new file mode 100644 index 00000000..00e40822 --- /dev/null +++ b/tests/test_data/study_python_valid_generated.md @@ -0,0 +1,19 @@ +## study_python_valid base tables + +### study_python_valid__lib_transactions + +| Column | Type |Description| +|---------------|---------|-----------| +|study_name |VARCHAR | | +|library_version|VARCHAR | | +|status |VARCHAR | | +|event_time |TIMESTAMP| | + + +### study_python_valid__table 
+ +|Column| Type |Description| +|------|-------|-----------| +|test |INTEGER| | + + diff --git a/tests/test_templates.py b/tests/test_templates.py index 571636c2..238fa855 100644 --- a/tests/test_templates.py +++ b/tests/test_templates.py @@ -164,13 +164,28 @@ def test_get_column_datatype_query(): FROM information_schema.columns WHERE table_schema = 'schema_name' - AND table_name = 'table_name' - AND LOWER(column_name) IN ('foo', 'bar') --noqa: LT05""" + AND table_name IN ('table_name')""" query = base_templates.get_column_datatype_query( schema_name="schema_name", - table_name="TABLE_NAME", + table_names="TABLE_NAME", + ) + assert query == expected + expected = """SELECT + column_name, + data_type, + table_name --noqa: LT02 +FROM information_schema.columns +WHERE + table_schema = 'schema_name' + AND table_name IN ('table_name') + AND LOWER(column_name) IN ('foo', 'bar') --noqa: LT02,LT05""" + + query = base_templates.get_column_datatype_query( + schema_name="schema_name", + table_names="TABLE_NAME", column_names=["foo", "BAR"], + include_table_names=True, ) assert query == expected