diff --git a/cumulus_library/base_table_builder.py b/cumulus_library/base_table_builder.py index d3f37f5e..b293b2cb 100644 --- a/cumulus_library/base_table_builder.py +++ b/cumulus_library/base_table_builder.py @@ -1,5 +1,6 @@ """ abstract base for python-based study executors """ +import pathlib import re import sys @@ -102,20 +103,26 @@ def post_execution( """Hook for any additional actions to run after execute_queries""" pass - def comment_queries(self): + def comment_queries(self, doc_str=None): """Convenience method for annotating outputs of template generators to disk""" commented_queries = ["-- noqa: disable=all"] + if doc_str: + commented_queries.append(doc_str) + commented_queries.append( + "\n-- ###########################################################\n" + ) for query in self.queries: commented_queries.append(query) commented_queries.append( - "\n-- ###########################################################" + "\n-- ###########################################################\n" ) commented_queries.pop() self.queries = commented_queries - def write_queries(self, filename: str = "output.sql"): + def write_queries(self, path: pathlib.Path = pathlib.Path.cwd() / "output.sql"): + path.parents[0].mkdir(parents=True, exist_ok=True) """writes all queries constructed by prepare_queries to disk""" - with open(filename, "w", encoding="utf-8") as file: + with open(path, "w", encoding="utf-8") as file: for query in self.queries: file.write(query) file.write("\n") diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py index b2f6f969..d0ff8606 100755 --- a/cumulus_library/cli.py +++ b/cumulus_library/cli.py @@ -3,35 +3,36 @@ import json import os +import pathlib import sys import sysconfig -from pathlib import Path, PosixPath -from typing import Dict, List, Optional -from rich.console import Console -from rich.table import Table +from typing import Dict, List, Optional -from cumulus_library import __version__, errors, helper -from cumulus_library.cli_parser import get_parser -from cumulus_library.databases import ( - DatabaseBackend, - create_db_backend, +import rich + +from cumulus_library import ( + __version__, + cli_parser, + databases, + enums, + errors, + helper, + protected_table_builder, + study_parser, + upload, ) -from cumulus_library.enums import ProtectedTables -from cumulus_library.protected_table_builder import TRANSACTIONS_COLS -from cumulus_library.study_parser import StudyManifestParser -from cumulus_library.template_sql.templates import get_insert_into_query -from cumulus_library.upload import upload_files +from cumulus_library.template_sql import templates -class StudyBuilder: - """Class for managing Athena cursors and executing Cumulus queries""" +class StudyRunner: + """Class for managing cursors and executing Cumulus queries""" verbose = False schema_name = None - def __init__(self, db: DatabaseBackend, data_path: str): + def __init__(self, db: databases.DatabaseBackend, data_path: str): self.db = db self.data_path = data_path self.cursor = db.cursor() @@ -40,9 +41,9 @@ def __init__(self, db: DatabaseBackend, data_path: str): def update_transactions(self, prefix: str, status: str): """Adds a record to a study's transactions table""" self.cursor.execute( - get_insert_into_query( - f"{prefix}__{ProtectedTables.TRANSACTIONS.value}", - TRANSACTIONS_COLS, + templates.get_insert_into_query( + f"{prefix}__{enums.ProtectedTables.TRANSACTIONS.value}", + protected_table_builder.TRANSACTIONS_COLS, [ [ prefix, @@ -83,7 +84,7 @@ def clean_study( ) for target in targets: if prefix: - parser = StudyManifestParser() + parser = study_parser.StudyManifestParser() parser.clean_study( self.cursor, self.schema_name, @@ -92,7 +93,7 @@ def clean_study( prefix=target, ) else: - parser = StudyManifestParser(study_dict[target]) + parser = study_parser.StudyManifestParser(study_dict[target]) parser.clean_study( self.cursor, self.schema_name, @@ -102,18 +103,18 @@ def clean_study( def clean_and_build_study( self, - target: PosixPath, + target: pathlib.Path, *, stats_build: bool, continue_from: str = None, ) -> None: """Recreates study views/tables - :param target: A PosixPath to the study directory + :param target: A path to the study directory :param stats_build: if True, forces creation of new stats tables :keyword continue_from: Restart a run from a specific sql file (for dev only) """ - studyparser = StudyManifestParser(target, self.data_path) + studyparser = study_parser.StudyManifestParser(target, self.data_path) try: if not continue_from: studyparser.run_protected_table_builder( @@ -159,14 +160,14 @@ def clean_and_build_study( raise e def run_single_table_builder( - self, target: PosixPath, table_builder_name: str + self, target: pathlib.Path, table_builder_name: str ) -> None: """Runs a single table builder - :param target: A PosixPath to the study directory + :param target: A path to the study directory :param table_builder_name: a builder file referenced in the study's manifest """ - studyparser = StudyManifestParser(target) + studyparser = study_parser.StudyManifestParser(target) studyparser.run_single_table_builder( self.cursor, self.schema_name, @@ -181,7 +182,7 @@ def clean_and_build_all(self, study_dict: Dict, stats_build: bool) -> None: NOTE: By design, this method will always exclude the `template` study dir, since 99% of the time you don't need a live copy in the database. - :param study_dict: A dict of PosixPaths + :param study_dict: A dict of paths :param stats_build: if True, regen stats tables """ study_dict = dict(study_dict) @@ -195,34 +196,50 @@ def clean_and_build_all(self, study_dict: Dict, stats_build: bool) -> None: self.clean_and_build_study(study_dict[key], stats_build=stats_build) ### Data exporters - def export_study(self, target: PosixPath, data_path: PosixPath) -> None: + def export_study(self, target: pathlib.Path, data_path: pathlib.Path) -> None: """Exports aggregates defined in a manifest - :param target: A PosixPath to the study directory + :param target: A path to the study directory """ if data_path is None: sys.exit("Missing destination - please provide a path argument.") - studyparser = StudyManifestParser(target, data_path) + studyparser = study_parser.StudyManifestParser(target, data_path) studyparser.export_study(self.db, data_path) - def export_all(self, study_dict: Dict, data_path: PosixPath): + def export_all(self, study_dict: Dict, data_path: pathlib.Path): """Exports all defined count tables to disk""" for key in study_dict.keys(): self.export_study(study_dict[key], data_path) + def generate_study_sql( + self, + target: pathlib.Path, + ) -> None: + """Materializes study sql from templates -def get_abs_posix_path(path: str) -> PosixPath: + :param target: A path to the study directory + """ + studyparser = study_parser.StudyManifestParser(target) + studyparser.run_generate_sql( + self.cursor, + self.schema_name, + verbose=self.verbose, + parser=self.db.parser(), + ) + + +def get_abs_posix_path(path: str) -> pathlib.Path: """Convenience method for handling abs vs rel paths""" if path[0] == "/": - return Path(path) + return pathlib.Path(path) else: - return Path(Path.cwd(), path) + return pathlib.Path(pathlib.Path.cwd(), path) def create_template(path: str) -> None: """Creates a manifest in target dir if one doesn't exist""" abs_path = get_abs_posix_path(path) - manifest_path = Path(abs_path, "manifest.toml") + manifest_path = pathlib.Path(abs_path, "manifest.toml") if manifest_path.exists(): sys.exit(f"A manifest.toml already exists at {abs_path}, skipping creation") abs_path.mkdir(parents=True, exist_ok=True) @@ -232,33 +249,33 @@ def create_template(path: str) -> None: [".sqlfluff", ".sqlfluff"], ] for source, dest in copy_lists: - source_path = Path(Path(__file__).resolve().parents[0], source) - dest_path = Path(abs_path, dest) + source_path = pathlib.Path(pathlib.Path(__file__).resolve().parents[0], source) + dest_path = pathlib.Path(abs_path, dest) dest_path.write_bytes(source_path.read_bytes()) -def get_study_dict(alt_dir_paths: List) -> Optional[Dict[str, PosixPath]]: +def get_study_dict(alt_dir_paths: List) -> Optional[Dict[str, pathlib.Path]]: """Gets valid study targets from ./studies/, and any pip installed studies - :returns: A list of pathlib.PosixPath objects + :returns: A list of Path objects """ manifest_studies = {} - cli_path = Path(__file__).resolve().parents[0] + cli_path = pathlib.Path(__file__).resolve().parents[0] # first, we'll get any installed public studies with open( - Path(cli_path, "./module_allowlist.json"), "r", encoding="utf-8" + pathlib.Path(cli_path, "./module_allowlist.json"), "r", encoding="utf-8" ) as study_allowlist_json: study_allowlist = json.load(study_allowlist_json)["allowlist"] site_packages_dir = sysconfig.get_path("purelib") for study, subdir in study_allowlist.items(): - study_path = Path(site_packages_dir, subdir) + study_path = pathlib.Path(site_packages_dir, subdir) if study_path.exists(): manifest_studies[study] = study_path # then we'll get all studies inside the project directory, followed by # any user supplied paths last. These take precedence. - paths = [Path(cli_path, "studies")] + paths = [pathlib.Path(cli_path, "studies")] if alt_dir_paths is not None: paths = paths + alt_dir_paths for path in paths: @@ -267,7 +284,7 @@ def get_study_dict(alt_dir_paths: List) -> Optional[Dict[str, PosixPath]]: return manifest_studies -def get_studies_by_manifest_path(path: PosixPath) -> dict: +def get_studies_by_manifest_path(path: pathlib.Path) -> dict: """Recursively search for manifest.toml files from a given path""" manifest_paths = {} for child_path in path.iterdir(): @@ -284,17 +301,17 @@ def run_cli(args: Dict): create_template(args["create_dir"]) elif args["action"] == "upload": - upload_files(args) + upload.upload_files(args) # all other actions require connecting to the database else: - db_backend = create_db_backend(args) + db_backend = databases.create_db_backend(args) try: - builder = StudyBuilder(db_backend, data_path=args.get("data_path")) - if args["verbose"]: - builder.verbose = True + runner = StudyRunner(db_backend, data_path=args.get("data_path")) + if args.get("verbose"): + runner.verbose = True print("Testing connection to database...") - builder.cursor.execute("SHOW DATABASES") + runner.cursor.execute("SHOW DATABASES") study_dict = get_study_dict(args["study_dir"]) if "prefix" not in args.keys(): @@ -308,7 +325,7 @@ def run_cli(args: Dict): "you include `-s path/to/study/dir` as an arugment." ) if args["action"] == "clean": - builder.clean_study( + runner.clean_study( args["target"], study_dict, stats_clean=args["stats_clean"], @@ -316,15 +333,15 @@ def run_cli(args: Dict): ) elif args["action"] == "build": if "all" in args["target"]: - builder.clean_and_build_all(study_dict, args["stats_build"]) + runner.clean_and_build_all(study_dict, args["stats_build"]) else: for target in args["target"]: if args["builder"]: - builder.run_single_table_builder( + runner.run_single_table_builder( study_dict[target], args["builder"] ) else: - builder.clean_and_build_study( + runner.clean_and_build_study( study_dict[target], stats_build=args["stats_build"], continue_from=args["continue_from"], @@ -332,10 +349,18 @@ def run_cli(args: Dict): elif args["action"] == "export": if "all" in args["target"]: - builder.export_all(study_dict, args["data_path"]) + runner.export_all(study_dict, args["data_path"]) + else: + for target in args["target"]: + runner.export_study(study_dict[target], args["data_path"]) + + elif args["action"] == "generate-sql": + if "all" in args["target"]: + for target in study_dict.keys(): + runner.generate_all_sql(study_dict[target]) else: for target in args["target"]: - builder.export_study(study_dict[target], args["data_path"]) + runner.generate_study_sql(study_dict[target]) finally: db_backend.close() @@ -343,7 +368,7 @@ def run_cli(args: Dict): def main(cli_args=None): """Reads CLI input/environment variables and invokes library calls""" - parser = get_parser() + parser = cli_parser.get_parser() args = vars(parser.parse_args(cli_args)) if args["version"]: print(__version__) @@ -380,7 +405,7 @@ def main(cli_args=None): read_env_vars.append([pair[1], env_val]) if len(read_env_vars) > 0: - table = Table(title="Values read from environment variables") + table = rich.table.Table(title="Values read from environment variables") table.add_column("Environment Variable", style="green") table.add_column("Value", style="cyan") for row in read_env_vars: @@ -388,7 +413,7 @@ def main(cli_args=None): table.add_row(row[0], "#########") else: table.add_row(row[0], row[1]) - console = Console() + console = rich.console.Console() console.print(table) if args.get("study_dir"): diff --git a/cumulus_library/cli_parser.py b/cumulus_library/cli_parser.py index f125b2bc..5604936b 100644 --- a/cumulus_library/cli_parser.py +++ b/cumulus_library/cli_parser.py @@ -229,4 +229,11 @@ def get_parser() -> argparse.ArgumentParser: help="Run pre-fetch and prepare upload, but log output instead of sending.", ) + # Generate a study's template-driven sql + generate = actions.add_parser( + "generate-sql", help="Generates a study's template-driven sql for reference" + ) + add_target_argument(generate) + add_study_dir_argument(generate) + add_db_config(generate) return parser diff --git a/cumulus_library/studies/core/reference_sql/builder_condition.sql b/cumulus_library/studies/core/reference_sql/builder_condition.sql new file mode 100644 index 00000000..674610bd --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/builder_condition.sql @@ -0,0 +1,182 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library CLI. +-- Its format is tied to the specific database it was run against, and it may not +-- be correct for all databases. Use the CLI's build option to derive the best SQL +-- for your dataset. +CREATE TABLE core__condition_codable_concepts_display AS ( + WITH + + system_code_0 AS ( + SELECT DISTINCT + s.id AS id, + '0' AS priority, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + condition AS s, + UNNEST(s.code.coding) AS u (codeable_concept) + WHERE + u.codeable_concept.system = 'http://snomed.info/sct' + ), --noqa: LT07 + + system_code_1 AS ( + SELECT DISTINCT + s.id AS id, + '1' AS priority, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + condition AS s, + UNNEST(s.code.coding) AS u (codeable_concept) + WHERE + u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-10-cm' + ), --noqa: LT07 + + system_code_2 AS ( + SELECT DISTINCT + s.id AS id, + '2' AS priority, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + condition AS s, + UNNEST(s.code.coding) AS u (codeable_concept) + WHERE + u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-9-cm' + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + priority, + code_system, + code, + display + FROM system_code_0 + UNION + SELECT + id, + priority, + code_system, + code, + display + FROM system_code_1 + UNION + SELECT + id, + priority, + code_system, + code, + display + FROM system_code_2 + ), + + partitioned_table AS ( + SELECT + id, + code, + code_system, + display, + priority, + ROW_NUMBER() + OVER ( + PARTITION BY id + ORDER BY priority ASC + ) AS available_priority + FROM union_table + GROUP BY id, priority, code_system, code, display + ORDER BY priority ASC + ) + + SELECT + id, + code, + code_system, + display + FROM partitioned_table + WHERE available_priority = 1 +); + + +-- ########################################################### + +CREATE TABLE core__condition_codable_concepts_all AS ( + WITH + + system_code_0 AS ( + SELECT DISTINCT + s.id AS id, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + condition AS s, + UNNEST(s.code.coding) AS u (codeable_concept) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + code_system, + code, + display + FROM system_code_0 + ) + SELECT + id, + code, + code_system, + display + FROM union_table +); + + +-- ########################################################### + + + +CREATE TABLE core__condition AS +WITH temp_condition AS ( + SELECT + c.id, + c.category, + NULL AS verificationStatus, + NULL AS clinicalStatus, + c.subject.reference AS subject_ref, + c.encounter.reference AS encounter_ref, + cca.code, + cca.code_system, + cca.display, + date(from_iso8601_timestamp(c.recordeddate)) AS recordeddate, + date_trunc('week', date(from_iso8601_timestamp(c.recordeddate))) + AS recordeddate_week, + date_trunc('month', date(from_iso8601_timestamp(c.recordeddate))) + AS recordeddate_month, + date_trunc('year', date(from_iso8601_timestamp(c.recordeddate))) + AS recordeddate_year + FROM condition AS c + LEFT JOIN core__condition_codable_concepts_all AS cca ON c.id = cca.id +) + +SELECT + t_category_coding.category_row.code AS category_code, + t_category_coding.category_row.display AS category_display, + tc.code, + tc.code_system, + tc.display AS code_display, + tc.subject_ref, + tc.encounter_ref, + tc.id AS condition_id, + concat('Condition/', tc.id) AS condition_ref, + tc.recordeddate, + tc.recordeddate_week AS recorded_week, + tc.recordeddate_month AS recorded_month, + tc.recordeddate_year AS recorded_year +FROM temp_condition AS tc, + unnest(category) AS t_category (category_coding), + unnest(category_coding.coding) AS t_category_coding (category_row) + +WHERE tc.recordeddate BETWEEN date('2016-01-01') AND current_date; diff --git a/cumulus_library/studies/core/reference_sql/builder_documentreference.sql b/cumulus_library/studies/core/reference_sql/builder_documentreference.sql new file mode 100644 index 00000000..801453d9 --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/builder_documentreference.sql @@ -0,0 +1,82 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library CLI. +-- Its format is tied to the specific database it was run against, and it may not +-- be correct for all databases. Use the CLI's build option to derive the best SQL +-- for your dataset. +CREATE TABLE core__documentreference_dn_type AS ( + WITH + + system_type_0 AS ( + SELECT DISTINCT + s.id AS id, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + documentreference AS s, + UNNEST(s.type.coding) AS u (codeable_concept) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + code_system, + code, + display + FROM system_type_0 + ) + SELECT + id, + code, + code_system, + display + FROM union_table +); + + +-- ########################################################### + + +-- noqa: PRS +CREATE TABLE core__documentreference AS +WITH temp_documentreference AS ( + SELECT DISTINCT + dr.id, + dr.type, + dr.status, + dr.docstatus, + dr.context, + dr.subject.reference AS subject_ref, + dr.context.period.start AS author_date, + date_trunc('day', date(from_iso8601_timestamp(dr.context.period.start))) + AS author_day, + date_trunc('week', date(from_iso8601_timestamp(dr.context.period.start))) + AS author_week, + date_trunc('month', date(from_iso8601_timestamp(dr.context.period.start))) + AS author_month, + date_trunc('year', date(from_iso8601_timestamp(dr.context.period.start))) + AS author_year, + cdrt.code, + cdrt.code_system, + cdrt.display + FROM documentreference AS dr + LEFT JOIN core__documentreference_dn_type AS cdrt ON dr.id = cdrt.id +) + +SELECT DISTINCT + tdr.id, + tdr.code AS doc_type_code, + tdr.code_system AS doc_type_code_system, + tdr.display AS doc_type_display, + tdr.status, + tdr.docstatus, + context_encounter.encounter.reference AS encounter_ref, + tdr.author_day AS author_date, + tdr.author_week, + tdr.author_month, + tdr.author_year, + tdr.subject_ref, + concat('DocumentReference/', tdr.id) AS doc_ref +FROM temp_documentreference AS tdr, + unnest(context.encounter) AS context_encounter (encounter) --noqa +WHERE date(tdr.author_day) BETWEEN date('2016-06-01') AND current_date; diff --git a/cumulus_library/studies/core/reference_sql/builder_encounter.sql b/cumulus_library/studies/core/reference_sql/builder_encounter.sql new file mode 100644 index 00000000..6488f67e --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/builder_encounter.sql @@ -0,0 +1,316 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library CLI. +-- Its format is tied to the specific database it was run against, and it may not +-- be correct for all databases. Use the CLI's build option to derive the best SQL +-- for your dataset. +CREATE TABLE core__encounter_dn_type AS ( + WITH + + system_type_0 AS ( + SELECT DISTINCT + s.id AS id, + '0' AS priority, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + encounter AS s, + UNNEST(s.type) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + WHERE + u.codeable_concept.system = 'http://terminology.hl7.org/CodeSystem/encounter-type' + ), --noqa: LT07 + + system_type_1 AS ( + SELECT DISTINCT + s.id AS id, + '1' AS priority, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + encounter AS s, + UNNEST(s.type) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + WHERE + u.codeable_concept.system = 'http://terminology.hl7.org/CodeSystem/v2-0004' + ), --noqa: LT07 + + system_type_2 AS ( + SELECT DISTINCT + s.id AS id, + '2' AS priority, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + encounter AS s, + UNNEST(s.type) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + WHERE + u.codeable_concept.system = 'urn:oid:2.16.840.1.113883.4.642.3.248' + ), --noqa: LT07 + + system_type_3 AS ( + SELECT DISTINCT + s.id AS id, + '3' AS priority, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + encounter AS s, + UNNEST(s.type) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + WHERE + u.codeable_concept.system = 'http://snomed.info/sct' + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + priority, + code_system, + code, + display + FROM system_type_0 + UNION + SELECT + id, + priority, + code_system, + code, + display + FROM system_type_1 + UNION + SELECT + id, + priority, + code_system, + code, + display + FROM system_type_2 + UNION + SELECT + id, + priority, + code_system, + code, + display + FROM system_type_3 + ), + + partitioned_table AS ( + SELECT + id, + code, + code_system, + display, + priority, + ROW_NUMBER() + OVER ( + PARTITION BY id + ORDER BY priority ASC + ) AS available_priority + FROM union_table + GROUP BY id, priority, code_system, code, display + ORDER BY priority ASC + ) + + SELECT + id, + code, + code_system, + display + FROM partitioned_table + WHERE available_priority = 1 +); + + +-- ########################################################### + +CREATE TABLE IF NOT EXISTS "main"."core__encounter_dn_servicetype" +AS ( + SELECT * FROM ( + VALUES + (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) + ) + AS t ("id","code","code_system","display") +); + +-- ########################################################### + +CREATE TABLE IF NOT EXISTS "main"."core__encounter_dn_priority" +AS ( + SELECT * FROM ( + VALUES + (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) + ) + AS t ("id","code","code_system","display") +); + +-- ########################################################### + +CREATE TABLE core__encounter_dn_reasoncode AS ( + WITH + + system_reasoncode_0 AS ( + SELECT DISTINCT + s.id AS id, + '0' AS priority, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + encounter AS s, + UNNEST(s.reasoncode) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + WHERE + u.codeable_concept.system = 'http://terminology.hl7.org/CodeSystem/v3-ActPriority' + ), --noqa: LT07 + + system_reasoncode_1 AS ( + SELECT DISTINCT + s.id AS id, + '1' AS priority, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + encounter AS s, + UNNEST(s.reasoncode) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + WHERE + u.codeable_concept.system = 'http://snomed.info/sct' + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + priority, + code_system, + code, + display + FROM system_reasoncode_0 + UNION + SELECT + id, + priority, + code_system, + code, + display + FROM system_reasoncode_1 + ), + + partitioned_table AS ( + SELECT + id, + code, + code_system, + display, + priority, + ROW_NUMBER() + OVER ( + PARTITION BY id + ORDER BY priority ASC + ) AS available_priority + FROM union_table + GROUP BY id, priority, code_system, code, display + ORDER BY priority ASC + ) + + SELECT + id, + code, + code_system, + display + FROM partitioned_table + WHERE available_priority = 1 +); + + +-- ########################################################### + +CREATE TABLE core__encounter AS +WITH temp_encounter_nullable AS ( + SELECT DISTINCT + e.id, + e.status, + e.class, + e.subject.reference AS subject_ref, + e.period, + date(from_iso8601_timestamp(e.period.start)) AS period_start, + cast(NULL AS date) AS period_end_day + , + date_trunc('day', date(from_iso8601_timestamp(e.period.start))) + AS period_start_day, + date_trunc('week', date(from_iso8601_timestamp(e.period.start))) + AS period_start_week, + date_trunc('month', date(from_iso8601_timestamp(e.period.start))) + AS period_start_month, + date_trunc('year', date(from_iso8601_timestamp(e.period.start))) + AS period_start_year + FROM encounter AS e +), + +temp_encounter AS ( + SELECT DISTINCT + e.id, + e.status, + e.class, + e.subject_ref, + e.period_start, + e.period_start_day, + e.period_end_day, + e.period_start_week, + e.period_start_month, + e.period_start_year, + edt.code AS type_code, + edt.code_system AS type_code_system, + eds.code AS sevicetype_code, + eds.code_system AS sevicetype_code_system, + edp.code AS priority_code, + edp.code_system AS priority_code_system, + edr.code AS reasoncode_code, + edr.code_system AS reasoncode_code_system + + FROM temp_encounter_nullable AS e + LEFT JOIN core__encounter_dn_priority AS edt ON e.id = edt.id + LEFT JOIN core__encounter_dn_servicetype AS eds ON e.id = eds.id + LEFT JOIN core__encounter_dn_priority AS edp ON e.id = edp.id + LEFT JOIN core__encounter_dn_reasoncode AS edr ON e.id = edr.id +) + +SELECT DISTINCT + e.class AS enc_class, + ac.code AS enc_class_code, + ac.display AS enc_class_display, + e.status, + e.type_code, + e.type_code_system, + e.sevicetype_code, + e.sevicetype_code_system, + e.priority_code, + e.priority_code_system, + e.reasoncode_code, + e.reasoncode_code_system, + date_diff('year', date(p.birthdate), e.period_start_day) AS age_at_visit, + e.period_start_day AS start_date, + coalesce(e.period_end_day, e.period_start_day) AS end_date, + e.period_start_week AS start_week, + e.period_start_month AS start_month, + e.period_start_year AS start_year, + e.subject_ref, + concat('Encounter/', e.id) AS encounter_ref, + e.id AS encounter_id, + p.gender, + p.race_display, + p.ethnicity_display, + p.postalcode3 +FROM temp_encounter AS e +LEFT JOIN core__fhir_mapping_expected_act_encounter_code_v3 AS eac + ON e.class.code = eac.found +LEFT JOIN core__fhir_act_encounter_code_v3 AS ac ON eac.expected = ac.code +INNER JOIN core__patient AS p ON e.subject_ref = p.subject_ref +WHERE + e.period_start BETWEEN date('2016-06-01') AND current_date; diff --git a/cumulus_library/studies/core/reference_sql/builder_medication.sql b/cumulus_library/studies/core/reference_sql/builder_medication.sql new file mode 100644 index 00000000..f6130bdb --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/builder_medication.sql @@ -0,0 +1,46 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library CLI. +-- Its format is tied to the specific database it was run against, and it may not +-- be correct for all databases. Use the CLI's build option to derive the best SQL +-- for your dataset. +CREATE TABLE core__medication AS ( + WITH + + mcc_nonnull AS ( + SELECT + id, + encounter, + subject, + medicationcodeableconcept + FROM medicationrequest + WHERE medicationcodeableconcept IS NOT NULL + AND encounter IS NOT NULL + ), + inline_medication AS ( + SELECT + id, + subject.reference as patient_ref, + encounter.reference AS encounter_ref, + t.r.code, + t.r.display, + t.r.system AS code_system, + + false AS userselected + + FROM mcc_nonnull, + unnest(medicationcodeableconcept.coding) AS t(r) + ) + + + + SELECT + id, + encounter_ref, + patient_ref, + code, + display, + code_system, + userselected + FROM + inline_medication +); diff --git a/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql b/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql new file mode 100644 index 00000000..bb9cfd8b --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/builder_medicationrequest.sql @@ -0,0 +1,76 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library CLI. +-- Its format is tied to the specific database it was run against, and it may not +-- be correct for all databases. Use the CLI's build option to derive the best SQL +-- for your dataset. +CREATE TABLE core__medicationrequest_dn_category AS ( + WITH + + system_category_0 AS ( + SELECT DISTINCT + s.id AS id, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + medicationrequest AS s, + UNNEST(s.category) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + code_system, + code, + display + FROM system_category_0 + ) + SELECT + id, + code, + code_system, + display + FROM union_table +); + + +-- ########################################################### + + + +CREATE TABLE core__medicationrequest AS +WITH temp_mr AS ( + SELECT + mr.id, + mr.status, + mr.intent, + date(from_iso8601_timestamp(mr.authoredon)) AS authoredon, + date_trunc('month', date(from_iso8601_timestamp(mr.authoredon))) + AS authoredon_month, + NULL AS display, + mr.subject.reference AS subject_ref, + cm.code AS rx_code, + cm.code_system AS rx_code_system, + cm.display AS rx_display, + mrc.code AS category_code, + mrc.code_system AS category_code_system + FROM medicationrequest AS mr + INNER JOIN core__medication AS cm ON mr.id = cm.id + LEFT JOIN core__medicationrequest_dn_category AS mrc ON mr.id = mrc.id + WHERE cm.code_system = 'http://www.nlm.nih.gov/research/umls/rxnorm' +) + +SELECT + id, + status, + intent, + authoredon, + authoredon_month, + category_code, + category_code_system, + rx_code_system, + rx_code, + rx_display, + subject_ref +FROM temp_mr diff --git a/cumulus_library/studies/core/reference_sql/builder_observation.sql b/cumulus_library/studies/core/reference_sql/builder_observation.sql new file mode 100644 index 00000000..49d4d4a5 --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/builder_observation.sql @@ -0,0 +1,181 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library CLI. +-- Its format is tied to the specific database it was run against, and it may not +-- be correct for all databases. Use the CLI's build option to derive the best SQL +-- for your dataset. +CREATE TABLE core__observation_dn_category AS ( + WITH + + system_category_0 AS ( + SELECT DISTINCT + s.id AS id, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + observation AS s, + UNNEST(s.category) AS cc (cc_row), + UNNEST(cc.cc_row.coding) AS u (codeable_concept) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + code_system, + code, + display + FROM system_category_0 + ) + SELECT + id, + code, + code_system, + display + FROM union_table +); + + +-- ########################################################### + +CREATE TABLE core__observation_dn_code AS ( + WITH + + system_code_0 AS ( + SELECT DISTINCT + s.id AS id, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + observation AS s, + UNNEST(s.code.coding) AS u (codeable_concept) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + code_system, + code, + display + FROM system_code_0 + ) + SELECT + id, + code, + code_system, + display + FROM union_table +); + + +-- ########################################################### + +CREATE TABLE IF NOT EXISTS "main"."core__observation_dn_interpretation" +AS ( + SELECT * FROM ( + VALUES + (cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar)) + ) + AS t ("id","code","code_system","display") +); + +-- ########################################################### + +CREATE TABLE core__observation_dn_valuecodeableconcept AS ( + WITH + + system_valuecodeableconcept_0 AS ( + SELECT DISTINCT + s.id AS id, + u.codeable_concept.code AS code, + u.codeable_concept.display AS display, + u.codeable_concept.system AS code_system + FROM + observation AS s, + UNNEST(s.valuecodeableconcept.coding) AS u (codeable_concept) + ), --noqa: LT07 + + union_table AS ( + SELECT + id, + code_system, + code, + display + FROM system_valuecodeableconcept_0 + ) + SELECT + id, + code, + code_system, + display + FROM union_table +); + + +-- ########################################################### + + + +CREATE TABLE core__observation AS +WITH temp_observation AS ( + SELECT + o.id, + o.status, + date_trunc('day', date(from_iso8601_timestamp(o.effectivedatetime))) + AS effectivedatetime_day, + date_trunc('week', date(from_iso8601_timestamp(o.effectivedatetime))) + AS effectivedatetime_week, + date_trunc('month', date(from_iso8601_timestamp(o.effectivedatetime))) + AS effectivedatetime_month, + date_trunc('year', date(from_iso8601_timestamp(o.effectivedatetime))) + AS effectivedatetime_year, + odc.code AS observation_code, + odc.code_system AS observation_code_system, + odcat.code AS category_code, + odcat.code_system AS category_code_system, + odi.code AS interpretation_code, + odi.code_system AS interpretation_code_system, + odi.display AS interpretation_display, + odvcc.code AS valuecodeableconcept_code, + odvcc.code_system AS valuecodeableconcept_code_system, + odvcc.display AS valuecodeableconcept_display, + o.component, + o.referencerange, + o.valuequantity, + o.subject.reference AS subject_ref, + o.encounter.reference AS encounter_ref + FROM observation AS o + LEFT JOIN core__observation_dn_category AS odcat ON o.id = odcat.id + LEFT JOIN core__observation_dn_code AS odc ON o.id = odc.id + LEFT JOIN core__observation_dn_interpretation AS odi ON o.id = odi.id + LEFT JOIN core__observation_dn_valuecodeableconcept AS odvcc ON o.id = odvcc.id +) + +SELECT + id, + category_code, + category_code_system, + component, + status, + observation_code, + observation_code_system, + interpretation_code, + interpretation_code_system, + interpretation_display, + referencerange, + valuequantity, + valuecodeableconcept_code, + valuecodeableconcept_code_system, + valuecodeableconcept_display, + effectivedatetime_day AS obs_date, + effectivedatetime_week AS obs_week, + effectivedatetime_month AS obs_month, + effectivedatetime_year AS obs_year, + subject_ref, + encounter_ref, + concat('Observation/', id) AS observation_ref +FROM temp_observation +WHERE + effectivedatetime_day BETWEEN date( + from_iso8601_timestamp('2016-06-01') + ) AND current_date; diff --git a/cumulus_library/studies/core/reference_sql/builder_patient.sql b/cumulus_library/studies/core/reference_sql/builder_patient.sql new file mode 100644 index 00000000..879b1203 --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/builder_patient.sql @@ -0,0 +1,285 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library CLI. +-- Its format is tied to the specific database it was run against, and it may not +-- be correct for all databases. Use the CLI's build option to derive the best SQL +-- for your dataset. +CREATE TABLE core__patient_ext_race AS ( + WITH + + system_ombCategory AS ( + SELECT DISTINCT + s.id AS id, + '0' AS priority, + 'ombCategory' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS race_code, + ext_child.ext.valuecoding.display AS race_display + FROM + patient AS s, + UNNEST(extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race' + AND ext_child.ext.url = 'ombCategory' + AND ext_child.ext.valuecoding.display != '' + ), + + system_detailed AS ( + SELECT DISTINCT + s.id AS id, + '1' AS priority, + 'detailed' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS race_code, + ext_child.ext.valuecoding.display AS race_display + FROM + patient AS s, + UNNEST(extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race' + AND ext_child.ext.url = 'detailed' + AND ext_child.ext.valuecoding.display != '' + ), + + system_text AS ( + SELECT DISTINCT + s.id AS id, + '2' AS priority, + 'text' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS race_code, + ext_child.ext.valuecoding.display AS race_display + FROM + patient AS s, + UNNEST(extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race' + AND ext_child.ext.url = 'text' + AND ext_child.ext.valuecoding.display != '' + ), + + union_table AS ( + SELECT + id, + priority, + system, + race_code, + race_display + FROM system_ombCategory + UNION + SELECT + id, + priority, + system, + race_code, + race_display + FROM system_detailed + UNION + SELECT + id, + priority, + system, + race_code, + race_display + FROM system_text + ORDER BY id, priority + ) + + SELECT + id, + system, + race_code, + race_display + FROM ( + SELECT + id, + system, + LOWER( + ARRAY_JOIN( + ARRAY_SORT( + ARRAY_AGG( + race_code + ) + ), '; ' + ) + ) + AS race_code, + LOWER( + ARRAY_JOIN( + ARRAY_SORT( + ARRAY_AGG( + race_display + ) + ), '; ' + ) + ) AS race_display, + ROW_NUMBER() + OVER ( + PARTITION BY id, system + ORDER BY priority ASC + ) AS available_priority + FROM union_table + GROUP BY id, system, priority + ) + WHERE available_priority = 1 +); + +-- ########################################################### + +CREATE TABLE core__patient_ext_ethnicity AS ( + WITH + + system_ombCategory AS ( + SELECT DISTINCT + s.id AS id, + '0' AS priority, + 'ombCategory' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS ethnicity_code, + ext_child.ext.valuecoding.display AS ethnicity_display + FROM + patient AS s, + UNNEST(extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity' + AND ext_child.ext.url = 'ombCategory' + AND ext_child.ext.valuecoding.display != '' + ), + + system_detailed AS ( + SELECT DISTINCT + s.id AS id, + '1' AS priority, + 'detailed' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS ethnicity_code, + ext_child.ext.valuecoding.display AS ethnicity_display + FROM + patient AS s, + UNNEST(extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity' + AND ext_child.ext.url = 'detailed' + AND ext_child.ext.valuecoding.display != '' + ), + + system_text AS ( + SELECT DISTINCT + s.id AS id, + '2' AS priority, + 'text' AS system, -- noqa: RF04 + ext_child.ext.valuecoding.code AS ethnicity_code, + ext_child.ext.valuecoding.display AS ethnicity_display + FROM + patient AS s, + UNNEST(extension) AS ext_parent (ext), + UNNEST(ext_parent.ext.extension) AS ext_child (ext) + WHERE + ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity' + AND ext_child.ext.url = 'text' + AND ext_child.ext.valuecoding.display != '' + ), + + union_table AS ( + SELECT + id, + priority, + system, + ethnicity_code, + ethnicity_display + FROM system_ombCategory + UNION + SELECT + id, + priority, + system, + ethnicity_code, + ethnicity_display + FROM system_detailed + UNION + SELECT + id, + priority, + system, + ethnicity_code, + ethnicity_display + FROM system_text + ORDER BY id, priority + ) + + SELECT + id, + system, + ethnicity_code, + ethnicity_display + FROM ( + SELECT + id, + system, + LOWER( + ARRAY_JOIN( + ARRAY_SORT( + ARRAY_AGG( + ethnicity_code + ) + ), '; ' + ) + ) + AS ethnicity_code, + LOWER( + ARRAY_JOIN( + ARRAY_SORT( + ARRAY_AGG( + ethnicity_display + ) + ), '; ' + ) + ) AS ethnicity_display, + ROW_NUMBER() + OVER ( + PARTITION BY id, system + ORDER BY priority ASC + ) AS available_priority + FROM union_table + GROUP BY id, system, priority + ) + WHERE available_priority = 1 +); + +-- ########################################################### + + +CREATE TABLE core__patient AS +WITH temp_patient AS ( + SELECT DISTINCT + p.id, + p.gender, + p.address, + date(from_iso8601_timestamp(p.birthdate)) AS birthdate, + er.race_display, + ee.ethnicity_display + FROM + patient AS p + LEFT JOIN core__patient_ext_race AS er ON p.id = er.id + LEFT JOIN core__patient_ext_ethnicity AS ee ON p.id = ee.id +) + +SELECT DISTINCT + tp.gender, + tp.birthdate, + CASE + WHEN + t_address.addr_row.postalcode IS NOT NULL + THEN substr(t_address.addr_row.postalcode, 1, 3) + ELSE 'None' + END AS postalcode3, + tp.id AS subject_id, + concat('Patient/', tp.id) AS subject_ref, + coalesce(tp.race_display, 'unknown') AS race_display, + coalesce(tp.ethnicity_display, 'unknown') AS ethnicity_display +FROM + temp_patient AS tp, + unnest(tp.address) AS t_address (addr_row) + +WHERE + tp.birthdate IS NOT NULL + AND tp.gender IS NOT NULL; diff --git a/cumulus_library/studies/core/reference_sql/builder_prereq_tables.sql b/cumulus_library/studies/core/reference_sql/builder_prereq_tables.sql new file mode 100644 index 00000000..c466985e --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/builder_prereq_tables.sql @@ -0,0 +1,277 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library CLI. +-- Its format is tied to the specific database it was run against, and it may not +-- be correct for all databases. Use the CLI's build option to derive the best SQL +-- for your dataset. +CREATE TABLE core__meta_version AS +SELECT 3 AS data_package_version; + +-- ########################################################### + +-- Emergency Department Notes +-- +CREATE TABLE core__ed_note AS +SELECT + t.from_system, + t.from_code, + t.analyte, + t.code_system, + t.code, + t.display +FROM + ( + VALUES + ( + 'BCH', + 'NOTE:149798455', + 'Emergency MD', + 'http://loinc.org', + '34878-9', + 'Emergency medicine Note' + ), + ( + 'BCH', + 'NOTE:159552404', + 'ED Note Scanned', + 'http://loinc.org', + '34878-9', + 'Emergency medicine Note' + ), + ( + 'BCH', + 'NOTE:3807712', + 'ED Note', + 'http://loinc.org', + '34878-9', + 'Emergency medicine Note' + ), + ( + 'BCH', + 'NOTE:189094644', + 'Emergency Dept Scanned Forms', + 'http://loinc.org', + '34878-9', + 'Emergency medicine Note' + ), + ( + 'BCH', + 'NOTE:189094576', + 'ED Scanned', + 'http://loinc.org', + '34878-9', + 'Emergency medicine Note' + ), + ( + 'BCH', + 'NOTE:3710480', + 'ED Consultation', + 'http://loinc.org', + '51846-4', + 'Emergency department Consult note' + ) + ) + AS t (from_system, from_code, analyte, code_system, code, display); + +-- ########################################################### + +-- http://hl7.org/fhir/r4/v3/ActEncounterCode/vs.html +CREATE TABLE core__fhir_act_encounter_code_v3 AS +SELECT + t.code, + t.display +FROM + ( + VALUES + ( + 'AMB', + 'ambulatory' + ), + ( + 'EMER', + 'emergency' + ), + ( + 'FLD', + 'field' + ), + ( + 'HH', + 'home health' + ), + ( + 'IMP', + 'inpatient encounter' + ), + ( + 'ACUTE', + 'inpatient acute' + ), + ( + 'NONAC', + 'inpatient non-acute' + ), + ( + 'OBSENC', + 'observation encounter' + ), + ( + 'PRENC', + 'pre-admission' + ), + ( + 'SS', + 'short stay' + ), + ( + 'VR', + 'virtual' + ) + ) + AS t (code, display); + +-- ########################################################### + +-- ############################################################ +-- FHIR mapping of code systems to URIs +-- This includes both the expected URI, as well as several found URIs from +-- various source systems + +CREATE TABLE core__fhir_mapping_code_system_uri AS SELECT * FROM + ( + VALUES + ('ICD10', 'http://hl7.org/fhir/sid/icd-10-cm'), + ('ICD10', '2.16.840.1.113883.6.90'), + ('ICD10', 'ICD10'), + ('ICD10', 'ICD-10'), + ('ICD10', 'ICD-10-CM'), + ('ICD10', 'ICD10-CM'), + + ('ICD9', 'http://hl7.org/fhir/sid/icd-9-cm'), + ('ICD9', '2.16.840.1.113883.6.103'), + ('ICD9', 'ICD9'), + ('ICD9', 'ICD-9'), + ('ICD9', 'ICD-9-CM'), + ('ICD9', 'ICD9-CM'), + + ('SNOMED', 'http://snomed.info/sct'), + ('SNOMED', '2.16.840.1.113883.6.96'), + ('SNOMED', 'SNOMEDCT'), + ('SNOMED', 'SNOMEDCT_US'), + ('SNOMED', 'SNOMED'), + + ('LOINC', 'http://loinc.org'), + ('LOINC', '2.16.840.1.113883.6.1'), + ('LOINC', 'LOINC'), + ('LOINC', 'LNC'), + + ('RXNORM', 'http://www.nlm.nih.gov/research/umls/'), + ('RXNORM', '2.16.840.1.113883.6.88'), + ('RXNORM', 'RXNORM'), + + ('UMLS', 'http://www.nlm.nih.gov/research/umls/'), + ('UMLS', 'UMLS'), + + ('CPT', 'http://www.ama-assn.org/go/cpt'), + ('CPT', 'CPT') + ) AS t (code_system, uri); + +-- ########################################################### + +-- ############################################################ +-- FHIR mapping of Resource names to expected URIs + +CREATE TABLE core__fhir_mapping_resource_uri AS +SELECT * FROM + ( + VALUES + ( + 'Patient', + 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-patient' + ), + ('Gender', 'http://hl7.org/fhir/ValueSet/administrative-gender'), + ( + 'Race', + 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race' + ), + ( + 'Ethnicity', + 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity' + ), + ( + 'PostalCode', + 'http://hl7.org/fhir/datatypes-definitions.html#Address.postalCode' + ), + ( + 'PatientClass', + 'http://terminology.hl7.org/CodeSystem/v2-0004' + ), + ( + 'Encounter', + 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-encounter' + ), + ('EncounterStatus', 'http://hl7.org/fhir/ValueSet/encounter-status'), + ('EncounterType', 'http://hl7.org/fhir/ValueSet/encounter-type'), + ('EncounterReason', 'http://hl7.org/fhir/ValueSet/encounter-reason'), + ('EncounterCode', 'http://terminology.hl7.org/ValueSet/v3-ActEncounterCode'), + ('EncounterPriority', 'http://terminology.hl7.org/CodeSystem/v3-ActPriority'), + ( + 'EncounterLocationStatus', + 'http://hl7.org/fhir/ValueSet/encounter-location-status' + ), + ('Period', 'http://hl7.org/fhir/datatypes.html#Period'), + ('Coding', 'http://hl7.org/fhir/datatypes.html#Coding'), + ( + 'DocumentReference', + 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-documentreference' + ), + ('DocumentType', 'http://hl7.org/fhir/ValueSet/c80-doc-typecodes'), + ('Condition', 'http://hl7.org/fhir/condition-definitions.html'), + ('ConditionCode', 'http://hl7.org/fhir/ValueSet/condition-code'), + ( + 'ConditionCategory', + 'http://hl7.org/fhir/ValueSet/condition-category' + ), + ( + 'ObservationLab', + 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-observation-lab' + ), + ('ObservationCode', 'http://hl7.org/fhir/ValueSet/observation-codes'), + ( + 'ObservationCategory', + 'http://hl7.org/fhir/ValueSet/observation-category' + ), + ( + 'ObservationInterpretation', + 'http://hl7.org/fhir/ValueSet/observation-interpretation' + ), + ( + 'ObservationValue', + 'http://hl7.org/fhir/observation-definitions.html#Observation.value_x_' + ), + ('VitalSign', 'http://hl7.org/fhir/observation-vitalsigns.html') + ) AS t (resource, uri); + +-- ########################################################### + +-- ############################################################ +-- FHIR mapping of as found Encounter codes to the expected encounter code from +-- http://hl7.org/fhir/STU3/v3/ActEncounterCode/vs.html + +CREATE TABLE core__fhir_mapping_expected_act_encounter_code_v3 AS +SELECT * FROM + ( + VALUES + ('AMB', 'AMB'), + ('AMB', 'R'), + ('AMB', 'O'), + ('EMER', 'EMER'), + ('EMER', 'E'), + ('FLD', 'FLD'), + ('HH', 'HH'), + ('IMP', 'IMP'), + ('ACUTE', 'ACUTE'), + ('NONAC', 'NONAC'), + ('PRENC', 'PRENC'), + ('SS', 'SS'), + ('VR', 'VR') + ) AS t (expected, found); diff --git a/cumulus_library/studies/core/reference_sql/count_core.sql b/cumulus_library/studies/core/reference_sql/count_core.sql new file mode 100644 index 00000000..eb0c32db --- /dev/null +++ b/cumulus_library/studies/core/reference_sql/count_core.sql @@ -0,0 +1,756 @@ +-- noqa: disable=all +-- This sql was autogenerated as a reference example using the library CLI. +-- Its format is tied to the specific database it was run against, and it may not +-- be correct for all databases. Use the CLI's build option to derive the best SQL +-- for your dataset. +CREATE TABLE core__count_condition_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + cccd.display AS cond_code_display, + --noqa: disable=RF03, AL02 + s."category_code" AS cond_category_code, + s."recorded_month" AS cond_month + --noqa: enable=RF03, AL02 + FROM core__condition AS s + LEFT JOIN core__condition_codable_concepts_display AS cccd + ON s.condition_id = cccd.id + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(cond_code_display AS varchar), + 'cumulus__missing-or-null' + ) AS cond_code_display, + coalesce( + cast(cond_category_code AS varchar), + 'cumulus__missing-or-null' + ) AS cond_category_code, + coalesce( + cast(cond_month AS varchar), + 'cumulus__missing-or-null' + ) AS cond_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "cond_category_code", + "cond_month", + cond_code_display + FROM null_replacement + GROUP BY + cube( + "cond_category_code", + "cond_month", + cond_code_display + ) + ) + + SELECT + cnt_encounter_ref AS cnt, + "cond_category_code", + "cond_month", + cond_code_display + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_documentreference_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + s.doc_ref, + e.enc_class_display, + --noqa: disable=RF03, AL02 + s."doc_type_display", + s."author_month" + --noqa: enable=RF03, AL02 + FROM core__documentreference AS s + INNER JOIN core__encounter AS e + ON s.encounter_ref = e.encounter_ref + WHERE (s.status = 'current') + AND (s.docStatus IS null OR s.docStatus IN ('final', 'amended')) + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + doc_ref, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_class_display, + coalesce( + cast(doc_type_display AS varchar), + 'cumulus__missing-or-null' + ) AS doc_type_display, + coalesce( + cast(author_month AS varchar), + 'cumulus__missing-or-null' + ) AS author_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT encounter_ref) AS cnt_encounter_ref,count(DISTINCT doc_ref) AS cnt_doc_ref, + "doc_type_display", + "author_month", + enc_class_display + + FROM null_replacement + GROUP BY + cube( + "doc_type_display", + "author_month", + enc_class_display + + ) + ) + + SELECT + cnt_doc_ref AS cnt, + "doc_type_display", + "author_month", + enc_class_display + + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_encounter_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."start_month", + s."enc_class_display", + s."age_at_visit", + s."gender", + s."race_display", + s."ethnicity_display" + --noqa: enable=RF03, AL02 + FROM core__encounter AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(start_month AS varchar), + 'cumulus__missing-or-null' + ) AS start_month, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_class_display, + coalesce( + cast(age_at_visit AS varchar), + 'cumulus__missing-or-null' + ) AS age_at_visit, + coalesce( + cast(gender AS varchar), + 'cumulus__missing-or-null' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__missing-or-null' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__missing-or-null' + ) AS ethnicity_display + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "start_month", + "enc_class_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display" + FROM null_replacement + GROUP BY + cube( + "start_month", + "enc_class_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display" + ) + ) + + SELECT + cnt_encounter_ref AS cnt, + "start_month", + "enc_class_display", + "age_at_visit", + "gender", + "race_display", + "ethnicity_display" + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_encounter_type AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."enc_class_display", + s."enc_type_display", + s."enc_service_display", + s."enc_priority_display" + --noqa: enable=RF03, AL02 + FROM core__encounter_type AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_type_display, + coalesce( + cast(enc_service_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_service_display, + coalesce( + cast(enc_priority_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_priority_display + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "enc_class_display", + "enc_type_display", + "enc_service_display", + "enc_priority_display" + FROM null_replacement + GROUP BY + cube( + "enc_class_display", + "enc_type_display", + "enc_service_display", + "enc_priority_display" + ) + ) + + SELECT + cnt_encounter_ref AS cnt, + "enc_class_display", + "enc_type_display", + "enc_service_display", + "enc_priority_display" + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_encounter_type_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."enc_class_display", + s."enc_type_display", + s."enc_service_display", + s."enc_priority_display", + s."start_month" + --noqa: enable=RF03, AL02 + FROM core__encounter_type AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_type_display, + coalesce( + cast(enc_service_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_service_display, + coalesce( + cast(enc_priority_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_priority_display, + coalesce( + cast(start_month AS varchar), + 'cumulus__missing-or-null' + ) AS start_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "enc_class_display", + "enc_type_display", + "enc_service_display", + "enc_priority_display", + "start_month" + FROM null_replacement + GROUP BY + cube( + "enc_class_display", + "enc_type_display", + "enc_service_display", + "enc_priority_display", + "start_month" + ) + ) + + SELECT + cnt_encounter_ref AS cnt, + "enc_class_display", + "enc_type_display", + "enc_service_display", + "enc_priority_display", + "start_month" + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_encounter_enc_type_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."enc_class_display", + s."enc_type_display", + s."start_month" + --noqa: enable=RF03, AL02 + FROM core__encounter_type AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_class_display, + coalesce( + cast(enc_type_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_type_display, + coalesce( + cast(start_month AS varchar), + 'cumulus__missing-or-null' + ) AS start_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "enc_class_display", + "enc_type_display", + "start_month" + FROM null_replacement + GROUP BY + cube( + "enc_class_display", + "enc_type_display", + "start_month" + ) + ) + + SELECT + cnt_encounter_ref AS cnt, + "enc_class_display", + "enc_type_display", + "start_month" + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_encounter_service_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."enc_class_display", + s."enc_service_display", + s."start_month" + --noqa: enable=RF03, AL02 + FROM core__encounter_type AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_class_display, + coalesce( + cast(enc_service_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_service_display, + coalesce( + cast(start_month AS varchar), + 'cumulus__missing-or-null' + ) AS start_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "enc_class_display", + "enc_service_display", + "start_month" + FROM null_replacement + GROUP BY + cube( + "enc_class_display", + "enc_service_display", + "start_month" + ) + ) + + SELECT + cnt_encounter_ref AS cnt, + "enc_class_display", + "enc_service_display", + "start_month" + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_encounter_priority_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + --noqa: disable=RF03, AL02 + s."enc_class_display", + s."enc_priority_display", + s."start_month" + --noqa: enable=RF03, AL02 + FROM core__encounter_type AS s + WHERE s.status = 'finished' + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_class_display, + coalesce( + cast(enc_priority_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_priority_display, + coalesce( + cast(start_month AS varchar), + 'cumulus__missing-or-null' + ) AS start_month + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT encounter_ref) AS cnt_encounter_ref, + "enc_class_display", + "enc_priority_display", + "start_month" + FROM null_replacement + GROUP BY + cube( + "enc_class_display", + "enc_priority_display", + "start_month" + ) + ) + + SELECT + cnt_encounter_ref AS cnt, + "enc_class_display", + "enc_priority_display", + "start_month" + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_medicationrequest_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."status", + s."intent", + s."authoredon_month", + s."rx_display" + --noqa: enable=RF03, AL02 + FROM core__medicationrequest AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(status AS varchar), + 'cumulus__missing-or-null' + ) AS status, + coalesce( + cast(intent AS varchar), + 'cumulus__missing-or-null' + ) AS intent, + coalesce( + cast(authoredon_month AS varchar), + 'cumulus__missing-or-null' + ) AS authoredon_month, + coalesce( + cast(rx_display AS varchar), + 'cumulus__missing-or-null' + ) AS rx_display + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + "status", + "intent", + "authoredon_month", + "rx_display" + FROM null_replacement + GROUP BY + cube( + "status", + "intent", + "authoredon_month", + "rx_display" + ) + ) + + SELECT + cnt_subject AS cnt, + "status", + "intent", + "authoredon_month", + "rx_display" + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_observation_lab_month AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + s.encounter_ref, + s.observation_ref, + e.enc_class_display, + --noqa: disable=RF03, AL02 + s."lab_month", + s."lab_code", + s."lab_result_display" + --noqa: enable=RF03, AL02 + FROM core__observation_lab AS s + INNER JOIN core__encounter AS e + ON s.encounter_ref = e.encounter_ref + WHERE (s.status = 'final' OR s.status= 'amended') + ), + + null_replacement AS ( + SELECT + subject_ref, + encounter_ref, + observation_ref, + coalesce( + cast(enc_class_display AS varchar), + 'cumulus__missing-or-null' + ) AS enc_class_display, + coalesce( + cast(lab_month AS varchar), + 'cumulus__missing-or-null' + ) AS lab_month, + coalesce( + cast(lab_code AS varchar), + 'cumulus__missing-or-null' + ) AS lab_code, + coalesce( + cast(lab_result_display AS varchar), + 'cumulus__missing-or-null' + ) AS lab_result_display + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + count(DISTINCT encounter_ref) AS cnt_encounter_ref,count(DISTINCT observation_ref) AS cnt_observation_ref, + "lab_month", + "lab_code", + "lab_result_display", + enc_class_display + + FROM null_replacement + GROUP BY + cube( + "lab_month", + "lab_code", + "lab_result_display", + enc_class_display + + ) + ) + + SELECT + cnt_observation_ref AS cnt, + "lab_month", + "lab_code", + "lab_result_display", + enc_class_display + + FROM powerset + WHERE + cnt_subject >= 10 +); + +-- ########################################################### + +CREATE TABLE core__count_patient AS ( + WITH + filtered_table AS ( + SELECT + s.subject_ref, + --noqa: disable=RF03, AL02 + s."gender", + s."race_display", + s."ethnicity_display" + --noqa: enable=RF03, AL02 + FROM core__patient AS s + ), + + null_replacement AS ( + SELECT + subject_ref, + coalesce( + cast(gender AS varchar), + 'cumulus__missing-or-null' + ) AS gender, + coalesce( + cast(race_display AS varchar), + 'cumulus__missing-or-null' + ) AS race_display, + coalesce( + cast(ethnicity_display AS varchar), + 'cumulus__missing-or-null' + ) AS ethnicity_display + FROM filtered_table + ), + + powerset AS ( + SELECT + count(DISTINCT subject_ref) AS cnt_subject, + "gender", + "race_display", + "ethnicity_display" + FROM null_replacement + GROUP BY + cube( + "gender", + "race_display", + "ethnicity_display" + ) + ) + + SELECT + cnt_subject AS cnt, + "gender", + "race_display", + "ethnicity_display" + FROM powerset + WHERE + cnt_subject >= 10 +); diff --git a/cumulus_library/study_parser.py b/cumulus_library/study_parser.py index bbbce81c..46d8a4d2 100644 --- a/cumulus_library/study_parser.py +++ b/cumulus_library/study_parser.py @@ -319,9 +319,11 @@ def _load_and_execute_builder( filename: str, cursor: databases.DatabaseCursor, schema: str, - verbose: bool, + verbose: bool = False, drop_table: bool = False, parser: databases.DatabaseParser = None, + write_reference_sql: bool = False, + doc_str: str = None, ) -> None: """Loads a table builder from a file. @@ -369,9 +371,17 @@ def _load_and_execute_builder( # execute, since the subclass would otherwise hang around. table_builder_class = table_builder_subclasses[0] table_builder = table_builder_class() - table_builder.execute_queries( - cursor, schema, verbose=verbose, drop_table=drop_table, parser=parser - ) + if write_reference_sql: + table_builder.prepare_queries(cursor, schema, parser=parser) + table_builder.comment_queries(doc_str=doc_str) + new_filename = pathlib.Path(f"{filename}").stem + ".sql" + table_builder.write_queries( + path=pathlib.Path(f"{self._study_path}/reference_sql/" + new_filename) + ) + else: + table_builder.execute_queries( + cursor, schema, verbose=verbose, drop_table=drop_table, parser=parser + ) # After running the executor code, we'll remove # it so it doesn't interfere with the next python module to @@ -411,7 +421,9 @@ def run_table_builder( :param verbose: toggle from progress bar to query output """ for file in self.get_table_builder_file_list(): - self._load_and_execute_builder(file, cursor, schema, verbose, parser=parser) + self._load_and_execute_builder( + file, cursor, schema, verbose=verbose, parser=parser + ) def run_counts_builders( self, cursor: databases.DatabaseCursor, schema: str, verbose: bool = False @@ -428,7 +440,7 @@ def run_counts_builders( :param verbose: toggle from progress bar to query output """ for file in self.get_counts_builder_file_list(): - self._load_and_execute_builder(file, cursor, schema, verbose) + self._load_and_execute_builder(file, cursor, schema, verbose=verbose) def run_statistics_builders( self, @@ -503,8 +515,42 @@ def run_single_table_builder( if not name.endswith(".py"): name = f"{name}.py" self._load_and_execute_builder( - name, cursor, schema, verbose, drop_table=True, parser=parser + name, cursor, schema, verbose=verbose, drop_table=True, parser=parser + ) + + def run_generate_sql( + self, + cursor: databases.DatabaseCursor, + schema: str, + parser: databases.DatabaseParser = None, + **kwargs, + ) -> None: + """Generates reference SQL from all BaseTableBuilder-derived classes in the manifest + + :param cursor: A DatabaseCursor object + :param schema: The name of the schema to write tables to + :param verbose: toggle from progress bar to query output + """ + all_generators = ( + self.get_table_builder_file_list() + + self.get_counts_builder_file_list() + + self.get_statistics_file_list() + ) + doc_str = ( + "-- This sql was autogenerated as a reference example using the library CLI.\n" + "-- Its format is tied to the specific database it was run against, and it may not\n" + "-- be correct for all databases. Use the CLI's build option to derive the best SQL\n" + "-- for your dataset." ) + for file in all_generators: + self._load_and_execute_builder( + file, + cursor, + schema, + parser=parser, + write_reference_sql=True, + doc_str=doc_str, + ) def build_study( self, diff --git a/tests/conftest.py b/tests/conftest.py index 72431d3c..14ad4420 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ import pytest import numpy -from cumulus_library.cli import StudyBuilder +from cumulus_library.cli import StudyRunner from cumulus_library.databases import create_db_backend, DatabaseCursor MOCK_DATA_DIR = f"{Path(__file__).parent}/test_data/duckdb_data" @@ -172,7 +172,7 @@ def mock_db(tmp_path): @pytest.fixture def mock_db_core(tmp_path, mock_db): # pylint: disable=redefined-outer-name """Provides a DuckDatabaseBackend with the core study ran for local testing""" - builder = StudyBuilder(mock_db, data_path=f"{tmp_path}/data_path") + builder = StudyRunner(mock_db, data_path=f"{tmp_path}/data_path") builder.clean_and_build_study( f"{Path(__file__).parent.parent}/cumulus_library/studies/core", stats_build=True ) @@ -190,7 +190,7 @@ def mock_db_stats(tmp_path): "load_ndjson_dir": f"{tmp_path}/mock_data", } ) - builder = StudyBuilder(db, data_path=f"{tmp_path}/data_path") + builder = StudyRunner(db, data_path=f"{tmp_path}/data_path") builder.clean_and_build_study( f"{Path(__file__).parent.parent}/cumulus_library/studies/core", stats_build=True ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 9ab73b3f..825298b2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,6 +3,7 @@ import builtins import glob import os +import shutil import sysconfig from contextlib import nullcontext as does_not_raise @@ -146,6 +147,42 @@ def test_count_builder_mapping(mock_path, tmp_path): ] == db.cursor().execute("show tables").fetchall() +@mock.patch.dict( + os.environ, + clear=True, +) +@mock.patch("sysconfig.get_path") +def test_generate_sql(mock_path, tmp_path): + mock_path.return_value = f"{tmp_path}/study_python_valid/" + with does_not_raise(): + shutil.copytree( + f"{Path(__file__).resolve().parents[0]}/test_data/study_python_valid", + f"{tmp_path}/study_python_valid/", + ) + args = duckdb_args( + [ + "generate-sql", + "-t", + "study_python_valid", + "-s", + f"{tmp_path}", + "--database", + "test", + ], + tmp_path, + ) + cli.main(cli_args=args) + files = glob.glob(f"{tmp_path}/study_python_valid/reference_sql/**") + assert len(files) == 2 + assert "module1.sql" in ",".join(files) + for file in files: + if file.endswith("module1.sql"): + with open(file, "r") as f: + query = "\n".join(line.rstrip() for line in f) + assert "This sql was autogenerated" in query + assert "CREATE TABLE IF NOT EXISTS study_python_valid__table" in query + + @mock.patch.dict( os.environ, clear=True, diff --git a/tests/test_core.py b/tests/test_core.py index f1f768a3..1dab6731 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -7,7 +7,7 @@ import pytest import toml -from cumulus_library.cli import StudyBuilder +from cumulus_library.cli import StudyRunner from cumulus_library.studies.core.core_templates import core_templates from tests.conftest import modify_resource_column from tests.conftest import ResourceTableIdPos as idpos # pylint: disable=unused-import @@ -85,7 +85,7 @@ def test_core_count_missing_data(tmp_path, mock_db): cursor = mock_db.cursor() modify_resource_column(cursor, "encounter", "class", null_code_class) - builder = StudyBuilder(mock_db, f"{tmp_path}/data_path/") + builder = StudyRunner(mock_db, f"{tmp_path}/data_path/") builder.clean_and_build_study( f"{Path(__file__).parent.parent}/cumulus_library/studies/core", stats_build=False, diff --git a/tests/test_psm.py b/tests/test_psm.py index 9429be85..dbfc9ac0 100644 --- a/tests/test_psm.py +++ b/tests/test_psm.py @@ -7,7 +7,7 @@ from freezegun import freeze_time -from cumulus_library.cli import StudyBuilder +from cumulus_library.cli import StudyRunner from cumulus_library.statistics.psm import PsmBuilder @@ -62,7 +62,7 @@ def test_psm_create( expected_first_record, expected_last_record, ): - builder = StudyBuilder(mock_db_stats, data_path=Path(tmp_path)) + builder = StudyRunner(mock_db_stats, data_path=Path(tmp_path)) psm = PsmBuilder( f"{Path(__file__).parent}/test_data/psm/{toml_def}", Path(tmp_path) )