From e0c072891053c9dc0ddca75a3dbc0d6af96d5e56 Mon Sep 17 00:00:00 2001 From: matt garber Date: Tue, 19 Nov 2024 09:23:22 -0500 Subject: [PATCH] Converted manifest to single list of files (#317) * Converted manifest to single list of files * coverage, PR feedback --- cumulus_library/actions/builder.py | 158 ++++++++---------- .../builders/protected_table_builder.py | 34 +++- cumulus_library/cli.py | 12 +- cumulus_library/enums.py | 6 + cumulus_library/studies/core/manifest.toml | 12 +- .../studies/discovery/manifest.toml | 2 +- cumulus_library/study_manifest.py | 46 ++++- docs/creating-studies.md | 51 ++---- docs/study-list.md | 2 + docs/targeted-builders.md | 49 ------ docs/workflows.md | 40 +++++ .../propensity-score-matching.md | 0 docs/{builders => workflows}/valueset.md | 0 tests/test_actions.py | 7 +- tests/test_cli.py | 18 ++ tests/test_data/psm/manifest.toml | 6 +- .../manifest.toml | 7 + .../study_invalid_unsupported_file/test.txt | 1 + .../study_python_counts_valid/manifest.toml | 2 +- .../study_python_valid/manifest.toml | 2 +- .../test_data/study_static_file/manifest.toml | 2 +- tests/test_data/study_valid/manifest.toml | 2 +- tests/test_study_parser.py | 2 +- 23 files changed, 231 insertions(+), 230 deletions(-) delete mode 100644 docs/targeted-builders.md create mode 100644 docs/workflows.md rename docs/{builders => workflows}/propensity-score-matching.md (100%) rename docs/{builders => workflows}/valueset.md (100%) create mode 100644 tests/test_data/study_invalid_unsupported_file/manifest.toml create mode 100644 tests/test_data/study_invalid_unsupported_file/test.txt diff --git a/cumulus_library/actions/builder.py b/cumulus_library/actions/builder.py index dc720c99..119463a9 100644 --- a/cumulus_library/actions/builder.py +++ b/cumulus_library/actions/builder.py @@ -135,60 +135,14 @@ def run_protected_table_builder( ) -def run_table_builder( - config: base_utils.StudyConfig, - manifest: study_manifest.StudyManifest, - *, - db_parser: databases.DatabaseParser = None, +def _run_workflow( + config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str ) -> None: - """Loads modules from a manifest and executes code via BaseTableBuilder + """Loads workflow config from toml definitions and executes workflow :param config: a StudyConfig object :param manifest: a StudyManifest object - :keyword db_parser: an object implementing DatabaseParser for the target database """ - for file in manifest.get_table_builder_file_list(): - _load_and_execute_builder( - config=config, - manifest=manifest, - filename=file, - db_parser=db_parser, - ) - - -def run_counts_builders( - config: base_utils.StudyConfig, - manifest: study_manifest.StudyManifest, -) -> None: - """Loads counts modules from a manifest and executes code via BaseTableBuilder - - While a count is a form of statistics, it is treated separately from other - statistics because it is, by design, always going to be static against a - given dataset, where other statistical methods may use sampling techniques - or adjustable input parameters that may need to be preserved for later review. - - :param config: a StudyConfig object - :param manifest: a StudyManifest object - """ - for file in manifest.get_counts_builder_file_list(): - _load_and_execute_builder( - config=config, - manifest=manifest, - filename=file, - ) - - -def run_statistics_builders( - config: base_utils.StudyConfig, - manifest: study_manifest.StudyManifest, -) -> None: - """Loads statistics modules from toml definitions and executes - - :param config: a StudyConfig object - :param manifest: a StudyManifest object - """ - if len(manifest.get_statistics_file_list()) == 0: - return existing_stats = [] if not config.stats_build: existing_stats = ( @@ -199,40 +153,41 @@ def run_statistics_builders( ) .fetchall() ) - for file in manifest.get_statistics_file_list(): - # This open is a bit redundant with the open inside of the PSM builder, - # but we're letting it slide so that builders function similarly - # across the board - safe_timestamp = base_utils.get_tablename_safe_iso_timestamp() - toml_path = pathlib.Path(f"{manifest._study_path}/{file}") - with open(toml_path, "rb") as file: - stats_config = tomllib.load(file) - config_type = stats_config["config_type"] - target_table = stats_config.get("target_table", stats_config.get("table_prefix", "")) - - if (target_table,) in existing_stats and not config.stats_build: - continue - if config_type == "psm": + # This open is a bit redundant with the open inside of the PSM builder, + # but we're letting it slide so that builders function similarly + # across the board + safe_timestamp = base_utils.get_tablename_safe_iso_timestamp() + toml_path = pathlib.Path(f"{manifest._study_path}/{filename}") + with open(toml_path, "rb") as file: + workflow_config = tomllib.load(file) + config_type = workflow_config["config_type"] + target_table = workflow_config.get("target_table", workflow_config.get("table_prefix", "")) + + if (target_table,) in existing_stats and not config.stats_build: + return + match config_type: + case "psm": builder = psm_builder.PsmBuilder( toml_config_path=toml_path, - config=stats_config, + config=workflow_config, data_path=manifest.data_path / f"{manifest.get_study_prefix()}/psm", ) - elif config_type == "valueset": + case "valueset": builder = valueset_builder.ValuesetBuilder( toml_config_path=toml_path, - config=stats_config, + config=workflow_config, data_path=manifest.data_path / f"{manifest.get_study_prefix()}/valueset", ) - else: - raise errors.StudyManifestParsingError( # pragma: no cover - f"{toml_path} references an invalid statistics type {config_type}." + case _: # pragma: no cover + raise errors.StudyManifestParsingError( + f"{toml_path} references an invalid workflow type {config_type}." ) - builder.execute_queries( - config=config, - manifest=manifest, - table_suffix=safe_timestamp, - ) + builder.execute_queries( + config=config, + manifest=manifest, + table_suffix=safe_timestamp, + ) + if config_type in set(item.value for item in enums.StatisticsTypes): log_utils.log_statistics( config=config, manifest=manifest, @@ -242,11 +197,11 @@ def run_statistics_builders( ) -def run_matching_table_builder( +def build_matching_files( config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, *, - builder: str, + builder: str | None, db_parser: databases.DatabaseParser = None, ): """targets all table builders matching a target string for running @@ -256,36 +211,55 @@ def run_matching_table_builder( :keyword builder: filename of a module implementing a TableBuilder :keyword db_parser: an object implementing DatabaseParser for the target database""" all_generators = manifest.get_all_generators() - for file in all_generators: - if builder and file.find(builder) == -1: - continue - _load_and_execute_builder( - config=config, - manifest=manifest, - filename=file, - db_parser=db_parser, - ) + matches = [] + if not builder: # pragma: no cover + matches = all_generators + else: + for file in all_generators: + if file.find(builder) != -1: + matches.append(file) + build_study(config, manifest, db_parser=db_parser, file_list=matches) def build_study( config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, *, + db_parser: databases.DatabaseParser = None, continue_from: str | None = None, + file_list: list | None = None, ) -> list: """Creates tables in the schema by iterating through the sql_config.file_names :param config: a StudyConfig object :param manifest: a StudyManifest object - :keyword continue_from: Name of a sql file to resume table creation from + :keyword continue_from: Name of a file to resume table creation from :returns: loaded queries (for unit testing only) """ + if file_list is None: + file_list = manifest.get_file_list(continue_from) + for file in file_list: + if file.endswith(".py"): + _load_and_execute_builder( + config=config, + manifest=manifest, + filename=file, + db_parser=db_parser, + ) + elif file.endswith(".toml"): + _run_workflow(config=config, manifest=manifest, filename=file) + elif file.endswith(".sql"): + _run_raw_queries(config=config, manifest=manifest, filename=file) + else: + raise errors.StudyManifestParsingError + + +def _run_raw_queries( + config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str +): queries = [] - for file in manifest.get_sql_file_list(continue_from): - for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{file}")): - queries.append([query, file]) - if len(queries) == 0: - return [] + for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{filename}")): + queries.append([query, filename]) for query in queries: query[0] = base_utils.update_query_if_schema_specified(query[0], manifest) query[0] = query[0].replace( @@ -298,7 +272,7 @@ def build_study( # We want to only show a progress bar if we are :not: printing SQL lines with base_utils.get_progress_bar(disable=config.verbose) as progress: task = progress.add_task( - f"Creating {manifest.get_study_prefix()} study in db...", + f"Building tables from {filename}...", total=len(queries), visible=not config.verbose, ) diff --git a/cumulus_library/builders/protected_table_builder.py b/cumulus_library/builders/protected_table_builder.py index 9ca8e3c9..94bda02c 100644 --- a/cumulus_library/builders/protected_table_builder.py +++ b/cumulus_library/builders/protected_table_builder.py @@ -1,5 +1,8 @@ """Builder for creating tables for tracking state/logging changes""" +import pathlib +import tomllib + from cumulus_library import ( BaseTableBuilder, base_utils, @@ -64,12 +67,25 @@ def prepare_queries( TRANSACTION_COLS_TYPES, ) ) - if manifest._study_config.get("statistics_config"): - self.queries.append( - base_templates.get_ctas_empty_query( - db_schema, - statistics, - STATISTICS_COLS, - STATISTICS_COLS_TYPES, - ) - ) + files = manifest.get_all_workflows() + if len(files) == 0: + return + stats_types = set(item.value for item in enums.StatisticsTypes) + # In this loop, we are just checking to see if :any: workflow is a stats + # type workflow - if so, we'll create a table to hold data of stats runs + # (if it doesn't already exist) outside of the study lifecycle for + # persistence reasons + for file in files: + toml_path = pathlib.Path(f"{manifest._study_path}/{file}") + with open(toml_path, "rb") as file: + workflow_config = tomllib.load(file) + if workflow_config["config_type"] in stats_types: + self.queries.append( + base_templates.get_ctas_empty_query( + db_schema, + statistics, + STATISTICS_COLS, + STATISTICS_COLS_TYPES, + ) + ) + return diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py index 22b1471c..f2cd0b71 100755 --- a/cumulus_library/cli.py +++ b/cumulus_library/cli.py @@ -113,7 +113,6 @@ def clean_and_build_study( config=self.get_config(manifest), manifest=manifest, ) - builder.run_table_builder(config=self.get_config(manifest), manifest=manifest) else: log_utils.log_transaction( @@ -127,11 +126,6 @@ def clean_and_build_study( manifest=manifest, continue_from=continue_from, ) - builder.run_counts_builders(config=self.get_config(manifest), manifest=manifest) - builder.run_statistics_builders( - config=self.get_config(manifest), - manifest=manifest, - ) log_utils.log_transaction( config=self.get_config(manifest), manifest=manifest, @@ -150,7 +144,7 @@ def clean_and_build_study( ) raise e - def run_matching_table_builder( + def build_matching_files( self, target: pathlib.Path, table_builder_name: str, @@ -164,7 +158,7 @@ def run_matching_table_builder( :param options: The dictionary of study-specific options """ manifest = study_manifest.StudyManifest(target, options=options) - builder.run_matching_table_builder( + builder.build_matching_files( config=self.get_config(manifest), manifest=manifest, builder=table_builder_name, @@ -330,7 +324,7 @@ def run_cli(args: dict): elif args["action"] == "build": for target in args["target"]: if args["builder"]: - runner.run_matching_table_builder( + runner.build_matching_files( study_dict[target], args["builder"], options=args["options"] ) else: diff --git a/cumulus_library/enums.py b/cumulus_library/enums.py index e3c7e899..b1891acf 100644 --- a/cumulus_library/enums.py +++ b/cumulus_library/enums.py @@ -18,6 +18,12 @@ class ProtectedTables(enum.Enum): TRANSACTIONS = "lib_transactions" +class StatisticsTypes(enum.Enum): + """A subset of workflows that create statistics sampling artifacts""" + + PSM = "psm" + + class LogStatuses(enum.Enum): DEBUG = "debug" ERROR = "error" diff --git a/cumulus_library/studies/core/manifest.toml b/cumulus_library/studies/core/manifest.toml index fc192710..3e478a76 100644 --- a/cumulus_library/studies/core/manifest.toml +++ b/cumulus_library/studies/core/manifest.toml @@ -1,6 +1,6 @@ study_prefix = "core" -[table_builder_config] +[file_config] file_names = [ "builder_prereq_tables.py", "builder_allergyintolerance.py", @@ -9,17 +9,9 @@ file_names = [ "builder_encounter.py", "builder_documentreference.py", "builder_medicationrequest.py", - "builder_observation.py" -] - -[sql_config] -file_names = [ + "builder_observation.py", "observation_type.sql", "meta_date.sql", -] - -[counts_builder_config] -file_names = [ "count_core.py" ] diff --git a/cumulus_library/studies/discovery/manifest.toml b/cumulus_library/studies/discovery/manifest.toml index 354ea4c3..a9b51dc6 100644 --- a/cumulus_library/studies/discovery/manifest.toml +++ b/cumulus_library/studies/discovery/manifest.toml @@ -1,6 +1,6 @@ study_prefix = "discovery" -[table_builder_config] +[file_config] file_names = [ "code_detection.py", ] diff --git a/cumulus_library/study_manifest.py b/cumulus_library/study_manifest.py index 6b9349a1..ce20b1fa 100644 --- a/cumulus_library/study_manifest.py +++ b/cumulus_library/study_manifest.py @@ -91,6 +91,31 @@ def get_dedicated_schema(self) -> str | None: options = self._study_config.get("advanced_options", {}) return options.get("dedicated_schema") + def get_file_list(self, continue_from: str | None = None) -> list[str] | None: + """Reads the contents of the file_config array from the manifest + + :returns: An array of files from the manifest, or None if not found. + """ + config = self._study_config.get("file_config", {}) + files = config.get("file_names", []) or [] + if not files: + files = ( + self.get_table_builder_file_list() + + self.get_sql_file_list() + + self.get_counts_builder_file_list() + + self.get_statistics_file_list() + ) + if continue_from: + for pos, file in enumerate(files): + if continue_from.split(".", 1)[0] == file.split(".", 1)[0]: + files = files[pos:] + break + else: + raise errors.StudyManifestParsingError(f"No files matching '{continue_from}' found") + return files + + # The following four functions are considered deprecated, and can be removed + # after we update studies to use the new methodology def get_sql_file_list(self, continue_from: str | None = None) -> list[str] | None: """Reads the contents of the sql_config array from the manifest @@ -98,7 +123,7 @@ def get_sql_file_list(self, continue_from: str | None = None) -> list[str] | Non """ sql_config = self._study_config.get("sql_config", {}) sql_files = sql_config.get("file_names", []) or [] - if continue_from: + if continue_from: # pragma: no cover for pos, file in enumerate(sql_files): if continue_from.replace(".sql", "") == file.replace(".sql", ""): sql_files = sql_files[pos:] @@ -134,6 +159,8 @@ def get_statistics_file_list(self) -> list[str] | None: stats_config = self._study_config.get("statistics_config", {}) return stats_config.get("file_names", []) + # End of deprecated section + def get_export_table_list(self) -> list[ManifestExport] | None: """Reads the contents of the export_list array from the manifest @@ -177,13 +204,18 @@ def get_export_table_list(self) -> list[ManifestExport] | None: found_name.add(export.name) return export_table_list + def get_all_files(self, file_type: str): + """Convenience method for getting files of a type from a manifest""" + files = self.get_file_list() + return [file for file in files if file.endswith(file_type)] + def get_all_generators(self) -> list[str]: - """Convenience method for getting files that generate sql queries""" - return ( - self.get_table_builder_file_list() - + self.get_counts_builder_file_list() - + self.get_statistics_file_list() - ) + """Convenience method for getting builder-based files""" + return self.get_all_files(".py") + + def get_all_workflows(self) -> list[str]: + """Convenience method for getting workflow config files""" + return self.get_all_files(".toml") def get_prefix_with_seperator(self) -> str: """Convenience method for getting the appropriate prefix for tables""" diff --git a/docs/creating-studies.md b/docs/creating-studies.md index 49579be5..42acb12a 100644 --- a/docs/creating-studies.md +++ b/docs/creating-studies.md @@ -50,34 +50,29 @@ what each section does: # be the same name as the folder the study definition is in. study_prefix = "my_study" -# For most use cases, this should not be required, but if you need to programmatically -# build tables, you can provide a list of files implementing BaseTableBuilder. -# See the core study for examples of this pattern. These run before -# any SQL execution - -# [table_builder_config] -# file_names = [ -# "my_table_builder.py", -# ] # The following section describes all tables that should be generated directly # from SQL files. -[sql_config] +[file_config] -# 'file_names' defines a list of sql files to execute, in order, in this folder. -# Recommended order: Any ancillary config (like a list of condition codes), -# tables/view selecting subsets of data from FHIR data, tables/views creating -# summary statistics. +# 'file_names' defines a list of files to execute, in order, in this folder. +# Three file types are supported: +# - Raw SQL files +# - Python files, which should contain a class that is based off of +# BaseTableBuilder (or a derivative) from builders/base_table_builder.py +# - TOML files, which provide a set of configuration params to a workflow +# These files will be executed in the order provided. file_names = [ "setup.sql", + "builder.py", "lab_observations.sql", "counts.sql", - "date_range.sql" + "date_range.sql", + "stats_config.toml" ] - # The following section defines parameters related to exporting study data from # your athena database @@ -109,30 +104,6 @@ meta_list = [ "my_study__meta_version", ] -# For generating counts table in a more standardized manner, we have a class in the -# main library you can extend that will handle most of the logic of assembling -# queries for you. We use this pattern for generating the core tables, as well -# other studies authored inside BCH. These will always be run after any other -# SQL queries have been generated - -# [counts_builder_config] -# file_names = [ -# "count.py" -# ] - -# For more specialized statistics, we provide a toml-based config entrypoint. The -# details of these configs will vary, depending on which statistical method you're -# invoking. For more details, see the statistics section of the docs for a list of -# supported approaches. -# These will run last, so all the data in your study will exist by the time these -# are invoked. - -# [statistics_config] -# file_names = -# [ -# "psm_config.toml" -# ] - # The following section is for advanced/unusual study use cases # [options] diff --git a/docs/study-list.md b/docs/study-list.md index 63d60884..701f5501 100644 --- a/docs/study-list.md +++ b/docs/study-list.md @@ -21,6 +21,8 @@ for more details and any associated publications. - [Hypertension](https://github.com/smart-on-fhir/cumulus-library-hypertension) - [UMLS vocabulary](https://github.com/smart-on-fhir/cumulus-library-umls) - [Data Metrics](https://github.com/smart-on-fhir/cumulus-library-data-metrics) +- [RxNorm vocabulary](https://github.com/smart-on-fhir/cumulus-library-rxnorm) +- [ICD-10 accuracy for suicidal ideation](https://github.com/smart-on-fhir/cumulus-library-suicidality-icd10) ## Third party studies diff --git a/docs/targeted-builders.md b/docs/targeted-builders.md deleted file mode 100644 index c8b49243..00000000 --- a/docs/targeted-builders.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -title: Targeted Builders -parent: Library -nav_order: 8 -has_children: true -# audience: Clinical researchers interested in publications -# type: reference ---- - -# Statistics reference - -This page contains detailed documentation on table building utilities utilities provided -for use in Cumulus studies. - -## Specific builder modules - -- [Propensity Score Matching](builders/propensity-score-matching.md). -- [Valueset Compilation](builders/valueset.md). - -## General usage guidelines - -You can invoke a statistic task from your study's manifest the same way that you -would run SQL or python files - the only difference is that you point it at another -toml file, which allows stats configs to have different input parameters depending -on the analysis you're trying to perform. - -In your manifest, you'd add a section like this: -```toml -[statistics_config] -file_names = [ - "psm_config.toml" -] -``` - -We'll use this as a way to load statistics configurations. Since some of these -statistical methods may be quasi-experimental (i.e. perform a random sampling), -we will persist these outputs outside of a study lifecycle. - -The first time you run a `build` against a study with a statistics config that -has not previously been run before, it will be executed, and it should generate -a table in your database with a timestamp, along with a view that points to that -table. Subsequent updates will not replace that data, unless you provide the -`--statistics` argument. If you do, it will create a new timestamped table, -point the view to your newest table, and leave the old one in place in case -you need to get those results back at a later point in time. - -When you `clean` a study with statistics, by default all statistics artifacts -will be ignored, and persist in the database. If you want to remove these, -the `--statistics` argument will remove all these stats artifacts. \ No newline at end of file diff --git a/docs/workflows.md b/docs/workflows.md new file mode 100644 index 00000000..7b15a281 --- /dev/null +++ b/docs/workflows.md @@ -0,0 +1,40 @@ +--- +title: Targeted Builders +parent: Library +nav_order: 8 +has_children: true +# audience: Clinical researchers interested in publications +# type: reference +--- + +# Workflow reference + +This page contains detailed documentation on table building utilities utilities provided +for use in Cumulus studies. + +## Specific workflows + +- [Propensity Score Matching](workflows/propensity-score-matching.md). +- [Valueset Compilation](workflows/valueset.md). + +## General usage guidelines + +You can invoke a workflow from your study's manifest the same way that you +would run SQL or python files. All toml files in a manifest's study list are +assumed to be workflow configurations + +Since some of these workflow methods may be quasi-experimental statistics +(i.e. perform a random sampling), +in those cases, we will persist these outputs outside of a study lifecycle. + +The first time you run a `build` against a study with a workflow with statistics that +has not previously been run before, it will be executed, and it should generate +a table in your database with a timestamp, along with a view that points to that +table. Subsequent updates will not replace that data, unless you provide the +`--statistics` argument. If you do, it will create a new timestamped table, +point the view to your newest table, and leave the old one in place in case +you need to get those results back at a later point in time. + +When you `clean` a study with a workflow that generates statistics, by default all +statistics artifacts will be ignored, and persist in the database. If you want to +remove these, the `--statistics` argument will remove all these stats artifacts. \ No newline at end of file diff --git a/docs/builders/propensity-score-matching.md b/docs/workflows/propensity-score-matching.md similarity index 100% rename from docs/builders/propensity-score-matching.md rename to docs/workflows/propensity-score-matching.md diff --git a/docs/builders/valueset.md b/docs/workflows/valueset.md similarity index 100% rename from docs/builders/valueset.md rename to docs/workflows/valueset.md diff --git a/tests/test_actions.py b/tests/test_actions.py index 9db70b87..9e3a27aa 100644 --- a/tests/test_actions.py +++ b/tests/test_actions.py @@ -172,7 +172,7 @@ def test_run_protected_table_builder(mock_db_config, study_path, stats): def test_table_builder(mock_db_config, study_path, verbose, expects, raises): with raises: manifest = study_manifest.StudyManifest(pathlib.Path(study_path)) - builder.run_table_builder( + builder.build_study( config=mock_db_config, manifest=manifest, ) @@ -310,7 +310,6 @@ def test_run_psm_statistics_builders( view_name="psm_test__psm_encounter_covariate", ) builder.build_study(config=config, manifest=manifest) - builder.run_statistics_builders(config=config, manifest=manifest) tables = ( mock_db_stats_config.db.cursor() .execute("SELECT distinct(table_name) FROM information_schema.tables") @@ -334,7 +333,7 @@ def test_invoke_valueset_builder(mock_builder, mock_db_config, tmp_path): config = base_utils.StudyConfig( db=mock_db_config.db, schema=mock_db_config.schema, stats_build=True ) - builder.run_statistics_builders( + builder.build_study( config=config, manifest=manifest, ) @@ -568,7 +567,7 @@ def test_generate_md(mock_db_config, tmp_path): manifest = study_manifest.StudyManifest( study_path=pathlib.Path(f"{tmp_path}/study_python_valid/") ) - builder.run_table_builder(config=mock_db_config, manifest=manifest) + builder.build_study(config=mock_db_config, manifest=manifest) file_generator.run_generate_markdown(config=mock_db_config, manifest=manifest) with open(f"{tmp_path}/study_python_valid/study_python_valid_generated.md") as f: generated_md = f.read() diff --git a/tests/test_cli.py b/tests/test_cli.py index 3a68d2ab..43b9247d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -426,6 +426,24 @@ def test_clean(tmp_path, args, expected, raises): 2, pytest.raises(errors.StudyManifestParsingError), ), + ( + [ + "build", + "-t", + "study_invalid_unsupported_file", + "-s", + "tests/test_data/study_invalid_unsupported_file/", + ], + [ + "export", + "-t", + "study_invalid_unsupported_file", + "-s", + "tests/test_data/study_invalid_unsupported_file/", + ], + 2, + pytest.raises(errors.StudyManifestParsingError), + ), ], ) def test_cli_executes_queries(tmp_path, build_args, export_args, expected_tables, raises): diff --git a/tests/test_data/psm/manifest.toml b/tests/test_data/psm/manifest.toml index 5d38c1bb..77fcbf90 100644 --- a/tests/test_data/psm/manifest.toml +++ b/tests/test_data/psm/manifest.toml @@ -1,7 +1,5 @@ study_prefix = "psm_test" -[sql_config] -file_names = ["psm_cohort.sql"] +[file_config] +file_names = ["psm_cohort.sql","psm_config.toml"] -[statistics_config] -file_names = ["psm_config.toml"] diff --git a/tests/test_data/study_invalid_unsupported_file/manifest.toml b/tests/test_data/study_invalid_unsupported_file/manifest.toml new file mode 100644 index 00000000..7be81de3 --- /dev/null +++ b/tests/test_data/study_invalid_unsupported_file/manifest.toml @@ -0,0 +1,7 @@ +study_prefix = "study_invalid_unsupported_file" + +[file_config] +file_names = ["test.txt"] + +[export_config] +export_list = ["study_invalid_unsupported_file__table"] diff --git a/tests/test_data/study_invalid_unsupported_file/test.txt b/tests/test_data/study_invalid_unsupported_file/test.txt new file mode 100644 index 00000000..7ac89884 --- /dev/null +++ b/tests/test_data/study_invalid_unsupported_file/test.txt @@ -0,0 +1 @@ +This should not be a study file \ No newline at end of file diff --git a/tests/test_data/study_python_counts_valid/manifest.toml b/tests/test_data/study_python_counts_valid/manifest.toml index 021d38f5..9cf2f81e 100644 --- a/tests/test_data/study_python_counts_valid/manifest.toml +++ b/tests/test_data/study_python_counts_valid/manifest.toml @@ -1,6 +1,6 @@ study_prefix = "study_python_counts_valid" -[counts_builder_config] +[file_config] file_names = [ "module1.py", "module1.py", "module2.py" ] diff --git a/tests/test_data/study_python_valid/manifest.toml b/tests/test_data/study_python_valid/manifest.toml index 6509804b..6baad106 100644 --- a/tests/test_data/study_python_valid/manifest.toml +++ b/tests/test_data/study_python_valid/manifest.toml @@ -1,6 +1,6 @@ study_prefix = "study_python_valid" -[table_builder_config] +[file_config] file_names = ["module1.py", "module1.py", "module2.py"] [export_config] diff --git a/tests/test_data/study_static_file/manifest.toml b/tests/test_data/study_static_file/manifest.toml index dac0a0bd..56027322 100644 --- a/tests/test_data/study_static_file/manifest.toml +++ b/tests/test_data/study_static_file/manifest.toml @@ -1,6 +1,6 @@ study_prefix = "study_static_file" -[table_builder_config] +[file_config] file_names = [ "static_file_builder.py", ] diff --git a/tests/test_data/study_valid/manifest.toml b/tests/test_data/study_valid/manifest.toml index 05ad4b48..50daf5a9 100644 --- a/tests/test_data/study_valid/manifest.toml +++ b/tests/test_data/study_valid/manifest.toml @@ -1,6 +1,6 @@ study_prefix = "study_valid" -[sql_config] +[file_config] file_names = [ "test.sql", "test2.sql" diff --git a/tests/test_study_parser.py b/tests/test_study_parser.py index 1acafe56..2c0556c9 100644 --- a/tests/test_study_parser.py +++ b/tests/test_study_parser.py @@ -16,7 +16,7 @@ ( "test_data/study_valid", ( - "{'study_prefix': 'study_valid', 'sql_config': {'file_names': " + "{'study_prefix': 'study_valid', 'file_config': {'file_names': " "['test.sql', 'test2.sql']}, 'export_config': {" "'count_list': ['study_valid__table', 'study_valid__table2']}}" ),