From e0c072891053c9dc0ddca75a3dbc0d6af96d5e56 Mon Sep 17 00:00:00 2001
From: matt garber <Matthew.Garber@childrens.harvard.edu>
Date: Tue, 19 Nov 2024 09:23:22 -0500
Subject: [PATCH] Converted manifest to single list of files (#317)

* Converted manifest to single list of files

* coverage, PR feedback
---
 cumulus_library/actions/builder.py            | 158 ++++++++----------
 .../builders/protected_table_builder.py       |  34 +++-
 cumulus_library/cli.py                        |  12 +-
 cumulus_library/enums.py                      |   6 +
 cumulus_library/studies/core/manifest.toml    |  12 +-
 .../studies/discovery/manifest.toml           |   2 +-
 cumulus_library/study_manifest.py             |  46 ++++-
 docs/creating-studies.md                      |  51 ++----
 docs/study-list.md                            |   2 +
 docs/targeted-builders.md                     |  49 ------
 docs/workflows.md                             |  40 +++++
 .../propensity-score-matching.md              |   0
 docs/{builders => workflows}/valueset.md      |   0
 tests/test_actions.py                         |   7 +-
 tests/test_cli.py                             |  18 ++
 tests/test_data/psm/manifest.toml             |   6 +-
 .../manifest.toml                             |   7 +
 .../study_invalid_unsupported_file/test.txt   |   1 +
 .../study_python_counts_valid/manifest.toml   |   2 +-
 .../study_python_valid/manifest.toml          |   2 +-
 .../test_data/study_static_file/manifest.toml |   2 +-
 tests/test_data/study_valid/manifest.toml     |   2 +-
 tests/test_study_parser.py                    |   2 +-
 23 files changed, 231 insertions(+), 230 deletions(-)
 delete mode 100644 docs/targeted-builders.md
 create mode 100644 docs/workflows.md
 rename docs/{builders => workflows}/propensity-score-matching.md (100%)
 rename docs/{builders => workflows}/valueset.md (100%)
 create mode 100644 tests/test_data/study_invalid_unsupported_file/manifest.toml
 create mode 100644 tests/test_data/study_invalid_unsupported_file/test.txt

diff --git a/cumulus_library/actions/builder.py b/cumulus_library/actions/builder.py
index dc720c99..119463a9 100644
--- a/cumulus_library/actions/builder.py
+++ b/cumulus_library/actions/builder.py
@@ -135,60 +135,14 @@ def run_protected_table_builder(
     )
 
 
-def run_table_builder(
-    config: base_utils.StudyConfig,
-    manifest: study_manifest.StudyManifest,
-    *,
-    db_parser: databases.DatabaseParser = None,
+def _run_workflow(
+    config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str
 ) -> None:
-    """Loads modules from a manifest and executes code via BaseTableBuilder
+    """Loads workflow config from toml definitions and executes workflow
 
     :param config: a StudyConfig object
     :param manifest: a StudyManifest object
-    :keyword db_parser: an object implementing DatabaseParser for the target database
     """
-    for file in manifest.get_table_builder_file_list():
-        _load_and_execute_builder(
-            config=config,
-            manifest=manifest,
-            filename=file,
-            db_parser=db_parser,
-        )
-
-
-def run_counts_builders(
-    config: base_utils.StudyConfig,
-    manifest: study_manifest.StudyManifest,
-) -> None:
-    """Loads counts modules from a manifest and executes code via BaseTableBuilder
-
-    While a count is a form of statistics, it is treated separately from other
-    statistics because it is, by design, always going to be static against a
-    given dataset, where other statistical methods may use sampling techniques
-    or adjustable input parameters that may need to be preserved for later review.
-
-    :param config: a StudyConfig object
-    :param manifest: a StudyManifest object
-    """
-    for file in manifest.get_counts_builder_file_list():
-        _load_and_execute_builder(
-            config=config,
-            manifest=manifest,
-            filename=file,
-        )
-
-
-def run_statistics_builders(
-    config: base_utils.StudyConfig,
-    manifest: study_manifest.StudyManifest,
-) -> None:
-    """Loads statistics modules from toml definitions and executes
-
-    :param config: a StudyConfig object
-    :param manifest: a StudyManifest object
-    """
-    if len(manifest.get_statistics_file_list()) == 0:
-        return
     existing_stats = []
     if not config.stats_build:
         existing_stats = (
@@ -199,40 +153,41 @@ def run_statistics_builders(
             )
             .fetchall()
         )
-    for file in manifest.get_statistics_file_list():
-        # This open is a bit redundant with the open inside of the PSM builder,
-        # but we're letting it slide so that builders function similarly
-        # across the board
-        safe_timestamp = base_utils.get_tablename_safe_iso_timestamp()
-        toml_path = pathlib.Path(f"{manifest._study_path}/{file}")
-        with open(toml_path, "rb") as file:
-            stats_config = tomllib.load(file)
-            config_type = stats_config["config_type"]
-            target_table = stats_config.get("target_table", stats_config.get("table_prefix", ""))
-
-        if (target_table,) in existing_stats and not config.stats_build:
-            continue
-        if config_type == "psm":
+    # This open is a bit redundant with the open inside of the PSM builder,
+    # but we're letting it slide so that builders function similarly
+    # across the board
+    safe_timestamp = base_utils.get_tablename_safe_iso_timestamp()
+    toml_path = pathlib.Path(f"{manifest._study_path}/{filename}")
+    with open(toml_path, "rb") as file:
+        workflow_config = tomllib.load(file)
+        config_type = workflow_config["config_type"]
+        target_table = workflow_config.get("target_table", workflow_config.get("table_prefix", ""))
+
+    if (target_table,) in existing_stats and not config.stats_build:
+        return
+    match config_type:
+        case "psm":
             builder = psm_builder.PsmBuilder(
                 toml_config_path=toml_path,
-                config=stats_config,
+                config=workflow_config,
                 data_path=manifest.data_path / f"{manifest.get_study_prefix()}/psm",
             )
-        elif config_type == "valueset":
+        case "valueset":
             builder = valueset_builder.ValuesetBuilder(
                 toml_config_path=toml_path,
-                config=stats_config,
+                config=workflow_config,
                 data_path=manifest.data_path / f"{manifest.get_study_prefix()}/valueset",
             )
-        else:
-            raise errors.StudyManifestParsingError(  # pragma: no cover
-                f"{toml_path} references an invalid statistics type {config_type}."
+        case _:  # pragma: no cover
+            raise errors.StudyManifestParsingError(
+                f"{toml_path} references an invalid workflow type {config_type}."
             )
-        builder.execute_queries(
-            config=config,
-            manifest=manifest,
-            table_suffix=safe_timestamp,
-        )
+    builder.execute_queries(
+        config=config,
+        manifest=manifest,
+        table_suffix=safe_timestamp,
+    )
+    if config_type in set(item.value for item in enums.StatisticsTypes):
         log_utils.log_statistics(
             config=config,
             manifest=manifest,
@@ -242,11 +197,11 @@ def run_statistics_builders(
         )
 
 
-def run_matching_table_builder(
+def build_matching_files(
     config: base_utils.StudyConfig,
     manifest: study_manifest.StudyManifest,
     *,
-    builder: str,
+    builder: str | None,
     db_parser: databases.DatabaseParser = None,
 ):
     """targets all table builders matching a target string for running
@@ -256,36 +211,55 @@ def run_matching_table_builder(
     :keyword builder: filename of a module implementing a TableBuilder
     :keyword db_parser: an object implementing DatabaseParser for the target database"""
     all_generators = manifest.get_all_generators()
-    for file in all_generators:
-        if builder and file.find(builder) == -1:
-            continue
-        _load_and_execute_builder(
-            config=config,
-            manifest=manifest,
-            filename=file,
-            db_parser=db_parser,
-        )
+    matches = []
+    if not builder:  # pragma: no cover
+        matches = all_generators
+    else:
+        for file in all_generators:
+            if file.find(builder) != -1:
+                matches.append(file)
+    build_study(config, manifest, db_parser=db_parser, file_list=matches)
 
 
 def build_study(
     config: base_utils.StudyConfig,
     manifest: study_manifest.StudyManifest,
     *,
+    db_parser: databases.DatabaseParser = None,
     continue_from: str | None = None,
+    file_list: list | None = None,
 ) -> list:
     """Creates tables in the schema by iterating through the sql_config.file_names
 
     :param config: a StudyConfig object
     :param manifest: a StudyManifest object
-    :keyword continue_from: Name of a sql file to resume table creation from
+    :keyword continue_from: Name of a file to resume table creation from
     :returns: loaded queries (for unit testing only)
     """
+    if file_list is None:
+        file_list = manifest.get_file_list(continue_from)
+    for file in file_list:
+        if file.endswith(".py"):
+            _load_and_execute_builder(
+                config=config,
+                manifest=manifest,
+                filename=file,
+                db_parser=db_parser,
+            )
+        elif file.endswith(".toml"):
+            _run_workflow(config=config, manifest=manifest, filename=file)
+        elif file.endswith(".sql"):
+            _run_raw_queries(config=config, manifest=manifest, filename=file)
+        else:
+            raise errors.StudyManifestParsingError
+
+
+def _run_raw_queries(
+    config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str
+):
     queries = []
-    for file in manifest.get_sql_file_list(continue_from):
-        for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{file}")):
-            queries.append([query, file])
-    if len(queries) == 0:
-        return []
+    for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{filename}")):
+        queries.append([query, filename])
     for query in queries:
         query[0] = base_utils.update_query_if_schema_specified(query[0], manifest)
         query[0] = query[0].replace(
@@ -298,7 +272,7 @@ def build_study(
     # We want to only show a progress bar if we are :not: printing SQL lines
     with base_utils.get_progress_bar(disable=config.verbose) as progress:
         task = progress.add_task(
-            f"Creating {manifest.get_study_prefix()} study in db...",
+            f"Building tables from {filename}...",
             total=len(queries),
             visible=not config.verbose,
         )
diff --git a/cumulus_library/builders/protected_table_builder.py b/cumulus_library/builders/protected_table_builder.py
index 9ca8e3c9..94bda02c 100644
--- a/cumulus_library/builders/protected_table_builder.py
+++ b/cumulus_library/builders/protected_table_builder.py
@@ -1,5 +1,8 @@
 """Builder for creating tables for tracking state/logging changes"""
 
+import pathlib
+import tomllib
+
 from cumulus_library import (
     BaseTableBuilder,
     base_utils,
@@ -64,12 +67,25 @@ def prepare_queries(
                 TRANSACTION_COLS_TYPES,
             )
         )
-        if manifest._study_config.get("statistics_config"):
-            self.queries.append(
-                base_templates.get_ctas_empty_query(
-                    db_schema,
-                    statistics,
-                    STATISTICS_COLS,
-                    STATISTICS_COLS_TYPES,
-                )
-            )
+        files = manifest.get_all_workflows()
+        if len(files) == 0:
+            return
+        stats_types = set(item.value for item in enums.StatisticsTypes)
+        # In this loop, we are just checking to see if :any: workflow is a stats
+        # type workflow - if so, we'll create a table to hold data of stats runs
+        # (if it doesn't already exist) outside of the study lifecycle for
+        # persistence reasons
+        for file in files:
+            toml_path = pathlib.Path(f"{manifest._study_path}/{file}")
+            with open(toml_path, "rb") as file:
+                workflow_config = tomllib.load(file)
+                if workflow_config["config_type"] in stats_types:
+                    self.queries.append(
+                        base_templates.get_ctas_empty_query(
+                            db_schema,
+                            statistics,
+                            STATISTICS_COLS,
+                            STATISTICS_COLS_TYPES,
+                        )
+                    )
+                    return
diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py
index 22b1471c..f2cd0b71 100755
--- a/cumulus_library/cli.py
+++ b/cumulus_library/cli.py
@@ -113,7 +113,6 @@ def clean_and_build_study(
                     config=self.get_config(manifest),
                     manifest=manifest,
                 )
-                builder.run_table_builder(config=self.get_config(manifest), manifest=manifest)
 
             else:
                 log_utils.log_transaction(
@@ -127,11 +126,6 @@ def clean_and_build_study(
                 manifest=manifest,
                 continue_from=continue_from,
             )
-            builder.run_counts_builders(config=self.get_config(manifest), manifest=manifest)
-            builder.run_statistics_builders(
-                config=self.get_config(manifest),
-                manifest=manifest,
-            )
             log_utils.log_transaction(
                 config=self.get_config(manifest),
                 manifest=manifest,
@@ -150,7 +144,7 @@ def clean_and_build_study(
             )
             raise e
 
-    def run_matching_table_builder(
+    def build_matching_files(
         self,
         target: pathlib.Path,
         table_builder_name: str,
@@ -164,7 +158,7 @@ def run_matching_table_builder(
         :param options: The dictionary of study-specific options
         """
         manifest = study_manifest.StudyManifest(target, options=options)
-        builder.run_matching_table_builder(
+        builder.build_matching_files(
             config=self.get_config(manifest),
             manifest=manifest,
             builder=table_builder_name,
@@ -330,7 +324,7 @@ def run_cli(args: dict):
             elif args["action"] == "build":
                 for target in args["target"]:
                     if args["builder"]:
-                        runner.run_matching_table_builder(
+                        runner.build_matching_files(
                             study_dict[target], args["builder"], options=args["options"]
                         )
                     else:
diff --git a/cumulus_library/enums.py b/cumulus_library/enums.py
index e3c7e899..b1891acf 100644
--- a/cumulus_library/enums.py
+++ b/cumulus_library/enums.py
@@ -18,6 +18,12 @@ class ProtectedTables(enum.Enum):
     TRANSACTIONS = "lib_transactions"
 
 
+class StatisticsTypes(enum.Enum):
+    """A subset of workflows that create statistics sampling artifacts"""
+
+    PSM = "psm"
+
+
 class LogStatuses(enum.Enum):
     DEBUG = "debug"
     ERROR = "error"
diff --git a/cumulus_library/studies/core/manifest.toml b/cumulus_library/studies/core/manifest.toml
index fc192710..3e478a76 100644
--- a/cumulus_library/studies/core/manifest.toml
+++ b/cumulus_library/studies/core/manifest.toml
@@ -1,6 +1,6 @@
 study_prefix = "core"
 
-[table_builder_config]
+[file_config]
 file_names = [
     "builder_prereq_tables.py",
     "builder_allergyintolerance.py",
@@ -9,17 +9,9 @@ file_names = [
     "builder_encounter.py",
     "builder_documentreference.py",
     "builder_medicationrequest.py",
-    "builder_observation.py"
-]
-
-[sql_config]
-file_names = [
+    "builder_observation.py",
     "observation_type.sql",
     "meta_date.sql",
-]
-
-[counts_builder_config]
-file_names = [
     "count_core.py"
 ]
 
diff --git a/cumulus_library/studies/discovery/manifest.toml b/cumulus_library/studies/discovery/manifest.toml
index 354ea4c3..a9b51dc6 100644
--- a/cumulus_library/studies/discovery/manifest.toml
+++ b/cumulus_library/studies/discovery/manifest.toml
@@ -1,6 +1,6 @@
 study_prefix = "discovery"
 
-[table_builder_config]
+[file_config]
 file_names = [
     "code_detection.py",
 ]
diff --git a/cumulus_library/study_manifest.py b/cumulus_library/study_manifest.py
index 6b9349a1..ce20b1fa 100644
--- a/cumulus_library/study_manifest.py
+++ b/cumulus_library/study_manifest.py
@@ -91,6 +91,31 @@ def get_dedicated_schema(self) -> str | None:
         options = self._study_config.get("advanced_options", {})
         return options.get("dedicated_schema")
 
+    def get_file_list(self, continue_from: str | None = None) -> list[str] | None:
+        """Reads the contents of the file_config array from the manifest
+
+        :returns: An array of files from the manifest, or None if not found.
+        """
+        config = self._study_config.get("file_config", {})
+        files = config.get("file_names", []) or []
+        if not files:
+            files = (
+                self.get_table_builder_file_list()
+                + self.get_sql_file_list()
+                + self.get_counts_builder_file_list()
+                + self.get_statistics_file_list()
+            )
+        if continue_from:
+            for pos, file in enumerate(files):
+                if continue_from.split(".", 1)[0] == file.split(".", 1)[0]:
+                    files = files[pos:]
+                    break
+            else:
+                raise errors.StudyManifestParsingError(f"No files matching '{continue_from}' found")
+        return files
+
+    # The following four functions are considered deprecated, and can be removed
+    # after we update studies to use the new methodology
     def get_sql_file_list(self, continue_from: str | None = None) -> list[str] | None:
         """Reads the contents of the sql_config array from the manifest
 
@@ -98,7 +123,7 @@ def get_sql_file_list(self, continue_from: str | None = None) -> list[str] | Non
         """
         sql_config = self._study_config.get("sql_config", {})
         sql_files = sql_config.get("file_names", []) or []
-        if continue_from:
+        if continue_from:  # pragma: no cover
             for pos, file in enumerate(sql_files):
                 if continue_from.replace(".sql", "") == file.replace(".sql", ""):
                     sql_files = sql_files[pos:]
@@ -134,6 +159,8 @@ def get_statistics_file_list(self) -> list[str] | None:
         stats_config = self._study_config.get("statistics_config", {})
         return stats_config.get("file_names", [])
 
+    # End of deprecated section
+
     def get_export_table_list(self) -> list[ManifestExport] | None:
         """Reads the contents of the export_list array from the manifest
 
@@ -177,13 +204,18 @@ def get_export_table_list(self) -> list[ManifestExport] | None:
                 found_name.add(export.name)
         return export_table_list
 
+    def get_all_files(self, file_type: str):
+        """Convenience method for getting files of a type from a manifest"""
+        files = self.get_file_list()
+        return [file for file in files if file.endswith(file_type)]
+
     def get_all_generators(self) -> list[str]:
-        """Convenience method for getting files that generate sql queries"""
-        return (
-            self.get_table_builder_file_list()
-            + self.get_counts_builder_file_list()
-            + self.get_statistics_file_list()
-        )
+        """Convenience method for getting builder-based files"""
+        return self.get_all_files(".py")
+
+    def get_all_workflows(self) -> list[str]:
+        """Convenience method for getting workflow config files"""
+        return self.get_all_files(".toml")
 
     def get_prefix_with_seperator(self) -> str:
         """Convenience method for getting the appropriate prefix for tables"""
diff --git a/docs/creating-studies.md b/docs/creating-studies.md
index 49579be5..42acb12a 100644
--- a/docs/creating-studies.md
+++ b/docs/creating-studies.md
@@ -50,34 +50,29 @@ what each section does:
 # be the same name as the folder the study definition is in.
 study_prefix = "my_study"
 
-# For most use cases, this should not be required, but if you need to programmatically
-# build tables, you can provide a list of files implementing BaseTableBuilder.
-# See the core study for examples of this pattern. These run before
-# any SQL execution
-
-# [table_builder_config]
-# file_names = [
-#     "my_table_builder.py",
-# ]
 
 # The following section describes all tables that should be generated directly
 # from SQL files.
 
-[sql_config]
+[file_config]
 
-# 'file_names' defines a list of sql files to execute, in order, in this folder.
-# Recommended order: Any ancillary config (like a list of condition codes),
-# tables/view selecting subsets of data from FHIR data, tables/views creating 
-# summary statistics.
+# 'file_names' defines a list of files to execute, in order, in this folder.
+# Three file types are supported:
+#   - Raw SQL files
+#   - Python files, which should contain a class that is based off of
+#       BaseTableBuilder (or a derivative) from builders/base_table_builder.py
+#   - TOML files, which provide a set of configuration params to a workflow
+# These files will be executed in the order provided.
 
 file_names = [
     "setup.sql",
+    "builder.py",
     "lab_observations.sql",
     "counts.sql",
-    "date_range.sql"
+    "date_range.sql",
+    "stats_config.toml"
 ]
 
-
 # The following section defines parameters related to exporting study data from
 # your athena database
 
@@ -109,30 +104,6 @@ meta_list = [
   "my_study__meta_version",
 ]
 
-# For generating counts table in a more standardized manner, we have a class in the 
-# main library you can extend that will handle most of the logic of assembling 
-# queries for you. We use this pattern for generating the core tables, as well
-# other studies authored inside BCH. These will always be run after any other
-# SQL queries have been generated
-
-# [counts_builder_config]
-# file_names = [
-#    "count.py"
-# ]
-
-# For more specialized statistics, we provide a toml-based config entrypoint. The
-# details of these configs will vary, depending on which statistical method you're
-# invoking. For more details, see the statistics section of the docs for a list of
-# supported approaches.
-# These will run last, so all the data in your study will exist by the time these
-# are invoked.
-
-# [statistics_config]
-# file_names = 
-# [
-#    "psm_config.toml"
-# ]
-
 # The following section is for advanced/unusual study use cases
 
 # [options]
diff --git a/docs/study-list.md b/docs/study-list.md
index 63d60884..701f5501 100644
--- a/docs/study-list.md
+++ b/docs/study-list.md
@@ -21,6 +21,8 @@ for more details and any associated publications.
 - [Hypertension](https://github.com/smart-on-fhir/cumulus-library-hypertension)
 - [UMLS vocabulary](https://github.com/smart-on-fhir/cumulus-library-umls)
 - [Data Metrics](https://github.com/smart-on-fhir/cumulus-library-data-metrics)
+- [RxNorm vocabulary](https://github.com/smart-on-fhir/cumulus-library-rxnorm)
+- [ICD-10 accuracy for suicidal ideation](https://github.com/smart-on-fhir/cumulus-library-suicidality-icd10)
 
 ## Third party studies
 
diff --git a/docs/targeted-builders.md b/docs/targeted-builders.md
deleted file mode 100644
index c8b49243..00000000
--- a/docs/targeted-builders.md
+++ /dev/null
@@ -1,49 +0,0 @@
----
-title: Targeted Builders
-parent: Library
-nav_order: 8
-has_children: true
-# audience: Clinical researchers interested in publications
-# type: reference
----
-
-# Statistics reference
-
-This page contains detailed documentation on table building utilities utilities provided 
-for use in Cumulus studies.
-
-## Specific builder modules
-
-- [Propensity Score Matching](builders/propensity-score-matching.md).
-- [Valueset Compilation](builders/valueset.md).
-
-## General usage guidelines
-
-You can invoke a statistic task from your study's manifest the same way that you
-would run SQL or python files - the only difference is that you point it at another
-toml file, which allows stats configs to have different input parameters depending
-on the analysis you're trying to perform.
-
-In your manifest, you'd add a section like this:
-```toml
-[statistics_config]
-file_names = [
-    "psm_config.toml"
-]
-```
-
-We'll use this as a way to load statistics configurations. Since some of these 
-statistical methods may be quasi-experimental (i.e. perform a random sampling),
-we will persist these outputs outside of a study lifecycle. 
-
-The first time you run a `build` against a study with a statistics config that 
-has not previously been run before, it will be executed, and it should generate
-a table in your database with a timestamp, along with a view that points to that
-table. Subsequent updates will not replace that data, unless you provide the
-`--statistics` argument. If you do, it will create a new timestamped table,
-point the view to your newest table, and leave the old one in place in case
-you need to get those results back at a later point in time.
-
-When you `clean` a study with statistics, by default all statistics artifacts
-will be ignored, and persist in the database. If you want to remove these,
-the `--statistics` argument will remove all these stats artifacts.
\ No newline at end of file
diff --git a/docs/workflows.md b/docs/workflows.md
new file mode 100644
index 00000000..7b15a281
--- /dev/null
+++ b/docs/workflows.md
@@ -0,0 +1,40 @@
+---
+title: Targeted Builders
+parent: Library
+nav_order: 8
+has_children: true
+# audience: Clinical researchers interested in publications
+# type: reference
+---
+
+# Workflow reference
+
+This page contains detailed documentation on table building utilities utilities provided 
+for use in Cumulus studies.
+
+## Specific workflows
+
+- [Propensity Score Matching](workflows/propensity-score-matching.md).
+- [Valueset Compilation](workflows/valueset.md).
+
+## General usage guidelines
+
+You can invoke a workflow from your study's manifest the same way that you
+would run SQL or python files. All toml files in a manifest's study list are
+assumed to be workflow configurations
+
+Since some of these workflow methods may be quasi-experimental statistics
+(i.e. perform a random sampling),
+in those cases, we will persist these outputs outside of a study lifecycle. 
+
+The first time you run a `build` against a study with a workflow with statistics that 
+has not previously been run before, it will be executed, and it should generate
+a table in your database with a timestamp, along with a view that points to that
+table. Subsequent updates will not replace that data, unless you provide the
+`--statistics` argument. If you do, it will create a new timestamped table,
+point the view to your newest table, and leave the old one in place in case
+you need to get those results back at a later point in time.
+
+When you `clean` a study with a workflow that generates statistics, by default all
+statistics artifacts will be ignored, and persist in the database. If you want to 
+remove these, the `--statistics` argument will remove all these stats artifacts.
\ No newline at end of file
diff --git a/docs/builders/propensity-score-matching.md b/docs/workflows/propensity-score-matching.md
similarity index 100%
rename from docs/builders/propensity-score-matching.md
rename to docs/workflows/propensity-score-matching.md
diff --git a/docs/builders/valueset.md b/docs/workflows/valueset.md
similarity index 100%
rename from docs/builders/valueset.md
rename to docs/workflows/valueset.md
diff --git a/tests/test_actions.py b/tests/test_actions.py
index 9db70b87..9e3a27aa 100644
--- a/tests/test_actions.py
+++ b/tests/test_actions.py
@@ -172,7 +172,7 @@ def test_run_protected_table_builder(mock_db_config, study_path, stats):
 def test_table_builder(mock_db_config, study_path, verbose, expects, raises):
     with raises:
         manifest = study_manifest.StudyManifest(pathlib.Path(study_path))
-        builder.run_table_builder(
+        builder.build_study(
             config=mock_db_config,
             manifest=manifest,
         )
@@ -310,7 +310,6 @@ def test_run_psm_statistics_builders(
                 view_name="psm_test__psm_encounter_covariate",
             )
         builder.build_study(config=config, manifest=manifest)
-        builder.run_statistics_builders(config=config, manifest=manifest)
         tables = (
             mock_db_stats_config.db.cursor()
             .execute("SELECT distinct(table_name) FROM information_schema.tables")
@@ -334,7 +333,7 @@ def test_invoke_valueset_builder(mock_builder, mock_db_config, tmp_path):
     config = base_utils.StudyConfig(
         db=mock_db_config.db, schema=mock_db_config.schema, stats_build=True
     )
-    builder.run_statistics_builders(
+    builder.build_study(
         config=config,
         manifest=manifest,
     )
@@ -568,7 +567,7 @@ def test_generate_md(mock_db_config, tmp_path):
         manifest = study_manifest.StudyManifest(
             study_path=pathlib.Path(f"{tmp_path}/study_python_valid/")
         )
-        builder.run_table_builder(config=mock_db_config, manifest=manifest)
+        builder.build_study(config=mock_db_config, manifest=manifest)
         file_generator.run_generate_markdown(config=mock_db_config, manifest=manifest)
         with open(f"{tmp_path}/study_python_valid/study_python_valid_generated.md") as f:
             generated_md = f.read()
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 3a68d2ab..43b9247d 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -426,6 +426,24 @@ def test_clean(tmp_path, args, expected, raises):
             2,
             pytest.raises(errors.StudyManifestParsingError),
         ),
+        (
+            [
+                "build",
+                "-t",
+                "study_invalid_unsupported_file",
+                "-s",
+                "tests/test_data/study_invalid_unsupported_file/",
+            ],
+            [
+                "export",
+                "-t",
+                "study_invalid_unsupported_file",
+                "-s",
+                "tests/test_data/study_invalid_unsupported_file/",
+            ],
+            2,
+            pytest.raises(errors.StudyManifestParsingError),
+        ),
     ],
 )
 def test_cli_executes_queries(tmp_path, build_args, export_args, expected_tables, raises):
diff --git a/tests/test_data/psm/manifest.toml b/tests/test_data/psm/manifest.toml
index 5d38c1bb..77fcbf90 100644
--- a/tests/test_data/psm/manifest.toml
+++ b/tests/test_data/psm/manifest.toml
@@ -1,7 +1,5 @@
 study_prefix = "psm_test"
 
-[sql_config]
-file_names = ["psm_cohort.sql"]
+[file_config]
+file_names = ["psm_cohort.sql","psm_config.toml"]
 
-[statistics_config]
-file_names = ["psm_config.toml"]
diff --git a/tests/test_data/study_invalid_unsupported_file/manifest.toml b/tests/test_data/study_invalid_unsupported_file/manifest.toml
new file mode 100644
index 00000000..7be81de3
--- /dev/null
+++ b/tests/test_data/study_invalid_unsupported_file/manifest.toml
@@ -0,0 +1,7 @@
+study_prefix = "study_invalid_unsupported_file"
+
+[file_config]
+file_names = ["test.txt"]
+
+[export_config]
+export_list = ["study_invalid_unsupported_file__table"]
diff --git a/tests/test_data/study_invalid_unsupported_file/test.txt b/tests/test_data/study_invalid_unsupported_file/test.txt
new file mode 100644
index 00000000..7ac89884
--- /dev/null
+++ b/tests/test_data/study_invalid_unsupported_file/test.txt
@@ -0,0 +1 @@
+This should not be a study file
\ No newline at end of file
diff --git a/tests/test_data/study_python_counts_valid/manifest.toml b/tests/test_data/study_python_counts_valid/manifest.toml
index 021d38f5..9cf2f81e 100644
--- a/tests/test_data/study_python_counts_valid/manifest.toml
+++ b/tests/test_data/study_python_counts_valid/manifest.toml
@@ -1,6 +1,6 @@
 study_prefix = "study_python_counts_valid"
 
-[counts_builder_config]
+[file_config]
 file_names = [
     "module1.py", "module1.py", "module2.py"
 ]
diff --git a/tests/test_data/study_python_valid/manifest.toml b/tests/test_data/study_python_valid/manifest.toml
index 6509804b..6baad106 100644
--- a/tests/test_data/study_python_valid/manifest.toml
+++ b/tests/test_data/study_python_valid/manifest.toml
@@ -1,6 +1,6 @@
 study_prefix = "study_python_valid"
 
-[table_builder_config]
+[file_config]
 file_names = ["module1.py", "module1.py", "module2.py"]
 
 [export_config]
diff --git a/tests/test_data/study_static_file/manifest.toml b/tests/test_data/study_static_file/manifest.toml
index dac0a0bd..56027322 100644
--- a/tests/test_data/study_static_file/manifest.toml
+++ b/tests/test_data/study_static_file/manifest.toml
@@ -1,6 +1,6 @@
 study_prefix = "study_static_file"
 
-[table_builder_config]
+[file_config]
 file_names = [
     "static_file_builder.py",
 ]
diff --git a/tests/test_data/study_valid/manifest.toml b/tests/test_data/study_valid/manifest.toml
index 05ad4b48..50daf5a9 100644
--- a/tests/test_data/study_valid/manifest.toml
+++ b/tests/test_data/study_valid/manifest.toml
@@ -1,6 +1,6 @@
 study_prefix = "study_valid"
 
-[sql_config]
+[file_config]
 file_names = [
     "test.sql",
     "test2.sql"
diff --git a/tests/test_study_parser.py b/tests/test_study_parser.py
index 1acafe56..2c0556c9 100644
--- a/tests/test_study_parser.py
+++ b/tests/test_study_parser.py
@@ -16,7 +16,7 @@
         (
             "test_data/study_valid",
             (
-                "{'study_prefix': 'study_valid', 'sql_config': {'file_names': "
+                "{'study_prefix': 'study_valid', 'file_config': {'file_names': "
                 "['test.sql', 'test2.sql']}, 'export_config': {"
                 "'count_list': ['study_valid__table', 'study_valid__table2']}}"
             ),