Skip to content

Commit

Permalink
Converted manifest to single list of files (#317)
Browse files Browse the repository at this point in the history
* Converted manifest to single list of files

* coverage, PR feedback
  • Loading branch information
dogversioning authored Nov 19, 2024
1 parent 2528c51 commit e0c0728
Show file tree
Hide file tree
Showing 23 changed files with 231 additions and 230 deletions.
158 changes: 66 additions & 92 deletions cumulus_library/actions/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,60 +135,14 @@ def run_protected_table_builder(
)


def run_table_builder(
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
*,
db_parser: databases.DatabaseParser = None,
def _run_workflow(
config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str
) -> None:
"""Loads modules from a manifest and executes code via BaseTableBuilder
"""Loads workflow config from toml definitions and executes workflow
:param config: a StudyConfig object
:param manifest: a StudyManifest object
:keyword db_parser: an object implementing DatabaseParser for the target database
"""
for file in manifest.get_table_builder_file_list():
_load_and_execute_builder(
config=config,
manifest=manifest,
filename=file,
db_parser=db_parser,
)


def run_counts_builders(
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
) -> None:
"""Loads counts modules from a manifest and executes code via BaseTableBuilder
While a count is a form of statistics, it is treated separately from other
statistics because it is, by design, always going to be static against a
given dataset, where other statistical methods may use sampling techniques
or adjustable input parameters that may need to be preserved for later review.
:param config: a StudyConfig object
:param manifest: a StudyManifest object
"""
for file in manifest.get_counts_builder_file_list():
_load_and_execute_builder(
config=config,
manifest=manifest,
filename=file,
)


def run_statistics_builders(
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
) -> None:
"""Loads statistics modules from toml definitions and executes
:param config: a StudyConfig object
:param manifest: a StudyManifest object
"""
if len(manifest.get_statistics_file_list()) == 0:
return
existing_stats = []
if not config.stats_build:
existing_stats = (
Expand All @@ -199,40 +153,41 @@ def run_statistics_builders(
)
.fetchall()
)
for file in manifest.get_statistics_file_list():
# This open is a bit redundant with the open inside of the PSM builder,
# but we're letting it slide so that builders function similarly
# across the board
safe_timestamp = base_utils.get_tablename_safe_iso_timestamp()
toml_path = pathlib.Path(f"{manifest._study_path}/{file}")
with open(toml_path, "rb") as file:
stats_config = tomllib.load(file)
config_type = stats_config["config_type"]
target_table = stats_config.get("target_table", stats_config.get("table_prefix", ""))

if (target_table,) in existing_stats and not config.stats_build:
continue
if config_type == "psm":
# This open is a bit redundant with the open inside of the PSM builder,
# but we're letting it slide so that builders function similarly
# across the board
safe_timestamp = base_utils.get_tablename_safe_iso_timestamp()
toml_path = pathlib.Path(f"{manifest._study_path}/{filename}")
with open(toml_path, "rb") as file:
workflow_config = tomllib.load(file)
config_type = workflow_config["config_type"]
target_table = workflow_config.get("target_table", workflow_config.get("table_prefix", ""))

if (target_table,) in existing_stats and not config.stats_build:
return
match config_type:
case "psm":
builder = psm_builder.PsmBuilder(
toml_config_path=toml_path,
config=stats_config,
config=workflow_config,
data_path=manifest.data_path / f"{manifest.get_study_prefix()}/psm",
)
elif config_type == "valueset":
case "valueset":
builder = valueset_builder.ValuesetBuilder(
toml_config_path=toml_path,
config=stats_config,
config=workflow_config,
data_path=manifest.data_path / f"{manifest.get_study_prefix()}/valueset",
)
else:
raise errors.StudyManifestParsingError( # pragma: no cover
f"{toml_path} references an invalid statistics type {config_type}."
case _: # pragma: no cover
raise errors.StudyManifestParsingError(
f"{toml_path} references an invalid workflow type {config_type}."
)
builder.execute_queries(
config=config,
manifest=manifest,
table_suffix=safe_timestamp,
)
builder.execute_queries(
config=config,
manifest=manifest,
table_suffix=safe_timestamp,
)
if config_type in set(item.value for item in enums.StatisticsTypes):
log_utils.log_statistics(
config=config,
manifest=manifest,
Expand All @@ -242,11 +197,11 @@ def run_statistics_builders(
)


def run_matching_table_builder(
def build_matching_files(
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
*,
builder: str,
builder: str | None,
db_parser: databases.DatabaseParser = None,
):
"""targets all table builders matching a target string for running
Expand All @@ -256,36 +211,55 @@ def run_matching_table_builder(
:keyword builder: filename of a module implementing a TableBuilder
:keyword db_parser: an object implementing DatabaseParser for the target database"""
all_generators = manifest.get_all_generators()
for file in all_generators:
if builder and file.find(builder) == -1:
continue
_load_and_execute_builder(
config=config,
manifest=manifest,
filename=file,
db_parser=db_parser,
)
matches = []
if not builder: # pragma: no cover
matches = all_generators
else:
for file in all_generators:
if file.find(builder) != -1:
matches.append(file)
build_study(config, manifest, db_parser=db_parser, file_list=matches)


def build_study(
    config: base_utils.StudyConfig,
    manifest: study_manifest.StudyManifest,
    *,
    db_parser: databases.DatabaseParser = None,
    continue_from: str | None = None,
    file_list: list | None = None,
) -> None:
    """Builds a study by running each file through the handler for its type

    Files are processed in list order and dispatched on their extension:
    `.py` files are run via _load_and_execute_builder, `.toml` files via
    _run_workflow, and `.sql` files via _run_raw_queries.

    :param config: a StudyConfig object
    :param manifest: a StudyManifest object
    :keyword db_parser: an object implementing DatabaseParser for the target
        database (only forwarded to python builders)
    :keyword continue_from: Name of a file to resume table creation from. Only
        consulted when file_list is not provided.
    :keyword file_list: an explicit list of files to build. If None, the list
        is requested from the manifest.
    :raises StudyManifestParsingError: if a file has an unsupported extension
    :returns: None
    """
    if file_list is None:
        file_list = manifest.get_file_list(continue_from)
    for file in file_list:
        if file.endswith(".py"):
            _load_and_execute_builder(
                config=config,
                manifest=manifest,
                filename=file,
                db_parser=db_parser,
            )
        elif file.endswith(".toml"):
            _run_workflow(config=config, manifest=manifest, filename=file)
        elif file.endswith(".sql"):
            _run_raw_queries(config=config, manifest=manifest, filename=file)
        else:
            # Name the offending file so a bad manifest entry can be found
            # without a debugger.
            raise errors.StudyManifestParsingError(
                f"{file} is not a supported file type; "
                "expected a .py, .toml, or .sql file."
            )


def _run_raw_queries(
config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str
):
queries = []
for file in manifest.get_sql_file_list(continue_from):
for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{file}")):
queries.append([query, file])
if len(queries) == 0:
return []
for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{filename}")):
queries.append([query, filename])
for query in queries:
query[0] = base_utils.update_query_if_schema_specified(query[0], manifest)
query[0] = query[0].replace(
Expand All @@ -298,7 +272,7 @@ def build_study(
# We want to only show a progress bar if we are :not: printing SQL lines
with base_utils.get_progress_bar(disable=config.verbose) as progress:
task = progress.add_task(
f"Creating {manifest.get_study_prefix()} study in db...",
f"Building tables from {filename}...",
total=len(queries),
visible=not config.verbose,
)
Expand Down
34 changes: 25 additions & 9 deletions cumulus_library/builders/protected_table_builder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Builder for creating tables for tracking state/logging changes"""

import pathlib
import tomllib

from cumulus_library import (
BaseTableBuilder,
base_utils,
Expand Down Expand Up @@ -64,12 +67,25 @@ def prepare_queries(
TRANSACTION_COLS_TYPES,
)
)
if manifest._study_config.get("statistics_config"):
self.queries.append(
base_templates.get_ctas_empty_query(
db_schema,
statistics,
STATISTICS_COLS,
STATISTICS_COLS_TYPES,
)
)
files = manifest.get_all_workflows()
if len(files) == 0:
return
stats_types = set(item.value for item in enums.StatisticsTypes)
# In this loop, we are just checking to see if :any: workflow is a stats
# type workflow - if so, we'll create a table to hold data of stats runs
# (if it doesn't already exist) outside of the study lifecycle for
# persistence reasons
for file in files:
toml_path = pathlib.Path(f"{manifest._study_path}/{file}")
with open(toml_path, "rb") as file:
workflow_config = tomllib.load(file)
if workflow_config["config_type"] in stats_types:
self.queries.append(
base_templates.get_ctas_empty_query(
db_schema,
statistics,
STATISTICS_COLS,
STATISTICS_COLS_TYPES,
)
)
return
12 changes: 3 additions & 9 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def clean_and_build_study(
config=self.get_config(manifest),
manifest=manifest,
)
builder.run_table_builder(config=self.get_config(manifest), manifest=manifest)

else:
log_utils.log_transaction(
Expand All @@ -127,11 +126,6 @@ def clean_and_build_study(
manifest=manifest,
continue_from=continue_from,
)
builder.run_counts_builders(config=self.get_config(manifest), manifest=manifest)
builder.run_statistics_builders(
config=self.get_config(manifest),
manifest=manifest,
)
log_utils.log_transaction(
config=self.get_config(manifest),
manifest=manifest,
Expand All @@ -150,7 +144,7 @@ def clean_and_build_study(
)
raise e

def run_matching_table_builder(
def build_matching_files(
self,
target: pathlib.Path,
table_builder_name: str,
Expand All @@ -164,7 +158,7 @@ def run_matching_table_builder(
:param options: The dictionary of study-specific options
"""
manifest = study_manifest.StudyManifest(target, options=options)
builder.run_matching_table_builder(
builder.build_matching_files(
config=self.get_config(manifest),
manifest=manifest,
builder=table_builder_name,
Expand Down Expand Up @@ -330,7 +324,7 @@ def run_cli(args: dict):
elif args["action"] == "build":
for target in args["target"]:
if args["builder"]:
runner.run_matching_table_builder(
runner.build_matching_files(
study_dict[target], args["builder"], options=args["options"]
)
else:
Expand Down
6 changes: 6 additions & 0 deletions cumulus_library/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ class ProtectedTables(enum.Enum):
TRANSACTIONS = "lib_transactions"


class StatisticsTypes(enum.Enum):
    """Workflow config types whose runs create statistics sampling artifacts"""

    # Each value is compared against the `config_type` key of a workflow toml
    PSM = "psm"


class LogStatuses(enum.Enum):
DEBUG = "debug"
ERROR = "error"
Expand Down
12 changes: 2 additions & 10 deletions cumulus_library/studies/core/manifest.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
study_prefix = "core"

[table_builder_config]
[file_config]
file_names = [
"builder_prereq_tables.py",
"builder_allergyintolerance.py",
Expand All @@ -9,17 +9,9 @@ file_names = [
"builder_encounter.py",
"builder_documentreference.py",
"builder_medicationrequest.py",
"builder_observation.py"
]

[sql_config]
file_names = [
"builder_observation.py",
"observation_type.sql",
"meta_date.sql",
]

[counts_builder_config]
file_names = [
"count_core.py"
]

Expand Down
2 changes: 1 addition & 1 deletion cumulus_library/studies/discovery/manifest.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
study_prefix = "discovery"

[table_builder_config]
[file_config]
file_names = [
"code_detection.py",
]
Expand Down
Loading

0 comments on commit e0c0728

Please sign in to comment.