Skip to content

Commit

Permalink
renaming StudyConfig params, cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
dogversioning committed May 7, 2024
1 parent 097bb0b commit 1986440
Show file tree
Hide file tree
Showing 11 changed files with 33 additions and 36 deletions.
6 changes: 3 additions & 3 deletions cumulus_library/base_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ class StudyConfig:
:keyword umls: A UMLS API key
"""

db_backend: databases.DatabaseBackend
db: databases.DatabaseBackend
db_type: str | None = None
replace_existing: bool = False
force_upload: bool = False
stats_build: bool = False
umls: str | None = None
umls_key: str | None = None


def load_text(path: str) -> str:
Expand Down
10 changes: 5 additions & 5 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,13 +346,13 @@ def run_cli(args: dict):
else:
config = base_utils.StudyConfig(
db_type=args.get("db_type"),
db_backend=databases.create_db_backend(args),
replace_existing=args.get("replace_existing", False),
db=databases.create_db_backend(args),
force_upload=args.get("replace_existing", False),
stats_build=args.get("stats_build", False),
umls=args.get("umls"),
umls_key=args.get("umls"),
)
try:
runner = StudyRunner(config.db_backend, data_path=args.get("data_path"))
runner = StudyRunner(config.db, data_path=args.get("data_path"))
if args.get("verbose"):
runner.verbose = True
console.print("[italic] Connecting to database...")
Expand Down Expand Up @@ -424,7 +424,7 @@ def run_cli(args: dict):
for target in args["target"]:
runner.generate_study_markdown(study_dict[target])
finally:
config.db_backend.close()
config.db.close()


def main(cli_args=None):
Expand Down
4 changes: 2 additions & 2 deletions cumulus_library/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,11 @@ def get_parser() -> argparse.ArgumentParser:
dest="stats_build",
)
build.add_argument(
"--umls",
"--umls-key",
help="An API Key for the UMLS API",
)
build.add_argument(
"--replace-existing",
"--force-upload",
action="store_true",
help="Forces file downloads/uploads to occur, even if they already exist",
)
Expand Down
16 changes: 8 additions & 8 deletions cumulus_library/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,17 +152,20 @@ def execute_as_pandas(self, sql: str) -> pandas.DataFrame:
def parser(self) -> DatabaseParser:
"""Returns parser object for interrogating DB schemas"""

@abc.abstractmethod
def upload_file(
self,
*,
file: pathlib.Path,
study: str,
topic: str,
remote_filename: str | None = None,
replace_existing=False,
force_upload=False,
) -> str | None:
"""If the db is file based, upload a file to it; if not, return None"""
"""Handler for remote database file upload.
By default, this should return None. Only override this for databases that
have an API for file upload (i.e. cloud databases)"""
return None

@abc.abstractmethod
def close(self) -> None:
Expand Down Expand Up @@ -218,7 +221,7 @@ def upload_file(
study: str,
topic: str,
remote_filename: str | None = None,
replace_existing=False,
force_upload=False,
) -> str | None:
# We'll investigate the connection to get the relevant S3 upload path.
wg_conf = self.connection._client.get_work_group(WorkGroup=self.work_group)[
Expand All @@ -244,7 +247,7 @@ def upload_file(

session = boto3.Session(profile_name=self.connection.profile_name)
s3_client = session.client("s3")
if not replace_existing:
if not force_upload:
res = s3_client.list_objects_v2(
Bucket=bucket,
Prefix=f"{s3_key}/{remote_filename}",
Expand Down Expand Up @@ -445,9 +448,6 @@ def execute_as_pandas(self, sql: str) -> pandas.DataFrame:
def parser(self) -> DatabaseParser:
return DuckDbParser()

def upload_file(self, *args, **kwargs) -> None:
return None

def close(self) -> None:
self.connection.close()

Expand Down
4 changes: 2 additions & 2 deletions cumulus_library/studies/vocab/vocab_icd_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ def prepare_queries(
if not parquet_path.is_file():
df = pandas.read_csv(file, delimiter="|", names=headers)
df.to_parquet(parquet_path)
remote_path = config.db_backend.upload_file(
remote_path = config.db.upload_file(
file=parquet_path,
study="vocab",
topic="icd",
remote_filename=f"{file.stem}.parquet",
replace_existing=config.replace_existing,
force_upload=config.force_upload,
)
# Since we are building one table from these three files, it's fine to just
# use the last value of remote location
Expand Down
4 changes: 1 addition & 3 deletions cumulus_library/study_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,9 +388,7 @@ def _load_and_execute_builder(
table_builder.comment_queries(doc_str=doc_str)
new_filename = pathlib.Path(f"{filename}").stem + ".sql"
table_builder.write_queries(
path=pathlib.Path(
f"{self._study_path}/reference_sql/" + new_filename, config=config
)
path=pathlib.Path(f"{self._study_path}/reference_sql/" + new_filename)
)
else:
table_builder.execute_queries(
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def mock_db_core(tmp_path, mock_db): # pylint: disable=redefined-outer-name
builder = cli.StudyRunner(mock_db, data_path=f"{tmp_path}/data_path")
builder.clean_and_build_study(
f"{Path(__file__).parent.parent}/cumulus_library/studies/core",
config=base_utils.StudyConfig(stats_build=True, db_backend=mock_db),
config=base_utils.StudyConfig(stats_build=True, db=mock_db),
)
yield mock_db

Expand All @@ -230,6 +230,6 @@ def mock_db_stats(tmp_path):
builder = cli.StudyRunner(db, data_path=f"{tmp_path}/data_path")
builder.clean_and_build_study(
f"{Path(__file__).parent.parent}/cumulus_library/studies/core",
config=base_utils.StudyConfig(stats_build=True, db_backend=db),
config=base_utils.StudyConfig(stats_build=True, db=db),
)
yield db
6 changes: 3 additions & 3 deletions tests/test_athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_schema_parsing():
@mock.patch("botocore.session.Session")
def test_upload_parquet(s3_session_mock):
path = pathlib.Path(__file__).resolve().parent
db_backend = databases.AthenaDatabaseBackend(
db = databases.AthenaDatabaseBackend(
region="us-east-1",
work_group="work_group",
profile="profile",
Expand All @@ -59,12 +59,12 @@ def test_upload_parquet(s3_session_mock):
client = mock.MagicMock()
with open(path / "test_data/aws/boto3.client.athena.get_work_group.json") as f:
client.get_work_group.return_value = json.load(f)
db_backend.connection._client = client
db.connection._client = client
s3_client = mock.MagicMock()
with open(path / "test_data/aws/boto3.client.s3.list_objects_v2.json") as f:
s3_client.list_objects_v2.return_value = json.load(f)
s3_session_mock.return_value.create_client.return_value = s3_client
resp = db_backend.upload_file(
resp = db.upload_file(
file=path / "test_data/count_synthea_patient.parquet",
study="test_study",
topic="count_patient",
Expand Down
3 changes: 1 addition & 2 deletions tests/test_psm.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@ def test_psm_create(
):
builder = cli.StudyRunner(mock_db_stats, data_path=Path(tmp_path))
psmbuilder = psm.PsmBuilder(
f"{Path(__file__).parent}/test_data/psm/{toml_def}",
Path(tmp_path), # config=study_parser.StudyConfig(db_type='duckdb')
f"{Path(__file__).parent}/test_data/psm/{toml_def}", Path(tmp_path)
)
mock_db_stats.cursor().execute(
"create table psm_test__psm_cohort as (select * from core__condition "
Expand Down
10 changes: 5 additions & 5 deletions tests/test_study_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def test_clean_study(mock_db, schema, verbose, prefix, confirm, stats, target, r
parser.run_protected_table_builder(
mock_db.cursor(),
schema,
config=base_utils.StudyConfig(db_backend=mock_db),
config=base_utils.StudyConfig(db=mock_db),
)

# We're mocking stats tables since creating them programmatically
Expand Down Expand Up @@ -168,7 +168,7 @@ def test_clean_study(mock_db, schema, verbose, prefix, confirm, stats, target, r
def test_run_protected_table_builder(mock_db, study_path, stats):
parser = study_parser.StudyManifestParser(study_path)
parser.run_protected_table_builder(
mock_db.cursor(), "main", config=base_utils.StudyConfig(db_backend=mock_db)
mock_db.cursor(), "main", config=base_utils.StudyConfig(db=mock_db)
)
tables = (
mock_db.cursor()
Expand Down Expand Up @@ -230,7 +230,7 @@ def test_table_builder(mock_db, study_path, verbose, expects, raises):
mock_db.cursor(),
"main",
verbose=verbose,
config=base_utils.StudyConfig(db_backend=mock_db),
config=base_utils.StudyConfig(db=mock_db),
)
tables = (
mock_db.cursor()
Expand Down Expand Up @@ -293,7 +293,7 @@ def test_build_study(mock_db, study_path, verbose, expects, raises):
parser.build_study(
mock_db.cursor(),
verbose=verbose,
config=base_utils.StudyConfig(db_backend=mock_db),
config=base_utils.StudyConfig(db=mock_db),
)
tables = (
mock_db.cursor()
Expand Down Expand Up @@ -325,7 +325,7 @@ def test_run_statistics_builders(
):
with raises:
parser = study_parser.StudyManifestParser(study_path, data_path=tmp_path)
config = base_utils.StudyConfig(db_backend=mock_db_stats, stats_build=stats)
config = base_utils.StudyConfig(db=mock_db_stats, stats_build=stats)
parser.run_protected_table_builder(
mock_db_stats.cursor(), "main", config=config
)
Expand Down
2 changes: 1 addition & 1 deletion tests/testbed_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,6 @@ def build(self, study="core") -> duckdb.DuckDBPyConnection:
# builder.verbose = True
builder.clean_and_build_study(
Path(__file__).parent.parent / "cumulus_library/studies" / study,
config=base_utils.StudyConfig(db_backend=db, stats_build=False),
config=base_utils.StudyConfig(db=db, stats_build=False),
)
return duckdb.connect(db_file)

0 comments on commit 1986440

Please sign in to comment.