renaming StudyConfig params, cleanup

smart-on-fhir · May 7, 2024 · 1986440 · 1986440
1 parent 097bb0b
commit 1986440
Show file tree

Hide file tree

Showing 11 changed files with 33 additions and 36 deletions.
diff --git a/cumulus_library/base_utils.py b/cumulus_library/base_utils.py
@@ -33,11 +33,11 @@ class StudyConfig:
     :keyword umls: A UMLS API key
     """
 
-    db_backend: databases.DatabaseBackend
+    db: databases.DatabaseBackend
     db_type: str | None = None
-    replace_existing: bool = False
+    force_upload: bool = False
     stats_build: bool = False
-    umls: str | None = None
+    umls_key: str | None = None
 
 
 def load_text(path: str) -> str:

diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py
@@ -346,13 +346,13 @@ def run_cli(args: dict):
     else:
         config = base_utils.StudyConfig(
             db_type=args.get("db_type"),
-            db_backend=databases.create_db_backend(args),
-            replace_existing=args.get("replace_existing", False),
+            db=databases.create_db_backend(args),
+            force_upload=args.get("replace_existing", False),
             stats_build=args.get("stats_build", False),
-            umls=args.get("umls"),
+            umls_key=args.get("umls"),
         )
         try:
-            runner = StudyRunner(config.db_backend, data_path=args.get("data_path"))
+            runner = StudyRunner(config.db, data_path=args.get("data_path"))
             if args.get("verbose"):
                 runner.verbose = True
             console.print("[italic] Connecting to database...")
@@ -424,7 +424,7 @@ def run_cli(args: dict):
                 for target in args["target"]:
                     runner.generate_study_markdown(study_dict[target])
         finally:
-            config.db_backend.close()
+            config.db.close()
 
 
 def main(cli_args=None):

diff --git a/cumulus_library/cli_parser.py b/cumulus_library/cli_parser.py
@@ -183,11 +183,11 @@ def get_parser() -> argparse.ArgumentParser:
         dest="stats_build",
     )
     build.add_argument(
-        "--umls",
+        "--umls-key",
         help="An API Key for the UMLS API",
     )
     build.add_argument(
-        "--replace-existing",
+        "--force-upload",
         action="store_true",
         help="Forces file downloads/uploads to occur, even if they already exist",
     )

diff --git a/cumulus_library/databases.py b/cumulus_library/databases.py
@@ -152,17 +152,20 @@ def execute_as_pandas(self, sql: str) -> pandas.DataFrame:
     def parser(self) -> DatabaseParser:
         """Returns parser object for interrogating DB schemas"""
 
-    @abc.abstractmethod
     def upload_file(
         self,
         *,
         file: pathlib.Path,
         study: str,
         topic: str,
         remote_filename: str | None = None,
-        replace_existing=False,
+        force_upload=False,
     ) -> str | None:
-        """If the db is file based, upload a file to it; if not, return None"""
+        """Handler for remote database file upload.
+
+        By default, this should return None. Only override this for databases that
+        have an API for file upload (i.e. cloud databases)"""
+        return None
 
     @abc.abstractmethod
     def close(self) -> None:
@@ -218,7 +221,7 @@ def upload_file(
         study: str,
         topic: str,
         remote_filename: str | None = None,
-        replace_existing=False,
+        force_upload=False,
     ) -> str | None:
         # We'll investigate the connection to get the relevant S3 upload path.
         wg_conf = self.connection._client.get_work_group(WorkGroup=self.work_group)[
@@ -244,7 +247,7 @@ def upload_file(
 
         session = boto3.Session(profile_name=self.connection.profile_name)
         s3_client = session.client("s3")
-        if not replace_existing:
+        if not force_upload:
             res = s3_client.list_objects_v2(
                 Bucket=bucket,
                 Prefix=f"{s3_key}/{remote_filename}",
@@ -445,9 +448,6 @@ def execute_as_pandas(self, sql: str) -> pandas.DataFrame:
     def parser(self) -> DatabaseParser:
         return DuckDbParser()
 
-    def upload_file(self, *args, **kwargs) -> None:
-        return None
-
     def close(self) -> None:
         self.connection.close()
 

diff --git a/cumulus_library/studies/vocab/vocab_icd_builder.py b/cumulus_library/studies/vocab/vocab_icd_builder.py
@@ -47,12 +47,12 @@ def prepare_queries(
             if not parquet_path.is_file():
                 df = pandas.read_csv(file, delimiter="|", names=headers)
                 df.to_parquet(parquet_path)
-            remote_path = config.db_backend.upload_file(
+            remote_path = config.db.upload_file(
                 file=parquet_path,
                 study="vocab",
                 topic="icd",
                 remote_filename=f"{file.stem}.parquet",
-                replace_existing=config.replace_existing,
+                force_upload=config.force_upload,
             )
         # Since we are building one table from these three files, it's fine to just
         # use the last value of remote location

diff --git a/cumulus_library/study_parser.py b/cumulus_library/study_parser.py
@@ -388,9 +388,7 @@ def _load_and_execute_builder(
             table_builder.comment_queries(doc_str=doc_str)
             new_filename = pathlib.Path(f"{filename}").stem + ".sql"
             table_builder.write_queries(
-                path=pathlib.Path(
-                    f"{self._study_path}/reference_sql/" + new_filename, config=config
-                )
+                path=pathlib.Path(f"{self._study_path}/reference_sql/" + new_filename)
             )
         else:
             table_builder.execute_queries(

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -211,7 +211,7 @@ def mock_db_core(tmp_path, mock_db):  # pylint: disable=redefined-outer-name
     builder = cli.StudyRunner(mock_db, data_path=f"{tmp_path}/data_path")
     builder.clean_and_build_study(
         f"{Path(__file__).parent.parent}/cumulus_library/studies/core",
-        config=base_utils.StudyConfig(stats_build=True, db_backend=mock_db),
+        config=base_utils.StudyConfig(stats_build=True, db=mock_db),
     )
     yield mock_db
 
@@ -230,6 +230,6 @@ def mock_db_stats(tmp_path):
     builder = cli.StudyRunner(db, data_path=f"{tmp_path}/data_path")
     builder.clean_and_build_study(
         f"{Path(__file__).parent.parent}/cumulus_library/studies/core",
-        config=base_utils.StudyConfig(stats_build=True, db_backend=db),
+        config=base_utils.StudyConfig(stats_build=True, db=db),
     )
     yield db
diff --git a/tests/test_athena.py b/tests/test_athena.py
@@ -50,7 +50,7 @@ def test_schema_parsing():
 @mock.patch("botocore.session.Session")
 def test_upload_parquet(s3_session_mock):
     path = pathlib.Path(__file__).resolve().parent
-    db_backend = databases.AthenaDatabaseBackend(
+    db = databases.AthenaDatabaseBackend(
         region="us-east-1",
         work_group="work_group",
         profile="profile",
@@ -59,12 +59,12 @@ def test_upload_parquet(s3_session_mock):
     client = mock.MagicMock()
     with open(path / "test_data/aws/boto3.client.athena.get_work_group.json") as f:
         client.get_work_group.return_value = json.load(f)
-    db_backend.connection._client = client
+    db.connection._client = client
     s3_client = mock.MagicMock()
     with open(path / "test_data/aws/boto3.client.s3.list_objects_v2.json") as f:
         s3_client.list_objects_v2.return_value = json.load(f)
     s3_session_mock.return_value.create_client.return_value = s3_client
-    resp = db_backend.upload_file(
+    resp = db.upload_file(
         file=path / "test_data/count_synthea_patient.parquet",
         study="test_study",
         topic="count_patient",

diff --git a/tests/test_psm.py b/tests/test_psm.py
@@ -63,8 +63,7 @@ def test_psm_create(
 ):
     builder = cli.StudyRunner(mock_db_stats, data_path=Path(tmp_path))
     psmbuilder = psm.PsmBuilder(
-        f"{Path(__file__).parent}/test_data/psm/{toml_def}",
-        Path(tmp_path),  # config=study_parser.StudyConfig(db_type='duckdb')
+        f"{Path(__file__).parent}/test_data/psm/{toml_def}", Path(tmp_path)
     )
     mock_db_stats.cursor().execute(
         "create table psm_test__psm_cohort as (select * from core__condition "

diff --git a/tests/test_study_parser.py b/tests/test_study_parser.py
@@ -112,7 +112,7 @@ def test_clean_study(mock_db, schema, verbose, prefix, confirm, stats, target, r
             parser.run_protected_table_builder(
                 mock_db.cursor(),
                 schema,
-                config=base_utils.StudyConfig(db_backend=mock_db),
+                config=base_utils.StudyConfig(db=mock_db),
             )
 
             # We're mocking stats tables since creating them programmatically
@@ -168,7 +168,7 @@ def test_clean_study(mock_db, schema, verbose, prefix, confirm, stats, target, r
 def test_run_protected_table_builder(mock_db, study_path, stats):
     parser = study_parser.StudyManifestParser(study_path)
     parser.run_protected_table_builder(
-        mock_db.cursor(), "main", config=base_utils.StudyConfig(db_backend=mock_db)
+        mock_db.cursor(), "main", config=base_utils.StudyConfig(db=mock_db)
     )
     tables = (
         mock_db.cursor()
@@ -230,7 +230,7 @@ def test_table_builder(mock_db, study_path, verbose, expects, raises):
             mock_db.cursor(),
             "main",
             verbose=verbose,
-            config=base_utils.StudyConfig(db_backend=mock_db),
+            config=base_utils.StudyConfig(db=mock_db),
         )
         tables = (
             mock_db.cursor()
@@ -293,7 +293,7 @@ def test_build_study(mock_db, study_path, verbose, expects, raises):
         parser.build_study(
             mock_db.cursor(),
             verbose=verbose,
-            config=base_utils.StudyConfig(db_backend=mock_db),
+            config=base_utils.StudyConfig(db=mock_db),
         )
         tables = (
             mock_db.cursor()
@@ -325,7 +325,7 @@ def test_run_statistics_builders(
 ):
     with raises:
         parser = study_parser.StudyManifestParser(study_path, data_path=tmp_path)
-        config = base_utils.StudyConfig(db_backend=mock_db_stats, stats_build=stats)
+        config = base_utils.StudyConfig(db=mock_db_stats, stats_build=stats)
         parser.run_protected_table_builder(
             mock_db_stats.cursor(), "main", config=config
         )

diff --git a/tests/testbed_utils.py b/tests/testbed_utils.py
@@ -236,6 +236,6 @@ def build(self, study="core") -> duckdb.DuckDBPyConnection:
         # builder.verbose = True
         builder.clean_and_build_study(
             Path(__file__).parent.parent / "cumulus_library/studies" / study,
-            config=base_utils.StudyConfig(db_backend=db, stats_build=False),
+            config=base_utils.StudyConfig(db=db, stats_build=False),
         )
         return duckdb.connect(db_file)