smart-on-fhir · dogversioning · Oct 24, 2023 · Oct 23, 2023 · Oct 23, 2023 · Oct 24, 2023
diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py
@@ -89,7 +89,7 @@ def reset_data_path(self, study: PosixPath) -> None:
 
     ### Creating studies
 
-    def clean_study(self, targets: List[str], prefix=False) -> None:
+    def clean_study(self, targets: List[str], study_dict, prefix=False) -> None:
         """Removes study table/views from Athena.
 
         While this is usually not required, since it it done as part of a build,
@@ -104,23 +104,26 @@ def clean_study(self, targets: List[str], prefix=False) -> None:
             )
         for target in targets:
             if prefix:
-                prefix = target
+                parser = StudyManifestParser()
+                parser.clean_study(
+                    self.cursor, self.schema_name, self.verbose, prefix=target
+                )
             else:
-                prefix = f"{target}__"
-            parser = StudyManifestParser()
-            parser.clean_study(
-                self.cursor, self.schema_name, self.verbose, prefix=prefix
-            )
+                parser = StudyManifestParser(study_dict[target])
+                parser.clean_study(self.cursor, self.schema_name, self.verbose)
 
-    def clean_and_build_study(self, target: PosixPath) -> None:
+    def clean_and_build_study(
+        self, target: PosixPath, continue_from: str = None
+    ) -> None:
         """Recreates study views/tables
 
         :param target: A PosixPath to the study directory
         """
         studyparser = StudyManifestParser(target)
-        studyparser.clean_study(self.cursor, self.schema_name, self.verbose)
-        studyparser.run_table_builder(self.cursor, self.schema_name, self.verbose)
-        studyparser.build_study(self.cursor, self.verbose)
+        if not continue_from:
+            studyparser.clean_study(self.cursor, self.schema_name, self.verbose)
+            studyparser.run_table_builder(self.cursor, self.schema_name, self.verbose)
+        studyparser.build_study(self.cursor, self.verbose, continue_from)
         studyparser.run_counts_builder(self.cursor, self.schema_name, self.verbose)
 
     def run_single_table_builder(
@@ -256,11 +259,8 @@ def run_cli(args: Dict):
         print("Testing connection to athena...")
         builder.cursor.execute("SHOW DATABASES")
 
-        if args["action"] == "clean":
-            builder.clean_study(args["target"], args["prefix"])
-
-        else:
-            study_dict = get_study_dict(args["study_dir"])
+        study_dict = get_study_dict(args["study_dir"])
+        if "prefix" not in args.keys():
             if args["target"]:
                 for target in args["target"]:
                     if target not in study_dict:
@@ -270,25 +270,32 @@ def run_cli(args: Dict):
                             "If you are trying to run a custom study, make sure "
                             "you include `-s path/to/study/dir` as an arugment."
                         )
+        if args["action"] == "clean":
+            builder.clean_study(
+                args["target"],
+                study_dict,
+                args["prefix"],
+            )
+        elif args["action"] == "build":
+            if "all" in args["target"]:
+                builder.clean_and_build_all(study_dict)
+            else:
+                for target in args["target"]:
+                    if args["builder"]:
+                        builder.run_single_table_builder(
+                            study_dict[target], args["builder"]
+                        )
+                    else:
+                        builder.clean_and_build_study(
+                            study_dict[target], continue_from=args["continue_from"]
+                        )
 
-            if args["action"] == "build":
-                if "all" in args["target"]:
-                    builder.clean_and_build_all(study_dict)
-                else:
-                    for target in args["target"]:
-                        if args["builder"]:
-                            builder.run_single_table_builder(
-                                study_dict[target], args["builder"]
-                            )
-                        else:
-                            builder.clean_and_build_study(study_dict[target])
-
-            elif args["action"] == "export":
-                if "all" in args["target"]:
-                    builder.export_all(study_dict, args["data_path"])
-                else:
-                    for target in args["target"]:
-                        builder.export_study(study_dict[target], args["data_path"])
+        elif args["action"] == "export":
+            if "all" in args["target"]:
+                builder.export_all(study_dict, args["data_path"])
+            else:
+                for target in args["target"]:
+                    builder.export_study(study_dict[target], args["data_path"])
 
         # returning the builder for ease of unit testing
         return builder

diff --git a/cumulus_library/cli_parser.py b/cumulus_library/cli_parser.py
@@ -123,6 +123,7 @@ def get_parser() -> argparse.ArgumentParser:
     )
 
     add_target_argument(clean)
+    add_study_dir_argument(clean)
     add_verbose_argument(clean)
     add_aws_config(clean)
     clean.add_argument(
@@ -141,6 +142,12 @@ def get_parser() -> argparse.ArgumentParser:
     add_verbose_argument(build)
     add_aws_config(build)
 
+    build.add_argument(
+        "--continue",
+        dest="continue_from",
+        help=(argparse.SUPPRESS),
+    )
+
     export = actions.add_parser(
         "export", help="Generates files on disk from Athena views"
     )

diff --git a/cumulus_library/study_parser.py b/cumulus_library/study_parser.py
@@ -86,13 +86,25 @@ def get_study_prefix(self) -> Optional[str]:
         """
         return self._study_config.get("study_prefix")
 
-    def get_sql_file_list(self) -> Optional[StrList]:
+    def get_sql_file_list(self, continue_from: str = None) -> Optional[StrList]:
         """Reads the contents of the sql_config array from the manifest
 
         :returns: An array of sql files from the manifest, or None if not found.
         """
         sql_config = self._study_config.get("sql_config", {})
-        return sql_config.get("file_names", [])
+        sql_files = sql_config.get("file_names", [])
+        if continue_from:
+            match_found = False
+            for file in sql_files:
+                print(continue_from.replace(".sql", ""))
+                print(file.replace(".sql", ""))
+                if continue_from.replace(".sql", "") == file.replace(".sql", ""):
+                    sql_files = sql_files[sql_files.index(file) :]
+                    match_found = True
+                    break
+            if not match_found:
+                sys.exit(f"No tables matching '{continue_from}' found")
+        return sql_files
 
     def get_table_builder_file_list(self) -> Optional[StrList]:
         """Reads the contents of the table_builder_config array from the manifest
@@ -190,6 +202,13 @@ def clean_study(
             ):
                 view_table_list.remove(view_table)
         # We want to only show a progress bar if we are :not: printing SQL lines
+        if prefix:
+            print("The following views/tables were selected by prefix:")
+            for view_table in view_table_list:
+                print(f"  {view_table[0]}")
+            confirm = input("Remove these tables? (Y/N)")
+            if confirm.lower() not in ("y", "yes"):
+                sys.exit("Table cleaning aborted")
         with get_progress_bar(disable=verbose) as progress:
             task = progress.add_task(
                 f"Removing {display_prefix} study artifacts...",
@@ -314,7 +333,9 @@ def run_single_table_builder(
             name = f"{name}.py"
         self._load_and_execute_builder(name, cursor, schema, verbose, drop_table=True)
 
-    def build_study(self, cursor: object, verbose: bool = False) -> List:
+    def build_study(
+        self, cursor: object, verbose: bool = False, continue_from: str = None
+    ) -> List:
         """Creates tables in the schema by iterating through the sql_config.file_names
 
         :param cursor: A PEP-249 compatible cursor object
@@ -323,7 +344,7 @@ def build_study(self, cursor: object, verbose: bool = False) -> List:
         :returns: loaded queries (for unit testing only)
         """
         queries = []
-        for file in self.get_sql_file_list():
+        for file in self.get_sql_file_list(continue_from):
             for query in parse_sql(load_text(f"{self._study_path}/{file}")):
                 queries.append([query, file])
         if len(queries) == 0:

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -116,11 +116,11 @@ def test_count_builder_mapping(
             [
                 "clean",
                 "-t",
-                "study_python_counts_valid",
+                "core",
                 "--database",
                 "test",
             ],
-            "study_python_counts_valid__",
+            "core__",
         ),
         (
             [

diff --git a/tests/test_study_parser.py b/tests/test_study_parser.py
@@ -1,4 +1,5 @@
 """ tests for study parser against mocks in test_data """
+import builtins
 import pathlib
 from contextlib import nullcontext as does_not_raise
 from unittest import mock
@@ -74,37 +75,38 @@ def test_manifest_data(manifest_key, raises):
 
 
 @pytest.mark.parametrize(
-    "schema,verbose,prefix,query_res,raises",
+    "schema,verbose,prefix,confirm,query_res,raises",
     [
-        ("schema", True, None, "study_valid__table", does_not_raise()),
-        ("schema", False, None, "study_valid__table", does_not_raise()),
-        ("schema", None, None, "study_valid__table", does_not_raise()),
-        (None, True, None, [], pytest.raises(ValueError)),
-        ("schema", None, None, "study_valid__etl_table", does_not_raise()),
-        ("schema", None, None, "study_valid__nlp_table", does_not_raise()),
-        ("schema", None, None, "study_valid__lib_table", does_not_raise()),
-        ("schema", None, None, "study_valid__lib", does_not_raise()),
-        ("schema", None, "foo", "foo_table", does_not_raise()),
+        ("schema", True, None, None, "study_valid__table", does_not_raise()),
+        ("schema", False, None, None, "study_valid__table", does_not_raise()),
+        ("schema", None, None, None, "study_valid__table", does_not_raise()),
+        (None, True, None, None, [], pytest.raises(ValueError)),
+        ("schema", None, None, None, "study_valid__etl_table", does_not_raise()),
+        ("schema", None, None, None, "study_valid__nlp_table", does_not_raise()),
+        ("schema", None, None, None, "study_valid__lib_table", does_not_raise()),
+        ("schema", None, None, None, "study_valid__lib", does_not_raise()),
+        ("schema", None, "foo", "y", "foo_table", does_not_raise()),
+        ("schema", None, "foo", "n", "foo_table", pytest.raises(SystemExit)),
     ],
 )
 @mock.patch("cumulus_library.helper.query_console_output")
-def test_clean_study(mock_output, schema, verbose, prefix, query_res, raises):
+def test_clean_study(mock_output, schema, verbose, prefix, confirm, query_res, raises):
     with raises:
-        mock_cursor = mock.MagicMock()
-        mock_cursor.__iter__.return_value = [[query_res]]
-        parser = StudyManifestParser("./tests/test_data/study_valid/")
-        tables = parser.clean_study(mock_cursor, schema, verbose, prefix=prefix)
+        with mock.patch.object(builtins, "input", lambda _: confirm):
+            mock_cursor = mock.MagicMock()
+            mock_cursor.__iter__.return_value = [[query_res]]
+            parser = StudyManifestParser("./tests/test_data/study_valid/")
+            tables = parser.clean_study(mock_cursor, schema, verbose, prefix=prefix)
 
-        if "study_valid__table" not in query_res and prefix is None:
-            print("wha")
-            assert not tables
-        else:
-            assert tables == [[query_res, "VIEW"]]
-            if prefix is not None:
-                assert prefix in mock_cursor.execute.call_args.args[0]
+            if "study_valid__table" not in query_res and prefix is None:
+                assert not tables
             else:
-                assert "study_valid__" in mock_cursor.execute.call_args.args[0]
-        assert mock_output.is_called()
+                assert tables == [[query_res, "VIEW"]]
+                if prefix is not None:
+                    assert prefix in mock_cursor.execute.call_args.args[0]
+                else:
+                    assert "study_valid__" in mock_cursor.execute.call_args.args[0]
+            assert mock_output.is_called()
 
 
 @pytest.mark.parametrize(