Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Confirm drop by prefix, add --continue #140

Merged
merged 3 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 41 additions & 34 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def reset_data_path(self, study: PosixPath) -> None:

### Creating studies

def clean_study(self, targets: List[str], prefix=False) -> None:
def clean_study(self, targets: List[str], study_dict, prefix=False) -> None:
"""Removes study table/views from Athena.

While this is usually not required, since it it done as part of a build,
Expand All @@ -104,23 +104,26 @@ def clean_study(self, targets: List[str], prefix=False) -> None:
)
for target in targets:
if prefix:
prefix = target
parser = StudyManifestParser()
parser.clean_study(
self.cursor, self.schema_name, self.verbose, prefix=target
)
else:
prefix = f"{target}__"
parser = StudyManifestParser()
parser.clean_study(
self.cursor, self.schema_name, self.verbose, prefix=prefix
)
parser = StudyManifestParser(study_dict[target])
parser.clean_study(self.cursor, self.schema_name, self.verbose)

def clean_and_build_study(self, target: PosixPath) -> None:
def clean_and_build_study(
self, target: PosixPath, continue_from: str = None
) -> None:
"""Recreates study views/tables

:param target: A PosixPath to the study directory
"""
studyparser = StudyManifestParser(target)
studyparser.clean_study(self.cursor, self.schema_name, self.verbose)
studyparser.run_table_builder(self.cursor, self.schema_name, self.verbose)
studyparser.build_study(self.cursor, self.verbose)
if not continue_from:
studyparser.clean_study(self.cursor, self.schema_name, self.verbose)
studyparser.run_table_builder(self.cursor, self.schema_name, self.verbose)
studyparser.build_study(self.cursor, self.verbose, continue_from)
studyparser.run_counts_builder(self.cursor, self.schema_name, self.verbose)

def run_single_table_builder(
Expand Down Expand Up @@ -256,11 +259,8 @@ def run_cli(args: Dict):
print("Testing connection to athena...")
builder.cursor.execute("SHOW DATABASES")

if args["action"] == "clean":
builder.clean_study(args["target"], args["prefix"])

else:
study_dict = get_study_dict(args["study_dir"])
study_dict = get_study_dict(args["study_dir"])
if "prefix" not in args.keys():
if args["target"]:
for target in args["target"]:
if target not in study_dict:
Expand All @@ -270,25 +270,32 @@ def run_cli(args: Dict):
"If you are trying to run a custom study, make sure "
"you include `-s path/to/study/dir` as an arugment."
)
if args["action"] == "clean":
builder.clean_study(
args["target"],
study_dict,
args["prefix"],
)
elif args["action"] == "build":
if "all" in args["target"]:
builder.clean_and_build_all(study_dict)
else:
for target in args["target"]:
if args["builder"]:
builder.run_single_table_builder(
study_dict[target], args["builder"]
)
else:
builder.clean_and_build_study(
study_dict[target], continue_from=args["continue_from"]
)

if args["action"] == "build":
if "all" in args["target"]:
builder.clean_and_build_all(study_dict)
else:
for target in args["target"]:
if args["builder"]:
builder.run_single_table_builder(
study_dict[target], args["builder"]
)
else:
builder.clean_and_build_study(study_dict[target])

elif args["action"] == "export":
if "all" in args["target"]:
builder.export_all(study_dict, args["data_path"])
else:
for target in args["target"]:
builder.export_study(study_dict[target], args["data_path"])
elif args["action"] == "export":
if "all" in args["target"]:
builder.export_all(study_dict, args["data_path"])
else:
for target in args["target"]:
builder.export_study(study_dict[target], args["data_path"])

# returning the builder for ease of unit testing
return builder
Expand Down
7 changes: 7 additions & 0 deletions cumulus_library/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def get_parser() -> argparse.ArgumentParser:
)

add_target_argument(clean)
add_study_dir_argument(clean)
add_verbose_argument(clean)
add_aws_config(clean)
clean.add_argument(
Expand All @@ -141,6 +142,12 @@ def get_parser() -> argparse.ArgumentParser:
add_verbose_argument(build)
add_aws_config(build)

build.add_argument(
"--continue",
dest="continue_from",
help=(argparse.SUPPRESS),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the parens?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copy pasted from elsewhere - why i did that in the first place, ¯\_(ツ)_/¯

)

export = actions.add_parser(
"export", help="Generates files on disk from Athena views"
)
Expand Down
29 changes: 25 additions & 4 deletions cumulus_library/study_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,25 @@ def get_study_prefix(self) -> Optional[str]:
"""
return self._study_config.get("study_prefix")

def get_sql_file_list(self) -> Optional[StrList]:
def get_sql_file_list(self, continue_from: str = None) -> Optional[StrList]:
"""Reads the contents of the sql_config array from the manifest

:returns: An array of sql files from the manifest, or None if not found.
"""
sql_config = self._study_config.get("sql_config", {})
return sql_config.get("file_names", [])
sql_files = sql_config.get("file_names", [])
if continue_from:
match_found = False
for file in sql_files:
print(continue_from.replace(".sql", ""))
print(file.replace(".sql", ""))
dogversioning marked this conversation as resolved.
Show resolved Hide resolved
if continue_from.replace(".sql", "") == file.replace(".sql", ""):
sql_files = sql_files[sql_files.index(file) :]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: you could save yourself an index() call by doing for i, file in enumerate(sql_files): above, but 🤷

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ooh, i dig it

match_found = True
break
if not match_found:
sys.exit(f"No tables matching '{continue_from}' found")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You ready for this Python nonsense? Just do else: and skip match_found. For/else lets you run code if the for loop never broke.

i.e.:

for file in sql_files:
    if continue_from.replace(".sql", "") == file.replace(".sql", ""):
        sql_files = sql_files[sql_files.index(file) :]
        break
else:
    sys.exit(f"No tables matching '{continue_from}' found")

Alternatively, you could also do return sql_files[sql_files.index(file) :] and unconditionally exit after the loop.

Or keep it! Whatever works for your sense of code cleanliness.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

taking the for\else approach, just in case there's other post loop stuff that might end up here in the future.

return sql_files

def get_table_builder_file_list(self) -> Optional[StrList]:
"""Reads the contents of the table_builder_config array from the manifest
Expand Down Expand Up @@ -190,6 +202,13 @@ def clean_study(
):
view_table_list.remove(view_table)
# We want to only show a progress bar if we are :not: printing SQL lines
if prefix:
print("The following views/tables were selected by prefix:")
for view_table in view_table_list:
print(f" {view_table[0]}")
confirm = input("Remove these tables? (Y/N)")
dogversioning marked this conversation as resolved.
Show resolved Hide resolved
if confirm.lower() not in ("y", "yes"):
sys.exit("Table cleaning aborted")
with get_progress_bar(disable=verbose) as progress:
task = progress.add_task(
f"Removing {display_prefix} study artifacts...",
Expand Down Expand Up @@ -314,7 +333,9 @@ def run_single_table_builder(
name = f"{name}.py"
self._load_and_execute_builder(name, cursor, schema, verbose, drop_table=True)

def build_study(self, cursor: object, verbose: bool = False) -> List:
def build_study(
self, cursor: object, verbose: bool = False, continue_from: str = None
) -> List:
"""Creates tables in the schema by iterating through the sql_config.file_names

:param cursor: A PEP-249 compatible cursor object
Expand All @@ -323,7 +344,7 @@ def build_study(self, cursor: object, verbose: bool = False) -> List:
:returns: loaded queries (for unit testing only)
"""
queries = []
for file in self.get_sql_file_list():
for file in self.get_sql_file_list(continue_from):
for query in parse_sql(load_text(f"{self._study_path}/{file}")):
queries.append([query, file])
if len(queries) == 0:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,11 @@ def test_count_builder_mapping(
[
"clean",
"-t",
"study_python_counts_valid",
"core",
"--database",
"test",
],
"study_python_counts_valid__",
"core__",
),
(
[
Expand Down
50 changes: 26 additions & 24 deletions tests/test_study_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" tests for study parser against mocks in test_data """
import builtins
import pathlib
from contextlib import nullcontext as does_not_raise
from unittest import mock
Expand Down Expand Up @@ -74,37 +75,38 @@ def test_manifest_data(manifest_key, raises):


@pytest.mark.parametrize(
"schema,verbose,prefix,query_res,raises",
"schema,verbose,prefix,confirm,query_res,raises",
[
("schema", True, None, "study_valid__table", does_not_raise()),
("schema", False, None, "study_valid__table", does_not_raise()),
("schema", None, None, "study_valid__table", does_not_raise()),
(None, True, None, [], pytest.raises(ValueError)),
("schema", None, None, "study_valid__etl_table", does_not_raise()),
("schema", None, None, "study_valid__nlp_table", does_not_raise()),
("schema", None, None, "study_valid__lib_table", does_not_raise()),
("schema", None, None, "study_valid__lib", does_not_raise()),
("schema", None, "foo", "foo_table", does_not_raise()),
("schema", True, None, None, "study_valid__table", does_not_raise()),
("schema", False, None, None, "study_valid__table", does_not_raise()),
("schema", None, None, None, "study_valid__table", does_not_raise()),
(None, True, None, None, [], pytest.raises(ValueError)),
("schema", None, None, None, "study_valid__etl_table", does_not_raise()),
("schema", None, None, None, "study_valid__nlp_table", does_not_raise()),
("schema", None, None, None, "study_valid__lib_table", does_not_raise()),
("schema", None, None, None, "study_valid__lib", does_not_raise()),
("schema", None, "foo", "y", "foo_table", does_not_raise()),
("schema", None, "foo", "n", "foo_table", pytest.raises(SystemExit)),
],
)
@mock.patch("cumulus_library.helper.query_console_output")
def test_clean_study(mock_output, schema, verbose, prefix, query_res, raises):
def test_clean_study(mock_output, schema, verbose, prefix, confirm, query_res, raises):
with raises:
mock_cursor = mock.MagicMock()
mock_cursor.__iter__.return_value = [[query_res]]
parser = StudyManifestParser("./tests/test_data/study_valid/")
tables = parser.clean_study(mock_cursor, schema, verbose, prefix=prefix)
with mock.patch.object(builtins, "input", lambda _: confirm):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fun

mock_cursor = mock.MagicMock()
mock_cursor.__iter__.return_value = [[query_res]]
parser = StudyManifestParser("./tests/test_data/study_valid/")
tables = parser.clean_study(mock_cursor, schema, verbose, prefix=prefix)

if "study_valid__table" not in query_res and prefix is None:
print("wha")
assert not tables
else:
assert tables == [[query_res, "VIEW"]]
if prefix is not None:
assert prefix in mock_cursor.execute.call_args.args[0]
if "study_valid__table" not in query_res and prefix is None:
assert not tables
else:
assert "study_valid__" in mock_cursor.execute.call_args.args[0]
assert mock_output.is_called()
assert tables == [[query_res, "VIEW"]]
if prefix is not None:
assert prefix in mock_cursor.execute.call_args.args[0]
else:
assert "study_valid__" in mock_cursor.execute.call_args.args[0]
assert mock_output.is_called()


@pytest.mark.parametrize(
Expand Down