Ruff update #272

Merged: 2 commits, Jul 29, 2024
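Most of this diff is mechanical: the Ruff pre-commit hook moves from v0.4.4 to v0.5.5, and `ruff format` under the new version reflows the Python sources, joining calls onto single lines (the wider results suggest the project's line-length limit was also raised, though that config isn't shown here) and adding `# noqa` markers for rules the upgrade newly flags. A bump like this is typically generated rather than hand-edited; a sketch of the usual command, with the `--repo` filter assumed rather than confirmed by the PR:

pre-commit autoupdate --repo https://github.com/astral-sh/ruff-pre-commit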
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,7 +1,7 @@
 default_install_hook_types: [pre-commit, pre-push]
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.4  # if you update this, also update pyproject.toml
+    rev: v0.5.5  # if you update this, also update pyproject.toml
     hooks:
       - name: Ruff formatting
         id: ruff-format
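The inline comment asks for the Ruff pin in pyproject.toml to be kept in sync with this hook revision. A minimal sketch of what the matching entry might look like, assuming Ruff is pinned as a dev extra; the table name and pin style are assumptions, since pyproject.toml isn't part of this diff:

[project.optional-dependencies]
dev = [
    "ruff == 0.5.5",  # if you update this, also update .pre-commit-config.yaml
]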
14 changes: 4 additions & 10 deletions cumulus_library/actions/builder.py
@@ -94,9 +94,7 @@ def _load_and_execute_builder(
     table_builder_class = table_builder_subclasses[0]
     table_builder = table_builder_class()
     if write_reference_sql:
-        table_builder.prepare_queries(
-            config=config, manifest=manifest, parser=db_parser
-        )
+        table_builder.prepare_queries(config=config, manifest=manifest, parser=db_parser)
         table_builder.comment_queries(doc_str=doc_str)
         new_filename = pathlib.Path(f"{filename}").stem + ".sql"
         table_builder.write_queries(
@@ -191,7 +189,7 @@ def run_statistics_builders(
     existing_stats = (
         config.db.cursor()
         .execute(
-            "SELECT view_name FROM "
+            "SELECT view_name FROM "  # noqa: S608
             f"{manifest.get_study_prefix()}__{enums.ProtectedTables.STATISTICS.value}"
         )
         .fetchall()
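S608 is Ruff's flake8-bandit check for SQL assembled through string formatting. The `# noqa: S608` markers added throughout this PR suppress it rather than restructure the queries, presumably because the interpolated pieces are study prefixes from the manifest rather than user input, and table names can't be bound as parameters anyway. For contrast, a minimal sketch of the parameterized form the rule prefers, using stdlib sqlite3 as a stand-in (the table and values are illustrative, not from the PR):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE demo__lib_statistics (view_name TEXT, study_name TEXT)")
conn.execute("INSERT INTO demo__lib_statistics VALUES (?, ?)", ("demo__view_1", "demo"))
# Binding the value with a placeholder keeps it out of the SQL text, so S608
# has nothing to flag; identifiers like table names still have to be
# interpolated, which is what the noqa markers above acknowledge.
rows = conn.execute(
    "SELECT view_name FROM demo__lib_statistics WHERE study_name = ?", ("demo",)
).fetchall()
print(rows)  # [('demo__view_1',)]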
@@ -273,9 +271,7 @@ def build_study(
     """
     queries = []
     for file in manifest.get_sql_file_list(continue_from):
-        for query in base_utils.parse_sql(
-            base_utils.load_text(f"{manifest._study_path}/{file}")
-        ):
+        for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{file}")):
             queries.append([query, file])
     if len(queries) == 0:
         return []
@@ -377,9 +373,7 @@ def _execute_build_queries(
                 "start with a string like `study_prefix__`.",
             )
         try:
-            with base_utils.query_console_output(
-                config.verbose, query[0], progress, task
-            ):
+            with base_utils.query_console_output(config.verbose, query[0], progress, task):
                 cursor.execute(query[0])
         except Exception as e:  # pylint: disable=broad-exception-caught
             _query_error(
6 changes: 2 additions & 4 deletions cumulus_library/actions/cleaner.py
@@ -57,7 +57,7 @@ def get_unprotected_stats_view_table(
     protected_list = cursor.execute(
         f"""SELECT {artifact_type.lower()}_name
         FROM {drop_prefix}{enums.ProtectedTables.STATISTICS.value}
-        WHERE study_name = '{display_prefix}'"""
+        WHERE study_name = '{display_prefix}'"""  # noqa: S608
     ).fetchall()
     for protected_tuple in protected_list:
         if protected_tuple in db_contents:
@@ -66,9 +66,7 @@ def get_unprotected_stats_view_table(
         # this check handles athena reporting views as also being tables,
         # so we don't waste time dropping things that don't exist
         if artifact_type == "TABLE":
-            if not any(
-                db_row_tuple[0] in iter_q_and_t for iter_q_and_t in unprotected_list
-            ):
+            if not any(db_row_tuple[0] in iter_q_and_t for iter_q_and_t in unprotected_list):
                 unprotected_list.append([db_row_tuple[0], artifact_type])
         else:
             unprotected_list.append([db_row_tuple[0], artifact_type])
10 changes: 3 additions & 7 deletions cumulus_library/actions/exporter.py
@@ -27,9 +27,7 @@ def reset_counts_exports(
 def _write_chunk(writer, chunk, arrow_schema):
     writer.write(
         pyarrow.Table.from_pandas(
-            chunk.sort_values(
-                by=list(chunk.columns), ascending=False, na_position="first"
-            ),
+            chunk.sort_values(by=list(chunk.columns), ascending=False, na_position="first"),
             preserve_index=False,
             schema=arrow_schema,
         )
@@ -70,11 +68,9 @@ def export_study(
         table_list,
         description=f"Exporting {manifest.get_study_prefix()} data...",
     ):
-        query = f"SELECT * FROM {table}"
+        query = f"SELECT * FROM {table}"  # noqa: S608
         query = base_utils.update_query_if_schema_specified(query, manifest)
-        dataframe_chunks, db_schema = config.db.execute_as_pandas(
-            query, chunksize=chunksize
-        )
+        dataframe_chunks, db_schema = config.db.execute_as_pandas(query, chunksize=chunksize)
         path.mkdir(parents=True, exist_ok=True)
         arrow_schema = pyarrow.schema(config.db.col_pyarrow_types_from_sql(db_schema))
         with parquet.ParquetWriter(f"{path}/{table}.parquet", arrow_schema) as p_writer:
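For reference, a self-contained sketch of the chunk-by-chunk Parquet writing pattern this hunk reformats, with a toy DataFrame standing in for the database results; `write_table` is the long-standing pyarrow API, and the diff's `writer.write(...)` accepts a Table equivalently in recent pyarrow:

import pandas
import pyarrow
from pyarrow import parquet

df = pandas.DataFrame({"id": [3, 1, 2], "value": ["c", "a", "b"]})
arrow_schema = pyarrow.Schema.from_pandas(df, preserve_index=False)
with parquet.ParquetWriter("example.parquet", arrow_schema) as writer:
    for start in range(0, len(df), 2):  # stand-in for execute_as_pandas chunking
        chunk = df.iloc[start : start + 2]
        # Mirror _write_chunk above: sort, then append the chunk as a row group.
        chunk = chunk.sort_values(by=list(chunk.columns), ascending=False, na_position="first")
        writer.write_table(pyarrow.Table.from_pandas(chunk, preserve_index=False, schema=arrow_schema))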
12 changes: 3 additions & 9 deletions cumulus_library/actions/importer.py
@@ -11,9 +11,7 @@

 def _create_table_from_parquet(archive, file, study_name, config):
     try:
-        parquet_path = pathlib.Path(
-            archive.extract(file), path=tempfile.TemporaryFile()
-        )
+        parquet_path = pathlib.Path(archive.extract(file), path=tempfile.TemporaryFile())
         # While convenient to access, this exposes us to panda's type system,
         # which is messy - this could be optionally be replaced by pyarrow if it
         # becomes problematic.
@@ -56,13 +54,9 @@ def import_archive(config: base_utils.StudyConfig, *, archive_path: pathlib.Path
             files = archive.namelist()
             files = [file for file in files if file.endswith(".parquet")]
     except zipfile.BadZipFile as e:
-        raise errors.StudyImportError(
-            f"File {archive_path} is not a valid archive."
-        ) from e
+        raise errors.StudyImportError(f"File {archive_path} is not a valid archive.") from e
     if not any("__" in file for file in files):
-        raise errors.StudyImportError(
-            f"File {archive_path} contains non-study parquet files."
-        )
+        raise errors.StudyImportError(f"File {archive_path} contains non-study parquet files.")
     study_name = files[0].split("__")[0]
     for file in files[1:]:
         if file.split("__")[0] != study_name:
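The validation above depends on the `study__table.parquet` naming convention: every parquet file in the archive must share the prefix before the double underscore. A small, self-contained sketch of that check (file names are illustrative):

files = ["core__patient.parquet", "core__meta_version.parquet"]
study_name = files[0].split("__")[0]
# A file from a different study would make this fail, mirroring the
# StudyImportError raised above.
assert all(file.split("__")[0] == study_name for file in files)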
7 changes: 2 additions & 5 deletions cumulus_library/actions/uploader.py
@@ -65,8 +65,7 @@ def upload_files(args: dict):
     """Wrapper to prep files & console output"""
     if args["data_path"] is None:
         sys.exit(
-            "No data directory provided - please provide a path to your"
-            "study export folder."
+            "No data directory provided - please provide a path to your" "study export folder."
         )
     file_paths = list(args["data_path"].glob("**/*.parquet"))
     if args["target"]:
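Worth noting: the formatter merged two implicitly concatenated string literals onto one line but, correctly, did not alter their contents, so a pre-existing missing space survives; the message still renders as "...a path to yourstudy export folder." A quick demonstration:

message = "No data directory provided - please provide a path to your" "study export folder."
assert "yourstudy" in message  # adjacent literals join with no separator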
@@ -79,9 +78,7 @@ def upload_files(args: dict):
     if not args["user"] or not args["id"]:
         sys.exit("user/id not provided, please pass --user and --id")
     try:
-        meta_version = next(
-            filter(lambda x: str(x).endswith("__meta_version.parquet"), file_paths)
-        )
+        meta_version = next(filter(lambda x: str(x).endswith("__meta_version.parquet"), file_paths))
         version = str(read_parquet(meta_version)["data_package_version"][0])
         file_paths.remove(meta_version)
     except StopIteration:
13 changes: 4 additions & 9 deletions cumulus_library/apis/umls.py
@@ -99,9 +99,7 @@ def get_vsac_valuesets(
         included_records = all_responses[0].get("compose", {}).get("include", [])
         for record in included_records:
             if "valueSet" in record:
-                valueset = self.get_vsac_valuesets(
-                    action=action, url=record["valueSet"][0]
-                )
+                valueset = self.get_vsac_valuesets(action=action, url=record["valueSet"][0])
                 all_responses.append(valueset[0])
         return all_responses

@@ -143,24 +141,21 @@ def download_umls_files(
                 "url": file_meta["downloadUrl"],
                 "apiKey": self.api_key,
             }
-            download_res = requests.get(
+            download_res = requests.get(  # noqa: S113
                 "https://uts-ws.nlm.nih.gov/download", params=download_payload, stream=True
             )

             with open(path / file_meta["fileName"], "wb") as f:
                 chunks_read = 0
                 with base_utils.get_progress_bar() as progress:
-                    task = progress.add_task(
-                        f"Downloading {file_meta['fileName']}", total=None
-                    )
+                    task = progress.add_task(f"Downloading {file_meta['fileName']}", total=None)
                     for chunk in download_res.iter_content(chunk_size=1024):
                         f.write(chunk)
                         chunks_read += 1
                         progress.update(
                             task,
                             description=(
-                                f"Downloading {file_meta['fileName']}: "
-                                f"{chunks_read/1000} MB"
+                                f"Downloading {file_meta['fileName']}: " f"{chunks_read/1000} MB"
                             ),
                         )
             if unzip:
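S113 flags `requests` calls made without a timeout, which can hang indefinitely on a stalled connection. The PR suppresses it for this streaming download; a sketch of the alternative, passing an explicit timeout (the endpoint is real per the diff, but the payload and the 30-second value are illustrative assumptions):

import requests

download_res = requests.get(
    "https://uts-ws.nlm.nih.gov/download",
    params={"url": "https://example.com/file.zip", "apiKey": "your-api-key"},
    stream=True,
    timeout=30,  # seconds before raising requests.exceptions.Timeout
)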
18 changes: 5 additions & 13 deletions cumulus_library/base_table_builder.py
@@ -56,17 +56,13 @@ def execute_queries(
         table_names = []
         for query in self.queries:
             # Get the first non-whitespace word after create table
-            table_name = re.search(
-                '(?i)(?<=create table )(([a-zA-Z0-9_".-]+))', query
-            )
+            table_name = re.search('(?i)(?<=create table )(([a-zA-Z0-9_".-]+))', query)

             if table_name:
                 if table_name[0] == "IF":
                     # Edge case - if we're doing an empty conditional CTAS creation,
                     # we need to run a slightly different regex
-                    table_name = re.search(
-                        '(?i)(?<=not exists )(([a-zA-Z0-9_".-]+))', query
-                    )
+                    table_name = re.search('(?i)(?<=not exists )(([a-zA-Z0-9_".-]+))', query)

                 table_name = table_name[0]
                 table_names.append(table_name)
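The lookbehind regex being reflowed here pulls the first token after `create table` (or, in the conditional branch, after `not exists`), case-insensitively. A quick check of its behavior on a conditional CTAS statement:

import re

query = 'CREATE TABLE IF NOT EXISTS my_study__table_1 AS SELECT 1'
table_name = re.search('(?i)(?<=create table )(([a-zA-Z0-9_".-]+))', query)
if table_name[0] == "IF":
    # Conditional creation: fall back to the second pattern, as the code above does
    table_name = re.search('(?i)(?<=not exists )(([a-zA-Z0-9_".-]+))', query)
print(table_name[0])  # my_study__table_1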
@@ -81,20 +77,16 @@ def execute_queries(
         for query in self.queries:
             try:
                 query = base_utils.update_query_if_schema_specified(query, manifest)
-                with base_utils.query_console_output(
-                    config.verbose, query, progress, task
-                ):
+                with base_utils.query_console_output(config.verbose, query, progress, task):
                     cursor.execute(query)
             except Exception as e:  # pylint: disable=broad-exception-caught
                 sys.exit(
-                    "An error occured executing this query:\n----\n"
-                    f"{query}\n----\n"
-                    f"{e}"
+                    "An error occured executing this query:\n----\n" f"{query}\n----\n" f"{e}"
                 )

         self.post_execution(config, *args, **kwargs)

-    def post_execution(  # noqa: B027 - this looks like, but is not, an abstract method
+    def post_execution(
         self,
         config: base_utils.StudyConfig,
         *args,
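The dropped comment referenced B027, the flake8-bugbear rule that flags an empty, non-abstract method on an abstract base class; its removal suggests the suppression is no longer needed under Ruff 0.5.x, though the PR doesn't say why. A minimal sketch of the pattern the rule targets (illustrative, not the actual class):

import abc

class Builder(abc.ABC):
    @abc.abstractmethod
    def prepare_queries(self) -> None: ...

    def post_execution(self) -> None:  # B027 would flag this empty, overridable hook
        """Optional hook; subclasses may override but are not required to."""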
4 changes: 1 addition & 3 deletions cumulus_library/base_utils.py
@@ -124,9 +124,7 @@ def zip_dir(read_path, write_path, archive_name):
     shutil.rmtree(read_path)


-def update_query_if_schema_specified(
-    query: str, manifest: study_manifest.StudyManifest
-):
+def update_query_if_schema_specified(query: str, manifest: study_manifest.StudyManifest):
     if manifest and manifest.get_dedicated_schema():
         # External queries in athena require a schema to be specified already, so
         # rather than splitting and ending up with a table name like
36 changes: 9 additions & 27 deletions cumulus_library/cli.py
@@ -98,9 +98,7 @@ def clean_and_build_study(
         """
         manifest = study_manifest.StudyManifest(target, self.data_path)
         try:
-            builder.run_protected_table_builder(
-                config=self.get_config(manifest), manifest=manifest
-            )
+            builder.run_protected_table_builder(config=self.get_config(manifest), manifest=manifest)
             if not continue_from:
                 log_utils.log_transaction(
                     config=self.get_config(manifest),
@@ -111,9 +109,7 @@ def clean_and_build_study(
                     config=self.get_config(manifest),
                     manifest=manifest,
                 )
-                builder.run_table_builder(
-                    config=self.get_config(manifest), manifest=manifest
-                )
+                builder.run_table_builder(config=self.get_config(manifest), manifest=manifest)

             else:
                 log_utils.log_transaction(
@@ -127,9 +123,7 @@ def clean_and_build_study(
                     manifest=manifest,
                     continue_from=continue_from,
                 )
-                builder.run_counts_builders(
-                    config=self.get_config(manifest), manifest=manifest
-                )
+                builder.run_counts_builders(config=self.get_config(manifest), manifest=manifest)
             builder.run_statistics_builders(
                 config=self.get_config(manifest),
                 manifest=manifest,
@@ -170,9 +164,7 @@ def run_matching_table_builder(
         )

     ### Data exporters
-    def export_study(
-        self, target: pathlib.Path, data_path: pathlib.Path, archive: bool
-    ) -> None:
+    def export_study(self, target: pathlib.Path, data_path: pathlib.Path, archive: bool) -> None:
         """Exports aggregates defined in a manifest

         :param target: A path to the study directory
@@ -199,9 +191,7 @@ def generate_study_sql(
         :keyword builder: Specify a single builder to generate sql from
         """
         manifest = study_manifest.StudyManifest(target)
-        file_generator.run_generate_sql(
-            config=self.get_config(manifest), manifest=manifest
-        )
+        file_generator.run_generate_sql(config=self.get_config(manifest), manifest=manifest)

     def generate_study_markdown(
         self,
@@ -212,9 +202,7 @@ def generate_study_markdown(
         :param target: A path to the study directory
         """
         manifest = study_manifest.StudyManifest(target)
-        file_generator.run_generate_markdown(
-            config=self.get_config(manifest), manifest=manifest
-        )
+        file_generator.run_generate_markdown(config=self.get_config(manifest), manifest=manifest)


 def get_abs_path(path: str) -> pathlib.Path:
@@ -324,9 +312,7 @@ def run_cli(args: dict):
     elif args["action"] == "build":
         for target in args["target"]:
             if args["builder"]:
-                runner.run_matching_table_builder(
-                    study_dict[target], args["builder"]
-                )
+                runner.run_matching_table_builder(study_dict[target], args["builder"])
             else:
                 runner.clean_and_build_study(
                     study_dict[target],
@@ -350,9 +336,7 @@ def run_cli(args: dict):
             if response.lower() != "y":
                 sys.exit()
             for target in args["target"]:
-                runner.export_study(
-                    study_dict[target], args["data_path"], args["archive"]
-                )
+                runner.export_study(study_dict[target], args["data_path"], args["archive"])

     elif args["action"] == "import":
         for archive in args["archive_path"]:
@@ -361,9 +345,7 @@ def run_cli(args: dict):

     elif args["action"] == "generate-sql":
         for target in args["target"]:
-            runner.generate_study_sql(
-                study_dict[target], builder=args["builder"]
-            )
+            runner.generate_study_sql(study_dict[target], builder=args["builder"])

     elif args["action"] == "generate-md":
         for target in args["target"]:
Expand Down
20 changes: 5 additions & 15 deletions cumulus_library/cli_parser.py
@@ -228,9 +228,7 @@ def get_parser() -> argparse.ArgumentParser:

     # Database export

-    export = actions.add_parser(
-        "export", help="Generates files on disk from Athena tables/views"
-    )
+    export = actions.add_parser("export", help="Generates files on disk from Athena tables/views")
     add_custom_option(export)
     add_target_argument(export)
     add_study_dir_argument(export)
@@ -245,9 +243,7 @@ def get_parser() -> argparse.ArgumentParser:

     # Database import

-    importer = actions.add_parser(
-        "import", help="Recreates a study from an exported archive"
-    )
+    importer = actions.add_parser("import", help="Recreates a study from an exported archive")
     add_db_config(importer)
     add_verbose_argument(importer)
     importer.add_argument(
@@ -258,15 +254,11 @@ def get_parser() -> argparse.ArgumentParser:
     )
     # Aggregator upload

-    upload = actions.add_parser(
-        "upload", help="Bulk uploads data to Cumulus aggregator"
-    )
+    upload = actions.add_parser("upload", help="Bulk uploads data to Cumulus aggregator")
     add_data_path_argument(upload)
     add_target_argument(upload)

-    upload.add_argument(
-        "--id", help="Site ID. Default is value of CUMULUS_AGGREGATOR_ID"
-    )
+    upload.add_argument("--id", help="Site ID. Default is value of CUMULUS_AGGREGATOR_ID")
     upload.add_argument(
         "--preview",
         default=False,
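The `--id` and `--user` help strings describe environment-variable fallbacks. A sketch of one common way to wire that up with argparse; how this CLI actually resolves the default happens elsewhere and is an assumption here:

import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("--id", help="Site ID. Default is value of CUMULUS_AGGREGATOR_ID")
args = vars(parser.parse_args([]))
site_id = args["id"] or os.environ.get("CUMULUS_AGGREGATOR_ID")  # flag wins over env var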
@@ -281,9 +273,7 @@ def get_parser() -> argparse.ArgumentParser:
         ),
         default="https://aggregator.smartcumulus.org/upload/",
     )
-    upload.add_argument(
-        "--user", help="Cumulus user. Default is value of CUMULUS_AGGREGATOR_USER"
-    )
+    upload.add_argument("--user", help="Cumulus user. Default is value of CUMULUS_AGGREGATOR_USER")

     # Generate a study's template-driven sql
