Skip to content

Commit

Permalink
PR feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
dogversioning committed Feb 2, 2024
1 parent ff299df commit 7b79c8e
Show file tree
Hide file tree
Showing 10 changed files with 57 additions and 44 deletions.
5 changes: 2 additions & 3 deletions cumulus_library/base_table_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,8 @@ def execute_queries(
)
for query in self.queries:
try:
helper.query_console_verbose(verbose, query)
cursor.execute(query)
helper.query_console_progress(verbose, progress, task)
with helper.query_console_output(verbose, query, progress, task):
cursor.execute(query)
except Exception as e: # pylint: disable=broad-exception-caught
sys.exit(e)

Expand Down
8 changes: 2 additions & 6 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,12 +355,8 @@ def run_cli(args: Dict):
runner.export_study(study_dict[target], args["data_path"])

elif args["action"] == "generate-sql":
if "all" in args["target"]:
for target in study_dict.keys(): # pylint: disable= C0206, C0201
runner.generate_all_sql(study_dict[target])
else:
for target in args["target"]:
runner.generate_study_sql(study_dict[target])
for target in args["target"]:
runner.generate_study_sql(study_dict[target])
finally:
db_backend.close()

Expand Down
24 changes: 15 additions & 9 deletions cumulus_library/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,15 +217,20 @@ def __init__(self, db_file: str):
duckdb.typing.TIMESTAMP,
)
self.connection.create_function(
# This is in support of the md5 function - duckdb's implementation of
# md5 just takes a string (very reasonable!), but trino's implementation
# takes a varbinary type (which duckdb does not support), so we have to
# run a string through a UTF8 conversion in production environments.
# For duckdb's purposes, we'll just return the argument passed with no
# modifications.
# When trying to calculate an MD5 hash in Trino/Athena, the implementation
# expects to recieve a varbinary type, so if you're hashing a string,
# you would invoke it like `SELECT md5(to_utf8(string_col)) from table`.
#
# DuckDB's md5() function accepts a varchar instead, and does not have a
# to_utf() function or varbinary type, so we patch this with a UDF that
# just provides back the original string. As a result, these functions
# have different signatures, but for cases like this where you're
# conforming an argument to another function, it provides appropriate
# function mocking
#
# NOTE: currently we do not have a use case beyond experimentation where
# this provides a benefit. Until we do, it is not required to support this
# in other DatabaseBackend implementations.
# using MD5 hashes provide a benefit. Until we do, it is not required to
# support this in other DatabaseBackend implementations.
"to_utf8",
self._compat_to_utf8,
None,
Expand Down Expand Up @@ -261,7 +266,8 @@ def _compat_date(
raise ValueError("Unexpected date() argument:", type(value), value)

@staticmethod
def _compat_to_utf8(value: str) -> Optional[datetime.date]:
def _compat_to_utf8(value: Optional[str]) -> Optional[datetime.date]:
"""See the create_function() call for to_utf8 for more background"""
return value

@staticmethod
Expand Down
19 changes: 10 additions & 9 deletions cumulus_library/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import datetime
import os
import json

from contextlib import contextmanager
from typing import List

from rich import progress


Expand Down Expand Up @@ -48,19 +51,17 @@ def list_coding(code_display: dict, system=None) -> List[dict]:
return as_list


def query_console_progress(
verbose: bool, progress_bar: progress.Progress, task: progress.Task
@contextmanager
def query_console_output(
verbose: bool, query: str, progress_bar: progress.Progress, task: progress.Task
):
"""Convenience function for updating progress bar"""
if not verbose:
progress_bar.advance(task)


def query_console_verbose(verbose: bool, query: str):
"""Convenience function for printing verbose queries"""
"""Convenience context manager for handling console output"""
if verbose:
print()
print(query)
yield
if not verbose:
progress_bar.advance(task)


def get_progress_bar(**kwargs) -> progress.Progress:
Expand Down
14 changes: 8 additions & 6 deletions cumulus_library/study_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,9 +311,10 @@ def _execute_drop_queries(
drop_view_table = templates.get_drop_view_table(
name=view_table[0], view_or_table=view_table[1]
)
helper.query_console_verbose(verbose, drop_view_table)
cursor.execute(drop_view_table)
helper.query_console_progress(verbose, progress, task)
with helper.query_console_output(
verbose, drop_view_table, progress, task
) as v:
cursor.execute(drop_view_table)

def _load_and_execute_builder(
self,
Expand Down Expand Up @@ -653,9 +654,10 @@ def _execute_build_queries(
"start with a string like `study_prefix__`.",
)
try:
helper.query_console_verbose(verbose, query[0])
cursor.execute(query[0])
helper.query_console_progress(verbose, progress, task)
with helper.query_console_output(
verbose, query[0], progress, task
) as v:
cursor.execute(query[0])
except Exception as e: # pylint: disable=broad-exception-caught
self._query_error(
query,
Expand Down
14 changes: 7 additions & 7 deletions cumulus_library/template_sql/statistics/count.sql.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ infrastructure for future/moving resource specific logic to template generator -
{#- implicit null -#}
{%- endmacro -%}

{%- macro cols_delenated_list(table_cols, fhir_resource)-%}
{%- macro cols_delineated_list(table_cols, fhir_resource)-%}
{% for col in table_cols %}
"{{ column_or_alias(col) }}"
{{- comma_delineate(loop) }}
Expand Down Expand Up @@ -145,15 +145,15 @@ CREATE TABLE {{ table_name }} AS (
tertiary_powerset AS(
SELECT
count(DISTINCT {{tertiary}}) AS cnt_{{tertiary}},
{{-cols_delenated_list(table_cols, fhir_resource)}},
{{-cols_delineated_list(table_cols, fhir_resource)}},
concat_ws(
'-',
{{-cols_coalesced_list(table_cols, fhir_resource)}}
) AS id
FROM null_replacement
GROUP BY
cube(
{{-cols_delenated_list(table_cols, fhir_resource)}}
{{-cols_delineated_list(table_cols, fhir_resource)}}
)

),
Expand All @@ -162,31 +162,31 @@ CREATE TABLE {{ table_name }} AS (
secondary_powerset AS (
SELECT
count(DISTINCT {{secondary}}) AS cnt_{{secondary}},
{{-cols_delenated_list(table_cols, fhir_resource)}},
{{-cols_delineated_list(table_cols, fhir_resource)}},
concat_ws(
'-',
{{-cols_coalesced_list(table_cols, fhir_resource)}}
) AS id
FROM null_replacement
GROUP BY
cube(
{{-cols_delenated_list(table_cols, fhir_resource)}}
{{-cols_delineated_list(table_cols, fhir_resource)}}
)
),
{%- endif %}

powerset AS (
SELECT
count(DISTINCT subject_ref) AS cnt_subject_ref,
{{-cols_delenated_list(table_cols, fhir_resource)}},
{{-cols_delineated_list(table_cols, fhir_resource)}},
concat_ws(
'-',
{{-cols_coalesced_list(table_cols, fhir_resource)}}
) AS id
FROM null_replacement
GROUP BY
cube(
{{-cols_delenated_list(table_cols, fhir_resource)}}
{{-cols_delineated_list(table_cols, fhir_resource)}}
)
)

Expand Down
2 changes: 1 addition & 1 deletion docs/creating-studies.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ styling.
- All your tables **must** start with a string like `my_study__`.
- Relatedly, **`__` is a reserved character string**. Your table names should have
exactly one of these. We :might: add other use cases for these in the future,
but as of this writing we don't plan to. C
but as of this writing we don't plan to.
- We have **three reserved post-study prefrix substrings: `etl_`, `nlp_`, and
`lib_`** so that we don't drop tables created by other processes. These are fine
to use elsewhere; `my_study__nlp_counts` would cause an error, but
Expand Down
9 changes: 9 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@
from tests.conftest import ResourceTableIdPos as idpos # pylint: disable=unused-import


def get_sorted_table_data(cursor, table):
num_cols = cursor.execute(
"SELECT count(*) FROM information_schema.columns " f"WHERE table_name='{table}'"
).fetchone()[0]
return cursor.execute(
f"SELECT * FROM '{table}' ORDER BY " f"{','.join(map(str, range(1,num_cols)))}"
)


@pytest.mark.parametrize(
"table",
[
Expand Down
4 changes: 2 additions & 2 deletions tests/test_counts_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,6 @@
def test_count_query(expected, kwargs):
query = get_count_query("test_table", "test_source", ["age", "sex"], **kwargs)
# Snippet for getting updated template output
with open("output.sql", "w") as f:
f.write(query)
# with open("output.sql", "w") as f:
# f.write(query)
assert query == expected
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
cnt,cond_category_code,cond_month,code_display
cnt,cond_category_code,cond_month,cond_code_display
15,,,
15,encounter-diagnosis,,

0 comments on commit 7b79c8e

Please sign in to comment.