Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2.0 docs update #186

Merged
merged 8 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ data_export/
.DS_Store
cumulus_library_columns.json
output.sql
*generated.md

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
1 change: 1 addition & 0 deletions cumulus_library/.sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ source_id = source_id
table_cols = ["a","b"]
table_cols_types = ["varchar", "varchar"]
table_name = test_table
table_names = ["test_table"]
table_suffix = 2024_01_01_11_11_11
target_col_prefix = prefix
target_table = target_table
Expand Down
20 changes: 20 additions & 0 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,22 @@ def generate_study_sql(
parser=self.db.parser(),
)

def generate_study_markdown(
    self,
    target: pathlib.Path,
) -> None:
    """Generates markdown documentation tables for a study

    Builds a manifest parser for the study at `target` and delegates to its
    `run_generate_markdown`, handing over this runner's cursor, schema name,
    verbosity flag and database parser.

    :param target: A path to the study directory
    """
    manifest_parser = study_parser.StudyManifestParser(target)
    manifest_parser.run_generate_markdown(
        cursor=self.cursor,
        schema=self.schema_name,
        verbose=self.verbose,
        parser=self.db.parser(),
    )


def get_abs_posix_path(path: str) -> pathlib.Path:
"""Convenience method for handling abs vs rel paths"""
Expand Down Expand Up @@ -373,6 +389,10 @@ def run_cli(args: dict):
elif args["action"] == "generate-sql":
for target in args["target"]:
runner.generate_study_sql(study_dict[target])

elif args["action"] == "generate-md":
dogversioning marked this conversation as resolved.
Show resolved Hide resolved
for target in args["target"]:
runner.generate_study_markdown(study_dict[target])
finally:
db_backend.close()

Expand Down
11 changes: 11 additions & 0 deletions cumulus_library/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,4 +229,15 @@ def get_parser() -> argparse.ArgumentParser:
add_target_argument(generate)
add_study_dir_argument(generate)
add_db_config(generate)

# Generate markdown tables for documentation
markdown = actions.add_parser(
"generate-md", help="Generates markdown tables for study documentation"
)
add_target_argument(markdown)
add_study_dir_argument(markdown)
add_data_path_argument(markdown)
add_db_config(markdown)
add_verbose_argument(markdown)

return parser
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ WITH temp_observation AS (
odvcc.code AS valuecodeableconcept_code,
odvcc.code_system AS valuecodeableconcept_code_system,
odvcc.display AS valuecodeableconcept_display,
o.component,
o.referencerange,
o.valuequantity,
o.subject.reference AS subject_ref,
Expand All @@ -52,15 +51,12 @@ SELECT
id,
category_code,
category_code_system,
component,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fields removed here were not flattened - I'll swing back around to the required/must-support fields for these.

status,
observation_code,
observation_code_system,
interpretation_code,
interpretation_code_system,
interpretation_display,
referencerange,
valuequantity,
valuecodeableconcept_code,
valuecodeableconcept_code_system,
valuecodeableconcept_display,
Expand Down
3 changes: 0 additions & 3 deletions cumulus_library/studies/core/observation_type.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,10 @@ SELECT
co.valuecodeableconcept_code,
co.valuecodeableconcept_code_system,
co.valuecodeableconcept_display,
co.component,
co.status,
co.interpretation_code,
co.interpretation_code_system,
co.interpretation_display,
co.referencerange,
co.valuequantity,
co.obs_date,
co.obs_week,
co.obs_month,
Expand Down
53 changes: 53 additions & 0 deletions cumulus_library/study_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
import inspect
import pathlib
import sys
import typing

import pandas
import pytablewriter
import toml
from rich.progress import Progress, TaskID, track

Expand Down Expand Up @@ -552,6 +555,56 @@ def run_generate_sql(
verbose=verbose,
)

def run_generate_markdown(
    self,
    cursor: databases.DatabaseCursor,
    schema: str,
    parser: databases.DatabaseParser = None,
    verbose: bool = False,
) -> None:
    """Generates a markdown reference of the study's tables and their columns

    Looks up the tables in `schema` matching the study prefix, fetches the
    column names and data types of those tables, and writes one markdown
    table per database table to `<prefix>_generated.md` in the study
    directory. Tables with `__count_` in their name are written first under
    a "count tables" heading, followed by the rest under "base tables". A
    heading is only written when its section has at least one table.

    :param cursor: A DatabaseCursor object
    :param schema: The name of the schema to read table definitions from
    :param parser: A DatabaseParser object (not used by this method)
    :param verbose: toggle from progress bar to query output (not used by
        this method)
    """
    prefix = self.get_study_prefix()
    query = base_templates.get_show_tables(
        schema_name=schema, prefix=f"{prefix}__"
    )
    tables = [row[0] for row in cursor.execute(query).fetchall()]

    query = base_templates.get_column_datatype_query(
        schema_name=schema, table_names=tables, include_table_names=True
    )
    study_df = pandas.DataFrame(
        cursor.execute(query).fetchall(), columns=["Column", "Type", "Table"]
    )

    table_list = sorted(study_df["Table"].unique())
    sections = (
        ("count", [t for t in table_list if "__count_" in t]),
        ("base", [t for t in table_list if "__count_" not in t]),
    )
    # Explicit encoding so the generated file does not depend on the locale
    # of the machine the docs were built on.
    with open(
        self._study_path / f"{prefix}_generated.md", "w", encoding="utf-8"
    ) as f:
        for label, section_tables in sections:
            if not section_tables:
                continue
            f.write(f"## {prefix} {label} tables\n\n")
            for table in section_tables:
                self._write_md_table(table, study_df, f)

def _write_md_table(self, name: str, df: pandas.DataFrame, file: typing.IO):
    """Writes the columns of a single table to a file as a markdown table

    The rows of `df` belonging to `name` are selected, the "Table" column is
    dropped, and an empty "Description" column is appended before rendering.

    :param name: the table to document; matched against the "Table" column
    :param df: a dataframe with a "Table" column identifying each row's table
    :param file: an open, writable text stream the markdown is written to
    """
    belongs_to_table = df["Table"] == name
    columns_df = (
        df.loc[belongs_to_table].drop(columns="Table").assign(Description="")
    )

    md_writer = pytablewriter.MarkdownTableWriter(dataframe=columns_df)
    md_writer.stream = file
    md_writer.table_name = f"{name}\n"
    # NOTE(review): presumably controls the heading depth used for the table
    # name - confirm against the pytablewriter docs.
    md_writer.set_indent_level(2)
    md_writer.write_table()
    file.write("\n\n")

def build_study(
self,
cursor: databases.DatabaseCursor,
Expand Down
12 changes: 10 additions & 2 deletions cumulus_library/template_sql/base_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,21 @@ def get_codeable_concept_denormalize_query(
)


def get_column_datatype_query(
    schema_name: str,
    table_names: str | list,
    column_names: list | None = None,
    include_table_names: bool | None = False,
):
    """Gets the in-database data representation of columns in one or more tables

    :param schema_name: the schema the tables live in
    :param table_names: a single table name, or a list of table names
    :param column_names: if provided, restricts the query to these columns;
        if omitted, the query is not filtered by column
    :param include_table_names: if true, the query also selects the name of
        the table each column belongs to
    :returns: the result of get_base_template for the `column_datatype`
        template (presumably the rendered SQL string - confirm against
        get_base_template)
    """
    # A bare string is accepted for convenience; the template joins a list.
    if isinstance(table_names, str):
        table_names = [table_names]
    return get_base_template(
        "column_datatype",
        schema_name=schema_name,
        table_names=table_names,
        column_names=column_names,
        include_table_names=include_table_names,
    )


Expand Down
9 changes: 7 additions & 2 deletions cumulus_library/template_sql/column_datatype.sql.jinja
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
{#- Selects column names and data types (plus table names when
    include_table_names is set) for the given tables in a schema, optionally
    restricted to the columns listed in column_names. -#}
SELECT
    column_name,
    data_type
    {%- if include_table_names -%},
    table_name --noqa: LT02
    {%- endif %}
FROM information_schema.columns
WHERE
    table_schema = '{{ schema_name }}'
    AND table_name IN ('{{ table_names|join("', '")|lower }}')
    {%- if column_names %}
    AND LOWER(column_name) IN ('{{ column_names|join("', '")|lower }}') --noqa: LT02,LT05
    {%- endif %}
Loading
Loading