Skip to content

Commit

Permalink
v2.0 docs update (#186)
Browse files Browse the repository at this point in the history
* v2.0 docs update

* Updated tests

* markdown generation test

* Low hanging PR fruit

* spacing update

* docs update, data_col ordering

* sqlfluff pedantics

* more sqlfluff pedantics
  • Loading branch information
dogversioning authored Feb 28, 2024
1 parent 10e197a commit 53b5948
Show file tree
Hide file tree
Showing 17 changed files with 925 additions and 170 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ data_export/
.DS_Store
cumulus_library_columns.json
output.sql
*generated.md

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
1 change: 1 addition & 0 deletions cumulus_library/.sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ source_id = source_id
table_cols = ["a","b"]
table_cols_types = ["varchar", "varchar"]
table_name = test_table
table_names = ["test_table"]
table_suffix = 2024_01_01_11_11_11
target_col_prefix = prefix
target_table = target_table
Expand Down
20 changes: 20 additions & 0 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,22 @@ def generate_study_sql(
parser=self.db.parser(),
)

def generate_study_markdown(
    self,
    target: pathlib.Path,
) -> None:
    """Writes markdown documentation tables for a study

    :param target: A path to the study directory
    """
    # The manifest parser does the actual work; this method only hands it
    # the runner's cursor, schema name, verbosity flag and database parser.
    study_parser.StudyManifestParser(target).run_generate_markdown(
        self.cursor,
        self.schema_name,
        parser=self.db.parser(),
        verbose=self.verbose,
    )


def get_abs_posix_path(path: str) -> pathlib.Path:
"""Convenience method for handling abs vs rel paths"""
Expand Down Expand Up @@ -373,6 +389,10 @@ def run_cli(args: dict):
elif args["action"] == "generate-sql":
for target in args["target"]:
runner.generate_study_sql(study_dict[target])

elif args["action"] == "generate-md":
for target in args["target"]:
runner.generate_study_markdown(study_dict[target])
finally:
db_backend.close()

Expand Down
11 changes: 11 additions & 0 deletions cumulus_library/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,4 +229,15 @@ def get_parser() -> argparse.ArgumentParser:
add_target_argument(generate)
add_study_dir_argument(generate)
add_db_config(generate)

# Generate markdown tables for documentation
markdown = actions.add_parser(
"generate-md", help="Generates markdown tables for study documentation"
)
add_target_argument(markdown)
add_study_dir_argument(markdown)
add_data_path_argument(markdown)
add_db_config(markdown)
add_verbose_argument(markdown)

return parser
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ WITH temp_observation AS (
odvcc.code AS valuecodeableconcept_code,
odvcc.code_system AS valuecodeableconcept_code_system,
odvcc.display AS valuecodeableconcept_display,
o.component,
o.referencerange,
o.valuequantity,
o.subject.reference AS subject_ref,
Expand All @@ -52,15 +51,12 @@ SELECT
id,
category_code,
category_code_system,
component,
status,
observation_code,
observation_code_system,
interpretation_code,
interpretation_code_system,
interpretation_display,
referencerange,
valuequantity,
valuecodeableconcept_code,
valuecodeableconcept_code_system,
valuecodeableconcept_display,
Expand Down
3 changes: 0 additions & 3 deletions cumulus_library/studies/core/observation_type.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,10 @@ SELECT
co.valuecodeableconcept_code,
co.valuecodeableconcept_code_system,
co.valuecodeableconcept_display,
co.component,
co.status,
co.interpretation_code,
co.interpretation_code_system,
co.interpretation_display,
co.referencerange,
co.valuequantity,
co.obs_date,
co.obs_week,
co.obs_month,
Expand Down
53 changes: 53 additions & 0 deletions cumulus_library/study_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
import inspect
import pathlib
import sys
import typing

import pandas
import pytablewriter
import toml
from rich.progress import Progress, TaskID, track

Expand Down Expand Up @@ -552,6 +555,56 @@ def run_generate_sql(
verbose=verbose,
)

def run_generate_markdown(
    self,
    cursor: databases.DatabaseCursor,
    schema: str,
    parser: databases.DatabaseParser = None,
    verbose: bool = False,
) -> None:
    """Generates a markdown reference of a study's tables and columns

    Finds the tables in the schema whose names start with the study prefix,
    queries their column names and types, and writes one markdown table per
    database table to `<study prefix>_generated.md` in the study directory.
    Tables with `__count_` in their name are written first, under a
    "count tables" heading; all others follow under "base tables".

    :param cursor: A DatabaseCursor object
    :param schema: The name of the schema to look up the study's tables in
    :param parser: A DatabaseParser object (not used by this method)
    :param verbose: toggle from progress bar to query output (not used by
        this method)
    """
    # NOTE(review): assumes get_study_prefix() returns the same value on
    # every call, so it is safe to look it up once - confirm.
    prefix = self.get_study_prefix()

    query = base_templates.get_show_tables(schema_name=schema, prefix=f"{prefix}__")
    tables = [row[0] for row in cursor.execute(query).fetchall()]

    query = base_templates.get_column_datatype_query(
        schema_name=schema, table_names=tables, include_table_names=True
    )
    study_df = pandas.DataFrame(
        cursor.execute(query).fetchall(), columns=["Column", "Type", "Table"]
    )

    table_list = sorted(study_df["Table"].unique())
    sections = (
        ("count", [t for t in table_list if "__count_" in t]),
        ("base", [t for t in table_list if "__count_" not in t]),
    )

    # An explicit encoding keeps the generated file identical across
    # platforms instead of depending on the locale's default encoding.
    output_path = self._study_path / f"{prefix}_generated.md"
    with open(output_path, "w", encoding="utf-8") as f:
        for label, section_tables in sections:
            # Skip the heading entirely when a section has no tables
            if not section_tables:
                continue
            f.write(f"## {prefix} {label} tables\n\n")
            for table in section_tables:
                self._write_md_table(table, study_df, f)

def _write_md_table(self, name: str, df: pandas.DataFrame, file: typing.IO):
    """Writes the rows for a single table to a file as a markdown table

    :param name: The value of the `Table` column to select rows by; also
        used as the title of the markdown table
    :param df: A dataframe with a `Table` column identifying each row's table
    :param file: An open, writable file object to write the table to
    """
    belongs_to_table = df["Table"] == name
    # The table name becomes the title, so the column itself is dropped and
    # replaced with an empty Description column.
    table_df = df[belongs_to_table].drop(columns="Table").assign(Description="")

    writer = pytablewriter.MarkdownTableWriter(dataframe=table_df)
    writer.stream = file
    writer.set_indent_level(2)
    writer.table_name = f"{name}\n"
    writer.write_table()

    file.write("\n\n")

def build_study(
self,
cursor: databases.DatabaseCursor,
Expand Down
12 changes: 10 additions & 2 deletions cumulus_library/template_sql/base_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,21 @@ def get_codeable_concept_denormalize_query(
)


def get_column_datatype_query(
    schema_name: str,
    table_names: str | list,
    column_names: list | None = None,
    include_table_names: bool | None = False,
):
    """Gets the in-database data representation of columns in one or more tables

    :param schema_name: The schema the table(s) belong to
    :param table_names: A single table name, or a list of table names
    :param column_names: If provided, a list of column names handed to the
        template to limit which columns are looked up
    :param include_table_names: Handed to the template to request that the
        table name is selected alongside each column
    :returns: whatever `get_base_template` produces for the
        `column_datatype` template (presumably the rendered query string -
        verify against get_base_template)
    """
    # A bare string is accepted for convenience; the template always
    # receives a list of table names.
    if isinstance(table_names, str):
        table_names = [table_names]
    return get_base_template(
        "column_datatype",
        schema_name=schema_name,
        table_names=table_names,
        column_names=column_names,
        include_table_names=include_table_names,
    )


Expand Down
9 changes: 7 additions & 2 deletions cumulus_library/template_sql/column_datatype.sql.jinja
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
{#- Lists the column names and data types of the tables in `table_names`
    within `schema_name`. When `include_table_names` is truthy the table
    name is selected as a third column; when `column_names` is non-empty
    the result is limited to those columns (compared case-insensitively). -#}
SELECT
    column_name,
    data_type
    {%- if include_table_names -%},
    table_name --noqa: LT02
    {%- endif %}
FROM information_schema.columns
WHERE
    table_schema = '{{ schema_name }}'
    AND table_name IN ('{{ table_names|join("', '")|lower }}')
    {%- if column_names %}
    AND LOWER(column_name) IN ('{{ column_names|join("', '")|lower }}') --noqa: LT02,LT05
    {%- endif %}
Loading

0 comments on commit 53b5948

Please sign in to comment.