Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added archival mode #183

Merged
merged 4 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions cumulus_library/base_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import datetime
import json
import os
import shutil
import zipfile
from contextlib import contextmanager

from rich import progress
Expand Down Expand Up @@ -82,3 +84,18 @@ def get_tablename_safe_iso_timestamp() -> str:
iso_timestamp = get_utc_datetime().isoformat()
safe_timestamp = iso_timestamp.replace(":", "_").replace("-", "_").replace("+", "_")
return safe_timestamp


def zip_dir(read_path, write_path, archive_name):
"""Moves a directory to an archive"""
file_list = [file for file in read_path.glob("**/*") if file.is_file()]
timestamp = get_utc_datetime().isoformat().replace("+00:00", "Z")
with zipfile.ZipFile(
f"{write_path}/{archive_name}_{timestamp}.zip",
"w",
zipfile.ZIP_DEFLATED,
) as f:
for file in file_list:
f.write(file, file.relative_to(read_path))
file.unlink()
shutil.rmtree(read_path)
34 changes: 27 additions & 7 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,20 +193,22 @@ def clean_and_build_all(self, study_dict: dict, stats_build: bool) -> None:
self.clean_and_build_study(study_dict[key], stats_build=stats_build)

### Data exporters
def export_study(self, target: pathlib.Path, data_path: pathlib.Path) -> None:
def export_study(
self, target: pathlib.Path, data_path: pathlib.Path, archive: bool
) -> None:
"""Exports aggregates defined in a manifest

:param target: A path to the study directory
"""
if data_path is None:
sys.exit("Missing destination - please provide a path argument.")
studyparser = study_parser.StudyManifestParser(target, data_path)
studyparser.export_study(self.db, data_path)
studyparser.export_study(self.db, self.schema_name, data_path, archive)

def export_all(self, study_dict: dict, data_path: pathlib.Path):
def export_all(self, study_dict: dict, data_path: pathlib.Path, archive: bool):
"""Exports all defined count tables to disk"""
for key in study_dict.keys():
self.export_study(study_dict[key], data_path)
self.export_study(study_dict[key], data_path, archive)

def generate_study_sql(
self,
Expand Down Expand Up @@ -294,6 +296,7 @@ def get_studies_by_manifest_path(path: pathlib.Path) -> dict:

def run_cli(args: dict):
"""Controls which library tasks are run based on CLI arguments"""
console = rich.console.Console()
if args["action"] == "create":
create_template(args["create_dir"])

Expand All @@ -307,7 +310,7 @@ def run_cli(args: dict):
runner = StudyRunner(db_backend, data_path=args.get("data_path"))
if args.get("verbose"):
runner.verbose = True
print("Testing connection to database...")
console.print("[italic] Connecting to database...")
runner.cursor.execute("SHOW DATABASES")
study_dict = get_study_dict(args["study_dir"])
if "prefix" not in args.keys():
Expand Down Expand Up @@ -344,11 +347,28 @@ def run_cli(args: dict):
)

elif args["action"] == "export":
if args["archive"]:
Comment on lines 349 to +350
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I initially had a thought that maybe this should be a toplevel action instead of a flag on export but after thinking about it more, decided you had the right of it. Just talking out loud - in case you had thoughts on that too. But I think this makes sense.

Copy link
Contributor Author

@dogversioning dogversioning Feb 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i started off that way too, but in digging in, since it's 90% the same, it was A) faster to get it in here, B) a little DRYer, and C) i kind of like keeping the number of base level cli verbs low.

warning_text = (
"🚨[bold red] This will export all study tables [/bold red]🚨"
"\n\nDepending on your study definition, this data may contain "
"data that would be characterized as a [italic]limited data "
"set[/italic], primarily dates, on a per patient level.\n\n"
"[bold]By doing this, you are assuming the responsibility for "
"meeting your organization's security requirements for "
"storing this data in a secure manager.[/bold]\n\n"
"Type Y to proceed, or any other value to quit.\n"
)
console.print(warning_text)
response = input()
if response.lower() != "y":
sys.exit()
if "all" in args["target"]:
runner.export_all(study_dict, args["data_path"])
runner.export_all(study_dict, args["data_path"], args["archive"])
else:
for target in args["target"]:
runner.export_study(study_dict[target], args["data_path"])
runner.export_study(
study_dict[target], args["data_path"], args["archive"]
)

elif args["action"] == "generate-sql":
for target in args["target"]:
Expand Down
21 changes: 5 additions & 16 deletions cumulus_library/cli_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,21 +127,6 @@ def get_parser() -> argparse.ArgumentParser:
dest="action",
)

# Study creation

create = actions.add_parser(
"create", help="Create a study instance from a template"
)
create.add_argument(
"create_dir",
default="./",
nargs="?",
help=(
"The the directory the study will be created in. Default is "
"the current directory."
),
)

# Database cleaning

clean = actions.add_parser(
Expand Down Expand Up @@ -204,7 +189,11 @@ def get_parser() -> argparse.ArgumentParser:
add_data_path_argument(export)
add_verbose_argument(export)
add_db_config(export)

export.add_argument(
"--archive",
action="store_true",
help="Generates archive of :all: study tables, ignoring manifest export list.",
)
# Aggregator upload

upload = actions.add_parser(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ temp_encounter AS (

SELECT DISTINCT
e.id,
e.class AS enc_class,
ac.code AS enc_class_code,
ac.display AS enc_class_display,
e.status,
Expand Down
110 changes: 100 additions & 10 deletions cumulus_library/studies/core/reference_sql/builder_condition.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
-- noqa: disable=all
-- This sql was autogenerated as a reference example using the library CLI.
-- Its format is tied to the specific database it was run against, and it may not
-- be correct for all databases. Use the CLI's build option to derive the best SQL
-- for your dataset.
-- This sql was autogenerated as a reference example using the library
-- CLI.Its format is tied to the specific database it was run against,
-- and it may not be correct for all databases. Use the CLI's build
-- option to derive the best SQL for your dataset.

-- ###########################################################

Expand All @@ -20,7 +20,7 @@ CREATE TABLE core__condition_codable_concepts_display AS (
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system = 'http://snomed.info/sct'
u.codeable_concept.system LIKE 'http://snomed.info/sct'
), --noqa: LT07

system_code_1 AS (
Expand All @@ -34,7 +34,7 @@ CREATE TABLE core__condition_codable_concepts_display AS (
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-10-cm'
u.codeable_concept.system LIKE 'http://hl7.org/fhir/sid/icd-10-cm'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm lightly curious about the performance cost of this. Correctness is more important obvi, but if this is noticeably slower, I imagine the jinja could switch to like if it detects a wildcard.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is a half baked idea in my head of moving the system extraction logic into here in some way, or in some other way doing db introspection to help build these queries out in a more nuanced fashion.

but - only doing LIKE when required seems like low hanging fruit in the interim.

), --noqa: LT07

system_code_2 AS (
Expand All @@ -48,7 +48,63 @@ CREATE TABLE core__condition_codable_concepts_display AS (
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system = 'http://hl7.org/fhir/sid/icd-9-cm'
u.codeable_concept.system LIKE 'http://hl7.org/fhir/sid/icd-9-cm'
), --noqa: LT07

system_code_3 AS (
SELECT DISTINCT
s.id AS id,
'3' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'http://hl7.org/fhir/sid/icd-9-cm/diagnosis'
), --noqa: LT07

system_code_4 AS (
SELECT DISTINCT
s.id AS id,
'4' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'urn:oid:1.2.840.114350.1.13.71.2.7.2.728286'
), --noqa: LT07

system_code_5 AS (
SELECT DISTINCT
s.id AS id,
'5' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'urn:oid:1.2.840.114350.1.13.71.2.7.4.698084.10375'
), --noqa: LT07

system_code_6 AS (
SELECT DISTINCT
s.id AS id,
'6' AS priority,
u.codeable_concept.code AS code,
u.codeable_concept.display AS display,
u.codeable_concept.system AS code_system
FROM
condition AS s,
UNNEST(s.code.coding) AS u (codeable_concept)
WHERE
u.codeable_concept.system LIKE 'http://terminology.hl7.org/CodeSystem/data-absent-reason'
), --noqa: LT07

union_table AS (
Expand All @@ -75,6 +131,39 @@ CREATE TABLE core__condition_codable_concepts_display AS (
code,
display
FROM system_code_2
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_3
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_4
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_5
UNION
SELECT
id,
priority,
code_system,
code,
display
FROM system_code_6

),

partitioned_table AS (
Expand Down Expand Up @@ -127,6 +216,7 @@ CREATE TABLE core__condition_codable_concepts_all AS (
code,
display
FROM system_code_0

)
SELECT
id,
Expand Down Expand Up @@ -154,11 +244,11 @@ WITH temp_condition AS (
cca.code_system,
cca.display,
date(from_iso8601_timestamp(c.recordeddate)) AS recordeddate,
date_trunc('week', date(from_iso8601_timestamp(c.recordeddate)))
date_trunc('week', date(from_iso8601_timestamp(c."recordeddate")))
AS recordeddate_week,
date_trunc('month', date(from_iso8601_timestamp(c.recordeddate)))
date_trunc('month', date(from_iso8601_timestamp(c."recordeddate")))
AS recordeddate_month,
date_trunc('year', date(from_iso8601_timestamp(c.recordeddate)))
date_trunc('year', date(from_iso8601_timestamp(c."recordeddate")))
AS recordeddate_year
FROM condition AS c
LEFT JOIN core__condition_codable_concepts_all AS cca ON c.id = cca.id
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
-- noqa: disable=all
-- This sql was autogenerated as a reference example using the library CLI.
-- Its format is tied to the specific database it was run against, and it may not
-- be correct for all databases. Use the CLI's build option to derive the best SQL
-- for your dataset.
-- This sql was autogenerated as a reference example using the library
-- CLI.Its format is tied to the specific database it was run against,
-- and it may not be correct for all databases. Use the CLI's build
-- option to derive the best SQL for your dataset.

-- ###########################################################

Expand All @@ -27,6 +27,7 @@ CREATE TABLE core__documentreference_dn_type AS (
code,
display
FROM system_type_0

)
SELECT
id,
Expand All @@ -51,13 +52,13 @@ WITH temp_documentreference AS (
dr.context,
dr.subject.reference AS subject_ref,
dr.context.period.start AS author_date,
date_trunc('day', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('day', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_day,
date_trunc('week', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('week', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_week,
date_trunc('month', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('month', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_month,
date_trunc('year', date(from_iso8601_timestamp(dr.context.period.start)))
date_trunc('year', date(from_iso8601_timestamp(dr."context"."period"."start")))
AS author_year,
cdrt.code,
cdrt.code_system,
Expand Down
Loading
Loading