Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored remaining core tables #166

Merged
merged 8 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .sqlfluffignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
/scratch/
# This is a common destination for debugging sql generation
output.sql

# These files fail to lint when they access a date column nested two levels
# deep, which is likely a SQLFluff bug
documentreference.sql.jinja
encounter.sql.jinja

# This file has namespace collisions with vars in other sqlfluff templates.
# Ignoring for now - could be addressed with an in-folder .sqlfluff config
# or by a refactor of variable names
count.sql.jinja
7 changes: 4 additions & 3 deletions cumulus_library/.sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,27 @@ code_system_tables = [{table_name":"hasarray","column_name":"acol","is_bare_codi
column_name = 'bar'
column_names = ['foo', 'bar']
conditions = ["1 > 0", "1 < 2"]
config = {"medication_datasources" : {"by_contained_ref" : True, "by_external_ref" : True}, 'has_userselected': False}
count_ref = count_ref
count_table = count_table
dataset = [["foo","foo"],["bar","bar"]]
dependent_variable = is_flu
ext_systems = ["omb", "text"]
field = 'column_name'
filter_table = filter_table
fhir_extension = fhir_extension
filter_resource = True
fhir_extension = condition
fhir_resource = patient
id = 'id'
join_cols_by_table = { "join_table": { "join_id": "enc_ref","included_cols": [["a"], ["b", "c"]]}}
join_id = subject_ref
medication_datasources = {"by_contained_ref" : True, "by_external_ref" : True}
neg_source_table = neg_source_table
output_table_name = 'created_table'
prefix = Test
primary_ref = encounter_ref
pos_source_table = pos_source_table
schema_name = test_schema
schema = {'condition': {'category': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'clinicalstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'id': True, 'recordeddate': True, 'verificationstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'subject': {'reference': True, 'display': False, 'type': True}, 'encounter': {'reference': True, 'display': False, 'type': True}}}
schema = {'condition': {'category': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'clinicalstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'id': True, 'recordeddate': True, 'verificationstatus': {'coding': True, 'code': True, 'display': True, 'system': True, 'userSelected': True, 'version': True, 'text': True}, 'subject': {'reference': True, 'display': False, 'type': True}, 'encounter': {'reference': True, 'display': False, 'type': True}}, 'documentreference': {'id': True, 'type': True, 'status': True, 'docstatus': True, 'context': {'period': True, 'start': True}, 'subject': {'reference': True}}, 'encounter': {'status': True, 'period': {'start': True, 'end': False}, 'class': {'code': True, 'system': True, 'display': False, 'userSelected': True, 'version': True}, 'subject': {'reference': True, 'display': False, 'type': True}, 'id': True}, 'medicationrequest': {'id': True, 'status': True, 'intent': True, 'authoredon': True, 'category': {'code': True, 'system': True, 'display': False}, 'subject': {'reference': True}}, 'observation': {'id': True, 'category': {'coding': True, 'code': True, 'display': True, 'system': True, 'text': True}, 'status': True, 'code': {'coding': True, 'code': True, 'display': True, 'system': True, 'text': True}, 'interpretation': {'coding': True, 'code': True, 'display': True, 'system': True, 'text': True}, 'referencerange': {'low': False, 'high': False, 'normalvalue': False, 'type': False, 'appliesto': False, 'age': False, 'text': True}, 'effectivedatetime': True, 'valuequantity': {'value': True, 'comparator': False, 'unit': False, 'system': False, 'code': False}, 'valuecodeableconcept': {'coding': True, 'code': True, 'display': True, 'system': True}, 'subject': {'reference': True}, 'encounter': {'reference': True}}, 'patient': {'id': True, 'gender': True, 'address': True, 'birthdate': True}}
source_table = source_table
source_id = source_id
table_cols = ["a","b"]
Expand Down
1 change: 1 addition & 0 deletions cumulus_library/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
"""Package metadata"""

__version__ = "2.0.0"
11 changes: 10 additions & 1 deletion cumulus_library/base_table_builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" abstract base for python-based study executors """

import re
import sys

Expand Down Expand Up @@ -57,8 +58,16 @@ def execute_queries(
# Get the first non-whitespace word after create table
table_name = re.search(
'(?i)(?<=create table )(([a-zA-Z0-9_".-]+))', query
) # [0]
)

if table_name:
if table_name[0] == "IF":
# Edge case - if we're doing an empty conditional CTAS creation,
# we need to run a slightly different regex
table_name = re.search(
'(?i)(?<=not exists )(([a-zA-Z0-9_".-]+))', query
)

table_name = table_name[0]
# if it contains a schema, remove it (usually it won't, but some CTAS
# forms may)
Expand Down
1 change: 1 addition & 0 deletions cumulus_library/cli_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Manages configuration for argparse"""

import argparse


Expand Down
1 change: 1 addition & 0 deletions cumulus_library/enums.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Holds enums used across more than one module """

from enum import Enum


Expand Down
1 change: 1 addition & 0 deletions cumulus_library/helper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Collection of small commonly used utility functions """

import datetime
import os
import json
Expand Down
1 change: 1 addition & 0 deletions cumulus_library/protected_table_builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Builder for creating tables for tracking state/logging changes"""

from cumulus_library.base_table_builder import BaseTableBuilder
from cumulus_library.enums import ProtectedTables
from cumulus_library.template_sql.templates import (
Expand Down
34 changes: 31 additions & 3 deletions cumulus_library/statistics/counts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Class for generating counts tables from templates"""

import sys

from pathlib import Path
Expand Down Expand Up @@ -122,15 +123,15 @@ def count_condition(
filter_resource="encounter",
)

def count_document(
def count_documentreference(
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses: Union[list, None] = None,
min_subject: int = 10,
) -> str:
"""wrapper method for constructing document counts tables
"""wrapper method for constructing documentreference counts tables

:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
Expand All @@ -145,7 +146,8 @@ def count_document(
table_cols,
where_clauses=where_clauses,
min_subject=min_subject,
fhir_resource="document",
fhir_resource="documentreference",
filter_resource="encounter",
)

def count_encounter(
Expand Down Expand Up @@ -174,6 +176,32 @@ def count_encounter(
fhir_resource="encounter",
)

def count_medicationrequest(
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses: Union[list, None] = None,
min_subject: int = 10,
) -> str:
"""wrapper method for constructing medicationrequest counts tables

:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
:param table_cols: The columns from the source table to add to the count table
:param where_clauses: An array of where clauses to use for filtering the data
:param min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
:returns: The result of get_count_query, called with fhir_resource set to
"medicationrequest"
"""
return self.get_count_query(
table_name,
source_table,
table_cols,
where_clauses=where_clauses,
min_subject=min_subject,
fhir_resource="medicationrequest",
)

def count_observation(
self,
table_name: str,
Expand Down
18 changes: 6 additions & 12 deletions cumulus_library/studies/core/builder_condition.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from cumulus_library import base_table_builder
from cumulus_library import databases
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import templates
from cumulus_library.template_sql import templates, utils


expected_table_cols = {
Expand Down Expand Up @@ -42,8 +42,10 @@


class CoreConditionBuilder(base_table_builder.BaseTableBuilder):
display_text = "Creating Condition tables..."

def denormalize_codes(self):
preferred_config = templates.CodeableConceptConfig(
preferred_config = utils.CodeableConceptConfig(
source_table="condition",
source_id="id",
column_name="code",
Expand All @@ -60,7 +62,7 @@ def denormalize_codes(self):
templates.get_codeable_concept_denormalize_query(preferred_config)
)

all_config = templates.CodeableConceptConfig(
all_config = utils.CodeableConceptConfig(
source_table="condition",
source_id="id",
column_name="code",
Expand All @@ -72,14 +74,6 @@ def denormalize_codes(self):
templates.get_codeable_concept_denormalize_query(all_config)
)

def validate_schema(self, cursor: object, schema: str, expected_table_cols, parser):
"""Runs a column datatype query for each expected table and validates the result

:param cursor: An object whose execute(query) result supports fetchall()
:param schema: The schema name passed to the column datatype query
:param expected_table_cols: A mapping of table name to a mapping whose keys
are the column names to query for that table
:param parser: An object providing validate_table_schema(cols, table_schema)
:returns: A dict mapping each table name to the value returned by
parser.validate_table_schema for that table
"""
validated_schema = {}
for table, cols in expected_table_cols.items():
# Only the column names (the keys) are sent to the query template; the
# full cols mapping is handed to the parser together with the query rows
query = templates.get_column_datatype_query(schema, table, cols.keys())
table_schema = cursor.execute(query).fetchall()
validated_schema[table] = parser.validate_table_schema(cols, table_schema)
return validated_schema

def prepare_queries(
self,
cursor: object,
Expand All @@ -89,7 +83,7 @@ def prepare_queries(
**kwargs,
):
self.denormalize_codes()
validated_schema = self.validate_schema(
validated_schema = core_templates.validate_schema(
cursor, schema, expected_table_cols, parser
)
self.queries.append(
Expand Down
176 changes: 0 additions & 176 deletions cumulus_library/studies/core/builder_condition.sql

This file was deleted.

Loading
Loading