Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support input database without patient extensions #220

Merged
merged 1 commit into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 46 additions & 11 deletions cumulus_library/studies/core/builder_patient.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,49 @@
class PatientBuilder(BaseTableBuilder):
display_text = "Creating Patient tables..."

@staticmethod
def make_extension_query(
    schema: str,
    cursor: databases.DatabaseCursor,
    parser: databases.DatabaseParser,
    name: str,
    url: str,
) -> str:
    """Return the SQL that creates the ``core__patient_ext_{name}`` table.

    The ``patient.extension`` column is checked first. When it is present
    with the expected schema, the extension denormalization query is
    returned. Otherwise an empty-table query with the same target table
    name is returned, so input databases without patient extensions are
    still supported.

    :param schema: the schema/database name
    :param cursor: database cursor used for the schema check
    :param parser: database parser used for the schema check
    :param name: short extension name; used as the table suffix and as the
        prefix of the ``_code`` / ``_display`` columns
    :param url: extension URL, passed through as ``fhir_extension``
    :returns: the query string to run
    """
    target_table = f"core__patient_ext_{name}"

    # Nested layout that patient.extension is checked against: a parent
    # extension (with a url) holding child extensions that carry a url and
    # a valueCoding.
    extension_shape = {
        "extension": {
            "url": {},
            "valueCoding": sql_utils.CODING,
        },
        "url": {},
    }
    extension_available = sql_utils.is_field_present(
        schema=schema,
        cursor=cursor,
        parser=parser,
        source_table="patient",
        source_col="extension",
        expected=extension_shape,
    )

    if not extension_available:
        # Nothing to denormalize: emit a placeholder table instead.
        return base_templates.get_ctas_empty_query(
            schema_name=schema,
            table_name=target_table,
            table_cols=["id", "system", f"{name}_code", f"{name}_display"],
        )

    extension_config = sql_utils.ExtensionConfig(
        source_table="patient",
        source_id="id",
        target_table=target_table,
        target_col_prefix=name,
        fhir_extension=url,
        ext_systems=["ombCategory", "detailed"],
        is_array=True,
    )
    return base_templates.get_extension_denormalize_query(extension_config)

def prepare_queries(
self,
cursor: object,
cursor: databases.DatabaseCursor,
schema: str,
*args,
parser: databases.DatabaseParser = None,
Expand All @@ -41,18 +81,13 @@ def prepare_queries(
"fhirpath": "http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity",
},
]

for extension in extension_types:
config = sql_utils.ExtensionConfig(
source_table="patient",
source_id="id",
target_table=f"core__patient_ext_{extension['name']}",
target_col_prefix=extension["name"],
fhir_extension=extension["fhirpath"],
ext_systems=["ombCategory", "detailed", "text"],
is_array=True,
self.queries.append(
self.make_extension_query(
schema, cursor, parser, extension["name"], extension["fhirpath"]
)
)
self.queries.append(base_templates.get_extension_denormalize_query(config))

validated_schema = sql_utils.validate_schema(
cursor, schema, expected_table_cols, parser
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,6 @@ CREATE TABLE core__condition AS
WITH temp_condition AS (
SELECT
c.id,
c.category,
c.subject.reference AS subject_ref,
c.encounter.reference AS encounter_ref,
cca.code,
Expand All @@ -431,9 +430,7 @@ WITH temp_condition AS (
date_trunc('month', date(from_iso8601_timestamp(c."recordedDate")))
AS recordedDate_month,
date_trunc('year', date(from_iso8601_timestamp(c."recordedDate")))
AS recordedDate_year,
c.verificationStatus,
c.clinicalStatus
AS recordedDate_year
FROM condition AS c
LEFT JOIN core__condition_codable_concepts_all AS cca ON c.id = cca.id
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ CREATE TABLE core__documentreference AS
WITH temp_documentreference AS (
SELECT DISTINCT
dr.id,
dr.type,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there some dangling changes from a previous PR in here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, looks like I forgot to run generate-sql at the tail end of my deep-schema-support PR, after review changes.

dr.status,
dr.docStatus,
dr.context,
Expand Down
17 changes: 9 additions & 8 deletions cumulus_library/studies/core/reference_sql/builder_encounter.sql
Original file line number Diff line number Diff line change
Expand Up @@ -273,9 +273,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__encounter_dn_servicetype"
AS (
SELECT * FROM (
VALUES
(cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
(cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
)
AS t ("id","code","code_system","display")
AS t ("id","row","code","code_system","display")
WHERE 1 = 0 -- ensure empty table
);

Expand All @@ -285,9 +285,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__encounter_dn_priority"
AS (
SELECT * FROM (
VALUES
(cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
(cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
)
AS t ("id","code","code_system","display")
AS t ("id","row","code","code_system","display")
WHERE 1 = 0 -- ensure empty table
);

Expand Down Expand Up @@ -571,9 +571,9 @@ temp_encounter_nullable AS (
SELECT DISTINCT
e.id,
e.status,
e.class,
e.class.code AS class_code,
e.class.system AS class_code_system,
e.subject.reference AS subject_ref,
e.period,
date(from_iso8601_timestamp(e.period.start)) AS period_start,
date_trunc('day', date(from_iso8601_timestamp(e."period"."end")))
AS period_end_day,
Expand All @@ -598,7 +598,8 @@ temp_encounter AS (
SELECT DISTINCT
e.id,
e.status,
e.class,
e.class_code,
e.class_code_system,
e.subject_ref,
e.period_start,
e.period_start_day,
Expand Down Expand Up @@ -664,7 +665,7 @@ SELECT DISTINCT
concat('Encounter/', e.id) AS encounter_ref
FROM temp_encounter AS e
LEFT JOIN core__fhir_mapping_expected_act_encounter_code_v3 AS eac
ON e.class.code = eac.found
ON e.class_code = eac.found
LEFT JOIN core__fhir_act_encounter_code_v3 AS ac ON eac.expected = ac.code
INNER JOIN core__patient AS p ON e.subject_ref = p.subject_ref
WHERE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ WITH temp_mr AS (
date(from_iso8601_timestamp(mr.authoredOn)) AS authoredOn,
date_trunc('month', date(from_iso8601_timestamp(mr."authoredOn")))
AS authoredOn_month,
cast(NULL as varchar) AS display,
mr.reportedBoolean,
mr.dosageInstruction,
mr.subject.reference AS subject_ref,
Expand Down
20 changes: 10 additions & 10 deletions cumulus_library/studies/core/reference_sql/builder_observation.sql
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__observation_component_valuecodeableconc
AS (
SELECT * FROM (
VALUES
(cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
(cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
)
AS t ("id","code","code_system","display")
AS t ("id","row","code","code_system","display")
WHERE 1 = 0 -- ensure empty table
);

Expand All @@ -256,9 +256,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__observation_dn_interpretation"
AS (
SELECT * FROM (
VALUES
(cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
(cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
)
AS t ("id","code","code_system","display")
AS t ("id","row","code","code_system","display")
WHERE 1 = 0 -- ensure empty table
);

Expand Down Expand Up @@ -304,9 +304,9 @@ CREATE TABLE IF NOT EXISTS "main"."core__observation_dn_dataabsentreason"
AS (
SELECT * FROM (
VALUES
(cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
(cast(NULL AS varchar),cast(NULL AS bigint),cast(NULL AS varchar),cast(NULL AS varchar),cast(NULL AS varchar))
)
AS t ("id","code","code_system","display")
AS t ("id","row","code","code_system","display")
WHERE 1 = 0 -- ensure empty table
);

Expand All @@ -319,6 +319,8 @@ WITH temp_observation AS (
SELECT
o.id,
o.status,
o.encounter.reference AS encounter_ref,
o.subject.reference AS subject_ref,
o.valueString,
o.valueQuantity.value AS valueQuantity_value,
o.valueQuantity.comparator AS valueQuantity_comparator,
Expand All @@ -345,9 +347,7 @@ WITH temp_observation AS (
odvcc.display AS valueCodeableConcept_display,
odda.code AS dataAbsentReason_code,
odda.code_system AS dataAbsentReason_code_system,
odda.display AS dataAbsentReason_display,
o.subject.reference AS subject_ref,
o.encounter.reference AS encounter_ref
odda.display AS dataAbsentReason_display
FROM observation AS o
LEFT JOIN core__observation_dn_category AS odcat ON o.id = odcat.id
LEFT JOIN core__observation_dn_code AS odc ON o.id = odc.id
Expand Down Expand Up @@ -375,7 +375,7 @@ SELECT
valueCodeableConcept_display,
valueQuantity_value,
valueQuantity_comparator,
valueQuantity_unit,valueQuantity_code_system AS valueQuantity_system, -- old alias
valueQuantity_unit,
valueQuantity_code_system,
valueQuantity_code,
valueString,
Expand Down
54 changes: 2 additions & 52 deletions cumulus_library/studies/core/reference_sql/builder_patient.sql
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,6 @@ CREATE TABLE core__patient_ext_race AS (
AND ext_child.ext.valuecoding.display != ''
),

system_text AS (
SELECT DISTINCT
s.id AS id,
'2' AS priority,
'text' AS system, -- noqa: RF04
ext_child.ext.valuecoding.code AS race_code,
ext_child.ext.valuecoding.display AS race_display
FROM
patient AS s,
UNNEST(s.extension) AS ext_parent (ext),
UNNEST(ext_parent.ext.extension) AS ext_child (ext)
WHERE
ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race'
AND ext_child.ext.url = 'text'
AND ext_child.ext.valuecoding.display != ''
),

union_table AS (
SELECT
id,
Expand All @@ -76,14 +59,6 @@ CREATE TABLE core__patient_ext_race AS (
race_code,
race_display
FROM system_detailed
UNION
SELECT
id,
priority,
system,
race_code,
race_display
FROM system_text

ORDER BY id, priority
)
Expand Down Expand Up @@ -118,7 +93,7 @@ CREATE TABLE core__patient_ext_race AS (
) AS race_display,
ROW_NUMBER()
OVER (
PARTITION BY id, system
PARTITION BY id
ORDER BY priority ASC
) AS available_priority
FROM union_table
Expand Down Expand Up @@ -166,23 +141,6 @@ CREATE TABLE core__patient_ext_ethnicity AS (
AND ext_child.ext.valuecoding.display != ''
),

system_text AS (
SELECT DISTINCT
s.id AS id,
'2' AS priority,
'text' AS system, -- noqa: RF04
ext_child.ext.valuecoding.code AS ethnicity_code,
ext_child.ext.valuecoding.display AS ethnicity_display
FROM
patient AS s,
UNNEST(s.extension) AS ext_parent (ext),
UNNEST(ext_parent.ext.extension) AS ext_child (ext)
WHERE
ext_parent.ext.url = 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity'
AND ext_child.ext.url = 'text'
AND ext_child.ext.valuecoding.display != ''
),

union_table AS (
SELECT
id,
Expand All @@ -199,14 +157,6 @@ CREATE TABLE core__patient_ext_ethnicity AS (
ethnicity_code,
ethnicity_display
FROM system_detailed
UNION
SELECT
id,
priority,
system,
ethnicity_code,
ethnicity_display
FROM system_text

ORDER BY id, priority
)
Expand Down Expand Up @@ -241,7 +191,7 @@ CREATE TABLE core__patient_ext_ethnicity AS (
) AS ethnicity_display,
ROW_NUMBER()
OVER (
PARTITION BY id, system
PARTITION BY id
ORDER BY priority ASC
) AS available_priority
FROM union_table
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ CREATE TABLE {{ target_table }} AS (
{%- endif %}
ROW_NUMBER()
OVER (
PARTITION BY id, system
PARTITION BY id
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell, this is a bug fix: when a patient has both ombCategory and detailed codings, partitioning by id and system gave each of them available_priority = 1, so both showed up in the extension table. Then when you joined with patients, you'd get duplicate patient rows.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that's a bug - it's left to the study author to determine the appropriate system for their use case. The core study preserves what it finds, and the user should use distinct() on patient IDs when they're counting.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We talked about this on Slack - we think it is a bug, and will leave the code change in. But we have lots of questions about best approaches for race/ethnicity. Should race_detailed be a separate column? What to do with text? Those are problems for another day.

ORDER BY priority ASC
) AS available_priority
FROM union_table
Expand Down
11 changes: 7 additions & 4 deletions cumulus_library/template_sql/sql_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,13 @@ def is_field_populated(
parser: databases.DatabaseParser,
source_table: str,
hierarchy: list[tuple],
expected: list | None = None,
expected: list | dict | None = None,
) -> bool:
"""Traverses a complex field and determines if it exists and has data

Non-core studies that rely on the core tables shouldn't need this method.
This is just to examine the weird and wonderful world of the raw FHIR tables.

:keyword schema: The schema/database name
:keyword cursor: a PEP-249 compliant database cursor
:keyword source_table: The table to query against
Expand All @@ -217,7 +220,7 @@ def is_field_populated(
If none, we assume it is a CodeableConcept.
:returns: a boolean indicating if valid data is present.
"""
if not _check_schema_if_exists(
if not is_field_present(
schema=schema,
cursor=cursor,
parser=parser,
Expand Down Expand Up @@ -256,14 +259,14 @@ def is_field_populated(
return True


def _check_schema_if_exists(
def is_field_present(
mikix marked this conversation as resolved.
Show resolved Hide resolved
*,
schema: str,
cursor: databases.DatabaseCursor,
parser: databases.DatabaseParser,
source_table: str,
source_col: str,
expected: str | None = None,
expected: list | dict | None = None,
) -> bool:
"""Validation check for a column existing, and having the expected schema

Expand Down
Loading