diff --git a/src/ehrdata/io/omop/_check_arguments.py b/src/ehrdata/io/omop/_check_arguments.py index 8b145cf..cc89cae 100644 --- a/src/ehrdata/io/omop/_check_arguments.py +++ b/src/ehrdata/io/omop/_check_arguments.py @@ -12,7 +12,7 @@ VALID_OBSERVATION_TABLES_SINGLE = ["person"] VALID_OBSERVATION_TABLES_JOIN = ["person_cohort", "person_observation_period", "person_visit_occurrence"] VALID_VARIABLE_TABLES = ["measurement", "observation", "specimen"] -VALID_INTERVAL_VARIABLE_TABLES = ["drug_exposure"] +VALID_INTERVAL_VARIABLE_TABLES = ["drug_exposure", "condition_occurrence"] VALID_KEEP_DATES = ["start", "end", "interval"] diff --git a/src/ehrdata/io/omop/_queries.py b/src/ehrdata/io/omop/_queries.py index 080c8e3..99bf9c0 100644 --- a/src/ehrdata/io/omop/_queries.py +++ b/src/ehrdata/io/omop/_queries.py @@ -24,7 +24,16 @@ "observation": "observation", "specimen": "specimen", "drug_exposure": "drug", + "condition_occurrence": "condition", } +DATA_TABLE_DATE_TRUNK = { + "measurement": "measurement", + "observation": "observation", + "specimen": "specimen", + "drug_exposure": "drug_exposure", + "condition_occurrence": "condition", +} + AGGREGATION_STRATEGY_KEY = { "last": "LAST", @@ -147,7 +156,7 @@ def time_interval_table_query_long_format( ) \ SELECT lfi.person_id, lfi.data_table_concept_id, interval_step, interval_start, interval_end, {_generate_value_query("data_table_with_presence_indicator", data_field_to_keep, AGGREGATION_STRATEGY_KEY[aggregation_strategy])} \ FROM long_format_intervals as lfi \ - LEFT JOIN data_table_with_presence_indicator ON lfi.person_id = data_table_with_presence_indicator.person_id AND lfi.data_table_concept_id = data_table_with_presence_indicator.{DATA_TABLE_CONCEPT_ID_TRUNK[data_table]}_concept_id AND data_table_with_presence_indicator.{data_table}_{date_prefix}date BETWEEN lfi.interval_start AND lfi.interval_end \ + LEFT JOIN data_table_with_presence_indicator ON lfi.person_id = data_table_with_presence_indicator.person_id AND lfi.data_table_concept_id = data_table_with_presence_indicator.{DATA_TABLE_CONCEPT_ID_TRUNK[data_table]}_concept_id AND data_table_with_presence_indicator.{DATA_TABLE_DATE_TRUNK[data_table]}_{date_prefix}date BETWEEN lfi.interval_start AND lfi.interval_end \ GROUP BY lfi.person_id, lfi.data_table_concept_id, interval_step, interval_start, interval_end """ ).df() @@ -222,9 +231,9 @@ def time_interval_table_for_interval_tables_query_long_format( FROM long_format_intervals as lfi \ LEFT JOIN data_table_with_presence_indicator ON lfi.person_id = data_table_with_presence_indicator.person_id \ AND lfi.data_table_concept_id = data_table_with_presence_indicator.{DATA_TABLE_CONCEPT_ID_TRUNK[data_table]}_concept_id \ - AND (data_table_with_presence_indicator.{data_table}_start_date BETWEEN lfi.interval_start AND lfi.interval_end \ - OR data_table_with_presence_indicator.{data_table}_end_date BETWEEN lfi.interval_start AND lfi.interval_end \ - OR (data_table_with_presence_indicator.{data_table}_start_date < lfi.interval_start AND data_table_with_presence_indicator.{data_table}_end_date > lfi.interval_end)) \ + AND (data_table_with_presence_indicator.{DATA_TABLE_DATE_TRUNK[data_table]}_start_date BETWEEN lfi.interval_start AND lfi.interval_end \ + OR data_table_with_presence_indicator.{DATA_TABLE_DATE_TRUNK[data_table]}_end_date BETWEEN lfi.interval_start AND lfi.interval_end \ + OR (data_table_with_presence_indicator.{DATA_TABLE_DATE_TRUNK[data_table]}_start_date < lfi.interval_start AND data_table_with_presence_indicator.{DATA_TABLE_DATE_TRUNK[data_table]}_end_date > lfi.interval_end)) \ GROUP BY lfi.person_id, lfi.data_table_concept_id, interval_step, interval_start, interval_end """ ).df() diff --git a/tests/data/toy_omop/vanilla/condition_occurrence.csv b/tests/data/toy_omop/vanilla/condition_occurrence.csv new file mode 100644 index 0000000..0efb7e0 --- /dev/null +++ b/tests/data/toy_omop/vanilla/condition_occurrence.csv @@ -0,0 +1,10 @@ +condition_occurrence_id,person_id,condition_concept_id,condition_start_date,condition_start_datetime,condition_end_date,condition_end_datetime,condition_type_concept_id,condition_status_concept_id,stop_reason,provider_id,visit_occurrence_id,visit_detail_id,condition_source_value,condition_source_concept_id,condition_status_source_value +1,1,43530622,2100-01-01,2100-01-01 12:00:00,2100-01-31,2100-01-31 00:00:00,38000175,0,,0,31,,10,1121000119107, +2,1,43530622,2100-02-01,2100-02-01 12:00:00,2100-02-28,2100-02-28 00:00:00,38000175,0,,0,28,,10,1121000119107, +3,1,4112343,2100-01-01,2100-01-01 12:00:00,2100-01-31,2100-01-31 00:00:00,38000175,0,,0,31,,15,4112343, +4,2,43530622,2100-01-01,2100-01-01 12:00:00,2100-01-31,2100-01-31 00:00:00,38000175,0,,0,31,,10,1121000119107, +5,2,43530622,2100-02-01,2100-02-01 12:00:00,2100-02-28,2100-02-28 00:00:00,38000175,0,,0,28,,10,1121000119107, +6,2,4112343,2100-01-01,2100-01-01 12:00:00,2100-01-31,2100-01-31 00:00:00,38000175,0,,0,31,,15,4112343, +7,3,43530622,2100-01-01,2100-01-01 12:00:00,2100-01-31,2100-01-31 00:00:00,38000175,0,,0,31,,10,1121000119107, +8,3,43530622,2100-02-01,2100-02-01 12:00:00,2100-02-28,2100-02-28 00:00:00,38000175,0,,0,28,,10,1121000119107, +9,3,4112343,2100-01-01,2100-01-01 12:00:00,2100-01-31,2100-01-31 00:00:00,38000175,0,,0,31,,15,4112343, diff --git a/tests/test_io/test_omop.py b/tests/test_io/test_omop.py index 1ef28c9..6bc101d 100644 --- a/tests/test_io/test_omop.py +++ b/tests/test_io/test_omop.py @@ -16,6 +16,7 @@ "observation": 2, "specimen": 2, "drug_exposure": 2, + "condition_occurrence": 2, } # constants for setup_variables @@ -261,7 +262,66 @@ def test_setup_variables( [[1, 1, 1, 1], [1, 1, 1, 1]], ], ), - # (["condition_occurrence"], ["is_present"]), # TODO: write test file + ( + ["condition_occurrence"], + ["condition_source_value"], + "start", + [ + [[15, np.nan, np.nan, np.nan], [10, np.nan, np.nan, np.nan]], + [[15, np.nan, np.nan, np.nan], [10, np.nan, np.nan, np.nan]], + [[15, np.nan, np.nan, np.nan], [10, np.nan, np.nan, np.nan]], + ], + ), + ( + ["condition_occurrence"], + ["condition_source_value"], + "end", + [ + [[np.nan, np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan, np.nan]], + [[np.nan, np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan, np.nan]], + [[np.nan, np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan, np.nan]], + ], + ), + ( + ["condition_occurrence"], + ["condition_source_value"], + "interval", + [ + [[15, 15, 15, 15], [10, 10, 10, 10]], + [[15, 15, 15, 15], [10, 10, 10, 10]], + [[15, 15, 15, 15], [10, 10, 10, 10]], + ], + ), + ( + ["condition_occurrence"], + ["is_present"], + "start", + [ + [[1, np.nan, np.nan, np.nan], [1, np.nan, np.nan, np.nan]], + [[1, np.nan, np.nan, np.nan], [1, np.nan, np.nan, np.nan]], + [[1, np.nan, np.nan, np.nan], [1, np.nan, np.nan, np.nan]], + ], + ), + ( + ["condition_occurrence"], + ["is_present"], + "end", + [ + [[np.nan, np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan, np.nan]], + [[np.nan, np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan, np.nan]], + [[np.nan, np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan, np.nan]], + ], + ), + ( + ["condition_occurrence"], + ["is_present"], + "interval", + [ + [[1, 1, 1, 1], [1, 1, 1, 1]], + [[1, 1, 1, 1], [1, 1, 1, 1]], + [[1, 1, 1, 1], [1, 1, 1, 1]], + ], + ), # (["procedure_occurrence"], ["is_present"]), # TODO: write test file # (["device_exposure"], ["is_present"]), # TODO: write test file # (["note"], ["is_present"]),