Skip to content

Commit

Permalink
Added support for a list of age breaks, supporting overlapping age break intervals.
Browse files Browse the repository at this point in the history

Using StandardAnalysisAPI 1.6.0-SNAPSHOT.
Updated StrataSettings R6 class with new field: ageBreakList.

Fixes #30.
  • Loading branch information
chrisknoll committed Jul 8, 2024
1 parent 449a68c commit 3c83e66
Show file tree
Hide file tree
Showing 22 changed files with 461 additions and 211 deletions.
4 changes: 2 additions & 2 deletions R/Builders.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,13 @@ createCohortSubgroup <- function (id, name, description, cohortRef) {
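#' @param ageBreaks a vector of integers indicating the age group bounds.
#' @param ageBreakList a list of age break lists (each a list of numeric age group bounds); the intervals defined by different entries may overlap.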
#' @return an R list containing name-value pairs that will serialize into a org.ohdsi.analysis.cohortincidence.design.StratifySettings JSON format.
#' @export
createStrataSettings <- function (byAge = F, byGender = F, byYear = F, ageBreaks) {
createStrataSettings <- function (byAge = F, byGender = F, byYear = F, ageBreaks, ageBreakList) {
strataSettings <- StrataSettings$new()

strataSettings$byAge <- byAge;
strataSettings$byGender <- byGender;
strataSettings$byYear <- byYear;
if(byAge == T && missing(ageBreaks)) stop ("Error: ageBreaks must be a list of integers with at least 1 element")
# byAge needs at least one source of age breaks: fail only when NEITHER
# ageBreaks nor ageBreakList was supplied, so callers that pass only
# ageBreaks keep working.
if (byAge == TRUE && missing(ageBreaks) && missing(ageBreakList)) stop ("Error: when byAge = TRUE, ageBreaks or ageBreakList must be provided.")
if (!missing(ageBreaks)) strataSettings$ageBreaks <- ageBreaks;
# Store ageBreakList on the settings object as well; its active binding
# validates the value before it is kept.
if (!missing(ageBreakList)) strataSettings$ageBreakList <- ageBreakList;

return(strataSettings);
Expand Down
22 changes: 20 additions & 2 deletions R/Classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,8 @@ StrataSettings <- R6::R6Class("StrataSettings",
.byAge = F,
.byGender = F,
.byYear = F,
.ageBreaks = NA
.ageBreaks = NA,
.ageBreakList = NA
),
active = list (
#' @field byAge enables stratification by age
Expand Down Expand Up @@ -803,6 +804,21 @@ StrataSettings <- R6::R6Class("StrataSettings",
private$.ageBreaks <- ageBreaks
self
}
},
#' @field ageBreakList a list of age breaks
ageBreakList = function(ageBreakList) {
  # Active binding: with no argument it returns the stored value; with an
  # argument it validates the value, stores it and returns the object.
  if (missing(ageBreakList)) {
    private$.ageBreakList
  } else {
    # The outer container must be a list whose elements are themselves lists.
    checkmate::assertList(as.list(ageBreakList), types = "list")
    # vapply (rather than sapply) guarantees a logical vector for every input,
    # including empty lists, so all() never sees a list or other odd type.
    validBreaks <- vapply(
      ageBreakList,
      function(breaks) {
        # each entry: a list whose members are all numeric
        checkmate::testList(breaks) &&
          all(vapply(breaks, checkmate::testNumeric, logical(1)))
      },
      logical(1)
    )
    checkmate::assertTRUE(all(validBreaks))
    private$.ageBreakList <- ageBreakList
    self
  }
}
),
public = list(
Expand All @@ -816,6 +832,7 @@ StrataSettings <- R6::R6Class("StrataSettings",
if ("byGender" %in% names (dataList)) self$byGender <- dataList$byGender
if ("byYear" %in% names (dataList)) self$byYear <- dataList$byYear
if ("ageBreaks" %in% names (dataList)) self$ageBreaks <- dataList$ageBreaks
if ("ageBreakList" %in% names (dataList)) self$ageBreakList <- dataList$ageBreakList

},
#' @description
Expand All @@ -825,7 +842,8 @@ StrataSettings <- R6::R6Class("StrataSettings",
byAge = jsonlite::unbox(private$.byAge),
byGender = jsonlite::unbox(private$.byGender),
byYear = jsonlite::unbox(private$.byYear),
ageBreaks = .toJsonArray(private$.ageBreaks)
ageBreaks = .toJsonArray(private$.ageBreaks),
ageBreakList = lapply(private$.ageBreakList, .toJsonArray)
))
},
#' @description
Expand Down
195 changes: 33 additions & 162 deletions docs/articles/using-cohortincidence.html

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions docs/news/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ pkgdown: 2.0.9
pkgdown_sha: ~
articles:
using-cohortincidence: using-cohortincidence.html
last_built: 2024-06-27T19:57Z
last_built: 2024-07-08T17:23Z

4 changes: 4 additions & 0 deletions docs/reference/StrataSettings.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion docs/reference/createStrataSettings.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file modified inst/java/CohortIncidence-4.0.0.jar
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public class CohortIncidenceQueryBuilder {
private static final String COHORT_SUBGROUP_TEMPTABLE_TEMPLATE = ResourceHelper.GetResourceAsString("/resources/cohortincidence/sql/cohortSubgroupTempTable.sql");
private static final String TAR_STRATA_QUERY_TEMPTABLE_TEMPLATE = ResourceHelper.GetResourceAsString("/resources/cohortincidence/sql/tarStrataQueryTemplate.sql");
private static final String OUTCOME_STRATA_QUERY_TEMPTABLE_TEMPLATE = ResourceHelper.GetResourceAsString("/resources/cohortincidence/sql/outcomeStrataQueryTemplate.sql");
private static final String AGE_GROUP_JOIN = ResourceHelper.GetResourceAsString("/resources/cohortincidence/sql/ageGroupJoin.sql");
private static final String AGE_GROUP_SELECT_TEMPLATE = "select CAST(%d as int) as age_group_id, '%s' as age_group_name, cast(%s as int) as min_age, cast(%s as int) as max_age";

private static final String NULL_STRATA = "cast(null as int)";
Expand Down Expand Up @@ -273,61 +274,63 @@ private String buildStrataQuery(String strataTemplate, String[] selectCols, Stri
private String getStrataQueries(String strataTemplate) {
ArrayList<String> queries = new ArrayList<>();

// Note: because age strata can contain overlapping (i.e., non-stratified: one person may fall into several age groups) age breaks, we need to
// apply the ageGroupJoin only on the subqueries that involve age group stratification.
// overall strata
queries.add(buildStrataQuery(
strataTemplate,
StringUtils.replace(strataTemplate, "@ageGroupJoin", ""),
new String[] {NULL_STRATA + " as age_group_id", NULL_STRATA + " as gender_id", NULL_STRATA + " as start_year"},
new String[] {}
));

// by age
if (this.design.strataSettings != null && this.design.strataSettings.byAge) {
queries.add(buildStrataQuery(
strataTemplate,
new String[] {"t1.age_group_id", NULL_STRATA + " as gender_id", NULL_STRATA + " as start_year"},
new String[] {"t1.age_group_id"}
StringUtils.replace(strataTemplate, "@ageGroupJoin", AGE_GROUP_JOIN),
new String[]{"ag.age_group_id", NULL_STRATA + " as gender_id", NULL_STRATA + " as start_year"},
new String[]{"ag.age_group_id"}
));

// by age, by gender
if (this.design.strataSettings.byGender) {
queries.add(buildStrataQuery(
strataTemplate,
new String[] {"t1.age_group_id", "t1.gender_id", NULL_STRATA + " as start_year"},
new String[] {"t1.age_group_id", "t1.gender_id"}
StringUtils.replace(strataTemplate, "@ageGroupJoin", AGE_GROUP_JOIN),
new String[]{"ag.age_group_id", "t1.gender_id", NULL_STRATA + " as start_year"},
new String[]{"ag.age_group_id", "t1.gender_id"}
));
}

// by age, by year
if (this.design.strataSettings.byYear) {
queries.add(buildStrataQuery(
strataTemplate,
new String[] {"t1.age_group_id", NULL_STRATA + " as gender_id", "t1.start_year"},
new String[] {"t1.age_group_id", "t1.start_year"}
StringUtils.replace(strataTemplate, "@ageGroupJoin", AGE_GROUP_JOIN),
new String[] {"ag.age_group_id", NULL_STRATA + " as gender_id", "t1.start_year"},
new String[] {"ag.age_group_id", "t1.start_year"}
));
}

// by age, by gender, by year
if (this.design.strataSettings.byGender && this.design.strataSettings.byYear) {
queries.add(buildStrataQuery(
strataTemplate,
new String[] {"t1.age_group_id", "t1.gender_id", "t1.start_year"},
new String[] {"t1.age_group_id", "t1.gender_id", "t1.start_year"}
StringUtils.replace(strataTemplate, "@ageGroupJoin", AGE_GROUP_JOIN),
new String[] {"ag.age_group_id", "t1.gender_id", "t1.start_year"},
new String[] {"ag.age_group_id", "t1.gender_id", "t1.start_year"}
));
}
}

// by gender
if (this.design.strataSettings != null && this.design.strataSettings.byGender) {
queries.add(buildStrataQuery(
strataTemplate,
StringUtils.replace(strataTemplate, "@ageGroupJoin", ""),
new String[]{NULL_STRATA + " as age_group_id", "t1.gender_id", NULL_STRATA + " as start_year"},
new String[]{"t1.gender_id"}
));

// by gender, by year
if (this.design.strataSettings.byYear) {
queries.add(buildStrataQuery(
strataTemplate,
StringUtils.replace(strataTemplate, "@ageGroupJoin", ""),
new String[] {NULL_STRATA + " as age_group_id", "t1.gender_id", "t1.start_year"},
new String[] {"t1.gender_id", "t1.start_year"}
));
Expand All @@ -337,7 +340,7 @@ private String getStrataQueries(String strataTemplate) {
// by year
if (this.design.strataSettings != null && this.design.strataSettings.byYear) {
queries.add(buildStrataQuery(
strataTemplate,
StringUtils.replace(strataTemplate, "@ageGroupJoin", ""),
// by-year stratum: age group and gender are emitted as NULL placeholders (note the space before each "as" alias)
new String[]{NULL_STRATA + " as age_group_id", NULL_STRATA + " as gender_id", "t1.start_year"},
new String[]{"t1.start_year"}
));
Expand All @@ -356,18 +359,27 @@ private String getAgeGroupInsert() {
if (this.design.strataSettings == null || this.design.strataSettings.byAge == false)
return "";

if (this.design.strataSettings.ageBreaks.isEmpty())
throw new IllegalArgumentException("Invalid strataSettings: ageBreaks can not be empty.");
// Age stratification needs at least one set of breaks, from either ageBreaks or ageBreakList.
if (this.design.strataSettings.ageBreaks.isEmpty() && this.design.strataSettings.ageBreakList.isEmpty())
  throw new IllegalArgumentException("Invalid strataSettings: ageBreaks and ageBreakList can not both be empty.");
// Every entry of ageBreakList is read with get(0) when the age groups are built, so an empty
// entry must be rejected even when ageBreaks itself is populated.
if (this.design.strataSettings.ageBreakList.stream().anyMatch(List::isEmpty))
  throw new IllegalArgumentException("Invalid strataSettings: ageBreakList can not contain an empty list of age breaks.");

ArrayList<String> selects = new ArrayList<>();
List<Integer> ageBreaks = this.design.strataSettings.ageBreaks;
selects.add(String.format(AGE_GROUP_SELECT_TEMPLATE, 1, "<" + ageBreaks.get(0),"null", ageBreaks.get(0)));
List<List<Integer>> ageBreakList = new ArrayList<>(this.design.strataSettings.ageBreakList);
if (!this.design.strataSettings.ageBreaks.isEmpty()) {
// put the breaks from ageBreaks at the front of the list (for backwards compatibility)
ageBreakList.add(0,this.design.strataSettings.ageBreaks);
}

for (int i = 0; i < ageBreaks.size() - 1; i++)
{
selects.add(String.format(AGE_GROUP_SELECT_TEMPLATE, i+2, "" + ageBreaks.get(i) + " - " + (ageBreaks.get(i+1)-1),ageBreaks.get(i), ageBreaks.get(i+1)));
int ageGroupId = 1;
for (List<Integer> ageBreaks : ageBreakList) {
selects.add(String.format(AGE_GROUP_SELECT_TEMPLATE, ageGroupId++, "<" + ageBreaks.get(0),"null", ageBreaks.get(0)));

for (int i = 0; i < ageBreaks.size() - 1; i++)
{
selects.add(String.format(AGE_GROUP_SELECT_TEMPLATE, ageGroupId++, "" + ageBreaks.get(i) + " - " + (ageBreaks.get(i+1)-1),ageBreaks.get(i), ageBreaks.get(i+1)));
}
selects.add(String.format(AGE_GROUP_SELECT_TEMPLATE, ageGroupId++, ">=" + ageBreaks.get(ageBreaks.size()-1),ageBreaks.get(ageBreaks.size()-1), "null"));
}
selects.add(String.format(AGE_GROUP_SELECT_TEMPLATE, ageBreaks.size()+1, ">=" + ageBreaks.get(ageBreaks.size()-1),ageBreaks.get(ageBreaks.size()-1), "null"));

return String.format("insert into @results_database_schema.age_group_def (ref_id, age_group_id, age_group_name, min_age, max_age)\nselect CAST(@ref_id as int) as ref_id, age_group_id, age_group_name, min_age, max_age from (\n%s\n) ag;",
StringUtils.join(selects, "\nUNION ALL\n"));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/*
  Age group join fragment: substituted for the ageGroupJoin placeholder of the strata
  query templates, and only for the strata that group by age.
  A t1 row matches every age group of the current ref_id whose [min_age, max_age) range
  contains t1.age, so when age groups overlap the row is joined to each matching group.
  A NULL min_age or max_age is treated as an open bound through the -999 / 999 sentinels.
*/
LEFT JOIN @results_database_schema.age_group_def ag ON ag.ref_id = @ref_id
and t1.age >= coalesce(ag.min_age, -999)
and t1.age < coalesce(ag.max_age, 999)
Original file line number Diff line number Diff line change
Expand Up @@ -231,22 +231,19 @@ FROM (
/*
5) aggregate tar and excluded+outcome
*/
WITH tar_overall (target_cohort_definition_id, tar_id, subgroup_id, subject_id, start_date, end_date, age_group_id, gender_id, start_year)
WITH tar_overall (target_cohort_definition_id, tar_id, subgroup_id, subject_id, start_date, end_date, age, gender_id, start_year)
AS (
SELECT te.cohort_definition_id as target_cohort_definition_id,
te.tar_id,
te.subgroup_id,
te.subject_id,
te.start_date,
te.end_date,
ag.age_group_id,
YEAR(te.start_date) - p.year_of_birth as age,
p.gender_concept_id as gender_id,
YEAR(te.start_date) as start_year
FROM #TTAR_erafied te
JOIN @cdm_database_schema.person p on te.subject_id = p.person_id
LEFT JOIN @results_database_schema.age_group_def ag ON YEAR(te.start_date) - p.year_of_birth >= coalesce(ag.min_age, -999)
and YEAR(te.start_date) - p.year_of_birth < coalesce(ag.max_age, 999)
and ag.ref_id = @ref_id
)
select target_cohort_definition_id, tar_id, subgroup_id, age_group_id, gender_id, start_year, person_days_pe, persons_at_risk_pe
INTO #tar_agg
Expand All @@ -255,15 +252,15 @@ FROM (
) T_OVERALL
;

WITH outcomes_overall (target_cohort_definition_id, tar_id, subgroup_id, outcome_id, subject_id, age_group_id, gender_id, start_year, excluded_days, tar_days, outcomes_pe, outcomes)
WITH outcomes_overall (target_cohort_definition_id, tar_id, subgroup_id, outcome_id, subject_id, age, gender_id, start_year, excluded_days, tar_days, outcomes_pe, outcomes)
AS (
SELECT
t1.cohort_definition_id as target_cohort_definition_id,
t1.tar_id,
t1.subgroup_id,
op.outcome_id,
t1.subject_id,
ag.age_group_id,
YEAR(t1.start_date) - p.year_of_birth as age,
p.gender_concept_id as gender_id,
YEAR(t1.start_date) as start_year,
coalesce(e1.person_days, 0) as excluded_days,
Expand All @@ -272,9 +269,6 @@ WITH outcomes_overall (target_cohort_definition_id, tar_id, subgroup_id, outcome
coalesce(o1.outcomes, 0) as outcomes
FROM #TTAR_erafied t1
JOIN @cdm_database_schema.person p ON t1.subject_id = p.person_id
LEFT JOIN @results_database_schema.age_group_def ag ON YEAR(t1.start_date) - p.year_of_birth >= coalesce(ag.min_age, -999)
AND YEAR(t1.start_date) - p.year_of_birth < coalesce(ag.max_age, 999)
AND ag.ref_id = @ref_id
JOIN ( -- get the list of TTSO of anyone with excluded time or outcomes to limit result
select target_cohort_definition_id, tar_id, subgroup_id, outcome_id, subject_id, start_date FROM #excluded_person_days
UNION -- will remove dupes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@
SUM(t1.outcomes_pe) as outcomes_pe,
SUM(t1.outcomes) as outcomes
FROM outcomes_overall t1
@ageGroupJoin
GROUP BY target_cohort_definition_id, tar_id, subgroup_id, outcome_id@groupCols
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@
SUM(CAST((DATEDIFF(day,t1.start_date,t1.end_date) + 1) as bigint)) as person_days_pe,
COUNT(distinct t1.subject_id) as persons_at_risk_pe
FROM tar_overall t1
@ageGroupJoin
GROUP BY t1.target_cohort_definition_id, t1.tar_id, t1.subgroup_id@groupCols
29 changes: 29 additions & 0 deletions java/test/java/org/ohdsi/cohortincidence/Incidence_5_0_Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,35 @@ public void strataByAgeTest() throws Exception {
this.executeTest(params);
}

/**
 * By-age stratification scenario driven by the strataByAgeBreakList design and datasets.
 * Multiple people with different age/gender/year strata are present, but only by-age strata are requested.
 * Person 1: Male, 2 outcomes, 1 excluded, 2 TARs (age 32 and 37).
 * Person 2: Female, 2 outcomes, 0 excluded, 2 TARs (age 35 and 37)
 * Special notes: Person 1 will start in 1 year but have the TAR exclusion make followup start in next year
 * to test that start_year is correct (it should use the erafied-start date)
 *
 * @throws Exception
 */
@Test
public void strataByAgeBreakListTest() throws Exception {
  final TestParams testParams = new TestParams();

  // DBUnit only works when the schema name is entirely lower case
  testParams.resultSchema = "strata_age_list";

  // design under test
  testParams.designJson = ResourceHelper.GetResourceAsString("/cohortincidence/timeAtRisk/strataByAgeBreakListTest.json");

  // data loaded before the run, and the expected results compared afterwards
  testParams.prepDataSets = new String[]{
    "/datasets/vocabulary.json",
    "/cohortincidence/timeAtRisk/strataByAgeBreakList_PREP.json"
  };
  testParams.verifyDataSets = new String[]{"/cohortincidence/timeAtRisk/strataByAgeBreakList_VERIFY.json"};

  // result columns taking part in the comparison
  testParams.verifyCols = Arrays.asList(
    COL_REF_ID, COL_TARGET_COHORT_ID, COL_TAR_ID, COL_SUBGROUP_ID, COL_OUTCOME_ID,
    COL_AGE_GROUP_ID, COL_GENDER_ID, COL_YEAR_ID,
    COL_PERSONS_PRE_EXCLUDE, COL_PERSONS_AT_RISK, COL_PERSONS_DAYS_PRE_EXCLUDE, COL_PERSON_DAYS,
    COL_PERSON_OUTCOMES_PRE_EXCLUDE, COL_PERSON_OUTCOMES, COL_OUTCOMES_PRE_EXCLUDE, COL_OUTCOMES,
    COL_INCIDENCE_PROPORTION_P100P, COL_INCIDENCE_RATE_P100PY);

  this.executeTest(testParams);
}

/**
* Tests multiple people with different age/gender/year strata, but only requests by-age strata
* Person 1: Male, 2 outcomes, 1 excluded, 2 TARs (age 32 and 37).
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"targetDefs": [
{
"id": 100,
"name": "Target Cohort 1"
}
],
"outcomeDefs": [
{
"id": 1,
"name": "Outcome Cohort 1",
"cohortId": 200,
"cleanWindow": 30
}
],
"timeAtRiskDefs": [
{
"id": 1,
"start": {
"dateField": "start",
"offset": 0
},
"end": {
"dateField": "start",
"offset": 90
}
}
],
"strataSettings": {
"byAge": true,
"byGender": false,
"byYear": false,
"ageBreaks": [17, 35, 65],
"ageBreakList": [[17], [35], [65]]
},
"analysisList": [
{
"targets": [100],
"outcomes": [1],
"tars": [1]
}
]
}
Loading

0 comments on commit 3c83e66

Please sign in to comment.