From f633b6abca1a30fa7115bda38e4305215b41a1ce Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Wed, 1 May 2024 12:01:23 -0400 Subject: [PATCH] Enforce unique cohorts before generating - fixes #130 --- R/CohortConstruction.R | 6 +++++ R/NegativeControlCohorts.R | 7 +++++ .../test-CohortConstructionAndStats.R | 26 +++++++++++++++++++ tests/testthat/test-NegativeControlCohorts.R | 24 +++++++++++++++++ 4 files changed, 63 insertions(+) diff --git a/R/CohortConstruction.R b/R/CohortConstruction.R index d29953b..0706287 100644 --- a/R/CohortConstruction.R +++ b/R/CohortConstruction.R @@ -76,6 +76,12 @@ generateCohortSet <- function(connectionDetails = NULL, "sql" ) ) + # Verify that cohort IDs are not repeated in the cohort definition + # set before generating + if (length(unique(cohortDefinitionSet$cohortId)) != length(cohortDefinitionSet$cohortId)) { + duplicatedCohortIds <- cohortDefinitionSet$cohortId[duplicated(cohortDefinitionSet$cohortId)] + stop("Cannot generate! Duplicate cohort IDs found in your cohortDefinitionSet: ", paste(duplicatedCohortIds, sep=","), ". Please fix your cohortDefinitionSet and try again.") + } if (is.null(connection) && is.null(connectionDetails)) { stop("You must provide either a database connection or the connection details.") } diff --git a/R/NegativeControlCohorts.R b/R/NegativeControlCohorts.R index fc634f7..bef2695 100644 --- a/R/NegativeControlCohorts.R +++ b/R/NegativeControlCohorts.R @@ -118,6 +118,13 @@ generateNegativeControlOutcomeCohorts <- function(connectionDetails = NULL, min.rows = 1 ) + # Verify that cohort IDs are not repeated in the negative control + # cohort definition set before generating + if (length(unique(negativeControlOutcomeCohortSet$cohortId)) != length(negativeControlOutcomeCohortSet$cohortId)) { + duplicatedCohortIds <- negativeControlOutcomeCohortSet$cohortId[duplicated(negativeControlOutcomeCohortSet$cohortId)] + stop("Cannot generate! Duplicate cohort IDs found in your negativeControlOutcomeCohortSet: ", paste(duplicatedCohortIds, sep=","), ". Please fix your negativeControlOutcomeCohortSet and try again.") + } + if (incremental) { if (is.null(incrementalFolder)) { stop("Must specify incrementalFolder when incremental = TRUE") diff --git a/tests/testthat/test-CohortConstructionAndStats.R b/tests/testthat/test-CohortConstructionAndStats.R index 0088390..18956b4 100644 --- a/tests/testthat/test-CohortConstructionAndStats.R +++ b/tests/testthat/test-CohortConstructionAndStats.R @@ -19,6 +19,32 @@ test_that("Call generateCohortSet with default parameters", { ) }) +test_that("Call generateCohortSet with cohortDefinitionSet containing duplicate IDs", { + cohortDefinitionSet <- data.frame( + cohortId = 1, + cohortName = "duplicate #1", + sql = "", + json = "" + ) + cohortDefinitionSet <- rbind( + cohortDefinitionSet, + data.frame( + cohortId = 1, + cohortName = "duplicate #2", + sql = "", + json = "" + ) + ) + expect_error( + generateCohortSet( + connectionDetails = connectionDetails, + cohortDefinitionSet = cohortDefinitionSet + ), + message = "(Cannot generate! Duplicate cohort IDs found in your cohortDefinitionSet)" + ) +}) + + test_that("Call instatiateCohortSet with malformed cohortDefinitionSet parameter", { expect_error( generateCohortSet( diff --git a/tests/testthat/test-NegativeControlCohorts.R b/tests/testthat/test-NegativeControlCohorts.R index 1064644..19c4d02 100644 --- a/tests/testthat/test-NegativeControlCohorts.R +++ b/tests/testthat/test-NegativeControlCohorts.R @@ -12,6 +12,30 @@ test_that("Call generateNegativeControlOutcomeCohorts without connection or conn expect_error(generateNegativeControlOutcomeCohorts()) }) +test_that("Call generateNegativeControlOutcomeCohorts with negativeControlOutcomeCohortSet containing duplicate IDs", { + negativeControlOutcomeCohortSet <- data.frame( + cohortId = 1, + cohortName = "duplicate #1", + outcomeConceptId = 1 + ) + negativeControlOutcomeCohortSet <- rbind( + negativeControlOutcomeCohortSet, + data.frame( + cohortId = 1, + cohortName = "duplicate #2", + outcomeConceptId = 1 + ) + ) + expect_error( + generateNegativeControlOutcomeCohorts( + connectionDetails = connectionDetails, + cdmDatabaseSchema = "main", + negativeControlOutcomeCohortSet = negativeControlOutcomeCohortSet + ), + message = "(Cannot generate! Duplicate cohort IDs found in your negativeControlOutcomeCohortSet)" + ) +}) + test_that("Call generateNegativeControlOutcomeCohorts before creating cohort table fails", { cohortTableNames <- getCohortTableNames(cohortTable = "missing_cohort_table") ncSet <- getNegativeControlOutcomeCohortsForTest()