Skip to content

Commit

Permalink
Enforce unique cohorts before generating - fixes #130
Browse files Browse the repository at this point in the history
  • Loading branch information
anthonysena committed May 1, 2024
1 parent 0c0e487 commit f633b6a
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 0 deletions.
6 changes: 6 additions & 0 deletions R/CohortConstruction.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ generateCohortSet <- function(connectionDetails = NULL,
"sql"
)
)
# Verify that cohort IDs are not repeated in the cohort definition
# set before generating
if (length(unique(cohortDefinitionSet$cohortId)) != length(cohortDefinitionSet$cohortId)) {
duplicatedCohortIds <- cohortDefinitionSet$cohortId[duplicated(cohortDefinitionSet$cohortId)]
stop("Cannot generate! Duplicate cohort IDs found in your cohortDefinitionSet: ", paste(duplicatedCohortIds, sep=","), ". Please fix your cohortDefinitionSet and try again.")
}
if (is.null(connection) && is.null(connectionDetails)) {
stop("You must provide either a database connection or the connection details.")
}
Expand Down
7 changes: 7 additions & 0 deletions R/NegativeControlCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ generateNegativeControlOutcomeCohorts <- function(connectionDetails = NULL,
min.rows = 1
)

# Verify that cohort IDs are not repeated in the negative control
# cohort definition set before generating
if (length(unique(negativeControlOutcomeCohortSet$cohortId)) != length(negativeControlOutcomeCohortSet$cohortId)) {
duplicatedCohortIds <- negativeControlOutcomeCohortSet$cohortId[duplicated(negativeControlOutcomeCohortSet$cohortId)]
stop("Cannot generate! Duplicate cohort IDs found in your negativeControlOutcomeCohortSet: ", paste(duplicatedCohortIds, sep=","), ". Please fix your negativeControlOutcomeCohortSet and try again.")
}

if (incremental) {
if (is.null(incrementalFolder)) {
stop("Must specify incrementalFolder when incremental = TRUE")
Expand Down
26 changes: 26 additions & 0 deletions tests/testthat/test-CohortConstructionAndStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,32 @@ test_that("Call generateCohortSet with default parameters", {
)
})

test_that("Call generateCohortSet with cohortDefinitionSet containing duplicate IDs", {
cohortDefinitionSet <- data.frame(
cohortId = 1,
cohortName = "duplicate #1",
sql = "",
json = ""
)
cohortDefinitionSet <- rbind(
cohortDefinitionSet,
data.frame(
cohortId = 1,
cohortName = "duplicate #2",
sql = "",
json = ""
)
)
expect_error(
generateCohortSet(
connectionDetails = connectionDetails,
cohortDefinitionSet = cohortDefinitionSet
),
message = "(Cannot generate! Duplicate cohort IDs found in your cohortDefinitionSet)"
)
})


test_that("Call instatiateCohortSet with malformed cohortDefinitionSet parameter", {
expect_error(
generateCohortSet(
Expand Down
24 changes: 24 additions & 0 deletions tests/testthat/test-NegativeControlCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,30 @@ test_that("Call generateNegativeControlOutcomeCohorts without connection or conn
expect_error(generateNegativeControlOutcomeCohorts())
})

test_that("Call generateNegativeControlOutcomeCohorts with negativeControlOutcomeCohortSet containing duplicate IDs", {
negativeControlOutcomeCohortSet <- data.frame(
cohortId = 1,
cohortName = "duplicate #1",
outcomeConceptId = 1
)
negativeControlOutcomeCohortSet <- rbind(
negativeControlOutcomeCohortSet,
data.frame(
cohortId = 1,
cohortName = "duplicate #2",
outcomeConceptId = 1
)
)
expect_error(
generateNegativeControlOutcomeCohorts(
connectionDetails = connectionDetails,
cdmDatabaseSchema = "main",
negativeControlOutcomeCohortSet = negativeControlOutcomeCohortSet
),
message = "(Cannot generate! Duplicate cohort IDs found in your negativeControlOutcomeCohortSet)"
)
})

test_that("Call generateNegativeControlOutcomeCohorts before creating cohort table fails", {
cohortTableNames <- getCohortTableNames(cohortTable = "missing_cohort_table")
ncSet <- getNegativeControlOutcomeCohortsForTest()
Expand Down

0 comments on commit f633b6a

Please sign in to comment.