diff --git a/.Rbuildignore b/.Rbuildignore index e91063e..2642258 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,9 @@ ^\.Rproj\.user$ ^\.idea$ ^\.github$ +_pkgdown\.yml +compare_versions +deploy.sh +docs +extras +man-roxygen diff --git a/DESCRIPTION b/DESCRIPTION index ceaf1c6..7882e2c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: CohortGenerator Type: Package Title: An R Package for Cohort Generation Against the OMOP CDM -Version: 0.9.0 -Date: 2024-05-28 +Version: 0.10.0 +Date: 2024-07-14 Authors@R: c( person("Anthony", "Sena", email = "sena@ohdsi.org", role = c("aut", "cre")), person("Jamie", "Gilbert", role = c("aut")), diff --git a/NEWS.md b/NEWS.md index d68e06c..d81818c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,20 @@ +CohortGenerator 0.10.0 +======================= +New Features +- Add `runCohortGeneration` function (Issue #165) +- Adopt ResultModelManager for handling results data models & uploading. Extend results data model to include information on cohort subsets (#154, #162) +- Remove REMOTES entries for CirceR and Eunomia which are now in CRAN (#145) +- Unit tests now running on all OHDSI DB Platforms (#151) + +Bug Fixes +- Negation of cohort subset operator must join on `subject_id` AND `start_date` (#167) +- Allow integer as cohort ID (#146) +- Use native messaging functions for output vs.
ParallelLogger (#97) +- Prevent upload of inclusion rule information (#78) +- Expose `colTypes` when working with .csv files (#59) +- Remove `bit64` from package (mostly) (#152) +- Updated documentation for cohort subset negate feature (#111) + CohortGenerator 0.9.0 ======================= - Random sample functionality (for development only) (Issue #129) diff --git a/R/CohortDefinitionSet.R b/R/CohortDefinitionSet.R index 316f9b8..4404951 100644 --- a/R/CohortDefinitionSet.R +++ b/R/CohortDefinitionSet.R @@ -50,7 +50,7 @@ createEmptyCohortDefinitionSet <- function(verbose = FALSE) { cohortDefinitionSetColumns <- colnames(df) matchingColumns <- intersect(x = colnames(x), y = cohortDefinitionSetColumns) columnNamesMatch <- setequal(matchingColumns, cohortDefinitionSetColumns) - + if (!columnNamesMatch && emitWarning) { columnsMissing <- setdiff(x = cohortDefinitionSetColumns, y = colnames(x)) warningMessage <- paste0( @@ -144,7 +144,7 @@ checkAndFixCohortDefinitionSetDataTypes <- function(x, fixDataTypes = TRUE, emit # Check if the data types match # NOTE: createEmptyCohortDefinitionSet() is the reference for the data # types. cohortId is declared as a numeric but an integer is also fine - dataTypesMatch <- (xDataTypes[1] %in% c('integer', 'double') && all(xDataTypes[2:4] == "character")) + dataTypesMatch <- (xDataTypes[1] %in% c("integer", "double") && all(xDataTypes[2:4] == "character")) # Create the cohortDefinitionSetSpec from the names/data types for reference cohortDefinitionSetSpec <- data.frame( columnName = names(xDataTypes), @@ -520,7 +520,7 @@ checkLargeInteger <- function(x, columnName = "cohortId") { # NOTE: suppressWarnings used to mask # warning from R which may happen for # large values in X. - res <- all(suppressWarnings(x%%1) == 0) + res <- all(suppressWarnings(x %% 1) == 0) if (!isTRUE(res)) { errorMessage <- paste0("The column ", columnName, " included non-integer values. 
Please update and re-try") return(errorMessage) diff --git a/R/CohortStats.R b/R/CohortStats.R index c024d7d..2ddaa84 100644 --- a/R/CohortStats.R +++ b/R/CohortStats.R @@ -58,7 +58,7 @@ insertInclusionRuleNames <- function(connectionDetails = NULL, } inclusionRules <- getCohortInclusionRules(cohortDefinitionSet) - + # Remove any existing data to prevent duplication DatabaseConnector::renderTranslateExecuteSql( connection = connection, @@ -129,7 +129,7 @@ getStatsTable <- function(connectionDetails, } #' Get Cohort Inclusion Stats Table Data -#' +#' #' @description #' This function returns a data frame of the data in the Cohort Inclusion Tables. #' Results are organized in to a list with 5 different data frames: @@ -203,23 +203,23 @@ getCohortStats <- function(connectionDetails, #' Get Cohort Inclusion Rules from a cohort definition set -#' +#' #' @description #' This function returns a data frame of the inclusion rules defined #' in a cohort definition set. -#' +#' #' @md #' @template CohortDefinitionSet -#' +#' #' @export getCohortInclusionRules <- function(cohortDefinitionSet) { checkmate::assertDataFrame(cohortDefinitionSet, min.rows = 1, col.names = "named") checkmate::assertNames(colnames(cohortDefinitionSet), - must.include = c( - "cohortId", - "cohortName", - "json" - ) + must.include = c( + "cohortId", + "cohortName", + "json" + ) ) # Assemble the cohort inclusion rules @@ -231,7 +231,7 @@ getCohortInclusionRules <- function(cohortDefinitionSet) { name = character(), description = character() ) - + # Remove any cohort definitions that do not include the JSON property cohortDefinitionSet <- cohortDefinitionSet[!(is.null(cohortDefinitionSet$json) | is.na(cohortDefinitionSet$json)), ] for (i in 1:nrow(cohortDefinitionSet)) { @@ -261,6 +261,6 @@ getCohortInclusionRules <- function(cohortDefinitionSet) { } } } - + invisible(inclusionRules) -} \ No newline at end of file +} diff --git a/R/CsvHelper.R b/R/CsvHelper.R index ab730f2..a1a55b1 100644 --- 
a/R/CsvHelper.R +++ b/R/CsvHelper.R @@ -27,7 +27,7 @@ #' @param file The .csv file to read. #' @param warnOnCaseMismatch When TRUE, raise a warning if column headings #' in the .csv are not in snake_case format -#' +#' #' @param colTypes Corresponds to the `col_types` in the `readr::read_csv` function. #' One of `NULL`, a [readr::cols()] specification, or #' a string. See `vignette("readr")` for more details. @@ -38,7 +38,7 @@ #' `guess_max` or supply the correct types yourself. #' #' Column specifications created by [list()] or [cols()] must contain -#' one column specification for each column. +#' one column specification for each column. #' #' Alternatively, you can use a compact string representation where each #' character represents one column: @@ -57,7 +57,7 @@ #' By default, reading a file without a column specification will print a #' message showing what `readr` guessed they were. To remove this message, #' set `show_col_types = FALSE` or set `options(readr.show_col_types = FALSE)`. -#' +#' #' @return #' A tibble with the .csv contents #' diff --git a/R/Export.R b/R/Export.R index 9f633a3..bab20f6 100644 --- a/R/Export.R +++ b/R/Export.R @@ -20,12 +20,12 @@ #' This function retrieves the data from the cohort statistics tables and #' writes them to the inclusion statistics folder specified in the function #' call. NOTE: inclusion rule names are handled in one of two ways: -#' -#' 1. You can specify the cohortDefinitionSet parameter and the inclusion rule -#' names will be extracted from the data.frame. +#' +#' 1. You can specify the cohortDefinitionSet parameter and the inclusion rule +#' names will be extracted from the data.frame. #' 2. You can insert the inclusion rule names into the database using the -#' insertInclusionRuleNames function of this package. -#' +#' insertInclusionRuleNames function of this package. +#' #' The first approach is preferred as to avoid the warning emitted. 
#' #' @template Connection @@ -45,9 +45,9 @@ #' #' @param databaseId Optional - when specified, the databaseId will be added #' to the exported results -#' +#' #' @template CohortDefinitionSet -#' +#' #' @param tablePrefix Optional - allows to append a prefix to the exported #' file names. #' @@ -72,7 +72,7 @@ exportCohortStatsTables <- function(connectionDetails, if (!dir.exists(cohortStatisticsFolder)) { dir.create(cohortStatisticsFolder, recursive = TRUE) } - + # Internal function to export the stats exportStats <- function(data, fileName, @@ -91,7 +91,7 @@ exportCohortStatsTables <- function(connectionDetails, .writeCsv(x = data, file = fullFileName) } } - + tablesToExport <- data.frame( tableName = c("cohortInclusionResultTable", "cohortInclusionStatsTable", "cohortSummaryStatsTable", "cohortCensorStatsTable"), fileName = c("cohort_inc_result.csv", "cohort_inc_stats.csv", "cohort_summary_stats.csv", "cohort_censor_stats.csv") @@ -122,7 +122,7 @@ exportCohortStatsTables <- function(connectionDetails, snakeCaseToCamelCase = snakeCaseToCamelCase, cohortTableNames = cohortTableNames ) - + for (i in 1:nrow(tablesToExport)) { fileName <- ifelse(test = fileNamesInSnakeCase, yes = tablesToExport$fileName[i], @@ -143,39 +143,42 @@ exportCohortDefinitionSet <- function(outputFolder, cohortDefinitionSet = NULL) cdsCohortSubsets <- getSubsetDefinitions(cohortDefinitionSet) if (length(cdsCohortSubsets) > 0) { for (i in seq_along(cdsCohortSubsets)) { - cohortSubsets <- rbind(cohortSubsets, - data.frame( - subsetDefinitionId = cdsCohortSubsets[[i]]$definitionId, - json = as.character(cdsCohortSubsets[[i]]$toJSON()) - )) + cohortSubsets <- rbind( + cohortSubsets, + data.frame( + subsetDefinitionId = cdsCohortSubsets[[i]]$definitionId, + json = as.character(cdsCohortSubsets[[i]]$toJSON()) + ) + ) } } else { # NOTE: In this case the cohortDefinitionSet has no subsets defined # and so we need to add the additional columns that are defined - # in the function: 
addCohortSubsetDefinition. To do this, + # in the function: addCohortSubsetDefinition. To do this, # we'll construct a copy of the cohortDefinitionSet with a single # subset to get the proper structure and filter it to the # cohorts of interest. - cdsCopy <- cohortDefinitionSet %>% + cdsCopy <- cohortDefinitionSet %>% addCohortSubsetDefinition( cohortSubsetDefintion = createCohortSubsetDefinition( - definitionId = 1, - name="empty", + definitionId = 1, + name = "empty", subsetOperators = list( createDemographicSubset() ) ) - ) %>% dplyr::filter(cohortId == cohortDefinitionSet$cohortId) + ) %>% + dplyr::filter(.data$cohortId == cohortDefinitionSet$cohortId) cohortDefinitionSet <- cdsCopy } # Massage and save the cohort definition set colsToRename <- c("cohortId", "cohortName", "sql", "json") colInd <- which(names(cohortDefinitionSet) %in% colsToRename) names(cohortDefinitionSet)[colInd] <- c("cohortDefinitionId", "cohortName", "sqlCommand", "json") - if (! "description" %in% names(cohortDefinitionSet)) { + if (!"description" %in% names(cohortDefinitionSet)) { cohortDefinitionSet$description <- "" } - cohortDefinitions <- cohortDefinitionSet[,intersect(names(cohortDefinitions), names(cohortDefinitionSet))] + cohortDefinitions <- cohortDefinitionSet[, intersect(names(cohortDefinitions), names(cohortDefinitionSet))] } writeCsv( x = cohortDefinitions, @@ -199,4 +202,4 @@ createEmptyResult <- function(tableName) { result <- tibble::as_tibble(t(result), name_repair = "check_unique") result <- result[FALSE, ] return(result) -} \ No newline at end of file +} diff --git a/R/NegativeControlCohorts.R b/R/NegativeControlCohorts.R index 7489d98..a031078 100644 --- a/R/NegativeControlCohorts.R +++ b/R/NegativeControlCohorts.R @@ -100,7 +100,7 @@ generateNegativeControlOutcomeCohorts <- function(connectionDetails = NULL, ) assertLargeInteger(negativeControlOutcomeCohortSet$cohortId) assertLargeInteger(negativeControlOutcomeCohortSet$outcomeConceptId, columnName = 
"outcomeConceptId") - + # Verify that cohort IDs are not repeated in the negative control # cohort definition set before generating if (length(unique(negativeControlOutcomeCohortSet$cohortId)) != length(negativeControlOutcomeCohortSet$cohortId)) { @@ -186,11 +186,12 @@ createNegativeControlOutcomesQuery <- function(connection, detectOnDescendants, negativeControlOutcomeCohortSet) { selectClause <- "" - for (i in 1:nrow(negativeControlOutcomeCohortSet)){ - selectClause <- paste0(selectClause, - "SELECT CAST(", negativeControlOutcomeCohortSet$cohortId[i], " AS BIGINT), ", - "CAST(", negativeControlOutcomeCohortSet$outcomeConceptId[i], " AS BIGINT)" - ) + for (i in 1:nrow(negativeControlOutcomeCohortSet)) { + selectClause <- paste0( + selectClause, + "SELECT CAST(", negativeControlOutcomeCohortSet$cohortId[i], " AS BIGINT), ", + "CAST(", negativeControlOutcomeCohortSet$outcomeConceptId[i], " AS BIGINT)" + ) if (i < nrow(negativeControlOutcomeCohortSet)) { selectClause <- paste0(selectClause, "\nUNION\n") } diff --git a/R/ResultsDataModel.R b/R/ResultsDataModel.R index 06f8c31..07643f0 100644 --- a/R/ResultsDataModel.R +++ b/R/ResultsDataModel.R @@ -43,10 +43,10 @@ createResultsDataModel <- function(connectionDetails = NULL, if (connectionDetails$dbms == "sqlite" & databaseSchema != "main") { stop("Invalid schema for sqlite, use databaseSchema = 'main'") } - + connection <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) - + # Create first version of results model: sql <- SqlRender::readSql(system.file("sql/sql_server/CreateResultsDataModel.sql", package = "CohortGenerator", mustWork = TRUE)) sql <- SqlRender::render( @@ -115,9 +115,11 @@ uploadResults <- function(connectionDetails, #' @export migrateDataModel <- function(connectionDetails, databaseSchema, tablePrefix = "") { ParallelLogger::logInfo("Migrating data set") - migrator <- getDataMigrator(connectionDetails = connectionDetails, - databaseSchema = 
databaseSchema, - tablePrefix = tablePrefix) + migrator <- getDataMigrator( + connectionDetails = connectionDetails, + databaseSchema = databaseSchema, + tablePrefix = tablePrefix + ) migrator$executeMigrations() migrator$finalize() } @@ -142,4 +144,4 @@ getDataMigrator <- function(connectionDetails, databaseSchema, tablePrefix = "") migrationPath = "migrations", packageName = "CohortGenerator" ) -} \ No newline at end of file +} diff --git a/R/RunCohortGeneration.R b/R/RunCohortGeneration.R index a44d5b6..e1565d0 100644 --- a/R/RunCohortGeneration.R +++ b/R/RunCohortGeneration.R @@ -19,11 +19,11 @@ #' @details #' Run a cohort generation for a set of cohorts and negative control outcomes. #' This function will also export the results of the run to the `outputFolder`. -#' +#' #' @param connectionDetails An object of type \code{connectionDetails} as created using the #' \code{\link[DatabaseConnector]{createConnectionDetails}} function in the -#' DatabaseConnector package. -#' +#' DatabaseConnector package. +#' #' @template CdmDatabaseSchema #' #' @template TempEmulationSchema @@ -34,37 +34,37 @@ #' #' @template NegativeControlOutcomeCohortSet #' -#' @param occurrenceType For negative controls outcomes, the occurrenceType -#' will detect either: the first time an -#' outcomeConceptId occurs or all times the -#' outcomeConceptId occurs for a person. Values +#' @param occurrenceType For negative controls outcomes, the occurrenceType +#' will detect either: the first time an +#' outcomeConceptId occurs or all times the +#' outcomeConceptId occurs for a person. Values #' accepted: 'all' or 'first'. #' -#' @param detectOnDescendants For negative controls outcomes, when set to TRUE, -#' detectOnDescendants will use the vocabulary to -#' find negative control outcomes using the -#' outcomeConceptId and all descendants via the -#' concept_ancestor table. 
When FALSE, only the exact -#' outcomeConceptId will be used to detect the +#' @param detectOnDescendants For negative controls outcomes, when set to TRUE, +#' detectOnDescendants will use the vocabulary to +#' find negative control outcomes using the +#' outcomeConceptId and all descendants via the +#' concept_ancestor table. When FALSE, only the exact +#' outcomeConceptId will be used to detect the #' outcome. -#' +#' @param stopOnError If an error happens while generating one of the -#' cohorts in the cohortDefinitionSet, should we -#' stop processing the other cohorts? The default is -#' TRUE; when set to FALSE, failures will be +#' +#' @param stopOnError If an error happens while generating one of the +#' cohorts in the cohortDefinitionSet, should we +#' stop processing the other cohorts? The default is +#' TRUE; when set to FALSE, failures will be #' identified in the return value from this function. -#' +#' #' @param outputFolder Name of the folder where all the outputs will be written to. #' #' @param databaseId A unique ID for the database. This will be appended to #' most tables. -#' +#' #' @param incremental Create only cohorts that haven't been created before? #' -#' @param incrementalFolder If \code{incremental = TRUE}, specify a folder where -#' records are kept of which definition has been +#' @param incrementalFolder If \code{incremental = TRUE}, specify a folder where +#' records are kept of which definition has been #' executed.
-#' +#' #' @export runCohortGeneration <- function(connectionDetails, cdmDatabaseSchema, @@ -81,7 +81,7 @@ runCohortGeneration <- function(connectionDetails, incremental = FALSE, incrementalFolder = NULL) { if (is.null(cohortDefinitionSet) && is.null(negativeControlOutcomeCohortSet)) { - stop("You must supply at least 1 cohortDefinitionSet OR 1 negativeControlOutcomeCohortSet") + stop("You must supply at least 1 cohortDefinitionSet OR 1 negativeControlOutcomeCohortSet") } errorMessages <- checkmate::makeAssertCollection() if (is(connectionDetails, "connectionDetails")) { @@ -99,16 +99,16 @@ runCohortGeneration <- function(connectionDetails, checkmate::assert_logical(detectOnDescendants, add = errorMessages) checkmate::assert_logical(stopOnError, add = errorMessages) checkmate::reportAssertions(collection = errorMessages) - + # Establish the connection and ensure the cleanup is performed connection <- DatabaseConnector::connect(connectionDetails) on.exit(DatabaseConnector::disconnect(connection)) - + # Create the export folder if (!dir.exists(outputFolder)) { dir.create(outputFolder, recursive = T) } - + # Create the cohort tables createCohortTables( connection = connection, @@ -116,7 +116,7 @@ runCohortGeneration <- function(connectionDetails, cohortTableNames = cohortTableNames, incremental = incremental ) - + generateAndExportCohorts( connection = connection, cdmDatabaseSchema = cdmDatabaseSchema, @@ -128,9 +128,9 @@ runCohortGeneration <- function(connectionDetails, outputFolder = outputFolder, databaseId = databaseId, incremental = incremental, - incrementalFolder = incrementalFolder + incrementalFolder = incrementalFolder ) - + generateAndExportNegativeControls( connection = connection, cdmDatabaseSchema = cdmDatabaseSchema, @@ -151,7 +151,7 @@ runCohortGeneration <- function(connectionDetails, from = system.file("csv", "resultsDataModelSpecification.csv", package = "CohortGenerator"), to = outputFolder ) - + rlang::inform("Cohort generation complete.") } @@ 
-184,7 +184,7 @@ generateAndExportCohorts <- function(connection, incremental = incremental, incrementalFolder = incrementalFolder ) - + cohortCountsFromDb <- getCohortCounts( connection = connection, cohortDatabaseSchema = cohortDatabaseSchema, @@ -192,11 +192,11 @@ generateAndExportCohorts <- function(connection, cohortDefinitionSet = cohortDefinitionSet, databaseId = databaseId ) - + # Filter to columns in the results data model cohortCounts <- cohortCountsFromDb[names(cohortCounts)] } - + # Save the generation information rlang::inform("Saving cohort generation information") if (!is.null(cohortsGenerated) && nrow(cohortsGenerated) > 0) { @@ -218,13 +218,13 @@ generateAndExportCohorts <- function(connection, ) } } - + rlang::inform("Saving cohort counts") writeCsv( x = cohortCounts, file = cohortCountsFileName ) - + rlang::inform("Saving cohort statistics") exportCohortStatsTables( connection = connection, @@ -238,7 +238,7 @@ generateAndExportCohorts <- function(connection, cohortDefinitionSet = cohortDefinitionSet, tablePrefix = "cg_" ) - + # Export the cohort definition set rlang::inform("Saving cohort definition set") exportCohortDefinitionSet(outputFolder, cohortDefinitionSet) @@ -274,33 +274,33 @@ generateAndExportNegativeControls <- function(connection, incremental = incremental, incrementalFolder = incrementalFolder ) - + # Assemble the negativeControlOutcomes for export negativeControlOutcomes <- cbind( negativeControlOutcomeCohortSet, occurrenceType = rep(occurrenceType, nrow(negativeControlOutcomeCohortSet)), detectOnDescendants = rep(detectOnDescendants, nrow(negativeControlOutcomeCohortSet)) ) - + # Count the negative controls cohortCountsNegativeControlOutcomes <- getCohortCounts( connection = connection, cohortDatabaseSchema = cohortDatabaseSchema, cohortTable = cohortTableNames$cohortTable, databaseId = databaseId, - cohortDefinitionSet = negativeControlOutcomeCohortSet[,c("cohortId"), drop = FALSE] + cohortDefinitionSet = 
negativeControlOutcomeCohortSet[, c("cohortId"), drop = FALSE] ) } - + rlang::inform("Saving negative control outcome cohort definition") writeCsv( x = negativeControlOutcomes, file = negativeControlOutcomesFileName ) - + rlang::inform("Saving negative control outcome cohort counts") writeCsv( x = cohortCountsNegativeControlOutcomes, file = cohortCountsNegativeControlOutcomesFileName ) -} \ No newline at end of file +} diff --git a/R/Subsets.R b/R/Subsets.R index a784ac5..c157f72 100644 --- a/R/Subsets.R +++ b/R/Subsets.R @@ -35,7 +35,7 @@ # SubsetCohortWindow ------------- -#' SubsetCohortWindow settings +#' @title Time Window For Cohort Subset Operator #' @export #' @description #' Representation of a time window to use when subsetting a target cohort with a subset cohort @@ -47,7 +47,6 @@ SubsetCohortWindow <- R6::R6Class( .targetAnchor = "cohortStart" ), public = list( - #' @title to List #' @description List representation of object toList = function() { objRepr <- list() @@ -128,7 +127,7 @@ createSubsetCohortWindow <- function(startDay, endDay, targetAnchor) { } # SubsetOperator ------------------------------ -#' @title SubsetOperator +#' @title Abstract base class for subsets. #' @export #' @description #' Abstract Base Class for subsets. Subsets should inherit from this and implement their own requirements. @@ -418,7 +417,10 @@ createCohortSubset <- function(name = NULL, cohortIds, cohortCombinationOperator } # DemographicSubsetOperator ------------------------------ -#' Criteria Subset +#' @title Demographic Subset Operator +#' @description +#' Operators for subsetting a cohort by demographic criteria +#' #' @export DemographicSubsetOperator <- R6::R6Class( classname = "DemographicSubsetOperator", diff --git a/_pkgdown.yml b/_pkgdown.yml index d0aa7e1..1258641 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -8,6 +8,13 @@ home: href: http://forums.ohdsi.org reference: + - title: "Cohort Generation" + desc: > + Functions that support generating cohorts. 
+ contents: + - runCohortGeneration + - generateCohortSet + - title: "Cohort Tables" desc: > Functions that support creating the necessary cohort tables. @@ -25,13 +32,6 @@ reference: - checkAndFixCohortDefinitionSetDataTypes - isCohortDefinitionSet - - title: "Cohort Generation" - desc: > - Functions that support generating cohorts. - contents: - - generateCohortSet - - createEmptyCohortDefinitionSet - - title: "Cohort Counts" desc: > Function for obtaining the counts of subjects and events for one or @@ -39,23 +39,27 @@ reference: contents: - getCohortCounts - - title: "Cohort Subset" + - title: "Cohort Subset Functions" desc: > - Functions and R6 classes for creating cohort subset definitions and subset - operators. + Functions for creating cohort subset definitions and subset operators. contents: - addCohortSubsetDefinition - - CohortSubsetDefinition - - CohortSubsetOperator - createCohortSubset - createCohortSubsetDefinition - createDemographicSubset - createLimitSubset - createSubsetCohortWindow - - DemographicSubsetOperator - getSubsetDefinitions - - LimitSubsetOperator - saveCohortSubsetDefinition + + - title: "Cohort Subset Classes" + desc: > + R6 classes for cohort subset definitions and subset operators. + contents: + - CohortSubsetDefinition + - CohortSubsetOperator + - DemographicSubsetOperator + - LimitSubsetOperator - SubsetCohortWindow - SubsetOperator @@ -63,9 +67,12 @@ reference: desc: > Functions for inserting inclusion rule names from a cohort definition, exporting the cohort statistics to the file system and a helper function - for dropping those tables when they are no longer needed. + for dropping those tables when they are no longer needed. These functions + assume you are using [Circe](https://github.com/OHDSI/circe-be) for + inclusion rules and cohort statistics. 
contents: - getCohortStats + - getCohortInclusionRules - insertInclusionRuleNames - exportCohortStatsTables - dropCohortStatsTables @@ -77,6 +84,17 @@ reference: contents: - createEmptyNegativeControlOutcomeCohortSet - generateNegativeControlOutcomeCohorts + + - title: "Result Model Management" + desc: > + Functions for managing the results of running Cohort Generator via + `runCohortGeneration` + contents: + - createResultsDataModel + - getDataMigrator + - getResultsDataModelSpecifications + - migrateDataModel + - uploadResults - title: "CSV File Helpers" desc: > diff --git a/docs/404.html b/docs/404.html index 1fb5c5e..da143bd 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/articles/CreatingCohortSubsetDefinitions.html b/docs/articles/CreatingCohortSubsetDefinitions.html index 5e8b709..71a5f0e 100644 --- a/docs/articles/CreatingCohortSubsetDefinitions.html +++ b/docs/articles/CreatingCohortSubsetDefinitions.html @@ -33,7 +33,7 @@ CohortGenerator - 0.9.0 + 0.10.0 @@ -91,7 +91,7 @@

Creating Cohort Subset Definitions

James P. Gilbert and Anthony G. Sena

-

2024-05-28

+

2024-07-14

Source: vignettes/CreatingCohortSubsetDefinitions.Rmd @@ -277,6 +277,15 @@

Applying subset NA +1778214 +celecoxibCensored +1778214 + +1778214 +FALSE +NA + + 1778211001 celecoxib - Patients in cohort cohort 1778213 with 365 days prior observation Subset to patients in cohort 1778213, Observation @@ -287,7 +296,7 @@

Applying subset TRUE 1 - + 1778212001 celecoxibAge40 - Patients in cohort cohort 1778213 with 365 days prior observation Subset to patients in cohort 1778213, @@ -298,7 +307,7 @@

Applying subset TRUE 1 - + 1778213001 celecoxibAge40Male - Patients in cohort cohort 1778213 with 365 days prior observation Subset to patients in cohort 1778213, @@ -309,6 +318,17 @@

Applying subset TRUE 1 + +1778214001 +celecoxibCensored - Patients in cohort cohort 1778213 +with 365 days prior observation Subset to patients in cohort 1778213, +Observation of at least 365 days prior +NA +NA +1778214 +TRUE +1 +

We can also apply a subset definition to only a limited number of @@ -366,6 +386,15 @@

Applying subset NA +1778214 +celecoxibCensored +1778214 + +1778214 +FALSE +NA + + 1778211001 celecoxib - Patients in cohort cohort 1778213 with 365 days prior observation Subset to patients in cohort 1778213, Observation @@ -376,7 +405,7 @@

Applying subset TRUE 1 - + 1778212001 celecoxibAge40 - Patients in cohort cohort 1778213 with 365 days prior observation Subset to patients in cohort 1778213, @@ -387,7 +416,7 @@

Applying subset TRUE 1 - + 1778213001 celecoxibAge40Male - Patients in cohort cohort 1778213 with 365 days prior observation Subset to patients in cohort 1778213, @@ -398,6 +427,17 @@

Applying subset TRUE 1 + +1778214001 +celecoxibCensored - Patients in cohort cohort 1778213 +with 365 days prior observation Subset to patients in cohort 1778213, +Observation of at least 365 days prior +NA +NA +1778214 +TRUE +1 + 1778212002 celecoxibAge40 - Patients in cohort 1778213 with 365 @@ -439,9 +479,9 @@

Applying subset paste("Subset Parent Id:", cohortDefinitionSet$subsetParent[4]), paste("Name", cohortDefinitionSet$cohortName[4]) )) -
#> Cohort Id: 1778211001
-#> Subset Parent Id: 1778211
-#> Name celecoxib - Patients in cohort cohort 1778213 with 365 days prior observation Subset to patients in cohort 1778213, Observation of at least 365 days prior
+
#> Cohort Id: 1778214
+#> Subset Parent Id: 1778214
+#> Name celecoxibCensored

Note that when adding a subset definition to a cohort definition set, the target cohort ids e.g (1778211, 1778212) must exist in the cohortDefinitionSet and the output ids diff --git a/docs/articles/GeneratingCohorts.html b/docs/articles/GeneratingCohorts.html index 2762151..69526fe 100644 --- a/docs/articles/GeneratingCohorts.html +++ b/docs/articles/GeneratingCohorts.html @@ -33,7 +33,7 @@ CohortGenerator - 0.9.0 + 0.10.0 @@ -91,7 +91,7 @@

Generating Cohorts

Anthony G. Sena and Martijn J. Schuemie

-

2024-05-28

+

2024-07-14

Source: vignettes/GeneratingCohorts.Rmd @@ -262,12 +262,13 @@

Generating Cohorts= "main", cohortTable = cohortTableNames$cohortTable ) -
#> Connecting using SQLite driver
-
#> Counting cohorts took 0.126 secs
+
#> Connecting using SQLite driver
+#> Counting cohorts took 0.131 secs
#>   cohortId cohortEntries cohortSubjects
 #> 1  1778211          1800           1800
 #> 2  1778212           569            569
-#> 3  1778213           266            266
+#> 3 1778213 266 266 +#> 4 1778214 1750 1750
@@ -287,7 +288,7 @@

Cohort Statistics (Inclusio CohortDiagnostics. Building on our basic example, let’s export the cohorts from WebAPI but this time indicate that we’d like to also include the code that generatesStats:

-
+
 

Next we’ll create the tables to store the cohort and the cohort statistics. Then we can generate the cohorts.

-
+
 # First get the cohort table names to use for this generation task
 cohortTableNames <- getCohortTableNames(cohortTable = "stats_example")
 
@@ -321,7 +322,7 @@ 

Cohort Statistics (Inclusio are available in the cohort statistics tables. The next step is to export the results to the file system which is done using the code below:

-
+
 insertInclusionRuleNames(
   connectionDetails = connectionDetails,
   cohortDefinitionSet = cohortDefinitionSet,
@@ -344,7 +345,7 @@ 

Cohort Statistics (Inclusio table in the InclusionStats folder.

Once you have exported your cohort statistics, you can optionally drop the statistics tables by using the following command:

-
+
 dropCohortStatsTables(
   connectionDetails = connectionDetails,
   cohortDatabaseSchema = "main",
@@ -362,7 +363,7 @@ 

Incremental Mode -
+
 # Create a set of tables for this example
 cohortTableNames <- getCohortTableNames(cohortTable = "cohort")
 createCohortTables(
@@ -376,15 +377,15 @@ 

Incremental Mode -
+
 createCohortTables(
   connectionDetails = connectionDetails,
   cohortTableNames = cohortTableNames,
   cohortDatabaseSchema = "main",
   incremental = TRUE
 )
-
#> Connecting using SQLite driver
-
#> Table "cohort" already exists and in incremental mode, so not recreating it.
+
#> Connecting using SQLite driver
+#> Table "cohort" already exists and in incremental mode, so not recreating it.
 #> Table "cohort" already exists and in incremental mode, so not recreating it.
 #> Table "cohort_inclusion" already exists and in incremental mode, so not recreating it.
 #> Table "cohort_inclusion_result" already exists and in incremental mode, so not recreating it.
@@ -394,7 +395,7 @@ 

Incremental ModeThe use of incremental = TRUE here allows for assurance that tables and results from previous runs are preserved. Next, we can generate our cohortDefinitionSet in incremental mode.

-
+
 generateCohortSet(
   connectionDetails = connectionDetails,
   cdmDatabaseSchema = "main",
@@ -418,7 +419,7 @@ 

Incremental ModeincrementalFolder for the same cohort ID, the generation is skipped. To illustrate how this looks:

-
+
 generateCohortSet(
   connectionDetails = connectionDetails,
   cdmDatabaseSchema = "main",
@@ -429,11 +430,12 @@ 

Incremental Mode incrementalFolder = file.path(someFolder, "RecordKeeping") )

#> Connecting using SQLite driver
-
#> Initiating cluster consisting only of main thread
-#> Skipping cohortId = '1778211' because it is unchanged from earlier run
+
#> Initiating cluster consisting only of main thread
+
#> Skipping cohortId = '1778211' because it is unchanged from earlier run
 #> Skipping cohortId = '1778212' because it is unchanged from earlier run
 #> Skipping cohortId = '1778213' because it is unchanged from earlier run
-#> Generating cohort set took 0.11 secs
+#> Skipping cohortId = '1778214' because it is unchanged from earlier run
+
#> Generating cohort set took 0.13 secs
Potential Pitfalls of Incremental Mode
diff --git a/docs/articles/SamplingCohorts.html b/docs/articles/SamplingCohorts.html index 76bc9ec..e856ab2 100644 --- a/docs/articles/SamplingCohorts.html +++ b/docs/articles/SamplingCohorts.html @@ -33,7 +33,7 @@ CohortGenerator - 0.9.0 + 0.10.0
@@ -91,7 +91,7 @@

Sampling Cohorts

James P. Gilbert

-

2024-05-28

+

2024-07-14

Source: vignettes/SamplingCohorts.Rmd diff --git a/docs/articles/index.html b/docs/articles/index.html index c0fbc19..23c3513 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/authors.html b/docs/authors.html index 934e4a3..83a259e 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0

diff --git a/docs/index.html b/docs/index.html index e325d4e..e722aa9 100644 --- a/docs/index.html +++ b/docs/index.html @@ -33,7 +33,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/news/index.html b/docs/news/index.html index abf1fc5..caab98e 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0

@@ -66,6 +66,11 @@

Changelog

Source: NEWS.md

+
+ +

New Features - Add runCohortGeneration function (Issue #165) - Adopt ResultModelManager for handling results data models & uploading. Extend results data model to include information on cohort subsets(#154, #162) - Remove REMOTES entries for CirceR and Eunomia which are now in CRAN (#145) - Unit tests now running on all OHDSI DB Platforms (#151)

+

Bug Fixes - Negation of cohort subset operator must join on subject_id AND start_date (#167) - Allow integer as cohort ID (#146) - Use native messaging functions for output vs. ParallelLogger (#97) - Prevent upload of inclusion rule information (#78) - Expose colTypes when working with .csv files (#59) - Remove bit64 from package (mostly) (#152) - Updated documentation for cohort subset negate feature (#111)

+
  • Random sample functionality (for development only) (Issue #129)
  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index ec62a11..475ec55 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -5,5 +5,5 @@ articles: CreatingCohortSubsetDefinitions: CreatingCohortSubsetDefinitions.html GeneratingCohorts: GeneratingCohorts.html SamplingCohorts: SamplingCohorts.html -last_built: 2024-05-28T17:58Z +last_built: 2024-07-14T13:51Z diff --git a/docs/reference/CohortGenerator-package.html b/docs/reference/CohortGenerator-package.html index 94e32b7..7030844 100644 --- a/docs/reference/CohortGenerator-package.html +++ b/docs/reference/CohortGenerator-package.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/CohortSubsetDefinition.html b/docs/reference/CohortSubsetDefinition.html index 3540129..f71367a 100644 --- a/docs/reference/CohortSubsetDefinition.html +++ b/docs/reference/CohortSubsetDefinition.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/CohortSubsetOperator.html b/docs/reference/CohortSubsetOperator.html index eb899f8..a493c14 100644 --- a/docs/reference/CohortSubsetOperator.html +++ b/docs/reference/CohortSubsetOperator.html @@ -18,7 +18,7 @@ CohortGenerator - 0.9.0 + 0.10.0

diff --git a/docs/reference/DemographicSubsetOperator.html b/docs/reference/DemographicSubsetOperator.html index b0286f9..58adfff 100644 --- a/docs/reference/DemographicSubsetOperator.html +++ b/docs/reference/DemographicSubsetOperator.html @@ -1,6 +1,5 @@ -Criteria Subset — DemographicSubsetOperator • CohortGeneratorDemographic Subset Operator — DemographicSubsetOperator • CohortGenerator @@ -18,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0
@@ -63,14 +62,13 @@
-

Criteria Subset

-

Criteria Subset

+

Operators for subsetting a cohort by demographic criteria

diff --git a/docs/reference/LimitSubsetOperator.html b/docs/reference/LimitSubsetOperator.html index 22f4022..fbcaf92 100644 --- a/docs/reference/LimitSubsetOperator.html +++ b/docs/reference/LimitSubsetOperator.html @@ -18,7 +18,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/SubsetCohortWindow.html b/docs/reference/SubsetCohortWindow.html index 08abaf9..3047776 100644 --- a/docs/reference/SubsetCohortWindow.html +++ b/docs/reference/SubsetCohortWindow.html @@ -1,5 +1,5 @@ -to List — SubsetCohortWindow • CohortGeneratorTime Window For Cohort Subset Operator — SubsetCohortWindow • CohortGenerator @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0
@@ -62,7 +62,7 @@
@@ -72,10 +72,6 @@

to List

-
-

Details

-

SubsetCohortWindow settings

-

Active bindings

startDay
diff --git a/docs/reference/SubsetOperator.html b/docs/reference/SubsetOperator.html index b9629d1..1281969 100644 --- a/docs/reference/SubsetOperator.html +++ b/docs/reference/SubsetOperator.html @@ -1,5 +1,5 @@ -SubsetOperator — SubsetOperator • CohortGeneratorAbstract base class for subsets. — SubsetOperator • CohortGenerator @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0
@@ -62,7 +62,7 @@
diff --git a/docs/reference/addCohortSubsetDefinition.html b/docs/reference/addCohortSubsetDefinition.html index 90567f9..465e098 100644 --- a/docs/reference/addCohortSubsetDefinition.html +++ b/docs/reference/addCohortSubsetDefinition.html @@ -20,7 +20,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/checkAndFixCohortDefinitionSetDataTypes.html b/docs/reference/checkAndFixCohortDefinitionSetDataTypes.html index 947ac3f..b2ac30c 100644 --- a/docs/reference/checkAndFixCohortDefinitionSetDataTypes.html +++ b/docs/reference/checkAndFixCohortDefinitionSetDataTypes.html @@ -19,7 +19,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/computeChecksum.html b/docs/reference/computeChecksum.html index 065b877..6b88acc 100644 --- a/docs/reference/computeChecksum.html +++ b/docs/reference/computeChecksum.html @@ -19,7 +19,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/createCohortSubset.html b/docs/reference/createCohortSubset.html index d143bf1..ae84cb4 100644 --- a/docs/reference/createCohortSubset.html +++ b/docs/reference/createCohortSubset.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0
@@ -98,7 +98,7 @@

Arguments

negate
-

The opposite of this definition - include patients who do NOT meet the specified criteria (NOT YET IMPLEMENTED)

+

The opposite of this definition - include patients who do NOT meet the specified criteria

startWindow
diff --git a/docs/reference/createCohortSubsetDefinition.html b/docs/reference/createCohortSubsetDefinition.html index 73a692c..3244251 100644 --- a/docs/reference/createCohortSubsetDefinition.html +++ b/docs/reference/createCohortSubsetDefinition.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0

diff --git a/docs/reference/createCohortTables.html b/docs/reference/createCohortTables.html index c28cf0c..4a7c50c 100644 --- a/docs/reference/createCohortTables.html +++ b/docs/reference/createCohortTables.html @@ -18,7 +18,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/createDemographicSubset.html b/docs/reference/createDemographicSubset.html index a3879b9..1b231e9 100644 --- a/docs/reference/createDemographicSubset.html +++ b/docs/reference/createDemographicSubset.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0

diff --git a/docs/reference/createEmptyCohortDefinitionSet.html b/docs/reference/createEmptyCohortDefinitionSet.html index 31f3f6a..5edc70f 100644 --- a/docs/reference/createEmptyCohortDefinitionSet.html +++ b/docs/reference/createEmptyCohortDefinitionSet.html @@ -18,7 +18,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/createEmptyNegativeControlOutcomeCohortSet.html b/docs/reference/createEmptyNegativeControlOutcomeCohortSet.html index 8c2f1e1..008b62d 100644 --- a/docs/reference/createEmptyNegativeControlOutcomeCohortSet.html +++ b/docs/reference/createEmptyNegativeControlOutcomeCohortSet.html @@ -18,7 +18,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/createLimitSubset.html b/docs/reference/createLimitSubset.html index 4071515..961433f 100644 --- a/docs/reference/createLimitSubset.html +++ b/docs/reference/createLimitSubset.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0

diff --git a/docs/reference/createResultsDataModel.html b/docs/reference/createResultsDataModel.html new file mode 100644 index 0000000..b1f7e10 --- /dev/null +++ b/docs/reference/createResultsDataModel.html @@ -0,0 +1,124 @@ + +Create the results data model tables on a database server. — createResultsDataModel • CohortGenerator + + +
+
+ + + +
+
+ + +
+

Create the results data model tables on a database server.

+
+ +
+
createResultsDataModel(
+  connectionDetails = NULL,
+  databaseSchema,
+  tablePrefix = ""
+)
+
+ +
+

Arguments

+
connectionDetails
+

DatabaseConnector connectionDetails instance; see DatabaseConnector::createConnectionDetails

+ + +
databaseSchema
+

The schema on the server where the tables will be created.

+ + +
tablePrefix
+

(Optional) string to insert before table names for database table names

+ +
+
+

Details

+

Only PostgreSQL and SQLite servers are supported.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/createSubsetCohortWindow.html b/docs/reference/createSubsetCohortWindow.html index 27f90e4..08f5230 100644 --- a/docs/reference/createSubsetCohortWindow.html +++ b/docs/reference/createSubsetCohortWindow.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/dropCohortStatsTables.html b/docs/reference/dropCohortStatsTables.html index 4c9e8cd..bd0a8b9 100644 --- a/docs/reference/dropCohortStatsTables.html +++ b/docs/reference/dropCohortStatsTables.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/exportCohortStatsTables.html b/docs/reference/exportCohortStatsTables.html index 258095b..363565d 100644 --- a/docs/reference/exportCohortStatsTables.html +++ b/docs/reference/exportCohortStatsTables.html @@ -1,7 +1,12 @@ Export the cohort statistics tables to the file system — exportCohortStatsTables • CohortGenerator @@ -19,7 +24,7 @@ CohortGenerator - 0.9.0 + 0.10.0 @@ -72,7 +77,12 @@

Export the cohort statistics tables to the file system

This function retrieves the data from the cohort statistics tables and writes them to the inclusion statistics folder specified in the function -call.

+call. NOTE: inclusion rule names are handled in one of two ways:

+

1. You can specify the cohortDefinitionSet parameter and the inclusion rule +names will be extracted from the data.frame. +2. You can insert the inclusion rule names into the database using the +insertInclusionRuleNames function of this package.

+

The first approach is preferred, as it avoids the warning emitted.

@@ -85,7 +95,9 @@

Export the cohort statistics tables to the file system

snakeCaseToCamelCase = TRUE, fileNamesInSnakeCase = FALSE, incremental = FALSE, - databaseId = NULL + databaseId = NULL, + cohortDefinitionSet = NULL, + tablePrefix = "" )
@@ -140,6 +152,28 @@

Arguments

Optional - when specified, the databaseId will be added to the exported results

+ +
cohortDefinitionSet
+

The cohortDefinitionSet argument must be a data frame with +the following columns:

cohortId
+

The unique integer identifier of the cohort

+ +
cohortName
+

The cohort's name

+ +
sql
+

The OHDSI-SQL used to generate the cohort

+ +

Optionally, this data frame may contain:

json
+

The Circe JSON representation of the cohort

+ +
+ + +
tablePrefix
+

Optional - allows a prefix to be added to the exported +file names.

+ diff --git a/docs/reference/generateCohortSet.html b/docs/reference/generateCohortSet.html index 3a73337..6180c55 100644 --- a/docs/reference/generateCohortSet.html +++ b/docs/reference/generateCohortSet.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/generateNegativeControlOutcomeCohorts.html b/docs/reference/generateNegativeControlOutcomeCohorts.html index 3d94e46..b217dfb 100644 --- a/docs/reference/generateNegativeControlOutcomeCohorts.html +++ b/docs/reference/generateNegativeControlOutcomeCohorts.html @@ -19,7 +19,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/getCohortCounts.html b/docs/reference/getCohortCounts.html index 51ddd6b..1ed50e2 100644 --- a/docs/reference/getCohortCounts.html +++ b/docs/reference/getCohortCounts.html @@ -20,7 +20,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/getCohortDefinitionSet.html b/docs/reference/getCohortDefinitionSet.html index 6fabd6b..2424a59 100644 --- a/docs/reference/getCohortDefinitionSet.html +++ b/docs/reference/getCohortDefinitionSet.html @@ -20,7 +20,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/getCohortInclusionRules.html b/docs/reference/getCohortInclusionRules.html new file mode 100644 index 0000000..8dea65b --- /dev/null +++ b/docs/reference/getCohortInclusionRules.html @@ -0,0 +1,123 @@ + +Get Cohort Inclusion Rules from a cohort definition set — getCohortInclusionRules • CohortGenerator + + +
+
+ + + +
+
+ + +
+

This function returns a data frame of the inclusion rules defined +in a cohort definition set.

+
+ +
+
getCohortInclusionRules(cohortDefinitionSet)
+
+ +
+

Arguments

+
cohortDefinitionSet
+

The cohortDefinitionSet argument must be a data frame with +the following columns:

cohortId
+

The unique integer identifier of the cohort

+ +
cohortName
+

The cohort's name

+ +
sql
+

The OHDSI-SQL used to generate the cohort

+ +

Optionally, this data frame may contain:

json
+

The Circe JSON representation of the cohort

+ +
+ +
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/getCohortStats.html b/docs/reference/getCohortStats.html index 9665f7a..5fa025c 100644 --- a/docs/reference/getCohortStats.html +++ b/docs/reference/getCohortStats.html @@ -27,7 +27,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/getCohortTableNames.html b/docs/reference/getCohortTableNames.html index 24eaf0a..352585b 100644 --- a/docs/reference/getCohortTableNames.html +++ b/docs/reference/getCohortTableNames.html @@ -19,7 +19,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/getDataMigrator.html b/docs/reference/getDataMigrator.html new file mode 100644 index 0000000..6c29602 --- /dev/null +++ b/docs/reference/getDataMigrator.html @@ -0,0 +1,122 @@ + +Get database migrations instance — getDataMigrator • CohortGenerator + + +
+
+ + + +
+
+ + +
+

Returns a ResultModelManager DataMigrationManager instance.

+
+ +
+
getDataMigrator(connectionDetails, databaseSchema, tablePrefix = "")
+
+ +
+

Arguments

+
connectionDetails
+

DatabaseConnector connection details object

+ + +
databaseSchema
+

String schema where database schema lives

+ + +
tablePrefix
+

(Optional) Use if a table prefix is used before table names (e.g. "cg_")

+ +
+
+

Value

+ + +

Instance of ResultModelManager::DataMigrationManager that provides an interface for converting existing data models

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/getRequiredTasks.html b/docs/reference/getRequiredTasks.html index e2c8f0e..84010df 100644 --- a/docs/reference/getRequiredTasks.html +++ b/docs/reference/getRequiredTasks.html @@ -19,7 +19,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/getResultsDataModelSpecifications.html b/docs/reference/getResultsDataModelSpecifications.html new file mode 100644 index 0000000..6f65545 --- /dev/null +++ b/docs/reference/getResultsDataModelSpecifications.html @@ -0,0 +1,108 @@ + +Get specifications for CohortGenerator results data model — getResultsDataModelSpecifications • CohortGenerator + + +
+
+ + + +
+
+ + +
+

Get specifications for CohortGenerator results data model

+
+ +
+
getResultsDataModelSpecifications()
+
+ +
+

Value

+ + +

A tibble data frame object with specifications

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/getSubsetDefinitions.html b/docs/reference/getSubsetDefinitions.html index 7564da7..95f1d53 100644 --- a/docs/reference/getSubsetDefinitions.html +++ b/docs/reference/getSubsetDefinitions.html @@ -21,7 +21,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/index.html b/docs/reference/index.html index f8b1ac2..04a3217 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0 @@ -66,6 +66,18 @@

Reference

+ + + + + @@ -101,18 +113,6 @@

Cohort Definition Set isCohortDefinitionSet()

- - - - - - - - - @@ -157,38 +149,54 @@

Cohort Subset createSubsetCohortWindow()

- - - - + + + + + + + + + - + - + + + @@ -213,6 +221,30 @@

Negative Control Outcomes generateNegativeControlOutcomeCohorts()

+ + + + + + + + + + +
+

Cohort Generation

+

Functions that support generating cohorts.

+
+

runCohortGeneration()

+

Run a cohort generation and export results

+

generateCohortSet()

+

Generate a set of cohorts

Cohort Tables

Functions that support creating the necessary cohort tables.

Is the data.frame a cohort definition set?

-

Cohort Generation

-

Functions that support generating cohorts.

-
-

generateCohortSet()

-

Generate a set of cohorts

-

createEmptyCohortDefinitionSet()

-

Create an empty cohort definition set

Cohort Counts

Function for obtaining the counts of subjects and events for one or more cohorts

@@ -122,21 +122,13 @@

Cohort Counts

Count the cohort(s)

-

Cohort Subset

-

Functions and R6 classes for creating cohort subset definitions and subset operators.

+

Cohort Subset Functions

+

Functions for creating cohort subset definitions and subset operators.

addCohortSubsetDefinition()

Add cohort subset definition to a cohort definition set

-

CohortSubsetDefinition

-

Cohort Subset Definition

-

CohortSubsetOperator

-

Cohort Subset Operator

createCohortSubset()

A definition of subset functions to be applied to a set of cohorts

-

DemographicSubsetOperator

-

Criteria Subset

getSubsetDefinitions()

Get cohort subset definitions from a cohort definition set

-

LimitSubsetOperator

-

Limit Subset Operator

saveCohortSubsetDefinition()

Save cohort subset definitions to json

+

Cohort Subset Classes

+

R6 classes for cohort subset definitions and subset operators.

+
+

CohortSubsetDefinition

+

Cohort Subset Definition

+

CohortSubsetOperator

+

Cohort Subset Operator

+

DemographicSubsetOperator

+

Demographic Subset Operator

+

LimitSubsetOperator

+

Limit Subset Operator

SubsetCohortWindow

to List

Time Window For Cohort Subset Operator

SubsetOperator

SubsetOperator

Abstract base class for subsets.

Cohort Statistics

-

Functions for inserting inclusion rule names from a cohort definition, exporting the cohort statistics to the file system and a helper function for dropping those tables when they are no longer needed.

+

Functions for inserting inclusion rule names from a cohort definition, exporting the cohort statistics to the file system and a helper function for dropping those tables when they are no longer needed. These functions assume you are using Circe for inclusion rules and cohort statistics.

getCohortStats()

Get Cohort Inclusion Stats Table Data

+

getCohortInclusionRules()

+

Get Cohort Inclusion Rules from a cohort definition set

insertInclusionRuleNames()

Generate a set of negative control outcome cohorts

+

Result Model Management

+

Functions for managing the results of running Cohort Generator via runCohortGeneration

+
+

createResultsDataModel()

+

Create the results data model tables on a database server.

+

getDataMigrator()

+

Get database migrations instance

+

getResultsDataModelSpecifications()

+

Get specifications for CohortGenerator results data model

+

migrateDataModel()

+

Migrate Data model

+

uploadResults()

+

Upload results to the database server.

CSV File Helpers

Functions for reading and writing CSV files to ensure adherence to the HADES standard when interfacing between R and SQL/File System: https://ohdsi.github.io/Hades/codeStyle.html#Interfacing_between_R_and_SQL

diff --git a/docs/reference/insertInclusionRuleNames.html b/docs/reference/insertInclusionRuleNames.html index 4e47142..2e7edc1 100644 --- a/docs/reference/insertInclusionRuleNames.html +++ b/docs/reference/insertInclusionRuleNames.html @@ -21,7 +21,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/isCamelCase.html b/docs/reference/isCamelCase.html index 6c716f1..180c5bd 100644 --- a/docs/reference/isCamelCase.html +++ b/docs/reference/isCamelCase.html @@ -18,7 +18,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/isCohortDefinitionSet.html b/docs/reference/isCohortDefinitionSet.html index 310f356..fdcd446 100644 --- a/docs/reference/isCohortDefinitionSet.html +++ b/docs/reference/isCohortDefinitionSet.html @@ -18,7 +18,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/isFormattedForDatabaseUpload.html b/docs/reference/isFormattedForDatabaseUpload.html index 0bc0021..f8b9a68 100644 --- a/docs/reference/isFormattedForDatabaseUpload.html +++ b/docs/reference/isFormattedForDatabaseUpload.html @@ -18,7 +18,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/isSnakeCase.html b/docs/reference/isSnakeCase.html index 08554da..985e571 100644 --- a/docs/reference/isSnakeCase.html +++ b/docs/reference/isSnakeCase.html @@ -18,7 +18,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/isTaskRequired.html b/docs/reference/isTaskRequired.html index 3a2dfa3..75332da 100644 --- a/docs/reference/isTaskRequired.html +++ b/docs/reference/isTaskRequired.html @@ -19,7 +19,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/migrateDataModel.html b/docs/reference/migrateDataModel.html new file mode 100644 index 0000000..772c00e --- /dev/null +++ b/docs/reference/migrateDataModel.html @@ -0,0 +1,120 @@ + +Migrate Data model — migrateDataModel • CohortGenerator + + +
+
+ + + +
+
+ + +
+

Migrate data from current state to next state

+

It is strongly advised that you have a backup of all data (either sqlite files, a backup database (in the case you +are using a postgres backend), or the csv/zip files kept from your data generation).

+
+ +
+
migrateDataModel(connectionDetails, databaseSchema, tablePrefix = "")
+
+ +
+

Arguments

+
connectionDetails
+

DatabaseConnector connection details object

+ + +
databaseSchema
+

String schema where database schema lives

+ + +
tablePrefix
+

(Optional) Use if a table prefix is used before table names (e.g. "cg_")

+ +
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/readCsv.html b/docs/reference/readCsv.html index cee3cec..42ec1e1 100644 --- a/docs/reference/readCsv.html +++ b/docs/reference/readCsv.html @@ -21,7 +21,7 @@ CohortGenerator - 0.9.0 + 0.10.0 @@ -80,7 +80,7 @@

Used to read a .csv file

-
readCsv(file, warnOnCaseMismatch = TRUE)
+
readCsv(file, warnOnCaseMismatch = TRUE, colTypes = readr::cols())
@@ -93,6 +93,34 @@

Arguments

When TRUE, raise a warning if column headings in the .csv are not in snake_case format

+ +
colTypes
+

Corresponds to the `col_types` in the `readr::read_csv` function. + One of `NULL`, a [readr::cols()] specification, or + a string. See `vignette("readr")` for more details.

+

If `NULL`, all column types will be inferred from `guess_max` rows of the + input, interspersed throughout the file. This is convenient (and fast), + but not robust. If the guessed types are wrong, you'll need to increase + `guess_max` or supply the correct types yourself.

+

Column specifications created by [list()] or [cols()] must contain + one column specification for each column.

+

Alternatively, you can use a compact string representation where each + character represents one column: + - c = character + - i = integer + - n = number + - d = double + - l = logical + - f = factor + - D = date + - T = date time + - t = time + - ? = guess + - _ or - = skip

+

By default, reading a file without a column specification will print a + message showing what `readr` guessed they were. To remove this message, + set `show_col_types = FALSE` or set `options(readr.show_col_types = FALSE)`.

+

Value

diff --git a/docs/reference/recordTasksDone.html b/docs/reference/recordTasksDone.html index b934c79..056c5aa 100644 --- a/docs/reference/recordTasksDone.html +++ b/docs/reference/recordTasksDone.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/runCohortGeneration.html b/docs/reference/runCohortGeneration.html new file mode 100644 index 0000000..49066fb --- /dev/null +++ b/docs/reference/runCohortGeneration.html @@ -0,0 +1,229 @@ + +Run a cohort generation and export results — runCohortGeneration • CohortGenerator + + +
+
+ + + +
+
+ + +
+

Run a cohort generation and export results

+
+ +
+
runCohortGeneration(
+  connectionDetails,
+  cdmDatabaseSchema,
+  tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
+  cohortDatabaseSchema = cdmDatabaseSchema,
+  cohortTableNames = getCohortTableNames(),
+  cohortDefinitionSet = NULL,
+  negativeControlOutcomeCohortSet = NULL,
+  occurrenceType = "all",
+  detectOnDescendants = FALSE,
+  stopOnError = TRUE,
+  outputFolder,
+  databaseId = 1,
+  incremental = FALSE,
+  incrementalFolder = NULL
+)
+
+ +
+

Arguments

+
connectionDetails
+

An object of type connectionDetails as created using the +createConnectionDetails function in the +DatabaseConnector package.

+ + +
cdmDatabaseSchema
+

Schema name where your patient-level data in OMOP CDM format resides. +Note that for SQL Server, this should include both the database and +schema name, for example 'cdm_data.dbo'.

+ + +
tempEmulationSchema
+

Some database platforms like Oracle and Impala do not truly support +temp tables. To emulate temp tables, provide a schema with write +privileges where temp tables can be created.

+ + +
cohortDatabaseSchema
+

Schema name where your cohort tables reside. Note that for SQL Server, +this should include both the database and schema name, for example +'scratch.dbo'.

+ + +
cohortTableNames
+

The names of the cohort tables. See getCohortTableNames +for more details.

+ + +
cohortDefinitionSet
+

The cohortDefinitionSet argument must be a data frame with +the following columns:

cohortId
+

The unique integer identifier of the cohort

+ +
cohortName
+

The cohort's name

+ +
sql
+

The OHDSI-SQL used to generate the cohort

+ +

Optionally, this data frame may contain:

json
+

The Circe JSON representation of the cohort

+ +
+ + +
negativeControlOutcomeCohortSet
+

The negativeControlOutcomeCohortSet argument must be a data frame with +the following columns:

cohortId
+

The unique integer identifier of the cohort

+ +
cohortName
+

The cohort's name

+ +
outcomeConceptId
+

The concept_id in the condition domain to use for the negative control outcome.

+ +
+ + +
occurrenceType
+

For negative control outcomes, the occurrenceType +will detect either: the first time an +outcomeConceptId occurs or all times the +outcomeConceptId occurs for a person. Values +accepted: 'all' or 'first'.

+ + +
detectOnDescendants
+

For negative control outcomes, when set to TRUE, +detectOnDescendants will use the vocabulary to +find negative control outcomes using the +outcomeConceptId and all descendants via the +concept_ancestor table. When FALSE, only the exact +outcomeConceptId will be used to detect the +outcome.

+ + +
stopOnError
+

If an error happens while generating one of the +cohorts in the cohortDefinitionSet, should we +stop processing the other cohorts? The default is +TRUE; when set to FALSE, failures will be +identified in the return value from this function.

+ + +
outputFolder
+

Name of the folder where all the outputs will be written.

+ + +
databaseId
+

A unique ID for the database. This will be appended to +most tables.

+ + +
incremental
+

Create only cohorts that haven't been created before?

+ + +
incrementalFolder
+

If incremental = TRUE, specify a folder where +records are kept of which definition has been +executed.

+ +
+
+

Details

+

Run a cohort generation for a set of cohorts and negative control outcomes. +This function will also export the results of the run to the `outputFolder`.

+
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/sampleCohortDefinitionSet.html b/docs/reference/sampleCohortDefinitionSet.html index 41c171e..c6566eb 100644 --- a/docs/reference/sampleCohortDefinitionSet.html +++ b/docs/reference/sampleCohortDefinitionSet.html @@ -24,7 +24,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/saveCohortDefinitionSet.html b/docs/reference/saveCohortDefinitionSet.html index 22c8814..94e3d22 100644 --- a/docs/reference/saveCohortDefinitionSet.html +++ b/docs/reference/saveCohortDefinitionSet.html @@ -23,7 +23,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/reference/saveCohortSubsetDefinition.html b/docs/reference/saveCohortSubsetDefinition.html index d2291c1..98862b5 100644 --- a/docs/reference/saveCohortSubsetDefinition.html +++ b/docs/reference/saveCohortSubsetDefinition.html @@ -17,7 +17,7 @@ CohortGenerator - 0.9.0 + 0.10.0 @@ -81,7 +81,7 @@

Save cohort subset definitions to json

Arguments

subsetDefinition
-

The subset definition object @seealso CohortSubsetDefinition

+

The subset definition object; see CohortSubsetDefinition

subsetJsonFolder
diff --git a/docs/reference/saveIncremental.html b/docs/reference/saveIncremental.html index 0f48925..6914fc8 100644 --- a/docs/reference/saveIncremental.html +++ b/docs/reference/saveIncremental.html @@ -19,7 +19,7 @@ CohortGenerator - 0.9.0 + 0.10.0
diff --git a/docs/reference/uploadResults.html b/docs/reference/uploadResults.html new file mode 100644 index 0000000..cf7024b --- /dev/null +++ b/docs/reference/uploadResults.html @@ -0,0 +1,146 @@ + +Upload results to the database server. — uploadResults • CohortGenerator + + +
+
+ + + +
+
+ + +
+

Requires the results data model tables have been created using the createResultsDataModel function.

+
+ +
+
uploadResults(
+  connectionDetails,
+  schema,
+  resultsFolder,
+  forceOverWriteOfSpecifications = FALSE,
+  purgeSiteDataBeforeUploading = TRUE,
+  tablePrefix = "",
+  ...
+)
+
+ +
+

Arguments

+
connectionDetails
+

An object of type connectionDetails as created using the +createConnectionDetails function in the +DatabaseConnector package.

+ + +
schema
+

The schema on the server where the tables have been created.

+ + +
resultsFolder
+

The folder holding the results in .csv files

+ + +
forceOverWriteOfSpecifications
+

If TRUE, specifications of the phenotypes, cohort definitions, and analysis +will be overwritten if they already exist on the database. Only use this if these specifications +have changed since the last upload.

+ + +
purgeSiteDataBeforeUploading
+

If TRUE, before inserting data for a specific databaseId all the data for +that site will be dropped. This assumes the resultsFolder file contains the full data for that +data site.

+ + +
tablePrefix
+

(Optional) string to insert before table names for database table names

+ + +
...
+

See ResultModelManager::uploadResults

+ +
+ +
+ +
+ + +
+ +
+

Site built with pkgdown 2.0.9.

+
+ +
+ + + + + + + + diff --git a/docs/reference/writeCsv.html b/docs/reference/writeCsv.html index 15328a3..f98829e 100644 --- a/docs/reference/writeCsv.html +++ b/docs/reference/writeCsv.html @@ -28,7 +28,7 @@ CohortGenerator - 0.9.0 + 0.10.0 diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 3808b80..b810c7d 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -72,6 +72,9 @@ /reference/createLimitSubset.html + + /reference/createResultsDataModel.html + /reference/createSubsetCohortWindow.html @@ -105,15 +108,24 @@ /reference/getCohortDefinitionSet.html + + /reference/getCohortInclusionRules.html + /reference/getCohortStats.html /reference/getCohortTableNames.html + + /reference/getDataMigrator.html + /reference/getRequiredTasks.html + + /reference/getResultsDataModelSpecifications.html + /reference/getSubsetDefinitions.html @@ -141,12 +153,18 @@ /reference/LimitSubsetOperator.html + + /reference/migrateDataModel.html + /reference/readCsv.html /reference/recordTasksDone.html + + /reference/runCohortGeneration.html + /reference/sampleCohortDefinitionSet.html @@ -168,6 +186,9 @@ /reference/SubsetOperator.html + + /reference/uploadResults.html + /reference/writeCsv.html diff --git a/extras/CohortGenerator.pdf b/extras/CohortGenerator.pdf index 0822960..ea4a379 100644 Binary files a/extras/CohortGenerator.pdf and b/extras/CohortGenerator.pdf differ diff --git a/extras/CreateResultsForUpload.R b/extras/CreateResultsForUpload.R index 7ac4047..9b5f232 100644 --- a/extras/CreateResultsForUpload.R +++ b/extras/CreateResultsForUpload.R @@ -200,4 +200,4 @@ zip::zip( ) unlink(connectionDetails$server()) -unlink(resultsFolder, recursive = TRUE) \ No newline at end of file +unlink(resultsFolder, recursive = TRUE) diff --git a/inst/doc/CreatingCohortSubsetDefinitions.pdf b/inst/doc/CreatingCohortSubsetDefinitions.pdf index 61986cb..cf698b5 100644 Binary files a/inst/doc/CreatingCohortSubsetDefinitions.pdf and b/inst/doc/CreatingCohortSubsetDefinitions.pdf differ diff --git 
a/inst/doc/GeneratingCohorts.pdf b/inst/doc/GeneratingCohorts.pdf index bcb7ad4..c6f8e97 100644 Binary files a/inst/doc/GeneratingCohorts.pdf and b/inst/doc/GeneratingCohorts.pdf differ diff --git a/inst/doc/SamplingCohorts.pdf b/inst/doc/SamplingCohorts.pdf index a27ffb3..829e798 100644 Binary files a/inst/doc/SamplingCohorts.pdf and b/inst/doc/SamplingCohorts.pdf differ diff --git a/man/DemographicSubsetOperator.Rd b/man/DemographicSubsetOperator.Rd index fea8116..53078dc 100644 --- a/man/DemographicSubsetOperator.Rd +++ b/man/DemographicSubsetOperator.Rd @@ -2,15 +2,13 @@ % Please edit documentation in R/Subsets.R \name{DemographicSubsetOperator} \alias{DemographicSubsetOperator} -\title{Criteria Subset} +\title{Demographic Subset Operator} \value{ char vector Get auto generated name } \description{ -Criteria Subset - -Criteria Subset +Operators for subsetting a cohort by demographic criteria } \section{Super class}{ \code{\link[CohortGenerator:SubsetOperator]{CohortGenerator::SubsetOperator}} -> \code{DemographicSubsetOperator} diff --git a/man/SubsetCohortWindow.Rd b/man/SubsetCohortWindow.Rd index 9b274eb..c0a1e13 100644 --- a/man/SubsetCohortWindow.Rd +++ b/man/SubsetCohortWindow.Rd @@ -2,13 +2,10 @@ % Please edit documentation in R/Subsets.R \name{SubsetCohortWindow} \alias{SubsetCohortWindow} -\title{to List} +\title{Time Window For Cohort Subset Operator} \description{ Representation of a time window to use when subsetting a target cohort with a subset cohort } -\details{ -SubsetCohortWindow settings -} \section{Active bindings}{ \if{html}{\out{
}} \describe{ diff --git a/man/SubsetOperator.Rd b/man/SubsetOperator.Rd index 640fa7a..44f7444 100644 --- a/man/SubsetOperator.Rd +++ b/man/SubsetOperator.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/Subsets.R \name{SubsetOperator} \alias{SubsetOperator} -\title{SubsetOperator} +\title{Abstract base class for subsets.} \description{ Abstract Base Class for subsets. Subsets should inherit from this and implement their own requirements. } diff --git a/man/exportCohortStatsTables.Rd b/man/exportCohortStatsTables.Rd index a63e44e..9791697 100644 --- a/man/exportCohortStatsTables.Rd +++ b/man/exportCohortStatsTables.Rd @@ -67,10 +67,10 @@ This function retrieves the data from the cohort statistics tables and writes them to the inclusion statistics folder specified in the function call. NOTE: inclusion rule names are handled in one of two ways: -1. You can specify the cohortDefinitionSet parameter and the inclusion rule -names will be extracted from the data.frame. +1. You can specify the cohortDefinitionSet parameter and the inclusion rule +names will be extracted from the data.frame. 2. You can insert the inclusion rule names into the database using the -insertInclusionRuleNames function of this package. +insertInclusionRuleNames function of this package. The first approach is preferred as to avoid the warning emitted. } diff --git a/man/readCsv.Rd b/man/readCsv.Rd index 2247862..9eac7ea 100644 --- a/man/readCsv.Rd +++ b/man/readCsv.Rd @@ -22,7 +22,7 @@ in the .csv are not in snake_case format} `guess_max` or supply the correct types yourself. Column specifications created by [list()] or [cols()] must contain - one column specification for each column. + one column specification for each column. 
Alternatively, you can use a compact string representation where each character represents one column: diff --git a/man/runCohortGeneration.Rd b/man/runCohortGeneration.Rd index 8185e41..7781f28 100644 --- a/man/runCohortGeneration.Rd +++ b/man/runCohortGeneration.Rd @@ -55,24 +55,24 @@ the following columns: \describe{ \item{cohortName}{The cohort's name} \item{outcomeConceptId}{The concept_id in the condition domain to use for the negative control outcome.}}} -\item{occurrenceType}{For negative controls outcomes, the occurrenceType -will detect either: the first time an -outcomeConceptId occurs or all times the -outcomeConceptId occurs for a person. Values +\item{occurrenceType}{For negative controls outcomes, the occurrenceType +will detect either: the first time an +outcomeConceptId occurs or all times the +outcomeConceptId occurs for a person. Values accepted: 'all' or 'first'.} -\item{detectOnDescendants}{For negative controls outcomes, when set to TRUE, -detectOnDescendants will use the vocabulary to -find negative control outcomes using the -outcomeConceptId and all descendants via the -concept_ancestor table. When FALSE, only the exact -outcomeConceptId will be used to detect the +\item{detectOnDescendants}{For negative controls outcomes, when set to TRUE, +detectOnDescendants will use the vocabulary to +find negative control outcomes using the +outcomeConceptId and all descendants via the +concept_ancestor table. When FALSE, only the exact +outcomeConceptId will be used to detect the outcome.} -\item{stopOnError}{If an error happens while generating one of the -cohorts in the cohortDefinitionSet, should we -stop processing the other cohorts? The default is -TRUE; when set to FALSE, failures will be +\item{stopOnError}{If an error happens while generating one of the +cohorts in the cohortDefinitionSet, should we +stop processing the other cohorts? 
The default is +TRUE; when set to FALSE, failures will be identified in the return value from this function.} \item{outputFolder}{Name of the folder where all the outputs will written to.} @@ -82,8 +82,8 @@ most tables.} \item{incremental}{Create only cohorts that haven't been created before?} -\item{incrementalFolder}{If \code{incremental = TRUE}, specify a folder where -records are kept of which definition has been +\item{incrementalFolder}{If \code{incremental = TRUE}, specify a folder where +records are kept of which definition has been executed.} } \description{ diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 1e171bd..1cda5b4 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -87,21 +87,23 @@ getPlatformConnectionDetails <- function(dbmsPlatform) { if (dbmsPlatform == "bigquery") { # To avoid rate limit on BigQuery, only test on 1 OS: if (.Platform$OS.type == "windows") { - bqKeyFile <- tempfile(fileext = ".json") - writeLines(Sys.getenv("CDM_BIG_QUERY_KEY_FILE"), bqKeyFile) - if (testthat::is_testing()) { - withr::defer(unlink(bqKeyFile, force = TRUE), testthat::teardown_env()) - } - bqConnectionString <- gsub("", - normalizePath(bqKeyFile, winslash = "/"), - Sys.getenv("CDM_BIG_QUERY_CONNECTION_STRING")) - connectionDetails = DatabaseConnector::createConnectionDetails( - dbms = dbmsPlatform, - user = "", - password = "", - connectionString = !!bqConnectionString, - pathToDriver = jdbcDriverFolder - ) + bqKeyFile <- tempfile(fileext = ".json") + writeLines(Sys.getenv("CDM_BIG_QUERY_KEY_FILE"), bqKeyFile) + if (testthat::is_testing()) { + withr::defer(unlink(bqKeyFile, force = TRUE), testthat::teardown_env()) + } + bqConnectionString <- gsub( + "", + normalizePath(bqKeyFile, winslash = "/"), + Sys.getenv("CDM_BIG_QUERY_CONNECTION_STRING") + ) + connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = dbmsPlatform, + user = "", + password = "", + connectionString = !!bqConnectionString, + pathToDriver = 
jdbcDriverFolder + ) cdmDatabaseSchema <- Sys.getenv("CDM_BIG_QUERY_CDM_SCHEMA") vocabularyDatabaseSchema <- Sys.getenv("CDM_BIG_QUERY_CDM_SCHEMA") cohortDatabaseSchema <- Sys.getenv("CDM_BIG_QUERY_OHDSI_SCHEMA") @@ -116,7 +118,7 @@ getPlatformConnectionDetails <- function(dbmsPlatform) { password = URLdecode(Sys.getenv("CDM5_ORACLE_PASSWORD")), server = Sys.getenv("CDM5_ORACLE_SERVER"), pathToDriver = jdbcDriverFolder - ) + ) cdmDatabaseSchema <- Sys.getenv("CDM5_ORACLE_CDM_SCHEMA") vocabularyDatabaseSchema <- Sys.getenv("CDM5_ORACLE_CDM_SCHEMA") cohortDatabaseSchema <- Sys.getenv("CDM5_ORACLE_OHDSI_SCHEMA") @@ -139,12 +141,12 @@ getPlatformConnectionDetails <- function(dbmsPlatform) { password = URLdecode(Sys.getenv("CDM5_REDSHIFT_PASSWORD")), server = Sys.getenv("CDM5_REDSHIFT_SERVER"), pathToDriver = jdbcDriverFolder - ) + ) cdmDatabaseSchema <- Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA") vocabularyDatabaseSchema <- Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA") cohortDatabaseSchema <- Sys.getenv("CDM5_REDSHIFT_OHDSI_SCHEMA") } else if (dbmsPlatform == "snowflake") { - connectionDetails = DatabaseConnector::createConnectionDetails( + connectionDetails <- DatabaseConnector::createConnectionDetails( dbms = dbmsPlatform, user = Sys.getenv("CDM_SNOWFLAKE_USER"), password = URLdecode(Sys.getenv("CDM_SNOWFLAKE_PASSWORD")), @@ -156,7 +158,7 @@ getPlatformConnectionDetails <- function(dbmsPlatform) { cohortDatabaseSchema <- Sys.getenv("CDM_SNOWFLAKE_OHDSI_SCHEMA") options(sqlRenderTempEmulationSchema = Sys.getenv("CDM_SNOWFLAKE_OHDSI_SCHEMA")) } else if (dbmsPlatform == "spark") { - connectionDetails = DatabaseConnector::createConnectionDetails( + connectionDetails <- DatabaseConnector::createConnectionDetails( dbms = dbmsPlatform, user = Sys.getenv("CDM5_SPARK_USER"), password = URLdecode(Sys.getenv("CDM5_SPARK_PASSWORD")), @@ -174,7 +176,7 @@ getPlatformConnectionDetails <- function(dbmsPlatform) { password = URLdecode(Sys.getenv("CDM5_SQL_SERVER_PASSWORD")), server = 
Sys.getenv("CDM5_SQL_SERVER_SERVER"), pathToDriver = jdbcDriverFolder - ) + ) cdmDatabaseSchema <- Sys.getenv("CDM5_SQL_SERVER_CDM_SCHEMA") vocabularyDatabaseSchema <- Sys.getenv("CDM5_SQL_SERVER_CDM_SCHEMA") cohortDatabaseSchema <- Sys.getenv("CDM5_SQL_SERVER_OHDSI_SCHEMA") diff --git a/tests/testthat/test-Export.R b/tests/testthat/test-Export.R index 59441d6..87f535d 100644 --- a/tests/testthat/test-Export.R +++ b/tests/testthat/test-Export.R @@ -67,7 +67,7 @@ test_that("Export cohort stats with permanent tables", { cohortStatisticsFolder = cohortStatsFolder, incremental = FALSE ) - ) + ) # Verify the files are written to the file system exportedFiles <- list.files(path = cohortStatsFolder, pattern = "*.csv") @@ -339,7 +339,7 @@ test_that("Export cohort stats using cohortDefinitionSet for inclusion rule name cohortDatabaseSchema = "main", cohortTableNames = cohortTableNames ) - + # Generate with stats cohortsWithStats <- getCohortsForTest(cohorts, generateStats = TRUE) generateCohortSet( @@ -350,7 +350,7 @@ test_that("Export cohort stats using cohortDefinitionSet for inclusion rule name cohortDatabaseSchema = "main", incremental = FALSE ) - + # Export the results exportCohortStatsTables( connectionDetails = connectionDetails, @@ -361,7 +361,7 @@ test_that("Export cohort stats using cohortDefinitionSet for inclusion rule name databaseId = "Eunomia", cohortDefinitionSet = cohortsWithStats ) - + # Verify the files are written to the file system and that # the cohort inclusion information has been written exportedFiles <- list.files(path = cohortStatsFolder, pattern = ".csv", full.names = TRUE) @@ -373,4 +373,4 @@ test_that("Export cohort stats using cohortDefinitionSet for inclusion rule name } } unlink(cohortStatsFolder) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-ResultsDataModel.R b/tests/testthat/test-ResultsDataModel.R index 1fc1665..f8ae0aa 100644 --- a/tests/testthat/test-ResultsDataModel.R +++ b/tests/testthat/test-ResultsDataModel.R 
@@ -32,19 +32,20 @@ sqliteConnectionDetails <- DatabaseConnector::createConnectionDetails( ) sqliteResultsDatabaseSchema <- "main" -withr::defer({ - connection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) - sql <- "DROP SCHEMA IF EXISTS @resultsDatabaseSchema CASCADE;" - DatabaseConnector::renderTranslateExecuteSql( - sql = sql, - resultsDatabaseSchema = postgresResultsDatabaseSchema, - connection = connection - ) +withr::defer( + { + connection <- DatabaseConnector::connect(connectionDetails = postgresConnectionDetails) + sql <- "DROP SCHEMA IF EXISTS @resultsDatabaseSchema CASCADE;" + DatabaseConnector::renderTranslateExecuteSql( + sql = sql, + resultsDatabaseSchema = postgresResultsDatabaseSchema, + connection = connection + ) - DatabaseConnector::disconnect(connection) - unlink(databaseFile, force = TRUE) -}, -testthat::teardown_env() + DatabaseConnector::disconnect(connection) + unlink(databaseFile, force = TRUE) + }, + testthat::teardown_env() ) testCreateSchema <- function(connectionDetails, resultsDatabaseSchema) { @@ -66,9 +67,11 @@ testCreateSchema <- function(connectionDetails, resultsDatabaseSchema) { ) specifications <- getResultsDataModelSpecifications() for (tableName in unique(specifications$tableName)) { - expect_true(DatabaseConnector::existsTable(connection = connection, - databaseSchema = resultsDatabaseSchema, - tableName = tableName)) + expect_true(DatabaseConnector::existsTable( + connection = connection, + databaseSchema = resultsDatabaseSchema, + tableName = tableName + )) } # Bad schema name expect_error(createResultsDataModel( @@ -78,10 +81,14 @@ testCreateSchema <- function(connectionDetails, resultsDatabaseSchema) { } test_that("Create schema", { - testCreateSchema(connectionDetails = postgresConnectionDetails, - resultsDatabaseSchema = postgresResultsDatabaseSchema) - testCreateSchema(connectionDetails = sqliteConnectionDetails, - resultsDatabaseSchema = sqliteResultsDatabaseSchema) + testCreateSchema( + 
connectionDetails = postgresConnectionDetails, + resultsDatabaseSchema = postgresResultsDatabaseSchema + ) + testCreateSchema( + connectionDetails = sqliteConnectionDetails, + resultsDatabaseSchema = sqliteResultsDatabaseSchema + ) }) testUploadResults <- function(connectionDetails, resultsDatabaseSchema, resultsFolder) { @@ -100,7 +107,7 @@ testUploadResults <- function(connectionDetails, resultsDatabaseSchema, resultsF for (tableName in unique(specifications$tableName)) { primaryKey <- specifications %>% dplyr::filter(tableName == !!tableName & - primaryKey == "Yes") %>% + primaryKey == "Yes") %>% dplyr::select(columnName) %>% dplyr::pull() @@ -130,10 +137,14 @@ test_that("Results upload", { ), exdir = unzipFolder ) - testUploadResults(connectionDetails = postgresConnectionDetails, - resultsDatabaseSchema = postgresResultsDatabaseSchema, - resultsFolder = unzipFolder) - testUploadResults(connectionDetails = sqliteConnectionDetails, - resultsDatabaseSchema = sqliteResultsDatabaseSchema, - resultsFolder = unzipFolder) -}) \ No newline at end of file + testUploadResults( + connectionDetails = postgresConnectionDetails, + resultsDatabaseSchema = postgresResultsDatabaseSchema, + resultsFolder = unzipFolder + ) + testUploadResults( + connectionDetails = sqliteConnectionDetails, + resultsDatabaseSchema = sqliteResultsDatabaseSchema, + resultsFolder = unzipFolder + ) +}) diff --git a/tests/testthat/test-RunCohortGeneration.R b/tests/testthat/test-RunCohortGeneration.R index 719f731..eeb38b7 100644 --- a/tests/testthat/test-RunCohortGeneration.R +++ b/tests/testthat/test-RunCohortGeneration.R @@ -3,15 +3,14 @@ library(CohortGenerator) # Exception Handling ------------- test_that("Call runCohortGeneration without connectionDetails", { - expect_error(runCohortGeneration(), message = "(connection details)" - ) + expect_error(runCohortGeneration(), message = "(connection details)") }) test_that("Call runCohortGeneration without connectionDetails", { expect_error( 
runCohortGeneration( connectionDetails = connectionDetails - ), + ), message = "(You must supply at least 1 cohortDefinitionSet OR 1 negativeControlOutcomeCohortSet)" ) }) @@ -22,7 +21,7 @@ test_that("Call runCohortGeneration happy path", { cohortsWithStats <- getCohortsForTest(cohorts, generateStats = TRUE) ncSet <- getNegativeControlOutcomeCohortsForTest() expectedDatabaseId <- "db1" - + runCohortGeneration( connectionDetails = connectionDetails, cdmDatabaseSchema = "main", @@ -37,7 +36,7 @@ test_that("Call runCohortGeneration happy path", { databaseId = expectedDatabaseId, incremental = F ) - + # Ensure the resultsDataModelSpecification.csv is written # to the output folder expect_true(file.exists(file.path(testOutputFolder, "resultsDataModelSpecification.csv"))) @@ -49,7 +48,7 @@ test_that("Call runCohortGeneration happy path", { expectedFileList <- paste0(unique(spec$tableName), ".csv") diffs <- setdiff(expectedFileList, basename(list.files(testOutputFolder))) expect_true(length(diffs) == 0) - + # Make sure that each output file contains the same columns as defined # in the specification for (i in seq_along(expectedFileList)) { @@ -57,15 +56,15 @@ test_that("Call runCohortGeneration happy path", { file = file.path(testOutputFolder, expectedFileList[i]) ) tbl <- tools::file_path_sans_ext(expectedFileList[i]) - + emptyResult <- CohortGenerator:::createEmptyResult(tbl) expect_equal(!!c(tbl, sort(names(data))), !!c(tbl, sort(names(emptyResult)))) } - + # Make sure that the output that specifies a database ID has the correct # value included tablesWithDatabaseId <- spec %>% - dplyr::filter(columnName == 'database_id') + dplyr::filter(columnName == "database_id") for (i in seq_along(tablesWithDatabaseId)) { # Read in the data and ensure all of the database_ids match the # the one used in the test @@ -74,4 +73,4 @@ test_that("Call runCohortGeneration happy path", { ) expect_true(all(data$databaseId == expectedDatabaseId)) } -}) \ No newline at end of file +}) diff 
--git a/tests/testthat/test-Subsets.R b/tests/testthat/test-Subsets.R index c9ffe9c..7569431 100644 --- a/tests/testthat/test-Subsets.R +++ b/tests/testthat/test-Subsets.R @@ -470,11 +470,12 @@ test_that("Subset logic checks", { ) sqliteResultsDatabaseSchema <- "main" connection <- DatabaseConnector::connect(sqliteConnectionDetails) - withr::defer({ - DatabaseConnector::disconnect(connection) - unlink(databaseFile, force = TRUE) - }, - testthat::teardown_env() + withr::defer( + { + DatabaseConnector::disconnect(connection) + unlink(databaseFile, force = TRUE) + }, + testthat::teardown_env() ) # Create dummy OMOP data for testing ------------------ @@ -489,7 +490,7 @@ test_that("Subset logic checks", { observation_period_end_date = lubridate::date("2008-12-31") ) ) - + DatabaseConnector::insertTable( connection = connection, databaseSchema = sqliteResultsDatabaseSchema, @@ -502,8 +503,8 @@ test_that("Subset logic checks", { ethnicity_concept_id = 0 ) ) - - + + # Define limit subsets for tests ------------- lsd1 <- createCohortSubsetDefinition( name = "first ever", @@ -515,7 +516,7 @@ test_that("Subset logic checks", { ) ) ) - + lsd2 <- createCohortSubsetDefinition( name = "earliestRemaining", definitionId = 102, @@ -527,7 +528,7 @@ test_that("Subset logic checks", { ) ) ) - + lsd3 <- createCohortSubsetDefinition( name = "latestRemaining", definitionId = 103, @@ -539,7 +540,7 @@ test_that("Subset logic checks", { ) ) ) - + lsd4 <- createCohortSubsetDefinition( name = "lastEver", definitionId = 104, @@ -550,7 +551,7 @@ test_that("Subset logic checks", { ) ) ) - + lsd5 <- createCohortSubsetDefinition( name = "calendar", definitionId = 105, @@ -562,7 +563,7 @@ test_that("Subset logic checks", { ) ) ) - + lsd6 <- createCohortSubsetDefinition( name = "firstEver + calendar", definitionId = 106, @@ -575,7 +576,7 @@ test_that("Subset logic checks", { ) ) ) - + lsd7 <- createCohortSubsetDefinition( name = "earliestRemaining + calendar", definitionId = 107, @@ -589,7 +590,7 @@ 
test_that("Subset logic checks", { ) ) ) - + # Define demographics subsets for tests ------------- ds1 <- createCohortSubsetDefinition( name = "Age subset", @@ -602,7 +603,7 @@ test_that("Subset logic checks", { ) ) ) - + ds2 <- createCohortSubsetDefinition( name = "Gender subset", definition = 202, @@ -613,7 +614,7 @@ test_that("Subset logic checks", { ) ) ) - + ds3 <- createCohortSubsetDefinition( name = "Race subset", definition = 203, @@ -624,7 +625,7 @@ test_that("Subset logic checks", { ) ) ) - + ds4 <- createCohortSubsetDefinition( name = "Race subset", definition = 204, @@ -635,7 +636,7 @@ test_that("Subset logic checks", { ) ) ) - + # Define cohort subsets for tests ------------- cs1 <- createCohortSubsetDefinition( name = "Subset overlaps cohort start", @@ -651,7 +652,7 @@ test_that("Subset logic checks", { ) ) ) - + cs2 <- createCohortSubsetDefinition( name = "Subset overlaps entire target cohort period", definition = 302, @@ -666,7 +667,7 @@ test_that("Subset logic checks", { ) ) ) - + cs3 <- createCohortSubsetDefinition( name = "Subset subsumed by entire target cohort period", definition = 303, @@ -681,7 +682,7 @@ test_that("Subset logic checks", { ) ) ) - + cs4 <- createCohortSubsetDefinition( name = "Subset overlaps cohort end", definition = 304, @@ -696,7 +697,7 @@ test_that("Subset logic checks", { ) ) ) - + cs5 <- createCohortSubsetDefinition( name = "Subset does NOT overlap cohort end - negate", definition = 305, @@ -711,14 +712,14 @@ test_that("Subset logic checks", { ) ) ) - + cs6 <- createCohortSubsetDefinition( name = "Subset overlaps target start - tests combo == all", definition = 306, subsetOperators = list( createCohortSubset( name = "subsetOverlapTargetStartComboAll", - cohortIds = c(2,3), + cohortIds = c(2, 3), negate = F, cohortCombinationOperator = "all", startWindow = createSubsetCohortWindow(-99999, 0, "cohortStart"), @@ -726,7 +727,7 @@ test_that("Subset logic checks", { ) ) ) - + # Create cohort def. 
set and apply subset definitions --------- cohortDefinitionSet <- data.frame( cohortId = 1, @@ -768,8 +769,9 @@ test_that("Subset logic checks", { SELECT @target_cohort_id, 1, DATEFROMPARTS(2004, 01, 01), DATEFROMPARTS(2004, 12, 31) ;", json = "" - )) - + ) + ) + cohortDefinitionSet <- rbind( cohortDefinitionSet, data.frame( @@ -787,11 +789,12 @@ test_that("Subset logic checks", { SELECT @target_cohort_id, 1, DATEFROMPARTS(2002, 01, 01), DATEFROMPARTS(2005, 12, 31) UNION -- NOTE: DOES NOT FULLY SUBSUME COHORT ID = 1 FOR TESTING - SELECT @target_cohort_id, 1, DATEFROMPARTS(2004, 01, 01), DATEFROMPARTS(2005, 12, 31) + SELECT @target_cohort_id, 1, DATEFROMPARTS(2004, 01, 01), DATEFROMPARTS(2005, 12, 31) ;", json = "" - )) - + ) + ) + cohortDefinitionSet <- rbind( cohortDefinitionSet, data.frame( @@ -809,11 +812,12 @@ test_that("Subset logic checks", { SELECT @target_cohort_id, 1, DATEFROMPARTS(2003, 02, 01), DATEFROMPARTS(2003, 12, 31) UNION -- NOTE: IS NOT FULLY SUBSUMED BY COHORT ID = 1 FOR TESTING - SELECT @target_cohort_id, 1, DATEFROMPARTS(2004, 01, 01), DATEFROMPARTS(2005, 12, 31) + SELECT @target_cohort_id, 1, DATEFROMPARTS(2004, 01, 01), DATEFROMPARTS(2005, 12, 31) ;", json = "" - )) - + ) + ) + cohortDefinitionSet <- rbind( cohortDefinitionSet, data.frame( @@ -831,11 +835,12 @@ test_that("Subset logic checks", { SELECT @target_cohort_id, 1, DATEFROMPARTS(2003, 02, 01), DATEFROMPARTS(2004, 02, 01) UNION -- NOTE: DOES NOT OVERLAP ANY END DATE ENTRIES IN COHORT ID = 1 FOR TESTING - SELECT @target_cohort_id, 1, DATEFROMPARTS(2003, 02, 01), DATEFROMPARTS(2003, 03, 01) + SELECT @target_cohort_id, 1, DATEFROMPARTS(2003, 02, 01), DATEFROMPARTS(2003, 03, 01) ;", json = "" - )) - + ) + ) + cohortDefinitionSet <- cohortDefinitionSet |> addCohortSubsetDefinition(lsd1, targetCohortIds = c(1)) |> addCohortSubsetDefinition(lsd2, targetCohortIds = c(1)) |> @@ -854,16 +859,16 @@ test_that("Subset logic checks", { addCohortSubsetDefinition(cs4, targetCohortIds = c(1)) |> 
addCohortSubsetDefinition(cs5, targetCohortIds = c(1)) |> addCohortSubsetDefinition(cs6, targetCohortIds = c(1)) - + # Generate cohorts ------------ cohortTableNames <- getCohortTableNames() - + createCohortTables( connection = connection, cohortDatabaseSchema = "main", cohortTableNames = cohortTableNames ) - + generateCohortSet( connection = connection, cdmDatabaseSchema = "main", @@ -871,37 +876,37 @@ test_that("Subset logic checks", { cohortTableNames = getCohortTableNames(), cohortDefinitionSet = cohortDefinitionSet ) - - + + cohorts <- DatabaseConnector::querySql( connection = connection, sql = "SELECT * FROM main.cohort ORDER BY COHORT_DEFINITION_ID, SUBJECT_ID, COHORT_START_DATE;" ) - + # Check the cohort counts to verify the logic worked as expected --------- # cohorts # <------ USE TO SEE THE COHORTS TO VERIFY THE INFO BELOW - + # Limit subsets cohort definition 1100 range ------ - expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1101,]$COHORT_START_DATE[[1]], lubridate::date("2001-01-01")) # 1101 - First Ever - expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1102,]$COHORT_START_DATE[[1]], lubridate::date("2003-01-01")) # 1102 - Earliest Remaining - expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1103,]$COHORT_START_DATE[[1]], lubridate::date("2005-01-01")) # 1103 - Latest Remaining - expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1104,]$COHORT_START_DATE[[1]], lubridate::date("2007-01-01")) # 1104 - Last Ever - expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1105,]$COHORT_START_DATE[[1]], lubridate::date("2003-01-01")) # 1105 - Calendar #1 - expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1105,]$COHORT_START_DATE[[2]], lubridate::date("2005-01-01")) # 1105 - Calendar #2 - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1106,]), 0) # 1106 - First ever + calendar time that restricts to no one - expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1107,]$COHORT_START_DATE[[1]], lubridate::date("2003-01-01")) # 1107 - 
Earliest remaining+calendar restriction + expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1101, ]$COHORT_START_DATE[[1]], lubridate::date("2001-01-01")) # 1101 - First Ever + expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1102, ]$COHORT_START_DATE[[1]], lubridate::date("2003-01-01")) # 1102 - Earliest Remaining + expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1103, ]$COHORT_START_DATE[[1]], lubridate::date("2005-01-01")) # 1103 - Latest Remaining + expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1104, ]$COHORT_START_DATE[[1]], lubridate::date("2007-01-01")) # 1104 - Last Ever + expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1105, ]$COHORT_START_DATE[[1]], lubridate::date("2003-01-01")) # 1105 - Calendar #1 + expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1105, ]$COHORT_START_DATE[[2]], lubridate::date("2005-01-01")) # 1105 - Calendar #2 + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1106, ]), 0) # 1106 - First ever + calendar time that restricts to no one + expect_equal(cohorts[cohorts$COHORT_DEFINITION_ID == 1107, ]$COHORT_START_DATE[[1]], lubridate::date("2003-01-01")) # 1107 - Earliest remaining+calendar restriction # Demographic subsets cohort definition 1200 range ------ - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1201,]), 2) # 1201 - Age 2-5 - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1202,]), 4) # 1202 - Gender - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1203,]), 4) # 1203 - Race - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1204,]), 4) # 1204 - Ethnicity - + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1201, ]), 2) # 1201 - Age 2-5 + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1202, ]), 4) # 1202 - Gender + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1203, ]), 4) # 1203 - Race + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1204, ]), 4) # 1204 - Ethnicity + # Cohort subsets cohort definition 1300 
range ------ - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1301,]), 2) # 1301 - Subset overlaps cohort start - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1302,]), 2) # 1302 - Subset overlaps entire target cohort period - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1303,]), 2) # 1303 - Subset subsumed by entire target cohort period - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1304,]), 2) # 1304 - Subset overlaps cohort end - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1305,]), 2) # 1305 - Subset does NOT overlap cohort end - negate - expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1306,]), 2) # 1306 - Subset overlaps target start - tests combo == all -}) \ No newline at end of file + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1301, ]), 2) # 1301 - Subset overlaps cohort start + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1302, ]), 2) # 1302 - Subset overlaps entire target cohort period + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1303, ]), 2) # 1303 - Subset subsumed by entire target cohort period + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1304, ]), 2) # 1304 - Subset overlaps cohort end + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1305, ]), 2) # 1305 - Subset does NOT overlap cohort end - negate + expect_equal(nrow(cohorts[cohorts$COHORT_DEFINITION_ID == 1306, ]), 2) # 1306 - Subset overlaps target start - tests combo == all +}) diff --git a/tests/testthat/test-dbms-platforms.R b/tests/testthat/test-dbms-platforms.R index 2cd2801..14da170 100644 --- a/tests/testthat/test-dbms-platforms.R +++ b/tests/testthat/test-dbms-platforms.R @@ -13,7 +13,7 @@ testPlatform <- function(dbmsDetails) { ) unlink(platformOutputFolder, recursive = TRUE) }) - + cohortsWithStats <- getCohortDefinitionSet( settingsFileName = "testdata/name/Cohorts.csv", jsonFolder = "testdata/name/cohorts", @@ -50,9 +50,9 @@ testPlatform <- 
function(dbmsDetails) { subsetOperators = subsetOperations ) cohortsWithSubsets <- addCohortSubsetDefinition(cohortsWithStats, subsetDef) - + ncSet <- getNegativeControlOutcomeCohortsForTest() - + runCohortGeneration( connectionDetails = dbmsDetails$connectionDetails, cdmDatabaseSchema = dbmsDetails$cdmDatabaseSchema, @@ -66,18 +66,18 @@ testPlatform <- function(dbmsDetails) { databaseId = dbmsDetails$connectionDetails$dbms, incremental = F ) - + # Check the output to verify the generation worked properly cohortsGenerated <- readCsv( file = file.path(platformOutputFolder, "cg_cohort_generation.csv") ) expect_equal(nrow(cohortsGenerated), nrow(cohortsWithSubsets)) - + cohortCounts <- readCsv( file = file.path(platformOutputFolder, "cg_cohort_count.csv") ) expect_equal(nrow(cohortsGenerated), nrow(cohortCounts)) - + ncCohortCounts <- readCsv( file = file.path(platformOutputFolder, "cg_cohort_count_neg_ctrl.csv") )