OHDSI · anthonysena · Jul 14, 2024 · Jul 14, 2024 · Jul 14, 2024 · Jul 14, 2024
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -4,3 +4,9 @@
 ^\.Rproj\.user$
 ^\.idea$
 ^\.github$
+_pkgdown\.yml
+compare_versions
+deploy.sh
+docs
+extras
+man-roxygen
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: CohortGenerator
 Type: Package
 Title: An R Package for Cohort Generation Against the OMOP CDM
-Version: 0.9.0
-Date: 2024-05-28
+Version: 0.10.0
+Date: 2024-07-14
 Authors@R: c(
   person("Anthony", "Sena", email = "[email protected]", role = c("aut", "cre")),
   person("Jamie", "Gilbert", role = c("aut")),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,20 @@
+CohortGenerator 0.10.0
+=======================
+New Features
+- Add `runCohortGeneration` function (Issue #165)
+- Adopt ResultModelManager for handling results data models & uploading. Extend results data model to include information on cohort subsets(#154, #162)
+- Remove REMOTES entries for CirceR and Eunomia which are now in CRAN (#145)
+- Unit tests now running on all OHDSI DB Platforms (#151)
+
+Bug Fixes
+- Negation of cohort subset operator must join on `subject_id` AND `start_date` (#167)
+- Allow integer as cohort ID (#146)
+- Use native messaging functions for output vs. ParallelLogger (#97)
+- Prevent upload of inclusion rule information (#78)
+- Expose `colTypes` when working with .csv files (#59)
+- Remove `bit64` from package (mostly) (#152)
+- Updated documentation for cohort subset negate feature (#111)
+
 CohortGenerator 0.9.0
 =======================
 - Random sample functionality (for development only) (Issue #129)

diff --git a/R/CohortDefinitionSet.R b/R/CohortDefinitionSet.R
@@ -50,7 +50,7 @@ createEmptyCohortDefinitionSet <- function(verbose = FALSE) {
   cohortDefinitionSetColumns <- colnames(df)
   matchingColumns <- intersect(x = colnames(x), y = cohortDefinitionSetColumns)
   columnNamesMatch <- setequal(matchingColumns, cohortDefinitionSetColumns)
-  
+
   if (!columnNamesMatch && emitWarning) {
     columnsMissing <- setdiff(x = cohortDefinitionSetColumns, y = colnames(x))
     warningMessage <- paste0(
@@ -144,7 +144,7 @@ checkAndFixCohortDefinitionSetDataTypes <- function(x, fixDataTypes = TRUE, emit
   # Check if the data types match
   # NOTE: createEmptyCohortDefinitionSet() is the reference for the data
   # types. cohortId is declared as a numeric but an integer is also fine
-  dataTypesMatch <- (xDataTypes[1] %in% c('integer', 'double') && all(xDataTypes[2:4] == "character"))
+  dataTypesMatch <- (xDataTypes[1] %in% c("integer", "double") && all(xDataTypes[2:4] == "character"))
   # Create the cohortDefinitionSetSpec from the names/data types for reference
   cohortDefinitionSetSpec <- data.frame(
     columnName = names(xDataTypes),
@@ -520,7 +520,7 @@ checkLargeInteger <- function(x, columnName = "cohortId") {
   # NOTE: suppressWarnings used to mask
   # warning from R which may happen for
   # large values in X.
-  res <- all(suppressWarnings(x%%1) == 0)
+  res <- all(suppressWarnings(x %% 1) == 0)
   if (!isTRUE(res)) {
     errorMessage <- paste0("The column ", columnName, " included non-integer values. Please update and re-try")
     return(errorMessage)

diff --git a/R/CohortStats.R b/R/CohortStats.R
@@ -58,7 +58,7 @@ insertInclusionRuleNames <- function(connectionDetails = NULL,
   }
 
   inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)
-  
+
   # Remove any existing data to prevent duplication
   DatabaseConnector::renderTranslateExecuteSql(
     connection = connection,
@@ -129,7 +129,7 @@ getStatsTable <- function(connectionDetails,
 }
 
 #' Get Cohort Inclusion Stats Table Data
-#' 
+#'
 #' @description
 #' This function returns a data frame of the data in the Cohort Inclusion Tables.
 #' Results are organized in to a list with 5 different data frames:
@@ -203,23 +203,23 @@ getCohortStats <- function(connectionDetails,
 
 
 #' Get Cohort Inclusion Rules from a cohort definition set
-#' 
+#'
 #' @description
 #' This function returns a data frame of the inclusion rules defined
 #' in a cohort definition set.
-#' 
+#'
 #' @md
 #' @template CohortDefinitionSet
-#' 
+#'
 #' @export
 getCohortInclusionRules <- function(cohortDefinitionSet) {
   checkmate::assertDataFrame(cohortDefinitionSet, min.rows = 1, col.names = "named")
   checkmate::assertNames(colnames(cohortDefinitionSet),
-                         must.include = c(
-                           "cohortId",
-                           "cohortName",
-                           "json"
-                         )
+    must.include = c(
+      "cohortId",
+      "cohortName",
+      "json"
+    )
   )
 
   # Assemble the cohort inclusion rules
@@ -231,7 +231,7 @@ getCohortInclusionRules <- function(cohortDefinitionSet) {
     name = character(),
     description = character()
   )
-  
+
   # Remove any cohort definitions that do not include the JSON property
   cohortDefinitionSet <- cohortDefinitionSet[!(is.null(cohortDefinitionSet$json) | is.na(cohortDefinitionSet$json)), ]
   for (i in 1:nrow(cohortDefinitionSet)) {
@@ -261,6 +261,6 @@ getCohortInclusionRules <- function(cohortDefinitionSet) {
       }
     }
   }
-  
+
   invisible(inclusionRules)
-}
+}
diff --git a/R/CsvHelper.R b/R/CsvHelper.R
@@ -27,7 +27,7 @@
 #' @param file  The .csv file to read.
 #' @param warnOnCaseMismatch  When TRUE, raise a warning if column headings
 #' in the .csv are not in snake_case format
-#' 
+#'
 #' @param colTypes Corresponds to the `col_types` in the `readr::read_csv` function.
 #'   One of `NULL`, a [readr::cols()] specification, or
 #'   a string. See `vignette("readr")` for more details.
@@ -38,7 +38,7 @@
 #'    `guess_max` or supply the correct types yourself.
 #'
 #'    Column specifications created by [list()] or [cols()] must contain
-#'    one column specification for each column. 
+#'    one column specification for each column.
 #'
 #'    Alternatively, you can use a compact string representation where each
 #'    character represents one column:
@@ -57,7 +57,7 @@
 #'    By default, reading a file without a column specification will print a
 #'    message showing what `readr` guessed they were. To remove this message,
 #'    set `show_col_types = FALSE` or set `options(readr.show_col_types = FALSE)`.
-#'  
+#'
 #' @return
 #' A tibble with the .csv contents
 #'

diff --git a/R/Export.R b/R/Export.R
@@ -20,12 +20,12 @@
 #' This function retrieves the data from the cohort statistics tables and
 #' writes them to the inclusion statistics folder specified in the function
 #' call. NOTE: inclusion rule names are handled in one of two ways:
-#' 
-#' 1. You can specify the cohortDefinitionSet parameter and the inclusion rule 
-#' names will be extracted from the data.frame. 
+#'
+#' 1. You can specify the cohortDefinitionSet parameter and the inclusion rule
+#' names will be extracted from the data.frame.
 #' 2. You can insert the inclusion rule names into the database using the
-#' insertInclusionRuleNames function of this package. 
-#' 
+#' insertInclusionRuleNames function of this package.
+#'
 #' The first approach is preferred as to avoid the warning emitted.
 #'
 #' @template Connection
@@ -45,9 +45,9 @@
 #'
 #' @param databaseId                  Optional - when specified, the databaseId will be added
 #'                                    to the exported results
-#'                                    
+#'
 #' @template CohortDefinitionSet
-#' 
+#'
 #' @param tablePrefix Optional - allows to append a prefix to the exported
 #'                    file names.
 #'
@@ -72,7 +72,7 @@ exportCohortStatsTables <- function(connectionDetails,
   if (!dir.exists(cohortStatisticsFolder)) {
     dir.create(cohortStatisticsFolder, recursive = TRUE)
   }
-  
+
   # Internal function to export the stats
   exportStats <- function(data,
                           fileName,
@@ -91,7 +91,7 @@ exportCohortStatsTables <- function(connectionDetails,
       .writeCsv(x = data, file = fullFileName)
     }
   }
-  
+
   tablesToExport <- data.frame(
     tableName = c("cohortInclusionResultTable", "cohortInclusionStatsTable", "cohortSummaryStatsTable", "cohortCensorStatsTable"),
     fileName = c("cohort_inc_result.csv", "cohort_inc_stats.csv", "cohort_summary_stats.csv", "cohort_censor_stats.csv")
@@ -122,7 +122,7 @@ exportCohortStatsTables <- function(connectionDetails,
     snakeCaseToCamelCase = snakeCaseToCamelCase,
     cohortTableNames = cohortTableNames
   )
-  
+
   for (i in 1:nrow(tablesToExport)) {
     fileName <- ifelse(test = fileNamesInSnakeCase,
       yes = tablesToExport$fileName[i],
@@ -143,39 +143,42 @@ exportCohortDefinitionSet <- function(outputFolder, cohortDefinitionSet = NULL)
     cdsCohortSubsets <- getSubsetDefinitions(cohortDefinitionSet)
     if (length(cdsCohortSubsets) > 0) {
       for (i in seq_along(cdsCohortSubsets)) {
-        cohortSubsets <- rbind(cohortSubsets, 
-                               data.frame(
-                                 subsetDefinitionId = cdsCohortSubsets[[i]]$definitionId,
-                                 json = as.character(cdsCohortSubsets[[i]]$toJSON())
-                               ))
+        cohortSubsets <- rbind(
+          cohortSubsets,
+          data.frame(
+            subsetDefinitionId = cdsCohortSubsets[[i]]$definitionId,
+            json = as.character(cdsCohortSubsets[[i]]$toJSON())
+          )
+        )
       }
     } else {
       # NOTE: In this case the cohortDefinitionSet has no subsets defined
       # and so we need to add the additional columns that are defined
-      # in the function: addCohortSubsetDefinition. To do this, 
+      # in the function: addCohortSubsetDefinition. To do this,
       # we'll construct a copy of the cohortDefinitionSet with a single
       # subset to get the proper structure and filter it to the
       # cohorts of interest.
-      cdsCopy <- cohortDefinitionSet %>% 
+      cdsCopy <- cohortDefinitionSet %>%
         addCohortSubsetDefinition(
           cohortSubsetDefintion = createCohortSubsetDefinition(
-            definitionId = 1, 
-            name="empty", 
+            definitionId = 1,
+            name = "empty",
             subsetOperators = list(
               createDemographicSubset()
             )
           )
-        ) %>% dplyr::filter(cohortId == cohortDefinitionSet$cohortId)
+        ) %>%
+        dplyr::filter(.data$cohortId == cohortDefinitionSet$cohortId)
       cohortDefinitionSet <- cdsCopy
     }
     # Massage and save the cohort definition set
     colsToRename <- c("cohortId", "cohortName", "sql", "json")
     colInd <- which(names(cohortDefinitionSet) %in% colsToRename)
     names(cohortDefinitionSet)[colInd] <- c("cohortDefinitionId", "cohortName", "sqlCommand", "json")
-    if (! "description" %in% names(cohortDefinitionSet)) {
+    if (!"description" %in% names(cohortDefinitionSet)) {
       cohortDefinitionSet$description <- ""
     }
-    cohortDefinitions <- cohortDefinitionSet[,intersect(names(cohortDefinitions), names(cohortDefinitionSet))]
+    cohortDefinitions <- cohortDefinitionSet[, intersect(names(cohortDefinitions), names(cohortDefinitionSet))]
   }
   writeCsv(
     x = cohortDefinitions,
@@ -199,4 +202,4 @@ createEmptyResult <- function(tableName) {
   result <- tibble::as_tibble(t(result), name_repair = "check_unique")
   result <- result[FALSE, ]
   return(result)
-}
+}
diff --git a/R/NegativeControlCohorts.R b/R/NegativeControlCohorts.R
@@ -100,7 +100,7 @@ generateNegativeControlOutcomeCohorts <- function(connectionDetails = NULL,
   )
   assertLargeInteger(negativeControlOutcomeCohortSet$cohortId)
   assertLargeInteger(negativeControlOutcomeCohortSet$outcomeConceptId, columnName = "outcomeConceptId")
-  
+
   # Verify that cohort IDs are not repeated in the negative control
   # cohort definition set before generating
   if (length(unique(negativeControlOutcomeCohortSet$cohortId)) != length(negativeControlOutcomeCohortSet$cohortId)) {
@@ -186,11 +186,12 @@ createNegativeControlOutcomesQuery <- function(connection,
                                                detectOnDescendants,
                                                negativeControlOutcomeCohortSet) {
   selectClause <- ""
-  for (i in 1:nrow(negativeControlOutcomeCohortSet)){
-    selectClause <- paste0(selectClause, 
-                           "SELECT CAST(", negativeControlOutcomeCohortSet$cohortId[i], " AS BIGINT), ", 
-                           "CAST(", negativeControlOutcomeCohortSet$outcomeConceptId[i], " AS BIGINT)"
-                           )
+  for (i in 1:nrow(negativeControlOutcomeCohortSet)) {
+    selectClause <- paste0(
+      selectClause,
+      "SELECT CAST(", negativeControlOutcomeCohortSet$cohortId[i], " AS BIGINT), ",
+      "CAST(", negativeControlOutcomeCohortSet$outcomeConceptId[i], " AS BIGINT)"
+    )
     if (i < nrow(negativeControlOutcomeCohortSet)) {
       selectClause <- paste0(selectClause, "\nUNION\n")
     }

diff --git a/R/ResultsDataModel.R b/R/ResultsDataModel.R
@@ -43,10 +43,10 @@ createResultsDataModel <- function(connectionDetails = NULL,
   if (connectionDetails$dbms == "sqlite" & databaseSchema != "main") {
     stop("Invalid schema for sqlite, use databaseSchema = 'main'")
   }
-  
+
   connection <- DatabaseConnector::connect(connectionDetails)
   on.exit(DatabaseConnector::disconnect(connection))
-  
+
   # Create first version of results model:
   sql <- SqlRender::readSql(system.file("sql/sql_server/CreateResultsDataModel.sql", package = "CohortGenerator", mustWork = TRUE))
   sql <- SqlRender::render(
@@ -115,9 +115,11 @@ uploadResults <- function(connectionDetails,
 #' @export
 migrateDataModel <- function(connectionDetails, databaseSchema, tablePrefix = "") {
   ParallelLogger::logInfo("Migrating data set")
-  migrator <- getDataMigrator(connectionDetails = connectionDetails,
-                              databaseSchema = databaseSchema,
-                              tablePrefix = tablePrefix)
+  migrator <- getDataMigrator(
+    connectionDetails = connectionDetails,
+    databaseSchema = databaseSchema,
+    tablePrefix = tablePrefix
+  )
   migrator$executeMigrations()
   migrator$finalize()
 }
@@ -142,4 +144,4 @@ getDataMigrator <- function(connectionDetails, databaseSchema, tablePrefix = "")
     migrationPath = "migrations",
     packageName = "CohortGenerator"
   )
-}
+}