Add runCohortGeneration function (#170)

* Add runCohortGeneration function to generate and export results * Remove insert table for NC cohorts * NC counts should include 0 counts * fix platform tests
OHDSI · Jun 28, 2024 · 667f5c8 · 667f5c8
1 parent 2f30ef8
commit 667f5c8
Show file tree

Hide file tree

Showing 13 changed files with 644 additions and 77 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,14 +22,16 @@ Imports:
   digest,
   dplyr,
   lubridate,
+  methods,
   ParallelLogger (>= 3.0.0),
   readr (>= 2.1.0),
   rlang,
   RJSONIO,
   jsonlite,
   ResultModelManager,
   SqlRender (>= 1.11.1),
-  stringi (>= 1.7.6)
+  stringi (>= 1.7.6),
+  tibble
 Suggests:
   CirceR (>= 1.1.1),
   Eunomia,

diff --git a/NAMESPACE b/NAMESPACE
@@ -40,6 +40,7 @@ export(isTaskRequired)
 export(migrateDataModel)
 export(readCsv)
 export(recordTasksDone)
+export(runCohortGeneration)
 export(sampleCohortDefinitionSet)
 export(saveCohortDefinitionSet)
 export(saveCohortSubsetDefinition)
@@ -48,8 +49,9 @@ export(uploadResults)
 export(writeCsv)
 import(DatabaseConnector)
 import(R6)
-importFrom(dplyr,"%>%")
+import(dplyr)
 importFrom(grDevices,rgb)
+importFrom(methods,is)
 importFrom(rlang,':=')
 importFrom(rlang,.data)
 importFrom(stats,aggregate)

diff --git a/R/CohortGenerator.R b/R/CohortGenerator.R
@@ -19,10 +19,11 @@
 
 #' @import DatabaseConnector
 #' @import R6
+#' @import dplyr
 #' @importFrom grDevices rgb
+#' @importFrom methods is
 #' @importFrom stats aggregate setNames
 #' @importFrom utils write.csv install.packages menu packageVersion sessionInfo
-#' @importFrom dplyr "%>%"
 #' @importFrom rlang .data ':='
 NULL
 

diff --git a/R/Export.R b/R/Export.R
@@ -47,6 +47,9 @@
 #'                                    to the exported results
 #'                                    
 #' @template CohortDefinitionSet
+#' 
+#' @param tablePrefix Optional - allows to append a prefix to the exported
+#'                    file names.
 #'
 #' @export
 exportCohortStatsTables <- function(connectionDetails,
@@ -58,7 +61,8 @@ exportCohortStatsTables <- function(connectionDetails,
                                     fileNamesInSnakeCase = FALSE,
                                     incremental = FALSE,
                                     databaseId = NULL,
-                                    cohortDefinitionSet = NULL) {
+                                    cohortDefinitionSet = NULL,
+                                    tablePrefix = "") {
   if (is.null(connection)) {
     # Establish the connection and ensure the cleanup is performed
     connection <- DatabaseConnector::connect(connectionDetails)
@@ -71,8 +75,9 @@ exportCohortStatsTables <- function(connectionDetails,
 
   # Internal function to export the stats
   exportStats <- function(data,
-                          fileName) {
-    fullFileName <- file.path(cohortStatisticsFolder, fileName)
+                          fileName,
+                          tablePrefix) {
+    fullFileName <- file.path(cohortStatisticsFolder, paste0(tablePrefix, fileName))
     rlang::inform(paste0("- Saving data to - ", fullFileName))
     if (incremental) {
       if (snakeCaseToCamelCase) {
@@ -96,14 +101,15 @@ exportCohortStatsTables <- function(connectionDetails,
     warning("No cohortDefinitionSet specified; please make sure you've inserted the inclusion rule names using the insertInclusionRuleNames function.")
     tablesToExport <- rbind(tablesToExport, data.frame(
       tableName = "cohortInclusionTable",
-      fileName = "cohort_inclusion.csv"
+      fileName = paste0(tablePrefix, "cohort_inclusion.csv")
     ))
   } else {
     inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)
     names(inclusionRules) <- SqlRender::camelCaseToSnakeCase(names(inclusionRules))
     exportStats(
       data = inclusionRules,
-      fileName = "cohort_inclusion.csv"
+      fileName = "cohort_inclusion.csv",
+      tablePrefix = tablePrefix
     )
   }
 
@@ -124,7 +130,73 @@ exportCohortStatsTables <- function(connectionDetails,
     )
     exportStats(
       data = cohortStats[[tablesToExport$tableName[i]]],
-      fileName = fileName
+      fileName = fileName,
+      tablePrefix = tablePrefix
     )
   }
 }
+
+exportCohortDefinitionSet <- function(outputFolder, cohortDefinitionSet = NULL) {
+  cohortDefinitions <- createEmptyResult("cg_cohort_definition")
+  cohortSubsets <- createEmptyResult("cg_cohort_subset_definition")
+  if (!is.null(cohortDefinitionSet)) {
+    cdsCohortSubsets <- getSubsetDefinitions(cohortDefinitionSet)
+    if (length(cdsCohortSubsets) > 0) {
+      for (i in seq_along(cdsCohortSubsets)) {
+        cohortSubsets <- rbind(cohortSubsets, 
+                               data.frame(
+                                 subsetDefinitionId = cdsCohortSubsets[[i]]$definitionId,
+                                 json = as.character(cdsCohortSubsets[[i]]$toJSON())
+                               ))
+      }
+    } else {
+      # NOTE: In this case the cohortDefinitionSet has no subsets defined
+      # and so we need to add the additional columns that are defined
+      # in the function: addCohortSubsetDefinition. To do this, 
+      # we'll construct a copy of the cohortDefinitionSet with a single
+      # subset to get the proper structure and filter it to the
+      # cohorts of interest.
+      cdsCopy <- cohortDefinitionSet %>% 
+        addCohortSubsetDefinition(
+          cohortSubsetDefintion = createCohortSubsetDefinition(
+            definitionId = 1, 
+            name="empty", 
+            subsetOperators = list(
+              createDemographicSubset()
+            )
+          )
+        ) %>% dplyr::filter(cohortId == cohortDefinitionSet$cohortId)
+      cohortDefinitionSet <- cdsCopy
+    }
+    # Massage and save the cohort definition set
+    colsToRename <- c("cohortId", "cohortName", "sql", "json")
+    colInd <- which(names(cohortDefinitionSet) %in% colsToRename)
+    names(cohortDefinitionSet)[colInd] <- c("cohortDefinitionId", "cohortName", "sqlCommand", "json")
+    if (! "description" %in% names(cohortDefinitionSet)) {
+      cohortDefinitionSet$description <- ""
+    }
+    cohortDefinitions <- cohortDefinitionSet[,intersect(names(cohortDefinitions), names(cohortDefinitionSet))]
+  }
+  writeCsv(
+    x = cohortDefinitions,
+    file = file.path(outputFolder, "cg_cohort_definition.csv")
+  )
+  writeCsv(
+    x = cohortSubsets,
+    file = file.path(outputFolder, "cg_cohort_subset_definition.csv")
+  )
+}
+
+createEmptyResult <- function(tableName) {
+  columns <- readCsv(
+    file = system.file("csv", "resultsDataModelSpecification.csv", package = "CohortGenerator")
+  ) %>%
+    dplyr::filter(.data$tableName == !!tableName) %>%
+    dplyr::pull(.data$columnName) %>%
+    SqlRender::snakeCaseToCamelCase()
+  result <- vector(length = length(columns))
+  names(result) <- columns
+  result <- tibble::as_tibble(t(result), name_repair = "check_unique")
+  result <- result[FALSE, ]
+  return(result)
+}
diff --git a/R/NegativeControlCohorts.R b/R/NegativeControlCohorts.R
@@ -148,28 +148,15 @@ generateNegativeControlOutcomeCohorts <- function(connectionDetails = NULL,
 
   rlang::inform("Generating negative control outcome cohorts")
 
-  # Send the negative control outcome cohort set to the server for use
-  # in processing. This temp table will hold the mapping between
-  # cohort_definition_id and the outcomeConceptId in the data.frame()
-  DatabaseConnector::insertTable(
-    connection = connection,
-    data = negativeControlOutcomeCohortSet,
-    tempEmulationSchema = tempEmulationSchema,
-    tableName = "#nc_set",
-    camelCaseToSnakeCase = TRUE,
-    dropTableIfExists = TRUE,
-    createTable = TRUE,
-    tempTable = TRUE
-  )
-
   sql <- createNegativeControlOutcomesQuery(
     connection = connection,
     cdmDatabaseSchema = cdmDatabaseSchema,
     tempEmulationSchema = tempEmulationSchema,
     cohortDatabaseSchema = cohortDatabaseSchema,
     cohortTable = cohortTable,
     occurrenceType = occurrenceType,
-    detectOnDescendants = detectOnDescendants
+    detectOnDescendants = detectOnDescendants,
+    negativeControlOutcomeCohortSet = negativeControlOutcomeCohortSet
   )
 
   DatabaseConnector::executeSql(
@@ -196,7 +183,30 @@ createNegativeControlOutcomesQuery <- function(connection,
                                                cohortDatabaseSchema,
                                                cohortTable,
                                                occurrenceType,
-                                               detectOnDescendants) {
+                                               detectOnDescendants,
+                                               negativeControlOutcomeCohortSet) {
+  selectClause <- ""
+  for (i in 1:nrow(negativeControlOutcomeCohortSet)){
+    selectClause <- paste0(selectClause, 
+                           "SELECT CAST(", negativeControlOutcomeCohortSet$cohortId[i], " AS BIGINT), ", 
+                           "CAST(", negativeControlOutcomeCohortSet$outcomeConceptId[i], " AS BIGINT)"
+                           )
+    if (i < nrow(negativeControlOutcomeCohortSet)) {
+      selectClause <- paste0(selectClause, "\nUNION\n")
+    }
+  }
+  selectClause
+  ncSetQuery <- paste0(
+    "CREATE TABLE #nc_set (",
+    "  cohort_id bigint NOT NULL,",
+    "  outcome_concept_id bigint NOT NULL",
+    ")",
+    ";",
+    "INSERT INTO #nc_set (cohort_id, outcome_concept_id)\n",
+    selectClause,
+    "\n;"
+  )
+
   sql <- sql <- SqlRender::readSql(system.file("sql/sql_server/NegativeControlOutcomes.sql", package = "CohortGenerator", mustWork = TRUE))
   sql <- SqlRender::render(
     sql = sql,
@@ -205,6 +215,7 @@ createNegativeControlOutcomesQuery <- function(connection,
     cohort_table = cohortTable,
     detect_on_descendants = detectOnDescendants,
     occurrence_type = occurrenceType,
+    nc_set_query = ncSetQuery,
     warnOnMissingParameters = TRUE
   )
   sql <- SqlRender::translate(