Prevent upload of inclusion rule names (#156)
* Isolate cohort inclusion rule function

* Export inclusion rule stats without the need to insert to db
anthonysena authored Jun 5, 2024
1 parent 624b25d commit cdc6e24
Showing 6 changed files with 284 additions and 161 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
@@ -23,6 +23,7 @@ export(generateCohortSet)
export(generateNegativeControlOutcomeCohorts)
export(getCohortCounts)
export(getCohortDefinitionSet)
export(getCohortInclusionRules)
export(getCohortStats)
export(getCohortTableNames)
export(getRequiredTasks)
114 changes: 67 additions & 47 deletions R/CohortStats.R
@@ -47,14 +47,6 @@ insertInclusionRuleNames <- function(connectionDetails = NULL,
stop("You must provide either a database connection or the connection details.")
}

checkmate::assertDataFrame(cohortDefinitionSet, min.rows = 1, col.names = "named")
checkmate::assertNames(colnames(cohortDefinitionSet),
must.include = c(
"cohortId",
"cohortName",
"json"
)
)
if (is.null(connection)) {
connection <- DatabaseConnector::connect(connectionDetails)
on.exit(DatabaseConnector::disconnect(connection))
@@ -65,45 +57,8 @@
stop(paste0(cohortInclusionTable, " table not found in schema: ", cohortDatabaseSchema, ". Please make sure the table is created using the createCohortTables() function before calling this function."))
}

# Assemble the cohort inclusion rules
# NOTE: This data frame must match the @cohort_inclusion_table
# structure as defined in inst/sql/sql_server/CreateCohortTables.sql
inclusionRules <- data.frame(
cohortDefinitionId = bit64::integer64(),
ruleSequence = integer(),
name = character(),
description = character()
)
# Remove any cohort definitions that do not include the JSON property
cohortDefinitionSet <- cohortDefinitionSet[!(is.null(cohortDefinitionSet$json) | is.na(cohortDefinitionSet$json)), ]
for (i in 1:nrow(cohortDefinitionSet)) {
cohortDefinition <- RJSONIO::fromJSON(content = cohortDefinitionSet$json[i], digits = 23)
if (!is.null(cohortDefinition$InclusionRules)) {
nrOfRules <- length(cohortDefinition$InclusionRules)
if (nrOfRules > 0) {
for (j in 1:nrOfRules) {
ruleName <- cohortDefinition$InclusionRules[[j]]$name
ruleDescription <- cohortDefinition$InclusionRules[[j]]$description
if (is.na(ruleName) || ruleName == "") {
ruleName <- paste0("Unnamed rule (Sequence ", j - 1, ")")
}
if (is.null(ruleDescription)) {
ruleDescription <- ""
}
inclusionRules <- rbind(
inclusionRules,
data.frame(
cohortDefinitionId = bit64::as.integer64(cohortDefinitionSet$cohortId[i]),
ruleSequence = as.integer(j - 1),
name = ruleName,
description = ruleDescription
)
)
}
}
}
}

inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)

# Remove any existing data to prevent duplication
DatabaseConnector::renderTranslateExecuteSql(
connection = connection,
@@ -174,6 +129,7 @@ getStatsTable <- function(connectionDetails,
}

#' Get Cohort Inclusion Stats Table Data
#'
#' @description
#' This function returns a data frame of the data in the Cohort Inclusion Tables.
#' Results are organized into a list with 5 different data frames:
@@ -244,3 +200,67 @@ getCohortStats <- function(connectionDetails,
}
return(results)
}


#' Get Cohort Inclusion Rules from a cohort definition set
#'
#' @description
#' This function returns a data frame of the inclusion rules defined
#' in a cohort definition set.
#'
#' @md
#' @template CohortDefinitionSet
#'
#' @export
getCohortInclusionRules <- function(cohortDefinitionSet) {
checkmate::assertDataFrame(cohortDefinitionSet, min.rows = 1, col.names = "named")
checkmate::assertNames(colnames(cohortDefinitionSet),
must.include = c(
"cohortId",
"cohortName",
"json"
)
)

# Assemble the cohort inclusion rules
# NOTE: This data frame must match the @cohort_inclusion_table
# structure as defined in inst/sql/sql_server/CreateCohortTables.sql
inclusionRules <- data.frame(
cohortDefinitionId = bit64::integer64(),
ruleSequence = integer(),
name = character(),
description = character()
)

# Remove any cohort definitions that do not include the JSON property
cohortDefinitionSet <- cohortDefinitionSet[!(is.null(cohortDefinitionSet$json) | is.na(cohortDefinitionSet$json)), ]
for (i in 1:nrow(cohortDefinitionSet)) {
cohortDefinition <- RJSONIO::fromJSON(content = cohortDefinitionSet$json[i], digits = 23)
if (!is.null(cohortDefinition$InclusionRules)) {
nrOfRules <- length(cohortDefinition$InclusionRules)
if (nrOfRules > 0) {
for (j in 1:nrOfRules) {
ruleName <- cohortDefinition$InclusionRules[[j]]$name
ruleDescription <- cohortDefinition$InclusionRules[[j]]$description
if (is.na(ruleName) || ruleName == "") {
ruleName <- paste0("Unnamed rule (Sequence ", j - 1, ")")
}
if (is.null(ruleDescription)) {
ruleDescription <- ""
}
inclusionRules <- rbind(
inclusionRules,
data.frame(
cohortDefinitionId = bit64::as.integer64(cohortDefinitionSet$cohortId[i]),
ruleSequence = as.integer(j - 1),
name = ruleName,
description = ruleDescription
)
)
}
}
}
}

invisible(inclusionRules)
}
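
A minimal usage sketch (not part of this commit) for the newly exported getCohortInclusionRules(); the inline cohort JSON and cohort name below are hypothetical stand-ins for a definition set normally loaded with getCohortDefinitionSet().

library(CohortGenerator)

# Hypothetical, stripped-down cohort definition with two inclusion rules;
# real JSON would come from ATLAS or getCohortDefinitionSet().
cohortJson <- '{"InclusionRules": [
  {"name": "Age >= 18", "description": "Adults only"},
  {"name": ""}
]}'
cohortDefinitionSet <- data.frame(
  cohortId = 1,
  cohortName = "Example cohort",
  json = cohortJson,
  stringsAsFactors = FALSE
)

# Returned invisibly: one row per rule with cohortDefinitionId, ruleSequence
# (0-based), name, and description; rules without a name get a generated
# placeholder name.
inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)
print(inclusionRules)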
89 changes: 46 additions & 43 deletions R/Export.R
@@ -19,7 +19,14 @@
#' @description
#' This function retrieves the data from the cohort statistics tables and
#' writes them to the inclusion statistics folder specified in the function
#' call.
#' call. NOTE: inclusion rule names are handled in one of two ways:
#'
#' 1. You can specify the cohortDefinitionSet parameter and the inclusion rule
#' names will be extracted from the data.frame.
#' 2. You can insert the inclusion rule names into the database using the
#' insertInclusionRuleNames function of this package.
#'
#' The first approach is preferred, as it avoids the warning that is otherwise emitted.
#'
#' @template Connection
#'
@@ -38,6 +45,8 @@
#'
#' @param databaseId Optional - when specified, the databaseId will be added
#' to the exported results
#'
#' @template CohortDefinitionSet
#'
#' @export
exportCohortStatsTables <- function(connectionDetails,
@@ -48,7 +57,8 @@ exportCohortStatsTables <- function(connectionDetails,
snakeCaseToCamelCase = TRUE,
fileNamesInSnakeCase = FALSE,
incremental = FALSE,
databaseId = NULL) {
databaseId = NULL,
cohortDefinitionSet = NULL) {
if (is.null(connection)) {
# Establish the connection and ensure the cleanup is performed
connection <- DatabaseConnector::connect(connectionDetails)
@@ -58,20 +68,10 @@
if (!dir.exists(cohortStatisticsFolder)) {
dir.create(cohortStatisticsFolder, recursive = TRUE)
}

# Export the stats
exportStats <- function(table,
fileName,
includeDatabaseId) {
data <- getStatsTable(
connection = connection,
table = table,
snakeCaseToCamelCase = snakeCaseToCamelCase,
databaseId = databaseId,
cohortDatabaseSchema = cohortDatabaseSchema,
includeDatabaseId = includeDatabaseId
)


# Internal function to export the stats
exportStats <- function(data,
fileName) {
fullFileName <- file.path(cohortStatisticsFolder, fileName)
ParallelLogger::logInfo("- Saving data to - ", fullFileName)
if (incremental) {
@@ -86,41 +86,44 @@ exportCohortStatsTables <- function(connectionDetails,
.writeCsv(x = data, file = fullFileName)
}
}

tablesToExport <- data.frame(
tableName = cohortTableNames$cohortInclusionTable,
fileName = "cohort_inclusion.csv",
includeDatabaseId = FALSE
tableName = c("cohortInclusionResultTable", "cohortInclusionStatsTable", "cohortSummaryStatsTable", "cohortCensorStatsTable"),
fileName = c("cohort_inc_result.csv", "cohort_inc_stats.csv", "cohort_summary_stats.csv", "cohort_censor_stats.csv")
)

if (is.null(cohortDefinitionSet)) {
warning("No cohortDefinitionSet specified; please make sure you've inserted the inclusion rule names using the insertInclusionRuleNames function.")
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = "cohortInclusionTable",
fileName = "cohort_inclusion.csv"
))
} else {
inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)
exportStats(
data = inclusionRules,
fileName = "cohort_inclusion.csv"
)
}

# Get the cohort statistics
cohortStats <- getCohortStats(
connectionDetails = connectionDetails,
connection = connection,
cohortDatabaseSchema = cohortDatabaseSchema,
databaseId = databaseId,
snakeCaseToCamelCase = snakeCaseToCamelCase,
cohortTableName = cohortTableNames
)
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = cohortTableNames$cohortInclusionResultTable,
fileName = "cohort_inc_result.csv",
includeDatabaseId = TRUE
))
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = cohortTableNames$cohortInclusionStatsTable,
fileName = "cohort_inc_stats.csv",
includeDatabaseId = TRUE
))
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = cohortTableNames$cohortSummaryStatsTable,
fileName = "cohort_summary_stats.csv",
includeDatabaseId = TRUE
))
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = cohortTableNames$cohortCensorStatsTable,
fileName = "cohort_censor_stats.csv",
includeDatabaseId = TRUE
))

for (i in 1:nrow(tablesToExport)) {
fileName <- ifelse(test = fileNamesInSnakeCase,
yes = tablesToExport$fileName[i],
no = SqlRender::snakeCaseToCamelCase(tablesToExport$fileName[i])
)
exportStats(
table = tablesToExport$tableName[i],
fileName = fileName,
includeDatabaseId = tablesToExport$includeDatabaseId[i]
data = cohortStats[[tablesToExport$tableName[i]]],
fileName = fileName
)
}
}
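
A hedged sketch of the two paths described in the updated exportCohortStatsTables() documentation; the connection details, schema, folder, and file paths below are placeholders, not values taken from this commit.

library(CohortGenerator)

connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "postgresql",
  server = "localhost/ohdsi"  # placeholder connection
)
cohortTableNames <- getCohortTableNames(cohortTable = "my_cohort")
cohortDefinitionSet <- getCohortDefinitionSet(
  settingsFileName = "Cohorts.csv",  # placeholder study package paths
  jsonFolder = "cohorts",
  sqlFolder = "sql/sql_server"
)

# Preferred path: pass cohortDefinitionSet so the inclusion rule names are
# taken directly from the cohort JSON and never uploaded to the database.
exportCohortStatsTables(
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "results",
  cohortTableNames = cohortTableNames,
  cohortStatisticsFolder = "export/inclusionStats",
  cohortDefinitionSet = cohortDefinitionSet
)

# Legacy path: omit cohortDefinitionSet; the function then emits a warning and
# reads the names from the cohort inclusion table, which must have been
# populated earlier with insertInclusionRuleNames().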
20 changes: 18 additions & 2 deletions man/exportCohortStatsTables.Rd


21 changes: 21 additions & 0 deletions man/getCohortInclusionRules.Rd


