Skip to content

Commit

Permalink
Add runCohortGeneration function (#170)
Browse files Browse the repository at this point in the history
* Add runCohortGeneration function to generate and export results
* Remove insert table for NC cohorts
* NC counts should include 0 counts
* fix platform tests
  • Loading branch information
anthonysena authored Jun 28, 2024
1 parent 2f30ef8 commit 667f5c8
Show file tree
Hide file tree
Showing 13 changed files with 644 additions and 77 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,16 @@ Imports:
digest,
dplyr,
lubridate,
methods,
ParallelLogger (>= 3.0.0),
readr (>= 2.1.0),
rlang,
RJSONIO,
jsonlite,
ResultModelManager,
SqlRender (>= 1.11.1),
stringi (>= 1.7.6)
stringi (>= 1.7.6),
tibble
Suggests:
CirceR (>= 1.1.1),
Eunomia,
Expand Down
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export(isTaskRequired)
export(migrateDataModel)
export(readCsv)
export(recordTasksDone)
export(runCohortGeneration)
export(sampleCohortDefinitionSet)
export(saveCohortDefinitionSet)
export(saveCohortSubsetDefinition)
Expand All @@ -48,8 +49,9 @@ export(uploadResults)
export(writeCsv)
import(DatabaseConnector)
import(R6)
importFrom(dplyr,"%>%")
import(dplyr)
importFrom(grDevices,rgb)
importFrom(methods,is)
importFrom(rlang,':=')
importFrom(rlang,.data)
importFrom(stats,aggregate)
Expand Down
3 changes: 2 additions & 1 deletion R/CohortGenerator.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@

#' @import DatabaseConnector
#' @import R6
#' @import dplyr
#' @importFrom grDevices rgb
#' @importFrom methods is
#' @importFrom stats aggregate setNames
#' @importFrom utils write.csv install.packages menu packageVersion sessionInfo
#' @importFrom dplyr "%>%"
#' @importFrom rlang .data ':='
NULL

Expand Down
84 changes: 78 additions & 6 deletions R/Export.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
#' to the exported results
#'
#' @template CohortDefinitionSet
#'
#' @param tablePrefix Optional - allows to append a prefix to the exported
#' file names.
#'
#' @export
exportCohortStatsTables <- function(connectionDetails,
Expand All @@ -58,7 +61,8 @@ exportCohortStatsTables <- function(connectionDetails,
fileNamesInSnakeCase = FALSE,
incremental = FALSE,
databaseId = NULL,
cohortDefinitionSet = NULL) {
cohortDefinitionSet = NULL,
tablePrefix = "") {
if (is.null(connection)) {
# Establish the connection and ensure the cleanup is performed
connection <- DatabaseConnector::connect(connectionDetails)
Expand All @@ -71,8 +75,9 @@ exportCohortStatsTables <- function(connectionDetails,

# Internal function to export the stats
exportStats <- function(data,
fileName) {
fullFileName <- file.path(cohortStatisticsFolder, fileName)
fileName,
tablePrefix) {
fullFileName <- file.path(cohortStatisticsFolder, paste0(tablePrefix, fileName))
rlang::inform(paste0("- Saving data to - ", fullFileName))
if (incremental) {
if (snakeCaseToCamelCase) {
Expand All @@ -96,14 +101,15 @@ exportCohortStatsTables <- function(connectionDetails,
warning("No cohortDefinitionSet specified; please make sure you've inserted the inclusion rule names using the insertInclusionRuleNames function.")
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = "cohortInclusionTable",
fileName = "cohort_inclusion.csv"
fileName = paste0(tablePrefix, "cohort_inclusion.csv")
))
} else {
inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)
names(inclusionRules) <- SqlRender::camelCaseToSnakeCase(names(inclusionRules))
exportStats(
data = inclusionRules,
fileName = "cohort_inclusion.csv"
fileName = "cohort_inclusion.csv",
tablePrefix = tablePrefix
)
}

Expand All @@ -124,7 +130,73 @@ exportCohortStatsTables <- function(connectionDetails,
)
exportStats(
data = cohortStats[[tablesToExport$tableName[i]]],
fileName = fileName
fileName = fileName,
tablePrefix = tablePrefix
)
}
}

exportCohortDefinitionSet <- function(outputFolder, cohortDefinitionSet = NULL) {
cohortDefinitions <- createEmptyResult("cg_cohort_definition")
cohortSubsets <- createEmptyResult("cg_cohort_subset_definition")
if (!is.null(cohortDefinitionSet)) {
cdsCohortSubsets <- getSubsetDefinitions(cohortDefinitionSet)
if (length(cdsCohortSubsets) > 0) {
for (i in seq_along(cdsCohortSubsets)) {
cohortSubsets <- rbind(cohortSubsets,
data.frame(
subsetDefinitionId = cdsCohortSubsets[[i]]$definitionId,
json = as.character(cdsCohortSubsets[[i]]$toJSON())
))
}
} else {
# NOTE: In this case the cohortDefinitionSet has no subsets defined
# and so we need to add the additional columns that are defined
# in the function: addCohortSubsetDefinition. To do this,
# we'll construct a copy of the cohortDefinitionSet with a single
# subset to get the proper structure and filter it to the
# cohorts of interest.
cdsCopy <- cohortDefinitionSet %>%
addCohortSubsetDefinition(
cohortSubsetDefintion = createCohortSubsetDefinition(
definitionId = 1,
name="empty",
subsetOperators = list(
createDemographicSubset()
)
)
) %>% dplyr::filter(cohortId == cohortDefinitionSet$cohortId)
cohortDefinitionSet <- cdsCopy
}
# Massage and save the cohort definition set
colsToRename <- c("cohortId", "cohortName", "sql", "json")
colInd <- which(names(cohortDefinitionSet) %in% colsToRename)
names(cohortDefinitionSet)[colInd] <- c("cohortDefinitionId", "cohortName", "sqlCommand", "json")
if (! "description" %in% names(cohortDefinitionSet)) {
cohortDefinitionSet$description <- ""
}
cohortDefinitions <- cohortDefinitionSet[,intersect(names(cohortDefinitions), names(cohortDefinitionSet))]
}
writeCsv(
x = cohortDefinitions,
file = file.path(outputFolder, "cg_cohort_definition.csv")
)
writeCsv(
x = cohortSubsets,
file = file.path(outputFolder, "cg_cohort_subset_definition.csv")
)
}

createEmptyResult <- function(tableName) {
columns <- readCsv(
file = system.file("csv", "resultsDataModelSpecification.csv", package = "CohortGenerator")
) %>%
dplyr::filter(.data$tableName == !!tableName) %>%
dplyr::pull(.data$columnName) %>%
SqlRender::snakeCaseToCamelCase()
result <- vector(length = length(columns))
names(result) <- columns
result <- tibble::as_tibble(t(result), name_repair = "check_unique")
result <- result[FALSE, ]
return(result)
}
43 changes: 27 additions & 16 deletions R/NegativeControlCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,28 +148,15 @@ generateNegativeControlOutcomeCohorts <- function(connectionDetails = NULL,

rlang::inform("Generating negative control outcome cohorts")

# Send the negative control outcome cohort set to the server for use
# in processing. This temp table will hold the mapping between
# cohort_definition_id and the outcomeConceptId in the data.frame()
DatabaseConnector::insertTable(
connection = connection,
data = negativeControlOutcomeCohortSet,
tempEmulationSchema = tempEmulationSchema,
tableName = "#nc_set",
camelCaseToSnakeCase = TRUE,
dropTableIfExists = TRUE,
createTable = TRUE,
tempTable = TRUE
)

sql <- createNegativeControlOutcomesQuery(
connection = connection,
cdmDatabaseSchema = cdmDatabaseSchema,
tempEmulationSchema = tempEmulationSchema,
cohortDatabaseSchema = cohortDatabaseSchema,
cohortTable = cohortTable,
occurrenceType = occurrenceType,
detectOnDescendants = detectOnDescendants
detectOnDescendants = detectOnDescendants,
negativeControlOutcomeCohortSet = negativeControlOutcomeCohortSet
)

DatabaseConnector::executeSql(
Expand All @@ -196,7 +183,30 @@ createNegativeControlOutcomesQuery <- function(connection,
cohortDatabaseSchema,
cohortTable,
occurrenceType,
detectOnDescendants) {
detectOnDescendants,
negativeControlOutcomeCohortSet) {
selectClause <- ""
for (i in 1:nrow(negativeControlOutcomeCohortSet)){
selectClause <- paste0(selectClause,
"SELECT CAST(", negativeControlOutcomeCohortSet$cohortId[i], " AS BIGINT), ",
"CAST(", negativeControlOutcomeCohortSet$outcomeConceptId[i], " AS BIGINT)"
)
if (i < nrow(negativeControlOutcomeCohortSet)) {
selectClause <- paste0(selectClause, "\nUNION\n")
}
}
selectClause
ncSetQuery <- paste0(
"CREATE TABLE #nc_set (",
" cohort_id bigint NOT NULL,",
" outcome_concept_id bigint NOT NULL",
")",
";",
"INSERT INTO #nc_set (cohort_id, outcome_concept_id)\n",
selectClause,
"\n;"
)

sql <- sql <- SqlRender::readSql(system.file("sql/sql_server/NegativeControlOutcomes.sql", package = "CohortGenerator", mustWork = TRUE))
sql <- SqlRender::render(
sql = sql,
Expand All @@ -205,6 +215,7 @@ createNegativeControlOutcomesQuery <- function(connection,
cohort_table = cohortTable,
detect_on_descendants = detectOnDescendants,
occurrence_type = occurrenceType,
nc_set_query = ncSetQuery,
warnOnMissingParameters = TRUE
)
sql <- SqlRender::translate(
Expand Down
Loading

0 comments on commit 667f5c8

Please sign in to comment.