Prevent upload of inclusion rule names (#156)
* Isolate cohort inclusion rule function

* Export inclusion rule stats without the need to insert to db
anthonysena authored Jun 5, 2024
1 parent 624b25d commit cdc6e24
Showing 6 changed files with 284 additions and 161 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
@@ -23,6 +23,7 @@ export(generateCohortSet)
export(generateNegativeControlOutcomeCohorts)
export(getCohortCounts)
export(getCohortDefinitionSet)
export(getCohortInclusionRules)
export(getCohortStats)
export(getCohortTableNames)
export(getRequiredTasks)
114 changes: 67 additions & 47 deletions R/CohortStats.R
@@ -47,14 +47,6 @@ insertInclusionRuleNames <- function(connectionDetails = NULL,
stop("You must provide either a database connection or the connection details.")
}

checkmate::assertDataFrame(cohortDefinitionSet, min.rows = 1, col.names = "named")
checkmate::assertNames(colnames(cohortDefinitionSet),
must.include = c(
"cohortId",
"cohortName",
"json"
)
)
if (is.null(connection)) {
connection <- DatabaseConnector::connect(connectionDetails)
on.exit(DatabaseConnector::disconnect(connection))
@@ -65,45 +57,8 @@
stop(paste0(cohortInclusionTable, " table not found in schema: ", cohortDatabaseSchema, ". Please make sure the table is created using the createCohortTables() function before calling this function."))
}

# Assemble the cohort inclusion rules
# NOTE: This data frame must match the @cohort_inclusion_table
# structure as defined in inst/sql/sql_server/CreateCohortTables.sql
inclusionRules <- data.frame(
cohortDefinitionId = bit64::integer64(),
ruleSequence = integer(),
name = character(),
description = character()
)
# Remove any cohort definitions that do not include the JSON property
cohortDefinitionSet <- cohortDefinitionSet[!(is.null(cohortDefinitionSet$json) | is.na(cohortDefinitionSet$json)), ]
for (i in 1:nrow(cohortDefinitionSet)) {
cohortDefinition <- RJSONIO::fromJSON(content = cohortDefinitionSet$json[i], digits = 23)
if (!is.null(cohortDefinition$InclusionRules)) {
nrOfRules <- length(cohortDefinition$InclusionRules)
if (nrOfRules > 0) {
for (j in 1:nrOfRules) {
ruleName <- cohortDefinition$InclusionRules[[j]]$name
ruleDescription <- cohortDefinition$InclusionRules[[j]]$description
if (is.na(ruleName) || ruleName == "") {
ruleName <- paste0("Unnamed rule (Sequence ", j - 1, ")")
}
if (is.null(ruleDescription)) {
ruleDescription <- ""
}
inclusionRules <- rbind(
inclusionRules,
data.frame(
cohortDefinitionId = bit64::as.integer64(cohortDefinitionSet$cohortId[i]),
ruleSequence = as.integer(j - 1),
name = ruleName,
description = ruleDescription
)
)
}
}
}
}

inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)

# Remove any existing data to prevent duplication
DatabaseConnector::renderTranslateExecuteSql(
connection = connection,
@@ -174,6 +129,7 @@ getStatsTable <- function(connectionDetails,
}

#' Get Cohort Inclusion Stats Table Data
#'
#' @description
#' This function returns a data frame of the data in the Cohort Inclusion Tables.
#' Results are organized into a list with 5 different data frames:
@@ -244,3 +200,67 @@ getCohortStats <- function(connectionDetails,
}
return(results)
}


#' Get Cohort Inclusion Rules from a cohort definition set
#'
#' @description
#' This function returns a data frame of the inclusion rules defined
#' in a cohort definition set.
#'
#' @md
#' @template CohortDefinitionSet
#'
#' @export
getCohortInclusionRules <- function(cohortDefinitionSet) {
checkmate::assertDataFrame(cohortDefinitionSet, min.rows = 1, col.names = "named")
checkmate::assertNames(colnames(cohortDefinitionSet),
must.include = c(
"cohortId",
"cohortName",
"json"
)
)

# Assemble the cohort inclusion rules
# NOTE: This data frame must match the @cohort_inclusion_table
# structure as defined in inst/sql/sql_server/CreateCohortTables.sql
inclusionRules <- data.frame(
cohortDefinitionId = bit64::integer64(),
ruleSequence = integer(),
name = character(),
description = character()
)

# Remove any cohort definitions that do not include the JSON property
cohortDefinitionSet <- cohortDefinitionSet[!(is.null(cohortDefinitionSet$json) | is.na(cohortDefinitionSet$json)), ]
for (i in 1:nrow(cohortDefinitionSet)) {
cohortDefinition <- RJSONIO::fromJSON(content = cohortDefinitionSet$json[i], digits = 23)
if (!is.null(cohortDefinition$InclusionRules)) {
nrOfRules <- length(cohortDefinition$InclusionRules)
if (nrOfRules > 0) {
for (j in 1:nrOfRules) {
ruleName <- cohortDefinition$InclusionRules[[j]]$name
ruleDescription <- cohortDefinition$InclusionRules[[j]]$description
if (is.na(ruleName) || ruleName == "") {
ruleName <- paste0("Unnamed rule (Sequence ", j - 1, ")")
}
if (is.null(ruleDescription)) {
ruleDescription <- ""
}
inclusionRules <- rbind(
inclusionRules,
data.frame(
cohortDefinitionId = bit64::as.integer64(cohortDefinitionSet$cohortId[i]),
ruleSequence = as.integer(j - 1),
name = ruleName,
description = ruleDescription
)
)
}
}
}
}

invisible(inclusionRules)
}
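
A minimal usage sketch (not part of this commit) for the newly exported getCohortInclusionRules(); the inline cohort JSON and cohort name below are hypothetical stand-ins for a definition set normally loaded with getCohortDefinitionSet().

library(CohortGenerator)

# Hypothetical, stripped-down cohort definition with two inclusion rules;
# real JSON would come from ATLAS or getCohortDefinitionSet().
cohortJson <- '{"InclusionRules": [
  {"name": "Age >= 18", "description": "Adults only"},
  {"name": ""}
]}'
cohortDefinitionSet <- data.frame(
  cohortId = 1,
  cohortName = "Example cohort",
  json = cohortJson,
  stringsAsFactors = FALSE
)

# Returned invisibly: one row per rule with cohortDefinitionId, ruleSequence
# (0-based), name, and description; rules without a name get a generated
# placeholder name.
inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)
print(inclusionRules)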
89 changes: 46 additions & 43 deletions R/Export.R
@@ -19,7 +19,14 @@
#' @description
#' This function retrieves the data from the cohort statistics tables and
#' writes them to the inclusion statistics folder specified in the function
#' call.
#' call. NOTE: inclusion rule names are handled in one of two ways:
#'
#' 1. You can specify the cohortDefinitionSet parameter and the inclusion rule
#' names will be extracted from the data.frame.
#' 2. You can insert the inclusion rule names into the database using the
#' insertInclusionRuleNames function of this package.
#'
#' The first approach is preferred, as it avoids the warning that is otherwise emitted.
#'
#' @template Connection
#'
@@ -38,6 +45,8 @@
#'
#' @param databaseId Optional - when specified, the databaseId will be added
#' to the exported results
#'
#' @template CohortDefinitionSet
#'
#' @export
exportCohortStatsTables <- function(connectionDetails,
@@ -48,7 +57,8 @@ exportCohortStatsTables <- function(connectionDetails,
snakeCaseToCamelCase = TRUE,
fileNamesInSnakeCase = FALSE,
incremental = FALSE,
databaseId = NULL) {
databaseId = NULL,
cohortDefinitionSet = NULL) {
if (is.null(connection)) {
# Establish the connection and ensure the cleanup is performed
connection <- DatabaseConnector::connect(connectionDetails)
@@ -58,20 +68,10 @@
if (!dir.exists(cohortStatisticsFolder)) {
dir.create(cohortStatisticsFolder, recursive = TRUE)
}

# Export the stats
exportStats <- function(table,
fileName,
includeDatabaseId) {
data <- getStatsTable(
connection = connection,
table = table,
snakeCaseToCamelCase = snakeCaseToCamelCase,
databaseId = databaseId,
cohortDatabaseSchema = cohortDatabaseSchema,
includeDatabaseId = includeDatabaseId
)


# Internal function to export the stats
exportStats <- function(data,
fileName) {
fullFileName <- file.path(cohortStatisticsFolder, fileName)
ParallelLogger::logInfo("- Saving data to - ", fullFileName)
if (incremental) {
@@ -86,41 +86,44 @@ exportCohortStatsTables <- function(connectionDetails,
.writeCsv(x = data, file = fullFileName)
}
}

tablesToExport <- data.frame(
tableName = cohortTableNames$cohortInclusionTable,
fileName = "cohort_inclusion.csv",
includeDatabaseId = FALSE
tableName = c("cohortInclusionResultTable", "cohortInclusionStatsTable", "cohortSummaryStatsTable", "cohortCensorStatsTable"),
fileName = c("cohort_inc_result.csv", "cohort_inc_stats.csv", "cohort_summary_stats.csv", "cohort_censor_stats.csv")
)

if (is.null(cohortDefinitionSet)) {
warning("No cohortDefinitionSet specified; please make sure you've inserted the inclusion rule names using the insertInclusionRuleNames function.")
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = "cohortInclusionTable",
fileName = "cohort_inclusion.csv"
))
} else {
inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)
exportStats(
data = inclusionRules,
fileName = "cohort_inclusion.csv"
)
}

# Get the cohort statistics
cohortStats <- getCohortStats(
connectionDetails = connectionDetails,
connection = connection,
cohortDatabaseSchema = cohortDatabaseSchema,
databaseId = databaseId,
snakeCaseToCamelCase = snakeCaseToCamelCase,
cohortTableName = cohortTableNames
)
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = cohortTableNames$cohortInclusionResultTable,
fileName = "cohort_inc_result.csv",
includeDatabaseId = TRUE
))
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = cohortTableNames$cohortInclusionStatsTable,
fileName = "cohort_inc_stats.csv",
includeDatabaseId = TRUE
))
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = cohortTableNames$cohortSummaryStatsTable,
fileName = "cohort_summary_stats.csv",
includeDatabaseId = TRUE
))
tablesToExport <- rbind(tablesToExport, data.frame(
tableName = cohortTableNames$cohortCensorStatsTable,
fileName = "cohort_censor_stats.csv",
includeDatabaseId = TRUE
))

for (i in 1:nrow(tablesToExport)) {
fileName <- ifelse(test = fileNamesInSnakeCase,
yes = tablesToExport$fileName[i],
no = SqlRender::snakeCaseToCamelCase(tablesToExport$fileName[i])
)
exportStats(
table = tablesToExport$tableName[i],
fileName = fileName,
includeDatabaseId = tablesToExport$includeDatabaseId[i]
data = cohortStats[[tablesToExport$tableName[i]]],
fileName = fileName
)
}
}
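
A hedged sketch of the two paths described in the updated exportCohortStatsTables() documentation; the connection details, schema, folder, and file paths below are placeholders, not values taken from this commit.

library(CohortGenerator)

connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "postgresql",
  server = "localhost/ohdsi"  # placeholder connection
)
cohortTableNames <- getCohortTableNames(cohortTable = "my_cohort")
cohortDefinitionSet <- getCohortDefinitionSet(
  settingsFileName = "Cohorts.csv",  # placeholder study package paths
  jsonFolder = "cohorts",
  sqlFolder = "sql/sql_server"
)

# Preferred path: pass cohortDefinitionSet so the inclusion rule names are
# taken directly from the cohort JSON and never uploaded to the database.
exportCohortStatsTables(
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "results",
  cohortTableNames = cohortTableNames,
  cohortStatisticsFolder = "export/inclusionStats",
  cohortDefinitionSet = cohortDefinitionSet
)

# Legacy path: omit cohortDefinitionSet; the function then emits a warning and
# reads the names from the cohort inclusion table, which must have been
# populated earlier with insertInclusionRuleNames().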
20 changes: 18 additions & 2 deletions man/exportCohortStatsTables.Rd


21 changes: 21 additions & 0 deletions man/getCohortInclusionRules.Rd


