Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V0.10 release prep #173

Merged
merged 6 commits into from
Jul 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@
^\.Rproj\.user$
^\.idea$
^\.github$
_pkgdown\.yml
compare_versions
deploy.sh
docs
extras
man-roxygen
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: CohortGenerator
Type: Package
Title: An R Package for Cohort Generation Against the OMOP CDM
Version: 0.9.0
Date: 2024-05-28
Version: 0.10.0
Date: 2024-07-14
Authors@R: c(
person("Anthony", "Sena", email = "[email protected]", role = c("aut", "cre")),
person("Jamie", "Gilbert", role = c("aut")),
Expand Down
17 changes: 17 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
CohortGenerator 0.10.0
=======================
New Features
- Add `runCohortGeneration` function (Issue #165)
- Adopt ResultModelManager for handling results data models & uploading. Extend results data model to include information on cohort subsets(#154, #162)
- Remove REMOTES entries for CirceR and Eunomia which are now in CRAN (#145)
- Unit tests now running on all OHDSI DB Platforms (#151)

Bug Fixes
- Negation of cohort subset operator must join on `subject_id` AND `start_date` (#167)
- Allow integer as cohort ID (#146)
- Use native messaging functions for output vs. ParallelLogger (#97)
- Prevent upload of inclusion rule information (#78)
- Expose `colTypes` when working with .csv files (#59)
- Remove `bit64` from package (mostly) (#152)
- Updated documentation for cohort subset negate feature (#111)

CohortGenerator 0.9.0
=======================
- Random sample functionality (for development only) (Issue #129)
Expand Down
6 changes: 3 additions & 3 deletions R/CohortDefinitionSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ createEmptyCohortDefinitionSet <- function(verbose = FALSE) {
cohortDefinitionSetColumns <- colnames(df)
matchingColumns <- intersect(x = colnames(x), y = cohortDefinitionSetColumns)
columnNamesMatch <- setequal(matchingColumns, cohortDefinitionSetColumns)

if (!columnNamesMatch && emitWarning) {
columnsMissing <- setdiff(x = cohortDefinitionSetColumns, y = colnames(x))
warningMessage <- paste0(
Expand Down Expand Up @@ -144,7 +144,7 @@ checkAndFixCohortDefinitionSetDataTypes <- function(x, fixDataTypes = TRUE, emit
# Check if the data types match
# NOTE: createEmptyCohortDefinitionSet() is the reference for the data
# types. cohortId is declared as a numeric but an integer is also fine
dataTypesMatch <- (xDataTypes[1] %in% c('integer', 'double') && all(xDataTypes[2:4] == "character"))
dataTypesMatch <- (xDataTypes[1] %in% c("integer", "double") && all(xDataTypes[2:4] == "character"))
# Create the cohortDefinitionSetSpec from the names/data types for reference
cohortDefinitionSetSpec <- data.frame(
columnName = names(xDataTypes),
Expand Down Expand Up @@ -520,7 +520,7 @@ checkLargeInteger <- function(x, columnName = "cohortId") {
# NOTE: suppressWarnings used to mask
# warning from R which may happen for
# large values in X.
res <- all(suppressWarnings(x%%1) == 0)
res <- all(suppressWarnings(x %% 1) == 0)
if (!isTRUE(res)) {
errorMessage <- paste0("The column ", columnName, " included non-integer values. Please update and re-try")
return(errorMessage)
Expand Down
26 changes: 13 additions & 13 deletions R/CohortStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ insertInclusionRuleNames <- function(connectionDetails = NULL,
}

inclusionRules <- getCohortInclusionRules(cohortDefinitionSet)

# Remove any existing data to prevent duplication
DatabaseConnector::renderTranslateExecuteSql(
connection = connection,
Expand Down Expand Up @@ -129,7 +129,7 @@ getStatsTable <- function(connectionDetails,
}

#' Get Cohort Inclusion Stats Table Data
#'
#'
#' @description
#' This function returns a data frame of the data in the Cohort Inclusion Tables.
#' Results are organized in to a list with 5 different data frames:
Expand Down Expand Up @@ -203,23 +203,23 @@ getCohortStats <- function(connectionDetails,


#' Get Cohort Inclusion Rules from a cohort definition set
#'
#'
#' @description
#' This function returns a data frame of the inclusion rules defined
#' in a cohort definition set.
#'
#'
#' @md
#' @template CohortDefinitionSet
#'
#'
#' @export
getCohortInclusionRules <- function(cohortDefinitionSet) {
checkmate::assertDataFrame(cohortDefinitionSet, min.rows = 1, col.names = "named")
checkmate::assertNames(colnames(cohortDefinitionSet),
must.include = c(
"cohortId",
"cohortName",
"json"
)
must.include = c(
"cohortId",
"cohortName",
"json"
)
)

# Assemble the cohort inclusion rules
Expand All @@ -231,7 +231,7 @@ getCohortInclusionRules <- function(cohortDefinitionSet) {
name = character(),
description = character()
)

# Remove any cohort definitions that do not include the JSON property
cohortDefinitionSet <- cohortDefinitionSet[!(is.null(cohortDefinitionSet$json) | is.na(cohortDefinitionSet$json)), ]
for (i in 1:nrow(cohortDefinitionSet)) {
Expand Down Expand Up @@ -261,6 +261,6 @@ getCohortInclusionRules <- function(cohortDefinitionSet) {
}
}
}

invisible(inclusionRules)
}
}
6 changes: 3 additions & 3 deletions R/CsvHelper.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#' @param file The .csv file to read.
#' @param warnOnCaseMismatch When TRUE, raise a warning if column headings
#' in the .csv are not in snake_case format
#'
#'
#' @param colTypes Corresponds to the `col_types` in the `readr::read_csv` function.
#' One of `NULL`, a [readr::cols()] specification, or
#' a string. See `vignette("readr")` for more details.
Expand All @@ -38,7 +38,7 @@
#' `guess_max` or supply the correct types yourself.
#'
#' Column specifications created by [list()] or [cols()] must contain
#' one column specification for each column.
#' one column specification for each column.
#'
#' Alternatively, you can use a compact string representation where each
#' character represents one column:
Expand All @@ -57,7 +57,7 @@
#' By default, reading a file without a column specification will print a
#' message showing what `readr` guessed they were. To remove this message,
#' set `show_col_types = FALSE` or set `options(readr.show_col_types = FALSE)`.
#'
#'
#' @return
#' A tibble with the .csv contents
#'
Expand Down
49 changes: 26 additions & 23 deletions R/Export.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
#' This function retrieves the data from the cohort statistics tables and
#' writes them to the inclusion statistics folder specified in the function
#' call. NOTE: inclusion rule names are handled in one of two ways:
#'
#' 1. You can specify the cohortDefinitionSet parameter and the inclusion rule
#' names will be extracted from the data.frame.
#'
#' 1. You can specify the cohortDefinitionSet parameter and the inclusion rule
#' names will be extracted from the data.frame.
#' 2. You can insert the inclusion rule names into the database using the
#' insertInclusionRuleNames function of this package.
#'
#' insertInclusionRuleNames function of this package.
#'
#' The first approach is preferred as to avoid the warning emitted.
#'
#' @template Connection
Expand All @@ -45,9 +45,9 @@
#'
#' @param databaseId Optional - when specified, the databaseId will be added
#' to the exported results
#'
#'
#' @template CohortDefinitionSet
#'
#'
#' @param tablePrefix Optional - allows to append a prefix to the exported
#' file names.
#'
Expand All @@ -72,7 +72,7 @@ exportCohortStatsTables <- function(connectionDetails,
if (!dir.exists(cohortStatisticsFolder)) {
dir.create(cohortStatisticsFolder, recursive = TRUE)
}

# Internal function to export the stats
exportStats <- function(data,
fileName,
Expand All @@ -91,7 +91,7 @@ exportCohortStatsTables <- function(connectionDetails,
.writeCsv(x = data, file = fullFileName)
}
}

tablesToExport <- data.frame(
tableName = c("cohortInclusionResultTable", "cohortInclusionStatsTable", "cohortSummaryStatsTable", "cohortCensorStatsTable"),
fileName = c("cohort_inc_result.csv", "cohort_inc_stats.csv", "cohort_summary_stats.csv", "cohort_censor_stats.csv")
Expand Down Expand Up @@ -122,7 +122,7 @@ exportCohortStatsTables <- function(connectionDetails,
snakeCaseToCamelCase = snakeCaseToCamelCase,
cohortTableNames = cohortTableNames
)

for (i in 1:nrow(tablesToExport)) {
fileName <- ifelse(test = fileNamesInSnakeCase,
yes = tablesToExport$fileName[i],
Expand All @@ -143,39 +143,42 @@ exportCohortDefinitionSet <- function(outputFolder, cohortDefinitionSet = NULL)
cdsCohortSubsets <- getSubsetDefinitions(cohortDefinitionSet)
if (length(cdsCohortSubsets) > 0) {
for (i in seq_along(cdsCohortSubsets)) {
cohortSubsets <- rbind(cohortSubsets,
data.frame(
subsetDefinitionId = cdsCohortSubsets[[i]]$definitionId,
json = as.character(cdsCohortSubsets[[i]]$toJSON())
))
cohortSubsets <- rbind(
cohortSubsets,
data.frame(
subsetDefinitionId = cdsCohortSubsets[[i]]$definitionId,
json = as.character(cdsCohortSubsets[[i]]$toJSON())
)
)
}
} else {
# NOTE: In this case the cohortDefinitionSet has no subsets defined
# and so we need to add the additional columns that are defined
# in the function: addCohortSubsetDefinition. To do this,
# in the function: addCohortSubsetDefinition. To do this,
# we'll construct a copy of the cohortDefinitionSet with a single
# subset to get the proper structure and filter it to the
# cohorts of interest.
cdsCopy <- cohortDefinitionSet %>%
cdsCopy <- cohortDefinitionSet %>%
addCohortSubsetDefinition(
cohortSubsetDefintion = createCohortSubsetDefinition(
definitionId = 1,
name="empty",
definitionId = 1,
name = "empty",
subsetOperators = list(
createDemographicSubset()
)
)
) %>% dplyr::filter(cohortId == cohortDefinitionSet$cohortId)
) %>%
dplyr::filter(.data$cohortId == cohortDefinitionSet$cohortId)
cohortDefinitionSet <- cdsCopy
}
# Massage and save the cohort definition set
colsToRename <- c("cohortId", "cohortName", "sql", "json")
colInd <- which(names(cohortDefinitionSet) %in% colsToRename)
names(cohortDefinitionSet)[colInd] <- c("cohortDefinitionId", "cohortName", "sqlCommand", "json")
if (! "description" %in% names(cohortDefinitionSet)) {
if (!"description" %in% names(cohortDefinitionSet)) {
cohortDefinitionSet$description <- ""
}
cohortDefinitions <- cohortDefinitionSet[,intersect(names(cohortDefinitions), names(cohortDefinitionSet))]
cohortDefinitions <- cohortDefinitionSet[, intersect(names(cohortDefinitions), names(cohortDefinitionSet))]
}
writeCsv(
x = cohortDefinitions,
Expand All @@ -199,4 +202,4 @@ createEmptyResult <- function(tableName) {
result <- tibble::as_tibble(t(result), name_repair = "check_unique")
result <- result[FALSE, ]
return(result)
}
}
13 changes: 7 additions & 6 deletions R/NegativeControlCohorts.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ generateNegativeControlOutcomeCohorts <- function(connectionDetails = NULL,
)
assertLargeInteger(negativeControlOutcomeCohortSet$cohortId)
assertLargeInteger(negativeControlOutcomeCohortSet$outcomeConceptId, columnName = "outcomeConceptId")

# Verify that cohort IDs are not repeated in the negative control
# cohort definition set before generating
if (length(unique(negativeControlOutcomeCohortSet$cohortId)) != length(negativeControlOutcomeCohortSet$cohortId)) {
Expand Down Expand Up @@ -186,11 +186,12 @@ createNegativeControlOutcomesQuery <- function(connection,
detectOnDescendants,
negativeControlOutcomeCohortSet) {
selectClause <- ""
for (i in 1:nrow(negativeControlOutcomeCohortSet)){
selectClause <- paste0(selectClause,
"SELECT CAST(", negativeControlOutcomeCohortSet$cohortId[i], " AS BIGINT), ",
"CAST(", negativeControlOutcomeCohortSet$outcomeConceptId[i], " AS BIGINT)"
)
for (i in 1:nrow(negativeControlOutcomeCohortSet)) {
selectClause <- paste0(
selectClause,
"SELECT CAST(", negativeControlOutcomeCohortSet$cohortId[i], " AS BIGINT), ",
"CAST(", negativeControlOutcomeCohortSet$outcomeConceptId[i], " AS BIGINT)"
)
if (i < nrow(negativeControlOutcomeCohortSet)) {
selectClause <- paste0(selectClause, "\nUNION\n")
}
Expand Down
14 changes: 8 additions & 6 deletions R/ResultsDataModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ createResultsDataModel <- function(connectionDetails = NULL,
if (connectionDetails$dbms == "sqlite" & databaseSchema != "main") {
stop("Invalid schema for sqlite, use databaseSchema = 'main'")
}

connection <- DatabaseConnector::connect(connectionDetails)
on.exit(DatabaseConnector::disconnect(connection))

# Create first version of results model:
sql <- SqlRender::readSql(system.file("sql/sql_server/CreateResultsDataModel.sql", package = "CohortGenerator", mustWork = TRUE))
sql <- SqlRender::render(
Expand Down Expand Up @@ -115,9 +115,11 @@ uploadResults <- function(connectionDetails,
#' @export
migrateDataModel <- function(connectionDetails, databaseSchema, tablePrefix = "") {
ParallelLogger::logInfo("Migrating data set")
migrator <- getDataMigrator(connectionDetails = connectionDetails,
databaseSchema = databaseSchema,
tablePrefix = tablePrefix)
migrator <- getDataMigrator(
connectionDetails = connectionDetails,
databaseSchema = databaseSchema,
tablePrefix = tablePrefix
)
migrator$executeMigrations()
migrator$finalize()
}
Expand All @@ -142,4 +144,4 @@ getDataMigrator <- function(connectionDetails, databaseSchema, tablePrefix = "")
migrationPath = "migrations",
packageName = "CohortGenerator"
)
}
}
Loading
Loading