Skip to content

Commit

Permalink
Merge branch 'main' into v0.1.2
Browse files Browse the repository at this point in the history
  • Loading branch information
Marta Alcalde-Herraiz committed Nov 13, 2024
2 parents f7b509a + e5d68a1 commit 2d44f1d
Show file tree
Hide file tree
Showing 12 changed files with 601 additions and 5 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,19 @@ export(plotInObservation)
export(plotObservationPeriod)
export(plotRecordCount)
export(settings)
export(summariseAllConceptCounts)
export(summariseClinicalRecords)
export(summariseConceptCounts)
export(summariseInObservation)
export(summariseMissingData)
export(summariseObservationPeriod)
export(summariseOmopSnapshot)
export(summarisePopulationCharacteristics)
export(summariseRecordCount)
export(suppress)
export(tableAllConceptCounts)
export(tableClinicalRecords)
export(tableMissingData)
export(tableObservationPeriod)
export(tableOmopSnapshot)
export(tablePopulationCharacteristics)
Expand Down
187 changes: 187 additions & 0 deletions R/summariseAllConceptCounts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@

my_getStrataList <- function(sex = FALSE, ageGroup = NULL, year = FALSE){

strata <- as.character()

if(!is.null(ageGroup)){
strata <- append(strata, "age_group")
}

if(sex){
strata <- append(strata, "sex")
}
if(year){
strata <- append(strata, "year")
}
return(strata)
}


checkFeasibility <- function(omopTable, tableName, conceptId){

if (omopgenerics::isTableEmpty(omopTable)){
cli::cli_warn(paste0(tableName, " omop table is empty."))
return(NULL)
}

if (is.na(conceptId)){
cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts."))
return(NULL)
}

y <- omopTable |>
dplyr::filter(!is.na(.data[[conceptId]]))

if (omopgenerics::isTableEmpty(y)){
cli::cli_warn(paste0(tableName, " omop table doesn't contain standard concepts."))
return(NULL)
}
return(TRUE)
}

#' Summarise concept use in patient-level data
#'
#' @param cdm A cdm object
#' @param omopTableName A character vector of the names of the tables to
#' summarise in the cdm object.
#' @param countBy Either "record" for record-level counts or "person" for
#' person-level counts
#' @param year TRUE or FALSE. If TRUE code use will be summarised by year.
#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex.
#' @param ageGroup A list of ageGroup vectors of length two. Code use will be
#' thus summarised by age groups.
#' @return A summarised_result object with results overall and, if specified, by
#' strata.
#' @export
summariseAllConceptCounts <- function(cdm,
omopTableName,
countBy = "record",
year = FALSE,
sex = FALSE,
ageGroup = NULL){

omopgenerics::validateCdmArgument(cdm)
checkCountBy(countBy)
omopgenerics::assertLogical(year, length = 1)
omopgenerics::assertLogical(sex, length = 1)
omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE)

ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]]

strata <- my_getStrataList(sex = sex, year = year, ageGroup = ageGroup)

stratification <- omopgenerics::combineStrata(strata)

result_tables <- purrr::map(omopTableName, function(table){




omopTable <- cdm[[table]] |>
dplyr::ungroup()


conceptId <- standardConcept(omopgenerics::tableName(omopTable))

if (is.null(checkFeasibility(omopTable, table, conceptId))){
return(NULL)
}


indexDate <- startDate(omopgenerics::tableName(omopTable))

x <- omopTable |>
dplyr::filter(!is.na(.data[[conceptId]])) |>
dplyr::left_join(
cdm$concept |> dplyr::select("concept_id", "concept_name"),
by = stats::setNames("concept_id", conceptId)) |>
PatientProfiles::addDemographicsQuery(age = FALSE,
ageGroup = ageGroup,
sex = sex,
indexDate = indexDate, priorObservation = FALSE, futureObservation = FALSE)
if (year){
x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]])))
}

level <- c(conceptId, "concept_name")

groupings <- c(list(level), purrr::map(stratification, ~ c(level, .x)))

result <- list()
if ("record" %in% countBy){

stratified_result <- x |>
dplyr::group_by(dplyr::across(dplyr::all_of(c(level,strata)))) |>
dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|>
dplyr::collect()


grouped_results <- purrr::map(groupings, \(g) {
stratified_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(g))) |>
dplyr::summarise("estimate_value" = as.integer(sum(.data$estimate_value, na.rm = TRUE)), .groups = "drop")

})

result_record <- purrr::reduce(grouped_results, dplyr::bind_rows)|>
dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|>
dplyr::mutate("estimate_name" = "record_count")
result<-dplyr::bind_rows(result,result_record)
}

if ("person" %in% countBy){

grouped_results <- purrr::map(groupings, \(g) {
x |>
dplyr::group_by(dplyr::across(dplyr::all_of(g))) |>
dplyr::summarise("estimate_value" = as.integer(dplyr::n()), .groups = "drop")|>
dplyr::collect()
})

result_person <- purrr::reduce(grouped_results, dplyr::bind_rows) |>
dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall"))) |>
dplyr::mutate("estimate_name" = "person_count")
result<-dplyr::bind_rows(result,result_person)
}
result<- result |>
dplyr::mutate("omop_table" = table,
"variable_level" = as.character(.data[[conceptId]])) |>

dplyr::select(-dplyr::all_of(conceptId))
return(result)
})
if (rlang::is_empty(purrr::compact(result_tables))){
return(omopgenerics::emptySummarisedResult())
}

sr <-purrr::compact(result_tables) |>
purrr::reduce(dplyr::union)|>
dplyr::mutate(
result_id = 1L,
cdm_name = omopgenerics::cdmName(cdm)
) |>
visOmopResults::uniteGroup(cols = "omop_table") |>
visOmopResults::uniteStrata(cols = strata) |>
visOmopResults::uniteAdditional() |>
dplyr::mutate(
"estimate_value" = as.character(.data$estimate_value),
"estimate_type" = "integer"
) |>
dplyr::rename("variable_name" = "concept_name")
# |>
# dplyr::select(!c())


settings <- dplyr::tibble(
result_id = unique(sr$result_id),
package_name = "omopSketch",
package_version = as.character(utils::packageVersion("OmopSketch")),
result_type = "summarise_all_concept_counts"
)
sr <- sr |>
omopgenerics::newSummarisedResult(settings = settings)

return(sr)

}

156 changes: 156 additions & 0 deletions R/summariseMissingData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#' Summarise missing data in omop tables
#'
#' @param cdm A cdm object
#' @param omopTableName A character vector of the names of the tables to
#' summarise in the cdm object.
#' @param col A character vector of column names to check for missing values.
#' If `NULL`, all columns in the specified tables are checked. Default is `NULL`.
#' @param sex TRUE or FALSE. If TRUE code use will be summarised by sex.
#' @param year TRUE or FALSE. If TRUE code use will be summarised by year.
#' @param ageGroup A list of ageGroup vectors of length two. Code use will be
#' thus summarised by age groups.
#' @return A summarised_result object with results overall and, if specified, by
#' strata.
#' @export
summariseMissingData <- function(cdm,
omopTableName,
col = NULL,
sex = FALSE,
year = FALSE,
ageGroup = NULL){


omopgenerics::validateCdmArgument(cdm)

omopgenerics::assertLogical(sex, length = 1)
omopgenerics::assertChoice(omopTableName,choices = omopgenerics::omopTables(), unique = TRUE)


ageGroup <- omopgenerics::validateAgeGroupArgument(ageGroup, ageGroupName = "")[[1]]

strata <- my_getStrataList(sex = sex, ageGroup = ageGroup, year = year)
stratification <- c(list(character()),omopgenerics::combineStrata(strata))

result_tables <- purrr::map(omopTableName, function(table) {

if (omopgenerics::isTableEmpty(cdm[[table]])){
cli::cli_warn(paste0(table, " omop table is empty."))
return(NULL)
}

omopTable <- cdm[[table]]
col_table <- intersect(col, colnames(omopTable))
if (is.null(col_table) | rlang::is_empty(col_table)){
col_table<-colnames(omopTable)
}

indexDate <- startDate(omopgenerics::tableName(omopTable))
x <- omopTable |> PatientProfiles::addDemographicsQuery(age = FALSE, ageGroup = ageGroup, sex = sex, indexDate = indexDate)
if (year){
x <- x|> dplyr::mutate(year = as.character(clock::get_year(.data[[indexDate]])))
}

result_columns <- purrr::map(col_table, function(c) {

stratified_result <- x |>
dplyr::group_by(dplyr::across(dplyr::all_of(strata))) |>
dplyr::summarise(
na_count = sum(as.integer(is.na(.data[[c]])), na.rm = TRUE),
total_count = dplyr::n(),
.groups = "drop"
) |>
dplyr::collect()

# Group results for each level of stratification
grouped_results <- purrr::map(stratification, function(g) {
stratified_result |>
dplyr::group_by(dplyr::across(dplyr::all_of(g))) |>
dplyr::summarise(
na_count = sum(.data$na_count, na.rm = TRUE),
total_count = sum(.data$total_count, na.rm = TRUE),
colName = c,
.groups = "drop"
) |>
dplyr::mutate(na_percentage = dplyr::if_else(.data$total_count > 0, (.data$na_count / .data$total_count) * 100, 0))
})

return(purrr::reduce(grouped_results, dplyr::bind_rows))

})

res <- purrr::reduce(result_columns, dplyr::union)|>
dplyr::mutate(omop_table = table)

warningDataRequire(cdm = cdm, res = res, table = table)

return(res)
})
if (rlang::is_empty(purrr::compact(result_tables))){
return(omopgenerics::emptySummarisedResult())
}


result <- purrr::compact(result_tables) |>
purrr::reduce(dplyr::union)|>
dplyr::mutate(dplyr::across(dplyr::all_of(strata), ~ dplyr::coalesce(., "overall")))|>
dplyr::mutate(
na_count = as.double(.data$na_count), # Cast na_count to double
na_percentage = as.double(.data$na_percentage)
)|>
tidyr::pivot_longer(
cols = c(.data$na_count, .data$na_percentage),
names_to = "estimate_name",
values_to = "estimate_value"
)


sr <- result |>
dplyr::mutate(
result_id = 1L,
cdm_name = omopgenerics::cdmName(cdm),
) |>
visOmopResults::uniteGroup(cols = "omop_table") |>
visOmopResults::uniteStrata(cols = strata) |>
visOmopResults::uniteAdditional() |>
dplyr::mutate(
"estimate_value" = as.character(.data$estimate_value),
"estimate_type" = "integer",
"variable_level" = NA_character_
) |>
dplyr::rename("variable_name" = "colName") |>
dplyr::select(!c(.data$total_count))

settings <- dplyr::tibble(
result_id = unique(sr$result_id),
package_name = "omopSketch",
package_version = as.character(utils::packageVersion("OmopSketch")),
result_type = "summarise_missing_data"
)
sr <- sr |>
omopgenerics::newSummarisedResult(settings = settings)


return(sr)

}

warningDataRequire <- function(cdm, table, res){
required_cols <- omopgenerics::omopTableFields(CDMConnector::cdmVersion(cdm))|>
dplyr::filter(.data$cdm_table_name==table)|>
dplyr::filter(.data$is_required==TRUE)|>
dplyr::pull(.data$cdm_field_name)
warning_columns <- res |>
dplyr::filter(.data$colName %in% required_cols)|>
dplyr::filter(.data$na_count>0)|>
dplyr::distinct(.data$colName)|>
dplyr::pull()

if (length(warning_columns) > 0) {
cli::cli_warn(c(
"These columns contain missing values, which are not permitted:",
"{.val {warning_columns}}"
))
}
}


Loading

0 comments on commit 2d44f1d

Please sign in to comment.