diff --git a/NAMESPACE b/NAMESPACE index edeb014b3..41c62aa02 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -62,6 +62,7 @@ S3method(plot,tbl_regression) S3method(plot,tbl_uvregression) S3method(print,gtsummary) S3method(print,tbl_split) +S3method(tbl_filter,tbl_hierarchical) S3method(tbl_regression,brmsfit) S3method(tbl_regression,crr) S3method(tbl_regression,default) @@ -77,6 +78,7 @@ S3method(tbl_regression,multinom) S3method(tbl_regression,stanreg) S3method(tbl_regression,survreg) S3method(tbl_regression,workflow) +S3method(tbl_sort,tbl_hierarchical) S3method(tbl_split,gtsummary) S3method(tbl_survfit,data.frame) S3method(tbl_survfit,list) @@ -210,11 +212,13 @@ export(tbl_butcher) export(tbl_continuous) export(tbl_cross) export(tbl_custom_summary) +export(tbl_filter) export(tbl_hierarchical) export(tbl_hierarchical_count) export(tbl_likert) export(tbl_merge) export(tbl_regression) +export(tbl_sort) export(tbl_split) export(tbl_stack) export(tbl_strata) diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R new file mode 100644 index 000000000..56b968041 --- /dev/null +++ b/R/filter_tbl_hierarchical.R @@ -0,0 +1,125 @@ +#' Filter Hierarchical Tables +#' +#' @description `r lifecycle::badge('experimental')`\cr +#' +#' This function is used to filter hierarchical table rows by frequency row sum. +#' +#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr +#' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. +#' @param t (scalar `numeric`)\cr +#' Threshold used to determine which rows will be retained. +#' @param gt (scalar `logical`)\cr +#' Whether to filter for row sums greater than `t` or less than `t`. Default is greater than (`gt = TRUE`). +#' @param eq (scalar `logical`)\cr +#' Whether to include the value of `t` in the filtered range, i.e. whether to use exclusive comparators (`>`, `<`) or +#' inclusive comparators (`>=`, `<=`) when filtering. Default is `FALSE`. +#' @param .stat (`string`)\cr +#' Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels. +#' Default is `"n"`. +#' @inheritParams rlang::args_dots_empty +#' +#' @return A `gtsummary` of the same class as `x`. +#' +#' @name filter_tbl_hierarchical +#' @seealso [tbl_sort()] +#' +#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true")) +#' ADAE_subset <- cards::ADAE |> +#' dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) +#' +#' tbl <- tbl_hierarchical( +#' data = ADAE_subset, +#' variables = c(SEX, RACE, AETERM), +#' by = TRTA, +#' denominator = cards::ADSL |> mutate(TRTA = ARM), +#' id = USUBJID, +#' overall_row = TRUE +#' ) +#' +#' # Example 1 - Row Sums > 10 ------------------ +#' tbl_filter(tbl, t = 10) +#' +#' # Example 2 - Row Sums <= 5 ------------------ +#' tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE) +NULL + +#' @rdname filter_tbl_hierarchical +#' @export +tbl_filter <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + + UseMethod("tbl_filter") +} + +#' @export +#' @rdname filter_tbl_hierarchical +tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...) { + set_cli_abort_call() + + # process and check inputs ---------------------------------------------------------------------- + check_numeric(t) + check_scalar_logical(gt) + check_scalar_logical(eq) + check_string(.stat) + + outer_cols <- sapply( + x$table_body |> select(cards::all_ard_groups("names")), + function(x) dplyr::last(unique(stats::na.omit(x))) + ) + + # get row sums ---------------------------------------------------------------------------------- + x <- .append_hierarchy_row_sums(x, .stat) + + # keep all summary rows (removed later if no sub-rows are kept) + if (!gt) x$table_body$sum_row[x$table_body$variable %in% outer_cols] <- t - 1 + + # create and apply filtering expression --------------------------------------------------------- + filt_expr <- paste( + "sum_row", + dplyr::case_when( + gt && eq ~ ">=", + !gt && eq ~ "<=", + !gt ~ "<", + TRUE ~ ">" + ), + t + ) + x$table_body <- x$table_body |> + dplyr::filter(!!parse_expr(filt_expr)) + + # remove any summary rows with no sub-rows still present ---------------------------------------- + if (!gt) { + for (i in rev(seq_along(outer_cols))) { + gp_empty <- x$table_body |> + dplyr::group_by(across(c(names(outer_cols[1:i]), paste0(names(outer_cols[1:i]), "_level")))) |> + dplyr::summarize(is_empty := dplyr::n() == 1) |> + stats::na.omit() + + if (!all(!gp_empty$is_empty)) { + x$table_body <- x$table_body |> + dplyr::left_join( + gp_empty, + by = gp_empty |> select(cards::all_ard_groups()) |> names() + ) |> + dplyr::filter(!is_empty | is.na(is_empty)) |> + dplyr::select(-"is_empty") + } else { + break + } + } + if (nrow(x$table_body) > 0) { + cli::cli_inform( + "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept + regardless of whether they meet the filtering criteria themselves.", + .frequency = "once", + .frequency_id = "sum_rows_lt" + ) + } + } + + x$table_body <- x$table_body |> + dplyr::select(-"sum_row") + + x +} diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R new file mode 100644 index 000000000..623ac1b7f --- /dev/null +++ b/R/sort_tbl_hierarchical.R @@ -0,0 +1,235 @@ +#' Sort Hierarchical Tables +#' +#' @description `r lifecycle::badge('experimental')`\cr +#' +#' This function is used to sort hierarchical tables. Options for sorting criteria are: +#' +#' 1. Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are +#' ordered accordingly (default). +#' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables +#' in ascending alphanumeric order (i.e. A to Z). +#' +#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr +#' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. +#' @param sort (`string`)\cr +#' Specifies sorting to perform. Values must be one of `c("frequency", "alphanumeric")`. Default is `"frequency"`. +#' @param desc (scalar `logical`)\cr +#' Whether to sort rows in ascending or descending order. Default is descending (`TRUE`) when `sort = "frequency"` +#' and ascending (`FALSE`) when `sort = "alphanumeric"`. +#' @param .stat (`string`)\cr +#' Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for +#' all hierarchy levels. Default is `"n"`. +#' @inheritParams rlang::args_dots_empty +#' +#' @return A `gtsummary` of the same class as `x`. +#' +#' @name sort_tbl_hierarchical +#' @seealso [tbl_filter()] +#' +#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true")) +#' ADAE_subset <- cards::ADAE |> +#' dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) +#' +#' tbl <- tbl_hierarchical( +#' data = ADAE_subset, +#' variables = c(SEX, RACE, AETERM), +#' by = TRTA, +#' denominator = cards::ADSL |> mutate(TRTA = ARM), +#' id = USUBJID, +#' overall_row = TRUE +#' ) +#' +#' # Example 1 - Descending Frequency Sort ------------------ +#' tbl_sort(tbl) +#' +#' # Example 2 - Descending Alphanumeric Sort (Z to A) ------ +#' tbl_sort(tbl, sort = "alphanumeric", desc = TRUE) +NULL + +#' @rdname sort_tbl_hierarchical +#' @export +tbl_sort <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + + UseMethod("tbl_sort") +} + +#' @rdname sort_tbl_hierarchical +#' @export +tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...) { + set_cli_abort_call() + + # process and check inputs ---------------------------------------------------------------------- + check_scalar_logical(desc) + check_string(.stat) + + if (!sort %in% c("frequency", "alphanumeric")) { + cli::cli_abort( + "The {.arg sort} argument must be either {.val frequency} or {.val alphanumeric}.", + call = get_cli_abort_call() + ) + } + + overall <- "..ard_hierarchical_overall.." %in% x$table_body$variable + outer_cols <- sapply( + x$table_body |> select(cards::all_ard_groups("names")), + function(x) dplyr::last(unique(stats::na.omit(x))) + ) + inner_col <- setdiff( + x$table_body$variable, + x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() + ) + + if (sort == "alphanumeric") { + # summary rows remain at the top of each sub-section + rep_str <- if (desc) "zzzz" else " " + + # overall row always appears first + if (desc && overall) { + ovrl_row <- x$table_body[1, ] + x$table_body <- x$table_body[-1, ] + } + + # sort by label ------------------------------------------------------------------------------- + sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label") + + x$table_body <- x$table_body |> + dplyr::rowwise() |> + dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |> + dplyr::ungroup() |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |> + dplyr::arrange(across(all_of(sort_cols), ~ if (desc) dplyr::desc(.x) else .x)) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., paste0("^", rep_str, "$"), NA))) |> + select(-"inner_var") + + if (desc && overall) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body) + } else { + # get row sums -------------------------------------------------------------------------------- + x <- .append_hierarchy_row_sums(x, .stat) + + # append outer hierarchy level sums in each row to sort at all levels ------------------------- + for (g in names(outer_cols)) { + x$table_body <- x$table_body |> dplyr::group_by(across(all_of(c(g, paste0(g, "_level")))), .add = TRUE) + x$table_body <- x$table_body |> + dplyr::left_join( + x$table_body |> + dplyr::summarize(!!paste0("sum_", g) := dplyr::first(.data$sum_row)), + by = x$table_body |> dplyr::group_vars() + ) + } + + # summary rows remain at the top of each sub-section + x$table_body <- x$table_body |> + dplyr::ungroup() |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., " "))) |> + dplyr::rowwise() |> + dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) " " else .data$variable) |> + dplyr::ungroup() + + # sort by row sum ----------------------------------------------------------------------------- + sort_cols <- c(rbind( + x$table_body |> select(cards::all_ard_groups("names")) |> names(), + x$table_body |> select(starts_with("sum_group")) |> names(), + x$table_body |> select(cards::all_ard_groups("levels")) |> names() + ), "inner_var", "sum_row", "label") + + x$table_body <- x$table_body |> + dplyr::arrange(across(all_of(sort_cols), ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., "^ $", NA))) |> + select(-starts_with("sum_"), -"inner_var") + } + + x +} + +.append_hierarchy_row_sums <- function(x, .stat) { + cards <- x$cards$tbl_hierarchical + + if (!.stat %in% cards$stat_name) { + cli::cli_abort( + "The {.arg .stat} argument is {.val {(.stat)}} but this statistic is not present in {.arg x}. For all valid + statistic options see the {.val stat_name} column of {.code x$cards$tbl_hierarchical}.", + call = get_cli_abort_call() + ) + } + + by_cols <- if (ncol(x$table_body |> select(starts_with("stat_"))) > 1) c("group1", "group1_level") else NA + outer_cols <- sapply( + x$table_body |> select(cards::all_ard_groups("names")), + function(x) dplyr::last(unique(stats::na.omit(x))) + ) + + # update logical variable_level entries from overall row to character + cards$variable_level[cards$variable == "..ard_hierarchical_overall.."] <- x$table_body |> + dplyr::filter(.data$variable == "..ard_hierarchical_overall..") |> + dplyr::pull("label") |> + as.list() + + # extract row sums ------------------------------------------------------------------------------ + cards <- cards |> + dplyr::filter(.data$stat_name == .stat, .data$variable %in% x$table_body$variable) |> + dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> + dplyr::summarise(sum_row = sum(unlist(.data$stat))) |> + dplyr::ungroup() |> + dplyr::rename(label = "variable_level") |> + tidyr::unnest(cols = everything()) + + # match cards names to x$table_body ------------------------------------------------------------- + if (length(by_cols) > 1) { + names(cards)[grep("group", names(cards))] <- x$table_body |> + select(cards::all_ard_groups()) |> + names() + } + cards[cards$variable == "..ard_hierarchical_overall..", 1] <- "..ard_hierarchical_overall.." + + # fill in NAs to align cards layout with x$table_body ------------------------------------------- + cards <- cards |> + dplyr::rowwise() |> + dplyr::mutate(across( + cards::all_ard_groups(), + ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && .data$variable == outer_cols[dplyr::cur_column()]) { + .data$variable + } else if (is.na(.x) && .data$variable %in% outer_cols[gsub("_level", "", dplyr::cur_column())]) { + .data$label + } else { + .x + } + )) + + # for any variables not in include, calculate group sums ---------------------------------------- + if (!all(outer_cols %in% cards$variable)) { + gp_vars <- outer_cols[outer_cols %in% setdiff(outer_cols, cards$variable)] + gp_cols <- names(gp_vars) + + cli::cli_inform( + "Not all hierarchy variables present in the table were included in the {.arg include} argument. + These variables ({gp_vars}) do not have event rate data available so the total sum of the event + rates for this hierarchy section will be used instead. To use event rates for all sections of the table, + set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." + ) + + for (i in seq_along(gp_cols)) { + cards <- cards |> + dplyr::bind_rows( + cards |> + dplyr::filter(.data$variable != "..ard_hierarchical_overall..") |> + dplyr::group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |> + dplyr::summarize(sum_row = sum(.data$sum_row)) |> + dplyr::mutate( + variable = .data[[gp_cols[i]]], + label = .data[[paste0(gp_cols[i], "_level")]] + ) + ) + } + } + + # append row sums to x$table_body --------------------------------------------------------------- + x$table_body <- x$table_body |> + dplyr::left_join( + cards, + by = c(cards |> select(-"sum_row") |> names()) + ) + + x +} diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R index 789244787..908242541 100644 --- a/R/tbl_hierarchical.R +++ b/R/tbl_hierarchical.R @@ -216,6 +216,9 @@ internal_tbl_hierarchical <- function(data, if ("..ard_hierarchical_overall.." %in% variables) { cli::cli_abort("The {.arg variables} argument cannot include a column named {.val ..ard_hierarchical_overall..}.") } + if (!all(variables == unique(variables))) { + cli::cli_abort("The {.arg variables} argument cannot contain repeated variables.") + } # evaluate tidyselect cards::process_selectors(data[variables], include = {{ include }}) diff --git a/inst/WORDLIST b/inst/WORDLIST index d553a93aa..5b59a5c15 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -46,6 +46,7 @@ cardx cli codebase coef +comparators conf coxph customizability diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd new file mode 100644 index 000000000..a84a127f9 --- /dev/null +++ b/man/filter_tbl_hierarchical.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter_tbl_hierarchical.R +\name{filter_tbl_hierarchical} +\alias{filter_tbl_hierarchical} +\alias{tbl_filter} +\alias{tbl_filter.tbl_hierarchical} +\title{Filter Hierarchical Tables} +\usage{ +tbl_filter(x, ...) + +\method{tbl_filter}{tbl_hierarchical}(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...) +} +\arguments{ +\item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr +A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.} + +\item{...}{These dots are for future extensions and must be empty.} + +\item{t}{(scalar \code{numeric})\cr +Threshold used to determine which rows will be retained.} + +\item{gt}{(scalar \code{logical})\cr +Whether to filter for row sums greater than \code{t} or less than \code{t}. Default is greater than (\code{gt = TRUE}).} + +\item{eq}{(scalar \code{logical})\cr +Whether to include the value of \code{t} in the filtered range, i.e. whether to use exclusive comparators (\code{>}, \code{<}) or +inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FALSE}.} + +\item{.stat}{(\code{string})\cr +Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels. +Default is \code{"n"}.} +} +\value{ +A \code{gtsummary} of the same class as \code{x}. +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr + +This function is used to filter hierarchical table rows by frequency row sum. +} +\examples{ +\dontshow{if ((identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +ADAE_subset <- cards::ADAE |> + dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]) + +tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE +) + +# Example 1 - Row Sums > 10 ------------------ +tbl_filter(tbl, t = 10) + +# Example 2 - Row Sums <= 5 ------------------ +tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE) +\dontshow{\}) # examplesIf} +} +\seealso{ +\code{\link[=tbl_sort]{tbl_sort()}} +} diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd new file mode 100644 index 000000000..45c165614 --- /dev/null +++ b/man/sort_tbl_hierarchical.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sort_tbl_hierarchical.R +\name{sort_tbl_hierarchical} +\alias{sort_tbl_hierarchical} +\alias{tbl_sort} +\alias{tbl_sort.tbl_hierarchical} +\title{Sort Hierarchical Tables} +\usage{ +tbl_sort(x, ...) + +\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...) +} +\arguments{ +\item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr +A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.} + +\item{...}{These dots are for future extensions and must be empty.} + +\item{sort}{(\code{string})\cr +Specifies sorting to perform. Values must be one of \code{c("frequency", "alphanumeric")}. Default is \code{"frequency"}.} + +\item{desc}{(scalar \code{logical})\cr +Whether to sort rows in ascending or descending order. Default is descending (\code{TRUE}) when \code{sort = "frequency"} +and ascending (\code{FALSE}) when \code{sort = "alphanumeric"}.} + +\item{.stat}{(\code{string})\cr +Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for +all hierarchy levels. Default is \code{"n"}.} +} +\value{ +A \code{gtsummary} of the same class as \code{x}. +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr + +This function is used to sort hierarchical tables. Options for sorting criteria are: +\enumerate{ +\item Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are +ordered accordingly (default). +\item Alphanumeric - rows are ordered alphanumerically by label text. By default, \code{\link[=tbl_hierarchical]{tbl_hierarchical()}} sorts tables +in ascending alphanumeric order (i.e. A to Z). +} +} +\examples{ +\dontshow{if ((identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +ADAE_subset <- cards::ADAE |> + dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]) + +tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE +) + +# Example 1 - Descending Frequency Sort ------------------ +tbl_sort(tbl) + +# Example 2 - Descending Alphanumeric Sort (Z to A) ------ +tbl_sort(tbl, sort = "alphanumeric", desc = TRUE) +\dontshow{\}) # examplesIf} +} +\seealso{ +\code{\link[=tbl_filter]{tbl_filter()}} +} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 5bbafe8d7..148701846 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -86,6 +86,8 @@ reference: - subtitle: Hierarchical Summary Tables - contents: - tbl_hierarchical + - tbl_sort.tbl_hierarchical + - tbl_filter.tbl_hierarchical - add_overall.tbl_hierarchical - subtitle: Likert Summary Tables - contents: diff --git a/tests/testthat/_snaps/filter_tbl_hierarchical.md b/tests/testthat/_snaps/filter_tbl_hierarchical.md new file mode 100644 index 000000000..237023eac --- /dev/null +++ b/tests/testthat/_snaps/filter_tbl_hierarchical.md @@ -0,0 +1,59 @@ +# tbl_filter.tbl_hierarchical() works + + Code + as.data.frame(tbl) + Output + **Sex** \n    **Race** \n        **Reported Term for the Adverse Event** **Placebo** \nN = 86 **Xanomeline High Dose** \nN = 84 **Xanomeline Low Dose** \nN = 84 + 1 Number of patients with event 26 (30%) 42 (50%) 40 (48%) + 2 F 13 (25%) 18 (45%) 23 (46%) + 3 WHITE 10 (21%) 14 (41%) 20 (45%) + 4 APPLICATION SITE ERYTHEMA 2 (4.2%) 5 (15%) 5 (11%) + 5 APPLICATION SITE PRURITUS 2 (4.2%) 8 (24%) 10 (23%) + 6 ERYTHEMA 6 (13%) 6 (18%) 8 (18%) + 7 M 13 (39%) 24 (55%) 17 (50%) + 8 WHITE 12 (40%) 22 (55%) 17 (50%) + 9 APPLICATION SITE ERYTHEMA 1 (3.3%) 10 (25%) 7 (21%) + 10 APPLICATION SITE PRURITUS 1 (3.3%) 12 (30%) 10 (29%) + 11 DIARRHOEA 6 (20%) 3 (7.5%) 2 (5.9%) + 12 ERYTHEMA 3 (10%) 5 (13%) 6 (18%) + +# tbl_filter.tbl_hierarchical() error messaging works + + Code + tbl_filter(data.frame(), t = 10) + Condition + Error in `check_class()`: + ! The `x` argument must be class , not a data frame. + +--- + + Code + tbl_filter(tbl, t = "10") + Condition + Error in `tbl_filter()`: + ! The `t` argument must be numeric. + +--- + + Code + tbl_filter(tbl, t = "10", gt = "yes") + Condition + Error in `tbl_filter()`: + ! The `t` argument must be numeric. + +--- + + Code + tbl_filter(tbl, t = "10", eq = "no") + Condition + Error in `tbl_filter()`: + ! The `t` argument must be numeric. + +--- + + Code + tbl_filter(tbl, t = "10", .stat = "pct") + Condition + Error in `tbl_filter()`: + ! The `t` argument must be numeric. + diff --git a/tests/testthat/_snaps/sort_tbl_hierarchical.md b/tests/testthat/_snaps/sort_tbl_hierarchical.md new file mode 100644 index 000000000..99a9f5f65 --- /dev/null +++ b/tests/testthat/_snaps/sort_tbl_hierarchical.md @@ -0,0 +1,56 @@ +# tbl_sort.tbl_hierarchical() works + + Code + as.data.frame(tbl) + Output + **Sex** \n    **Race** \n        **Reported Term for the Adverse Event** **Placebo** \nN = 86 **Xanomeline High Dose** \nN = 84 **Xanomeline Low Dose** \nN = 84 + 1 Number of patients with event 26 (30%) 42 (50%) 40 (48%) + 2 F 13 (25%) 18 (45%) 23 (46%) + 3 WHITE 10 (21%) 14 (41%) 20 (45%) + 4 APPLICATION SITE PRURITUS 2 (4.2%) 8 (24%) 10 (23%) + 5 ERYTHEMA 6 (13%) 6 (18%) 8 (18%) + 6 APPLICATION SITE ERYTHEMA 2 (4.2%) 5 (15%) 5 (11%) + 7 DIARRHOEA 2 (4.2%) 0 (0%) 3 (6.8%) + 8 BLACK OR AFRICAN AMERICAN 3 (60%) 4 (67%) 3 (50%) + 9 APPLICATION SITE PRURITUS 2 (40%) 2 (33%) 2 (33%) + 10 ERYTHEMA 0 (0%) 1 (17%) 1 (17%) + 11 ATRIOVENTRICULAR BLOCK SECOND DEGREE 0 (0%) 1 (17%) 0 (0%) + 12 DIARRHOEA 1 (20%) 0 (0%) 0 (0%) + 13 M 13 (39%) 24 (55%) 17 (50%) + 14 WHITE 12 (40%) 22 (55%) 17 (50%) + 15 APPLICATION SITE PRURITUS 1 (3.3%) 12 (30%) 10 (29%) + 16 APPLICATION SITE ERYTHEMA 1 (3.3%) 10 (25%) 7 (21%) + 17 ERYTHEMA 3 (10%) 5 (13%) 6 (18%) + 18 DIARRHOEA 6 (20%) 3 (7.5%) 2 (5.9%) + 19 ATRIOVENTRICULAR BLOCK SECOND DEGREE 2 (6.7%) 2 (5.0%) 0 (0%) + 20 BLACK OR AFRICAN AMERICAN 1 (33%) 1 (33%) 0 (NA%) + 21 APPLICATION SITE PRURITUS 1 (33%) 0 (0%) 0 (NA%) + 22 DIARRHOEA 0 (0%) 1 (33%) 0 (NA%) + 23 ERYTHEMA 0 (0%) 1 (33%) 0 (NA%) + 24 AMERICAN INDIAN OR ALASKA NATIVE 0 (NA%) 1 (100%) 0 (NA%) + 25 ERYTHEMA 0 (NA%) 1 (100%) 0 (NA%) + +# tbl_sort.tbl_hierarchical() error messaging works + + Code + tbl_sort(data.frame()) + Condition + Error in `check_class()`: + ! The `x` argument must be class , not a data frame. + +--- + + Code + tbl_sort(tbl, sort = "no_sorting") + Condition + Error in `tbl_sort()`: + ! The `sort` argument must be either "frequency" or "alphanumeric". + +--- + + Code + tbl_sort(tbl, .stat = "mean") + Condition + Error in `tbl_sort()`: + ! The `.stat` argument is "mean" but this statistic is not present in `x`. For all valid statistic options see the "stat_name" column of `x$cards$tbl_hierarchical`. + diff --git a/tests/testthat/test-filter_tbl_hierarchical.R b/tests/testthat/test-filter_tbl_hierarchical.R new file mode 100644 index 000000000..13e0906cc --- /dev/null +++ b/tests/testthat/test-filter_tbl_hierarchical.R @@ -0,0 +1,138 @@ +skip_on_cran() + +ADAE_subset <- cards::ADAE |> + dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) + +tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE +) + +test_that("tbl_filter.tbl_hierarchical() works", { + withr::local_options(width = 200) + + # no errors + expect_silent(tbl <- tbl_filter(tbl, t = 10)) + expect_snapshot(tbl |> as.data.frame()) + + # .stat argument works + expect_silent(tbl <- tbl_filter(tbl, t = 10, .stat = "p")) +}) + +test_that("tbl_filter.tbl_hierarchical(gt) works", { + # gt = TRUE + expect_silent(tbl_gt <- tbl_filter(tbl, t = 10)) + + # gt = FALSE + expect_message(tbl_lt <- tbl_filter(tbl, t = 10, gt = FALSE)) + + expect_equal( + dplyr::inner_join( + tbl_gt$table_body, + tbl_lt$table_body, + by = names(tbl_gt$table_body) + ) |> + dplyr::filter(variable == "AETERM") |> + nrow(), + 0 + ) + + expect_equal( + sum( + tbl_gt$table_body |> + dplyr::filter(variable == "AETERM") |> + nrow(), + tbl_lt$table_body |> + dplyr::filter(variable == "AETERM") |> + nrow() + ), + tbl$table_body |> + dplyr::filter(variable == "AETERM") |> + nrow() + ) +}) + +test_that("tbl_filter.tbl_hierarchical(eq) works", { + # gt = TRUE, eq = FALSE + expect_silent(tbl_gt <- tbl_filter(tbl, t = 12)) + + # gt = TRUE, eq = TRUE + expect_silent(tbl_geq <- tbl_filter(tbl, t = 12, eq = TRUE)) + expect_gt(nrow(tbl_geq$table_body), nrow(tbl_gt$table_body)) + + # gt = FALSE, eq = FALSE + expect_silent(tbl_lt <- tbl_filter(tbl, t = 12, gt = FALSE)) + + # gt = TRUE, eq = TRUE + expect_silent(tbl_leq <- tbl_filter(tbl, t = 12, gt = FALSE, eq = TRUE)) + expect_lt(nrow(tbl_lt$table_body), nrow(tbl_leq$table_body)) +}) + +test_that("tbl_filter.tbl_hierarchical() returns empty table when all rows filtered out", { + expect_silent(tbl <- tbl_filter(tbl, t = 200)) + expect_equal(nrow(tbl$table_body), 0) +}) + +test_that("tbl_filter.tbl_hierarchical() works with only one variable in x", { + tbl_single <- tbl_hierarchical( + data = ADAE_subset, + variables = AETERM, + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE + ) + + expect_silent(tbl_single <- tbl_filter(tbl_single, t = 20)) + expect_equal(nrow(tbl_single$table_body), 4) +}) + +test_that("tbl_filter.tbl_hierarchical() works when some variables not included in x", { + tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + include = c(SEX, AETERM), + overall_row = TRUE + ) + + expect_message(tbl_filter(tbl, t = 10)) +}) + +test_that("tbl_filter.tbl_hierarchical() error messaging works", { + # invalid x input + expect_snapshot( + tbl_filter(data.frame(), t = 10), + error = TRUE + ) + + # invalid t input + expect_snapshot( + tbl_filter(tbl, t = "10"), + error = TRUE + ) + + # invalid gt input + expect_snapshot( + tbl_filter(tbl, t = "10", gt = "yes"), + error = TRUE + ) + + # invalid eq input + expect_snapshot( + tbl_filter(tbl, t = "10", eq = "no"), + error = TRUE + ) + + # invalid .stat input + expect_snapshot( + tbl_filter(tbl, t = "10", .stat = "pct"), + error = TRUE + ) +}) diff --git a/tests/testthat/test-sort_tbl_hierarchical.R b/tests/testthat/test-sort_tbl_hierarchical.R new file mode 100644 index 000000000..7c61ea284 --- /dev/null +++ b/tests/testthat/test-sort_tbl_hierarchical.R @@ -0,0 +1,206 @@ +skip_on_cran() + +ADAE_subset <- cards::ADAE |> + dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) + +tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE +) + +test_that("tbl_sort.tbl_hierarchical() works", { + withr::local_options(width = 200) + + # no errors + expect_silent(tbl <- tbl_sort(tbl)) + expect_snapshot(tbl |> as.data.frame()) + + # .stat argument works + expect_silent(tbl <- tbl_sort(tbl, .stat = "p")) +}) + +test_that("tbl_sort.tbl_hierarchical(sort = 'frequency') works", { + # descending frequency (default) + expect_silent(tbl <- tbl_sort(tbl)) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "SEX") |> + dplyr::pull(label), + c("F", "M") + ) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "RACE") |> + dplyr::pull(label), + c("WHITE", "BLACK OR AFRICAN AMERICAN", "WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE") + ) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + c( + "APPLICATION SITE PRURITUS", "ERYTHEMA", "APPLICATION SITE ERYTHEMA", "DIARRHOEA", "APPLICATION SITE PRURITUS", + "ERYTHEMA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "APPLICATION SITE PRURITUS", + "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", + "APPLICATION SITE PRURITUS", "DIARRHOEA", "ERYTHEMA", "ERYTHEMA" + ) + ) + + # ascending frequency + expect_silent(tbl <- tbl_sort(tbl, desc = FALSE)) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "SEX") |> + dplyr::pull(label), + c("F", "M") # F and M have equal numbers + ) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "RACE") |> + dplyr::pull(label), + c("BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE", "BLACK OR AFRICAN AMERICAN", "WHITE") + ) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + c( + "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", "APPLICATION SITE PRURITUS", "DIARRHOEA", + "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", "APPLICATION SITE PRURITUS", + "DIARRHOEA", "ERYTHEMA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", + "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS" + ) + ) +}) + +test_that("tbl_sort.tbl_hierarchical(sort = 'alphanumeric') works", { + # descending (Z to A) + expect_silent(result <- tbl_sort(tbl, sort = "alphanumeric", desc = TRUE)) + expect_equal( + result$table_body |> + dplyr::filter(variable == "SEX") |> + dplyr::pull(label), + c("M", "F") + ) + expect_equal( + result$table_body |> + dplyr::filter(variable == "RACE") |> + dplyr::pull(label), + c("WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE", "WHITE", "BLACK OR AFRICAN AMERICAN") + ) + expect_equal( + result$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + c( + "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS", + "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", + "DIARRHOEA", "APPLICATION SITE PRURITUS", "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", + "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS" + ) + ) + + # ascending (A to Z) + expect_silent(result <- tbl_sort(result, sort = "alphanumeric")) + + # results match with tbl_hierarchical which sorts A to Z by default + expect_equal( + result |> as.data.frame(), + tbl |> as.data.frame() + ) +}) + +test_that("tbl_sort.tbl_hierarchical() works when there is no overall row in x", { + tbl_no_overall <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = FALSE + ) + + # sort = 'frequency' + expect_silent(tbl_no_overall <- tbl_sort(tbl_no_overall)) + expect_equal( + tbl_no_overall$table_body, + tbl_sort(tbl)$table_body[-1, ] + ) + + # sort = 'alphanumeric' + expect_silent(tbl_no_overall <- tbl_sort(tbl_no_overall, sort = "alphanumeric")) + expect_equal( + tbl_no_overall$table_body, + tbl$table_body[-1, ] + ) +}) + +test_that("tbl_sort.tbl_hierarchical() works with only one variable in x", { + tbl_single <- tbl_hierarchical( + data = ADAE_subset, + variables = AETERM, + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE + ) + + # sort = 'frequency' + expect_silent(tbl_single <- tbl_sort(tbl_single)) + expect_equal( + tbl_single$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + c( + "APPLICATION SITE PRURITUS", "ERYTHEMA", "APPLICATION SITE ERYTHEMA", "DIARRHOEA", + "ATRIOVENTRICULAR BLOCK SECOND DEGREE" + ) + ) + + # sort = 'alphanumeric' + expect_silent(tbl_single <- tbl_sort(tbl_single, sort = "alphanumeric")) + expect_equal( + tbl_single$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + sort(unique(ADAE_subset$AETERM)) + ) +}) + +test_that("tbl_sort.tbl_hierarchical() works when some variables not included in x", { + tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + include = c(SEX, AETERM), + overall_row = TRUE + ) + + expect_message(tbl_sort(tbl)) +}) + +test_that("tbl_sort.tbl_hierarchical() error messaging works", { + # invalid x input + expect_snapshot( + tbl_sort(data.frame()), + error = TRUE + ) + + # invalid sort input + expect_snapshot( + tbl_sort(tbl, sort = "no_sorting"), + error = TRUE + ) + + # invalid .stat input + expect_snapshot( + tbl_sort(tbl, .stat = "mean"), + error = TRUE + ) +})