Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow Inf and near 0 CIs to plot in ggforest #582

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 104 additions & 71 deletions R/ggforest.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,72 +39,84 @@
#' @importFrom stats anova var

ggforest <- function(model, data = NULL,
main = "Hazard ratio", cpositions=c(0.02, 0.22, 0.4),
fontsize = 0.7, refLabel = "reference", noDigits=2) {
main = "Hazard ratio", cpositions = c(0.02, 0.22, 0.4),
fontsize = 0.7, refLabel = "reference", noDigits = 2) {
conf.high <- conf.low <- estimate <- NULL
stopifnot(inherits(model, "coxph"))

# get data and variables/terms from cox model
data <- .get_data(model, data = data)
data <- survminer:::.get_data(model, data = data)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

survminer::: should be removed

terms <- attr(model$terms, "dataClasses")[-1]
# removed as requested in #388
# terms <- terms[intersect(names(terms),
# gsub(rownames(anova(model))[-1], pattern = "`", replacement = ""))]
# removed as requested in #388
# terms <- terms[intersect(names(terms),
# gsub(rownames(anova(model))[-1], pattern = "`", replacement = ""))]

# use broom to get some required statistics
coef <- as.data.frame(tidy(model, conf.int = TRUE))
gmodel <- glance(model)
coef <- as.data.frame(broom::tidy(model, conf.int = TRUE))
gmodel <- broom::glance(model)

# extract statistics for every variable
allTerms <- lapply(seq_along(terms), function(i){
allTerms <- lapply(seq_along(terms), function(i) {
var <- names(terms)[i]
#### ADD with = FALSE because error during testing
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be a data.table version thing, data.table version is 1.14.0

if (terms[i] %in% c("factor", "character")) {
adf <- as.data.frame(table(data[, var]))
adf <- as.data.frame(table(data[, var, with = FALSE]))
cbind(var = var, adf, pos = 1:nrow(adf))
}
else if (terms[i] == "numeric") {
data.frame(var = var, Var1 = "", Freq = nrow(data),
pos = 1)
}
else {
vars = grep(paste0("^", var, "*."), coef$term, value=TRUE)
data.frame(var = vars, Var1 = "", Freq = nrow(data),
pos = seq_along(vars))
} else if (terms[i] == "numeric") {
data.frame(
var = var, Var1 = "", Freq = nrow(data),
pos = 1
)
} else {
vars <- grep(paste0("^", var, "*."), coef$term, value = TRUE)
data.frame(
var = vars, Var1 = "", Freq = nrow(data),
pos = seq_along(vars)
)
}
})
allTermsDF <- do.call(rbind, allTerms)
colnames(allTermsDF) <- c("var", "level", "N", "pos")
inds <- apply(allTermsDF[,1:2], 1, paste0, collapse="")
inds <- apply(allTermsDF[, 1:2], 1, paste0, collapse = "")

# use broom again to get remaining required statistics
rownames(coef) <- gsub(coef$term, pattern = "`", replacement = "")
toShow <- cbind(allTermsDF, coef[inds,])[,c("var", "level", "N", "p.value", "estimate", "conf.low", "conf.high", "pos")]
toShowExp <- toShow[,5:7]
toShow <- cbind(allTermsDF, coef[inds, ])[, c("var", "level", "N", "p.value", "estimate", "conf.low", "conf.high", "pos")]
toShowExp <- toShow[, 5:7]
toShowExp[is.na(toShowExp)] <- 0
toShowExp <- format(exp(toShowExp), digits=noDigits)
toShowExp <- format(exp(toShowExp), digits = noDigits)
toShowExpClean <- data.frame(toShow,
pvalue = signif(toShow[,4],noDigits+1),
toShowExp)
toShowExpClean$stars <- paste0(round(toShowExpClean$p.value, noDigits+1), " ",
ifelse(toShowExpClean$p.value < 0.05, "*",""),
ifelse(toShowExpClean$p.value < 0.01, "*",""),
ifelse(toShowExpClean$p.value < 0.001, "*",""))
toShowExpClean$ci <- paste0("(",toShowExpClean[,"conf.low.1"]," - ",toShowExpClean[,"conf.high.1"],")")
toShowExpClean$estimate.1[is.na(toShowExpClean$estimate)] = refLabel
toShowExpClean$stars[which(toShowExpClean$p.value < 0.001)] = "<0.001 ***"
toShowExpClean$stars[is.na(toShowExpClean$estimate)] = ""
toShowExpClean$ci[is.na(toShowExpClean$estimate)] = ""
toShowExpClean$estimate[is.na(toShowExpClean$estimate)] = 0
toShowExpClean$var = as.character(toShowExpClean$var)
toShowExpClean$var[duplicated(toShowExpClean$var)] = ""
pvalue = signif(toShow[, 4], noDigits + 1),
toShowExp
)
toShowExpClean$stars <- paste0(
round(toShowExpClean$p.value, noDigits + 1), " ",
ifelse(toShowExpClean$p.value < 0.05, "*", ""),
ifelse(toShowExpClean$p.value < 0.01, "*", ""),
ifelse(toShowExpClean$p.value < 0.001, "*", "")
)
toShowExpClean$ci <- paste0("(", toShowExpClean[, "conf.low.1"], " - ", toShowExpClean[, "conf.high.1"], ")")
toShowExpClean$estimate.1[is.na(toShowExpClean$estimate)] <- refLabel
toShowExpClean$stars[which(toShowExpClean$p.value < 0.001)] <- "<0.001 ***"
toShowExpClean$stars[is.na(toShowExpClean$estimate)] <- ""
toShowExpClean$ci[is.na(toShowExpClean$estimate)] <- ""
toShowExpClean$estimate[is.na(toShowExpClean$estimate)] <- 0
toShowExpClean$var <- as.character(toShowExpClean$var)
toShowExpClean$var[duplicated(toShowExpClean$var)] <- ""
# make label strings:
toShowExpClean$N <- paste0("(N=",toShowExpClean$N,")")
toShowExpClean$N <- paste0("(N=", toShowExpClean$N, ")")

#flip order
# flip order
toShowExpClean <- toShowExpClean[nrow(toShowExpClean):1, ]

# get min/max CI values to place where Inf would be
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get min/max CI values and replace near 0 and Inf values with those

ci_h <- toShowExpClean$conf.high[is.finite(as.numeric(toShowExpClean$conf.high.1))]
ci_l <- toShowExpClean$conf.low[!near(as.numeric(toShowExpClean$conf.low.1), 0)]
toShowExpClean$conf.high[is.infinite(as.numeric(toShowExpClean$conf.high.1))] <- max(ci_h, na.rm = TRUE)
toShowExpClean$conf.low[near(as.numeric(toShowExpClean$conf.low.1), 0)] <- min(ci_l, na.rm = TRUE)

rangeb <- range(toShowExpClean$conf.low, toShowExpClean$conf.high, na.rm = TRUE)
breaks <- axisTicks(rangeb/2, log = TRUE, nint = 7)
breaks <- axisTicks(rangeb / 2, log = TRUE, nint = 7)
rangeplot <- rangeb
# make plot twice as wide as needed to create space for annotations
rangeplot[1] <- rangeplot[1] - diff(rangeb)
Expand All @@ -113,20 +125,22 @@ ggforest <- function(model, data = NULL,

width <- diff(rangeplot)
# y-coordinates for labels:
y_variable <- rangeplot[1] + cpositions[1] * width
y_nlevel <- rangeplot[1] + cpositions[2] * width
y_cistring <- rangeplot[1] + cpositions[3] * width
y_variable <- rangeplot[1] + cpositions[1] * width
y_nlevel <- rangeplot[1] + cpositions[2] * width
y_cistring <- rangeplot[1] + cpositions[3] * width
y_stars <- rangeb[2]
x_annotate <- seq_len(nrow(toShowExpClean))

# geom_text fontsize is in mm (https://github.com/tidyverse/ggplot2/issues/1828)
annot_size_mm <- fontsize *
as.numeric(convertX(unit(theme_get()$text$size, "pt"), "mm"))
as.numeric(grid::convertX(unit(theme_get()$text$size, "pt"), "mm"))

p <- ggplot(toShowExpClean, aes(seq_along(var), exp(estimate))) +
geom_rect(aes(xmin = seq_along(var) - .5, xmax = seq_along(var) + .5,
geom_rect(aes(
xmin = seq_along(var) - .5, xmax = seq_along(var) + .5,
ymin = exp(rangeplot[1]), ymax = exp(rangeplot[2]),
fill = ordered(seq_along(var) %% 2 + 1))) +
fill = ordered(seq_along(var) %% 2 + 1)
)) +
scale_fill_manual(values = c("#FFFFFF33", "#00000033"), guide = "none") +
geom_point(pch = 15, size = 4) +
geom_errorbar(aes(ymin = exp(conf.low), ymax = exp(conf.high)), width = 0.15) +
Expand All @@ -137,45 +151,64 @@ ggforest <- function(model, data = NULL,
name = "",
labels = sprintf("%g", breaks),
expand = c(0.02, 0.02),
breaks = breaks) +
breaks = breaks
) +
theme_light() +
theme(panel.grid.minor.y = element_blank(),
theme(
panel.grid.minor.y = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
legend.position = "none",
panel.border=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
plot.title = element_text(hjust = 0.5)) +
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
plot.title = element_text(hjust = 0.5)
) +
xlab("") +
annotate(geom = "text", x = x_annotate, y = exp(y_variable),
annotate(
geom = "text", x = x_annotate, y = exp(y_variable),
label = toShowExpClean$var, fontface = "bold", hjust = 0,
size = annot_size_mm) +
annotate(geom = "text", x = x_annotate, y = exp(y_nlevel), hjust = 0,
label = toShowExpClean$level, vjust = -0.1, size = annot_size_mm) +
annotate(geom = "text", x = x_annotate, y = exp(y_nlevel),
size = annot_size_mm
) +
annotate(
geom = "text", x = x_annotate, y = exp(y_nlevel), hjust = 0,
label = toShowExpClean$level, vjust = -0.1, size = annot_size_mm
) +
annotate(
geom = "text", x = x_annotate, y = exp(y_nlevel),
label = toShowExpClean$N, fontface = "italic", hjust = 0,
vjust = ifelse(toShowExpClean$level == "", .5, 1.1),
size = annot_size_mm) +
annotate(geom = "text", x = x_annotate, y = exp(y_cistring),
size = annot_size_mm
) +
annotate(
geom = "text", x = x_annotate, y = exp(y_cistring),
label = toShowExpClean$estimate.1, size = annot_size_mm,
vjust = ifelse(toShowExpClean$estimate.1 == "reference", .5, -0.1)) +
annotate(geom = "text", x = x_annotate, y = exp(y_cistring),
vjust = ifelse(toShowExpClean$estimate.1 == "reference", .5, -0.1)
) +
annotate(
geom = "text", x = x_annotate, y = exp(y_cistring),
label = toShowExpClean$ci, size = annot_size_mm,
vjust = 1.1, fontface = "italic") +
annotate(geom = "text", x = x_annotate, y = exp(y_stars),
vjust = 1.1, fontface = "italic"
) +
annotate(
geom = "text", x = x_annotate, y = exp(y_stars),
label = toShowExpClean$stars, size = annot_size_mm,
hjust = -0.2, fontface = "italic") +
annotate(geom = "text", x = 0.5, y = exp(y_variable),
label = paste0("# Events: ", gmodel$nevent, "; Global p-value (Log-Rank): ",
format.pval(gmodel$p.value.log, eps = ".001"), " \nAIC: ", round(gmodel$AIC,2),
"; Concordance Index: ", round(gmodel$concordance,2)),
size = annot_size_mm, hjust = 0, vjust = 1.2, fontface = "italic")
hjust = -0.2, fontface = "italic"
) +
annotate(
geom = "text", x = 0.5, y = exp(y_variable),
label = paste0(
"# Events: ", gmodel$nevent, "; Global p-value (Log-Rank): ",
format.pval(gmodel$p.value.log, eps = ".001"), " \nAIC: ", round(gmodel$AIC, 2),
"; Concordance Index: ", round(gmodel$concordance, 2)
),
size = annot_size_mm, hjust = 0, vjust = 1.2, fontface = "italic"
)
# switch off clipping for p-vals, bottom annotation:
gt <- ggplot_gtable(ggplot_build(p))
gt$layout$clip[gt$layout$name == "panel"] <- "off"
# grid.draw(gt)
# invisible(p)
ggpubr::as_ggplot(gt)
}
}