diff --git a/.travis.yml b/.travis.yml index 98cb980..8dc42e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,51 +1,38 @@ -language: r -sudo: false -dist: trusty -cache: - - packages - - ccache -latex: false +# DO NOT CHANGE THE CODE BELOW +before_install: + - R -q -e 'if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes")' + - R -q -e 'if (!requireNamespace("curl", quietly = TRUE)) install.packages("curl")' + - R -q -e 'remotes::install_github("ropenscilabs/tic"); tic::prepare_all_stages(); tic::before_install()' +install: R -q -e 'tic::install()' +after_install: R -q -e 'tic::after_install()' +before_script: R -q -e 'tic::before_script()' +script: R -q -e 'tic::script()' +after_success: R -q -e 'tic::after_success()' +after_failure: R -q -e 'tic::after_failure()' +before_deploy: R -q -e 'tic::before_deploy()' +deploy: + provider: script + script: R -q -e 'tic::deploy()' + on: + all_branches: true +after_deploy: R -q -e 'tic::after_deploy()' +after_script: R -q -e 'tic::after_script()' +# DO NOT CHANGE THE CODE ABOVE + -# These steps apply to all matrix jobs unless defined differently in tic.R -before_install: - - R -q -e 'install.packages("remotes"); remotes::install_github("ropenscilabs/tic"); tic::prepare_all_stages(); tic::before_install(); remotes::install_github("tidyverse/ggplot2")' +# Custom parts: -matrix: - fast_finish: true - allow_failures: - - env: NB='w/ lintr' - - env: NB='w/ covr' - include: - - os: linux - r: devel - install: R -q -e 'tic::install()' - after_install: R -q -e 'tic::after_install()' - before_script: R -q -e 'tic::before_script()' - script: R -q -e 'tic::script()' - before_deploy: R -q -e 'tic::before_deploy()' - deploy: - provider: script - script: R -q -e 'tic::deploy()' - on: - branch: master - condition: - - $TRAVIS_PULL_REQUEST = false - - $TRAVIS_EVENT_TYPE != cron - after_deploy: R -q -e 'tic::after_deploy()' - - os: osx - r: release - install: R -q -e 'tic::install()' - after_install: R -q -e 'tic::after_install()' - before_script: R -q -e 'tic::before_script()' - script: R -q -e 'tic::script()' - - os: linux - r: devel - env: NB='w/ covr' - - os: linux - r: devel - env: NB='w/ lintr' +# Header +language: r +sudo: false +dist: xenial +cache: packages +latex: true +pandoc_version: 2.5 -notifications: - on_success: change # default: always - on_failure: change # default: always - email: false +env: + global: + - _R_CHECK_FORCE_SUGGESTS_=false + - MAKEFLAGS="-j 2" + - BUILD_PKGDOWN=true + - _R_CHECK_TESTS_NLINES_=0 diff --git a/DESCRIPTION b/DESCRIPTION index fc6a3a9..6b04941 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,39 +1,52 @@ Package: oddsratio -Version: 1.0.3 -Date: 2018-07-15 Title: Odds Ratio Calculation for GAM(M)s & GLM(M)s -Authors@R: c( - person("Patrick", "Schratz", , - email = "patrick.schratz@gmail.com", - role = c("aut", "cre"), - comment = c(ORCID = '0000-0003-0748-6624')) - ) -Description: Simplified odds ratio calculation of GAM(M)s & GLM(M)s. - Provides structured output (data frame) of all predictors and their corresponding odds ratios and confident intervals for further analyses. - It helps to avoid false references of predictors and increments by specifying these parameters in a list instead of using 'exp(coef(model))' (standard approach of odds ratio calculation for GLMs) which just returns a plain numeric output. - For GAM(M)s, odds ratio calculation is highly simplified with this package since it takes care of the multiple 'predict()' calls of the chosen predictor while holding other predictors constant. - Also, this package allows odds ratio calculation of percentage steps across the whole predictor distribution range for GAM(M)s. - In both cases, confident intervals are returned additionally. - Calculated odds ratio of GAM(M)s can be inserted into the smooth function plot. +Version: 2.0.0 +Date: 2019-06-13 +Authors@R: + person(given = "Patrick", + family = "Schratz", + role = c("aut", "cre"), + email = "patrick.schratz@gmail.com", + comment = c(ORCID = "0000-0003-0748-6624")) +Description: Simplified odds ratio calculation of GAM(M)s & + GLM(M)s. Provides structured output (data frame) of all predictors + and their corresponding odds ratios and confident intervals for + further analyses. It helps to avoid false references of predictors + and increments by specifying these parameters in a list instead of + using 'exp(coef(model))' (standard approach of odds ratio calculation + for GLMs) which just returns a plain numeric output. For GAM(M)s, + odds ratio calculation is highly simplified with this package since it + takes care of the multiple 'predict()' calls of the chosen predictor + while holding other predictors constant. Also, this package allows + odds ratio calculation of percentage steps across the whole predictor + distribution range for GAM(M)s. In both cases, confident intervals + are returned additionally. Calculated odds ratio of GAM(M)s can be + inserted into the smooth function plot. +License: MIT + file LICENSE URL: https://github.com/pat-s/oddsratio BugReports: https://github.com/pat-s/oddsratio/issues -Depends: R (>= 3.0.0) -Imports: mgcv, - MASS, - stats, - ggplot2 (>= 3.0.0), - cowplot -License: MIT + file LICENSE -RoxygenNote: 6.0.1 -Suggests: knitr, - rmarkdown, - grid, - gtable, - scales, - testthat, - pacman, - gam -LazyData: true +Depends: + R (>= 3.0.0) +Imports: + ggplot2 (>= 3.0.0), + mgcv, + stats, + stringr, + tibble +Suggests: + cowplot, + gam, + grid, + gtable, + knitr, + MASS, + rmarkdown, + scales, + testthat +VignetteBuilder: + knitr ByteCompile: true -VignetteBuilder: knitr +Encoding: UTF-8 +LazyData: true Roxygen: list(markdown = TRUE) +RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE index 85a38c1..3103d96 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,13 +6,14 @@ export(no_plot) export(or_gam) export(or_glm) export(plot_gam) -import(MASS) import(ggplot2) import(mgcv) -importFrom(cowplot,background_grid) importFrom(grDevices,dev.off) importFrom(grDevices,png) importFrom(graphics,plot) importFrom(stats,coefficients) importFrom(stats,confint) importFrom(stats,predict) +importFrom(stringr,str_remove_all) +importFrom(tibble,as_tibble) +importFrom(tibble,tibble) diff --git a/NEWS b/NEWS deleted file mode 100644 index 9090713..0000000 --- a/NEWS +++ /dev/null @@ -1,48 +0,0 @@ -oddsratio 1.0.3 (June 19 2018) --------------------------------------------------------------------------------- - - * update functions to work with ggplot2 v3.0.0 - -oddsratio 1.0.2 (December 08 2017) --------------------------------------------------------------------------------- - -Minor: - * Add CITATION file - -oddsratio 1.0.0 (June 12 2017) --------------------------------------------------------------------------------- - -Major: - * rename functions (snake_case) - -oddsratio 0.3.1 (Nov 9 2016) --------------------------------------------------------------------------------- - - * update functions to work with ggplot2 v2.2.0 - * add data and enable lazy loading in examples - -oddsratio 0.3.0 (Oct 27 2016) --------------------------------------------------------------------------------- - -# New functions: --------------------------------------------------------------------------------- - * `plot_smooth.gam()`: Lets you plot smoothing functions of GAM(M)s using `ggplot2`. - * `add.oddsratio.into.plot()`: Add odds ratios into plot of GAM(M) smoothing function. - -# Function updates: --------------------------------------------------------------------------------- - * `calc.oddsratio.glm`, `calc.oddsratio.gam`: Add odds ratio confident interval calculation - * For GLM models CI level can be specified manually. - * Print 'CI' warning if model is of type `glmmPQL` - -oddsratio 0.2.0 (Oct 12 2016) --------------------------------------------------------------------------------- - - * Remove param `quietly` - * return data.frame in any case - * update DESCRIPTION - -oddsratio 0.1.0 (Oct 11 2016) --------------------------------------------------------------------------------- - - * Initial release attempt to CRAN diff --git a/NEWS.md b/NEWS.md index cb58965..ef5e116 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# oddsratio 2.0.0 (June 13 2019) + +* return a `tibble` instead of a `data.frame` +* clean up code base +* don't use `cowplot` ggplot theme by default +* optimize wording in vignette + # oddsratio 1.0.3 (June 19 2018) * update functions to work with ggplot2 v3.0.0 diff --git a/R/data.R b/R/data.R index 8d73e80..1c12a7d 100644 --- a/R/data.R +++ b/R/data.R @@ -10,7 +10,7 @@ #' #' @docType data #' -#' @format a `data.frame` randomly created numerical and non-numerical variables +#' @format a [tibble] randomly created numerical and non-numerical variables NULL #' data_glm @@ -23,8 +23,8 @@ NULL #' @keywords datasets #' @keywords internal #' -#' @format a `data.frame` randomly created numerical and non-numerical variables +#' @format a [tibble] randomly created numerical and non-numerical variables #' #' @source Taken from \url{http://www.ats.ucla.edu/stat/r/dae/logit.htm}, direct download #' link: \url{http://www.ats.ucla.edu/stat/data/binary.csv} -NULL \ No newline at end of file +NULL diff --git a/R/helper_funs.R b/R/helper_funs.R index 5f70f1b..63832b5 100644 --- a/R/helper_funs.R +++ b/R/helper_funs.R @@ -24,10 +24,12 @@ #' n <- 200 #' sig <- 2 #' dat <- gamSim(1, n = n, scale = sig, verbose = FALSE) -#' dat$x4 <- as.factor(c(rep("A", 50), rep("B", 50), rep("C", 50), -#' rep("D", 50))) +#' dat$x4 <- as.factor(c( +#' rep("A", 50), rep("B", 50), rep("C", 50), +#' rep("D", 50) +#' )) #' fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + -#' offset(x3) + x4, data = dat) # fit model +#' offset(x3) + x4, data = dat) # fit model #' #' tmp <- plot(fit_gam, pages = 1) # plot output #' tmp <- no_plot(fit_gam) # no plot output @@ -65,10 +67,12 @@ no_plot <- function(model = NULL) { #' n <- 200 #' sig <- 2 #' dat <- gamSim(1, n = n, scale = sig, verbose = FALSE) -#' dat$x4 <- as.factor(c(rep("A", 50), rep("B", 50), rep("C", 50), -#' rep("D", 50))) +#' dat$x4 <- as.factor(c( +#' rep("A", 50), rep("B", 50), rep("C", 50), +#' rep("D", 50) +#' )) #' fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + -#' offset(x3) + x4, data = dat) # fit model +#' offset(x3) + x4, data = dat) # fit model #' #' tmp <- gam_to_df(fit_gam, "x2") #' @export @@ -80,9 +84,11 @@ gam_to_df <- function(model = NULL, pred = NULL) { # get list index of spec. predictor set_pred <- which(grepl(pred, plot_df)) - df <- data.frame(x = plot_df[[set_pred]]$x, - se_upr = plot_df[[set_pred]]$fit + plot_df[[set_pred]]$se, - se_lwr = plot_df[[set_pred]]$fit - plot_df[[set_pred]]$se, - y = plot_df[[set_pred]]$fit) + df <- tibble( + x = plot_df[[set_pred]]$x, + se_upr = plot_df[[set_pred]]$fit + plot_df[[set_pred]]$se, + se_lwr = plot_df[[set_pred]]$fit - plot_df[[set_pred]]$se, + y = plot_df[[set_pred]]$fit + ) return(df) } diff --git a/R/insert_or.R b/R/insert_or.R index 55aa23a..4d0f15a 100644 --- a/R/insert_or.R +++ b/R/insert_or.R @@ -5,11 +5,10 @@ #' a plot of a GAM(M) smoothing function. #' #' @import ggplot2 -#' @importFrom cowplot background_grid #' #' @param plot_object A `ggplot` object from [plot_gam]. #' -#' @param or_object A returned data.frame from [or_gam]. +#' @param or_object A [tibble] as returned from [or_gam]. #' #' @param values Logical. Whether to print predictor value information nearby #' the inserted vertical lines. Default to `TRUE`. @@ -60,31 +59,39 @@ #' # load data (Source: ?mgcv::gam) and fit model #' library(mgcv) #' fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + -#' offset(x3) + x4, data = data_gam) # fit model +#' offset(x3) + x4, data = data_gam) # fit model #' #' # create input objects (plot + odds ratios) #' library(oddsratio) #' plot_object <- plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") -#' or_object1 <- or_gam(data = data_gam, model = fit_gam, -#' pred = "x2", values = c(0.099, 0.198)) +#' or_object1 <- or_gam( +#' data = data_gam, model = fit_gam, +#' pred = "x2", values = c(0.099, 0.198) +#' ) #' #' # insert first odds ratios to plot -#' plot_object <- insert_or(plot_object, or_object1, or_yloc = 3, -#' values_xloc = 0.04, line_size = 0.5, -#' line_type = "dotdash", text_size = 6, -#' values_yloc = 0.5, arrow_col = "red") +#' plot_object <- insert_or(plot_object, or_object1, +#' or_yloc = 3, +#' values_xloc = 0.04, line_size = 0.5, +#' line_type = "dotdash", text_size = 6, +#' values_yloc = 0.5, arrow_col = "red" +#' ) #' #' # calculate second odds ratio -#' or_object2 <- or_gam(data = data_gam, model = fit_gam, pred = "x2", -#' values = c(0.4, 0.6)) +#' or_object2 <- or_gam( +#' data = data_gam, model = fit_gam, pred = "x2", +#' values = c(0.4, 0.6) +#' ) #' #' # add or_object2 into plot -#' insert_or(plot_object, or_object2, or_yloc = 2.1, values_yloc = 2, -#' line_col = "green4", text_col = "black", -#' rect_col = "green4", rect_alpha = 0.2, -#' line_alpha = 1, line_type = "dashed", -#' arrow_xloc_r = 0.01, arrow_xloc_l = -0.01, -#' arrow_length = 0.01, rect = TRUE) +#' insert_or(plot_object, or_object2, +#' or_yloc = 2.1, values_yloc = 2, +#' line_col = "green4", text_col = "black", +#' rect_col = "green4", rect_alpha = 0.2, +#' line_alpha = 1, line_type = "dashed", +#' arrow_xloc_r = 0.01, arrow_xloc_l = -0.01, +#' arrow_length = 0.01, rect = TRUE +#' ) #' @export insert_or <- function(plot_object = NULL, or_object = NULL, line_col = "red", # nocov start # nolint @@ -97,14 +104,20 @@ insert_or <- function(plot_object = NULL, or_object = NULL, line_col = "red", # arrow_xloc_r = NULL, arrow_xloc_l = NULL) { plot_object <- plot_object + - geom_vline(xintercept = or_object$value1, color = line_col, - size = line_size, linetype = line_type, alpha = line_alpha) + - geom_vline(xintercept = or_object$value2, color = line_col, - size = line_size, linetype = line_type, alpha = line_alpha) + - annotate("text", x = mean(c(or_object$value2, or_object$value1)), - y = min(plot_object$data$se_lwr) + or_yloc, - label = paste0("OR: \n", round(or_object$oddsratio, 2)), - color = text_col, size = text_size) + geom_vline( + xintercept = or_object$value1, color = line_col, + size = line_size, linetype = line_type, alpha = line_alpha + ) + + geom_vline( + xintercept = or_object$value2, color = line_col, + size = line_size, linetype = line_type, alpha = line_alpha + ) + + annotate("text", + x = mean(c(or_object$value2, or_object$value1)), + y = min(plot_object$data$se_lwr) + or_yloc, + label = paste0("OR: \n", round(or_object$oddsratio, 2)), + color = text_col, size = text_size + ) if (rect) { if (is.null(rect_col)) { @@ -112,15 +125,19 @@ insert_or <- function(plot_object = NULL, or_object = NULL, line_col = "red", # } # set drawing order to place rect behind smoothing fun - plot_object$layers <- c(geom_rect(data = plot_object$data[1,], # avoids multiple rect drawings # nolint - ymin = ggplot_build(plot_object)$layout$ - panel_params[[1]]$y.range[1], - ymax = ggplot_build(plot_object)$layout$ - panel_params[[1]]$y.range[2], - xmin = or_object$value1, - xmax = or_object$value2, - alpha = rect_alpha, fill = rect_col), - plot_object$layers) + plot_object$layers <- c( + geom_rect( + data = plot_object$data[1, ], # avoids multiple rect drawings # nolint + ymin = ggplot_build(plot_object)$layout$ + panel_params[[1]]$y.range[1], + ymax = ggplot_build(plot_object)$layout$ + panel_params[[1]]$y.range[2], + xmin = or_object$value1, + xmax = or_object$value2, + alpha = rect_alpha, fill = rect_col + ), + plot_object$layers + ) } if (values) { @@ -143,7 +160,7 @@ insert_or <- function(plot_object = NULL, or_object = NULL, line_col = "red", # if (is.null(arrow_xloc_l)) { # calc arrow shift from x axis range - arrow_xloc_l <- - (max(plot_object$data$y) - min(plot_object$data$y)) * + arrow_xloc_l <- -(max(plot_object$data$y) - min(plot_object$data$y)) * 0.002 } @@ -155,14 +172,18 @@ insert_or <- function(plot_object = NULL, or_object = NULL, line_col = "red", # plot_object <- plot_object + - annotate("text", x = or_object$value1 - values_xloc, - y = min(plot_object$data$se_lwr) + values_yloc, - label = or_object$value1, - color = text_col, alpha = text_alpha, size = text_size) + - annotate("text", x = or_object$value2 + values_xloc, - y = min(plot_object$data$se_lwr) + values_yloc, - label = or_object$value2, - color = text_col, alpha = text_alpha, size = text_size) + annotate("text", + x = or_object$value1 - values_xloc, + y = min(plot_object$data$se_lwr) + values_yloc, + label = or_object$value1, + color = text_col, alpha = text_alpha, size = text_size + ) + + annotate("text", + x = or_object$value2 + values_xloc, + y = min(plot_object$data$se_lwr) + values_yloc, + label = or_object$value2, + color = text_col, alpha = text_alpha, size = text_size + ) if (arrow) { @@ -174,23 +195,27 @@ insert_or <- function(plot_object = NULL, or_object = NULL, line_col = "red", # plot_object <- plot_object + # left arrow - geom_segment(x = or_object$value1 - values_xloc + arrow_xloc_l, - xend = or_object$value1 - values_xloc + arrow_length, - y = min(plot_object$data$se_lwr) + values_yloc + - arrow_yloc, - yend = min(plot_object$data$se_lwr) + values_yloc + - arrow_yloc, - color = arrow_col, - arrow = arrow(length = unit(0.2, "cm"), type = "closed")) + + geom_segment( + x = or_object$value1 - values_xloc + arrow_xloc_l, + xend = or_object$value1 - values_xloc + arrow_length, + y = min(plot_object$data$se_lwr) + values_yloc + + arrow_yloc, + yend = min(plot_object$data$se_lwr) + values_yloc + + arrow_yloc, + color = arrow_col, + arrow = arrow(length = unit(0.2, "cm"), type = "closed") + ) + # right arrow - geom_segment(x = or_object$value2 + values_xloc + arrow_xloc_r, - xend = or_object$value2 + values_xloc - arrow_length, - y = min(plot_object$data$se_lwr) + values_yloc + - arrow_yloc, - yend = min(plot_object$data$se_lwr) + values_yloc + - arrow_yloc, - color = arrow_col, - arrow = arrow(length = unit(0.2, "cm"), type = "closed")) + geom_segment( + x = or_object$value2 + values_xloc + arrow_xloc_r, + xend = or_object$value2 + values_xloc - arrow_length, + y = min(plot_object$data$se_lwr) + values_yloc + + arrow_yloc, + yend = min(plot_object$data$se_lwr) + values_yloc + + arrow_yloc, + color = arrow_col, + arrow = arrow(length = unit(0.2, "cm"), type = "closed") + ) } } return(plot_object) diff --git a/R/or_gam.R b/R/or_gam.R index e169ab5..b1c9e63 100644 --- a/R/or_gam.R +++ b/R/or_gam.R @@ -2,45 +2,35 @@ #' @title Calculate odds ratios of Generalized Additive (Mixed) Models #' #' @importFrom stats coefficients predict +#' @importFrom tibble tibble #' #' @description This function calculates odds ratio(s) for specific increment -#' steps of a GAM(M)s. -#' @description Odds ratios can also be calculated for continuous percentage -#' increment steps across the whole predictor distribution using `slice = TRUE`. +#' steps of a GAM(M)s. Odds ratios can also be calculated for continuous +#' percentage increment steps across the whole predictor distribution using +#' `slice = TRUE`. #' #' @param data The data used for model fitting. -#' #' @param model A fitted GAM(M). -#' -#' @param pred Character. Predictor name for which to calculate -#' the odds ratio. -#' -#' @param values Numeric vector of length two. -#' Predictor values to estimate odds ratio from. Function is written to use the -#' first provided value as the "lower" one, i.e. calculating the odds ratio -#' 'from value1 to value2'. Only used if `slice = FALSE`. -#' -#' @param percentage Numeric. Percentage number to split the -#' predictor distribution into. -#' A value of 10 would split the predictor distribution by 10\% intervals. -#' Only needed if `slice = TRUE`. -#' -#' @param slice Logical. `Default = FALSE`. Whether to calculate -#' odds ratios for fixed increment steps over the whole predictor distribution. -#' See `percentage` for setting the increment values. -#' -#' @param CI Numeric. Currently fixed to 95\% confidence interval level -#' (2.5\% - 97.5\%). -#' It should not be changed in a function call! -#' -#' @details Currently supported functions: [mgcv::gam], -#' [mgcv::gamm], [gam::gam]. -#' -#' @details For [mgcv::gamm], the `model` input of -#' [or_gam] needs to be the `gam` output (e.g. `fit_gam$gam`). -#' -#' @return A data frame with (up to) eight columns. `perc1` and `perc2` -#' are only returned if `slice = TRUE`: +#' @param pred Character. Predictor name for which to calculate the odds ratio. +#' @param values Numeric vector of length two. Predictor values to estimate odds +#' ratio from. Function is written to use the first provided value as the +#' "lower" one, i.e. calculating the odds ratio 'from value1 to value2'. Only +#' used if `slice = FALSE`. +#' @param percentage Numeric. Percentage number to split the predictor +#' distribution into. A value of 10 would split the predictor distribution by +#' 10\% intervals. Only needed if `slice = TRUE`. +#' @param slice Logical. `Default = FALSE`. Whether to calculate odds ratios for +#' fixed increment steps over the whole predictor distribution. See +#' `percentage` for setting the increment values. +#' @param CI Numeric. Currently fixed to 95\% confidence interval level (2.5\% - +#' 97.5\%). It should not be changed in a function call! +#' +#' @details Currently supported functions: [mgcv::gam], [mgcv::gamm], +#' [gam::gam]. For [mgcv::gamm], the `model` input of [or_gam] needs to be the +#' `gam` output (e.g. `fit_gam$gam`). +#' +#' @return A data frame with (up to) eight columns. `perc1` and `perc2` are only +#' returned if `slice = TRUE`: #' \item{predictor}{Predictor name} #' \item{value1}{First value of odds ratio calculation} #' \item{value2}{Second value of odds ratio calculation} @@ -60,21 +50,26 @@ #' # load data (Source: ?mgcv::gam) and fit model #' library(mgcv) #' fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + -#' offset(x3) + x4, data = data_gam) # fit model +#' offset(x3) + x4, data = data_gam) # fit model #' #' # Calculate OR for specific increment step of continuous variable -#' or_gam(data = data_gam, model = fit_gam, pred = "x2", -#' values = c(0.099, 0.198)) +#' or_gam( +#' data = data_gam, model = fit_gam, pred = "x2", +#' values = c(0.099, 0.198) +#' ) #' #' ## Calculate OR for change of indicator variable -#' or_gam(data = data_gam, model = fit_gam, pred = "x4", -#' values = c("B", "D")) +#' or_gam( +#' data = data_gam, model = fit_gam, pred = "x4", +#' values = c("B", "D") +#' ) #' #' ## Calculate ORs for percentage increments of predictor distribution #' ## (here: 20%) -#' or_gam(data = data_gam, model = fit_gam, pred = "x2", -#' percentage = 20, slice = TRUE) -#' +#' or_gam( +#' data = data_gam, model = fit_gam, pred = "x2", +#' percentage = 20, slice = TRUE +#' ) #' @export or_gam <- function(data = NULL, model = NULL, pred = NULL, values = NULL, percentage = NULL, slice = FALSE, CI = NULL) { @@ -97,23 +92,25 @@ or_gam <- function(data = NULL, model = NULL, pred = NULL, values = NULL, range_v <- c(range_v, min(data[, pred]) + step * i) } - result <- data.frame(predictor = length(100 / percentage), - value1 = numeric(length = 100 / percentage), - value2 = numeric(length = 100 / percentage), - perc1 = character(length = 100 / percentage), - perc2 = character(length = 100 / percentage), - oddsratio = numeric(length = 100 / percentage), - CI_low = numeric(length = 100 / percentage), - CI_high = numeric(length = 100 / percentage), - stringsAsFactors = FALSE) + result <- tibble( + predictor = length(100 / percentage), + value1 = numeric(length = 100 / percentage), + value2 = numeric(length = 100 / percentage), + perc1 = character(length = 100 / percentage), + perc2 = character(length = 100 / percentage), + oddsratio = numeric(length = 100 / percentage), + CI_low = numeric(length = 100 / percentage), + CI_high = numeric(length = 100 / percentage) + ) # apply OR calc for vector for (x in 1:(100 / percentage)) { # set all preds to their mean if they are numeric for (i in names_pred) { - if (is.numeric(data[[i]])) + if (is.numeric(data[[i]])) { data[[i]] <- mean(data[[i]]) + } } # reduce to 1 row @@ -125,7 +122,8 @@ or_gam <- function(data = NULL, model = NULL, pred = NULL, values = NULL, data[, pred] <- range_v[x] # calc log odds for value 1 pred_gam1 <- as.numeric(predict(model, data, - type = "link", se.fit = TRUE)) + type = "link", se.fit = TRUE + )) # calc 95% CI log odds (mean +- 2* stdev) pred_gam1_CI_low <- pred_gam1[1] - (2 * pred_gam1[2]) pred_gam1_CI_high <- pred_gam1[1] + (2 * pred_gam1[2]) @@ -134,20 +132,21 @@ or_gam <- function(data = NULL, model = NULL, pred = NULL, values = NULL, data[, pred] <- range_v[x + 1] # calc log odds for value 2 pred_gam2 <- as.numeric(predict(model, data, - type = "link", se.fit = TRUE)) + type = "link", se.fit = TRUE + )) # calc 95% CI log odds (mean +- 2* stdev) pred_gam2_CI_low <- pred_gam2[1] - (2 * pred_gam2[2]) pred_gam2_CI_high <- pred_gam2[1] + (2 * pred_gam2[2]) result$predictor <- pred result$oddsratio[x] <- round(as.numeric(exp(pred_gam2[1] - - pred_gam1[1])), 2) + pred_gam1[1])), 2) result$value1[x] <- round(range_v[x], 3) result$value2[x] <- round(range_v[x + 1], 3) result$CI_high[x] <- round(as.numeric(exp(pred_gam2_CI_low - - pred_gam1_CI_low)), 2) # no mistake # nolint + pred_gam1_CI_low)), 2) # no mistake # nolint result$CI_low[x] <- round(as.numeric(exp(pred_gam2_CI_high - - pred_gam1_CI_high)), 2) # no mistake # nolint + pred_gam1_CI_high)), 2) # no mistake # nolint result$perc1[x] <- percentage * x - percentage result$perc2[x] <- percentage * x } @@ -161,8 +160,9 @@ or_gam <- function(data = NULL, model = NULL, pred = NULL, values = NULL, # set all preds to their mean if they are numeric for (i in names_pred) { - if (is.numeric(data[[i]])) + if (is.numeric(data[[i]])) { data[[i]] <- mean(data[[i]]) + } } # reduce to 1 row @@ -192,13 +192,14 @@ or_gam <- function(data = NULL, model = NULL, pred = NULL, values = NULL, odds_ratio_low <- as.numeric(exp(pred_gam2_CI_low - pred_gam1_CI_low), 2) odds_ratio_high <- as.numeric(exp(pred_gam2_CI_high - pred_gam1_CI_high), 2) - result <- data.frame(predictor = pred, - value1 = values[1], - value2 = values[2], - oddsratio = odds_ratio, - CI_low = odds_ratio_high, # no mistake - CI_high = odds_ratio_low, # no mistake - stringsAsFactors = FALSE) + result <- tibble( + predictor = pred, + value1 = values[1], + value2 = values[2], + oddsratio = odds_ratio, + CI_low = odds_ratio_high, # no mistake + CI_high = odds_ratio_low# no mistake + ) # change col names colnames(result)[5] <- paste0("CI_low (2.5%)") diff --git a/R/or_glm.R b/R/or_glm.R index 6403aa4..82752af 100644 --- a/R/or_glm.R +++ b/R/or_glm.R @@ -4,27 +4,26 @@ #' @importFrom stats coefficients #' @importFrom stats confint #' @import mgcv -#' @import MASS +#' @importFrom tibble as_tibble +#' @importFrom stringr str_remove_all #' -#' @description This function calculates odds ratio(s) for specific -#' increment steps of GLMs. +#' @description This function calculates odds ratio(s) for specific increment +#' steps of GLMs. #' #' @param data The data used for model fitting. #' @param model A fitted GLM(M). #' @param incr List. Increment values of each predictor. -#' @param CI numeric. Which confident interval to calculate. -#' Must be between 0 and 1. Default to 0.95 +#' @param CI numeric. Which confident interval to calculate. Must be between 0 +#' and 1. Default to 0.95 #' -#' @return A data frame with five columns: -#' \item{predictor}{Predictor name(s)} -#' \item{oddsratio}{Calculated odds ratio(s)} -#' \item{CI_low}{Lower confident interval of odds ratio} -#' \item{CI_high}{Higher confident interval of odds ratio} -#' \item{increment}{Increment of the predictor(s)} +#' @return A data frame with five columns: \item{predictor}{Predictor name(s)} +#' \item{oddsratio}{Calculated odds ratio(s)} \item{CI_low}{Lower confident +#' interval of odds ratio} \item{CI_high}{Higher confident interval of odds +#' ratio} \item{increment}{Increment of the predictor(s)} #' #' @details `CI_low` and `CI_high` are only calculated for GLM models because -#' [glmmPQL] does not return confident intervals due to its penalizing -#' behavior. +#' [glmmPQL] does not return confident intervals due to its penalizing +#' behavior. #' #' @author Patrick Schratz #' @@ -32,27 +31,32 @@ #' ## Example with glm() #' # load data (source: http://www.ats.ucla.edu/stat/r/dae/logit.htm) and #' # fit model -#' fit_glm <- glm(admit ~ gre + gpa + rank, data = data_glm, -#' family = "binomial") # fit model +#' fit_glm <- glm(admit ~ gre + gpa + rank, +#' data = data_glm, +#' family = "binomial" +#' ) # fit model #' #' # Calculate OR for specific increment step of continuous variable #' or_glm(data = data_glm, model = fit_glm, incr = list(gre = 380, gpa = 5)) #' #' # Calculate OR and change the confidence interval level -#' or_glm(data = data_glm, model = fit_glm, -#' incr = list(gre = 380, gpa = 5), CI = .70) +#' or_glm( +#' data = data_glm, model = fit_glm, +#' incr = list(gre = 380, gpa = 5), CI = .70 +#' ) #' #' ## Example with MASS:glmmPQL() #' # load data #' library(MASS) #' data(bacteria) -#' fit_glmmPQL <- glmmPQL(y ~ trt + week, random = ~1 | ID, -#' family = binomial, data = bacteria, -#' verbose = FALSE) +#' fit_glmmPQL <- glmmPQL(y ~ trt + week, +#' random = ~ 1 | ID, +#' family = binomial, data = bacteria, +#' verbose = FALSE +#' ) #' #' # Apply function #' or_glm(data = bacteria, model = fit_glmmPQL, incr = list(week = 5)) -#' #' @details Currently supported functions: [glm], #' [glmmPQL] #' @@ -79,23 +83,25 @@ or_glm <- function(data, model, incr, CI = 0.95) { odds_ratios <- list() CI_low <- list() CI_high <- list() + for (i in preds) { # CI calculation if (class(model)[1] == "glm") { - CI_list <- as.data.frame(suppressMessages(confint(model, - level = CI))) [-1, ] + CI_list <- data.frame(suppressMessages(confint(model, + level = CI + ))) [-1, ] } # check if predictor is numeric or integer if (is.numeric(data[[i]]) | is.integer(data[[i]])) { odds_ratios[[i]] <- round(exp(as.numeric(coef[[i]]) * - as.numeric(incr[[i]])), 3) + as.numeric(incr[[i]])), 3) if (!class(model)[1] == "glmmPQL") { CI_low[[i]] <- round(exp(CI_list[i, 1] * # nocov start - as.numeric(incr[[i]])), 3) + as.numeric(incr[[i]])), 3) CI_high[[i]] <- round(exp(CI_list[i, 2] * - as.numeric(incr[[i]])), 3) # nocov end + as.numeric(incr[[i]])), 3) # nocov end } increments[[i]] <- as.numeric(incr[[i]]) or <- odds_ratios[[i]] # nolint @@ -121,19 +127,26 @@ or_glm <- function(data, model, incr, CI = 0.95) { } # create data frame to return - result <- data.frame(predictor = as.character(names(odds_ratios)), - oddsratio = unlist(odds_ratios, use.names = FALSE), - CI_low = unlist(CI_low, use.names = FALSE), - CI_high = unlist(CI_high, use.names = FALSE), - increment = as.character(unlist(increments, - use.names = FALSE)), - stringsAsFactors = FALSE) + result <- tibble( + predictor = as.character(names(odds_ratios)), + oddsratio = unlist(odds_ratios, use.names = FALSE), + CI_low = unlist(CI_low, use.names = FALSE), + CI_high = unlist(CI_high, use.names = FALSE), + increment = as.character(unlist(increments, + use.names = FALSE + )) + ) # set CI column names if (class(model)[1] == "glm") { - colnames(result)[3] <- paste0("CI_low (", names(CI_list) [1], ")") - colnames(result)[4] <- paste0("CI_high (", names(CI_list) [2], ")") + + # Clean variable names + col_names = stringr::str_remove_all(names(CI_list), "\\.\\.") + col_names = stringr::str_remove_all(col_names, "X") + + colnames(result)[3] <- paste0("CI_low (", col_names[1], ")") + colnames(result)[4] <- paste0("CI_high (", col_names[2], ")") } - return(result) + return(as_tibble(result)) } diff --git a/R/plot_gam.R b/R/plot_gam.R index 8c79780..b97e27a 100644 --- a/R/plot_gam.R +++ b/R/plot_gam.R @@ -5,7 +5,6 @@ #' using the `ggplot2` plotting system. #' #' @import ggplot2 -#' @importFrom cowplot background_grid #' @importFrom grDevices dev.off png #' @importFrom graphics plot #' @param model A fitted model of class `gam`. @@ -44,11 +43,11 @@ #' # load data (Source: ?mgcv::gam) and fit model #' library(mgcv) #' fit_gam <- mgcv::gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + offset(x3) + x4, -#' data = data_gam) +#' data = data_gam +#' ) #' #' library(oddsratio) #' plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") -#' #' @seealso [plot_gam] #' @seealso [or_gam] #' @seealso [insert_or] @@ -75,15 +74,19 @@ plot_gam <- function(model = NULL, pred = NULL, col_line = "blue", # nocov start plot_gam <- ggplot(df, aes_(~x, ~y)) + geom_line(colour = col_line, size = sm_fun_size) + - geom_line(aes_(~x, ~se_upr), linetype = ci_line_type, - colour = ci_line_col, size = ci_line_size) + - geom_line(aes_(~x, ~se_lwr), linetype = ci_line_type, - colour = ci_line_col, size = ci_line_size) + + geom_line(aes_(~x, ~se_upr), + linetype = ci_line_type, + colour = ci_line_col, size = ci_line_size + ) + + geom_line(aes_(~x, ~se_lwr), + linetype = ci_line_type, + colour = ci_line_col, size = ci_line_size + ) + geom_ribbon(aes_(x = ~x, ymin = ~se_lwr, ymax = ~se_upr), - fill = ci_fill, alpha = ci_alpha) + + fill = ci_fill, alpha = ci_alpha + ) + ylab(ylab) + - xlab(xlab) + - cowplot::background_grid(major = "xy", minor = "none") + xlab(xlab) if (!is.null(limits_y) & !is.null(breaks_y)) { plot_gam <- plot_gam + diff --git a/README.Rmd b/README.Rmd deleted file mode 100644 index 3bff8e0..0000000 --- a/README.Rmd +++ /dev/null @@ -1,153 +0,0 @@ ---- -output: github_document ---- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE, eval = FALSE) -library(ggplot2) -library(cowplot) -``` - -```{r, echo = FALSE, eval = TRUE} -version <- "1.0.0.9000" -``` - -```{r, echo = FALSE, eval = TRUE} -rvers <- "3.0.0" -``` - -#### General -[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1095473.svg)](https://doi.org/10.5281/zenodo.1095473) - -| Resource: | CRAN | Travis CI | Appveyor | -| ------------- | ------------------- | --------------- | ---------------- | -| _Platforms:_ | _Multiple_ | _Linux & macOS_ | _Windows_ | -| R CMD check | CRAN version | Build status | Build status | -| Test coverage | | Coverage Status | | - - -#### CRAN -[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/oddsratio)](https://cran.r-project.org/package=oddsratio) -[![Downloads](https://cranlogs.r-pkg.org/badges/oddsratio?color=brightgreen)](https://www.r-pkg.org/pkg/oddsratio) -![](https://cranlogs.r-pkg.org/badges/grand-total/oddsratio) - -Functions for calculation and plotting of odds ratios of Generalized Additive (Mixed) -Models and Generalized Linear (Mixed) Models with a binomial -response variable (i.e. logistic regression models). - -## Installation - -Install from CRAN: - -```R -install.packages("oddsratio") -``` - -Get the development version from Github: - -```R -remotes::install_github("pat-s/oddsratio") -``` -## Examples - -### GLM - -Odds ratio calculation of predictors `gre` & `gpa` of a fitted model `fit_glm` -with increment steps of 380 and 5, respectively. -For factor variables (here: `rank` with 4 levels), automatically all odds ratios -corresponding to the base level (here: `rank1`) are returned including their -respective confident intervals. The default level is 95%. -However, other levels can be specified with the param `CI`. -Data source: https://stats.idre.ucla.edu/stat/data/binary.csv - -```{r} -pacman::p_load(oddsratio, mgcv) -df <- data_glm -df$rank <- factor(df$rank) -fit_glm <- glm(admit ~ gre + gpa + rank, data = df, family = "binomial") - -or_glm(data = df, model = fit_glm, - incr = list(gre = 380, gpa = 5, CI = 0.95)) -``` - -### GAM - -For GAMs, the calculation of odds ratio is different. -Due to its non-linear definition, odds ratios do only apply to specific -value changes and are not constant throughout the whole value range of the -predictor as for GLMs. -Hence, odds ratios of GAMs can only be computed for one predictor at a time by -holding all other predictors at a fixed value while changing the value of the -specific predictor. -Confident intervals are currently fixed to the 95% level for GAMs. -Data source: `?mgcv::predict.gam()` - -Here, the usage of `or_gam()` is shown by calculating odds ratios of -pred `x2` for a 20% steps across the whole value range of the predictor. - -```{r} -set.seed(1234) -n <- 200 -sig <- 2 -df <- gamSim(1, n = n,scale = sig, verbose = FALSE) -df$x4 <- as.factor(c(rep("A", 50), rep("B", 50), rep("C", 50), rep("D", 50))) -fit_gam <- mgcv::gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + offset(x3) + x4, data = df) - -or_gam(data = df, model = fit_gam, pred = "x2", - percentage = 20, slice = TRUE) -``` - -If you want to compute a single odds ratio for specific values, simply set -param `slice = FALSE`: - -```{r} -or_gam(data = df, model = fit_gam, - pred = "x2", values = c(0.099, 0.198)) -``` - -Plotting of GAM smooths is also supported: - -```{r} -plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") -``` - -

- -

- -Insert the calculated odds ratios into the smoothing function: - -```{r} -plot_object <- plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") -or_object <- or_gam(data = df, model = fit_gam, - pred = "x2", values = c(0.099, 0.198)) - -plot <- insert_or(plot_object, or_object, or_yloc = 3, - values_xloc = 0.04, line_size = 0.5, - line_type = "dotdash", values_yloc = 0.5, - arrow_col = "red") -plot -``` - -

- -

- -Insert multiple odds ratios into one smooth: - -```{r} -or_object2 <- or_gam(data = df, model = fit_gam, pred = "x2", - values = c(0.4, 0.6)) - -insert_or(plot, or_object2, or_yloc = 2.1, values_yloc = 2, - line_col = "green4", text_col = "black", - rect_col = "green4", rect_alpha = 0.2, - line_alpha = 1, line_type = "dashed", - arrow_xloc_r = 0.01, arrow_xloc_l = -0.01, - arrow_length = 0.01, rect = T) -``` - -

- -

diff --git a/README.md b/README.md index 314fd37..bba4f9b 100644 --- a/README.md +++ b/README.md @@ -1,146 +1,29 @@ - -#### General - -[![Project Status: Active – The project has reached a stable, usable -state and is being actively -developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1095473.svg)](https://doi.org/10.5281/zenodo.1095473) - -| Resource: | CRAN | Travis CI | Appveyor | -| ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| *Platforms:* | *Multiple* | *Linux & macOS* | *Windows* | -| R CMD check | CRAN version | Build status | Build status | -| Test coverage | | Coverage Status | | - -#### CRAN - -[![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/oddsratio)](https://cran.r-project.org/package=oddsratio) -[![Downloads](https://cranlogs.r-pkg.org/badges/oddsratio?color=brightgreen)](https://www.r-pkg.org/pkg/oddsratio) -![](https://cranlogs.r-pkg.org/badges/grand-total/oddsratio) - -Functions for calculation and plotting of odds ratios of Generalized -Additive (Mixed) Models and Generalized Linear (Mixed) Models with a -binomial response variable (i.e. logistic regression models). +[![Build Status](https://travis-ci.org/pat-s/oddsratio.svg?branch=master)](https://travis-ci.org/pat-s/oddsratio) +[![Build status](https://ci.appveyor.com/api/projects/status/s5t0sowc6mxu4yhl/branch/master?svg=true)](https://ci.appveyor.com/project/pat-s/oddsratio/branch/master) +[![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version-ago/oddsratio)](https://cran.r-project.org/package=oddsratio) +[![cran checks](https://cranchecks.info/badges/worst/oddsratio)](https://cran.r-project.org/web/checks/check_results_oddsratio.html) +[![CRAN Downloads](https://cranlogs.r-pkg.org/badges/oddsratio)](https://cran.rstudio.com/web/packages/oddsratio/index.html) +[![lifecycle](https://img.shields.io/badge/lifecycle-stable-blue.svg)](https://www.tidyverse.org/lifecycle/#stable) +[![Dependencies](https://tinyverse.netlify.com/badge/oddsratio)](https://cran.r-project.org/package=oddsratio) + +Functions for calculation and plotting of odds ratios of Generalized Additive (Mixed) +Models and Generalized Linear (Mixed) Models with a binomial +response variable (i.e. logistic regression models). ## Installation Install from CRAN: -``` r +```R install.packages("oddsratio") ``` Get the development version from Github: -``` r +```R remotes::install_github("pat-s/oddsratio") ``` -## Examples - -### GLM - -Odds ratio calculation of predictors `gre` & `gpa` of a fitted model -`fit_glm` with increment steps of 380 and 5, respectively. -For factor variables (here: `rank` with 4 levels), automatically all -odds ratios corresponding to the base level (here: `rank1`) are returned -including their respective confident intervals. The default level is -95%. However, other levels can be specified with the param `CI`. Data -source: - -``` r -pacman::p_load(oddsratio, mgcv) -df <- data_glm -df$rank <- factor(df$rank) -fit_glm <- glm(admit ~ gre + gpa + rank, data = df, family = "binomial") - -or_glm(data = df, model = fit_glm, - incr = list(gre = 380, gpa = 5, CI = 0.95)) -``` - -### GAM - -For GAMs, the calculation of odds ratio is different. Due to its -non-linear definition, odds ratios do only apply to specific value -changes and are not constant throughout the whole value range of the -predictor as for GLMs. Hence, odds ratios of GAMs can only be computed -for one predictor at a time by holding all other predictors at a fixed -value while changing the value of the specific predictor. Confident -intervals are currently fixed to the 95% level for GAMs. Data source: -`?mgcv::predict.gam()` - -Here, the usage of `or_gam()` is shown by calculating odds ratios of -pred `x2` for a 20% steps across the whole value range of the predictor. - -``` r -set.seed(1234) -n <- 200 -sig <- 2 -df <- gamSim(1, n = n,scale = sig, verbose = FALSE) -df$x4 <- as.factor(c(rep("A", 50), rep("B", 50), rep("C", 50), rep("D", 50))) -fit_gam <- mgcv::gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + offset(x3) + x4, data = df) - -or_gam(data = df, model = fit_gam, pred = "x2", - percentage = 20, slice = TRUE) -``` - -If you want to compute a single odds ratio for specific values, simply -set param `slice = FALSE`: - -``` r -or_gam(data = df, model = fit_gam, - pred = "x2", values = c(0.099, 0.198)) -``` - -Plotting of GAM smooths is also -supported: - -``` r -plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") -``` - -

- - - -

- -Insert the calculated odds ratios into the smoothing function: - -``` r -plot_object <- plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") -or_object <- or_gam(data = df, model = fit_gam, - pred = "x2", values = c(0.099, 0.198)) - -plot <- insert_or(plot_object, or_object, or_yloc = 3, - values_xloc = 0.04, line_size = 0.5, - line_type = "dotdash", values_yloc = 0.5, - arrow_col = "red") -plot -``` - -

- - - -

- -Insert multiple odds ratios into one smooth: - -``` r -or_object2 <- or_gam(data = df, model = fit_gam, pred = "x2", - values = c(0.4, 0.6)) - -insert_or(plot, or_object2, or_yloc = 2.1, values_yloc = 2, - line_col = "green4", text_col = "black", - rect_col = "green4", rect_alpha = 0.2, - line_alpha = 1, line_type = "dashed", - arrow_xloc_r = 0.01, arrow_xloc_l = -0.01, - arrow_length = 0.01, rect = T) -``` - -

- - +## Usage -

+See the [Getting Started](https://pat-s.github.io/oddsratio/articles/oddsratio.html) vignette. diff --git a/_pkgdown.yml b/_pkgdown.yml index 6ec456c..27a4f8b 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,3 @@ template: params: - bootswatch: yeti + bootswatch: cosmo diff --git a/appveyor.yml b/appveyor.yml index e32d316..3001a68 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -8,35 +8,33 @@ init: Import-Module '..\appveyor-tool.ps1' install: - ps: Bootstrap - -# Adapt as necessary starting from here - -build_script: - - travis-tool.sh install_deps - -test_script: - - travis-tool.sh run_tests - -on_failure: - - 7z a failure.zip *.Rcheck\* - - appveyor PushArtifact failure.zip - -artifacts: - - path: '*.Rcheck\**\*.log' - name: Logs - - - path: '*.Rcheck\**\*.out' - name: Logs - - - path: '*.Rcheck\**\*.fail' - name: Logs - - - path: '*.Rcheck\**\*.Rout' - name: Logs - - - path: '\*_*.tar.gz' - name: Bits - - - path: '\*_*.zip' - name: Bits + - ps: Bootstrap + - cmd: R -q -e "writeLines('options(repos = \'https://cloud.r-project.org\')', '~/.Rprofile')" + - cmd: R -q -e "if (!requireNamespace('remotes', quietly = TRUE)) install.packages('remotes')" + - cmd: R -q -e "if (!requireNamespace('curl', quietly = TRUE)) install.packages('curl')" + - cmd: R -q -e "remotes::install_github('ropenscilabs/tic')" + - cmd: R -q -e "tic::prepare_all_stages()" + - cmd: R -q -e "tic::before_install()" + +cache: + - C:\RLibrary\ + +before_build: Rscript -e "tic::before_install()" +build_script: Rscript -e "tic::install()" +after_build: Rscript -e "tic::after_install()" +before_test: Rscript -e "tic::before_script()" +test_script: Rscript -e "tic::script()" +on_success: Rscript -e "try(tic::after_success(), silent = TRUE)" +on_failure: Rscript -e "tic::after_failure()" +before_deploy: Rscript -e "tic::before_deploy()" +deploy_script: Rscript -e "tic::deploy()" +after_deploy: Rscript -e "tic::after_deploy()" +on_finish: Rscript -e "tic::after_script()" + +platform: x64 +image: Visual Studio 2017 + +environment: + global: + USE_RTOOLS: true + R_ARCH: x64 diff --git a/cran-comments.md b/cran-comments.md index 1e1e948..a5f758c 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,9 +1,8 @@ ## Test environments -* local Arch Linux installation, R 3.5.1 -* ubuntu 14.04 (on travis-ci), R-devel -* OSX 10.12, Xcode 8.3 (on travis-ci), R-devel -* win-builder (devel and release) +* local Arch Linux installation, R 3.6.0 +* ubuntu 16.04 (on travis-ci), R-devel +* win-builder, R-devel ## R CMD check results diff --git a/inst/Plots/smoothing_function_oddsratio.png b/inst/Plots/smoothing_function_oddsratio.png deleted file mode 100644 index 0dadf5d..0000000 Binary files a/inst/Plots/smoothing_function_oddsratio.png and /dev/null differ diff --git a/inst/Plots/smoothing_function_oddsratio_two.png b/inst/Plots/smoothing_function_oddsratio_two.png deleted file mode 100644 index 91ac1ee..0000000 Binary files a/inst/Plots/smoothing_function_oddsratio_two.png and /dev/null differ diff --git a/inst/Plots/smoothing_funtion.png b/inst/Plots/smoothing_funtion.png deleted file mode 100644 index 991e71c..0000000 Binary files a/inst/Plots/smoothing_funtion.png and /dev/null differ diff --git a/inst/convert_to_ascii_news.sh b/inst/convert_to_ascii_news.sh deleted file mode 100644 index 368a9b7..0000000 --- a/inst/convert_to_ascii_news.sh +++ /dev/null @@ -1,7 +0,0 @@ -# Part 1: Replaces all level 2 headers and appends a ":" at the end of the line -# Part 2: Indents all bullet points with a whitespace -# Part 3: Removes all level 2 headers -# Part 4: For all level 1 headers, add linebreak and 80 hyphens (not strictly required but clean) -# Part 5: Remove all level 1 headers - -sed -e '/^##/ s/$/:/' -e 's/^*/ */' -e 's/^## *//' -e "/^#/a\\--------------------------------------------------------------------------------" -e 's/^# *//' < NEWS.md > NEWS diff --git a/inst/doc/.gitignore b/inst/doc/.gitignore deleted file mode 100644 index 5bfb386..0000000 --- a/inst/doc/.gitignore +++ /dev/null @@ -1 +0,0 @@ -function.tutorial.html diff --git a/inst/wordpress.Rmd b/inst/wordpress.Rmd deleted file mode 100644 index 3883a19..0000000 --- a/inst/wordpress.Rmd +++ /dev/null @@ -1,168 +0,0 @@ ---- -output: - md_document - ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - # fig.path = "figures/README-", - fig.align = "center", - fig.height = 4, - fig.width = 6, - collapse = TRUE, - comment = "#>" -) -library(ggplot2) -library(cowplot) -``` - -## Load example data - -Data source: `?mgcv::predict.gam` - -```{r, results='hide'} -library(oddsratio) -suppressPackageStartupMessages(library(mgcv)) -set.seed(1234) -n <- 200 -sig <- 2 -dat <- suppressMessages(gamSim(1, n = n, scale = sig)) -dat$x4 <- as.factor(c(rep("A", 50), rep("B", 50), rep("C", 50), rep("D", 50))) - -fit.gam <- mgcv::gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + offset(x3) + x4, data = dat) -``` - -## GAM example - -### Calculate OR for specific increment step of continuous variable - -To calculate specific increment steps of `fit.gam`, we take predictor `x2` (randomly chosen) -and specify for which values we want to calculate the odds ratio. -We can see that the odds of response `y` happening are 22 times higher when predictor `x2` increases -from 0.099 to 0.198 while holding all other predictors constant. - - -```{r} -calc.oddsratio.gam(data = dat, model = fit.gam, pred = "x2", - values = c(0.099, 0.198)) -``` - -Usually, this calculation is done by setting all predictors to their mean value, -predict the response, change the desired predictor to a new value and predict the response again. -These actions results in two log odds values, respectively, which are transformed into odds by exponentiating them. Finally, the odds ratio can be calculated from these two odds values. - -### Calculate OR for level change of indicator variable - -If the predictor is a indicator variable, i.e. consists of fixed levels, you can use the function in the same way by just putting in the respective levels you are interested in: - -```{r} -calc.oddsratio.gam(data = dat, model = fit.gam, - pred = "x4", values = c("A", "B")) -``` - -Here, the change in odds of `y` happening if predictor `x4` is changing from level `A` to `B` is rather small. In detail, an increase in odds of 37.8% is reported. - -### Calculate ORs for percentage increments of predictor distribution - -To get an impression of odds ratio behaviour throughout the complete range of the smoothing function of the fitted GAM model, you can calculate odds ratios based on percentage breaks of the predictors distribution. -Here we slice predictor `x2` into 5 parts by taking the predictor values of every 20% increment step. - - -```{r} -calc.oddsratio.gam(data = dat, model = fit.gam, pred = "x2", - percentage = 20, slice = TRUE) -``` - -We can see that there is a high odds ratio reported when increasing predictor `x2` from 0.008 to 0.206 while all further predictor increases decrease the odds of response `y` happening substantially. - -### Plot GAM(M) smoothing functions - -Right now, the only (quick) possibility to plot the smoothing functions of a GAM(M) -was to use the base `plot()` function. The fiddly work to do the same using the `ggplot2` -plotting system is done by `plot_smooth.gam()`: - -```{r} -plot_smooth.gam(fit.gam, pred = "x2", title = "Predictor 'x2'") -``` - -You can further customize the look using other colors or linetypes. - -### Add odds ratio information into smoothing function plot - -So now, we have the odds ratios and we have a plot of the smoothing function. -Why not combine both? We can do so using `add.oddsratio.into.plot()`. Its main arguments -are (i) a `ggplot` plotting object containing the smooth function and a data frame -returned from `calc.oddsratio.gam()` containing information about the predictor and -the respective values we want to insert. - -```{r} -plot.object <- plot_smooth.gam(fit.gam, pred = "x2", title = "Predictor 'x2'") -or.object <- calc.oddsratio.gam(data = dat, model = fit.gam, - pred = "x2", values = c(0.099, 0.198)) - -plot <- add.oddsratio.into.plot(plot.object, or.object, height.or = 5, x.shift = 0.04) -plot -``` - -The odds ratio information is always centered between the two vertical lines. Hence it only -looks nice if the gap between the two chosen values (here 0.099 and 0.198) is large enough. -If the smoothing line crosses your inserted text, you can just correct it adjusting `height.or`. This param sets the y-location of the inserted odds ratio information. - -Depending on the digits of your chosen values (here 3), you might also need to adjust the -x-axis location of the two values so that they do not interfer with the vertical line. - -Let's do all this by inserting another odds ratio into this plot! This time we simply -take the already produced plot as an input to `add.oddsratio.into.plot()` and use a new odds ratio -object: - -```{r} -or.object2 <- calc.oddsratio.gam(data = dat, model = fit.gam, - pred = "x2", values = c(0.4, 0.6)) - -add.oddsratio.into.plot(plot, or.object2, height.or = 2.5, x.shift = 0.024, - col.line = "green4", col.text = "green4") -``` - -Note that I adjusted `x.shift` because we have only one digit this time. Also, -`height.or` was set to a lower value than in the first example to avoid an interference -with the smoothing function. - -## GLM example - -Create example data. -Data source: http://www.ats.ucla.edu/stat/r/dae/logit.htm - -```{r} -dat <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv") -dat$rank <- factor(dat$rank) -fit.glm <- glm(admit ~ gre + gpa + rank, data = dat, family = "binomial") -``` - -### Calculate odds ratio for continuous predictors - -For GLMs, the odds ratio calculation is simpler because odds ratio changes correspond to fixed predictor increases throughout the complete value range of each predictor. - -Hence, function `calc.oddsratio.glm` takes the increment steps of each predictor directly as an input in its parameter `incr`. - -To avoid false predictor/value assignments, the combinations need to be given in a list. - -Odds ratios of indicator variables are computed automatically and do always refer to the base factor level. - -Indicator predictor `rank` has four levels. Subsequently, we will get three odds ratio outputs referring to the base factor level (here: rank1). - -The output is interpreted as follows: "Having `rank2` instead of `rank1` while holding all other values constant results in a decrease in odds of 49.1% (1-0.509)". - -```{r} -calc.oddsratio.glm(data = dat, model = fit.glm, incr = list(gre = 380, gpa = 5)) -``` - -You can also set other confident intervals for GLM(M) models. The resulting data -frame will automatically adjust its column names to the specified level. - -```{r} -calc.oddsratio.glm(data = dat, model = fit.glm, - incr = list(gre = 380, gpa = 5), CI = 0.70) -``` - - diff --git a/inst/wordpress.md b/inst/wordpress.md deleted file mode 100644 index e7c7770..0000000 --- a/inst/wordpress.md +++ /dev/null @@ -1,192 +0,0 @@ -Load example data ------------------ - -Data source: `?mgcv::predict.gam` - - library(oddsratio) - suppressPackageStartupMessages(library(mgcv)) - set.seed(1234) - n <- 200 - sig <- 2 - dat <- suppressMessages(gamSim(1, n = n, scale = sig)) - dat$x4 <- as.factor(c(rep("A", 50), rep("B", 50), rep("C", 50), rep("D", 50))) - - fit.gam <- mgcv::gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + offset(x3) + x4, data = dat) - -GAM example ------------ - -### Calculate OR for specific increment step of continuous variable - -To calculate specific increment steps of `fit.gam`, we take predictor -`x2` (randomly chosen) and specify for which values we want to calculate -the odds ratio. -We can see that the odds of response `y` happening are 22 times higher -when predictor `x2` increases from 0.099 to 0.198 while holding all -other predictors constant. - - calc.oddsratio.gam(data = dat, model = fit.gam, pred = "x2", - values = c(0.099, 0.198)) - #> predictor value1 value2 oddsratio CI.low (2.5%) CI.high (97.5%) - #> 1 x2 0.099 0.198 23.32353 23.30424 23.34283 - -Usually, this calculation is done by setting all predictors to their -mean value, predict the response, change the desired predictor to a new -value and predict the response again. These actions results in two log -odds values, respectively, which are transformed into odds by -exponentiating them. Finally, the odds ratio can be calculated from -these two odds values. - -### Calculate OR for level change of indicator variable - -If the predictor is a indicator variable, i.e. consists of fixed levels, -you can use the function in the same way by just putting in the -respective levels you are interested in: - - calc.oddsratio.gam(data = dat, model = fit.gam, - pred = "x4", values = c("A", "B")) - #> predictor value1 value2 oddsratio CI.low (2.5%) CI.high (97.5%) - #> 1 x4 A B 1.377537 1.334837 1.421604 - -Here, the change in odds of `y` happening if predictor `x4` is changing -from level `A` to `B` is rather small. In detail, an increase in odds of -37.8% is reported. - -### Calculate ORs for percentage increments of predictor distribution - -To get an impression of odds ratio behaviour throughout the complete -range of the smoothing function of the fitted GAM model, you can -calculate odds ratios based on percentage breaks of the predictors -distribution. -Here we slice predictor `x2` into 5 parts by taking the predictor values -of every 20% increment step. - - calc.oddsratio.gam(data = dat, model = fit.gam, pred = "x2", - percentage = 20, slice = TRUE) - #> predictor value1 value2 perc1 perc2 oddsratio CI.low (2.5%) - #> 1 x2 0.001 0.200 0 20 2.510768e+03 1.091683e+03 - #> 2 x2 0.200 0.400 20 40 2.870699e-02 2.621879e-02 - #> 3 x2 0.400 0.599 40 60 5.761210e-01 5.556941e-01 - #> 4 x2 0.599 0.799 60 80 6.032289e-02 5.789875e-02 - #> 5 x2 0.799 0.998 80 100 4.063187e-01 7.469151e-01 - #> CI.high (97.5%) - #> 1 5.774533e+03 - #> 2 3.143133e-02 - #> 3 5.972988e-01 - #> 4 6.284853e-02 - #> 5 2.210357e-01 - -We can see that there is a high odds ratio reported when increasing -predictor `x2` from 0.008 to 0.206 while all further predictor increases -decrease the odds of response `y` happening substantially. - -### Plot GAM(M) smoothing functions - -Right now, the only (quick) possibility to plot the smoothing functions -of a GAM(M) was to use the base `plot()` function. The fiddly work to do -the same using the `ggplot2` plotting system is done by -`plot_smooth.gam()`: - - plot_smooth.gam(fit.gam, pred = "x2", title = "Predictor 'x2'") - - - -You can further customize the look using other colors or linetypes. - -### Add odds ratio information into smoothing function plot - -So now, we have the odds ratios and we have a plot of the smoothing -function. Why not combine both? We can do so using -`add.oddsratio.into.plot()`. Its main arguments are (i) a `ggplot` -plotting object containing the smooth function and a data frame returned -from `calc.oddsratio.gam()` containing information about the predictor -and the respective values we want to insert. - - plot.object <- plot_smooth.gam(fit.gam, pred = "x2", title = "Predictor 'x2'") - or.object <- calc.oddsratio.gam(data = dat, model = fit.gam, - pred = "x2", values = c(0.099, 0.198)) - - plot <- add.oddsratio.into.plot(plot.object, or.object, height.or = 5, x.shift = 0.04) - plot - - - -The odds ratio information is always centered between the two vertical -lines. Hence it only looks nice if the gap between the two chosen values -(here 0.099 and 0.198) is large enough. If the smoothing line crosses -your inserted text, you can just correct it adjusting `height.or`. This -param sets the y-location of the inserted odds ratio information. - -Depending on the digits of your chosen values (here 3), you might also -need to adjust the x-axis location of the two values so that they do not -interfer with the vertical line. - -Let's do all this by inserting another odds ratio into this plot! This -time we simply take the already produced plot as an input to -`add.oddsratio.into.plot()` and use a new odds ratio object: - - or.object2 <- calc.oddsratio.gam(data = dat, model = fit.gam, - pred = "x2", values = c(0.4, 0.6)) - - add.oddsratio.into.plot(plot, or.object2, height.or = 2.5, x.shift = 0.024, - col.line = "green4", col.text = "green4") - - - -Note that I adjusted `x.shift` because we have only one digit this time. -Also, `height.or` was set to a lower value than in the first example to -avoid an interference with the smoothing function. - -GLM example ------------ - -Create example data. -Data source: - - dat <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv") - dat$rank <- factor(dat$rank) - fit.glm <- glm(admit ~ gre + gpa + rank, data = dat, family = "binomial") - -### Calculate odds ratio for continuous predictors - -For GLMs, the odds ratio calculation is simpler because odds ratio -changes correspond to fixed predictor increases throughout the complete -value range of each predictor. - -Hence, function `calc.oddsratio.glm` takes the increment steps of each -predictor directly as an input in its parameter `incr`. - -To avoid false predictor/value assignments, the combinations need to be -given in a list. - -Odds ratios of indicator variables are computed automatically and do -always refer to the base factor level. - -Indicator predictor `rank` has four levels. Subsequently, we will get -three odds ratio outputs referring to the base factor level (here: -rank1). - -The output is interpreted as follows: "Having `rank2` instead of `rank1` -while holding all other values constant results in a decrease in odds of -49.1% (1-0.509)". - - calc.oddsratio.glm(data = dat, model = fit.glm, incr = list(gre = 380, gpa = 5)) - #> predictor oddsratio CI.low (2.5 %) CI.high (97.5 %) increment - #> 1 gre 2.364 1.054 5.396 380 - #> 2 gpa 55.712 2.229 1511.282 5 - #> 3 rank2 0.509 0.272 0.945 Indicator variable - #> 4 rank3 0.262 0.132 0.512 Indicator variable - #> 5 rank4 0.212 0.091 0.471 Indicator variable - -You can also set other confident intervals for GLM(M) models. The -resulting data frame will automatically adjust its column names to the -specified level. - - calc.oddsratio.glm(data = dat, model = fit.glm, - incr = list(gre = 380, gpa = 5), CI = 0.70) - #> predictor oddsratio CI.low (15 %) CI.high (85 %) increment - #> 1 gre 2.364 1.540 3.647 380 - #> 2 gpa 55.712 10.084 314.933 5 - #> 3 rank2 0.509 0.366 0.706 Indicator variable - #> 4 rank3 0.262 0.183 0.374 Indicator variable - #> 5 rank4 0.212 0.136 0.325 Indicator variable diff --git a/inst/wordpress_files/figure-markdown_strict/unnamed-chunk-6-1.png b/inst/wordpress_files/figure-markdown_strict/unnamed-chunk-6-1.png deleted file mode 100644 index 2c52e20..0000000 Binary files a/inst/wordpress_files/figure-markdown_strict/unnamed-chunk-6-1.png and /dev/null differ diff --git a/inst/wordpress_files/figure-markdown_strict/unnamed-chunk-7-1.png b/inst/wordpress_files/figure-markdown_strict/unnamed-chunk-7-1.png deleted file mode 100644 index 2442780..0000000 Binary files a/inst/wordpress_files/figure-markdown_strict/unnamed-chunk-7-1.png and /dev/null differ diff --git a/inst/wordpress_files/figure-markdown_strict/unnamed-chunk-8-1.png b/inst/wordpress_files/figure-markdown_strict/unnamed-chunk-8-1.png deleted file mode 100644 index 9dd9fc0..0000000 Binary files a/inst/wordpress_files/figure-markdown_strict/unnamed-chunk-8-1.png and /dev/null differ diff --git a/man/data_gam.Rd b/man/data_gam.Rd index 1defde5..ebffb86 100644 --- a/man/data_gam.Rd +++ b/man/data_gam.Rd @@ -4,7 +4,7 @@ \name{data_gam} \alias{data_gam} \title{data_gam} -\format{a \code{data.frame} randomly created numerical and non-numerical variables} +\format{a \link{tibble} randomly created numerical and non-numerical variables} \source{ Taken from ?mgcv::gam and added variable "x4" } diff --git a/man/data_glm.Rd b/man/data_glm.Rd index 91c0f1b..f2a7566 100644 --- a/man/data_glm.Rd +++ b/man/data_glm.Rd @@ -4,7 +4,7 @@ \name{data_glm} \alias{data_glm} \title{data_glm} -\format{a \code{data.frame} randomly created numerical and non-numerical variables} +\format{a \link{tibble} randomly created numerical and non-numerical variables} \source{ Taken from \url{http://www.ats.ucla.edu/stat/r/dae/logit.htm}, direct download link: \url{http://www.ats.ucla.edu/stat/data/binary.csv} diff --git a/man/gam_to_df.Rd b/man/gam_to_df.Rd index eb00f95..90d67e7 100644 --- a/man/gam_to_df.Rd +++ b/man/gam_to_df.Rd @@ -9,8 +9,7 @@ gam_to_df(model = NULL, pred = NULL) \arguments{ \item{model}{A fitted GAM(M).} -\item{pred}{Character. Predictor name for which to calculate -the odds ratio.} +\item{pred}{Character. Predictor name for which to calculate the odds ratio.} } \description{ This function converts a fitted GAM model into a tidy data frame @@ -27,10 +26,12 @@ library(mgcv) n <- 200 sig <- 2 dat <- gamSim(1, n = n, scale = sig, verbose = FALSE) -dat$x4 <- as.factor(c(rep("A", 50), rep("B", 50), rep("C", 50), - rep("D", 50))) +dat$x4 <- as.factor(c( + rep("A", 50), rep("B", 50), rep("C", 50), + rep("D", 50) +)) fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + - offset(x3) + x4, data = dat) # fit model + offset(x3) + x4, data = dat) # fit model tmp <- gam_to_df(fit_gam, "x2") } diff --git a/man/insert_or.Rd b/man/insert_or.Rd index a014420..51f0c7a 100644 --- a/man/insert_or.Rd +++ b/man/insert_or.Rd @@ -5,17 +5,17 @@ \title{Insert odds ratios of GAM(M)s into smoothing function} \usage{ insert_or(plot_object = NULL, or_object = NULL, line_col = "red", - line_size = 1.2, line_type = "solid", line_alpha = 1, text_alpha = 1, - text_size = 4, text_col = "black", rect_alpha = 0.5, rect_col = NULL, - rect = FALSE, arrow = TRUE, values = TRUE, values_yloc = 0, - values_xloc = NULL, or_yloc = 0, arrow_length = NULL, - arrow_yloc = NULL, arrow_col = NULL, arrow_xloc_r = NULL, - arrow_xloc_l = NULL) + line_size = 1.2, line_type = "solid", line_alpha = 1, + text_alpha = 1, text_size = 4, text_col = "black", + rect_alpha = 0.5, rect_col = NULL, rect = FALSE, arrow = TRUE, + values = TRUE, values_yloc = 0, values_xloc = NULL, or_yloc = 0, + arrow_length = NULL, arrow_yloc = NULL, arrow_col = NULL, + arrow_xloc_r = NULL, arrow_xloc_l = NULL) } \arguments{ \item{plot_object}{A \code{ggplot} object from \link{plot_gam}.} -\item{or_object}{A returned data.frame from \link{or_gam}.} +\item{or_object}{A \link{tibble} as returned from \link{or_gam}.} \item{line_col, line_alpha, line_type, line_size}{Aesthetics of vertical lines.} @@ -67,31 +67,39 @@ If you want to insert multiple odds ratio you have to do it iteratively. # load data (Source: ?mgcv::gam) and fit model library(mgcv) fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + - offset(x3) + x4, data = data_gam) # fit model + offset(x3) + x4, data = data_gam) # fit model # create input objects (plot + odds ratios) library(oddsratio) plot_object <- plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") -or_object1 <- or_gam(data = data_gam, model = fit_gam, - pred = "x2", values = c(0.099, 0.198)) +or_object1 <- or_gam( + data = data_gam, model = fit_gam, + pred = "x2", values = c(0.099, 0.198) +) # insert first odds ratios to plot -plot_object <- insert_or(plot_object, or_object1, or_yloc = 3, - values_xloc = 0.04, line_size = 0.5, - line_type = "dotdash", text_size = 6, - values_yloc = 0.5, arrow_col = "red") +plot_object <- insert_or(plot_object, or_object1, + or_yloc = 3, + values_xloc = 0.04, line_size = 0.5, + line_type = "dotdash", text_size = 6, + values_yloc = 0.5, arrow_col = "red" +) # calculate second odds ratio -or_object2 <- or_gam(data = data_gam, model = fit_gam, pred = "x2", - values = c(0.4, 0.6)) +or_object2 <- or_gam( + data = data_gam, model = fit_gam, pred = "x2", + values = c(0.4, 0.6) +) # add or_object2 into plot -insert_or(plot_object, or_object2, or_yloc = 2.1, values_yloc = 2, - line_col = "green4", text_col = "black", - rect_col = "green4", rect_alpha = 0.2, - line_alpha = 1, line_type = "dashed", - arrow_xloc_r = 0.01, arrow_xloc_l = -0.01, - arrow_length = 0.01, rect = TRUE) +insert_or(plot_object, or_object2, + or_yloc = 2.1, values_yloc = 2, + line_col = "green4", text_col = "black", + rect_col = "green4", rect_alpha = 0.2, + line_alpha = 1, line_type = "dashed", + arrow_xloc_r = 0.01, arrow_xloc_l = -0.01, + arrow_length = 0.01, rect = TRUE +) } \seealso{ \link{plot_gam} diff --git a/man/no_plot.Rd b/man/no_plot.Rd index 572e783..d2a10cf 100644 --- a/man/no_plot.Rd +++ b/man/no_plot.Rd @@ -23,10 +23,12 @@ library(mgcv) n <- 200 sig <- 2 dat <- gamSim(1, n = n, scale = sig, verbose = FALSE) -dat$x4 <- as.factor(c(rep("A", 50), rep("B", 50), rep("C", 50), - rep("D", 50))) +dat$x4 <- as.factor(c( + rep("A", 50), rep("B", 50), rep("C", 50), + rep("D", 50) +)) fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + - offset(x3) + x4, data = dat) # fit model + offset(x3) + x4, data = dat) # fit model tmp <- plot(fit_gam, pages = 1) # plot output tmp <- no_plot(fit_gam) # no plot output diff --git a/man/or_gam.Rd b/man/or_gam.Rd index 183291b..745a43d 100644 --- a/man/or_gam.Rd +++ b/man/or_gam.Rd @@ -12,30 +12,27 @@ or_gam(data = NULL, model = NULL, pred = NULL, values = NULL, \item{model}{A fitted GAM(M).} -\item{pred}{Character. Predictor name for which to calculate -the odds ratio.} +\item{pred}{Character. Predictor name for which to calculate the odds ratio.} -\item{values}{Numeric vector of length two. -Predictor values to estimate odds ratio from. Function is written to use the -first provided value as the "lower" one, i.e. calculating the odds ratio -'from value1 to value2'. Only used if \code{slice = FALSE}.} +\item{values}{Numeric vector of length two. Predictor values to estimate odds +ratio from. Function is written to use the first provided value as the +"lower" one, i.e. calculating the odds ratio 'from value1 to value2'. Only +used if \code{slice = FALSE}.} -\item{percentage}{Numeric. Percentage number to split the -predictor distribution into. -A value of 10 would split the predictor distribution by 10\% intervals. -Only needed if \code{slice = TRUE}.} +\item{percentage}{Numeric. Percentage number to split the predictor +distribution into. A value of 10 would split the predictor distribution by +10\% intervals. Only needed if \code{slice = TRUE}.} -\item{slice}{Logical. \code{Default = FALSE}. Whether to calculate -odds ratios for fixed increment steps over the whole predictor distribution. -See \code{percentage} for setting the increment values.} +\item{slice}{Logical. \code{Default = FALSE}. Whether to calculate odds ratios for +fixed increment steps over the whole predictor distribution. See +\code{percentage} for setting the increment values.} -\item{CI}{Numeric. Currently fixed to 95\% confidence interval level -(2.5\% - 97.5\%). -It should not be changed in a function call!} +\item{CI}{Numeric. Currently fixed to 95\% confidence interval level (2.5\% - +97.5\%). It should not be changed in a function call!} } \value{ -A data frame with (up to) eight columns. \code{perc1} and \code{perc2} -are only returned if \code{slice = TRUE}: +A data frame with (up to) eight columns. \code{perc1} and \code{perc2} are only +returned if \code{slice = TRUE}: \item{predictor}{Predictor name} \item{value1}{First value of odds ratio calculation} \item{value2}{Second value of odds ratio calculation} @@ -47,37 +44,39 @@ are only returned if \code{slice = TRUE}: } \description{ This function calculates odds ratio(s) for specific increment -steps of a GAM(M)s. - -Odds ratios can also be calculated for continuous percentage -increment steps across the whole predictor distribution using \code{slice = TRUE}. +steps of a GAM(M)s. Odds ratios can also be calculated for continuous +percentage increment steps across the whole predictor distribution using +\code{slice = TRUE}. } \details{ -Currently supported functions: \link[mgcv:gam]{mgcv::gam}, -\link[mgcv:gamm]{mgcv::gamm}, \link[gam:gam]{gam::gam}. - -For \link[mgcv:gamm]{mgcv::gamm}, the \code{model} input of -\link{or_gam} needs to be the \code{gam} output (e.g. \code{fit_gam$gam}). +Currently supported functions: \link[mgcv:gam]{mgcv::gam}, \link[mgcv:gamm]{mgcv::gamm}, +\link[gam:gam]{gam::gam}. For \link[mgcv:gamm]{mgcv::gamm}, the \code{model} input of \link{or_gam} needs to be the +\code{gam} output (e.g. \code{fit_gam$gam}). } \examples{ # load data (Source: ?mgcv::gam) and fit model library(mgcv) fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + - offset(x3) + x4, data = data_gam) # fit model + offset(x3) + x4, data = data_gam) # fit model # Calculate OR for specific increment step of continuous variable -or_gam(data = data_gam, model = fit_gam, pred = "x2", - values = c(0.099, 0.198)) +or_gam( + data = data_gam, model = fit_gam, pred = "x2", + values = c(0.099, 0.198) +) ## Calculate OR for change of indicator variable -or_gam(data = data_gam, model = fit_gam, pred = "x4", - values = c("B", "D")) +or_gam( + data = data_gam, model = fit_gam, pred = "x4", + values = c("B", "D") +) ## Calculate ORs for percentage increments of predictor distribution ## (here: 20\%) -or_gam(data = data_gam, model = fit_gam, pred = "x2", - percentage = 20, slice = TRUE) - +or_gam( + data = data_gam, model = fit_gam, pred = "x2", + percentage = 20, slice = TRUE +) } \seealso{ \link{or_glm} diff --git a/man/or_glm.Rd b/man/or_glm.Rd index 9ea95e6..837f1a3 100644 --- a/man/or_glm.Rd +++ b/man/or_glm.Rd @@ -13,20 +13,18 @@ or_glm(data, model, incr, CI = 0.95) \item{incr}{List. Increment values of each predictor.} -\item{CI}{numeric. Which confident interval to calculate. -Must be between 0 and 1. Default to 0.95} +\item{CI}{numeric. Which confident interval to calculate. Must be between 0 +and 1. Default to 0.95} } \value{ -A data frame with five columns: -\item{predictor}{Predictor name(s)} -\item{oddsratio}{Calculated odds ratio(s)} -\item{CI_low}{Lower confident interval of odds ratio} -\item{CI_high}{Higher confident interval of odds ratio} -\item{increment}{Increment of the predictor(s)} +A data frame with five columns: \item{predictor}{Predictor name(s)} +\item{oddsratio}{Calculated odds ratio(s)} \item{CI_low}{Lower confident +interval of odds ratio} \item{CI_high}{Higher confident interval of odds +ratio} \item{increment}{Increment of the predictor(s)} } \description{ -This function calculates odds ratio(s) for specific -increment steps of GLMs. +This function calculates odds ratio(s) for specific increment +steps of GLMs. } \details{ \code{CI_low} and \code{CI_high} are only calculated for GLM models because @@ -40,27 +38,32 @@ Currently supported functions: \link{glm}, ## Example with glm() # load data (source: http://www.ats.ucla.edu/stat/r/dae/logit.htm) and # fit model -fit_glm <- glm(admit ~ gre + gpa + rank, data = data_glm, - family = "binomial") # fit model +fit_glm <- glm(admit ~ gre + gpa + rank, + data = data_glm, + family = "binomial" +) # fit model # Calculate OR for specific increment step of continuous variable or_glm(data = data_glm, model = fit_glm, incr = list(gre = 380, gpa = 5)) # Calculate OR and change the confidence interval level -or_glm(data = data_glm, model = fit_glm, - incr = list(gre = 380, gpa = 5), CI = .70) +or_glm( + data = data_glm, model = fit_glm, + incr = list(gre = 380, gpa = 5), CI = .70 +) ## Example with MASS:glmmPQL() # load data library(MASS) data(bacteria) -fit_glmmPQL <- glmmPQL(y ~ trt + week, random = ~1 | ID, - family = binomial, data = bacteria, - verbose = FALSE) +fit_glmmPQL <- glmmPQL(y ~ trt + week, + random = ~ 1 | ID, + family = binomial, data = bacteria, + verbose = FALSE +) # Apply function or_glm(data = bacteria, model = fit_glmmPQL, incr = list(week = 5)) - } \seealso{ \link{or_gam} diff --git a/man/plot_gam.Rd b/man/plot_gam.Rd index f56f002..d648b6a 100644 --- a/man/plot_gam.Rd +++ b/man/plot_gam.Rd @@ -6,8 +6,9 @@ \usage{ plot_gam(model = NULL, pred = NULL, col_line = "blue", ci_line_col = "black", ci_line_type = "dashed", ci_fill = "grey", - ci_alpha = 0.4, ci_line_size = 0.8, sm_fun_size = 1.1, title = NULL, - xlab = NULL, ylab = NULL, limits_y = NULL, breaks_y = NULL) + ci_alpha = 0.4, ci_line_size = 0.8, sm_fun_size = 1.1, + title = NULL, xlab = NULL, ylab = NULL, limits_y = NULL, + breaks_y = NULL) } \arguments{ \item{model}{A fitted model of class \code{gam}.} @@ -50,11 +51,11 @@ using the \code{ggplot2} plotting system. # load data (Source: ?mgcv::gam) and fit model library(mgcv) fit_gam <- mgcv::gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + offset(x3) + x4, - data = data_gam) + data = data_gam +) library(oddsratio) plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") - } \seealso{ \link{plot_gam} diff --git a/oddsratio.Rproj b/oddsratio.Rproj index cba1b6b..7be91f5 100644 --- a/oddsratio.Rproj +++ b/oddsratio.Rproj @@ -18,4 +18,5 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageCheckArgs: --as-cran --no-vignettes PackageRoxygenize: rd,collate,namespace diff --git a/tests/testthat/test-insert-or.R b/tests/testthat/test-insert-or.R index aefb42b..4c8692a 100644 --- a/tests/testthat/test-insert-or.R +++ b/tests/testthat/test-insert-or.R @@ -2,8 +2,9 @@ context("insert_or") test_that("check bevhaviour of ggplot_build (changed in ggplot2 v2.2)", { data("data_gam") - fit_gam <- gam(y ~ s(x0) + s(I(x1 ^ 2)) + s(x2) + offset(x3) + x4, - data = data_gam) + fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + offset(x3) + x4, + data = data_gam + ) library(oddsratio) plot_object <- plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") @@ -15,7 +16,7 @@ test_that("check bevhaviour of ggplot_build (changed in ggplot2 v2.2)", { # ggplot2 v2.2.x # ymin <- ggplot_build(plot_object)$layout$panel_ranges[[1]]$y.range[1] # ymax <- ggplot_build(plot_object)$layout$panel_ranges[[1]]$y.range[2] - + # ggplot2 v2.3.x library(ggplot2) ymin <- ggplot_build(plot_object)$layout$panel_params[[1]]$y.range[1] @@ -24,6 +25,5 @@ test_that("check bevhaviour of ggplot_build (changed in ggplot2 v2.2)", { expect_equal(class(ymin), "numeric") expect_equal(class(ymax), "numeric") expect_equal(length(ggplot_build(plot_object)$layout$panel_params[[1]]$ - y.range), 2) - + y.range), 2) }) diff --git a/tests/testthat/test-or-gam.R b/tests/testthat/test-or-gam.R index ec307bc..cfa74ec 100644 --- a/tests/testthat/test-or-gam.R +++ b/tests/testthat/test-or-gam.R @@ -3,50 +3,54 @@ context("or_gam") library(mgcv) test_that("or_gam works for continuous variable", { - data("data_gam") library(mgcv) - fit_gam <- gam(y ~ s(x0) + s(I(x1 ^ 2)) + s(x2) + - offset(x3) + x4, data = data_gam) # fit model + fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + + offset(x3) + x4, data = data_gam) # fit model # Calculate OR for specific increment step of continuous variable - out <- or_gam(data = data_gam, model = fit_gam, pred = "x2", - values = c(0.099, 0.198)) + out <- or_gam( + data = data_gam, model = fit_gam, pred = "x2", + values = c(0.099, 0.198) + ) expect_length(out, 6) }) test_that("or_gam works with indicator variables", { - data("data_gam") library(mgcv) - fit_gam <- gam(y ~ s(x0) + s(I(x1 ^ 2)) + s(x2) + - offset(x3) + x4, data = data_gam) # fit model + fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + + offset(x3) + x4, data = data_gam) # fit model ## Calculate OR for change of indicator variable - out <- or_gam(data = data_gam, model = fit_gam, pred = "x4", - values = c("B", "D")) + out <- or_gam( + data = data_gam, model = fit_gam, pred = "x4", + values = c("B", "D") + ) expect_length(out, 6) ## Calculate ORs for percentage increments of predictor distribution ## (here: 20%) - or_gam(data = data_gam, model = fit_gam, pred = "x2", - percentage = 20, slice = TRUE) - + or_gam( + data = data_gam, model = fit_gam, pred = "x2", + percentage = 20, slice = TRUE + ) }) test_that("or_gam works on percentage increments", { - data("data_gam") library(mgcv) - fit_gam <- gam(y ~ s(x0) + s(I(x1 ^ 2)) + s(x2) + - offset(x3) + x4, data = data_gam) # fit model + fit_gam <- gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + + offset(x3) + x4, data = data_gam) # fit model ## Calculate ORs for percentage increments of predictor distribution ## (here: 20%) - out <- or_gam(data = data_gam, model = fit_gam, pred = "x2", - percentage = 20, slice = TRUE) + out <- or_gam( + data = data_gam, model = fit_gam, pred = "x2", + percentage = 20, slice = TRUE + ) expect_length(out, 8) }) diff --git a/tests/testthat/test-or-glm.R b/tests/testthat/test-or-glm.R index eebb5d7..dbeef42 100644 --- a/tests/testthat/test-or-glm.R +++ b/tests/testthat/test-or-glm.R @@ -1,6 +1,7 @@ context("or_glm") -pacman::p_load(mgcv, MASS) +library("mgcv") +library("MASS") test_that("correct level count of indicator variable for glm", { data("data_glm") @@ -9,18 +10,17 @@ test_that("correct level count of indicator variable for glm", { out <- or_glm(data = data_glm, model = fit_glm) expect_length(out$predictor, length(levels(data_glm$rank)) - 1) - }) test_that("or_glm works with glmmPQL", { - data(bacteria) - fit_glmmpql <- glmmPQL(y ~ trt + week, random = ~1 | ID, - family = binomial, data = bacteria, - verbose = FALSE) + fit_glmmpql <- glmmPQL(y ~ trt + week, + random = ~ 1 | ID, + family = binomial, data = bacteria, + verbose = FALSE + ) # Apply function out <- or_glm(data = bacteria, model = fit_glmmpql, incr = list(week = 5)) expect_length(out, 5) - }) diff --git a/tic.R b/tic.R index 8141ed4..7b28586 100644 --- a/tic.R +++ b/tic.R @@ -1,28 +1,5 @@ -if (Sys.getenv("NB") != "w/ covr" | Sys.getenv("NB") != "w/ lintr") { - add_package_checks() -} - -if (Sys.getenv("id_rsa") != "") { - - get_stage("before_deploy") %>% - add_step(step_setup_ssh()) - - get_stage("deploy") %>% - add_step(step_build_pkgdown()) %>% - add_step(step_push_deploy(path = "docs", branch = "gh-pages")) - -} - -if (Sys.getenv("NB") == "w/ covr" | Sys.getenv("NB") == "w/ lintr") { - - if (Sys.getenv("NB") == "w/ lintr") { +do_package_checks(error_on = "error") - get_stage("after_success") %>% - step_run_code(lintr::lint_package(linters = with_defaults(commented_code_linter = NULL, - closed_curly_linter = closed_curly_linter(allow_single_line = TRUE), - open_curly_linter = open_curly_linter(allow_single_line = TRUE)))) - } else { - get_stage("after_success") %>% - step_run_code(covr::codecov(quiet = FALSE)) - } +if (ci_has_env("BUILD_PKGDOWN")) { + do_pkgdown(orphan = TRUE) } diff --git a/vignettes/function_tutorial.Rmd b/vignettes/function_tutorial.Rmd deleted file mode 100644 index 2039714..0000000 --- a/vignettes/function_tutorial.Rmd +++ /dev/null @@ -1,173 +0,0 @@ ---- -title: "Function tutorial" -author: "Patrick Schratz" -date: "`r Sys.Date()`" -output: - rmarkdown::html_vignette: - toc: TRUE -vignette: > - %\VignetteIndexEntry{function-tutorial} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - # fig.path = "figures/README-", - fig.align = "center", - fig.height = 4, - fig.width = 6, - collapse = TRUE, - comment = "#>" -) -library(ggplot2) -library(cowplot) -``` - -## Load example data & fit model - -Data source: `?mgcv::predict.gam` - -```{r, results='hide'} -library(oddsratio) - -fit_gam <- mgcv::gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + offset(x3) + x4, - data = data_gam) -``` - -## GAM example - -### Calculate OR for specific increment step of continuous variable - -To calculate specific increment steps of `fit_gam`, we take predictor `x2` (randomly chosen) -and specify for which values we want to calculate the odds ratio. -We can see that the odds of response `y` happening are 22 times higher when predictor `x2` increases -from 0.099 to 0.198 while holding all other predictors constant. - - -```{r} -or_gam(data = data_gam, model = fit_gam, pred = "x2", - values = c(0.099, 0.198)) -``` - -Usually, this calculation is done by setting all predictors to their mean value, -predict the response, change the desired predictor to a new value and predict the response again. -These actions results in two log odds values, respectively, which are transformed into odds by exponentiating them. Finally, the odds ratio can be calculated from these two odds values. - -### Calculate OR for level change of indicator variable - -If the predictor is an indicator variable, i.e. consists of fixed levels, you can use the function in the same way by just putting in the respective levels you are interested in: - -```{r} -or_gam(data = data_gam, model = fit_gam, - pred = "x4", values = c("A", "B")) -``` - -Here, the change in odds of `y` happening if predictor `x4` is changing from level `A` to `B` is rather small. In detail, an increase in odds of 37.8% is reported. - -### Calculate ORs for percentage increments of predictor distribution - -To get an impression of odds ratio behaviour throughout the complete range of the smoothing function of the fitted GAM model, you can calculate odds ratios based on percentage breaks of the predictors distribution. -Here we slice predictor `x2` into 5 parts by taking the predictor values of every 20% increment step. - - -```{r} -or_gam(data = data_gam, model = fit_gam, pred = "x2", - percentage = 20, slice = TRUE) -``` - -We can see that there is a high odds ratio reported when increasing predictor `x2` from 0.008 to 0.206 while all further predictor increases decrease the odds of response `y` happening substantially. - -### Plot GAM(M) smoothing functions - -Right now, the only (quick) possibility to plot the smoothing functions of a GAM(M) -was to use the base `plot()` function. The fiddly work to do the same using the `ggplot2` -plotting system is done by `plot_gam()`: - -```{r} -plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") -``` - -You can further customize the look using other colors or linetypes. - -### Add odds ratio information into smoothing function plot - -So now, we have the odds ratios and we have a plot of the smoothing function. -Why not combine both? We can do so using `insert_or()`. Its main arguments -are (i) a `ggplot` plotting object containing the smooth function and a data frame -returned from `or_gam()` containing information about the predictor and -the respective values we want to insert. - -```{r} -plot_object <- plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") -or_object <- or_gam(data = data_gam, model = fit_gam, - pred = "x2", values = c(0.099, 0.198)) - -plot <- insert_or(plot_object, or_object, or_yloc = 3, - values_xloc = 0.05, arrow_length = 0.02, - arrow_col = "red") -plot -``` - -The odds ratio information is always centered between the two vertical lines. Hence it only -looks nice if the gap between the two chosen values (here 0.099 and 0.198) is large enough. -If the smoothing line crosses your inserted text, you can just correct it adjusting `or_yloc`. This param sets the y-location of the inserted odds ratio information. - -Depending on the digits of your chosen values (here 3), you might also need to adjust the -x-axis location of the two values so that they do not interfer with the vertical line. - -Let's do all this by inserting another odds ratio into this plot! This time we simply -take the already produced plot as an input to `insert_or()` and use a new odds ratio -object: - -```{r} -or_object2 <- or_gam(data = data_gam, model = fit_gam, - pred = "x2", values = c(0.4, 0.6)) - -insert_or(plot, or_object2, or_yloc = 2.1, values_yloc = 2, - line_col = "green4", text_col = "black", - rect_col = "green4", rect_alpha = 0.2, - line_alpha = 1, line_type = "dashed", - arrow_xloc_r = 0.01, arrow_xloc_l = -0.01, - arrow_length = 0.02, rect = TRUE) -``` - -Using `rect = TRUE`, you can additionally highlight certain odds ratio intervals. -Aesthetics like opacity or color are fully customizable. - -## GLM example - -Fit model. -Data source: https://stats.idre.ucla.edu/stat/data/binary.csv - -```{r} -fit_glm <- glm(admit ~ gre + gpa + rank, data = data_glm, family = "binomial") -``` - -### Calculate odds ratio for continuous predictors - -For GLMs, the odds ratio calculation is simpler because odds ratio changes correspond to fixed predictor increases throughout the complete value range of each predictor. - -Hence, function `or_glm` takes the increment steps of each predictor directly as an input in its parameter `incr`. - -To avoid false predictor/value assignments, the combinations need to be given in a list. - -Odds ratios of indicator variables are computed automatically and do always refer to the base factor level. - -Indicator predictor `rank` has four levels. Subsequently, we will get three odds ratio outputs referring to the base factor level (here: rank1). - -The output is interpreted as follows: "Having `rank2` instead of `rank1` while holding all other values constant results in a decrease in odds of 49.1% (1-0.509)". - -```{r} -or_glm(data = data_glm, model = fit_glm, incr = list(gre = 380, gpa = 5)) -``` - -You can also set other confident intervals for GLM(M) models. The resulting data -frame will automatically adjust its column names to the specified level. - -```{r} -or_glm(data = data_glm, model = fit_glm, - incr = list(gre = 380, gpa = 5), CI = 0.70) -``` - - diff --git a/vignettes/oddsratio.Rmd b/vignettes/oddsratio.Rmd new file mode 100644 index 0000000..dbb4397 --- /dev/null +++ b/vignettes/oddsratio.Rmd @@ -0,0 +1,180 @@ +--- +title: "oddsratio tutorial" +author: "Patrick Schratz" +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: TRUE +vignette: > + %\VignetteIndexEntry{function-tutorial} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + # fig.path = "figures/README-", + fig.align = "center", + fig.height = 4, + fig.width = 6, + collapse = TRUE, + comment = "#>" +) +library("ggplot2") +library("cowplot") +``` + +## Load example data & fit GAM model + +Data source: `?mgcv::predict.gam` + +First, fit a simple GAM model. + +```{r, results='hide'} +library("oddsratio") + +fit_gam <- mgcv::gam(y ~ s(x0) + s(I(x1^2)) + s(x2) + offset(x3) + x4, + data = data_gam) +``` + +## GAM example + +### Calculating odds ratio for specific increment steps of a continuous variable + +In this example we take predictor `x2` (randomly chosen). +We need to define start and stop values via argument `values`. + +```{r} +or_gam(data = data_gam, model = fit_gam, pred = "x2", + values = c(0.099, 0.198)) +``` + +In the plot it turns out that the odds of response `y` happening are 22 times higher when predictor `x2` increases from 0.099 to 0.198 while holding all other predictors constant. + +What is going on behind the scenes? +Usually, this calculation is done by + +1. setting all predictors to their mean value, +1. predict the response, +1. change the desired predictor to a new value and predict the response again. +These steps result in two "log odds" values, which are transformed into "odds" afterwards. +Finally, the odds ratio can be calculated from these two odds values. + +### Calculating odds ratio for a level change of a nominal variable + +If the predictor is an indicator/nominal/factor variable (i.e. consists of fixed levels) you can use the function in the same way by just putting in the respective factor levels: + +```{r} +or_gam(data = data_gam, model = fit_gam, + pred = "x4", values = c("A", "B")) +``` + +Here, the change in odds of `y` happening if predictor `x4` is changing from level `A` to `B` is rather small. +In detail, an increase in odds of 37.8% is reported. + +### Calculating odds ratio for percentage increments of a continuous predictor + +To get an impression of odds ratio changes throughout the complete range of the smoothing function of the fitted GAM model for a specific predictor, you can calculate odds ratios based on percentage breaks of the predictors distribution. +Here we slice predictor `x2` into 5 parts by taking the predictor values of every 20% increment step. + +```{r} +or_gam(data = data_gam, model = fit_gam, pred = "x2", + percentage = 20, slice = TRUE) +``` + +We can see that there is a high odds ratio reported when increasing predictor `x2` from 0.008 to 0.206 while all further predictor increases decrease the odds of response `y` happening substantially. + +### Plot GAM(M) smoothing functions + +Plotting smoother functions of GAM models is not very well supported in R. +`plot_gam()` helps to simplify things: + +```{r} +plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") + + theme_cowplot() +``` + +You can further customize the look using other colors or line types. +I highly recommend the themes from the [cowplot](https://github.com/wilkelab/cowplot) in combination with `oddsratio`. +However, you are of course free to go ahead with any other theme you prefer. + +### Add odds ratio information into smoothing function plot + +So now, we computed the odds ratios and created a plot of a GAM smoothing function. +Why not combine both? +This is what `insert_or()` is for. +Its main arguments are + +- `ggplot` plotting object containing the smooth function and +- a data frame returned from `or_gam()` containing information about the predictor and the respective values that should be inserted. + +```{r} +plot_object <- plot_gam(fit_gam, pred = "x2", title = "Predictor 'x2'") +or_object <- or_gam(data = data_gam, model = fit_gam, + pred = "x2", values = c(0.099, 0.198)) + +plot <- insert_or(plot_object, or_object, or_yloc = 3, + values_xloc = 0.05, arrow_length = 0.02, + arrow_col = "red") +plot + + theme_cowplot() +``` + +The odds ratio information is always centered between the two vertical lines. Hence it only looks nice if the gap between the two chosen values (here 0.099 and 0.198) is large enough. +If the smoothing line crosses your inserted text, you can just correct it adjusting `or_yloc`. +This argument sets the y-location of the inserted odds ratio information. + +Depending on the digits of your chosen values (here 3), you might also need to adjust the x-axis location of the two values so that they do not interfere with the vertical line. + +Let's do all of this by inserting another odds ratio result into this plot! +This time we simply take the already produced plot as an input to `insert_or()` and use a new odds ratio object: + +```{r} +or_object2 <- or_gam(data = data_gam, model = fit_gam, + pred = "x2", values = c(0.4, 0.6)) + +insert_or(plot, or_object2, or_yloc = 2.1, values_yloc = 2, + line_col = "green4", text_col = "black", + rect_col = "green4", rect_alpha = 0.2, + line_alpha = 1, line_type = "dashed", + arrow_xloc_r = 0.01, arrow_xloc_l = -0.01, + arrow_length = 0.02, rect = TRUE) + + theme_cowplot() +``` + +Using `rect = TRUE`, you can additionally highlight certain odds ratio intervals. +Aesthetics like opacity or color are fully customizable. + +## GLM example + +Fit model. +Data source: https://stats.idre.ucla.edu/stat/data/binary.csv + +```{r} +fit_glm <- glm(admit ~ gre + gpa + rank, data = data_glm, family = "binomial") +``` + +### Calculate odds ratio for continuous predictors + +For GLMs, the odds ratio calculation is simpler because odds ratio changes correspond to fixed predictor increases throughout the complete value range of each predictor. + +Hence, function `or_glm` takes the increment steps of each predictor directly as an input in its parameter `incr`. +To avoid false predictor/value assignments, the combinations need to be given in a `list`. +Odds ratios of indicator variables are computed automatically and always refer to the base factor level. + +The indicator predictor `rank` in this dataset has four levels. +Subsequently, we will get three odds ratio outputs referring to the base factor level (here: `rank1`). + +The output can be interpreted as follows: "Having `rank2` instead of `rank1` while holding all other values constant results in a decrease in odds of 49.1% (1-0.509)". + +```{r} +or_glm(data = data_glm, model = fit_glm, incr = list(gre = 380, gpa = 5)) +``` + +You can also set other confident intervals for GLM(M) models. +The resulting `tibble` will automatically adjust the column names to the specified level. + +```{r} +or_glm(data = data_glm, model = fit_glm, + incr = list(gre = 380, gpa = 5), CI = 0.70) +```