diff --git a/.Rbuildignore b/.Rbuildignore index c071ea6..f83ab13 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,6 +8,7 @@ cran-comment.md ## Avoid Makefile, testing-related files, and archives Makefile +revdep revdep_check.txt tableone.Rcheck .*.tar.gz diff --git a/.gitignore b/.gitignore index b5f15b2..8df890f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ *.Rhistory *.tar.gz *.Rcheck +revdep diff --git a/.travis.yml b/.travis.yml index 8275323..722ca19 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,17 @@ -# Sample .travis.yml for R projects +# .travis.yml for Travis CI +# https://docs.travis-ci.com/user/languages/r +# https://github.com/craigcitro/r-travis/wiki/Porting-to-native-R-support-in-Travis language: r +r: + - oldrel + - release + - devel +cache: packages +# helpful when preparing your package for submission to CRAN warnings_are_errors: true -sudo: required +# No need for sudo as R is natively supported now. +sudo: false env: global: diff --git a/DESCRIPTION b/DESCRIPTION index 554914d..85a40f2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: tableone Type: Package Title: Create "Table 1" to Describe Baseline Characteristics -Version: 0.7.3 -Date: 2015-11-10 +Version: 0.8.0 +Date: 2017-06-15 Author: Kazuki Yoshida, Justin Bohn. Maintainer: Kazuki Yoshida Description: Creates "Table 1", i.e., description of baseline patient @@ -19,7 +19,8 @@ Imports: MASS, e1071, zoo, - gmodels + gmodels, + nlme Suggests: survival, testthat, @@ -28,7 +29,9 @@ Suggests: Matching, reshape2, ggplot2, - knitr + rmarkdown, + geepack, + lme4 URL: https://github.com/kaz-yos/tableone -VignetteBuilder: knitr -RoxygenNote: 5.0.0 +VignetteBuilder: rmarkdown +RoxygenNote: 6.0.1 diff --git a/Makefile b/Makefile index d3b5dd7..0d68c74 100644 --- a/Makefile +++ b/Makefile @@ -29,8 +29,8 @@ PKG_FILES := DESCRIPTION NAMESPACE NEWS $(R_FILES) $(TST_FILES) $(SRC_FILES) $(V ### Define targets ## test just runs testthat scripts. No dependencies. 
-test: - Rscript -e "devtools::test()" | tee test-all.txt +test: NAMESPACE + Rscript -e "options(width = 120); devtools::test()" | tee test-all.txt ## build_win always build regardless of file update status ## Links to results e-mailed (no useful output locally) @@ -45,29 +45,30 @@ build: $(PKG_NAME)_$(PKG_VERSION).tar.gz ## (file target) The *.tar.gz file depends on package files including NAMESPACE, ## and build *.tar.gz file from these. $(PKG_NAME)_$(PKG_VERSION).tar.gz: $(PKG_FILES) - R CMD build ../${PKG_NAME} + cp -a ${VIG_FILES} inst/doc/ + Rscript -e "devtools::build(pkg = '.', path = '.', manual = TRUE)" ## (file target) NAMESPACE depends on *.R files, and excecute roxygen2 on these. ## methods::is() is not automatically loaded by roxygen2 version 4 NAMESPACE: $(R_FILES) - Rscript -e "library(methods); library(roxygen2); roxygenize('.')" + Rscript -e "devtools::document('.')" ## check requires the *.tar.gz file, and execute strict tests on it. check: $(PKG_NAME)_$(PKG_VERSION).tar.gz - R CMD check --as-cran ./$(PKG_NAME)_$(PKG_VERSION).tar.gz | tee cran-check.txt + Rscript -e "options(width = 120); devtools::check(pkg = '.', check_dir = '.', manual = TRUE)" | tee cran-check.txt ## revdep requires the *.tar.gz file, and execute strict tests on it. revdep: $(PKG_NAME)_$(PKG_VERSION).tar.gz - Rscript -e "devtools::revdep_check()" | tee revdep_check.txt + Rscript -e "options(width = 120); devtools::revdep_check()" | tee revdep_check.txt ## install requires the *.tar.gz file, and execute installation using it. install: $(PKG_NAME)_$(PKG_VERSION).tar.gz - R CMD install ./$(PKG_NAME)_$(PKG_VERSION).tar.gz + Rscript -e "devtools::install('.')" ## clean has no dependency, and execute removal of make output files. 
clean: - -rm -f $(PKG_NAME)_*.tar.gz + -rm -f $(PKG_NAME)_$(PKG_VERSION).tar.gz -rm -r -f $(PKG_NAME).Rcheck -rm -r -f man/*.Rd -rm -r -f NAMESPACE diff --git a/NAMESPACE b/NAMESPACE index 16f7d79..9329bf3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,8 +27,11 @@ importFrom(stats,fisher.test) importFrom(stats,kruskal.test) importFrom(stats,median) importFrom(stats,oneway.test) +importFrom(stats,qnorm) importFrom(stats,quantile) importFrom(stats,sd) importFrom(stats,var) +importFrom(stats,vcov) importFrom(stats,xtabs) importFrom(utils,combn) +importFrom(utils,tail) diff --git a/NEWS b/NEWS index db02b15..c17a36e 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,58 @@ +tableone 0.8.0 (2017-06-15) +---------------------------------------------------------------- + +NEW FEATURES + +* The "missing" option for the print methods was implemented. If + TRUE, a column called "Missing" is added as the rightmost column + of the formatted table. This represents the percentage of missing + observations in each variable. Please note this is the percentage + with respect to the unweighted raw observations even in weighted + tables. + +* The "padColnames" option was added to the print.TableOne method. If + TRUE, the column names of the formatted table become space-padded + to center them. + + +tableone 0.7.6 (2016-07-12) +---------------------------------------------------------------- + +BUG FIXES + +* The explanation for the "factorVars" argument for the functions + CreateTableOne and svyCreateTableOne was changed for clarity. + When factor variables are included in the argument, they are + releveled to exclude empty levels. This was not clearly documented + in the previous documentation. Thanks @eribul. + +* svyrep.design objects (survey design objects with replicate weights) + are allowed for the data argument in svyTableOne. This is considered + experimental. Thanks @przemo. 
+ + +tableone 0.7.5 (2016-04-10) +---------------------------------------------------------------- + +BUG FIXES + +* ShowRegTable() now correctly supports models fit with geepack, + nlme, and lme4. + + +tableone 0.7.4 (2016-03-31) +---------------------------------------------------------------- + +NEW FEATURE + +* Define SMD := 0 when the numerator is 0 even if the denominator + is also 0. This is more intuitive because a constant compared + across two groups will give an SMD of 0 rather than NaN (0/0). + For example, if two groups being compared both only have one + gender (all female or all male), then SMD for the gender + variable is defined as 0. + + tableone 0.7.3 (2015-11-10) ---------------------------------------------------------------- diff --git a/R/CreateCatTable.R b/R/CreateCatTable.R index a1dc05a..7c5a85e 100644 --- a/R/CreateCatTable.R +++ b/R/CreateCatTable.R @@ -100,6 +100,10 @@ function(vars, # character vector of variable n ## Abort if no variables exist at this point ModuleStopIfNoVarsLeft(vars) + ## Get the missing percentage for each variable (no strata). + ## This has to happen before includeNA is used. + percentMissing <- ModulePercentMissing(data[vars]) + ## Extract necessary variables (unused variables are not included in dat) dat <- data[c(vars)] @@ -199,7 +203,8 @@ function(vars, # character vector of variable n attributes(result) <- c(attributes(result), list(pValues = pValues), list(xtabs = listXtabs), - list(smd = smds)) + list(smd = smds), + list(percentMissing = percentMissing)) ## Return return(result) diff --git a/R/CreateContTable.R b/R/CreateContTable.R index 0e06371..bd9bb7f 100644 --- a/R/CreateContTable.R +++ b/R/CreateContTable.R @@ -103,6 +103,9 @@ function(vars, # character vector of variable ## Abort if no variables exist at this point ModuleStopIfNoVarsLeft(vars) + ## Get the missing percentage for each variable (no strata). 
+ percentMissing <- ModulePercentMissing(data[vars]) + ## Extract necessary variables dat <- data[c(vars)] @@ -249,7 +252,8 @@ function(vars, # character vector of variable ## Give additional attributes attributes(result) <- c(attributes(result), list(pValues = pValues), - list(smd = smds)) + list(smd = smds), + list(percentMissing = percentMissing)) ## Return return(result) diff --git a/R/CreateTableOne.R b/R/CreateTableOne.R index a3af750..97ed3b9 100644 --- a/R/CreateTableOne.R +++ b/R/CreateTableOne.R @@ -5,7 +5,7 @@ ##' @param vars Variables to be summarized given as a character vector. Factors are handled as categorical variables, whereas numeric variables are handled as continuous variables. If empty, all variables in the data frame specified in the data argument are used. ##' @param strata Stratifying (grouping) variable name(s) given as a character vector. If omitted, the overall results are returned. ##' @param data A data frame in which these variables exist. All variables (both vars and strata) must be in this data frame. -##' @param factorVars Numerically coded variables that should be handled as categorical variables given as a character vector. If omitted, only factors are considered categorical variables. If all categorical variables in the dataset are already factors, this option is not necessary. The variables specified here must also be specified in the \code{vars} argument. +##' @param factorVars Numerically coded variables that should be handled as categorical variables given as a character vector. Do not include factors, unless you need to relevel them by removing empty levels. If omitted, only factors are considered categorical variables. The variables specified here must also be specified in the \code{vars} argument. ##' @param includeNA If TRUE, NA is handled as a regular factor level rather than missing. NA is shown as the last factor level in the table. Only effective for categorical variables. 
##' @param test If TRUE, as in the default and there are more than two groups, groupwise comparisons are performed. ##' @param testNormal A function used to perform the normal assumption based tests. The default is \code{oneway.test}. This is equivalent of the t-test when there are only two groups. @@ -141,6 +141,9 @@ function(vars, # character vector of variab test <- ModuleReturnFalseIfNoStrata(strata, test) smd <- ModuleReturnFalseIfNoStrata(strata, smd) + ## Get the missing percentage for each variable (no strata). + percentMissing <- ModulePercentMissing(data[vars]) + ## Get the classes of the variables varClasses <- lapply(data[vars], class) @@ -238,7 +241,9 @@ function(vars, # character vector of variab logiFactors = logiFactors, ## names of vars of each type varFactors = varFactors, - varNumerics = varNumerics)) + varNumerics = varNumerics, + ## Missing data percentage for each variable (no strata). + percentMissing = percentMissing)) ## Give a class class(TableOneObject) <- "TableOne" diff --git a/R/ShowRegTable.R b/R/ShowRegTable.R index 9434347..1906188 100644 --- a/R/ShowRegTable.R +++ b/R/ShowRegTable.R @@ -41,12 +41,39 @@ ShowRegTable <- function(model, exp = TRUE, digits = 2, pDigits = 3, printToggle fmt1 <- paste0("%.", digits, "f") fmt2 <- paste0("%.", pDigits, "f") - ## Obtain necessary data - ## The model must have summary and confint methods - modelCoef <- coef(model) - modelConfInt <- suppressMessages(ciFun(model)) - modelSummaryMat <- coef(summary(model)) - modelP <- modelSummaryMat[,ncol(modelSummaryMat)] + ## Extract coefficients + if (any(class(model) %in% c("lme"))) { + ## nlme needs special handling + ## Use column 2 because it is the point estimate + modelCoef <- nlme::intervals(model)[[1]][, 2] + } else if (any(class(model) %in% c("lmerMod","glmerMod"))) { + ## (g)lmer gives confint for other extra parameters + modelCoef <- coef(summary(model))[,1] + } else { + modelCoef <- coef(model) + } + + ## Extract confidence intervals + if 
(any(class(model) %in% c("lme"))) { + ## nlme needs special handling + ## Drop column 2 because it is the point estimate + modelConfInt <- nlme::intervals(model)[[1]][, -2] + } else if (any(class(model) %in% c("lmerMod","glmerMod"))) { + ## (g)lmer gives confint for other extra parameters + modelConfInt <- tail(suppressMessages(ciFun(model)), length(modelCoef)) + } else { + modelConfInt <- suppressMessages(ciFun(model)) + } + + ## P-value extraction + if (any(class(model) %in% c("gls", "lme"))) { + ## nlme needs special handling + modelSummaryMat <- summary(model)$tTable + } else { + modelSummaryMat <- coef(summary(model)) + } + ## Extract p value from + modelP <- modelSummaryMat[,ncol(modelSummaryMat)] ## Create the result matrix with beta and two columns of confidence interval resMat <- cbind(beta = modelCoef, diff --git a/R/modules-ShowRegTable.R b/R/modules-ShowRegTable.R new file mode 100755 index 0000000..6f32613 --- /dev/null +++ b/R/modules-ShowRegTable.R @@ -0,0 +1,27 @@ +#!/usr/local/bin/Rscript + +################################################################################ +### Modules to support ShowRegTable() +## +## Created on: 2016-03-19 +## Author: Kazuki Yoshida +################################################################################ + +### geepack support + +## cov.unscaled +## the unscaled (dispersion = 1) estimated covariance matrix of the estimated coefficients. +## cov.scaled (This is appropriate.) +## ditto, scaled by dispersion. +vcov.geeglm <- function(object, ...) 
{ + summary(object)$cov.scaled +} + +confint.geeglm <- function(fit, level = 0.95) { + coefs <- coef(fit) + ses <- sqrt(diag(vcov(fit))) + q <- qnorm(p = (1 - level) / 2, lower.tail = FALSE) ## two-sided Wald interval: (1 - level) / 2 in each tail + + data.frame(lower = coefs - q * ses, + upper = coefs + q * ses) +} diff --git a/R/modules-constructors.R b/R/modules-constructors.R index b6c642d..f6d0094 100644 --- a/R/modules-constructors.R +++ b/R/modules-constructors.R @@ -320,3 +320,13 @@ ModuleApproxExactTests <- function(result, strata, dat, strataVarName, ## Return both xtabs and p value df list(pValues = pValues, xtabs = listXtabs) } + + +### +### Module for marginal missing percentage handling +################################################################################ + +## Returns a vector of missing percentage given data frame +ModulePercentMissing <- function(data) { + unlist(lapply(data, function(x) {sum(is.na(x)) / length(x) * 100})) +} diff --git a/R/modules-print.R b/R/modules-print.R index 7637e79..f30a8ad 100644 --- a/R/modules-print.R +++ b/R/modules-print.R @@ -64,6 +64,17 @@ ModuleCreateStrataNames <- function(TableObject) { } +## Percentage formatter +ModuleFormatPercents <- function(percents, digits) { + + fmt <- paste0("%.", digits, "f") + out <- sprintf(fmt = fmt, percents) + + ## right justify by adding spaces + format(out, justify = "right") +} + + ## p-value formatter ModuleFormatPValues <- function(pValues, pDigits) { @@ -128,6 +139,40 @@ ModuleReturnDimHeaders <- function(TableObject) { } +## Module to mid justify column names considering max width +ModuleMidJustifyColnames <- function(mat) { + + ## Extract column names + colNames <- colnames(mat) + + ## Widths of column names + widthsColNames <- nchar(colNames) + + ## Obtain max width for each column + maxWidths <- unlist(lapply(seq_len(ncol(mat)), function(i) { + max(nchar(mat[,i])) + })) + + ## Half of the difference should be padded to the left. 
+ nPads <- ceiling((maxWidths - widthsColNames) / 2) + ## Do not allow negative numbers + nPads <- nPads * as.numeric(nPads >= 0) + + ## Create a vector of padding spaces + pads <- unlist(lapply(nPads, function(n) { + ifelse(n > 0, + paste0(rep(" ", n), collapse = ""), + "") + })) + + ## Manipulate + colnames(mat) <- paste0(pads, colNames) + + ## Return matrix + mat +} + + ## Module to remove spaces from the result matrix ModuleRemoveSpaces <- function(mat, noSpaces) { @@ -582,8 +627,8 @@ ModuleAddSpacesToTable <- function(FmtElementTables, nSpacesToAdd, showAllLevels ## Extract Cont/CatTable elements of x and dispatch print() appropriately ModuleFormatTables <- function(x, catDigits, contDigits, - ## Generic argumetns passed - test, smd, + ## Generic arguments passed + test, smd, missing, explain, pDigits, ## print.CatTable arguments passed format, exact, @@ -628,7 +673,7 @@ ModuleFormatTables <- function(x, catDigits, contDigits, ## The rests are just passed ## generic arguments passed - test = test, smd = smd, + test = test, smd = smd, missing = missing, explain = explain, pDigits = pDigits, ## print.CatTable arguments passed diff --git a/R/modules-smd.R b/R/modules-smd.R index 1f92e01..603834e 100644 --- a/R/modules-smd.R +++ b/R/modules-smd.R @@ -87,7 +87,15 @@ StdDiffFromLstMeans <- function(lstMeans) { ## are assessed for 0's. If all remaining ## are zeros or no element remained (all NA), ## all() returns TRUE, and sqMD is forced to NaN. - sqMD <- NaN + + if (all(!is.na(T_C) & (T_C == 0))) { + ## If the mean difference vector is a zero vector, + ## the distance can be defined 0. + sqMD <- 0 + } else { + sqMD <- NaN + } + } else { ## Squared Mahalanobis distance sqMD <- t(T_C) %*% MASS::ginv(S) %*% T_C @@ -195,6 +203,11 @@ StdDiff <- function(variable, group, binary = FALSE, na.rm = TRUE) { out <- meanDiffs / sqrt(varMeans) + ## If mean difference is zero and variance is zero, + ## only one constant exists across two groups. 
+ ## In this case, the SMD can be defined zero, rather than NaN from 0/0. + out[is.na(out) & !is.na(meanDiffs) & (meanDiffs == 0) & !is.na(varMeans) & (varMeans == 0)] <- 0 + ## This lower.tri() approach is actually giving 2vs1, 3vs1, etc ## opposite of stated 1vs2, 1vs3. Only correct if abs() is used. abs(out[lower.tri(out)]) @@ -283,6 +296,11 @@ svyStdDiff <- function(varName, groupName, design, binary = FALSE, na.rm = TRUE) out <- meanDiffs / sqrt(varMeans) + ## If mean difference is zero and variance is zero, + ## only one constant exists across two groups. + ## In this case, the SMD can be defined zero, rather than NaN from 0/0. + out[is.na(out) & !is.na(meanDiffs) & (meanDiffs == 0) & !is.na(varMeans) & (varMeans == 0)] <- 0 + ## This lower.tri() approach is actually giving 2vs1, 3vs1, etc ## opposite of stated 1vs2, 1vs3. Only correct if abs() is used. abs(out[lower.tri(out)]) diff --git a/R/modules-svy.R b/R/modules-svy.R index 9489a20..d06b705 100644 --- a/R/modules-svy.R +++ b/R/modules-svy.R @@ -36,7 +36,7 @@ FormulaString <- function(vars) { ## Check for survey data; fail if not StopIfNotSurveyDesign <- function(data) { - if (!("survey.design2" %in% class(data))) { + if (!any(c("svyrep.design", "survey.design2", "survey.design") %in% class(data))) { stop("The data argument needs to be a survey design object.") } diff --git a/R/print.CatTable.R b/R/print.CatTable.R index f078e19..6245fb6 100644 --- a/R/print.CatTable.R +++ b/R/print.CatTable.R @@ -6,7 +6,7 @@ ##' @param digits Number of digits to print in the table. ##' @param pDigits Number of digits to print for p-values (also used for standardized mean differences). ##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. -##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param missing Whether to show missing data information. 
##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. ##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param noSpaces Whether to remove spaces added for alignment. Use this option if you prefer to align numbers yourself in other software. @@ -257,6 +257,17 @@ function(x, # CatTable object } + ## Add percentMissing when requested and available + if (missing & !is.null(attr(CatTable, "percentMissing"))) { + + ## Create an empty column + out <- cbind(out, + Missing = rep("", nrow(out))) # Column for p-values + ## Put the values at the non-empty positions + out[logiNonEmptyRowNames,"Missing"] <- ModuleFormatPercents(attr(CatTable, "percentMissing"), 1) + } + + ## Add freq () explanation if requested if (explain) { ## Choose the format of the explanation string diff --git a/R/print.ContTable.R b/R/print.ContTable.R index 08962ed..d2de8f6 100644 --- a/R/print.ContTable.R +++ b/R/print.ContTable.R @@ -6,7 +6,7 @@ ##' @param digits Number of digits to print in the table. ##' @param pDigits Number of digits to print for p-values (also used for standardized mean differences). ##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. -##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param missing Whether to show missing data information. ##' @param explain Whether to add explanation to the variable names, i.e., (mean (sd) or median [IQR]) is added to the variable names. ##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param noSpaces Whether to remove spaces added for alignment. Use this option if you prefer to align numbers yourself in other software. 
@@ -231,6 +231,16 @@ function(x, # ContTable object } + ## Add percentMissing when requested and available + if (missing & !is.null(attr(ContTable, "percentMissing"))) { + + ## Create an empty column + out <- cbind(out, + Missing = rep("", nrow(out))) # Column for p-values + ## Put the values + out[,"Missing"] <- ModuleFormatPercents(attr(ContTable, "percentMissing"), 1) + } + ## Add mean (sd) or median [IQR]/median [range] explanation if requested if (explain) { diff --git a/R/print.TableOne.R b/R/print.TableOne.R index 24fd757..72f1673 100644 --- a/R/print.TableOne.R +++ b/R/print.TableOne.R @@ -7,12 +7,13 @@ ##' @param contDigits Number of digits to print for continuous variables. Default 2. ##' @param pDigits Number of digits to print for p-values (also used for standardized mean differences). Default 3. ##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. -##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param missing Whether to show missing data information. ##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. ##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param test Whether to show p-values. TRUE by default. If FALSE, only the numerical summaries are shown. ##' @param smd Whether to show standardized mean differences. FALSE by default. If there are more than one contrasts, the average of all possible standardized mean differences is shown. For individual contrasts, use \code{summary}. ##' @param noSpaces Whether to remove spaces added for alignment. Use this option if you prefer to align numbers yourself in other software. +##' @param padColnames Whether to pad column names with spaces to center justify. 
The default is FALSE. It is not conducted if noSpaces = TRUE. ##' @param format The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). ##' @param showAllLevels Whether to show all levels. FALSE by default, i.e., for 2-level categorical variables, only the higher level is shown to avoid redundant information. ##' @param cramVars A character vector to specify the two-level categorical variables, for which both levels should be shown in one row. @@ -39,8 +40,9 @@ function(x, # TableOne object explain = TRUE, # Whether to show explanation in variable names printToggle = TRUE, # Whether to print the result visibly test = TRUE, # Whether to add p-values - smd = FALSE, # Whether to add standardized mean differences + smd = FALSE, # Whether to add standardized mean differences noSpaces = FALSE, # Whether to remove spaces for alignments + padColnames = FALSE, # Whether to pad column names for alignments ## Categorical options format = c("fp","f","p","pf")[1], # Format f_requency and/or p_ercent @@ -57,7 +59,7 @@ function(x, # TableOne object ## Extract Cont/CatTable elements of x and dispatch print() appropriately FmtTables <- ModuleFormatTables(x, catDigits = catDigits, contDigits = contDigits, - test = test, smd = smd, + test = test, smd = smd, missing = missing, explain = explain, pDigits = pDigits, ## print.CatTable arguments passed @@ -131,6 +133,11 @@ function(x, # TableOne object names(dimnames(out)) <- c("", "") } + ## Center-justify column names if asked and not removing spaces. + if (padColnames & !noSpaces) { + out <- ModuleMidJustifyColnames(mat = out) + } + ## Remove spaces if asked. out <- ModuleRemoveSpaces(mat = out, noSpaces = noSpaces) diff --git a/R/print.svyCatTable.R b/R/print.svyCatTable.R index 4d5b3a1..2c064d3 100644 --- a/R/print.svyCatTable.R +++ b/R/print.svyCatTable.R @@ -6,7 +6,7 @@ ##' @param digits Number of digits to print in the table. 
##' @param pDigits Number of digits to print for p-values (also used for standardized mean differences). ##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. -##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param missing Whether to show missing data information. ##' @param explain Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown. ##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param noSpaces Whether to remove spaces added for alignment. Use this option if you prefer to align numbers yourself in other software. @@ -201,6 +201,17 @@ function(x, # CatTable object } + ## Add percentMissing when requested and available + if (missing & !is.null(attr(CatTable, "percentMissing"))) { + + ## Create an empty column + out <- cbind(out, + Missing = rep("", nrow(out))) # Column for p-values + ## Put the values at the non-empty positions + out[logiNonEmptyRowNames,"Missing"] <- ModuleFormatPercents(attr(CatTable, "percentMissing"), 1) + } + + ## Add freq () explanation if requested if (explain) { ## Choose the format of the explanation string diff --git a/R/print.svyContTable.R b/R/print.svyContTable.R index 36c2ea3..749f6c3 100644 --- a/R/print.svyContTable.R +++ b/R/print.svyContTable.R @@ -6,7 +6,7 @@ ##' @param digits Number of digits to print in the table. ##' @param pDigits Number of digits to print for p-values (also used for standardized mean differences). ##' @param quote Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. 
-##' @param missing Whether to show missing data information (not implemented yet, placeholder) +##' @param missing Whether to show missing data information. ##' @param explain Whether to add explanation to the variable names, i.e., (mean (sd) or median [IQR]) is added to the variable names. ##' @param printToggle Whether to print the output. If FLASE, no output is created, and a matrix is invisibly returned. ##' @param noSpaces Whether to remove spaces added for alignment. Use this option if you prefer to align numbers yourself in other software. @@ -177,6 +177,17 @@ function(x, # ContTable object } + ## Add percentMissing when requested and available + if (missing & !is.null(attr(ContTable, "percentMissing"))) { + + ## Create an empty column + out <- cbind(out, + Missing = rep("", nrow(out))) # Column for p-values + ## Put the values + out[,"Missing"] <- ModuleFormatPercents(attr(ContTable, "percentMissing"), 1) + } + + ## Add mean (sd) or median [IQR]/median [range] explanation if requested if (explain) { ## Create a vector of explanations to be pasted diff --git a/R/svyCreateCatTable.R b/R/svyCreateCatTable.R index 24351f6..16f0959 100644 --- a/R/svyCreateCatTable.R +++ b/R/svyCreateCatTable.R @@ -40,6 +40,10 @@ function(vars, # character vector of variable names ## Abort if no variables exist at this point ModuleStopIfNoVarsLeft(vars) + ## Get the missing percentage for each variable (no strata). + ## This has to happen before includeNA is used. 
+ percentMissing <- ModulePercentMissing(data$variables[vars]) + ## Toggle test FALSE if no strata test <- ModuleReturnFalseIfNoStrata(strata, test) smd <- ModuleReturnFalseIfNoStrata(strata, smd) @@ -133,7 +137,8 @@ function(vars, # character vector of variable names attributes(result) <- c(attributes(result), list(pValues = pValues), list(xtabs = listXtabs), - list(smd = smds)) + list(smd = smds), + list(percentMissing = percentMissing)) ## Return return(result) diff --git a/R/svyCreateContTable.R b/R/svyCreateContTable.R index dfe46d7..77942b6 100644 --- a/R/svyCreateContTable.R +++ b/R/svyCreateContTable.R @@ -43,6 +43,9 @@ function(vars, # character vector of variable n ## Abort if no variables exist at this point ModuleStopIfNoVarsLeft(vars) + ## Get the missing percentage for each variable (no strata). + percentMissing <- ModulePercentMissing(data$variables[vars]) + ## Toggle test FALSE if no strata test <- ModuleReturnFalseIfNoStrata(strata, test) smd <- ModuleReturnFalseIfNoStrata(strata, smd) @@ -153,7 +156,8 @@ function(vars, # character vector of variable n ## Give additional attributes attributes(result) <- c(attributes(result), list(pValues = pValues), - list(smd = smds)) + list(smd = smds), + list(percentMissing = percentMissing)) ## Return return(result) diff --git a/R/svyCreateTableOne.R b/R/svyCreateTableOne.R index ed5b8dc..28af559 100644 --- a/R/svyCreateTableOne.R +++ b/R/svyCreateTableOne.R @@ -3,18 +3,14 @@ ##' Create an object summarizing all baseline variables (both continuous and categorical) optionally stratifying by one or more startifying variables and performing statistical tests. The object gives a table that is easy to use in medical research papers. ##' ##' @param vars Variables to be summarized given as a character vector. Factors are handled as categorical variables, whereas numeric variables are handled as continuous variables. If empty, all variables in the survey design object specified in the data argument are used. 
-##' @param strata Stratifying (grouping) variable name(s) given as a character vector. If omitted, the overall results are returned. +##' @inheritParams CreateTableOne ##' @param data A survey design object in which these variables exist. All variables (both vars and strata) must be in this survey design object. It is created with the \code{svydesign} function in the \code{survey} package. -##' @param factorVars Numerically coded variables that should be handled as categorical variables given as a character vector. If omitted, only factors are considered categorical variables. If all categorical variables in the dataset are already factors, this option is not necessary. The variables specified here must also be specified in the \code{vars} argument. -##' @param includeNA If TRUE, NA is handled as a regular factor level rather than missing. NA is shown as the last factor level in the table. Only effective for categorical variables. -##' @param test If TRUE, as in the default and there are more than two groups, groupwise comparisons are performed. ##' @param testNormal A function used to perform the normal assumption based tests. The default is multiple degrees of freedom test using \code{svyglm} and \code{regTermTest}. This is equivalent of the \code{svyttest} when there are only two groups. ##' @param argsNormal A named list of arguments passed to the function specified in \code{testNormal}. ##' @param testNonNormal A function used to perform the nonparametric tests. The default is \code{svyranktest}. ##' @param argsNonNormal A named list of arguments passed to the function specified in \code{testNonNormal}. ##' @param testApprox A function used to perform the large sample approximation based tests. The default is \code{svychisq}. ##' @param argsApprox A named list of arguments passed to the function specified in testApprox. 
-##' @param smd If TRUE, as in the default and there are more than two groups, standardized mean differences for all pairwise comparisons are calculated. ##' ##' @details See the details for \code{\link{CreateTableOne}}. ##' @@ -119,6 +115,9 @@ function(vars, # character vector of variable test <- ModuleReturnFalseIfNoStrata(strata, test) smd <- ModuleReturnFalseIfNoStrata(strata, smd) + ## Get the missing percentage for each variable (no strata). + percentMissing <- ModulePercentMissing(data$variables[vars]) + ## Get the classes of the variables varClasses <- lapply(data$variables[vars], class) @@ -214,7 +213,9 @@ function(vars, # character vector of variable logiFactors = logiFactors, ## names of vars of each type varFactors = varFactors, - varNumerics = varNumerics)) + varNumerics = varNumerics, + ## Missing data percentage for each variable (no strata). + percentMissing = percentMissing)) ## Give a class class(TableOneObject) <- c("svyTableOne", "TableOne") diff --git a/R/tableone-package.R b/R/tableone-package.R index f66be1f..d6ad966 100644 --- a/R/tableone-package.R +++ b/R/tableone-package.R @@ -6,8 +6,8 @@ ##' @aliases tableone-package tableone ##' @docType package ##' @import survey -##' @importFrom stats as.formula chisq.test coef confint fisher.test kruskal.test median oneway.test quantile sd var xtabs -##' @importFrom utils combn +##' @importFrom stats as.formula chisq.test coef confint fisher.test kruskal.test median oneway.test quantile sd var xtabs qnorm vcov +##' @importFrom utils combn tail ##' @note Acknowledgement: ##' ##' Ian Fellows for developing the \code{deducer} package, which this package is based on. diff --git a/cran-check.txt b/cran-check.txt index 06f5d5a..1c59258 100644 --- a/cran-check.txt +++ b/cran-check.txt @@ -1,13 +1,17 @@ +* checking for file ‘/Users/kazuki/Documents/programming/r/tableone/DESCRIPTION’ ... OK +* preparing ‘tableone’: +* checking DESCRIPTION meta-information ... 
OK +* checking for LF line-endings in source and make files +* checking for empty or unneeded directories +* building ‘tableone_0.8.0.tar.gz’ * using log directory ‘/Users/kazuki/Documents/programming/r/tableone/tableone.Rcheck’ -* using R version 3.2.2 (2015-08-14) -* using platform: x86_64-apple-darwin13.4.0 (64-bit) +* using R version 3.4.0 (2017-04-21) +* using platform: x86_64-apple-darwin15.6.0 (64-bit) * using session charset: UTF-8 * using option ‘--as-cran’ * checking for file ‘tableone/DESCRIPTION’ ... OK * checking extension type ... Package -* this is package ‘tableone’ version ‘0.7.3’ -* checking CRAN incoming feasibility ... Note_to_CRAN_maintainers -Maintainer: ‘Kazuki Yoshida ’ +* this is package ‘tableone’ version ‘0.8.0’ * checking package namespace information ... OK * checking package dependencies ... OK * checking if this is a source package ... OK @@ -19,7 +23,6 @@ Maintainer: ‘Kazuki Yoshida ’ * checking whether package ‘tableone’ can be installed ... OK * checking installed package size ... OK * checking package directory ... OK -* checking ‘build’ directory ... OK * checking DESCRIPTION meta-information ... OK * checking top-level files ... OK * checking for left-over files ... OK @@ -32,7 +35,6 @@ Maintainer: ‘Kazuki Yoshida ’ * checking whether the package can be unloaded cleanly ... OK * checking whether the namespace can be loaded with stated dependencies ... OK * checking whether the namespace can be unloaded cleanly ... OK -* checking use of S3 registration ... OK * checking dependencies in R code ... OK * checking S3 generic/method consistency ... OK * checking replacement functions ... OK @@ -53,13 +55,7 @@ Maintainer: ‘Kazuki Yoshida ’ * checking for unstated dependencies in ‘tests’ ... OK * checking tests ... OK -* checking for unstated dependencies in vignettes ... OK -* checking package vignettes in ‘inst/doc’ ... OK -* checking running R code from vignettes ... - ‘introduction.Rmd’ using ‘UTF-8’ ... 
OK - ‘smd.Rmd’ using ‘UTF-8’ ... OK - OK -* checking re-building of vignette outputs ... OK * checking PDF version of manual ... OK * DONE Status: OK + diff --git a/cran-comment.md b/cran-comment.md index 5211153..26e1334 100644 --- a/cran-comment.md +++ b/cran-comment.md @@ -1,10 +1,51 @@ ## What's new -* Fix a minor problem in the vignette with the upcoming release of ggplot2 +The following changes are included. + +tableone 0.8.0 (2017-06-15) +---------------------------------------------------------------- +NEW FEATURES +* The "missing" option for the print methods was implemented. If + TRUE, a column called "Missing" is added as the rightmost column + of the formatted table. This represents percentage of missing + observation in each variable. Please note this is the percentage + with respect to the unweighted raw observations even in weighted + tables. +* The "padColnames" option was added the print.TableOne method. If + TRUE, the column names of the formatted table become space-padded + to center them. + +tableone 0.7.6 (2016-07-12) +---------------------------------------------------------------- +BUG FIXES +* The explanation for the "factorVars" argument for the functions + CreateTableOne and svyCreateTableOne were changed for clarity. + When factor variables are included in the argument, they are + releveled to exlude empty levels. This was not clearly documented + in the previous documentation. Thanks @eribul. +* svyrep.design objects (survey design objects with replicate weights) + are allowed for the data argument in svyTableOne. This is considered + experimental. Thanks @przemo. + +tableone 0.7.5 (2016-04-10) +---------------------------------------------------------------- +BUG FIXES +* ShowRegTable() now correctly supports models fit with geepack, + nlme, and lme4. 
+ +tableone 0.7.4 (2016-03-31) +---------------------------------------------------------------- +NEW FEATURE +* Define SMD := 0 when the numerator is 0 even if the denominator + is also 0. This is more intuitive because a constant compared + across two groups will give an SMD of 0 rather than NaN (0/0). + For example, if two groups being compared both only have one + gender (all female or all male), then SMD for the gender + variable is defined as 0. ## Test environments -* Local OS X 10.11.1, R 3.2.2 -* Ubuntu Linux (on Travis-CI), R 3.2.2 -* win-builder (devel and release) +* Local OS X 10.12.5, R 3.4.0 +* Ubuntu Linux on Travis-CI (oldrel, release, and devel) +* win-builder (release and devel) ## R CMD check results * ERRORs: None diff --git a/inst/doc/introduction.Rmd b/inst/doc/introduction.Rmd new file mode 100644 index 0000000..1471697 --- /dev/null +++ b/inst/doc/introduction.Rmd @@ -0,0 +1,166 @@ +--- +title: "Introduction to tableone" +author: "Kazuki Yoshida" +date: "2014-12-28" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Introduction to tableone} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} +--- + +```{r, message = FALSE, tidy = FALSE, echo = F} +## Create a header using devtools::use_vignette("my-vignette") +## knitr configuration: http://yihui.name/knitr/options#chunk_options +library(knitr) +showMessage <- FALSE +showWarning <- TRUE +set_alias(w = "fig.width", h = "fig.height", res = "results") +opts_chunk$set(comment = "", error= TRUE, warning = showWarning, message = showMessage, + tidy = FALSE, cache = F, echo = T, + fig.width = 7, fig.height = 7, dev.args = list(family = "sans")) + +## R configuration +options(width = 116, scipen = 5) +``` + +## What is tableone? + +The tableone package is an R package that eases the construction of "Table 1", *i.e.*, patient baseline characteristics table commonly found in biomedical research papers. 
The package can summarize both continuous and categorical variables mixed within one table. Categorical variables can be summarized as counts and/or percentages. Continuous variables can be summarized in the "normal" way (means and standard deviations) or "nonnormal" way (medians and interquartile ranges). + +A screencast demonstrating this vignette is available at: https://www.youtube.com/watch?v=IZgDKmOC0Wg&feature=youtu.be + +## Load packages + +```{r} +## tableone package itself +library(tableone) +## survival package for Mayo Clinic's PBC data +library(survival) +data(pbc) +``` + +## Single group summary + +### Simplest use case + +The simplest use case is summarizing the whole dataset. You can just feed in the data frame to the main workhorse function CreateTableOne(). You can see there are 418 patients in the dataset. +```{r} +CreateTableOne(data = pbc) +``` + +### Categorical variable conversion + +Most of the categorical variables are coded numerically, so we either have to transform them to factors in the dataset or use factorVars argument to transform them on-the-fly. Also it's a better practice to specify which variables to summarize by the vars argument, and exclude the ID variable(s). How do we know which ones are numerically-coded categorical variables? Please check your data dictionary (in this case help(pbc)). This time I am saving the result object in a variable. + +```{r} +## Get variable names +dput(names(pbc)) +## Vector of variables to summarize +myVars <- c("time", "status", "trt", "age", "sex", "ascites", "hepato", + "spiders", "edema", "bili", "chol", "albumin", "copper", "alk.phos", + "ast", "trig", "platelet", "protime", "stage") +## Vector of categorical variables that need transformation +catVars <- c("status", "trt", "ascites", "hepato", + "spiders", "edema", "stage") +## Create a TableOne object +tab2 <- CreateTableOne(vars = myVars, data = pbc, factorVars = catVars) +``` + +OK. It's more interpretable now.
Binary categorical variables are summarized as counts and percentages of the second level. For example, if it is coded as 0 and 1, the "1" level is summarized. For 3+ category variable all levels are summarized. Please bear in mind, the percentages are calculated after excluding missing values. + +```{r} +tab2 +``` + +### Showing all levels for categorical variables + +If you want to show all levels, you can use showAllLevels argument to the print() method. + +```{r} +print(tab2, showAllLevels = TRUE) +``` + +### Detailed information including missingness + +If you need more detailed information including the number/proportion missing. Use the summary() method on the result object. The continuous variables are shown first, and the categorical variables are shown second. + +```{r} +summary(tab2) +``` + +### Summarizing nonnormal variables + +It looks like most of the continuous variables are highly skewed except time, age, albumin, and platelet (biomarkers are usually distributed with strong positive skews). Summarizing them as such may please your future peer reviewer(s). Let's do it with the nonnormal argument to the print() method. Can you see the difference. If you just say nonnormal = TRUE, all variables are summarized the "nonnormal" way. + +```{r} +biomarkers <- c("bili","chol","copper","alk.phos","ast","trig","protime") +print(tab2, nonnormal = biomarkers) +``` + +### Fine tuning + +If you want to fine tune the table further, please check out ?print.TableOne for the full list of options. + + +## Multiple group summary + +Often you want to group patients and summarize group by group. It's also pretty simple. Grouping by exposure categories is probably the most common way, so let's do it by the treatment variable. According to ?pbc, it is coded as (1) D-penicillmain (it's really "D-penicillamine"), (2) placebo, and (NA) not randomized. NA's do not function as a grouping variable, so it is dropped. 
If you do want to show the result for the NA group, then you need to recode it as something other than NA. + +```{r} +tab3 <- CreateTableOne(vars = myVars, strata = "trt" , data = pbc, factorVars = catVars) +tab3 +``` + +### Testing + +As you can see in the previous table, when there are two or more groups, group comparison p-values are printed along with the table (well, let's not argue the appropriateness of hypothesis testing for table 1 in an RCT for now.). Very small p-values are shown with the less than sign. The hypothesis test functions used by default are chisq.test() for categorical variables (with continuity correction) and oneway.test() for continuous variables (with equal variance assumption, i.e., regular ANOVA). Two-group ANOVA is equivalent to a t-test. + +You may be worried about the nonnormal variables and small cell counts in the stage variable. In such a situation, you can use the nonnormal argument like before as well as the exact (test) argument in the print() method. Now kruskal.test() is used for the nonnormal continuous variables and fisher.test() is used for categorical variables specified in the exact argument. kruskal.test() is equivalent to wilcox.test() in the two-group case. The column named test is to indicate which p-values were calculated using the non-default tests. + +To also show standardized mean differences, use the smd option. + +```{r} +print(tab3, nonnormal = biomarkers, exact = "stage", smd = TRUE) +``` + +## Exporting + +My typical next step is to export the table to Excel for editing, and then to Word (clinical medical journals usually do not offer LaTeX submission). + + +### Quick and dirty way + +The quick and dirty way that I usually use is copy and paste. Use the quote = TRUE argument to show the quotes and noSpaces = TRUE to remove spaces used to align text in the R console (the latter is optional). Now you can just copy and paste the whole thing to an Excel spread sheet.
After pasting, click the small pasting icon to choose Use Text Import Wizard..., in the dialogue you can just click finish to fit the values in the appropriate cells. Then you can edit or re-align things as you like. I usually center-align the group summaries, and right-align the p-values. + +```{r} +print(tab3, nonnormal = biomarkers, exact = "stage", quote = TRUE, noSpaces = TRUE) +``` + +### Real export way + +If you do not like the manual labor of copy-and-paste, you can potentially automate the task in the following way. The print() method for a TableOne object invisibly returns a matrix identical to what you see. You can capture this by assignment to a variable (here tab3Mat). Do not use the quote argument in this case; the noSpaces argument is again optional. The self-contradictory printToggle = FALSE for the print() method avoids unnecessary printing if you wish. Then you can save the object to a CSV file. As it is a regular matrix object, you can save it to an Excel file using packages such as XLConnect. + +```{r, eval = FALSE} +tab3Mat <- print(tab3, nonnormal = biomarkers, exact = "stage", quote = FALSE, noSpaces = TRUE, printToggle = FALSE) +## Save to a CSV file +write.csv(tab3Mat, file = "myTable.csv") +``` + +## Miscellaneous + +### Categorical or continuous variables-only + +You may want to see the categorical or continuous variables only. You can do this by accessing the CatTable part and ContTable part of the TableOne object as follows. summary() methods are defined for both as well as print() method with various arguments. Please see ?print.CatTable and ?print.ContTable for details.
+ +```{r} +## Categorical part only +tab3$CatTable +## Continous part only +print(tab3$ContTable, nonnormal = biomarkers) +``` + +-------------------- +- Authored by Kazuki Yoshida +- CRAN page: http://cran.r-project.org/package=tableone +- github page: https://github.com/kaz-yos/tableone diff --git a/inst/doc/smd.Rmd b/inst/doc/smd.Rmd new file mode 100644 index 0000000..f01f177 --- /dev/null +++ b/inst/doc/smd.Rmd @@ -0,0 +1,258 @@ +--- +title: "Using standardized mean differences" +author: "Kazuki Yoshida" +date: "2015-08-07" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Using standardized mean differences} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} +--- + +```{r, message = FALSE, tidy = FALSE, echo = F} +## Create a header using devtools::use_vignette("my-vignette") +## knitr configuration: http://yihui.name/knitr/options#chunk_options +library(knitr) +showMessage <- FALSE +showWarning <- FALSE +set_alias(w = "fig.width", h = "fig.height", res = "results") +opts_chunk$set(comment = "", error= TRUE, warning = showWarning, message = showMessage, + tidy = FALSE, cache = F, echo = T, + fig.width = 10, fig.height = 10, dev.args = list(family = "sans")) + +## R configuration +options(width = 130, scipen = 5) +``` + +## Standardized mean difference + +The standardized (mean) difference is a measure of distance between two group means in terms of one or more variables. In practice it is often used as a balance measure of individual covariates before and after propensity score matching. As it is standardized, comparison across variables on different scales is possible. For definitions see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3144483/#s11title . + + +Standardized mean differences can be easily calculated with tableone. All standardized mean differences in this package are absolute values, thus, there is no directionality. 
+ + +## Load packages + +```{r} + +## tableone package itself +library(tableone) +## PS matching +library(Matching) +## Weighted analysis +library(survey) +## Reorganizing data +library(reshape2) +## plotting +library(ggplot2) + +``` + + +## Load data + +The right heart catheterization dataset is available at http://biostat.mc.vanderbilt.edu/wiki/Main/DataSets . This dataset was originally used in Connors *et al*. JAMA 1996;276:889-897, and has been made publicly available. + +```{r} + +## Right heart cath dataset +rhc <- read.csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/rhc.csv") + +``` + + +## Unmatched table + +Out of the 50 covariates, 32 have standardized mean differences of greater than 0.1, which is often considered the sign of important covariate imbalance (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3144483/#s11title ). + +```{r} + +## Covariates +vars <- c("age","sex","race","edu","income","ninsclas","cat1","das2d3pc","dnr1", + "ca","surv2md1","aps1","scoma1","wtkilo1","temp1","meanbp1","resp1", + "hrt1","pafi1","paco21","ph1","wblc1","hema1","sod1","pot1","crea1", + "bili1","alb1","resp","card","neuro","gastr","renal","meta","hema", + "seps","trauma","ortho","cardiohx","chfhx","dementhx","psychhx", + "chrpulhx","renalhx","liverhx","gibledhx","malighx","immunhx", + "transhx","amihx") + +## Construct a table +tabUnmatched <- CreateTableOne(vars = vars, strata = "swang1", data = rhc, test = FALSE) +## Show table with SMD +print(tabUnmatched, smd = TRUE) +## Count covariates with important imbalance +addmargins(table(ExtractSmd(tabUnmatched) > 0.1)) + +``` + + +## Propensity score estimation + +Usually a logistic regression model is used to estimate individual propensity scores. The model here is taken from "How To Use Propensity Score Analysis" (http://www.mc.vanderbilt.edu/crc/workshop_files/2008-04-11.pdf ). 
Predicted probabilities of being assigned to right heart catheterization, being assigned no right heart catheterization, being assigned to the true assignment, as well as the smaller of the probabilities of being assigned to right heart catheterization or no right heart catheterization are calculated for later use in propensity score matching and weighting. + +```{r} + +## Fit model +psModel <- glm(formula = swang1 ~ age + sex + race + edu + income + ninsclas + + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + + bili1 + alb1 + resp + card + neuro + gastr + renal + + meta + hema + seps + trauma + ortho + cardiohx + chfhx + + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + + malighx + immunhx + transhx + amihx, + family = binomial(link = "logit"), + data = rhc) + +## Predicted probability of being assigned to RHC +rhc$pRhc <- predict(psModel, type = "response") +## Predicted probability of being assigned to no RHC +rhc$pNoRhc <- 1 - rhc$pRhc + +## Predicted probability of being assigned to the +## treatment actually assigned (either RHC or no RHC) +rhc$pAssign <- NA +rhc$pAssign[rhc$swang1 == "RHC"] <- rhc$pRhc[rhc$swang1 == "RHC"] +rhc$pAssign[rhc$swang1 == "No RHC"] <- rhc$pNoRhc[rhc$swang1 == "No RHC"] +## Smaller of pRhc vs pNoRhc for matching weight +rhc$pMin <- pmin(rhc$pRhc, rhc$pNoRhc) + +``` + + +## Propensity score matching + +The Matching package can be used for propensity score matching. The logit of propensity score is often used as the matching scale, and the matching caliper is often 0.2 $\times$ SD(logit(PS)). See http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3144483/#s5title for suggestions. After matching, all the standardized mean differences are below 0.1.
+ +```{r} + +listMatch <- Match(Tr = (rhc$swang1 == "RHC"), # Need to be in 0,1 + ## logit of PS,i.e., log(PS/(1-PS)) as matching scale + X = log(rhc$pRhc / rhc$pNoRhc), + ## 1:1 matching + M = 1, + ## caliper = 0.2 * SD(logit(PS)) + caliper = 0.2, + replace = FALSE, + ties = TRUE, + version = "fast") +## Extract matched data +rhcMatched <- rhc[unlist(listMatch[c("index.treated","index.control")]), ] + +## Construct a table +tabMatched <- CreateTableOne(vars = vars, strata = "swang1", data = rhcMatched, test = FALSE) +## Show table with SMD +print(tabMatched, smd = TRUE) +## Count covariates with important imbalance +addmargins(table(ExtractSmd(tabMatched) > 0.1)) + +``` + + +## Propensity score matching weight + +The matching weight method is a weighting analogue to the 1:1 pairwise algorithmic matching (http://www.ncbi.nlm.nih.gov/pubmed/23902694 ). An earlier version of the paper is available free (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.359.4724&rep=rep1&type=pdf ). The matching weight is defined as the smaller of the predicted probabilities of receiving or not receiving the treatment over the predicted probability of being assigned to the arm the patient is actually in. After weighting, all the standardized mean differences are below 0.1. The standardized mean differences in weighted data are explained in http://onlinelibrary.wiley.com/doi/10.1002/sim.6607/full . + +```{r} + +## Matching weight +rhc$mw <- rhc$pMin / rhc$pAssign +## Weighted data +rhcSvy <- svydesign(ids = ~ 1, data = rhc, weights = ~ mw) + +## Construct a table (This is a bit slow.) 
+tabWeighted <- svyCreateTableOne(vars = vars, strata = "swang1", data = rhcSvy, test = FALSE) +## Show table with SMD +print(tabWeighted, smd = TRUE) +## Count covariates with important imbalance +addmargins(table(ExtractSmd(tabWeighted) > 0.1)) + +``` + + +## Assessing balance before and after matching/weighting + +A plot showing covariate balance is often constructed to demonstrate the balancing effect of matching and/or weighting. Given the same propensity score model, the matching weight method often achieves better covariate balance than matching. + +```{r} + +## Construct a data frame containing variable name and SMD from all methods +dataPlot <- data.frame(variable = names(ExtractSmd(tabUnmatched)), + Unmatched = ExtractSmd(tabUnmatched), + Matched = ExtractSmd(tabMatched), + Weighted = ExtractSmd(tabWeighted)) + +## Create long-format data for ggplot2 +dataPlotMelt <- melt(data = dataPlot, + id.vars = c("variable"), + variable.name = "Method", + value.name = "SMD") + +## Order variable names by magnitude of SMD +varNames <- as.character(dataPlot$variable)[order(dataPlot$Unmatched)] + +## Order factor levels in the same order +dataPlotMelt$variable <- factor(dataPlotMelt$variable, + levels = varNames) + +## Plot using ggplot2 +ggplot(data = dataPlotMelt, mapping = aes(x = variable, y = SMD, + group = Method, color = Method)) + +geom_line() + +geom_point() + +geom_hline(yintercept = 0.1, color = "black", size = 0.1) + +coord_flip() + +theme_bw() + theme(legend.key = element_blank()) + +``` + +To construct a side-by-side table, data can be extracted as a matrix and combined using the print() method, which actually invisibly returns a matrix. 
+ +```{r} + +## Column bind tables +resCombo <- cbind(print(tabUnmatched, printToggle = FALSE), + print(tabMatched, printToggle = FALSE), + print(tabWeighted, printToggle = FALSE)) + +## Add group name row, and rewrite column names +resCombo <- rbind(Group = rep(c("No RHC","RHC"), 3), resCombo) +colnames(resCombo) <- c("Unmatched","","Matched","","Weighted","") +print(resCombo, quote = FALSE) + +``` + + +## Outcome analysis + +The final analysis can be conducted using matched and weighted data. The results from the matching and matching weight are similar. The ShowRegTable() function may come in handy. + + +```{r} + +## Unmatched model (unadjusted) +glmUnmatched <- glm(formula = (death == "Yes") ~ swang1, + family = binomial(link = "logit"), + data = rhc) +## Matched model +glmMatched <- glm(formula = (death == "Yes") ~ swang1, + family = binomial(link = "logit"), + data = rhcMatched) +## Weighted model +glmWeighted <- svyglm(formula = (death == "Yes") ~ swang1, + family = binomial(link = "logit"), + design = rhcSvy) + +## Show results together +resTogether <- list(Unmatched = ShowRegTable(glmUnmatched, printToggle = FALSE), + Matched = ShowRegTable(glmMatched, printToggle = FALSE), + Weighted = ShowRegTable(glmWeighted, printToggle = FALSE)) +print(resTogether, quote = FALSE) + +``` + +-------------------- +- Authored by Kazuki Yoshida +- CRAN page: http://cran.r-project.org/package=tableone +- github page: https://github.com/kaz-yos/tableone diff --git a/man/CreateCatTable.Rd b/man/CreateCatTable.Rd index 40fd1eb..ec7bf1c 100644 --- a/man/CreateCatTable.Rd +++ b/man/CreateCatTable.Rd @@ -96,11 +96,10 @@ print(catTableBySexTrt, exact = "ascites", quote = TRUE) ## If you want to center-align values in Word, use noSpaces option.
print(catTableBySexTrt, exact = "ascites", quote = TRUE, noSpaces = TRUE) -} -\author{ -Kazuki Yoshida (based on \code{Deducer::frequencies()}) } \seealso{ \code{\link{CreateTableOne}}, \code{\link{print.CatTable}}, \code{\link{summary.CatTable}} } - +\author{ +Kazuki Yoshida (based on \code{Deducer::frequencies()}) +} diff --git a/man/CreateContTable.Rd b/man/CreateContTable.Rd index f2cfe54..9ca55db 100644 --- a/man/CreateContTable.Rd +++ b/man/CreateContTable.Rd @@ -96,11 +96,10 @@ print(contTableBySexTrt, nonnormal = nonNormalVars, quote = TRUE) ## If you want to center-align values in Word, use noSpaces option. print(contTableBySexTrt, nonnormal = nonNormalVars, quote = TRUE, noSpaces = TRUE) -} -\author{ -Kazuki Yoshida (based on \code{Deducer::descriptive.table()}) } \seealso{ \code{\link{CreateTableOne}}, \code{\link{print.ContTable}}, \code{\link{summary.ContTable}} } - +\author{ +Kazuki Yoshida (based on \code{Deducer::descriptive.table()}) +} diff --git a/man/CreateTableOne.Rd b/man/CreateTableOne.Rd index 0c6876d..820561b 100644 --- a/man/CreateTableOne.Rd +++ b/man/CreateTableOne.Rd @@ -17,7 +17,7 @@ CreateTableOne(vars, strata, data, factorVars, includeNA = FALSE, \item{data}{A data frame in which these variables exist. All variables (both vars and strata) must be in this data frame.} -\item{factorVars}{Numerically coded variables that should be handled as categorical variables given as a character vector. If omitted, only factors are considered categorical variables. If all categorical variables in the dataset are already factors, this option is not necessary. The variables specified here must also be specified in the \code{vars} argument.} +\item{factorVars}{Numerically coded variables that should be handled as categorical variables given as a character vector. Do not include factors, unless you need to relevel them by removing empty levels. If omitted, only factors are considered categorical variables. 
The variables specified here must also be specified in the \code{vars} argument.} \item{includeNA}{If TRUE, NA is handled as a regular factor level rather than missing. NA is shown as the last factor level in the table. Only effective for categorical variables.} @@ -115,9 +115,6 @@ print(tableOne, nonnormal = c("bili","chol","copper","alk.phos","trig"), ## If SMDs are needed as numericals, use ExtractSmd() ExtractSmd(tableOne) -} -\author{ -Kazuki Yoshida, Justin Bohn } \references{ Flury, BK. and Riedwyl, H. (1986). Standard distance in univariate and multivariate analysis. \emph{The American Statistician}, \bold{40}, 249-251. @@ -133,4 +130,6 @@ Austin, PC. and Stuart, EA. (2015). Moving towards best practice when using inve \seealso{ \code{\link{print.TableOne}}, \code{\link{summary.TableOne}} } - +\author{ +Kazuki Yoshida, Justin Bohn +} diff --git a/man/ExtractSmd.Rd b/man/ExtractSmd.Rd index 4bf1f9b..dc2d4f8 100644 --- a/man/ExtractSmd.Rd +++ b/man/ExtractSmd.Rd @@ -19,11 +19,10 @@ Extracts standardized mean differences data as a vector or matrix from a (svy)Ta ## See examples for CreateTableOne and svyCreateTableOne -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{CreateTableOne}}, \code{\link{svyCreateTableOne}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/Rd2roxygen.R b/man/Rd2roxygen.R deleted file mode 100644 index 5c1c4e4..0000000 --- a/man/Rd2roxygen.R +++ /dev/null @@ -1,104 +0,0 @@ -################################################################################ -### Temporary R script to convert Rd files to Roxygen files -## -## Created on: 2014-02-09 -## Author: Kazuki Yoshida -################################################################################ - - -### Prepare environment -################################################################################ - -## Load package -library(Rd2roxygen) -## Configure the format -options(roxygen.comment = "##' ") - - -### Conversion -## Set wd to man dir 
-setwd("~/Documents/statistics/package_development/tableone/man/") - -## Obtain file names -rdFilesToConvert <- dir(pattern = "*.Rd$") - -## Create a list of converted data -listConvertedData <- lapply(rdFilesToConvert, parse_file) - -## Show title element -lapply(listConvertedData, getElement, "title") - -## Check conversion of the first file visually -cat(create_roxygen(listConvertedData[[1]]), sep = "\n") - - -### Direct file conversion -## Rd2roxygen -## Convert all the Rd files of a package to roxygen comments -## Description: -## This function takes a package root directory, parses all its Rd -## files under the man directory and update the corresponding R -## source code by inserting roxygen documentation in to the R -## scripts. -## Usage: -## Rd2roxygen(pkg, nomatch, usage = FALSE) -## Note: -## ESS users may use ‘options(roxygen.comment = "##' ")’ to ensure -## the generated roxygen comments begin with ‘"##' "’, which is the -## default setting in Emacs/ESS. -## Re-run this function on a package will remove the previous roxygen -## comments before functions in R scripts. 
-Rd2roxygen(pkg = "~/Documents/statistics/package_development/tableone/", - usage = TRUE) -## ##------ Sun Feb 9 05:49:44 2014 ------## -## parsed: CreateCatTable.Rd -## looking for the object 'CreateCatTable' in: -## CreateCatTable.R: line 4 -## ~/Documents/statistics/package_development/tableone//R/CreateCatTable.R updated - - -## ##------ Sun Feb 9 05:49:44 2014 ------## -## parsed: CreateContTable.Rd -## looking for the object 'CreateContTable' in: -## CreateContTable.R: line 4 -## ~/Documents/statistics/package_development/tableone//R/CreateContTable.R updated - - -## ##------ Sun Feb 9 05:49:44 2014 ------## -## parsed: print.CatTable.Rd -## looking for the object 'print.CatTable' in: -## print.CatTable.R: line 2 -## ~/Documents/statistics/package_development/tableone//R/print.CatTable.R updated - - -## ##------ Sun Feb 9 05:49:44 2014 ------## -## parsed: print.ContTable.Rd -## looking for the object 'print.ContTable' in: -## print.ContTable.R: line 2 -## ~/Documents/statistics/package_development/tableone//R/print.ContTable.R updated - - -## ##------ Sun Feb 9 05:49:44 2014 ------## -## parsed: ShowRegTable.Rd -## looking for the object 'ShowRegTable' in: -## ShowRegTable.R: line 1 -## ~/Documents/statistics/package_development/tableone//R/ShowRegTable.R updated - - -## ##------ Sun Feb 9 05:49:44 2014 ------## -## parsed: summary.CatTable.Rd -## looking for the object 'summary.CatTable' in: -## summary.CatTable.R: line 1 -## ~/Documents/statistics/package_development/tableone//R/summary.CatTable.R updated - - -## ##------ Sun Feb 9 05:49:44 2014 ------## -## parsed: summary.ContTable.Rd -## looking for the object 'summary.ContTable' in: -## summary.ContTable.R: line 1 -## ~/Documents/statistics/package_development/tableone//R/summary.ContTable.R updated - - -## ##------ Sun Feb 9 05:49:44 2014 ------## -## parsed: tableone_dummy-package.Rd -## unmatched object 'tableone-package' written into 
~/Documents/statistics/package_development/tableone//R/tableone-package.R diff --git a/man/ShowRegTable.Rd b/man/ShowRegTable.Rd index 3980bd6..160a8aa 100644 --- a/man/ShowRegTable.Rd +++ b/man/ShowRegTable.Rd @@ -53,4 +53,3 @@ ShowRegTable(objCoxph, quote = TRUE) \author{ Kazuki Yoshida } - diff --git a/man/clean_comments.sh b/man/clean_comments.sh deleted file mode 100644 index 6f72945..0000000 --- a/man/clean_comments.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -## Invoke in the target folder -for file in *.Rd - -do - ## Remove comment lines - cat ${file} | grep -v "^ *%" > ${file}2 - ## Save the old files with comments - mv ${file} ${file}Old - ## Rename the new files to the original names - mv ${file}2 ${file} -done - diff --git a/man/print.CatTable.Rd b/man/print.CatTable.Rd index 57dc566..dd8021b 100644 --- a/man/print.CatTable.Rd +++ b/man/print.CatTable.Rd @@ -19,7 +19,7 @@ \item{quote}{Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily.} -\item{missing}{Whether to show missing data information (not implemented yet, placeholder)} +\item{missing}{Whether to show missing data information.} \item{explain}{Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown.} @@ -108,11 +108,10 @@ print(catTableBySexTrt, exact = "ascites", quote = TRUE) ## If you want to center-align values in Word, use noSpaces option. print(catTableBySexTrt, exact = "ascites", quote = TRUE, noSpaces = TRUE) -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{CreateTableOne}}, \code{\link{CreateCatTable}}, \code{\link{summary.CatTable}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/print.ContTable.Rd b/man/print.ContTable.Rd index 0b9b469..70066be 100644 --- a/man/print.ContTable.Rd +++ b/man/print.ContTable.Rd @@ -18,7 +18,7 @@ \item{quote}{Whether to show everything in quotes. The default is FALSE. 
If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily.} -\item{missing}{Whether to show missing data information (not implemented yet, placeholder)} +\item{missing}{Whether to show missing data information.} \item{explain}{Whether to add explanation to the variable names, i.e., (mean (sd) or median [IQR]) is added to the variable names.} @@ -104,11 +104,10 @@ print(contTableBySexTrt, nonnormal = nonNormalVars, quote = TRUE) ## If you want to center-align values in Word, use noSpaces option. print(contTableBySexTrt, nonnormal = nonNormalVars, quote = TRUE, noSpaces = TRUE) -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{CreateTableOne}}, \code{\link{CreateContTable}}, \code{\link{summary.ContTable}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/print.TableOne.Rd b/man/print.TableOne.Rd index 2864b2c..66012f7 100644 --- a/man/print.TableOne.Rd +++ b/man/print.TableOne.Rd @@ -6,9 +6,9 @@ \usage{ \method{print}{TableOne}(x, catDigits = 1, contDigits = 2, pDigits = 3, quote = FALSE, missing = FALSE, explain = TRUE, printToggle = TRUE, - test = TRUE, smd = FALSE, noSpaces = FALSE, format = c("fp", "f", "p", - "pf")[1], showAllLevels = FALSE, cramVars = NULL, exact = NULL, - nonnormal = NULL, minMax = FALSE, ...) + test = TRUE, smd = FALSE, noSpaces = FALSE, padColnames = FALSE, + format = c("fp", "f", "p", "pf")[1], showAllLevels = FALSE, + cramVars = NULL, exact = NULL, nonnormal = NULL, minMax = FALSE, ...) } \arguments{ \item{x}{Object returned by \code{\link{CreateTableOne}} function.} @@ -21,7 +21,7 @@ \item{quote}{Whether to show everything in quotes. The default is FALSE. 
If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily.} -\item{missing}{Whether to show missing data information (not implemented yet, placeholder)} +\item{missing}{Whether to show missing data information.} \item{explain}{Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown.} @@ -33,6 +33,8 @@ \item{noSpaces}{Whether to remove spaces added for alignment. Use this option if you prefer to align numbers yourself in other software.} +\item{padColnames}{Whether to pad column names with spaces to center justify. The default is FALSE. It is not conducted if noSpaces = TRUE.} + \item{format}{The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency).} \item{showAllLevels}{Whether to show all levels. FALSE by default, i.e., for 2-level categorical variables, only the higher level is shown to avoid redundant information.} @@ -57,11 +59,10 @@ A matrix object containing what you see is also invisibly returned. This can be ## See examples for CreateTableOne and svyCreateTableOne -} -\author{ -Kazuki Yoshida, Justin Bohn } \seealso{ \code{\link{CreateTableOne}}, \code{\link{CreateTableOne}}, \code{\link{summary.TableOne}} } - +\author{ +Kazuki Yoshida, Justin Bohn +} diff --git a/man/print.svyCatTable.Rd b/man/print.svyCatTable.Rd index 251a99a..7180298 100644 --- a/man/print.svyCatTable.Rd +++ b/man/print.svyCatTable.Rd @@ -19,7 +19,7 @@ \item{quote}{Whether to show everything in quotes. The default is FALSE. 
If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily.} -\item{missing}{Whether to show missing data information (not implemented yet, placeholder)} +\item{missing}{Whether to show missing data information.} \item{explain}{Whether to add explanation to the variable names, i.e., (\%) is added to the variable names when percentage is shown.} @@ -53,11 +53,10 @@ A matrix object containing what you see is also invisibly returned. This can be ## See the examples for svyCreateTableOne() -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{svyCreateTableOne}}, \code{\link{svyCreateCatTable}}, \code{\link{summary.svyCatTable}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/print.svyContTable.Rd b/man/print.svyContTable.Rd index 98750d2..fa3ca84 100644 --- a/man/print.svyContTable.Rd +++ b/man/print.svyContTable.Rd @@ -18,7 +18,7 @@ \item{quote}{Whether to show everything in quotes. The default is FALSE. If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily.} -\item{missing}{Whether to show missing data information (not implemented yet, placeholder)} +\item{missing}{Whether to show missing data information.} \item{explain}{Whether to add explanation to the variable names, i.e., (mean (sd) or median [IQR]) is added to the variable names.} @@ -48,11 +48,10 @@ A matrix object containing what you see is also invisibly returned. This can be ## See the examples for svyCreateTableOne() -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{svyCreateTableOne}}, \code{\link{svyCreateCatTable}}, \code{\link{summary.svyCatTable}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/summary.CatTable.Rd b/man/summary.CatTable.Rd index eeddaf3..c89ffa4 100644 --- a/man/summary.CatTable.Rd +++ b/man/summary.CatTable.Rd @@ -23,11 +23,10 @@ Shows all data a \code{CatTable} class object has. 
This includes the (optionally ## See examples for CreateTableOne -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{CreateTableOne}}, \code{\link{CreateCatTable}}, \code{\link{print.CatTable}}, } - +\author{ +Kazuki Yoshida +} diff --git a/man/summary.ContTable.Rd b/man/summary.ContTable.Rd index e1755f8..030e0d3 100644 --- a/man/summary.ContTable.Rd +++ b/man/summary.ContTable.Rd @@ -23,11 +23,10 @@ Shows all data a \code{ContTable} class object has. This includes the (optionall ## See examples for CreateTableOne -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{CreateTableOne}}, \code{\link{CreateContTable}}, \code{\link{print.ContTable}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/summary.TableOne.Rd b/man/summary.TableOne.Rd index 4717715..ebdbe63 100644 --- a/man/summary.TableOne.Rd +++ b/man/summary.TableOne.Rd @@ -23,11 +23,10 @@ Shows all data a \code{(svy)TableOne} class object has. This includes the (optio ## See examples for CreateTableOne and svyCreateTableOne -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{CreateTableOne}}, \code{\link{svyCreateCatTable}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/summary.svyCatTable.Rd b/man/summary.svyCatTable.Rd index 6feea06..0abef03 100644 --- a/man/summary.svyCatTable.Rd +++ b/man/summary.svyCatTable.Rd @@ -23,11 +23,10 @@ Shows all data a \code{svyCatTable} class object has. This includes the (optiona ## See the examples for svyCreateTableOne() -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{svyCreateTableOne}}, \code{\link{svyCreateCatTable}}, \code{\link{print.svyCatTable}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/summary.svyContTable.Rd b/man/summary.svyContTable.Rd index 195f30f..e6944a9 100644 --- a/man/summary.svyContTable.Rd +++ b/man/summary.svyContTable.Rd @@ -23,11 +23,10 @@ Shows all data a \code{svyContTable} class object has. 
This includes the (option ## See the examples for svyCreateTableOne() -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{svyCreateTableOne}}, \code{\link{svyCreateContTable}}, \code{\link{print.svyContTable}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/svyCreateCatTable.Rd b/man/svyCreateCatTable.Rd index 3f4d711..9f77fa8 100644 --- a/man/svyCreateCatTable.Rd +++ b/man/svyCreateCatTable.Rd @@ -34,11 +34,10 @@ Create an object summarizing categorical variables optionally stratifying by one ## See the examples for svyCreateTableOne() -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{svyCreateTableOne}}, \code{\link{print.svyCatTable}}, \code{\link{summary.svyCatTable}}, } - +\author{ +Kazuki Yoshida +} diff --git a/man/svyCreateContTable.Rd b/man/svyCreateContTable.Rd index 585b4bd..2a9e3af 100644 --- a/man/svyCreateContTable.Rd +++ b/man/svyCreateContTable.Rd @@ -37,11 +37,10 @@ Create an object summarizing continous variables optionally stratifying by one o ## See the examples for svyCreateTableOne() -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{svyCreateTableOne}}, \code{\link{print.svyContTable}}, \code{\link{summary.svyContTable}}, } - +\author{ +Kazuki Yoshida +} diff --git a/man/svyCreateTableOne.Rd b/man/svyCreateTableOne.Rd index 4f2ecd5..3366641 100644 --- a/man/svyCreateTableOne.Rd +++ b/man/svyCreateTableOne.Rd @@ -16,7 +16,7 @@ svyCreateTableOne(vars, strata, data, factorVars, includeNA = FALSE, \item{data}{A survey design object in which these variables exist. All variables (both vars and strata) must be in this survey design object. It is created with the \code{svydesign} function in the \code{survey} package.} -\item{factorVars}{Numerically coded variables that should be handled as categorical variables given as a character vector. If omitted, only factors are considered categorical variables. If all categorical variables in the dataset are already factors, this option is not necessary. 
The variables specified here must also be specified in the \code{vars} argument.} +\item{factorVars}{Numerically coded variables that should be handled as categorical variables given as a character vector. Do not include factors, unless you need to relevel them by removing empty levels. If omitted, only factors are considered categorical variables. The variables specified here must also be specified in the \code{vars} argument.} \item{includeNA}{If TRUE, NA is handled as a regular factor level rather than missing. NA is shown as the last factor level in the table. Only effective for categorical variables.} @@ -98,11 +98,10 @@ tab1$ContTable ## If SMDs are needed as numericals, use ExtractSmd() ExtractSmd(tab1) -} -\author{ -Kazuki Yoshida } \seealso{ \code{\link{print.TableOne}}, \code{\link{summary.TableOne}} } - +\author{ +Kazuki Yoshida +} diff --git a/man/tableone-package.Rd b/man/tableone-package.Rd index 0670843..1e3ad6e 100644 --- a/man/tableone-package.Rd +++ b/man/tableone-package.Rd @@ -2,8 +2,8 @@ % Please edit documentation in R/tableone-package.R \docType{package} \name{tableone-package} -\alias{tableone} \alias{tableone-package} +\alias{tableone} \title{Create "Table 1" to describe baseline characteristics} \description{ Creates "Table 1", i.e., description of baseline patient characteristics, which is essential in every medical research. Supports both continuous and categorical variables, as well as p-values and standardized mean differences. Weighted data are supported via the survey package. See github for a screencast. tableone was inspired by descriptive statistics functions in Deducer , a Java-based GUI package by Ian Fellows. This package does not require GUI or Java, and intended for command-line users. Most important functions are \code{\link{CreateTableOne}} and \code{\link{svyCreateTableOne}}. @@ -32,13 +32,12 @@ Developmental repository is on github. Your contributions are appreciated. 
## See examples for CreateTableOne and svyCreateTableOne +} +\seealso{ +\code{\link{CreateTableOne}}, \code{\link{svyCreateTableOne}}, \code{\link{print.TableOne}}, \code{\link{summary.TableOne}}, \code{\link{ShowRegTable}} } \author{ Kazuki Yoshida, Justin Bohn Maintainer: Kazuki Yoshida } -\seealso{ -\code{\link{CreateTableOne}}, \code{\link{svyCreateTableOne}}, \code{\link{print.TableOne}}, \code{\link{summary.TableOne}}, \code{\link{ShowRegTable}} -} - diff --git a/revdep_check.txt b/revdep_check.txt index e7eff2b..e69de29 100644 --- a/revdep_check.txt +++ b/revdep_check.txt @@ -1,55 +0,0 @@ -$check_dir -[1] "/var/folders/r8/dhc1ygjd6332_fbgphmsbm800000gn/T//Rtmp5vf49d/check_cran6b7316c7d6f" - -$libpath -[1] "/var/folders/r8/dhc1ygjd6332_fbgphmsbm800000gn/T//Rtmp5vf49d/R-lib" - -$pkg -$package -[1] "tableone" - -$type -[1] "Package" - -$title -[1] "Create \"Table 1\" to Describe Baseline Characteristics" - -$version -[1] "0.7.0" - -$date -[1] "2015-08-07" - -$author -[1] "Kazuki Yoshida, Justin Bohn." - -$maintainer -[1] "Kazuki Yoshida " - -$description -[1] "Creates \"Table 1\", i.e., description of baseline patient\ncharacteristics, which is essential in every medical research.\nSupports both continuous and categorical variables, as well as\np-values and standardized mean differences. Weighted data are\nsupported via the survey package. See github for a screencast.\ntableone was inspired by descriptive statistics functions in\nDeducer , a Java-based GUI package by Ian Fellows. This package\ndoes not require GUI or Java, and intended for command-line users." 
- -$license -[1] "GPL-2" - -$imports -[1] "\nsurvey,\nMASS,\ne1071,\nzoo,\ngmodels" - -$suggests -[1] "\nsurvival,\ntestthat,\nMatrix,\ndummies,\nMatching,\nreshape2,\nggplot2,\nknitr" - -$url -[1] "https://github.com/kaz-yos/tableone" - -$vignettebuilder -[1] "knitr" - -$path -[1] "/Users/kazuki/Documents/programming/r/tableone" - -attr(,"class") -[1] "package" - -$deps -[1] "RcmdrPlugin.EZR" - diff --git a/tableone.Rcheck/tableone-Ex.Rout b/tableone.Rcheck/tableone-Ex.Rout index 405159c..425fbe4 100644 --- a/tableone.Rcheck/tableone-Ex.Rout +++ b/tableone.Rcheck/tableone-Ex.Rout @@ -1,7 +1,7 @@ -R version 3.2.2 (2015-08-14) -- "Fire Safety" -Copyright (C) 2015 The R Foundation for Statistical Computing -Platform: x86_64-apple-darwin13.4.0 (64-bit) +R version 3.4.0 (2017-04-21) -- "You Stupid Darkness" +Copyright (C) 2017 The R Foundation for Statistical Computing +Platform: x86_64-apple-darwin15.6.0 (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. @@ -2941,6 +2941,8 @@ detaching ‘package:survival’ > library(tableone) > library(survey) Loading required package: grid +Loading required package: Matrix +Loading required package: survival Attaching package: ‘survey’ @@ -3163,9 +3165,9 @@ _U_s_a_g_e: ## S3 method for class 'TableOne' print(x, catDigits = 1, contDigits = 2, pDigits = 3, quote = FALSE, missing = FALSE, explain = TRUE, printToggle = TRUE, - test = TRUE, smd = FALSE, noSpaces = FALSE, format = c("fp", "f", "p", - "pf")[1], showAllLevels = FALSE, cramVars = NULL, exact = NULL, - nonnormal = NULL, minMax = FALSE, ...) + test = TRUE, smd = FALSE, noSpaces = FALSE, padColnames = FALSE, + format = c("fp", "f", "p", "pf")[1], showAllLevels = FALSE, + cramVars = NULL, exact = NULL, nonnormal = NULL, minMax = FALSE, ...) _A_r_g_u_m_e_n_t_s: @@ -3183,8 +3185,7 @@ contDigits: Number of digits to print for continuous variables. 
Default If TRUE, everything including the row and column names are quoted so that you can copy it to Excel easily. - missing: Whether to show missing data information (not implemented - yet, placeholder) + missing: Whether to show missing data information. explain: Whether to add explanation to the variable names, i.e., (%) is added to the variable names when percentage is shown. @@ -3203,6 +3204,9 @@ printToggle: Whether to print the output. If FLASE, no output is noSpaces: Whether to remove spaces added for alignment. Use this option if you prefer to align numbers yourself in other software. +padColnames: Whether to pad column names with spaces to center justify. + The default is FALSE. It is not conducted if noSpaces = TRUE. + format: The default is "fp" frequency (percentage). You can also choose from "f" frequency only, "p" percentage only, and "pf" percentage (frequency). @@ -3285,7 +3289,8 @@ _E_x_a_m_p_l_e_s: > base::cat("svyCreateTableOne", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() -detaching ‘package:survey’, ‘package:grid’ +detaching ‘package:survey’, ‘package:survival’, ‘package:Matrix’, + ‘package:grid’ > nameEx("tableone-package") > ### * tableone-package @@ -3295,7 +3300,7 @@ detaching ‘package:survey’, ‘package:grid’ > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: tableone-package > ### Title: Create "Table 1" to describe baseline characteristics -> ### Aliases: tableone tableone-package +> ### Aliases: tableone-package tableone > > ### ** Examples > @@ -3311,7 +3316,7 @@ detaching ‘package:survey’, ‘package:grid’ > ### > options(digits = 7L) > base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 'CheckExEnv'),"\n") -Time elapsed: 3.603 0.1 3.758 0.004 0.006 +Time elapsed: 9.621 0.285 9.982 0.006 0.006 > grDevices::dev.off() null device 1 diff --git a/test-all.txt b/test-all.txt 
index c7a8a55..39e8abe 100644 --- a/test-all.txt +++ b/test-all.txt @@ -1,4 +1,138 @@ -Unit tests for the CreateTableOne function : ................. Stratified by trt +Unit tests for the CreateTableOne function: ....................... + Overall Missing + n 418 + time (mean (sd)) 1917.78 (1104.67) 0.0 + status (%) 0.0 + 0 232 (55.5) + 1 25 ( 6.0) + 2 161 (38.5) + age (mean (sd)) 50.74 (10.45) 0.0 + sex = f (%) 374 (89.5) 0.0 + ascites = 1 (%) 24 ( 7.7) 25.4 + hepato = 1 (%) 160 (51.3) 25.4 + spiders = 1 (%) 90 (28.8) 25.4 + edema (%) 0.0 + 0 354 (84.7) + 0.5 44 (10.5) + 1 20 ( 4.8) + bili (mean (sd)) 3.22 (4.41) 0.0 + chol (mean (sd)) 369.51 (231.94) 32.1 + albumin (mean (sd)) 3.50 (0.42) 0.0 + copper (mean (sd)) 97.65 (85.61) 25.8 + alk.phos (mean (sd)) 1982.66 (2140.39) 25.4 + ast (mean (sd)) 122.56 (56.70) 25.4 + trig (mean (sd)) 124.70 (65.15) 32.5 + platelet (mean (sd)) 257.02 (98.33) 2.6 + protime (mean (sd)) 10.73 (1.02) 0.5 + stage (%) 1.4 + 1 21 ( 5.1) + 2 92 (22.3) + 3 155 (37.6) + 4 144 (35.0) +. + Overall Missing + n 418 + time (mean (sd)) 1917.78 (1104.67) 0.0 + status (%) 0.0 + 0 232 (55.5) + 1 25 ( 6.0) + 2 161 (38.5) + age (mean (sd)) 50.74 (10.45) 0.0 + sex = f (%) 374 (89.5) 0.0 + ascites (%) 25.4 + 0 288 (68.9) + 1 24 ( 5.7) + NA 106 (25.4) + hepato (%) 25.4 + 0 152 (36.4) + 1 160 (38.3) + NA 106 (25.4) + spiders (%) 25.4 + 0 222 (53.1) + 1 90 (21.5) + NA 106 (25.4) + edema (%) 0.0 + 0 354 (84.7) + 0.5 44 (10.5) + 1 20 ( 4.8) + bili (mean (sd)) 3.22 (4.41) 0.0 + chol (mean (sd)) 369.51 (231.94) 32.1 + albumin (mean (sd)) 3.50 (0.42) 0.0 + copper (mean (sd)) 97.65 (85.61) 25.8 + alk.phos (mean (sd)) 1982.66 (2140.39) 25.4 + ast (mean (sd)) 122.56 (56.70) 25.4 + trig (mean (sd)) 124.70 (65.15) 32.5 + platelet (mean (sd)) 257.02 (98.33) 2.6 + protime (mean (sd)) 10.73 (1.02) 0.5 + stage (%) 1.4 + 1 21 ( 5.0) + 2 92 (22.0) + 3 155 (37.1) + 4 144 (34.4) + NA 6 ( 1.4) +. 
Stratified by trt + 1 2 p test Missing + n 158 154 + time (mean (sd)) 2015.62 (1094.12) 1996.86 (1155.93) 0.883 0.0 + status (%) 0.894 0.0 + 0 83 (52.5) 85 (55.2) + 1 10 ( 6.3) 9 ( 5.8) + 2 65 (41.1) 60 (39.0) + age (mean (sd)) 51.42 (11.01) 48.58 (9.96) 0.018 0.0 + sex = f (%) 137 (86.7) 139 (90.3) 0.421 0.0 + ascites = 1 (%) 14 ( 8.9) 10 ( 6.5) 0.567 25.4 + hepato = 1 (%) 73 (46.2) 87 (56.5) 0.088 25.4 + spiders = 1 (%) 45 (28.5) 45 (29.2) 0.985 25.4 + edema (%) 0.877 0.0 + 0 132 (83.5) 131 (85.1) + 0.5 16 (10.1) 13 ( 8.4) + 1 10 ( 6.3) 10 ( 6.5) + bili (mean (sd)) 2.87 (3.63) 3.65 (5.28) 0.131 0.0 + chol (mean (sd)) 365.01 (209.54) 373.88 (252.48) 0.748 32.1 + albumin (mean (sd)) 3.52 (0.44) 3.52 (0.40) 0.874 0.0 + copper (mean (sd)) 97.64 (90.59) 97.65 (80.49) 0.999 25.8 + alk.phos (mean (sd)) 2021.30 (2183.44) 1943.01 (2101.69) 0.747 25.4 + ast (mean (sd)) 120.21 (54.52) 124.97 (58.93) 0.460 25.4 + trig (mean (sd)) 124.14 (71.54) 125.25 (58.52) 0.886 32.5 + platelet (mean (sd)) 258.75 (100.32) 265.20 (90.73) 0.555 2.6 + protime (mean (sd)) 10.65 (0.85) 10.80 (1.14) 0.197 0.5 + stage (%) 0.201 1.4 + 1 12 ( 7.6) 4 ( 2.6) + 2 35 (22.2) 32 (20.8) + 3 56 (35.4) 64 (41.6) + 4 55 (34.8) 54 (35.1) +. 
Stratified by trt:sex + 1:m 2:m 1:f 2:f p test Missing + n 21 15 137 139 + time (mean (sd)) 1793.48 (1244.70) 2156.33 (1428.56) 2049.67 (1070.20) 1979.65 (1127.52) 0.730 0.0 + status (%) 0.033 0.0 + 0 4 (19.0) 7 (46.7) 79 ( 57.7) 78 ( 56.1) + 1 3 (14.3) 0 ( 0.0) 7 ( 5.1) 9 ( 6.5) + 2 14 (66.7) 8 (53.3) 51 ( 37.2) 52 ( 37.4) + age (mean (sd)) 55.57 (12.61) 57.09 (10.07) 50.78 (10.65) 47.66 (9.54) <0.001 0.0 + sex = f (%) 0 ( 0.0) 0 ( 0.0) 137 (100.0) 139 (100.0) <0.001 0.0 + ascites = 1 (%) 1 ( 4.8) 2 (13.3) 13 ( 9.5) 8 ( 5.8) 0.516 25.4 + hepato = 1 (%) 12 (57.1) 9 (60.0) 61 ( 44.5) 78 ( 56.1) 0.208 25.4 + spiders = 1 (%) 3 (14.3) 1 ( 6.7) 42 ( 30.7) 44 ( 31.7) 0.089 25.4 + edema (%) 0.906 0.0 + 0 17 (81.0) 12 (80.0) 115 ( 83.9) 119 ( 85.6) + 0.5 3 (14.3) 1 ( 6.7) 13 ( 9.5) 12 ( 8.6) + 1 1 ( 4.8) 2 (13.3) 9 ( 6.6) 8 ( 5.8) + bili (mean (sd)) 2.98 (2.11) 2.72 (2.46) 2.86 (3.81) 3.75 (5.50) 0.394 0.0 + chol (mean (sd)) 403.43 (204.95) 301.00 (111.32) 358.24 (210.46) 381.73 (262.26) 0.512 32.1 + albumin (mean (sd)) 3.63 (0.43) 3.50 (0.49) 3.50 (0.44) 3.53 (0.39) 0.585 0.0 + copper (mean (sd)) 174.90 (105.33) 125.40 (89.18) 85.71 (82.27) 94.64 (79.25) <0.001 25.8 + alk.phos (mean (sd)) 2486.16 (2385.32) 1734.45 (2478.07) 1950.04 (2151.35) 1965.52 (2066.15) 0.706 25.4 + ast (mean (sd)) 128.71 (48.74) 112.58 (44.38) 118.91 (55.40) 126.30 (60.27) 0.598 25.4 + trig (mean (sd)) 145.81 (56.62) 114.86 (39.59) 120.28 (73.41) 126.38 (60.22) 0.370 32.5 + platelet (mean (sd)) 232.65 (97.38) 240.13 (73.93) 262.59 (100.53) 267.95 (92.20) 0.362 2.6 + protime (mean (sd)) 10.84 (0.94) 11.23 (0.97) 10.62 (0.84) 10.75 (1.15) 0.137 0.5 + stage (%) 0.646 1.4 + 1 2 ( 9.5) 1 ( 6.7) 10 ( 7.3) 3 ( 2.2) + 2 4 (19.0) 2 (13.3) 31 ( 22.6) 30 ( 21.6) + 3 7 (33.3) 5 (33.3) 49 ( 35.8) 59 ( 42.4) + 4 8 (38.1) 7 (46.7) 47 ( 34.3) 47 ( 33.8) +. 
Stratified by trt 1 2 p test n 158 154 time (mean (sd)) 2015.62 (1094.12) 1996.86 (1155.93) 0.883 @@ -1202,7 +1336,7 @@ ast 158 0 0.0 120 54.5 112 76.7 152 26.4 338 1.09 1.6 trig 158 19 12.0 124 71.5 106 84.5 146 33.0 598 2.95 14.3 platelet 158 2 1.3 259 100.3 255 189.5 322 62.0 563 0.50 0.2 protime 158 0 0.0 11 0.9 11 10.0 11 9.0 14 1.10 1.6 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 n miss p.miss mean sd median p25 p75 min max skew kurt time 154 0 0.0 1997 1155.9 1811 1153.0 2771 51.0 4523 0.4 -0.7 @@ -1276,7 +1410,7 @@ trt: 1 3 56 35.4 65.2 4 55 34.8 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 var n miss p.miss level freq percent cum.percent status 154 0 0.0 0 85 55.2 55.2 @@ -1341,7 +1475,7 @@ ast 21 0 0 129 48.7 127 86 158 56.8 222 0.31 -0.9 trig 21 0 0 146 56.6 142 107 194 55.0 242 0.16 -1.0 platelet 21 1 5 233 97.4 228 148 314 70.0 394 -0.05 -1.2 protime 21 0 0 11 0.9 11 10 11 9.7 14 2.11 6.8 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 sex: m n miss p.miss mean sd median p25 p75 min max skew kurt @@ -1356,7 +1490,7 @@ ast 15 0 0 113 44.4 110 74 129 46.5 188 0.3 -0.8 trig 15 1 7 115 39.6 108 90 137 49.0 188 0.4 -0.4 platelet 15 0 0 240 73.9 214 190 294 119.0 360 0.2 -1.0 protime 15 0 0 11 1.0 11 11 12 10.0 13 0.7 0.2 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 1 sex: f n miss p.miss mean sd median p25 p75 min max skew kurt @@ -1371,7 +1505,7 @@ ast 137 0 0.0 119 55.4 108 74.4 150 26.4 338 
1.20 1.9 trig 137 19 13.9 120 73.4 101 84.0 134 33.0 598 3.28 16.0 platelet 137 1 0.7 263 100.5 258 194.2 322 62.0 563 0.57 0.3 protime 137 0 0.0 11 0.8 11 10.0 11 9.0 13 0.92 0.6 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 sex: f n miss p.miss mean sd median p25 p75 min max skew kurt @@ -1447,7 +1581,7 @@ sex: m 3 7 33.3 61.9 4 8 38.1 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 sex: m var n miss p.miss level freq percent cum.percent @@ -1476,7 +1610,7 @@ sex: m 3 5 33.3 53.3 4 7 46.7 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 1 sex: f var n miss p.miss level freq percent cum.percent @@ -1505,7 +1639,7 @@ sex: f 3 49 35.8 65.7 4 47 34.3 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 sex: f var n miss p.miss level freq percent cum.percent @@ -1548,7 +1682,7 @@ stage 6.463005e-01 NA Standardize mean differences average 1 vs 2 1 vs 3 1 vs 4 2 vs 3 2 vs 4 3 vs 4 status 0.5770647 0.79644734 0.8748771 0.83097631 0.4375075 0.46214324 0.06043662 -sex NaN NaN NaN NaN NaN NaN NaN +sex NaN 0.00000000 NaN NaN NaN NaN 0.00000000 ascites 0.1755851 0.30219043 0.1845375 0.04452096 0.1211310 0.26007640 0.14105454 hepato 0.1598214 0.05802589 0.2544151 0.02073917 0.3135916 0.07878788 0.23336861 spiders 0.4018377 0.25073566 0.3999925 0.42200838 0.6471968 0.66954801 0.02154469 @@ -1563,21 +1697,21 @@ sex: m time 21 0 0 1793 1244.7 1302 999 2386 140 4459 1.0 0.1 age 21 0 0 56 12.6 56 46 66 33 78 -0.2 -0.7 
protime 21 0 0 11 0.9 11 10 11 10 14 2.1 6.8 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 sex: m n miss p.miss mean sd median p25 p75 min max skew kurt time 15 0 0 2156 1429 1656 1054 3420 191 4427 0.3 -1.4 age 15 0 0 57 10 53 49 66 44 75 0.5 -1.3 protime 15 0 0 11 1 11 11 12 10 13 0.7 0.2 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 1 sex: f n miss p.miss mean sd median p25 p75 min max skew kurt time 137 0 0 2050 1070.2 1945 1293 2644 41 4556 0.32 -0.4 age 137 0 0 51 10.7 51 43 57 26 77 0.05 -0.5 protime 137 0 0 11 0.8 11 10 11 9 13 0.92 0.6 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 sex: f n miss p.miss mean sd median p25 p75 min max skew kurt @@ -1612,7 +1746,7 @@ sex: m sex 21 0 0.0 m 21 100.0 100.0 f 0 0.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 sex: m var n miss p.miss level freq percent cum.percent @@ -1626,7 +1760,7 @@ sex: m sex 15 0 0.0 m 15 100.0 100.0 f 0 0.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 1 sex: f var n miss p.miss level freq percent cum.percent @@ -1640,7 +1774,7 @@ sex: f sex 137 0 0.0 m 0 0.0 0.0 f 137 100.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ trt: 2 sex: f var n miss 
p.miss level freq percent cum.percent @@ -1664,14 +1798,14 @@ sex 2.514569e-67 4.934035e-48 Standardize mean differences average 1 vs 2 1 vs 3 1 vs 4 2 vs 3 2 vs 4 3 vs 4 status 0.5770647 0.7964473 0.8748771 0.8309763 0.4375075 0.4621432 0.06043662 -trt NaN NaN NaN NaN NaN NaN NaN -sex NaN NaN NaN NaN NaN NaN NaN +trt NaN NaN 0.0000000 NaN NaN 0.0000000 NaN +sex NaN 0.0000000 NaN NaN NaN NaN 0.00000000 . -Unit tests for the ExtractSmd function : ..... average 1 vs 2 1 vs 3 1 vs 4 2 vs 3 2 vs 4 3 vs 4 +Unit tests for the ExtractSmd function: ..... average 1 vs 2 1 vs 3 1 vs 4 2 vs 3 2 vs 4 3 vs 4 time 0.1556365 0.27083134 0.22071813 0.15677410 0.08450703 0.13729269 0.06369595 status 0.5770647 0.79644734 0.87487707 0.83097631 0.43750751 0.46214324 0.06043662 age 0.5214974 0.13353519 0.40993004 0.70700091 0.60864596 0.96133382 0.30853876 -sex NaN NaN NaN NaN NaN NaN NaN +sex NaN 0.00000000 NaN NaN NaN NaN 0.00000000 ascites 0.1755851 0.30219043 0.18453749 0.04452096 0.12113103 0.26007640 0.14105454 hepato 0.1598214 0.05802589 0.25441512 0.02073917 0.31359163 0.07878788 0.23336861 spiders 0.4018377 0.25073566 0.39999254 0.42200838 0.64719678 0.66954801 0.02154469 @@ -1692,24 +1826,24 @@ race NaN NaN NaN NaN NaN NaN agecat 0.26102029 0.47958417 0.21054511 0.163093544 0.26927031 0.33972601 0.103902581 RIAGENDR 0.06902841 0.04460885 0.11590734 0.111246698 0.07120651 0.06655532 0.004645715 .... -Tests for functions for standardized mean differences : ..................................................................................................................................................... +Tests for functions for standardized mean differences: ............................................................................................................................................................. 
### Summary of continuous variables ### HI_CHOL: 0 RIAGENDR: 1 n miss p.miss mean sd median p25 p75 min max skew kurt race 3526 0 0 2 0.9 2 1 2 1 4 0.6 -0.3 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ HI_CHOL: 1 RIAGENDR: 1 n miss p.miss mean sd median p25 p75 min max skew kurt race 363 0 0 2 0.9 2 1 2 1 4 0.8 -0.1 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ HI_CHOL: 0 RIAGENDR: 2 n miss p.miss mean sd median p25 p75 min max skew kurt race 3533 0 0 2 0.9 2 1 2 1 4 0.6 -0.4 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ HI_CHOL: 1 RIAGENDR: 2 n miss p.miss mean sd median p25 p75 min max skew kurt @@ -1735,7 +1869,7 @@ RIAGENDR: 1 (39,59] 775 22.0 76.3 (59,Inf] 835 23.7 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ HI_CHOL: 1 RIAGENDR: 1 var n miss p.miss level freq percent cum.percent @@ -1744,7 +1878,7 @@ RIAGENDR: 1 (39,59] 173 47.7 74.7 (59,Inf] 92 25.3 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ HI_CHOL: 0 RIAGENDR: 2 var n miss p.miss level freq percent cum.percent @@ -1753,7 +1887,7 @@ RIAGENDR: 2 (39,59] 796 22.5 78.1 (59,Inf] 772 21.9 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ HI_CHOL: 1 RIAGENDR: 2 var n miss p.miss level freq percent 
cum.percent @@ -1784,9 +1918,9 @@ agecat 0.702331 0.9040822 0.109801 0.985333 0.8536402 0.3838583 0.9772713 (39,59] 775 (22.0) 173 (47.7) 796 (22.5) 167 (39.4) (59,Inf] 835 (23.7) 92 (25.3) 772 (21.9) 181 (42.7) . -Unit tests for the survey-related modules : ................................................ -Unit tests for the modules : ........ -Unit tests for regression summary function : . exp(coef) [confint] p +Unit tests for the survey-related modules: ................................................ +Unit tests for the modules: ........ +Unit tests for regression summary function: . exp(coef) [confint] p trt 1.03 [0.72, 1.47] 0.891 age 1.03 [1.01, 1.05] 0.005 albumin 0.26 [0.17, 0.41] <0.001 @@ -1820,8 +1954,67 @@ trt -50.39655 [-275.07261, 174.27950] 0.661 age -2.97371 [-13.96894, 8.02152] 0.596 albumin 987.37770 [697.13553, 1277.61988] <0.001 ascites -664.69235 [-1125.03679, -204.34791] 0.005 -. -Unit tests for svy* user functions : .......... Stratified by E +. exp(coef) [confint] p +(Intercept) 2.82698 [2.64169, 3.02526] <0.001 +trt 0.82689 [0.75758, 0.90254] <0.001 +day 0.96758 [0.96225, 0.97295] <0.001 +.. coef [confint] p +(Intercept) 2.70219 [2.51140, 2.89298] <0.001 +trt -0.36806 [-0.57681, -0.15930] 0.001 +day -0.06464 [-0.07910, -0.05017] <0.001 +.. coef [confint] p +(Intercept) 2.70219 [2.51320, 2.89118] 27.917 +trt -0.36806 [-0.57306, -0.16305] -3.516 +day -0.06464 [-0.07905, -0.05022] -8.807 +.. exp(coef) [confint] p +(Intercept) 2.82371 [2.31045, 3.43231] <0.001 +trt 0.82903 [0.70232, 0.97777] 0.026 +day 0.96758 [0.94787, 0.98762] 0.002 +.. +Unit tests for svy* user functions: ................. + Overall Missing + n 450.01 + E (mean (sd)) 2.00 (0.82) 0.0 + C (mean (sd)) 2.13 (0.88) 0.0 + Y = 1 (%) 177.0 (39.4) 0.2 + C1 = 1 (%) 150.0 (33.3) 0.0 + C2 (mean (sd)) 0.47 (0.50) 0.0 +. 
+ Overall Missing + n 450.01 + E (mean (sd)) 2.00 (0.82) 0.0 + C (mean (sd)) 2.13 (0.88) 0.0 + Y (%) 0.2 + 0 272.7 (60.6) + 1 177.0 (39.3) + NA 0.3 ( 0.1) + C1 = 1 (%) 150.0 (33.3) 0.0 + C2 (mean (sd)) 0.47 (0.50) 0.0 +. Stratified by E + 1 2 3 p test Missing + n 150.01 150.00 150.00 + E (mean (sd)) 1.00 (0.00) 2.00 (0.00) 3.00 (0.00) <0.001 0.0 + C (mean (sd)) 2.13 (0.89) 2.13 (0.89) 2.13 (0.89) 1.000 0.0 + Y = 1 (%) 59.0 (39.4) 59.0 (39.3) 59.0 (39.3) 1.000 0.2 + C1 = 1 (%) 50.0 (33.3) 50.0 (33.3) 50.0 (33.3) 1.000 0.0 + C2 (mean (sd)) 0.47 (0.50) 0.47 (0.50) 0.47 (0.50) 1.000 0.0 +. Stratified by E:C1 + 1:0 2:0 3:0 1:1 2:1 3:1 p test + n 100.01 100.00 100.00 50.00 50.00 50.00 + E (mean (sd)) 1.00 (0.00) 2.00 (0.00) 3.00 (0.00) 1.00 (0.00) 2.00 (0.00) 3.00 (0.00) <0.001 + C (mean (sd)) 1.60 (0.49) 1.60 (0.49) 1.60 (0.49) 3.20 (0.40) 3.20 (0.40) 3.20 (0.40) <0.001 + Y = 1 (%) 22.0 (22.1) 22.0 (22.0) 22.0 (22.0) 37.0 ( 74.0) 37.0 ( 74.0) 37.0 ( 74.0) <0.001 + C1 = 1 (%) 0.0 ( 0.0) 0.0 ( 0.0) 0.0 ( 0.0) 50.0 (100.0) 50.0 (100.0) 50.0 (100.0) <0.001 + C2 (mean (sd)) 0.60 (0.49) 0.60 (0.49) 0.60 (0.49) 0.20 (0.40) 0.20 (0.40) 0.20 (0.40) <0.001 + Stratified by E:C1 + Missing + n + E (mean (sd)) 0.0 + C (mean (sd)) 0.0 + Y = 1 (%) 0.2 + C1 = 1 (%) 0.0 + C2 (mean (sd)) 0.0 +. Stratified by E 1 2 3 p test n 150.01 150.00 150.00 E (mean (sd)) 1.00 (0.00) 2.00 (0.00) 3.00 (0.00) <0.001 @@ -1849,21 +2042,13 @@ Unit tests for svy* user functions : .......... Stratified by E C1 = 1 (%) 150.0 (33.3) C2 (mean (sd)) 0.47 (0.50) . 
Stratified by E:C1 - 1:0 2:0 3:0 1:1 2:1 3:1 p - n 100.01 100.00 100.00 50.00 50.00 50.00 - E (mean (sd)) 1.00 (0.00) 2.00 (0.00) 3.00 (0.00) 1.00 (0.00) 2.00 (0.00) 3.00 (0.00) <0.001 - C (mean (sd)) 1.60 (0.49) 1.60 (0.49) 1.60 (0.49) 3.20 (0.40) 3.20 (0.40) 3.20 (0.40) <0.001 - Y = 1 (%) 22.0 (22.1) 22.0 (22.0) 22.0 (22.0) 37.0 ( 74.0) 37.0 ( 74.0) 37.0 ( 74.0) <0.001 - C1 = 1 (%) 0.0 ( 0.0) 0.0 ( 0.0) 0.0 ( 0.0) 50.0 (100.0) 50.0 (100.0) 50.0 (100.0) <0.001 - C2 (mean (sd)) 0.60 (0.49) 0.60 (0.49) 0.60 (0.49) 0.20 (0.40) 0.20 (0.40) 0.20 (0.40) <0.001 - Stratified by E:C1 - test - n - E (mean (sd)) - C (mean (sd)) - Y = 1 (%) - C1 = 1 (%) - C2 (mean (sd)) + 1:0 2:0 3:0 1:1 2:1 3:1 p test + n 100.01 100.00 100.00 50.00 50.00 50.00 + E (mean (sd)) 1.00 (0.00) 2.00 (0.00) 3.00 (0.00) 1.00 (0.00) 2.00 (0.00) 3.00 (0.00) <0.001 + C (mean (sd)) 1.60 (0.49) 1.60 (0.49) 1.60 (0.49) 3.20 (0.40) 3.20 (0.40) 3.20 (0.40) <0.001 + Y = 1 (%) 22.0 (22.1) 22.0 (22.0) 22.0 (22.0) 37.0 ( 74.0) 37.0 ( 74.0) 37.0 ( 74.0) <0.001 + C1 = 1 (%) 0.0 ( 0.0) 0.0 ( 0.0) 0.0 ( 0.0) 50.0 (100.0) 50.0 (100.0) 50.0 (100.0) <0.001 + C2 (mean (sd)) 0.60 (0.49) 0.60 (0.49) 0.60 (0.49) 0.20 (0.40) 0.20 (0.40) 0.20 (0.40) <0.001 . Stratified by E 1 2 3 p test n 150.0120 150.0030 150.0000 @@ -1956,15 +2141,15 @@ Unit tests for svy* user functions : .......... Stratified by E Y = 1 (%) 22.0 (22.1) 22.0 (22.0) 22.0 (22.0) 37.0 ( 74.0) 37.0 ( 74.0) 37.0 ( 74.0) <0.001 C1 = 1 (%) 0.0 ( 0.0) 0.0 ( 0.0) 0.0 ( 0.0) 50.0 (100.0) 50.0 (100.0) 50.0 (100.0) <0.001 . 
Stratified by E:C1 - 1:0 2:0 3:0 1:1 2:1 - n 100.012 100.003 100.000 50.000 50.000 - Y = 1 (%) 22.001 (22.062) 22.001 (22.000) 22.000 (22.000) 37.000 ( 74.000) 37.000 ( 74.000) - C1 = 1 (%) 0.000 ( 0.000) 0.000 ( 0.000) 0.000 ( 0.000) 50.000 (100.000) 50.000 (100.000) + 1:0 2:0 3:0 1:1 2:1 3:1 + n 100.012 100.003 100.000 50.000 50.000 50.000 + Y = 1 (%) 22.001 (22.062) 22.001 (22.000) 22.000 (22.000) 37.000 ( 74.000) 37.000 ( 74.000) 37.000 ( 74.000) + C1 = 1 (%) 0.000 ( 0.000) 0.000 ( 0.000) 0.000 ( 0.000) 50.000 (100.000) 50.000 (100.000) 50.000 (100.000) Stratified by E:C1 - 3:1 p test - n 50.000 - Y = 1 (%) 37.000 ( 74.000) <0.00001 - C1 = 1 (%) 50.000 (100.000) <0.00001 + p test + n + Y = 1 (%) <0.00001 + C1 = 1 (%) <0.00001 . Stratified by E:C1 1:0 2:0 3:0 1:1 2:1 3:1 n 100.0 100.0 100.0 50.0 50.0 50.0 @@ -2341,13 +2526,13 @@ E: 1 E 150 0 0 1.0 0.0 1 1 1 1 1 C 150 0 0 2.1 0.9 2 1 3 1 4 C2 150 0 0 0.5 0.5 0 0 1 0 1 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E: 2 n miss p.miss mean sd median p25 p75 min max E 150 0 0 2.0 0.0 2 2 2 2 2 C 150 0 0 2.1 0.9 2 1 3 1 4 C2 150 0 0 0.5 0.5 0 0 1 0 1 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E: 3 n miss p.miss mean sd median p25 p75 min max E 150 0 0 3.0 0.0 3 3 3 3 3 @@ -2378,7 +2563,7 @@ E: 1 C1 150.0 0.0 0.0 0 100.0 66.7 66.7 1 50.0 33.3 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E: 2 var n miss p.miss level freq percent cum.percent Y 150.0 0.0 0.0 0 91.0 60.7 60.7 @@ -2387,7 +2572,7 @@ E: 2 C1 150.0 0.0 0.0 0 100.0 66.7 66.7 1 50.0 33.3 100.0 
---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E: 3 var n miss p.miss level freq percent cum.percent Y 150.0 0.0 0.0 0 91.0 60.7 60.7 @@ -2414,31 +2599,31 @@ E:C1: 1:0 E 100 0 0 1.0 0.0 1 1 1 1 1 C 100 0 0 1.6 0.5 2 1 2 1 2 C2 100 0 0 0.6 0.5 1 0 1 0 1 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 2:0 n miss p.miss mean sd median p25 p75 min max E 100 0 0 2.0 2e-16 2 2 2 2 2 C 100 0 0 1.6 5e-01 2 1 2 1 2 C2 100 0 0 0.6 5e-01 1 0 1 0 1 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 3:0 n miss p.miss mean sd median p25 p75 min max E 100 0 0 3.0 0.0 3 3 3 3 3 C 100 0 0 1.6 0.5 2 1 2 1 2 C2 100 0 0 0.6 0.5 1 0 1 0 1 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 1:1 n miss p.miss mean sd median p25 p75 min max E 50 0 0 1.0 0.0 1 1 1 1 1 C 50 0 0 3.2 0.4 3 3 3 3 4 C2 50 0 0 0.2 0.4 0 0 0 0 1 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 2:1 n miss p.miss mean sd median p25 p75 min max E 50 0 0 2.0 0.0 2 2 2 2 2 C 50 0 0 3.2 0.4 3 3 3 3 4 C2 50 0 0 0.2 0.4 0 0 0 0 1 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 3:1 n miss p.miss mean sd median p25 p75 min max E 50 0 0 3.0 0.0 3 3 3 3 3 @@ -2452,14 +2637,14 @@ C 2.518377e-307 0.00000e+00 C2 
1.950910e-37 1.95091e-37 Standardize mean differences - average 1 vs 2 1 vs 3 1 vs 4 1 vs 5 1 vs 6 2 vs 3 2 vs 4 2 vs 5 -E NaN 6.350292e+15 Inf NaN Inf Inf 6.350292e+15 6.350292e+15 1.4100480 -C 2.1373647 1.710913e-04 0.000146495 3.5608223 3.5673100 3.5708236 2.439672e-05 3.556921e+00 3.5633882 -C2 0.5343248 1.710913e-04 0.000146495 0.8900053 0.8916269 0.8925051 2.439672e-05 8.892636e-01 0.8908805 - 2 vs 6 3 vs 4 3 vs 5 3 vs 6 4 vs 5 4 vs 6 5 vs 6 -E 6.350292e+15 Inf Inf NaN Inf Inf Inf -C 3.566891e+00 3.5525326 3.5589756 3.5624649 0 0 0 -C2 8.917561e-01 0.8881331 0.8897439 0.8906162 0 0 0 + average 1 vs 2 1 vs 3 1 vs 4 1 vs 5 1 vs 6 2 vs 3 2 vs 4 2 vs 5 2 vs 6 +E Inf 6.350292e+15 Inf 0.0000000 Inf Inf 6.350292e+15 6.350292e+15 1.4100480 6.350292e+15 +C 2.1373647 1.710913e-04 0.000146495 3.5608223 3.5673100 3.5708236 2.439672e-05 3.556921e+00 3.5633882 3.566891e+00 +C2 0.5343248 1.710913e-04 0.000146495 0.8900053 0.8916269 0.8925051 2.439672e-05 8.892636e-01 0.8908805 8.917561e-01 + 3 vs 4 3 vs 5 3 vs 6 4 vs 5 4 vs 6 5 vs 6 +E Inf Inf 0.0000000 Inf Inf Inf +C 3.5525326 3.5589756 3.5624649 0 0 0 +C2 0.8881331 0.8897439 0.8906162 0 0 0 ======================================================================================= @@ -2473,7 +2658,7 @@ E:C1: 1:0 C1 100.0 0.0 0.0 0 100.0 100.0 100.0 1 0.0 0.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 2:0 var n miss p.miss level freq percent cum.percent Y 100.0 0.0 0.0 0 78.0 78.0 78.0 @@ -2482,7 +2667,7 @@ E:C1: 2:0 C1 100.0 0.0 0.0 0 100.0 100.0 100.0 1 0.0 0.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 3:0 var n miss p.miss level freq percent cum.percent Y 100.0 0.0 0.0 0 78.0 78.0 78.0 @@ -2491,7 +2676,7 @@ E:C1: 3:0 C1 100.0 
0.0 0.0 0 100.0 100.0 100.0 1 0.0 0.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 1:1 var n miss p.miss level freq percent cum.percent Y 50.0 0.0 0.0 0 13.0 26.0 26.0 @@ -2500,7 +2685,7 @@ E:C1: 1:1 C1 50.0 0.0 0.0 0 0.0 0.0 0.0 1 50.0 100.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 2:1 var n miss p.miss level freq percent cum.percent Y 50.0 0.0 0.0 0 13.0 26.0 26.0 @@ -2509,7 +2694,7 @@ E:C1: 2:1 C1 50.0 0.0 0.0 0 0.0 0.0 0.0 1 50.0 100.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 3:1 var n miss p.miss level freq percent cum.percent Y 50.0 0.0 0.0 0 13.0 26.0 26.0 @@ -2527,10 +2712,10 @@ C1 4.640660e-128 NA Standardize mean differences average 1 vs 2 1 vs 3 1 vs 4 1 vs 5 1 vs 6 2 vs 3 2 vs 4 2 vs 5 2 vs 6 3 vs 4 Y 0.7311319 0.001480611 0.001486405 1.216879 1.216879 1.216879 0.000005793469 1.218891 1.218891 1.218891 1.218899 -C1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN +C1 NaN 0.000000000 0.000000000 NaN NaN NaN 0.000000000000 NaN NaN NaN NaN 3 vs 5 3 vs 6 4 vs 5 4 vs 6 5 vs 6 Y 1.218899 1.218899 0 0 0 -C1 NaN NaN NaN NaN NaN +C1 NaN NaN 0 0 0 . 
### Summary of continuous variables ### @@ -2538,27 +2723,27 @@ E:C1: 1:0 n miss p.miss mean sd median p25 p75 min max E 100 0 0 1 0.0 1 1 1 1 1 C 100 0 0 2 0.5 2 1 2 1 2 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 2:0 n miss p.miss mean sd median p25 p75 min max E 100 0 0 2 2e-16 2 2 2 2 2 C 100 0 0 2 5e-01 2 1 2 1 2 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 3:0 n miss p.miss mean sd median p25 p75 min max E 100 0 0 3 0.0 3 3 3 3 3 C 100 0 0 2 0.5 2 1 2 1 2 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 1:1 n miss p.miss mean sd median p25 p75 min max E 50 0 0 1 0.0 1 1 1 1 1 C 50 0 0 3 0.4 3 3 3 3 4 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 2:1 n miss p.miss mean sd median p25 p75 min max E 50 0 0 2 0.0 2 2 2 2 2 C 50 0 0 3 0.4 3 3 3 3 4 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 3:1 n miss p.miss mean sd median p25 p75 min max E 50 0 0 3 0.0 3 3 3 3 3 @@ -2570,12 +2755,12 @@ E 0.000000e+00 0 C 2.518377e-307 0 Standardize mean differences - average 1 vs 2 1 vs 3 1 vs 4 1 vs 5 1 vs 6 2 vs 3 2 vs 4 2 vs 5 2 vs 6 -E NaN 6.350292e+15 Inf NaN Inf Inf 6.350292e+15 6.350292e+15 1.410048 6.350292e+15 -C 2.137365 1.710913e-04 0.000146495 3.560822 3.56731 3.570824 2.439672e-05 3.556921e+00 3.563388 3.566891e+00 - 3 vs 4 3 vs 5 3 vs 6 4 vs 5 4 vs 6 5 vs 6 -E Inf Inf NaN 
Inf Inf Inf -C 3.552533 3.558976 3.562465 0 0 0 + average 1 vs 2 1 vs 3 1 vs 4 1 vs 5 1 vs 6 2 vs 3 2 vs 4 2 vs 5 2 vs 6 3 vs 4 +E Inf 6.350292e+15 Inf 0.000000 Inf Inf 6.350292e+15 6.350292e+15 1.410048 6.350292e+15 Inf +C 2.137365 1.710913e-04 0.000146495 3.560822 3.56731 3.570824 2.439672e-05 3.556921e+00 3.563388 3.566891e+00 3.552533 + 3 vs 5 3 vs 6 4 vs 5 4 vs 6 5 vs 6 +E Inf 0.000000 Inf Inf Inf +C 3.558976 3.562465 0 0 0 . ### Summary of categorical variables ### @@ -2587,7 +2772,7 @@ E:C1: 1:0 C1 100.0 0.0 0.0 0 100.0 100.0 100.0 1 0.0 0.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 2:0 var n miss p.miss level freq percent cum.percent Y 100.0 0.0 0.0 0 78.0 78.0 78.0 @@ -2596,7 +2781,7 @@ E:C1: 2:0 C1 100.0 0.0 0.0 0 100.0 100.0 100.0 1 0.0 0.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 3:0 var n miss p.miss level freq percent cum.percent Y 100.0 0.0 0.0 0 78.0 78.0 78.0 @@ -2605,7 +2790,7 @@ E:C1: 3:0 C1 100.0 0.0 0.0 0 100.0 100.0 100.0 1 0.0 0.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 1:1 var n miss p.miss level freq percent cum.percent Y 50.0 0.0 0.0 0 13.0 26.0 26.0 @@ -2614,7 +2799,7 @@ E:C1: 1:1 C1 50.0 0.0 0.0 0 0.0 0.0 0.0 1 50.0 100.0 100.0 ---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 2:1 var n miss p.miss level freq percent cum.percent Y 50.0 0.0 0.0 0 13.0 26.0 26.0 @@ -2623,7 +2808,7 @@ E:C1: 2:1 C1 50.0 0.0 0.0 0 0.0 0.0 0.0 1 50.0 100.0 100.0 
---------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------ E:C1: 3:1 var n miss p.miss level freq percent cum.percent Y 50.0 0.0 0.0 0 13.0 26.0 26.0 @@ -2641,10 +2826,18 @@ C1 4.640660e-128 NA Standardize mean differences average 1 vs 2 1 vs 3 1 vs 4 1 vs 5 1 vs 6 2 vs 3 2 vs 4 2 vs 5 2 vs 6 3 vs 4 Y 0.7311319 0.001480611 0.001486405 1.216879 1.216879 1.216879 0.000005793469 1.218891 1.218891 1.218891 1.218899 -C1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN +C1 NaN 0.000000000 0.000000000 NaN NaN NaN 0.000000000000 NaN NaN NaN NaN 3 vs 5 3 vs 6 4 vs 5 4 vs 6 5 vs 6 Y 1.218899 1.218899 0 0 0 -C1 NaN NaN NaN NaN NaN -. - -DONE +C1 NaN NaN 0 0 0 +.W Stratified by ESA + 1 2 3 p test + n 2.00 2.00 2.00 + alive (mean (sd)) 24.50 (0.71) 39.50 (13.44) 75.00 (7.07) 0.098 + ambulance = 2 (%) 50.0 50.0 50.0 1.000 +.. + +Warnings --------------------------------------------------------------------------------------------------------------- +1. 
svyrep.design is allowed (@test-svyCreateTableOne.R#504) - No sampling weights provided: equal probability assumed + +DONE =================================================================================================================== diff --git a/tests/testthat/ref-ExtractSmd_TableOne b/tests/testthat/ref-ExtractSmd_TableOne index 7bb94b3..444fff0 100644 Binary files a/tests/testthat/ref-ExtractSmd_TableOne and b/tests/testthat/ref-ExtractSmd_TableOne differ diff --git a/tests/testthat/test-CreateTableOne.R b/tests/testthat/test-CreateTableOne.R index 88e85f2..193fa56 100644 --- a/tests/testthat/test-CreateTableOne.R +++ b/tests/testthat/test-CreateTableOne.R @@ -164,6 +164,42 @@ test_that("TableOne objects are always returned", { }) +test_that("Missing percentages are correctly stored and printed", { + + ## Extract from dataset + percentMissing <- unlist(lapply(pbc[vars], function(x) {sum(is.na(x)) / length(x) * 100})) + ## Sanity check for the standard. + expect_equal(length(percentMissing), length(vars)) + + ## Unstratified table + expect_equal(pbcOverall$MetaData$percentMissing, percentMissing) + ## Including NA as a category should not matter. + expect_equal(pbcInclNa$MetaData$percentMissing, percentMissing) + ## Stratification should not matter + expect_equal(pbcByTrt$MetaData$percentMissing, percentMissing) + expect_equal(pbcByTrtSex$MetaData$percentMissing, percentMissing) + + ## Check printing + ## Gold standard + percentMissingString <- format(sprintf("%.1f", percentMissing), justify = "right") + ## Function to drop empty "" elements. + DropEmptyString <- function(x) { + ## as.character() drops names. 
+ as.character(Filter(f = function(elt) {!(elt == "")}, x = x)) + } + ## Check against gold standard + expect_equal(DropEmptyString(print(pbcOverall, missing = TRUE)[,"Missing"]), + percentMissingString) + expect_equal(DropEmptyString(print(pbcInclNa, missing = TRUE)[,"Missing"]), + percentMissingString) + expect_equal(DropEmptyString(print(pbcByTrt, missing = TRUE)[,"Missing"]), + percentMissingString) + expect_equal(DropEmptyString(print(pbcByTrtSex, missing = TRUE)[,"Missing"]), + percentMissingString) + +}) + + test_that("printing of a TableOne object does not regress", { ## Expectations diff --git a/tests/testthat/test-ShowRegTable.R b/tests/testthat/test-ShowRegTable.R index 433f753..ad20086 100644 --- a/tests/testthat/test-ShowRegTable.R +++ b/tests/testthat/test-ShowRegTable.R @@ -5,9 +5,11 @@ ## Author: Kazuki Yoshida ################################################################################ +### ### Structure ## expectations within tests within context +### ### Prepare environment ################################################################################ library(testthat) @@ -15,75 +17,208 @@ library(survey) library(survival) +### ### Context (1 for each file) ################################################################################ context("Unit tests for regression summary function") -## Load Mayo Clinic Primary Biliary Cirrhosis Data + +### +### Independent observations +################################################################################ data(pbc) -## Cox -coxph1 <- coxph(formula = Surv(time, status == 2) ~ trt + age + albumin + ascites, - data = pbc) -## Logistic -glm1 <- glm(formula = (status == 2) ~ trt + age + albumin + ascites, - family = binomial(link = "logit"), - data = pbc) -## Linear -lm1 <- lm(formula = time ~ trt + age + albumin + ascites, - data = pbc) +### coxph test_that("coxph works", { - ## For coxph normal approximation is uses + ## Cox + coxph1 <- coxph(formula = Surv(time, status == 2) ~ trt + age + 
albumin + ascites, + data = pbc) + + ## For coxph normal approximation is used expect_true(all(confint(coxph1) == confint.default(coxph1))) ## confint + ciCoxph <- confint(coxph1) ShowRegTable(coxph1) - expect_output(ShowRegTable(coxph1), - "0.72, 1.47") + expect_output(ShowRegTable(coxph1, digits = 5, exp = TRUE), + sprintf("%.5f, %.5f", + exp(ciCoxph[2,1]), + exp(ciCoxph[2,2]))) ## contint.default + cidCoxph <- confint.default(coxph1) ShowRegTable(coxph1, ciFun = confint.default) - expect_output(ShowRegTable(coxph1, ciFun = confint.default), - "0.72, 1.47") + expect_output(ShowRegTable(coxph1, ciFun = confint.default, digits = 5, exp = TRUE), + sprintf("%.5f, %.5f", + exp(cidCoxph[2,1]), + exp(cidCoxph[2,2]))) }) +### glm test_that("glm works", { + ## Logistic + glm1 <- glm(formula = (status == 2) ~ trt + age + albumin + ascites, + family = binomial(link = "logit"), + data = pbc) + ## For GLM profile likelihood method and naive approximation differ expect_true(!all(confint(glm1) == confint.default(glm1))) ## confint + ciGlm1 <- confint(glm1) ShowRegTable(glm1, digits = 5) - expect_output(ShowRegTable(glm1, digits = 5), - "0.63994, 1.75622") + expect_output(ShowRegTable(glm1, digits = 5, exp = TRUE), + sprintf("%.5f, %.5f", + exp(ciGlm1[2,1]), + exp(ciGlm1[2,2]))) ## contint.default + cidGlm1 <- confint.default(glm1) ShowRegTable(glm1, ciFun = confint.default, digits = 5) expect_output(ShowRegTable(glm1, ciFun = confint.default, digits = 5), - "0.63975, 1.75230") + sprintf("%.5f, %.5f", + exp(cidGlm1[2,1]), + exp(cidGlm1[2,2]))) }) +### lm test_that("lm works", { + ## Linear + lm1 <- lm(formula = time ~ trt + age + albumin + ascites, + data = pbc) + ## For lm t-distribution based method and naive approximation differ expect_true(!all(confint(lm1) == confint.default(lm1))) ## confint + ciLm1 <- confint(lm1) ShowRegTable(lm1, digits = 5, exp = FALSE) expect_output(ShowRegTable(lm1, digits = 5, exp = FALSE), - "-275.96185, 175.16874") + sprintf("%.5f, %.5f", + 
ciLm1[2,1], + ciLm1[2,2])) ## contint.default + cidLm1 <- confint.default(lm1) ShowRegTable(lm1, ciFun = confint.default, digits = 5, exp = FALSE) expect_output(ShowRegTable(lm1, ciFun = confint.default, digits = 5, exp = FALSE), - "-275.07261, 174.27950") + sprintf("%.5f, %.5f", + cidLm1[2,1], + cidLm1[2,2])) + +}) + + +### +### Clustered data +################################################################################ + +## Ordinal Data from Koch +data(koch, package = "geepack") + + +### geepack +test_that("geepack works", { + + library(geepack) + + ## Log-linear GEE + geeglm1 <- geeglm(formula = y ~ trt + day, + family = poisson(link = "log"), + id = id, + data = koch, + corstr = "exchangeable", + scale.fix = FALSE) + + ## confint.default does not work for geepack + ciGeeglm1 <- confint(geeglm1) + + ## confint + ShowRegTable(geeglm1, digits = 5, exp = TRUE) + expect_output(ShowRegTable(geeglm1, digits = 5, exp = TRUE), + sprintf("%.5f, %.5f", + exp(ciGeeglm1)[2,1], + exp(ciGeeglm1)[2,2])) + + ## coef + expect_output(ShowRegTable(geeglm1, digits = 5, exp = TRUE), + sprintf("%.5f", exp(coef(geeglm1))[2])) }) +### nlme +test_that("nlme works", { + + library(nlme) + + ## Linear LME + lme1 <- lme(fixed = y ~ trt + day, + data = koch, + random = ~ 1 | id, + method = "REML") + + ## intervals instead of confint [lower, est, upper] format + ShowRegTable(lme1, digits = 5, exp = FALSE) + expect_output(ShowRegTable(lme1, digits = 5, exp = FALSE), + sprintf("%.5f, %.5f", + intervals(lme1)$fixed[2,1], + intervals(lme1)$fixed[2,3])) + + ## coef + expect_output(ShowRegTable(lme1, digits = 5, exp = TRUE), + sprintf("%.5f", exp(intervals(lme1)$fixed[2,2]))) + +}) + + +### lme4 +test_that("lme4 works", { + + library(lme4) + + ## Linear LME + lmer1 <- lmer(formula = y ~ trt + day + (1 | id), + data = koch) + + ciLmer1 <- tail(confint(lmer1), nrow(coef(summary(lmer1)))) + + ## confint + ShowRegTable(lmer1, digits = 5, exp = FALSE) + expect_output(ShowRegTable(lmer1, digits = 5, 
exp = FALSE), + sprintf("%.5f, %.5f", + ciLmer1[2,1], + ciLmer1[2,2])) + + ## coef + expect_output(ShowRegTable(lmer1, digits = 5, exp = FALSE), + sprintf("%.5f", coef(summary(lmer1))[2,1])) + + + ## GLMM + glmer1 <- glmer(formula = y ~ trt + day + (1 | id), + data = koch, + family = poisson(link = "log")) + ## Last rows correspond to fixed effects + ciGlmer1 <- tail(confint(glmer1), nrow(coef(summary(lmer1)))) + + ## confint + ShowRegTable(glmer1, digits = 5, exp = TRUE) + expect_output(ShowRegTable(glmer1, digits = 5, exp = TRUE), + sprintf("%.5f, %.5f", + exp(ciGlmer1[2,1]), + exp(ciGlmer1[2,2]))) + + ## coef + expect_output(ShowRegTable(glmer1, digits = 5, exp = TRUE), + sprintf("%.5f", exp(coef(summary(glmer1)))[2,1])) + +}) diff --git a/tests/testthat/test-modules-smd.R b/tests/testthat/test-modules-smd.R index 0aea407..5fc6513 100755 --- a/tests/testthat/test-modules-smd.R +++ b/tests/testthat/test-modules-smd.R @@ -429,13 +429,14 @@ test_that("decent results are returned for anomalous/difficult data", { expect_equal(svyStdDiffMulti("race2", group = "race", design = nhanesSvy), rep(NaN, 6)) - ## Only one value - ## NaN due to division by zero variance + ## Identical constant comparison + ## T-C vector is a zero vector, but divisor is also zero + ## NaN due to division by zero variance -> Redefined as zero by(nhanes$onlyOne, nhanes$RIAGENDR, summary) - expect_equal(StdDiff(nhanes$onlyOne, group = nhanes$RIAGENDR), NaN) + expect_equal(StdDiff(nhanes$onlyOne, group = nhanes$RIAGENDR), 0) ## 0 because [0]^- = 0, and [1]^T [0]^-1 [1] = 0; defined NaN in (svy)StdDiffMulti table(nhanes$onlyOne, nhanes$RIAGENDR) - expect_equal(StdDiffMulti(nhanes$onlyOne, group = nhanes$RIAGENDR), NaN) + expect_equal(StdDiffMulti(nhanes$onlyOne, group = nhanes$RIAGENDR), 0) ## When weighted problematic means1 <- svyby(~ onlyOne, by = ~ RIAGENDR, nhanesSvy, FUN = svymean)[,2] vars1 <- svyby(~ onlyOne, by = ~ RIAGENDR, nhanesSvy, FUN = svyvar)[,2] @@ -451,15 +452,15 @@ 
test_that("decent results are returned for anomalous/difficult data", { ## No error even with a single level variable (constant) as redundant ## level drop from table occurs only when 2+ levels are present. ## If any group has more than 2 levels, then strata-by-level table - ## is correctly created, which is not the case here. Redefined NaN. - expect_equal(svyStdDiffMulti("onlyOne", "RIAGENDR", nhanesSvy), NaN) + ## is correctly created, which is not the case here. Redefined as 0. + expect_equal(svyStdDiffMulti("onlyOne", "RIAGENDR", nhanesSvy), 0) ## Four groups (six contrasts) - ## NaN due to division by zero variance + ## NaN due to division by zero variance -> Redefined as 0 by(nhanes$onlyOne, nhanes$race, summary) - expect_equal(StdDiff(nhanes$onlyOne, group = nhanes$race), rep(NaN, 6)) + expect_equal(StdDiff(nhanes$onlyOne, group = nhanes$race), rep(0, 6)) ## 0 because [0]^- = 0, and [1]^T [0]^-1 [1] = 0; defined NaN in (svy)StdDiffMulti - expect_equal(StdDiffMulti(nhanes$onlyOne, group = nhanes$race), rep(NaN, 6)) + expect_equal(StdDiffMulti(nhanes$onlyOne, group = nhanes$race), rep(0, 6)) ## When weighted problematic; not in this case?? means2 <- svyby(~ onlyOne, by = ~ race, nhanesSvy, FUN = svymean)[,2] vars2 <- svyby(~ onlyOne, by = ~ race, nhanesSvy, FUN = svyvar)[,2] @@ -469,20 +470,10 @@ test_that("decent results are returned for anomalous/difficult data", { (means2[2] - means2[3]) / sqrt((vars2[2] + vars2[3]) / 2), (means2[2] - means2[4]) / sqrt((vars2[2] + vars2[4]) / 2), (means2[3] - means2[4]) / sqrt((vars2[3] + vars2[4]) / 2)) - ## on sparc-sun-solaris sign was opposite; abs() solves this issue - ## as svyStdDiff() uses abs() internally - expect_equal(svyStdDiff("onlyOne", "race", nhanesSvy), abs(meanDiffs2)) - ## This one is rep(NaN,6) for most platforms except for sparc-sun-solaris - ## where ./configure --disable-long-double is used. - ## capabilities()["long.double"] was added in R 3.1.3. 
- ## As of 2015-08-11, only r-oldrel-windows-ix86+x86_64 is R 3.1.3. - ## https://cran.r-project.org/web/checks/check_results_tableone.html - if (capabilities()["long.double"]) { - ## Cannot run on sparc-sun-solaris due to lack of extended precision arithmetic - expect_equal(svyStdDiff("onlyOne", "race", nhanesSvy), rep(NaN, 6)) - } + ## Redefined as 0 as T-C is a zero vector + expect_equal(svyStdDiff("onlyOne", "race", nhanesSvy), rep(0,6)) ## 0 because [0]^- = 0, and [1]^T [0]^-1 [1] = 0; defined NaN in (svy)StdDiffMulti - expect_equal(svyStdDiffMulti("onlyOne", "race", nhanesSvy), rep(NaN, 6)) + expect_equal(svyStdDiffMulti("onlyOne", "race", nhanesSvy), rep(0, 6)) ## onlyNa @@ -490,8 +481,8 @@ test_that("decent results are returned for anomalous/difficult data", { expect_warning(expect_equal(StdDiff(nhanes$onlyNa, group = nhanes$RIAGENDR), as.numeric(NA)), "Variable has only NA's in at least one stratum. na.rm turned off.") - ## 0 only one level - expect_warning(expect_equal(StdDiffMulti(nhanes$onlyNa, group = nhanes$RIAGENDR), NaN), + ## defined 0 as NA is a level + expect_warning(expect_equal(StdDiffMulti(nhanes$onlyNa, group = nhanes$RIAGENDR), 0), "Variable has only NA's in all strata. Regarding NA as a level") ## When weighted problematic means1 <- svyby(~ onlyNa, by = ~ RIAGENDR, nhanesSvy, FUN = svymean)[,2] @@ -503,7 +494,7 @@ test_that("decent results are returned for anomalous/difficult data", { as.numeric(NA)), "onlyNa has only NA's in at least one stratum. na.rm turned off.") ## 0 because [0]^- = 0, and [1]^T [0]^-1 [1] = 0; defined NaN in (svy)StdDiffMulti - expect_warning(expect_equal(svyStdDiffMulti("onlyNa", "RIAGENDR", nhanesSvy), NaN), + expect_warning(expect_equal(svyStdDiffMulti("onlyNa", "RIAGENDR", nhanesSvy), 0), "onlyNa has only NA's in all strata. 
Regarding NA as a level.") ## Four groups (six contrasts) @@ -511,7 +502,7 @@ test_that("decent results are returned for anomalous/difficult data", { expect_warning(expect_equal(StdDiff(nhanes$onlyNa, group = nhanes$race), rep(NaN, 6)), "Variable has only NA's in at least one stratum. na.rm turned off.") ## 0 because [0]^- = 0, and [1]^T [0]^-1 [1] = 0; defined NaN in (svy)StdDiffMulti - expect_warning(expect_equal(StdDiffMulti(nhanes$onlyNa, group = nhanes$race), rep(NaN, 6)), + expect_warning(expect_equal(StdDiffMulti(nhanes$onlyNa, group = nhanes$race), rep(0, 6)), "Variable has only NA's in all strata. Regarding NA as a level.") ## When weighted problematic; not in this case?? means2 <- svyby(~ onlyNa, by = ~ race, nhanesSvy, FUN = svymean)[,2] @@ -526,8 +517,8 @@ test_that("decent results are returned for anomalous/difficult data", { "onlyNa has only NA's in at least one stratum. na.rm turned off.") expect_warning(expect_equal(svyStdDiff("onlyNa", "race", nhanesSvy), rep(NaN, 6)), "onlyNa has only NA's in at least one stratum. na.rm turned off.") - ## 0 because [0]^- = 0, and [1]^T [0]^-1 [1] = 0; defined NaN in (svy)StdDiffMulti - expect_warning(expect_equal(svyStdDiffMulti("onlyNa", "race", nhanesSvy), rep(NaN, 6)), + ## 0 because [0]^- = 0, and [1]^T [0]^-1 [1] = 0; defined 0 as NA is a level + expect_warning(expect_equal(svyStdDiffMulti("onlyNa", "race", nhanesSvy), rep(0, 6)), "onlyNa has only NA's in all strata. Regarding NA as a level.") }) @@ -1084,7 +1075,8 @@ test_that("SMDs are correctly shown in print()", { StdDiffMulti(nhanes$agecat, nhanes$strataVar)) out1 <- print(tab1, smd = TRUE) - expect_equal(as.vector(out1[,"SMD"][2:3]), + ## With default test = TRUE, missing = FALSE, last column should be SMD. 
+ expect_equal(as.vector(out1[,ncol(out1)][2:3]), c(sprintf(" %.3f", attr(tab1$ContTable, "smd")[1,1]), sprintf(" %.3f", attr(tab1$CatTable, "smd")[1,1]))) diff --git a/tests/testthat/test-svyCreateTableOne.R b/tests/testthat/test-svyCreateTableOne.R index cde85c0..bf2e210 100644 --- a/tests/testthat/test-svyCreateTableOne.R +++ b/tests/testthat/test-svyCreateTableOne.R @@ -5,21 +5,24 @@ ## Author: Kazuki Yoshida ################################################################################ +### ### Structure ## expectations within tests within context +### ### Prepare environment ################################################################################ library(testthat) library(survey) +### ### Context (1 for each file) ################################################################################ context("Unit tests for svy* user functions") - +### ### Provide data ################################################################################ @@ -50,6 +53,7 @@ vars <- c("E", "C", "Y", "C1", "C2") factorVars <- c("Y","C1") +### ### Tests ################################################################################ ## A test should group together expectations for one functionality. @@ -118,6 +122,42 @@ test_that("svyTableOne objects are always returned", { }) +test_that("Missing percentages are correctly stored and printed", { + + ## Extract from dataset + percentMissing <- unlist(lapply(datMw[vars], function(x) {sum(is.na(x)) / length(x) * 100})) + ## Sanity check for the standard. + expect_equal(length(percentMissing), length(vars)) + + ## Unstratified table + expect_equal(mwOverall$MetaData$percentMissing, percentMissing) + ## Including NA as a category should not matter. 
+ expect_equal(mwInclNa$MetaData$percentMissing, percentMissing) + ## Stratification should not matter + expect_equal(mwByE$MetaData$percentMissing, percentMissing) + expect_equal(mwByEC1$MetaData$percentMissing, percentMissing) + + ## Check printing + ## Gold standard + percentMissingString <- format(sprintf("%.1f", percentMissing), justify = "right") + ## Function to drop empty "" elements. + DropEmptyString <- function(x) { + ## as.character() drops names. + as.character(Filter(f = function(elt) {!(elt == "")}, x = x)) + } + ## Check against gold standard + expect_equal(DropEmptyString(print(mwOverall, missing = TRUE)[,"Missing"]), + percentMissingString) + expect_equal(DropEmptyString(print(mwInclNa, missing = TRUE)[,"Missing"]), + percentMissingString) + expect_equal(DropEmptyString(print(mwByE, missing = TRUE)[,"Missing"]), + percentMissingString) + expect_equal(DropEmptyString(print(mwByEC1, missing = TRUE)[,"Missing"]), + percentMissingString) + +}) + + test_that("printing of a svyTableOne object does not regress", { ## Expectations @@ -444,3 +484,40 @@ test_that("summary method works without errors", { "Standardize mean differences") }) + + +test_that("svyrep.design is allowed", { + +### Replication weight data + ## http://www.ats.ucla.edu/stat/stata/library/replicate_weights.htm + ## http://r-survey.r-forge.r-project.org/survey/html/svrepdesign.html + + ## Survival in cardiac arrest (in survey) + data(scd) + scd + + ## use BRR replicate weights from Levy and Lemeshow + repweights <- 2 * cbind(c(1,0,1,0,1,0), + c(1,0,0,1,0,1), + c(0,1,1,0,0,1), + c(0,1,0,1,1,0)) + scdrep <- svrepdesign(data = scd, type = "BRR", repweights = repweights, combined.weights = FALSE) + + ## Standard construction + ans_means <- svyby(formula = ~ alive, by = ~ ESA, design = scdrep, FUN = svymean)[,2] + ans_sds <- sqrt(svyby(formula = ~ alive, by = ~ ESA, design = scdrep, FUN = svyvar)[,2]) + ans_props <- svyby(formula = ~ I(ambulance - 1), by = ~ ESA, design = scdrep, FUN = 
svymean)[,2] + + ## Table construction + tab1 <- svyCreateTableOne(vars = c("alive", "ambulance"), strata = c("ESA"), + factorVars = "ambulance", data = scdrep) + tab1_print <- print(tab1, format = "p") + + ## Expectations + expect_equal(as.character(tab1_print[2, 1:3]), + sprintf("%.2f (%.2f)", ans_means, ans_sds)) + + expect_equal(as.character(gsub(" ", "", tab1_print[3, 1:3])), + sprintf("%.1f", ans_props * 100)) + +})