From 0ca6d13ef9fae8a9f4817a7e0c15b31b1334a2ee Mon Sep 17 00:00:00 2001 From: Anthony Sena Date: Wed, 5 Jun 2024 18:17:36 -0400 Subject: [PATCH] Expose col_types from readr --- R/CsvHelper.R | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/R/CsvHelper.R b/R/CsvHelper.R index 25d3d10..ab730f2 100644 --- a/R/CsvHelper.R +++ b/R/CsvHelper.R @@ -27,13 +27,43 @@ #' @param file The .csv file to read. #' @param warnOnCaseMismatch When TRUE, raise a warning if column headings #' in the .csv are not in snake_case format -#' +#' +#' @param colTypes Passed to the `col_types` argument of [readr::read_csv()]. +#' One of `NULL`, a [readr::cols()] specification, or +#' a string. See `vignette("readr")` for more details. +#' +#' If `NULL`, all column types will be inferred from `guess_max` rows of the +#' input, interspersed throughout the file. This is convenient (and fast), +#' but not robust. This function does not expose `guess_max`, so if the +#' guessed types are wrong, supply the correct types yourself. +#' +#' Column specifications created by [list()] or [readr::cols()] must contain +#' one column specification for each column. +#' +#' Alternatively, you can use a compact string representation where each +#' character represents one column: +#' - c = character +#' - i = integer +#' - n = number +#' - d = double +#' - l = logical +#' - f = factor +#' - D = date +#' - T = date time +#' - t = time +#' - ? = guess +#' - _ or - = skip +#' +#' By default, reading a file without a column specification will print a +#' message showing the column types `readr` guessed. To remove this message, +#' supply `colTypes` or set `options(readr.show_col_types = FALSE)`. 
+#' #' @return #' A tibble with the .csv contents #' #' @export -readCsv <- function(file, warnOnCaseMismatch = TRUE) { - fileContents <- .readCsv(file = file) +readCsv <- function(file, warnOnCaseMismatch = TRUE, colTypes = readr::cols()) { + fileContents <- .readCsv(file = file, colTypes = colTypes) columnNames <- colnames(fileContents) columnNamesInSnakeCaseFormat <- isSnakeCase(columnNames) if (!all(columnNamesInSnakeCaseFormat) && warnOnCaseMismatch) { @@ -58,10 +88,10 @@ readCsv <- function(file, warnOnCaseMismatch = TRUE) { #' #' @noRd #' @keywords internal -.readCsv <- function(file) { +.readCsv <- function(file, colTypes = readr::cols()) { invisible(readr::read_csv( file = file, - col_types = readr::cols(), + col_types = colTypes, lazy = FALSE )) }