From 0133207d35a557f9660af6634b74aa00d6b598f8 Mon Sep 17 00:00:00 2001 From: chainsawriot Date: Fri, 15 Mar 2024 15:45:47 +0100 Subject: [PATCH] Reduce verbosity and remove colorization ref #4 Also explicitly remove `problems` ref #7 --- DESCRIPTION | 3 -- R/parser.R | 88 +++++++++++++++----------------------- R/type_convert.R | 6 ++- README.Rmd | 41 +++++++++++++++++- README.md | 98 ++++++++++++++++++++++++++++++++++++++----- man/parse_atomic.Rd | 34 +++++++++++++-- man/parse_datetime.Rd | 11 +++-- man/parse_factor.Rd | 5 ++- man/parse_guess.Rd | 5 ++- man/parse_number.Rd | 10 ++++- man/parse_vector.Rd | 5 ++- man/type_convert.Rd | 5 ++- 12 files changed, 229 insertions(+), 82 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 176a302..83c8890 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,7 +24,6 @@ RoxygenNote: 7.3.1 Imports: cli, rlang, - crayon, tzdb Suggests: knitr, @@ -32,5 +31,3 @@ Suggests: testthat, withr, hms - - diff --git a/R/parser.R b/R/parser.R index 7aa29f7..19ea4e1 100644 --- a/R/parser.R +++ b/R/parser.R @@ -31,20 +31,26 @@ collector_find <- function(name) { #' @family parsers #' @param x Character vector of elements to parse. #' @param collector Column specification. 
+#' @param .return_problems Whether to keep the `problems` attribute (a tibble of parsing failures) on the output; if `FALSE` (the default), it is removed #' @keywords internal #' @export #' @examples #' x <- c("1", "2", "3", "NA") #' parse_vector(x, col_integer()) #' parse_vector(x, col_double()) -parse_vector <- function(x, collector, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) { +parse_vector <- function(x, collector, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) { stopifnot(is.character(x)) if (is.character(collector)) { collector <- collector_find(collector) } ## warn_problems(parse_vector_(x, collector, na = na, locale_ = locale, trim_ws = trim_ws)) - parse_vector_(x, collector, na = na, locale_ = locale, trim_ws = trim_ws) + res <- parse_vector_(x, collector, na = na, locale_ = locale, trim_ws = trim_ws) + if (.return_problems || is.null(attr(res, "problems"))) { + return(res) + } + attr(res, "problems") <- NULL + return(res) } #' Parse logicals, integers, and reals @@ -65,6 +71,7 @@ parse_vector <- function(x, collector, na = c("", "NA"), locale = default_locale #' names. #' @param trim_ws Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from #' each field before parsing it? 
+#' @inheritParams parse_vector #' @family parsers #' @examples #' parse_integer(c("1", "2", "3")) @@ -84,26 +91,26 @@ NULL #' @rdname parse_atomic #' @export -parse_logical <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) { - parse_vector(x, col_logical(), na = na, locale = locale, trim_ws = trim_ws) +parse_logical <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) { + parse_vector(x, col_logical(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems) } #' @rdname parse_atomic #' @export -parse_integer <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) { - parse_vector(x, col_integer(), na = na, locale = locale, trim_ws = trim_ws) +parse_integer <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) { + parse_vector(x, col_integer(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems) } #' @rdname parse_atomic #' @export -parse_double <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) { - parse_vector(x, col_double(), na = na, locale = locale, trim_ws = trim_ws) +parse_double <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) { + parse_vector(x, col_double(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems) } #' @rdname parse_atomic #' @export -parse_character <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) { - parse_vector(x, col_character(), na = na, locale = locale, trim_ws = trim_ws) +parse_character <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) { + parse_vector(x, col_character(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems) } #' @rdname parse_atomic @@ -166,8 +173,8 @@ col_skip <- function() { #' ## Specifying strings for NAs 
#' parse_number(c("1", "2", "3", "NA")) #' parse_number(c("1", "2", "3", "NA", "Nothing"), na = c("NA", "Nothing")) -parse_number <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) { - parse_vector(x, col_number(), na = na, locale = locale, trim_ws = trim_ws) +parse_number <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) { + parse_vector(x, col_number(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems) } #' @rdname parse_number @@ -203,8 +210,9 @@ col_number <- function() { #' # ISO 8601 date times #' guess_parser(c("2010-10-10")) #' parse_guess(c("2010-10-10")) -parse_guess <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, guess_integer = FALSE) { - parse_vector(x, guess_parser(x, locale, guess_integer = guess_integer, na = na), na = na, locale = locale, trim_ws = trim_ws) +parse_guess <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, guess_integer = FALSE, .return_problems = FALSE) { + parse_vector(x, guess_parser(x, locale, guess_integer = guess_integer, na = na), na = na, locale = locale, trim_ws = trim_ws, + .return_problems = .return_problems) } #' @rdname parse_guess @@ -262,8 +270,9 @@ guess_parser <- function(x, locale = default_locale(), guess_integer = FALSE, na #' # and reports problems #' parse_factor(x, levels = animals) parse_factor <- function(x, levels = NULL, ordered = FALSE, na = c("", "NA"), - locale = default_locale(), include_na = TRUE, trim_ws = TRUE) { - parse_vector(x, col_factor(levels, ordered, include_na), na = na, locale = locale, trim_ws = trim_ws) + locale = default_locale(), include_na = TRUE, trim_ws = TRUE, .return_problems = FALSE) { + parse_vector(x, col_factor(levels, ordered, include_na), na = na, locale = locale, trim_ws = trim_ws, + .return_problems = .return_problems) } #' @rdname parse_factor @@ -401,20 +410,20 @@ col_factor <- function(levels = NULL, ordered = 
FALSE, include_na = FALSE) { #' parse_datetime("1979-10-14T1010Z", locale = us_central) #' # Your current time zone #' parse_datetime("1979-10-14T1010", locale = locale(tz = "")) -parse_datetime <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) { - parse_vector(x, col_datetime(format), na = na, locale = locale, trim_ws = trim_ws) +parse_datetime <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) { + parse_vector(x, col_datetime(format), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems) } #' @rdname parse_datetime #' @export -parse_date <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) { - parse_vector(x, col_date(format), na = na, locale = locale, trim_ws = trim_ws) +parse_date <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) { + parse_vector(x, col_date(format), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems) } #' @rdname parse_datetime #' @export -parse_time <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) { - parse_vector(x, col_time(format), na = na, locale = locale, trim_ws = trim_ws) +parse_time <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) { + parse_vector(x, col_time(format), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems) } #' @rdname parse_datetime @@ -877,8 +886,8 @@ as.character.col_spec <- function(x, ...) { } #' @export -print.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_color(), ...) { - cat(format.col_spec(x, n = n, condense = condense, colour = colour, ...)) +print.col_spec <- function(x, n = Inf, condense = NULL, ...) 
{ + cat(format.col_spec(x, n = n, condense = condense, ...)) invisible(x) } @@ -894,7 +903,7 @@ cols_condense <- function(x) { } #' @export -format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_color(), ...) { +format.col_spec <- function(x, n = Inf, condense = NULL, ...) { if (n == 0) { return("") } @@ -929,7 +938,6 @@ format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_co args <- paste(names(args), args, sep = " = ", collapse = ", ") col_funs <- paste0(col_funs, "(", args, ")") - col_funs <- colourise_cols(col_funs, colour) col_names <- names(cols)[[i]] %||% "" @@ -961,34 +969,6 @@ format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_co out } -colourise_cols <- function(cols, colourise = crayon::has_color()) { - if (!isTRUE(colourise)) { - return(cols) - } - - fname <- sub("[(].*", "", cols) - for (i in seq_along(cols)) { - cols[[i]] <- switch(fname, - col_skip = , - col_guess = cols[[i]], - - col_character = , - col_factor = crayon::red(cols[[i]]), - - col_logical = crayon::yellow(cols[[i]]), - - col_double = , - col_integer = , - col_number = crayon::green(cols[[i]]), - - col_date = , - col_datetime = , - col_time = crayon::blue(cols[[i]]) - ) - } - cols -} - # Used in read_delim(), read_fwf() and type_convert() show_cols_spec <- function(spec, n = getOption("readr.num_columns", 20)) { if (n > 0) { diff --git a/R/type_convert.R b/R/type_convert.R index c108296..469e977 100644 --- a/R/type_convert.R +++ b/R/type_convert.R @@ -10,6 +10,7 @@ #' a string. See `vignette("readr")` for more details. #' #' If `NULL`, column types will be imputed using all rows. +#' @param verbose whether to print messages #' @inheritParams guess_parser #' @note `type_convert()` removes a 'spec' attribute, #' because it likely modifies the column data types. 
@@ -27,7 +28,8 @@ #' df <- data.frame(x = c("NA", "10"), stringsAsFactors = FALSE) #' str(type_convert(df)) type_convert <- function(df, col_types = NULL, na = c("", "NA"), trim_ws = TRUE, - locale = default_locale(), guess_integer = FALSE) { + locale = default_locale(), guess_integer = FALSE, + verbose = FALSE) { stopifnot(is.data.frame(df)) is_character <- vapply(df, is.character, logical(1)) @@ -54,7 +56,7 @@ type_convert <- function(df, col_types = NULL, na = c("", "NA"), trim_ws = TRUE, ) ## if (is.null(col_types) && !is_testing()) { - if (is.null(col_types)) { + if (is.null(col_types) && verbose) { show_cols_spec(specs) } diff --git a/README.Rmd b/README.Rmd index ba5c6af..66b1ba8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( `readr`'s 1e type inferencing and parsing tools are used by various R packages, e.g. `readODS` and `surveytoolbox`, but ironically those packages do not use the main functions (e.g. `readr::read_delim()`) of `readr`. As explained in the README of `readr`, those 1e code will be eventually removed from `readr`. -`minty` aims at providing a set of minimal, long-term, and compatible type inferencing and parsing tools for those packages. +`minty` aims at providing a set of minimal, long-term, and compatible type inferencing and parsing tools for those packages. If you need to parse interactively, please use either `readr` or `vroom`. ## Installation @@ -107,3 +107,42 @@ res ```{r} str(res) ``` + +## Differences: `readr` vs `minty` + +Unlike `readr` and `vroom`, please note that `minty` is mainly for **non-interactive usage**. Therefore, `minty` emits fewer messages and warnings than `readr` and `vroom`. + +```{r} +data <- minty::type_convert(text_only) +data +``` + +```{r} +data <- readr::type_convert(text_only) +data +``` + +`verbose` option is added if you like those messages, default to `FALSE`. 
+ +```{r} +data <- minty::type_convert(text_only, verbose = TRUE) +``` + +At the moment, `minty` does not use [the `problems` mechanism](https://vroom.r-lib.org/reference/problems.html) by default. + +```{r} +minty::parse_logical(c("true", "fake", "IDK"), na = "IDK") +``` + +```{r} +readr::parse_logical(c("true", "fake", "IDK"), na = "IDK") +``` + +## Similar packages + +For parsing ambiguous date(time) + +* [timeless](https://github.com/schochastics/timeless) +* [anytime](https://github.com/eddelbuettel/anytime) + + diff --git a/README.md b/README.md index 94b4cd1..841ea9a 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,8 @@ packages do not use the main functions (e.g. `readr::read_delim()`) of eventually removed from `readr`. `minty` aims at providing a set of minimal, long-term, and compatible -type inferencing and parsing tools for those packages. +type inferencing and parsing tools for those packages. If you need to +parse interactively, please use either `readr` or `vroom`. ## Installation @@ -70,15 +71,6 @@ Inferencing the column types ``` r library(minty, warn.conflicts = FALSE) data <- type_convert(text_only) -#> -#> ── Column specification ──────────────────────────────────────────────────────── -#> cols( -#> maybe_age = col_character(), -#> maybe_male = col_logical(), -#> maybe_name = col_character(), -#> some_na = col_character(), -#> dob = col_date(format = "") -#> ) data #> maybe_age maybe_male maybe_name some_na dob #> 1 17 TRUE AA 2019-07-21 @@ -148,3 +140,89 @@ res str(res) #> Date[1:4], format: "2019-07-21" "2019-08-31" "2019-10-01" NA ``` + +## Differences: `readr` vs `minty` + +Unlike `readr` and `vroom`, please note that `minty` is mainly for +**non-interactive usage**. Therefore, `minty` emits fewer messages and +warnings than `readr` and `vroom`. 
+ +``` r +data <- minty::type_convert(text_only) +data +#> maybe_age maybe_male maybe_name some_na dob +#> 1 17 TRUE AA 2019-07-21 +#> 2 18 FALSE BB Not good 2019-08-31 +#> 3 019 TRUE CC Bad 2019-10-01 +``` + +``` r +data <- readr::type_convert(text_only) +#> Registered S3 methods overwritten by 'readr': +#> method from +#> as.character.col_spec minty +#> format.col_spec minty +#> print.col_spec minty +#> print.collector minty +#> print.date_names minty +#> print.locale minty +#> str.col_spec minty +#> +#> ── Column specification ──────────────────────────────────────────────────────── +#> cols( +#> maybe_age = col_character(), +#> maybe_male = col_logical(), +#> maybe_name = col_character(), +#> some_na = col_character(), +#> dob = col_date(format = "") +#> ) +data +#> maybe_age maybe_male maybe_name some_na dob +#> 1 17 TRUE AA 2019-07-21 +#> 2 18 FALSE BB Not good 2019-08-31 +#> 3 019 TRUE CC Bad 2019-10-01 +``` + +`verbose` option is added if you like those messages, default to +`FALSE`. + +``` r +data <- minty::type_convert(text_only, verbose = TRUE) +#> +#> ── Column specification ──────────────────────────────────────────────────────── +#> cols( +#> maybe_age = col_character(), +#> maybe_male = col_logical(), +#> maybe_name = col_character(), +#> some_na = col_character(), +#> dob = col_date(format = "") +#> ) +``` + +At the moment, `minty` does not use [the `problems` +mechanism](https://vroom.r-lib.org/reference/problems.html) by default. + +``` r +minty::parse_logical(c("true", "fake", "IDK"), na = "IDK") +#> [1] TRUE NA NA +``` + +``` r +readr::parse_logical(c("true", "fake", "IDK"), na = "IDK") +#> Warning: 1 parsing failure. 
+#> row col expected actual +#> 2 -- 1/0/T/F/TRUE/FALSE fake +#> [1] TRUE NA NA +#> attr(,"problems") +#> # A tibble: 1 × 4 +#> row col expected actual +#> +#> 1 2 NA 1/0/T/F/TRUE/FALSE fake +``` + +## Similar packages + +For parsing ambiguous date(time) + + - [timeless](https://github.com/schochastics/timeless) + - [anytime](https://github.com/eddelbuettel/anytime) diff --git a/man/parse_atomic.Rd b/man/parse_atomic.Rd index 9ca1e9b..aff37eb 100644 --- a/man/parse_atomic.Rd +++ b/man/parse_atomic.Rd @@ -11,13 +11,37 @@ \alias{col_character} \title{Parse logicals, integers, and reals} \usage{ -parse_logical(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) +parse_logical( + x, + na = c("", "NA"), + locale = default_locale(), + trim_ws = TRUE, + .return_problems = FALSE +) -parse_integer(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) +parse_integer( + x, + na = c("", "NA"), + locale = default_locale(), + trim_ws = TRUE, + .return_problems = FALSE +) -parse_double(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) +parse_double( + x, + na = c("", "NA"), + locale = default_locale(), + trim_ws = TRUE, + .return_problems = FALSE +) -parse_character(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) +parse_character( + x, + na = c("", "NA"), + locale = default_locale(), + trim_ws = TRUE, + .return_problems = FALSE +) col_logical() @@ -41,6 +65,8 @@ names.} \item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from each field before parsing it?} + +\item{.return_problems}{Whether to hide the \code{problems} tibble from the output} } \description{ Use \verb{parse_*()} if you have a character vector you want to parse. 
Use diff --git a/man/parse_datetime.Rd b/man/parse_datetime.Rd index 6785319..c08db4a 100644 --- a/man/parse_datetime.Rd +++ b/man/parse_datetime.Rd @@ -14,7 +14,8 @@ parse_datetime( format = "", na = c("", "NA"), locale = default_locale(), - trim_ws = TRUE + trim_ws = TRUE, + .return_problems = FALSE ) parse_date( @@ -22,7 +23,8 @@ parse_date( format = "", na = c("", "NA"), locale = default_locale(), - trim_ws = TRUE + trim_ws = TRUE, + .return_problems = FALSE ) parse_time( @@ -30,7 +32,8 @@ parse_time( format = "", na = c("", "NA"), locale = default_locale(), - trim_ws = TRUE + trim_ws = TRUE, + .return_problems = FALSE ) col_datetime(format = "") @@ -60,6 +63,8 @@ names.} \item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from each field before parsing it?} + +\item{.return_problems}{Whether to hide the \code{problems} tibble from the output} } \value{ A \code{\link[=POSIXct]{POSIXct()}} vector with \code{tzone} attribute set to diff --git a/man/parse_factor.Rd b/man/parse_factor.Rd index d775a07..91b2d62 100644 --- a/man/parse_factor.Rd +++ b/man/parse_factor.Rd @@ -12,7 +12,8 @@ parse_factor( na = c("", "NA"), locale = default_locale(), include_na = TRUE, - trim_ws = TRUE + trim_ws = TRUE, + .return_problems = FALSE ) col_factor(levels = NULL, ordered = FALSE, include_na = FALSE) @@ -40,6 +41,8 @@ is included in the levels of the constructed factor.} \item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from each field before parsing it?} + +\item{.return_problems}{Whether to hide the \code{problems} tibble from the output} } \description{ \code{parse_factor()} is similar to \code{\link[=factor]{factor()}}, but generates a warning if diff --git a/man/parse_guess.Rd b/man/parse_guess.Rd index a1d28f6..58cac9a 100644 --- a/man/parse_guess.Rd +++ b/man/parse_guess.Rd @@ -11,7 +11,8 @@ parse_guess( na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, - guess_integer = FALSE + 
guess_integer = FALSE, + .return_problems = FALSE ) col_guess() @@ -47,6 +48,8 @@ each field before parsing it?} \item{guess_integer}{If \code{TRUE}, guess integer types for whole numbers, if \code{FALSE} guess numeric type for all numbers.} + +\item{.return_problems}{Whether to hide the \code{problems} tibble from the output} } \description{ \code{parse_guess()} returns the parser vector; \code{guess_parser()} diff --git a/man/parse_number.Rd b/man/parse_number.Rd index 79ba907..c5fbd3f 100644 --- a/man/parse_number.Rd +++ b/man/parse_number.Rd @@ -5,7 +5,13 @@ \alias{col_number} \title{Parse numbers, flexibly} \usage{ -parse_number(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) +parse_number( + x, + na = c("", "NA"), + locale = default_locale(), + trim_ws = TRUE, + .return_problems = FALSE +) col_number() } @@ -23,6 +29,8 @@ names.} \item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from each field before parsing it?} + +\item{.return_problems}{Whether to hide the \code{problems} tibble from the output} } \value{ A numeric vector (double) of parsed numbers. diff --git a/man/parse_vector.Rd b/man/parse_vector.Rd index 26c04a5..623e1ed 100644 --- a/man/parse_vector.Rd +++ b/man/parse_vector.Rd @@ -9,13 +9,16 @@ parse_vector( collector, na = c("", "NA"), locale = default_locale(), - trim_ws = TRUE + trim_ws = TRUE, + .return_problems = FALSE ) } \arguments{ \item{x}{Character vector of elements to parse.} \item{collector}{Column specification.} + +\item{.return_problems}{Whether to hide the \code{problems} tibble from the output} } \description{ Parse a character vector. 
diff --git a/man/type_convert.Rd b/man/type_convert.Rd index 8b51825..96d167b 100644 --- a/man/type_convert.Rd +++ b/man/type_convert.Rd @@ -10,7 +10,8 @@ type_convert( na = c("", "NA"), trim_ws = TRUE, locale = default_locale(), - guess_integer = FALSE + guess_integer = FALSE, + verbose = FALSE ) } \arguments{ @@ -35,6 +36,8 @@ names.} \item{guess_integer}{If \code{TRUE}, guess integer types for whole numbers, if \code{FALSE} guess numeric type for all numbers.} + +\item{verbose}{whether to print messages} } \description{ This is useful if you need to do some manual munging - you can read the