Skip to content

Commit

Permalink
Reduce verbosity and remove colorization ref #4
Browse files Browse the repository at this point in the history
Also explicitly remove `problems` ref #7
  • Loading branch information
chainsawriot committed Mar 15, 2024
1 parent 2b5347b commit 0133207
Show file tree
Hide file tree
Showing 12 changed files with 229 additions and 82 deletions.
3 changes: 0 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,10 @@ RoxygenNote: 7.3.1
Imports:
cli,
rlang,
crayon,
tzdb
Suggests:
knitr,
stringi,
testthat,
withr,
hms


88 changes: 34 additions & 54 deletions R/parser.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,26 @@ collector_find <- function(name) {
#' @family parsers
#' @param x Character vector of elements to parse.
#' @param collector Column specification.
#' @param .return_problems Whether to hide the `problems` tibble from the output
#' @keywords internal
#' @export
#' @examples
#' x <- c("1", "2", "3", "NA")
#' parse_vector(x, col_integer())
#' parse_vector(x, col_double())
parse_vector <- function(x, collector, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector <- function(x, collector, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
stopifnot(is.character(x))
if (is.character(collector)) {
collector <- collector_find(collector)
}

## warn_problems(parse_vector_(x, collector, na = na, locale_ = locale, trim_ws = trim_ws))
parse_vector_(x, collector, na = na, locale_ = locale, trim_ws = trim_ws)
res <- parse_vector_(x, collector, na = na, locale_ = locale, trim_ws = trim_ws)
if (.return_problems || is.null(attr(res, "problems"))) {
return(res)
}
attr(res, "problems") <- NULL
return(res)
}

#' Parse logicals, integers, and reals
Expand All @@ -65,6 +71,7 @@ parse_vector <- function(x, collector, na = c("", "NA"), locale = default_locale
#' names.
#' @param trim_ws Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
#' each field before parsing it?
#' @inheritParams parse_vector
#' @family parsers
#' @examples
#' parse_integer(c("1", "2", "3"))
Expand All @@ -84,26 +91,26 @@ NULL

#' @rdname parse_atomic
#' @export
parse_logical <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_logical(), na = na, locale = locale, trim_ws = trim_ws)
parse_logical <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_logical(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_atomic
#' @export
parse_integer <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_integer(), na = na, locale = locale, trim_ws = trim_ws)
parse_integer <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_integer(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_atomic
#' @export
parse_double <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_double(), na = na, locale = locale, trim_ws = trim_ws)
parse_double <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_double(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_atomic
#' @export
parse_character <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_character(), na = na, locale = locale, trim_ws = trim_ws)
parse_character <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_character(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_atomic
Expand Down Expand Up @@ -166,8 +173,8 @@ col_skip <- function() {
#' ## Specifying strings for NAs
#' parse_number(c("1", "2", "3", "NA"))
#' parse_number(c("1", "2", "3", "NA", "Nothing"), na = c("NA", "Nothing"))
parse_number <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_number(), na = na, locale = locale, trim_ws = trim_ws)
parse_number <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_number(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_number
Expand Down Expand Up @@ -203,8 +210,9 @@ col_number <- function() {
#' # ISO 8601 date times
#' guess_parser(c("2010-10-10"))
#' parse_guess(c("2010-10-10"))
parse_guess <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, guess_integer = FALSE) {
parse_vector(x, guess_parser(x, locale, guess_integer = guess_integer, na = na), na = na, locale = locale, trim_ws = trim_ws)
parse_guess <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, guess_integer = FALSE, .return_problems = FALSE) {
parse_vector(x, guess_parser(x, locale, guess_integer = guess_integer, na = na), na = na, locale = locale, trim_ws = trim_ws,
.return_problems = .return_problems)
}

#' @rdname parse_guess
Expand Down Expand Up @@ -262,8 +270,9 @@ guess_parser <- function(x, locale = default_locale(), guess_integer = FALSE, na
#' # and reports problems
#' parse_factor(x, levels = animals)
parse_factor <- function(x, levels = NULL, ordered = FALSE, na = c("", "NA"),
locale = default_locale(), include_na = TRUE, trim_ws = TRUE) {
parse_vector(x, col_factor(levels, ordered, include_na), na = na, locale = locale, trim_ws = trim_ws)
locale = default_locale(), include_na = TRUE, trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_factor(levels, ordered, include_na), na = na, locale = locale, trim_ws = trim_ws,
.return_problems = .return_problems)
}

#' @rdname parse_factor
Expand Down Expand Up @@ -401,20 +410,20 @@ col_factor <- function(levels = NULL, ordered = FALSE, include_na = FALSE) {
#' parse_datetime("1979-10-14T1010Z", locale = us_central)
#' # Your current time zone
#' parse_datetime("1979-10-14T1010", locale = locale(tz = ""))
parse_datetime <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_datetime(format), na = na, locale = locale, trim_ws = trim_ws)
parse_datetime <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_datetime(format), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_datetime
#' @export
parse_date <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_date(format), na = na, locale = locale, trim_ws = trim_ws)
parse_date <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_date(format), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_datetime
#' @export
parse_time <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_time(format), na = na, locale = locale, trim_ws = trim_ws)
parse_time <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_time(format), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_datetime
Expand Down Expand Up @@ -877,8 +886,8 @@ as.character.col_spec <- function(x, ...) {
}

#' @export
print.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_color(), ...) {
cat(format.col_spec(x, n = n, condense = condense, colour = colour, ...))
print.col_spec <- function(x, n = Inf, condense = NULL, ...) {
cat(format.col_spec(x, n = n, condense = condense, ...))

invisible(x)
}
Expand All @@ -894,7 +903,7 @@ cols_condense <- function(x) {
}

#' @export
format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_color(), ...) {
format.col_spec <- function(x, n = Inf, condense = NULL, ...) {
if (n == 0) {
return("")
}
Expand Down Expand Up @@ -929,7 +938,6 @@ format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_co
args <- paste(names(args), args, sep = " = ", collapse = ", ")

col_funs <- paste0(col_funs, "(", args, ")")
col_funs <- colourise_cols(col_funs, colour)

col_names <- names(cols)[[i]] %||% ""

Expand Down Expand Up @@ -961,34 +969,6 @@ format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_co
out
}

colourise_cols <- function(cols, colourise = crayon::has_color()) {
if (!isTRUE(colourise)) {
return(cols)
}

fname <- sub("[(].*", "", cols)
for (i in seq_along(cols)) {
cols[[i]] <- switch(fname,
col_skip = ,
col_guess = cols[[i]],

col_character = ,
col_factor = crayon::red(cols[[i]]),

col_logical = crayon::yellow(cols[[i]]),

col_double = ,
col_integer = ,
col_number = crayon::green(cols[[i]]),

col_date = ,
col_datetime = ,
col_time = crayon::blue(cols[[i]])
)
}
cols
}

# Used in read_delim(), read_fwf() and type_convert()
show_cols_spec <- function(spec, n = getOption("readr.num_columns", 20)) {
if (n > 0) {
Expand Down
6 changes: 4 additions & 2 deletions R/type_convert.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' a string. See `vignette("readr")` for more details.
#'
#' If `NULL`, column types will be imputed using all rows.
#' @param verbose whether to print messages
#' @inheritParams guess_parser
#' @note `type_convert()` removes a 'spec' attribute,
#' because it likely modifies the column data types.
Expand All @@ -27,7 +28,8 @@
#' df <- data.frame(x = c("NA", "10"), stringsAsFactors = FALSE)
#' str(type_convert(df))
type_convert <- function(df, col_types = NULL, na = c("", "NA"), trim_ws = TRUE,
locale = default_locale(), guess_integer = FALSE) {
locale = default_locale(), guess_integer = FALSE,
verbose = FALSE) {
stopifnot(is.data.frame(df))
is_character <- vapply(df, is.character, logical(1))

Expand All @@ -54,7 +56,7 @@ type_convert <- function(df, col_types = NULL, na = c("", "NA"), trim_ws = TRUE,
)

## if (is.null(col_types) && !is_testing()) {
if (is.null(col_types)) {
if (is.null(col_types) && verbose) {
show_cols_spec(specs)
}

Expand Down
41 changes: 40 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ knitr::opts_chunk$set(

`readr`'s 1e type inferencing and parsing tools are used by various R packages, e.g. `readODS` and `surveytoolbox`, but ironically those packages do not use the main functions (e.g. `readr::read_delim()`) of `readr`. As explained in the README of `readr`, those 1e code will be eventually removed from `readr`.

`minty` aims at providing a set of minimal, long-term, and compatible type inferencing and parsing tools for those packages.
`minty` aims at providing a set of minimal, long-term, and compatible type inferencing and parsing tools for those packages. If you need to parse interactively, please use either `readr` or `vroom`.

## Installation

Expand Down Expand Up @@ -107,3 +107,42 @@ res
```{r}
str(res)
```

## Differences: `readr` vs `minty`

Unlike `readr` and `vroom`, please note that `minty` is mainly for **non-interactive usage**. Therefore, `minty` emits fewer messages and warnings than `readr` and `vroom`.

```{r}
data <- minty::type_convert(text_only)
data
```

```{r}
data <- readr::type_convert(text_only)
data
```

`verbose` option is added if you like those messages, default to `FALSE`.

```{r}
data <- minty::type_convert(text_only, verbose = TRUE)
```

At the moment, `minty` does not use [the `problems` mechanism](https://vroom.r-lib.org/reference/problems.html) by default.

```{r}
minty::parse_logical(c("true", "fake", "IDK"), na = "IDK")
```

```{r}
readr::parse_logical(c("true", "fake", "IDK"), na = "IDK")
```

## Similar packages

For parsing ambiguous date(time)

* [timeless](https://github.com/schochastics/timeless)
* [anytime](https://github.com/eddelbuettel/anytime)


Loading

0 comments on commit 0133207

Please sign in to comment.