Skip to content

Commit

Permalink
fix: rewrite and uncomment tests for $str$split* functions
Browse files Browse the repository at this point in the history
Author: Etienne Bacher <[email protected]>
  • Loading branch information
eitsupi committed Nov 9, 2024
1 parent fb3b389 commit ce105a6
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 114 deletions.
38 changes: 25 additions & 13 deletions R/expr-string.R
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ expr_str_extract <- function(pattern, group_index) {
#' pl$col("foo")$str$extract_all(r"((\d+))")$alias("extracted_nrs")
#' )
expr_str_extract_all <- function(pattern) {
  # Convert `pattern` with `as_lit = TRUE`, the same way the `$str$split*`
  # methods convert `by`, so that plain R values are handled as literals.
  # Only a single `str_extract_all()` call is made; its result is handed to
  # `wrap()`.
  self$`_rexpr`$str_extract_all(
    as_polars_expr(pattern, as_lit = TRUE)$`_rexpr`
  ) |>
    wrap()
}

Expand All @@ -706,6 +706,7 @@ expr_str_count_matches <- function(pattern, ..., literal = FALSE) {

#' Split the string by a substring
#'
#' @inheritParams rlang::check_dots_empty0
#' @param by Substring to split by. Can be an Expr.
#' @param inclusive If `TRUE`, include the split character/string in the results.
#'
Expand All @@ -720,19 +721,25 @@ expr_str_count_matches <- function(pattern, ..., literal = FALSE) {
#' by = c("_", "_", "*")
#' )
#' df
#' df$select(pl$col("s")$str$split(by = pl$col("by"))$alias("split"))
expr_str_split <- function(by, inclusive = FALSE) {
self$`_rexpr`$str_split(result(by), result(inclusive)) |>
wrap()
#' df$select(split = pl$col("s")$str$split(by = pl$col("by")))
expr_str_split <- function(by, ..., inclusive = FALSE) {
  wrap({
    # `...` must stay empty; it only forces `inclusive` to be passed by name.
    check_dots_empty0(...)
    # Convert the separator to an expression, with plain R values as literals.
    by_rexpr <- as_polars_expr(by, as_lit = TRUE)$`_rexpr`
    # `inclusive` is forwarded untouched to the underlying method.
    self$`_rexpr`$str_split(by_rexpr, inclusive)
  })
}

#' Split the string by a substring using `n` splits
#'
#' @description This results in a struct of `n+1` fields. If it cannot make `n`
#' splits, the remaining field elements will be null.
#' @param by Substring to split by.
#'
#' @inheritParams rlang::check_dots_empty0
#' @inheritParams expr_str_split
#' @param n Number of splits to make.
#' @param inclusive If `TRUE`, include the split character/string in the results.
#'
#' @inherit as_polars_expr return
#'
Expand All @@ -742,20 +749,25 @@ expr_str_split <- function(by, inclusive = FALSE) {
#' split = pl$col("s")$str$split_exact(by = "_", 1),
#' split_inclusive = pl$col("s")$str$split_exact(by = "_", 1, inclusive = TRUE)
#' )
expr_str_split_exact <- function(by, n, inclusive = FALSE) {
self$`_rexpr`$str_split_exact(by, result(n), result(inclusive)) |>
wrap()
expr_str_split_exact <- function(by, n, ..., inclusive = FALSE) {
  wrap({
    # `...` must stay empty; it only forces `inclusive` to be passed by name.
    check_dots_empty0(...)
    # Convert the separator to an expression, with plain R values as literals.
    by_rexpr <- as_polars_expr(by, as_lit = TRUE)$`_rexpr`
    # `n` and `inclusive` are forwarded untouched to the underlying method.
    self$`_rexpr`$str_split_exact(by_rexpr, n, inclusive)
  })
}


#' Split the string by a substring, restricted to returning at most `n` items
#'
#' @description
#' If the number of possible splits is less than `n-1`, the remaining field
#' elements will be null. If the number of possible splits is `n-1` or greater,
#' the last (nth) substring will contain the remainder of the string.
#' @param by Substring to split by.
#' @param n Maximum number of items to return.
#'
#' @inheritParams expr_str_split_exact
#'
#' @inherit as_polars_expr return
#'
Expand Down
6 changes: 4 additions & 2 deletions man/expr_str_split.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/expr_str_split_exact.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/expr_str_splitn.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

72 changes: 72 additions & 0 deletions tests/testthat/_snaps/expr-string.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,20 @@
Caused by error:
! -2.0 is out of range that can be safely converted to usize

---

Code
df$select(pl$col("a")$str$pad_start(5, "multiple_chars"))
Condition
Error in `df$select()`:
! Evaluation failed in `$select()`.
Caused by error:
! Evaluation failed in `$select()`.
Caused by error in `pl$col("a")$str$pad_start()`:
! Evaluation failed in `$pad_start()`.
Caused by error:
! Expected a string with one character only, currently has 14 (from "multiple_chars").

# encode decode

Code
Expand Down Expand Up @@ -277,6 +291,18 @@
Caused by error:
! Argument `group_index` must be numeric, not character

# str$extract_all

Code
pl$select(pl$lit("abc")$str$extract_all(1))
Condition
Error:
! Evaluation failed in `$select()`.
Caused by error:
! Evaluation failed in `$collect()`.
Caused by error:
! invalid series dtype: expected `String`, got `f64`

# str$count_matches

Code
Expand All @@ -289,6 +315,52 @@
Caused by error:
! invalid series dtype: expected `String`, got `f64`

# str$split

Code
df$select(pl$col("x")$str$split(by = 42))
Condition
Error in `df$select()`:
! Evaluation failed in `$select()`.
Caused by error:
! Evaluation failed in `$collect()`.
Caused by error:
! invalid series dtype: expected `String`, got `f64`

---

Code
df$select(pl$col("x")$str$split(by = "foo", inclusive = 42))
Condition
Error in `df$select()`:
! Evaluation failed in `$select()`.
Caused by error:
! Evaluation failed in `$select()`.
Caused by error in `pl$col("x")$str$split()`:
! Evaluation failed in `$split()`.
Caused by error:
! Argument `inclusive` must be logical, not double

# str$split_exact

Code
pl$lit("42")$str$split_exact(by = "a", n = -1, inclusive = TRUE)
Condition
Error in `pl$lit("42")$str$split_exact()`:
! Evaluation failed in `$split_exact()`.
Caused by error:
! -1.0 is out of range that can be safely converted to usize

---

Code
pl$lit("42")$str$split_exact(by = "a", n = 2, inclusive = "joe")
Condition
Error in `pl$lit("42")$str$split_exact()`:
! Evaluation failed in `$split_exact()`.
Caused by error:
! Argument `inclusive` must be logical, not character

# str$replace

Code
Expand Down
Loading

0 comments on commit ce105a6

Please sign in to comment.