From 49b1eb81ca47c7e90128b3ad37ce324e1c635a79 Mon Sep 17 00:00:00 2001
From: evalparse <zhuojia.dai@gmail.com>
Date: Sat, 30 Nov 2019 17:32:18 +1100
Subject: [PATCH 1/3] adding support for tidyfast

---
 DESCRIPTION                                   |   3 +-
 NAMESPACE                                     |  16 +-
 R/chunk_mapper.r                              |  89 ++++++++++
 R/dplyr_verbs.r                               | 163 +++++-------------
 R/sample_frac.R                               |   2 +-
 R/tidyfast-verbs.r                            |  49 ++++++
 ...dplyr_mapper.Rd => create_chunk_mapper.Rd} |  18 +-
 man/tidyfast_verbs.Rd                         |  51 ++++++
 8 files changed, 256 insertions(+), 135 deletions(-)
 create mode 100644 R/chunk_mapper.r
 create mode 100644 R/tidyfast-verbs.r
 rename man/{create_dplyr_mapper.Rd => create_chunk_mapper.Rd} (66%)
 create mode 100644 man/tidyfast_verbs.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 8aa60a23..d011853f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -51,7 +51,8 @@ Suggests:
     speedglm,
     broom,
     learnr,
-    ggplot2
+    ggplot2, 
+    tidyfast (>= 0.1.8)
 LinkingTo: 
     Rcpp
 RoxygenNote: 7.0.1
diff --git a/NAMESPACE b/NAMESPACE
index 9f88d417..c11973c6 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,6 +12,7 @@ S3method(compute,disk.frame)
 S3method(delayed,disk.frame)
 S3method(distinct,disk.frame)
 S3method(do,disk.frame)
+S3method(dt_separate,disk.frame)
 S3method(filter,disk.frame)
 S3method(full_join,disk.frame)
 S3method(get_chunk,disk.frame)
@@ -62,6 +63,12 @@ export(as.disk.frame)
 export(ceremony_text)
 export(chunk_arrange)
 export(chunk_distinct)
+export(chunk_dt_count.disk.frame)
+export(chunk_dt_fill)
+export(chunk_dt_hoist)
+export(chunk_dt_nest)
+export(chunk_dt_uncount.disk.frame)
+export(chunk_dt_unnest)
 export(chunk_group_by)
 export(chunk_lapply)
 export(chunk_summarise)
@@ -75,7 +82,7 @@ export(collect_list)
 export(colnames)
 export(copy_df_to)
 export(count.disk.frame)
-export(create_dplyr_mapper)
+export(create_chunk_mapper)
 export(csv_to_disk.frame)
 export(delayed)
 export(delete)
@@ -232,6 +239,13 @@ importFrom(rlang,eval_tidy)
 importFrom(rlang,quo)
 importFrom(stats,runif)
 importFrom(stringr,fixed)
+importFrom(tidyfast,dt_count)
+importFrom(tidyfast,dt_fill)
+importFrom(tidyfast,dt_hoist)
+importFrom(tidyfast,dt_nest)
+importFrom(tidyfast,dt_separate)
+importFrom(tidyfast,dt_uncount)
+importFrom(tidyfast,dt_unnest)
 importFrom(utils,capture.output)
 importFrom(utils,head)
 importFrom(utils,memory.limit)
diff --git a/R/chunk_mapper.r b/R/chunk_mapper.r
new file mode 100644
index 00000000..b51fd8e7
--- /dev/null
+++ b/R/chunk_mapper.r
@@ -0,0 +1,89 @@
+#' Create a function that applies to each chunk of a disk.frame
+#' 
+#' A function to make it easier to create functions like \code{filter}
+#' 
+#' @examples 
+#' 
+#' filter = create_chunk_mapper(dplyr::filter)
+#' 
+#' #' example: creating a function that keeps only the first and last n row
+#' first_and_last <- function(chunk, n, ...) {
+#'   nr = nrow(chunk)
+#'   print(nr-n+1:nr)
+#'   chunk[c(1:n, (nr-n+1):nr), ]
+#' }
+#' 
+#' #' create the function for use with disk.frame
+#' first_and_last_df = create_chunk_mapper(first_and_last)
+#' 
+#' mtcars.df = as.disk.frame(mtcars)
+#' 
+#' #' the operation is lazy
+#' lazy_mtcars.df = mtcars.df %>%
+#'   first_and_last_df(2)
+#' 
+#' #' bring into R
+#' collect(lazy_mtcars.df)
+#' 
+#' #' clean up
+#' delete(mtcars.df)
+#' @return A function of the form \code{function(.data, ...)} that applies \code{chunk_fn} to each chunk of \code{.data} via \code{map(..., lazy = TRUE)}
+#' @param chunk_fn The dplyr function to create a mapper for
+#' @param warning_msg The warning message to display when invoking the mapper
+#' @param as.data.frame if TRUE, each chunk is passed through as.data.frame() before chunk_fn is called (grouped_df chunks are passed unchanged); needed for dtplyr
+#' @importFrom rlang enquos quo
+#' @export
+create_chunk_mapper <- function(chunk_fn, warning_msg = NULL, as.data.frame = TRUE) {
+  return_func <- function(.data, ...) {
+    if (!is.null(warning_msg)) {
+      warning(warning_msg)
+    }
+    
+    
+    quo_dotdotdot = rlang::enquos(...)
+    
+    # this is designed to capture any global stuff
+    vars_and_pkgs = future::getGlobalsAndPackages(quo_dotdotdot)
+    data_for_eval_tidy = force(vars_and_pkgs$globals)
+    
+    res = map(.data, ~{
+      # environment this per-chunk expression runs in; captured globals are copied into it below
+      this_env = environment()
+      
+      if(length(data_for_eval_tidy) > 0) {
+        for(i in 1:length(data_for_eval_tidy)) {
+          assign(names(data_for_eval_tidy)[i], data_for_eval_tidy[[i]], pos = this_env)
+        }
+      }
+      # NOTE(review): the result of this lapply is discarded, so quo_dotdotdot itself looks unchanged - confirm intent
+      lapply(quo_dotdotdot, function(x) {
+        attr(x, ".Environment") = this_env
+      })
+      # build the call to chunk_fn; grouped_df chunks skip the as.data.frame() conversion
+      if(as.data.frame) {
+        if("grouped_df" %in% class(.x)) {
+          code = rlang::quo(chunk_fn(.x, !!!quo_dotdotdot))
+        } else {
+          code = rlang::quo(chunk_fn(as.data.frame(.x), !!!quo_dotdotdot))
+        }
+      } else {
+        code = rlang::quo(chunk_fn(.x, !!!quo_dotdotdot))
+      }
+      
+      # ZJ: we need both approaches (eval_tidy first, text re-evaluation as the fallback). TRUST ME
+      # TODO better NSE at some point need dist
+      tryCatch({
+        return(rlang::eval_tidy(code))
+      }, error = function(e) {
+        as_label_code = rlang::as_label(code)
+        if(as_label_code == "chunk_fn(...)") {
+          stop(glue::glue("disk.frame has detected a syntax error in \n\n`{code}`\n\n. If you believe your syntax is correct, raise an issue at https://github.com/xiaodaigh/disk.frame with a MWE"))
+        } else {
+          # likely to be dealing with data.tables: re-parse the call's label text and evaluate it in this_env
+          return(eval(parse(text=as_label_code), envir = this_env))
+        }
+      })
+    }, lazy = TRUE)
+  }
+  return_func
+}
\ No newline at end of file
diff --git a/R/dplyr_verbs.r b/R/dplyr_verbs.r
index 577bcfe5..ca3d2a52 100644
--- a/R/dplyr_verbs.r
+++ b/R/dplyr_verbs.r
@@ -1,86 +1,3 @@
-#' Create dplyr function for disk.frame
-#' 
-#' A function to make it easier to create functions like \code{filter}
-#' 
-#' @examples 
-#' 
-#' filter = create_dplyr_mapper(dplyr::filter)
-#' 
-#' #' example: creating a function that keeps only the first and last n row
-#' first_and_last <- function(chunk, n, ...) {
-#'   nr = nrow(chunk)
-#'   print(nr-n+1:nr)
-#'   chunk[c(1:n, (nr-n+1):nr), ]
-#' }
-#' 
-#' #' create the function for use with disk.frame
-#' first_and_last_df = create_dplyr_mapper(first_and_last)
-#' 
-#' mtcars.df = as.disk.frame(mtcars)
-#' 
-#' #' the operation is lazy
-#' lazy_mtcars.df = mtcars.df %>%
-#'   first_and_last_df(2)
-#' 
-#' #' bring into R
-#' collect(lazy_mtcars.df)
-#' 
-#' #' clean up
-#' delete(mtcars.df)
-#' 
-#' @param dplyr_fn The dplyr function to create a mapper for
-#' @param warning_msg The warning message to display when invoking the mapper
-#' @param as.data.frame force the input chunk of a data.frame; needed for dtplyr
-#' @importFrom rlang enquos quo
-#' @export
-create_dplyr_mapper <- function(dplyr_fn, warning_msg = NULL, as.data.frame = TRUE) {
-  return_func <- function(.data, ...) {
-    if (!is.null(warning_msg)) {
-      warning(warning_msg)
-    }
-    
-    quo_dotdotdot = rlang::enquos(...)
-    
-    # this is designed to capture any global stuff
-    vars_and_pkgs = future::getGlobalsAndPackages(quo_dotdotdot)
-    data_for_eval_tidy = force(vars_and_pkgs$globals)
-    
-    res = map(.data, ~{
-      this_env = environment()
-      
-      if(length(data_for_eval_tidy) > 0) {
-        for(i in 1:length(data_for_eval_tidy)) {
-          assign(names(data_for_eval_tidy)[i], data_for_eval_tidy[[i]], pos = this_env)
-        }
-      }
-      
-      lapply(quo_dotdotdot, function(x) {
-        attr(x, ".Environment") = this_env
-      })
-      
-      if(as.data.frame) {
-        if("grouped_df" %in% class(.x)) {
-          code = rlang::quo(dplyr_fn(.x, !!!quo_dotdotdot))
-        } else {
-          code = rlang::quo(dplyr_fn(as.data.frame(.x), !!!quo_dotdotdot))
-        }
-      } else {
-        code = rlang::quo(dplyr_fn(.x, !!!quo_dotdotdot))
-      }
-      
-      # ZJ: we need both approaches. TRUST ME
-      # TODO better NSE at some point
-      #tryCatch({
-        rlang::eval_tidy(code)
-      #}, error = function(e) {
-        # if the previous failed
-      #  eval(parse(text=rlang::as_label(code)), envir = this_env)
-      #})
-    }, lazy = TRUE)
-  }
-  return_func
-}
-
 #' The dplyr verbs implemented for disk.frame
 #' @description 
 #' Please see the dplyr document for their usage. Please note that `group_by`
@@ -119,30 +36,30 @@ select.disk.frame <- function(.data, ...) {
 
 #' @export
 #' @rdname dplyr_verbs
-rename.disk.frame <- create_dplyr_mapper(dplyr::rename)
+rename.disk.frame <- create_chunk_mapper(dplyr::rename)
 
 
 #' @export
 #' @rdname dplyr_verbs
-filter.disk.frame <- create_dplyr_mapper(dplyr::filter)
+filter.disk.frame <- create_chunk_mapper(dplyr::filter)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr filter_all
-filter_all.disk.frame <- create_dplyr_mapper(dplyr::filter_all)
+filter_all.disk.frame <- create_chunk_mapper(dplyr::filter_all)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr filter_if
-filter_if.disk.frame <- create_dplyr_mapper(dplyr::filter_if)
+filter_if.disk.frame <- create_chunk_mapper(dplyr::filter_if)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr filter_at
-filter_at.disk.frame <- create_dplyr_mapper(dplyr::filter_at)
+filter_at.disk.frame <- create_chunk_mapper(dplyr::filter_at)
 
 
 #' @export
@@ -150,37 +67,37 @@ filter_at.disk.frame <- create_dplyr_mapper(dplyr::filter_at)
 #' @importFrom future getGlobalsAndPackages
 #' @importFrom rlang eval_tidy quo enquos
 #' @importFrom dplyr mutate
-mutate.disk.frame <- create_dplyr_mapper(dplyr::mutate)
+mutate.disk.frame <- create_chunk_mapper(dplyr::mutate)
 
 
 #' @export
 #' @importFrom dplyr transmute
 #' @rdname dplyr_verbs
-transmute.disk.frame <- create_dplyr_mapper(dplyr::transmute)
+transmute.disk.frame <- create_chunk_mapper(dplyr::transmute)
 
 
 #' @export
 #' @importFrom dplyr arrange
 #' @rdname dplyr_verbs
-arrange.disk.frame =create_dplyr_mapper(dplyr::arrange, warning_msg="`arrange.disk.frame` is now deprecated. Please use `chunk_arrange` instead. This is in preparation for a more powerful `arrange` that sorts the whole disk.frame")
+arrange.disk.frame =create_chunk_mapper(dplyr::arrange, warning_msg="`arrange.disk.frame` is now deprecated. Please use `chunk_arrange` instead. This is in preparation for a more powerful `arrange` that sorts the whole disk.frame")
 
 
 #' @export
 #' @importFrom dplyr arrange
 #' @rdname dplyr_verbs
-chunk_arrange <- create_dplyr_mapper(dplyr::arrange)
+chunk_arrange <- create_chunk_mapper(dplyr::arrange)
 
 
 #' @export
 #' @importFrom dplyr tally
 #' @rdname dplyr_verbs
-tally.disk.frame <- create_dplyr_mapper(dplyr::tally)
+tally.disk.frame <- create_chunk_mapper(dplyr::tally)
 
 
 #' @export
 #' @importFrom dplyr count
 #' @rdname dplyr_verbs
-count.disk.frame <- create_dplyr_mapper(dplyr::count)
+count.disk.frame <- create_chunk_mapper(dplyr::count)
 
 # TODO family is not required is group-by
 # TODO alot of these .disk.frame functions are not generic
@@ -189,31 +106,31 @@ count.disk.frame <- create_dplyr_mapper(dplyr::count)
 #' @export
 #' @importFrom dplyr add_count
 #' @rdname dplyr_verbs
-add_count.disk.frame <- create_dplyr_mapper(dplyr::add_count)
+add_count.disk.frame <- create_chunk_mapper(dplyr::add_count)
 
 
 #' @export
 #' @importFrom dplyr add_tally
 #' @rdname dplyr_verbs
-add_tally.disk.frame <- create_dplyr_mapper(dplyr::add_tally)
+add_tally.disk.frame <- create_chunk_mapper(dplyr::add_tally)
 
 
 #' @export
 #' @importFrom dplyr summarize
 #' @rdname dplyr_verbs
-chunk_summarize <- create_dplyr_mapper(dplyr::summarize)
+chunk_summarize <- create_chunk_mapper(dplyr::summarize)
 
 
 #' @export
 #' @importFrom dplyr summarise
 #' @rdname dplyr_verbs
-chunk_summarise <- create_dplyr_mapper(dplyr::summarise)
+chunk_summarise <- create_chunk_mapper(dplyr::summarise)
 
 
 #' @export
 #' @importFrom dplyr summarize
 #' @rdname dplyr_verbs
-summarize.disk.frame <- create_dplyr_mapper(dplyr::summarize, warning_msg="`summarize.disk.frame` is now deprecated. Please use `chunk_summarize` instead. This is in preparation for a more powerful `group_by` framework")
+summarize.disk.frame <- create_chunk_mapper(dplyr::summarize, warning_msg="`summarize.disk.frame` is now deprecated. Please use `chunk_summarize` instead. This is in preparation for a more powerful `group_by` framework")
   #function(...) {
   #stop("`summarize.disk.frame` has been removed. Please use `chunk_summarize` instead. This is in preparation for a more powerful `group_by` framework")
 #}
@@ -222,7 +139,7 @@ summarize.disk.frame <- create_dplyr_mapper(dplyr::summarize, warning_msg="`summ
 #' @export
 #' @importFrom dplyr summarize
 #' @rdname dplyr_verbs
-summarise.disk.frame <- create_dplyr_mapper(dplyr::summarise, warning_msg="`summarise.disk.frame` is now deprecated. Please use `chunk_summarise` instead. This is in preparation for a more powerful `group_by` framework")
+summarise.disk.frame <- create_chunk_mapper(dplyr::summarise, warning_msg="`summarise.disk.frame` is now deprecated. Please use `chunk_summarise` instead. This is in preparation for a more powerful `group_by` framework")
 #function(...) {
 #   stop("`summarise.disk.frame` has been removed. Please use `chunk_summarise` instead. This is in preparation for a more powerful `group_by` framework")
 # }
@@ -231,115 +148,115 @@ summarise.disk.frame <- create_dplyr_mapper(dplyr::summarise, warning_msg="`summ
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr do
-do.disk.frame <- create_dplyr_mapper(dplyr::do)
+do.disk.frame <- create_chunk_mapper(dplyr::do)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr group_by_all
-group_by_all.disk.frame <- create_dplyr_mapper(dplyr::group_by_all)
+group_by_all.disk.frame <- create_chunk_mapper(dplyr::group_by_all)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr group_by_at
-group_by_at.disk.frame <- create_dplyr_mapper(dplyr::group_by_at)
+group_by_at.disk.frame <- create_chunk_mapper(dplyr::group_by_at)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr group_by_if
-group_by_if.disk.frame <- create_dplyr_mapper(dplyr::group_by_if)
+group_by_if.disk.frame <- create_chunk_mapper(dplyr::group_by_if)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr mutate_all
-mutate_all.disk.frame <- create_dplyr_mapper(dplyr::mutate_all)
+mutate_all.disk.frame <- create_chunk_mapper(dplyr::mutate_all)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr mutate_at
-mutate_at.disk.frame <- create_dplyr_mapper(dplyr::mutate_at)
+mutate_at.disk.frame <- create_chunk_mapper(dplyr::mutate_at)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr mutate_if
-mutate_if.disk.frame <- create_dplyr_mapper(dplyr::mutate_if)
+mutate_if.disk.frame <- create_chunk_mapper(dplyr::mutate_if)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr rename_all
-rename_all.disk.frame <- create_dplyr_mapper(dplyr::rename_all)
+rename_all.disk.frame <- create_chunk_mapper(dplyr::rename_all)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr rename_at
-rename_at.disk.frame <- create_dplyr_mapper(dplyr::rename_at)
+rename_at.disk.frame <- create_chunk_mapper(dplyr::rename_at)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr rename_if
-rename_if.disk.frame <- create_dplyr_mapper(dplyr::rename_if)
+rename_if.disk.frame <- create_chunk_mapper(dplyr::rename_if)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr select_all
-select_all.disk.frame <- create_dplyr_mapper(dplyr::select_all)
+select_all.disk.frame <- create_chunk_mapper(dplyr::select_all)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr select_at
-select_at.disk.frame <- create_dplyr_mapper(dplyr::select_at)
+select_at.disk.frame <- create_chunk_mapper(dplyr::select_at)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr select_if
-select_if.disk.frame <- create_dplyr_mapper(dplyr::select_if)
+select_if.disk.frame <- create_chunk_mapper(dplyr::select_if)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr summarise_all
-chunk_summarise_all <- create_dplyr_mapper(dplyr::summarise_all)
+chunk_summarise_all <- create_chunk_mapper(dplyr::summarise_all)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr summarise_at
-chunk_summarise_at <- create_dplyr_mapper(dplyr::summarise_at)
+chunk_summarise_at <- create_chunk_mapper(dplyr::summarise_at)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr summarize
-chunk_summarize <- create_dplyr_mapper(dplyr::summarize)
+chunk_summarize <- create_chunk_mapper(dplyr::summarize)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr summarize_all
-chunk_summarize_all <- create_dplyr_mapper(dplyr::summarize_all)
+chunk_summarize_all <- create_chunk_mapper(dplyr::summarize_all)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr summarize_at
-chunk_summarize_at <- create_dplyr_mapper(dplyr::summarize_at)
+chunk_summarize_at <- create_chunk_mapper(dplyr::summarize_at)
 
 
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr summarize_if
-chunk_summarize_if <- create_dplyr_mapper(dplyr::summarize_if)
+chunk_summarize_if <- create_chunk_mapper(dplyr::summarize_if)
 
 
 #' @export
@@ -353,7 +270,7 @@ distinct.disk.frame <- function(...) {
 #' @export
 #' @rdname dplyr_verbs
 #' @importFrom dplyr distinct
-chunk_distinct <- create_dplyr_mapper(dplyr::distinct, warning_msg = "the `distinct` function applies distinct chunk-wise")
+chunk_distinct <- create_chunk_mapper(dplyr::distinct, warning_msg = "the `distinct` function applies distinct chunk-wise")
 
 #' The shard keys of the disk.frame
 #' @return character
@@ -402,8 +319,8 @@ groups.disk.frame <- function(x){
 #     eval(parse(text=rlang::as_label(code)), envir = this_env)
 #   }, lazy = TRUE)
 # }
-#group_by.disk.frame <- create_dplyr_mapper(dplyr::group_by, warning_msg = "The group_by operation is applied WITHIN each chunk, hence the results may not be as expected. To address this issue, you can rechunk(df, shardby = your_group_keys) which can be computationally expensive. Otherwise, you may use a second stage summary to obtain the desired result.")
-group_by.disk.frame <- create_dplyr_mapper(dplyr::group_by, warning_msg="`group_by.disk.frame` is now deprecated. Please use `chunk_group_by` instead. This is in preparation for a more powerful `group_by` framework")
+#group_by.disk.frame <- create_chunk_mapper(dplyr::group_by, warning_msg = "The group_by operation is applied WITHIN each chunk, hence the results may not be as expected. To address this issue, you can rechunk(df, shardby = your_group_keys) which can be computationally expensive. Otherwise, you may use a second stage summary to obtain the desired result.")
+group_by.disk.frame <- create_chunk_mapper(dplyr::group_by, warning_msg="`group_by.disk.frame` is now deprecated. Please use `chunk_group_by` instead. This is in preparation for a more powerful `group_by` framework")
 #function(...) {
   #stop("`arrange.disk.frame` has been removed. Please use `chunk_arrange` instead. This is preparation for a more powerful `group_by` framework")
 #}
@@ -411,7 +328,7 @@ group_by.disk.frame <- create_dplyr_mapper(dplyr::group_by, warning_msg="`group_
 #' @export
 
 #' @rdname group_by
-chunk_group_by <- create_dplyr_mapper(dplyr::group_by)
+chunk_group_by <- create_chunk_mapper(dplyr::group_by)
 
 #' @export
 #' @rdname dplyr_verbs
diff --git a/R/sample_frac.R b/R/sample_frac.R
index 0191b948..476cf98e 100644
--- a/R/sample_frac.R
+++ b/R/sample_frac.R
@@ -20,7 +20,7 @@ sample_frac.disk.frame <- function(tbl, size=1, replace=FALSE, weight=NULL, .env
     stop(warning_msg)
   }
   
-  fn = disk.frame::create_dplyr_mapper(dplyr::sample_frac)
+  fn = disk.frame::create_chunk_mapper(dplyr::sample_frac)
   
   fn(tbl, size = size, replace = replace, ...)
 }
diff --git a/R/tidyfast-verbs.r b/R/tidyfast-verbs.r
new file mode 100644
index 00000000..da969f6e
--- /dev/null
+++ b/R/tidyfast-verbs.r
@@ -0,0 +1,49 @@
+#' The tidyfast verbs implemented for disk.frame
+#' @description 
+#' Please see the tidyfast documentation for their usage; each verb is applied chunk by chunk
+#' @export
+#' @importFrom tidyfast dt_count dt_uncount dt_hoist dt_nest dt_unnest dt_fill dt_separate
+#' @param ... Same as the tidyfast functions
+#' @param .data a disk.frame
+#' @rdname tidyfast_verbs
+#' @family tidyfast verbs
+#' @examples
+#' library(tidyfast)
+#' library(data.table)
+#' 
+#' #' create a disk.frame
+#' disk.frame_to_split <- as.disk.frame(data.table(
+#'   x = paste(letters, LETTERS, sep = ".")
+#' ))
+#' 
+#' disk.frame_to_split %>% 
+#'   dt_separate(x, into = c("lower", "upper")) %>% 
+#'   collect
+#' 
+#' #' clean up
+#' delete(disk.frame_to_split)
+chunk_dt_count.disk.frame <- create_chunk_mapper(tidyfast::dt_count, as.data.frame = FALSE)
+
+#' @rdname tidyfast_verbs
+#' @export
+chunk_dt_uncount.disk.frame <- create_chunk_mapper(tidyfast::dt_uncount, as.data.frame = FALSE)
+
+#' @rdname tidyfast_verbs
+#' @export
+chunk_dt_unnest = create_chunk_mapper(tidyfast::dt_unnest, as.data.frame = FALSE)
+
+#' @rdname tidyfast_verbs
+#' @export
+chunk_dt_nest = create_chunk_mapper(tidyfast::dt_nest, as.data.frame = FALSE)
+
+#' @rdname tidyfast_verbs
+#' @export
+chunk_dt_hoist = create_chunk_mapper(tidyfast::dt_hoist, as.data.frame = FALSE)
+
+#' @rdname tidyfast_verbs
+#' @export
+chunk_dt_fill = create_chunk_mapper(tidyfast::dt_fill, as.data.frame = FALSE)
+
+#' @rdname tidyfast_verbs
+#' @export
+dt_separate.disk.frame = create_chunk_mapper(tidyfast::dt_separate, as.data.frame = FALSE)
diff --git a/man/create_dplyr_mapper.Rd b/man/create_chunk_mapper.Rd
similarity index 66%
rename from man/create_dplyr_mapper.Rd
rename to man/create_chunk_mapper.Rd
index 64256972..dfe508f6 100644
--- a/man/create_dplyr_mapper.Rd
+++ b/man/create_chunk_mapper.Rd
@@ -1,24 +1,24 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/dplyr_verbs.r
-\name{create_dplyr_mapper}
-\alias{create_dplyr_mapper}
-\title{Create dplyr function for disk.frame}
+% Please edit documentation in R/chunk_mapper.r
+\name{create_chunk_mapper}
+\alias{create_chunk_mapper}
+\title{Create function that applies to each chunk if disk.frame}
 \usage{
-create_dplyr_mapper(dplyr_fn, warning_msg = NULL, as.data.frame = TRUE)
+create_chunk_mapper(chunk_fn, warning_msg = NULL, as.data.frame = TRUE)
 }
 \arguments{
-\item{dplyr_fn}{The dplyr function to create a mapper for}
-
 \item{warning_msg}{The warning message to display when invoking the mapper}
 
 \item{as.data.frame}{force the input chunk of a data.frame; needed for dtplyr}
+
+\item{fn}{The dplyr function to create a mapper for}
 }
 \description{
 A function to make it easier to create functions like \code{filter}
 }
 \examples{
 
-filter = create_dplyr_mapper(dplyr::filter)
+filter = create_chunk_mapper(dplyr::filter)
 
 #' example: creating a function that keeps only the first and last n row
 first_and_last <- function(chunk, n, ...) {
@@ -28,7 +28,7 @@ first_and_last <- function(chunk, n, ...) {
 }
 
 #' create the function for use with disk.frame
-first_and_last_df = create_dplyr_mapper(first_and_last)
+first_and_last_df = create_chunk_mapper(first_and_last)
 
 mtcars.df = as.disk.frame(mtcars)
 
diff --git a/man/tidyfast_verbs.Rd b/man/tidyfast_verbs.Rd
new file mode 100644
index 00000000..15d3b522
--- /dev/null
+++ b/man/tidyfast_verbs.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tidyfast-verbs.r
+\name{chunk_dt_count.disk.frame}
+\alias{chunk_dt_count.disk.frame}
+\alias{chunk_dt_uncount.disk.frame}
+\alias{chunk_dt_unnest}
+\alias{chunk_dt_nest}
+\alias{chunk_dt_hoist}
+\alias{chunk_dt_fill}
+\alias{dt_separate.disk.frame}
+\title{The tidy verbs implemented for disk.frame}
+\usage{
+chunk_dt_count.disk.frame(.data, ...)
+
+chunk_dt_uncount.disk.frame(.data, ...)
+
+chunk_dt_unnest(.data, ...)
+
+chunk_dt_nest(.data, ...)
+
+chunk_dt_hoist(.data, ...)
+
+chunk_dt_fill(.data, ...)
+
+\method{dt_separate}{disk.frame}(.data, ...)
+}
+\arguments{
+\item{.data}{a disk.frame}
+
+\item{...}{Same as the tidyfast functions}
+}
+\description{
+Please see the tidyfast document for their usage
+}
+\examples{
+library(tidyfast)
+library(data.table)
+
+#' create a disk.frame
+disk.frame_to_split <- as.disk.frame(data.table(
+  x = paste(letters, LETTERS, sep = ".")
+))
+
+disk.frame_to_split \%>\% 
+  dt_separate(x, into = c("lower", "upper")) \%>\% 
+  collect
+
+#' clean up
+delete(disk.frame_to_split)
+}
+\concept{tidyfast verbs}

From 3a32b21b6384b58a91bfe8f98eabf23773ecd817 Mon Sep 17 00:00:00 2001
From: xiaodaigh <zhuojia.dai@gmail.com>
Date: Mon, 16 Mar 2020 23:10:34 +1100
Subject: [PATCH 2/3] update readme with youtube stream link

---
 README.Rmd |  4 ++++
 README.md  | 66 +++++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 0c1d02ce..bcd3e1a0 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -327,3 +327,7 @@ Do you wish to give back the open-source community in non-financial ways? Here a
 [![](http://cranlogs.r-pkg.org/badges/grand-total/disk.frame)](https://cran.r-project.org/package=disk.frame)
 [![Travis build status](https://travis-ci.org/xiaodaigh/disk.frame.svg?branch=master)](https://travis-ci.org/xiaodaigh/disk.frame)
 [![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/xiaodaigh/disk.frame?branch=master&svg=true)](https://ci.appveyor.com/project/xiaodaigh/disk.frame)
+
+## Live Stream of `{disk.frame}` development
+
+* https://www.youtube.com/playlist?list=PL3DVdT3kym4fIU5CO-pxKtWhdjMVn4XGe
diff --git a/README.md b/README.md
index 95e17ce0..a43cfe2a 100644
--- a/README.md
+++ b/README.md
@@ -217,12 +217,15 @@ flights.df %>%
   filter(year == 2013) %>% 
   mutate(origin_dest = paste0(origin, dest)) %>% 
   head(2)
-#>   year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum
-#> 1 2013     1   1      517            515         2      830            819        11      UA   1545  N14228
-#> 2 2013     1   1      533            529         4      850            830        20      UA   1714  N24211
-#>   origin dest air_time distance hour minute           time_hour origin_dest
-#> 1    EWR  IAH      227     1400    5     15 2013-01-01 05:00:00      EWRIAH
-#> 2    LGA  IAH      227     1416    5     29 2013-01-01 05:00:00      LGAIAH
+#>   year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
+#> 1 2013     1   1      517            515         2      830            819
+#> 2 2013     1   1      533            529         4      850            830
+#>   arr_delay carrier flight tailnum origin dest air_time distance hour minute
+#> 1        11      UA   1545  N14228    EWR  IAH      227     1400    5     15
+#> 2        20      UA   1714  N24211    LGA  IAH      227     1416    5     29
+#>             time_hour origin_dest
+#> 1 2013-01-01 05:00:00      EWRIAH
+#> 2 2013-01-01 05:00:00      LGAIAH
 ```
 
 ### Group-by
@@ -279,7 +282,6 @@ obtained using estimated methods.
 
 ``` r
 library(data.table)
-#> data.table 1.12.8 using 6 threads (see ?getDTthreads).  Latest news: r-datatable.com
 #> 
 #> Attaching package: 'data.table'
 #> The following object is masked from 'package:purrr':
@@ -296,6 +298,30 @@ grp_by_stage1 =
     .(sum_dist = sum(distance)), 
     .(qtr = ifelse(month <= 3, "Q1", "Q2"))
     ]
+#> Warning in serialize(data, node$con): 'package:stats' may not be available when
+#> loading
+#> Warning in serialize(data, node$con): 'package:data.table' may not be available
+#> when loading
+#> Warning in serialize(data, node$con): 'package:stats' may not be available when
+#> loading
+#> Warning in serialize(data, node$con): 'package:data.table' may not be available
+#> when loading
+#> Warning in serialize(data, node$con): 'package:stats' may not be available when
+#> loading
+#> Warning in serialize(data, node$con): 'package:data.table' may not be available
+#> when loading
+#> Warning in serialize(data, node$con): 'package:stats' may not be available when
+#> loading
+#> Warning in serialize(data, node$con): 'package:data.table' may not be available
+#> when loading
+#> Warning in serialize(data, node$con): 'package:stats' may not be available when
+#> loading
+#> Warning in serialize(data, node$con): 'package:data.table' may not be available
+#> when loading
+#> Warning in serialize(data, node$con): 'package:stats' may not be available when
+#> loading
+#> Warning in serialize(data, node$con): 'package:data.table' may not be available
+#> when loading
 
 grp_by_stage1
 #>    qtr sum_dist
@@ -326,7 +352,7 @@ To find out where the disk.frame is stored on disk:
 ``` r
 # where is the disk.frame stored
 attr(flights.df, "path")
-#> [1] "C:\\Users\\RTX2080\\AppData\\Local\\Temp\\Rtmpa6R05d\\file1b086cec36c7.df"
+#> [1] "C:\\Users\\RTX2080\\AppData\\Local\\Temp\\RtmpOeAro4\\file17a0150634fd.df"
 ```
 
 A number of data.frame functions are implemented for disk.frame
@@ -334,19 +360,23 @@ A number of data.frame functions are implemented for disk.frame
 ``` r
 # get first few rows
 head(flights.df, 1)
-#>    year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum
-#> 1: 2013     1   1      517            515         2      830            819        11      UA   1545  N14228
-#>    origin dest air_time distance hour minute           time_hour
-#> 1:    EWR  IAH      227     1400    5     15 2013-01-01 05:00:00
+#>    year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
+#> 1: 2013     1   1      517            515         2      830            819
+#>    arr_delay carrier flight tailnum origin dest air_time distance hour minute
+#> 1:        11      UA   1545  N14228    EWR  IAH      227     1400    5     15
+#>              time_hour
+#> 1: 2013-01-01 05:00:00
 ```
 
 ``` r
 # get last few rows
 tail(flights.df, 1)
-#>    year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay carrier flight tailnum
-#> 1: 2013     9  30       NA            840        NA       NA           1020        NA      MQ   3531  N839MQ
-#>    origin dest air_time distance hour minute           time_hour
-#> 1:    LGA  RDU       NA      431    8     40 2013-09-30 08:00:00
+#>    year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
+#> 1: 2013     9  30       NA            840        NA       NA           1020
+#>    arr_delay carrier flight tailnum origin dest air_time distance hour minute
+#> 1:        NA      MQ   3531  N839MQ    LGA  RDU       NA      431    8     40
+#>              time_hour
+#> 1: 2013-09-30 08:00:00
 ```
 
 ``` r
@@ -455,3 +485,7 @@ ways? Here are some ways you can contribute
 status](https://travis-ci.org/xiaodaigh/disk.frame.svg?branch=master)](https://travis-ci.org/xiaodaigh/disk.frame)
 [![AppVeyor build
 status](https://ci.appveyor.com/api/projects/status/github/xiaodaigh/disk.frame?branch=master&svg=true)](https://ci.appveyor.com/project/xiaodaigh/disk.frame)
+
+## Live Stream of `{disk.frame}` development
+
+  - <https://www.youtube.com/playlist?list=PL3DVdT3kym4fIU5CO-pxKtWhdjMVn4XGe>

From ff2c0f0fbe0b67f06b8f1b61e90e1131af72d401 Mon Sep 17 00:00:00 2001
From: xiaodaigh <zhuojia.dai@gmail.com>
Date: Sat, 21 Mar 2020 20:03:24 +1100
Subject: [PATCH 3/3] mid development

---
 R/chunk_mapper.r           |  4 ++--
 R/tidyfast-verbs.r         | 14 ++++++++++++--
 man/chunk_group_by.Rd      |  3 ---
 man/create_chunk_mapper.Rd |  4 ++--
 man/create_dplyr_mapper.Rd | 11 +++++++++++
 man/tidyfast_verbs.Rd      |  2 +-
 6 files changed, 28 insertions(+), 10 deletions(-)
 create mode 100644 man/create_dplyr_mapper.Rd

diff --git a/R/chunk_mapper.r b/R/chunk_mapper.r
index a6d80c61..eea3a288 100644
--- a/R/chunk_mapper.r
+++ b/R/chunk_mapper.r
@@ -39,7 +39,7 @@ create_chunk_mapper <- function(chunk_fn, warning_msg = NULL, as.data.frame = TR
       warning(warning_msg)
     }
     
-    
+    # capture the arguments passed in ... unevaluated, as quosures
     quo_dotdotdot = rlang::enquos(...)
     
     # this is designed to capture any global stuff
@@ -85,4 +85,4 @@ create_chunk_mapper <- function(chunk_fn, warning_msg = NULL, as.data.frame = TR
     }, lazy = TRUE)
   }
   return_func
-}
\ No newline at end of file
+}
diff --git a/R/tidyfast-verbs.r b/R/tidyfast-verbs.r
index da969f6e..b4883238 100644
--- a/R/tidyfast-verbs.r
+++ b/R/tidyfast-verbs.r
@@ -22,11 +22,21 @@
 #' 
 #' #' clean up
 #' delete(disk.frame_to_split)
-chunk_dt_count.disk.frame <- create_chunk_mapper(tidyfast::dt_count, as.data.frame = FALSE)
+chunk_dt_count <- create_chunk_mapper(tidyfast::dt_count, as.data.frame = FALSE)
+
+#' dt_count on a whole disk.frame (work in progress: currently always stops with an error)
+dt_count.disk.frame <- function(dt_, ..., na.rm = FALSE, wt = NULL) {
+  stop("ZJ: I was up to here, and I need better understanding of NSE. Why?
+       ifelse(is.null(wt), NULL, wt) is not going to work if wt is a column name")
+  
+  dt_ %>% 
+    chunk_dt_count(..., na.rm = force(na.rm), wt = ifelse(is.null(wt), NULL, wt)) %>% 
+    collect
+}
 
 #' @rdname tidyfast_verbs
 #' @export
-chunk_dt_uncount.disk.frame <- create_chunk_mapper(tidyfast::dt_uncount, as.data.frame = FALSE)
+chunk_dt_uncount <- create_chunk_mapper(tidyfast::dt_uncount, as.data.frame = FALSE)
 
 #' @rdname tidyfast_verbs
 #' @export
diff --git a/man/chunk_group_by.Rd b/man/chunk_group_by.Rd
index f06836c2..9a54e6f3 100644
--- a/man/chunk_group_by.Rd
+++ b/man/chunk_group_by.Rd
@@ -2,15 +2,12 @@
 % Please edit documentation in R/dplyr_verbs.r
 \name{chunk_summarize}
 \alias{chunk_summarize}
-\alias{chunk_summarise}
 \alias{chunk_group_by}
 \alias{chunk_ungroup}
 \title{Group by within each disk.frame}
 \usage{
 chunk_summarize(.data, ...)
 
-chunk_summarise(.data, ...)
-
 chunk_group_by(.data, ...)
 
 chunk_ungroup(.data, ...)
diff --git a/man/create_chunk_mapper.Rd b/man/create_chunk_mapper.Rd
index dfe508f6..0702093d 100644
--- a/man/create_chunk_mapper.Rd
+++ b/man/create_chunk_mapper.Rd
@@ -7,11 +7,11 @@
 create_chunk_mapper(chunk_fn, warning_msg = NULL, as.data.frame = TRUE)
 }
 \arguments{
+\item{chunk_fn}{The dplyr function to create a mapper for}
+
 \item{warning_msg}{The warning message to display when invoking the mapper}
 
 \item{as.data.frame}{force the input chunk of a data.frame; needed for dtplyr}
-
-\item{fn}{The dplyr function to create a mapper for}
 }
 \description{
 A function to make it easier to create functions like \code{filter}
diff --git a/man/create_dplyr_mapper.Rd b/man/create_dplyr_mapper.Rd
new file mode 100644
index 00000000..a486be28
--- /dev/null
+++ b/man/create_dplyr_mapper.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr_verbs.r
+\name{create_dplyr_mapper}
+\alias{create_dplyr_mapper}
+\title{Kept for backwards-compatibility to be removed in 0.3}
+\usage{
+create_dplyr_mapper()
+}
+\description{
+Kept for backwards-compatibility to be removed in 0.3
+}
diff --git a/man/tidyfast_verbs.Rd b/man/tidyfast_verbs.Rd
index 15d3b522..977cbcf3 100644
--- a/man/tidyfast_verbs.Rd
+++ b/man/tidyfast_verbs.Rd
@@ -22,7 +22,7 @@ chunk_dt_hoist(.data, ...)
 
 chunk_dt_fill(.data, ...)
 
-\method{dt_separate}{disk.frame}(.data, ...)
+dt_separate.disk.frame(.data, ...)
 }
 \arguments{
 \item{.data}{a disk.frame}