From 0dcb5a6489ecbdc8c03ebb8f5bf715c8ca708502 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Thu, 8 Feb 2024 18:04:37 -0800
Subject: [PATCH] wip+feat: refactor epi_archive to use S3 * A/B testing branch
 * make sure the parallel set of tests pass

---
 DESCRIPTION                                   |    3 +
 NAMESPACE                                     |   26 +
 R/archive.R                                   |    3 -
 R/archive_new.R                               | 1115 +++++++++++++++++
 R/grouped_archive_new.R                       |  456 +++++++
 R/methods-epi_archive_new.R                   |  830 ++++++++++++
 man/as_epi_archive2.Rd                        |  142 +++
 man/as_of.epi_archive2.Rd                     |   33 +
 man/epi_archive.Rd                            |  106 +-
 man/epix_as_of2.Rd                            |   96 ++
 man/epix_fill_through_version2.Rd             |   48 +
 man/epix_merge2.Rd                            |   73 ++
 man/epix_slide2.Rd                            |  283 +++++
 man/epix_truncate_versions_after.Rd           |   10 +-
 ...ate_versions_after.grouped_epi_archive2.Rd |   11 +
 man/fill_through_version.epi_archive2.Rd      |   21 +
 man/group_by.epi_archive.Rd                   |   23 +-
 man/group_by.epi_archive2.Rd                  |  147 +++
 man/is_epi_archive2.Rd                        |   35 +
 man/max_version_with_row_in.Rd                |    9 +-
 man/merge_epi_archive2.Rd                     |   30 +
 man/new_epi_archive2.Rd                       |   69 +
 man/next_after.Rd                             |    8 +-
 man/print.epi_archive2.Rd                     |   17 +
 man/slide.epi_archive2.Rd                     |  101 ++
 man/slide.grouped_epi_archive2.Rd             |   24 +
 man/truncate_versions_after.epi_archive2.Rd   |   19 +
 ...ate_versions_after.grouped_epi_archive2.Rd |   18 +
 tests/testthat/test-archive_new.R             |  173 +++
 tests/testthat/test-compactify_new.R          |  110 ++
 .../test-epix_fill_through_version_new.R      |  109 ++
 tests/testthat/test-epix_merge_new.R          |  228 ++++
 tests/testthat/test-epix_slide_new.R          |  810 ++++++++++++
 tests/testthat/test-grouped_epi_archive_new.R |  104 ++
 tests/testthat/test-methods-epi_archive_new.R |  138 ++
 35 files changed, 5420 insertions(+), 8 deletions(-)
 create mode 100644 R/archive_new.R
 create mode 100644 R/grouped_archive_new.R
 create mode 100644 R/methods-epi_archive_new.R
 create mode 100644 man/as_epi_archive2.Rd
 create mode 100644 man/as_of.epi_archive2.Rd
 create mode 100644 man/epix_as_of2.Rd
 create mode 100644 man/epix_fill_through_version2.Rd
 create mode 100644 man/epix_merge2.Rd
 create mode 100644 man/epix_slide2.Rd
 create mode 100644 man/epix_truncate_versions_after.grouped_epi_archive2.Rd
 create mode 100644 man/fill_through_version.epi_archive2.Rd
 create mode 100644 man/group_by.epi_archive2.Rd
 create mode 100644 man/is_epi_archive2.Rd
 create mode 100644 man/merge_epi_archive2.Rd
 create mode 100644 man/new_epi_archive2.Rd
 create mode 100644 man/print.epi_archive2.Rd
 create mode 100644 man/slide.epi_archive2.Rd
 create mode 100644 man/slide.grouped_epi_archive2.Rd
 create mode 100644 man/truncate_versions_after.epi_archive2.Rd
 create mode 100644 man/truncate_versions_after.grouped_epi_archive2.Rd
 create mode 100644 tests/testthat/test-archive_new.R
 create mode 100644 tests/testthat/test-compactify_new.R
 create mode 100644 tests/testthat/test-epix_fill_through_version_new.R
 create mode 100644 tests/testthat/test-epix_merge_new.R
 create mode 100644 tests/testthat/test-epix_slide_new.R
 create mode 100644 tests/testthat/test-grouped_epi_archive_new.R
 create mode 100644 tests/testthat/test-methods-epi_archive_new.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 71d95969..538fd023 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,6 +73,7 @@ Depends:
 URL: https://cmu-delphi.github.io/epiprocess/
 Collate:
     'archive.R'
+    'archive_new.R'
     'autoplot.R'
     'correlation.R'
     'data.R'
@@ -80,9 +81,11 @@ Collate:
     'epiprocess.R'
     'group_by_epi_df_methods.R'
     'methods-epi_archive.R'
+    'grouped_archive_new.R'
     'grouped_epi_archive.R'
     'growth_rate.R'
     'key_colnames.R'
+    'methods-epi_archive_new.R'
     'methods-epi_df.R'
     'outliers.R'
     'reexports.R'
diff --git a/NAMESPACE b/NAMESPACE
index 03e0e41d..51c4091c 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,40 +6,57 @@ S3method(as_epi_df,data.frame)
 S3method(as_epi_df,epi_df)
 S3method(as_epi_df,tbl_df)
 S3method(as_epi_df,tbl_ts)
+S3method(as_of,epi_archive2)
 S3method(as_tibble,epi_df)
 S3method(as_tsibble,epi_df)
 S3method(autoplot,epi_df)
+S3method(clone,epi_archive2)
+S3method(clone,grouped_epi_archive2)
 S3method(dplyr_col_modify,col_modify_recorder_df)
 S3method(dplyr_col_modify,epi_df)
 S3method(dplyr_reconstruct,epi_df)
 S3method(dplyr_row_slice,epi_df)
 S3method(epix_truncate_versions_after,epi_archive)
+S3method(epix_truncate_versions_after,epi_archive2)
 S3method(epix_truncate_versions_after,grouped_epi_archive)
+S3method(epix_truncate_versions_after,grouped_epi_archive2)
 S3method(group_by,epi_archive)
+S3method(group_by,epi_archive2)
 S3method(group_by,epi_df)
 S3method(group_by,grouped_epi_archive)
+S3method(group_by,grouped_epi_archive2)
 S3method(group_by_drop_default,grouped_epi_archive)
+S3method(group_by_drop_default,grouped_epi_archive2)
 S3method(group_modify,epi_df)
 S3method(groups,grouped_epi_archive)
+S3method(groups,grouped_epi_archive2)
 S3method(key_colnames,data.frame)
 S3method(key_colnames,default)
 S3method(key_colnames,epi_archive)
 S3method(key_colnames,epi_df)
 S3method(next_after,Date)
 S3method(next_after,integer)
+S3method(print,epi_archive2)
 S3method(print,epi_df)
+S3method(print,grouped_epi_archive2)
 S3method(select,epi_df)
+S3method(slide,grouped_epi_archive2)
 S3method(summary,epi_df)
+S3method(truncate_versions_after,grouped_epi_archive2)
 S3method(ungroup,epi_df)
 S3method(ungroup,grouped_epi_archive)
+S3method(ungroup,grouped_epi_archive2)
 S3method(unnest,epi_df)
 export("%>%")
 export(archive_cases_dv_subset)
 export(arrange)
 export(as_epi_archive)
+export(as_epi_archive2)
 export(as_epi_df)
+export(as_of)
 export(as_tsibble)
 export(autoplot)
+export(clone)
 export(detect_outlr)
 export(detect_outlr_rm)
 export(detect_outlr_stl)
@@ -47,24 +64,33 @@ export(epi_archive)
 export(epi_cor)
 export(epi_slide)
 export(epix_as_of)
+export(epix_as_of2)
 export(epix_merge)
+export(epix_merge2)
 export(epix_slide)
+export(epix_slide2)
 export(epix_truncate_versions_after)
+export(fill_through_version)
 export(filter)
 export(group_by)
 export(group_modify)
 export(growth_rate)
 export(is_epi_archive)
+export(is_epi_archive2)
 export(is_epi_df)
 export(is_grouped_epi_archive)
+export(is_grouped_epi_archive2)
 export(key_colnames)
 export(max_version_with_row_in)
 export(mutate)
+export(new_epi_archive2)
 export(new_epi_df)
 export(next_after)
 export(relocate)
 export(rename)
 export(slice)
+export(slide)
+export(truncate_versions_after)
 export(ungroup)
 export(unnest)
 importFrom(R6,R6Class)
diff --git a/R/archive.R b/R/archive.R
index ff3bc20c..a530cc05 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -514,9 +514,6 @@ epi_archive <-
               fromLast = TRUE
             ) %>%
             tibble::as_tibble() %>%
-            # (`as_tibble` should de-alias the DT and its columns in any edge
-            # cases where they are aliased. We don't say we guarantee this
-            # though.)
             dplyr::select(-"version") %>%
             as_epi_df(
               geo_type = self$geo_type,
diff --git a/R/archive_new.R b/R/archive_new.R
new file mode 100644
index 00000000..bcfa84c3
--- /dev/null
+++ b/R/archive_new.R
@@ -0,0 +1,1115 @@
+# We use special features of data.table's `[`. The data.table package has a
+# compatibility feature that disables some/all of these features if it thinks we
+# might expect `data.frame`-compatible behavior instead. We can signal that we
+# want the special behavior via `.datatable.aware = TRUE` or by importing any
+# `data.table` package member. Do both to prevent surprises if we decide to use
+# `data.table::` everywhere and not importing things.
+.datatable.aware <- TRUE
+
+#' Validate a version bound arg
+#'
+#' Expected to be used on `clobberable_versions_start`, `versions_end`,
+#' and similar arguments. Some additional context-specific checks may be needed.
+#'
+#' @param version_bound the version bound to validate
+#' @param x a data frame containing a version column with which to check
+#'   compatibility
+#' @param na_ok Boolean; is `NA` an acceptable "bound"? (If so, `NA` will
+#'   have a special context-dependent meaning.)
+#' @param version_bound_arg optional string; what to call the version bound in
+#'   error messages
+#'
+#' @section Side effects: raises an error if version bound appears invalid
+#'
+#' @noRd
+validate_version_bound <- function(version_bound, x, na_ok = FALSE,
+                                   version_bound_arg = rlang::caller_arg(version_bound),
+                                   x_arg = rlang::caller_arg(version_bound)) {
+  if (is.null(version_bound)) {
+    cli_abort(
+      "{version_bound_arg} cannot be NULL"
+    )
+  }
+  if (na_ok && is.na(version_bound)) {
+    return(invisible(NULL))
+  }
+  if (!test_set_equal(class(version_bound), class(x[["version"]]))) {
+    cli_abort(
+      "{version_bound_arg} must have the same classes as x$version,
+        which is {class(x$version)}",
+    )
+  }
+  if (!test_set_equal(typeof(version_bound), typeof(x[["version"]]))) {
+    cli_abort(
+      "{version_bound_arg} must have the same types as x$version,
+        which is {typeof(x$version)}",
+    )
+  }
+
+  return(invisible(NULL))
+}
+
+#' `max(x$version)`, with error if `x` has 0 rows
+#'
+#' Exported to make defaults more easily copyable.
+#'
+#' @param x `x` argument of [`as_epi_archive`]
+#'
+#' @return `max(x$version)` if it has any rows; raises error if it has 0 rows or
+#'   an `NA` version value
+#'
+#' @export
+max_version_with_row_in <- function(x) {
+  if (nrow(x) == 0L) {
+    cli_abort(
+      "`nrow(x)==0L`, representing a data set history with no row up through the
+       latest observed version, but we don't have a sensible guess at what version
+       that is, or whether any of the empty versions might be clobbered in the
+       future; if we use `x` to form an `epi_archive`, then
+       `clobberable_versions_start` and `versions_end` must be manually specified.",
+      class = "epiprocess__max_version_cannot_be_used"
+    )
+  } else {
+    version_col <- purrr::pluck(x, "version") # error not NULL if doesn't exist
+    if (anyNA(version_col)) {
+      cli_abort("version values cannot be NA",
+        class = "epiprocess__version_values_must_not_be_na"
+      )
+    } else {
+      version_bound <- max(version_col)
+    }
+  }
+}
+
+#' Get the next possible value greater than `x` of the same type
+#'
+#' @param x the starting "value"(s)
+#' @return same class, typeof, and length as `x`
+#'
+#' @export
+next_after <- function(x) UseMethod("next_after")
+
+#' @export
+next_after.integer <- function(x) x + 1L
+
+#' @export
+next_after.Date <- function(x) x + 1L
+
+
+
+#' epi archive
+#' @title `epi_archive` object
+#'
+#' @description An `epi_archive` is an R6 class which contains a data table
+#'   along with several relevant pieces of metadata. The data table can be seen
+#'   as the full archive (version history) for some signal variables of
+#'   interest.
+#'
+#' @details An `epi_archive` is an R6 class which contains a data table `DT`, of
+#'   class `data.table` from the `data.table` package, with (at least) the
+#'   following columns:
+#'
+#' * `geo_value`: the geographic value associated with each row of measurements.
+#' * `time_value`: the time value associated with each row of measurements.
+#' * `version`: the time value specifying the version for each row of
+#'   measurements. For example, if in a given row the `version` is January 15,
+#'   2022 and `time_value` is January 14, 2022, then this row contains the
+#'   measurements of the data for January 14, 2022 that were available one day
+#'   later.
+#'
+#' The data table `DT` has key variables `geo_value`, `time_value`, `version`,
+#'   as well as any others (these can be specified when instantiating the
+#'   `epi_archive` object via the `other_keys` argument, and/or set by operating
+#'   on `DT` directly). Refer to the documentation for [as_epi_archive()] for
+#'   information and examples of relevant parameter names for an `epi_archive` object.
+#'   Note that there can only be a single row per unique combination of
+#'   key variables, and thus the key variables are critical for figuring out how
+#'   to generate a snapshot of data from the archive, as of a given version.
+#'
+#' In general, the last version of each observation is carried forward (LOCF) to
+#'   fill in data between recorded versions, and between the last recorded
+#'   update and the `versions_end`. One consequence is that the `DT`
+#'   doesn't have to contain a full snapshot of every version (although this
+#'   generally works), but can instead contain only the rows that are new or
+#'   changed from the previous version (see `compactify`, which does this
+#'   automatically). Currently, deletions must be represented as revising a row
+#'   to a special state (e.g., making the entries `NA` or including a special
+#'   column that flags the data as removed and performing some kind of
+#'   post-processing), and the archive is unaware of what this state is. Note
+#'   that `NA`s *can* be introduced by `epi_archive` methods for other reasons,
+#'   e.g., in [`epix_fill_through_version`] and [`epix_merge`], if requested, to
+#'   represent potential update data that we do not yet have access to; or in
+#'   [`epix_merge`] to represent the "value" of an observation before the
+#'   version in which it was first released, or if no version of that
+#'   observation appears in the archive data at all.
+#'
+#' **A word of caution:** R6 objects, unlike most other objects in R, have
+#'   reference semantics. A primary consequence of this is that objects are not
+#'   copied when modified. You can read more about this in Hadley Wickham's
+#'   [Advanced R](https://adv-r.hadley.nz/r6.html#r6-semantics) book. In order
+#'   to construct a modified archive while keeping the original intact, first
+#'   make a clone using the `$clone` method, then overwrite the clone's `DT`
+#'   field with `data.table::copy(clone$DT)`, and finally perform the
+#'   modifications on the clone.
+#'
+#' @section Metadata:
+#' The following pieces of metadata are included as fields in an `epi_archive`
+#'   object:
+#'
+#' * `geo_type`: the type for the geo values.
+#' * `time_type`: the type for the time values.
+#' * `additional_metadata`: list of additional metadata for the data archive.
+#'
+#' Unlike an `epi_df` object, metadata for an `epi_archive` object `x` can be
+#'   accessed (and altered) directly, as in `x$geo_type` or `x$time_type`,
+#'   etc. Like an `epi_df` object, the `geo_type` and `time_type` fields in the
+#'   metadata of an `epi_archive` object are not currently used by any
+#'   downstream functions in the `epiprocess` package, and serve only as useful
+#'   bits of information to convey about the data set at hand.
+#'
+#' @section Generating Snapshots:
+#' An `epi_archive` object can be used to generate a snapshot of the data in
+#'   `epi_df` format, which represents the most up-to-date values of the signal
+#'   variables, as of the specified version. This is accomplished by calling the
+#'   `as_of()` method for an `epi_archive` object `x`. More details on this
+#'   method are documented in the wrapper function [`epix_as_of()`].
+#'
+#' @section Sliding Computations:
+#' We can run a sliding computation over an `epi_archive` object, much like
+#'   `epi_slide()` does for an `epi_df` object. This is accomplished by calling
+#'   the `slide()` method for an `epi_archive` object, which works similarly to
+#'   the way `epi_slide()` works for an `epi_df` object, but with one key
+#'   difference: it is version-aware. That is, for an `epi_archive` object, the
+#'   sliding computation at any given reference time point t is performed on
+#'   **data that would have been available as of t**. More details on `slide()`
+#'   are documented in the wrapper function [`epix_slide()`].
+#'
+#' @export
+#' @examples
+#' tib <- tibble::tibble(
+#'   geo_value = rep(c("ca", "hi"), each = 5),
+#'   time_value = rep(seq(as.Date("2020-01-01"),
+#'     by = 1, length.out = 5
+#'   ), times = 2),
+#'   version = rep(seq(as.Date("2020-01-02"),
+#'     by = 1, length.out = 5
+#'   ), times = 2),
+#'   value = rnorm(10, mean = 2, sd = 1)
+#' )
+#'
+#' toy_epi_archive <- tib %>% epi_archive$new(
+#'   geo_type = "state",
+#'   time_type = "day"
+#' )
+#' toy_epi_archive
+#' @name epi_archive
+# TODO: Figure out where to actually put this documentation
+NULL
+
+#' New epi archive
+#' @description Creates a new `epi_archive` object.
+#' @param x A data.frame, data.table, or tibble, with columns `geo_value`,
+#'   `time_value`, `version`, and then any additional number of columns.
+#' @param geo_type Type for the geo values. If missing, then the function will
+#'   attempt to infer it from the geo values present; if this fails, then it
+#'   will be set to "custom".
+#' @param time_type Type for the time values. If missing, then the function will
+#'   attempt to infer it from the time values present; if this fails, then it
+#'   will be set to "custom".
+#' @param other_keys Character vector specifying the names of variables in `x`
+#'   that should be considered key variables (in the language of `data.table`)
+#'   apart from "geo_value", "time_value", and "version".
+#' @param additional_metadata List of additional metadata to attach to the
+#'   `epi_archive` object. The metadata will have `geo_type` and `time_type`
+#'   fields; named entries from the passed list or will be included as well.
+#' @param compactify Optional; Boolean or `NULL`: should we remove rows that are
+#'   considered redundant for the purposes of `epi_archive`'s built-in methods
+#'   such as `as_of`? As these methods use the last version of each observation
+#'   carried forward (LOCF) to interpolate between the version data provided,
+#'   rows that don't change these LOCF results can potentially be omitted to
+#'   save space while maintaining the same behavior (with the help of the
+#'   `clobberable_versions_start` and `versions_end` fields in some edge cases).
+#'   `TRUE` will remove these rows, `FALSE` will not, and missing or `NULL` will
+#'   remove these rows and issue a warning. Generally, this can be set to
+#'   `TRUE`, but if you directly inspect or edit the fields of the `epi_archive`
+#'   such as its `DT`, or rely on redundant updates to achieve a certain
+#'   behavior of the `ref_time_values` default in `epix_slide`, you will have to
+#'   determine whether `compactify=TRUE` will produce the desired results. If
+#'   compactification here is removing a large proportion of the rows, this may
+#'   indicate a potential for space, time, or bandwidth savings upstream the
+#'   data pipeline, e.g., by avoiding fetching, storing, or processing these
+#'   rows of `x`.
+#' @param clobberable_versions_start Optional; as in [`as_epi_archive`]
+#' @param versions_end Optional; as in [`as_epi_archive`]
+#' @return An `epi_archive` object.
+#' @importFrom data.table as.data.table key setkeyv
+#'
+#' @details
+#' Refer to the documentation for [as_epi_archive()] for more information
+#' and examples of parameter names.
+#' @export
+new_epi_archive2 <- function(
+    x,
+    geo_type = NULL,
+    time_type = NULL,
+    other_keys = NULL,
+    additional_metadata = NULL,
+    compactify = NULL,
+    clobberable_versions_start = NA,
+    versions_end = NULL) {
+  assert_data_frame(x)
+  if (!test_subset(c("geo_value", "time_value", "version"), names(x))) {
+    cli_abort(
+      "Columns `geo_value`, `time_value`, and `version` must be present in `x`."
+    )
+  }
+  if (anyMissing(x$version)) {
+    cli_abort("Column `version` must not contain missing values.")
+  }
+
+  # If geo type is missing, then try to guess it
+  if (missing(geo_type) || is.null(geo_type)) {
+    geo_type <- guess_geo_type(x$geo_value)
+  }
+
+  # If time type is missing, then try to guess it
+  if (missing(time_type) || is.null(time_type)) {
+    time_type <- guess_time_type(x$time_value)
+  }
+
+  # Finish off with small checks on keys variables and metadata
+  if (missing(other_keys)) other_keys <- NULL
+  if (missing(additional_metadata) || is.null(additional_metadata)) additional_metadata <- list()
+  if (!test_subset(other_keys, names(x))) {
+    cli_abort("`other_keys` must be contained in the column names of `x`.")
+  }
+  if (any(c("geo_value", "time_value", "version") %in% other_keys)) {
+    cli_abort("`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\".")
+  }
+  if (any(names(additional_metadata) %in% c("geo_type", "time_type"))) {
+    cli_warn("`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\".")
+  }
+
+  # Conduct checks and apply defaults for `compactify`
+  if (missing(compactify)) {
+    compactify <- NULL
+  }
+  assert_logical(compactify, len = 1, null.ok = TRUE)
+
+  # Apply defaults and conduct checks for
+  # `clobberable_versions_start`, `versions_end`:
+  if (missing(clobberable_versions_start)) {
+    clobberable_versions_start <- NA
+  }
+  if (missing(versions_end) || is.null(versions_end)) {
+    versions_end <- max_version_with_row_in(x)
+  }
+  validate_version_bound(clobberable_versions_start, x, na_ok = TRUE)
+  validate_version_bound(versions_end, x, na_ok = FALSE)
+  if (nrow(x) > 0L && versions_end < max(x[["version"]])) {
+    cli_abort(
+      sprintf(
+        "`versions_end` was %s, but `x` contained
+                             updates for a later version or versions, up through %s",
+        versions_end, max(x[["version"]])
+      ),
+      class = "epiprocess__versions_end_earlier_than_updates"
+    )
+  }
+  if (!is.na(clobberable_versions_start) && clobberable_versions_start > versions_end) {
+    cli_abort(
+      sprintf(
+        "`versions_end` was %s, but a `clobberable_versions_start`
+                             of %s indicated that there were later observed versions",
+        versions_end, clobberable_versions_start
+      ),
+      class = "epiprocess__versions_end_earlier_than_clobberable_versions_start"
+    )
+  }
+
+  # --- End of validation and replacing missing args with defaults ---
+
+  # Create the data table; if x was an un-keyed data.table itself,
+  # then the call to as.data.table() will fail to set keys, so we
+  # need to check this, then do it manually if needed
+  key_vars <- c("geo_value", "time_value", other_keys, "version")
+  DT <- as.data.table(x, key = key_vars)
+  if (!identical(key_vars, key(DT))) setkeyv(DT, cols = key_vars)
+
+  maybe_first_duplicate_key_row_index <- anyDuplicated(DT, by = key(DT))
+  if (maybe_first_duplicate_key_row_index != 0L) {
+    cli_abort("`x` must have one row per unique combination of the key variables. If you
+            have additional key variables other than `geo_value`, `time_value`, and
+            `version`, such as an age group column, please specify them in `other_keys`.
+            Otherwise, check for duplicate rows and/or conflicting values for the same
+            measurement.",
+      class = "epiprocess__epi_archive_requires_unique_key"
+    )
+  }
+
+  # Checks to see if a value in a vector is LOCF
+  is_locf <- function(vec) {
+    dplyr::if_else(!is.na(vec) & !is.na(dplyr::lag(vec)),
+      vec == dplyr::lag(vec),
+      is.na(vec) & is.na(dplyr::lag(vec))
+    )
+  }
+
+  # LOCF is defined by a row where all values except for the version
+  # differ from their respective lag values
+
+  # Checks for LOCF's in a data frame
+  rm_locf <- function(df) {
+    dplyr::filter(df, if_any(c(everything(), -version), ~ !is_locf(.)))
+  }
+
+  # Keeps LOCF values, such as to be printed
+  keep_locf <- function(df) {
+    dplyr::filter(df, if_all(c(everything(), -version), ~ is_locf(.)))
+  }
+
+  # Runs compactify on data frame
+  if (is.null(compactify) || compactify == TRUE) {
+    elim <- keep_locf(DT)
+    DT <- rm_locf(DT)
+  } else {
+    # Create empty data frame for nrow(elim) to be 0
+    elim <- tibble::tibble()
+  }
+
+  # Warns about redundant rows
+  if (is.null(compactify) && nrow(elim) > 0) {
+    warning_intro <- cli::format_inline(
+      "Found rows that appear redundant based on
+            last (version of each) observation carried forward;
+            these rows have been removed to 'compactify' and save space:",
+      keep_whitespace = FALSE
+    )
+    warning_data <- paste(collapse = "\n", capture.output(print(elim, topn = 3L, nrows = 7L)))
+    warning_outro <- cli::format_inline(
+      "Built-in `epi_archive` functionality should be unaffected,
+            but results may change if you work directly with its fields (such as `DT`).
+            See `?as_epi_archive` for details.
+            To silence this warning but keep compactification,
+            you can pass `compactify=TRUE` when constructing the archive.",
+      keep_whitespace = FALSE
+    )
+    warning_message <- paste(sep = "\n", warning_intro, warning_data, warning_outro)
+    rlang::warn(warning_message, class = "epiprocess__compactify_default_removed_rows")
+  }
+
+  structure(
+    list(
+      DT = DT,
+      geo_type = geo_type,
+      time_type = time_type,
+      additional_metadata = additional_metadata,
+      clobberable_versions_start = clobberable_versions_start,
+      versions_end = versions_end,
+      private = list() # TODO: to be encapsulated with guard-rails later
+    ),
+    class = "epi_archive2"
+  )
+}
+
+#' Print information about an `epi_archive` object
+#' @param class Boolean; whether to print the class label header
+#' @param methods Boolean; whether to print all available methods of
+#'   the archive
+#' @importFrom cli cli_inform
+#' @export
+print.epi_archive2 <- function(epi_archive, class = TRUE, methods = TRUE) {
+  cli_inform(
+    c(
+      ">" = if (class) "An `epi_archive` object, with metadata:",
+      "i" = if (length(setdiff(key(epi_archive$DT), c("geo_value", "time_value", "version"))) > 0) {
+        "Non-standard DT keys: {setdiff(key(epi_archive$DT), c('geo_value', 'time_value', 'version'))}"
+      },
+      "i" = "Min/max time values: {min(epi_archive$DT$time_value)} / {max(epi_archive$DT$time_value)}",
+      "i" = "First/last version with update: {min(epi_archive$DT$version)} / {max(epi_archive$DT$version)}",
+      "i" = if (!is.na(epi_archive$clobberable_versions_start)) {
+        "Clobberable versions start: {epi_archive$clobberable_versions_start}"
+      },
+      "i" = "Versions end: {epi_archive$versions_end}",
+      "i" = if (methods) "Public R6 methods: {names(epi_archive$public_methods)}",
+      "i" = "A preview of the table ({nrow(epi_archive$DT)} rows x {ncol(epi_archive$DT)} columns):"
+    )
+  )
+
+  return(invisible(epi_archive$DT %>% print()))
+}
+
+
+#' @export
+as_of <- function(x, ...) {
+  UseMethod("as_of")
+}
+
+
+#' As of epi_archive
+#' @description Generates a snapshot in `epi_df` format as of a given version.
+#'   See the documentation for the wrapper function [`epix_as_of()`] for
+#'   details. The parameter descriptions below are copied from there
+#' @param epi_archive An `epi_archive` object
+#' @param max_version Version specifying the max version to permit in the
+#'   snapshot. That is, the snapshot will comprise the unique rows of the
+#'   current archive data that represent the most up-to-date signal values, as
+#'   of the specified `max_version` (and whose `time_value`s are at least
+#'   `min_time_value`).
+#' @param min_time_value Time value specifying the min `time_value` to permit in
+#'   the snapshot. Default is `-Inf`, which effectively means that there is no
+#'   minimum considered.
+#' @param all_versions Boolean; If `all_versions = TRUE`, then the output will be in
+#'   `epi_archive` format, and contain rows in the specified `time_value` range
+#'   having `version <= max_version`. The resulting object will cover a
+#'   potentially narrower `version` and `time_value` range than `x`, depending
+#'   on user-provided arguments. Otherwise, there will be one row in the output
+#'   for the `max_version` of each `time_value`. Default is `FALSE`.
+#' @importFrom data.table between key
+#' @export
+as_of.epi_archive2 <- function(epi_archive, max_version, min_time_value = -Inf, all_versions = FALSE) {
+  other_keys <- setdiff(
+    key(epi_archive$DT),
+    c("geo_value", "time_value", "version")
+  )
+  if (length(other_keys) == 0) other_keys <- NULL
+
+  # Check a few things on max_version
+  if (!test_set_equal(class(max_version), class(epi_archive$DT$version))) {
+    cli_abort(
+      "`max_version` must have the same classes as `epi_archive$DT$version`."
+    )
+  }
+  if (!test_set_equal(typeof(max_version), typeof(epi_archive$DT$version))) {
+    cli_abort(
+      "`max_version` must have the same types as `epi_archive$DT$version`."
+    )
+  }
+  assert_scalar(max_version, na.ok = FALSE)
+  if (max_version > epi_archive$versions_end) {
+    cli_abort("`max_version` must be at most `epi_archive$versions_end`.")
+  }
+  assert_logical(all_versions, len = 1)
+  if (!is.na(epi_archive$clobberable_versions_start) && max_version >= epi_archive$clobberable_versions_start) {
+    cli_warn(
+      'Getting data as of some recent version which could still be
+      overwritten (under routine circumstances) without assigning a new
+      version number (a.k.a. "clobbered").  Thus, the snapshot that we
+      produce here should not be expected to be reproducible later. See
+      `?epi_archive` for more info and `?epix_as_of` on how to muffle.',
+      class = "epiprocess__snapshot_as_of_clobberable_version"
+    )
+  }
+
+  # Filter by version and return
+  if (all_versions) {
+    # epi_archive is copied into result, so we can modify result directly
+    result <- epix_truncate_versions_after(epi_archive, max_version)
+    result$DT <- result$DT[time_value >= min_time_value, ]
+    return(result)
+  }
+
+  # Make sure to use data.table ways of filtering and selecting
+  as_of_epi_df <- epi_archive$DT[time_value >= min_time_value & version <= max_version, ] %>%
+    unique(
+      by = c("geo_value", "time_value", other_keys),
+      fromLast = TRUE
+    ) %>%
+    tibble::as_tibble() %>%
+    dplyr::select(-"version") %>%
+    as_epi_df(
+      geo_type = epi_archive$geo_type,
+      time_type = epi_archive$time_type,
+      as_of = max_version,
+      additional_metadata = c(epi_archive$additional_metadata,
+        other_keys = other_keys
+      )
+    )
+
+  return(as_of_epi_df)
+}
+
+
+#' @export
+fill_through_version <- function(x, ...) {
+  UseMethod("fill_through_version")
+}
+
+
+#' Fill through version
+#' @description Fill in unobserved history using requested scheme by mutating
+#'   the given object and potentially reseating its fields. See
+#'   [`epix_fill_through_version`], which doesn't mutate the input archive but
+#'   might alias its fields.
+#'
+#' @param epi_archive an `epi_archive` object
+#' @param fill_versions_end as in [`epix_fill_through_version`]
+#' @param how as in [`epix_fill_through_version`]
+#'
+#' @importFrom data.table key setkeyv := address copy
+#' @importFrom rlang arg_match
+fill_through_version.epi_archive2 <- function(
+    epi_archive,
+    fill_versions_end,
+    how = c("na", "locf")) {
+  validate_version_bound(fill_versions_end, epi_archive$DT, na_ok = FALSE)
+  how <- arg_match(how)
+  if (epi_archive$versions_end < fill_versions_end) {
+    new_DT <- switch(how,
+      "na" = {
+        # old DT + a version consisting of all NA observations
+        # immediately after the last currently/actually-observed
+        # version. Note that this NA-observation version must only be
+        # added if `epi_archive` is outdated.
+        nonversion_key_cols <- setdiff(key(epi_archive$DT), "version")
+        nonkey_cols <- setdiff(names(epi_archive$DT), key(epi_archive$DT))
+        next_version_tag <- next_after(epi_archive$versions_end)
+        if (next_version_tag > fill_versions_end) {
+          cli_abort(sprintf(paste(
+            "Apparent problem with `next_after` method:",
+            "archive contained observations through version %s",
+            "and the next possible version was supposed to be %s,",
+            "but this appeared to jump from a version < %3$s",
+            "to one > %3$s, implying at least one version in between."
+          ), epi_archive$versions_end, next_version_tag, fill_versions_end))
+        }
+        nonversion_key_vals_ever_recorded <- unique(epi_archive$DT, by = nonversion_key_cols)
+        # In edge cases, the `unique` result can alias the original
+        # DT; detect and copy if necessary:
+        if (identical(address(epi_archive$DT), address(nonversion_key_vals_ever_recorded))) {
+          nonversion_key_vals_ever_recorded <- copy(nonversion_key_vals_ever_recorded)
+        }
+        next_version_DT <- nonversion_key_vals_ever_recorded[
+          , version := next_version_tag
+        ][
+          # this makes the class of these columns logical (`NA` is a
+          # logical NA; we're relying on the rbind below to convert to
+          # the proper class&typeof)
+          , (nonkey_cols) := NA
+        ]
+        # full result DT:
+        setkeyv(rbind(epi_archive$DT, next_version_DT), key(epi_archive$DT))[]
+      },
+      "locf" = {
+        # just the old DT; LOCF is built into other methods:
+        epi_archive$DT
+      }
+    )
+    new_versions_end <- fill_versions_end
+    # Update `epi_archive` all at once with simple, error-free operations +
+    # return below:
+    epi_archive$DT <- new_DT
+    epi_archive$versions_end <- new_versions_end
+  } else {
+    # Already sufficiently up to date; nothing to do.
+  }
+  return(invisible(epi_archive))
+}
+
+
+#' @export
+truncate_versions_after <- function(x, ...) {
+  UseMethod("truncate_versions_after")
+}
+
+
+#' Truncate versions after
+#' @description Filter to keep only older versions, mutating the archive by
+#'   potentially reseating but not mutating some fields. `DT` is likely, but not
+#'   guaranteed, to be copied. Returns the mutated archive
+#'   [invisibly][base::invisible].
+#' @param epi_archive as in [`epix_truncate_versions_after`]
+#' @param max_version as in [`epix_truncate_versions_after`]
+truncate_versions_after.epi_archive2 <- function(
+    epi_archive,
+    max_version) {
+  if (!test_set_equal(class(max_version), class(epi_archive$DT$version))) {
+    cli_abort("`max_version` must have the same classes as `epi_archive$DT$version`.")
+  }
+  if (!test_set_equal(typeof(max_version), typeof(epi_archive$DT$version))) {
+    cli_abort("`max_version` must have the same types as `epi_archive$DT$version`.")
+  }
+  assert_scalar(max_version, na.ok = FALSE)
+  if (max_version > epi_archive$versions_end) {
+    cli_abort("`max_version` must be at most `epi_archive$versions_end`.")
+  }
+  epi_archive$DT <- epi_archive$DT[epi_archive$DT$version <= max_version, colnames(epi_archive$DT), with = FALSE]
+  # (^ this filter operation seems to always copy the DT, even if it
+  # keeps every entry; we don't guarantee this behavior in
+  # documentation, though, so we could change to alias in this case)
+  if (!is.na(epi_archive$clobberable_versions_start) && epi_archive$clobberable_versions_start > max_version) {
+    epi_archive$clobberable_versions_start <- NA
+  }
+  epi_archive$versions_end <- max_version
+  return(invisible(epi_archive))
+}
+
+
+#' Merge epi archive
+#' @description Merges another `epi_archive` with the current one, mutating the
+#'   current one by reseating its `DT` and several other fields, but avoiding
+#'   mutation of the old `DT`; returns the current archive
+#'   [invisibly][base::invisible]. See [`epix_merge`] for a full description
+#'   of the non-R6-method version, which does not mutate either archive, and
+#'   does not alias either archive's `DT`.a
+#' @param x as in [`epix_merge`]
+#' @param y as in [`epix_merge`]
+#' @param sync as in [`epix_merge`]
+#' @param compactify as in [`epix_merge`]
+merge_epi_archive2 <- function(
+    x,
+    y,
+    sync = c("forbid", "na", "locf", "truncate"),
+    compactify = TRUE) {
+  result <- epix_merge(x, y,
+    sync = sync,
+    compactify = compactify
+  )
+
+  # TODO: Use encapsulating methods instead.
+  if (length(x$private_fields) != 0L) {
+    cli_abort("expected no private fields in x",
+      internal = TRUE
+    )
+  }
+
+  # Mutate fields all at once, trying to avoid any potential errors:
+  for (field_name in names(x$public_fields)) {
+    x[[field_name]] <- result[[field_name]]
+  }
+
+  return(invisible(x))
+}
+
+
+#' `group_by` and related methods for `epi_archive`, `grouped_epi_archive`
+#'
+#' @param .data An `epi_archive` or `grouped_epi_archive`
+#' @param ... Similar to [`dplyr::group_by`] (see "Details:" for edge cases);
+#' * For `group_by`: unquoted variable name(s) or other
+#'   ["data masking"][dplyr::dplyr_data_masking] expression(s). It's possible to
+#'   use [`dplyr::mutate`]-like syntax here to calculate new columns on which to
+#'   perform grouping, but note that, if you are regrouping an already-grouped
+#'   `.data` object, the calculations will be carried out ignoring such grouping
+#'   (same as [in dplyr][dplyr::group_by]).
+#' * For `ungroup`: either
+#'   * empty, in order to remove the grouping and output an `epi_archive`; or
+#'   * variable name(s) or other ["tidy-select"][dplyr::dplyr_tidy_select]
+#'     expression(s), in order to remove the matching variables from the list of
+#'     grouping variables, and output another `grouped_epi_archive`.
+#' @param .add Boolean. If `FALSE`, the default, the output will be grouped by
+#'   the variable selection from `...` only; if `TRUE`, the output will be
+#'   grouped by the current grouping variables plus the variable selection from
+#'   `...`.
+#' @param .drop As described in [`dplyr::group_by`]; determines treatment of
+#'   factor columns.
+#' @param x For `groups` or `ungroup`: a `grouped_epi_archive`; for
+#'   `is_grouped_epi_archive`: any object
+#' @param .tbl (For `group_by_drop_default`:) an `epi_archive` or
+#'   `grouped_epi_archive` (`epi_archive` dispatches to the S3 default method;
+#'   `grouped_epi_archive` dispatches its own S3 method)
+#'
+#' @details
+#'
+#' To match `dplyr`, `group_by` allows "data masking" (also referred to as
+#' "tidy evaluation") expressions `...`, not just column names, in a way similar
+#' to `mutate`. Note that replacing or removing key columns with these
+#' expressions is disabled.
+#'
+#' `archive %>% group_by()` and other expressions that group or regroup by zero
+#' columns (indicating that all rows should be treated as part of one large
+#' group) will output a `grouped_epi_archive`, in order to enable the use of
+#' `grouped_epi_archive` methods on the result. This is in slight contrast to
+#' the same operations on tibbles and grouped tibbles, which will *not* output a
+#' `grouped_df` in these circumstances.
+#'
+#' Using `group_by` with `.add=FALSE` to override the existing grouping is
+#' disabled; instead, `ungroup` first then `group_by`.
+#'
+#' Mutation and aliasing: `group_by` tries to use a shallow copy of the `DT`,
+#' introducing column-level aliasing between its input and its result. This
+#' doesn't follow the general model for most `data.table` operations, which
+#' seems to be that, given an nonaliased (i.e., unique) pointer to a
+#' `data.table` object, its pointers to its columns should also be nonaliased.
+#' If you mutate any of the columns of either the input or result, first ensure
+#' that it is fine if columns of the other are also mutated, but do not rely on
+#' such behavior to occur. Additionally, never perform mutation on the key
+#' columns at all (except for strictly increasing transformations), as this will
+#' invalidate sortedness assumptions about the rows.
+#'
+#' `group_by_drop_default` on (ungrouped) `epi_archive`s is expected to dispatch
+#' to `group_by_drop_default.default` (but there is a dedicated method for
+#' `grouped_epi_archive`s).
+#'
+#' @examples
+#'
+#' grouped_archive <- archive_cases_dv_subset %>% group_by(geo_value)
+#'
+#' # `print` for metadata and method listing:
+#' grouped_archive %>% print()
+#'
+#' # The primary use for grouping is to perform a grouped `epix_slide`:
+#'
+#' archive_cases_dv_subset %>%
+#'   group_by(geo_value) %>%
+#'   epix_slide(
+#'     f = ~ mean(.x$case_rate_7d_av),
+#'     before = 2,
+#'     ref_time_values = as.Date("2020-06-11") + 0:2,
+#'     new_col_name = "case_rate_3d_av"
+#'   ) %>%
+#'   ungroup()
+#'
+#' # -----------------------------------------------------------------
+#'
+#' # Advanced: some other features of dplyr grouping are implemented:
+#'
+#' library(dplyr)
+#' toy_archive <-
+#'   tribble(
+#'     ~geo_value, ~age_group, ~time_value, ~version, ~value,
+#'     "us", "adult", "2000-01-01", "2000-01-02", 121,
+#'     "us", "pediatric", "2000-01-02", "2000-01-03", 5, # (addition)
+#'     "us", "adult", "2000-01-01", "2000-01-03", 125, # (revision)
+#'     "us", "adult", "2000-01-02", "2000-01-03", 130 # (addition)
+#'   ) %>%
+#'   mutate(
+#'     age_group = ordered(age_group, c("pediatric", "adult")),
+#'     time_value = as.Date(time_value),
+#'     version = as.Date(version)
+#'   ) %>%
+#'   as_epi_archive(other_keys = "age_group")
+#'
+#' # The following are equivalent:
+#' toy_archive %>% group_by(geo_value, age_group)
+#' toy_archive %>%
+#'   group_by(geo_value) %>%
+#'   group_by(age_group, .add = TRUE)
+#' grouping_cols <- c("geo_value", "age_group")
+#' toy_archive %>% group_by(across(all_of(grouping_cols)))
+#'
+#' # And these are equivalent:
+#' toy_archive %>% group_by(geo_value)
+#' toy_archive %>%
+#'   group_by(geo_value, age_group) %>%
+#'   ungroup(age_group)
+#'
+#' # To get the grouping variable names as a `list` of `name`s (a.k.a. symbols):
+#' toy_archive %>%
+#'   group_by(geo_value) %>%
+#'   groups()
+#'
+#' toy_archive %>%
+#'   group_by(geo_value, age_group, .drop = FALSE) %>%
+#'   epix_slide(f = ~ sum(.x$value), before = 20) %>%
+#'   ungroup()
+#'
+#' @importFrom dplyr group_by
+#' @export
+#'
+#' @aliases grouped_epi_archive
+group_by.epi_archive2 <- function(epi_archive, ..., .add = FALSE, .drop = dplyr::group_by_drop_default(epi_archive)) {
+  # `add` makes no difference; this is an ungrouped `epi_archive`.
+  detailed_mutate <- epix_detailed_restricted_mutate2(epi_archive, ...)
+  assert_logical(.drop)
+  if (!.drop) {
+    grouping_cols <- as.list(detailed_mutate[["archive"]][["DT"]])[detailed_mutate[["request_names"]]]
+    grouping_col_is_factor <- purrr::map_lgl(grouping_cols, is.factor)
+    # ^ Use `as.list` to try to avoid any possibility of a deep copy.
+    if (!any(grouping_col_is_factor)) {
+      cli_warn(
+        "`.drop=FALSE` but there are no factor grouping columns;
+        did you mean to convert one of the columns to a factor beforehand?",
+        class = "epiprocess__group_by_epi_archive__drop_FALSE_no_factors"
+      )
+    } else if (any(diff(grouping_col_is_factor) == -1L)) {
+      cli_warn(
+        "`.drop=FALSE` but there are one or more non-factor grouping columns listed
+        after a factor grouping column; this may produce groups with `NA`s for these
+        columns; see https://github.com/tidyverse/dplyr/issues/5369#issuecomment-683762553;
+        depending on how you want completion to work, you might instead want to convert all
+        grouping columns to factors beforehand, specify the non-factor grouping columns first,
+        or use `.drop=TRUE` and add a call to `tidyr::complete`.",
+        class = "epiprocess__group_by_epi_archive__drop_FALSE_nonfactor_after_factor"
+      )
+    }
+  }
+  new_grouped_epi_archive(detailed_mutate[["archive"]],
+    detailed_mutate[["request_names"]],
+    drop = .drop
+  )
+}
+
+
+#' @export
+slide <- function(.data, ...) {
+  UseMethod("slide")
+}
+
+
+#' Slide over epi archive
+#' @description Slides a given function over variables in an `epi_archive`
+#'   object. See the documentation for the wrapper function [`epix_slide()`] for
+#'   details. The parameter descriptions below are copied from there
+#' @importFrom data.table key
+#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
+#' @param f Function, formula, or missing; together with `...` specifies the
+#'   computation to slide. To "slide" means to apply a computation over a
+#'   sliding (a.k.a. "rolling") time window for each data group. The window is
+#'   determined by the `before` parameter described below. One time step is
+#'   typically one day or one week; see [`epi_slide`] details for more
+#'   explanation. If a function, `f` must take an `epi_df` with the same
+#'   column names as the archive's `DT`, minus the `version` column; followed
+#'   by a one-row tibble containing the values of the grouping variables for
+#'   the associated group; followed by a reference time value, usually as a
+#'   `Date` object; followed by any number of named arguments. If a formula,
+#'   `f` can operate directly on columns accessed via `.x$var` or `.$var`, as
+#'   in `~ mean (.x$var)` to compute a mean of a column `var` for each
+#'   group-`ref_time_value` combination. The group key can be accessed via
+#'   `.y` or `.group_key`, and the reference time value can be accessed via
+#'   `.z` or `.ref_time_value`. If `f` is missing, then `...` will specify the
+#'   computation.
+#' @param ... Additional arguments to pass to the function or formula specified
+#'   via `f`. Alternatively, if `f` is missing, then `...` is interpreted as an
+#'   expression for tidy evaluation; in addition to referring to columns
+#'   directly by name, the expression has access to `.data` and `.env` pronouns
+#'   as in `dplyr` verbs, and can also refer to the `.group_key` and
+#'   `.ref_time_value`. See details of [`epi_slide`].
+#' @param before How far `before` each `ref_time_value` should the sliding
+#'   window extend? If provided, should be a single, non-NA,
+#'   [integer-compatible][vctrs::vec_cast] number of time steps. This window
+#'   endpoint is inclusive. For example, if `before = 7`, and one time step is
+#'   one day, then to produce a value for a `ref_time_value` of January 8, we
+#'   apply the given function or formula to data (for each group present) with
+#'   `time_value`s from January 1 onward, as they were reported on January 8.
+#'   For typical disease surveillance sources, this will not include any data
+#'   with a `time_value` of January 8, and, depending on the amount of reporting
+#'   latency, may not include January 7 or even earlier `time_value`s. (If
+#'   instead the archive were to hold nowcasts instead of regular surveillance
+#'   data, then we would indeed expect data for `time_value` January 8. If it
+#'   were to hold forecasts, then we would expect data for `time_value`s after
+#'   January 8, and the sliding window would extend as far after each
+#'   `ref_time_value` as needed to include all such `time_value`s.)
+#' @param ref_time_values Reference time values / versions for sliding
+#'   computations; each element of this vector serves both as the anchor point
+#'   for the `time_value` window for the computation and the `max_version`
+#'   `as_of` which we fetch data in this window. If missing, then this will set
+#'   to a regularly-spaced sequence of values set to cover the range of
+#'   `version`s in the `DT` plus the `versions_end`; the spacing of values will
+#'   be guessed (using the GCD of the skips between values).
+#' @param time_step Optional function used to define the meaning of one time
+#'   step, which if specified, overrides the default choice based on the
+#'   `time_value` column. This function must take a positive integer and return
+#'   an object of class `lubridate::period`. For example, we can use `time_step
+#'   = lubridate::hours` in order to set the time step to be one hour (this
+#'   would only be meaningful if `time_value` is of class `POSIXct`).
+#' @param new_col_name String indicating the name of the new column that will
+#'   contain the derivative values. Default is "slide_value"; note that setting
+#'   `new_col_name` equal to an existing column name will overwrite this column.
+#' @param as_list_col Should the slide results be held in a list column, or be
+#'   [unchopped][tidyr::unchop]/[unnested][tidyr::unnest]? Default is `FALSE`,
+#'   in which case a list object returned by `f` would be unnested (using
+#'   [`tidyr::unnest()`]), and, if the slide computations output data frames,
+#'   the names of the resulting columns are given by prepending `new_col_name`
+#'   to the names of the list elements.
+#' @param names_sep String specifying the separator to use in `tidyr::unnest()`
+#'   when `as_list_col = FALSE`. Default is "_". Using `NULL` drops the prefix
+#'   from `new_col_name` entirely.
+#' @param all_versions (Not the same as `all_rows` parameter of `epi_slide`.) If
+#'   `all_versions = TRUE`, then `f` will be passed the version history (all
+#'   `version <= ref_time_value`) for rows having `time_value` between
+#'   `ref_time_value - before` and `ref_time_value`. Otherwise, `f` will be
+#'   passed only the most recent `version` for every unique `time_value`.
+#'   Default is `FALSE`.
+slide.epi_archive2 <- function(epi_archive, f, ..., before, ref_time_values,
+                               time_step, new_col_name = "slide_value",
+                               as_list_col = FALSE, names_sep = "_",
+                               all_versions = FALSE) {
+  # For an "ungrouped" slide, treat all rows as belonging to one big
+  # group (group by 0 vars), like `dplyr::summarize`, and let the
+  # resulting `grouped_epi_archive` handle the slide:
+  slide(
+    group_by(epi_archive),
+    f,
+    ...,
+    before = before, ref_time_values = ref_time_values,
+    time_step = time_step, new_col_name = new_col_name,
+    as_list_col = as_list_col, names_sep = names_sep,
+    all_versions = all_versions
+  ) %>%
+    # We want a slide on ungrouped archives to output something
+    # ungrouped, rather than retaining the trivial (0-variable)
+    # grouping applied above. So we `ungroup()`. However, the current
+    # `dplyr` implementation automatically ignores/drops trivial
+    # groupings, so this is just a no-op for now.
+    ungroup()
+}
+
+
+#' Convert to `epi_archive` format
+#'
+#' Converts a data frame, data table, or tibble into an `epi_archive`
+#' object. See the [archive
+#' vignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html) for
+#' examples. The parameter descriptions below are copied from there
+#'
+#' @param x A data frame, data table, or tibble, with columns `geo_value`,
+#'   `time_value`, `version`, and then any additional number of columns.
+#' @param geo_type Type for the geo values. If missing, then the function will
+#'   attempt to infer it from the geo values present; if this fails, then it
+#'   will be set to "custom".
+#' @param time_type Type for the time values. If missing, then the function will
+#'   attempt to infer it from the time values present; if this fails, then it
+#'   will be set to "custom".
+#' @param other_keys Character vector specifying the names of variables in `x`
+#'   that should be considered key variables (in the language of `data.table`)
+#'   apart from "geo_value", "time_value", and "version".
+#' @param additional_metadata List of additional metadata to attach to the
+#'   `epi_archive` object. The metadata will have `geo_type` and `time_type`
+#'   fields; named entries from the passed list or will be included as well.
+#' @param compactify Optional; Boolean or `NULL`: should we remove rows that are
+#'   considered redundant for the purposes of `epi_archive`'s built-in methods
+#'   such as `as_of`? As these methods use the last version of each observation
+#'   carried forward (LOCF) to interpolate between the version data provided,
+#'   rows that don't change these LOCF results can potentially be omitted to
+#'   save space. `TRUE` will remove these rows, `FALSE` will not, and missing or
+#'   `NULL` will remove these rows and issue a warning. Generally, this can be
+#'   set to `TRUE`, but if you directly inspect or edit the fields of the
+#'   `epi_archive` such as its `DT`, you will have to determine whether
+#'   `compactify=TRUE` will produce the desired results. If compactification
+#'   here is removing a large proportion of the rows, this may indicate a
+#'   potential for space, time, or bandwidth savings upstream the data pipeline,
+#'   e.g., when fetching, storing, or preparing the input data `x`
+#' @param clobberable_versions_start Optional; `length`-1; either a value of the
+#'   same `class` and `typeof` as `x$version`, or an `NA` of any `class` and
+#'   `typeof`: specifically, either (a) the earliest version that could be
+#'   subject to "clobbering" (being overwritten with different update data, but
+#'   using the *same* version tag as the old update data), or (b) `NA`, to
+#'   indicate that no versions are clobberable. There are a variety of reasons
+#'   why versions could be clobberable under routine circumstances, such as (a)
+#'   today's version of one/all of the columns being published after initially
+#'   being filled with `NA` or LOCF, (b) a buggy version of today's data being
+#'   published but then fixed and republished later in the day, or (c) data
+#'   pipeline delays (e.g., publisher uploading, periodic scraping, database
+#'   syncing, periodic fetching, etc.) that make events (a) or (b) reflected
+#'   later in the day (or even on a different day) than expected; potential
+#'   causes vary between different data pipelines. The default value is `NA`,
+#'   which doesn't consider any versions to be clobberable. Another setting that
+#'   may be appropriate for some pipelines is `max_version_with_row_in(x)`.
+#' @param versions_end Optional; length-1, same `class` and `typeof` as
+#'   `x$version`: what is the last version we have observed? The default is
+#'   `max_version_with_row_in(x)`, but values greater than this could also be
+#'   valid, and would indicate that we observed additional versions of the data
+#'   beyond `max(x$version)`, but they all contained empty updates. (The default
+#'   value of `clobberable_versions_start` does not fully trust these empty
+#'   updates, and assumes that any version `>= max(x$version)` could be
+#'   clobbered.) If `nrow(x) == 0`, then this argument is mandatory.
+#' @return An `epi_archive` object.
+#'
+#' @details This simply a wrapper around the `new()` method of the `epi_archive`
+#'   class, so for example:
+#'   ```
+#'   x <- as_epi_archive(df, geo_type = "state", time_type = "day")
+#'   ```
+#'   would be equivalent to:
+#'   ```
+#'   x <- epi_archive$new(df, geo_type = "state", time_type = "day")
+#'   ```
+#'
+#' @export
+#' @examples
+#' # Simple ex. with necessary keys
+#' tib <- tibble::tibble(
+#'   geo_value = rep(c("ca", "hi"), each = 5),
+#'   time_value = rep(seq(as.Date("2020-01-01"),
+#'     by = 1, length.out = 5
+#'   ), times = 2),
+#'   version = rep(seq(as.Date("2020-01-02"),
+#'     by = 1, length.out = 5
+#'   ), times = 2),
+#'   value = rnorm(10, mean = 2, sd = 1)
+#' )
+#'
+#' toy_epi_archive <- tib %>% as_epi_archive(
+#'   geo_type = "state",
+#'   time_type = "day"
+#' )
+#' toy_epi_archive
+#'
+#' # Ex. with an additional key for county
+#' df <- data.frame(
+#'   geo_value = c(replicate(2, "ca"), replicate(2, "fl")),
+#'   county = c(1, 3, 2, 5),
+#'   time_value = c(
+#'     "2020-06-01",
+#'     "2020-06-02",
+#'     "2020-06-01",
+#'     "2020-06-02"
+#'   ),
+#'   version = c(
+#'     "2020-06-02",
+#'     "2020-06-03",
+#'     "2020-06-02",
+#'     "2020-06-03"
+#'   ),
+#'   cases = c(1, 2, 3, 4),
+#'   cases_rate = c(0.01, 0.02, 0.01, 0.05)
+#' )
+#'
+#' x <- df %>% as_epi_archive(
+#'   geo_type = "state",
+#'   time_type = "day",
+#'   other_keys = "county"
+#' )
+as_epi_archive2 <- function(x, geo_type, time_type, other_keys,
+                            additional_metadata = list(),
+                            compactify = NULL,
+                            clobberable_versions_start = NA,
+                            versions_end = max_version_with_row_in(x)) {
+  new_epi_archive2(
+    x, geo_type, time_type, other_keys, additional_metadata,
+    compactify, clobberable_versions_start, versions_end
+  )
+}
+
+#' Test for `epi_archive` format
+#'
+#' @param x An object.
+#' @param grouped_okay Optional; Boolean; should a `grouped_epi_archive` also
+#'   count? Default is `FALSE`.
+#' @return `TRUE` if the object inherits from `epi_archive`.
+#'
+#' @export
+#' @examples
+#' is_epi_archive(jhu_csse_daily_subset) # FALSE (this is an epi_df, not epi_archive)
+#' is_epi_archive(archive_cases_dv_subset) # TRUE
+#'
+#' # By default, grouped_epi_archives don't count as epi_archives, as they may
+#' # support a different set of operations from regular `epi_archives`. This
+#' # behavior can be controlled by `grouped_okay`.
+#' grouped_archive <- archive_cases_dv_subset$group_by(geo_value)
+#' is_epi_archive(grouped_archive) # FALSE
+#' is_epi_archive(grouped_archive, grouped_okay = TRUE) # TRUE
+#'
+#' @seealso [`is_grouped_epi_archive`]
+is_epi_archive2 <- function(x, grouped_okay = FALSE) {
+  inherits(x, "epi_archive2") || grouped_okay && inherits(x, "grouped_epi_archive2")
+}
+
+
+#' @export
+clone <- function(x, ...) {
+  UseMethod("clone")
+}
+
+
+#' @export
+clone.epi_archive2 <- function(epi_archive, deep = FALSE) {
+  # TODO: Finish.
+  if (deep) {
+    epi_archive$DT <- copy(epi_archive$DT)
+  } else {
+    epi_archive$DT <- copy(epi_archive$DT)
+  }
+  return(epi_archive)
+}
diff --git a/R/grouped_archive_new.R b/R/grouped_archive_new.R
new file mode 100644
index 00000000..c0e6c35e
--- /dev/null
+++ b/R/grouped_archive_new.R
@@ -0,0 +1,456 @@
+#'
+#' Convenience function for performing a `tidy_select` on dots according to its
+#' docs, and taking the names (rather than the integer indices).
+#'
+#' @param ... tidyselect-syntax selection description
+#' @param .data named vector / data frame; context for the description / the
+#'   object to which the selections apply
+#' @return character vector containing names of entries/columns of
+#'   `names(.data)` denoting the selection
+#'
+#' @noRd
+eval_pure_select_names_from_dots <- function(..., .data) {
+  # `?tidyselect::eval_select` tells us to use this form when we take in dots.
+  # It seems a bit peculiar, since the expr doesn't pack with it a way to get at
+  # the environment for the dots, but it looks like `eval_select` will assume
+  # the caller env (our `environment()`) when given an expr, and thus have
+  # access to the dots.
+  #
+  # If we were allowing renaming, we'd need to be careful about which names (new
+  # vs. old vs. both) to return here.
+  names(tidyselect::eval_select(rlang::expr(c(...)), .data, allow_rename = FALSE))
+}
+
+#' Get names of dots without forcing the dots
+#'
+#' For use in functions that use nonstandard evaluation (NSE) on the dots; we
+#' can't use the pattern `names(list(...))` in this case because it will attempt
+#' to force/(standard-)evaluate the dots, and we want to avoid attempted forcing of the
+#' dots if we're using NSE.
+#'
+#' @noRd
+nse_dots_names <- function(...) {
+  names(rlang::call_match())
+}
+nse_dots_names2 <- function(...) {
+  rlang::names2(rlang::call_match())
+}
+
+#' @importFrom dplyr group_by_drop_default
+#' @noRd
+new_grouped_epi_archive <- function(ungrouped, vars, drop) {
+  if (inherits(ungrouped, "grouped_epi_archive")) {
+    cli_abort(
+      "`ungrouped` must not already be grouped (neither automatic regrouping
+       nor nested grouping is supported).  Either use `group_by` with `.add=TRUE`,
+        or `ungroup` first.",
+      class = "epiprocess__grouped_epi_archive__ungrouped_arg_is_already_grouped",
+      epiprocess__ungrouped_class = class(ungrouped),
+      epiprocess__ungrouped_groups = groups(ungrouped)
+    )
+  }
+  assert_class(ungrouped, "epi_archive2")
+  assert_character(vars)
+  if (!test_subset(vars, names(ungrouped$DT))) {
+    cli_abort(
+      "All grouping variables `vars` must be present in the data.",
+    )
+  }
+  if ("version" %in% vars) {
+    cli_abort("`version` has a special interpretation and cannot be used by itself as a grouping variable")
+  }
+  assert_logical(drop, len = 1)
+
+  # -----
+  private <- list()
+  private$ungrouped <- ungrouped
+  private$vars <- vars
+  private$drop <- drop
+
+  return(structure(
+    list(
+      private = private
+    ),
+    class = c("grouped_epi_archive2", "epi_archive2")
+  ))
+}
+
+#' @export
+print.grouped_epi_archive2 <- function(grouped_epi_archive, class = TRUE) {
+  if (class) cat("A `grouped_epi_archive` object:\n")
+  writeLines(wrap_varnames(grouped_epi_archive$private$vars, initial = "* Groups: "))
+  # If none of the grouping vars is a factor, then $drop doesn't seem
+  # relevant, so try to be less verbose and don't message about it.
+  #
+  # Below map-then-extract may look weird, but the more natural
+  # extract-then-map appears to trigger copies of the extracted columns
+  # since we are working with a `data.table` (unless we go through
+  # `as.list`, but its current column-aliasing behavior is probably not
+  # something to rely too much on), while map functions currently appear
+  # to avoid column copies.
+  if (any(purrr::map_lgl(grouped_epi_archive$private$ungrouped$DT, is.factor)[grouped_epi_archive$private$vars])) {
+    cat(strwrap(init = "* ", prefix = "  ", sprintf(
+      "%s groups formed by factor levels that don't appear in the data",
+      if (grouped_epi_archive$private$drop) "Drops" else "Does not drop"
+    )))
+    cat("\n")
+  }
+  cat("It wraps an ungrouped `epi_archive`, with metadata:\n")
+  print(grouped_epi_archive$private$ungrouped, class = FALSE)
+  # Return self invisibly for convenience in `$`-"pipe":
+  invisible(grouped_epi_archive)
+}
+
+#' @include methods-epi_archive.R
+#' @rdname group_by.epi_archive
+#'
+#' @importFrom dplyr group_by
+#' @export
+group_by.grouped_epi_archive2 <- function(
+    grouped_epi_archive,
+    ...,
+    .add = FALSE,
+    .drop = dplyr::group_by_drop_default(grouped_epi_archive)) {
+  assert_logical(.add, len = 1)
+  if (!.add) {
+    cli_abort('`group_by` on a `grouped_epi_archive` with `.add=FALSE` is forbidden
+              (neither automatic regrouping nor nested grouping is supported).
+              If you want to "regroup", replacing the existing grouping vars, `ungroup` first and then `group_by`.
+              If you want to add to the existing grouping vars, call `group_by` specifying `.add=TRUE`.
+            ',
+      class = "epiprocess__grouped_epi_archive_group_by_with_add_FALSE"
+    )
+  } else {
+    # `group_by` `...` computations are performed on ungrouped data (see
+    # `?dplyr::group_by`)
+    detailed_mutate <- epix_detailed_restricted_mutate2(grouped_epi_archive$private$ungrouped, ...)
+    out_ungrouped <- detailed_mutate[["archive"]]
+    vars_from_dots <- detailed_mutate[["request_names"]]
+    vars <- union(grouped_epi_archive$private$vars, vars_from_dots)
+    new_grouped_epi_archive(grouped_epi_archive$private$ungrouped, vars, .drop)
+  }
+}
+
+#' @include methods-epi_archive.R
+#' @rdname group_by.epi_archive
+#'
+#' @export
+group_by_drop_default.grouped_epi_archive2 <- function(grouped_epi_archive) {
+  grouped_epi_archive$private$drop
+}
+
+#' @include methods-epi_archive.R
+#' @rdname group_by.epi_archive
+#'
+#' @importFrom dplyr groups
+#' @export
+groups.grouped_epi_archive2 <- function(grouped_epi_archive) {
+  rlang::syms(grouped_epi_archive$private$vars)
+}
+
+#' @include methods-epi_archive.R
+#' @rdname group_by.epi_archive
+#'
+#' @importFrom dplyr ungroup
+#' @export
+ungroup.grouped_epi_archive2 <- function(grouped_epi_archive, ...) {
+  if (rlang::dots_n(...) == 0L) {
+    # No dots = special behavior: remove all grouping vars and convert to
+    # an ungrouped class, as with `grouped_df`s.
+    grouped_epi_archive$private$ungrouped
+  } else {
+    exclude_vars <- eval_pure_select_names_from_dots(..., .data = grouped_epi_archive$private$ungrouped$DT)
+    # (requiring a pure selection here is a little stricter than dplyr
+    # implementations, but passing a renaming selection into `ungroup`
+    # seems pretty weird.)
+    result_vars <- grouped_epi_archive$private$vars[!grouped_epi_archive$private$vars %in% exclude_vars]
+    # `vars` might be length 0 if the user's tidyselection removed all
+    # grouping vars. Unlike with tibble, opt here to keep the result as a
+    # grouped_epi_archive, for output class consistency when `...` is
+    # provided.
+    new_grouped_epi_archive(grouped_epi_archive$private$ungrouped, result_vars, grouped_epi_archive$private$drop)
+  }
+}
+
+#' Truncate versions after a given version, grouped
+#' @description Filter to keep only older versions by mutating the underlying
+#'   `epi_archive` using `$truncate_versions_after`. Returns the mutated
+#'   `grouped_epi_archive` [invisibly][base::invisible].
+#' @param x as in [`epix_truncate_versions_after`]
+#' @param max_version as in [`epix_truncate_versions_after`]
+#' @export
+truncate_versions_after.grouped_epi_archive2 <- function(grouped_epi_archive, max_version) {
+  # The grouping is irrelevant for this method; if we were to split into
+  # groups and recombine appropriately, we should get the same result as
+  # just leveraging the ungrouped method, so just do the latter:
+  truncate_versions_after(grouped_epi_archive$private$ungrouped, max_version)
+  return(invisible(grouped_epi_archive))
+}
+
+#' Truncate versions after a given version, grouped
+#' @export
+epix_truncate_versions_after.grouped_epi_archive2 <- function(grouped_epi_archive, max_version) {
+  cloned_group_epi_archive <- clone(grouped_epi_archive, deep = TRUE)
+  return((truncate_versions_after(cloned_group_epi_archive, max_version)))
+  # ^ second set of parens drops invisibility
+}
+
+
+#' Slide over grouped epi archive
+#' @description Slides a given function over variables in a `grouped_epi_archive`
+#'   object. See the documentation for the wrapper function [`epix_slide()`] for
+#'   details.
+#' @importFrom data.table key address rbindlist setDF
+#' @importFrom tibble as_tibble new_tibble validate_tibble
+#' @importFrom dplyr group_by groups
+#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
+#'  env missing_arg
+#' @export
+slide.grouped_epi_archive2 <- function(grouped_epi_archive, f, ..., before, ref_time_values,
+                                       time_step, new_col_name = "slide_value",
+                                       as_list_col = FALSE, names_sep = "_",
+                                       all_versions = FALSE) {
+  # Perform some deprecated argument checks without using `<param> =
+  # deprecated()` in the function signature, because they are from
+  # early development versions and much more likely to be clutter than
+  # informative in the signature.
+  if ("group_by" %in% nse_dots_names(...)) {
+    cli_abort("
+          The `group_by` argument to `slide` has been removed; please use
+          the `group_by` S3 generic function or `$group_by` R6 method
+          before the slide instead. (If you were instead trying to pass a
+          `group_by` argument to `f` or create a column named `group_by`,
+          this check is a false positive, but you will still need to use a
+          different column name here and rename the resulting column after
+          the slide.)
+        ", class = "epiprocess__epix_slide_group_by_parameter_deprecated")
+  }
+  if ("all_rows" %in% nse_dots_names(...)) {
+    cli_abort("
+          The `all_rows` argument has been removed from `epix_slide` (but
+          is still supported in `epi_slide`). Add rows for excluded
+          results with a manual join instead.
+        ", class = "epiprocess__epix_slide_all_rows_parameter_deprecated")
+  }
+
+  if (missing(ref_time_values)) {
+    ref_time_values <- epix_slide_ref_time_values_default(grouped_epi_archive$private$ungrouped)
+  } else {
+    assert_numeric(ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
+    if (any(ref_time_values > grouped_epi_archive$private$ungrouped$versions_end)) {
+      cli_abort("Some `ref_time_values` are greater than the latest version in the archive.")
+    }
+    if (anyDuplicated(ref_time_values) != 0L) {
+      cli_abort("Some `ref_time_values` are duplicated.")
+    }
+    # Sort, for consistency with `epi_slide`, although the current
+    # implementation doesn't take advantage of it.
+    ref_time_values <- sort(ref_time_values)
+  }
+
+  # Validate and pre-process `before`:
+  if (missing(before)) {
+    cli_abort("`before` is required (and must be passed by name);
+                if you did not want to apply a sliding window but rather
+                to map `as_of` and `f` across various `ref_time_values`,
+                pass a large `before` value (e.g., if time steps are days,
+                `before=365000`).")
+  }
+  before <- vctrs::vec_cast(before, integer())
+  assert_int(before, lower = 0L, null.ok = FALSE, na.ok = FALSE)
+
+  # If a custom time step is specified, then redefine units
+
+  if (!missing(time_step)) before <- time_step(before)
+
+  # Symbolize column name
+  new_col <- sym(new_col_name)
+
+  # Validate rest of parameters:
+  assert_logical(as_list_col, len = 1L)
+  assert_logical(all_versions, len = 1L)
+  assert_character(names_sep, len = 1L, null.ok = TRUE)
+
+  # Computation for one group, one time value
+  comp_one_grp <- function(.data_group, .group_key,
+                           f, ...,
+                           ref_time_value,
+                           new_col) {
+    # Carry out the specified computation
+    comp_value <- f(.data_group, .group_key, ref_time_value, ...)
+
+    if (all_versions) {
+      # Extract data from archive so we can do length checks below. When
+      # `all_versions = TRUE`, `.data_group` will always be an ungrouped
+      # archive because of the preceding `as_of` step.
+      .data_group <- .data_group$DT
+    }
+
+    assert(
+      check_atomic(comp_value, any.missing = TRUE),
+      check_data_frame(comp_value),
+      combine = "or",
+      .var.name = vname(comp_value)
+    )
+
+    # Label every result row with the `ref_time_value`
+    res <- list(time_value = ref_time_value)
+
+    # Wrap the computation output in a list and unchop/unnest later if
+    # `as_list_col = FALSE`. This approach means that we will get a
+    # list-class col rather than a data.frame-class col when
+    # `as_list_col = TRUE` and the computations outputs are data
+    # frames.
+    res[[new_col]] <- list(comp_value)
+
+    # Convert the list to a tibble all at once for speed.
+    return(validate_tibble(new_tibble(res)))
+  }
+
+  # If `f` is missing, interpret ... as an expression for tidy evaluation
+  if (missing(f)) {
+    quos <- enquos(...)
+    if (length(quos) == 0) {
+      cli_abort("If `f` is missing then a computation must be specified via `...`.")
+    }
+    if (length(quos) > 1) {
+      cli_abort("If `f` is missing then only a single computation can be specified via `...`.")
+    }
+
+    f <- quos[[1]]
+    new_col <- sym(names(rlang::quos_auto_name(quos)))
+    ... <- missing_arg() # magic value that passes zero args as dots in calls below
+  }
+
+  f <- as_slide_computation(f, ...)
+  x <- lapply(ref_time_values, function(ref_time_value) {
+    # Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`,
+    # `epi_archive` if `all_versions` is `TRUE`:
+    as_of_raw <- as_of(grouped_epi_archive$private$ungrouped,
+      ref_time_value,
+      min_time_value = ref_time_value - before,
+      all_versions = all_versions
+    )
+
+    # Set:
+    # * `as_of_df`, the data.frame/tibble/epi_df/etc. that we will
+    #    `group_modify` as the `.data` argument. Might or might not
+    #    include version column.
+    # * `group_modify_fn`, the corresponding `.f` argument
+    if (!all_versions) {
+      as_of_df <- as_of_raw
+      group_modify_fn <- comp_one_grp
+    } else {
+      as_of_archive <- as_of_raw
+      # We essentially want to `group_modify` the archive, but
+      # haven't implemented this method yet. Next best would be
+      # `group_modify` on its `$DT`, but that has different
+      # behavior based on whether or not `dtplyr` is loaded.
+      # Instead, go through an ordinary data frame, trying to avoid
+      # copies.
+      if (address(as_of_archive$DT) == address(grouped_epi_archive$private$ungrouped$DT)) {
+        # `as_of` aliased its the full `$DT`; copy before mutating:
+        #
+        # Note: this step is probably unneeded; we're fine with
+        # aliasing of the DT or its columns: vanilla operations aren't
+        # going to mutate them in-place if they are aliases, and we're
+        # not performing mutation (unlike the situation with
+        # `fill_through_version` where we do mutate a `DT` and don't
+        # want aliasing).
+        as_of_archive$DT <- copy(as_of_archive$DT)
+      }
+      dt_key <- data.table::key(as_of_archive$DT)
+      as_of_df <- as_of_archive$DT
+      data.table::setDF(as_of_df)
+
+      # Convert each subgroup chunk to an archive before running the calculation.
+      group_modify_fn <- function(.data_group, .group_key,
+                                  f, ...,
+                                  ref_time_value,
+                                  new_col) {
+        # .data_group is coming from as_of_df as a tibble, but we
+        # want to feed `comp_one_grp` an `epi_archive` backed by a
+        # DT; convert and wrap:
+        data.table::setattr(.data_group, "sorted", dt_key)
+        data.table::setDT(.data_group, key = dt_key)
+        .data_group_archive <- clone(as_of_archive)
+        .data_group_archive$DT <- .data_group
+        comp_one_grp(.data_group_archive, .group_key,
+          f = f, ...,
+          ref_time_value = ref_time_value,
+          new_col = new_col
+        )
+      }
+    }
+
+    return(
+      dplyr::group_modify(
+        dplyr::group_by(as_of_df, !!!syms(grouped_epi_archive$private$vars), .drop = grouped_epi_archive$private$drop),
+        group_modify_fn,
+        f = f, ...,
+        ref_time_value = ref_time_value,
+        new_col = new_col,
+        .keep = TRUE
+      )
+    )
+  })
+  # Combine output into a single tibble
+  x <- as_tibble(setDF(rbindlist(x)))
+  # Reconstruct groups
+  x <- group_by(x, !!!syms(grouped_epi_archive$private$vars), .drop = grouped_epi_archive$private$drop)
+
+  # Unchop/unnest if we need to
+  if (!as_list_col) {
+    x <- tidyr::unnest(x, !!new_col, names_sep = names_sep)
+  }
+
+  # if (is_epi_df(x)) {
+  #   # The analogue of `epi_df`'s `as_of` metadata for an archive is
+  #   # `<archive>$versions_end`, at least in the current absence of
+  #   # separate fields/columns denoting the "archive version" with a
+  #   # different resolution, or from the perspective of a different
+  #   # stage of a data pipeline. The `as_of` that is automatically
+  #   # derived won't always match; override:
+  #   attr(x, "metadata")[["as_of"]] <- private$ungrouped$versions_end
+  # }
+
+  # XXX We need to work out when we want to return an `epi_df` and how
+  # to get appropriate keys (see #290, #223, #163). We'll probably
+  # need the commented-out code above if we ever output an `epi_df`.
+  # However, as a stopgap measure to have some more consistency across
+  # different ways of calling `epix_slide`, and to prevent `epi_df`
+  # output with invalid metadata, always output a (grouped or
+  # ungrouped) tibble.
+  x <- decay_epi_df(x)
+
+  return(x)
+}
+
+
+# At time of writing, roxygen parses content in collation order, impacting the
+# presentation of .Rd files that document multiple functions (see
+# https://github.com/r-lib/roxygen2/pull/324). Use @include tags (determining
+# `Collate:`) and ordering of functions within each file in order to get the
+# desired ordering.
+
+
+
+#' @include methods-epi_archive.R
+#' @rdname group_by.epi_archive
+#'
+#' @export
+is_grouped_epi_archive2 <- function(x) {
+  inherits(x, "grouped_epi_archive2")
+}
+
+
+#' @export
+clone.grouped_epi_archive2 <- function(x, deep = FALSE) {
+  # TODO: Finish.
+  if (deep) {
+    ungrouped <- clone(x$private$ungrouped, deep = TRUE)
+  } else {
+    ungrouped <- x$private$ungrouped
+  }
+  new_grouped_epi_archive(ungrouped, x$private$vars, x$private$drop)
+}
diff --git a/R/methods-epi_archive_new.R b/R/methods-epi_archive_new.R
new file mode 100644
index 00000000..3af3056f
--- /dev/null
+++ b/R/methods-epi_archive_new.R
@@ -0,0 +1,830 @@
+#' Generate a snapshot from an `epi_archive` object
+#'
+#' Generates a snapshot in `epi_df` format from an `epi_archive` object, as of a
+#' given version. See the [archive
+#' vignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html) for
+#' examples.
+#'
+#' @param x An `epi_archive` object
+#' @param max_version Time value specifying the max version to permit in the
+#'   snapshot. That is, the snapshot will comprise the unique rows of the
+#'   current archive data that represent the most up-to-date signal values, as
+#'   of the specified `max_version` (and whose time values are at least
+#'   `min_time_value`.)
+#' @param min_time_value Time value specifying the min time value to permit in
+#'   the snapshot. Default is `-Inf`, which effectively means that there is no
+#'   minimum considered.
+#' @param all_versions If `all_versions = TRUE`, then the output will be in
+#'   `epi_archive` format, and contain rows in the specified `time_value` range
+#'   having `version <= max_version`. The resulting object will cover a
+#'   potentially narrower `version` and `time_value` range than `x`, depending
+#'   on user-provided arguments. Otherwise, there will be one row in the output
+#'   for the `max_version` of each `time_value`. Default is `FALSE`.
+#' @return An `epi_df` object.
+#'
+#' @details This is simply a wrapper around the `as_of()` method of the
+#'   `epi_archive` class, so if `x` is an `epi_archive` object, then:
+#'   ```
+#'   epix_as_of(x, max_version = v)
+#'   ```
+#'   is equivalent to:
+#'   ```
+#'   x$as_of(max_version = v)
+#'   ```
+#'
+#' Mutation and aliasing: `epix_as_of` and `$as_of` will not mutate the input
+#' archives, but may in some edge cases alias parts of the inputs, so copy the
+#' outputs if needed before using mutating operations like `data.table`'s `:=`
+#' operator. Currently, the only situation where there is potentially aliasing
+#' is of the `DT` in edge cases with `all_versions = TRUE`, but this may change
+#' in the future.
+#'
+#' @examples
+#' # warning message of data latency shown
+#' epix_as_of2(
+#'   x = archive_cases_dv_subset,
+#'   max_version = max(archive_cases_dv_subset$DT$version)
+#' )
+#'
+#' @examples
+#'
+#' range(archive_cases_dv_subset$DT$version) # 2020-06-02 -- 2021-12-01
+#'
+#' epix_as_of2(
+#'   x = archive_cases_dv_subset,
+#'   max_version = as.Date("2020-06-12")
+#' )
+#'
+#' # When fetching a snapshot as of the latest version with update data in the
+#' # archive, a warning is issued by default, as this update data might not yet
+#' # be finalized (for example, if data versions are labeled with dates, these
+#' # versions might be overwritten throughout the corresponding days with
+#' # additional data or "hotfixes" of erroroneous data; when we build an archive
+#' # based on database queries, the latest available update might still be
+#' # subject to change, but previous versions should be finalized). We can
+#' # muffle such warnings with the following pattern:
+#' withCallingHandlers(
+#'   {
+#'     epix_as_of2(
+#'       x = archive_cases_dv_subset,
+#'       max_version = max(archive_cases_dv_subset$DT$version)
+#'     )
+#'   },
+#'   epiprocess__snapshot_as_of_clobberable_version = function(wrn) invokeRestart("muffleWarning")
+#' )
+#' # Since R 4.0, there is a `globalCallingHandlers` function that can be used
+#' # to globally toggle these warnings.
+#'
+#' @export
+epix_as_of2 <- function(epi_archive, max_version, min_time_value = -Inf, all_versions = FALSE) {
+  assert_class(epi_archive, "epi_archive2")
+  return(as_of(epi_archive, max_version, min_time_value, all_versions = all_versions))
+}
+
+#' `epi_archive` with unobserved history filled in (won't mutate, might alias)
+#'
+#' Sometimes, due to upstream data pipeline issues, we have to work with a
+#' version history that isn't completely up to date, but with functions that
+#' expect archives that are completely up to date, or equally as up-to-date as
+#' another archive. This function provides one way to approach such mismatches:
+#' pretend that we've "observed" additional versions, filling in these versions
+#' with NAs or extrapolated values.
+#'
+#' '`epix_fill_through_version` will not mutate its `x` argument, but its result
+#'  might alias fields of `x` (e.g., mutating the result's `DT` might mutate
+#'  `x$DT`). The R6 method variant, `x$fill_through_version`, will mutate `x` to
+#'  give the result, but might reseat its fields (e.g., references to the old
+#'  `x$DT` might not be updated by this function or subsequent operations on
+#'  `x`), and returns the updated `x` [invisibly][base::invisible].
+#'
+#' @param x An `epi_archive`
+#' @param fill_versions_end Length-1, same class&type as `x$version`: the
+#'   version through which to fill in missing version history; this will be the
+#'   result's `$versions_end` unless it already had a later
+#'   `$versions_end`.
+#' @param how Optional; `"na"` or `"locf"`: `"na"` will fill in any missing
+#'   required version history with `NA`s, by inserting (if necessary) an update
+#'   immediately after the current `$versions_end` that revises all
+#'   existing measurements to be `NA` (this is only supported for `version`
+#'   classes with a `next_after` implementation); `"locf"` will fill in missing
+#'   version history with the last version of each observation carried forward
+#'   (LOCF), by leaving the update `$DT` alone (other `epi_archive` methods are
+#'   based on LOCF).  Default is `"na"`.
+#' @return An `epi_archive`
+epix_fill_through_version2 <- function(epi_archive, fill_versions_end,
+                                       how = c("na", "locf")) {
+  assert_class(epi_archive, "epi_archive2")
+  cloned_epi_archive <- clone(epi_archive)
+  # Enclosing parentheses drop the invisibility flag. See description above of
+  # potential mutation and aliasing behavior.
+  (fill_through_version(cloned_epi_archive, fill_versions_end, how = how))
+}
+
+#' Merge two `epi_archive` objects
+#'
+#' Merges two `epi_archive`s that share a common `geo_value`, `time_value`, and
+#' set of key columns. When they also share a common `versions_end`,
+#' using `$as_of` on the result should be the same as using `$as_of` on `x` and
+#' `y` individually, then performing a full join of the `DT`s on the non-version
+#' key columns (potentially consolidating multiple warnings about clobberable
+#' versions). If the `versions_end` values differ, the
+#' `sync` parameter controls what is done.
+#'
+#' This function, [`epix_merge`], does not mutate its inputs and will not alias
+#' either archive's `DT`, but may alias other fields; `x$merge` will overwrite
+#' `x` with the result of the merge, reseating its `DT` and several other fields
+#' (making them point to different objects), but avoiding mutation of the
+#' contents of the old `DT` (only relevant if you have another reference to the
+#' old `DT` in another object).
+#'
+#' @param x,y Two `epi_archive` objects to join together.
+#' @param sync Optional; `"forbid"`, `"na"`, `"locf"`, or `"truncate"`; in the
+#'   case that `x$versions_end` doesn't match `y$versions_end`, what do we do?:
+#'   `"forbid"`: emit an error; "na": use `max(x$versions_end, y$versions_end)`
+#'   as the result's `versions_end`, but ensure that, if we request a snapshot
+#'   as of a version after `min(x$versions_end, y$versions_end)`, the
+#'   observation columns from the less up-to-date archive will be all NAs (i.e.,
+#'   imagine there was an update immediately after its `versions_end` which
+#'   revised all observations to be `NA`); `"locf"`: use `max(x$versions_end,
+#'   y$versions_end)` as the result's `versions_end`, allowing the last version
+#'   of each observation to be carried forward to extrapolate unavailable
+#'   versions for the less up-to-date input archive (i.e., imagining that in the
+#'   less up-to-date archive's data set remained unchanged between its actual
+#'   `versions_end` and the other archive's `versions_end`); or `"truncate"`:
+#'   use `min(x$versions_end, y$versions_end)` as the result's `versions_end`,
+#'   and discard any rows containing update rows for later versions.
+#' @param compactify Optional; `TRUE`, `FALSE`, or `NULL`; should the result be
+#'   compactified? See [`as_epi_archive`] for an explanation of what this means.
+#'   Default here is `TRUE`.
+#' @return the resulting `epi_archive`
+#'
+#' @details In all cases, `additional_metadata` will be an empty list, and
+#'   `clobberable_versions_start` will be set to the earliest version that could
+#'   be clobbered in either input archive.
+#'
+#' @examples
+#' # create two example epi_archive datasets
+#' x <- archive_cases_dv_subset$DT %>%
+#'   dplyr::select(geo_value, time_value, version, case_rate_7d_av) %>%
+#'   as_epi_archive(compactify = TRUE)
+#' y <- archive_cases_dv_subset$DT %>%
+#'   dplyr::select(geo_value, time_value, version, percent_cli) %>%
+#'   as_epi_archive(compactify = TRUE)
+#' # merge results stored in a third object:
+#' xy <- epix_merge(x, y)
+#' # vs. mutating x to hold the merge result:
+#' x$merge(y)
+#'
+#' @importFrom data.table key set setkeyv
+#' @export
+epix_merge2 <- function(x, y,
+                        sync = c("forbid", "na", "locf", "truncate"),
+                        compactify = TRUE) {
+  assert_class(x, "epi_archive2")
+  assert_class(y, "epi_archive2")
+  sync <- rlang::arg_match(sync)
+
+  if (!identical(x$geo_type, y$geo_type)) {
+    cli_abort("`x` and `y` must have the same `$geo_type`")
+  }
+
+  if (!identical(x$time_type, y$time_type)) {
+    cli_abort("`x` and `y` must have the same `$time_type`")
+  }
+
+  if (length(x$additional_metadata) != 0L) {
+    cli_warn("x$additional_metadata won't appear in merge result",
+      class = "epiprocess__epix_merge_ignores_additional_metadata"
+    )
+  }
+  if (length(y$additional_metadata) != 0L) {
+    cli_warn("y$additional_metadata won't appear in merge result",
+      class = "epiprocess__epix_merge_ignores_additional_metadata"
+    )
+  }
+  result_additional_metadata <- list()
+
+  result_clobberable_versions_start <-
+    if (all(is.na(c(x$clobberable_versions_start, y$clobberable_versions_start)))) {
+      NA # (any type of NA is fine here)
+    } else {
+      Min(c(x$clobberable_versions_start, y$clobberable_versions_start))
+    }
+
+  # The actual merge below may not succeed 100% of the time, so do this
+  # preprocessing using non-mutating (but potentially aliasing) functions. This
+  # approach potentially uses more memory, but won't leave behind a
+  # partially-mutated `x` on failure.
+  if (sync == "forbid") {
+    if (!identical(x$versions_end, y$versions_end)) {
+      cli_abort(paste(
+        "`x` and `y` were not equally up to date version-wise:",
+        "`x$versions_end` was not identical to `y$versions_end`;",
+        "either ensure that `x` and `y` are equally up to date before merging,",
+        "or specify how to deal with this using `sync`"
+      ), class = "epiprocess__epix_merge_unresolved_sync")
+    } else {
+      new_versions_end <- x$versions_end
+      x_DT <- x$DT
+      y_DT <- y$DT
+    }
+  } else if (sync %in% c("na", "locf")) {
+    new_versions_end <- max(x$versions_end, y$versions_end)
+    x_DT <- epix_fill_through_version2(x, new_versions_end, sync)$DT
+    y_DT <- epix_fill_through_version2(y, new_versions_end, sync)$DT
+  } else if (sync == "truncate") {
+    new_versions_end <- min(x$versions_end, y$versions_end)
+    x_DT <- x$DT[x[["DT"]][["version"]] <= new_versions_end, names(x$DT), with = FALSE]
+    y_DT <- y$DT[y[["DT"]][["version"]] <= new_versions_end, names(y$DT), with = FALSE]
+  } else {
+    cli_abort("unimplemented")
+  }
+
+  # key(x_DT) should be the same as key(x$DT) and key(y_DT) should be the same
+  # as key(y$DT). Below, we only use {x,y}_DT in the code (making it easier to
+  # split the code into separate functions if we wish), but still refer to
+  # {x,y}$DT in the error messages (further relying on this assumption).
+  #
+  # Check&ensure that the above assumption; if it didn't already hold, we likely
+  # have a bug in the preprocessing, a weird/invalid archive as input, and/or a
+  # data.table version with different semantics (which may break other parts of
+  # our code).
+  x_DT_key_as_expected <- identical(key(x$DT), key(x_DT))
+  y_DT_key_as_expected <- identical(key(y$DT), key(y_DT))
+  if (!x_DT_key_as_expected || !y_DT_key_as_expected) {
+    cli_warn("
+      `epiprocess` internal warning (please report): pre-processing for
+      epix_merge unexpectedly resulted in an intermediate data table (or
+      tables) with a different key than the corresponding input archive.
+      Manually setting intermediate data table keys to the expected values.
+    ", internal = TRUE)
+    setkeyv(x_DT, key(x$DT))
+    setkeyv(y_DT, key(y$DT))
+  }
+  # Without some sort of annotations of what various columns represent, we can't
+  # do something that makes sense when merging archives with mismatched keys.
+  # E.g., even if we assume extra keys represent demographic breakdowns, a
+  # sensible default treatment of count-type and rate-type value columns would
+  # differ.
+  if (!identical(sort(key(x_DT)), sort(key(y_DT)))) {
+    cli_abort("
+            The archives must have the same set of key column names; if the
+            key columns represent the same things, just with different
+            names, please retry after manually renaming to match; if they
+            represent different things (e.g., x has an age breakdown
+            but y does not), please retry after processing them to share
+            the same key (e.g., by summarizing x to remove the age breakdown,
+            or by applying a static age breakdown to y).
+          ", class = "epiprocess__epix_merge_x_y_must_have_same_key_set")
+  }
+  # `by` cols = result (and each input's) `key` cols, and determine
+  # the row set, determined using a full join via `merge`
+  #
+  # non-`by` cols = "value"-ish cols, and are looked up with last
+  #                 version carried forward via rolling joins
+  by <- key(x_DT) # = some perm of key(y_DT)
+  if (!all(c("geo_value", "time_value", "version") %in% key(x_DT))) {
+    cli_abort('Invalid `by`; `by` is currently set to the common `key` of
+           the two archives, and is expected to contain
+           "geo_value", "time_value", and "version".',
+      class = "epiprocess__epi_archive_must_have_required_key_cols"
+    )
+  }
+  if (length(by) < 1L || utils::tail(by, 1L) != "version") {
+    cli_abort('Invalid `by`; `by` is currently set to the common `key` of
+           the two archives, and is expected to have a "version" as
+           the last key col.',
+      class = "epiprocess__epi_archive_must_have_version_at_end_of_key"
+    )
+  }
+  x_nonby_colnames <- setdiff(names(x_DT), by)
+  y_nonby_colnames <- setdiff(names(y_DT), by)
+  if (length(intersect(x_nonby_colnames, y_nonby_colnames)) != 0L) {
+    cli_abort("
+            `x` and `y` DTs have overlapping non-by column names;
+            this is currently not supported; please manually fix up first:
+            any overlapping columns that can are key-like should be
+            incorporated into the key, and other columns should be renamed.
+          ", class = "epiprocess__epix_merge_x_y_must_not_have_overlapping_nonby_colnames")
+  }
+  x_by_vals <- x_DT[, by, with = FALSE]
+  if (anyDuplicated(x_by_vals) != 0L) {
+    cli_abort("
+            The `by` columns must uniquely determine rows of `x$DT`;
+            the `by` is currently set to the common `key` of the two
+            archives, so this can be resolved by adding key-like columns
+            to `x`'s key (to get a unique key).
+          ", class = "epiprocess__epix_merge_by_cols_must_act_as_unique_key")
+  }
+  y_by_vals <- y_DT[, by, with = FALSE]
+  if (anyDuplicated(y_by_vals) != 0L) {
+    cli_abort("
+            The `by` columns must uniquely determine rows of `y$DT`;
+            the `by` is currently set to the common `key` of the two
+            archives, so this can be resolved by adding key-like columns
+            to `y`'s key (to get a unique key).
+          ", class = "epiprocess__epix_merge_by_cols_must_act_as_unique_key")
+  }
+  result_DT <- merge(x_by_vals, y_by_vals,
+    by = by,
+    # We must have `all=TRUE` or we may skip updates
+    # from x and/or y and corrupt the history
+    all = TRUE,
+    # We don't want Cartesian products, but the
+    # by-is-unique-key check above already ensures
+    # this. (Note that `allow.cartesian=FALSE` doesn't
+    # actually catch all Cartesian products anyway.)
+    # Disable superfluous check:
+    allow.cartesian = TRUE
+  )
+  set(
+    result_DT, , x_nonby_colnames,
+    x_DT[result_DT[, by, with = FALSE], x_nonby_colnames,
+      with = FALSE,
+      # It's good practice to specify `on`, and we must
+      # explicitly specify `on` if there's a potential key vs.
+      # by order mismatch (not possible currently for x
+      # with by = key(x$DT), but possible for y):
+      on = by,
+      # last version carried forward:
+      roll = TRUE,
+      # requesting non-version key that doesn't exist in the other archive,
+      # or before its first version, should result in NA
+      nomatch = NA,
+      # see note on `allow.cartesian` above; currently have a
+      # similar story here.
+      allow.cartesian = TRUE
+    ]
+  )
+  set(
+    result_DT, , y_nonby_colnames,
+    y_DT[result_DT[, by, with = FALSE], y_nonby_colnames,
+      with = FALSE,
+      on = by,
+      roll = TRUE,
+      nomatch = NA,
+      allow.cartesian = TRUE
+    ]
+  )
+  # The key could be unset in case of a key vs. by order mismatch as
+  # noted above. Ensure that we keep it:
+  setkeyv(result_DT, by)
+
+  return(as_epi_archive2(
+    result_DT[], # clear data.table internal invisibility flag if set
+    geo_type = x$geo_type,
+    time_type = x$time_type,
+    other_keys = setdiff(key(result_DT), c("geo_value", "time_value", "version")),
+    additional_metadata = result_additional_metadata,
+    # It'd probably be better to pre-compactify before the merge, and might be
+    # guaranteed not to be necessary to compactify the merge result if the
+    # inputs are already compactified, but at time of writing we don't have
+    # compactify in its own method or field, and it seems like it should be
+    # pretty fast anyway.
+    compactify = compactify,
+    clobberable_versions_start = result_clobberable_versions_start,
+    versions_end = new_versions_end
+  ))
+}
+
+# Helpers for `group_by`:
+
+#' Make non-testing mock to get [`dplyr::dplyr_col_modify`] input
+#'
+#' A workaround for `dplyr:::mutate_cols` not being exported and directly
+#' applying test mock libraries likely being impossible (due to mocking another
+#' package's S3 generic or method).
+#'
+#' Use solely with a single call to the [`dplyr::mutate`] function and then
+#' `destructure_col_modify_recorder_df`; other applicable operations from
+#' [dplyr::dplyr_extending] have not been implemented.
+#'
+#' @param parent_df the "parent class" data frame to wrap
+#' @return a `col_modify_recorder_df`
+#'
+#' @noRd
+new_col_modify_recorder_df <- function(parent_df) {
+  assert_class(parent_df, "data.frame")
+  `class<-`(parent_df, c("col_modify_recorder_df", class(parent_df)))
+}
+
+#' Extract unchanged parent-class data frame from a `new_col_modify_recorder_df`
+#'
+#' @param col_modify_recorder_df an instance of a `col_modify_recorder_df`
+#' @return named list with elements `unchanged_parent_df`, `cols`; `cols` is the
+#'   input to [`dplyr::dplyr_col_modify`] that this class was designed to record
+#'
+#' @noRd
+destructure_col_modify_recorder_df <- function(col_modify_recorder_df) {
+  assert_class(col_modify_recorder_df, "col_modify_recorder_df")
+  list(
+    unchanged_parent_df = col_modify_recorder_df %>%
+      `attr<-`("epiprocess::col_modify_recorder_df::cols", NULL) %>%
+      `class<-`(setdiff(class(.), "col_modify_recorder_df")),
+    cols = attr(col_modify_recorder_df,
+      "epiprocess::col_modify_recorder_df::cols",
+      exact = TRUE
+    )
+  )
+}
+
+#' `dplyr_col_modify` method that simply records the `cols` argument
+#'
+#' Must export S3 methods in R >= 4.0, even if they're only designed to be
+#' package internals, and must import any corresponding upstream S3 generic
+#' functions:
+#' @importFrom dplyr dplyr_col_modify
+#' @export
+#' @noRd
+dplyr_col_modify.col_modify_recorder_df <- function(data, cols) {
+  if (!is.null(attr(data, "epiprocess::col_modify_recorder_df::cols", exact = TRUE))) {
+    cli_abort("`col_modify_recorder_df` can only record `cols` once",
+      internal = TRUE
+    )
+  }
+  attr(data, "epiprocess::col_modify_recorder_df::cols") <- cols
+  data
+}
+
+#' A more detailed but restricted `mutate` for use in `group_by.epi_archive`
+#'
+#' More detailed: provides the names of the "requested" columns in addition to
+#' the output expected from a regular `mutate` method.
+#'
+#' Restricted: doesn't allow replacing or removing key cols, where a sort is
+#' potentially required at best and what the output key should be is unclear at
+#' worst. (The originally expected restriction was that the `mutate` parameters
+#' not present in `group_by` would not be recognized, but the current
+#' implementation just lets `mutate` handle these even anyway, even if they're
+#' not part of the regular `group_by` parameters; these arguments would have to
+#' be passed by names with dot prefixes, so just hope that the user means to use
+#' them here if provided.)
+#'
+#' This can introduce column-level aliasing in `data.table`s, which isn't really
+#' intended in the `data.table` user model but we can make it part of our user
+#' model (see
+#' https://stackoverflow.com/questions/45925482/make-a-shallow-copy-in-data-table
+#' and links).
+#'
+#' Don't export this without cleaning up language of "mutate" as in side effects
+#' vs. "mutate" as in `dplyr::mutate`.
+#' @noRd
+epix_detailed_restricted_mutate2 <- function(.data, ...) {
+  # We don't want to directly use `dplyr::mutate` on the `$DT`, as:
+  # - `mutate` behavior, including the output class, changes depending on
+  #   whether `dtplyr` < 1.3.0 is loaded and would require post-processing
+  # - behavior with `dtplyr` isn't fully compatible
+  # - it doesn't give the desired details, and `rlang::exprs_auto_name` does not
+  #   appropriately handle the `= NULL` and `= <data.frame>` tidyeval cases
+  # Instead:
+  # - Use `as.list` to get a shallow copy (undocumented, but apparently
+  #   intended, behavior), then `as_tibble` (also shallow, given a list) to get
+  #   back to something that will use `dplyr`'s included `mutate` method(s),
+  #   then convert this using shallow operations into a `data.table`.
+  # - Use `col_modify_recorder_df` to get the desired details.
+  in_tbl <- tibble::as_tibble(as.list(.data$DT), .name_repair = "minimal")
+  col_modify_cols <-
+    destructure_col_modify_recorder_df(
+      mutate(new_col_modify_recorder_df(in_tbl), ...)
+    )[["cols"]]
+  invalidated_key_col_is <-
+    which(purrr::map_lgl(key(.data$DT), function(key_colname) {
+      key_colname %in% names(col_modify_cols) &&
+        !rlang::is_reference(in_tbl[[key_colname]], col_modify_cols[[key_colname]])
+    }))
+  if (length(invalidated_key_col_is) != 0L) {
+    rlang::abort(paste_lines(c(
+      "Key columns must not be replaced or removed.",
+      wrap_varnames(key(.data$DT)[invalidated_key_col_is],
+        initial = "Flagged key cols: "
+      )
+    )))
+  } else {
+    # Have `dplyr` do the `dplyr_col_modify`, keeping the column-level-aliasing
+    # and must-copy-on-write-if-refcount-more-than-1 model, obtaining a tibble,
+    # then convert it into a `data.table`. The key should still be valid
+    # (assuming that the user did not explicitly alter `key(.data$DT)` or the
+    # columns by reference somehow within `...` tidyeval-style computations, or
+    # trigger refcount-1 alterations due to still having >1 refcounts on the
+    # columns), set the "sorted" attribute accordingly to prevent attempted
+    # sorting (including potential extra copies) or sortedness checking, then
+    # `setDT` (rather than `as.data.table`, in order to prevent column copying
+    # to establish ownership according to `data.table`'s memory model).
+    out_DT <- dplyr::dplyr_col_modify(in_tbl, col_modify_cols) %>%
+      data.table::setattr("sorted", data.table::key(.data$DT)) %>%
+      data.table::setDT(key = key(.data$DT))
+    out_archive <- clone(.data)
+    out_archive$DT <- out_DT
+    request_names <- names(col_modify_cols)
+    return(list(
+      archive = out_archive,
+      request_names = request_names
+    ))
+    # (We might also consider special-casing when `mutate` hands back something
+    # equivalent (in some sense) to the input (probably only encountered when
+    # we're dealing with `group_by`), and using just `$DT`, not a shallow copy,
+    # in the result, primarily in order to hedge against `as.list` or `setDT`
+    # changing their behavior and generating deep copies somehow. This could
+    # also prevent storage, and perhaps also generation, of shallow copies, but
+    # this seems unlikely to be a major gain unless it helps enable some
+    # in-place modifications of refcount-1 columns (although detecting this case
+    # seems to be common across `group_by` implementations; maybe there is
+    # something there).)
+  }
+}
+
+
+#' Slide a function over variables in an `epi_archive` or `grouped_epi_archive`
+#'
+#' Slides a given function over variables in an `epi_archive` object. This
+#' behaves similarly to `epi_slide()`, with the key exception that it is
+#' version-aware: the sliding computation at any given reference time t is
+#' performed on **data that would have been available as of t**. See the
+#' [archive
+#' vignette](https://cmu-delphi.github.io/epiprocess/articles/archive.html) for
+#' examples.
+#'
+#' @param x An [`epi_archive`] or [`grouped_epi_archive`] object. If ungrouped,
+#'   all data in `x` will be treated as part of a single data group.
+#' @param f Function, formula, or missing; together with `...` specifies the
+#'   computation to slide. To "slide" means to apply a computation over a
+#'   sliding (a.k.a. "rolling") time window for each data group. The window is
+#'   determined by the `before` parameter described below. One time step is
+#'   typically one day or one week; see [`epi_slide`] details for more
+#'   explanation. If a function, `f` must take an `epi_df` with the same
+#'   column names as the archive's `DT`, minus the `version` column; followed
+#'   by a one-row tibble containing the values of the grouping variables for
+#'   the associated group; followed by a reference time value, usually as a
+#'   `Date` object; followed by any number of named arguments. If a formula,
+#'   `f` can operate directly on columns accessed via `.x$var` or `.$var`, as
+#'   in `~ mean (.x$var)` to compute a mean of a column `var` for each
+#'   group-`ref_time_value` combination. The group key can be accessed via
+#'   `.y` or `.group_key`, and the reference time value can be accessed via
+#'   `.z` or `.ref_time_value`. If `f` is missing, then `...` will specify the
+#'   computation.
+#' @param ... Additional arguments to pass to the function or formula specified
+#'   via `f`. Alternatively, if `f` is missing, then `...` is interpreted as an
+#'   expression for tidy evaluation; in addition to referring to columns
+#'   directly by name, the expression has access to `.data` and `.env` pronouns
+#'   as in `dplyr` verbs, and can also refer to the `.group_key` and
+#'   `.ref_time_value`. See details of [`epi_slide`].
+#' @param before How far `before` each `ref_time_value` should the sliding
+#'   window extend? If provided, should be a single, non-NA,
+#'   [integer-compatible][vctrs::vec_cast] number of time steps. This window
+#'   endpoint is inclusive. For example, if `before = 7`, and one time step is
+#'   one day, then to produce a value for a `ref_time_value` of January 8, we
+#'   apply the given function or formula to data (for each group present) with
+#'   `time_value`s from January 1 onward, as they were reported on January 8.
+#'   For typical disease surveillance sources, this will not include any data
+#'   with a `time_value` of January 8, and, depending on the amount of reporting
+#'   latency, may not include January 7 or even earlier `time_value`s. (If
+#'   instead the archive were to hold nowcasts instead of regular surveillance
+#'   data, then we would indeed expect data for `time_value` January 8. If it
+#'   were to hold forecasts, then we would expect data for `time_value`s after
+#'   January 8, and the sliding window would extend as far after each
+#'   `ref_time_value` as needed to include all such `time_value`s.)
+#' @param ref_time_values Reference time values / versions for sliding
+#'   computations; each element of this vector serves both as the anchor point
+#'   for the `time_value` window for the computation and the `max_version`
+#'   `as_of` which we fetch data in this window. If missing, then this will set
+#'   to a regularly-spaced sequence of values set to cover the range of
+#'   `version`s in the `DT` plus the `versions_end`; the spacing of values will
+#'   be guessed (using the GCD of the skips between values).
+#' @param time_step Optional function used to define the meaning of one time
+#'   step, which if specified, overrides the default choice based on the
+#'   `time_value` column. This function must take a positive integer and return
+#'   an object of class `lubridate::period`. For example, we can use `time_step
+#'   = lubridate::hours` in order to set the time step to be one hour (this
+#'   would only be meaningful if `time_value` is of class `POSIXct`).
+#' @param new_col_name String indicating the name of the new column that will
+#'   contain the derivative values. Default is "slide_value"; note that setting
+#'   `new_col_name` equal to an existing column name will overwrite this column.
+#' @param as_list_col Should the slide results be held in a list column, or be
+#'   [unchopped][tidyr::unchop]/[unnested][tidyr::unnest]? Default is `FALSE`,
+#'   in which case a list object returned by `f` would be unnested (using
+#'   [`tidyr::unnest()`]), and, if the slide computations output data frames,
+#'   the names of the resulting columns are given by prepending `new_col_name`
+#'   to the names of the list elements.
+#' @param names_sep String specifying the separator to use in `tidyr::unnest()`
+#'   when `as_list_col = FALSE`. Default is "_". Using `NULL` drops the prefix
+#'   from `new_col_name` entirely.
+#' @param all_versions (Not the same as `all_rows` parameter of `epi_slide`.) If
+#'   `all_versions = TRUE`, then `f` will be passed the version history (all
+#'   `version <= ref_time_value`) for rows having `time_value` between
+#'   `ref_time_value - before` and `ref_time_value`. Otherwise, `f` will be
+#'   passed only the most recent `version` for every unique `time_value`.
+#'   Default is `FALSE`.
+#' @return A tibble whose columns are: the grouping variables, `time_value`,
+#'   containing the reference time values for the slide computation, and a
+#'   column named according to the `new_col_name` argument, containing the slide
+#'   values.
+#'
+#' @details A few key distinctions between the current function and `epi_slide()`:
+#'   1. In `f` functions for `epix_slide`, one should not assume that the input
+#'   data to contain any rows with `time_value` matching the computation's
+#'   `ref_time_value` (accessible via `attributes(<data>)$metadata$as_of`); for
+#'   typical epidemiological surveillance data, observations pertaining to a
+#'   particular time period (`time_value`) are first reported `as_of` some
+#'   instant after that time period has ended.
+#'   2. `epix_slide()` doesn't accept an `after` argument; its windows extend
+#'   from `before` time steps before a given `ref_time_value` through the last
+#'   `time_value` available as of version `ref_time_value` (typically, this
+#'   won't include `ref_time_value` itself, as observations about a particular
+#'   time interval (e.g., day) are only published after that time interval
+#'   ends); `epi_slide` windows extend from `before` time steps before a
+#'   `ref_time_value` through `after` time steps after `ref_time_value`.
+#'   3. The input class and columns are similar but different: `epix_slide`
+#'   (with the default `all_versions=FALSE`) keeps all columns and the
+#'   `epi_df`-ness of the first argument to each computation; `epi_slide` only
+#'   provides the grouping variables in the second input, and will convert the
+#'   first input into a regular tibble if the grouping variables include the
+#'   essential `geo_value` column. (With `all_versions=TRUE`, `epix_slide` will
+#'   will provide an `epi_archive` rather than an `epi-df` to each
+#'   computation.)
+#'   4. The output class and columns are similar but different: `epix_slide()`
+#'   returns a tibble containing only the grouping variables, `time_value`, and
+#'   the new column(s) from the slide computations, whereas `epi_slide()`
+#'   returns an `epi_df` with all original variables plus the new columns from
+#'   the slide computations. (Both will mirror the grouping or ungroupedness of
+#'   their input, with one exception: `epi_archive`s can have trivial
+#'   (zero-variable) groupings, but these will be dropped in `epix_slide`
+#'   results as they are not supported by tibbles.)
+#'   5. There are no size stability checks or element/row recycling to maintain
+#'   size stability in `epix_slide`, unlike in `epi_slide`. (`epix_slide` is
+#'   roughly analogous to [`dplyr::group_modify`], while `epi_slide` is roughly
+#'   analogous to `dplyr::mutate` followed by `dplyr::arrange`) This is detailed
+#'   in the "advanced" vignette.
+#'   6. `all_rows` is not supported in `epix_slide`; since the slide
+#'   computations are allowed more flexibility in their outputs than in
+#'   `epi_slide`, we can't guess a good representation for missing computations
+#'   for excluded group-`ref_time_value` pairs.
+#'   7. The `ref_time_values` default for `epix_slide` is based on making an
+#'   evenly-spaced sequence out of the `version`s in the `DT` plus the
+#'   `versions_end`, rather than the `time_value`s.
+#'
+#' Apart from the above distinctions, the interfaces between `epix_slide()` and
+#' `epi_slide()` are the same.
+#'
+#' Furthermore, the current function can be considerably slower than
+#'   `epi_slide()`, for two reasons: (1) it must repeatedly fetch
+#'   properly-versioned snapshots from the data archive (via its `as_of()`
+#'   method), and (2) it performs a "manual" sliding of sorts, and does not
+#'   benefit from the highly efficient `slider` package. For this reason, it
+#'   should never be used in place of `epi_slide()`, and only used when
+#'   version-aware sliding is necessary (as it its purpose).
+#'
+#' Finally, this is simply a wrapper around the `slide()` method of the
+#'   `epi_archive` and `grouped_epi_archive` classes, so if `x` is an
+#'   object of either of these classes, then:
+#'   ```
+#'   epix_slide(x, new_var = comp(old_var), before = 119)
+#'   ```
+#'   is equivalent to:
+#'   ```
+#'   x$slide(new_var = comp(old_var), before = 119)
+#'   ```
+#'
+#' Mutation and aliasing: `epix_slide` and `$slide` will not perform in-place
+#' mutation of the input archives on their own. In some edge cases the inputs it
+#' feeds to the slide computations may alias parts of the input archive, so copy
+#' the slide computation inputs if needed before using mutating operations like
+#' `data.table`'s `:=` operator. Similarly, in some edge cases, the output of
+#' the slide operation may alias parts of the input archive, so similarly, make
+#' sure to clone and/or copy appropriately before using in-place mutation.
+#'
+#' @examples
+#' library(dplyr)
+#'
+#' # Reference time points for which we want to compute slide values:
+#' ref_time_values <- seq(as.Date("2020-06-01"),
+#'   as.Date("2020-06-15"),
+#'   by = "1 day"
+#' )
+#'
+#' # A simple (but not very useful) example (see the archive vignette for a more
+#' # realistic one):
+#' archive_cases_dv_subset %>%
+#'   group_by(geo_value) %>%
+#'   epix_slide(
+#'     f = ~ mean(.x$case_rate_7d_av),
+#'     before = 2,
+#'     ref_time_values = ref_time_values,
+#'     new_col_name = "case_rate_7d_av_recent_av"
+#'   ) %>%
+#'   ungroup()
+#' # We requested time windows that started 2 days before the corresponding time
+#' # values. The actual number of `time_value`s in each computation depends on
+#' # the reporting latency of the signal and `time_value` range covered by the
+#' # archive (2020-06-01 -- 2021-11-30 in this example).  In this case, we have
+#' # * 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically
+#' #                                                discarded
+#' # * 1 `time_value`, for ref time 2020-06-02
+#' # * 2 `time_value`s, for the rest of the results
+#' # * never the 3 `time_value`s we would get from `epi_slide`, since, because
+#' #   of data latency, we'll never have an observation
+#' #   `time_value == ref_time_value` as of `ref_time_value`.
+#' # The example below shows this type of behavior in more detail.
+#'
+#' # Examining characteristics of the data passed to each computation with
+#' # `all_versions=FALSE`.
+#' archive_cases_dv_subset %>%
+#'   group_by(geo_value) %>%
+#'   epix_slide(
+#'     function(x, gk, rtv) {
+#'       tibble(
+#'         time_range = if (nrow(x) == 0L) {
+#'           "0 `time_value`s"
+#'         } else {
+#'           sprintf("%s -- %s", min(x$time_value), max(x$time_value))
+#'         },
+#'         n = nrow(x),
+#'         class1 = class(x)[[1L]]
+#'       )
+#'     },
+#'     before = 5, all_versions = FALSE,
+#'     ref_time_values = ref_time_values, names_sep = NULL
+#'   ) %>%
+#'   ungroup() %>%
+#'   arrange(geo_value, time_value)
+#'
+#' # --- Advanced: ---
+#'
+#' # `epix_slide` with `all_versions=FALSE` (the default) applies a
+#' # version-unaware computation to several versions of the data. We can also
+#' # use `all_versions=TRUE` to apply a version-*aware* computation to several
+#' # versions of the data, again looking at characteristics of the data passed
+#' # to each computation. In this case, each computation should expect an
+#' # `epi_archive` containing the relevant version data:
+#'
+#' archive_cases_dv_subset %>%
+#'   group_by(geo_value) %>%
+#'   epix_slide(
+#'     function(x, gk, rtv) {
+#'       tibble(
+#'         versions_start = if (nrow(x$DT) == 0L) {
+#'           "NA (0 rows)"
+#'         } else {
+#'           toString(min(x$DT$version))
+#'         },
+#'         versions_end = x$versions_end,
+#'         time_range = if (nrow(x$DT) == 0L) {
+#'           "0 `time_value`s"
+#'         } else {
+#'           sprintf("%s -- %s", min(x$DT$time_value), max(x$DT$time_value))
+#'         },
+#'         n = nrow(x$DT),
+#'         class1 = class(x)[[1L]]
+#'       )
+#'     },
+#'     before = 5, all_versions = TRUE,
+#'     ref_time_values = ref_time_values, names_sep = NULL
+#'   ) %>%
+#'   ungroup() %>%
+#'   # Focus on one geo_value so we can better see the columns above:
+#'   filter(geo_value == "ca") %>%
+#'   select(-geo_value)
+#'
+#' @importFrom rlang enquo !!!
+#' @export
+epix_slide2 <- function(x, f, ..., before, ref_time_values,
+                        time_step, new_col_name = "slide_value",
+                        as_list_col = FALSE, names_sep = "_",
+                        all_versions = FALSE) {
+  if (!is_epi_archive2(x, grouped_okay = TRUE)) {
+    cli_abort("`x` must be of class `epi_archive` or `grouped_epi_archive`.")
+  }
+  return(slide(x, f, ...,
+    before = before,
+    ref_time_values = ref_time_values,
+    time_step = time_step,
+    new_col_name = new_col_name,
+    as_list_col = as_list_col,
+    names_sep = names_sep,
+    all_versions = all_versions
+  ))
+}
+
+
+#' Filter an `epi_archive` object to keep only older versions
+#'
+#' Generates a filtered `epi_archive` from an `epi_archive` object, keeping
+#' only rows with `version` falling on or before a specified date.
+#'
+#' @param x An `epi_archive` object
+#' @param max_version Time value specifying the max version to permit in the
+#'   filtered archive. That is, the output archive will comprise rows of the
+#'   current archive data having `version` less than or equal to the
+#'   specified `max_version`
+#' @return An `epi_archive` object
+#'
+#' @export
+epix_truncate_versions_after <- function(x, max_version) {
+  UseMethod("epix_truncate_versions_after")
+}
+
+#' @export
+epix_truncate_versions_after.epi_archive2 <- function(x, max_version) {
+  cloned_epi_archive <- clone(x)
+  return((truncate_versions_after(x, max_version)))
+  # ^ second set of parens drops invisibility
+}
diff --git a/man/as_epi_archive2.Rd b/man/as_epi_archive2.Rd
new file mode 100644
index 00000000..090b455a
--- /dev/null
+++ b/man/as_epi_archive2.Rd
@@ -0,0 +1,142 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{as_epi_archive2}
+\alias{as_epi_archive2}
+\title{Convert to \code{epi_archive} format}
+\usage{
+as_epi_archive2(
+  x,
+  geo_type,
+  time_type,
+  other_keys,
+  additional_metadata = list(),
+  compactify = NULL,
+  clobberable_versions_start = NA,
+  versions_end = max_version_with_row_in(x)
+)
+}
+\arguments{
+\item{x}{A data frame, data table, or tibble, with columns \code{geo_value},
+\code{time_value}, \code{version}, and then any additional number of columns.}
+
+\item{geo_type}{Type for the geo values. If missing, then the function will
+attempt to infer it from the geo values present; if this fails, then it
+will be set to "custom".}
+
+\item{time_type}{Type for the time values. If missing, then the function will
+attempt to infer it from the time values present; if this fails, then it
+will be set to "custom".}
+
+\item{other_keys}{Character vector specifying the names of variables in \code{x}
+that should be considered key variables (in the language of \code{data.table})
+apart from "geo_value", "time_value", and "version".}
+
+\item{additional_metadata}{List of additional metadata to attach to the
+\code{epi_archive} object. The metadata will have \code{geo_type} and \code{time_type}
+fields; named entries from the passed list or will be included as well.}
+
+\item{compactify}{Optional; Boolean or \code{NULL}: should we remove rows that are
+considered redundant for the purposes of \code{epi_archive}'s built-in methods
+such as \code{as_of}? As these methods use the last version of each observation
+carried forward (LOCF) to interpolate between the version data provided,
+rows that don't change these LOCF results can potentially be omitted to
+save space. \code{TRUE} will remove these rows, \code{FALSE} will not, and missing or
+\code{NULL} will remove these rows and issue a warning. Generally, this can be
+set to \code{TRUE}, but if you directly inspect or edit the fields of the
+\code{epi_archive} such as its \code{DT}, you will have to determine whether
+\code{compactify=TRUE} will produce the desired results. If compactification
+here is removing a large proportion of the rows, this may indicate a
+potential for space, time, or bandwidth savings upstream the data pipeline,
+e.g., when fetching, storing, or preparing the input data \code{x}}
+
+\item{clobberable_versions_start}{Optional; \code{length}-1; either a value of the
+same \code{class} and \code{typeof} as \code{x$version}, or an \code{NA} of any \code{class} and
+\code{typeof}: specifically, either (a) the earliest version that could be
+subject to "clobbering" (being overwritten with different update data, but
+using the \emph{same} version tag as the old update data), or (b) \code{NA}, to
+indicate that no versions are clobberable. There are a variety of reasons
+why versions could be clobberable under routine circumstances, such as (a)
+today's version of one/all of the columns being published after initially
+being filled with \code{NA} or LOCF, (b) a buggy version of today's data being
+published but then fixed and republished later in the day, or (c) data
+pipeline delays (e.g., publisher uploading, periodic scraping, database
+syncing, periodic fetching, etc.) that make events (a) or (b) reflected
+later in the day (or even on a different day) than expected; potential
+causes vary between different data pipelines. The default value is \code{NA},
+which doesn't consider any versions to be clobberable. Another setting that
+may be appropriate for some pipelines is \code{max_version_with_row_in(x)}.}
+
+\item{versions_end}{Optional; length-1, same \code{class} and \code{typeof} as
+\code{x$version}: what is the last version we have observed? The default is
+\code{max_version_with_row_in(x)}, but values greater than this could also be
+valid, and would indicate that we observed additional versions of the data
+beyond \code{max(x$version)}, but they all contained empty updates. (The default
+value of \code{clobberable_versions_start} does not fully trust these empty
+updates, and assumes that any version \verb{>= max(x$version)} could be
+clobbered.) If \code{nrow(x) == 0}, then this argument is mandatory.}
+}
+\value{
+An \code{epi_archive} object.
+}
+\description{
+Converts a data frame, data table, or tibble into an \code{epi_archive}
+object. See the \href{https://cmu-delphi.github.io/epiprocess/articles/archive.html}{archive vignette} for
+examples. The parameter descriptions below are copied from there
+}
+\details{
+This simply a wrapper around the \code{new()} method of the \code{epi_archive}
+class, so for example:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{x <- as_epi_archive(df, geo_type = "state", time_type = "day")
+}\if{html}{\out{</div>}}
+
+would be equivalent to:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{x <- epi_archive$new(df, geo_type = "state", time_type = "day")
+}\if{html}{\out{</div>}}
+}
+\examples{
+# Simple ex. with necessary keys
+tib <- tibble::tibble(
+  geo_value = rep(c("ca", "hi"), each = 5),
+  time_value = rep(seq(as.Date("2020-01-01"),
+    by = 1, length.out = 5
+  ), times = 2),
+  version = rep(seq(as.Date("2020-01-02"),
+    by = 1, length.out = 5
+  ), times = 2),
+  value = rnorm(10, mean = 2, sd = 1)
+)
+
+toy_epi_archive <- tib \%>\% as_epi_archive(
+  geo_type = "state",
+  time_type = "day"
+)
+toy_epi_archive
+
+# Ex. with an additional key for county
+df <- data.frame(
+  geo_value = c(replicate(2, "ca"), replicate(2, "fl")),
+  county = c(1, 3, 2, 5),
+  time_value = c(
+    "2020-06-01",
+    "2020-06-02",
+    "2020-06-01",
+    "2020-06-02"
+  ),
+  version = c(
+    "2020-06-02",
+    "2020-06-03",
+    "2020-06-02",
+    "2020-06-03"
+  ),
+  cases = c(1, 2, 3, 4),
+  cases_rate = c(0.01, 0.02, 0.01, 0.05)
+)
+
+x <- df \%>\% as_epi_archive(
+  geo_type = "state",
+  time_type = "day",
+  other_keys = "county"
+)
+}
diff --git a/man/as_of.epi_archive2.Rd b/man/as_of.epi_archive2.Rd
new file mode 100644
index 00000000..21a4cfc1
--- /dev/null
+++ b/man/as_of.epi_archive2.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{as_of.epi_archive2}
+\alias{as_of.epi_archive2}
+\title{As of epi_archive}
+\usage{
+\method{as_of}{epi_archive2}(epi_archive, max_version, min_time_value = -Inf, all_versions = FALSE)
+}
+\arguments{
+\item{epi_archive}{An \code{epi_archive} object}
+
+\item{max_version}{Version specifying the max version to permit in the
+snapshot. That is, the snapshot will comprise the unique rows of the
+current archive data that represent the most up-to-date signal values, as
+of the specified \code{max_version} (and whose \code{time_value}s are at least
+\code{min_time_value}).}
+
+\item{min_time_value}{Time value specifying the min \code{time_value} to permit in
+the snapshot. Default is \code{-Inf}, which effectively means that there is no
+minimum considered.}
+
+\item{all_versions}{Boolean; If \code{all_versions = TRUE}, then the output will be in
+\code{epi_archive} format, and contain rows in the specified \code{time_value} range
+having \code{version <= max_version}. The resulting object will cover a
+potentially narrower \code{version} and \code{time_value} range than \code{x}, depending
+on user-provided arguments. Otherwise, there will be one row in the output
+for the \code{max_version} of each \code{time_value}. Default is \code{FALSE}.}
+}
+\description{
+Generates a snapshot in \code{epi_df} format as of a given version.
+See the documentation for the wrapper function \code{\link[=epix_as_of]{epix_as_of()}} for
+details. The parameter descriptions below are copied from there
+}
diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd
index 6a25b2af..efe5d5ba 100644
--- a/man/epi_archive.Rd
+++ b/man/epi_archive.Rd
@@ -1,9 +1,14 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/archive.R
+% Please edit documentation in R/archive.R, R/archive_new.R
 \name{epi_archive}
 \alias{epi_archive}
 \title{\code{epi_archive} object}
 \description{
+An \code{epi_archive} is an R6 class which contains a data table
+along with several relevant pieces of metadata. The data table can be seen
+as the full archive (version history) for some signal variables of
+interest.
+
 An \code{epi_archive} is an R6 class which contains a data table
 along with several relevant pieces of metadata. The data table can be seen
 as the full archive (version history) for some signal variables of
@@ -49,6 +54,56 @@ represent potential update data that we do not yet have access to; or in
 version in which it was first released, or if no version of that
 observation appears in the archive data at all.
 
+\strong{A word of caution:} R6 objects, unlike most other objects in R, have
+reference semantics. A primary consequence of this is that objects are not
+copied when modified. You can read more about this in Hadley Wickham's
+\href{https://adv-r.hadley.nz/r6.html#r6-semantics}{Advanced R} book. In order
+to construct a modified archive while keeping the original intact, first
+make a clone using the \verb{$clone} method, then overwrite the clone's \code{DT}
+field with \code{data.table::copy(clone$DT)}, and finally perform the
+modifications on the clone.
+
+epi archive
+
+An \code{epi_archive} is an R6 class which contains a data table \code{DT}, of
+class \code{data.table} from the \code{data.table} package, with (at least) the
+following columns:
+\itemize{
+\item \code{geo_value}: the geographic value associated with each row of measurements.
+\item \code{time_value}: the time value associated with each row of measurements.
+\item \code{version}: the time value specifying the version for each row of
+measurements. For example, if in a given row the \code{version} is January 15,
+2022 and \code{time_value} is January 14, 2022, then this row contains the
+measurements of the data for January 14, 2022 that were available one day
+later.
+}
+
+The data table \code{DT} has key variables \code{geo_value}, \code{time_value}, \code{version},
+as well as any others (these can be specified when instantiating the
+\code{epi_archive} object via the \code{other_keys} argument, and/or set by operating
+on \code{DT} directly). Refer to the documentation for \code{\link[=as_epi_archive]{as_epi_archive()}} for
+information and examples of relevant parameter names for an \code{epi_archive} object.
+Note that there can only be a single row per unique combination of
+key variables, and thus the key variables are critical for figuring out how
+to generate a snapshot of data from the archive, as of a given version.
+
+In general, the last version of each observation is carried forward (LOCF) to
+fill in data between recorded versions, and between the last recorded
+update and the \code{versions_end}. One consequence is that the \code{DT}
+doesn't have to contain a full snapshot of every version (although this
+generally works), but can instead contain only the rows that are new or
+changed from the previous version (see \code{compactify}, which does this
+automatically). Currently, deletions must be represented as revising a row
+to a special state (e.g., making the entries \code{NA} or including a special
+column that flags the data as removed and performing some kind of
+post-processing), and the archive is unaware of what this state is. Note
+that \code{NA}s \emph{can} be introduced by \code{epi_archive} methods for other reasons,
+e.g., in \code{\link{epix_fill_through_version}} and \code{\link{epix_merge}}, if requested, to
+represent potential update data that we do not yet have access to; or in
+\code{\link{epix_merge}} to represent the "value" of an observation before the
+version in which it was first released, or if no version of that
+observation appears in the archive data at all.
+
 \strong{A word of caution:} R6 objects, unlike most other objects in R, have
 reference semantics. A primary consequence of this is that objects are not
 copied when modified. You can read more about this in Hadley Wickham's
@@ -60,6 +115,22 @@ modifications on the clone.
 }
 \section{Metadata}{
 
+The following pieces of metadata are included as fields in an \code{epi_archive}
+object:
+\itemize{
+\item \code{geo_type}: the type for the geo values.
+\item \code{time_type}: the type for the time values.
+\item \code{additional_metadata}: list of additional metadata for the data archive.
+}
+
+Unlike an \code{epi_df} object, metadata for an \code{epi_archive} object \code{x} can be
+accessed (and altered) directly, as in \code{x$geo_type} or \code{x$time_type},
+etc. Like an \code{epi_df} object, the \code{geo_type} and \code{time_type} fields in the
+metadata of an \code{epi_archive} object are not currently used by any
+downstream functions in the \code{epiprocess} package, and serve only as useful
+bits of information to convey about the data set at hand.
+
+
 The following pieces of metadata are included as fields in an \code{epi_archive}
 object:
 \itemize{
@@ -78,6 +149,13 @@ bits of information to convey about the data set at hand.
 
 \section{Generating Snapshots}{
 
+An \code{epi_archive} object can be used to generate a snapshot of the data in
+\code{epi_df} format, which represents the most up-to-date values of the signal
+variables, as of the specified version. This is accomplished by calling the
+\code{as_of()} method for an \code{epi_archive} object \code{x}. More details on this
+method are documented in the wrapper function \code{\link[=epix_as_of]{epix_as_of()}}.
+
+
 An \code{epi_archive} object can be used to generate a snapshot of the data in
 \code{epi_df} format, which represents the most up-to-date values of the signal
 variables, as of the specified version. This is accomplished by calling the
@@ -87,6 +165,16 @@ method are documented in the wrapper function \code{\link[=epix_as_of]{epix_as_o
 
 \section{Sliding Computations}{
 
+We can run a sliding computation over an \code{epi_archive} object, much like
+\code{epi_slide()} does for an \code{epi_df} object. This is accomplished by calling
+the \code{slide()} method for an \code{epi_archive} object, which works similarly to
+the way \code{epi_slide()} works for an \code{epi_df} object, but with one key
+difference: it is version-aware. That is, for an \code{epi_archive} object, the
+sliding computation at any given reference time point t is performed on
+\strong{data that would have been available as of t}. More details on \code{slide()}
+are documented in the wrapper function \code{\link[=epix_slide]{epix_slide()}}.
+
+
 We can run a sliding computation over an \code{epi_archive} object, much like
 \code{epi_slide()} does for an \code{epi_df} object. This is accomplished by calling
 the \code{slide()} method for an \code{epi_archive} object, which works similarly to
@@ -109,6 +197,22 @@ tib <- tibble::tibble(
   value = rnorm(10, mean = 2, sd = 1)
 )
 
+toy_epi_archive <- tib \%>\% epi_archive$new(
+  geo_type = "state",
+  time_type = "day"
+)
+toy_epi_archive
+tib <- tibble::tibble(
+  geo_value = rep(c("ca", "hi"), each = 5),
+  time_value = rep(seq(as.Date("2020-01-01"),
+    by = 1, length.out = 5
+  ), times = 2),
+  version = rep(seq(as.Date("2020-01-02"),
+    by = 1, length.out = 5
+  ), times = 2),
+  value = rnorm(10, mean = 2, sd = 1)
+)
+
 toy_epi_archive <- tib \%>\% epi_archive$new(
   geo_type = "state",
   time_type = "day"
diff --git a/man/epix_as_of2.Rd b/man/epix_as_of2.Rd
new file mode 100644
index 00000000..6c3db717
--- /dev/null
+++ b/man/epix_as_of2.Rd
@@ -0,0 +1,96 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods-epi_archive_new.R
+\name{epix_as_of2}
+\alias{epix_as_of2}
+\title{Generate a snapshot from an \code{epi_archive} object}
+\usage{
+epix_as_of2(
+  epi_archive,
+  max_version,
+  min_time_value = -Inf,
+  all_versions = FALSE
+)
+}
+\arguments{
+\item{max_version}{Time value specifying the max version to permit in the
+snapshot. That is, the snapshot will comprise the unique rows of the
+current archive data that represent the most up-to-date signal values, as
+of the specified \code{max_version} (and whose time values are at least
+\code{min_time_value}.)}
+
+\item{min_time_value}{Time value specifying the min time value to permit in
+the snapshot. Default is \code{-Inf}, which effectively means that there is no
+minimum considered.}
+
+\item{all_versions}{If \code{all_versions = TRUE}, then the output will be in
+\code{epi_archive} format, and contain rows in the specified \code{time_value} range
+having \code{version <= max_version}. The resulting object will cover a
+potentially narrower \code{version} and \code{time_value} range than \code{x}, depending
+on user-provided arguments. Otherwise, there will be one row in the output
+for the \code{max_version} of each \code{time_value}. Default is \code{FALSE}.}
+
+\item{x}{An \code{epi_archive} object}
+}
+\value{
+An \code{epi_df} object.
+}
+\description{
+Generates a snapshot in \code{epi_df} format from an \code{epi_archive} object, as of a
+given version. See the \href{https://cmu-delphi.github.io/epiprocess/articles/archive.html}{archive vignette} for
+examples.
+}
+\details{
+This is simply a wrapper around the \code{as_of()} method of the
+\code{epi_archive} class, so if \code{x} is an \code{epi_archive} object, then:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{epix_as_of(x, max_version = v)
+}\if{html}{\out{</div>}}
+
+is equivalent to:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{x$as_of(max_version = v)
+}\if{html}{\out{</div>}}
+
+Mutation and aliasing: \code{epix_as_of} and \verb{$as_of} will not mutate the input
+archives, but may in some edge cases alias parts of the inputs, so copy the
+outputs if needed before using mutating operations like \code{data.table}'s \verb{:=}
+operator. Currently, the only situation where there is potentially aliasing
+is of the \code{DT} in edge cases with \code{all_versions = TRUE}, but this may change
+in the future.
+}
+\examples{
+# warning message of data latency shown
+epix_as_of2(
+  x = archive_cases_dv_subset,
+  max_version = max(archive_cases_dv_subset$DT$version)
+)
+
+
+range(archive_cases_dv_subset$DT$version) # 2020-06-02 -- 2021-12-01
+
+epix_as_of2(
+  x = archive_cases_dv_subset,
+  max_version = as.Date("2020-06-12")
+)
+
+# When fetching a snapshot as of the latest version with update data in the
+# archive, a warning is issued by default, as this update data might not yet
+# be finalized (for example, if data versions are labeled with dates, these
+# versions might be overwritten throughout the corresponding days with
+# additional data or "hotfixes" of erroroneous data; when we build an archive
+# based on database queries, the latest available update might still be
+# subject to change, but previous versions should be finalized). We can
+# muffle such warnings with the following pattern:
+withCallingHandlers(
+  {
+    epix_as_of2(
+      x = archive_cases_dv_subset,
+      max_version = max(archive_cases_dv_subset$DT$version)
+    )
+  },
+  epiprocess__snapshot_as_of_clobberable_version = function(wrn) invokeRestart("muffleWarning")
+)
+# Since R 4.0, there is a `globalCallingHandlers` function that can be used
+# to globally toggle these warnings.
+
+}
diff --git a/man/epix_fill_through_version2.Rd b/man/epix_fill_through_version2.Rd
new file mode 100644
index 00000000..7389388a
--- /dev/null
+++ b/man/epix_fill_through_version2.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods-epi_archive_new.R
+\name{epix_fill_through_version2}
+\alias{epix_fill_through_version2}
+\title{\code{epi_archive} with unobserved history filled in (won't mutate, might alias)}
+\usage{
+epix_fill_through_version2(
+  epi_archive,
+  fill_versions_end,
+  how = c("na", "locf")
+)
+}
+\arguments{
+\item{fill_versions_end}{Length-1, same class&type as \code{x$version}: the
+version through which to fill in missing version history; this will be the
+result's \verb{$versions_end} unless it already had a later
+\verb{$versions_end}.}
+
+\item{how}{Optional; \code{"na"} or \code{"locf"}: \code{"na"} will fill in any missing
+required version history with \code{NA}s, by inserting (if necessary) an update
+immediately after the current \verb{$versions_end} that revises all
+existing measurements to be \code{NA} (this is only supported for \code{version}
+classes with a \code{next_after} implementation); \code{"locf"} will fill in missing
+version history with the last version of each observation carried forward
+(LOCF), by leaving the update \verb{$DT} alone (other \code{epi_archive} methods are
+based on LOCF).  Default is \code{"na"}.}
+
+\item{x}{An \code{epi_archive}}
+}
+\value{
+An \code{epi_archive}
+}
+\description{
+Sometimes, due to upstream data pipeline issues, we have to work with a
+version history that isn't completely up to date, but with functions that
+expect archives that are completely up to date, or equally as up-to-date as
+another archive. This function provides one way to approach such mismatches:
+pretend that we've "observed" additional versions, filling in these versions
+with NAs or extrapolated values.
+}
+\details{
+'\code{epix_fill_through_version} will not mutate its \code{x} argument, but its result
+might alias fields of \code{x} (e.g., mutating the result's \code{DT} might mutate
+\code{x$DT}). The R6 method variant, \code{x$fill_through_version}, will mutate \code{x} to
+give the result, but might reseat its fields (e.g., references to the old
+\code{x$DT} might not be updated by this function or subsequent operations on
+\code{x}), and returns the updated \code{x} \link[base:invisible]{invisibly}.
+}
diff --git a/man/epix_merge2.Rd b/man/epix_merge2.Rd
new file mode 100644
index 00000000..a42e53e4
--- /dev/null
+++ b/man/epix_merge2.Rd
@@ -0,0 +1,73 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods-epi_archive_new.R
+\name{epix_merge2}
+\alias{epix_merge2}
+\title{Merge two \code{epi_archive} objects}
+\usage{
+epix_merge2(
+  x,
+  y,
+  sync = c("forbid", "na", "locf", "truncate"),
+  compactify = TRUE
+)
+}
+\arguments{
+\item{x, y}{Two \code{epi_archive} objects to join together.}
+
+\item{sync}{Optional; \code{"forbid"}, \code{"na"}, \code{"locf"}, or \code{"truncate"}; in the
+case that \code{x$versions_end} doesn't match \code{y$versions_end}, what do we do?:
+\code{"forbid"}: emit an error; "na": use \code{max(x$versions_end, y$versions_end)}
+as the result's \code{versions_end}, but ensure that, if we request a snapshot
+as of a version after \code{min(x$versions_end, y$versions_end)}, the
+observation columns from the less up-to-date archive will be all NAs (i.e.,
+imagine there was an update immediately after its \code{versions_end} which
+revised all observations to be \code{NA}); \code{"locf"}: use \code{max(x$versions_end, y$versions_end)} as the result's \code{versions_end}, allowing the last version
+of each observation to be carried forward to extrapolate unavailable
+versions for the less up-to-date input archive (i.e., imagining that in the
+less up-to-date archive's data set remained unchanged between its actual
+\code{versions_end} and the other archive's \code{versions_end}); or \code{"truncate"}:
+use \code{min(x$versions_end, y$versions_end)} as the result's \code{versions_end},
+and discard any rows containing update rows for later versions.}
+
+\item{compactify}{Optional; \code{TRUE}, \code{FALSE}, or \code{NULL}; should the result be
+compactified? See \code{\link{as_epi_archive}} for an explanation of what this means.
+Default here is \code{TRUE}.}
+}
+\value{
+the resulting \code{epi_archive}
+}
+\description{
+Merges two \code{epi_archive}s that share a common \code{geo_value}, \code{time_value}, and
+set of key columns. When they also share a common \code{versions_end},
+using \verb{$as_of} on the result should be the same as using \verb{$as_of} on \code{x} and
+\code{y} individually, then performing a full join of the \code{DT}s on the non-version
+key columns (potentially consolidating multiple warnings about clobberable
+versions). If the \code{versions_end} values differ, the
+\code{sync} parameter controls what is done.
+}
+\details{
+This function, \code{\link{epix_merge}}, does not mutate its inputs and will not alias
+either archive's \code{DT}, but may alias other fields; \code{x$merge} will overwrite
+\code{x} with the result of the merge, reseating its \code{DT} and several other fields
+(making them point to different objects), but avoiding mutation of the
+contents of the old \code{DT} (only relevant if you have another reference to the
+old \code{DT} in another object).
+
+In all cases, \code{additional_metadata} will be an empty list, and
+\code{clobberable_versions_start} will be set to the earliest version that could
+be clobbered in either input archive.
+}
+\examples{
+# create two example epi_archive datasets
+x <- archive_cases_dv_subset$DT \%>\%
+  dplyr::select(geo_value, time_value, version, case_rate_7d_av) \%>\%
+  as_epi_archive(compactify = TRUE)
+y <- archive_cases_dv_subset$DT \%>\%
+  dplyr::select(geo_value, time_value, version, percent_cli) \%>\%
+  as_epi_archive(compactify = TRUE)
+# merge results stored in a third object:
+xy <- epix_merge(x, y)
+# vs. mutating x to hold the merge result:
+x$merge(y)
+
+}
diff --git a/man/epix_slide2.Rd b/man/epix_slide2.Rd
new file mode 100644
index 00000000..71d3a11c
--- /dev/null
+++ b/man/epix_slide2.Rd
@@ -0,0 +1,283 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods-epi_archive_new.R
+\name{epix_slide2}
+\alias{epix_slide2}
+\title{Slide a function over variables in an \code{epi_archive} or \code{grouped_epi_archive}}
+\usage{
+epix_slide2(
+  x,
+  f,
+  ...,
+  before,
+  ref_time_values,
+  time_step,
+  new_col_name = "slide_value",
+  as_list_col = FALSE,
+  names_sep = "_",
+  all_versions = FALSE
+)
+}
+\arguments{
+\item{x}{An \code{\link{epi_archive}} or \code{\link{grouped_epi_archive}} object. If ungrouped,
+all data in \code{x} will be treated as part of a single data group.}
+
+\item{f}{Function, formula, or missing; together with \code{...} specifies the
+computation to slide. To "slide" means to apply a computation over a
+sliding (a.k.a. "rolling") time window for each data group. The window is
+determined by the \code{before} parameter described below. One time step is
+typically one day or one week; see \code{\link{epi_slide}} details for more
+explanation. If a function, \code{f} must take an \code{epi_df} with the same
+column names as the archive's \code{DT}, minus the \code{version} column; followed
+by a one-row tibble containing the values of the grouping variables for
+the associated group; followed by a reference time value, usually as a
+\code{Date} object; followed by any number of named arguments. If a formula,
+\code{f} can operate directly on columns accessed via \code{.x$var} or \code{.$var}, as
+in \code{~ mean (.x$var)} to compute a mean of a column \code{var} for each
+group-\code{ref_time_value} combination. The group key can be accessed via
+\code{.y} or \code{.group_key}, and the reference time value can be accessed via
+\code{.z} or \code{.ref_time_value}. If \code{f} is missing, then \code{...} will specify the
+computation.}
+
+\item{...}{Additional arguments to pass to the function or formula specified
+via \code{f}. Alternatively, if \code{f} is missing, then \code{...} is interpreted as an
+expression for tidy evaluation; in addition to referring to columns
+directly by name, the expression has access to \code{.data} and \code{.env} pronouns
+as in \code{dplyr} verbs, and can also refer to the \code{.group_key} and
+\code{.ref_time_value}. See details of \code{\link{epi_slide}}.}
+
+\item{before}{How far \code{before} each \code{ref_time_value} should the sliding
+window extend? If provided, should be a single, non-NA,
+\link[vctrs:vec_cast]{integer-compatible} number of time steps. This window
+endpoint is inclusive. For example, if \code{before = 7}, and one time step is
+one day, then to produce a value for a \code{ref_time_value} of January 8, we
+apply the given function or formula to data (for each group present) with
+\code{time_value}s from January 1 onward, as they were reported on January 8.
+For typical disease surveillance sources, this will not include any data
+with a \code{time_value} of January 8, and, depending on the amount of reporting
+latency, may not include January 7 or even earlier \code{time_value}s. (If
+instead the archive were to hold nowcasts instead of regular surveillance
+data, then we would indeed expect data for \code{time_value} January 8. If it
+were to hold forecasts, then we would expect data for \code{time_value}s after
+January 8, and the sliding window would extend as far after each
+\code{ref_time_value} as needed to include all such \code{time_value}s.)}
+
+\item{ref_time_values}{Reference time values / versions for sliding
+computations; each element of this vector serves both as the anchor point
+for the \code{time_value} window for the computation and the \code{max_version}
+\code{as_of} which we fetch data in this window. If missing, then this will set
+to a regularly-spaced sequence of values set to cover the range of
+\code{version}s in the \code{DT} plus the \code{versions_end}; the spacing of values will
+be guessed (using the GCD of the skips between values).}
+
+\item{time_step}{Optional function used to define the meaning of one time
+step, which if specified, overrides the default choice based on the
+\code{time_value} column. This function must take a positive integer and return
+an object of class \code{lubridate::period}. For example, we can use \code{time_step = lubridate::hours} in order to set the time step to be one hour (this
+would only be meaningful if \code{time_value} is of class \code{POSIXct}).}
+
+\item{new_col_name}{String indicating the name of the new column that will
+contain the derivative values. Default is "slide_value"; note that setting
+\code{new_col_name} equal to an existing column name will overwrite this column.}
+
+\item{as_list_col}{Should the slide results be held in a list column, or be
+\link[tidyr:chop]{unchopped}/\link[tidyr:unnest]{unnested}? Default is \code{FALSE},
+in which case a list object returned by \code{f} would be unnested (using
+\code{\link[tidyr:unnest]{tidyr::unnest()}}), and, if the slide computations output data frames,
+the names of the resulting columns are given by prepending \code{new_col_name}
+to the names of the list elements.}
+
+\item{names_sep}{String specifying the separator to use in \code{tidyr::unnest()}
+when \code{as_list_col = FALSE}. Default is "_". Using \code{NULL} drops the prefix
+from \code{new_col_name} entirely.}
+
+\item{all_versions}{(Not the same as \code{all_rows} parameter of \code{epi_slide}.) If
+\code{all_versions = TRUE}, then \code{f} will be passed the version history (all
+\code{version <= ref_time_value}) for rows having \code{time_value} between
+\code{ref_time_value - before} and \code{ref_time_value}. Otherwise, \code{f} will be
+passed only the most recent \code{version} for every unique \code{time_value}.
+Default is \code{FALSE}.}
+}
+\value{
+A tibble whose columns are: the grouping variables, \code{time_value},
+containing the reference time values for the slide computation, and a
+column named according to the \code{new_col_name} argument, containing the slide
+values.
+}
+\description{
+Slides a given function over variables in an \code{epi_archive} object. This
+behaves similarly to \code{epi_slide()}, with the key exception that it is
+version-aware: the sliding computation at any given reference time t is
+performed on \strong{data that would have been available as of t}. See the
+\href{https://cmu-delphi.github.io/epiprocess/articles/archive.html}{archive vignette} for
+examples.
+}
+\details{
+A few key distinctions between the current function and \code{epi_slide()}:
+\enumerate{
+\item In \code{f} functions for \code{epix_slide}, one should not assume that the input
+data to contain any rows with \code{time_value} matching the computation's
+\code{ref_time_value} (accessible via \verb{attributes(<data>)$metadata$as_of}); for
+typical epidemiological surveillance data, observations pertaining to a
+particular time period (\code{time_value}) are first reported \code{as_of} some
+instant after that time period has ended.
+\item \code{epix_slide()} doesn't accept an \code{after} argument; its windows extend
+from \code{before} time steps before a given \code{ref_time_value} through the last
+\code{time_value} available as of version \code{ref_time_value} (typically, this
+won't include \code{ref_time_value} itself, as observations about a particular
+time interval (e.g., day) are only published after that time interval
+ends); \code{epi_slide} windows extend from \code{before} time steps before a
+\code{ref_time_value} through \code{after} time steps after \code{ref_time_value}.
+\item The input class and columns are similar but different: \code{epix_slide}
+(with the default \code{all_versions=FALSE}) keeps all columns and the
+\code{epi_df}-ness of the first argument to each computation; \code{epi_slide} only
+provides the grouping variables in the second input, and will convert the
+first input into a regular tibble if the grouping variables include the
+essential \code{geo_value} column. (With \code{all_versions=TRUE}, \code{epix_slide} will
+will provide an \code{epi_archive} rather than an \code{epi-df} to each
+computation.)
+\item The output class and columns are similar but different: \code{epix_slide()}
+returns a tibble containing only the grouping variables, \code{time_value}, and
+the new column(s) from the slide computations, whereas \code{epi_slide()}
+returns an \code{epi_df} with all original variables plus the new columns from
+the slide computations. (Both will mirror the grouping or ungroupedness of
+their input, with one exception: \code{epi_archive}s can have trivial
+(zero-variable) groupings, but these will be dropped in \code{epix_slide}
+results as they are not supported by tibbles.)
+\item There are no size stability checks or element/row recycling to maintain
+size stability in \code{epix_slide}, unlike in \code{epi_slide}. (\code{epix_slide} is
+roughly analogous to \code{\link[dplyr:group_map]{dplyr::group_modify}}, while \code{epi_slide} is roughly
+analogous to \code{dplyr::mutate} followed by \code{dplyr::arrange}) This is detailed
+in the "advanced" vignette.
+\item \code{all_rows} is not supported in \code{epix_slide}; since the slide
+computations are allowed more flexibility in their outputs than in
+\code{epi_slide}, we can't guess a good representation for missing computations
+for excluded group-\code{ref_time_value} pairs.
+\item The \code{ref_time_values} default for \code{epix_slide} is based on making an
+evenly-spaced sequence out of the \code{version}s in the \code{DT} plus the
+\code{versions_end}, rather than the \code{time_value}s.
+}
+
+Apart from the above distinctions, the interfaces between \code{epix_slide()} and
+\code{epi_slide()} are the same.
+
+Furthermore, the current function can be considerably slower than
+\code{epi_slide()}, for two reasons: (1) it must repeatedly fetch
+properly-versioned snapshots from the data archive (via its \code{as_of()}
+method), and (2) it performs a "manual" sliding of sorts, and does not
+benefit from the highly efficient \code{slider} package. For this reason, it
+should never be used in place of \code{epi_slide()}, and only used when
+version-aware sliding is necessary (as it its purpose).
+
+Finally, this is simply a wrapper around the \code{slide()} method of the
+\code{epi_archive} and \code{grouped_epi_archive} classes, so if \code{x} is an
+object of either of these classes, then:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{epix_slide(x, new_var = comp(old_var), before = 119)
+}\if{html}{\out{</div>}}
+
+is equivalent to:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{x$slide(new_var = comp(old_var), before = 119)
+}\if{html}{\out{</div>}}
+
+Mutation and aliasing: \code{epix_slide} and \verb{$slide} will not perform in-place
+mutation of the input archives on their own. In some edge cases the inputs it
+feeds to the slide computations may alias parts of the input archive, so copy
+the slide computation inputs if needed before using mutating operations like
+\code{data.table}'s \verb{:=} operator. Similarly, in some edge cases, the output of
+the slide operation may alias parts of the input archive, so similarly, make
+sure to clone and/or copy appropriately before using in-place mutation.
+}
+\examples{
+library(dplyr)
+
+# Reference time points for which we want to compute slide values:
+ref_time_values <- seq(as.Date("2020-06-01"),
+  as.Date("2020-06-15"),
+  by = "1 day"
+)
+
+# A simple (but not very useful) example (see the archive vignette for a more
+# realistic one):
+archive_cases_dv_subset \%>\%
+  group_by(geo_value) \%>\%
+  epix_slide(
+    f = ~ mean(.x$case_rate_7d_av),
+    before = 2,
+    ref_time_values = ref_time_values,
+    new_col_name = "case_rate_7d_av_recent_av"
+  ) \%>\%
+  ungroup()
+# We requested time windows that started 2 days before the corresponding time
+# values. The actual number of `time_value`s in each computation depends on
+# the reporting latency of the signal and `time_value` range covered by the
+# archive (2020-06-01 -- 2021-11-30 in this example).  In this case, we have
+# * 0 `time_value`s, for ref time 2020-06-01 --> the result is automatically
+#                                                discarded
+# * 1 `time_value`, for ref time 2020-06-02
+# * 2 `time_value`s, for the rest of the results
+# * never the 3 `time_value`s we would get from `epi_slide`, since, because
+#   of data latency, we'll never have an observation
+#   `time_value == ref_time_value` as of `ref_time_value`.
+# The example below shows this type of behavior in more detail.
+
+# Examining characteristics of the data passed to each computation with
+# `all_versions=FALSE`.
+archive_cases_dv_subset \%>\%
+  group_by(geo_value) \%>\%
+  epix_slide(
+    function(x, gk, rtv) {
+      tibble(
+        time_range = if (nrow(x) == 0L) {
+          "0 `time_value`s"
+        } else {
+          sprintf("\%s -- \%s", min(x$time_value), max(x$time_value))
+        },
+        n = nrow(x),
+        class1 = class(x)[[1L]]
+      )
+    },
+    before = 5, all_versions = FALSE,
+    ref_time_values = ref_time_values, names_sep = NULL
+  ) \%>\%
+  ungroup() \%>\%
+  arrange(geo_value, time_value)
+
+# --- Advanced: ---
+
+# `epix_slide` with `all_versions=FALSE` (the default) applies a
+# version-unaware computation to several versions of the data. We can also
+# use `all_versions=TRUE` to apply a version-*aware* computation to several
+# versions of the data, again looking at characteristics of the data passed
+# to each computation. In this case, each computation should expect an
+# `epi_archive` containing the relevant version data:
+
+archive_cases_dv_subset \%>\%
+  group_by(geo_value) \%>\%
+  epix_slide(
+    function(x, gk, rtv) {
+      tibble(
+        versions_start = if (nrow(x$DT) == 0L) {
+          "NA (0 rows)"
+        } else {
+          toString(min(x$DT$version))
+        },
+        versions_end = x$versions_end,
+        time_range = if (nrow(x$DT) == 0L) {
+          "0 `time_value`s"
+        } else {
+          sprintf("\%s -- \%s", min(x$DT$time_value), max(x$DT$time_value))
+        },
+        n = nrow(x$DT),
+        class1 = class(x)[[1L]]
+      )
+    },
+    before = 5, all_versions = TRUE,
+    ref_time_values = ref_time_values, names_sep = NULL
+  ) \%>\%
+  ungroup() \%>\%
+  # Focus on one geo_value so we can better see the columns above:
+  filter(geo_value == "ca") \%>\%
+  select(-geo_value)
+
+}
diff --git a/man/epix_truncate_versions_after.Rd b/man/epix_truncate_versions_after.Rd
index 8f741418..f30be07f 100644
--- a/man/epix_truncate_versions_after.Rd
+++ b/man/epix_truncate_versions_after.Rd
@@ -1,9 +1,12 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/methods-epi_archive.R
+% Please edit documentation in R/methods-epi_archive.R,
+%   R/methods-epi_archive_new.R
 \name{epix_truncate_versions_after}
 \alias{epix_truncate_versions_after}
 \title{Filter an \code{epi_archive} object to keep only older versions}
 \usage{
+epix_truncate_versions_after(x, max_version)
+
 epix_truncate_versions_after(x, max_version)
 }
 \arguments{
@@ -15,9 +18,14 @@ current archive data having \code{version} less than or equal to the
 specified \code{max_version}}
 }
 \value{
+An \code{epi_archive} object
+
 An \code{epi_archive} object
 }
 \description{
+Generates a filtered \code{epi_archive} from an \code{epi_archive} object, keeping
+only rows with \code{version} falling on or before a specified date.
+
 Generates a filtered \code{epi_archive} from an \code{epi_archive} object, keeping
 only rows with \code{version} falling on or before a specified date.
 }
diff --git a/man/epix_truncate_versions_after.grouped_epi_archive2.Rd b/man/epix_truncate_versions_after.grouped_epi_archive2.Rd
new file mode 100644
index 00000000..5fba48fb
--- /dev/null
+++ b/man/epix_truncate_versions_after.grouped_epi_archive2.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/grouped_archive_new.R
+\name{epix_truncate_versions_after.grouped_epi_archive2}
+\alias{epix_truncate_versions_after.grouped_epi_archive2}
+\title{Truncate versions after a given version, grouped}
+\usage{
+\method{epix_truncate_versions_after}{grouped_epi_archive2}(grouped_epi_archive, max_version)
+}
+\description{
+Truncate versions after a given version, grouped
+}
diff --git a/man/fill_through_version.epi_archive2.Rd b/man/fill_through_version.epi_archive2.Rd
new file mode 100644
index 00000000..48afb864
--- /dev/null
+++ b/man/fill_through_version.epi_archive2.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{fill_through_version.epi_archive2}
+\alias{fill_through_version.epi_archive2}
+\title{Fill through version}
+\usage{
+\method{fill_through_version}{epi_archive2}(epi_archive, fill_versions_end, how = c("na", "locf"))
+}
+\arguments{
+\item{epi_archive}{an \code{epi_archive} object}
+
+\item{fill_versions_end}{as in \code{\link{epix_fill_through_version}}}
+
+\item{how}{as in \code{\link{epix_fill_through_version}}}
+}
+\description{
+Fill in unobserved history using requested scheme by mutating
+the given object and potentially reseating its fields. See
+\code{\link{epix_fill_through_version}}, which doesn't mutate the input archive but
+might alias its fields.
+}
diff --git a/man/group_by.epi_archive.Rd b/man/group_by.epi_archive.Rd
index 5e867bf3..f157e834 100644
--- a/man/group_by.epi_archive.Rd
+++ b/man/group_by.epi_archive.Rd
@@ -1,8 +1,14 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/methods-epi_archive.R, R/grouped_epi_archive.R
+% Please edit documentation in R/methods-epi_archive.R, R/grouped_archive_new.R,
+%   R/grouped_epi_archive.R
 \name{group_by.epi_archive}
 \alias{group_by.epi_archive}
 \alias{grouped_epi_archive}
+\alias{group_by.grouped_epi_archive2}
+\alias{group_by_drop_default.grouped_epi_archive2}
+\alias{groups.grouped_epi_archive2}
+\alias{ungroup.grouped_epi_archive2}
+\alias{is_grouped_epi_archive2}
 \alias{group_by.grouped_epi_archive}
 \alias{groups.grouped_epi_archive}
 \alias{ungroup.grouped_epi_archive}
@@ -12,6 +18,21 @@
 \usage{
 \method{group_by}{epi_archive}(.data, ..., .add = FALSE, .drop = dplyr::group_by_drop_default(.data))
 
+\method{group_by}{grouped_epi_archive2}(
+  grouped_epi_archive,
+  ...,
+  .add = FALSE,
+  .drop = dplyr::group_by_drop_default(grouped_epi_archive)
+)
+
+\method{group_by_drop_default}{grouped_epi_archive2}(grouped_epi_archive)
+
+\method{groups}{grouped_epi_archive2}(grouped_epi_archive)
+
+\method{ungroup}{grouped_epi_archive2}(grouped_epi_archive, ...)
+
+is_grouped_epi_archive2(x)
+
 \method{group_by}{grouped_epi_archive}(.data, ..., .add = FALSE, .drop = dplyr::group_by_drop_default(.data))
 
 \method{groups}{grouped_epi_archive}(x)
diff --git a/man/group_by.epi_archive2.Rd b/man/group_by.epi_archive2.Rd
new file mode 100644
index 00000000..3191b134
--- /dev/null
+++ b/man/group_by.epi_archive2.Rd
@@ -0,0 +1,147 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{group_by.epi_archive2}
+\alias{group_by.epi_archive2}
+\alias{grouped_epi_archive}
+\title{\code{group_by} and related methods for \code{epi_archive}, \code{grouped_epi_archive}}
+\usage{
+\method{group_by}{epi_archive2}(
+  epi_archive,
+  ...,
+  .add = FALSE,
+  .drop = dplyr::group_by_drop_default(epi_archive)
+)
+}
+\arguments{
+\item{...}{Similar to \code{\link[dplyr:group_by]{dplyr::group_by}} (see "Details:" for edge cases);
+\itemize{
+\item For \code{group_by}: unquoted variable name(s) or other
+\link[dplyr:dplyr_data_masking]{"data masking"} expression(s). It's possible to
+use \code{\link[dplyr:mutate]{dplyr::mutate}}-like syntax here to calculate new columns on which to
+perform grouping, but note that, if you are regrouping an already-grouped
+\code{.data} object, the calculations will be carried out ignoring such grouping
+(same as \link[dplyr:group_by]{in dplyr}).
+\item For \code{ungroup}: either
+\itemize{
+\item empty, in order to remove the grouping and output an \code{epi_archive}; or
+\item variable name(s) or other \link[dplyr:dplyr_tidy_select]{"tidy-select"}
+expression(s), in order to remove the matching variables from the list of
+grouping variables, and output another \code{grouped_epi_archive}.
+}
+}}
+
+\item{.add}{Boolean. If \code{FALSE}, the default, the output will be grouped by
+the variable selection from \code{...} only; if \code{TRUE}, the output will be
+grouped by the current grouping variables plus the variable selection from
+\code{...}.}
+
+\item{.drop}{As described in \code{\link[dplyr:group_by]{dplyr::group_by}}; determines treatment of
+factor columns.}
+
+\item{.data}{An \code{epi_archive} or \code{grouped_epi_archive}}
+
+\item{x}{For \code{groups} or \code{ungroup}: a \code{grouped_epi_archive}; for
+\code{is_grouped_epi_archive}: any object}
+
+\item{.tbl}{(For \code{group_by_drop_default}:) an \code{epi_archive} or
+\code{grouped_epi_archive} (\code{epi_archive} dispatches to the S3 default method;
+\code{grouped_epi_archive} dispatches its own S3 method)}
+}
+\description{
+\code{group_by} and related methods for \code{epi_archive}, \code{grouped_epi_archive}
+}
+\details{
+To match \code{dplyr}, \code{group_by} allows "data masking" (also referred to as
+"tidy evaluation") expressions \code{...}, not just column names, in a way similar
+to \code{mutate}. Note that replacing or removing key columns with these
+expressions is disabled.
+
+\code{archive \%>\% group_by()} and other expressions that group or regroup by zero
+columns (indicating that all rows should be treated as part of one large
+group) will output a \code{grouped_epi_archive}, in order to enable the use of
+\code{grouped_epi_archive} methods on the result. This is in slight contrast to
+the same operations on tibbles and grouped tibbles, which will \emph{not} output a
+\code{grouped_df} in these circumstances.
+
+Using \code{group_by} with \code{.add=FALSE} to override the existing grouping is
+disabled; instead, \code{ungroup} first then \code{group_by}.
+
+Mutation and aliasing: \code{group_by} tries to use a shallow copy of the \code{DT},
+introducing column-level aliasing between its input and its result. This
+doesn't follow the general model for most \code{data.table} operations, which
+seems to be that, given an nonaliased (i.e., unique) pointer to a
+\code{data.table} object, its pointers to its columns should also be nonaliased.
+If you mutate any of the columns of either the input or result, first ensure
+that it is fine if columns of the other are also mutated, but do not rely on
+such behavior to occur. Additionally, never perform mutation on the key
+columns at all (except for strictly increasing transformations), as this will
+invalidate sortedness assumptions about the rows.
+
+\code{group_by_drop_default} on (ungrouped) \code{epi_archive}s is expected to dispatch
+to \code{group_by_drop_default.default} (but there is a dedicated method for
+\code{grouped_epi_archive}s).
+}
+\examples{
+
+grouped_archive <- archive_cases_dv_subset \%>\% group_by(geo_value)
+
+# `print` for metadata and method listing:
+grouped_archive \%>\% print()
+
+# The primary use for grouping is to perform a grouped `epix_slide`:
+
+archive_cases_dv_subset \%>\%
+  group_by(geo_value) \%>\%
+  epix_slide(
+    f = ~ mean(.x$case_rate_7d_av),
+    before = 2,
+    ref_time_values = as.Date("2020-06-11") + 0:2,
+    new_col_name = "case_rate_3d_av"
+  ) \%>\%
+  ungroup()
+
+# -----------------------------------------------------------------
+
+# Advanced: some other features of dplyr grouping are implemented:
+
+library(dplyr)
+toy_archive <-
+  tribble(
+    ~geo_value, ~age_group, ~time_value, ~version, ~value,
+    "us", "adult", "2000-01-01", "2000-01-02", 121,
+    "us", "pediatric", "2000-01-02", "2000-01-03", 5, # (addition)
+    "us", "adult", "2000-01-01", "2000-01-03", 125, # (revision)
+    "us", "adult", "2000-01-02", "2000-01-03", 130 # (addition)
+  ) \%>\%
+  mutate(
+    age_group = ordered(age_group, c("pediatric", "adult")),
+    time_value = as.Date(time_value),
+    version = as.Date(version)
+  ) \%>\%
+  as_epi_archive(other_keys = "age_group")
+
+# The following are equivalent:
+toy_archive \%>\% group_by(geo_value, age_group)
+toy_archive \%>\%
+  group_by(geo_value) \%>\%
+  group_by(age_group, .add = TRUE)
+grouping_cols <- c("geo_value", "age_group")
+toy_archive \%>\% group_by(across(all_of(grouping_cols)))
+
+# And these are equivalent:
+toy_archive \%>\% group_by(geo_value)
+toy_archive \%>\%
+  group_by(geo_value, age_group) \%>\%
+  ungroup(age_group)
+
+# To get the grouping variable names as a `list` of `name`s (a.k.a. symbols):
+toy_archive \%>\%
+  group_by(geo_value) \%>\%
+  groups()
+
+toy_archive \%>\%
+  group_by(geo_value, age_group, .drop = FALSE) \%>\%
+  epix_slide(f = ~ sum(.x$value), before = 20) \%>\%
+  ungroup()
+
+}
diff --git a/man/is_epi_archive2.Rd b/man/is_epi_archive2.Rd
new file mode 100644
index 00000000..fd2f0a1f
--- /dev/null
+++ b/man/is_epi_archive2.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{is_epi_archive2}
+\alias{is_epi_archive2}
+\title{Test for \code{epi_archive} format}
+\usage{
+is_epi_archive2(x, grouped_okay = FALSE)
+}
+\arguments{
+\item{x}{An object.}
+
+\item{grouped_okay}{Optional; Boolean; should a \code{grouped_epi_archive} also
+count? Default is \code{FALSE}.}
+}
+\value{
+\code{TRUE} if the object inherits from \code{epi_archive}.
+}
+\description{
+Test for \code{epi_archive} format
+}
+\examples{
+is_epi_archive(jhu_csse_daily_subset) # FALSE (this is an epi_df, not epi_archive)
+is_epi_archive(archive_cases_dv_subset) # TRUE
+
+# By default, grouped_epi_archives don't count as epi_archives, as they may
+# support a different set of operations from regular `epi_archives`. This
+# behavior can be controlled by `grouped_okay`.
+grouped_archive <- archive_cases_dv_subset$group_by(geo_value)
+is_epi_archive(grouped_archive) # FALSE
+is_epi_archive(grouped_archive, grouped_okay = TRUE) # TRUE
+
+}
+\seealso{
+\code{\link{is_grouped_epi_archive}}
+}
diff --git a/man/max_version_with_row_in.Rd b/man/max_version_with_row_in.Rd
index cca554fa..6f0d35b3 100644
--- a/man/max_version_with_row_in.Rd
+++ b/man/max_version_with_row_in.Rd
@@ -1,18 +1,25 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/archive.R
+% Please edit documentation in R/archive.R, R/archive_new.R
 \name{max_version_with_row_in}
 \alias{max_version_with_row_in}
 \title{\code{max(x$version)}, with error if \code{x} has 0 rows}
 \usage{
+max_version_with_row_in(x)
+
 max_version_with_row_in(x)
 }
 \arguments{
 \item{x}{\code{x} argument of \code{\link{as_epi_archive}}}
 }
 \value{
+\code{max(x$version)} if it has any rows; raises error if it has 0 rows or
+an \code{NA} version value
+
 \code{max(x$version)} if it has any rows; raises error if it has 0 rows or
 an \code{NA} version value
 }
 \description{
+Exported to make defaults more easily copyable.
+
 Exported to make defaults more easily copyable.
 }
diff --git a/man/merge_epi_archive2.Rd b/man/merge_epi_archive2.Rd
new file mode 100644
index 00000000..dd1e671e
--- /dev/null
+++ b/man/merge_epi_archive2.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{merge_epi_archive2}
+\alias{merge_epi_archive2}
+\title{Merge epi archive}
+\usage{
+merge_epi_archive2(
+  x,
+  y,
+  sync = c("forbid", "na", "locf", "truncate"),
+  compactify = TRUE
+)
+}
+\arguments{
+\item{x}{as in \code{\link{epix_merge}}}
+
+\item{y}{as in \code{\link{epix_merge}}}
+
+\item{sync}{as in \code{\link{epix_merge}}}
+
+\item{compactify}{as in \code{\link{epix_merge}}}
+}
+\description{
+Merges another \code{epi_archive} with the current one, mutating the
+current one by reseating its \code{DT} and several other fields, but avoiding
+mutation of the old \code{DT}; returns the current archive
+\link[base:invisible]{invisibly}. See \code{\link{epix_merge}} for a full description
+of the non-R6-method version, which does not mutate either archive, and
+does not alias either archive's \code{DT}.a
+}
diff --git a/man/new_epi_archive2.Rd b/man/new_epi_archive2.Rd
new file mode 100644
index 00000000..52141190
--- /dev/null
+++ b/man/new_epi_archive2.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{new_epi_archive2}
+\alias{new_epi_archive2}
+\title{New epi archive}
+\usage{
+new_epi_archive2(
+  x,
+  geo_type = NULL,
+  time_type = NULL,
+  other_keys = NULL,
+  additional_metadata = NULL,
+  compactify = NULL,
+  clobberable_versions_start = NA,
+  versions_end = NULL
+)
+}
+\arguments{
+\item{x}{A data.frame, data.table, or tibble, with columns \code{geo_value},
+\code{time_value}, \code{version}, and then any additional number of columns.}
+
+\item{geo_type}{Type for the geo values. If missing, then the function will
+attempt to infer it from the geo values present; if this fails, then it
+will be set to "custom".}
+
+\item{time_type}{Type for the time values. If missing, then the function will
+attempt to infer it from the time values present; if this fails, then it
+will be set to "custom".}
+
+\item{other_keys}{Character vector specifying the names of variables in \code{x}
+that should be considered key variables (in the language of \code{data.table})
+apart from "geo_value", "time_value", and "version".}
+
+\item{additional_metadata}{List of additional metadata to attach to the
+\code{epi_archive} object. The metadata will have \code{geo_type} and \code{time_type}
+fields; named entries from the passed list or will be included as well.}
+
+\item{compactify}{Optional; Boolean or \code{NULL}: should we remove rows that are
+considered redundant for the purposes of \code{epi_archive}'s built-in methods
+such as \code{as_of}? As these methods use the last version of each observation
+carried forward (LOCF) to interpolate between the version data provided,
+rows that don't change these LOCF results can potentially be omitted to
+save space while maintaining the same behavior (with the help of the
+\code{clobberable_versions_start} and \code{versions_end} fields in some edge cases).
+\code{TRUE} will remove these rows, \code{FALSE} will not, and missing or \code{NULL} will
+remove these rows and issue a warning. Generally, this can be set to
+\code{TRUE}, but if you directly inspect or edit the fields of the \code{epi_archive}
+such as its \code{DT}, or rely on redundant updates to achieve a certain
+behavior of the \code{ref_time_values} default in \code{epix_slide}, you will have to
+determine whether \code{compactify=TRUE} will produce the desired results. If
+compactification here is removing a large proportion of the rows, this may
+indicate a potential for space, time, or bandwidth savings upstream the
+data pipeline, e.g., by avoiding fetching, storing, or processing these
+rows of \code{x}.}
+
+\item{clobberable_versions_start}{Optional; as in \code{\link{as_epi_archive}}}
+
+\item{versions_end}{Optional; as in \code{\link{as_epi_archive}}}
+}
+\value{
+An \code{epi_archive} object.
+}
+\description{
+Creates a new \code{epi_archive} object.
+}
+\details{
+Refer to the documentation for \code{\link[=as_epi_archive]{as_epi_archive()}} for more information
+and examples of parameter names.
+}
diff --git a/man/next_after.Rd b/man/next_after.Rd
index 5170e8d9..82fd3ebb 100644
--- a/man/next_after.Rd
+++ b/man/next_after.Rd
@@ -1,17 +1,23 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/archive.R
+% Please edit documentation in R/archive.R, R/archive_new.R
 \name{next_after}
 \alias{next_after}
 \title{Get the next possible value greater than \code{x} of the same type}
 \usage{
+next_after(x)
+
 next_after(x)
 }
 \arguments{
 \item{x}{the starting "value"(s)}
 }
 \value{
+same class, typeof, and length as \code{x}
+
 same class, typeof, and length as \code{x}
 }
 \description{
+Get the next possible value greater than \code{x} of the same type
+
 Get the next possible value greater than \code{x} of the same type
 }
diff --git a/man/print.epi_archive2.Rd b/man/print.epi_archive2.Rd
new file mode 100644
index 00000000..0105c47e
--- /dev/null
+++ b/man/print.epi_archive2.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{print.epi_archive2}
+\alias{print.epi_archive2}
+\title{Print information about an \code{epi_archive} object}
+\usage{
+\method{print}{epi_archive2}(epi_archive, class = TRUE, methods = TRUE)
+}
+\arguments{
+\item{class}{Boolean; whether to print the class label header}
+
+\item{methods}{Boolean; whether to print all available methods of
+the archive}
+}
+\description{
+Print information about an \code{epi_archive} object
+}
diff --git a/man/slide.epi_archive2.Rd b/man/slide.epi_archive2.Rd
new file mode 100644
index 00000000..54db5636
--- /dev/null
+++ b/man/slide.epi_archive2.Rd
@@ -0,0 +1,101 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{slide.epi_archive2}
+\alias{slide.epi_archive2}
+\title{Slide over epi archive}
+\usage{
+\method{slide}{epi_archive2}(
+  epi_archive,
+  f,
+  ...,
+  before,
+  ref_time_values,
+  time_step,
+  new_col_name = "slide_value",
+  as_list_col = FALSE,
+  names_sep = "_",
+  all_versions = FALSE
+)
+}
+\arguments{
+\item{f}{Function, formula, or missing; together with \code{...} specifies the
+computation to slide. To "slide" means to apply a computation over a
+sliding (a.k.a. "rolling") time window for each data group. The window is
+determined by the \code{before} parameter described below. One time step is
+typically one day or one week; see \code{\link{epi_slide}} details for more
+explanation. If a function, \code{f} must take an \code{epi_df} with the same
+column names as the archive's \code{DT}, minus the \code{version} column; followed
+by a one-row tibble containing the values of the grouping variables for
+the associated group; followed by a reference time value, usually as a
+\code{Date} object; followed by any number of named arguments. If a formula,
+\code{f} can operate directly on columns accessed via \code{.x$var} or \code{.$var}, as
+in \code{~ mean (.x$var)} to compute a mean of a column \code{var} for each
+group-\code{ref_time_value} combination. The group key can be accessed via
+\code{.y} or \code{.group_key}, and the reference time value can be accessed via
+\code{.z} or \code{.ref_time_value}. If \code{f} is missing, then \code{...} will specify the
+computation.}
+
+\item{...}{Additional arguments to pass to the function or formula specified
+via \code{f}. Alternatively, if \code{f} is missing, then \code{...} is interpreted as an
+expression for tidy evaluation; in addition to referring to columns
+directly by name, the expression has access to \code{.data} and \code{.env} pronouns
+as in \code{dplyr} verbs, and can also refer to the \code{.group_key} and
+\code{.ref_time_value}. See details of \code{\link{epi_slide}}.}
+
+\item{before}{How far \code{before} each \code{ref_time_value} should the sliding
+window extend? If provided, should be a single, non-NA,
+\link[vctrs:vec_cast]{integer-compatible} number of time steps. This window
+endpoint is inclusive. For example, if \code{before = 7}, and one time step is
+one day, then to produce a value for a \code{ref_time_value} of January 8, we
+apply the given function or formula to data (for each group present) with
+\code{time_value}s from January 1 onward, as they were reported on January 8.
+For typical disease surveillance sources, this will not include any data
+with a \code{time_value} of January 8, and, depending on the amount of reporting
+latency, may not include January 7 or even earlier \code{time_value}s. (If
+instead the archive were to hold nowcasts instead of regular surveillance
+data, then we would indeed expect data for \code{time_value} January 8. If it
+were to hold forecasts, then we would expect data for \code{time_value}s after
+January 8, and the sliding window would extend as far after each
+\code{ref_time_value} as needed to include all such \code{time_value}s.)}
+
+\item{ref_time_values}{Reference time values / versions for sliding
+computations; each element of this vector serves both as the anchor point
+for the \code{time_value} window for the computation and the \code{max_version}
+\code{as_of} which we fetch data in this window. If missing, then this will set
+to a regularly-spaced sequence of values set to cover the range of
+\code{version}s in the \code{DT} plus the \code{versions_end}; the spacing of values will
+be guessed (using the GCD of the skips between values).}
+
+\item{time_step}{Optional function used to define the meaning of one time
+step, which if specified, overrides the default choice based on the
+\code{time_value} column. This function must take a positive integer and return
+an object of class \code{lubridate::period}. For example, we can use \code{time_step = lubridate::hours} in order to set the time step to be one hour (this
+would only be meaningful if \code{time_value} is of class \code{POSIXct}).}
+
+\item{new_col_name}{String indicating the name of the new column that will
+contain the derivative values. Default is "slide_value"; note that setting
+\code{new_col_name} equal to an existing column name will overwrite this column.}
+
+\item{as_list_col}{Should the slide results be held in a list column, or be
+\link[tidyr:chop]{unchopped}/\link[tidyr:unnest]{unnested}? Default is \code{FALSE},
+in which case a list object returned by \code{f} would be unnested (using
+\code{\link[tidyr:unnest]{tidyr::unnest()}}), and, if the slide computations output data frames,
+the names of the resulting columns are given by prepending \code{new_col_name}
+to the names of the list elements.}
+
+\item{names_sep}{String specifying the separator to use in \code{tidyr::unnest()}
+when \code{as_list_col = FALSE}. Default is "_". Using \code{NULL} drops the prefix
+from \code{new_col_name} entirely.}
+
+\item{all_versions}{(Not the same as \code{all_rows} parameter of \code{epi_slide}.) If
+\code{all_versions = TRUE}, then \code{f} will be passed the version history (all
+\code{version <= ref_time_value}) for rows having \code{time_value} between
+\code{ref_time_value - before} and \code{ref_time_value}. Otherwise, \code{f} will be
+passed only the most recent \code{version} for every unique \code{time_value}.
+Default is \code{FALSE}.}
+}
+\description{
+Slides a given function over variables in an \code{epi_archive}
+object. See the documentation for the wrapper function \code{\link[=epix_slide]{epix_slide()}} for
+details. The parameter descriptions below are copied from there
+}
diff --git a/man/slide.grouped_epi_archive2.Rd b/man/slide.grouped_epi_archive2.Rd
new file mode 100644
index 00000000..b5aac24c
--- /dev/null
+++ b/man/slide.grouped_epi_archive2.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/grouped_archive_new.R
+\name{slide.grouped_epi_archive2}
+\alias{slide.grouped_epi_archive2}
+\title{Slide over grouped epi archive}
+\usage{
+\method{slide}{grouped_epi_archive2}(
+  grouped_epi_archive,
+  f,
+  ...,
+  before,
+  ref_time_values,
+  time_step,
+  new_col_name = "slide_value",
+  as_list_col = FALSE,
+  names_sep = "_",
+  all_versions = FALSE
+)
+}
+\description{
+Slides a given function over variables in a \code{grouped_epi_archive}
+object. See the documentation for the wrapper function \code{\link[=epix_slide]{epix_slide()}} for
+details.
+}
diff --git a/man/truncate_versions_after.epi_archive2.Rd b/man/truncate_versions_after.epi_archive2.Rd
new file mode 100644
index 00000000..08ae40d4
--- /dev/null
+++ b/man/truncate_versions_after.epi_archive2.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/archive_new.R
+\name{truncate_versions_after.epi_archive2}
+\alias{truncate_versions_after.epi_archive2}
+\title{Truncate versions after}
+\usage{
+\method{truncate_versions_after}{epi_archive2}(epi_archive, max_version)
+}
+\arguments{
+\item{epi_archive}{as in \code{\link{epix_truncate_versions_after}}}
+
+\item{max_version}{as in \code{\link{epix_truncate_versions_after}}}
+}
+\description{
+Filter to keep only older versions, mutating the archive by
+potentially reseating but not mutating some fields. \code{DT} is likely, but not
+guaranteed, to be copied. Returns the mutated archive
+\link[base:invisible]{invisibly}.
+}
diff --git a/man/truncate_versions_after.grouped_epi_archive2.Rd b/man/truncate_versions_after.grouped_epi_archive2.Rd
new file mode 100644
index 00000000..7c25950f
--- /dev/null
+++ b/man/truncate_versions_after.grouped_epi_archive2.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/grouped_archive_new.R
+\name{truncate_versions_after.grouped_epi_archive2}
+\alias{truncate_versions_after.grouped_epi_archive2}
+\title{Truncate versions after a given version, grouped}
+\usage{
+\method{truncate_versions_after}{grouped_epi_archive2}(grouped_epi_archive, max_version)
+}
+\arguments{
+\item{max_version}{as in \code{\link{epix_truncate_versions_after}}}
+
+\item{x}{as in \code{\link{epix_truncate_versions_after}}}
+}
+\description{
+Filter to keep only older versions by mutating the underlying
+\code{epi_archive} using \verb{$truncate_versions_after}. Returns the mutated
+\code{grouped_epi_archive} \link[base:invisible]{invisibly}.
+}
diff --git a/tests/testthat/test-archive_new.R b/tests/testthat/test-archive_new.R
new file mode 100644
index 00000000..f2d0bde5
--- /dev/null
+++ b/tests/testthat/test-archive_new.R
@@ -0,0 +1,173 @@
+library(dplyr)
+
+test_that("first input must be a data.frame", {
+  expect_error(as_epi_archive2(c(1, 2, 3), compactify = FALSE),
+    regexp = "Must be of type 'data.frame'."
+  )
+})
+
+dt <- archive_cases_dv_subset$DT
+
+test_that("data.frame must contain geo_value, time_value and version columns", {
+  expect_error(as_epi_archive2(select(dt, -geo_value), compactify = FALSE),
+    regexp = "Columns `geo_value`, `time_value`, and `version` must be present in `x`."
+  )
+  expect_error(as_epi_archive2(select(dt, -time_value), compactify = FALSE),
+    regexp = "Columns `geo_value`, `time_value`, and `version` must be present in `x`."
+  )
+  expect_error(as_epi_archive2(select(dt, -version), compactify = FALSE),
+    regexp = "Columns `geo_value`, `time_value`, and `version` must be present in `x`."
+  )
+})
+
+test_that("other_keys can only contain names of the data.frame columns", {
+  expect_error(as_epi_archive2(dt, other_keys = "xyz", compactify = FALSE),
+    regexp = "`other_keys` must be contained in the column names of `x`."
+  )
+  expect_error(as_epi_archive2(dt, other_keys = "percent_cli", compactify = FALSE), NA)
+})
+
+test_that("other_keys cannot contain names geo_value, time_value or version", {
+  expect_error(as_epi_archive2(dt, other_keys = "geo_value", compactify = FALSE),
+    regexp = "`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\"."
+  )
+  expect_error(as_epi_archive2(dt, other_keys = "time_value", compactify = FALSE),
+    regexp = "`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\"."
+  )
+  expect_error(as_epi_archive2(dt, other_keys = "version", compactify = FALSE),
+    regexp = "`other_keys` cannot contain \"geo_value\", \"time_value\", or \"version\"."
+  )
+})
+
+test_that("Warning thrown when other_metadata contains overlapping names with geo_type or time_type fields", {
+  expect_warning(as_epi_archive2(dt, additional_metadata = list(geo_type = 1), compactify = FALSE),
+    regexp = "`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\"."
+  )
+  expect_warning(as_epi_archive2(dt, additional_metadata = list(time_type = 1), compactify = FALSE),
+    regexp = "`additional_metadata` names overlap with existing metadata fields \"geo_type\", \"time_type\"."
+  )
+})
+
+test_that("epi_archives are correctly instantiated with a variety of data types", {
+  # Data frame
+  df <- data.frame(
+    geo_value = "ca",
+    time_value = as.Date("2020-01-01"),
+    version = as.Date("2020-01-01") + 0:19,
+    value = 1:20
+  )
+
+  ea1 <- as_epi_archive2(df, compactify = FALSE)
+  expect_equal(key(ea1$DT), c("geo_value", "time_value", "version"))
+  expect_equal(ea1$additional_metadata, list())
+
+  ea2 <- as_epi_archive2(df, other_keys = "value", additional_metadata = list(value = df$value), compactify = FALSE)
+  expect_equal(key(ea2$DT), c("geo_value", "time_value", "value", "version"))
+  expect_equal(ea2$additional_metadata, list(value = df$value))
+
+  # Tibble
+  tib <- tibble::tibble(df, code = "x")
+
+  ea3 <- as_epi_archive2(tib, compactify = FALSE)
+  expect_equal(key(ea3$DT), c("geo_value", "time_value", "version"))
+  expect_equal(ea3$additional_metadata, list())
+
+  ea4 <- as_epi_archive2(tib, other_keys = "code", additional_metadata = list(value = df$value), compactify = FALSE)
+  expect_equal(key(ea4$DT), c("geo_value", "time_value", "code", "version"))
+  expect_equal(ea4$additional_metadata, list(value = df$value))
+
+  # Keyed data.table
+  kdt <- data.table::data.table(
+    geo_value = "ca",
+    time_value = as.Date("2020-01-01"),
+    version = as.Date("2020-01-01") + 0:19,
+    value = 1:20,
+    code = "CA",
+    key = "code"
+  )
+
+  ea5 <- as_epi_archive2(kdt, compactify = FALSE)
+  # Key from data.table isn't absorbed when as_epi_archive2 is used
+  expect_equal(key(ea5$DT), c("geo_value", "time_value", "version"))
+  expect_equal(ea5$additional_metadata, list())
+
+  ea6 <- as_epi_archive2(kdt, other_keys = "value", additional_metadata = list(value = df$value), compactify = FALSE)
+  # Mismatched keys, but the one from as_epi_archive2 overrides
+  expect_equal(key(ea6$DT), c("geo_value", "time_value", "value", "version"))
+  expect_equal(ea6$additional_metadata, list(value = df$value))
+
+  # Unkeyed data.table
+  udt <- data.table::data.table(
+    geo_value = "ca",
+    time_value = as.Date("2020-01-01"),
+    version = as.Date("2020-01-01") + 0:19,
+    value = 1:20,
+    code = "CA"
+  )
+
+  ea7 <- as_epi_archive2(udt, compactify = FALSE)
+  expect_equal(key(ea7$DT), c("geo_value", "time_value", "version"))
+  expect_equal(ea7$additional_metadata, list())
+
+  ea8 <- as_epi_archive2(udt, other_keys = "code", additional_metadata = list(value = df$value), compactify = FALSE)
+  expect_equal(key(ea8$DT), c("geo_value", "time_value", "code", "version"))
+  expect_equal(ea8$additional_metadata, list(value = df$value))
+
+  # epi_df
+  edf1 <- jhu_csse_daily_subset %>%
+    select(geo_value, time_value, cases) %>%
+    mutate(version = max(time_value), code = "USA")
+
+  ea9 <- as_epi_archive2(edf1, compactify = FALSE)
+  expect_equal(key(ea9$DT), c("geo_value", "time_value", "version"))
+  expect_equal(ea9$additional_metadata, list())
+
+  ea10 <- as_epi_archive2(edf1, other_keys = "code", additional_metadata = list(value = df$value), compactify = FALSE)
+  expect_equal(key(ea10$DT), c("geo_value", "time_value", "code", "version"))
+  expect_equal(ea10$additional_metadata, list(value = df$value))
+
+  # Keyed epi_df
+  edf2 <- data.frame(
+    geo_value = "al",
+    time_value = rep(as.Date("2020-01-01") + 0:9, 2),
+    version = c(
+      rep(as.Date("2020-01-25"), 10),
+      rep(as.Date("2020-01-26"), 10)
+    ),
+    cases = 1:20,
+    misc = "USA"
+  ) %>%
+    as_epi_df(additional_metadata = list(other_keys = "misc"))
+
+  ea11 <- as_epi_archive2(edf2, compactify = FALSE)
+  expect_equal(key(ea11$DT), c("geo_value", "time_value", "version"))
+  expect_equal(ea11$additional_metadata, list())
+
+  ea12 <- as_epi_archive2(edf2, other_keys = "misc", additional_metadata = list(value = df$misc), compactify = FALSE)
+  expect_equal(key(ea12$DT), c("geo_value", "time_value", "misc", "version"))
+  expect_equal(ea12$additional_metadata, list(value = df$misc))
+})
+
+test_that("`epi_archive` rejects nonunique keys", {
+  toy_update_tbl <-
+    tibble::tribble(
+      ~geo_value, ~age_group, ~time_value, ~version, ~value,
+      "us", "adult", "2000-01-01", "2000-01-02", 121,
+      "us", "adult", "2000-01-01", "2000-01-03", 125, # (revision)
+      "us", "adult", "2000-01-02", "2000-01-03", 130,
+      "us", "pediatric", "2000-01-01", "2000-01-02", 5
+    ) %>%
+    mutate(
+      age_group = ordered(age_group, c("pediatric", "adult")),
+      time_value = as.Date(time_value),
+      version = as.Date(version)
+    )
+  expect_error(
+    as_epi_archive2(toy_update_tbl),
+    class = "epiprocess__epi_archive_requires_unique_key"
+  )
+  expect_error(
+    regexp = NA,
+    as_epi_archive2(toy_update_tbl, other_keys = "age_group"),
+  )
+})
diff --git a/tests/testthat/test-compactify_new.R b/tests/testthat/test-compactify_new.R
new file mode 100644
index 00000000..f2887eaf
--- /dev/null
+++ b/tests/testthat/test-compactify_new.R
@@ -0,0 +1,110 @@
+library(epiprocess)
+library(data.table)
+library(dplyr)
+
+dt <- archive_cases_dv_subset$DT
+dt <- filter(dt, geo_value == "ca") %>%
+  filter(version <= "2020-06-15") %>%
+  select(-case_rate_7d_av)
+
+test_that("Input for compactify must be NULL or a boolean", {
+  expect_error(as_epi_archive2(dt, compactify = "no"))
+})
+
+dt$percent_cli <- c(1:80)
+dt$case_rate <- c(1:80)
+
+row_replace <- function(dt, row, x, y) {
+  # (This way of "replacing" elements appears to use copy-on-write even though
+  # we are working with a data.table.)
+  dt[row, 4] <- x
+  dt[row, 5] <- y
+  dt
+}
+
+# Note that compactify is working on version-wise LOCF (last version of each
+# observation carried forward)
+
+# Rows 1 should not be eliminated even if NA
+dt <- row_replace(dt, 1, NA, NA) # Not LOCF
+
+# NOTE! We are assuming that there are no NA's in geo_value, time_value,
+# and version. Even though compactify may erroneously remove the first row
+# if it has all NA's, we are not testing this behaviour for now as this dataset
+# has problems beyond the scope of this test
+
+# Rows 11 and 12 correspond to different time_values
+dt <- row_replace(dt, 12, 11, 11) # Not LOCF
+
+# Rows 20 and 21 only differ in version
+dt <- row_replace(dt, 21, 20, 20) # LOCF
+
+# Rows 21 and 22 only differ in version
+dt <- row_replace(dt, 22, 20, 20) # LOCF
+
+# Row 39 comprises the first NA's
+dt <- row_replace(dt, 39, NA, NA) # Not LOCF
+
+# Row 40 has two NA's, just like its lag, row 39
+dt <- row_replace(dt, 40, NA, NA) # LOCF
+
+# Row 62's values already exist in row 15, but row 15 is not a preceding row
+dt <- row_replace(dt, 62, 15, 15) # Not LOCF
+
+# Row 73 only has one value carried over
+dt <- row_replace(dt, 74, 73, 74) # Not LOCF
+
+dt_true <- as_tibble(as_epi_archive2(dt, compactify = TRUE)$DT)
+dt_false <- as_tibble(as_epi_archive2(dt, compactify = FALSE)$DT)
+dt_null <- suppressWarnings(as_tibble(as_epi_archive2(dt, compactify = NULL)$DT))
+
+test_that("Warning for LOCF with compactify as NULL", {
+  expect_warning(as_epi_archive2(dt, compactify = NULL))
+})
+
+test_that("No warning when there is no LOCF", {
+  expect_warning(as_epi_archive2(dt[1:5], compactify = NULL), NA)
+})
+
+test_that("LOCF values are ignored with compactify=FALSE", {
+  expect_identical(nrow(dt), nrow(dt_false))
+})
+
+test_that("LOCF values are taken out with compactify=TRUE", {
+  dt_test <- as_tibble(as_epi_archive2(dt[-c(21, 22, 40), ], compactify = FALSE)$DT)
+
+  expect_identical(dt_true, dt_null)
+  expect_identical(dt_null, dt_test)
+})
+
+test_that("as_of produces the same results with compactify=TRUE as with compactify=FALSE", {
+  ea_true <- as_epi_archive2(dt, compactify = TRUE)
+  ea_false <- as_epi_archive2(dt, compactify = FALSE)
+
+  # Row 22, an LOCF row corresponding to the latest version, is omitted in
+  # ea_true
+  latest_version <- max(ea_false$DT$version)
+  as_of_true <- as_of(ea_true, latest_version)
+  as_of_false <- as_of(ea_false, latest_version)
+
+  expect_identical(as_of_true, as_of_false)
+})
+
+test_that("compactify does not alter the default clobberable and observed version bounds", {
+  x <- tibble::tibble(
+    geo_value = "geo1",
+    time_value = as.Date("2000-01-01"),
+    version = as.Date("2000-01-01") + 1:5,
+    value = 42L
+  )
+  ea_true <- as_epi_archive2(x, compactify = TRUE)
+  ea_false <- as_epi_archive2(x, compactify = FALSE)
+  # We say that we base the bounds on the user's `x` arg. We might mess up or
+  # change our minds and base things on the `DT` field (or a temporary `DT`
+  # variable, post-compactify) instead. Check that this test would trigger
+  # in that case:
+  expect_true(max(ea_true$DT$version) != max(ea_false$DT$version))
+  # The actual test:
+  expect_identical(ea_true$clobberable_versions_start, ea_false$clobberable_versions_start)
+  expect_identical(ea_true$versions_end, ea_false$versions_end)
+})
diff --git a/tests/testthat/test-epix_fill_through_version_new.R b/tests/testthat/test-epix_fill_through_version_new.R
new file mode 100644
index 00000000..2b76a851
--- /dev/null
+++ b/tests/testthat/test-epix_fill_through_version_new.R
@@ -0,0 +1,109 @@
+test_that("epix_fill_through_version2 mirrors input when it is sufficiently up to date", {
+  ea_orig <- as_epi_archive2(data.table::data.table(
+    geo_value = "g1", time_value = as.Date("2020-01-01"),
+    version = 1:5, value = 1:5
+  ))
+  some_earlier_observed_version <- 2L
+  ea_trivial_fill_na1 <- epix_fill_through_version2(ea_orig, some_earlier_observed_version, "na")
+  ea_trivial_fill_na2 <- epix_fill_through_version2(ea_orig, ea_orig$versions_end, "na")
+  ea_trivial_fill_locf <- epix_fill_through_version2(ea_orig, some_earlier_observed_version, "locf")
+  # Below, we want R6 objects to be compared based on contents rather than
+  # addresses. We appear to get this with `expect_identical` in `testthat`
+  # edition 3, which is based on `waldo::compare` rather than `base::identical`;
+  # `waldo::compare` in waldo >=0.3.1 appears (as of 0.4.0) to compare R6
+  # objects by contents rather than address (in a way that is tested but maybe
+  # not guaranteed via user docs). Use `testthat::local_edition` to ensure we
+  # use testthat edition 3 here (use `testthat::` to prevent ambiguity with
+  # `readr`).
+  testthat::local_edition(3)
+  expect_identical(ea_orig, ea_trivial_fill_na1)
+  expect_identical(ea_orig, ea_trivial_fill_na2)
+  expect_identical(ea_orig, ea_trivial_fill_locf)
+})
+
+test_that("epix_fill_through_version2 can extend observed versions, gives expected `as_of`s", {
+  ea_orig <- as_epi_archive2(data.table::data.table(
+    geo_value = "g1",
+    time_value = as.Date("2020-01-01") + c(rep(0L, 5L), 1L),
+    version = c(1:5, 2L),
+    value = 1:6
+  ))
+  first_unobserved_version <- 6L
+  later_unobserved_version <- 10L
+  ea_fill_na <- epix_fill_through_version2(ea_orig, later_unobserved_version, "na")
+  ea_fill_locf <- epix_fill_through_version2(ea_orig, later_unobserved_version, "locf")
+
+  # We use testthat edition 3 features here, passing `ignore_attr` to
+  # `waldo::compare`. Ensure we are using edition 3:
+  testthat::local_edition(3)
+  withCallingHandlers(
+    {
+      expect_identical(ea_fill_na$versions_end, later_unobserved_version)
+      expect_identical(tibble::as_tibble(as_of(ea_fill_na, first_unobserved_version)),
+        tibble::tibble(geo_value = "g1", time_value = as.Date("2020-01-01") + 0:1, value = rep(NA_integer_, 2L)),
+        ignore_attr = TRUE
+      )
+      expect_identical(ea_fill_locf$versions_end, later_unobserved_version)
+      expect_identical(
+        as_of(ea_fill_locf, first_unobserved_version),
+        as_of(ea_fill_locf, ea_orig$versions_end) %>%
+          {
+            attr(., "metadata")$as_of <- first_unobserved_version
+            .
+          }
+      )
+    },
+    epiprocess__snapshot_as_of_clobberable_version = function(wrn) invokeRestart("muffleWarning")
+  )
+})
+
+test_that("epix_fill_through_version2 does not mutate x", {
+  for (ea_orig in list(
+    # vanilla case
+    as_epi_archive2(data.table::data.table(
+      geo_value = "g1", time_value = as.Date("2020-01-01"),
+      version = 1:5, value = 1:5
+    )),
+    # data.table unique yielding original DT by reference special case (maybe
+    # having only 1 row is the trigger? having no revisions of initial values
+    # doesn't seem sufficient to trigger)
+    as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, value = 10L))
+  )) {
+    # We want to perform a strict comparison of the contents of `ea_orig` before
+    # and `ea_orig` after. `clone` + `expect_identical` based on waldo would
+    # sort of work, but we might want something stricter. `as.list` +
+    # `identical` plus a check of the DT seems to do the trick.
+    ea_orig_before_as_list <- as.list(ea_orig)
+    ea_orig_DT_before_copy <- data.table::copy(ea_orig$DT)
+    some_unobserved_version <- 8L
+    #
+    ea_fill_na <- epix_fill_through_version2(ea_orig, some_unobserved_version, "na")
+    ea_orig_after_as_list <- as.list(ea_orig)
+    # use identical, not expect_identical, for the R6-as-list test; latter isn't as strict
+    expect_true(identical(ea_orig_before_as_list, ea_orig_after_as_list))
+    expect_identical(ea_orig_DT_before_copy, ea_orig$DT)
+    #
+    ea_fill_locf <- epix_fill_through_version2(ea_orig, some_unobserved_version, "locf")
+    ea_orig_after_as_list <- as.list(ea_orig)
+    expect_true(identical(ea_orig_before_as_list, ea_orig_after_as_list))
+    expect_identical(ea_orig_DT_before_copy, ea_orig$DT)
+  }
+})
+
+test_that("epix_fill_through_version return with expected visibility", {
+  ea <- as_epi_archive(data.table::data.table(
+    geo_value = "g1", time_value = as.Date("2020-01-01"),
+    version = 1:5, value = 1:5
+  ))
+  expect_true(withVisible(epix_fill_through_version(ea, 10L, "na"))[["visible"]])
+})
+
+test_that("epix_fill_through_version2 returns same key & doesn't mutate old DT or its key", {
+  ea <- as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, value = 10L))
+  old_DT <- ea$DT
+  old_DT_copy <- data.table::copy(old_DT)
+  old_key <- data.table::key(ea$DT)
+  expect_identical(data.table::key(epix_fill_through_version2(ea, 5L, "na")$DT), old_key)
+  expect_identical(data.table::key(epix_fill_through_version2(ea, 5L, "locf")$DT), old_key)
+  expect_identical(data.table::key(ea$DT), old_key)
+})
diff --git a/tests/testthat/test-epix_merge_new.R b/tests/testthat/test-epix_merge_new.R
new file mode 100644
index 00000000..594b7b5e
--- /dev/null
+++ b/tests/testthat/test-epix_merge_new.R
@@ -0,0 +1,228 @@
+test_that("epix_merge requires forbids on invalid `y`", {
+  ea <- archive_cases_dv_subset$DT %>%
+    as_epi_archive2() %>%
+    clone() %>%
+    suppressWarnings()
+  expect_error(epix_merge2(ea, data.frame(x = 1)))
+})
+
+test_that("epix_merge merges and carries forward updates properly", {
+  x <- as_epi_archive2(
+    data.table::as.data.table(
+      tibble::tribble(
+        ~geo_value, ~time_value, ~version, ~x_value,
+        # same version set for x and y
+        "g1", 1L, 1:3, paste0("XA", 1:3),
+        # versions of x surround those of y + this measurement has
+        # max update version beyond some others
+        "g1", 2L, 1:5, paste0("XB", 1:5),
+        # mirror case
+        "g1", 3L, 2L, paste0("XC", 2L),
+        # x has 1 version, y has 0
+        "g1", 4L, 1L, paste0("XD", 1L),
+        # non-NA values that should be carried forward
+        # (version-wise LOCF) in other versions, plus NAs that
+        # should (similarly) be carried forward as NA (latter
+        # wouldn't work with an ordinary merge + post-processing
+        # with `data.table::nafill`)
+        "g1", 6L, c(1L, 3L, 5L), paste0("XE", c(1L, NA, 5L))
+      ) %>%
+        tidyr::unchop(c(version, x_value)) %>%
+        dplyr::mutate(dplyr::across(c(x_value), ~ dplyr::if_else(grepl("NA", .x), NA_character_, .x)))
+    )
+  )
+  y <- as_epi_archive2(
+    data.table::as.data.table(
+      tibble::tribble(
+        ~geo_value, ~time_value, ~version, ~y_value,
+        "g1", 1L, 1:3, paste0("YA", 1:3),
+        "g1", 2L, 2L, paste0("YB", 2L),
+        "g1", 3L, 1:5, paste0("YC", 1:5),
+        "g1", 5L, 1L, paste0("YD", 1L),
+        "g1", 6L, 1:5, paste0("YE", 1:5),
+      ) %>%
+        tidyr::unchop(c(version, y_value)) %>%
+        dplyr::mutate(dplyr::across(c(y_value), ~ dplyr::if_else(grepl("NA", .x), NA_character_, .x)))
+    )
+  )
+  xy <- epix_merge2(x, y)
+  xy_expected <- as_epi_archive2(
+    data.table::as.data.table(
+      tibble::tribble(
+        ~geo_value, ~time_value, ~version, ~x_value, ~y_value,
+        "g1", 1L, 1:3, paste0("XA", 1:3), paste0("YA", 1:3),
+        "g1", 2L, 1:5, paste0("XB", 1:5), paste0("YB", c(NA, 2L, 2L, 2L, 2L)),
+        "g1", 3L, 1:5, paste0("XC", c(NA, 2L, 2L, 2L, 2L)), paste0("YC", 1:5),
+        "g1", 4L, 1L, paste0("XD", 1L), paste0("YD", NA),
+        "g1", 5L, 1L, paste0("XD", NA), paste0("YD", 1L),
+        "g1", 6L, 1:5, paste0("XE", c(1L, 1L, NA, NA, 5L)), paste0("YE", 1:5),
+      ) %>%
+        tidyr::unchop(c(version, x_value, y_value)) %>%
+        dplyr::mutate(dplyr::across(c(x_value, y_value), ~ dplyr::if_else(grepl("NA", .x), NA_character_, .x)))
+    )
+  )
+  # We rely on testthat edition 3 expect_identical using waldo, not identical. See
+  # test-epix_fill_through_version.R comments for details.
+  testthat::local_edition(3)
+  expect_identical(xy, xy_expected)
+})
+
+test_that("epix_merge forbids and warns on metadata and naming issues", {
+  expect_error(
+    epix_merge2(
+      as_epi_archive2(tibble::tibble(geo_value = "tx", time_value = 1L, version = 1L, x_value = 1L)),
+      as_epi_archive2(tibble::tibble(geo_value = "us", time_value = 1L, version = 5L, y_value = 2L))
+    ),
+    regexp = "must have the same.*geo_type"
+  )
+  expect_error(
+    epix_merge2(
+      as_epi_archive2(tibble::tibble(geo_value = "pa", time_value = 1L, version = 1L, x_value = 1L)),
+      as_epi_archive2(tibble::tibble(geo_value = "pa", time_value = as.Date("2020-01-01"), version = 5L, y_value = 2L))
+    ),
+    regexp = "must have the same.*time_type"
+  )
+  expect_error(
+    epix_merge2(
+      as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, value = 1L)),
+      as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, value = 2L))
+    ),
+    regexp = "overlapping.*names"
+  )
+  expect_warning(
+    epix_merge2(
+      as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, x_value = 1L),
+        additional_metadata = list("updates_fetched" = lubridate::ymd_hms("2022-05-01 16:00:00", tz = "UTC"))
+      ),
+      as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, y_value = 2L))
+    ),
+    regexp = "x\\$additional_metadata",
+    class = "epiprocess__epix_merge_ignores_additional_metadata"
+  )
+  expect_warning(
+    epix_merge2(
+      as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, x_value = 1L)),
+      as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, y_value = 2L),
+        additional_metadata = list("updates_fetched" = lubridate::ymd_hms("2022-05-01 16:00:00", tz = "UTC"))
+      )
+    ),
+    regexp = "y\\$additional_metadata",
+    class = "epiprocess__epix_merge_ignores_additional_metadata"
+  )
+})
+
+# use `local` to prevent accidentally using the x, y, xy bindings here
+# elsewhere, while allowing reuse across a couple tests
+local({
+  x <- as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, x_value = 1L),
+    clobberable_versions_start = 1L, versions_end = 10L
+  )
+  y <- as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, y_value = 2L),
+    clobberable_versions_start = 3L, versions_end = 10L
+  )
+  xy <- epix_merge2(x, y)
+  test_that("epix_merge considers partially-clobberable row to be clobberable", {
+    expect_identical(xy$clobberable_versions_start, 1L)
+  })
+  test_that("epix_merge result uses versions_end metadata not max version val", {
+    expect_identical(xy$versions_end, 10L)
+  })
+})
+
+local({
+  x <- as_epi_archive2(
+    tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, x_value = 10L),
+    clobberable_versions_start = 1L,
+    versions_end = 3L
+  )
+  y <- as_epi_archive2(
+    tibble::tibble(geo_value = 1L, time_value = 1L, version = 5L, y_value = 20L),
+    clobberable_versions_start = 1L
+  )
+  test_that('epix_merge forbids on sync default or "forbid"', {
+    expect_error(epix_merge2(x, y),
+      class = "epiprocess__epix_merge_unresolved_sync"
+    )
+    expect_error(epix_merge2(x, y, sync = "forbid"),
+      class = "epiprocess__epix_merge_unresolved_sync"
+    )
+  })
+  test_that('epix_merge sync="na" works', {
+    expect_equal(
+      epix_merge2(x, y, sync = "na"),
+      as_epi_archive2(tibble::tribble(
+        ~geo_value, ~time_value, ~version, ~x_value, ~y_value,
+        1L, 1L, 1L, 10L, NA_integer_, # x updated, y not observed yet
+        1L, 1L, 4L, NA_integer_, NA_integer_, # NA-ing out x, y not observed yet
+        1L, 1L, 5L, NA_integer_, 20L, # x still NA, y updated
+        # (we should not have a y vals -> NA update here; version 5 should be
+        # the `versions_end` of the result)
+      ), clobberable_versions_start = 1L)
+    )
+  })
+  test_that('epix_merge sync="locf" works', {
+    expect_equal(
+      epix_merge2(x, y, sync = "locf"),
+      as_epi_archive2(tibble::tribble(
+        ~geo_value, ~time_value, ~version, ~x_value, ~y_value,
+        1L, 1L, 1L, 10L, NA_integer_, # x updated, y not observed yet
+        1L, 1L, 5L, 10L, 20L, # x LOCF'd, y updated
+      ), clobberable_versions_start = 1L)
+    )
+  })
+  test_that('epix_merge sync="truncate" works', {
+    expect_equal(
+      epix_merge2(x, y, sync = "truncate"),
+      as_epi_archive2(tibble::tribble(
+        ~geo_value, ~time_value, ~version, ~x_value, ~y_value,
+        1L, 1L, 1L, 10L, NA_integer_, # x updated, y not observed yet
+        # y's update beyond x's last update has been truncated
+      ), clobberable_versions_start = 1L, versions_end = 3L)
+    )
+  })
+  x_no_conflict <- as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, x_value = 10L))
+  y_no_conflict <- as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = 1L, y_value = 20L))
+  xy_no_conflict_expected <- as_epi_archive2(tibble::tribble(
+    ~geo_value, ~time_value, ~version, ~x_value, ~y_value,
+    1L, 1L, 1L, 10L, 20L, # x updated, y not observed yet
+  ))
+  test_that('epix_merge sync="forbid" on no-conflict works', {
+    expect_equal(
+      epix_merge2(x_no_conflict, y_no_conflict, sync = "forbid"),
+      xy_no_conflict_expected
+    )
+  })
+  test_that('epix_merge sync="na" on no-conflict works', {
+    # This test is the main reason for these no-conflict tests. We want to make
+    # sure that we don't add an unnecessary NA-ing-out version beyond a common
+    # versions_end.
+    expect_equal(
+      epix_merge2(x_no_conflict, y_no_conflict, sync = "na"),
+      xy_no_conflict_expected
+    )
+  })
+  test_that('epix_merge sync="locf" on no-conflict works', {
+    expect_equal(
+      epix_merge2(x_no_conflict, y_no_conflict, sync = "locf"),
+      xy_no_conflict_expected
+    )
+  })
+  test_that('epix_merge sync="truncate" on no-conflict works', {
+    expect_equal(
+      epix_merge2(x_no_conflict, y_no_conflict, sync = "truncate"),
+      xy_no_conflict_expected
+    )
+  })
+})
+
+
+test_that('epix_merge sync="na" balks if do not know next_after', {
+  expect_error(
+    epix_merge2(
+      as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = as.POSIXct(as.Date("2020-01-01")), x_value = 10L)),
+      as_epi_archive2(tibble::tibble(geo_value = 1L, time_value = 1L, version = as.POSIXct(as.Date("2020-01-02")), y_value = 20L)),
+      sync = "na"
+    ),
+    regexp = "no applicable method.*next_after"
+  )
+})
diff --git a/tests/testthat/test-epix_slide_new.R b/tests/testthat/test-epix_slide_new.R
new file mode 100644
index 00000000..f748231a
--- /dev/null
+++ b/tests/testthat/test-epix_slide_new.R
@@ -0,0 +1,810 @@
+library(dplyr)
+
+test_that("epix_slide2 only works on an epi_archive", {
+  expect_error(epix_slide2(data.frame(x = 1)))
+})
+
+x <- tibble::tribble(
+  ~version, ~time_value, ~binary,
+  4, c(1:3), 2^(1:3),
+  5, c(1:2, 4), 2^(4:6),
+  6, c(1:2, 4:5), 2^(7:10),
+  7, 2:6, 2^(11:15)
+) %>%
+  tidyr::unnest(c(time_value, binary))
+
+xx <- bind_cols(geo_value = rep("x", 15), x) %>%
+  as_epi_archive2()
+
+test_that("epix_slide2 works as intended", {
+  xx1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = ~ sum(.x$binary),
+      before = 2,
+      new_col_name = "sum_binary"
+    )
+
+  xx2 <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    sum_binary = c(
+      2^3 + 2^2,
+      2^6 + 2^3,
+      2^10 + 2^9,
+      2^15 + 2^14
+    )
+  ) %>%
+    group_by(geo_value)
+
+  expect_identical(xx1, xx2) # *
+
+  xx3 <- xx %>%
+    group_by(
+      dplyr::across(dplyr::all_of("geo_value"))
+    ) %>%
+    slide(
+      f = ~ sum(.x$binary),
+      before = 2,
+      new_col_name = "sum_binary"
+    )
+
+  expect_identical(xx1, xx3) # This and * imply xx2 and xx3 are identical
+
+  # function interface
+  xx4 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(f = function(x, gk, rtv) {
+      tibble::tibble(sum_binary = sum(x$binary))
+    }, before = 2, names_sep = NULL)
+
+  expect_identical(xx1, xx4)
+
+  # tidyeval interface
+  xx5 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      sum_binary = sum(binary),
+      before = 2
+    )
+
+  expect_identical(xx1, xx5)
+})
+
+test_that("epix_slide2 works as intended with `as_list_col=TRUE`", {
+  xx_dfrow1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = ~ data.frame(bin_sum = sum(.x$binary)),
+      before = 2,
+      as_list_col = TRUE
+    )
+
+  xx_dfrow2 <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    slide_value =
+      c(
+        2^3 + 2^2,
+        2^6 + 2^3,
+        2^10 + 2^9,
+        2^15 + 2^14
+      ) %>%
+        purrr::map(~ data.frame(bin_sum = .x))
+  ) %>%
+    group_by(geo_value)
+
+  expect_identical(xx_dfrow1, xx_dfrow2) # *
+
+  xx_dfrow3 <- xx %>%
+    group_by(dplyr::across(dplyr::all_of("geo_value"))) %>%
+    slide(
+      f = ~ data.frame(bin_sum = sum(.x$binary)),
+      before = 2,
+      as_list_col = TRUE
+    )
+
+  expect_identical(xx_dfrow1, xx_dfrow3) # This and * Imply xx_dfrow2 and xx_dfrow3 are identical
+
+  xx_df1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = ~ data.frame(bin = .x$binary),
+      before = 2,
+      as_list_col = TRUE
+    )
+
+  xx_df2 <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    slide_value =
+      list(
+        c(2^3, 2^2),
+        c(2^6, 2^3),
+        c(2^10, 2^9),
+        c(2^15, 2^14)
+      ) %>%
+        purrr::map(~ data.frame(bin = rev(.x)))
+  ) %>%
+    group_by(geo_value)
+
+  expect_identical(xx_df1, xx_df2)
+
+  xx_scalar1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = ~ sum(.x$binary),
+      before = 2,
+      as_list_col = TRUE
+    )
+
+  xx_scalar2 <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    slide_value =
+      list(
+        2^3 + 2^2,
+        2^6 + 2^3,
+        2^10 + 2^9,
+        2^15 + 2^14
+      )
+  ) %>%
+    group_by(geo_value)
+
+  expect_identical(xx_scalar1, xx_scalar2)
+
+  xx_vec1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = ~ .x$binary,
+      before = 2,
+      as_list_col = TRUE
+    )
+
+  xx_vec2 <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    slide_value =
+      list(
+        c(2^3, 2^2),
+        c(2^6, 2^3),
+        c(2^10, 2^9),
+        c(2^15, 2^14)
+      ) %>%
+        purrr::map(rev)
+  ) %>%
+    group_by(geo_value)
+
+  expect_identical(xx_vec1, xx_vec2)
+})
+
+test_that("epix_slide2 `before` validation works", {
+  expect_error(
+    slide(xx, f = ~ sum(.x$binary)),
+    "`before` is required"
+  )
+  expect_error(
+    slide(xx, f = ~ sum(.x$binary), before = NA),
+    "Assertion on 'before' failed: May not be NA"
+  )
+  expect_error(
+    slide(xx, f = ~ sum(.x$binary), before = -1),
+    "Assertion on 'before' failed: Element 1 is not >= 0"
+  )
+  expect_error(slide(xx, f = ~ sum(.x$binary), before = 1.5),
+    regexp = "before",
+    class = "vctrs_error_incompatible_type"
+  )
+  # We might want to allow this at some point (issue #219):
+  expect_error(slide(xx, f = ~ sum(.x$binary), before = Inf),
+    regexp = "before",
+    class = "vctrs_error_incompatible_type"
+  )
+  # (wrapper shouldn't introduce a value:)
+  expect_error(epix_slide2(xx, f = ~ sum(.x$binary)), "`before` is required")
+  # These `before` values should be accepted:
+  expect_error(
+    slide(xx, f = ~ sum(.x$binary), before = 0),
+    NA
+  )
+  expect_error(
+    slide(xx, f = ~ sum(.x$binary), before = 2L),
+    NA
+  )
+  expect_error(
+    slide(xx, f = ~ sum(.x$binary), before = 365000),
+    NA
+  )
+})
+
+test_that("quosure passing issue in epix_slide2 is resolved + other potential issues", {
+  # (First part adapted from @examples)
+  time_values <- seq(as.Date("2020-06-01"),
+    as.Date("2020-06-02"),
+    by = "1 day"
+  )
+  # We only have one non-version, non-time key in the example archive. Add
+  # another so that we don't accidentally pass tests due to accidentally
+  # matching the default grouping.
+  ea <- as_epi_archive2(
+    archive_cases_dv_subset$DT %>%
+      dplyr::mutate(modulus = seq_len(nrow(.)) %% 5L),
+    other_keys = "modulus",
+    compactify = TRUE
+  )
+  reference_by_modulus <- ea %>%
+    group_by(modulus) %>%
+    epix_slide2(
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    )
+  reference_by_neither <- ea %>%
+    group_by() %>%
+    epix_slide2(
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    )
+  # test the passing-something-that-must-be-enquosed behavior:
+  #
+  # (S3 group_by behavior for this case is the `reference_by_modulus`)
+  expect_identical(
+    ea %>% group_by(modulus) %>% slide(
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    ),
+    reference_by_modulus
+  )
+  # test the .data pronoun behavior:
+  expect_identical(
+    epix_slide2(
+      x = ea %>% group_by(.data$modulus),
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    ),
+    reference_by_modulus
+  )
+  expect_identical(
+    ea %>% group_by(.data$modulus) %>% slide(
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    ),
+    reference_by_modulus
+  )
+  # test the passing across-all-of-string-literal behavior:
+  expect_identical(
+    epix_slide2(
+      x = ea %>% group_by(dplyr::across(all_of("modulus"))),
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    ),
+    reference_by_modulus
+  )
+  expect_identical(
+    ea %>% group_by(across(all_of("modulus"))) %>% slide(
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    ),
+    reference_by_modulus
+  )
+  # test the passing-across-all-of-string-var behavior:
+  my_group_by <- "modulus"
+  expect_identical(
+    epix_slide2(
+      x = ea %>% group_by(dplyr::across(tidyselect::all_of(my_group_by))),
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    ),
+    reference_by_modulus
+  )
+  expect_identical(
+    ea %>% group_by(dplyr::across(tidyselect::all_of(my_group_by))) %>% slide(
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    ),
+    reference_by_modulus
+  )
+  # test the default behavior (default in this case should just be grouping by neither):
+  expect_identical(
+    epix_slide2(
+      x = ea,
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    ),
+    reference_by_neither
+  )
+  expect_identical(
+    ea %>% slide(
+      f = ~ mean(.x$case_rate_7d_av),
+      before = 2,
+      ref_time_values = time_values,
+      new_col_name = "case_rate_3d_av"
+    ),
+    reference_by_neither
+  )
+})
+
+ea <- tibble::tribble(
+  ~version, ~time_value, ~binary,
+  2, 1:1, 2^(1:1),
+  3, 1:2, 2^(2:1),
+  4, 1:3, 2^(3:1),
+  5, 1:4, 2^(4:1),
+  6, 1:5, 2^(5:1),
+  7, 1:6, 2^(6:1)
+) %>%
+  tidyr::unnest(c(time_value, binary)) %>%
+  mutate(geo_value = "x") %>%
+  as_epi_archive2()
+
+test_that("epix_slide2 with all_versions option has access to all older versions", {
+  library(data.table)
+  # Make sure we're using testthat edition 3, where `expect_identical` doesn't
+  # actually mean `base::identical` but something more content-based using
+  # `waldo` package:
+  testthat::local_edition(3)
+
+  slide_fn <- function(x, gk, rtv) {
+    return(tibble(
+      n_versions = length(unique(x$DT$version)),
+      n_row = nrow(x$DT),
+      dt_class1 = class(x$DT)[[1L]],
+      dt_key = list(key(x$DT))
+    ))
+  }
+
+  ea_orig_mirror <- ea %>% clone(deep = TRUE)
+  ea_orig_mirror$DT <- copy(ea_orig_mirror$DT)
+
+  result1 <- ea %>%
+    group_by() %>%
+    epix_slide2(
+      f = slide_fn,
+      before = 10^3,
+      names_sep = NULL,
+      all_versions = TRUE
+    )
+
+  expect_true(inherits(result1, "tbl_df"))
+
+  result2 <- tibble::tribble(
+    ~time_value, ~n_versions, ~n_row, ~dt_class1, ~dt_key,
+    2, 1L, sum(1:1), "data.table", key(ea$DT),
+    3, 2L, sum(1:2), "data.table", key(ea$DT),
+    4, 3L, sum(1:3), "data.table", key(ea$DT),
+    5, 4L, sum(1:4), "data.table", key(ea$DT),
+    6, 5L, sum(1:5), "data.table", key(ea$DT),
+    7, 6L, sum(1:6), "data.table", key(ea$DT),
+  )
+
+  expect_identical(result1, result2) # *
+
+  result3 <- ea %>%
+    group_by() %>%
+    slide(
+      f = slide_fn,
+      before = 10^3,
+      names_sep = NULL,
+      all_versions = TRUE
+    )
+
+  expect_identical(result1, result3) # This and * Imply result2 and result3 are identical
+
+  # formula interface
+  result4 <- ea %>%
+    group_by() %>%
+    epix_slide2(
+      f = ~ slide_fn(.x, .y),
+      before = 10^3,
+      names_sep = NULL,
+      all_versions = TRUE
+    )
+
+  expect_identical(result1, result4) # This and * Imply result2 and result4 are identical
+
+  # tidyeval interface
+  result5 <- ea %>%
+    group_by() %>%
+    epix_slide2(
+      data = slide_fn(
+        .x,
+        stop("slide_fn doesn't use group key, no need to prepare it")
+      ),
+      before = 10^3,
+      names_sep = NULL,
+      all_versions = TRUE
+    )
+
+  expect_identical(result1, result5) # This and * Imply result2 and result5 are identical
+  expect_identical(ea, ea_orig_mirror) # We shouldn't have mutated ea
+})
+
+test_that("as_of and epix_slide2 with long enough window are compatible", {
+  library(data.table)
+  testthat::local_edition(3)
+
+  # For all_versions = FALSE:
+
+  f1 <- function(x, gk, rtv) {
+    tibble(
+      diff_mean = mean(diff(x$binary))
+    )
+  }
+  ref_time_value1 <- 5
+
+  expect_identical(
+    ea %>% as_of(ref_time_value1) %>% f1() %>% mutate(time_value = ref_time_value1, .before = 1L),
+    ea %>% slide(f1, before = 1000L, ref_time_values = ref_time_value1, names_sep = NULL)
+  )
+
+  # For all_versions = TRUE:
+
+  f2 <- function(x, gk, rtv) {
+    x %>%
+      # extract time&version-lag-1 data:
+      epix_slide2(
+        function(subx, subgk, rtv) {
+          tibble(data = list(
+            subx %>%
+              filter(time_value == attr(subx, "metadata")$as_of - 1) %>%
+              rename(real_time_value = time_value, lag1 = binary)
+          ))
+        },
+        before = 1, names_sep = NULL
+      ) %>%
+      # assess as nowcast:
+      unnest(data) %>%
+      inner_join(x %>% as_of(x$versions_end), by = setdiff(key(x$DT), c("version"))) %>%
+      summarize(mean_abs_delta = mean(abs(binary - lag1)))
+  }
+  ref_time_value2 <- 5
+
+  expect_identical(
+    ea %>% as_of(ref_time_value2, all_versions = TRUE) %>% f2() %>% mutate(time_value = ref_time_value2, .before = 1L),
+    ea %>% slide(f2, before = 1000L, ref_time_values = ref_time_value2, all_versions = TRUE, names_sep = NULL)
+  )
+
+  # Test the same sort of thing when grouping by geo in an archive with multiple geos.
+  ea_multigeo <- ea %>% clone()
+  ea_multigeo$DT <- rbind(
+    ea_multigeo$DT,
+    copy(ea_multigeo$DT)[, geo_value := "y"][, binary := -binary][]
+  )
+  setkeyv(ea_multigeo$DT, key(ea$DT))
+
+  expect_identical(
+    ea_multigeo %>%
+      group_by(geo_value) %>%
+      epix_slide2(f2, before = 1000L, ref_time_values = ref_time_value2, all_versions = TRUE, names_sep = NULL) %>%
+      filter(geo_value == "x"),
+    ea %>% # using `ea` here is like filtering `ea_multigeo` to `geo_value=="x"`
+      epix_as_of2(ref_time_value2, all_versions = TRUE) %>%
+      f2() %>%
+      transmute(geo_value = "x", time_value = ref_time_value2, mean_abs_delta) %>%
+      group_by(geo_value)
+  )
+})
+
+test_that("epix_slide2 `f` is passed an ungrouped `epi_archive` when `all_versions=TRUE`", {
+  slide_fn <- function(x, gk, rtv) {
+    expect_true(is_epi_archive2(x))
+    return(NA)
+  }
+
+  ea %>%
+    group_by() %>%
+    epix_slide2(
+      f = slide_fn,
+      before = 1,
+      ref_time_values = 5,
+      new_col_name = "out",
+      all_versions = TRUE
+    )
+})
+
+test_that("epix_slide2 with all_versions option works as intended", {
+  xx1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = ~ sum(.x$DT$binary),
+      before = 2,
+      new_col_name = "sum_binary",
+      all_versions = TRUE
+    )
+
+  xx2 <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    sum_binary = c(
+      2^3 + 2^2,
+      2^6 + 2^3,
+      2^10 + 2^9 + 2^6,
+      2^15 + 2^14 + 2^10
+    )
+  ) %>%
+    group_by(geo_value)
+
+  expect_identical(xx1, xx2) # *
+
+  xx3 <- xx %>%
+    group_by(dplyr::across(dplyr::all_of("geo_value"))) %>%
+    slide(
+      f = ~ sum(.x$DT$binary),
+      before = 2,
+      new_col_name = "sum_binary",
+      all_versions = TRUE
+    )
+
+  expect_identical(xx1, xx3) # This and * Imply xx2 and xx3 are identical
+})
+
+# XXX currently, we're using a stopgap measure of having `epix_slide2` always
+# output a (grouped/ungrouped) tibble while we think about the class, columns,
+# and attributes of `epix_slide2` output more carefully. We might bring this test
+# back depending on the decisions there:
+#
+# test_that("`epix_slide2` uses `versions_end` as a resulting `epi_df`'s `as_of`", {
+#   ea_updated_stale = ea$clone()
+#   ea_updated_stale$versions_end <- ea_updated_stale$versions_end + 3 # (dbl)
+#   #
+#   expect_identical(
+#     ea_updated_stale %>%
+#       group_by(geo_value) %>%
+#       epix_slide2(~ slice_head(.x, n = 1L), before = 10L) %>%
+#       ungroup() %>%
+#       attr("metadata") %>%
+#       .$as_of,
+#     10
+#   )
+# })
+
+test_that("epix_slide2 works with 0-row computation outputs", {
+  epix_slide_empty <- function(ea, ...) {
+    ea %>%
+      epix_slide2(before = 5L, ..., function(x, gk, rtv) {
+        tibble::tibble()
+      })
+  }
+  expect_identical(
+    ea %>%
+      epix_slide_empty(),
+    tibble::tibble(
+      time_value = ea$DT$version[integer(0)]
+    )
+  )
+  expect_identical(
+    ea %>%
+      group_by(geo_value) %>%
+      epix_slide_empty(),
+    tibble::tibble(
+      geo_value = ea$DT$geo_value[integer(0)],
+      time_value = ea$DT$version[integer(0)]
+    ) %>%
+      # new_epi_df(geo_type = ea$geo_type, time_type = ea$time_type,
+      #            as_of = ea$versions_end) %>%
+      group_by(geo_value)
+  )
+  # with `all_versions=TRUE`, we have something similar but never get an
+  # `epi_df`:
+  expect_identical(
+    ea %>%
+      epix_slide_empty(all_versions = TRUE),
+    tibble::tibble(
+      time_value = ea$DT$version[integer(0)]
+    )
+  )
+  expect_identical(
+    ea %>%
+      group_by(geo_value) %>%
+      epix_slide_empty(all_versions = TRUE),
+    tibble::tibble(
+      geo_value = ea$DT$geo_value[integer(0)],
+      time_value = ea$DT$version[integer(0)]
+    ) %>%
+      group_by(geo_value)
+  )
+})
+
+# test_that("epix_slide grouped by geo can produce `epi_df` output", {
+#   # This is a characterization test. Not sure we actually want this behavior;
+#   # https://github.com/cmu-delphi/epiprocess/pull/290#issuecomment-1489099157
+#   expect_identical(
+#     ea %>%
+#       group_by(geo_value) %>%
+#       epix_slide(before = 5L, function(x,g) {
+#         tibble::tibble(value = 42)
+#       }, names_sep = NULL),
+#     tibble::tibble(
+#       geo_value = "x",
+#       time_value = epix_slide_ref_time_values_default(ea),
+#       value = 42
+#     ) %>%
+#       new_epi_df(as_of = ea$versions_end)
+#   )
+# })
+
+test_that("epix_slide alerts if the provided f doesn't take enough args", {
+  f_xgt <- function(x, g, t) dplyr::tibble(value = mean(x$binary), count = length(x$binary))
+  # If `regexp` is NA, asserts that there should be no errors/messages.
+  expect_error(epix_slide2(xx, f = f_xgt, before = 2L), regexp = NA)
+  expect_warning(epix_slide2(xx, f = f_xgt, before = 2L), regexp = NA)
+
+  f_x_dots <- function(x, ...) dplyr::tibble(value = mean(x$binary), count = length(x$binary))
+  expect_warning(epix_slide2(xx, f_x_dots, before = 2L),
+    class = "epiprocess__assert_sufficient_f_args__mandatory_f_args_passed_to_f_dots"
+  )
+})
+
+test_that("epix_slide2 computation via formula can use ref_time_value", {
+  xx_ref <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    slide_value = c(4, 5, 6, 7)
+  ) %>%
+    group_by(geo_value)
+
+  xx1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = ~.ref_time_value,
+      before = 2
+    )
+
+  expect_identical(xx1, xx_ref)
+
+  xx2 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = ~.z,
+      before = 2
+    )
+
+  expect_identical(xx2, xx_ref)
+
+  xx3 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = ~..3,
+      before = 2
+    )
+
+  expect_identical(xx3, xx_ref)
+})
+
+test_that("epix_slide2 computation via function can use ref_time_value", {
+  xx_ref <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    slide_value = c(4, 5, 6, 7)
+  ) %>%
+    group_by(geo_value)
+
+  xx1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      f = function(x, g, t) t,
+      before = 2
+    )
+
+  expect_identical(xx1, xx_ref)
+})
+
+test_that("epix_slide2 computation via dots can use ref_time_value and group", {
+  # ref_time_value
+  xx_ref <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    slide_value = c(4, 5, 6, 7)
+  ) %>%
+    group_by(geo_value)
+
+  xx1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      before = 2,
+      slide_value = .ref_time_value
+    )
+
+  expect_identical(xx1, xx_ref)
+
+  # group_key
+  xx_ref <- tibble(
+    geo_value = rep("x", 4),
+    time_value = c(4, 5, 6, 7),
+    slide_value = "x"
+  ) %>%
+    group_by(geo_value)
+
+  # Use group_key column
+  xx3 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      before = 2,
+      slide_value = .group_key$geo_value
+    )
+
+  expect_identical(xx3, xx_ref)
+
+  # Use entire group_key object
+  expect_error(
+    xx %>%
+      group_by(.data$geo_value) %>%
+      epix_slide2(
+        before = 2,
+        slide_value = nrow(.group_key)
+      ),
+    NA
+  )
+})
+
+test_that("epix_slide2 computation via dots outputs the same result using col names and the data var", {
+  xx_ref <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      before = 2,
+      sum_binary = sum(time_value)
+    )
+
+  xx1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      before = 2,
+      sum_binary = sum(.x$time_value)
+    )
+
+  expect_identical(xx1, xx_ref)
+
+  xx2 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide2(
+      before = 2,
+      sum_binary = sum(.data$time_value)
+    )
+
+  expect_identical(xx2, xx_ref)
+})
+
+test_that("`epix_slide2` doesn't decay date output", {
+  expect_true(
+    xx$DT %>%
+      as_tibble() %>%
+      mutate(across(c(time_value, version), ~ as.Date("2000-01-01") + .x - 1L)) %>%
+      as_epi_archive2() %>%
+      epix_slide2(before = 5L, ~ attr(.x, "metadata")$as_of) %>%
+      `[[`("slide_value") %>%
+      inherits("Date")
+  )
+})
+
+test_that("`epix_slide2` can access objects inside of helper functions", {
+  helper <- function(archive_haystack, time_value_needle) {
+    archive_haystack %>% epix_slide2(has_needle = time_value_needle %in% time_value, before = 365000L)
+  }
+  expect_error(
+    helper(suppressWarnings(as_epi_archive2(archive_cases_dv_subset$DT)), as.Date("2021-01-01")),
+    NA
+  )
+  expect_error(
+    helper(xx, 3L),
+    NA
+  )
+})
diff --git a/tests/testthat/test-grouped_epi_archive_new.R b/tests/testthat/test-grouped_epi_archive_new.R
new file mode 100644
index 00000000..8f0133b9
--- /dev/null
+++ b/tests/testthat/test-grouped_epi_archive_new.R
@@ -0,0 +1,104 @@
+test_that("Grouping, regrouping, and ungrouping archives works as intended", {
+  # From an example:
+  library(dplyr)
+  toy_archive <-
+    tribble(
+      ~geo_value, ~age_group, ~time_value, ~version, ~value,
+      "us", "adult", "2000-01-01", "2000-01-02", 121,
+      "us", "pediatric", "2000-01-02", "2000-01-03", 5, # (addition)
+      "us", "adult", "2000-01-01", "2000-01-03", 125, # (revision)
+      "us", "adult", "2000-01-02", "2000-01-03", 130 # (addition)
+    ) %>%
+    mutate(
+      age_group = ordered(age_group, c("pediatric", "adult")),
+      time_value = as.Date(time_value),
+      version = as.Date(version)
+    ) %>%
+    as_epi_archive2(other_keys = "age_group")
+
+  # Ensure that we're using testthat edition 3's idea of "identical", which is
+  # not as strict as `identical`:
+  testthat::local_edition(3)
+
+  # Test equivalency claims in example:
+  by_both_keys <- toy_archive %>% group_by(geo_value, age_group)
+  expect_identical(
+    by_both_keys,
+    toy_archive %>% group_by(geo_value) %>% group_by(age_group, .add = TRUE)
+  )
+  grouping_cols <- c("geo_value", "age_group")
+  expect_identical(
+    by_both_keys,
+    toy_archive %>% group_by(across(all_of(grouping_cols)))
+  )
+
+  expect_identical(
+    toy_archive %>% group_by(geo_value),
+    toy_archive %>% group_by(geo_value, age_group) %>% ungroup(age_group)
+  )
+
+  # Test `.drop` behavior:
+  expect_error(toy_archive %>% group_by(.drop = "bogus"),
+    regexp = "Must be of type 'logical', not 'character'"
+  )
+  expect_warning(toy_archive %>% group_by(.drop = FALSE),
+    class = "epiprocess__group_by_epi_archive__drop_FALSE_no_factors"
+  )
+  expect_warning(toy_archive %>% group_by(geo_value, .drop = FALSE),
+    class = "epiprocess__group_by_epi_archive__drop_FALSE_no_factors"
+  )
+  expect_warning(
+    grouped_factor_then_nonfactor <-
+      toy_archive %>% group_by(age_group, geo_value, .drop = FALSE),
+    class = "epiprocess__group_by_epi_archive__drop_FALSE_nonfactor_after_factor"
+  )
+  expect_identical(
+    grouped_factor_then_nonfactor %>%
+      epix_slide2(before = 10, s = sum(value)),
+    tibble::tribble(
+      ~age_group, ~geo_value, ~time_value, ~s,
+      "pediatric", NA_character_, "2000-01-02", 0,
+      "adult", "us", "2000-01-02", 121,
+      "pediatric", "us", "2000-01-03", 5,
+      "adult", "us", "2000-01-03", 255
+    ) %>%
+      mutate(
+        age_group = ordered(age_group, c("pediatric", "adult")),
+        time_value = as.Date(time_value)
+      ) %>%
+      # # See
+      # # https://github.com/cmu-delphi/epiprocess/pull/290#issuecomment-1489099157
+      # # and
+      # # https://github.com/cmu-delphi/epiprocess/pull/311#issuecomment-1535149256
+      # # for why this is commented out, pending some design
+      # # decisions.
+      # #
+      # as_epi_df(geo_type = "nation", # bug; want "custom" from NA; issue #242
+      #           as_of = as.Date("2000-01-03"),
+      #           additional_metadata = list(other_keys = "age_group")) %>%
+      # # put back in expected order; see issue #166:
+      # select(age_group, geo_value, time_value, s) %>%
+      group_by(age_group, geo_value, .drop = FALSE)
+  )
+  expect_identical(
+    toy_archive %>%
+      group_by(geo_value, age_group, .drop = FALSE) %>%
+      epix_slide2(before = 10, s = sum(value)),
+    tibble::tribble(
+      ~geo_value, ~age_group, ~time_value, ~s,
+      "us", "pediatric", "2000-01-02", 0,
+      "us", "adult", "2000-01-02", 121,
+      "us", "pediatric", "2000-01-03", 5,
+      "us", "adult", "2000-01-03", 255
+    ) %>%
+      mutate(
+        age_group = ordered(age_group, c("pediatric", "adult")),
+        time_value = as.Date(time_value)
+      ) %>%
+      # as_epi_df(as_of = as.Date("2000-01-03"),
+      #           additional_metadata = list(other_keys = "age_group")) %>%
+      # # put back in expected order; see issue #166:
+      # select(geo_value, age_group, time_value, s) %>%
+      group_by(geo_value, age_group, .drop = FALSE)
+  )
+})
diff --git a/tests/testthat/test-methods-epi_archive_new.R b/tests/testthat/test-methods-epi_archive_new.R
new file mode 100644
index 00000000..a267ba58
--- /dev/null
+++ b/tests/testthat/test-methods-epi_archive_new.R
@@ -0,0 +1,138 @@
+library(dplyr)
+
+ea <- archive_cases_dv_subset$DT %>%
+  as_epi_archive2() %>%
+  clone() %>%
+  suppressWarnings()
+
+ea2_data <- tibble::tribble(
+  ~geo_value, ~time_value, ~version, ~cases,
+  "ca", "2020-06-01", "2020-06-01", 1,
+  "ca", "2020-06-01", "2020-06-02", 2,
+  #
+  "ca", "2020-06-02", "2020-06-02", 0,
+  "ca", "2020-06-02", "2020-06-03", 1,
+  "ca", "2020-06-02", "2020-06-04", 2,
+  #
+  "ca", "2020-06-03", "2020-06-03", 1,
+  #
+  "ca", "2020-06-04", "2020-06-04", 4,
+) %>%
+  dplyr::mutate(dplyr::across(c(time_value, version), as.Date))
+
+# epix_as_of tests
+test_that("epix_as_of behaves identically to as_of method", {
+  expect_identical(
+    epix_as_of2(ea, max_version = min(ea$DT$version)),
+    ea %>% as_of(max_version = min(ea$DT$version))
+  )
+})
+
+test_that("Errors are thrown due to bad as_of inputs", {
+  # max_version cannot be of string class rather than date class
+  expect_error(ea %>% as_of("2020-01-01"))
+  # max_version cannot be later than latest version
+  expect_error(ea %>% as_of(as.Date("2025-01-01")))
+  # max_version cannot be a vector
+  expect_error(ea %>% as_of(c(as.Date("2020-01-01"), as.Date("2020-01-02"))))
+})
+
+test_that("Warning against max_version being clobberable", {
+  # none by default
+  expect_warning(regexp = NA, ea %>% as_of(max_version = max(ea$DT$version)))
+  expect_warning(regexp = NA, ea %>% as_of(max_version = min(ea$DT$version)))
+  # but with `clobberable_versions_start` non-`NA`, yes
+  ea_with_clobberable <- ea %>% clone()
+  ea_with_clobberable$clobberable_versions_start <- max(ea_with_clobberable$DT$version)
+  expect_warning(ea_with_clobberable %>% as_of(max_version = max(ea$DT$version)))
+  expect_warning(regexp = NA, ea_with_clobberable %>% as_of(max_version = min(ea$DT$version)))
+})
+
+test_that("as_of properly grabs the data and doesn't mutate key", {
+  d <- as.Date("2020-06-01")
+
+  ea2 <- ea2_data %>%
+    as_epi_archive2()
+
+  old_key <- data.table::key(ea2$DT)
+
+  edf_as_of <- ea2 %>%
+    epix_as_of2(max_version = as.Date("2020-06-03"))
+
+  edf_expected <- as_epi_df(tibble(
+    geo_value = "ca",
+    time_value = d + 0:2,
+    cases = c(2, 1, 1)
+  ), as_of = as.Date("2020-06-03"))
+
+  expect_equal(edf_as_of, edf_expected, ignore_attr = c(".internal.selfref", "sorted"))
+  expect_equal(data.table::key(ea2$DT), old_key)
+})
+
+test_that("Errors are thrown due to bad epix_truncate_versions_after inputs", {
+  # x must be an archive
+  expect_error(epix_truncate_versions_after(data.frame(), as.Date("2020-01-01")))
+  # max_version cannot be of string class rather than date class
+  expect_error(epix_truncate_versions_after(ea, "2020-01-01"))
+  # max_version cannot be a vector
+  expect_error(epix_truncate_versions_after(ea, c(as.Date("2020-01-01"), as.Date("2020-01-02"))))
+  # max_version cannot be missing
+  expect_error(epix_truncate_versions_after(ea, as.Date(NA)))
+  # max_version cannot be after latest version in archive
+  expect_error(epix_truncate_versions_after(ea, as.Date("2025-01-01")))
+})
+
+test_that("epix_truncate_version_after properly grabs the data and doesn't mutate key", {
+  ea2 <- ea2_data %>%
+    as_epi_archive2()
+
+  old_key <- data.table::key(ea2$DT)
+
+  ea_as_of <- ea2 %>%
+    epix_truncate_versions_after(max_version = as.Date("2020-06-02"))
+
+  ea_expected <- ea2_data[1:3, ] %>%
+    as_epi_archive2()
+
+  expect_equal(ea_as_of, ea_expected, ignore_attr = c(".internal.selfref", "sorted"))
+  expect_equal(data.table::key(ea2$DT), old_key)
+})
+
+test_that("epix_truncate_version_after doesn't filter if max_verion at latest version", {
+  ea2 <- ea2_data %>%
+    as_epi_archive2()
+
+  ea_expected <- ea2 %>% clone()
+
+  ea_as_of <- ea2 %>%
+    epix_truncate_versions_after(max_version = as.Date("2020-06-04"))
+  expect_equal(ea_as_of, ea_expected, ignore_attr = c(".internal.selfref", "sorted"))
+})
+
+test_that("epix_truncate_version_after returns the same grouping type as input epi_archive", {
+  ea2 <- ea2_data %>%
+    as_epi_archive2()
+
+  ea_as_of <- ea2 %>%
+    epix_truncate_versions_after(max_version = as.Date("2020-06-04"))
+  expect_true(is_epi_archive2(ea_as_of, grouped_okay = FALSE))
+
+  ea2_grouped <- ea2 %>% group_by(geo_value)
+
+  ea_as_of <- ea2_grouped %>%
+    epix_truncate_versions_after(max_version = as.Date("2020-06-04"))
+  expect_true(is_grouped_epi_archive2(ea_as_of))
+})
+
+
+test_that("epix_truncate_version_after returns the same groups as input grouped_epi_archive", {
+  ea2 <- ea2_data %>%
+    as_epi_archive2()
+  ea2 <- ea2 %>% group_by(geo_value)
+
+  ea_expected <- ea2 %>% clone()
+
+  ea_as_of <- ea2 %>%
+    epix_truncate_versions_after(max_version = as.Date("2020-06-04"))
+  expect_equal(ea_as_of %>% groups(), ea_expected %>% groups())
+})