From 888bb572b447be3b08fb7f9f4d3576844948d0d3 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 3 Oct 2024 13:38:12 -0700
Subject: [PATCH] Re-export editing pass + don't re-export cumulative case
 data

The cumulative case data (`jhu_confirmed_cumulative_num`) is in tibble format,
which isn't the type of data many of our functions expect, so it is probably
best not to make it too accessible.
---
 NAMESPACE                           |  1 -
 R/reexports.R                       | 20 --------
 man/jhu_confirmed_cumulative_num.Rd | 75 -----------------------------
 vignettes/aggregation.Rmd           |  2 +-
 vignettes/archive.Rmd               |  8 +--
 vignettes/correlation.Rmd           | 13 ++---
 vignettes/epiprocess.Rmd            |  9 ++--
 vignettes/growth_rate.Rmd           |  2 +-
 vignettes/outliers.Rmd              |  2 +-
 vignettes/slide.Rmd                 |  2 +-
 10 files changed, 20 insertions(+), 114 deletions(-)
 delete mode 100644 man/jhu_confirmed_cumulative_num.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 935a1239..f422b627 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -87,7 +87,6 @@ export(growth_rate)
 export(guess_period)
 export(is_epi_df)
 export(is_grouped_epi_archive)
-export(jhu_confirmed_cumulative_num)
 export(key_colnames)
 export(max_version_with_row_in)
 export(mutate)
diff --git a/R/reexports.R b/R/reexports.R
index 800b5ea3..7358fcf4 100644
--- a/R/reexports.R
+++ b/R/reexports.R
@@ -139,26 +139,6 @@ delayedAssign("covid_incidence_county_subset", epidatasets::covid_incidence_coun
 #' @export
 delayedAssign("covid_incidence_outliers", epidatasets::covid_incidence_outliers)
 
-#' @inherit epidatasets::jhu_confirmed_cumulative_num description source references title
-#' @inheritSection epidatasets::jhu_confirmed_cumulative_num Data dictionary
-#' @examples
-#' # Since this is a re-exported dataset, it cannot be loaded using
-#' # the `data()` function. `data()` looks for a file of the same name
-#' # in the `data/` directory, which doesn't exist in this package.
-#' # works
-#' epiprocess::jhu_confirmed_cumulative_num
-#'
-#' # works
-#' library(epiprocess)
-#' jhu_confirmed_cumulative_num
-#'
-#' # fails
-#' \dontrun{
-#' data(jhu_confirmed_cumulative_num, package = "epiprocess")
-#' }
-#' @export
-delayedAssign("jhu_confirmed_cumulative_num", epidatasets::jhu_confirmed_cumulative_num)
-
 #' @inherit epidatasets::archive_cases_dv_subset description source references title
 #' @inheritSection epidatasets::archive_cases_dv_subset Data dictionary
 #' @examples
diff --git a/man/jhu_confirmed_cumulative_num.Rd b/man/jhu_confirmed_cumulative_num.Rd
deleted file mode 100644
index b288d974..00000000
--- a/man/jhu_confirmed_cumulative_num.Rd
+++ /dev/null
@@ -1,75 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/reexports.R
-\docType{data}
-\name{jhu_confirmed_cumulative_num}
-\alias{jhu_confirmed_cumulative_num}
-\title{Subset of COVID-19 cumulative case counts from 4 states}
-\format{
-An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 2808 rows and 14 columns.
-}
-\source{
-This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
-\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
-by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
-Copyright Johns Hopkins University 2020.
-
-Modifications:
-\itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
-These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes.
-\item Furthermore, the data has been limited to a very small number of rows,
-formatted into an \code{epi_df}, and the signal names slightly altered.
-}
-}
-\usage{
-jhu_confirmed_cumulative_num
-}
-\description{
-Data set for 4 states containing COVID-19 Cumulative Cases as reported by
-JHU-CSSE and downloaded from the CMU Delphi COVIDcast Epidata API.
-This example data is a snapshot as of March 20, 2024, and
-ranges from March 1, 2020 to January 31, 2022. It is limited
-to California, Florida, New York, and Texas.
-
-It is used in the {epiprocess} "Getting Started" vignette.
-}
-\section{Data dictionary}{
-
-
-The data has columns:
-\describe{
-\item{geo_value}{the geographic value associated with each row of measurements.}
-\item{signal}{name of metric, derived from upstream data.}
-\item{source}{name of upstream data source.}
-\item{geo_type}{spatial resolution of the signal.}
-\item{time_type}{temporal resolution of the signal.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{issue}{time unit (e.g., date) when the signal data were published.}
-\item{lag}{time delta (e.g. days) between when the underlying events happened and when the data were published.}
-\item{missing_value}{an integer code that is zero when the value field is present and non-zero when the data is missing (see \href{https://cmu-delphi.github.io/delphi-epidata/api/missing_codes.html}{missing codes}).}
-\item{missing_stderr}{an integer code that is zero when the stderr field is present and non-zero when the data is missing (see \href{https://cmu-delphi.github.io/delphi-epidata/api/missing_codes.html}{missing codes}).}
-\item{missing_sample_size}{an integer code that is zero when the sample_size field is present and non-zero when the data is missing (see \href{https://cmu-delphi.github.io/delphi-epidata/api/missing_codes.html}{missing codes}).}
-\item{value}{cumulative number of confirmed COVID-19 cases, derived from the underlying data source.}
-\item{stderr}{approximate standard error of the statistic with respect to its sampling distribution, NA when not applicable.}
-\item{sample_size}{number of “data points” used in computing the statistic, NA when not applicable.}
-}
-
-}
-
-\examples{
-# Since this is a re-exported dataset, it cannot be loaded using
-# the `data()` function. `data()` looks for a file of the same name
-# in the `data/` directory, which doesn't exist in this package.
-# works
-epiprocess::jhu_confirmed_cumulative_num
-
-# works
-library(epiprocess)
-jhu_confirmed_cumulative_num
-
-# fails
-\dontrun{
-data(jhu_confirmed_cumulative_num, package = "epiprocess")
-}
-}
-\keyword{datasets}
diff --git a/vignettes/aggregation.Rmd b/vignettes/aggregation.Rmd
index fbe33920..3b36cc96 100644
--- a/vignettes/aggregation.Rmd
+++ b/vignettes/aggregation.Rmd
@@ -22,7 +22,7 @@ library(covidcast)
 x <- covid_incidence_county_subset
 ```
 
-The data can also be fetched from the Delphi API with the following query:
+The data can also be fetched from the Delphi Epidata API with the following query:
 ```{r, message = FALSE, eval = FALSE, warning = FALSE}
 library(epidatr)
 
diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd
index a33a1d9f..86fc2c2b 100644
--- a/vignettes/archive.Rmd
+++ b/vignettes/archive.Rmd
@@ -36,10 +36,10 @@ library(ggplot2)
 
 # This fetches the raw data backing the archive_cases_dv_subset object.
 dv <- archive_cases_dv_subset$DT %>%
-  tibble()
+  as_tibble()
 ```
 
-The data can also be fetched from the Delphi API with the following query:
+The data can also be fetched from the Delphi Epidata API with the following query:
 ```{r, message = FALSE, warning = FALSE, eval = FALSE}
 library(epidatr)
 
@@ -84,8 +84,8 @@ print(x)
 ```
 
 An `epi_archive` is consists of a primary field `DT`, which is a data table
-(from the `data.table` package) that has the columns `geo_value`, `time_value`,
-`version` (and possibly additional ones), and other metadata fields, such as
+(from the `data.table` package) that has at least the required columns
+`geo_value`, `time_value`, and `version`; and other metadata fields, such as
 `geo_type`.
 
 The variables `geo_value`, `time_value`, `version` serve as **key variables**
diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd
index 931564a6..adb5fe86 100644
--- a/vignettes/correlation.Rmd
+++ b/vignettes/correlation.Rmd
@@ -23,11 +23,12 @@ library(dplyr)
 The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
 ```{r}
 x <- cases_deaths_subset %>%
-  select(geo_value, time_value, case_rate = case_rate_7d_av, death_rate = death_rate_7d_av) %>%
+  select(geo_value, time_value,
+         case_rate = case_rate_7d_av, death_rate = death_rate_7d_av) %>%
   arrange(geo_value, time_value)
 ```
 
-The data can also be fetched from the Delphi API with the following query:
+The data can also be fetched from the Delphi Epidata API with the following query:
 ```{r, eval = FALSE}
 library(epidatr)
 
@@ -36,10 +37,10 @@ d <- as.Date("2024-03-20")
 x <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_7dav_incidence_prop",
-  time_type = "day",
   geo_type = "state",
-  time_values = epirange(20200301, 20211231),
+  time_type = "day",
   geo_values = "*",
+  time_values = epirange(20200301, 20211231),
   as_of = d
 ) %>%
   select(geo_value, time_value, case_rate = value)
@@ -47,10 +48,10 @@ x <- pub_covidcast(
 y <- pub_covidcast(
   source = "jhu-csse",
   signals = "deaths_7dav_incidence_prop",
-  time_type = "day",
   geo_type = "state",
-  time_values = epirange(20200301, 20211231),
+  time_type = "day",
   geo_values = "*",
+  time_values = epirange(20200301, 20211231),
   as_of = d
 ) %>%
   select(geo_value, time_value, death_rate = value)
diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd
index 8572c808..96ed725f 100644
--- a/vignettes/epiprocess.Rmd
+++ b/vignettes/epiprocess.Rmd
@@ -98,12 +98,12 @@ which we also broadly refer to as signal variables. The documentation for
 A data frame or tibble that has `geo_value` and `time_value` columns can be
 converted into an `epi_df` object, using the function `as_epi_df()`. As an
 example, we'll work with daily cumulative COVID-19 cases from four U.S. states:
-CA, FL, NY, and TX, over time span from mid 2020 to early 2022, and we'll use
-the [`epidatr`](https://github.com/cmu-delphi/epidatr) package
-to fetch this data from the [COVIDcast
-API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html).
+CA, FL, NY, and TX, over a period from mid 2020 to early 2022. We have included
+this example data in the `epidatasets::jhu_confirmed_cumulative_num` object,
+which we prepared by downloading the data using `epidatr::pub_covidcast()`.
 
 ```{r, message = FALSE}
+library(epidatasets)
 library(epiprocess)
 library(dplyr)
 library(tidyr)
@@ -111,6 +111,7 @@ library(withr)
 
 cases <- jhu_confirmed_cumulative_num
 
+class(cases)
 colnames(cases)
 ```
 
diff --git a/vignettes/growth_rate.Rmd b/vignettes/growth_rate.Rmd
index 214776f1..326a07c4 100644
--- a/vignettes/growth_rate.Rmd
+++ b/vignettes/growth_rate.Rmd
@@ -29,7 +29,7 @@ x <- cases_deaths_subset %>%
   arrange(geo_value, time_value)
 ```
 
-The data can also be fetched from the Delphi API with the following query:
+The data can also be fetched from the Delphi Epidata API with the following query:
 ```{r, message = FALSE, eval = FALSE}
 library(epidatr)
 
diff --git a/vignettes/outliers.Rmd b/vignettes/outliers.Rmd
index 48da8f80..1c00ff6e 100644
--- a/vignettes/outliers.Rmd
+++ b/vignettes/outliers.Rmd
@@ -16,7 +16,7 @@ reported COVID-19 case counts from FL and NJ.
 
 The dataset has 730 rows and 3 columns.
 
-```{r, echo=FALSE, warning=FALSE, message=FALSE}
+```{r, echo=TRUE, warning=FALSE, message=FALSE}
 library(epiprocess)
 library(dplyr)
 library(tidyr)
diff --git a/vignettes/slide.Rmd b/vignettes/slide.Rmd
index 935daac6..0257b3ee 100644
--- a/vignettes/slide.Rmd
+++ b/vignettes/slide.Rmd
@@ -38,7 +38,7 @@ edf <- cases_deaths_subset %>%
   arrange(geo_value, time_value)
 ```
 
-The data can also be fetched from the Delphi API with the following query:
+The data can also be fetched from the Delphi Epidata API with the following query:
 ```{r, message = FALSE, eval = FALSE}
 library(epidatr)