Skip to content

Commit

Permalink
style: styler (GHA)
Browse files Browse the repository at this point in the history
  • Loading branch information
rnayebi21 committed Dec 6, 2024
1 parent b9d4226 commit 98620f7
Showing 1 changed file with 62 additions and 58 deletions.
120 changes: 62 additions & 58 deletions vignettes/na-notebook.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -105,63 +105,64 @@ normalize <- function(x) {
# and prints out the corresponding days (using annotations got a little messy)
# that they start as well in the console
# also returns the number of gaps in the signal
plot_signals <- function(x, version=NULL, verbose=TRUE){
title = "Normalized Signals Comparison"
if (!is.null(version)){
if(verbose){
plot_signals <- function(x, version = NULL, verbose = TRUE) {
title <- "Normalized Signals Comparison"
if (!is.null(version)) {
if (verbose) {
print(paste0("Version: ", version))
}
title = paste0("Normalized Signals Comparison\tVersion ", version)
title <- paste0("Normalized Signals Comparison\tVersion ", version)
}
# x <- x %>% filter(geo_value == "ca")

Check warning on line 117 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=117,col=5,[commented_code_linter] Commented code should be removed.
num_signals <- length(colnames(x)) - 2

Check warning on line 118 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=118,col=3,[object_usage_linter] local variable 'num_signals' assigned but may not be used
first_non_nas <- list()
na_regions_list <- list()
for (col in colnames(x)){
for (col in colnames(x)) {
if (col == "geo_value" | col == "time_value") {

Check warning on line 123 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=123,col=28,[vector_logic_linter] Conditional expressions require scalar logical operators (&& and ||)
next
}
else{
} else {
x[[col]] <- normalize(x[[col]])
pair <- list(col, na.omit(x[, c("time_value", col)])[1, ]$time_value)
first_non_nas <- c(first_non_nas, list(pair))
na_regions <- x %>%
select(time_value, !!sym(col))
na_regions <- na_regions %>%
mutate(is_na = is.na(!!sym(col)))
na_regions <- na_regions %>%
na_regions <- na_regions %>%
mutate(is_na = is.na(!!sym(col)))
na_regions <- na_regions %>%
group_by(group = cumsum(!is_na))

Check warning on line 137 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=137,col=34,[object_usage_linter] no visible binding for global variable 'is_na'
na_regions <- na_regions %>%
na_regions <- na_regions %>%
filter(is_na == TRUE)

Check warning on line 140 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=140,col=16,[object_usage_linter] no visible binding for global variable 'is_na'
na_regions <- na_regions %>%
summarize(start = min(time_value), end = max(time_value), signal = col)
summarize(start = min(time_value), end = max(time_value), signal = col)
na_regions <- na_regions %>%
ungroup()
na_regions_list[[col]] <- na_regions
}
}
x <- x %>% select(-geo_value) %>% gather(key = "signal", value = "value", -time_value)
if(verbose){
x <- x %>%
select(-geo_value) %>%
gather(key = "signal", value = "value", -time_value)
if (verbose) {
plot <- ggplot(x, aes(x = time_value, y = value, color = signal)) +

Check warning on line 157 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=157,col=62,[object_usage_linter] no visible binding for global variable 'signal'
geom_line() +
labs(title = title, x = "Time", y = "Normalized Value") +
theme_minimal()
for (col in names(na_regions_list)) {
cat(paste0("Signal: ", col, "\n"))
na_regions <- na_regions_list[[col]]
if (nrow(na_regions) > 0) {
for (i in 1:nrow(na_regions)) {

Check warning on line 167 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=167,col=19,[seq_linter] 1:nrow(...) is likely to be wrong in the empty edge case. Use seq_len(nrow(...)) instead.
cat(paste0(" Gap: ", na_regions$start[i], " to ", na_regions$end[i], "\n"))
Expand All @@ -172,25 +173,29 @@ plot_signals <- function(x, version=NULL, verbose=TRUE){
}
for (col in names(na_regions_list)) {
plot <- plot +
geom_rect(data = na_regions_list[[col]],
aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf, fill = signal),
color = NA, alpha = 0.2, inherit.aes = FALSE)
geom_rect(
data = na_regions_list[[col]],
aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf, fill = signal),

Check warning on line 178 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=178,col=73,[object_usage_linter] no visible binding for global variable 'signal'
color = NA, alpha = 0.2, inherit.aes = FALSE
)
lines <- na_regions_list[[col]] %>% filter(start == end)
plot <- plot +
geom_vline(data = lines, aes(xintercept = as.numeric(start)), color = alpha("red", 0.2), linetype = "solid")
}
cat("\n")
print(plot)
}
max_num_gaps <- max(sapply(na_regions_list, function(x) length(x[["start"]])))
max_num_gaps
}
```

```{r visualize_signal, warning=FALSE}
aux_signal_latest_ca <- aux_signal %>% latest() %>% filter(geo_value == 'ca')
aux_signal_latest_ca <- aux_signal %>%
latest() %>%
filter(geo_value == "ca")
plot_signals(aux_signal_latest_ca)
```

Expand All @@ -202,10 +207,10 @@ However, NAs can arise in different settings as well. For example if we look at

```{r, warning=FALSE, echo=FALSE}
# Generating False Data
# initially thought to be real data, however issue dates were incorrect, and
# initially thought to be real data, however issue dates were incorrect, and
# once fixed, there were not many NA values as I had hoped for
generate_signal <- function(){
generate_signal <- function() {
cov_adm <- pub_covidcast(
source = "hhs",
signals = "confirmed_admissions_covid_1d",
Expand All @@ -217,27 +222,28 @@ generate_signal <- function(){
) %>%
select(geo_value, time_value, version = issue, confirmed_cov = value) %>%

Check warning on line 223 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=223,col=45,[object_usage_linter] no visible binding for global variable 'issue'
as_epi_archive()
versions <- unique(cov_adm[["DT"]][["version"]])
# random_dates <- sample(versions, 100, replace = FALSE)

Check warning on line 227 in vignettes/na-notebook.Rmd

View workflow job for this annotation

GitHub Actions / lint

file=vignettes/na-notebook.Rmd,line=227,col=5,[commented_code_linter] Commented code should be removed.
max_gaps <- 0
max_df <- NULL
max_version <- NULL
for (date in sort(versions)) {
date <- as.Date(date)
for(geo_val in unlist(strsplit(states, split = ","))){
current <- epix_as_of(cov_adm, max_version = date) %>% filter(geo_value == geo_val) %>%
for (geo_val in unlist(strsplit(states, split = ","))) {
current <- epix_as_of(cov_adm, max_version = date) %>%
filter(geo_value == geo_val) %>%
na.omit()
daily <- tibble(time_value = seq(
from = as.Date(min(current$time_value)),
to = as.Date(max(current$time_value)),
by = "1 day"
))
current <- daily %>% left_join(current, by = "time_value")
current$geo_value <- geo_val
num_gaps <- plot_signals(current, version=date, verbose=FALSE)
num_gaps <- plot_signals(current, version = date, verbose = FALSE)
if (num_gaps > max_gaps) {
max_gaps <- num_gaps
max_df <- current
Expand Down Expand Up @@ -269,7 +275,7 @@ data <- tibble(
data_completed <- data %>%
complete(
geo,
geo,
time = seq.Date(from = min(time), to = max(time), by = "day")
)
Expand Down Expand Up @@ -315,10 +321,10 @@ No more NAs in our final signal! But this a little bit of a naive approach. Rath
impute_moving_average <- function(col, window_size = 3) {
n <- length(col)
for (i in seq(window_size, n - 1)) {
curr_sum <- sum(col[max(1, i-window_size+1):i])
curr_sum <- sum(col[max(1, i - window_size + 1):i])
average <- curr_sum / window_size
if (col[i + 1] %>% is.na()) {
col[i + 1] = average
col[i + 1] <- average
}
}
col
Expand All @@ -329,7 +335,7 @@ We will plot it so you can see how the NAs were filled in. The red background is

```{r, warning=FALSE}
cov_NAs_subset_ma <- cov_NAs_subset
cov_NAs_subset_ma$confirmed_cov_ma <- cov_NAs_subset_ma$confirmed_cov %>% impute_moving_average
cov_NAs_subset_ma$confirmed_cov_ma <- cov_NAs_subset_ma$confirmed_cov %>% impute_moving_average()
plot_signals(cov_NAs_subset_ma)
```

Expand All @@ -342,14 +348,13 @@ cov_NAs_subset
Let's do this by hand to verify that our results are correct, by taking the previous 3 values (window size specified) and computing the average

```{r}
jul_31 = (944+848+863)/3
aug_01 = (848+863+jul_31)/3
aug_04 = (aug_01+704+761)/3
jul_31 <- (944 + 848 + 863) / 3
aug_01 <- (848 + 863 + jul_31) / 3
aug_04 <- (aug_01 + 704 + 761) / 3
paste0("Value for Jul 31st: ", jul_31)
paste0("Value for Aug 1st: ", aug_01)
paste0("Value for Aug 4th: ", aug_04)
```

```{r}
Expand All @@ -359,12 +364,12 @@ cov_NAs_subset_ma
Note that we can do this exact same appraoch also using multiple calls to `epi_slide_mean()`. Because we have to continually update the average each time we update an NA, especially for NAs that occur within the window range.

```{r}
cov_NAs_subset_epi_ma <- cov_NAs_subset %>% as_epi_df()
cov_NAs_subset_epi_ma <- cov_NAs_subset %>% as_epi_df()
first <- TRUE
curr_pass <- cov_NAs_subset_epi_ma %>% epi_slide_mean(confirmed_cov, before = 3, na.rm = TRUE)
for (i in 1:length(cov_NAs_subset_epi_ma$confirmed_cov)) {
curr_val = cov_NAs_subset_epi_ma$confirmed_cov[i]
curr_val <- cov_NAs_subset_epi_ma$confirmed_cov[i]
if (is.na(curr_val)) {
cov_NAs_subset_epi_ma$confirmed_cov[i] <- curr_pass$slide_value_confirmed_cov[i]
curr_pass <- cov_NAs_subset_epi_ma %>% epi_slide_mean(confirmed_cov, before = 3, na.rm = TRUE)
Expand All @@ -387,7 +392,7 @@ The next step is linear interpolation. Here we can think of this as using linear
# second passes goes through and runs the regressions between each end point
linear_interpolate_2pass <- function(values) {
interpolated_values <- values
# first pass
na_gaps <- list()
in_gap <- FALSE
Expand All @@ -407,7 +412,7 @@ linear_interpolate_2pass <- function(values) {
}
}
}
# second pass
for (gap in na_gaps) {
start <- gap[1]
Expand All @@ -419,7 +424,7 @@ linear_interpolate_2pass <- function(values) {
interpolated_values[(start + 1):(end - 1)] <- interpolated_section[-c(1, length(interpolated_section))]
}
}
return(interpolated_values)
}
```
Expand Down Expand Up @@ -509,9 +514,8 @@ example_archive_y <- tribble(
version = as.Date(version)
) %>%
as_epi_archive(compactify = FALSE)
```

```{r}
epix_merge(example_archive_x, example_archive_y, sync='locf')
```
epix_merge(example_archive_x, example_archive_y, sync = "locf")
```

0 comments on commit 98620f7

Please sign in to comment.