Commit 2243d36

progress towards #53

beanumber committed Jun 1, 2020
1 parent 80c8cda
Showing 3 changed files with 34 additions and 21 deletions.
36 changes: 20 additions & 16 deletions R/utils.R
@@ -66,15 +66,17 @@ valid_year_month <- function(years, months,
   begin <- as.Date(begin)
   end <- as.Date(end)
 
-  valid_months <- data.frame(expand.grid(years, months)) %>%
+  valid_months <- tibble::as_tibble(expand.grid(years, months)) %>%
     rename(year = Var1, month = Var2) %>%
-    mutate(month_begin = lubridate::ymd(paste(year, month,
-                                              "01", sep = "/"))) %>%
-    mutate(month_end = lubridate::ymd(
-      ifelse(month == 12, paste(year + 1, "01/01", sep = "/"),
-             paste(year, month + 1, "01", sep = "/"))) - 1) %>%
+    mutate(
+      month_begin = lubridate::ymd(paste(year, month, "01", sep = "/")),
+      month_end = lubridate::ymd(
+        ifelse(month == 12, paste(year + 1, "01/01", sep = "/"),
+               paste(year, month + 1, "01", sep = "/"))) - 1) %>%
-    filter(year > 0 & month >= 1 & month <= 12) %>%
-    filter(month_begin >= begin & month_begin <= end) %>%
+    filter(
+      year > 0 & month >= 1 & month <= 12,
+      month_begin >= begin & month_begin <= end
+    ) %>%
     arrange(month_begin)
   return(valid_months)
 }
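In effect, valid_year_month() now returns a tibble rather than a base data frame, which is what the new test below asserts. A minimal sketch of the new return value, assuming the unexported helper is reached via ::: and that the default begin/end window covers these dates:

    # Sketch only: valid_year_month() is unexported, hence the etl::: access
    etl:::valid_year_month(years = 1999, months = 1:2)
    #> # A tibble: 2 x 4
    #>    year month month_begin month_end
    #>   <dbl> <int> <date>      <date>
    #> 1  1999     1 1999-01-01  1999-01-31
    #> 2  1999     2 1999-02-01  1999-02-28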
@@ -90,11 +92,11 @@ valid_year_month <- function(years, months,
 #' @examples
 #' \dontrun{
 #' if (require(airlines)) {
-#'   airlines <- etl("airlines", dir = "~/dumps/airlines") %>%
+#'   airlines <- etl("airlines", dir = "~/Data/airlines") %>%
 #'     etl_extract(year = 1987)
 #'   summary(airlines)
 #'   match_files_by_year_months(list.files(attr(airlines, "raw_dir")),
-#'     pattern = "On_Time_On_Time_Performance_%Y_%m.zip"), year = 1987)
+#'     pattern = "On_Time_On_Time_Performance_%Y_%m.zip", year = 1987)
 #' }
 #' }

@@ -105,16 +107,18 @@ match_files_by_year_months <- function(files, pattern,
   if (length(files) < 1) {
     return(NULL)
   }
-  file_df <- data.frame(filename = files,
-                        file_date = extract_date_from_filename(files,
-                                                               pattern)) %>%
-    mutate(file_year = lubridate::year(file_date),
-           file_month = lubridate::month(file_date))
+  file_df <- tibble::tibble(
+    filename = files,
+    file_date = extract_date_from_filename(files, pattern)) %>%
+    mutate(
+      file_year = lubridate::year(file_date),
+      file_month = lubridate::month(file_date)
+    )
   valid <- valid_year_month(years, months)
   good <- file_df %>%
     left_join(valid, by = c("file_year" = "year", "file_month" = "month")) %>%
     filter(!is.na(month_begin))
-  return(as.character(good$filename))
+  return(fs::as_fs_path(good$filename))
 }
 
 #' @description Extracts a date from filenames
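Swapping as.character() for fs::as_fs_path() means callers now receive an fs_path vector (still a character vector underneath). A hypothetical call with invented filenames, assuming the function is exported as its .Rd file suggests:

    # Hypothetical filenames following the pattern from the example above
    files <- c("On_Time_On_Time_Performance_1987_10.zip",
               "On_Time_On_Time_Performance_1988_10.zip")
    res <- match_files_by_year_months(files,
      pattern = "On_Time_On_Time_Performance_%Y_%m.zip", year = 1987)
    class(res)  # the 1988 file is filtered out; an fs_path vector remains
    #> [1] "fs_path"   "character"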
4 changes: 2 additions & 2 deletions man/match_files_by_year_months.Rd

Some generated files are not rendered by default.

15 changes: 12 additions & 3 deletions tests/testthat/test-etl.R
@@ -59,17 +59,26 @@ test_that("mysql works", {
 
 
 test_that("valid_year_month works", {
-  expect_equal(
-    nrow(valid_year_month(years = 1999:2001, months = c(1:3, 7))), 12)
+  dates <- valid_year_month(years = 1999:2001, months = c(1:3, 7))
+  expect_is(dates, "tbl_df")
+  expect_equal(nrow(dates), 12)
 })
 
 test_that("extract_date_from_filename works", {
   test <- expand.grid(year = 1999:2001, month = c(1:6, 9)) %>%
     mutate(filename = paste0("myfile_", year, "_", month, ".ext"))
   expect_is(
     extract_date_from_filename(test$filename, pattern = "myfile_%Y_%m.ext"),
-    "Date")
+    "Date"
+  )
   expect_null(extract_date_from_filename(list.files("/cdrom"), pattern = "*"))
+  skip_if_not(require(airlines) && dir.exists("~/Data/airlines"))
+  airlines <- etl("airlines", dir = "~/Data/airlines") %>%
+    etl_extract(year = 1987)
+  skip_if_not(length(list.files(attr(airlines, "raw_dir"))) >= 12)
+  res <- match_files_by_year_months(list.files(attr(airlines, "raw_dir")),
+    pattern = "On_Time_On_Time_Performance_%Y_%m.zip", year = 1987)
+  expect_is(res, "fs_path")
 })
 
 test_that("etl works", {
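The new assertions depend on a local ~/Data/airlines cache, so the skip_if_not() guards keep the suite passing elsewhere. To exercise this test file alone (standard testthat usage, not part of this commit):

    # Skips fire unless the airlines package and its raw data cache exist
    testthat::test_file("tests/testthat/test-etl.R")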
