From ce92f4dff4ff9b60c9788a9aff64c2fb889458eb Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Mon, 4 Nov 2019 23:03:45 -0800
Subject: [PATCH 01/64] add delimiter detection

---
 R/identify_delim.R                         |   72 +
 R/tt_datasets.R                            |   23 +-
 R/tt_read_data.R                           |   28 +-
 R/utils.R                                  |    3 +-
 man/download_read.Rd                       |    8 +-
 man/identify_delim.Rd                      |   25 +
 tests/testthat/test-identify_delim.R       |   42 +
 tests/testthat/test-tt_load_gh.R           |    9 +-
 tests/testthat/testfiles/strange_delim.txt | 1387 ++++++++++++++++++++
 9 files changed, 1571 insertions(+), 26 deletions(-)
 create mode 100644 R/identify_delim.R
 create mode 100644 man/identify_delim.Rd
 create mode 100644 tests/testthat/test-identify_delim.R
 create mode 100644 tests/testthat/testfiles/strange_delim.txt

diff --git a/R/identify_delim.R b/R/identify_delim.R
new file mode 100644
index 0000000..85b32f7
--- /dev/null
+++ b/R/identify_delim.R
@@ -0,0 +1,72 @@
+#' @title Identify potential delimiters of a file
+#'
+#' @param file path to file
+#' @param delims a vector of delimiters to try
+#' @param n number of rows to look at in the file to determine the delimiter
+#' @param comment treat lines beginning with this character as comments
+#' @param skip number of lines to skip at the beginning
+#' @param quote set of quoting characters
+#' @importFrom utils download.file
+#'
+
+identify_delim <- function(file,
+                           delims = c("\t", ",", " ", "|", ";"),
+                           n = 10,
+                           comment = "#",
+                           skip = 0,
+                           quote = "\""
+                           ) {
+
+  # Load lines of file in
+  test <- readLines(file, n = n + skip)
+  if (skip > 0) {
+    test <- test[-c(seq(skip))]
+  }
+  comment_lines <- grepl("^[#]", test)
+  if (sum(comment_lines) > 0) {
+    eof <- FALSE
+    while ((length(test) - sum(comment_lines) < n) & !eof) {
+      test <- readLines(file, n = n + skip + sum(comment_lines))
+      if (length(test) < n + skip + sum(comment_lines)) {
+        eof <- TRUE
+      }
+      if (skip > 0) {
+        test <- test[-c(seq(skip))]
+      }
+      comment_lines <- grepl("^[#]", test)
+    }
+    test <- test[!comment_lines]
+  }
+
+  # Attempt splitting on the list of delimiters
+  num_splits <- list()
+  for (delim in delims) {
+    delim_regex <- paste0("[",delim,"](?=(?:[^",quote,"]*",quote,"[^",quote,"]*",quote,")*[^",quote,"]*$)")
+    num_splits[[delim]] <- do.call("c", lapply(strsplit(test, delim_regex, perl = TRUE), length))
+  }
+
+  if (all(unlist(num_splits) == 1)) {
+    warning("Not able to detect delimiter for the file. Defaulting to `\\t`.")
+    return("\t")
+  }
+
+  # which delimiters produced consistent splits into more than one field?
+  good_delims <- do.call("c", lapply(num_splits, function(cuts) {
+    all(cuts == cuts[1]) & cuts[1] > 1
+  }))
+
+  good_delims <- names(good_delims)[good_delims]
+
+  if (length(good_delims) == 0) {
+    warning("Not able to detect delimiter for the file. Defaulting to ` `.")
+    return(" ")
+  } else if (length(good_delims) > 1) {
+    warning("Detected multiple possible delimiters: ",
+            paste0("`", good_delims, "`", collapse = ", "), ". Defaulting to ",
+            paste0("`", good_delims[1], "`"), ".")
+    return(good_delims[1])
+  } else {
+    return(good_delims)
+  }
+
+}
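Note on the detection heuristic above: each candidate delimiter is wrapped in a lookahead so a match only counts when it is followed by an even number of quote characters, i.e. when it sits outside a quoted field, and a candidate is kept only if every sampled row splits into the same number (greater than one) of fields. A minimal sketch of that regex in isolation (the input string here is made up for illustration, not from the test files):

    # A comma inside a quoted field must not count as a separator.
    quote <- "\""
    delim_regex <- paste0("[,](?=(?:[^", quote, "]*", quote,
                          "[^", quote, "]*", quote, ")*[^", quote, "]*$)")
    strsplit('a,"b,c",d', delim_regex, perl = TRUE)[[1]]
    #> [1] "a"       "\"b,c\"" "d"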
",file = tmpHTML,append = TRUE) + tmpHTML<-setup_doc() x$html%>% as.character%>% purrr::walk(~cat(gsub("href=\"/rfordatascience/tidytuesday/", @@ -84,14 +79,8 @@ print.tt_dataset_table<-function(x,...,printConsole=FALSE){ print.tt_dataset_table_list<-function(x,...,printConsole=FALSE){ if(rstudioapi::isAvailable() & !printConsole){ - tmpHTML<-tempfile(fileext = ".html") - cat(" - - - ",file=tmpHTML) - cat("
",file = tmpHTML,append = TRUE) + tmpHTML<-setup_doc() cat("

TidyTuesday Datasets

",file = tmpHTML,append = TRUE) - names(x)%>% purrr::map(function(.x,x){list(html=as.character(x[[.x]]$html),year=.x)},x=x)%>% purrr::walk(~cat(paste0("

",.x$year,"

\n",gsub("href=\"/rfordatascience/tidytuesday/", @@ -109,3 +98,11 @@ print.tt_dataset_table_list<-function(x,...,printConsole=FALSE){ } +setup_doc<-function(tmpHTML = tempfile(fileext = ".html")){ + cat(" + + + ",file=tmpHTML) + cat("
",file = tmpHTML,append = TRUE) + return(tmpHTML) +} diff --git a/R/tt_read_data.R b/R/tt_read_data.R index b2a75b2..14171f8 100644 --- a/R/tt_read_data.R +++ b/R/tt_read_data.R @@ -52,8 +52,8 @@ tt_read_url<-function(url){ switch(tools::file_ext(gsub("[?]raw=true","",url)), "xls"=download_read(url,readxl::read_xls,mode="wb"), "xlsx"=download_read(url,readxl::read_xlsx,mode="wb"), - "tsv"=readr::read_delim(url,"\t",guess_max = 21474836,progress = FALSE), - "csv"=readr::read_delim(url,",",guess_max = 21474836,progress = FALSE)) + download_read(url,readr::read_delim,guess_max = 21474836,progress = FALSE,find_delim = TRUE) + ) } #' @title utility to assist with 'reading' urls that cannot normally be read by file functions @@ -62,11 +62,27 @@ tt_read_url<-function(url){ #' @param func the function to perform reading of url #' @param ... args to pass to func #' @param mode mode passed to \code{utils::download.file}. default is "w" +#' @param find_delim should the delimeters be found for the file #' @importFrom utils download.file #' -download_read<-function(url,func,...,mode="w"){ - temp_file<-tempfile(fileext = paste0(".",tools::file_ext(url))) - utils::download.file(url,temp_file,quiet = TRUE,mode=mode) - func(temp_file,...) +download_read<-function(path, func, ..., mode="w", find_delim = FALSE){ + + temp_file<-tempfile(fileext = paste0(".",tools::file_ext(path))) + utils::download.file(path,temp_file,quiet = TRUE,mode=mode) + + dots <- as.list(substitute(substitute(...)))[-1] + func_call <- c(substitute(func),substitute(temp_file),dots) + + if(find_delim){ + if(!(!is.null(names(func_call)) & + "delim"%in%names(func_call)) & + "delim" %in% names(as.list(args(func)))){ + func_call$delim <- identify_delim(temp_file) + } + } + + eval(as.call(func_call)) } + +# diff --git a/R/utils.R b/R/utils.R index 60fb09b..cd97cfa 100644 --- a/R/utils.R +++ b/R/utils.R @@ -22,8 +22,7 @@ readme<-function(tt){ if(length(tt[['tt']]$readme)>0 ){ #if running in rstudio, print out that if(rstudioapi::isAvailable()){ - readmeURL<-tt_make_html(tt) - rstudioapi::viewer(url = readmeURL) + rstudioapi::viewer(url = tt_make_html(tt)) } } } diff --git a/man/download_read.Rd b/man/download_read.Rd index 7d7f74c..ff52011 100644 --- a/man/download_read.Rd +++ b/man/download_read.Rd @@ -4,16 +4,18 @@ \alias{download_read} \title{utility to assist with 'reading' urls that cannot normally be read by file functions} \usage{ -download_read(url, func, ..., mode = "w") +download_read(path, func, ..., mode = "w", find_delim = FALSE) } \arguments{ -\item{url}{path to online file to be read} - \item{func}{the function to perform reading of url} \item{...}{args to pass to func} \item{mode}{mode passed to \code{utils::download.file}. 
default is "w"} + +\item{find_delim}{should the delimeters be found for the file} + +\item{url}{path to online file to be read} } \description{ utility to assist with 'reading' urls that cannot normally be read by file functions diff --git a/man/identify_delim.Rd b/man/identify_delim.Rd new file mode 100644 index 0000000..123faf4 --- /dev/null +++ b/man/identify_delim.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/identify_delim.R +\name{identify_delim} +\alias{identify_delim} +\title{Identify potential delimeters of file} +\usage{ +identify_delim(file, delims = c("\\t", ",", " ", "|", ";"), n = 10, + comment = "#", skip = 0, quote = "\\"") +} +\arguments{ +\item{delims}{a vector of delimeters to try} + +\item{n}{number of rows to look at in the file to determine the delimters} + +\item{comment}{identify lines that are comments if this character is at the beginning} + +\item{skip}{number of lines to skip at the beginning} + +\item{quote}{set of quoting characters} + +\item{path}{path to file} +} +\description{ +Identify potential delimeters of file +} diff --git a/tests/testthat/test-identify_delim.R b/tests/testthat/test-identify_delim.R new file mode 100644 index 0000000..d217889 --- /dev/null +++ b/tests/testthat/test-identify_delim.R @@ -0,0 +1,42 @@ +context("test-identify_delim") + +test_that("Correctly identify the delimeter", { + delim_file <- tempfile() + writeLines(c("test,the,delim","this,is,a comma"),delim_file) + expect_equal(identify_delim(delim_file),",") + }) + +test_that("If multiple possible delimeter exist, pick the `simplest` one", { + delim_file <- tempfile() + writeLines(c("test\t,the\t,delim","this\t,is\t,a twofer"),delim_file) + + expect_warning( + identify_delim(delim_file), + "Detected multiple possible delimeters:" + ) + suppressWarnings({ + expect_equal( + identify_delim(delim_file), + "\t" + ) + }) +}) + +test_that("If unable to identify a delimeter, give a warning", { + delim_file <- tempfile() + writeLines(c("test\tthe\tdelim","this,is|a twofer"),delim_file) + expect_warning(identify_delim(delim_file),"Not able to detect delimiter for") + suppressWarnings({ + expect_equal( + identify_delim(delim_file), + " " + ) + }) +}) + +test_that("Can skip lines with comments to find delimeters, or ones identified to skip", { + delim_file <- tempfile() + writeLines(c("#this,line|isskipped","test,the,delim","this,is,a comma"),delim_file) + expect_equal(identify_delim(delim_file),",") + expect_equal(identify_delim(delim_file,skip = 1),",") +}) diff --git a/tests/testthat/test-tt_load_gh.R b/tests/testthat/test-tt_load_gh.R index 1e9abca..1dafe36 100644 --- a/tests/testthat/test-tt_load_gh.R +++ b/tests/testthat/test-tt_load_gh.R @@ -20,9 +20,10 @@ test_that("tt_load_gh returns tt_gh object when provided proper year and TT week testthat::expect_equal(tt_gh$url,"https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") }) + #check that errors are returned test_that("tt_load_gh returns error when incorrect date", { - testthat::expect_error(tt_load_gh("2019-01-16"),"is not a date that has TidyTuesday data") + nullout <- capture.output({testthat::expect_error(tt_load_gh("2019-01-16"),"is not a date that has TidyTuesday data")}) }) test_that("tt_load_gh returns error when incorrect years or week number entries", { testthat::expect_error(tt_load_gh(2018,92),"Please enter a value for week between 1") @@ -34,7 +35,11 @@ test_that("tt_load_gh returns error when incorrect years or week number entries" 
  testthat::expect_error(tt_load_gh(2017,92),"TidyTuesday did not exist for")
 })
 test_that("tt_load_gh returns error when nothing is entered", {
-  testthat::expect_error(tt_load_gh(),"Enter either the year or date of the TidyTuesday Data")
+  nullout <- capture.output({testthat::expect_error(tt_load_gh(),"Enter either the year or date of the TidyTuesday Data")})
+})
+test_that("tt_load_gh returns error when week is not a valid entry between 1 and n weeks", {
+  testthat::expect_error(tt_load_gh(2019,0),
+                         "Week entry must be a valid positive integer")
 })

 #test driven dev, new feature to add
diff --git a/tests/testthat/testfiles/strange_delim.txt b/tests/testthat/testfiles/strange_delim.txt
new file mode 100644
index 0000000..7bd2449
--- /dev/null
+++ b/tests/testthat/testfiles/strange_delim.txt
@@ -0,0 +1,1387 @@
+season|number|production_code|episode_title|guest_star|role
+1|002–102|7G02|Bart the Genius|Marcia Wallace|Edna Krabappel; Ms. Melon
+1|003–103|7G03|Homer's Odyssey|Sam McMurray|Worker
+1|003–103|7G03|Homer's Odyssey|Marcia Wallace|Edna Krabappel
+1|006–106|7G06|Moaning Lisa|Miriam Flynn|Ms. Barr
+1|006–106|7G06|Moaning Lisa|Ron Taylor|Bleeding Gums Murphy
+1|007–107|7G09|The Call of the Simpsons|Albert Brooks|Cowboy Bob
+1|008–108|7G07|The Telltale Head|Marcia Wallace|Edna Krabappel
+1|009–109|7G11|Life on the Fast Lane|Albert Brooks|Jacques
+1|010–110|7G10|Homer's Night Out|Sam McMurray|Gulliver Dark
+1|011–111|7G13|The Crepes of Wrath|Christian Coffinet|Gendarme Officer
+1|012–112|7G12|Krusty Gets Busted|Kelsey Grammer|Sideshow Bob
+1|013–113|7G01|Some Enchanted Evening|June Foray|Babysitter service receptionist; Doofy the Elf
+1|013–113|7G01|Some Enchanted Evening|Penny Marshall|Ms. Botz; Lucille Botzcowski
+1|013–113|7G01|Some Enchanted Evening|Paul Willson|Florist
+2|014–201|7F03|Bart Gets an "F"|Marcia Wallace|Edna Krabappel
+2|015–202|7F02|Simpson and Delilah|Harvey Fierstein|Karl
+2|016–203|7F04|Treehouse of Horror|James Earl Jones|Removal man; Serak the Preparer; Narrator
+2|018–205|7F05|Dancin' Homer|Tony Bennett|Himself
+2|018–205|7F05|Dancin' Homer|Daryl Coley|Bleeding Gums Murphy
+2|018–205|7F05|Dancin' Homer|Ken Levine|Dan Horde
+2|018–205|7F05|Dancin' Homer|Tom Poston|Capital City Goofball
+2|020–207|7F07|Bart vs. Thanksgiving|Greg Berg|Rory; Eddie; Radio voice; "Hooray for Everything" Announcer; Security Man
+2|020–207|7F07|Bart vs. Thanksgiving|Carol Kane|Maggie Simpson
+2|022–209|7F09|Itchy & Scratchy & Marge|Alex Rocco|Roger Meyers Jr.
+2|023–210|7F10|Bart Gets Hit by a Car|Phil Hartman|Lionel Hutz; Heaven
+2|024–211|7F11|One Fish, Two Fish, Blowfish, Blue Fish|Larry King|Himself
+2|024–211|7F11|One Fish, Two Fish, Blowfish, Blue Fish|Joey Miyashima|Toshiro
+2|024–211|7F11|One Fish, Two Fish, Blowfish, Blue Fish|Sab Shimono|Master Sushi Chef
+2|024–211|7F11|One Fish, Two Fish, Blowfish, Blue Fish|George Takei|Akira
+2|024–211|7F11|One Fish, Two Fish, Blowfish, Blue Fish|Diana Tanaka|Hostess
+2|025–212|7F12|The Way We Was|Jon Lovitz|Artie Ziff; Mr. Seckofsky
+2|026–213|7F13|Homer vs.
Lisa and the 8th Commandment|Phil Hartman|Troy McClure; Moses; Cable guy +2|027–214|7F15|Principal Charming|Marcia Wallace|Edna Krabappel +2|028–215|7F16|Oh Brother, Where Art Thou?|Danny DeVito|Herbert Powell +2|029–216|7F14|Bart's Dog Gets an F|Tracey Ullman|Emily Winthropp; Sylvia Winfield +2|029–216|7F14|Bart's Dog Gets an F|Frank Welker|Santa's Little Helper; Additional dog voices +2|030–217|7F17|Old Money|Phil Hartman|Lionel Hutz; Plato +2|030–217|7F17|Old Money|Audrey Meadows|Bea Simmons +2|030–217|7F17|Old Money|Marcia Wallace|Edna Krabappel +2|031–218|7F18|Brush with Greatness|Jon Lovitz|Professor Lombardo +2|031–218|7F18|Brush with Greatness|Ringo Starr|Himself +2|032–219|7F19|Lisa's Substitute|Dustin Hoffman|Mr. Bergstrom +2|032–219|7F19|Lisa's Substitute|Marcia Wallace|Edna Krabappel +2|034–221|7F21|Three Men and a Comic Book|Cloris Leachman|Mrs. Glick +2|034–221|7F21|Three Men and a Comic Book|Daniel Stern|Narrator +3|036–301|7F24|Stark Raving Dad|Michael Jackson|Leon Kompowsky +3|036–301|7F24|Stark Raving Dad|Kipp Lennon|Leon Kompowsky's singing voice +3|037–302|8F01|Mr. Lisa Goes to Washington|Lona Williams|Minnesota girl +3|039–304|8F03|Bart the Murderer|Neil Patrick Harris|Himself playing Bart Simpson +3|039–304|8F03|Bart the Murderer|Phil Hartman|Lionel Hutz; Troy McClure; Joey; Godfather +3|039–304|8F03|Bart the Murderer|Joe Mantegna|Fat Tony; Himself playing Fat Tony +3|039–304|8F03|Bart the Murderer|Marcia Wallace|Edna Krabappel +3|040–305|8F04|Homer Defined|Chick Hearn|Himself +3|040–305|8F04|Homer Defined|Magic Johnson|Himself +3|040–305|8F04|Homer Defined|Jon Lovitz|Aristotle Amadopolis; Mr. Devaro +3|041–306|8F05|Like Father, Like Clown|Jackie Mason|Rabbi Hyman Krustofsky +3|042–307|8F02|Treehouse of Horror II|Marcia Wallace|Edna Krabappel +3|043–308|8F06|Lisa's Pony|Marcia Wallace|Edna Krabappel +3|043–308|8F06|Lisa's Pony|Frank Welker|Princess +3|044–309|8F07|Saturdays of Thunder|Phil Hartman|Troy McClure +3|044–309|8F07|Saturdays of Thunder|Larry McKay|Announcer +3|045–310|8F08|Flaming Moe's|Aerosmith|Themselves +3|045–310|8F08|Flaming Moe's|Phil Hartman|Lionel Hutz +3|045–310|8F08|Flaming Moe's|Kipp Lennon|Sings "Flaming Moe's" (parody of the Cheers theme song) +3|045–310|8F08|Flaming Moe's|Marcia Wallace|Edna Krabappel +3|046–311|8F09|Burns Verkaufen der Kraftwerk|Phil Hartman|Horst; Stockbroker +3|048–313|8F11|Radio Bart|Sting|Himself +3|048–313|8F11|Radio Bart|Marcia Wallace|Edna Krabappel +3|049–314|8F12|Lisa the Greek|Phil Hartman|Troy McClure; Smooth Jimmy Apollo +3|050–315|8F14|Homer Alone|Phil Hartman|Troy McClure +3|051–316|8F16|Bart the Lover|Marcia Wallace|Edna Krabappel +3|052–317|8F13|Homer at the Bat|Wade Boggs|Himself +3|052–317|8F13|Homer at the Bat|Jose Canseco|Himself +3|052–317|8F13|Homer at the Bat|Roger Clemens|Himself +3|052–317|8F13|Homer at the Bat|Terry Cashman|Sings "Talkin' Softball" (parody of the song Talkin' Baseball) +3|052–317|8F13|Homer at the Bat|Ken Griffey, Jr.|Himself +3|052–317|8F13|Homer at the Bat|Don Mattingly|Himself +3|052–317|8F13|Homer at the Bat|Steve Sax|Himself +3|052–317|8F13|Homer at the Bat|Mike Scioscia|Himself +3|052–317|8F13|Homer at the Bat|Ozzie Smith|Himself +3|052–317|8F13|Homer at the Bat|Darryl Strawberry|Himself +3|053–318|8F15|Separate Vocations|Steve Allen|Electronically altered voice of Bart +3|053–318|8F15|Separate Vocations|Marcia Wallace|Edna Krabappel +3|054–319|8F17|Dog of Death|Frank Welker|Santa's Little Helper +3|055–320|8F19|Colonel Homer|Beverly D'Angelo|Lurleen Lumpkin 
+3|056–321|8F20|Black Widower|Kelsey Grammer|Sideshow Bob +3|057–322|8F21|The Otto Show|Christopher Guest|Nigel Tufnel +3|057–322|8F21|The Otto Show|Michael McKean|David St. Hubbins +3|058–323|8F22|Bart's Friend Falls in Love|Phil Hartman|Troy McClure +3|058–323|8F22|Bart's Friend Falls in Love|Kimmy Robertson|Samantha Stanky +3|058–323|8F22|Bart's Friend Falls in Love|Marcia Wallace|Edna Krabappel +3|058–323|8F23|Brother, Can You Spare Two Dimes?|Danny DeVito|Herbert Powell +3|058–323|8F23|Brother, Can You Spare Two Dimes?|Joe Frazier|Himself +4|060–401|8F24|Kamp Krusty|Gene Merlino|Sings "South of the Border" +4|060–401|8F24|Kamp Krusty|Marcia Wallace|Edna Krabappel +4|061–402|8F18|A Streetcar Named Marge|Phil Hartman|Troy McClure; Lionel Hutz +4|061–402|8F18|A Streetcar Named Marge|Jon Lovitz|Llewellyn Sinclair; Ms. Sinclair +4|061–402|8F18|A Streetcar Named Marge|Lona Williams|Debra Jo Smallwood +4|063–404|9F02|Lisa the Beauty Queen|Bob Hope|Himself +4|063–404|9F02|Lisa the Beauty Queen|Lona Williams|Amber Dempsey +4|064–405|9F04|Treehouse of Horror III|Marcia Wallace|Edna Krabappel +4|065–406|9F03|Itchy & Scratchy: The Movie|Neil Armstrong (archival)|Himself +4|065–406|9F03|Itchy & Scratchy: The Movie|Marcia Wallace|Edna Krabappel +4|066–407|9F05|Marge Gets a Job|Phil Hartman|Troy McClure; Lionel Hutz +4|066–407|9F05|Marge Gets a Job|Tom Jones|Himself +4|066–407|9F05|Marge Gets a Job|Marcia Wallace|Edna Krabappel +4|067–408|9F06|New Kid on the Block|Sara Gilbert|Laura Powers +4|067–408|9F06|New Kid on the Block|Phil Hartman|Lionel Hutz +4|067–408|9F06|New Kid on the Block|Pamela Reed|Ruth Powers +4|068–409|9F07|Mr. Plow|Phil Hartman|Troy McClure +4|068–409|9F07|Mr. Plow|Linda Ronstadt|Herself +4|068–409|9F07|Mr. Plow|Adam West|Himself +4|069–410|9F08|Lisa's First Word|Elizabeth Taylor|Maggie Simpson +4|071–412|9F10|Marge vs. the Monorail|Phil Hartman|Lyle Lanley +4|071–412|9F10|Marge vs. 
the Monorail|Leonard Nimoy|Himself +4|072–413|9F11|Selma's Choice|Phil Hartman|Lionel Hutz; Troy McClure; Security Guard; Mandy Patinkin +4|072–413|9F11|Selma's Choice|Michele Pillar|Sings "A Natural Woman" +4|073–414|9F12|Brother from the Same Planet|Phil Hartman|Tom; Eddie Muntz +4|073–414|9F12|Brother from the Same Planet|Marcia Wallace|Edna Krabappel +4|074–415|9F13|I Love Lisa|Michael Carrington|Krusty Anniversary Show Announcer; Sideshow Raheem +4|075–416|9F14|Duffless|Phil Hartman|Troy McClure; Lionel Hutz +4|075–416|9F14|Duffless|Kipp Lennon|Sings "Raindrops Keep Fallin' on My Head" +4|075–416|9F14|Duffless|Marcia Wallace|Edna Krabappel +4|076–417|9F15|Last Exit to Springfield|Joyce Brothers|Herself +4|078–419|9F16|The Front|Brooke Shields|Herself +4|079–420|9F18|Whacking Day|Gene Merlino|Sings "Born Free" +4|079–420|9F18|Whacking Day|Barry White|Himself +4|080–421|9F20|Marge in Chains|David Crosby|Himself +4|080–421|9F20|Marge in Chains|Phil Hartman|Troy McClure; Lionel Hutz +4|081–422|9F19|Krusty Gets Kancelled|Johnny Carson|Himself +4|081–422|9F19|Krusty Gets Kancelled|Hugh Hefner|Himself +4|081–422|9F19|Krusty Gets Kancelled|Bette Midler|Herself +4|081–422|9F19|Krusty Gets Kancelled|Luke Perry|Himself +4|081–422|9F19|Krusty Gets Kancelled|Red Hot Chili Peppers|Themselves +4|081–422|9F19|Krusty Gets Kancelled|Elizabeth Taylor|Herself +4|081–422|9F19|Krusty Gets Kancelled|Marcia Wallace|Edna Krabappel +4|081–422|9F19|Krusty Gets Kancelled|Barry White|Himself +5|082–501|9F21|Homer's Barbershop Quartet|David Crosby|Himself +5|082–501|9F21|Homer's Barbershop Quartet|The Dapper Dans|The singing voices of the Be Sharps +5|082–501|9F21|Homer's Barbershop Quartet|George Harrison|Himself +5|083–502|9F22|Cape Feare|Kelsey Grammer|Sideshow Bob +5|083–502|9F22|Cape Feare|Marcia Wallace|Edna Krabappel +5|085–504|1F01|Rosebud|Ramones|Themselves +5|086–505|1F04|Treehouse of Horror IV|Phil Hartman|Lionel Hutz +5|086–505|1F04|Treehouse of Horror IV|Frank Welker|Gremlin +5|086–505|1F03|Marge on the Lam|George Fenneman|Narrator +5|086–505|1F03|Marge on the Lam|Phil Hartman|Troy McClure; Lionel Hutz +5|086–505|1F03|Marge on the Lam|Pamela Reed|Ruth Powers; Truck-stop Waitress +5|088–507|1F05|Bart's Inner Child|Albert Brooks|Brad Goodman +5|088–507|1F05|Bart's Inner Child|James Brown|Himself +5|088–507|1F05|Bart's Inner Child|Phil Hartman|Troy McClure +5|088–507|1F05|Bart's Inner Child|Marcia Wallace|Edna Krabappel +5|089–508|1F06|Boy-Scoutz 'n the Hood|Ernest Borgnine|Himself +5|089–508|1F06|Boy-Scoutz 'n the Hood|Marcia Wallace|Edna Krabappel +5|091–510|1F08|The Last Temptation of Homer|Phil Hartman|Lionel Hutz +5|091–510|1F08|The Last Temptation of Homer|Werner Klemperer|Colonel Klink +5|091–510|1F08|The Last Temptation of Homer|Michelle Pfeiffer|Mindy Simmons +5|091–510|1F08|The Last Temptation of Homer|Marcia Wallace|Edna Krabappel +5|091–510|1F08|$pringfield (Or, How I Learned to; Stop Worrying and Love Legalized Gambling)|Gerry Cooney|Himself +5|091–510|1F08|$pringfield (Or, How I Learned to; Stop Worrying and Love Legalized Gambling)|Robert Goulet|Himself +5|092–511|1F09|Homer the Vigilante|Sam Neill|Molloy +5|093–512|1F11|Bart Gets Famous|Conan O'Brien|Himself +5|093–512|1F11|Bart Gets Famous|Marcia Wallace|Edna Krabappel +5|094–513|1F10|Homer and Apu|Michael Carrington|Black comedian +5|094–513|1F10|Homer and Apu|James Woods|Himself +5|095–514|1F12|Lisa vs. 
Malibu Stacy|Kathleen Turner|Stacy Lovell +5|096–515|1F13|Deep Space Homer|Buzz Aldrin|Himself +5|096–515|1F13|Deep Space Homer|James Taylor|Himself +5|099–518|1F16|Burns' Heir|Phil Hartman|Lionel Hutz +5|100–519|1F18|Sweet Seymour Skinner's Baadasssss Song|Marcia Wallace|Edna Krabappel +5|100–519|1F18|Sweet Seymour Skinner's Baadasssss Song|Frank Welker|Santa's Little Helper +5|101–520|1F19|The Boy Who Knew Too Much|Phil Hartman|Lionel Hutz +5|101–520|1F19|The Boy Who Knew Too Much|Marcia Wallace|Edna Krabappel +5|102–521|1F21|Lady Bouvier's Lover|Phil Hartman|Troy McClure +5|102–521|1F21|Lady Bouvier's Lover|Kipp Lennon|Sings "The Sound of Grampa" (parody of the song The Sound of Silence) +5|103–522|1F20|Secrets of a Successful Marriage|Phil Hartman|Lionel Hutz +5|103–522|1F20|Secrets of a Successful Marriage|Marcia Wallace|Edna Krabappel +6|105–602|1F17|Lisa's Rival|Winona Ryder|Allison Taylor +6|108–605|2F02|Sideshow Bob Roberts|Henry Corden|Fred Flintstone +6|108–605|2F02|Sideshow Bob Roberts|Dr. Demento|Himself +6|108–605|2F02|Sideshow Bob Roberts|Kelsey Grammer|Sideshow Bob +6|108–605|2F02|Sideshow Bob Roberts|Phil Hartman|Lionel Hutz +6|108–605|2F02|Sideshow Bob Roberts|Larry King|Himself +6|108–605|2F02|Sideshow Bob Roberts|Marcia Wallace|Edna Krabappel +6|109–606|2F03|Treehouse of Horror V|James Earl Jones|Alternate Universe Maggie Simpson +6|109–606|2F03|Treehouse of Horror V|Marcia Wallace|Edna Krabappel +6|110–607|2F04|Bart's Girlfriend|Meryl Streep|Jessica Lovejoy +6|111–608|2F05|Lisa on Ice|Marcia Wallace|Edna Krabappel +6|112–609|2F06|Homer Badman|Dennis Franz|Himself playing Homer Simpson +6|113–610|2F07|Grampa vs. Sexual Inadequacy|Phil Hartman|Troy McClure +6|114–611|2F08|Fear of Flying|Anne Bancroft|Dr. Zweig +6|114–611|2F08|Fear of Flying|Ted Danson|Sam Malone +6|114–611|2F08|Fear of Flying|Woody Harrelson|Woody Boyd +6|114–611|2F08|Fear of Flying|John Ratzenberger|Cliff Clavin +6|114–611|2F08|Fear of Flying|Rhea Perlman|Carla Tortelli +6|114–611|2F08|Fear of Flying|George Wendt|Norm Peterson +6|115–612|2F09|Homer the Great|Patrick Stewart|Number One +6|118–615|2F12|Homie the Clown|Dick Cavett|Himself +6|118–615|2F12|Homie the Clown|Joe Mantegna|Fat Tony +6|118–615|2F12|Homie the Clown|Johnny Unitas|Himself +6|119–616|2F13|Bart vs. Australia|Phil Hartman|Evan Conover +6|120–617|2F14|Homer vs. Patty and Selma|Mel Brooks|Himself +6|120–617|2F14|Homer vs. Patty and Selma|Susan Sarandon|Ballet teacher +6|121–618|2F31|A Star Is Burns|Phil Hartman|Charlton Heston +6|121–618|2F31|A Star Is Burns|Maurice LaMarche|George C. Scott; Hannibal Lecter; Captain James T. Kirk; Eudora Welty +6|121–618|2F31|A Star Is Burns|Jon Lovitz|Jay Sherman +6|122–619|2F15|Lisa's Wedding|Phil Hartman|Troy McClure +6|122–619|2F15|Lisa's Wedding|Mandy Patinkin|Hugh Parkfield +6|123–620|2F18|Two Dozen and One Greyhounds|Frank Welker|Santa's Little Helper; Various dogs +6|124–621|2F19|The PTA Disbands|Marcia Wallace|Edna Krabappel +6|125–622|2F32|'Round Springfield|Steve Allen|Himself +6|125–622|2F32|'Round Springfield|Phil Hartman|Lionel Hutz +6|125–622|2F32|'Round Springfield|Ron Taylor|Bleeding Gums Murphy +6|125–622|2F32|'Round Springfield|Marcia Wallace|Edna Krabappel +6|126–623|2F21|The Springfield Connection|Phil Hartman|Lionel Hutz +6|126–623|2F21|The Springfield Connection|Marcia Wallace|Edna Krabappel +6|127–624|2F22|Lemon of Troy|Marcia Wallace|Edna Krabappel +6|128–625|2F16|Who Shot Mr. Burns? Part 1|Tito Puente|Himself +7|129–701|2F20|Who Shot Mr. Burns? 
Part 2|Tito Puente|Himself +7|130–702|2F17|Radioactive Man|Phil Hartman|Lionel Hutz +7|130–702|2F17|Radioactive Man|Mickey Rooney|Himself +7|131–703|3F01|Home Sweet Homediddly-Dum-Doodily|Joan Kenley|Telephone voice +7|131–703|3F01|Home Sweet Homediddly-Dum-Doodily|Marcia Wallace|Edna Krabappel +7|131–703|3F01|Home Sweet Homediddly-Dum-Doodily|Frank Welker|Monkey +7|133–705|3F03|Lisa the Vegetarian|Phil Hartman|Troy McClure +7|133–705|3F03|Lisa the Vegetarian|Linda McCartney|Herself +7|133–705|3F03|Lisa the Vegetarian|Paul McCartney|Himself +7|134–706|3F04|Treehouse of Horror VI|Paul Anka|Himself +7|134–706|3F04|Treehouse of Horror VI|Marcia Wallace|Edna Krabappel +7|135–707|3F05|King-Size Homer|Joan Kenley|Telephone voice +7|136–708|3F06|Mother Simpson|Glenn Close|Mona Simpson +7|136–708|3F06|Mother Simpson|Harry Morgan|Bill Gannon +7|137–709|3F08|Sideshow Bob's Last Gleaming|R. Lee Ermey|Colonel Leslie Hapablap +7|137–709|3F08|Sideshow Bob's Last Gleaming|Kelsey Grammer|Sideshow Bob +7|138–710|3F31|The Simpsons 138th Episode Spectacular|Phil Hartman|Troy McClure; Lionel Hutz +7|139–711|3F07|Marge Be Not Proud|Phil Hartman|Troy McClure +7|139–711|3F07|Marge Be Not Proud|Lawrence Tierney|Don Brodka +7|140–712|3F10|Team Homer|Marcia Wallace|Edna Krabappel +7|142–714|3F11|Scenes from the Class Struggle in Springfield|Tom Kite|Himself +7|143–715|3F12|Bart the Fink|Phil Hartman|Troy McClure +7|143–715|3F12|Bart the Fink|Bob Newhart|Himself +7|144–716|3F13|Lisa the Iconoclast|Phil Hartman|Troy McClure +7|144–716|3F13|Lisa the Iconoclast|Donald Sutherland|Hollis Hurlbut +7|144–716|3F13|Lisa the Iconoclast|Marcia Wallace|Edna Krabappel +7|146–718|3F16|The Day the Violence Died|Kirk Douglas|Chester J. Lampwick +7|146–718|3F16|The Day the Violence Died|Phil Hartman|Lionel Hutz +7|146–718|3F16|The Day the Violence Died|Alex Rocco|Roger Meyers Jr. +7|146–718|3F16|The Day the Violence Died|Jack Sheldon|Amendment +7|146–718|3F16|The Day the Violence Died|Suzanne Somers|Herself +7|147–719|3F15|A Fish Called Selma|Jeff Goldblum|MacArthur Parker +7|147–719|3F15|A Fish Called Selma|Phil Hartman|Troy McClure; Fat Tony +7|148–720|3F17|Bart on the Road|Jim Lau|Hong Kong doctor +7|149–721|3F18|22 Short Films About Springfield|Phil Hartman|Lionel Hutz; Hospital chairman +7|150–722|3F19|Raging Abe Simpson and His Grumbling Grandson in "The Curse of the Flying Hellfish"|Marcia Wallace|Edna Krabappel +7|151–723|3F20|Much Apu About Nothing|Joe Mantegna|Fat Tony +7|152–724|3F21|Homerpalooza|Cypress Hill|Themselves +7|152–724|3F21|Homerpalooza|Peter Frampton|Himself +7|152–724|3F21|Homerpalooza|The Smashing Pumpkins|Themselves +7|152–724|3F21|Homerpalooza|Sonic Youth|Themselves +7|153–725|3F22|Summer of 4 Ft. 2|Christina Ricci|Erin +7|153–725|3F22|Summer of 4 Ft. 
2|Marcia Wallace|Edna Krabappel +8|154–801|4F02|Treehouse of Horror VII|Phil Hartman|Bill Clinton +8|155–802|3F23|You Only Move Twice|Albert Brooks|Hank Scorpio +8|155–802|3F23|You Only Move Twice|Sally Stevens|Sings "Scorpio" +8|156–803|4F03|The Homer They Fall|Michael Buffer|Himself +8|156–803|4F03|The Homer They Fall|Sally Stevens|Sings "People" +8|156–803|4F03|The Homer They Fall|Paul Winfield|Lucius Sweet +8|157–804|4F05|Burns, Baby Burns|Rodney Dangerfield|Larry Burns +8|161–808|4F07|Hurricane Neddy|Jon Lovitz|Jay Sherman +8|162–809|3F24|El Viaje Misterioso de Nuestro Jomer; (The Mysterious Voyage of Homer)|Johnny Cash|Coyote +8|163–810|3G01|The Springfield Files|Gillian Anderson|Dana Scully +8|163–810|3G01|The Springfield Files|David Duchovny|Fox Mulder +8|163–810|3G01|The Springfield Files|Leonard Nimoy|Himself +8|164–811|4F08|The Twisted World of Marge Simpson|Joe Mantegna|Fat Tony +8|164–811|4F08|The Twisted World of Marge Simpson|Jack Lemmon|Frank Ormand +8|164–811|4F08|The Twisted World of Marge Simpson|Marcia Wallace|Edna Krabappel +8|167–814|4F12|The Itchy & Scratchy & Poochie Show|Phil Hartman|Troy McClure +8|167–814|4F12|The Itchy & Scratchy & Poochie Show|Alex Rocco|Roger Meyers Jr. +8|168–815|4F11|Homer's Phobia|John Waters|John +8|169–816|4F14|Brother from Another Series|Kelsey Grammer|Sideshow Bob +8|169–816|4F14|Brother from Another Series|David Hyde Pierce|Cecil Terwilliger; Man in the crowd +8|169–816|4F14|Brother from Another Series|Marcia Wallace|Edna Krabappel +8|171–818|4F15|Homer vs. the Eighteenth Amendment|Joe Mantegna|Fat Tony +8|171–818|4F15|Homer vs. the Eighteenth Amendment|Dave Thomas|Rex Banner +8|172–819|4F09|Grade School Confidential|Michael Dees|Sings "Embraceable You" +8|172–819|4F09|Grade School Confidential|Marcia Wallace|Edna Krabappel +8|173–820|4F16|The Canine Mutiny|Frank Welker|Santa's Little Helper; Laddie +8|174–821|4F17|The Old Man and the Lisa|Bret Hart|Himself +8|175–822|4F18|In Marge We Trust|Denice Kumagai|Dancer +8|175–822|4F18|In Marge We Trust|Karen Maruyama|Dancer +8|175–822|4F18|In Marge We Trust|Sab Shimono|Mr. Sparkle +8|175–822|4F18|In Marge We Trust|Gedde Watanabe|Factory foreman +8|175–822|4F18|In Marge We Trust|Frank Welker|Baboons +8|176–823|4F19|Homer's Enemy|Frank Welker|Executive Vice President dog +8|177–824|4F20|The Simpsons Spin-Off Showcase|Tim Conway|Himself +8|177–824|4F20|The Simpsons Spin-Off Showcase|Phil Hartman|Troy McClure +8|177–824|4F20|The Simpsons Spin-Off Showcase|Gailard Sartain|Charles "Big" Daddy +8|176–825|4F19|The Secret War of Lisa Simpson|Willem Dafoe|The Commandant +8|176–825|4F19|The Secret War of Lisa Simpson|Marcia Wallace|Edna Krabappel +9|179–901|4F22|The City of New York vs. Homer Simpson|Michael Dees|Sings "New York, New York" +9|179–901|4F22|The City of New York vs. Homer Simpson|Joan Kenley|Telephone voice +9|179–901|4F22|The City of New York vs. Homer Simpson|Kipp Lennon|Sings "I'm Checking In" +9|180–902|4F23|The Principal and the Pauper|Martin Sheen|The real Seymour Skinner +9|180–902|4F23|The Principal and the Pauper|Marcia Wallace|Edna Krabappel +9|181–903|3G02|Lisa's Sax|Fyvush Finkel|Himself playing Krusty +9|182–904|5F02|Treehouse of Horror VIII|Marcia Wallace|Edna Krabappel +9|184–906|5F03|Bart Star|Roy Firestone|Himself +9|184–906|5F03|Bart Star|Mike Judge|Hank Hill +9|184–906|5F03|Bart Star|Joe Namath|Himself +9|185–907|5F04|The Two Mrs. Nahasapeemapetilons|Jan Hooks|Manjula Nahasapeemapetilon +9|185–907|5F04|The Two Mrs. 
Nahasapeemapetilons|Andrea Martin|Apu's mother +9|185–907|5F04|The Two Mrs. Nahasapeemapetilons|Marcia Wallace|Edna Krabappel +9|186–908|5F05|Lisa the Skeptic|Stephen Jay Gould|Himself +9|186–908|5F05|Lisa the Skeptic|Phil Hartman|Lionel Hutz +9|186–908|5F05|Lisa the Skeptic|Marcia Wallace|Edna Krabappel +9|187–909|5F06|Realty Bites|Phil Hartman|Lionel Hutz +9|188–910|5F07|Miracle on Evergreen Terrace|Alex Trebek|Himself +9|188–910|5F07|Miracle on Evergreen Terrace|Marcia Wallace|Edna Krabappel +9|190–912|5F08|Bart Carny|Jim Varney|Cooder +9|191–913|5F23|The Joy of Sect|Marcia Wallace|Edna Krabappel +9|192–914|5F11|Das Bus|Phil Hartman|Troy McClure +9|192–914|5F11|Das Bus|James Earl Jones|Narrator +9|192–914|5F11|Das Bus|Jack Ong|Fisherman +9|193–915|5F10|The Last Temptation of Krust|Bruce Baum|Himself +9|193–915|5F10|The Last Temptation of Krust|Janeane Garofalo|Herself +9|193–915|5F10|The Last Temptation of Krust|Bobcat Goldthwait|Himself +9|193–915|5F10|The Last Temptation of Krust|Jay Leno|Himself +9|193–915|5F10|The Last Temptation of Krust|Marcia Wallace|Edna Krabappel +9|193–915|5F10|The Last Temptation of Krust|Hank Williams, Jr.|Sings "Canyonero!" +9|193–915|5F10|The Last Temptation of Krust|Steven Wright|Himself +9|194–916|5F12|Dumbbell Indemnity|Helen Hunt|Renee +9|194–916|5F12|Dumbbell Indemnity|Marcia Wallace|Edna Krabappel +9|195–917|4F24|Lisa the Simpson|Phil Hartman|Troy McClure +9|196–918|5F13|This Little Wiggy|Phil Hartman|Troy McClure +9|196–918|5F13|This Little Wiggy|Marcia Wallace|Edna Krabappel +9|197–919|3G04|Simpson Tide|Michael Carrington|Drill instructor +9|197–919|3G04|Simpson Tide|Bob Denver|Himself +9|197–919|3G04|Simpson Tide|Rod Steiger|Captain Tenille +9|198–920|5F14|The Trouble with Trillions|Marcia Wallace|Edna Krabappel +9|198–920|5F14|The Trouble with Trillions|Paul Winfield|Lucius Sweet +9|199–921|5F15|Girly Edition|Marcia Wallace|Edna Krabappel +9|200–922|5F09|Trash of the Titans|Steve Martin|Ray Patterson +9|200–922|5F09|Trash of the Titans|Paul McGuinness|Himself +9|200–922|5F09|Trash of the Titans|Susie Smith|Herself +9|200–922|5F09|Trash of the Titans|U2|Themselves +9|200–922|5F09|Trash of the Titans|Marcia Wallace|Edna Krabappel +9|201–923|5F16|King of the Hill|Brendan Fraser|Brad +9|201–923|5F16|King of the Hill|Steven Weber|Neil +9|202–924|5F17|Lost Our Lisa|Marcia Wallace|Edna Krabappel +9|203–925|5F18|Natural Born Kissers|Marcia Wallace|Edna Krabappel +10|204–1001|5F20|Lard of the Dance|Lisa Kudrow|Alex Whitney +10|205–1002|5F21|The Wizard of Evergreen Terrace|William Daniels|KITT +10|206–1003|5F22|Bart the Mother|Phil Hartman|Troy McClure +10|206–1003|5F22|Bart the Mother|Marcia Wallace|Edna Krabappel +10|207–1004|AABF01|Treehouse of Horror IX|Robert Englund|Freddy Krueger +10|207–1004|AABF01|Treehouse of Horror IX|Kathie Lee Gifford|Herself (live action) +10|207–1004|AABF01|Treehouse of Horror IX|Ed McMahon|Himself +10|207–1004|AABF01|Treehouse of Horror IX|Regis Philbin|Himself (live action) +10|207–1004|AABF01|Treehouse of Horror IX|Jerry Springer|Himself +10|208–1005|5F19|When You Dish Upon a Star|Alec Baldwin|Himself +10|208–1005|5F19|When You Dish Upon a Star|Kim Basinger|Herself +10|208–1005|5F19|When You Dish Upon a Star|Brian Grazer|Himself +10|208–1005|5F19|When You Dish Upon a Star|Ron Howard|Himself +10|209–1006|AABF02|D'oh-in in the Wind|George Carlin|Munchie +10|209–1006|AABF02|D'oh-in in the Wind|Martin Mull|Seth +10|209–1006|AABF02|D'oh-in in the Wind|Yo La Tengo|Play the end credits +10|210–1007|AABF03|Lisa Gets an "A"|Marcia 
Wallace|Edna Krabappel +10|212–1009|AABF05|Mayored to the Mob|Mark Hamill|Himself; Leavelle; Theater owner +10|212–1009|AABF05|Mayored to the Mob|Joe Mantegna|Fat Tony +10|212–1009|AABF05|Mayored to the Mob|Dick Tufeld|Robot B-9 +10|212–1009|AABF05|Mayored to the Mob|Marcia Wallace|Edna Krabappel +10|213–1010|AABF06|Viva Ned Flanders|The Moody Blues|Themselves +10|213–1010|AABF06|Viva Ned Flanders|Marcia Wallace|Edna Krabappel +10|214–1011|AABF07|Wild Barts Can't Be Broken|Cyndi Lauper|Herself +10|214–1011|AABF07|Wild Barts Can't Be Broken|Franklin D. Roosevelt (archival)|Himself +10|214–1011|AABF07|Wild Barts Can't Be Broken|Marcia Wallace|Edna Krabappel +10|215–1012|AABF08|Sunday, Cruddy Sunday|Troy Aikman|Himself +10|215–1012|AABF08|Sunday, Cruddy Sunday|Rosey Grier|Himself +10|215–1012|AABF08|Sunday, Cruddy Sunday|John Madden|Himself +10|215–1012|AABF08|Sunday, Cruddy Sunday|Dan Marino|Himself +10|215–1012|AABF08|Sunday, Cruddy Sunday|Rupert Murdoch|Himself +10|215–1012|AABF08|Sunday, Cruddy Sunday|Dolly Parton|Herself +10|215–1012|AABF08|Sunday, Cruddy Sunday|Pat Summerall|Himself +10|215–1012|AABF08|Sunday, Cruddy Sunday|Fred Willard|Wally Kogen +10|216–1013|AABF09|Homer to the Max|Ed Begley, Jr.|Himself +10|217–1014|AABF11|I'm with Cupid|Jan Hooks|Manjula Nahasapeemapetilon +10|217–1014|AABF11|I'm with Cupid|Elton John|Himself +10|217–1014|AABF11|I'm with Cupid|Marcia Wallace|Edna Krabappel +10|218–1015|AABF10|Marge Simpson in: "Screaming Yellow Honkers"|John Kassir|Possum +10|218–1015|AABF10|Marge Simpson in: "Screaming Yellow Honkers"|Marcia Wallace|Edna Krabappel +10|218–1015|AABF10|Marge Simpson in: "Screaming Yellow Honkers"|Hank Williams, Jr.|Sings "Canyonero!" +10|221–1018|AABF14|Simpsons Bible Stories|Marcia Wallace|Mrs. Krabapatra +10|222–1019|AABF15|Mom and Pop Art|Michael Dees|Sings "Arrivederci Roma" +10|222–1019|AABF15|Mom and Pop Art|Jasper Johns|Himself +10|222–1019|AABF15|Mom and Pop Art|Isabella Rossellini|Astrid Weller +10|222–1019|AABF15|Mom and Pop Art|Marcia Wallace|Edna Krabappel +10|223–1020|AABF16|The Old Man and the "C" Student|Jack LaLanne|Himself +10|223–1020|AABF16|The Old Man and the "C" Student|NRBQ|Sing "Can't Buy Me Love" +10|224–1021|AABF17|Monty Can't Buy Me Love|Michael McKean|Jerry Rude +10|225–1022|AABF18|They Saved Lisa's Brain|Stephen Hawking|Himself +10|226–1023|AABF20|Thirty Minutes over Tokyo|Denice Kumagai|Mother +10|226–1023|AABF20|Thirty Minutes over Tokyo|Karen Maruyama|Stewardess +10|226–1023|AABF20|Thirty Minutes over Tokyo|George Takei|Wink +10|226–1023|AABF20|Thirty Minutes over Tokyo|Gedde Watanabe|Waiter; Father; Toilet +10|226–1023|AABF20|Thirty Minutes over Tokyo|Keone Young|Sumo wrestler +11|227–1101|AABF23|Beyond Blunderdome|Jack Burns|Edward Christian +11|227–1101|AABF23|Beyond Blunderdome|Mel Gibson|Himself +11|227–1101|AABF23|Beyond Blunderdome|Marcia Wallace|Edna Krabappel +11|228–1102|AABF22|Brother's Little Helper|Mark McGwire|Himself +11|228–1102|AABF22|Brother's Little Helper|Marcia Wallace|Edna Krabappel +11|229–1103|AABF21|Guess Who's Coming to Criticize Dinner?|Edward Asner|Newspaper editor +11|229–1103|AABF21|Guess Who's Coming to Criticize Dinner?|Marcia Wallace|Edna Krabappel +11|230–1104|BABF01|Treehouse of Horror X|Tom Arnold|Himself +11|230–1104|BABF01|Treehouse of Horror X|Dick Clark|Himself +11|230–1104|BABF01|Treehouse of Horror X|Lucy Lawless|Herself +11|230–1104|BABF01|Treehouse of Horror X|Frank Welker|Werewolf; Werewolf Flanders +11|231–1105|AABF19|E-I-E-I-(Annoyed Grunt)|The B-52's|Sing "Glove Slap" 
(parody of the song Love Shack) +11|231–1105|AABF19|E-I-E-I-(Annoyed Grunt)|Marcia Wallace|Edna Krabappel +11|231–1105|AABF19|E-I-E-I-(Annoyed Grunt)|Frank Welker|Various animals +11|232–1106|BABF02|Hello Gutter, Hello Fadder|Ron Howard|Himself +11|232–1106|BABF02|Hello Gutter, Hello Fadder|Penn Jillette|Himself +11|232–1106|BABF02|Hello Gutter, Hello Fadder|Pat O'Brien|Himself +11|232–1106|BABF02|Hello Gutter, Hello Fadder|Nancy O'Dell|Herself +11|232–1106|BABF02|Hello Gutter, Hello Fadder|Teller|Himself +11|232–1106|BABF02|Hello Gutter, Hello Fadder|Marcia Wallace|Edna Krabappel +11|233–1107|BABF03|Eight Misbehavin'|Jan Hooks|Manjula Nahasapeemapetilon +11|233–1107|BABF03|Eight Misbehavin'|Garry Marshall|Larry Kidkill +11|233–1107|BABF03|Eight Misbehavin'|Butch Patrick|Himself +11|233–1107|BABF03|Eight Misbehavin'|Marcia Wallace|Edna Krabappel +11|233–1107|BABF03|Eight Misbehavin'|Frank Welker|Zoo animals +11|234–1108|BABF05|Take My Wife, Sleaze|John Goodman|Meathook +11|234–1108|BABF05|Take My Wife, Sleaze|Jan Hooks|Manjula Nahasapeemapetilon +11|234–1108|BABF05|Take My Wife, Sleaze|Jay North|Himself +11|234–1108|BABF05|Take My Wife, Sleaze|NRBQ|Perform various songs +11|234–1108|BABF05|Take My Wife, Sleaze|Marcia Wallace|Edna Krabappel +11|234–1108|BABF05|Take My Wife, Sleaze|Henry Winkler|Ramrod +11|235–1109|BABF07|Grift of the Magi|Clarence Clemons|Narrator +11|235–1109|BABF07|Grift of the Magi|Gary Coleman|Himself +11|235–1109|BABF07|Grift of the Magi|Joe Mantegna|Fat Tony +11|235–1109|BABF07|Grift of the Magi|Tim Robbins|Jim Hope +11|235–1109|BABF07|Grift of the Magi|Marcia Wallace|Edna Krabappel +11|236–1110|BABF04|Little Big Mom|Elwood Edwards|Virtual doctor +11|236–1110|BABF04|Little Big Mom|Marcia Wallace|Edna Krabappel +11|237–1111|BABF06|Faith Off|Don Cheadle|Brother Faith +11|237–1111|BABF06|Faith Off|Joe Mantegna|Fat Tony +11|237–1111|BABF06|Faith Off|Marcia Wallace|Edna Krabappel +11|238–1112|BABF08|The Mansion Family|Britney Spears|Herself +11|239–1113|BABF09|Saddlesore Galactica|Randy Bachman|Himself +11|239–1113|BABF09|Saddlesore Galactica|Jim Cummings|Duncan; Furious D +11|239–1113|BABF09|Saddlesore Galactica|Trevor Denman|Track announcer +11|239–1113|BABF09|Saddlesore Galactica|Fred Turner|Himself +11|240–1114|BABF10|Alone Again, Natura-Diddily|Shawn Colvin|Rachel Jordan +11|240–1114|BABF10|Alone Again, Natura-Diddily|Marcia Wallace|Edna Krabappel +11|240–1114|BABF10|Alone Again, Natura-Diddily|Frank Welker|Parrot +11|241–1115|BABF11|Missionary: Impossible|Betty White|Herself +11|242–1116|BABF12|Pygmoelian|Marcia Wallace|Edna Krabappel +11|244–1118|BABF14|Days of Wine and D'oh'ses|Marcia Wallace|Edna Krabappel +11|245–1119|BABF16|Kill the Alligator and Run|Diedrich Bader|Sheriff +11|245–1119|BABF16|Kill the Alligator and Run|Joe C.|Himself +11|245–1119|BABF16|Kill the Alligator and Run|Robert Evans|Himself +11|245–1119|BABF16|Kill the Alligator and Run|Kid Rock|Himself +11|245–1119|BABF16|Kill the Alligator and Run|Charlie Rose|Himself +11|246–1120|BABF15|Last Tap Dance in Springfield|Frank Welker|Lion +11|247–1121|BABF18|It's a Mad, Mad, Mad, Mad Marge|Parker Posey|Becky +11|247–1121|BABF18|It's a Mad, Mad, Mad, Mad Marge|Marcia Wallace|Edna Krabappel +11|247–1121|BABF18|It's a Mad, Mad, Mad, Mad Marge|Marc Wilmore|Dr. 
Wilmore +11|248–1122|BABF19|Behind the Laughter|Jim Forbes|Narrator +11|248–1122|BABF19|Behind the Laughter|Willie Nelson|Himself +11|248–1122|BABF19|Behind the Laughter|Marcia Wallace|Edna Krabappel +12|249–1201|BABF21|Treehouse of Horror XI|Frank Welker|Dolphins +12|250–1202|BABF20|A Tale of Two Springfields|Gary Coleman|Himself +12|250–1202|BABF20|A Tale of Two Springfields|Marcia Wallace|Edna Krabappel +12|250–1202|BABF20|A Tale of Two Springfields|Frank Welker|Badger; Woodpecker +12|250–1202|BABF20|A Tale of Two Springfields|The Who|Themselves +12|251–1203|BABF17|Insane Clown Poppy|Drew Barrymore|Sophie +12|251–1203|BABF17|Insane Clown Poppy|Stephen King|Himself +12|251–1203|BABF17|Insane Clown Poppy|Joe Mantegna|Fat Tony +12|251–1203|BABF17|Insane Clown Poppy|Jay Mohr|Christopher Walken +12|251–1203|BABF17|Insane Clown Poppy|Amy Tan|Herself +12|251–1203|BABF17|Insane Clown Poppy|John Updike|Himself +12|252–1204|CABF01|Lisa the Tree Hugger|Joshua Jackson|Jesse Grass +12|253–1205|CABF04|Homer vs. Dignity|Leeza Gibbons|Herself +12|253–1205|CABF04|Homer vs. Dignity|Marcia Wallace|Edna Krabappel +12|254–1206|CABF02|The Computer Wore Menace Shoes|Patrick McGoohan|Number Six +12|254–1206|CABF02|The Computer Wore Menace Shoes|Marcia Wallace|Edna Krabappel +12|255–1207|CABF03|The Great Money Caper|Edward Norton|Devon Bradley +12|256–1208|CABF06|Skinner's Sense of Snow|Marcia Wallace|Edna Krabappel +12|258–1210|CABF05|Pokey Mom|Michael Keaton|Jack Crowley +12|258–1210|CABF05|Pokey Mom|Charles Napier|Warden +12|258–1210|CABF05|Pokey Mom|Robert Schimmel|Convict +12|258–1210|CABF05|Pokey Mom|Bruce Vilanch|Himself +12|258–1210|CABF05|Pokey Mom|Marcia Wallace|Edna Krabappel +12|259–1211|CABF08|Worst Episode Ever|Neil Armstrong (archival)|Himself +12|259–1211|CABF08|Worst Episode Ever|Johnnie Cochran (archival)|Himself +12|259–1211|CABF08|Worst Episode Ever|Richard Nixon (archival)|Himself +12|259–1211|CABF08|Worst Episode Ever|Tom Savini|Himself +12|260–1212|CABF07|Tennis the Menace|Andre Agassi|Himself +12|260–1212|CABF07|Tennis the Menace|Pete Sampras|Himself +12|260–1212|CABF07|Tennis the Menace|Marcia Wallace|Edna Krabappel +12|260–1212|CABF07|Tennis the Menace|Serena Williams|Herself +12|260–1212|CABF07|Tennis the Menace|Venus Williams|Herself +12|261–1213|CABF10|Day of the Jackanapes|Gary Coleman|Himself +12|261–1213|CABF10|Day of the Jackanapes|Kelsey Grammer|Sideshow Bob +12|262–1214|CABF12|New Kids on the Blecch|'N Sync|Themselves +12|263–1215|CABF09|Hungry, Hungry Homer|Stacy Keach|Howard K. 
Duff VIII +12|263–1215|CABF09|Hungry, Hungry Homer|Marcia Wallace|Edna Krabappel +12|264–1216|CABF11|Bye Bye Nerdie|Kathy Griffin|Francine +12|264–1216|CABF11|Bye Bye Nerdie|Jan Hooks|Manjula Nahasapeemapetilon +12|264–1216|CABF11|Bye Bye Nerdie|Marcia Wallace|Edna Krabappel +12|265–1217|CABF13|Simpson Safari|Frank Welker|Various animals +12|266–1218|CABF14|Trilogy of Error|Joe Mantegna|Fat Tony +12|266–1218|CABF14|Trilogy of Error|Frankie Muniz|Thelonius +12|267–1219|CABF15|I'm Goin' to Praiseland|Shawn Colvin|Rachel Jordan +12|267–1219|CABF15|I'm Goin' to Praiseland|Marcia Wallace|Edna Krabappel +12|269–1221|CABF17|Simpsons Tall Tales|Marcia Wallace|Edna Krabappel +12|269–1221|CABF17|Simpsons Tall Tales|Frank Welker|Babe, the Blue Ox; Various animals +13|270–1301|CABF19|Treehouse of Horror XII|Pierce Brosnan|Ultrahouse's Pierce Brosnan voice; Himself +13|270–1301|CABF19|Treehouse of Horror XII|Matthew Perry|Ultrahouse's Matthew Perry voice +13|270–1301|CABF19|Treehouse of Horror XII|Marcia Wallace|Edna Krabappel +13|271–1302|CABF22|The Parent Rap|Jess Harnell|Various animals +13|271–1302|CABF22|The Parent Rap|Jane Kaczmarek|Judge Constance Harm +13|271–1302|CABF22|The Parent Rap|Marcia Wallace|Edna Krabappel +13|272–1303|CABF20|Homer the Moe|R.E.M.|Themselves +13|273–1304|CABF18|A Hunka Hunka Burns in Love|Julia Louis-Dreyfus|Gloria +13|273–1304|CABF18|A Hunka Hunka Burns in Love|George Takei|Waiter +13|274–1305|CABF21|The Blunder Years|Joe Mantegna|Fat Tony +13|274–1305|CABF21|The Blunder Years|Paul Newman|Himself +13|274–1305|CABF21|The Blunder Years|Judith Owen|Herself +13|275–1306|DABF02|She of Little Faith|Richard Gere|Himself +13|276–1307|DABF01|Brawl in the Family|Jane Kaczmarek|Judge Constance Harm +13|276–1307|DABF01|Brawl in the Family|Delroy Lindo|Gabriel +13|277–1308|DABF03|Sweets and Sour Marge|Ben Stiller|Garth Motherloving +13|278–1309|DABF05|Jaws Wired Shut|Dana Gould|Producer +13|278–1309|DABF05|Jaws Wired Shut|John Kassir|Various Animals +13|279–1310|DABF04|Half-Decent Proposal|Jon Lovitz|Artie Ziff +13|279–1310|DABF04|Half-Decent Proposal|Marcia Wallace|Edna Krabappel +13|280–1311|DABF06|The Bart Wants What It Wants|Wolfgang Puck|Himself +13|280–1311|DABF06|The Bart Wants What It Wants|Reese Witherspoon|Greta Wolfcastle +13|281–1312|DABF07|The Lastest Gun in the West|Dennis Weaver|Buck McCoy +13|281–1312|DABF07|The Lastest Gun in the West|Frank Welker|Dog +13|282–1313|DABF09|The Old Man and the Key|Olympia Dukakis|Zelda +13|282–1313|DABF09|The Old Man and the Key|Bill Saluga|Ray J. Johnson +13|283–1314|DABF08|Tales from the Public Domain|Sally Stevens|Sings "Island of Sirens" (parody of the song Copacabana) +13|285–1316|DABF11|Weekend at Burnsie's|Phish|Themselves +13|287–1318|DABF13|I Am Furious (Yellow)|Stan Lee|Himself +13|287–1318|DABF13|I Am Furious (Yellow)|Marcia Wallace|Edna Krabappel +13|288–1319|DABF14|The Sweetest Apu|James Lipton|Himself +13|289–1320|DABF15|Little Girl in the Big Ten|Robert Pinsky|Himself +13|290–1321|DABF16|The Frying Game|Carmen Electra|Herself +13|290–1321|DABF16|The Frying Game|Frances Sternhagen|Mrs. 
Bellamy +13|291–1322|DABF17|Poppa's Got a Brand New Badge|Joe Mantegna|Fat Tony +13|291–1322|DABF17|Poppa's Got a Brand New Badge|Marcia Wallace|Edna Krabappel +14|293–1402|DABF22|How I Spent My Strummer Vacation|Elvis Costello|Himself +14|293–1402|DABF22|How I Spent My Strummer Vacation|Mick Jagger|Himself +14|293–1402|DABF22|How I Spent My Strummer Vacation|Lenny Kravitz|Himself +14|293–1402|DABF22|How I Spent My Strummer Vacation|Tom Petty|Himself +14|293–1402|DABF22|How I Spent My Strummer Vacation|Keith Richards|Himself +14|293–1402|DABF22|How I Spent My Strummer Vacation|Brian Setzer|Himself +14|293–1402|DABF22|How I Spent My Strummer Vacation|Marcia Wallace|Edna Krabappel +14|294–1403|DABF20|Bart vs. Lisa vs. the Third Grade|Tony Bennett|Sings "Capital City" +14|294–1403|DABF20|Bart vs. Lisa vs. the Third Grade|Marcia Wallace|Edna Krabappel +14|295–1404|DABF18|Large Marge|Baha Men|Themselves +14|295–1404|DABF18|Large Marge|Jan Hooks|Manjula Nahasapeemapetilon +14|295–1404|DABF18|Large Marge|Marcia Wallace|Edna Krabappel +14|295–1404|DABF18|Large Marge|Burt Ward|Robin +14|295–1404|DABF18|Large Marge|Adam West|Batman +14|296–1405|DABF21|Helter Shelter|Larry Holmes|Himself +14|296–1405|DABF21|Helter Shelter|David Lander|Himself +14|297–1406|EABF01|The Great Louse Detective|Kelsey Grammer|Sideshow Bob +14|297–1406|EABF01|The Great Louse Detective|Sally Stevens|Sings "The Way We Were" +14|298–1407|EABF02|Special Edna|Little Richard|Himself +14|298–1407|EABF02|Special Edna|Marcia Wallace|Edna Krabappel +14|299–1408|EABF03|The Dad Who Knew Too Little|Elliott Gould|Himself +14|300–1409|EABF04|Strong Arms of the Ma|Pamela Reed|Ruth Powers +14|301–1410|EABF06|Pray Anything|Ken Burns|Himself +14|301–1410|EABF06|Pray Anything|Lisa Leslie|Herself +14|302–1411|EABF05|Barting Over|Blink-182|Themselves +14|302–1411|EABF05|Barting Over|Tony Hawk|Himself +14|302–1411|EABF05|Barting Over|Jane Kaczmarek|Judge Constance Harm +14|302–1411|EABF05|Barting Over|Marcia Wallace|Edna Krabappel +14|303–1412|EABF07|I'm Spelling as Fast as I Can|George Plimpton|Himself +14|304–1413|EABF08|A Star Is Born Again|James L. Brooks|Himself +14|304–1413|EABF08|A Star Is Born Again|Helen Fielding|Herself +14|304–1413|EABF08|A Star Is Born Again|Gene Merlino|Sings "Jellyfish" +14|304–1413|EABF08|A Star Is Born Again|Marisa Tomei|Sara Sloane +14|304–1413|EABF08|A Star Is Born Again|Marcia Wallace|Edna Krabappel +14|305–1414|EABF09|Mr. Spritz Goes to Washington|Joe Mantegna|Fat Tony +14|307–1416|EABF11|'Scuse Me While I Miss the Sky|Eric Idle|Declan Desmond +14|307–1416|EABF11|'Scuse Me While I Miss the Sky|Joe Mantegna|Fat Tony +14|307–1416|EABF11|'Scuse Me While I Miss the Sky|Marcia Wallace|Edna Krabappel +14|308–1417|EABF12|Three Gays of the Condo|Terry W. Greene|Large Gay Military Man +14|308–1417|EABF12|Three Gays of the Condo|Ben Schatz|Himself +14|308–1417|EABF12|Three Gays of the Condo|Scott Thompson|Grady +14|308–1417|EABF12|Three Gays of the Condo|"Weird Al" Yankovic|Himself +14|309–1418|EABF13|Dude, Where's My Ranch?|David Byrne|Himself +14|309–1418|EABF13|Dude, Where's My Ranch?|Andy Serkis|Cleanie +14|309–1418|EABF13|Dude, Where's My Ranch?|Jonathan Taylor Thomas|Luke Stetson +14|310–1419|EABF14|Old Yeller Belly|Stacy Keach|Howard K. 
Duff VII +14|310–1419|EABF14|Old Yeller Belly|John Kassir|Various animals +14|311–1420|EABF15|Brake My Wife, Please|Jackson Browne|Himself +14|311–1420|EABF15|Brake My Wife, Please|Steve Buscemi|Himself +14|311–1420|EABF15|Brake My Wife, Please|Jane Kaczmarek|Judge Constance Harm +14|311–1420|EABF15|Brake My Wife, Please|Marcia Wallace|Edna Krabappel +14|313–1422|EABF17|Moe Baby Blues|Joe Mantegna|Fat Tony +15|314–1501|EABF21|Treehouse of Horror XIV|Oscar De La Hoya|Himself +15|314–1501|EABF21|Treehouse of Horror XIV|Jennifer Garner|Herself +15|314–1501|EABF21|Treehouse of Horror XIV|Dudley R. Herschbach|Himself +15|314–1501|EABF21|Treehouse of Horror XIV|Jerry Lewis|Professor John Frink Sr. +15|315–1502|EABF18|My Mother the Carjacker|Glenn Close|Mona Simpson +15|316–1503|EABF20|The President Wore Pearls|Michael Moore|Himself +15|316–1503|EABF20|The President Wore Pearls|Marcia Wallace|Edna Krabappel +15|317–1504|EABF22|The Regina Monologues|Tony Blair|Himself +15|317–1504|EABF22|The Regina Monologues|Jane Leeves|Edwina +15|317–1504|EABF22|The Regina Monologues|Evan Marriott|Himself +15|317–1504|EABF22|The Regina Monologues|Ian McKellen|Himself +15|317–1504|EABF22|The Regina Monologues|J. K. Rowling|Herself +15|318–1505|EABF19|The Fat and the Furriest|Charles Napier|Grant Connor +15|319–1506|FABF01|Today I Am A Clown|Jackie Mason|Rabbi Hyman Krustofsky +15|319–1506|FABF01|Today I Am A Clown|Mr. T|Himself +15|320–1507|FABF02|'Tis the Fifteenth Season|Jim Gilstrap|Soulful Prune +15|321–1508|FABF03|Marge vs. Singles, Seniors, Childless Couples and Teens and Gays|Marcia Wallace|Edna Krabappel +15|323–1510|FABF05|Diatribe of a Mad Housewife|Tom Clancy|Himself +15|323–1510|FABF05|Diatribe of a Mad Housewife|Ashley Olsen|Herself +15|323–1510|FABF05|Diatribe of a Mad Housewife|Mary-Kate Olsen|Herself +15|323–1510|FABF05|Diatribe of a Mad Housewife|Thomas Pynchon|Himself +15|325–1512|FABF07|Milhouse Doesn't Live Here Anymore|Nick Bakay|Salem Saberhagen +15|325–1512|FABF07|Milhouse Doesn't Live Here Anymore|William Daniels|KITT +15|325–1512|FABF07|Milhouse Doesn't Live Here Anymore|Isabel Sanford|Herself +15|325–1512|FABF07|Milhouse Doesn't Live Here Anymore|Dick Tufeld|Robot B-9 +15|325–1512|FABF07|Milhouse Doesn't Live Here Anymore|Marcia Wallace|Edna Krabappel +15|326–1513|FABF09|Smart and Smarter|Simon Cowell|Henry +15|327–1514|FABF08|The Ziff Who Came to Dinner|Jon Lovitz|Artie Ziff; Jay Sherman; Llewelyn Sinclair; Aristotle Amadopoulis; Professor Lombardo +15|328–1515|FABF10|Co-Dependent's Day|Brave Combo|Play the end credits +15|328–1515|FABF10|Co-Dependent's Day|Marcia Wallace|Edna Krabappel +15|329–1516|FABF11|The Wandering Juvie|Sarah Michelle Gellar|Gina Vendetti +15|329–1516|FABF11|The Wandering Juvie|Jane Kaczmarek|Judge Constance Harm +15|329–1516|FABF11|The Wandering Juvie|Charles Napier|Warden +15|329–1516|FABF11|The Wandering Juvie|Marcia Wallace|Edna Krabappel +15|330–1517|FABF12|My Big Fat Geek Wedding|Matt Groening|Himself +15|330–1517|FABF12|My Big Fat Geek Wedding|Marcia Wallace|Edna Krabappel +15|332–1519|FABF15|Simple Simpson|Mark Campbell|Country singer +15|332–1519|FABF15|Simple Simpson|Nichelle Nichols|Herself +15|334–1521|FABF17|The Bart-Mangled Banner|Marcia Wallace|Edna Krabappel +16|336–1601|FABF23|Treehouse of Horror XV|Marie Cain|Sings "I've Got You Under My Skin" +16|337–1602|FABF20|All's Fair in Oven War|James Caan|Himself +16|337–1602|FABF20|All's Fair in Oven War|Thomas Pynchon|Himself +16|337–1602|FABF20|All's Fair in Oven War|Marcia Wallace|Edna Krabappel 
+16|338–1603|FABF19|Sleeping with the Enemy|Marcia Wallace|Edna Krabappel +16|339–1604|FABF22|She Used to Be My Girl|Kim Cattrall|Chloe Talbot +16|339–1604|FABF22|She Used to Be My Girl|Michael Dees|Sings "Too-Ra-Loo-Ra-Loo-Ral (That's an Irish Lullaby)" +16|340–1605|FABF21|Fat Man and Little Boy|Terry W. Greene|Sgt. Activity +16|340–1605|FABF21|Fat Man and Little Boy|Eric Idle|Declan Desmond +16|343–1608|GABF02|Homer and Ned's Hail Mary Pass|Tom Brady|Himself +16|343–1608|GABF02|Homer and Ned's Hail Mary Pass|LeBron James|Himself +16|343–1608|GABF02|Homer and Ned's Hail Mary Pass|Michelle Kwan|Herself +16|343–1608|GABF02|Homer and Ned's Hail Mary Pass|Warren Sapp|Himself +16|343–1608|GABF02|Homer and Ned's Hail Mary Pass|Yao Ming|Himself +16|344–1609|GABF03|Pranksta Rap|50 Cent|Himself +16|344–1609|GABF03|Pranksta Rap|Dana Gould|Barney Fife +16|345–1610|GABF04|There's Something About Marrying|Marcia Wallace|Edna Krabappel +16|346–1611|GABF05|On a Clear Day I Can't See My Sister|Gary Busey|Himself +16|346–1611|GABF05|On a Clear Day I Can't See My Sister|Jane Kaczmarek|Judge Constance Harm +16|346–1611|GABF05|On a Clear Day I Can't See My Sister|Marcia Wallace|Edna Krabappel +16|347–1612|GABF06|Goo Goo Gai Pan|Lucy Liu|Madam Wu +16|347–1612|GABF06|Goo Goo Gai Pan|Robert Wagner|Himself +16|349–1614|GABF08|The Seven-Beer Snitch|Frank Gehry|Himself +16|349–1614|GABF08|The Seven-Beer Snitch|Joe Mantegna|Fat Tony +16|349–1614|GABF08|The Seven-Beer Snitch|Charles Napier|Officer Krackney +16|350–1615|GABF12|Future-Drama|John DiMaggio|Bender +16|350–1615|GABF12|Future-Drama|Amy Poehler|Jenda +16|350–1615|GABF12|Future-Drama|Marcia Wallace|Edna Krabappel +16|351–1616|GABF10|Don't Fear the Roofer|Stephen Hawking|Himself +16|351–1616|GABF10|Don't Fear the Roofer|Ray Romano|Ray Magini +16|352–1617|GABF11|The Heartbroke Kid|Albert Brooks|Tab Spangler; Jacques +16|352–1617|GABF11|The Heartbroke Kid|Marcia Wallace|Edna Krabappel +16|353–1618|GABF13|A Star Is Torn|Fantasia Barrino|Clarissa Wellington +16|354–1619|GABF14|Thank God, It's Doomsday|Baha Men|Sing "Who Wants a Haircut?" (parody of the song Who Let the Dogs Out?) +16|354–1619|GABF14|Thank God, It's Doomsday|Los Lobos|Play end credits +16|355–1620|GABF15|Home Away from Homer|Jason Bateman|Himself +16|356–1621|GABF09|The Father, the Son, and the Holy Guest Star|Liam Neeson|Father Sean +16|356–1621|GABF09|The Father, the Son, and the Holy Guest Star|Marcia Wallace|Edna Krabappel +17|357–1701|GABF18|Bonfire of the Manatees|Alec Baldwin|Caleb Thorn +17|357–1701|GABF18|Bonfire of the Manatees|Joe Mantegna|Fat Tony +17|360–1704|GABF17|Treehouse of Horror XVI|Terry Bradshaw|Himself +17|360–1704|GABF17|Treehouse of Horror XVI|Terry W. Greene|Lava-tron +17|360–1704|GABF17|Treehouse of Horror XVI|Dennis Rodman|Himself +17|363–1707|GABF22|The Last of the Red Hat Mamas|Lily Tomlin|Tammy +17|364–1708|HABF02|The Italian Bob|Maria Grazia Cucinotta|Francesca Terwilliger +17|364–1708|HABF02|The Italian Bob|Kelsey Grammer|Sideshow Bob +17|364–1708|HABF02|The Italian Bob|Marcia Wallace|Edna Krabappel +17|366–1710|HABF03|Homer's Paternity Coot|Joe Frazier|Himself +17|366–1710|HABF03|Homer's Paternity Coot|William H. 
Macy|Himself +17|366–1710|HABF03|Homer's Paternity Coot|Michael York|Mason Fairbanks +17|367–1711|HABF04|We're on the Road to D'ohwhere|Marcia Wallace|Edna Krabappel +17|368–1712|HABF05|My Fair Laddy|Marcia Wallace|Edna Krabappel +17|369–1713|HABF06|The Seemingly Never-Ending Story|Michael Dees|Sings "Fly Me to the Moon" +17|369–1713|HABF06|The Seemingly Never-Ending Story|Maurice LaMarche|Commander McBragg +17|369–1713|HABF06|The Seemingly Never-Ending Story|Marcia Wallace|Edna Krabappel +17|370–1714|HABF07|Bart Has Two Mommies|Antonio Fargas|Huggy Bear +17|370–1714|HABF07|Bart Has Two Mommies|Randy Johnson|Himself +17|370–1714|HABF07|Bart Has Two Mommies|Susan Sarandon|Herself +17|370–1714|HABF07|Bart Has Two Mommies|Dave Thomas|Bob Hope +17|371–1715|HABF08|Homer Simpson, This Is Your Wife|Ricky Gervais|Charles Heathbar +17|371–1715|HABF08|Homer Simpson, This Is Your Wife|Marcia Wallace|Edna Krabappel +17|372–1716|HABF09|Million Dollar Abie|Michael Carrington|Sports anchor #2 +17|372–1716|HABF09|Million Dollar Abie|Rob Reiner|Himself +17|373–1717|HABF10|Kiss Kiss, Bang Bangalore|Richard Dean Anderson|Himself +17|373–1717|HABF10|Kiss Kiss, Bang Bangalore|Meher Tatna|Indian passenger +17|375–1719|HABF12|Girls Just Want to Have Sums|Amick Byram|Singing Itchy +17|375–1719|HABF12|Girls Just Want to Have Sums|Jim Gilstrap|Wise Old Cat singer +17|375–1719|HABF12|Girls Just Want to Have Sums|Bob Joyce|Singing Scratchy +17|375–1719|HABF12|Girls Just Want to Have Sums|Frances McDormand|Melanie Upfoot +17|375–1719|HABF12|Girls Just Want to Have Sums|Sally Stevens|Singing Female Cat +17|375–1719|HABF12|Girls Just Want to Have Sums|Marcia Wallace|Edna Krabappel +17|375–1719|HABF12|Girls Just Want to Have Sums|Oren Waters|Singing Male Cat +17|376–1720|HABF13|Regarding Margie|Sal Bando|Himself +17|376–1720|HABF13|Regarding Margie|Gene Tenace|Himself +17|376–1720|HABF13|Regarding Margie|Marcia Wallace|Edna Krabappel +17|377–1721|HABF14|The Monkey Suit|Melanie Griffith|Herself +17|377–1721|HABF14|The Monkey Suit|Larry Hagman|Wallace Brady +17|378–1722|HABF16|Marge and Homer Turn a Couple Play|Stacy Keach|Howard K. Duff VII +17|378–1722|HABF16|Marge and Homer Turn a Couple Play|Mandy Moore|Tabitha Vixx +18|379–1801|HABF15|The Mook, the Chef, the Wife and Her Homer|Michael Imperioli|Dante Calabresis Jr. +18|379–1801|HABF15|The Mook, the Chef, the Wife and Her Homer|Joe Mantegna|Fat Tony +18|379–1801|HABF15|The Mook, the Chef, the Wife and Her Homer|Metallica|Themselves +18|379–1801|HABF15|The Mook, the Chef, the Wife and Her Homer|Joe Pantoliano|Dante Calabresis +18|380–1802|HABF18|Jazzy and the Pussycats|Larina Jean Adamson|Waitress +18|380–1802|HABF18|Jazzy and the Pussycats|The White Stripes|Themselves +18|382–1804|HABF17|Treehouse of Horror XVII|Fran Drescher|Female Golem +18|382–1804|HABF17|Treehouse of Horror XVII|Maurice LaMarche|Orson Welles +18|382–1804|HABF17|Treehouse of Horror XVII|Richard Lewis|Male Golem +18|382–1804|HABF17|Treehouse of Horror XVII|Phil McGraw|Himself +18|382–1804|HABF17|Treehouse of Horror XVII|Sir Mix-a-Lot|Sings "Baby Likes Fat" (parody of the song Baby Got Back) +18|382–1804|HABF17|Treehouse of Horror XVII|Marcia Wallace|Edna Krabappel +18|383–1805|HABF21|G.I. (Annoyed Grunt)|Maurice LaMarche|Recruiter #2; Cap'n Crunch +18|383–1805|HABF21|G.I. (Annoyed Grunt)|Kiefer Sutherland|Colonel +18|384–1806|HABF19|Moe'N'a Lisa|Michael Chabon|Himself +18|384–1806|HABF19|Moe'N'a Lisa|Jonathan Franzen|Himself +18|384–1806|HABF19|Moe'N'a Lisa|J. K. Simmons|J. 
Jonah Jameson +18|384–1806|HABF19|Moe'N'a Lisa|Gore Vidal|Himself +18|384–1806|HABF19|Moe'N'a Lisa|Tom Wolfe|Himself +18|387–1809|JABF01|Kill Gil, Volumes I & II|Elvis Stojko|Himself +18|388–1810|JABF03|The Wife Aquatic|Dana Gould|Rotting Pelican Crewman +18|388–1810|JABF03|The Wife Aquatic|Maurice LaMarche|First Mate Billy; Oceanographer +18|388–1810|JABF03|The Wife Aquatic|Sab Shimono|Japanese fisherman +18|390–1812|JABF04|Little Big Girl|Natalie Portman|Darcy +18|391–1813|JABF07|Springfield Up|Eric Idle|Declan Desmond +18|392–1814|JABF09|Yokel Chords|Peter Bogdanovich|Psychologist +18|392–1814|JABF09|Yokel Chords|Andy Dick|Himself +18|392–1814|JABF09|Yokel Chords|James Patterson|Himself +18|392–1814|JABF09|Yokel Chords|Meg Ryan|Dr. Stacy Swanson +18|392–1814|JABF09|Yokel Chords|Stephen Sondheim|Himself +18|392–1814|JABF09|Yokel Chords|Marcia Wallace|Edna Krabappel +18|393–1815|JABF08|Rome-old and Juli-eh|Jane Kaczmarek|Judge Constance Harm +18|394–1816|JABF06|Homerazzi|Jon Lovitz|Enrico Irritazio +18|394–1816|JABF06|Homerazzi|J. K. Simmons|Tabloid editor +18|394–1816|JABF06|Homerazzi|Betty White|Herself +18|395–1817|JABF10|Marge Gamer|Ronaldo|Himself +18|395–1817|JABF10|Marge Gamer|Marcia Wallace|Edna Krabappel +18|398–1818|JABF12|Stop or My Dog Will Shoot|Rudy Giuliani|Himself +18|398–1818|JABF12|Stop or My Dog Will Shoot|Stephen Hawking|Himself +18|398–1818|JABF12|Stop or My Dog Will Shoot|Maurice LaMarche|Farmer; Horn Stuffer +18|398–1818|JABF12|Stop or My Dog Will Shoot|Marcia Wallace|Edna Krabappel +18|399–1821|JABF14|24 Minutes|Mary Lynn Rajskub|Chloe O'Brian +18|399–1821|JABF14|24 Minutes|Kiefer Sutherland|Jack Bauer +18|400–1822|JABF15|You Kent Always Say What You Want|Maurice LaMarche|Fox announcer +18|400–1822|JABF15|You Kent Always Say What You Want|Ludacris|Himself +Movie|M1|—|The Simpsons Movie|Albert Brooks|Russ Cargill +Movie|M1|—|The Simpsons Movie|Green Day|Themselves +Movie|M1|—|The Simpsons Movie|Tom Hanks|Himself +Movie|M1|—|The Simpsons Movie|Joe Mantegna|Fat Tony +Movie|M1|—|The Simpsons Movie|Philip Rosenthal|TV Dad +Movie|M1|—|The Simpsons Movie|Marcia Wallace|Edna Krabappel +19|401–1901|JABF20|He Loves to Fly and He D'ohs|Stephen Colbert|Colby Kraus +19|401–1901|JABF20|He Loves to Fly and He D'ohs|Lionel Richie|Himself +19|402–1902|JABF18|Homer of Seville|Plácido Domingo|Himself +19|402–1902|JABF18|Homer of Seville|Maya Rudolph|Julia +19|403–1903|JABF21|Midnight Towboy|Matt Dillon|Louie +19|404–1904|JABF19|I Don't Wanna Know Why the Caged Bird Sings|Steve Buscemi|Dwight David Diddlehoffer +19|404–1904|JABF19|I Don't Wanna Know Why the Caged Bird Sings|Julia Louis-Dreyfus|Gloria +19|404–1904|JABF19|I Don't Wanna Know Why the Caged Bird Sings|Ted Nugent|Himself +19|405–1905|JABF16|Treehouse of Horror XVIII|Maurice LaMarche|Government Official +19|407–1907|JABF17|Husbands and Knives|Jack Black|Milo +19|407–1907|JABF17|Husbands and Knives|Daniel Clowes|Himself +19|407–1907|JABF17|Husbands and Knives|Maurice LaMarche|Jock +19|407–1907|JABF17|Husbands and Knives|Sang Am Lee|Korean singer +19|407–1907|JABF17|Husbands and Knives|Alan Moore|Himself +19|407–1907|JABF17|Husbands and Knives|Art Spiegelman|Himself +19|408–1908|KABF01|Funeral for a Fiend|Kelsey Grammer|Sideshow Bob +19|408–1908|KABF01|Funeral for a Fiend|John Mahoney|Dr. Robert Terwilliger, Sr. 
+19|408–1908|KABF01|Funeral for a Fiend|Keith Olbermann|Himself +19|408–1908|KABF01|Funeral for a Fiend|David Hyde Pierce|Cecil Terwilliger +19|410–1910|KABF03|E Pluribus Wiggum|Dan Rather|Himself +19|410–1910|KABF03|E Pluribus Wiggum|Jon Stewart|Himself +19|411–1911|KABF04|That '90s Show|Michael Dees|Sings "Winter Wonderland" +19|411–1911|KABF04|That '90s Show|Kurt Loder|Himself +19|411–1911|KABF04|That 90's Show|"Weird Al" Yankovic|Himself +19|413–1913|KABF06|The Debarted|Topher Grace|Donny +19|413–1913|KABF06|The Debarted|Terry Gross|Herself +19|413–1913|KABF06|The Debarted|Marcia Wallace|Edna Krabappel +19|414–1914|KABF07|Dial "N" for Nerder|Marcia Wallace|Edna Krabappel +19|416–1916|KABF09|Papa Don't Leech|Beverly D'Angelo|Lurleen Lumpkin +19|416–1916|KABF09|Papa Don't Leech|Dixie Chicks|Themselves +19|417–1917|KABF10|Apocalypse Cow|Zooey Deschanel|Mary Spuckler +19|418–1918|KABF11|Any Given Sundance|Jim Jarmusch|Himself +19|418–1918|KABF11|Any Given Sundance|John C. Reilly|Himself +19|419–1919|KABF12|Mona Leaves-a|Lance Armstrong|Himself +19|419–1919|KABF12|Mona Leaves-a|Glenn Close|Mona Simpson +19|420–1920|KABF13|All About Lisa|Drew Carey|Himself +20|421–2001|KABF17|Sex, Pies and Idiot Scrapes|Robert Forster|Lucky Jim +20|421–2001|KABF17|Sex, Pies and Idiot Scrapes|Julia Louis-Dreyfus|Gloria +20|421–2001|KABF17|Sex, Pies and Idiot Scrapes|Joe Mantegna|Fat Tony +20|422–2002|KABF15|Lost Verizon|Brian Grazer|Himself +20|422–2002|KABF15|Lost Verizon|Denis Leary|Himself +20|423–2003|KABF14|Double, Double, Boy in Trouble|Joe Montana|Himself +20|424–2004|KABF16|Treehouse of Horror XIX|Marcia Wallace|Edna Krabappel +20|425–2005|KABF18|Dangerous Curves|Maurice LaMarche|Toucan Sam; Cap'n Crunch; Trix Rabbit +20|426–2006|KABF19|Homer and Lisa Exchange Cross Words|Merl Reagle|Himself +20|426–2006|KABF19|Homer and Lisa Exchange Cross Words|Will Shortz|Himself +20|426–2006|KABF19|Homer and Lisa Exchange Cross Words|Scott Thompson|Grady +20|426–2006|KABF19|Homer and Lisa Exchange Cross Words|Marcia Wallace|Edna Krabappel +20|427–2007|KABF20|Mypods and Boomsticks|Shohreh Aghdashloo|Mina bin Laden +20|428–2008|KABF21|The Burns and the Bees|Marv Albert|Himself +20|428–2008|KABF21|The Burns and the Bees|Jeff Bezos|Himself +20|428–2008|KABF21|The Burns and the Bees|Mark Cuban|Himself +20|429–2009|KABF22|Lisa the Drama Queen|Emily Blunt|Juliet Hobbes +20|429–2009|KABF22|Lisa the Drama Queen|Fall Out Boy|Play the end credits +20|430–2010|LABF01|Take My Life, Please|Marcia Wallace|Edna Krabappel +20|431–2011|LABF02|How the Test Was Won|Marcia Wallace|Edna Krabappel +20|432–2012|LABF03|No Loan Again, Naturally|Maurice LaMarche|Dwight D. 
Eisenhower +20|432–2012|LABF03|No Loan Again, Naturally|Marcia Wallace|Edna Krabappel +20|433–2013|LABF04|Gone Maggie Gone|Ed Begley, Jr.|Himself +20|434–2014|LABF11|In the Name of the Grandfather|Glen Hansard|Street musician +20|434–2014|LABF11|In the Name of the Grandfather|Markéta Irglová|Czech singer +20|434–2014|LABF11|In the Name of the Grandfather|Colm Meaney|Tom O'Flanagan +20|434–2014|LABF11|In the Name of the Grandfather|Marcia Wallace|Edna Krabappel +20|435–2015|LABF05|Wedding for Disaster|Kelsey Grammer|Sideshow Bob +20|437–2017|LABF07|The Good, the Sad and the Drugly|Anne Hathaway|Jenny +20|438–2018|LABF08|Father Knows Worst|Marcia Wallace|Edna Krabappel +20|439–2019|LABF10|Waverly Hills 9-0-2-1-D'oh|Maurice LaMarche|City Inspector +20|439–2019|LABF10|Waverly Hills 9-0-2-1-D'oh|Ellen Page|Alaska Nebraska +20|439–2019|LABF10|Waverly Hills 9-0-2-1-D'oh|Marcia Wallace|Edna Krabappel +20|440–2020|LABF09|Four Great Women and a Manicure|Jodie Foster|Maggie Simpson +21|442–2101|LABF13|Homer the Whopper|Matt Groening|Himself +21|442–2101|LABF13|Homer the Whopper|Seth Rogen|Lyle McCarthy +21|442–2101|LABF13|Homer the Whopper|Kevin Michael Richardson|Security guard +21|443–2102|LABF15|Bart Gets a "Z"|Marcia Wallace|Edna Krabappel +21|444–2103|LABF16|The Great Wife Hope|Chuck Liddell|Himself +21|444–2103|LABF16|The Great Wife Hope|Marcia Wallace|Edna Krabappel +21|445–2104|LABF14|Treehouse of Horror XX|Marcia Wallace|Edna Krabappel +21|446–2105|LABF17|The Devil Wears Nada|Marcia Wallace|Edna Krabappel +21|447–2106|LABF18|Pranks and Greens|Jonah Hill|Andy Hamilton +21|447–2106|LABF18|Pranks and Greens|Marcia Wallace|Edna Krabappel +21|448–2107|LABF19|Rednecks and Broomsticks|Neve Campbell|Cassandra +21|449–2108|MABF01|O Brother, Where Bart Thou?|Kim Cattrall|Fourth Simpson child +21|449–2108|MABF01|O Brother, Where Bart Thou?|Huell Howser|Himself +21|449–2108|MABF01|O Brother, Where Bart Thou?|Cooper Manning|Himself +21|449–2108|MABF01|O Brother, Where Bart Thou?|Eli Manning|Himself +21|449–2108|MABF01|O Brother, Where Bart Thou?|Peyton Manning|Himself +21|449–2108|MABF01|O Brother, Where Bart Thou?|Jordan Nagai|Charlie +21|449–2108|MABF01|O Brother, Where Bart Thou?|Smothers Brothers|Themselves +21|450–2109|MABF02|Thursdays with Abie|Mitch Albom|Himself +21|450–2109|MABF02|Thursdays with Abie|Marcia Wallace|Edna Krabappel +21|451–2110|LABF20|Once Upon a Time in Springfield|Anne Hathaway|Princess Penelope +21|451–2110|LABF20|Once Upon a Time in Springfield|Eartha Kitt|Herself +21|451–2110|LABF20|Once Upon a Time in Springfield|Maurice LaMarche|Nuclear Power Plant Guard +21|451–2110|LABF20|Once Upon a Time in Springfield|Gary Larson|Himself +21|451–2110|LABF20|Once Upon a Time in Springfield|Jackie Mason|Rabbi Hyman Krustofsky +21|452–2111|MABF03|Million Dollar Maybe|Chris Martin|Himself +21|452–2111|MABF03|Million Dollar Maybe|Marcia Wallace|Edna Krabappel +21|453–2112|MABF05|Boy Meets Curl|Bob Costas|Himself +21|454–2113|MABF06|The Color Yellow|Wren T. 
Brown|Virgil Simpson +21|455–2114|MABF04|Postcards from the Wedge|Marcia Wallace|Edna Krabappel +21|456–2115|MABF07|Stealing First Base|Angela Bassett|Michelle Obama +21|456–2115|MABF07|Stealing First Base|Sarah Silverman|Nikki McKenna +21|457–2116|MABF10|The Greatest Story Ever D'ohed|Sacha Baron Cohen|Jakob +21|457–2116|MABF10|The Greatest Story Ever D'ohed|Yael Naim|Dorit +21|458–2117|MABF08|American History X-cellent|Joe Mantegna|Fat Tony +21|458–2117|MABF08|American History X-cellent|Kevin Michael Richardson|Prisoner +21|459–2118|MABF09|Chief of Hearts|Jane Kaczmarek|Judge Constance Harm +21|459–2118|MABF09|Chief of Hearts|Maurice LaMarche|David Starsky +21|459–2118|MABF09|Chief of Hearts|Joe Mantegna|Fat Tony +21|461–2120|MABF12|To Surveil with Love|Eddie Izzard|Queen Elizabeth II; Prince Charles; Nigel Bakerbutcher +21|461–2120|MABF12|To Surveil with Love|Marcia Wallace|Edna Krabappel +21|462–2121|MABF13|Moe Letter Blues|Don Pardo|Himself +21|463–2122|MABF11|The Bob Next Door|Kelsey Grammer|Sideshow Bob +21|464–2123|MABF15|Judge Me Tender|Simon Cowell|Himself +21|464–2123|MABF15|Judge Me Tender|Ellen DeGeneres|Herself +21|464–2123|MABF15|Judge Me Tender|Kara DioGuardi|Herself +21|464–2123|MABF15|Judge Me Tender|Randy Jackson|Himself +21|464–2123|MABF15|Judge Me Tender|Rupert Murdoch|Himself +21|464–2123|MABF15|Judge Me Tender|Ryan Seacrest|Himself +22|465–2201|MABF21|Elementary School Musical|Jemaine Clement|Ethan Ballantyne +22|465–2201|MABF21|Elementary School Musical|Ira Glass|Himself +22|465–2201|MABF21|Elementary School Musical|Stephen Hawking|Himself +22|465–2201|MABF21|Elementary School Musical|Bret McKenzie|Kurt Hardwick +22|465–2201|MABF21|Elementary School Musical|Lea Michele|Sarah +22|465–2201|MABF21|Elementary School Musical|Cory Monteith|Flynn +22|465–2201|MABF21|Elementary School Musical|Amber Riley|Aiesha +22|466–2202|MABF17|Loan-a Lisa|Terry W. Greene|Janitor +22|466–2202|MABF17|Loan-a Lisa|Chris Hansen|Himself +22|466–2202|MABF17|Loan-a Lisa|Muhammad Yunus|Himself +22|466–2202|MABF17|Loan-a Lisa|Mark Zuckerberg|Himself +22|467–2203|MABF18|MoneyBART|Bill James|Himself +22|467–2203|MABF18|MoneyBART|Mike Scioscia|Himself +22|468–2204|MABF16|Treehouse of Horror XXI|Hugh Laurie|Roger +22|468–2204|MABF16|Treehouse of Horror XXI|Daniel Radcliffe|Edmund +22|470–2206|NABF01|The Fool Monty|Terry W. Greene|Sgt. Activity +22|471–2207|NABF02|How Munched is That Birdie in the Window?|Gregg Berger|Various animals +22|471–2207|NABF02|How Munched is That Birdie in the Window?|Danica Patrick|Herself +22|471–2207|NABF02|How Munched is That Birdie in the Window?|Marcia Wallace|Edna Krabappel +22|471–2207|NABF02|How Munched is That Birdie in the Window?|Rachel Weisz|Dr. 
Thurston +22|472–2208|MABF22|The Fight Before Christmas|Katy Perry|Herself (live action) +22|472–2208|MABF22|The Fight Before Christmas|Martha Stewart|Herself +22|473–2209|MABF19|Donnie Fatso|Jon Hamm|FBI agent +22|473–2209|MABF19|Donnie Fatso|Joe Mantegna|Fat Tony; Fit Tony +22|474–2210|NABF03|Moms I'd Like to Forget|Marcia Wallace|Edna Krabappel +22|475–2211|NABF04|Flaming Moe|Alyson Hannigan|Melody Juniper +22|475–2211|NABF04|Flaming Moe|Scott Thompson|Grady +22|475–2211|NABF04|Flaming Moe|Kristen Wiig|Calliope Juniper +22|476–2212|NABF05|Homer the Father|Michael Paul Chan|Chinese agent +22|476–2212|NABF05|Homer the Father|James Lipton|Himself +22|476–2212|NABF05|Homer the Father|David Mamet|Himself +22|476–2212|NABF05|Homer the Father|Garry Marshall|Sheldon Leavitt +22|477–2213|NABF06|The Blue and the Gray|Marcia Wallace|Edna Krabappel +22|478–2214|NABF07|Angry Dad: The Movie|Halle Berry|Herself +22|478–2214|NABF07|Angry Dad: The Movie|Russell Brand|Himself +22|478–2214|NABF07|Angry Dad: The Movie|Ricky Gervais|Himself +22|478–2214|NABF07|Angry Dad: The Movie|Terry W. Greene|Ridley Scott Seat Filler +22|478–2214|NABF07|Angry Dad: The Movie|Maurice LaMarche|Anthony Hopkins +22|478–2214|NABF07|Angry Dad: The Movie|Nick Park|Himself +22|478–2214|NABF07|Angry Dad: The Movie|J. B. Smoove|DJ Kwanzaa +22|479–2215|NABF08|The Scorpion's Tale|Werner Herzog|Walter Hotenhoffer +22|479–2215|NABF08|The Scorpion's Tale|Kevin Michael Richardson|Retirement Castle orderly +22|480–2216|NABF09|A Midsummer's Nice Dream|Tommy Chong|Himself +22|480–2216|NABF09|A Midsummer's Nice Dream|Cheech Marin|Himself +22|481–2217|NABF10|Love Is a Many Strangled Thing|Kareem Abdul-Jabbar|Himself +22|481–2217|NABF10|Love Is a Many Strangled Thing|Kevin Michael Richardson|Masseur +22|481–2217|NABF10|Love Is a Many Strangled Thing|Paul Rudd|Dr. 
Zander +22|481–2217|NABF10|Love Is a Many Strangled Thing|Marcia Wallace|Edna Krabappel +22|482–2218|NABF11|The Great Simpsina|David Copperfield|Himself +22|482–2218|NABF11|The Great Simpsina|Ricky Jay|Himself +22|482–2218|NABF11|The Great Simpsina|Penn Jillette|Himself +22|482–2218|NABF11|The Great Simpsina|Martin Landau|The Great Raymondo +22|482–2218|NABF11|The Great Simpsina|Jack McBrayer|Ewell Freestone +22|482–2218|NABF11|The Great Simpsina|Teller|Himself +22|483–2219|NABF12|The Real Housewives of Fat Tony|Joe Mantegna|Fat Tony +22|484–2220|NABF13|Homer Scissorhands|Kristen Schaal|Taffy +22|484–2220|NABF13|Homer Scissorhands|Marcia Wallace|Edna Krabappel +22|485–2221|NABF14|500 Keys|Albert Brooks|Hank Scorpio +22|486–2222|NABF15|The Ned-Liest Catch|Ken Burns|Himself +22|486–2222|NABF15|The Ned-Liest Catch|Joey Kramer|Himself +22|486–2222|NABF15|The Ned-Liest Catch|Marcia Wallace|Edna Krabappel +23|487–2301|NABF16|The Falcon and the D'ohman|Tom Colicchio|Himself +23|487–2301|NABF16|The Falcon and the D'ohman|Ping Marshall|Viktor +23|487–2301|NABF16|The Falcon and the D'ohman|Kevin Michael Richardson|SendEx Courier +23|487–2301|NABF16|The Falcon and the D'ohman|Kiefer Sutherland|Wayne Slater +23|487–2301|NABF16|The Falcon and the D'ohman|Marcia Wallace|Edna Krabappel +23|488–2302|NABF17|Bart Stops to Smell the Roosevelts|Theodore Roosevelt (archival)|Himself +23|488–2302|NABF17|Bart Stops to Smell the Roosevelts|Marcia Wallace|Edna Krabappel +23|489–2303|NABF19|Treehouse of Horror XXII|Jackie Mason|Rabbi Hyman Krustofsky +23|489–2303|NABF19|Treehouse of Horror XXII|Aron Ralston|911 operator +23|490–2304|NABF21|Replaceable You|Jane Lynch|Roz Davis +23|491–2305|NABF20|The Food Wife|Mario Batali|Himself +23|491–2305|NABF20|The Food Wife|Anthony Bourdain|Himself +23|491–2305|NABF20|The Food Wife|Tim Heidecker|Amus Bruse +23|491–2305|NABF20|The Food Wife|Gordon Ramsay|Himself +23|491–2305|NABF20|The Food Wife|Eric Wareheim|Fois Garth +23|492–2306|NABF22|The Book Job|Neil Gaiman|Himself +23|492–2306|NABF22|The Book Job|Andy García|TweenLit Inc. 
Publisher +23|493–2307|PABF01|The Man in the Blue Flannel Pants|Kevin Michael Richardson|Party guest +23|493–2307|PABF01|The Man in the Blue Flannel Pants|John Slattery|Robert Marlowe +23|493–2307|PABF01|The Man in the Blue Flannel Pants|Matthew Weiner|Nuclear regulator +23|494–2308|PABF02|The Ten-Per-Cent Solution|Kevin Dillon|Himself +23|494–2308|PABF02|The Ten-Per-Cent Solution|Janeane Garofalo|Herself +23|494–2308|PABF02|The Ten-Per-Cent Solution|Jackie Mason|Rabbi Hyman Krustofsky +23|494–2308|PABF02|The Ten-Per-Cent Solution|Joan Rivers|Annie Dubinsky +23|494–2309|PABF02|Holidays of Future Passed|Matt Groening|Announcer +23|496–2310|PABF03|Politically Inept, with Homer Simpson|Dana Gould|Air marshall +23|496–2310|PABF03|Politically Inept, with Homer Simpson|Ted Nugent|Himself +23|497–2311|PABF04|The D'oh-cial Network|Armie Hammer|The Winklevoss twins +23|497–2311|PABF04|The D'oh-cial Network|David Letterman|Himself +23|497–2311|PABF04|The D'oh-cial Network|Tiger Lillies|Play the end credits +23|498–2312|PABF05|Moe Goes from Rags to Riches|Jeremy Irons|Bar rag +23|499–2313|PABF06|The Daughter Also Rises|Michael Cera|Nick +23|499–2313|PABF06|The Daughter Also Rises|Jamie Hyneman|Himself +23|499–2313|PABF06|The Daughter Also Rises|Adam Savage|Himself +23|500–2314|PABF07|At Long Last Leave|Julian Assange|Himself +23|500–2314|PABF07|At Long Last Leave|Kelsey Grammer|Sideshow Bob +23|500–2314|PABF07|At Long Last Leave|Alison Krauss and Union Station|Perform the theme +23|500–2314|PABF07|At Long Last Leave|Susie Stevens|Sings "We'll Meet Again" +23|500–2314|PABF07|At Long Last Leave|Jackie Mason|Rabbi Hyman Krustofsky +23|501–2315|PABF09|Exit Through the Kwik-E-Mart|Robbie Conal|Himself +23|501–2315|PABF09|Exit Through the Kwik-E-Mart|Ron English|Himself +23|501–2315|PABF09|Exit Through the Kwik-E-Mart|Shepard Fairey|Himself +23|501–2315|PABF09|Exit Through the Kwik-E-Mart|Nicholas McKaig|Sings the end credits +23|501–2315|PABF09|Exit Through the Kwik-E-Mart|Kenny Scharf|Himself +23|502–2316|PABF08|How I Wet Your Mother|David Byrne|Sings "Dream Operator" +23|502–2316|PABF08|How I Wet Your Mother|Glenn Close|Mona Simpson +23|503–2317|PABF10|Them, Robot|Brent Spiner|Robots +23|504–2318|PABF11|Beware My Cheating Bart|Kevin Michael Richardson|Security guard +23|505–2319|PABF12|A Totally Fun Thing That Bart Will Never Do Again|Steve Coogan|Rowan Priddis +23|505–2319|PABF12|A Totally Fun Thing That Bart Will Never Do Again|Renee Ridgeley|Cruise Ship Receptionist +23|505–2319|PABF12|A Totally Fun Thing That Bart Will Never Do Again|Treat Williams|Himself +23|506–2320|PABF13|The Spy Who Learned Me|Bryan Cranston|Stradivarius Cain +23|506–2320|PABF13|The Spy Who Learned Me|Eric Idle|Declan Desmond +23|506–2320|PABF13|The Spy Who Learned Me|Marcia Wallace|Edna Krabappel +23|507–2321|PABF15|Ned 'n Edna's Blend|Marcia Wallace|Edna Krabappel +23|508–2322|PABF14|Lisa Goes Gaga|Lady Gaga|Herself +23|508–2322|PABF14|Lisa Goes Gaga|Kevin Michael Richardson|Conductor +24|509–2401|PABF21|Moonshine River|Ken Burns|Narrator +24|509–2401|PABF21|Moonshine River|Zooey Deschanel|Mary Spuckler +24|509–2401|PABF21|Moonshine River|Sarah Michelle Gellar|Gina Vendetti +24|509–2401|PABF21|Moonshine River|Anne Hathaway|Jenny +24|509–2401|PABF21|Moonshine River|Maurice LaMarche|Charlie Sheen +24|509–2401|PABF21|Moonshine River|Don Pardo|Himself +24|509–2401|PABF21|Moonshine River|Natalie Portman|Darcy +24|509–2401|PABF21|Moonshine River|Kevin Michael Richardson|Drummer +24|509–2401|PABF21|Moonshine River|Al Roker|Himself 
+24|509–2401|PABF21|Moonshine River|Sarah Silverman|Nikki McKenna +24|509–2401|PABF21|Moonshine River|Marcia Wallace|Edna Krabappel +24|510–2402|PABF17|Treehouse of Horror XXIII|Jon Lovitz|Artie Ziff +24|510–2402|PABF17|Treehouse of Horror XXIII|Marcia Wallace|Edna Krabappel +24|511–2403|PABF18|Adventures in Baby-Getting|Jeff Gordon|Himself +24|512–2404|PABF15|Gone Abie Gone|Marvin Hamlisch|Himself +24|512–2404|PABF15|Gone Abie Gone|Anika Noni Rose|Rita LaFleur +24|512–2404|PABF15|Gone Abie Gone|Jennifer Tilly|Herself +24|513–2405|PABF19|Penny-Wiseguys|Steve Carell|Dan Gillick +24|513–2405|PABF19|Penny-Wiseguys|Terry W. Greene|Giant Ant +24|513–2405|PABF19|Penny-Wiseguys|Joe Mantegna|Fat Tony +24|513–2405|PABF19|Penny-Wiseguys|Alex Trebek|Himself +24|514–2406|PABF22|A Tree Grows in Springfield|Kelsey Grammer|Sideshow Bob +24|514–2406|PABF22|A Tree Grows in Springfield|Marcia Wallace|Edna Krabappel +24|515–2407|PABF20|The Day the Earth Stood Cool|Fred Armisen|Terrence +24|515–2407|PABF20|The Day the Earth Stood Cool|Carrie Brownstein|Emily +24|515–2407|PABF20|The Day the Earth Stood Cool|Colin Meloy|Himself +24|515–2407|PABF20|The Day the Earth Stood Cool|Patton Oswalt|T-Rex +24|515–2407|PABF20|The Day the Earth Stood Cool|Marcia Wallace|Edna Krabappel +24|517–2409|RABF02|Homer Goes to Prep School|Maurice LaMarche|Prepper +24|517–2409|RABF02|Homer Goes to Prep School|Tom Waits|Lloyd +24|518–2410|RABF03|A Test Before Trying|Valerie Harper|Proctor Jennifer Clarkeson +24|518–2410|RABF03|A Test Before Trying|Marcia Wallace|Edna Krabappel +24|519–2411|RABF04|The Changing of the Guardian|Danny DeVito|Herbert Powell +24|519–2411|RABF04|The Changing of the Guardian|Terry W. Greene|Man Baby +24|519–2411|RABF04|The Changing of the Guardian|Rashida Jones|Portia +24|520–2412|RABF07|Love is a Many-Splintered Thing|Robert A. Caro|Himself +24|520–2412|RABF07|Love is a Many-Splintered Thing|Benedict Cumberbatch|Prime Minister; Alan Rickman +24|520–2412|RABF07|Love is a Many-Splintered Thing|Zooey Deschanel|Mary Spuckler +24|520–2412|RABF07|Love is a Many-Splintered Thing|Max Weinberg|Himself +24|521–2413|RABF05|Hardly Kirk-ing|Kevin Michael Richardson|Book Store Security Guard +24|523–2415|RABF09|Black Eyed, Please|Richard Dawkins|Himself +24|523–2415|RABF09|Black Eyed, Please|Tina Fey|Ms. Cantwell +24|523–2415|RABF09|Black Eyed, Please|Marcia Wallace|Edna Krabappel +24|524–2416|RABF10|Dark Knight Court|Michael Dees|Sings "Easter Parade" +24|524–2416|RABF10|Dark Knight Court|Janet Reno|Herself +24|524–2416|RABF10|Dark Knight Court|Maggy Reno Hurchalla|Janet Reno +24|525–2417|RABF08|What Animated Women Want|Bryan Cranston|Walter White (live action) +24|525–2417|RABF08|What Animated Women Want|Maurice LaMarche|Chef Naziwa; Karl Malden +24|525–2417|RABF08|What Animated Women Want|Aaron Paul|Jesse Pinkman (live action) +24|525–2417|RABF08|What Animated Women Want|Wanda Sykes|School therapist +24|525–2417|RABF08|What Animated Women Want|George Takei|Akira +24|525–2417|RABF08|What Animated Women Want|Marcia Wallace|Edna Krabappel +24|526–2418|RABF11|Pulpit Friction|Edward Norton|Rev. 
Elijah Hooper +24|526–2418|RABF11|Pulpit Friction|Susie Stevens|Sings "Day By Day" +24|527–2419|RABF13|Whiskey Business|Tony Bennett|Sings "Capital City" +24|527–2419|RABF13|Whiskey Business|Kevin Michael Richardson|Police officer +24|527–2419|RABF13|Whiskey Business|Sonny Rollins|Himself +24|527–2419|RABF13|Whiskey Business|Ron Taylor|Bleeding Gums Murphy +24|528–2420|RABF12|The Fabulous Faker Boy|Justin Bieber|Himself +24|528–2420|RABF12|The Fabulous Faker Boy|Bill Hader|Slava +24|528–2420|RABF12|The Fabulous Faker Boy|Jane Krakowski|Zhenya +24|528–2420|RABF12|The Fabulous Faker Boy|Patrick Stewart|Power plant employee +24|529–2421|RABF14|The Saga of Carl|Sigur Rós|Themselves +24|530–2422|RABF17|Dangers on a Train|Lisa Lampanelli|Ramona +24|530–2422|RABF17|Dangers on a Train|Seth MacFarlane|Ben +25|531–2501|RABF20|Homerland|Kevin Michael Richardson|FBI agent +25|531–2501|RABF20|Homerland|Kristen Wiig|Annie Crawford +25|532–2502|RABF16|Treehouse of Horror XXIV|Marcia Wallace|Edna Krabappel +25|533–2503|RABF18|Four Regrettings and a Funeral|Rachel Maddow|Herself +25|533–2503|RABF18|Four Regrettings and a Funeral|Joe Namath|Himself +25|533–2503|RABF18|Four Regrettings and a Funeral|Marcia Wallace|Edna Krabappel +25|534–2504|RABF22|YOLO|Denise Donatelli|Sings "You Only Live Once" (parody of the song You Only Live Twice) +25|534–2504|RABF22|YOLO|Jon Lovitz|Llewellyn Sinclair +25|534–2504|RABF22|YOLO|Marcia Wallace|Edna Krabappel +25|534–2504|RABF19|Labor Pains|Elisabeth Moss|Gretchen +25|536–2506|SABF02|The Kid Is All Right|Anderson Cooper|Himself +25|536–2506|SABF02|The Kid Is All Right|Maurice LaMarche|John Kerry +25|536–2506|SABF02|The Kid Is All Right|Eva Longoria|Isabel Gutierrez +25|537–2507|SABF04|Yellow Subterfuge|Kevin Michael Richardson|Jamaican Krusty +25|539–2509|SABF05|Steal This Episode|Judd Apatow|Himself +25|539–2509|SABF05|Steal This Episode|Will Arnett|Deputy Director Gratman +25|539–2509|SABF05|Steal This Episode|Rob Halford|Himself +25|539–2509|SABF05|Steal This Episode|Leslie Mann|Herself +25|539–2509|SABF05|Steal This Episode|Kevin Michael Richardson|Inmate; Hollywood Bigshot +25|539–2509|SABF05|Steal This Episode|Seth Rogen|Himself +25|539–2509|SABF05|Steal This Episode|Paul Rudd|Himself +25|539–2509|SABF05|Steal This Episode|Channing Tatum|Himself playing Homer +25|540–2510|SABF03|Married to the Blob|Harlan Ellison|Himself +25|540–2510|SABF03|Married to the Blob|Maurice LaMarche|Milo +25|540–2510|SABF03|Married to the Blob|Stan Lee|Himself +25|541–2511|SABF06|Specs and the City|Maurice LaMarche|Football Commentator +25|541–2511|SABF06|Specs and the City|Will Lyman|Himself +25|542–2512|SABF08|Diggs|Daniel Radcliffe|Diggs +25|543–2513|SABF07|The Man Who Grew Too Much|Kelsey Grammer|Sideshow Bob +25|543–2513|SABF07|The Man Who Grew Too Much|Marcia Wallace|Edna Krabappel +25|544–2514|SABF09|The Winter of His Content|Kevin Michael Richardson|Hibbert's Father; S.A.T. 
Preppers Member +25|545–2515|SABF10|The War of Art|Renee Ridgeley|Waitress +25|545–2515|SABF10|The War of Art|Max von Sydow|Klaus Ziegler +25|546–2516|SABF11|You Don't Have to Live Like a Referee|Andrés Cantor|Himself +25|546–2516|SABF11|You Don't Have to Live Like a Referee|Joey Vieira|Soccer Player +25|547–2517|SABF12|Luca$|Zach Galifianakis|Lucas Bortner +25|548–2518|SABF13|Days of Future Future|Amy Poehler|Jenda +25|549–2519|SABF14|What to Expect When Bart's Expecting|Tavi Gevinson|Jenny +25|549–2519|SABF14|What to Expect When Bart's Expecting|Joe Mantegna|Fat Tony +25|551–2521|SABF15|Pay Pal|Carl Kasell|Himself +25|551–2521|SABF15|Pay Pal|John Oliver|Booth Wilkes-John +25|551–2521|SABF15|Pay Pal|Peter Sagal|Himself +25|552–2522|SABF18|The Yellow Badge of Cowardge|Glenn Close|Mona Simpson +25|552–2522|SABF18|The Yellow Badge of Cowardge|Edwin Moses|Himself +26|553–2601|SABF20|Clown in the Dumps|Kelsey Grammer|Sideshow Bob +26|553–2601|SABF20|Clown in the Dumps|Don Hertzfeldt|Couch gag Simpsons +26|553–2601|SABF20|Clown in the Dumps|Maurice LaMarche|Clive Meriwether; Neil Simon; Rodney Dangerfield +26|553–2601|SABF20|Clown in the Dumps|Jackie Mason|Rabbi Hyman Krustofsky +26|553–2601|SABF20|Clown in the Dumps|David Hyde Pierce|Himself +26|553–2601|SABF20|Clown in the Dumps|Jeff Ross|Himself +26|553–2601|SABF20|Clown in the Dumps|Sarah Silverman|Herself +26|554–2602|SABF17|The Wreck of the Relationship|Nick Offerman|Captain Bowditch +26|554–2602|SABF17|The Wreck of the Relationship|Marc Wilmore|Narrator +26|556–2604|SABF21|Treehouse of Horror XXV|John Ratzenberger|CGI Homer Simpson +26|557–2605|SABF22|Opposites A-Frack|Jane Fonda|Maxine Lombard +26|557–2605|SABF22|Opposites A-Frack|Robert Siegel|Himself +26|558–2606|SABF16|Simpsorama|John DiMaggio|Bender +26|558–2606|SABF16|Simpsorama|David Herman|Scruffy +26|558–2606|SABF16|Simpsorama|Maurice LaMarche|Morbo; Hedonismbot; Lrrr +26|558–2606|SABF16|Simpsorama|Phil LaMarr|Hermes Conrad +26|558–2606|SABF16|Simpsorama|Katey Sagal|Turanga Leela +26|558–2606|SABF16|Simpsorama|Sally Stevens|Sings "I Will Wait For You" and "A Natural Woman" +26|558–2606|SABF16|Simpsorama|Lauren Tom|Amy Wong +26|558–2606|SABF16|Simpsorama|Frank Welker|Nibbler +26|558–2606|SABF16|Simpsorama|Billy West|Philip J. Fry; Professor Farnsworth; Dr. Zoidberg +26|559–2607|TABF01|Blazed and Confused|Willem Dafoe|Jack Lassen +26|559–2607|TABF01|Blazed and Confused|Kelsey Grammer|Sideshow Bob +26|559–2607|TABF01|Blazed and Confused|David Silverman|Himself +26|560–2608|TABF02|Covercraft|Will Forte|King Toot +26|560–2608|TABF02|Covercraft|Sammy Hagar|Himself +26|560–2608|TABF02|Covercraft|Heavy Young Heathens|performing "Hopin' for a Dream" +26|560–2608|TABF02|Covercraft|Matthew Sweet|Nick Delacourt +26|561–2609|TABF03|I Won't Be Home for Christmas|Gene Lockhart (archival)|Henry X. 
Harper +26|563–2611|TABF05|Bart's New Friend|Stacy Keach|Don Bookner +26|564–2612|TABF04|The Musk Who Fell To Earth|Elon Musk|Himself +26|565–2613|TABF06|Walking Big & Tall|Kevin Michael Richardson|Albert +26|565–2613|TABF06|Walking Big & Tall|Pharrell Williams|Himself +26|566–2614|TABF07|My Fare Lady|Christopher Lloyd|Jim Ignatowski +26|566–2614|TABF07|My Fare Lady|Rich Sommer|Young Man +26|567–2615|TABF08|The Princess Guide|Richard Branson|Himself +26|567–2615|TABF08|The Princess Guide|Yaya DaCosta|Princess Kemi +26|567–2615|TABF08|The Princess Guide|Jon Lovitz|Enrico Irritazio +26|567–2615|TABF08|The Princess Guide|Kevin Michael Richardson|The Nigerian King +26|568–2616|TABF09|Sky Police|Nathan Fielder|Doug Blattner +26|569–2617|TABF10|Waiting for Duffman|Cat Deeley|Herself +26|569–2617|TABF10|Waiting for Duffman|R. Lee Ermey|Colonel Leslie Hapablap +26|569–2617|TABF10|Waiting for Duffman|Stacy Keach|Howard K. Duff VIII +26|572–2620|TABF13|Let's Go Fly a Coot|Glenn Close|Mona Simpson +26|572–2620|TABF13|Let's Go Fly a Coot|Carice van Houten|Annika Van Houten +26|573–2621|TABF15|Bull-E|Albert Brooks|Dr. Raufbold +26|573–2621|TABF15|Bull-E|Joe Mantegna|Fat Tony +26|573–2621|TABF15|Bull-E|Johnny Mathis|Himself +26|574–2622|TABF16|Mathlete's Feat|Justin Roiland|Rick Sanchez and Morty Smith +27|575–2701|TABF14|Every Man's Dream|Adam Driver|Adam Sackler +27|575–2701|TABF14|Every Man's Dream|Lena Dunham|Candace; Hannah Helene Horvath +27|575–2701|TABF14|Every Man's Dream|Laura Ingraham|Dr. Zilowitz +27|575–2701|TABF14|Every Man's Dream|Jemima Kirke|Candace's Friend #3 +27|575–2701|TABF14|Every Man's Dream|Zosia Mamet|Candace's Friend #2 +27|575–2701|TABF14|Every Man's Dream|Allison Williams|Candace's Friend #1 +27|576–2702|TABF17|'Cue Detective|Alton Brown|Himself +27|576–2702|TABF17|'Cue Detective|Rex Harrison (archival)|Doctor John Dolittle +27|576–2702|TABF17|'Cue Detective|Bobby Moynihan|Tyler Boom +27|576–2702|TABF17|'Cue Detective|Anthony Newley (archival)|Matthew Mugg +27|576–2702|TABF17|'Cue Detective|Edward James Olmos|Pit Master +27|576–2702|TABF17|'Cue Detective|Ben Schwartz|Clerk +27|577–2703|TABF19|Puffless|Jon Lovitz|Cigarette +27|577–2703|TABF19|Puffless|Yo-Yo Ma|Himself +27|578–2704|TABF22|Halloween of Horror|Blake Anderson|Dickie +27|578–2704|TABF22|Halloween of Horror|Nick Kroll|Lem +27|578–2704|TABF22|Halloween of Horror|Renee Ridgeley|Krustyland Security Guard +27|579–2705|TABF18|Treehouse of Horror XXVI|Kelsey Grammer|Sideshow Bob +27|579–2705|TABF18|Treehouse of Horror XXVI|Chris Wedge|Scrat +27|580–2706|TABF21|Friend With Benefit|Kristen Bell|Harper Jambowski +27|580–2706|TABF21|Friend With Benefit|David Copperfield|Himself +27|584–2710|VABF03|The Girl Code|Stephen Merchant|CONRAD +27|584–2710|VABF03|The Girl Code|Kaitlin Olson|Quinn Hopper +27|585–2711|VABF04|Teenage Mutant Milk-Caused Hurdles|Sofía Vergara|Ms. Carol Berrera +27|586–2712|VABF05|Much Apu About Something|Utkarsh Ambudkar|Jamshed "Jay" Nahasapeemapetilon +27|587–2713|VABF07|Love Is in the N2-O2-Ar-CO2-Ne-He-CH4|Glenn Close|Mona Simpson +27|588–2714|VABF06|Gal of Constant Sorrow|Bob Boilen|Himself +27|588–2714|VABF06|Gal of Constant Sorrow|Kelsey Grammer|Sideshow Bob +27|588–2714|VABF06|Gal of Constant Sorrow|Kate McKinnon|Hettie Mae Boggs +27|588–2714|VABF06|Gal of Constant Sorrow|Natalie Maines|Hettie Mae Boggs' singing voice +27|589–2715|VABF08|Lisa the Veterinarian|Michael York|Dr. Lionel Budgie +27|590–2716|VABF09|The Marge-ian Chronicles|Brian J. 
Kaufman|Exploration Incorporated Candidate +27|590–2716|VABF09|The Marge-ian Chronicles|Tom Scharpling|Paul +27|590–2716|VABF09|The Marge-ian Chronicles|Jon Wurster|Barry +27|591–2717|VABF10|The Burns Cage|George Takei|Himself +27|592–2718|VABF11|How Lisa Got Her Marge Back|Andrew Rannells|Himself +27|594–2720|VABF14|To Courier with Love|Jay Leno|Himself +27|596–2722|VABF15|Orange Is the New Yellow|Kevin Michael Richardson|Prison Guard +28|597–2801|VABF20|Monty Burns' Fleeing Circus|Amy Schumer|Daphne Burns +28|597–2801|VABF20|Monty Burns' Fleeing Circus|Pendleton Ward|Sings "Simpsons Time" (parody of Adventure Time theme song) +28|598–2802|VABF18|Friends and Family|Allison Janney|Julia +28|599–2803|VABF17|The Town|Bill Burr|Townie; Man in Tunnel; Cart-Warning Man +28|599–2803|VABF17|The Town|Michael Chiklis|Handsome Quarterback +28|599–2803|VABF17|The Town|Rachel Dratch|Bostonian Doctor +28|599–2803|VABF17|The Town|Doris Kearns Goodwin|Herself +28|599–2803|VABF17|The Town|Dana Gould|Murphy +28|599–2803|VABF17|The Town|Mike Mitchell|Jay +28|599–2803|VABF17|The Town|Jason Nash|Southie Criminals ; Southie Philanthropists +28|600–2804|VABF16|Treehouse of Horror XXVII|Drew Carey|Himself +28|600–2804|VABF16|Treehouse of Horror XXVII|Donald Fagen|Himself +28|600–2804|VABF16|Treehouse of Horror XXVII|Kelsey Grammer|Sideshow Bob +28|600–2804|VABF16|Treehouse of Horror XXVII|Maurice LaMarche|Hedonismbot +28|600–2804|VABF16|Treehouse of Horror XXVII|Judith Owen|Sings "600" (parody of the song Goldfinger) +28|600–2804|VABF16|Treehouse of Horror XXVII|Sarah Silverman|Rachel Cohen +28|601–2805|VABF21|Trust But Clarify|Dan Rather|Himself +28|603–2807|VABF19|Havana Wild Weekend|Stacy Keach|Howard K. Duff VIII +28|603–2807|VABF19|Havana Wild Weekend|Deb Lacusta|Isabella +28|604–2808|WABF01|Dad Behavior|Matt Leinart|Himself +28|606–2810|WABF02|The Nightmare After Krustmas|Wayne Gretzky|Himself +28|606–2810|WABF02|The Nightmare After Krustmas|Theo Jansen|Himself +28|606–2810|WABF02|The Nightmare After Krustmas|Natasha Lyonne|Sophie +28|606–2810|WABF02|The Nightmare After Krustmas|Jackie Mason|Rabbi Hyman Krustofsky +28|607–2811|WABF06|Pork and Burns|Joyce Carol Oates|Herself +28|607–2811|WABF06|Pork and Burns|Michael York|Dr. Lionel Budgie +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Charles Barkley|Himself +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Jim Beanz|performing End-Titles music +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|V. Bozeman|Additional vocals +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Charlean Carmon|Additional vocals +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Common|Himself +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Snoop Dogg|Himself +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Taraji P. 
Henson|Praline +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Keegan-Michael Key|Jazzy James +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Phil LaMarr|Auctioneer; Informed Party Guest +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Dawnn Lewis|Miata; Female Party Guest +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|RZA|Himself +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Sally Stevens|Additional vocals +28|608–2812; 609–2813|WABF04; WABF05|The Great Phatsby|Destiny Torres|"CEO of Obsidian" Female Vocalist; Additional vocals +28|610–2814|WABF07|Fatzcarraldo|Glenn Close|Mona Simpson +28|611–2815|WABF08|The Cad and the Hat|Magnus Carlsen|Himself +28|611–2815|WABF08|The Cad and the Hat|Seth Green|Robot Chicken Nerd +28|611–2815|WABF08|The Cad and the Hat|Patton Oswalt|Bart's Guilt +28|612–2816|WABF09|Kamp Krustier|Lizzy Caplan|Virginia Johnson +28|612–2816|WABF09|Kamp Krustier|Michael Sheen|William Masters +28|613–2817|WABF10|22 for 30|Stephen Curry|Himself +28|613–2817|WABF10|22 for 30|Earl Mann|Narrator (Eddie Muntz) +28|613–2817|WABF10|22 for 30|Joe Mantegna|Fat Tony +28|614–2818|WABF11|A Father's Watch|Vanessa Bayer|Dr. Clarity Hoffman-Roth +28|614–2818|WABF11|A Father's Watch|Brian Posehn|Dumlee +28|614–2818|WABF11|A Father's Watch|Rob Riggle|Dr. Fenton Pooltoy +28|614–2818|WABF11|A Father's Watch|Adam Silver|Himself +28|615–2819|WABF12|The Caper Chase|Jason Alexander|Bourbon Verlander +28|615–2819|WABF12|The Caper Chase|Ken Jennings|Himself +28|615–2819|WABF12|The Caper Chase|Stan Lee|Himself +28|615–2819|WABF12|The Caper Chase|Robert McKee|Himself +28|615–2819|WABF12|The Caper Chase|Suze Orman|Herself +28|615–2819|WABF12|The Caper Chase|Neil deGrasse Tyson|Himself +28|616–2820|WABF13|Looking for Mr. Goodbart|Valerie Harper|Ma-Ma +28|616–2820|WABF13|Looking for Mr. Goodbart|Kipp Lennon|Sings "Small Bang Hypothesis" (parody of The Big Bang Theory theme song) +28|616–2820|WABF13|Looking for Mr. Goodbart|Jennifer Saunders|Phoebe Pratt +28|617–2821|WABF14|Moho House|Valerie Harper|Mrs. Butterworth +28|617–2821|WABF14|Moho House|Michael York|Nigel +28|618–2822|WABF15|Dogtown|Michael York|Dr. Lionel Budgie +29|619–2901|WABF17|The Serfsons|Billy Boyd|Sings "The Perfect Tale" (parody of the song The Last Goodbye) +29|619–2901|WABF17|The Serfsons|Nikolaj Coster-Waldau|Markery +29|620–2902|WABF22|Springfield Splendor|Alison Bechdel|Herself +29|620–2902|WABF22|Springfield Splendor|Rachel Bloom|Annette +29|620–2902|WABF22|Springfield Splendor|Roz Chast|Herself +29|620–2902|WABF22|Springfield Splendor|Dan Harmon|Himself +29|620–2902|WABF22|Springfield Splendor|Kipp Lennon|Sings "Collaboration" (parody of the song Infatuation) +29|620–2902|WABF22|Springfield Splendor|Marjane Satrapi|Herself +29|620–2902|WABF22|Springfield Splendor|Martin Short|Guthrie Frenel +29|621–2903|WABF16|Whistler's Father|Nick Fascitelli|Whistler +29|621–2903|WABF16|Whistler's Father|Valerie Harper|Backstage Mother #2 +29|621–2903|WABF16|Whistler's Father|Joe Mantegna|Fat Tony +29|622–2904|WABF18|Treehouse of Horror XXVIII|Mario Batali|Himself +29|622–2904|WABF18|Treehouse of Horror XXVIII|Ben Daniels|Irish Priest +29|622–2904|WABF18|Treehouse of Horror XXVIII|William Friedkin|Dr. Kenneth Humphries +29|622–2904|WABF18|Treehouse of Horror XXVIII|Neil Gaiman|Snowball II +29|624–2906|WABF20|The Old Blue Mayor She Ain't What She Used to Be|Carolyn Omine|Focus Group Woman +29|626–2908|XABF01|Mr. Lisa's Opus|Kat Dennings|Valerie +29|626–2908|XABF01|Mr. Lisa's Opus|Valerie Harper|Ms. 
Myles +29|626–2908|XABF01|Mr. Lisa's Opus|Norman Lear|Himself +29|626–2908|XABF01|Mr. Lisa's Opus|Kipp Lennon|Leon Kompowsky +29|626–2908|XABF01|Mr. Lisa's Opus|Jon Lovitz|Artie Ziff +29|627–2909|XABF02|Gone Boy|Kelsey Grammer|Sideshow Bob +29|627–2909|XABF02|Gone Boy|Valerie Harper|Nurse +29|627–2909|XABF02|Gone Boy|John F. Kennedy (archival)|Himself +29|627–2909|XABF02|Gone Boy|Shaquille O'Neal|Himself +29|628–2910|XABF03|Haw-Haw Land|Ed Sheeran|Brendan Beiderbecke +29|629–2911|XABF04|Frink Gets Testy|Valerie Harper|Proctor Jennifer Clarkeson +29|629–2911|XABF04|Frink Gets Testy|Maurice LaMarche|Orson Welles +29|630–2912|XABF05|Homer Is Where the Art Isn't|Bill Hader|Manacek +29|630–2912|XABF05|Homer Is Where the Art Isn't|Cecily Strong|Megan Matheson +29|631–2913|XABF06|3 Scenes Plus a Tag from a Marriage|John Baldessari|Himself +29|631–2913|XABF06|3 Scenes Plus a Tag from a Marriage|Kevin Pollak|Ross; Bagel Man; Professor Thernstrom +29|631–2913|XABF06|3 Scenes Plus a Tag from a Marriage|J. K. Simmons|J.J. Gruff +29|632–2914|XABF08|Fears of a Clown|Andy Daly|Judge Dowd +29|632–2914|XABF08|Fears of a Clown|Damian Kulash|Himself +29|632–2914|XABF08|Fears of a Clown|Dawnn Lewis|Children's Court Bailiff +29|632–2914|XABF08|Fears of a Clown|Jon Lovitz|Llewellyn Sinclair +29|632–2914|XABF08|Fears of a Clown|Jackie Mason|Rabbi Hyman Krustofsky +29|632–2914|XABF08|Fears of a Clown|Tim Nordwind|Himself +29|633–2915|XABF07|No Good Read Goes Unpunished|Daniel Radcliffe|Himself +29|633–2915|XABF07|No Good Read Goes Unpunished|Jimmy O. Yang|Sun Tzu +29|634–2916|XABF10|King Leer|Ray Liotta|Morty Szyslak +29|634–2916|XABF10|King Leer|Debi Mazar|Minnie Szyslak +29|634–2916|XABF10|King Leer|Jonathan Schmock|Johnny Bermuda Salesman +29|635–2917|XABF11|Lisa Gets the Blues|Dawnn Lewis|Co-Pilot; Pat O'Brien's Waitress +29|635–2917|XABF11|Lisa Gets the Blues|Trombone Shorty|Himself +29|636–2918|XABF09|Forgive and Regret|Glenn Close|Mona Simpson +29|636–2918|XABF09|Forgive and Regret|Caitlin Parrott|Sings "Four-Legged Angel" +29|637–2919|XABF12|Left Behind|Michael Dees|Sings "My Funny Valentine" +29|637–2919|XABF12|Left Behind|Marcia Wallace|Edna Krabappel +29|638–2920|XABF13|Throw Grampa from the Dane|Sidse Babett Knudsen|Danish Bar Woman +29|639–2921|XABF14|Flanders' Ladder|Jackie Mason|Rabbi Hyman Krustofsky +30|640–3001|XABF19|Bart's Not Dead|Dave Attell|Luke +30|640–3001|XABF19|Bart's Not Dead|Emily Deschanel|Herself; Herself playing Marge Simpson +30|640–3001|XABF19|Bart's Not Dead|Gal Gadot|Herself; Herself playing Lisa Simpson +30|640–3001|XABF19|Bart's Not Dead|Jonathan Groff|Actor playing Bart Simpson +30|640–3001|XABF19|Bart's Not Dead|Pete Holmes|Matthew +30|641–3002|XABF15|Heartbreak Hotel|Joe Clabby|Curtis +30|641–3002|XABF15|Heartbreak Hotel|Rhys Darby|Tag Tuckerbag +30|641–3002|XABF15|Heartbreak Hotel|Renee Ridgeley|Honey +30|641–3002|XABF15|Heartbreak Hotel|George Segal|Nick +30|642–3003|XABF17|My Way or the Highway to Heaven|H. 
Jon Benjamin|Bob Belcher +30|642–3003|XABF17|My Way or the Highway to Heaven|Jon Lovitz|Himself +30|642–3003|XABF17|My Way or the Highway to Heaven|Dan Mintz|Tina Belcher +30|642–3003|XABF17|My Way or the Highway to Heaven|Eugene Mirman|Gene Belcher +30|642–3003|XABF17|My Way or the Highway to Heaven|Ted Moock|Sings "Cheek to Cheek" +30|642–3003|XABF17|My Way or the Highway to Heaven|Tracy Morgan|Himself +30|642–3003|XABF17|My Way or the Highway to Heaven|John Roberts|Linda Belcher +30|642–3003|XABF17|My Way or the Highway to Heaven|Kristen Schaal|Louise Belcher +30|644–3005|XABF18|Baby You Can't Drive My Car|Tracy Morgan|Tow Truck Driver +30|645–3006|XABF20|From Russia Without Love|Jon Lovitz|Hacky; Snitchy the Weasel +30|645–3006|XABF20|From Russia Without Love|Ksenia Solo|Anastasia Alekova +30|646–3007|XABF21|Werking Mom|Sutan Amrull|Himself +30|646–3007|XABF21|Werking Mom|RuPaul|Queen Chante +30|646–3007|XABF21|Werking Mom|Scott Thompson|Grady +30|647–3008|XABF22|Krusty the Clown|Billy Eichner|Billy +30|647–3008|XABF22|Krusty the Clown|Peter Serafinowicz|Google-Disney CEO +30|648–3009|YABF01|Daddicus Finch|Phillip Alford (archival)|Jeremy Atticus "Jem" Finch +30|648–3009|YABF01|Daddicus Finch|Mary Badham (archival)|Jean Louise "Scout" Finch +30|648–3009|YABF01|Daddicus Finch|Jon Lovitz|Llewellyn Sinclair; Rabbi +30|648–3009|YABF01|Daddicus Finch|John Megna (archival)|Charles "Dill" Baker Harris +30|648–3009|YABF01|Daddicus Finch|Gregory Peck (archival)|Atticus Finch +30|648–3009|YABF01|Daddicus Finch|J. K. Simmons|Dr. Jessup +30|649–3010|YABF02|'Tis the 30th Season|Jane Lynch|Jeanie +30|650–3011|YABF03|Mad About the Toy|Bryan Batt|Philip Hefflin +30|650–3011|YABF03|Mad About the Toy|Bill de Blasio|Himself +30|650–3011|YABF03|Mad About the Toy|Lawrence O'Donnell|Himself +30|651–3012|YABF04|The Girl on the Bus|Terry Gross|Herself +30|651–3012|YABF04|The Girl on the Bus|Patti LuPone|Cheryl Monroe +30|652–3013|YABF06|I'm Dancing as Fat as I Can|Ted Sarandos|Himself +30|653–3014|YABF05|The Clown Stays in the Picture|Marc Maron|Himself +30|654–3015|YABF07|101 Mitigations|Guillermo del Toro|Himself +30|655–3016|YABF08|I Want You (She's So Heavy)|Wallace Shawn|Wallace the Hernia +30|656–3017|YABF09|E My Sports|Ken Jeong|Korean Monk #1; Korean Monk #2 +30|656–3017|YABF09|E My Sports|Natasha Lyonne|Sophie +30|656–3017|YABF09|E My Sports|David Turley|"Conflict of Enemies" Commentator +30|657–3018|YABF10|Bart vs. Itchy & Scratchy|Awkwafina|Carmen +30|657–3018|YABF10|Bart vs. Itchy & Scratchy|Nicole Byer|Erica +30|657–3018|YABF10|Bart vs. Itchy & Scratchy|Chelsea Peretti|Piper +30|658–3019|YABF11|Girl's in the Band|Dave Matthews|Lloyd the Bartender +30|658–3019|YABF11|Girl's in the Band|J. K. Simmons|Victor Kleskow +30|659–3020|YABF12|I'm Just a Girl Who Can't Say D'oh|Josh Groban|Professor John Frink's singing voice +30|659–3020|YABF12|I'm Just a Girl Who Can't Say D'oh|John Lithgow|Himself +30|659–3020|YABF12|I'm Just a Girl Who Can't Say D'oh|Jon Lovitz|Llewellyn Sinclair +30|659–3020|YABF12|I'm Just a Girl Who Can't Say D'oh|Okilly Dokilly|Themselves performing "White Wine Spritzer" +30|660–3021|YABF14|D'oh Canada|Awkwafina|Dr. 
Chang +30|660–3021|YABF14|D'oh Canada|Judy Blume|Herself +30|660–3021|YABF14|D'oh Canada|Lucas Meyer|Justin Trudeau +30|661–3022|YABF15|Woo-Hoo Dunnit?|Ken Burns|Himself +30|661–3022|YABF15|Woo-Hoo Dunnit?|Will Forte|King Toot +30|661–3022|YABF15|Woo-Hoo Dunnit?|Jackie Mason|Rabbi Hyman Krustofsky +30|661–3022|YABF15|Woo-Hoo Dunnit?|Liev Schreiber|"Dateline: Springfield" Narrator +30|662–3023|YABF16|Crystal Blue-Haired Persuasion|Illeana Douglas|New Age Clerk +30|662–3023|YABF16|Crystal Blue-Haired Persuasion|Werner Herzog|Walter Hotenhoffer +30|662–3023|YABF16|Crystal Blue-Haired Persuasion|Jenny Slate|Piper Paisley From f3b788a41fe8b3ce0dd3738a4987ebcc28952efd Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Mon, 4 Nov 2019 23:15:12 -0800 Subject: [PATCH 02/64] fix documentation --- R/identify_delim.R | 2 +- R/tt_read_data.R | 2 +- man/download_read.Rd | 6 +++--- man/identify_delim.Rd | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/identify_delim.R b/R/identify_delim.R index 85b32f7..783ec95 100644 --- a/R/identify_delim.R +++ b/R/identify_delim.R @@ -9,7 +9,7 @@ #' @importFrom utils download.file #' -identify_delim <- function(file, +identify_delim <- function(path, delims = c("\t", ",", " ", "|", ";" ), n = 10, comment = "#", diff --git a/R/tt_read_data.R b/R/tt_read_data.R index 0936ae1..2d4d3c0 100644 --- a/R/tt_read_data.R +++ b/R/tt_read_data.R @@ -68,7 +68,7 @@ tt_read_url<-function(url){ #' @param find_delim should the delimeters be found for the file #' @importFrom utils download.file #' -download_read<-function(path, func, ..., mode="w", find_delim = FALSE){ +download_read<-function(url, func, ..., mode="w", find_delim = FALSE){ temp_file<-tempfile(fileext = paste0(".",tools::file_ext(path))) utils::download.file(path,temp_file,quiet = TRUE,mode=mode) diff --git a/man/download_read.Rd b/man/download_read.Rd index ff52011..8af753c 100644 --- a/man/download_read.Rd +++ b/man/download_read.Rd @@ -4,9 +4,11 @@ \alias{download_read} \title{utility to assist with 'reading' urls that cannot normally be read by file functions} \usage{ -download_read(path, func, ..., mode = "w", find_delim = FALSE) +download_read(url, func, ..., mode = "w", find_delim = FALSE) } \arguments{ +\item{url}{path to online file to be read} + \item{func}{the function to perform reading of url} \item{...}{args to pass to func} @@ -14,8 +16,6 @@ download_read(path, func, ..., mode = "w", find_delim = FALSE) \item{mode}{mode passed to \code{utils::download.file}. 
default is "w"}

\item{find_delim}{should the delimeters be found for the file}
-
-\item{url}{path to online file to be read}
 }
 \description{
 utility to assist with 'reading' urls that cannot normally be read by file functions
 }
diff --git a/man/identify_delim.Rd b/man/identify_delim.Rd
index 123faf4..6cc626c 100644
--- a/man/identify_delim.Rd
+++ b/man/identify_delim.Rd
@@ -4,10 +4,12 @@
 \alias{identify_delim}
 \title{Identify potential delimeters of file}
 \usage{
-identify_delim(file, delims = c("\\t", ",", " ", "|", ";"), n = 10,
+identify_delim(path, delims = c("\\t", ",", " ", "|", ";"), n = 10,
   comment = "#", skip = 0, quote = "\\"")
 }
 \arguments{
+\item{path}{path to file}
+
 \item{delims}{a vector of delimeters to try}

 \item{n}{number of rows to look at in the file to determine the delimters}

 \item{comment}{identify lines that are comments if this character is at the beginning}

 \item{skip}{number of lines to skip at the beginning}

 \item{quote}{set of quoting characters}
-
-\item{path}{path to file}
 }
 \description{
 Identify potential delimeters of file
 }

From 7343e9ddd1557d97e80a16e5ce19cf7ac9f7bf70 Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Mon, 4 Nov 2019 23:22:12 -0800
Subject: [PATCH 03/64] update the code to match the documentation

---
 R/identify_delim.R | 4 ++--
 R/tt_read_data.R   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/identify_delim.R b/R/identify_delim.R
index 783ec95..34ef509 100644
--- a/R/identify_delim.R
+++ b/R/identify_delim.R
@@ -18,7 +18,7 @@ identify_delim <- function(path,
   ) {
 
   # Load lines of file in
-  test <- readLines(file, n = n + skip)
+  test <- readLines(path, n = n + skip)
   if (skip > 0) {
     test <- test[-c(seq(skip))]
   }
@@ -26,7 +26,7 @@
   if(sum(comment_lines) > 0){
     eof<- FALSE
     while((length(test) - sum(comment_lines) < n) & !eof){
-      test <- readLines(file, n = n + skip + sum(comment_lines))
+      test <- readLines(path, n = n + skip + sum(comment_lines))
       if(length(test) < n + skip + sum(comment_lines)){
         eof <- TRUE
       }
diff --git a/R/tt_read_data.R b/R/tt_read_data.R
index 2d4d3c0..3e9c312 100644
--- a/R/tt_read_data.R
+++ b/R/tt_read_data.R
@@ -70,8 +70,8 @@ tt_read_url<-function(url){
 download_read<-function(url, func, ..., mode="w", find_delim = FALSE){
 
-  temp_file<-tempfile(fileext = paste0(".",tools::file_ext(path)))
-  utils::download.file(path,temp_file,quiet = TRUE,mode=mode)
+  temp_file<-tempfile(fileext = paste0(".",tools::file_ext(url)))
+  utils::download.file(url,temp_file,quiet = TRUE,mode=mode)
 
   dots <- as.list(substitute(substitute(...)))[-1]
   func_call <- c(substitute(func),substitute(temp_file),dots)

From 30e35812b8b72f9aa655d69e0b8801f58a99139e Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Tue, 5 Nov 2019 23:32:50 -0800
Subject: [PATCH 04/64] cannot access data via `$` for some reason. 
but now it will only load files called out in the readme, resolving issue #20 --- R/identify_delim.R | 32 +++++----- R/tt_datasets.R | 87 ++++++++++++++-------------- R/tt_load.R | 69 +++++++++++++++++----- R/tt_make_url.R | 20 ++++--- R/tt_read_data.R | 85 ++++++++++++++++----------- R/utils.R | 21 +++---- tests/testthat/test-identify_delim.R | 24 ++++---- tests/testthat/test-tt_available.R | 6 +- tests/testthat/test-tt_load_gh.R | 65 ++++++++++++--------- tests/testthat/test-tt_read_data.R | 12 ++-- tests/testthat/test-utils.R | 54 ++++++++++------- 11 files changed, 276 insertions(+), 199 deletions(-) diff --git a/R/identify_delim.R b/R/identify_delim.R index 34ef509..44235fa 100644 --- a/R/identify_delim.R +++ b/R/identify_delim.R @@ -10,12 +10,11 @@ #' identify_delim <- function(path, - delims = c("\t", ",", " ", "|", ";" ), + delims = c("\t", ",", " ", "|", ";"), n = 10, comment = "#", skip = 0, - quote = "\"" - ) { + quote = "\"") { # Load lines of file in test <- readLines(path, n = n + skip) @@ -23,11 +22,11 @@ identify_delim <- function(path, test <- test[-c(seq(skip))] } comment_lines <- grepl("^[#]", test) - if(sum(comment_lines) > 0){ - eof<- FALSE - while((length(test) - sum(comment_lines) < n) & !eof){ + if (sum(comment_lines) > 0) { + eof <- FALSE + while ((length(test) - sum(comment_lines) < n) & !eof) { test <- readLines(path, n = n + skip + sum(comment_lines)) - if(length(test) < n + skip + sum(comment_lines)){ + if (length(test) < n + skip + sum(comment_lines)) { eof <- TRUE } if (skip > 0) { @@ -41,8 +40,8 @@ identify_delim <- function(path, # Attempt splitting on list of delimieters num_splits <- list() for (delim in delims) { - delim_regex <- paste0("[",delim,"](?=(?:[^",quote,"]*",quote,"[^",quote,"]*",quote,")*[^",quote,"]*$)") - num_splits[[delim]] <- do.call("c", lapply(strsplit(test, delim_regex,perl = TRUE), length)) + delim_regex <- paste0("[", delim, "](?=(?:[^", quote, "]*", quote, "[^", quote, "]*", quote, ")*[^", quote, "]*$)") + num_splits[[delim]] <- do.call("c", lapply(strsplit(test, delim_regex, perl = TRUE), length)) } if (all(unlist(num_splits) == 1)) { @@ -57,16 +56,17 @@ identify_delim <- function(path, good_delims <- names(good_delims)[good_delims] - if(length(good_delims)==0){ + if (length(good_delims) == 0) { warning("Not able to detect delimiter for the file. Defaulting to ` `.") return(" ") - }else if(length(good_delims) > 1){ - warning("Detected multiple possible delimeters:", - paste0("`",good_delims,"`",collapse=", "),". Defaulting to ", - paste0("`",good_delims[1],"`"),".") + } else if (length(good_delims) > 1) { + warning( + "Detected multiple possible delimeters:", + paste0("`", good_delims, "`", collapse = ", "), ". Defaulting to ", + paste0("`", good_delims[1], "`"), "." 
+ ) return(good_delims[1]) - }else{ + } else { return(good_delims) } - } diff --git a/R/tt_datasets.R b/R/tt_datasets.R index 8884104..13841d8 100644 --- a/R/tt_datasets.R +++ b/R/tt_datasets.R @@ -12,17 +12,18 @@ tt_available <- function() { ) %>% purrr::set_names(as.character(tt_year[-which.max(tt_year)])) - currDatasets<-tt_datasets()%>% - list()%>% + currDatasets <- tt_datasets() %>% + list() %>% purrr::set_names(as.character(tt_year[which.max(tt_year)])) - datasets<-c( + datasets <- c( currDatasets, pastDatasets - )[tt_year[order(tt_year,decreasing = TRUE)]] + )[tt_year[order(tt_year, decreasing = TRUE)]] structure(datasets, - class=c("tt_dataset_table_list")) + class = c("tt_dataset_table_list") + ) } #' @title Available datasets @@ -51,8 +52,9 @@ tt_datasets <- function(year) { structure( datasets, - ".html"=datasets, - class="tt_dataset_table") + ".html" = datasets, + class = "tt_dataset_table" + ) } #' @title print utility for tt_dataset_table object @@ -62,19 +64,21 @@ tt_datasets <- function(year) { #' @importFrom rstudioapi isAvailable viewer #' @importFrom rvest html_table #' @export -print.tt_dataset_table<-function(x,...,printConsole=FALSE){ - if(rstudioapi::isAvailable() & !printConsole){ - tmpHTML<-setup_doc() - x$html%>% - as.character%>% - purrr::walk(~cat(gsub("href=\"/rfordatascience/tidytuesday/", - "href=\"https://github.com/rfordatascience/tidytuesday/", - .x),file = tmpHTML,append = TRUE)) - cat("
",file = tmpHTML,append = TRUE) - cat("",file = tmpHTML,append = TRUE) +print.tt_dataset_table <- function(x, ..., printConsole = FALSE) { + if (rstudioapi::isAvailable() & !printConsole) { + tmpHTML <- setup_doc() + x$html %>% + as.character() %>% + purrr::walk(~ cat(gsub( + "href=\"/rfordatascience/tidytuesday/", + "href=\"https://github.com/rfordatascience/tidytuesday/", + .x + ), file = tmpHTML, append = TRUE)) + cat("
", file = tmpHTML, append = TRUE) + cat("", file = tmpHTML, append = TRUE) rstudioapi::viewer(url = tmpHTML) - }else{ - attr(x,".html")%>% + } else { + attr(x, ".html") %>% rvest::html_table() } } @@ -86,20 +90,19 @@ print.tt_dataset_table<-function(x,...,printConsole=FALSE){ #' @importFrom rstudioapi isAvailable viewer #' @importFrom rvest html_table #' @export -print.tt_dataset_table_list<-function(x,...,printConsole=FALSE){ - - if(rstudioapi::isAvailable() & !printConsole){ - tmpHTML<-setup_doc() - cat("

TidyTuesday Datasets

",file = tmpHTML,append = TRUE) - names(x)%>% +print.tt_dataset_table_list <- function(x, ..., printConsole = FALSE) { + if (rstudioapi::isAvailable() & !printConsole) { + tmpHTML <- setup_doc() + cat("

TidyTuesday Datasets

", file = tmpHTML, append = TRUE) + names(x) %>% purrr::map( function(.x, x) { - list(html=as.character(attr(x[[.x]],".html")),year=.x) + list(html = as.character(attr(x[[.x]], ".html")), year = .x) }, x = x - )%>% + ) %>% purrr::walk( - ~cat( + ~ cat( paste0( "

", .x$year, @@ -108,28 +111,29 @@ print.tt_dataset_table_list<-function(x,...,printConsole=FALSE){ "href=\"/rfordatascience/tidytuesday/", "href=\"https://github.com/rfordatascience/tidytuesday/", .x$html - ) - ), + ) + ), file = tmpHTML, append = TRUE - ) ) + ) cat("

", file = tmpHTML, append = TRUE) cat("", file = tmpHTML, append = TRUE) rstudioapi::viewer(url = tmpHTML) - }else{ - names(x)%>% + } else { + names(x) %>% purrr::map( function(.x, x) { list( - table=rvest::html_table(attr(x[[.x]],".html")),year=.x) - }, - x=x - )%>% + table = rvest::html_table(attr(x[[.x]], ".html")), year = .x + ) + }, + x = x + ) %>% purrr::walk( function(.x) { - cat(paste0("Year: ",.x$year,"\n")) + cat(paste0("Year: ", .x$year, "\n")) print(.x$table) cat("\n\n") } @@ -137,12 +141,11 @@ print.tt_dataset_table_list<-function(x,...,printConsole=FALSE){ } } -setup_doc<-function(tmpHTML = tempfile(fileext = ".html")){ +setup_doc <- function(tmpHTML = tempfile(fileext = ".html")) { cat(" - ",file=tmpHTML) - cat("
",file = tmpHTML,append = TRUE) + ", file = tmpHTML) + cat("
", file = tmpHTML, append = TRUE) return(tmpHTML) } - diff --git a/R/tt_load.R b/R/tt_load.R index 38d98cd..dd3a582 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -9,16 +9,25 @@ #' @importFrom purrr map #' #' @examples -#' tt_output<-tt_load("2019-01-15") -tt_load<-function(x, week){ +#' tt_output <- tt_load("2019-01-15") +tt_load <- function(x, week) { tt <- tt_load_gh(x, week) - tt_data <- purrr::map(attr(tt,".files"), ~tt_read_data(tt, .x)) + tt_data <- purrr::map(attr(tt, ".files"), ~ tt_read_data(tt, .x)) names(tt_data) <- tools::file_path_sans_ext(attr(tt, ".files")) structure( tt_data, - ".tt"=tt, - class="tt_data") + ".tt" = tt, + class = "tt_data" + ) +} + +#' @title access data in tt_data object +#' @param x tt_data object +#' @param name name of dataset to access +#' @exportMethod +`$.tt_data` <-function(x,name){ + x[[name]] } #' @title Load TidyTuesday data from Github @@ -41,8 +50,8 @@ tt_load<-function(x, week){ #' tt_gh <- tt_load_gh("2019-01-15") #' #' show_readme(tt_gh) -tt_load_gh <- function(x, week){ - if(missing(x)){ +tt_load_gh <- function(x, week) { + if (missing(x)) { on.exit({ print(tt_available()) }) @@ -56,26 +65,54 @@ tt_load_gh <- function(x, week){ readme_html <- tt_gh_page %>% rvest::html_nodes(".Box-body") %>% as.character() + readme_html <- gsub( "href=\"/rfordatascience/tidytuesday/", "href=\"https://github.com/rfordatascience/tidytuesday/", readme_html ) - files <- tt_gh_page %>% + # Find Files + available_files <- tt_gh_page %>% rvest::html_nodes(".files") %>% rvest::html_nodes(".content a") %>% rvest::html_attrs() %>% purrr::map_chr(`[`, "title") - #remove readme or directory paths - files <- files[ !(files %in% "readme.md"|file_path_sans_ext(files ) == files)] + files_to_use <- available_files - structure( - files, - ".files"=files, - ".readme"=readme_html, - ".url"=tt_git_url, - class="tt_gh") + # do not try if we don't have a read me or no files listed + if(length(available_files)>0 & length(readme_html)>0){ + files_in_readme <- readme_html %>% + xml2::read_html() %>% + rvest::html_node("code") %>% + rvest::html_text() %>% + base::strsplit("\\n") %>% + `[[`(1) %>% + purrr::map_chr(function(x){ + file_match<-do.call('c',lapply(available_files,grepl,x)) + if(any(file_match)){ + matched_file <- available_files[file_match] + }else{ + matched_file <- NA + } + return(matched_file) + }) %>% + `[`(!is.na(.)) + if(length(files_in_readme)>0){ + files_to_use<-files_in_readme + } + } + + # remove readme or directory paths + files_to_use <- files_to_use[ !(files_to_use %in% "readme.md" | file_path_sans_ext(files_to_use) == files_to_use)] + + structure( + files_to_use, + ".files" = files_to_use, + ".readme" = readme_html, + ".url" = tt_git_url, + class = "tt_gh" + ) } diff --git a/R/tt_make_url.R b/R/tt_make_url.R index 0dd45e0..336088f 100644 --- a/R/tt_make_url.R +++ b/R/tt_make_url.R @@ -4,13 +4,13 @@ #' @param week left empty unless x is a numeric year entry, in which case the week of interest should be entered #' tt_make_url <- function(x, week) { - if(valid_date(x)){ - tt_make_url.date(x) - }else if(valid_year(x)){ - tt_make_url.year(x, week) - }else{ - stop("Entries must render to a valid date or year") - } + if (valid_date(x)) { + tt_make_url.date(x) + } else if (valid_year(x)) { + tt_make_url.year(x, week) + } else { + stop("Entries must render to a valid date or year") + } } tt_make_url.date <- function(x) { @@ -68,8 +68,10 @@ valid_date <- function(x) { }) } -valid_year <- function(x){ - 
suppressWarnings({!is.na(lubridate::as_date(paste0(as.character(x),"-01-01")))}) +valid_year <- function(x) { + suppressWarnings({ + !is.na(lubridate::as_date(paste0(as.character(x), "-01-01"))) + }) } #' @importFrom lubridate year month day ymd diff --git a/R/tt_read_data.R b/R/tt_read_data.R index 3e9c312..6df804a 100644 --- a/R/tt_read_data.R +++ b/R/tt_read_data.R @@ -14,48 +14,53 @@ #' @family tt_read_data #' #' @examples -#' tt_gh<-tt_load_gh("2019-01-15") +#' tt_gh <- tt_load_gh("2019-01-15") #' -#' tt_dataset_1<-tt_read_data(tt_gh,tt_gh[1]) -tt_read_data<-function(tt,x){ - suppressMessages({switch (class(x), - "character" = tt_read_data.character(tt,x), - "numeric" = tt_read_data.numeric(tt,x), - "integer" = tt_read_data.numeric(tt,x), - stop(paste("No method for entry of class:",class(x))) - )}) - +#' tt_dataset_1 <- tt_read_data(tt_gh, tt_gh[1]) +tt_read_data <- function(tt, x) { + suppressMessages({ + switch(class(x), + "character" = tt_read_data.character(tt, x), + "numeric" = tt_read_data.numeric(tt, x), + "integer" = tt_read_data.numeric(tt, x), + stop(paste("No method for entry of class:", class(x))) + ) + }) } -tt_read_data.character <- function(tt, x){ - if ( x%in%attr(tt,".files")){ - url<-paste0(gsub("tree","blob",file.path(attr(tt,".url"),x)),"?raw=true") +tt_read_data.character <- function(tt, x) { + if (x %in% attr(tt, ".files")) { + url <- paste0(gsub("tree", "blob", file.path(attr(tt, ".url"), x)), "?raw=true") tt_read_url(url) } else { stop(paste0( "That is not an available file for this TidyTuesday week!\nAvailable Datasets:\n", - paste(attr(tt,".files"),"\n\t",collapse=""))) + paste(attr(tt, ".files"), "\n\t", collapse = "") + )) } } -tt_read_data.numeric <- function(tt, x){ - if( x>0 & x <= length(attr(tt,".files")) ) { - url<-paste0(gsub("tree","blob",file.path(attr(tt,".url"),attr(tt,".files")[x])),"?raw=true") +tt_read_data.numeric <- function(tt, x) { + if (x > 0 & x <= length(attr(tt, ".files"))) { + url <- paste0(gsub("tree", "blob", file.path(attr(tt, ".url"), attr(tt, ".files")[x])), "?raw=true") tt_read_url(url) - }else{ + } else { stop(paste0( "That is not an available index for the files for this TidyTuesday week!\nAvailable Datasets:\n\t", - paste0(seq(1,length(attr(tt,".files"))),": ",attr(tt,".files"),"\n\t",collapse=""))) + paste0(seq(1, length(attr(tt, ".files"))), ": ", attr(tt, ".files"), "\n\t", collapse = "") + )) } } -tt_read_url<-function(url){ - url<-gsub(" ","%20",url) - switch(tools::file_ext(gsub("[?]raw=true","",url)), - "xls"=download_read(url,readxl::read_xls,mode="wb"), - "xlsx"=download_read(url,readxl::read_xlsx,mode="wb"), - download_read(url,readr::read_delim,guess_max = 21474836,progress = FALSE,find_delim = TRUE) +tt_read_url <- function(url) { + url <- gsub(" ", "%20", url) + switch(tools::file_ext(gsub("[?]raw=true", "", tolower(url))), + "xls" = download_read(url, readxl::read_xls, mode = "wb"), + "xlsx" = download_read(url, readxl::read_xlsx, mode = "wb"), + "rds" = download_read(url, readRDS, mode = "wb"), + "rda" = download_read(url, read_rda, mode = "wb"), + download_read(url, readr::read_delim, guess_max = 21474836, progress = FALSE, find_delim = TRUE) ) } @@ -68,22 +73,32 @@ tt_read_url<-function(url){ #' @param find_delim should the delimeters be found for the file #' @importFrom utils download.file #' -download_read<-function(url, func, ..., mode="w", find_delim = FALSE){ - - temp_file<-tempfile(fileext = paste0(".",tools::file_ext(url))) - utils::download.file(url,temp_file,quiet = TRUE,mode=mode) +download_read <- 
function(url, func, ..., mode = "w", find_delim = FALSE) { + temp_file <- tempfile(fileext = paste0(".", tools::file_ext(url))) + utils::download.file(url, temp_file, quiet = TRUE, mode = mode) dots <- as.list(substitute(substitute(...)))[-1] - func_call <- c(substitute(func),substitute(temp_file),dots) + func_call <- c(substitute(func), substitute(temp_file), dots) - if(find_delim){ - if(!(!is.null(names(func_call)) & - "delim"%in%names(func_call)) & - "delim" %in% names(as.list(args(func)))){ + if (find_delim) { + if (!(!is.null(names(func_call)) & + "delim" %in% names(func_call)) & + "delim" %in% names(as.list(args(func)))) { func_call$delim <- identify_delim(temp_file) } } - eval(as.call(func_call)) + return(eval(as.call(func_call))) } +#' @title utility to load RDA with out using assigned name in envir +#' +#' @param path path to RDA file +#' @importFrom base load +#' @importFrom base new.env +# +read_rda <- function(path){ + load_env<-new.env() + load(path,envir = load_env) + load_env[[ls(load_env)[1]]] +} diff --git a/R/utils.R b/R/utils.R index be70c6f..2dc6c59 100644 --- a/R/utils.R +++ b/R/utils.R @@ -5,19 +5,19 @@ #' @export print.tt_data <- function(x, ...) { readme(x) - message("Available Datasets:\n\t",paste(tools::file_path_sans_ext(names(x)),"\n\t",collapse="")) + message("Available Datasets:\n\t", paste(tools::file_path_sans_ext(names(x)), "\n\t", collapse = "")) } #' @title Readme HTML maker and Viewer #' @param tt tt_data object for printing #' @importFrom rstudioapi viewer -readme <- function(tt){ - if ("tt_data"%in%class(tt )) { - tt<-attr(tt,".tt") +readme <- function(tt) { + if ("tt_data" %in% class(tt)) { + tt <- attr(tt, ".tt") } - if ( length(attr(tt,".readme"))>0 ) { - #if running in rstudio, print out that - if(rstudioapi::isAvailable()){ + if (length(attr(tt, ".readme")) > 0) { + # if running in rstudio, print out that + if (rstudioapi::isAvailable()) { rstudioapi::viewer(url = tt_make_html(tt)) } } @@ -37,8 +37,9 @@ tt_make_html <- function(x) { "", "", - ""),file=tmpHTML,sep = " ") - cat(attr(x,".readme"),file = tmpHTML,append = TRUE) - cat("",file = tmpHTML,append = TRUE) + "" + ), file = tmpHTML, sep = " ") + cat(attr(x, ".readme"), file = tmpHTML, append = TRUE) + cat("", file = tmpHTML, append = TRUE) return(tmpHTML) } diff --git a/tests/testthat/test-identify_delim.R b/tests/testthat/test-identify_delim.R index d217889..50cbae7 100644 --- a/tests/testthat/test-identify_delim.R +++ b/tests/testthat/test-identify_delim.R @@ -2,30 +2,30 @@ context("test-identify_delim") test_that("Correctly identify the delimeter", { delim_file <- tempfile() - writeLines(c("test,the,delim","this,is,a comma"),delim_file) - expect_equal(identify_delim(delim_file),",") - }) + writeLines(c("test,the,delim", "this,is,a comma"), delim_file) + expect_equal(identify_delim(delim_file), ",") +}) test_that("If multiple possible delimeter exist, pick the `simplest` one", { delim_file <- tempfile() - writeLines(c("test\t,the\t,delim","this\t,is\t,a twofer"),delim_file) + writeLines(c("test\t,the\t,delim", "this\t,is\t,a twofer"), delim_file) expect_warning( identify_delim(delim_file), "Detected multiple possible delimeters:" - ) + ) suppressWarnings({ expect_equal( identify_delim(delim_file), "\t" - ) - }) + ) + }) }) test_that("If unable to identify a delimeter, give a warning", { delim_file <- tempfile() - writeLines(c("test\tthe\tdelim","this,is|a twofer"),delim_file) - expect_warning(identify_delim(delim_file),"Not able to detect delimiter for") + writeLines(c("test\tthe\tdelim", 
"this,is|a twofer"), delim_file) + expect_warning(identify_delim(delim_file), "Not able to detect delimiter for") suppressWarnings({ expect_equal( identify_delim(delim_file), @@ -36,7 +36,7 @@ test_that("If unable to identify a delimeter, give a warning", { test_that("Can skip lines with comments to find delimeters, or ones identified to skip", { delim_file <- tempfile() - writeLines(c("#this,line|isskipped","test,the,delim","this,is,a comma"),delim_file) - expect_equal(identify_delim(delim_file),",") - expect_equal(identify_delim(delim_file,skip = 1),",") + writeLines(c("#this,line|isskipped", "test,the,delim", "this,is,a comma"), delim_file) + expect_equal(identify_delim(delim_file), ",") + expect_equal(identify_delim(delim_file, skip = 1), ",") }) diff --git a/tests/testthat/test-tt_available.R b/tests/testthat/test-tt_available.R index 9cfbffa..8153b5c 100644 --- a/tests/testthat/test-tt_available.R +++ b/tests/testthat/test-tt_available.R @@ -13,7 +13,7 @@ test_that("tt_available returns all years", { test_that("tt_datasets prints to console when rstudio viewer is not available", { - ds<-tt_datasets(2018) - consoleOutput<-print(ds,printConsole=TRUE) - testthat::expect_equivalent(attr(ds,".html")%>%rvest::html_table(),consoleOutput) + ds <- tt_datasets(2018) + consoleOutput <- print(ds, printConsole = TRUE) + testthat::expect_equivalent(attr(ds, ".html") %>% rvest::html_table(), consoleOutput) }) diff --git a/tests/testthat/test-tt_load_gh.R b/tests/testthat/test-tt_load_gh.R index cf91a46..a2182e7 100644 --- a/tests/testthat/test-tt_load_gh.R +++ b/tests/testthat/test-tt_load_gh.R @@ -2,35 +2,37 @@ context("test-tt_load_gh") # check that correct data are returned test_that("tt_load_gh returns tt_gh object when provided proper date", { - tt_gh<-tt_load_gh("2019-01-15") + tt_gh <- tt_load_gh("2019-01-15") - testthat::expect_s3_class(tt_gh,"tt_gh") - testthat::expect_equal(attr(tt_gh,".files"),c("agencies.csv","launches.csv")) - testthat::expect_equal(attr(tt_gh,".url"),"https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") + testthat::expect_s3_class(tt_gh, "tt_gh") + testthat::expect_equal(attr(tt_gh, ".files"), c("agencies.csv", "launches.csv")) + testthat::expect_equal(attr(tt_gh, ".url"), "https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") }) -#check that correct data are returned +# check that correct data are returned test_that("tt_load_gh returns tt_gh object when provided proper year and TT week number", { + tt_gh <- tt_load_gh(2019, 3) - tt_gh<-tt_load_gh(2019,3) - - testthat::expect_s3_class(tt_gh,"tt_gh") - testthat::expect_equal(attr(tt_gh,".files"),c("agencies.csv","launches.csv")) - testthat::expect_equal(attr(tt_gh,".url"),"https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") + testthat::expect_s3_class(tt_gh, "tt_gh") + testthat::expect_equal(attr(tt_gh, ".files"), c("agencies.csv", "launches.csv")) + testthat::expect_equal(attr(tt_gh, ".url"), "https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") }) -#check that errors are returned +# check that errors are returned test_that("tt_load_gh returns error when incorrect date", { - nullout <- capture.output({testthat::expect_error(tt_load_gh("2019-01-16"),"is not a date that has TidyTuesday data")}) + nullout <- capture.output({ + testthat::expect_error(tt_load_gh("2019-01-16"), "is not a date that has TidyTuesday data") + }) }) test_that("tt_load_gh returns error when incorrect years or week number entries", { 
- testthat::expect_error(tt_load_gh(2018,92),"Please enter a value for week between 1") - testthat::expect_error(tt_load_gh(2017,92),"TidyTuesday did not exist for") + testthat::expect_error(tt_load_gh(2018, 92), "Please enter a value for week between 1") + testthat::expect_error(tt_load_gh(2017, 92), "TidyTuesday did not exist for") }) test_that( - "tt_load_gh returns error when incorrect years or week number entries", { + "tt_load_gh returns error when incorrect years or week number entries", + { expect_error( tt_load_gh(2018, 92), "Please enter a value for week between 1" @@ -54,28 +56,35 @@ test_that("tt_load_gh returns error when incorrect years or week number entries" }) test_that("tt_load_gh returns error when nothing is entered", { - nullout <- capture.output({testthat::expect_error(tt_load_gh(),"Enter either the year or date of the TidyTuesday Data")}) + nullout <- capture.output({ + testthat::expect_error(tt_load_gh(), "Enter either the year or date of the TidyTuesday Data") + }) }) test_that("tt_load_gh returns error when week is not a valid entry between 1 and n weeks", { - testthat::expect_error(tt_load_gh(2019,0), - "Week entry must be a valid positive integer") + testthat::expect_error( + tt_load_gh(2019, 0), + "Week entry must be a valid positive integer" + ) }) # test driven dev, new feature to add test_that("Returns simple list of object when no readme.md available", { - tt_gh<-tt_load_gh("2018-04-09") - expect_s3_class(tt_gh,"tt_gh") - expect_true(length(attr(tt_gh,".readme"))==0) #object should not exist + tt_gh <- tt_load_gh("2018-04-09") + expect_s3_class(tt_gh, "tt_gh") + expect_true(length(attr(tt_gh, ".readme")) == 0) # object should not exist }) -test_that("tt_load loads all data available",{ - tt_obj<-tt_load("2019-01-15") - expect_equal(tt_obj$agencies, - readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-15/agencies.csv")) - expect_equal(tt_obj$launches, - readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-15/launches.csv")) - +test_that("tt_load loads all data available", { + tt_obj <- tt_load("2019-01-15") + expect_equal( + tt_obj$agencies, + readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-15/agencies.csv") + ) + expect_equal( + tt_obj$launches, + readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-15/launches.csv") + ) }) test_that("tt_load loads excel files properly", { diff --git a/tests/testthat/test-tt_read_data.R b/tests/testthat/test-tt_read_data.R index 1d4ff02..16a40f1 100644 --- a/tests/testthat/test-tt_read_data.R +++ b/tests/testthat/test-tt_read_data.R @@ -7,14 +7,14 @@ test_that("tt_read_data only works for numeric,integer, or character entries", { integerRead <- tt_read_data(tt_gh_data, 1L) characterRead <- tt_read_data(tt_gh_data, "agencies.csv") - numericRead<-tt_read_data(tt_gh_data,1) - integerRead<-tt_read_data(tt_gh_data,1L) - characterRead<-tt_read_data(tt_gh_data,'agencies.csv') + numericRead <- tt_read_data(tt_gh_data, 1) + integerRead <- tt_read_data(tt_gh_data, 1L) + characterRead <- tt_read_data(tt_gh_data, "agencies.csv") - url<-paste0( - gsub("tree","blob",file.path(attr(tt_gh_data,".url"),"agencies.csv")), + url <- paste0( + gsub("tree", "blob", file.path(attr(tt_gh_data, ".url"), "agencies.csv")), "?raw=true" - ) + ) readURL <- read_csv(url) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 
0d95580..617b864 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -1,39 +1,49 @@
 context("test-utils")
 
-tt_data<-structure(
-  list(value1="value1",
-       value2="value2"),
-  .tt=structure(
-    c("value1.csv","value2.csv"),
-    .files=c("value1.csv","value2.csv"),
+tt_data <- structure(
+  list(
+    value1 = "value1",
+    value2 = "value2"
+  ),
+  .tt = structure(
+    c("value1.csv", "value2.csv"),
+    .files = c("value1.csv", "value2.csv"),
     .url = "fake_url",
     .readme = "<div class=\"contents\">
 README contents
 </div>
 ",
-    class = "tt_gh"),
-  class="tt_data")
+    class = "tt_gh"
+  ),
+  class = "tt_data"
+)
 
 test_that("tt_make_html generates a properly formatted html doc", {
-  enteredValues<-read_html(tt_make_html(attr(tt_data,".tt")))%>%
-    html_nodes(".contents")%>%
-    as.character
-  testthat::expect_equal(enteredValues,"<div class=\"contents\">
-README contents
-</div>
-")
+  enteredValues <- read_html(tt_make_html(attr(tt_data, ".tt"))) %>%
+    html_nodes(".contents") %>%
+    as.character()
+  testthat::expect_equal(enteredValues, "<div class=\"contents\">
+README contents
+</div>
") }) test_that("print.tt_data lists the available datasets", { - tt_data<-structure( - list(value1="value1", - value2="value2"), - .tt=structure( - c("value1.csv","value2.csv"), - .files=c("value1.csv","value2.csv"), + tt_data <- structure( + list( + value1 = "value1", + value2 = "value2" + ), + .tt = structure( + c("value1.csv", "value2.csv"), + .files = c("value1.csv", "value2.csv"), .url = "fake_url", .readme = "README", - class = "tt_gh"), - class="tt_data") + class = "tt_gh" + ), + class = "tt_data" + ) - capturedOutput<-capture_message({print(tt_data)}) + capturedOutput <- capture_message({ + print(tt_data) + }) testthat::expect_equal( capturedOutput$message, "Available Datasets:\n\tvalue1 \n\tvalue2 \n\t\n" - ) + ) }) From 19f4c5aa7eefa8aab2eb1b3770b94f3fc23ab608 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 6 Nov 2019 15:45:21 -0800 Subject: [PATCH 05/64] drop show_readme function - redundant to readme function --- R/utils.R | 7 ------- man/show_readme.Rd | 14 -------------- 2 files changed, 21 deletions(-) delete mode 100644 man/show_readme.Rd diff --git a/R/utils.R b/R/utils.R index 2dc6c59..c426d4f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -23,13 +23,6 @@ readme <- function(tt) { } } -#' @title Print Readme to RStudio HTML Viewer -#' @param tt tt_data object for printing -#' @export -show_readme <- function(tt) { - readme(tt) -} - tt_make_html <- function(x) { tmpHTML <- tempfile(fileext = ".html") cat(c( diff --git a/man/show_readme.Rd b/man/show_readme.Rd deleted file mode 100644 index 203edb1..0000000 --- a/man/show_readme.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{show_readme} -\alias{show_readme} -\title{Print Readme to RStudio HTML Viewer} -\usage{ -show_readme(tt) -} -\arguments{ -\item{tt}{tt_data object for printing} -} -\description{ -Print Readme to RStudio HTML Viewer -} From 748ab5359b8f698dcfb90aa7a0cde8a665e055ef Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 6 Nov 2019 15:45:40 -0800 Subject: [PATCH 06/64] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index c779f55..fb666af 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tidytuesdayR Type: Package Title: Access The Weekly TidyTuesday Project Dataset -Version: 0.2.0 +Version: 0.2.1 Authors@R: c( person( From 9f9031a3f7bb2b73003b0a5124e6cfedc5ea377a Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 6 Nov 2019 16:07:41 -0800 Subject: [PATCH 07/64] improve read speed, and add user ability to pass args to tt_read_data from tt_load --- NAMESPACE | 1 - R/tt_load.R | 19 ++++++++-------- R/tt_read_data.R | 35 ++++++++++++++++------------- man/download_read.Rd | 3 ++- man/read_rda.Rd | 14 ++++++++++++ man/tt_load.Rd | 6 +++-- man/tt_read_data.Rd | 8 ++++--- tests/testthat/test-tt_read_data.R | 19 ++++++++++++++++ tests/testthat/testfiles/test.rda | Bin 0 -> 183 bytes 9 files changed, 73 insertions(+), 32 deletions(-) create mode 100644 man/read_rda.Rd create mode 100644 tests/testthat/testfiles/test.rda diff --git a/NAMESPACE b/NAMESPACE index 71502e8..78d1db5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,6 @@ S3method(print,tt_data) S3method(print,tt_dataset_table) S3method(print,tt_dataset_table_list) -export(show_readme) export(tt_available) export(tt_datasets) export(tt_load) diff --git a/R/tt_load.R b/R/tt_load.R index dd3a582..d7241a9 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -2,6 +2,7 @@ #' #' @param x 
string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year #' @param week left empty unless x is a numeric year entry, in which case the week of interest should be entered +#' @param ... pass methods to the parsing functions. These will be passed to ALL files, so be careful. #' @return tt_data object (list class) #' #' @export @@ -10,9 +11,9 @@ #' #' @examples #' tt_output <- tt_load("2019-01-15") -tt_load <- function(x, week) { +tt_load <- function(x, week, ...) { tt <- tt_load_gh(x, week) - tt_data <- purrr::map(attr(tt, ".files"), ~ tt_read_data(tt, .x)) + tt_data <- purrr::map(attr(tt, ".files"), function(x) tt_read_data(tt, x, ... )) names(tt_data) <- tools::file_path_sans_ext(attr(tt, ".files")) structure( @@ -22,13 +23,13 @@ tt_load <- function(x, week) { ) } -#' @title access data in tt_data object -#' @param x tt_data object -#' @param name name of dataset to access -#' @exportMethod -`$.tt_data` <-function(x,name){ - x[[name]] -} +# #' @title access data in tt_data object +# #' @param x tt_data object +# #' @param name name of dataset to access +# #' @exportMethod `$` +# `$.tt_data` <-function(x,name){ +# x[[ name ]] +# } #' @title Load TidyTuesday data from Github #' diff --git a/R/tt_read_data.R b/R/tt_read_data.R index 6df804a..0eb577b 100644 --- a/R/tt_read_data.R +++ b/R/tt_read_data.R @@ -4,6 +4,7 @@ #' #' @param tt tt_gh object from tt_load_gh function #' @param x index/name of data object to read in. string or int +#' @param guess_max number of rows to use to esimate col type, defaults to 5000. Only used for text files. #' @return tibble #' @export #' @@ -17,21 +18,21 @@ #' tt_gh <- tt_load_gh("2019-01-15") #' #' tt_dataset_1 <- tt_read_data(tt_gh, tt_gh[1]) -tt_read_data <- function(tt, x) { +tt_read_data <- function(tt, x, guess_max = 5000) { suppressMessages({ switch(class(x), - "character" = tt_read_data.character(tt, x), - "numeric" = tt_read_data.numeric(tt, x), - "integer" = tt_read_data.numeric(tt, x), + "character" = tt_read_data.character(tt, x, guess_max = guess_max ), + "numeric" = tt_read_data.numeric(tt, x, guess_max = guess_max ), + "integer" = tt_read_data.numeric(tt, x, guess_max = guess_max ), stop(paste("No method for entry of class:", class(x))) ) }) } -tt_read_data.character <- function(tt, x) { +tt_read_data.character <- function(tt, x, guess_max = 5000) { if (x %in% attr(tt, ".files")) { url <- paste0(gsub("tree", "blob", file.path(attr(tt, ".url"), x)), "?raw=true") - tt_read_url(url) + tt_read_url(url, guess_max = guess_max) } else { stop(paste0( "That is not an available file for this TidyTuesday week!\nAvailable Datasets:\n", @@ -40,10 +41,10 @@ tt_read_data.character <- function(tt, x) { } } -tt_read_data.numeric <- function(tt, x) { +tt_read_data.numeric <- function(tt, x, guess_max = 5000) { if (x > 0 & x <= length(attr(tt, ".files"))) { url <- paste0(gsub("tree", "blob", file.path(attr(tt, ".url"), attr(tt, ".files")[x])), "?raw=true") - tt_read_url(url) + tt_read_url(url, guess_max = guess_max) } else { stop(paste0( "That is not an available index for the files for this TidyTuesday week!\nAvailable Datasets:\n\t", @@ -53,14 +54,14 @@ tt_read_data.numeric <- function(tt, x) { } -tt_read_url <- function(url) { +tt_read_url <- function(url, guess_max = 5000) { url <- gsub(" ", "%20", url) switch(tools::file_ext(gsub("[?]raw=true", "", tolower(url))), - "xls" = download_read(url, readxl::read_xls, mode = "wb"), - "xlsx" = download_read(url, readxl::read_xlsx, mode = "wb"), + "xls" = 
download_read(url, readxl::read_xls, guess_max = guess_max, mode = "wb"), + "xlsx" = download_read(url, readxl::read_xlsx, guess_max = guess_max, mode = "wb"), "rds" = download_read(url, readRDS, mode = "wb"), "rda" = download_read(url, read_rda, mode = "wb"), - download_read(url, readr::read_delim, guess_max = 21474836, progress = FALSE, find_delim = TRUE) + download_read(url, readr::read_delim, guess_max = guess_max, progress = FALSE, find_delim = TRUE) ) } @@ -73,7 +74,7 @@ tt_read_url <- function(url) { #' @param find_delim should the delimeters be found for the file #' @importFrom utils download.file #' -download_read <- function(url, func, ..., mode = "w", find_delim = FALSE) { +download_read <- function(url, func, ..., guess_max, mode = "w", find_delim = FALSE) { temp_file <- tempfile(fileext = paste0(".", tools::file_ext(url))) utils::download.file(url, temp_file, quiet = TRUE, mode = mode) @@ -88,17 +89,19 @@ download_read <- function(url, func, ..., mode = "w", find_delim = FALSE) { } } + if("guess_max"%in%names(as.list(args(func)))){ + func_call$guess_max = guess_max + } + return(eval(as.call(func_call))) } #' @title utility to load RDA with out using assigned name in envir #' #' @param path path to RDA file -#' @importFrom base load -#' @importFrom base new.env # read_rda <- function(path){ load_env<-new.env() load(path,envir = load_env) - load_env[[ls(load_env)[1]]] + load_env[[ ls(load_env)[1] ]] } diff --git a/man/download_read.Rd b/man/download_read.Rd index 8af753c..0686f64 100644 --- a/man/download_read.Rd +++ b/man/download_read.Rd @@ -4,7 +4,8 @@ \alias{download_read} \title{utility to assist with 'reading' urls that cannot normally be read by file functions} \usage{ -download_read(url, func, ..., mode = "w", find_delim = FALSE) +download_read(url, func, ..., guess_max, mode = "w", + find_delim = FALSE) } \arguments{ \item{url}{path to online file to be read} diff --git a/man/read_rda.Rd b/man/read_rda.Rd new file mode 100644 index 0000000..28559bf --- /dev/null +++ b/man/read_rda.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_read_data.R +\name{read_rda} +\alias{read_rda} +\title{utility to load RDA with out using assigned name in envir} +\usage{ +read_rda(path) +} +\arguments{ +\item{path}{path to RDA file} +} +\description{ +utility to load RDA with out using assigned name in envir +} diff --git a/man/tt_load.Rd b/man/tt_load.Rd index fa1763d..da79e61 100644 --- a/man/tt_load.Rd +++ b/man/tt_load.Rd @@ -4,12 +4,14 @@ \alias{tt_load} \title{Load TidyTuesday data from Github} \usage{ -tt_load(x, week) +tt_load(x, week, ...) } \arguments{ \item{x}{string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year} \item{week}{left empty unless x is a numeric year entry, in which case the week of interest should be entered} + +\item{...}{pass methods to the parsing functions. 
These will be passed to ALL files, so be careful.} } \value{ tt_data object (list class) @@ -18,5 +20,5 @@ tt_data object (list class) Load TidyTuesday data from Github } \examples{ -tt_output<-tt_load("2019-01-15") +tt_output <- tt_load("2019-01-15") } diff --git a/man/tt_read_data.Rd b/man/tt_read_data.Rd index 8da20ad..9c73b5b 100644 --- a/man/tt_read_data.Rd +++ b/man/tt_read_data.Rd @@ -4,12 +4,14 @@ \alias{tt_read_data} \title{Reads in TidyTuesday datasets from Github repo} \usage{ -tt_read_data(tt, x) +tt_read_data(tt, x, guess_max = 5000) } \arguments{ \item{tt}{tt_gh object from tt_load_gh function} \item{x}{index/name of data object to read in. string or int} + +\item{guess_max}{number of rows to use to esimate col type, defaults to 5000. Only used for text files.} } \value{ tibble @@ -18,8 +20,8 @@ tibble Reads in the actual data from the TidyTuesday github } \examples{ -tt_gh<-tt_load_gh("2019-01-15") +tt_gh <- tt_load_gh("2019-01-15") -tt_dataset_1<-tt_read_data(tt_gh,tt_gh[1]) +tt_dataset_1 <- tt_read_data(tt_gh, tt_gh[1]) } \concept{tt_read_data} diff --git a/tests/testthat/test-tt_read_data.R b/tests/testthat/test-tt_read_data.R index 16a40f1..8ce71dd 100644 --- a/tests/testthat/test-tt_read_data.R +++ b/tests/testthat/test-tt_read_data.R @@ -53,3 +53,22 @@ test_that("tt_read_data informs when selection is out of range/not available", { "That is not an available index" ) }) + + +test_that("tt_read_data can load RDS files just as easily as text files",{ + tt_gh_data <- tt_load_gh("2019-01-01") + + expect_is( + tt_read_data(tt_gh_data, 1), + c("tbl_df","tbl","data.frame") + ) + +}) + + +test_that("read_rda will not arbitrarily assign the object to the current environment",{ + new_dataset<-read_rda("testfiles/test.rda") + expect_false(exists("testdf")) + expect_equal(data.frame(x=c(1,2,3),y=c("A","B","C")), + new_dataset) +}) diff --git a/tests/testthat/testfiles/test.rda b/tests/testthat/testfiles/test.rda new file mode 100644 index 0000000000000000000000000000000000000000..439eba803b320eb3bba151dc97e01c209cd59b8f GIT binary patch literal 183 zcmV;o07(BIiwFP!0000023?O!4uUWchJRWxLZUJ89xS|o&l9*}+dx?%B+&TSd1Nmj zb!ZtEZPJ-9?LS}B>!sLbMFv1XCIUhd#)3+O7%BCI(SxaKKyFMr(RbYpL>qVOPopEliQya!CS{`rp8MNsRhq6R8pC+2ZkpqQrB8;Jk4qZ} lvq~A2*F6mf!nE%mIZLNsiZ>N&?bpAK@dvKbOOsCl002UoO^^Tp literal 0 HcmV?d00001 From 680f413835229fc3d9c1b4ebafc8d722fa414b01 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 6 Nov 2019 16:33:48 -0800 Subject: [PATCH 08/64] resolve build check errors, make readme exported... --- NAMESPACE | 1 + R/tt_load.R | 7 ++++--- R/tt_read_data.R | 1 + R/utils.R | 1 + man/download_read.Rd | 2 ++ man/tt_load_gh.Rd | 2 +- 6 files changed, 10 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 78d1db5..98e06ee 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ S3method(print,tt_data) S3method(print,tt_dataset_table) S3method(print,tt_dataset_table_list) +export(readme) export(tt_available) export(tt_datasets) export(tt_load) diff --git a/R/tt_load.R b/R/tt_load.R index d7241a9..159fc35 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -50,7 +50,7 @@ tt_load <- function(x, week, ...) 
{ #' @examples #' tt_gh <- tt_load_gh("2019-01-15") #' -#' show_readme(tt_gh) +#' readme(tt_gh) tt_load_gh <- function(x, week) { if (missing(x)) { on.exit({ @@ -98,8 +98,9 @@ tt_load_gh <- function(x, week) { matched_file <- NA } return(matched_file) - }) %>% - `[`(!is.na(.)) + }) + + files_in_readme<- files_in_readme[!is.na(files_in_readme)] if(length(files_in_readme)>0){ files_to_use<-files_in_readme diff --git a/R/tt_read_data.R b/R/tt_read_data.R index 0eb577b..0a3c7b9 100644 --- a/R/tt_read_data.R +++ b/R/tt_read_data.R @@ -70,6 +70,7 @@ tt_read_url <- function(url, guess_max = 5000) { #' @param url path to online file to be read #' @param func the function to perform reading of url #' @param ... args to pass to func +#' @param guess_max number of rows to use to predict column type. Only used if is an arg in `func` #' @param mode mode passed to \code{utils::download.file}. default is "w" #' @param find_delim should the delimeters be found for the file #' @importFrom utils download.file diff --git a/R/utils.R b/R/utils.R index c426d4f..8121fbb 100644 --- a/R/utils.R +++ b/R/utils.R @@ -11,6 +11,7 @@ print.tt_data <- function(x, ...) { #' @title Readme HTML maker and Viewer #' @param tt tt_data object for printing #' @importFrom rstudioapi viewer +#' @export readme <- function(tt) { if ("tt_data" %in% class(tt)) { tt <- attr(tt, ".tt") diff --git a/man/download_read.Rd b/man/download_read.Rd index 0686f64..37b595c 100644 --- a/man/download_read.Rd +++ b/man/download_read.Rd @@ -14,6 +14,8 @@ download_read(url, func, ..., guess_max, mode = "w", \item{...}{args to pass to func} +\item{guess_max}{number of rows to use to predict column type. Only used if is an arg in `func`} + \item{mode}{mode passed to \code{utils::download.file}. default is "w"} \item{find_delim}{should the delimeters be found for the file} diff --git a/man/tt_load_gh.Rd b/man/tt_load_gh.Rd index 5be2ceb..0bc1f24 100644 --- a/man/tt_load_gh.Rd +++ b/man/tt_load_gh.Rd @@ -20,5 +20,5 @@ Pulls the Readme and URLs of the data from the TidyTuesday github folder based o \examples{ tt_gh <- tt_load_gh("2019-01-15") -show_readme(tt_gh) +readme(tt_gh) } From 16d8579081a4c039171b7e15cb766105fbc284b7 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Wed, 6 Nov 2019 22:05:48 -0800 Subject: [PATCH 09/64] try rolling back and specifying min version of rlang necessary --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index bdb0606..bcfe6cc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,6 +22,7 @@ jobs: - r: release install: - Rscript -e 'install.packages("devtools")' + - Rscript -e 'devtools::install_version("rlang", version = "0.3.2",upgrade="never")' - Rscript -e 'devtools::install_version("dplyr", version = "0.5.0",upgrade="never")' - Rscript -e 'devtools::install_version("readxl", version = "1.0.0",upgrade="never")' - Rscript -e 'devtools::install_version("rvest", version = "0.3.0",upgrade="never")' From 1b164d34a41abd326e0023d0cf6026dffb5aedf3 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Wed, 6 Nov 2019 22:16:44 -0800 Subject: [PATCH 10/64] move up rlang and dplyr version --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index bcfe6cc..f0b7469 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,8 +22,8 @@ jobs: - r: release install: - Rscript -e 'install.packages("devtools")' - - Rscript -e 'devtools::install_version("rlang", version = "0.3.2",upgrade="never")' - - Rscript -e 'devtools::install_version("dplyr", 
version = "0.5.0",upgrade="never")' + - Rscript -e 'devtools::install_version("rlang", version = "0.4.1",upgrade="never")' + - Rscript -e 'devtools::install_version("dplyr", version = "0.8.0",upgrade="never")' - Rscript -e 'devtools::install_version("readxl", version = "1.0.0",upgrade="never")' - Rscript -e 'devtools::install_version("rvest", version = "0.3.0",upgrade="never")' - Rscript -e 'devtools::install_version("lubridate", version = "1.7.0",upgrade="never")' From cc4dcabe4d5edc3493c03b4101484eb76d2e2e30 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Wed, 6 Nov 2019 22:47:41 -0800 Subject: [PATCH 11/64] update min package versions for travis? --- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index f0b7469..e69b9a1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,15 +22,14 @@ jobs: - r: release install: - Rscript -e 'install.packages("devtools")' - - Rscript -e 'devtools::install_version("rlang", version = "0.4.1",upgrade="never")' - Rscript -e 'devtools::install_version("dplyr", version = "0.8.0",upgrade="never")' - Rscript -e 'devtools::install_version("readxl", version = "1.0.0",upgrade="never")' - - Rscript -e 'devtools::install_version("rvest", version = "0.3.0",upgrade="never")' + - Rscript -e 'devtools::install_version("rvest", version = "0.3.2",upgrade="never")' - Rscript -e 'devtools::install_version("lubridate", version = "1.7.0",upgrade="never")' - Rscript -e 'devtools::install_version("purrr", version = "0.2.5",upgrade="never")' - Rscript -e 'devtools::install_version("readr", version = "1.0.0",upgrade="never")' - Rscript -e 'devtools::install_version("rstudioapi", version = "0.2",upgrade="never")' - - Rscript -e 'devtools::install_version("xml2", version = "0.1.0",upgrade="never")' + - Rscript -e 'devtools::install_version("xml2", version = "1.2.0",upgrade="never")' matrix: fast_finish: true From 092b0cc33f2246e656152064c612c4c89a0d4e27 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Wed, 6 Nov 2019 23:08:20 -0800 Subject: [PATCH 12/64] update minimum package versions --- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fb666af..f706499 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,12 +45,12 @@ Suggests: testthat, covr Imports: - dplyr, + dplyr (>= 0.8.0), readxl (>= 1.0.0), - rvest (>= 0.3.0), + rvest (>= 0.3.2), tools (>= 3.1.0), lubridate (>= 1.7.0), purrr (>= 0.2.5), readr (>= 1.0.0), rstudioapi (>= 0.2), - xml2 + xml2 (>= 1.2.0) From ba8c14d00b3f1a6765c7d11f7e6d4cc47184c580 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Thu, 7 Nov 2019 08:10:50 -0800 Subject: [PATCH 13/64] attempting to make pkgdown site for {tidytuesdayR} --- .travis.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index e69b9a1..31e0e40 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,12 +12,12 @@ jobs: - r: release after_success: - Rscript -e 'covr::codecov()' - before_cache: + before_deploy: - Rscript -e 'remotes::install_cran("pkgdown")' - #deploy: - # provider: script - # script: Rscript -e 'pkgdown::deploy_site_github(verbose = TRUE)' - # skip_cleanup: true + deploy: + provider: script + script: Rscript -e 'pkgdown::deploy_site_github()' + skip_cleanup: true - r: 3.4 - r: release install: From 8452f3abfe4054eecd247ac4afa8f0e87f13994a Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Sun, 10 Nov 2019 16:41:59 -0800 Subject: [PATCH 14/64] bump version to dev number --- DESCRIPTION | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f706499..06ef685 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tidytuesdayR Type: Package Title: Access The Weekly TidyTuesday Project Dataset -Version: 0.2.1 +Version: 0.2.1.9000 Authors@R: c( person( From 9ad19c817d3b98134cfe86334ac1e82dd9f95677 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Sun, 10 Nov 2019 17:31:35 -0800 Subject: [PATCH 15/64] add print method and add more tests --- R/tt_load.R | 2 +- R/utils.R | 11 ++++++++++- tests/testthat/test-make_url.R | 24 ++++++++++++++++++++++++ tests/testthat/test-tt_load_gh.R | 26 +++++++++++++------------- tests/testthat/test-utils.R | 16 +++++++++++++++- 5 files changed, 63 insertions(+), 16 deletions(-) create mode 100644 tests/testthat/test-make_url.R diff --git a/R/tt_load.R b/R/tt_load.R index 159fc35..694c90d 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -115,6 +115,6 @@ tt_load_gh <- function(x, week) { ".files" = files_to_use, ".readme" = readme_html, ".url" = tt_git_url, - class = "tt_gh" + class = "tt" ) } diff --git a/R/utils.R b/R/utils.R index 8121fbb..e6ac83f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -5,7 +5,16 @@ #' @export print.tt_data <- function(x, ...) { readme(x) - message("Available Datasets:\n\t", paste(tools::file_path_sans_ext(names(x)), "\n\t", collapse = "")) + message("Available datasets:\n\t", paste(tools::file_path_sans_ext(names(x)), "\n\t", collapse = "")) +} + +#' @title print utility for tt_data objects +#' @inheritParams base::print +#' @importFrom tools file_path_sans_ext +#' @export +print.tt <- function(x,...){ + readme(x) + message("Available datasets for download:\n\t", paste(attr(x,".files"), "\n\t", collapse = "")) } #' @title Readme HTML maker and Viewer diff --git a/tests/testthat/test-make_url.R b/tests/testthat/test-make_url.R new file mode 100644 index 0000000..e0997e3 --- /dev/null +++ b/tests/testthat/test-make_url.R @@ -0,0 +1,24 @@ +context("test-make_url") + +test_that("valid dates work", { + url <- tt_make_url("2019-04-02") + expect_equal(basename(url), + "2019-04-02") + +}) + +test_that("valid year-week combinations work", { + url <- tt_make_url(2019,14) + url2 <- tt_make_url("2019",14) + + expect_equal(basename(url), + "2019-04-02") + expect_equal(basename(url2), + "2019-04-02") +}) + +test_that("invalid entries are flagged", { + expect_error(tt_make_url("xyz"), + "Entries must render to a valid date or year") +}) + diff --git a/tests/testthat/test-tt_load_gh.R b/tests/testthat/test-tt_load_gh.R index a2182e7..f1ef0c7 100644 --- a/tests/testthat/test-tt_load_gh.R +++ b/tests/testthat/test-tt_load_gh.R @@ -1,21 +1,21 @@ context("test-tt_load_gh") # check that correct data are returned -test_that("tt_load_gh returns tt_gh object when provided proper date", { - tt_gh <- tt_load_gh("2019-01-15") +test_that("tt_load_gh returns tt object when provided proper date", { + tt <- tt_load_gh("2019-01-15") - testthat::expect_s3_class(tt_gh, "tt_gh") - testthat::expect_equal(attr(tt_gh, ".files"), c("agencies.csv", "launches.csv")) - testthat::expect_equal(attr(tt_gh, ".url"), "https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") + testthat::expect_s3_class(tt, "tt") + testthat::expect_equal(attr(tt, ".files"), c("agencies.csv", "launches.csv")) + testthat::expect_equal(attr(tt, ".url"), "https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") }) # check that correct data are returned -test_that("tt_load_gh returns tt_gh object when 
provided proper year and TT week number", { - tt_gh <- tt_load_gh(2019, 3) +test_that("tt_load_gh returns tt object when provided proper year and TT week number", { + tt <- tt_load_gh(2019, 3) - testthat::expect_s3_class(tt_gh, "tt_gh") - testthat::expect_equal(attr(tt_gh, ".files"), c("agencies.csv", "launches.csv")) - testthat::expect_equal(attr(tt_gh, ".url"), "https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") + testthat::expect_s3_class(tt, "tt") + testthat::expect_equal(attr(tt, ".files"), c("agencies.csv", "launches.csv")) + testthat::expect_equal(attr(tt, ".url"), "https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") }) @@ -69,9 +69,9 @@ test_that("tt_load_gh returns error when week is not a valid entry between 1 and # test driven dev, new feature to add test_that("Returns simple list of object when no readme.md available", { - tt_gh <- tt_load_gh("2018-04-09") - expect_s3_class(tt_gh, "tt_gh") - expect_true(length(attr(tt_gh, ".readme")) == 0) # object should not exist + tt <- tt_load_gh("2018-04-09") + expect_s3_class(tt, "tt") + expect_true(length(attr(tt, ".readme")) == 0) # object should not exist }) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 617b864..f812c98 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -44,6 +44,20 @@ test_that("print.tt_data lists the available datasets", { testthat::expect_equal( capturedOutput$message, - "Available Datasets:\n\tvalue1 \n\tvalue2 \n\t\n" + "Available datasets:\n\tvalue1 \n\tvalue2 \n\t\n" ) }) + +test_that("print.tt lists all the available files for the weeks tt",{ + tt_obj <- tt_load_gh(2019, week = 16) + + capturedOutput <- capture_message({ + print(tt_obj) + }) + + expect_equal( + capturedOutput$message, + "Available datasets for download:\n\tbrexit.csv \n\tcorbyn.csv \n\tdogs.csv \n\teu_balance.csv \n\tpensions.csv \n\ttrade.csv \n\twomen_research.csv \n\t\n" + ) + +}) From bf2d67cf845287ea6905101977770c9fd1887193 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Sun, 10 Nov 2019 17:56:56 -0800 Subject: [PATCH 16/64] update documentation for print method --- NAMESPACE | 1 + man/print.tt.Rd | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 man/print.tt.Rd diff --git a/NAMESPACE b/NAMESPACE index 98e06ee..1c3e1f7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(print,tt) S3method(print,tt_data) S3method(print,tt_dataset_table) S3method(print,tt_dataset_table_list) diff --git a/man/print.tt.Rd b/man/print.tt.Rd new file mode 100644 index 0000000..c168c3b --- /dev/null +++ b/man/print.tt.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{print.tt} +\alias{print.tt} +\title{print utility for tt_data objects} +\usage{ +\method{print}{tt}(x, ...) 
+} +\arguments{ +\item{x}{an object used to select a method.} + +\item{...}{further arguments passed to or from other methods.} +} +\description{ +print utility for tt_data objects +} From 2b807375bd4a6af1a8b6ccf44ae00c441012b2b8 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Sun, 10 Nov 2019 19:41:10 -0800 Subject: [PATCH 17/64] Add README.Rmd --- .Rbuildignore | 1 + README.Rmd | 105 +++++++++++++++++++++++++++++++++++++++++++ readme.md | 122 +++++++++++++++++++++++++++++++++----------------- 3 files changed, 186 insertions(+), 42 deletions(-) create mode 100644 README.Rmd diff --git a/.Rbuildignore b/.Rbuildignore index 88f9db1..416d0e3 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,4 @@ +^README\.Rmd$ ^codecov\.yml$ ^appveyor\.yml$ ^\.travis\.yml$ diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..de9221f --- /dev/null +++ b/README.Rmd @@ -0,0 +1,105 @@ +--- +output: github_document +--- + + + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) +``` + +## tidytuesdayR + +Ellis Hughes + +[![Travis build status](https://travis-ci.com/thebioengineer/tidytuesdayR.svg?branch=master)](https://travis-ci.com/thebioengineer/tidytuesdayR) +[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/thebioengineer/tidytuesdayR?branch=master&svg=true)](https://ci.appveyor.com/project/thebioengineer/tidytuesdayR) +[![Coverage status](https://codecov.io/gh/thebioengineer/tidytuesdayR/branch/master/graph/badge.svg)](https://codecov.io/github/thebioengineer/tidytuesdayR?branch=master) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +{tidytuesdayR} has the main goal to make it easy to participate in the weekly [#TidyTuesday](https://github.com/rfordatascience/tidytuesday) project. +Currently this is done by assisting with the import of data posted on the [R4DataScience](https://github.com/rfordatascience) Tidy Tuesday repository. + +## Installation + +Currently this package is only available on GitHub: + +```{r, eval=FALSE} +#install.package("devtools") +devtools::install_github("thebioengineer/tidytuesdayR") +``` + +## Usage + +There are currently two methods to access the data from the respository. + +### tt_load() +The first and simplest way is to use the 'tt_load()' function. +This function has accepts two types of inputs to determine which data to grab. +It can be a date as a string in the YYYY-MM-DD format like below. + +```{r} +library(tidytuesdayR) +tt_data <- tt_load("2019-01-15") +``` + +Or the function can accept the year as the first argument, and which week of the year as the second. + +```{r} +tt_data <- tt_load(2019, week=3) +``` + +`tt_load()` naively downloads *all* the data that is available and stores them in the resulting `tt_data` object. +To access the data, use the `$` or `[[` notation and the name of the dataset. + +```{r, eval=FALSE} +tt_data$agencies +tt_data[["agencies"]] +``` + +### tt_load_gh() and tt_read_data() +The second method to access the data from the repository is to use the combination of `tt_load_gh()` and `tt_read_data()` functions. +`tt_load_gh()` takes similar arguments as `tt_load()`, in that either the date or a combination of year and week can be entered. + +```{r, eval = FALSE} +tt <- tt_load_gh("2019-01-15") +``` + +The `tt` object lists the available files for download. +To download the data, use the `tt_read_data()` function. 
+`tt_read_data()` expects the first argument to be the `tt` object. +The second argument can be a string indicating the name of the file to download from the repository, or the index in the `tt` object +```{r, eval = FALSE} +agencies <- tt %>% + tt_read_data("agencies.csv") + +# The first index of the tt object is `agencies.csv` +# agencies <- tt %>% +# tt_read_data(1) +``` + +## Tidy Tuesday Details + +The tt_data and tt objects both have a function for showing the readme for that week called `readme()`. +In addition, the print methods for both objects show the readme in a viewer and the available datasets in the console. + +```r +readme(tt_data) +print(tt_data) +``` + + ## Available Datasets: + ## agencies + ## launches + ## + +## Contributing +Please note that the 'tidytuesdayR' project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. + + + diff --git a/readme.md b/readme.md index 967ef51..4802459 100644 --- a/readme.md +++ b/readme.md @@ -1,74 +1,112 @@ -tidytuesdayR -================ -Ellis Hughes -[![Travis build status](https://travis-ci.com/thebioengineer/tidytuesdayR.svg?branch=master)](https://travis-ci.com/thebioengineer/tidytuesdayR) -[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/thebioengineer/tidytuesdayR?branch=master&svg=true)](https://ci.appveyor.com/project/thebioengineer/tidytuesdayR) -[![Coverage status](https://codecov.io/gh/thebioengineer/tidytuesdayR/branch/master/graph/badge.svg)](https://codecov.io/github/thebioengineer/tidytuesdayR?branch=master) -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + + +## tidytuesdayR + +Ellis Hughes -tidytuesdayR is made to assist with the import of data posted for #TidyTuesday by the [R4DataScience](https://github.com/rfordatascience) team. Just enter a string formatted as "YYYY-MM-dd", and if there is a tidytuesday dataset available, it will download the readme and the data. +[![Travis build +status](https://travis-ci.com/thebioengineer/tidytuesdayR.svg?branch=master)](https://travis-ci.com/thebioengineer/tidytuesdayR) +[![AppVeyor build +status](https://ci.appveyor.com/api/projects/status/github/thebioengineer/tidytuesdayR?branch=master&svg=true)](https://ci.appveyor.com/project/thebioengineer/tidytuesdayR) +[![Coverage +status](https://codecov.io/gh/thebioengineer/tidytuesdayR/branch/master/graph/badge.svg)](https://codecov.io/github/thebioengineer/tidytuesdayR?branch=master) +[![License: +MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +{tidytuesdayR} has the main goal to make it easy to participate in the +weekly [\#TidyTuesday](https://github.com/rfordatascience/tidytuesday) +project. Currently this is done by assisting with the import of data +posted on the [R4DataScience](https://github.com/rfordatascience) Tidy +Tuesday repository. ## Installation Currently this package is only available on GitHub: + ``` r +#install.package("devtools") devtools::install_github("thebioengineer/tidytuesdayR") ``` ## Usage -The way this is used is by simply calling the 'tt_load' function and entering the date to pull. +There are currently two methods to access the data from the respository. + +### tt\_load() -``` r +The first and simplest way is to use the ‘tt\_load()’ function. This +function has accepts two types of inputs to determine which data to +grab. 
It can be a date as a string in the YYYY-MM-DD format like below. + +``` r library(tidytuesdayR) -tt_data<-tt_load("2019-01-15") +tt_data <- tt_load("2019-01-15") +``` +Or the function can accept the year as the first argument, and which +week of the year as the second. + +``` r +tt_data <- tt_load(2019, week=3) ``` -You can also enter the year and which week that you are interested in as well. +`tt_load()` naively downloads *all* the data that is available and +stores them in the resulting `tt_data` object. To access the data, use +the `$` or `[[` notation and the name of the dataset. + ``` r -tt_data<-tt_load(2019,week=3) +tt_data$agencies +tt_data[["agencies"]] +``` + +### tt\_load\_gh() and tt\_read\_data() +The second method to access the data from the repository is to use the +combination of `tt_load_gh()` and `tt_read_data()` functions. +`tt_load_gh()` takes similar arguments as `tt_load()`, in that either +the date or a combination of year and week can be entered. + +``` r +tt <- tt_load_gh("2019-01-15") ``` -To view the readme and the datasets available, simply print the tt_data object. +The `tt` object lists the available files for download. To download the +data, use the `tt_read_data()` function. `tt_read_data()` expects the +first argument to be the `tt` object. The second argument can be a +string indicating the name of the file to download from the repository, +or the index in the `tt` object ``` r -print(tt_data) +agencies <- tt %>% + tt_read_data("agencies.csv") + +# The first index of the tt object is `agencies.csv` +# agencies <- tt %>% +# tt_read_data(1) ``` - ## Available Datasets: - ## agencies - ## launches - ## +## Tidy Tuesday Details -Finally, to access the datasets, use the `$` access and the name of the dataset +The tt\_data and tt objects both have a function for showing the readme +for that week called `readme()`. In addition, the print methods for both +objects show the readme in a viewer and the available datasets in the +console. ``` r -tt_data$agencies +readme(tt_data) +print(tt_data) ``` - ## # A tibble: 74 x 19 - ## agency count ucode state_code type class tstart tstop short_name name - ## - ## 1 RVSN 1528 RVSN SU O/LA D 1960 1991~ RVSN Rake~ - ## 2 UNKS 904 GUKOS SU O/LA D 1986 ~ 1991 UNKS Upra~ - ## 3 NASA 469 NASA US O/LA~ C 1958 ~ - NASA Nati~ - ## 4 USAF 388 USAF US O/LA~ D 1947 ~ - USAF Unit~ - ## 5 AE 258 AE F O/LA B 1980 ~ * Arianespa~ Aria~ - ## 6 AFSC 247 AFSC US LA D 1961 ~ 1992~ AFSC US A~ - ## 7 VKSR 200 GUKOS RU O/LA D 1997 ~ 2001~ VKS RVSN Voen~ - ## 8 CALT 181 CALT CN LA/L~ C 1957 ~ - CALT Zhon~ - ## 9 FKA 128 MOM RU O/LA C 2004 2016~ Roskosmos Fede~ - ## 10 SAST 105 SBA CN O/LA~ B 1993 - SAST Shan~ - ## # ... with 64 more rows, and 9 more variables: location , - ## # longitude , latitude , error , parent , - ## # short_english_name , english_name , unicode_name , - ## # agency_type - - -## Contributing -Please note that the 'tidytuesdayR' project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. +``` +## Available Datasets: +## agencies +## launches +## +``` +## Contributing +Please note that the ‘tidytuesdayR’ project is released with a +[Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to +this project, you agree to abide by its terms. 
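One thing the usage notes above stop short of showing: as of [PATCH 07/64], `tt_load()` forwards its `...` arguments on to `tt_read_data()`, so parser settings can be tuned for all of a week's files in a single call. A minimal sketch under that assumption, reusing the example date from the readmes (the `guess_max` value of 10000 is arbitrary):

``` r
library(tidytuesdayR)

# guess_max is forwarded through tt_read_data() to the readr/readxl
# readers, so more rows are scanned before column types are guessed
tt_data <- tt_load("2019-01-15", guess_max = 10000)
```

Because the same arguments reach the parser of every file in the week, they should be valid for all of them, as the `tt_load()` documentation itself warns.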
From 33c684c8f8b401671fc417d1b6be71169782e8bc Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Sun, 10 Nov 2019 21:31:20 -0800 Subject: [PATCH 18/64] Make pkgdown work (#32) * try to get pkgdown to use the readme * Add pkgdown to suggests --- .Rbuildignore | 2 ++ .gitignore | 1 + DESCRIPTION | 3 ++- _pkgdown.yml | 0 4 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 _pkgdown.yml diff --git a/.Rbuildignore b/.Rbuildignore index 416d0e3..037d047 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ +^docs$ +^_pkgdown\.yml$ ^README\.Rmd$ ^codecov\.yml$ ^appveyor\.yml$ diff --git a/.gitignore b/.gitignore index 5b6a065..3bf2496 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .Rhistory .RData .Ruserdata +docs/* diff --git a/DESCRIPTION b/DESCRIPTION index 06ef685..1db7620 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,7 +43,8 @@ RoxygenNote: 6.1.1 Depends: R (>= 3.4.0) Suggests: testthat, - covr + covr, + pkgdown Imports: dplyr (>= 0.8.0), readxl (>= 1.0.0), diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..e69de29 From c476736306c9c58a8255034c406ad8f09fd3d30c Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Sun, 10 Nov 2019 22:13:01 -0800 Subject: [PATCH 19/64] Update DESCRIPTION bump version number --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1db7620..2f3b7db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tidytuesdayR Type: Package Title: Access The Weekly TidyTuesday Project Dataset -Version: 0.2.1.9000 +Version: 0.2.2 Authors@R: c( person( From 85e87accf2fbd965d0cee90191c97694183e95fc Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Wed, 18 Dec 2019 23:00:59 -0800 Subject: [PATCH 20/64] remove README.Rmd --- README.Rmd | 105 ----------------------------------------------------- readme.md | 4 -- 2 files changed, 109 deletions(-) delete mode 100644 README.Rmd diff --git a/README.Rmd b/README.Rmd deleted file mode 100644 index de9221f..0000000 --- a/README.Rmd +++ /dev/null @@ -1,105 +0,0 @@ ---- -output: github_document ---- - - - -```{r setup, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - fig.path = "man/figures/README-", - out.width = "100%" -) -``` - -## tidytuesdayR - -Ellis Hughes - -[![Travis build status](https://travis-ci.com/thebioengineer/tidytuesdayR.svg?branch=master)](https://travis-ci.com/thebioengineer/tidytuesdayR) -[![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/thebioengineer/tidytuesdayR?branch=master&svg=true)](https://ci.appveyor.com/project/thebioengineer/tidytuesdayR) -[![Coverage status](https://codecov.io/gh/thebioengineer/tidytuesdayR/branch/master/graph/badge.svg)](https://codecov.io/github/thebioengineer/tidytuesdayR?branch=master) -[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) - -{tidytuesdayR} has the main goal to make it easy to participate in the weekly [#TidyTuesday](https://github.com/rfordatascience/tidytuesday) project. -Currently this is done by assisting with the import of data posted on the [R4DataScience](https://github.com/rfordatascience) Tidy Tuesday repository. - -## Installation - -Currently this package is only available on GitHub: - -```{r, eval=FALSE} -#install.package("devtools") -devtools::install_github("thebioengineer/tidytuesdayR") -``` - -## Usage - -There are currently two methods to access the data from the respository. 
- -### tt_load() -The first and simplest way is to use the 'tt_load()' function. -This function has accepts two types of inputs to determine which data to grab. -It can be a date as a string in the YYYY-MM-DD format like below. - -```{r} -library(tidytuesdayR) -tt_data <- tt_load("2019-01-15") -``` - -Or the function can accept the year as the first argument, and which week of the year as the second. - -```{r} -tt_data <- tt_load(2019, week=3) -``` - -`tt_load()` naively downloads *all* the data that is available and stores them in the resulting `tt_data` object. -To access the data, use the `$` or `[[` notation and the name of the dataset. - -```{r, eval=FALSE} -tt_data$agencies -tt_data[["agencies"]] -``` - -### tt_load_gh() and tt_read_data() -The second method to access the data from the repository is to use the combination of `tt_load_gh()` and `tt_read_data()` functions. -`tt_load_gh()` takes similar arguments as `tt_load()`, in that either the date or a combination of year and week can be entered. - -```{r, eval = FALSE} -tt <- tt_load_gh("2019-01-15") -``` - -The `tt` object lists the available files for download. -To download the data, use the `tt_read_data()` function. -`tt_read_data()` expects the first argument to be the `tt` object. -The second argument can be a string indicating the name of the file to download from the repository, or the index in the `tt` object -```{r, eval = FALSE} -agencies <- tt %>% - tt_read_data("agencies.csv") - -# The first index of the tt object is `agencies.csv` -# agencies <- tt %>% -# tt_read_data(1) -``` - -## Tidy Tuesday Details - -The tt_data and tt objects both have a function for showing the readme for that week called `readme()`. -In addition, the print methods for both objects show the readme in a viewer and the available datasets in the console. - -```r -readme(tt_data) -print(tt_data) -``` - - ## Available Datasets: - ## agencies - ## launches - ## - -## Contributing -Please note that the 'tidytuesdayR' project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. - - - diff --git a/readme.md b/readme.md index dea6145..3fea70a 100644 --- a/readme.md +++ b/readme.md @@ -1,7 +1,3 @@ - - - - # tidytuesdayR Ellis Hughes From 1ed932699a5ddc2d7e86e16504704f0bdc535cea Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Mon, 27 Apr 2020 18:01:21 -0700 Subject: [PATCH 21/64] Gdpr data (#45) * Path replace fix (#43) * update URL path corrections * Improvement on file-finding abilities... --- R/tt_load.R | 17 +++++------------ R/tt_read_data.R | 4 ++-- tests/testthat/test-tt_load_gh.R | 9 +++++++++ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/R/tt_load.R b/R/tt_load.R index 75f17c9..cf23e10 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -94,19 +94,12 @@ tt_load_gh <- function(x, week) { if(length(files_to_use)>0 & length(readme_html)>0){ files_in_readme <- readme_html %>% xml2::read_html() %>% - rvest::html_node("code") %>% rvest::html_text() %>% - base::strsplit("\\n") %>% - `[[`(1) %>% - purrr::map_chr(function(x){ - file_match<-do.call('c',lapply(files_to_use,grepl,x)) - if(any(file_match)){ - matched_file <- files_to_use[file_match] - }else{ - matched_file <- NA - } - return(matched_file) - }) + strsplit("\\s+") %>% + do.call('c',.) 
%>% + grep(".+[.].{3}",.,value = TRUE) %>% + .[ .%in% files_to_use] %>% + unique files_in_readme<- files_in_readme[!is.na(files_in_readme)] diff --git a/R/tt_read_data.R b/R/tt_read_data.R index c449115..62d45c9 100644 --- a/R/tt_read_data.R +++ b/R/tt_read_data.R @@ -31,7 +31,7 @@ tt_read_data <- function(tt, x, guess_max = 5000) { tt_read_data.character <- function(tt, x, guess_max = 5000) { if (x %in% attr(tt, ".files")) { - url <- paste0(gsub("tree", "blob", file.path(attr(tt, ".url"), x)), "?raw=true") + url <- paste0(gsub("/tree/", "/blob/", file.path(attr(tt, ".url"), x)), "?raw=true") tt_read_url(url, guess_max = guess_max) } else { stop(paste0( @@ -43,7 +43,7 @@ tt_read_data.character <- function(tt, x, guess_max = 5000) { tt_read_data.numeric <- function(tt, x, guess_max = 5000) { if (x > 0 & x <= length(attr(tt, ".files"))) { - url <- paste0(gsub("tree", "blob", file.path(attr(tt, ".url"), attr(tt, ".files")[x])), "?raw=true") + url <- paste0(gsub("/tree/", "/blob/", file.path(attr(tt, ".url"), attr(tt, ".files")[x])), "?raw=true") tt_read_url(url, guess_max = guess_max) } else { stop(paste0( diff --git a/tests/testthat/test-tt_load_gh.R b/tests/testthat/test-tt_load_gh.R index f12d518..901ce67 100644 --- a/tests/testthat/test-tt_load_gh.R +++ b/tests/testthat/test-tt_load_gh.R @@ -120,3 +120,12 @@ test_that("tt_load_gh ignores extra files/diretory paths", { expect_equal(tt_obj_2[1:3],c("grand_slam_timeline.csv","grand_slams.csv","player_dob.csv")) }) +test_that("tt_load_gh finds all the files in the readme", { + tt_obj <- tt_load_gh("2020-04-21") + + expect_equal(length(tt_obj),2) + expect_equal(tt_obj[1:2],c("gdpr_violations.tsv", "gdpr_text.tsv")) + +}) + + From 25803c2c0b8e2ddd330f4c16bcfd58e7bfba5582 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Sun, 17 May 2020 19:57:56 -0700 Subject: [PATCH 22/64] first updates to api --- DESCRIPTION | 2 +- NAMESPACE | 5 ++ R/get_tt_html.R | 28 +++++---- R/github_api.R | 129 +++++++++++++++++++++++++++++++++++++++++ R/tt_datasets.R | 66 +++++++-------------- R/tt_load.R | 94 ------------------------------ R/tt_load_gh.R | 44 ++++++++++++++ R/tt_make_url.R | 54 ++++++++--------- R/tt_master_file.R | 47 +++++++++++++++ R/utils.R | 25 +++----- man/base_64_to_char.Rd | 11 ++++ man/download_read.Rd | 3 +- man/github_contents.Rd | 21 +++++++ man/github_html.Rd | 28 +++++++++ man/github_page.Rd | 17 ++++++ man/identify_delim.Rd | 10 +++- test.html | 83 ++++++++++++++++++++++++++ 17 files changed, 468 insertions(+), 199 deletions(-) create mode 100644 R/github_api.R create mode 100644 R/tt_load_gh.R create mode 100644 R/tt_master_file.R create mode 100644 man/base_64_to_char.Rd create mode 100644 man/github_contents.Rd create mode 100644 man/github_html.Rd create mode 100644 man/github_page.Rd create mode 100644 test.html diff --git a/DESCRIPTION b/DESCRIPTION index 3c3365d..aceff87 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,7 @@ URL: https://github.com/thebioengineer/tidytuesdayR BugReports: https://github.com/thebioengineer/tidytuesdayR/issues Encoding: UTF-8 LazyData: true -RoxygenNote: 6.1.1 +RoxygenNote: 7.1.0 Depends: R (>= 3.4.0) Suggests: testthat (>= 2.1.0), diff --git a/NAMESPACE b/NAMESPACE index ee4cb00..e0fe991 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,10 @@ import(readxl) import(rvest) import(tools) import(xml2) +importFrom(httr,GET) +importFrom(httr,add_headers) +importFrom(jsonlite,base64_dec) +importFrom(jsonlite,read_json) importFrom(lubridate,as_date) importFrom(lubridate,day) 
importFrom(lubridate,is.Date) @@ -34,3 +38,4 @@ importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) importFrom(utils,download.file) importFrom(xml2,read_html) +importFrom(xml2,xml_add_sibling) diff --git a/R/get_tt_html.R b/R/get_tt_html.R index d496452..b22bf01 100644 --- a/R/get_tt_html.R +++ b/R/get_tt_html.R @@ -1,11 +1,19 @@ -#' @title Get TidyTuesday URL and HTML -#' @param git_url url to tidytuesday files -#' @importFrom xml2 read_html -get_tt_html <- function(git_url) { - tt_html <- try(xml2::read_html(git_url), silent = TRUE) - if (inherits(tt_html, "try-error")) { - stop(tt_html[1]) - } else { - return(tt_html) - } +#' @title Get TidyTuesday Readme and list of files and HTML +#' @param date date of tidytuesday of interest +#' @importFrom lubridate year +tt_compile <- function(date) { + + ttmf <- tt_master_file() + + #list of files + files <- ttmf %>% + filter(week_date == date) %>% + select(data_files, data_type, delim) + + readme <- github_html(file.path("data",year(date),date,"readme.md")) + + list( + files = files, + readme = readme + ) } diff --git a/R/github_api.R b/R/github_api.R new file mode 100644 index 0000000..849fc4e --- /dev/null +++ b/R/github_api.R @@ -0,0 +1,129 @@ +#' Read Contents from GitHub +#' +#' Provide tool to read raw data and return as text the raw data using the github api +#' +#' @param path Relative path from within the TidyTuesday Repository +#' @param read_func Function to parse the text. Defaults to \code{read.csv} +#' @param ... optional arguments to pass to \code{read_func} +#' +#' @return result of read_func on the content +#' +#' @importFrom jsonlite read_json +github_contents <- + function(path, + read_func = read.csv, + ..., + record_ref = TRUE) { + base_url <- + file.path("https://api.github.com/repos/rfordatascience/tidytuesday/contents", + path) + url_json <- try(read_json(base_url), silent = TRUE) + if (!inherits(url_json, "try-error")) { + content <- read_func(base_64_to_char(url_json$content), ...) + + if (record_ref) { + attr(content, ".sha") <- url_json$sha + } + + content + } else{ + NULL + } + } + +#' Read Contents from GitHub as html +#' +#' provide tools to read and process readme's as html using the github api +#' +#' @param path Relative path from within the TidyTuesday Repository to contents that can be returned as HTML +#' @param ... optional arguments to pass to \code{read_html} +#' +#' @return result of read_html on the contents +#' +#' @examples +#' \dontrun{ +#' +#' main_readme <- github_html("README.md") +#' week_readme <- github_html("data/2020/2020-01-07/readme.md") +#' +#' } +#' +#' @importFrom httr GET add_headers +#' @importFrom xml2 read_html +github_html <- + function(path, + ...) { + base_url <- + file.path("https://api.github.com/repos/rfordatascience/tidytuesday/contents", + path) + url_response <- + GET(base_url, + add_headers(Accept = "application/vnd.github.v3.html")) + if (url_response$status_code == 200) { + github_page(read_html(x = url_response$content, ...)) + } else{ + NULL + } +} + + +#' Read Contents from GitHub as html +#' +#' provide tools to read and process readme's as html using the github api +#' +#' @param path Relative path from within the TidyTuesday Repository to contents that can be returned as HTML +#' @param ... 
optional arguments to pass to \code{read_html} +#' +#' @return result of read_html on the contents +#' +#' @examples +#' \dontrun{ +#' +#' main_readme <- github_html("README.md") +#' week_readme <- github_html("data/2020/2020-01-07/readme.md") +#' +#' } +#' +#' @importFrom httr GET add_headers +#' @importFrom xml2 read_html +github_sha <- + function(dirpath, branch = "master") { + base_url <- + file.path( + "https://api.github.com/repos/rfordatascience/tidytuesday/git/trees", + URLencode(paste(branch, dirpath, sep = ":")) + ) + url_response <- GET(base_url) + if (url_response$status_code == 200) { + do.call('rbind', lapply(jsonlite::parse_json(rawToChar( + url_response$content + ))$tree, data.frame, stringsAsFactors = FALSE)) + } else{ + NULL + } + } + + +#' read json base64 contents from github +#' +#' provide tools to read and process data using the github api +#' +#' @importFrom jsonlite base64_dec +base_64_to_char <- function(b64){ + rawToChar(jsonlite::base64_dec(b64)) +} + +#' Create shell for HTML content from github +#' +#' Provide the necessary section to wrap around raw html content read from github. +#' +#' @param content html content +#' +#' @return xml_document with github header +#' +#' @importFrom xml2 read_html xml_add_sibling +github_page <- function(page_content){ + header <- read_html("") + xml_add_sibling(header,page_content) +} + diff --git a/R/tt_datasets.R b/R/tt_datasets.R index 13841d8..c69cd49 100644 --- a/R/tt_datasets.R +++ b/R/tt_datasets.R @@ -5,21 +5,16 @@ #' @importFrom purrr set_names map #' @export tt_available <- function() { - tt_year <- tt_years() - pastDatasets <- purrr::map( - tt_year[-which.max(tt_year)], - ~ tt_datasets(.x) - ) %>% - purrr::set_names(as.character(tt_year[-which.max(tt_year)])) - currDatasets <- tt_datasets() %>% - list() %>% - purrr::set_names(as.character(tt_year[which.max(tt_year)])) + tt_update_master_file() - datasets <- c( - currDatasets, - pastDatasets - )[tt_year[order(tt_year, decreasing = TRUE)]] + tt_year <- sort(tt_years(),decreasing = TRUE,) + + datasets <- setNames(vector("list", length(tt_year)), tt_year) + + for(year in tt_year){ + datasets[[as.character(year)]] <- tt_datasets(year) + } structure(datasets, class = c("tt_dataset_table_list") @@ -28,31 +23,20 @@ tt_available <- function() { #' @title Available datasets #' @description list available datasets for that year -#' @param year numeric entry representing the year of tidytuesday you want the list of datasets for. Leave empty for most recent year. +#' @param year numeric entry representing the year of tidytuesday you want the list of datasets +#' for. Leave empty for most recent year. #' @import xml2 #' @import rvest #' @export #' tt_datasets <- function(year) { - if (missing(year)) { - url <- - "https://github.com/rfordatascience/tidytuesday/blob/master/README.md" - table <- 1 - } else { - url <- file.path( - "https://github.com/rfordatascience/tidytuesday/tree/master/data", year - ) - table <- 2 - } - - datasets <- url %>% - xml2::read_html() %>% - rvest::html_nodes("table") %>% - `[`(table) - + readme <- github_html(file.path("data",year,"readme.md")) + datasets <- readme %>% + html_table() %>% + `[`(1) structure( datasets, - ".html" = datasets, + .html = readme, class = "tt_dataset_table" ) } @@ -60,27 +44,19 @@ tt_datasets <- function(year) { #' @title print utility for tt_dataset_table object #' @inheritParams base::print #' @param printConsole should output go to the console? 
TRUE/FALSE -#' @importFrom purrr walk #' @importFrom rstudioapi isAvailable viewer -#' @importFrom rvest html_table +#' @importFrom xml2 write_html #' @export print.tt_dataset_table <- function(x, ..., printConsole = FALSE) { if (rstudioapi::isAvailable() & !printConsole) { - tmpHTML <- setup_doc() - x$html %>% - as.character() %>% - purrr::walk(~ cat(gsub( - "href=\"/rfordatascience/tidytuesday/", - "href=\"https://github.com/rfordatascience/tidytuesday/", - .x - ), file = tmpHTML, append = TRUE)) - cat("
", file = tmpHTML, append = TRUE) - cat("", file = tmpHTML, append = TRUE) + tmpHTML <- tempfile(fileext = ".html") + readme <- attr(x,".html") + write_html(readme, file = tmpHTML) rstudioapi::viewer(url = tmpHTML) } else { - attr(x, ".html") %>% - rvest::html_table() + data.frame(x) } + invisible(x) } #' @title print utility for tt_dataset_table_list object diff --git a/R/tt_load.R b/R/tt_load.R index cf23e10..933cb87 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -24,97 +24,3 @@ tt_load <- function(x, week, ...) { ) } -# #' @title access data in tt_data object -# #' @param x tt_data object -# #' @param name name of dataset to access -# #' @exportMethod `$` -# `$.tt_data` <-function(x,name){ -# x[[ name ]] -# } - -#' @title Load TidyTuesday data from Github -#' -#' @description Pulls the Readme and URLs of the data from the TidyTuesday github folder based on the date provided -#' -#' @param x string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year -#' @param week left empty unless x is a numeric year entry, in which case the week of interest should be entered -#' -#' @return tt_gh object. List object with the following entries: readme, files, url -#' @export -#' -#' @importFrom xml2 read_html -#' @importFrom lubridate year -#' @importFrom purrr map_chr -#' @importFrom tools file_path_sans_ext file_ext -#' @import rvest -#' @import dplyr -#' -#' @examples -#' tt_gh <- tt_load_gh("2019-01-15") -#' -#' readme(tt_gh) -tt_load_gh <- function(x, week) { - if (missing(x)) { - on.exit({ - print(tt_available()) - }) - stop("Enter either the year or date of the TidyTuesday Data to extract!") - } - - tt_git_url <- tt_make_url(x, week) - message("--- Downloading #TidyTuesday Information for ",basename(tt_git_url)," ----") - tt_gh_page <- get_tt_html(tt_git_url) - - # Extract the raw text as a list - readme_html <- tt_gh_page %>% - rvest::html_nodes(".Box-body") %>% - as.character() - - readme_html <- gsub( - "href=\"/rfordatascience/tidytuesday/", - "href=\"https://github.com/rfordatascience/tidytuesday/", - readme_html - ) - - # Find Files - available_files <- tt_gh_page %>% - rvest::html_nodes(".files") %>% - rvest::html_nodes(".content a") %>% - rvest::html_attrs() %>% - purrr::map_chr(`[`, "title") - - files_to_use <- available_files - - # remove readme or directory folders or pictures - files_to_use <- files_to_use[!(tolower(files_to_use) %in% "readme.md" | - file_path_sans_ext(files_to_use) == files_to_use| - tolower(file_ext(files_to_use)) %in% c("png","jpg","rmd","r"))] - - # do not try if we don't have a read me or no files listed - if(length(files_to_use)>0 & length(readme_html)>0){ - files_in_readme <- readme_html %>% - xml2::read_html() %>% - rvest::html_text() %>% - strsplit("\\s+") %>% - do.call('c',.) 
%>% - grep(".+[.].{3}",.,value = TRUE) %>% - .[ .%in% files_to_use] %>% - unique - - files_in_readme<- files_in_readme[!is.na(files_in_readme)] - - if(length(files_in_readme)>0){ - files_to_use<-files_in_readme - } - } - - message("--- Identified ",length(files_to_use)," files available for download ----") - - structure( - files_to_use, - ".files" = files_to_use, - ".readme" = readme_html, - ".url" = tt_git_url, - class = "tt" - ) -} diff --git a/R/tt_load_gh.R b/R/tt_load_gh.R new file mode 100644 index 0000000..953e6d5 --- /dev/null +++ b/R/tt_load_gh.R @@ -0,0 +1,44 @@ +#' @title Load TidyTuesday data from Github +#' +#' @description Pulls the Readme and URLs of the data from the TidyTuesday github folder based on the date provided +#' +#' @param x string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year +#' @param week left empty unless x is a numeric year entry, in which case the week of interest should be entered +#' +#' @return tt_gh object. List object with the following entries: readme, files, url +#' @export +#'' +#' @examples +#' tt_gh <- tt_load_gh("2019-01-15") +#' +#' readme(tt_gh) +tt_load_gh <- function(x, week) { + + if (missing(x)) { + on.exit({ + print(tt_available()) + }) + stop("Enter either the year or date of the TidyTuesday Data to extract!") + } + + #Update master file reference + tt_update_master_file() + + #Check Dates + tt_date <- tt_check_date(x, week) + + message("--- Compiling #TidyTuesday Information for ",tt_date," ----") + + # Find Files and extract readme + tt_compilation <- tt_compile(tt_date) + + + message("--- There are ", nrow(tt_compilation$files), " file(s) available in this tidy tuesday ---") + + structure( + tt_compilation$files$data_files, + ".files" = tt_compilation$files, + ".readme" = tt_compilation$readme, + class = "tt" + ) +} diff --git a/R/tt_make_url.R b/R/tt_make_url.R index 8fcb0ba..5b2ef4c 100644 --- a/R/tt_make_url.R +++ b/R/tt_make_url.R @@ -3,7 +3,7 @@ #' @param x either a string or numeric entry indicating the full date of #' @param week left empty unless x is a numeric year entry, in which case the week of interest should be entered #' -tt_make_url <- function(x, week) { +tt_check_date <- function(x, week) { if (valid_date(x)) { tt_make_url.date(x) } else if (valid_year(x)) { @@ -18,10 +18,16 @@ tt_make_url.date <- function(x) { tt_formatted_date <- tt_date_format(x) tt_folders <- tt_weeks(tt_year) if (!as.character(tt_formatted_date) %in% tt_folders[["folders"]]) { - stop(paste0(tt_formatted_date, " is not a date that has TidyTuesday data.\n\tDid you mean: ", tt_closest_date(tt_formatted_date, tt_folders$folders), "?")) + stop( + paste0( + tt_formatted_date, + " is not a date that has TidyTuesday data.\n\tDid you mean: ", + tt_closest_date(tt_formatted_date, tt_folders$folders), + "?" + ) + ) } - - file.path("https://github.com/rfordatascience/tidytuesday/tree/master/data", tt_year, tt_formatted_date) + tt_formatted_date } tt_make_url.year <- function(x, week) { @@ -38,49 +44,39 @@ tt_make_url.year <- function(x, week) { stop(paste0("Week ", week, " of TidyTuesday for ", x," does not have data available for download from github.")) } - file.path("https://github.com/rfordatascience/tidytuesday/tree/master/data", x, tt_date) + tt_date } + + tt_weeks <- function(year) { + tt_year <- tt_years() + if (!as.character(year) %in% tt_year) { stop(paste0( "TidyTuesday did not exist for ", year, ". 
\n\t TidyTuesday has only existed from ", - min(as.numeric(tt_year)), " to ", max(as.numeric(tt_year)) - )) + min(tt_year)), " to ", max(tt_year) + ) } - tt_base_url <- file.path("https://github.com/rfordatascience/tidytuesday/tree/master/data", year) - tt_base_html <- xml2::read_html(tt_base_url) - tt_folders <- tt_base_html %>% - rvest::html_nodes(".files") %>% - rvest::html_nodes(".content") %>% - rvest::html_nodes("a") %>% - rvest::html_attr("title") - - tt_folders <- tt_folders[valid_date(tt_folders)] + ttmf <- tt_master_file() + tt_week <- unique(basename(ttmf$weeks[ttmf$year == year])) - weekNum <- tt_base_html %>% - rvest::html_nodes("table") %>% - `[[`(2) %>% - rvest::html_table() + weekNum <- tt_week %>% + as.Date(format = "%Y-%m-%d") %>% + `+`(3) %>% # move to accomodate + lubridate::week() list( week_desc = weekNum, - folders = tt_folders + folders = tt_week ) } tt_years <- function() { - tt_years <- xml2::read_html("https://github.com/rfordatascience/tidytuesday/tree/master/data") %>% - rvest::html_nodes(".files") %>% - rvest::html_nodes(".content") %>% - rvest::html_nodes("a") %>% - rvest::html_attr("title") - suppressWarnings({ - tt_years[!is.na(as.numeric(tt_years))] - }) + unique(tt_master_file()$year) } #' @importFrom lubridate as_date is.Date diff --git a/R/tt_master_file.R b/R/tt_master_file.R new file mode 100644 index 0000000..043a54a --- /dev/null +++ b/R/tt_master_file.R @@ -0,0 +1,47 @@ +#' Get Master List of Files from TidyTuesday +#' +#' Import or update dataset from github that records the entire list of objects from tidytuesday +#' +#' @keywords internal +tt_update_master_file <- function(force = FALSE){ + # get sha to see if need to update + sha_df <- github_sha("static") + sha <- sha_df$sha[sha_df$path == "tt_data_type.csv"] + + if( is.null(tt_master_file()) || sha != attr(tt_master_file(), ".sha") || force ){ + tt_master_file( + github_contents( + "static/tt_data_type.csv", + read_func = function(x, ...) { + read.csv(text = x, + header = TRUE, + stringsAsFactors = FALSE) + } + ) + ) + } +} + +#' Get Master List of Files from Local Environment +#' +#' return or update tt master file +#' +#' @param assign value to overwrite the TT_MASTER_ENV$TT_MASTER_FILE contents with +#' +#' @keywords internal +tt_master_file <- function(assign = NULL){ + if(!is.null(assign)){ + TT_MASTER_ENV$TT_MASTER_FILE <- assign + }else{ + TT_MASTER_ENV$TT_MASTER_FILE + } +} + +#' The Master List of Files from TidyTuesday +#' +#' @keywords internal + +TT_MASTER_ENV <- new.env() +TT_MASTER_ENV$TT_MASTER_FILE <- NULL + + diff --git a/R/utils.R b/R/utils.R index d5a7a51..3a6bf2c 100644 --- a/R/utils.R +++ b/R/utils.R @@ -6,6 +6,7 @@ print.tt_data <- function(x, ...) { readme(x) message("Available datasets:\n\t", paste(tools::file_path_sans_ext(names(x)), "\n\t", collapse = "")) + invisible(x) } #' @title print utility for tt_data objects @@ -13,12 +14,15 @@ print.tt_data <- function(x, ...) 
{ #' @importFrom tools file_path_sans_ext #' @export print.tt <- function(x,...){ - message("Available datasets for download:\n\t", paste(attr(x,".files"), "\n\t", collapse = "")) + message("Available datasets in this TidyTuesday:\n\t", paste(attr(x,".files")[[1]], "\n\t", collapse = "")) + invisible(x) } #' @title Readme HTML maker and Viewer #' @param tt tt_data object for printing #' @importFrom rstudioapi viewer +#' @importFrom xml2 write_html +#' @return NULL #' @export readme <- function(tt) { if ("tt_data" %in% class(tt)) { @@ -27,21 +31,10 @@ readme <- function(tt) { if (length(attr(tt, ".readme")) > 0) { # if running in rstudio, print out that if (rstudioapi::isAvailable()) { - rstudioapi::viewer(url = tt_make_html(tt)) + tmpdir <- tempfile(fileext = ".html") + write_html(attr(tt, ".readme"), file = tmpdir) + rstudioapi::viewer(url = tmpdir) } } -} - -tt_make_html <- function(x) { - tmpHTML <- tempfile(fileext = ".html") - cat(c( - "", - "", - "", - "" - ), file = tmpHTML, sep = " ") - cat(attr(x, ".readme"), file = tmpHTML, append = TRUE) - cat("", file = tmpHTML, append = TRUE) - return(tmpHTML) + invisible(NULL) } diff --git a/man/base_64_to_char.Rd b/man/base_64_to_char.Rd new file mode 100644 index 0000000..b1f236f --- /dev/null +++ b/man/base_64_to_char.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\name{base_64_to_char} +\alias{base_64_to_char} +\title{read contents from github} +\usage{ +base_64_to_char(b64) +} +\description{ +provide tools to read and process data using the github api +} diff --git a/man/download_read.Rd b/man/download_read.Rd index 37b595c..34a9b8a 100644 --- a/man/download_read.Rd +++ b/man/download_read.Rd @@ -4,8 +4,7 @@ \alias{download_read} \title{utility to assist with 'reading' urls that cannot normally be read by file functions} \usage{ -download_read(url, func, ..., guess_max, mode = "w", - find_delim = FALSE) +download_read(url, func, ..., guess_max, mode = "w", find_delim = FALSE) } \arguments{ \item{url}{path to online file to be read} diff --git a/man/github_contents.Rd b/man/github_contents.Rd new file mode 100644 index 0000000..886a0bb --- /dev/null +++ b/man/github_contents.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\name{github_contents} +\alias{github_contents} +\title{Read Contents from GitHub} +\usage{ +github_contents(path, read_func = read.csv, ...) +} +\arguments{ +\item{path}{Relative path from within the TidyTuesday Repository} + +\item{read_func}{Function to parse the text. Defaults to \code{read.csv}} + +\item{...}{optional arguments to pass to \code{read_func}} +} +\value{ +result of read_func on the content +} +\description{ +Provide tool to read raw data and return as text the raw data using the github api +} diff --git a/man/github_html.Rd b/man/github_html.Rd new file mode 100644 index 0000000..3fce528 --- /dev/null +++ b/man/github_html.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\name{github_html} +\alias{github_html} +\title{Read Contents from GitHub as html} +\usage{ +github_html(path, ...) 
+} +\arguments{ +\item{path}{Relative path from within the TidyTuesday Repository to contents that can be returned as HTML} + +\item{...}{optional arguments to pass to \code{read_html}} +} +\value{ +result of read_html on the contents +} +\description{ +provide tools to read and process readme's as html using the github api +} +\examples{ +\dontrun{ + +main_readme <- github_html("README.md") +week_readme <- github_html("data/2020/2020-01-07/readme.md") + +} + +} diff --git a/man/github_page.Rd b/man/github_page.Rd new file mode 100644 index 0000000..b61026b --- /dev/null +++ b/man/github_page.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\name{github_page} +\alias{github_page} +\title{Create shell for HTML content from github} +\usage{ +github_page(page_content) +} +\arguments{ +\item{content}{html content} +} +\value{ +xml_document with github header +} +\description{ +Provide the necessary section to wrap around raw html content read from github. +} diff --git a/man/identify_delim.Rd b/man/identify_delim.Rd index 6cc626c..7d12936 100644 --- a/man/identify_delim.Rd +++ b/man/identify_delim.Rd @@ -4,8 +4,14 @@ \alias{identify_delim} \title{Identify potential delimeters of file} \usage{ -identify_delim(path, delims = c("\\t", ",", " ", "|", ";"), n = 10, - comment = "#", skip = 0, quote = "\\"") +identify_delim( + path, + delims = c("\\t", ",", " ", "|", ";"), + n = 10, + comment = "#", + skip = 0, + quote = "\\"" +) } \arguments{ \item{path}{path to file} diff --git a/test.html b/test.html new file mode 100644 index 0000000..be30c4c --- /dev/null +++ b/test.html @@ -0,0 +1,83 @@ +

+[image: xkcd comic — https://xkcd.com/936/]
+
+XKCD Source for Comic
+
+Passwords
+
+This week's data is all about passwords. Data is sourced from Information is Beautiful, with the graphic coming from the same group here.
+
+There's lots of additional information about password quality & strength in the source Doc. Please note that the "strength" column in this dataset is relative to these common aka "bad" passwords and YOU SHOULDN'T USE ANY OF THEM!
+
+Wikipedia has a nice article on password strength as well.
+
+Get the data here
+
+# Get the Data
+
+passwords <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv')
+
+# Or read in with tidytuesdayR package (https://github.com/thebioengineer/tidytuesdayR)
+# PLEASE NOTE TO USE 2020 DATA YOU NEED TO UPDATE tidytuesdayR from GitHub
+
+# Either ISO-8601 date or year/week works!
+
+# Install via devtools::install_github("thebioengineer/tidytuesdayR")
+
+tuesdata <- tidytuesdayR::tt_load('2020-01-14')
+tuesdata <- tidytuesdayR::tt_load(2020, week = 3)
+
+passwords <- tuesdata$passwords
+
+Data Dictionary
+
+passwords.csv
+
+| variable          | class     | description                                                                                                                            |
+|-------------------|-----------|----------------------------------------------------------------------------------------------------------------------------------------|
+| rank              | double    | popularity in their database of released passwords                                                                                     |
+| password          | character | Actual text of the password                                                                                                            |
+| category          | character | What category does the password fall in to?                                                                                            |
+| value             | double    | Time to crack by online guessing                                                                                                       |
+| time_unit         | character | Time unit to match with value                                                                                                          |
+| offline_crack_sec | double    | Time to crack offline in seconds                                                                                                       |
+| rank_alt          | double    | Rank 2                                                                                                                                 |
+| strength          | double    | Strength = quality of password where 10 is highest, 1 is lowest, please note that these are relative to these generally bad passwords  |
+| font_size         | double    | Used to create the graphic for KIB                                                                                                     |
\ No newline at end of file From 99143e3575de26229359f13b6e4bba87d46d4d11 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Sun, 17 May 2020 23:25:52 -0700 Subject: [PATCH 23/64] update github calls to add blob and authentication --- NAMESPACE | 8 +- R/github_api.R | 228 +++++++++++++++++++---- R/{get_tt_html.R => tt_compile.R} | 0 R/tt_download.R | 57 ++++++ R/tt_load.R | 13 +- R/tt_load_gh.R | 16 +- R/tt_make_url.R | 4 +- R/tt_master_file.R | 15 +- R/tt_read_data.R | 38 +--- man/GET_json.Rd | 11 ++ man/TT_MASTER_ENV.Rd | 16 ++ man/base_64_to_char.Rd | 4 +- man/get_tt_html.Rd | 14 -- man/github_GET.Rd | 21 +++ man/github_blob.Rd | 40 ++++ man/github_contents.Rd | 17 +- man/github_html.Rd | 15 +- man/github_pat.Rd | 17 ++ man/github_sha.Rd | 39 ++++ man/{tt_make_url.Rd => tt_check_date.Rd} | 6 +- man/tt_compile.Rd | 14 ++ man/tt_datasets.Rd | 3 +- man/tt_download.Rd | 28 +++ man/tt_load.Rd | 2 +- man/tt_load_gh.Rd | 2 +- man/tt_master_file.Rd | 15 ++ man/tt_read_data.Rd | 2 +- man/tt_update_master_file.Rd | 12 ++ test.html | 83 --------- 29 files changed, 531 insertions(+), 209 deletions(-) rename R/{get_tt_html.R => tt_compile.R} (100%) create mode 100644 R/tt_download.R create mode 100644 man/GET_json.Rd create mode 100644 man/TT_MASTER_ENV.Rd delete mode 100644 man/get_tt_html.Rd create mode 100644 man/github_GET.Rd create mode 100644 man/github_blob.Rd create mode 100644 man/github_pat.Rd create mode 100644 man/github_sha.Rd rename man/{tt_make_url.Rd => tt_check_date.Rd} (86%) create mode 100644 man/tt_compile.Rd create mode 100644 man/tt_download.Rd create mode 100644 man/tt_master_file.Rd create mode 100644 man/tt_update_master_file.Rd delete mode 100644 test.html diff --git a/NAMESPACE b/NAMESPACE index e0fe991..7fa36ab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,13 +4,13 @@ S3method(print,tt) S3method(print,tt_data) S3method(print,tt_dataset_table) S3method(print,tt_dataset_table_list) +export("'") export(readme) export(tt_available) export(tt_datasets) +export(tt_download) export(tt_load) -export(tt_load_gh) export(tt_read_data) -import(dplyr) import(readxl) import(rvest) import(tools) @@ -18,7 +18,6 @@ import(xml2) importFrom(httr,GET) importFrom(httr,add_headers) importFrom(jsonlite,base64_dec) -importFrom(jsonlite,read_json) importFrom(lubridate,as_date) importFrom(lubridate,day) importFrom(lubridate,is.Date) @@ -26,7 +25,6 @@ importFrom(lubridate,month) importFrom(lubridate,year) importFrom(lubridate,ymd) importFrom(purrr,map) -importFrom(purrr,map_chr) importFrom(purrr,set_names) importFrom(purrr,walk) importFrom(readr,read_csv) @@ -34,8 +32,8 @@ importFrom(readr,read_delim) importFrom(rstudioapi,isAvailable) importFrom(rstudioapi,viewer) importFrom(rvest,html_table) -importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) importFrom(utils,download.file) importFrom(xml2,read_html) +importFrom(xml2,write_html) importFrom(xml2,xml_add_sibling) diff --git a/R/github_api.R b/R/github_api.R index 849fc4e..d8345a9 100644 --- a/R/github_api.R +++ b/R/github_api.R @@ -3,40 +3,62 @@ #' Provide tool to read raw data and return as text the raw data using the github api #' #' @param path Relative path from within the TidyTuesday Repository -#' @param read_func Function to parse the text. Defaults to \code{read.csv} -#' @param ... optional arguments to pass to \code{read_func} +#' @param auth github PAT. 
See PAT section for more information #' -#' @return result of read_func on the content +#' @section PAT: #' -#' @importFrom jsonlite read_json -github_contents <- - function(path, - read_func = read.csv, - ..., - record_ref = TRUE) { +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. +#' +#' @return raw text of the content with the sha as an attribute +#' +#' @examples +#' \dontrun{ +#' text_csv <- github_contents("data/2020/2020-04-07/tdf_stages.csv") +#' tour_de_france_stages <- read_csv(text_csv) +#' +#' } +#' +github_contents <- function(path, auth = github_pat()) { base_url <- file.path("https://api.github.com/repos/rfordatascience/tidytuesday/contents", path) - url_json <- try(read_json(base_url), silent = TRUE) - if (!inherits(url_json, "try-error")) { - content <- read_func(base_64_to_char(url_json$content), ...) - if (record_ref) { - attr(content, ".sha") <- url_json$sha - } - content - } else{ + url_response <- github_GET(base_url, auth = auth, type= "application/json") + json_response <- GET_json(url_response) + + + if (url_response$status_code == 200) { + content <- base_64_to_char(json_response$content) + attr(content, ".sha") <- json_response$sha + return(content) + + } else if(url_response$status_code == 403 & url_json$errors$code == "too_large"){ + + github_blob(path) + + }else{ NULL } } #' Read Contents from GitHub as html #' -#' provide tools to read and process readme's as html using the github api +#' Provide tools to read and process readme's as html using the github api #' #' @param path Relative path from within the TidyTuesday Repository to contents that can be returned as HTML #' @param ... optional arguments to pass to \code{read_html} +#' @param auth github PAT. See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. #' #' @return result of read_html on the contents #' @@ -48,23 +70,24 @@ github_contents <- #' #' } #' -#' @importFrom httr GET add_headers #' @importFrom xml2 read_html github_html <- function(path, - ...) { - base_url <- - file.path("https://api.github.com/repos/rfordatascience/tidytuesday/contents", - path) - url_response <- - GET(base_url, - add_headers(Accept = "application/vnd.github.v3.html")) - if (url_response$status_code == 200) { - github_page(read_html(x = url_response$content, ...)) - } else{ - NULL + ..., + auth = github_pat()) { + base_url <- + file.path("https://api.github.com/repos/rfordatascience/tidytuesday/contents", + path) + + url_response <- + github_GET(base_url, auth = auth, Accept = "application/vnd.github.v3.html") + + if (url_response$status_code == 200) { + github_page(read_html(x = url_response$content, ...)) + } else{ + NULL + } } -} #' Read Contents from GitHub as html @@ -73,8 +96,16 @@ github_html <- #' #' @param path Relative path from within the TidyTuesday Repository to contents that can be returned as HTML #' @param ... optional arguments to pass to \code{read_html} +#' @param auth github PAT. See PAT section for more information #' -#' @return result of read_html on the contents +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. 
This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. +#' +#' @return result data.frame of SHA and other information of directory contents #' #' @examples #' \dontrun{ @@ -84,35 +115,107 @@ github_html <- #' #' } #' -#' @importFrom httr GET add_headers #' @importFrom xml2 read_html github_sha <- - function(dirpath, branch = "master") { + function(dirpath, + branch = "master", + auth = github_pat()) { base_url <- file.path( "https://api.github.com/repos/rfordatascience/tidytuesday/git/trees", URLencode(paste(branch, dirpath, sep = ":")) ) - url_response <- GET(base_url) + + github_GET(base_url, auth = auth) + if (url_response$status_code == 200) { - do.call('rbind', lapply(jsonlite::parse_json(rawToChar( - url_response$content - ))$tree, data.frame, stringsAsFactors = FALSE)) + do.call( + 'rbind', + lapply( + jsonlite::parse_json(rawToChar(url_response$content))$tree, + data.frame, + stringsAsFactors = FALSE + ) + ) } else{ NULL } } +#' Read blob Contents from GitHub +#' +#' provide tools to read and process blob's using the github api +#' +#' @param path Relative path from within the TidyTuesday Repository to contents, +#' usually because it was too large to be read with the contencts api. +#' @param raw optional arguments to pass to \code{read_html} +#' @param auth github PAT. See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. +#' +#' @return a raw/character object based on the blob +#' +#' @examples +#' \dontrun{ +#' +#' main_readme <- github_html("README.md") +#' week_readme <- github_html("data/2020/2020-01-07/readme.md") +#' +#' } +#' +github_blob <- + function(path, as_raw = FALSE, auth = github_pat()){ + dir_sha <- github_sha(dirname(path)) + file_sha <- dir_sha$sha[dir_sha$path == basename(path)] + file_ext <- file_ext(path) + + base_url <- + file.path("https://api.github.com/repos/rfordatascience/tidytuesday/git/blobs", + file_sha) + + url_response <- + github_GET(base_url, auth = auth, Accept = "application/vnd.github.VERSION.raw") + + if (url_response$status_code == 200) { + if(as_raw == TRUE){ + content <- url_response$content + }else{ + content <- rawToChar(url_response$content) + } + attr(content, ".sha") <- file_sha + return(content) + + } else{ + NULL + } + + } + #' read json base64 contents from github #' -#' provide tools to read and process data using the github api +#' provide tool to read and process data using the github api #' #' @importFrom jsonlite base64_dec base_64_to_char <- function(b64){ - rawToChar(jsonlite::base64_dec(b64)) + rawToChar(base64_dec(b64)) } +#' read GET json contents to char +#' +#' provide tool to read and process data using the github api from GET command +#' +#' @importFrom jsonlite base64_dec +GET_json <- function(get_response){ + jsonlite::parse_json(rawToChar(get_response$content)) +} + + #' Create shell for HTML content from github #' #' Provide the necessary section to wrap around raw html content read from github. 
@@ -127,3 +230,48 @@ github_page <- function(page_content){ xml_add_sibling(header,page_content) } +#' Get the github PAT +#' +#' Extract the github PAT from the system environment for authenticated requests. +#' +#' @param quiet Should this be loud? default TRUE. +#' +#' @return PAT as a character. +github_pat <- function (quiet = TRUE) { + pat <- Sys.getenv("GITHUB_PAT") + if (nchar(pat)) { + if (!quiet) { + message("Using github PAT from envvar GITHUB_PAT") + } + return(pat) + } + NULL +} + +#' Get for github API +#' +#' Extract the github PAT from the system environment for authenticated requests. +#' +#' @param url URL to GET from +#' @param auth github PAT +#' @param ... any additional headers to add +#' +#' @return response from GET +#' +#' @importFrom httr GET add_headers +#' +github_GET <- function(url, auth = github_pat(), ...){ + + if(!is.null(auth)){ + headers <- add_headers( + ..., + Authorization = paste("token", auth) + ) + }else{ + headers <- add_headers(...) + } + + GET(url, headers) + +} + diff --git a/R/get_tt_html.R b/R/tt_compile.R similarity index 100% rename from R/get_tt_html.R rename to R/tt_compile.R diff --git a/R/tt_download.R b/R/tt_download.R new file mode 100644 index 0000000..dd14e7e --- /dev/null +++ b/R/tt_download.R @@ -0,0 +1,57 @@ +#' @title download tt data +#' +#' Download all or specific files identified in the tt dataset +#' +#' @param tt string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year +#' @param files List the file names to download. Default to asking. +#' @param ... pass methods to the parsing functions. These will be passed to ALL files, so be careful. +#' @return tt_data object (list class) +#' +#' @export +#' +#' @importFrom lubridate year +#' +#' @examples +#' tt_output <- tt_load("2019-01-15") + +tt_download <- function(tt, files = c("All", attr(tt, ".files")$data_files), ...){ + + data_info <- attr(tt, ".files") + tt_date <- attr(tt, ".date") + tt_year <- year(tt_date) + + + #define which files to download + files <- match.arg(files, several.ok = TRUE) + + if("All" %in% files){ + files <- data_info$data_files + } + + message("--- Starting Download ---") + + tt_data <- setNames( + vector("list", length = length(files)), + files) + + for(file in files){ + cat(sprintf('\rdownloading file %d of %d: `%s`', + which(files == file), + length(files), + file)) + + file_info <- data_info[ data_info$data_files == file, ] + + tt_data[[file]] <- tt_read_data( + file = file, + type = file_info$data_type, + delim = file_info$delim, + dir = file.path("data",tt_year,tt_date) + ) + } + message("--- Download complete ---") + + names(tt_data) <- tools::file_path_sans_ext(attr(tt, ".files")$data_files) + +} + diff --git a/R/tt_load.R b/R/tt_load.R index 933cb87..3d6fd0d 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -11,12 +11,15 @@ #' #' @examples #' tt_output <- tt_load("2019-01-15") -tt_load <- function(x, week, ...) { +tt_load <- function(x, week, download_files = "All", ...) { + + # download readme and identify files tt <- tt_load_gh(x, week) - message("--- Downloading files ---") - tt_data <- purrr::map(attr(tt, ".files"), function(x) tt_read_data(tt, x, ... )) - names(tt_data) <- tools::file_path_sans_ext(attr(tt, ".files")) - message("--- Download complete ---") + + #download files + tt_data <- tt_download_data(tt, files = download_files, ... 
) + + ## return tt_data object structure( tt_data, ".tt" = tt, diff --git a/R/tt_load_gh.R b/R/tt_load_gh.R index 953e6d5..5056a41 100644 --- a/R/tt_load_gh.R +++ b/R/tt_load_gh.R @@ -32,13 +32,27 @@ tt_load_gh <- function(x, week) { # Find Files and extract readme tt_compilation <- tt_compile(tt_date) + n_files <- as.character(nrow(tt_compilation$files)) - message("--- There are ", nrow(tt_compilation$files), " file(s) available in this tidy tuesday ---") + are_is <- switch( n_files, + "0" = "are", + "1" = "is", + "are") + + file_s <- switch( n_files, + "0" = "files", + "1" = "file", + "files") + + n_files <- ifelse( n_files == 0, "no", n_files) + + message("--- There ",are_is," ", n_files, " ", file_s," available ---") structure( tt_compilation$files$data_files, ".files" = tt_compilation$files, ".readme" = tt_compilation$readme, + ".date" = tt_date, class = "tt" ) } diff --git a/R/tt_make_url.R b/R/tt_make_url.R index 5b2ef4c..aeccf0a 100644 --- a/R/tt_make_url.R +++ b/R/tt_make_url.R @@ -13,7 +13,7 @@ tt_check_date <- function(x, week) { } } -tt_make_url.date <- function(x) { +tt_check_date.date <- function(x) { tt_year <- lubridate::year(x) tt_formatted_date <- tt_date_format(x) tt_folders <- tt_weeks(tt_year) @@ -30,7 +30,7 @@ tt_make_url.date <- function(x) { tt_formatted_date } -tt_make_url.year <- function(x, week) { +tt_check_date.year <- function(x, week) { tt_folders <- tt_weeks(x) if (week > nrow(tt_folders[["week_desc"]])) { stop(paste0("Only ", length(tt_folders), " TidyTuesday Weeks exist in ", x, ". Please enter a value for week between 1 and ", length(tt_folders))) diff --git a/R/tt_master_file.R b/R/tt_master_file.R index 043a54a..91d24c0 100644 --- a/R/tt_master_file.R +++ b/R/tt_master_file.R @@ -9,16 +9,11 @@ tt_update_master_file <- function(force = FALSE){ sha <- sha_df$sha[sha_df$path == "tt_data_type.csv"] if( is.null(tt_master_file()) || sha != attr(tt_master_file(), ".sha") || force ){ - tt_master_file( - github_contents( - "static/tt_data_type.csv", - read_func = function(x, ...) 
{ - read.csv(text = x, - header = TRUE, - stringsAsFactors = FALSE) - } - ) - ) + file_text <- github_contents("static/tt_data_type.csv") + content <- read.csv(text = file_text, header = TRUE, stringsAsFactors = FALSE) + attr(content,".sha") <- file_text + + tt_master_file(content) } } diff --git a/R/tt_read_data.R b/R/tt_read_data.R index 62d45c9..a751be4 100644 --- a/R/tt_read_data.R +++ b/R/tt_read_data.R @@ -18,43 +18,15 @@ #' tt_gh <- tt_load_gh("2019-01-15") #' #' tt_dataset_1 <- tt_read_data(tt_gh, tt_gh[1]) -tt_read_data <- function(tt, x, guess_max = 5000) { - suppressMessages({ - switch(class(x), - "character" = tt_read_data.character(tt, x, guess_max = guess_max ), - "numeric" = tt_read_data.numeric(tt, x, guess_max = guess_max ), - "integer" = tt_read_data.numeric(tt, x, guess_max = guess_max ), - stop(paste("No method for entry of class:", class(x))) - ) - }) -} +tt_read_data <- function(filename, type, delim, dir) { + -tt_read_data.character <- function(tt, x, guess_max = 5000) { - if (x %in% attr(tt, ".files")) { - url <- paste0(gsub("/tree/", "/blob/", file.path(attr(tt, ".url"), x)), "?raw=true") - tt_read_url(url, guess_max = guess_max) - } else { - stop(paste0( - "That is not an available file for this TidyTuesday week!\nAvailable Datasets:\n", - paste(attr(tt, ".files"), "\n\t", collapse = "") - )) - } -} -tt_read_data.numeric <- function(tt, x, guess_max = 5000) { - if (x > 0 & x <= length(attr(tt, ".files"))) { - url <- paste0(gsub("/tree/", "/blob/", file.path(attr(tt, ".url"), attr(tt, ".files")[x])), "?raw=true") - tt_read_url(url, guess_max = guess_max) - } else { - stop(paste0( - "That is not an available index for the files for this TidyTuesday week!\nAvailable Datasets:\n\t", - paste0(seq(1, length(attr(tt, ".files"))), ": ", attr(tt, ".files"), "\n\t", collapse = "") - )) - } -} -tt_read_url <- function(url, guess_max = 5000) { +} + +tt_read_func <- function(url, guess_max = 5000) { url <- gsub(" ", "%20", url) switch(tools::file_ext(gsub("[?]raw=true", "", tolower(url))), "xls" = download_read(url, readxl::read_xls, guess_max = guess_max, mode = "wb"), diff --git a/man/GET_json.Rd b/man/GET_json.Rd new file mode 100644 index 0000000..e4cb922 --- /dev/null +++ b/man/GET_json.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\name{GET_json} +\alias{GET_json} +\title{read GET json contents to char} +\usage{ +GET_json(get_response) +} +\description{ +provide tool to read and process data using the github api from GET command +} diff --git a/man/TT_MASTER_ENV.Rd b/man/TT_MASTER_ENV.Rd new file mode 100644 index 0000000..dbda3db --- /dev/null +++ b/man/TT_MASTER_ENV.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_master_file.R +\docType{data} +\name{TT_MASTER_ENV} +\alias{TT_MASTER_ENV} +\title{The Master List of Files from TidyTuesday} +\format{ +An object of class \code{environment} of length 1. 
+} +\usage{ +TT_MASTER_ENV +} +\description{ +The Master List of Files from TidyTuesday +} +\keyword{internal} diff --git a/man/base_64_to_char.Rd b/man/base_64_to_char.Rd index b1f236f..84cafc6 100644 --- a/man/base_64_to_char.Rd +++ b/man/base_64_to_char.Rd @@ -2,10 +2,10 @@ % Please edit documentation in R/github_api.R \name{base_64_to_char} \alias{base_64_to_char} -\title{read contents from github} +\title{read json base64 contents from github} \usage{ base_64_to_char(b64) } \description{ -provide tools to read and process data using the github api +provide tool to read and process data using the github api } diff --git a/man/get_tt_html.Rd b/man/get_tt_html.Rd deleted file mode 100644 index 6781feb..0000000 --- a/man/get_tt_html.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_tt_html.R -\name{get_tt_html} -\alias{get_tt_html} -\title{Get TidyTuesday URL and HTML} -\usage{ -get_tt_html(git_url) -} -\arguments{ -\item{git_url}{url to tidytuesday files} -} -\description{ -Get TidyTuesday URL and HTML -} diff --git a/man/github_GET.Rd b/man/github_GET.Rd new file mode 100644 index 0000000..c52463b --- /dev/null +++ b/man/github_GET.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\name{github_GET} +\alias{github_GET} +\title{Get for github API} +\usage{ +github_GET(url, auth = github_pat(), ...) +} +\arguments{ +\item{url}{URL to GET from} + +\item{auth}{github PAT} + +\item{...}{any additional headers to add} +} +\value{ +response from GET +} +\description{ +Extract the github PAT from the system environment for authenticated requests. +} diff --git a/man/github_blob.Rd b/man/github_blob.Rd new file mode 100644 index 0000000..c2a30c4 --- /dev/null +++ b/man/github_blob.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\name{github_blob} +\alias{github_blob} +\title{Read blob Contents from GitHub} +\usage{ +github_blob(path, as_raw = FALSE, auth = github_pat()) +} +\arguments{ +\item{path}{Relative path from within the TidyTuesday Repository to contents, +usually because it was too large to be read with the contencts api.} + +\item{auth}{github PAT. See PAT section for more information} + +\item{raw}{optional arguments to pass to \code{read_html}} +} +\value{ +a raw/character object based on the blob +} +\description{ +provide tools to read and process blob's using the github api +} +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. +} + +\examples{ +\dontrun{ + +main_readme <- github_html("README.md") +week_readme <- github_html("data/2020/2020-01-07/readme.md") + +} + +} diff --git a/man/github_contents.Rd b/man/github_contents.Rd index 886a0bb..e077602 100644 --- a/man/github_contents.Rd +++ b/man/github_contents.Rd @@ -4,18 +4,25 @@ \alias{github_contents} \title{Read Contents from GitHub} \usage{ -github_contents(path, read_func = read.csv, ...) +github_contents(path, auth = github_pat()) } \arguments{ \item{path}{Relative path from within the TidyTuesday Repository} -\item{read_func}{Function to parse the text. Defaults to \code{read.csv}} - -\item{...}{optional arguments to pass to \code{read_func}} +\item{auth}{github PAT. 
See PAT section for more information} } \value{ -result of read_func on the content +raw text of the content with the sha as an attribute } \description{ Provide tool to read raw data and return as text the raw data using the github api } +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. +} + diff --git a/man/github_html.Rd b/man/github_html.Rd index 3fce528..c6eb032 100644 --- a/man/github_html.Rd +++ b/man/github_html.Rd @@ -4,19 +4,30 @@ \alias{github_html} \title{Read Contents from GitHub as html} \usage{ -github_html(path, ...) +github_html(path, ..., auth = github_pat()) } \arguments{ \item{path}{Relative path from within the TidyTuesday Repository to contents that can be returned as HTML} \item{...}{optional arguments to pass to \code{read_html}} + +\item{auth}{github PAT. See PAT section for more information} } \value{ result of read_html on the contents } \description{ -provide tools to read and process readme's as html using the github api +Provide tools to read and process readme's as html using the github api } +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. +} + \examples{ \dontrun{ diff --git a/man/github_pat.Rd b/man/github_pat.Rd new file mode 100644 index 0000000..321e5d4 --- /dev/null +++ b/man/github_pat.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\name{github_pat} +\alias{github_pat} +\title{Get the github PAT} +\usage{ +github_pat(quiet = TRUE) +} +\arguments{ +\item{quiet}{Should this be loud? default TRUE.} +} +\value{ +PAT as a character. +} +\description{ +Extract the github PAT from the system environment for authenticated requests. +} diff --git a/man/github_sha.Rd b/man/github_sha.Rd new file mode 100644 index 0000000..6206060 --- /dev/null +++ b/man/github_sha.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\name{github_sha} +\alias{github_sha} +\title{Read Contents from GitHub as html} +\usage{ +github_sha(dirpath, branch = "master", auth = github_pat()) +} +\arguments{ +\item{auth}{github PAT. See PAT section for more information} + +\item{path}{Relative path from within the TidyTuesday Repository to contents that can be returned as HTML} + +\item{...}{optional arguments to pass to \code{read_html}} +} +\value{ +result data.frame of SHA and other information of directory contents +} +\description{ +provide tools to read and process readme's as html using the github api +} +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. 
+} + +\examples{ +\dontrun{ + +main_readme <- github_html("README.md") +week_readme <- github_html("data/2020/2020-01-07/readme.md") + +} + +} diff --git a/man/tt_make_url.Rd b/man/tt_check_date.Rd similarity index 86% rename from man/tt_make_url.Rd rename to man/tt_check_date.Rd index d5e1ab7..f092012 100644 --- a/man/tt_make_url.Rd +++ b/man/tt_check_date.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tt_make_url.R -\name{tt_make_url} -\alias{tt_make_url} +\name{tt_check_date} +\alias{tt_check_date} \title{given inputs generate valid TidyTuesday URL} \usage{ -tt_make_url(x, week) +tt_check_date(x, week) } \arguments{ \item{x}{either a string or numeric entry indicating the full date of} diff --git a/man/tt_compile.Rd b/man/tt_compile.Rd new file mode 100644 index 0000000..f429a98 --- /dev/null +++ b/man/tt_compile.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_compile.R +\name{tt_compile} +\alias{tt_compile} +\title{Get TidyTuesday Readme and list of files and HTML} +\usage{ +tt_compile(date) +} +\arguments{ +\item{date}{date of tidytuesday of interest} +} +\description{ +Get TidyTuesday Readme and list of files and HTML +} diff --git a/man/tt_datasets.Rd b/man/tt_datasets.Rd index 8fbb68d..640aeff 100644 --- a/man/tt_datasets.Rd +++ b/man/tt_datasets.Rd @@ -7,7 +7,8 @@ tt_datasets(year) } \arguments{ -\item{year}{numeric entry representing the year of tidytuesday you want the list of datasets for. Leave empty for most recent year.} +\item{year}{numeric entry representing the year of tidytuesday you want the list of datasets +for. Leave empty for most recent year.} } \description{ list available datasets for that year diff --git a/man/tt_download.Rd b/man/tt_download.Rd new file mode 100644 index 0000000..dbc8818 --- /dev/null +++ b/man/tt_download.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_download.R +\name{tt_download} +\alias{tt_download} +\title{download tt data + +Download all or specific files identified in the tt dataset} +\usage{ +tt_download(tt, files = c("All", attr(tt, ".files")$data_files), ...) +} +\arguments{ +\item{tt}{string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year} + +\item{files}{List the file names to download. Default to asking.} + +\item{...}{pass methods to the parsing functions. These will be passed to ALL files, so be careful.} +} +\value{ +tt_data object (list class) +} +\description{ +download tt data + +Download all or specific files identified in the tt dataset +} +\examples{ +tt_output <- tt_load("2019-01-15") +} diff --git a/man/tt_load.Rd b/man/tt_load.Rd index da79e61..2ebfbb3 100644 --- a/man/tt_load.Rd +++ b/man/tt_load.Rd @@ -4,7 +4,7 @@ \alias{tt_load} \title{Load TidyTuesday data from Github} \usage{ -tt_load(x, week, ...) +tt_load(x, week, download_files = "All", ...) 
} \arguments{ \item{x}{string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year} diff --git a/man/tt_load_gh.Rd b/man/tt_load_gh.Rd index 0bc1f24..b87c15c 100644 --- a/man/tt_load_gh.Rd +++ b/man/tt_load_gh.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_load.R +% Please edit documentation in R/tt_load_gh.R \name{tt_load_gh} \alias{tt_load_gh} \title{Load TidyTuesday data from Github} diff --git a/man/tt_master_file.Rd b/man/tt_master_file.Rd new file mode 100644 index 0000000..635ae36 --- /dev/null +++ b/man/tt_master_file.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_master_file.R +\name{tt_master_file} +\alias{tt_master_file} +\title{Get Master List of Files from Local Environment} +\usage{ +tt_master_file(assign = NULL) +} +\arguments{ +\item{assign}{value to overwrite the TT_MASTER_ENV$TT_MASTER_FILE contents with} +} +\description{ +return or update tt master file +} +\keyword{internal} diff --git a/man/tt_read_data.Rd b/man/tt_read_data.Rd index 9c73b5b..9c78ed1 100644 --- a/man/tt_read_data.Rd +++ b/man/tt_read_data.Rd @@ -4,7 +4,7 @@ \alias{tt_read_data} \title{Reads in TidyTuesday datasets from Github repo} \usage{ -tt_read_data(tt, x, guess_max = 5000) +tt_read_data(filename, type, delim, dir) } \arguments{ \item{tt}{tt_gh object from tt_load_gh function} diff --git a/man/tt_update_master_file.Rd b/man/tt_update_master_file.Rd new file mode 100644 index 0000000..16924f8 --- /dev/null +++ b/man/tt_update_master_file.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_master_file.R +\name{tt_update_master_file} +\alias{tt_update_master_file} +\title{Get Master List of Files from TidyTuesday} +\usage{ +tt_update_master_file(force = FALSE) +} +\description{ +Import or update dataset from github that records the entire list of objects from tidytuesday +} +\keyword{internal} diff --git a/test.html b/test.html deleted file mode 100644 index be30c4c..0000000 --- a/test.html +++ /dev/null @@ -1,83 +0,0 @@ -
-https://xkcd.com/936/
-
-XKCD Source for Comic
-
-Passwords
-
-This week's data is all about passwords. Data is sourced from Information is Beautiful, with the graphic coming from the same group here.
-
-There's lots of additional information about password quality & strength in the source Doc. Please note that the "strength" column in this dataset is relative to these common aka "bad" passwords and YOU SHOULDN'T USE ANY OF THEM!
-
-Wikipedia has a nice article on password strength as well.
-
-Get the data here
-
-# Get the Data
-
-passwords <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-14/passwords.csv')
-
-# Or read in with tidytuesdayR package (https://github.com/thebioengineer/tidytuesdayR)
-# PLEASE NOTE TO USE 2020 DATA YOU NEED TO UPDATE tidytuesdayR from GitHub
-
-# Either ISO-8601 date or year/week works!
-
-# Install via devtools::install_github("thebioengineer/tidytuesdayR")
-
-tuesdata <- tidytuesdayR::tt_load('2020-01-14')
-tuesdata <- tidytuesdayR::tt_load(2020, week = 3)
-
-passwords <- tuesdata$passwords
-
-Data Dictionary
-
-passwords.csv
-
-|variable          |class     |description                                        |
-|:-----------------|:---------|:--------------------------------------------------|
-|rank              |double    |popularity in their database of released passwords |
-|password          |character |Actual text of the password                        |
-|category          |character |What category does the password fall in to?        |
-|value             |double    |Time to crack by online guessing                   |
-|time_unit         |character |Time unit to match with value                      |
-|offline_crack_sec |double    |Time to crack offline in seconds                   |
-|rank_alt          |double    |Rank 2                                             |
-|strength          |double    |Strength = quality of password where 10 is highest, 1 is lowest, please note that these are relative to these generally bad passwords |
-|font_size         |double    |Used to create the graphic for KIB                 |
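A minimal sketch (an editorial addition, not part of the original file) of folding the value and time_unit columns above into one online crack time in seconds; the column names come from the dictionary, while the set of unit labels is an assumption about the data:

library(dplyr)

# seconds per unit -- labels assumed to match the values in `time_unit`
unit_seconds <- c(
  seconds = 1, minutes = 60, hours = 3600, days = 86400,
  weeks = 604800, months = 2629800, years = 31557600
)

passwords %>%
  mutate(online_crack_sec = value * unname(unit_seconds[time_unit])) %>%
  select(password, online_crack_sec, offline_crack_sec)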
\ No newline at end of file From c3b10cc682858e7f74afc1c49702e05c5f5876e2 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Mon, 18 May 2020 08:14:38 -0700 Subject: [PATCH 24/64] working on updating file downloading via github api --- NAMESPACE | 1 - R/github_api.R | 2 +- R/{tt_make_url.R => tt_check_date.R} | 4 +-- R/tt_compile.R | 17 +++++++--- R/tt_datasets.R | 28 +++++++++++++---- R/tt_load.R | 15 +++++++-- R/tt_load_gh.R | 14 +++++++-- R/tt_master_file.R | 16 ++++++++-- R/tt_read_data.R | 46 ++++++++++------------------ man/github_contents.Rd | 8 +++++ man/tt_check_date.Rd | 2 +- man/tt_compile.Rd | 13 +++++++- man/tt_datasets.Rd | 13 +++++++- man/tt_load.Rd | 13 +++++++- man/tt_load_gh.Rd | 13 +++++++- man/tt_update_master_file.Rd | 16 +++++++++- 16 files changed, 162 insertions(+), 59 deletions(-) rename R/{tt_make_url.R => tt_check_date.R} (97%) diff --git a/NAMESPACE b/NAMESPACE index 7fa36ab..fba5f30 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,7 +14,6 @@ export(tt_read_data) import(readxl) import(rvest) import(tools) -import(xml2) importFrom(httr,GET) importFrom(httr,add_headers) importFrom(jsonlite,base64_dec) diff --git a/R/github_api.R b/R/github_api.R index d8345a9..25b85a0 100644 --- a/R/github_api.R +++ b/R/github_api.R @@ -126,7 +126,7 @@ github_sha <- URLencode(paste(branch, dirpath, sep = ":")) ) - github_GET(base_url, auth = auth) + url_response <- github_GET(base_url, auth = auth) if (url_response$status_code == 200) { do.call( diff --git a/R/tt_make_url.R b/R/tt_check_date.R similarity index 97% rename from R/tt_make_url.R rename to R/tt_check_date.R index aeccf0a..0724f02 100644 --- a/R/tt_make_url.R +++ b/R/tt_check_date.R @@ -5,9 +5,9 @@ #' tt_check_date <- function(x, week) { if (valid_date(x)) { - tt_make_url.date(x) + tt_check_date.date(x) } else if (valid_year(x)) { - tt_make_url.year(x, week) + tt_check_date.year(x, week) } else { stop("Entries must render to a valid date or year") } diff --git a/R/tt_compile.R b/R/tt_compile.R index b22bf01..bd097e5 100644 --- a/R/tt_compile.R +++ b/R/tt_compile.R @@ -1,16 +1,23 @@ #' @title Get TidyTuesday Readme and list of files and HTML #' @param date date of tidytuesday of interest +#' @param auth github Personal Access Token. See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. +#' #' @importFrom lubridate year -tt_compile <- function(date) { +tt_compile <- function(date, auth = github_pat()) { ttmf <- tt_master_file() #list of files - files <- ttmf %>% - filter(week_date == date) %>% - select(data_files, data_type, delim) + files <- ttmf[ ttmf$week_date == date, c("data_files","data_type","delim")] - readme <- github_html(file.path("data",year(date),date,"readme.md")) + readme <- github_html(file.path("data",year(date),date,"readme.md"), auth = auth) list( files = files, diff --git a/R/tt_datasets.R b/R/tt_datasets.R index c69cd49..8158aca 100644 --- a/R/tt_datasets.R +++ b/R/tt_datasets.R @@ -25,15 +25,26 @@ tt_available <- function() { #' @description list available datasets for that year #' @param year numeric entry representing the year of tidytuesday you want the list of datasets #' for. Leave empty for most recent year. -#' @import xml2 -#' @import rvest +#' @param auth github Personal Access Token. 
See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. +#' +#' @importFrom rvest html_table #' @export #' -tt_datasets <- function(year) { - readme <- github_html(file.path("data",year,"readme.md")) +tt_datasets <- function(year, auth = github_pat()) { + + readme <- github_html(file.path("data",year,"readme.md"), auth = auth) + datasets <- readme %>% html_table() %>% `[`(1) + structure( datasets, .html = readme, @@ -52,7 +63,7 @@ print.tt_dataset_table <- function(x, ..., printConsole = FALSE) { tmpHTML <- tempfile(fileext = ".html") readme <- attr(x,".html") write_html(readme, file = tmpHTML) - rstudioapi::viewer(url = tmpHTML) + viewer(url = tmpHTML) } else { data.frame(x) } @@ -67,8 +78,11 @@ print.tt_dataset_table <- function(x, ..., printConsole = FALSE) { #' @importFrom rvest html_table #' @export print.tt_dataset_table_list <- function(x, ..., printConsole = FALSE) { - if (rstudioapi::isAvailable() & !printConsole) { + + if (isAvailable() & !printConsole) { + tmpHTML <- setup_doc() + cat("
<h1>TidyTuesday Datasets</h1>
", file = tmpHTML, append = TRUE) names(x) %>% purrr::map( @@ -97,7 +111,9 @@ print.tt_dataset_table_list <- function(x, ..., printConsole = FALSE) { cat("
", file = tmpHTML, append = TRUE) cat("", file = tmpHTML, append = TRUE) rstudioapi::viewer(url = tmpHTML) + } else { + names(x) %>% purrr::map( function(.x, x) { diff --git a/R/tt_load.R b/R/tt_load.R index 3d6fd0d..943b08f 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -3,6 +3,15 @@ #' @param x string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year #' @param week left empty unless x is a numeric year entry, in which case the week of interest should be entered #' @param ... pass methods to the parsing functions. These will be passed to ALL files, so be careful. +#' @param auth github Personal Access Token. See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. +#' #' @return tt_data object (list class) #' #' @export @@ -11,13 +20,13 @@ #' #' @examples #' tt_output <- tt_load("2019-01-15") -tt_load <- function(x, week, download_files = "All", ...) { +tt_load <- function(x, week, download_files = "All", ..., auth = github_pat()) { # download readme and identify files - tt <- tt_load_gh(x, week) + tt <- tt_load_gh(x, week, auth = auth) #download files - tt_data <- tt_download_data(tt, files = download_files, ... ) + tt_data <- tt_download(tt, files = download_files, ... , auth = auth) ## return tt_data object structure( diff --git a/R/tt_load_gh.R b/R/tt_load_gh.R index 5056a41..d15f7f1 100644 --- a/R/tt_load_gh.R +++ b/R/tt_load_gh.R @@ -4,6 +4,14 @@ #' #' @param x string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year #' @param week left empty unless x is a numeric year entry, in which case the week of interest should be entered +#' @param auth github Personal Access Token. See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. #' #' @return tt_gh object. List object with the following entries: readme, files, url #' @export @@ -12,17 +20,17 @@ #' tt_gh <- tt_load_gh("2019-01-15") #' #' readme(tt_gh) -tt_load_gh <- function(x, week) { +tt_load_gh <- function(x, week, auth = github_pat()) { if (missing(x)) { on.exit({ - print(tt_available()) + print(tt_available(auth = auth)) }) stop("Enter either the year or date of the TidyTuesday Data to extract!") } #Update master file reference - tt_update_master_file() + tt_update_master_file(auth = auth) #Check Dates tt_date <- tt_check_date(x, week) diff --git a/R/tt_master_file.R b/R/tt_master_file.R index 91d24c0..f7783b6 100644 --- a/R/tt_master_file.R +++ b/R/tt_master_file.R @@ -2,16 +2,26 @@ #' #' Import or update dataset from github that records the entire list of objects from tidytuesday #' +#' @param force force the update to occur even if the SHA matches +#' @param auth github Personal Access Token. See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. 
+#' #' @keywords internal -tt_update_master_file <- function(force = FALSE){ +tt_update_master_file <- function(force = FALSE, auth = github_pat()){ # get sha to see if need to update sha_df <- github_sha("static") sha <- sha_df$sha[sha_df$path == "tt_data_type.csv"] if( is.null(tt_master_file()) || sha != attr(tt_master_file(), ".sha") || force ){ - file_text <- github_contents("static/tt_data_type.csv") + file_text <- github_contents("static/tt_data_type.csv", auth = auth) content <- read.csv(text = file_text, header = TRUE, stringsAsFactors = FALSE) - attr(content,".sha") <- file_text + attr(content,".sha") <- sha tt_master_file(content) } diff --git a/R/tt_read_data.R b/R/tt_read_data.R index a751be4..ed87b61 100644 --- a/R/tt_read_data.R +++ b/R/tt_read_data.R @@ -9,7 +9,7 @@ #' @export #' #' @importFrom readr read_csv read_delim -#' @import tools +#' @importFrom tools file_ext #' @import readxl #' #' @family tt_read_data @@ -18,24 +18,20 @@ #' tt_gh <- tt_load_gh("2019-01-15") #' #' tt_dataset_1 <- tt_read_data(tt_gh, tt_gh[1]) -tt_read_data <- function(filename, type, delim, dir) { - - +tt_read_data <- function(filename, type, delim, dir, ...) { + read_func <- switch(type, + "xls" = read_data(readxl::read_xls,..., raw = TRUE), + "xlsx" = read_data(readxl::read_xlsx,..., raw = TRUE), + "rds" = read_data(readRDS, raw = TRUE), + "rda" = read_data(read_rda, raw = TRUE), + read_data(readr::read_delim, progress = FALSE,...) + ) + read_func(file.path(dir, filename)) } -tt_read_func <- function(url, guess_max = 5000) { - url <- gsub(" ", "%20", url) - switch(tools::file_ext(gsub("[?]raw=true", "", tolower(url))), - "xls" = download_read(url, readxl::read_xls, guess_max = guess_max, mode = "wb"), - "xlsx" = download_read(url, readxl::read_xlsx, guess_max = guess_max, mode = "wb"), - "rds" = download_read(url, readRDS, mode = "wb"), - "rda" = download_read(url, read_rda, mode = "wb"), - download_read(url, readr::read_delim, guess_max = guess_max, progress = FALSE, find_delim = TRUE) - ) -} #' @title utility to assist with 'reading' urls that cannot normally be read by file functions #' @@ -47,26 +43,18 @@ tt_read_func <- function(url, guess_max = 5000) { #' @param find_delim should the delimeters be found for the file #' @importFrom utils download.file #' -download_read <- function(url, func, ..., guess_max, mode = "w", find_delim = FALSE) { - temp_file <- tempfile(fileext = paste0(".", tools::file_ext(gsub("[?]raw=true", "",url)))) - utils::download.file(url, temp_file, quiet = TRUE, mode = mode) +read_data <- function(func, ..., raw = FALSE) { + + + read_data_txt + + function(path){ + blob <- github_blob(path, as_raw = raw) - dots <- as.list(substitute(substitute(...)))[-1] - func_call <- c(substitute(func), substitute(temp_file), dots) - if (find_delim) { - if (!(!is.null(names(func_call)) & - "delim" %in% names(func_call)) & - "delim" %in% names(as.list(args(func)))) { - func_call$delim <- identify_delim(temp_file) - } - } - if("guess_max"%in%names(as.list(args(func)))){ - func_call$guess_max = guess_max } - return(eval(as.call(func_call))) } #' @title utility to load RDA with out using assigned name in envir diff --git a/man/github_contents.Rd b/man/github_contents.Rd index e077602..8a9db55 100644 --- a/man/github_contents.Rd +++ b/man/github_contents.Rd @@ -26,3 +26,11 @@ the github api, and increases the limit on the number of requests allowed from to set the PAT. 
} +\examples{ +\dontrun{ +text_csv <- github_contents("data/2020/2020-04-07/tdf_stages.csv") +tour_de_france_stages <- read_csv(text_csv) + +} + +} diff --git a/man/tt_check_date.Rd b/man/tt_check_date.Rd index f092012..48b7734 100644 --- a/man/tt_check_date.Rd +++ b/man/tt_check_date.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_make_url.R +% Please edit documentation in R/tt_check_date.R \name{tt_check_date} \alias{tt_check_date} \title{given inputs generate valid TidyTuesday URL} diff --git a/man/tt_compile.Rd b/man/tt_compile.Rd index f429a98..93567ca 100644 --- a/man/tt_compile.Rd +++ b/man/tt_compile.Rd @@ -4,11 +4,22 @@ \alias{tt_compile} \title{Get TidyTuesday Readme and list of files and HTML} \usage{ -tt_compile(date) +tt_compile(date, auth = github_pat()) } \arguments{ \item{date}{date of tidytuesday of interest} + +\item{auth}{github Personal Access Token. See PAT section for more information} } \description{ Get TidyTuesday Readme and list of files and HTML } +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. +} + diff --git a/man/tt_datasets.Rd b/man/tt_datasets.Rd index 640aeff..a43261f 100644 --- a/man/tt_datasets.Rd +++ b/man/tt_datasets.Rd @@ -4,12 +4,23 @@ \alias{tt_datasets} \title{Available datasets} \usage{ -tt_datasets(year) +tt_datasets(year, auth = github_pat()) } \arguments{ \item{year}{numeric entry representing the year of tidytuesday you want the list of datasets for. Leave empty for most recent year.} + +\item{auth}{github Personal Access Token. See PAT section for more information} } \description{ list available datasets for that year } +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. +} + diff --git a/man/tt_load.Rd b/man/tt_load.Rd index 2ebfbb3..2fa0860 100644 --- a/man/tt_load.Rd +++ b/man/tt_load.Rd @@ -4,7 +4,7 @@ \alias{tt_load} \title{Load TidyTuesday data from Github} \usage{ -tt_load(x, week, download_files = "All", ...) +tt_load(x, week, download_files = "All", ..., auth = github_pat()) } \arguments{ \item{x}{string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year} @@ -12,6 +12,8 @@ tt_load(x, week, download_files = "All", ...) \item{week}{left empty unless x is a numeric year entry, in which case the week of interest should be entered} \item{...}{pass methods to the parsing functions. These will be passed to ALL files, so be careful.} + +\item{auth}{github Personal Access Token. See PAT section for more information} } \value{ tt_data object (list class) @@ -19,6 +21,15 @@ tt_data object (list class) \description{ Load TidyTuesday data from Github } +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. 
+} + \examples{ tt_output <- tt_load("2019-01-15") } diff --git a/man/tt_load_gh.Rd b/man/tt_load_gh.Rd index b87c15c..1beb372 100644 --- a/man/tt_load_gh.Rd +++ b/man/tt_load_gh.Rd @@ -4,12 +4,14 @@ \alias{tt_load_gh} \title{Load TidyTuesday data from Github} \usage{ -tt_load_gh(x, week) +tt_load_gh(x, week, auth = github_pat()) } \arguments{ \item{x}{string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year} \item{week}{left empty unless x is a numeric year entry, in which case the week of interest should be entered} + +\item{auth}{github Personal Access Token. See PAT section for more information} } \value{ tt_gh object. List object with the following entries: readme, files, url @@ -17,6 +19,15 @@ tt_gh object. List object with the following entries: readme, files, url \description{ Pulls the Readme and URLs of the data from the TidyTuesday github folder based on the date provided } +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. +} + \examples{ tt_gh <- tt_load_gh("2019-01-15") diff --git a/man/tt_update_master_file.Rd b/man/tt_update_master_file.Rd index 16924f8..c62e5cf 100644 --- a/man/tt_update_master_file.Rd +++ b/man/tt_update_master_file.Rd @@ -4,9 +4,23 @@ \alias{tt_update_master_file} \title{Get Master List of Files from TidyTuesday} \usage{ -tt_update_master_file(force = FALSE) +tt_update_master_file(force = FALSE, auth = github_pat()) +} +\arguments{ +\item{force}{force the update to occur even if the SHA matches} + +\item{auth}{github Personal Access Token. See PAT section for more information} } \description{ Import or update dataset from github that records the entire list of objects from tidytuesday } +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. 
+} + \keyword{internal} From f7844a9cc89b82738aae1c27ee00074907b2c8dd Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 20 May 2020 22:51:12 -0700 Subject: [PATCH 25/64] updates to allow for setting ref repo in options --- R/github_api.R | 50 +++++++++++++++--------- R/identify_delim.R | 66 -------------------------------- R/tt_check_date.R | 45 +++++++++++++++------- R/tt_datasets.R | 28 ++++++++++---- R/tt_download.R | 46 +++++++++++++--------- R/tt_download_file.R | 91 ++++++++++++++++++++++++++++++++++++++++++++ R/tt_load.R | 4 +- R/tt_load_gh.R | 1 - R/tt_master_file.R | 9 ++++- R/tt_parse.R | 41 ++++++++++++++++++++ R/tt_read_data.R | 68 --------------------------------- R/utils.R | 2 +- R/zzz.R | 3 ++ 13 files changed, 259 insertions(+), 195 deletions(-) delete mode 100644 R/identify_delim.R create mode 100644 R/tt_download_file.R create mode 100644 R/tt_parse.R delete mode 100644 R/tt_read_data.R create mode 100644 R/zzz.R diff --git a/R/github_api.R b/R/github_api.R index 25b85a0..02636b3 100644 --- a/R/github_api.R +++ b/R/github_api.R @@ -23,7 +23,9 @@ #' github_contents <- function(path, auth = github_pat()) { base_url <- - file.path("https://api.github.com/repos/rfordatascience/tidytuesday/contents", + file.path("https://api.github.com/repos", + options("tidytuesdayR.tt_repo"), + "contents", path) @@ -36,9 +38,9 @@ github_contents <- function(path, auth = github_pat()) { attr(content, ".sha") <- json_response$sha return(content) - } else if(url_response$status_code == 403 & url_json$errors$code == "too_large"){ + } else if(url_response$status_code == 403 & json_response$errors[[1]]$code == "too_large"){ - github_blob(path) + github_blob(path, auth = auth) }else{ NULL @@ -76,7 +78,9 @@ github_html <- ..., auth = github_pat()) { base_url <- - file.path("https://api.github.com/repos/rfordatascience/tidytuesday/contents", + file.path("https://api.github.com/repos", + options("tidytuesdayR.tt_repo"), + "contents", path) url_response <- @@ -120,22 +124,27 @@ github_sha <- function(dirpath, branch = "master", auth = github_pat()) { + + if(dirpath == "."){ + dirpath <- "" + } + base_url <- file.path( - "https://api.github.com/repos/rfordatascience/tidytuesday/git/trees", + "https://api.github.com/repos", + options("tidytuesdayR.tt_repo"), + "git/trees", URLencode(paste(branch, dirpath, sep = ":")) ) url_response <- github_GET(base_url, auth = auth) + url_json <- GET_json(url_response) if (url_response$status_code == 200) { - do.call( - 'rbind', - lapply( - jsonlite::parse_json(rawToChar(url_response$content))$tree, - data.frame, - stringsAsFactors = FALSE - ) + do.call('rbind', + lapply(url_json$tree, + function(x) + data.frame(x[c("path", "sha")], stringsAsFactors = FALSE)) ) } else{ NULL @@ -172,10 +181,11 @@ github_blob <- function(path, as_raw = FALSE, auth = github_pat()){ dir_sha <- github_sha(dirname(path)) file_sha <- dir_sha$sha[dir_sha$path == basename(path)] - file_ext <- file_ext(path) base_url <- - file.path("https://api.github.com/repos/rfordatascience/tidytuesday/git/blobs", + file.path("https://api.github.com/repos", + options("tidytuesdayR.tt_repo"), + "git/blobs", file_sha) url_response <- @@ -224,10 +234,16 @@ GET_json <- function(get_response){ #' #' @return xml_document with github header #' -#' @importFrom xml2 read_html xml_add_sibling +#' @importFrom xml2 read_html xml_add_sibling html_nodes github_page <- function(page_content){ - header <- read_html("") - xml_add_sibling(header,page_content) + + header <- "" + body <- page_content %>% + 
html_nodes("body") %>% + as.character + + read_html(paste(header, body)) + } #' Get the github PAT diff --git a/R/identify_delim.R b/R/identify_delim.R deleted file mode 100644 index 785e858..0000000 --- a/R/identify_delim.R +++ /dev/null @@ -1,66 +0,0 @@ -#' @title Identify potential delimeters of file -#' -#' @param path path to file -#' @param delims a vector of delimeters to try -#' @param n number of rows to look at in the file to determine the delimters -#' @param comment identify lines that are comments if this character is at the beginning -#' @param skip number of lines to skip at the beginning -#' @param quote set of quoting characters -#' @importFrom utils download.file -#' - -identify_delim <- function(path, - delims = c("\t", ",", " ", "|", ";"), - n = 10, - comment = "#", - skip = 0, - quote = "\"") { - - # Attempt splitting on list of delimieters - num_splits <- list() - for (delim in delims) { - - test <- scan(path, - what = "character", - nlines = n, - allowEscapes = FALSE, - encoding = "UTF-8", - sep = delim, - quote = quote, - skip = skip, - comment.char = comment, - quiet = TRUE) - - num_splits[[delim]] <- length(test) - } - - if(all(unlist(num_splits) < n)){ - n <- as.numeric(names(sort(table(unlist(num_splits)),decreasing = TRUE)[1])) - } - - if (all(unlist(num_splits) == n)) { - warning("Not able to detect delimiter for the file. Defaulting to `\t`.") - return("\t") - } - - # which delims that produced consistent splits and greater than 1? - good_delims <- do.call("c", lapply(num_splits, function(cuts, nrows) { - (cuts %% nrows == 0) & cuts > nrows - }, n)) - - good_delims <- names(good_delims)[good_delims] - - if (length(good_delims) == 0) { - warning("Not able to detect delimiter for the file. Defaulting to ` `.") - return(" ") - } else if (length(good_delims) > 1) { - warning( - "Detected multiple possible delimeters:", - paste0("`", good_delims, "`", collapse = ", "), ". Defaulting to ", - paste0("`", good_delims[1], "`"), "." - ) - return(good_delims[1]) - } else { - return(good_delims) - } -} diff --git a/R/tt_check_date.R b/R/tt_check_date.R index 0724f02..7374ed5 100644 --- a/R/tt_check_date.R +++ b/R/tt_check_date.R @@ -31,20 +31,43 @@ tt_check_date.date <- function(x) { } tt_check_date.year <- function(x, week) { + tt_folders <- tt_weeks(x) - if (week > nrow(tt_folders[["week_desc"]])) { - stop(paste0("Only ", length(tt_folders), " TidyTuesday Weeks exist in ", x, ". Please enter a value for week between 1 and ", length(tt_folders))) + + if (week > length(tt_folders$week_desc)) { + stop( + paste0( + "Only ", + length(tt_folders$week_desc), + " TidyTuesday Weeks exist in ", + x, + ". Please enter a value for week between 1 and ", + length(tt_folders$week_desc) + ) + ) } else if (week < 1) { - stop(paste0("Week entry must be a valid positive integer between 1 and ", length(tt_folders$week_desc), ".")) + stop(paste0( + "Week entry must be a valid positive integer between 1 and ", + length(tt_folders$week_desc), + "." + )) } - tt_date <- tt_folders[["week_desc"]][week,"Date"] + tt_date <- tt_folders$folders[tt_folders$week_desc == week] if (!tt_date %in% tt_folders[["folders"]]) { - stop(paste0("Week ", week, " of TidyTuesday for ", x," does not have data available for download from github.")) + stop( + paste0( + "Week ", + week, + " of TidyTuesday for ", + x, + " does not have data available for download from github." 
+ ) + ) } - tt_date + tt_date_format(tt_date) } @@ -61,16 +84,12 @@ tt_weeks <- function(year) { } ttmf <- tt_master_file() - tt_week <- unique(basename(ttmf$weeks[ttmf$year == year])) - weekNum <- tt_week %>% - as.Date(format = "%Y-%m-%d") %>% - `+`(3) %>% # move to accomodate - lubridate::week() + tt_week <- unique(ttmf[ttmf$year == year, c("Week","Date")]) list( - week_desc = weekNum, - folders = tt_week + week_desc = tt_week$Week, + folders = tt_week$Date ) } diff --git a/R/tt_datasets.R b/R/tt_datasets.R index 8158aca..0cea165 100644 --- a/R/tt_datasets.R +++ b/R/tt_datasets.R @@ -6,8 +6,6 @@ #' @export tt_available <- function() { - tt_update_master_file() - tt_year <- sort(tt_years(),decreasing = TRUE,) datasets <- setNames(vector("list", length(tt_year)), tt_year) @@ -16,6 +14,8 @@ tt_available <- function() { datasets[[as.character(year)]] <- tt_datasets(year) } + + structure(datasets, class = c("tt_dataset_table_list") ) @@ -35,19 +35,33 @@ tt_available <- function() { #' to set the PAT. #' #' @importFrom rvest html_table +#' @importFrom xml2 read_html #' @export -#' tt_datasets <- function(year, auth = github_pat()) { - readme <- github_html(file.path("data",year,"readme.md"), auth = auth) + files <- github_sha(file.path("data",year)) + + readme <- grep(pattern = "readme", files$path, value = TRUE, ignore.case = TRUE) + + readme_html <- github_html(file.path("data",year,readme), auth = auth) + + readme_html <- read_html( + gsub("\\n","", + gsub( + x = as.character(readme_html), + pattern = "", + replacement = "", + perl = TRUE + ) + )) - datasets <- readme %>% + datasets <- readme_html %>% html_table() %>% - `[`(1) + `[[`(1) structure( datasets, - .html = readme, + .html = readme_html, class = "tt_dataset_table" ) } diff --git a/R/tt_download.R b/R/tt_download.R index dd14e7e..f5fa877 100644 --- a/R/tt_download.R +++ b/R/tt_download.R @@ -5,6 +5,15 @@ #' @param tt string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year #' @param files List the file names to download. Default to asking. #' @param ... pass methods to the parsing functions. These will be passed to ALL files, so be careful. +#' @param auth github Personal Access Token. See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. 
+#' #' @return tt_data object (list class) #' #' @export @@ -14,44 +23,45 @@ #' @examples #' tt_output <- tt_load("2019-01-15") -tt_download <- function(tt, files = c("All", attr(tt, ".files")$data_files), ...){ +tt_download <- function(tt, files = c("All"), ..., auth = github_pat()){ - data_info <- attr(tt, ".files") - tt_date <- attr(tt, ".date") - tt_year <- year(tt_date) + data_files <- attr(tt, ".files")$data_files #define which files to download - files <- match.arg(files, several.ok = TRUE) + files <- match.arg(files, several.ok = TRUE, choices = c("All", data_files)) if("All" %in% files){ - files <- data_info$data_files + files <- data_files } message("--- Starting Download ---") + cat("\n") tt_data <- setNames( vector("list", length = length(files)), files) for(file in files){ - cat(sprintf('\rdownloading file %d of %d: `%s`', - which(files == file), - length(files), - file)) - - file_info <- data_info[ data_info$data_files == file, ] - - tt_data[[file]] <- tt_read_data( - file = file, - type = file_info$data_type, - delim = file_info$delim, - dir = file.path("data",tt_year,tt_date) + dl_message <- sprintf('\tDownloading file %d of %d: `%s`\n', + which(files == file), + length(files), + file) + + cat(dl_message) + + tt_data[[file]] <- tt_download_file( + tt, + file, + ..., + auth = auth ) } + message("--- Download complete ---") names(tt_data) <- tools::file_path_sans_ext(attr(tt, ".files")$data_files) + tt_data } diff --git a/R/tt_download_file.R b/R/tt_download_file.R new file mode 100644 index 0000000..7e6a782 --- /dev/null +++ b/R/tt_download_file.R @@ -0,0 +1,91 @@ +#' @title Reads in TidyTuesday datasets from Github repo +#' +#' @description Reads in the actual data from the TidyTuesday github +#' +#' @param tt tt_gh object from tt_load_gh function +#' @param x index/name of data object to read in. string or int +#' @param ... pass methods to the parsing functions. These will be passed to ALL files, so be careful. +#' @param auth github Personal Access Token. See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. 
+#' +#' @return tibble +#' @export +#' +#' @importFrom readr read_csv read_delim +#' @import tools +#' @import readxl +#' +#' @family tt_download_file +#' +#' @examples +#' tt_gh <- tt_load_gh("2019-01-15") +#' +#' agencies <- tt_download_file(tt_gh, 1) +#' launches <- tt_download_file(tt_gh, "launches.csv") +#' +tt_download_file <- function(tt, x, ..., auth = github_pat()) { + suppressMessages({ + switch(class(x), + "character" = tt_download_file.character(tt, x, guess_max = guess_max ), + "numeric" = tt_download_file.numeric(tt, x, guess_max = guess_max ), + "integer" = tt_download_file.numeric(tt, x, guess_max = guess_max ), + stop(paste("No method for entry of class:", class(x))) + ) + }) +} + +#' @importFrom lubridate year +#' @importFrom tools file_ext +tt_download_file.character <- function(tt, x, ..., auth = github_pat()) { + + file_info <- attr(tt, ".files") + + if (x %in% file_info$data_file) { + + tt_date <- attr(tt, ".date") + tt_year <- year(tt_date) + as_raw <- tools::file_ext(x) %in% c("xlsx","xls","rda","rds") + + blob <- github_blob(file.path("data",tt_year,tt_date,x), as_raw = as_raw, auth = auth) + + tt_parse_blob(blob, file_info = file_info[file_info$data_file == x,]) + + } else { + stop(paste0( + "That is not an available file for this TidyTuesday week!\nAvailable Datasets:\n", + paste(attr(tt, ".files"), "\n\t", collapse = "") + )) + } +} + +tt_download_file.numeric <- function(tt, x, ...) { + files <- attr(tt, ".files")$data_files + if (x > 0 & x <= length(files)) { + tt_download_file.character(tt, files[x], ...) + } else { + stop(paste0( + "That is not an available index for the files for this TidyTuesday week!\nAvailable Datasets:\n\t", + paste0(seq(1, length(files)), ": ", files, "\n\t", collapse = "") + )) + } +} + + + + + +#' @title utility to load RDA with out using assigned name in envir +#' +#' @param path path to RDA file +# +read_rda <- function(path){ + load_env<-new.env() + load(path,envir = load_env) + load_env[[ ls(load_env)[1] ]] +} diff --git a/R/tt_load.R b/R/tt_load.R index 943b08f..8549f35 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -14,12 +14,12 @@ #' #' @return tt_data object (list class) #' -#' @export -#' #' @importFrom purrr map #' #' @examples #' tt_output <- tt_load("2019-01-15") +#' +#' @export tt_load <- function(x, week, download_files = "All", ..., auth = github_pat()) { # download readme and identify files diff --git a/R/tt_load_gh.R b/R/tt_load_gh.R index d15f7f1..3383d9a 100644 --- a/R/tt_load_gh.R +++ b/R/tt_load_gh.R @@ -15,7 +15,6 @@ #' #' @return tt_gh object. 
List object with the following entries: readme, files, url #' @export -#'' #' @examples #' tt_gh <- tt_load_gh("2019-01-15") #' diff --git a/R/tt_master_file.R b/R/tt_master_file.R index f7783b6..33634d2 100644 --- a/R/tt_master_file.R +++ b/R/tt_master_file.R @@ -18,7 +18,7 @@ tt_update_master_file <- function(force = FALSE, auth = github_pat()){ sha_df <- github_sha("static") sha <- sha_df$sha[sha_df$path == "tt_data_type.csv"] - if( is.null(tt_master_file()) || sha != attr(tt_master_file(), ".sha") || force ){ + if( is.null(TT_MASTER_ENV$TT_MASTER_FILE) || sha != attr(TT_MASTER_ENV$TT_MASTER_FILE, ".sha") || force ){ file_text <- github_contents("static/tt_data_type.csv", auth = auth) content <- read.csv(text = file_text, header = TRUE, stringsAsFactors = FALSE) attr(content,".sha") <- sha @@ -38,7 +38,12 @@ tt_master_file <- function(assign = NULL){ if(!is.null(assign)){ TT_MASTER_ENV$TT_MASTER_FILE <- assign }else{ - TT_MASTER_ENV$TT_MASTER_FILE + ttmf <- TT_MASTER_ENV$TT_MASTER_FILE + if(is.null(ttmf)){ + tt_update_master_file() + ttmf <- TT_MASTER_ENV$TT_MASTER_FILE + } + return(ttmf) } } diff --git a/R/tt_parse.R b/R/tt_parse.R new file mode 100644 index 0000000..f6a0a12 --- /dev/null +++ b/R/tt_parse.R @@ -0,0 +1,41 @@ + +#' @title general utility to assist with parsing the raw data +#' +#' @param blob raw data to be parsed +#' @param func the function to perform parsing of the file +#' @param ... args to pass to func +#' @param fileinfo data.frame of information about the blob being read +tt_parse_blob <- function(blob, ..., file_info) { + switch( tolower(file_info$data_type), + "xls" = tt_parse_binary(blob, readxl::read_xls, ..., filename = file_info$data_files), + "xlsx" = tt_parse_binary(blob, readxl::read_xlsx, ..., filename = file_info$data_file), + "rds" = tt_parse_binary(blob, readRDS, filename = file_info$data_file), + "rda" = tt_parse_binary(blob, read_rda, filename = file_info$data_file), + tt_parse_text(blob, readr::read_delim, progress = FALSE, delim = file_info$delim, ...) + ) +} + + +#' @title utility to assist with parsing the raw binary data +#' +#' @param blob raw data to be parsed +#' @param func the function to perform parsing of the file +#' @param ... args to pass to func +#' @param filename the original name of the file +tt_parse_binary <- function(blob, func, ... , filename) { + temp_file <- file.path(tempdir(), filename) + writeBin(blob, temp_file) + on.exit(unlink(temp_file)) + func(temp_file, ...) +} + + +#' @title utility to assist with parsing the text data +#' +#' @param blob raw text data to be parsed +#' @param func the function to perform parsing of the file +#' @param ... args to pass to func +#' +tt_parse_text <- function(blob, func, ... ) { + func(blob, ...) +} diff --git a/R/tt_read_data.R b/R/tt_read_data.R deleted file mode 100644 index ed87b61..0000000 --- a/R/tt_read_data.R +++ /dev/null @@ -1,68 +0,0 @@ -#' @title Reads in TidyTuesday datasets from Github repo -#' -#' @description Reads in the actual data from the TidyTuesday github -#' -#' @param tt tt_gh object from tt_load_gh function -#' @param x index/name of data object to read in. string or int -#' @param guess_max number of rows to use to esimate col type, defaults to 5000. Only used for text files. 
-#' @return tibble -#' @export -#' -#' @importFrom readr read_csv read_delim -#' @importFrom tools file_ext -#' @import readxl -#' -#' @family tt_read_data -#' -#' @examples -#' tt_gh <- tt_load_gh("2019-01-15") -#' -#' tt_dataset_1 <- tt_read_data(tt_gh, tt_gh[1]) -tt_read_data <- function(filename, type, delim, dir, ...) { - - read_func <- switch(type, - "xls" = read_data(readxl::read_xls,..., raw = TRUE), - "xlsx" = read_data(readxl::read_xlsx,..., raw = TRUE), - "rds" = read_data(readRDS, raw = TRUE), - "rda" = read_data(read_rda, raw = TRUE), - read_data(readr::read_delim, progress = FALSE,...) - ) - - read_func(file.path(dir, filename)) - -} - - -#' @title utility to assist with 'reading' urls that cannot normally be read by file functions -#' -#' @param url path to online file to be read -#' @param func the function to perform reading of url -#' @param ... args to pass to func -#' @param guess_max number of rows to use to predict column type. Only used if is an arg in `func` -#' @param mode mode passed to \code{utils::download.file}. default is "w" -#' @param find_delim should the delimeters be found for the file -#' @importFrom utils download.file -#' -read_data <- function(func, ..., raw = FALSE) { - - - read_data_txt - - function(path){ - blob <- github_blob(path, as_raw = raw) - - - - } - -} - -#' @title utility to load RDA with out using assigned name in envir -#' -#' @param path path to RDA file -# -read_rda <- function(path){ - load_env<-new.env() - load(path,envir = load_env) - load_env[[ ls(load_env)[1] ]] -} diff --git a/R/utils.R b/R/utils.R index 3a6bf2c..29e1856 100644 --- a/R/utils.R +++ b/R/utils.R @@ -14,7 +14,7 @@ print.tt_data <- function(x, ...) { #' @importFrom tools file_path_sans_ext #' @export print.tt <- function(x,...){ - message("Available datasets in this TidyTuesday:\n\t", paste(attr(x,".files")[[1]], "\n\t", collapse = "")) + message("Available datasets in this TidyTuesday:\n\t", paste(attr(x,".files")$data_files, "\n\t", collapse = "")) invisible(x) } diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..9e5d43a --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,3 @@ +.onLoad <- function(libname, pkgname) { + options("tidytuesdayR.tt_repo" = "rfordatascience/tidytuesday") +} From 0000c0436fe66fb7029fc22d0fb58c81b962ea1b Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 20 May 2020 22:53:10 -0700 Subject: [PATCH 26/64] add documentation --- man/download_read.Rd | 24 ------------------------ man/identify_delim.Rd | 31 ------------------------------- man/read_rda.Rd | 2 +- man/tt_download.Rd | 13 ++++++++++++- man/tt_download_file.Rd | 40 ++++++++++++++++++++++++++++++++++++++++ man/tt_load.Rd | 1 + man/tt_parse_binary.Rd | 20 ++++++++++++++++++++ man/tt_parse_blob.Rd | 20 ++++++++++++++++++++ man/tt_parse_text.Rd | 18 ++++++++++++++++++ man/tt_read_data.Rd | 27 --------------------------- 10 files changed, 112 insertions(+), 84 deletions(-) delete mode 100644 man/download_read.Rd delete mode 100644 man/identify_delim.Rd create mode 100644 man/tt_download_file.Rd create mode 100644 man/tt_parse_binary.Rd create mode 100644 man/tt_parse_blob.Rd create mode 100644 man/tt_parse_text.Rd delete mode 100644 man/tt_read_data.Rd diff --git a/man/download_read.Rd b/man/download_read.Rd deleted file mode 100644 index 34a9b8a..0000000 --- a/man/download_read.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_read_data.R -\name{download_read} -\alias{download_read} -\title{utility to assist with 
'reading' urls that cannot normally be read by file functions} -\usage{ -download_read(url, func, ..., guess_max, mode = "w", find_delim = FALSE) -} -\arguments{ -\item{url}{path to online file to be read} - -\item{func}{the function to perform reading of url} - -\item{...}{args to pass to func} - -\item{guess_max}{number of rows to use to predict column type. Only used if is an arg in `func`} - -\item{mode}{mode passed to \code{utils::download.file}. default is "w"} - -\item{find_delim}{should the delimeters be found for the file} -} -\description{ -utility to assist with 'reading' urls that cannot normally be read by file functions -} diff --git a/man/identify_delim.Rd b/man/identify_delim.Rd deleted file mode 100644 index 7d12936..0000000 --- a/man/identify_delim.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/identify_delim.R -\name{identify_delim} -\alias{identify_delim} -\title{Identify potential delimeters of file} -\usage{ -identify_delim( - path, - delims = c("\\t", ",", " ", "|", ";"), - n = 10, - comment = "#", - skip = 0, - quote = "\\"" -) -} -\arguments{ -\item{path}{path to file} - -\item{delims}{a vector of delimeters to try} - -\item{n}{number of rows to look at in the file to determine the delimters} - -\item{comment}{identify lines that are comments if this character is at the beginning} - -\item{skip}{number of lines to skip at the beginning} - -\item{quote}{set of quoting characters} -} -\description{ -Identify potential delimeters of file -} diff --git a/man/read_rda.Rd b/man/read_rda.Rd index 28559bf..59307dd 100644 --- a/man/read_rda.Rd +++ b/man/read_rda.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_read_data.R +% Please edit documentation in R/tt_download_file.R \name{read_rda} \alias{read_rda} \title{utility to load RDA with out using assigned name in envir} diff --git a/man/tt_download.Rd b/man/tt_download.Rd index dbc8818..582c6d9 100644 --- a/man/tt_download.Rd +++ b/man/tt_download.Rd @@ -6,7 +6,7 @@ Download all or specific files identified in the tt dataset} \usage{ -tt_download(tt, files = c("All", attr(tt, ".files")$data_files), ...) +tt_download(tt, files = c("All"), ..., auth = github_pat()) } \arguments{ \item{tt}{string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year} @@ -14,6 +14,8 @@ tt_download(tt, files = c("All", attr(tt, ".files")$data_files), ...) \item{files}{List the file names to download. Default to asking.} \item{...}{pass methods to the parsing functions. These will be passed to ALL files, so be careful.} + +\item{auth}{github Personal Access Token. See PAT section for more information} } \value{ tt_data object (list class) @@ -23,6 +25,15 @@ download tt data Download all or specific files identified in the tt dataset } +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. 
+} + \examples{ tt_output <- tt_load("2019-01-15") } diff --git a/man/tt_download_file.Rd b/man/tt_download_file.Rd new file mode 100644 index 0000000..2ed37bd --- /dev/null +++ b/man/tt_download_file.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_download_file.R +\name{tt_download_file} +\alias{tt_download_file} +\title{Reads in TidyTuesday datasets from Github repo} +\usage{ +tt_download_file(tt, x, ..., auth = github_pat()) +} +\arguments{ +\item{tt}{tt_gh object from tt_load_gh function} + +\item{x}{index/name of data object to read in. string or int} + +\item{...}{pass methods to the parsing functions. These will be passed to ALL files, so be careful.} + +\item{auth}{github Personal Access Token. See PAT section for more information} +} +\value{ +tibble +} +\description{ +Reads in the actual data from the TidyTuesday github +} +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. +} + +\examples{ +tt_gh <- tt_load_gh("2019-01-15") + +agencies <- tt_download_file(tt_gh, 1) +launches <- tt_download_file(tt_gh, "launches.csv") + +} +\concept{tt_download_file} diff --git a/man/tt_load.Rd b/man/tt_load.Rd index 2fa0860..7b82a3e 100644 --- a/man/tt_load.Rd +++ b/man/tt_load.Rd @@ -32,4 +32,5 @@ to set the PAT. \examples{ tt_output <- tt_load("2019-01-15") + } diff --git a/man/tt_parse_binary.Rd b/man/tt_parse_binary.Rd new file mode 100644 index 0000000..6faef8d --- /dev/null +++ b/man/tt_parse_binary.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_parse.R +\name{tt_parse_binary} +\alias{tt_parse_binary} +\title{utility to assist with parsing the raw binary data} +\usage{ +tt_parse_binary(blob, func, ..., filename) +} +\arguments{ +\item{blob}{raw data to be parsed} + +\item{func}{the function to perform parsing of the file} + +\item{...}{args to pass to func} + +\item{filename}{the original name of the file} +} +\description{ +utility to assist with parsing the raw binary data +} diff --git a/man/tt_parse_blob.Rd b/man/tt_parse_blob.Rd new file mode 100644 index 0000000..5a724e3 --- /dev/null +++ b/man/tt_parse_blob.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_parse.R +\name{tt_parse_blob} +\alias{tt_parse_blob} +\title{general utility to assist with parsing the raw data} +\usage{ +tt_parse_blob(blob, ..., file_info) +} +\arguments{ +\item{blob}{raw data to be parsed} + +\item{...}{args to pass to func} + +\item{func}{the function to perform parsing of the file} + +\item{fileinfo}{data.frame of information about the blob being read} +} +\description{ +general utility to assist with parsing the raw data +} diff --git a/man/tt_parse_text.Rd b/man/tt_parse_text.Rd new file mode 100644 index 0000000..0f9f81a --- /dev/null +++ b/man/tt_parse_text.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_parse.R +\name{tt_parse_text} +\alias{tt_parse_text} +\title{utility to assist with parsing the text data} +\usage{ +tt_parse_text(blob, func, ...) 
+} +\arguments{ +\item{blob}{raw text data to be parsed} + +\item{func}{the function to perform parsing of the file} + +\item{...}{args to pass to func} +} +\description{ +utility to assist with parsing the text data +} diff --git a/man/tt_read_data.Rd b/man/tt_read_data.Rd deleted file mode 100644 index 9c78ed1..0000000 --- a/man/tt_read_data.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_read_data.R -\name{tt_read_data} -\alias{tt_read_data} -\title{Reads in TidyTuesday datasets from Github repo} -\usage{ -tt_read_data(filename, type, delim, dir) -} -\arguments{ -\item{tt}{tt_gh object from tt_load_gh function} - -\item{x}{index/name of data object to read in. string or int} - -\item{guess_max}{number of rows to use to esimate col type, defaults to 5000. Only used for text files.} -} -\value{ -tibble -} -\description{ -Reads in the actual data from the TidyTuesday github -} -\examples{ -tt_gh <- tt_load_gh("2019-01-15") - -tt_dataset_1 <- tt_read_data(tt_gh, tt_gh[1]) -} -\concept{tt_read_data} From f1b443e693386ca2ee07c70c35b5588e685d01fa Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 20 May 2020 23:13:06 -0700 Subject: [PATCH 27/64] updates to printing tt_available --- NAMESPACE | 7 ++-- R/{tt_datasets.R => tt_available.R} | 61 +++++++++++------------------ 2 files changed, 26 insertions(+), 42 deletions(-) rename R/{tt_datasets.R => tt_available.R} (69%) diff --git a/NAMESPACE b/NAMESPACE index fba5f30..2a1f9c6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,13 +4,13 @@ S3method(print,tt) S3method(print,tt_data) S3method(print,tt_dataset_table) S3method(print,tt_dataset_table_list) -export("'") export(readme) export(tt_available) export(tt_datasets) export(tt_download) +export(tt_download_file) export(tt_load) -export(tt_read_data) +export(tt_load_gh) import(readxl) import(rvest) import(tools) @@ -31,8 +31,9 @@ importFrom(readr,read_delim) importFrom(rstudioapi,isAvailable) importFrom(rstudioapi,viewer) importFrom(rvest,html_table) +importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) -importFrom(utils,download.file) +importFrom(xml2,html_nodes) importFrom(xml2,read_html) importFrom(xml2,write_html) importFrom(xml2,xml_add_sibling) diff --git a/R/tt_datasets.R b/R/tt_available.R similarity index 69% rename from R/tt_datasets.R rename to R/tt_available.R index 0cea165..b6a3328 100644 --- a/R/tt_datasets.R +++ b/R/tt_available.R @@ -14,8 +14,6 @@ tt_available <- function() { datasets[[as.character(year)]] <- tt_datasets(year) } - - structure(datasets, class = c("tt_dataset_table_list") ) @@ -89,42 +87,36 @@ print.tt_dataset_table <- function(x, ..., printConsole = FALSE) { #' @param printConsole should output go to the console? TRUE/FALSE #' @importFrom purrr walk map #' @importFrom rstudioapi isAvailable viewer -#' @importFrom rvest html_table +#' @importFrom rvest html_node +#' @importFrom xml2 read_html write_html #' @export print.tt_dataset_table_list <- function(x, ..., printConsole = FALSE) { if (isAvailable() & !printConsole) { - tmpHTML <- setup_doc() - - cat("
<h1>TidyTuesday Datasets</h1>
", file = tmpHTML, append = TRUE) - names(x) %>% - purrr::map( + readme <- names(x) %>% + purrr::map_chr( function(.x, x) { - list(html = as.character(attr(x[[.x]], ".html")), year = .x) + year_table <- attr(x[[.x]],".html") %>% + html_node("table") + paste("
<h2>",.x,"</h2>
", + as.character(year_table), + "") }, x = x ) %>% - purrr::walk( - ~ cat( - paste0( - "
<h2>
", - .x$year, - "
</h2>
\n", - gsub( - "href=\"/rfordatascience/tidytuesday/", - "href=\"https://github.com/rfordatascience/tidytuesday/", - .x$html - ) - ), - file = tmpHTML, - append = TRUE - ) - ) + paste(collapse = "") %>% + paste("
<h1>TidyTuesday Datasets</h1>
",.) %>%
+      paste("<body>",.,"</body>
") %>% + read_html() %>% + github_page() + + tmp_html <- tempfile(fileext = ".html") + write_html(readme, file = tmp_html) + on.exit(unlink(tmp_html)) + + rstudioapi::viewer(url = tmp_html) - cat("", file = tmpHTML, append = TRUE) - cat("", file = tmpHTML, append = TRUE) - rstudioapi::viewer(url = tmpHTML) } else { @@ -132,26 +124,17 @@ print.tt_dataset_table_list <- function(x, ..., printConsole = FALSE) { purrr::map( function(.x, x) { list( - table = rvest::html_table(attr(x[[.x]], ".html")), year = .x + table = data.frame(unclass(x[[.x]])), year = .x ) }, x = x ) %>% purrr::walk( function(.x) { - cat(paste0("Year: ", .x$year, "\n")) + cat(paste0("Year: ", .x$year, "\n\n")) print(.x$table) cat("\n\n") } ) } } - -setup_doc <- function(tmpHTML = tempfile(fileext = ".html")) { - cat(" - - - ", file = tmpHTML) - cat("
", file = tmpHTML, append = TRUE) - return(tmpHTML) -} From 488c2c2def850907a9633b8c77d5e5004bee9537 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Sat, 23 May 2020 16:40:18 -0700 Subject: [PATCH 28/64] updates to tests & docs to pass devtools::check --- .Rbuildignore | 2 + .github/.gitignore | 1 + .github/workflows/R-CMD-check.yaml | 24 +++ DESCRIPTION | 16 +- NAMESPACE | 14 +- R/github_api.R | 52 +++--- R/tt_available.R | 104 ++++++----- R/tt_check_date.R | 12 +- R/tt_compile.R | 2 +- R/tt_download.R | 21 ++- R/tt_download_file.R | 14 +- R/tt_load.R | 4 +- R/tt_load_gh.R | 7 +- R/tt_master_file.R | 1 + R/tt_parse.R | 16 +- R/utils-pipe.R | 11 ++ R/utils.R | 18 +- docs/404.html | 41 +++-- docs/CODE_OF_CONDUCT.html | 45 +++-- docs/LICENSE-text.html | 45 +++-- docs/LICENSE.html | 45 +++-- docs/authors.html | 39 +++-- docs/index.html | 74 ++++---- docs/news/index.html | 54 +++--- docs/pkgdown.css | 161 ++++++++++++++++-- docs/pkgdown.js | 15 +- docs/pkgdown.yml | 3 +- docs/readme.html | 84 +++++---- docs/reference/index.html | 141 +++++++++++---- docs/reference/print.tt.html | 51 +++--- docs/reference/print.tt_data.html | 51 +++--- docs/reference/print.tt_dataset_table.html | 57 ++++--- .../print.tt_dataset_table_list.html | 57 ++++--- docs/reference/read_rda.html | 51 +++--- docs/reference/readme.html | 49 +++--- docs/reference/tt_available.html | 69 +++++--- docs/reference/tt_datasets.html | 69 +++++--- docs/reference/tt_load.html | 74 +++++--- docs/reference/tt_load_gh.html | 71 +++++--- man/GET_json.Rd | 3 + man/base_64_to_char.Rd | 3 + man/github_blob.Rd | 11 +- man/github_page.Rd | 2 +- man/github_sha.Rd | 9 +- man/pipe.Rd | 12 ++ man/print.tt_dataset_table.Rd | 6 +- man/print.tt_dataset_table_list.Rd | 6 +- man/tt_available.Rd | 16 +- man/tt_datasets.Rd | 2 +- man/tt_download.Rd | 9 +- man/tt_download_file.Rd | 2 + man/tt_load.Rd | 5 +- man/tt_load_gh.Rd | 5 +- man/tt_parse_blob.Rd | 4 +- man/tt_parse_text.Rd | 6 +- readme.md | 1 + tests/testthat/helper-tt_ref_test_that.R | 11 ++ tests/testthat/test-00-github_api.R | 68 ++++++++ tests/testthat/test-01-tt_master_file.R | 40 +++++ tests/testthat/test-02-tt_available.R | 86 ++++++++++ tests/testthat/test-03-tt_check_date.R | 30 ++++ tests/testthat/test-04-tt_compile.R | 47 +++++ tests/testthat/test-05-tt_load_gh.R | 109 ++++++++++++ tests/testthat/test-06-tt_parse_blob.R | 132 ++++++++++++++ tests/testthat/test-07-tt_read_data.R | 69 ++++++++ tests/testthat/test-08-tt_load.R | 46 +++++ .../{test-utils.R => test-10-utils.R} | 24 +-- tests/testthat/test-get_tt_html.R | 10 -- tests/testthat/test-identify_delim.R | 48 ------ tests/testthat/test-make_url.R | 24 --- tests/testthat/test-tt_available.R | 19 --- tests/testthat/test-tt_load_gh.R | 131 -------------- tests/testthat/test-tt_read_data.R | 74 -------- 73 files changed, 1834 insertions(+), 901 deletions(-) create mode 100644 .github/.gitignore create mode 100644 .github/workflows/R-CMD-check.yaml create mode 100644 R/utils-pipe.R create mode 100644 man/pipe.Rd create mode 100644 tests/testthat/helper-tt_ref_test_that.R create mode 100644 tests/testthat/test-00-github_api.R create mode 100644 tests/testthat/test-01-tt_master_file.R create mode 100644 tests/testthat/test-02-tt_available.R create mode 100644 tests/testthat/test-03-tt_check_date.R create mode 100644 tests/testthat/test-04-tt_compile.R create mode 100644 tests/testthat/test-05-tt_load_gh.R create mode 100644 tests/testthat/test-06-tt_parse_blob.R create mode 100644 tests/testthat/test-07-tt_read_data.R create mode 100644 
tests/testthat/test-08-tt_load.R rename tests/testthat/{test-utils.R => test-10-utils.R} (52%) delete mode 100644 tests/testthat/test-get_tt_html.R delete mode 100644 tests/testthat/test-identify_delim.R delete mode 100644 tests/testthat/test-make_url.R delete mode 100644 tests/testthat/test-tt_available.R delete mode 100644 tests/testthat/test-tt_load_gh.R delete mode 100644 tests/testthat/test-tt_read_data.R diff --git a/.Rbuildignore b/.Rbuildignore index 037d047..f6c559b 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,5 @@ ^.*\.Rproj$ ^\.Rproj\.user$ ^readme\.md$ +^pkgdown$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..24a3ba4 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,24 @@ +on: + push: + branches: + - master + pull_request: + branches: + - master + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: macOS-latest + steps: + - uses: actions/checkout@v2 + - uses: r-lib/actions/setup-r@master + - name: Install dependencies + run: | + install.packages(c("remotes", "rcmdcheck")) + remotes::install_deps(dependencies = TRUE) + shell: Rscript {0} + - name: Check + run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error") + shell: Rscript {0} diff --git a/DESCRIPTION b/DESCRIPTION index aceff87..e5f88f6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,14 +44,18 @@ Depends: R (>= 3.4.0) Suggests: testthat (>= 2.1.0), covr, - pkgdown + pkgdown, + tibble, Imports: dplyr (>= 0.8.0), readxl (>= 1.0.0), - rvest (>= 0.3.2), - tools (>= 3.1.0), + rvest (>= 0.3.2), + tools (>= 3.1.0), lubridate (>= 1.7.0), purrr (>= 0.2.5), - readr (>= 1.0.0), - rstudioapi (>= 0.2), - xml2 (>= 1.2.0) + readr (>= 1.0.0), + rstudioapi (>= 0.2), + xml2 (>= 1.2.0), + httr, + jsonlite, + magrittr diff --git a/NAMESPACE b/NAMESPACE index 2a1f9c6..b0f05be 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method(print,tt) S3method(print,tt_data) S3method(print,tt_dataset_table) S3method(print,tt_dataset_table_list) +export("%>%") export(readme) export(tt_available) export(tt_datasets) @@ -12,7 +13,6 @@ export(tt_download_file) export(tt_load) export(tt_load_gh) import(readxl) -import(rvest) import(tools) importFrom(httr,GET) importFrom(httr,add_headers) @@ -23,17 +23,23 @@ importFrom(lubridate,is.Date) importFrom(lubridate,month) importFrom(lubridate,year) importFrom(lubridate,ymd) +importFrom(magrittr,"%>%") importFrom(purrr,map) -importFrom(purrr,set_names) importFrom(purrr,walk) importFrom(readr,read_csv) importFrom(readr,read_delim) importFrom(rstudioapi,isAvailable) importFrom(rstudioapi,viewer) +importFrom(rvest,html_node) +importFrom(rvest,html_nodes) importFrom(rvest,html_table) +importFrom(stats,aggregate) +importFrom(stats,na.pass) +importFrom(stats,setNames) importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) -importFrom(xml2,html_nodes) +importFrom(utils,URLencode) +importFrom(utils,browseURL) +importFrom(utils,read.csv) importFrom(xml2,read_html) importFrom(xml2,write_html) -importFrom(xml2,xml_add_sibling) diff --git a/R/github_api.R b/R/github_api.R index 02636b3..3cf0cf2 100644 --- a/R/github_api.R +++ b/R/github_api.R @@ -30,18 +30,18 @@ github_contents <- function(path, auth = github_pat()) { url_response <- github_GET(base_url, auth = auth, type= "application/json") - json_response <- 
GET_json(url_response) - if (url_response$status_code == 200) { + json_response <- GET_json(url_response) content <- base_64_to_char(json_response$content) attr(content, ".sha") <- json_response$sha return(content) - } else if(url_response$status_code == 403 & json_response$errors[[1]]$code == "too_large"){ - - github_blob(path, auth = auth) - + } else if(url_response$status_code == 403){ + json_response <- GET_json(url_response) + if( json_response$errors[[1]]$code == "too_large"){ + github_blob(path, auth = auth) + } }else{ NULL } @@ -98,8 +98,8 @@ github_html <- #' #' provide tools to read and process readme's as html using the github api #' -#' @param path Relative path from within the TidyTuesday Repository to contents that can be returned as HTML -#' @param ... optional arguments to pass to \code{read_html} +#' @param dirpath Relative path from within the TidyTuesday Repository to folder of contents wanting sha for +#' @param branch which branch to get sha for. assumed to be master (and usually should be) #' @param auth github PAT. See PAT section for more information #' #' @section PAT: @@ -114,12 +114,12 @@ github_html <- #' @examples #' \dontrun{ #' -#' main_readme <- github_html("README.md") -#' week_readme <- github_html("data/2020/2020-01-07/readme.md") +#' sha <- github_sha("data/2020/2020-01-07") #' #' } #' #' @importFrom xml2 read_html +#' @importFrom utils URLencode github_sha <- function(dirpath, branch = "master", @@ -138,9 +138,9 @@ github_sha <- ) url_response <- github_GET(base_url, auth = auth) - url_json <- GET_json(url_response) if (url_response$status_code == 200) { + url_json <- GET_json(url_response) do.call('rbind', lapply(url_json$tree, function(x) @@ -157,7 +157,8 @@ github_sha <- #' #' @param path Relative path from within the TidyTuesday Repository to contents, #' usually because it was too large to be read with the contencts api. -#' @param raw optional arguments to pass to \code{read_html} +#' @param as_raw optional arguments to pass to \code{read_html} +#' @param sha sha of object if known in liu of path (usually best to give both for clarity) #' @param auth github PAT. 
See PAT section for more information #' #' @section PAT: @@ -172,21 +173,23 @@ github_sha <- #' @examples #' \dontrun{ #' -#' main_readme <- github_html("README.md") -#' week_readme <- github_html("data/2020/2020-01-07/readme.md") +#' main_readme_blob <- github_blob("README.md", as_raw = TRUE) #' #' } #' github_blob <- - function(path, as_raw = FALSE, auth = github_pat()){ - dir_sha <- github_sha(dirname(path)) - file_sha <- dir_sha$sha[dir_sha$path == basename(path)] + function(path, as_raw = FALSE, sha = NULL, auth = github_pat()){ + + if(is.null(sha)){ + dir_sha <- github_sha(dirname(path)) + sha <- dir_sha$sha[dir_sha$path == basename(path)] + } base_url <- file.path("https://api.github.com/repos", options("tidytuesdayR.tt_repo"), "git/blobs", - file_sha) + sha) url_response <- github_GET(base_url, auth = auth, Accept = "application/vnd.github.VERSION.raw") @@ -197,7 +200,7 @@ github_blob <- }else{ content <- rawToChar(url_response$content) } - attr(content, ".sha") <- file_sha + attr(content, ".sha") <- sha return(content) } else{ @@ -210,7 +213,7 @@ github_blob <- #' read json base64 contents from github #' #' provide tool to read and process data using the github api -#' +#' @param b64 base64 character value to be decoded and converted to character value #' @importFrom jsonlite base64_dec base_64_to_char <- function(b64){ rawToChar(base64_dec(b64)) @@ -219,7 +222,7 @@ base_64_to_char <- function(b64){ #' read GET json contents to char #' #' provide tool to read and process data using the github api from GET command -#' +#' @param get_response object of class "response" from GET command. returns JSON value. #' @importFrom jsonlite base64_dec GET_json <- function(get_response){ jsonlite::parse_json(rawToChar(get_response$content)) @@ -230,11 +233,12 @@ GET_json <- function(get_response){ #' #' Provide the necessary section to wrap around raw html content read from github. #' -#' @param content html content +#' @param page_content html content in xml_document class #' #' @return xml_document with github header #' -#' @importFrom xml2 read_html xml_add_sibling html_nodes +#' @importFrom xml2 read_html +#' @importFrom rvest html_nodes github_page <- function(page_content){ header <- "" @@ -242,7 +246,7 @@ github_page <- function(page_content){ html_nodes("body") %>% as.character - read_html(paste(header, body)) + read_html(paste0(header, body)) } diff --git a/R/tt_available.R b/R/tt_available.R index b6a3328..2c52e63 100644 --- a/R/tt_available.R +++ b/R/tt_available.R @@ -1,17 +1,23 @@ #' @title Show all TidyTuesdays #' @description Show all the available datasets, and corresponding weeks -#' @importFrom xml2 read_html -#' @import rvest -#' @importFrom purrr set_names map +#' @param auth github Personal Access Token. See PAT section for more information +#' +#' @section PAT: +#' +#' A Github PAT is a personal Access Token. This allows for signed queries to +#' the github api, and increases the limit on the number of requests allowed from +#' 60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +#' to set the PAT. 
+#' #' @export -tt_available <- function() { +tt_available <- function(auth = github_pat()) { tt_year <- sort(tt_years(),decreasing = TRUE,) datasets <- setNames(vector("list", length(tt_year)), tt_year) for(year in tt_year){ - datasets[[as.character(year)]] <- tt_datasets(year) + datasets[[as.character(year)]] <- tt_datasets(year, auth = auth) } structure(datasets, @@ -37,6 +43,10 @@ tt_available <- function() { #' @export tt_datasets <- function(year, auth = github_pat()) { + if(!year %in% tt_years()){ + stop("Invalid `year` provided to list available tidytuesday datasets.\n\tUse one of the following years: ", paste(tt_years(), collapse = ", "), ".") + } + files <- github_sha(file.path("data",year)) readme <- grep(pattern = "readme", files$path, value = TRUE, ignore.case = TRUE) @@ -66,58 +76,39 @@ tt_datasets <- function(year, auth = github_pat()) { #' @title print utility for tt_dataset_table object #' @inheritParams base::print -#' @param printConsole should output go to the console? TRUE/FALSE -#' @importFrom rstudioapi isAvailable viewer -#' @importFrom xml2 write_html +#' @param interactive is the console interactive #' @export -print.tt_dataset_table <- function(x, ..., printConsole = FALSE) { - if (rstudioapi::isAvailable() & !printConsole) { +print.tt_dataset_table <- function(x, ..., interactive = interactive()) { + if(interactive){ tmpHTML <- tempfile(fileext = ".html") - readme <- attr(x,".html") - write_html(readme, file = tmpHTML) - viewer(url = tmpHTML) - } else { - data.frame(x) + make_tt_dataset_html(x, file = tmpHTML <- tempfile(fileext = ".html")) + html_viewer(tmpHTML) + }else { + print(data.frame(unclass(x))) } invisible(x) } +#' @importFrom xml2 write_html +make_tt_dataset_html <- function(x, file = tempfile(fileext = ".html")){ + readme <- attr(x,".html") + write_html(readme, file = file) + invisible(readme) +} + + #' @title print utility for tt_dataset_table_list object #' @inheritParams base::print -#' @param printConsole should output go to the console? TRUE/FALSE +#' @param interactive is the console interactive #' @importFrom purrr walk map -#' @importFrom rstudioapi isAvailable viewer #' @importFrom rvest html_node #' @importFrom xml2 read_html write_html #' @export -print.tt_dataset_table_list <- function(x, ..., printConsole = FALSE) { - - if (isAvailable() & !printConsole) { - - readme <- names(x) %>% - purrr::map_chr( - function(.x, x) { - year_table <- attr(x[[.x]],".html") %>% - html_node("table") - paste("
<h2>",.x,"</h2>", - as.character(year_table), - "") - }, - x = x - ) %>% - paste(collapse = "") %>% - paste("<h1>TidyTuesday Datasets</h1>",.) %>% - paste("<body>",.,"</body>
") %>% - read_html() %>% - github_page() - - tmp_html <- tempfile(fileext = ".html") - write_html(readme, file = tmp_html) - on.exit(unlink(tmp_html)) - - rstudioapi::viewer(url = tmp_html) - +print.tt_dataset_table_list <- function(x, ...,interactive = interactive()) { + if (interactive) { + make_tt_dataset_list_html(x, file = tmpHTML <- tempfile(fileext = ".html")) + html_viewer(tmpHTML) } else { names(x) %>% @@ -137,4 +128,29 @@ print.tt_dataset_table_list <- function(x, ..., printConsole = FALSE) { } ) } + invisible(x) +} + +make_tt_dataset_list_html <- function(x, file = tempfile(fileext = ".html")){ + readme <- names(x) %>% + purrr::map_chr( + function(.x, x) { + year_table <- attr(x[[.x]],".html") %>% + html_node("table") + paste("
<h2>",.x,"</h2>", + as.character(year_table), + "") + }, + x = x + ) %>% + paste(collapse = "") + + readme <- readme %>% + paste("<body>", + paste("<h1>TidyTuesday Datasets</h1>",readme),"</body>
") %>% + read_html() %>% + github_page() + + write_html(readme, file = file) + invisible(readme) } diff --git a/R/tt_check_date.R b/R/tt_check_date.R index 7374ed5..20fa528 100644 --- a/R/tt_check_date.R +++ b/R/tt_check_date.R @@ -55,7 +55,7 @@ tt_check_date.year <- function(x, week) { tt_date <- tt_folders$folders[tt_folders$week_desc == week] - if (!tt_date %in% tt_folders[["folders"]]) { + if (!tt_date %in% tt_folders[["folders"]] | !tt_folders[["data"]][tt_folders[["folders"]] == tt_date]) { stop( paste0( "Week ", @@ -71,7 +71,7 @@ tt_check_date.year <- function(x, week) { } - +#' @importFrom stats aggregate na.pass setNames tt_weeks <- function(year) { tt_year <- tt_years() @@ -85,11 +85,15 @@ tt_weeks <- function(year) { ttmf <- tt_master_file() - tt_week <- unique(ttmf[ttmf$year == year, c("Week","Date")]) + tt_week <- aggregate(data_files ~ Week + Date, + ttmf[ttmf$year == year, ], + FUN = function(x) !anyNA(x), + na.action = na.pass) list( week_desc = tt_week$Week, - folders = tt_week$Date + folders = tt_week$Date, + data = tt_week$data_files ) } diff --git a/R/tt_compile.R b/R/tt_compile.R index bd097e5..02ec47b 100644 --- a/R/tt_compile.R +++ b/R/tt_compile.R @@ -15,7 +15,7 @@ tt_compile <- function(date, auth = github_pat()) { ttmf <- tt_master_file() #list of files - files <- ttmf[ ttmf$week_date == date, c("data_files","data_type","delim")] + files <- ttmf[ ttmf$Date == date, c("data_files","data_type","delim")] readme <- github_html(file.path("data",year(date),date,"readme.md"), auth = auth) diff --git a/R/tt_download.R b/R/tt_download.R index f5fa877..255f3ae 100644 --- a/R/tt_download.R +++ b/R/tt_download.R @@ -5,6 +5,7 @@ #' @param tt string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year #' @param files List the file names to download. Default to asking. #' @param ... pass methods to the parsing functions. These will be passed to ALL files, so be careful. +#' @param branch which branch to be downloading data from. Default and always should be "master". #' @param auth github Personal Access Token. 
See PAT section for more information #' #' @section PAT: @@ -21,27 +22,35 @@ #' @importFrom lubridate year #' #' @examples -#' tt_output <- tt_load("2019-01-15") +#' \dontrun{ +#' tt_output <- tt_load_gh("2019-01-15") +#' datasets <- tt_download(tt_output, files = "All") +#' } -tt_download <- function(tt, files = c("All"), ..., auth = github_pat()){ +tt_download <- function(tt, files = c("All"), ..., branch = "master", auth = github_pat()){ - data_files <- attr(tt, ".files")$data_files + tt_date <- attr(tt, ".date") + tt_year <- year(tt_date) + file_info <- attr(tt, ".files") #define which files to download - files <- match.arg(files, several.ok = TRUE, choices = c("All", data_files)) + files <- match.arg(files, several.ok = TRUE, choices = c("All", file_info$data_files)) if("All" %in% files){ - files <- data_files + files <- file_info$data_files } message("--- Starting Download ---") cat("\n") + tt_sha <- github_sha(file.path("data",tt_year,tt_date)) + tt_data <- setNames( vector("list", length = length(files)), files) + for(file in files){ dl_message <- sprintf('\tDownloading file %d of %d: `%s`\n', which(files == file), @@ -54,10 +63,12 @@ tt_download <- function(tt, files = c("All"), ..., auth = github_pat()){ tt, file, ..., + sha = tt_sha$sha[tt_sha$path == file], auth = auth ) } + cat("\n") message("--- Download complete ---") names(tt_data) <- tools::file_path_sans_ext(attr(tt, ".files")$data_files) diff --git a/R/tt_download_file.R b/R/tt_download_file.R index 7e6a782..e2626bc 100644 --- a/R/tt_download_file.R +++ b/R/tt_download_file.R @@ -24,17 +24,19 @@ #' @family tt_download_file #' #' @examples +#' \dontrun{ #' tt_gh <- tt_load_gh("2019-01-15") #' #' agencies <- tt_download_file(tt_gh, 1) #' launches <- tt_download_file(tt_gh, "launches.csv") +#' } #' tt_download_file <- function(tt, x, ..., auth = github_pat()) { suppressMessages({ switch(class(x), - "character" = tt_download_file.character(tt, x, guess_max = guess_max ), - "numeric" = tt_download_file.numeric(tt, x, guess_max = guess_max ), - "integer" = tt_download_file.numeric(tt, x, guess_max = guess_max ), + "character" = tt_download_file.character(tt, x, ... ), + "numeric" = tt_download_file.numeric(tt, x, ... ), + "integer" = tt_download_file.numeric(tt, x, ... 
), stop(paste("No method for entry of class:", class(x))) ) }) @@ -42,7 +44,7 @@ tt_download_file <- function(tt, x, ..., auth = github_pat()) { #' @importFrom lubridate year #' @importFrom tools file_ext -tt_download_file.character <- function(tt, x, ..., auth = github_pat()) { +tt_download_file.character <- function(tt, x, ..., sha = NULL, auth = github_pat()) { file_info <- attr(tt, ".files") @@ -50,9 +52,9 @@ tt_download_file.character <- function(tt, x, ..., auth = github_pat()) { tt_date <- attr(tt, ".date") tt_year <- year(tt_date) - as_raw <- tools::file_ext(x) %in% c("xlsx","xls","rda","rds") + # as_raw <- tools::file_ext(x) %in% c("xlsx","xls","rda","rds") - blob <- github_blob(file.path("data",tt_year,tt_date,x), as_raw = as_raw, auth = auth) + blob <- github_blob(file.path("data",tt_year,tt_date,x), as_raw = TRUE, sha = sha, auth = auth) tt_parse_blob(blob, file_info = file_info[file_info$data_file == x,]) diff --git a/R/tt_load.R b/R/tt_load.R index 8549f35..8536c4e 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -2,6 +2,7 @@ #' #' @param x string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year #' @param week left empty unless x is a numeric year entry, in which case the week of interest should be entered +#' @param download_files which files to download from repo. defaults and assumes "All" for the week. #' @param ... pass methods to the parsing functions. These will be passed to ALL files, so be careful. #' @param auth github Personal Access Token. See PAT section for more information #' @@ -17,8 +18,9 @@ #' @importFrom purrr map #' #' @examples +#' \dontrun{ #' tt_output <- tt_load("2019-01-15") -#' +#' } #' @export tt_load <- function(x, week, download_files = "All", ..., auth = github_pat()) { diff --git a/R/tt_load_gh.R b/R/tt_load_gh.R index 3383d9a..5aa53f8 100644 --- a/R/tt_load_gh.R +++ b/R/tt_load_gh.R @@ -16,14 +16,15 @@ #' @return tt_gh object. List object with the following entries: readme, files, url #' @export #' @examples -#' tt_gh <- tt_load_gh("2019-01-15") -#' +#' \dontrun{ +#' tt_gh <- tt_load_gh("2019-01-15" #' readme(tt_gh) +#' } tt_load_gh <- function(x, week, auth = github_pat()) { if (missing(x)) { on.exit({ - print(tt_available(auth = auth)) + tt_available(auth = auth) }) stop("Enter either the year or date of the TidyTuesday Data to extract!") } diff --git a/R/tt_master_file.R b/R/tt_master_file.R index 33634d2..f3ca6cf 100644 --- a/R/tt_master_file.R +++ b/R/tt_master_file.R @@ -13,6 +13,7 @@ #' to set the PAT. #' #' @keywords internal +#' @importFrom utils read.csv tt_update_master_file <- function(force = FALSE, auth = github_pat()){ # get sha to see if need to update sha_df <- github_sha("static") diff --git a/R/tt_parse.R b/R/tt_parse.R index f6a0a12..6167100 100644 --- a/R/tt_parse.R +++ b/R/tt_parse.R @@ -2,19 +2,20 @@ #' @title general utility to assist with parsing the raw data #' #' @param blob raw data to be parsed -#' @param func the function to perform parsing of the file #' @param ... 
args to pass to func -#' @param fileinfo data.frame of information about the blob being read +#' @param file_info data.frame of information about the blob being read tt_parse_blob <- function(blob, ..., file_info) { switch( tolower(file_info$data_type), "xls" = tt_parse_binary(blob, readxl::read_xls, ..., filename = file_info$data_files), "xlsx" = tt_parse_binary(blob, readxl::read_xlsx, ..., filename = file_info$data_file), "rds" = tt_parse_binary(blob, readRDS, filename = file_info$data_file), - "rda" = tt_parse_binary(blob, read_rda, filename = file_info$data_file), - tt_parse_text(blob, readr::read_delim, progress = FALSE, delim = file_info$delim, ...) + tt_parse_text(blob =blob, func = readr::read_delim, delim = file_info[["delim"]], progress = FALSE, ...) ) } +# rda option just in case +# "rda" = tt_parse_binary(blob, read_rda, filename = file_info$data_file), + #' @title utility to assist with parsing the raw binary data #' @@ -24,6 +25,7 @@ tt_parse_blob <- function(blob, ..., file_info) { #' @param filename the original name of the file tt_parse_binary <- function(blob, func, ... , filename) { temp_file <- file.path(tempdir(), filename) + attr(blob, ".sha") <- NULL writeBin(blob, temp_file) on.exit(unlink(temp_file)) func(temp_file, ...) @@ -34,8 +36,10 @@ tt_parse_binary <- function(blob, func, ... , filename) { #' #' @param blob raw text data to be parsed #' @param func the function to perform parsing of the file +#' @param delim what delimeter to use when parsing +#' @param progress should parsing process be shared. Assumed to be FALSE #' @param ... args to pass to func #' -tt_parse_text <- function(blob, func, ... ) { - func(blob, ...) +tt_parse_text <- function(blob, func, delim, progress = FALSE, ... ) { + func(blob, delim = delim, progress = progress, ...) } diff --git a/R/utils-pipe.R b/R/utils-pipe.R new file mode 100644 index 0000000..e79f3d8 --- /dev/null +++ b/R/utils-pipe.R @@ -0,0 +1,11 @@ +#' Pipe operator +#' +#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. +#' +#' @name %>% +#' @rdname pipe +#' @keywords internal +#' @export +#' @importFrom magrittr %>% +#' @usage lhs \%>\% rhs +NULL diff --git a/R/utils.R b/R/utils.R index 29e1856..0f8d69a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -20,7 +20,6 @@ print.tt <- function(x,...){ #' @title Readme HTML maker and Viewer #' @param tt tt_data object for printing -#' @importFrom rstudioapi viewer #' @importFrom xml2 write_html #' @return NULL #' @export @@ -29,12 +28,19 @@ readme <- function(tt) { tt <- attr(tt, ".tt") } if (length(attr(tt, ".readme")) > 0) { + write_html(attr(tt, ".readme"), file = tmpHTML <- tempfile(fileext = ".html")) # if running in rstudio, print out that - if (rstudioapi::isAvailable()) { - tmpdir <- tempfile(fileext = ".html") - write_html(attr(tt, ".readme"), file = tmpdir) - rstudioapi::viewer(url = tmpdir) - } + html_viewer(tmpHTML) } invisible(NULL) } + +#' @importFrom utils browseURL +#' @importFrom rstudioapi viewer isAvailable +html_viewer <- function(url){ + if (isAvailable()) { + viewer(url = url) + } else{ + browseURL(url = url) + } +} diff --git a/docs/404.html b/docs/404.html index 55f1e84..02beade 100644 --- a/docs/404.html +++ b/docs/404.html @@ -15,23 +15,29 @@ + - + - + + + + - + + + - - + + - + - - + + @@ -39,10 +45,10 @@ - + - + @@ -59,7 +65,7 @@ - +
@@ -92,10 +98,9 @@ Changelog -
+ +
@@ -130,7 +141,7 @@

Page not found (404)

-

Site built with pkgdown 1.4.1.

+

Site built with pkgdown 1.5.0.

diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html index 96f9345..5f2aad6 100644 --- a/docs/CODE_OF_CONDUCT.html +++ b/docs/CODE_OF_CONDUCT.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -42,7 +48,6 @@ - @@ -57,9 +62,10 @@ + - +
@@ -81,7 +87,7 @@
+
@@ -126,22 +133,32 @@

Contributor Code of Conduct

+ + +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index d09b60b..aeca269 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -42,7 +48,6 @@ - @@ -57,9 +62,10 @@ + - +
@@ -81,7 +87,7 @@
+
@@ -120,22 +127,32 @@

License

+ + +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 6f57568..d1f5565 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -42,7 +48,6 @@ - @@ -57,9 +62,10 @@ + - +
@@ -81,7 +87,7 @@
+
@@ -124,22 +131,32 @@

MIT License

+ + +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/authors.html b/docs/authors.html index 493e811..e760205 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -42,7 +48,6 @@ - @@ -57,9 +62,10 @@ + - +
@@ -81,7 +87,7 @@
+
@@ -138,19 +145,23 @@

Authors

+
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/index.html b/docs/index.html index 801d8a1..b744913 100644 --- a/docs/index.html +++ b/docs/index.html @@ -12,9 +12,11 @@ - - - + + + + + - - +
@@ -90,8 +92,9 @@

Installation

Currently this package is only available on GitHub:

- +
#install.packages("devtools")
+
+devtools::install_github("thebioengineer/tidytuesdayR")

@@ -101,34 +104,34 @@

tt_load()

The first and simplest way is to use the ‘tt_load()’ function. This function accepts two types of inputs to determine which data to grab. It can be a date as a string in the YYYY-MM-DD format like below.

-
library(tidytuesdayR)
-tt_data <- tt_load("2019-01-15")
+
library(tidytuesdayR)
+tt_data <- tt_load("2019-01-15")

Or the function can accept the year as the first argument, and which week of the year as the second.

-
tt_data <- tt_load(2019, week=3) 
-

tt_load() naively downloads all the data that is available and stores them in the resulting tt_data object. To access the data, use the $ or [[ notation and the name of the dataset.

- +
tt_data <- tt_load(2019, week=3)
+

tt_load() naively downloads all the data that is available and stores them in the resulting tt_data object. To access the data, use the $ or [[ notation and the name of the dataset.

+
tt_data$agencies
+tt_data[["agencies"]]

tt_load_gh() and tt_read_data()

-

The second method to access the data from the repository is to use the combination of tt_load_gh() and tt_read_data() functions. tt_load_gh() takes similar arguments as tt_load(), in that either the date or a combination of year and week can be entered.

-
tt <- tt_load_gh("2019-01-15")
-

The tt object lists the available files for download. To download the data, use the tt_read_data() function. tt_read_data() expects the first argument to be the tt object. The second argument can be a string indicating the name of the file to download from the repository, or the index in the tt object

- +

The second method to access the data from the repository is to use the combination of tt_load_gh() and tt_read_data() functions. tt_load_gh() takes similar arguments as tt_load(), in that either the date or a combination of year and week can be entered.

+
tt <- tt_load_gh("2019-01-15")
+

The tt object lists the available files for download. To download the data, use the tt_read_data() function. tt_read_data() expects the first argument to be the tt object. The second argument can be a string indicating the name of the file to download from the repository, or the index in the tt object

+
agencies <- tt %>%
+  tt_read_data("agencies.csv")
+
+# The first index of the tt object is `agencies.csv`
+# agencies <- tt %>%
+#   tt_read_data(1)

Tidy Tuesday Details

The tt_data and tt objects both have a function for showing the readme for that week called readme(). In addition, the print methods for both objects show the readme in a viewer and the available datasets in the console.

-
readme(tt_data)
-print(tt_data)
+
readme(tt_data)
+print(tt_data)
## Available Datasets:
 ##  agencies 
 ##  launches 
@@ -142,11 +145,11 @@ 

- + diff --git a/docs/news/index.html b/docs/news/index.html index 451d127..792c8ff 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -42,7 +48,6 @@ - @@ -57,9 +62,10 @@ + - +
@@ -81,7 +87,7 @@
+
-

+

tidytuesdayR 0.2.2

  • Added a NEWS.md file to track changes to the package.
  • @@ -125,30 +132,31 @@

- +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index c03fb08..c01e592 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -17,12 +17,14 @@ html, body { height: 100%; } +body { + position: relative; +} + body > .container { display: flex; height: 100%; flex-direction: column; - - padding-top: 60px; } body > .container .row { @@ -69,6 +71,10 @@ summary { margin-top: calc(-60px + 1em); } +dd { + margin-left: 3em; +} + /* Section anchors ---------------------------------*/ a.anchor { @@ -102,37 +108,135 @@ a.anchor { margin-top: -40px; } -/* Static header placement on mobile devices */ -@media (max-width: 767px) { - .navbar-fixed-top { - position: absolute; - } - .navbar { - padding: 0; - } +/* Navbar submenu --------------------------*/ + +.dropdown-submenu { + position: relative; } +.dropdown-submenu>.dropdown-menu { + top: 0; + left: 100%; + margin-top: -6px; + margin-left: -1px; + border-radius: 0 6px 6px 6px; +} + +.dropdown-submenu:hover>.dropdown-menu { + display: block; +} + +.dropdown-submenu>a:after { + display: block; + content: " "; + float: right; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; + border-width: 5px 0 5px 5px; + border-left-color: #cccccc; + margin-top: 5px; + margin-right: -10px; +} + +.dropdown-submenu:hover>a:after { + border-left-color: #ffffff; +} + +.dropdown-submenu.pull-left { + float: none; +} + +.dropdown-submenu.pull-left>.dropdown-menu { + left: -100%; + margin-left: 10px; + border-radius: 6px 0 6px 6px; +} /* Sidebar --------------------------*/ -#sidebar { +#pkgdown-sidebar { margin-top: 30px; + position: -webkit-sticky; + position: sticky; + top: 70px; } -#sidebar h2 { + +#pkgdown-sidebar h2 { font-size: 1.5em; margin-top: 1em; } -#sidebar h2:first-child { +#pkgdown-sidebar h2:first-child { margin-top: 0; } -#sidebar .list-unstyled li { +#pkgdown-sidebar .list-unstyled li { margin-bottom: 0.5em; } +/* bootstrap-toc tweaks ------------------------------------------------------*/ + +/* All levels of nav */ + +nav[data-toggle='toc'] .nav > li > a { + padding: 4px 20px 4px 6px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; +} + +nav[data-toggle='toc'] .nav > li > a:hover, +nav[data-toggle='toc'] .nav > li > a:focus { + padding-left: 5px; + color: inherit; + border-left: 1px solid #878787; +} + +nav[data-toggle='toc'] .nav > .active > a, +nav[data-toggle='toc'] .nav > .active:hover > a, +nav[data-toggle='toc'] .nav > .active:focus > a { + padding-left: 5px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; + border-left: 2px solid #878787; +} + +/* Nav: second level (shown on .active) */ + +nav[data-toggle='toc'] .nav .nav { + display: none; /* Hide by default, but at >768px, show it */ + padding-bottom: 10px; +} + +nav[data-toggle='toc'] .nav .nav > li > a { + padding-left: 16px; + font-size: 1.35rem; +} + +nav[data-toggle='toc'] .nav .nav > li > a:hover, +nav[data-toggle='toc'] .nav .nav > li > a:focus { + padding-left: 15px; +} + +nav[data-toggle='toc'] .nav .nav > .active > a, +nav[data-toggle='toc'] .nav .nav > .active:hover > a, +nav[data-toggle='toc'] .nav .nav > .active:focus > a { + padding-left: 15px; + font-weight: 500; + font-size: 1.35rem; +} + +/* orcid ------------------------------------------------------------------- */ + .orcid { - height: 16px; + font-size: 16px; + color: #A6CE39; + /* margins are required by official ORCID trademark and display guidelines */ + margin-left:4px; + margin-right:4px; vertical-align: middle; } @@ -222,6 +326,19 @@ a.sourceLine:hover { visibility: 
visible; } +/* headroom.js ------------------------ */ + +.headroom { + will-change: transform; + transition: transform 200ms linear; +} +.headroom--pinned { + transform: translateY(0%); +} +.headroom--unpinned { + transform: translateY(-100%); +} + /* mark.js ----------------------------*/ mark { @@ -234,3 +351,17 @@ mark { .html-widget { margin-bottom: 10px; } + +/* fontawesome ------------------------ */ + +.fab { + font-family: "Font Awesome 5 Brands" !important; +} + +/* don't display links in code chunks when printing */ +/* source: https://stackoverflow.com/a/10781533 */ +@media print { + code a:link:after, code a:visited:after { + content: ""; + } +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js index eb7e83d..7e7048f 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -2,18 +2,11 @@ (function($) { $(function() { - $("#sidebar") - .stick_in_parent({offset_top: 40}) - .on('sticky_kit:bottom', function(e) { - $(this).parent().css('position', 'static'); - }) - .on('sticky_kit:unbottom', function(e) { - $(this).parent().css('position', 'relative'); - }); + $('.navbar-fixed-top').headroom(); - $('body').scrollspy({ - target: '#sidebar', - offset: 60 + $('body').css('padding-top', $('.navbar').height() + 10); + $(window).resize(function(){ + $('body').css('padding-top', $('.navbar').height() + 10); }); $('[data-toggle="tooltip"]').tooltip(); diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index f777eb9..30218c6 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,5 +1,6 @@ pandoc: 2.7.2 -pkgdown: 1.3.0 +pkgdown: 1.5.0 pkgdown_sha: ~ articles: [] +last_built: 2020-05-23T23:37Z diff --git a/docs/readme.html b/docs/readme.html index 28e545e..c1eabc9 100644 --- a/docs/readme.html +++ b/docs/readme.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -42,7 +48,6 @@ - @@ -57,9 +62,10 @@ + - +
@@ -81,7 +87,7 @@
+
@@ -123,8 +130,9 @@

tidytuesdayR

Installation

Currently this package is only available on GitHub:

- +
#install.packages("devtools")
+
+devtools::install_github("thebioengineer/tidytuesdayR")

@@ -134,34 +142,34 @@

tt_load()

The first and simplest way is to use the ‘tt_load()’ function. This function accepts two types of inputs to determine which data to grab. It can be a date as a string in the YYYY-MM-DD format like below.

-
library(tidytuesdayR)
-tt_data <- tt_load("2019-01-15")
+
library(tidytuesdayR)
+tt_data <- tt_load("2019-01-15")

Or the function can accept the year as the first argument, and which week of the year as the second.

-
tt_data <- tt_load(2019, week=3) 
-

tt_load() naively downloads all the data that is available and stores them in the resulting tt_data object. To access the data, use the $ or [[ notation and the name of the dataset.

- +
tt_data <- tt_load(2019, week=3)
+

tt_load() naively downloads all the data that is available and stores them in the resulting tt_data object. To access the data, use the $ or [[ notation and the name of the dataset.

+
tt_data$agencies
+tt_data[["agencies"]]

tt_load_gh() and tt_read_data()

-

The second method to access the data from the repository is to use the combination of tt_load_gh() and tt_read_data() functions. tt_load_gh() takes similar arguments as tt_load(), in that either the date or a combination of year and week can be entered.

-
tt <- tt_load_gh("2019-01-15")
-

The tt object lists the available files for download. To download the data, use the tt_read_data() function. tt_read_data() expects the first argument to be the tt object. The second argument can be a string indicating the name of the file to download from the repository, or the index in the tt object

- +

The second method to access the data from the repository is to use the combination of tt_load_gh() and tt_read_data() functions. tt_load_gh() takes similar arguments as tt_load(), in that either the date or a combination of year and week can be entered.

+
tt <- tt_load_gh("2019-01-15")
+

The tt object lists the available files for download. To download the data, use the tt_read_data() function. tt_read_data() expects the first argument to be the tt object. The second argument can be a string indicating the name of the file to download from the repository, or the index in the tt object

+
agencies <- tt %>%
+  tt_read_data("agencies.csv")
+
+# The first index of the tt object is `agencies.csv`
+# agencies <- tt %>%
+#   tt_read_data(1)

Tidy Tuesday Details

The tt_data and tt objects both have a function for showing the readme for that week called readme(). In addition, the print methods for both objects show the readme in a viewer and the available datasets in the console.

-
readme(tt_data)
-print(tt_data)
+
readme(tt_data)
+print(tt_data)
## Available Datasets:
 ##  agencies 
 ##  launches 
@@ -176,22 +184,32 @@ 

+ + +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/index.html b/docs/reference/index.html index 2457d0b..55d1b01 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -42,7 +48,6 @@ - @@ -57,9 +62,10 @@ + - +
@@ -81,7 +87,7 @@
+
@@ -129,24 +136,65 @@

+ + + + + -

download_read()

+

base_64_to_char()

+ +

read json base64 contents from github

+ + + +

GET_json()

+ +

read GET json contents to char

+ + + +

github_blob()

+ +

Read blob Contents from GitHub

+ + + +

github_contents()

+ +

Read Contents from GitHub

+ + + +

github_GET()

+ +

Get for github API

+ + + +

github_html()

+ +

Read Contents from GitHub as html

+ + + +

github_page()

-

utility to assist with 'reading' urls that cannot normally be read by file functions

+

Create shell for HTML content from github

-

get_tt_html()

+

github_pat()

-

Get TidyTuesday URL and HTML

+

Get the github PAT

-

identify_delim()

+

github_sha()

-

Identify potential delimeters of file

+

Read Contents from GitHub as html

@@ -191,12 +239,38 @@

tt_check_date()

+ +

given inputs generate valid TidyTuesday URL

+ + + +

tt_compile()

+ +

Get TidyTuesday Readme and list of files and HTML

+ +

tt_datasets()

Available datasets

+ +

tt_download()

+ +

download tt data + +Download all or specific files identified in the tt dataset

+ + + +

tt_download_file()

+ +

Reads in TidyTuesday datasets from Github repo

+ +

tt_load()

@@ -210,41 +284,50 @@

tt_make_url()

+

tt_parse_binary()

-

given inputs generate valid TidyTuesday URL

+

utility to assist with parsing the raw binary data

-

tt_read_data()

+

tt_parse_blob()

-

Reads in TidyTuesday datasets from Github repo

+

general utility to assist with parsing the raw data

+ + + +

tt_parse_text()

+ +

utility to assist with parsing the text data

- +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/print.tt.html b/docs/reference/print.tt.html index b4618e1..9e644e8 100644 --- a/docs/reference/print.tt.html +++ b/docs/reference/print.tt.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -122,7 +129,7 @@

print utility for tt_data objects

# S3 method for tt
-print(x, ...)
+print(x, ...)

Arguments

@@ -139,28 +146,30 @@

Arg - +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/print.tt_data.html b/docs/reference/print.tt_data.html index 8389197..1587ea7 100644 --- a/docs/reference/print.tt_data.html +++ b/docs/reference/print.tt_data.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -122,7 +129,7 @@

print utility for tt_data objects

# S3 method for tt_data
-print(x, ...)
+print(x, ...)

Arguments

@@ -139,28 +146,30 @@

Arg - +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/print.tt_dataset_table.html b/docs/reference/print.tt_dataset_table.html index 6bd0ed6..cce3cc0 100644 --- a/docs/reference/print.tt_dataset_table.html +++ b/docs/reference/print.tt_dataset_table.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -122,7 +129,7 @@

print utility for tt_dataset_table object

# S3 method for tt_dataset_table
-print(x, ..., printConsole = FALSE)
+print(x, ..., interactive = interactive())

Arguments

@@ -136,35 +143,37 @@

Arg

- - + +

further arguments passed to or from other methods.

printConsole

should output go to the console? TRUE/FALSE

interactive

is the console interactive

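A short usage sketch (assumes network access to the TidyTuesday repository): with interactive = FALSE the table is printed to the console as a data.frame instead of being rendered in the viewer.

ds <- tt_datasets(2019)            # returns a tt_dataset_table object
print(ds, interactive = FALSE)     # console output rather than the HTML viewer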
- +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/print.tt_dataset_table_list.html b/docs/reference/print.tt_dataset_table_list.html index a7c45c0..0f02074 100644 --- a/docs/reference/print.tt_dataset_table_list.html +++ b/docs/reference/print.tt_dataset_table_list.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -122,7 +129,7 @@

print utility for tt_dataset_table_list object

# S3 method for tt_dataset_table_list
-print(x, ..., printConsole = FALSE)
+print(x, ..., interactive = interactive())

Arguments

@@ -136,35 +143,37 @@

Arg

- - + +

further arguments passed to or from other methods.

printConsole

should output go to the console? TRUE/FALSE

interactive

is the console interactive

- +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/read_rda.html b/docs/reference/read_rda.html index f9accaf..522d794 100644 --- a/docs/reference/read_rda.html +++ b/docs/reference/read_rda.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -134,28 +141,30 @@

Arg

- +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/readme.html b/docs/reference/readme.html index 9b4bcc3..faa27cb 100644 --- a/docs/reference/readme.html +++ b/docs/reference/readme.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -134,28 +141,30 @@

Arg

- +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/tt_available.html b/docs/reference/tt_available.html index f7ec450..9e88762 100644 --- a/docs/reference/tt_available.html +++ b/docs/reference/tt_available.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -121,32 +128,52 @@

Show all TidyTuesdays

Show all the available datasets, and corresponding weeks

-
tt_available()
+
tt_available(auth = github_pat())
+

Arguments

+ + + + + + +
auth

github Personal Access Token. See PAT section for more information

+

PAT

-
- + +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/tt_datasets.html b/docs/reference/tt_datasets.html index e1b595d..1414da3 100644 --- a/docs/reference/tt_datasets.html +++ b/docs/reference/tt_datasets.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -121,41 +128,57 @@

Available datasets

list available datasets for that year

-
tt_datasets(year)
+
tt_datasets(year, auth = github_pat())

Arguments

- + + + + +
year

numeric entry representing the year of tidytuesday you want the list of datasets for. Leave empty for most recent year.

numeric entry representing the year of tidytuesday you want the list of datasets +for. Leave empty for most recent year.

auth

github Personal Access Token. See PAT section for more information

+

PAT

+ + -
- + +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/tt_load.html b/docs/reference/tt_load.html index 3b4779a..3966f3a 100644 --- a/docs/reference/tt_load.html +++ b/docs/reference/tt_load.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -121,7 +128,7 @@

Load TidyTuesday data from Github

Load TidyTuesday data from Github

-
tt_load(x, week, ...)
+
tt_load(x, week, download_files = "All", ..., auth = github_pat())

Arguments

@@ -134,43 +141,62 @@

Arg

+ + + + + + + +
week

left empty unless x is a numeric year entry, in which case the week of interest should be entered

download_files

which files to download from repo. defaults and assumes "All" for the week.

...

pass methods to the parsing functions. These will be passed to ALL files, so be careful.

auth

github Personal Access Token. See PAT section for more information

Value

tt_data object (list class)

+

PAT

+ + + + +

A GitHub PAT is a Personal Access Token. This allows for signed queries to +the GitHub API, and increases the limit on the number of requests allowed from +60 to 5000. Follow the instructions at https://happygitwithr.com/github-pat.html +to set the PAT.
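A minimal setup sketch (assumes a token already generated on github.com, and that the PAT is supplied via the GITHUB_PAT environment variable, the convention github_pat() follows):

# In ~/.Renviron (restart R afterwards):
# GITHUB_PAT=ghp_xxxxxxxxxxxxxxxx

# Or, for the current session only:
Sys.setenv(GITHUB_PAT = "ghp_xxxxxxxxxxxxxxxx")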

Examples

-
tt_output <- tt_load("2019-01-15")
#> --- Downloading #TidyTuesday Information for 2019-01-15 ----
#> --- Identified 2 files available for download ----
#> --- Downloading files ---
#> Warning: Detected multiple possible delimeters:`,`, ` `. Defaulting to `,`.
#> --- Download complete ---
+
if (FALSE) { +tt_output <- tt_load("2019-01-15") +}
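For reference, a fuller sketch drawn from the package readme (illustrative only, not run during checks):

tt_output <- tt_load(2019, week = 3)  # year + week form of the same call
agencies <- tt_output$agencies        # datasets are accessed with $ or [[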
- +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/docs/reference/tt_load_gh.html b/docs/reference/tt_load_gh.html index fc55cb8..5a19f2e 100644 --- a/docs/reference/tt_load_gh.html +++ b/docs/reference/tt_load_gh.html @@ -17,21 +17,27 @@ - + - + - + + + + + - + + - + - - + + + @@ -43,7 +49,6 @@ - @@ -58,9 +63,10 @@ + - +
@@ -82,7 +88,7 @@
+
@@ -121,7 +128,7 @@

Load TidyTuesday data from Github

Pulls the Readme and URLs of the data from the TidyTuesday github folder based on the date provided

-
tt_load_gh(x, week)
+
tt_load_gh(x, week, auth = github_pat())

Arguments

@@ -134,40 +141,52 @@

Arg

+ + + +
week

left empty unless x is a numeric year entry, in which case the week of interest should be entered

auth

github Personal Access Token. See PAT section for more information

Value

tt_gh object. List object with the following entries: readme, files, url

+

PAT

+ + + + +

A GitHub PAT is a Personal Access Token. This allows for signed queries to +the GitHub API, and increases the limit on the number of requests allowed from +60 to 5000. Follow the instructions at https://happygitwithr.com/github-pat.html +to set the PAT.

Examples

-
tt_gh <- tt_load_gh("2019-01-15")
#> --- Downloading #TidyTuesday Information for 2019-01-15 ----
#> --- Identified 2 files available for download ----
-readme(tt_gh)
+if (FALSE) { +tt_gh <- tt_load_gh("2019-01-15") +readme(tt_gh) +}

   
- +
-

Site built with pkgdown 1.3.0.

+

Site built with pkgdown 1.5.0.

+
+ + diff --git a/man/GET_json.Rd b/man/GET_json.Rd index e4cb922..5ab8c60 100644 --- a/man/GET_json.Rd +++ b/man/GET_json.Rd @@ -6,6 +6,9 @@ \usage{ GET_json(get_response) } +\arguments{ +\item{get_response}{object of class "response" from GET command. returns JSON value.} +} \description{ provide tool to read and process data using the github api from GET command } diff --git a/man/base_64_to_char.Rd b/man/base_64_to_char.Rd index 84cafc6..6bbf02e 100644 --- a/man/base_64_to_char.Rd +++ b/man/base_64_to_char.Rd @@ -6,6 +6,9 @@ \usage{ base_64_to_char(b64) } +\arguments{ +\item{b64}{base64 character value to be decoded and converted to character value} +} \description{ provide tool to read and process data using the github api } diff --git a/man/github_blob.Rd b/man/github_blob.Rd index c2a30c4..666a471 100644 --- a/man/github_blob.Rd +++ b/man/github_blob.Rd @@ -4,15 +4,17 @@ \alias{github_blob} \title{Read blob Contents from GitHub} \usage{ -github_blob(path, as_raw = FALSE, auth = github_pat()) +github_blob(path, as_raw = FALSE, sha = NULL, auth = github_pat()) } \arguments{ \item{path}{Relative path from within the TidyTuesday Repository to contents, usually because it was too large to be read with the contencts api.} -\item{auth}{github PAT. See PAT section for more information} +\item{as_raw}{optional arguments to pass to \code{read_html}} + +\item{sha}{sha of object if known in liu of path (usually best to give both for clarity)} -\item{raw}{optional arguments to pass to \code{read_html}} +\item{auth}{github PAT. See PAT section for more information} } \value{ a raw/character object based on the blob @@ -32,8 +34,7 @@ to set the PAT. \examples{ \dontrun{ -main_readme <- github_html("README.md") -week_readme <- github_html("data/2020/2020-01-07/readme.md") +main_readme_blob <- github_blob("README.md", as_raw = TRUE) } diff --git a/man/github_page.Rd b/man/github_page.Rd index b61026b..d2d376f 100644 --- a/man/github_page.Rd +++ b/man/github_page.Rd @@ -7,7 +7,7 @@ github_page(page_content) } \arguments{ -\item{content}{html content} +\item{page_content}{html content in xml_document class} } \value{ xml_document with github header diff --git a/man/github_sha.Rd b/man/github_sha.Rd index 6206060..f01f75a 100644 --- a/man/github_sha.Rd +++ b/man/github_sha.Rd @@ -7,11 +7,11 @@ github_sha(dirpath, branch = "master", auth = github_pat()) } \arguments{ -\item{auth}{github PAT. See PAT section for more information} +\item{dirpath}{Relative path from within the TidyTuesday Repository to folder of contents wanting sha for} -\item{path}{Relative path from within the TidyTuesday Repository to contents that can be returned as HTML} +\item{branch}{which branch to get sha for. assumed to be master (and usually should be)} -\item{...}{optional arguments to pass to \code{read_html}} +\item{auth}{github PAT. See PAT section for more information} } \value{ result data.frame of SHA and other information of directory contents @@ -31,8 +31,7 @@ to set the PAT. \examples{ \dontrun{ -main_readme <- github_html("README.md") -week_readme <- github_html("data/2020/2020-01-07/readme.md") +sha <- github_sha("data/2020/2020-01-07") } diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 0000000..0eec752 --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils-pipe.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\description{ +See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 
+} +\keyword{internal} diff --git a/man/print.tt_dataset_table.Rd b/man/print.tt_dataset_table.Rd index 68e0223..437601b 100644 --- a/man/print.tt_dataset_table.Rd +++ b/man/print.tt_dataset_table.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_datasets.R +% Please edit documentation in R/tt_available.R \name{print.tt_dataset_table} \alias{print.tt_dataset_table} \title{print utility for tt_dataset_table object} \usage{ -\method{print}{tt_dataset_table}(x, ..., printConsole = FALSE) +\method{print}{tt_dataset_table}(x, ..., interactive = interactive()) } \arguments{ \item{x}{an object used to select a method.} \item{...}{further arguments passed to or from other methods.} -\item{printConsole}{should output go to the console? TRUE/FALSE} +\item{interactive}{is the console interactive} } \description{ print utility for tt_dataset_table object diff --git a/man/print.tt_dataset_table_list.Rd b/man/print.tt_dataset_table_list.Rd index 525e818..2de6d90 100644 --- a/man/print.tt_dataset_table_list.Rd +++ b/man/print.tt_dataset_table_list.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_datasets.R +% Please edit documentation in R/tt_available.R \name{print.tt_dataset_table_list} \alias{print.tt_dataset_table_list} \title{print utility for tt_dataset_table_list object} \usage{ -\method{print}{tt_dataset_table_list}(x, ..., printConsole = FALSE) +\method{print}{tt_dataset_table_list}(x, ..., interactive = interactive()) } \arguments{ \item{x}{an object used to select a method.} \item{...}{further arguments passed to or from other methods.} -\item{printConsole}{should output go to the console? TRUE/FALSE} +\item{interactive}{is the console interactive} } \description{ print utility for tt_dataset_table_list object diff --git a/man/tt_available.Rd b/man/tt_available.Rd index 8e3a4c4..d11c014 100644 --- a/man/tt_available.Rd +++ b/man/tt_available.Rd @@ -1,11 +1,23 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_datasets.R +% Please edit documentation in R/tt_available.R \name{tt_available} \alias{tt_available} \title{Show all TidyTuesdays} \usage{ -tt_available() +tt_available(auth = github_pat()) +} +\arguments{ +\item{auth}{github Personal Access Token. See PAT section for more information} } \description{ Show all the available datasets, and corresponding weeks } +\section{PAT}{ + + +A Github PAT is a personal Access Token. This allows for signed queries to +the github api, and increases the limit on the number of requests allowed from +60 to 5000. Follow instructions https://happygitwithr.com/github-pat.html +to set the PAT. 
+} + diff --git a/man/tt_datasets.Rd b/man/tt_datasets.Rd index a43261f..64fe171 100644 --- a/man/tt_datasets.Rd +++ b/man/tt_datasets.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tt_datasets.R +% Please edit documentation in R/tt_available.R \name{tt_datasets} \alias{tt_datasets} \title{Available datasets} diff --git a/man/tt_download.Rd b/man/tt_download.Rd index 582c6d9..95c5c54 100644 --- a/man/tt_download.Rd +++ b/man/tt_download.Rd @@ -6,7 +6,7 @@ Download all or specific files identified in the tt dataset} \usage{ -tt_download(tt, files = c("All"), ..., auth = github_pat()) +tt_download(tt, files = c("All"), ..., branch = "master", auth = github_pat()) } \arguments{ \item{tt}{string representation of the date of data to pull, in YYYY-MM-dd format, or just numeric entry for year} @@ -15,6 +15,8 @@ tt_download(tt, files = c("All"), ..., auth = github_pat()) \item{...}{pass methods to the parsing functions. These will be passed to ALL files, so be careful.} +\item{branch}{which branch to be downloading data from. Default and always should be "master".} + \item{auth}{github Personal Access Token. See PAT section for more information} } \value{ @@ -35,5 +37,8 @@ to set the PAT. } \examples{ -tt_output <- tt_load("2019-01-15") +\dontrun{ +tt_output <- tt_load_gh("2019-01-15") +datasets <- tt_download(tt_output, files = "All") +} } diff --git a/man/tt_download_file.Rd b/man/tt_download_file.Rd index 2ed37bd..6acd055 100644 --- a/man/tt_download_file.Rd +++ b/man/tt_download_file.Rd @@ -31,10 +31,12 @@ to set the PAT. } \examples{ +\dontrun{ tt_gh <- tt_load_gh("2019-01-15") agencies <- tt_download_file(tt_gh, 1) launches <- tt_download_file(tt_gh, "launches.csv") +} } \concept{tt_download_file} diff --git a/man/tt_load.Rd b/man/tt_load.Rd index 7b82a3e..55e4750 100644 --- a/man/tt_load.Rd +++ b/man/tt_load.Rd @@ -11,6 +11,8 @@ tt_load(x, week, download_files = "All", ..., auth = github_pat()) \item{week}{left empty unless x is a numeric year entry, in which case the week of interest should be entered} +\item{download_files}{which files to download from repo. defaults and assumes "All" for the week.} + \item{...}{pass methods to the parsing functions. These will be passed to ALL files, so be careful.} \item{auth}{github Personal Access Token. See PAT section for more information} @@ -31,6 +33,7 @@ to set the PAT. } \examples{ +\dontrun{ tt_output <- tt_load("2019-01-15") - +} } diff --git a/man/tt_load_gh.Rd b/man/tt_load_gh.Rd index 1beb372..de5d187 100644 --- a/man/tt_load_gh.Rd +++ b/man/tt_load_gh.Rd @@ -29,7 +29,8 @@ to set the PAT. } \examples{ -tt_gh <- tt_load_gh("2019-01-15") - +\dontrun{ +tt_gh <- tt_load_gh("2019-01-15" readme(tt_gh) } +} diff --git a/man/tt_parse_blob.Rd b/man/tt_parse_blob.Rd index 5a724e3..a5ac576 100644 --- a/man/tt_parse_blob.Rd +++ b/man/tt_parse_blob.Rd @@ -11,9 +11,7 @@ tt_parse_blob(blob, ..., file_info) \item{...}{args to pass to func} -\item{func}{the function to perform parsing of the file} - -\item{fileinfo}{data.frame of information about the blob being read} +\item{file_info}{data.frame of information about the blob being read} } \description{ general utility to assist with parsing the raw data diff --git a/man/tt_parse_text.Rd b/man/tt_parse_text.Rd index 0f9f81a..2e6100b 100644 --- a/man/tt_parse_text.Rd +++ b/man/tt_parse_text.Rd @@ -4,13 +4,17 @@ \alias{tt_parse_text} \title{utility to assist with parsing the text data} \usage{ -tt_parse_text(blob, func, ...) 
+tt_parse_text(blob, func, delim, progress = FALSE, ...) } \arguments{ \item{blob}{raw text data to be parsed} \item{func}{the function to perform parsing of the file} +\item{delim}{what delimeter to use when parsing} + +\item{progress}{should parsing process be shared. Assumed to be FALSE} + \item{...}{args to pass to func} } \description{ diff --git a/readme.md b/readme.md index 3fea70a..d8f4848 100644 --- a/readme.md +++ b/readme.md @@ -6,6 +6,7 @@ Ellis Hughes status](https://travis-ci.com/thebioengineer/tidytuesdayR.svg?branch=master)](https://travis-ci.com/thebioengineer/tidytuesdayR) [![AppVeyor build status](https://ci.appveyor.com/api/projects/status/github/thebioengineer/tidytuesdayR?branch=master&svg=true)](https://ci.appveyor.com/project/thebioengineer/tidytuesdayR) +[![R build status](https://github.com/thebioengineer/tidytuesdayR/workflows/R-CMD-check/badge.svg)](https://github.com/thebioengineer/tidytuesdayR/actions) [![Coverage status](https://codecov.io/gh/thebioengineer/tidytuesdayR/branch/master/graph/badge.svg)](https://codecov.io/github/thebioengineer/tidytuesdayR?branch=master) [![License: diff --git a/tests/testthat/helper-tt_ref_test_that.R b/tests/testthat/helper-tt_ref_test_that.R new file mode 100644 index 0000000..5418195 --- /dev/null +++ b/tests/testthat/helper-tt_ref_test_that.R @@ -0,0 +1,11 @@ + +#Provide a wraper to temporarily change location set for tidytuesday reference to preserve consistency + +tt_ref_test_that <- function(desc, ...){ + ref_repo <- options("tidytuesdayR.tt_repo") + options("tidytuesdayR.tt_repo" = "thebioengineer/tt_ref") + on.exit({ + options("tidytuesdayR.tt_repo" = ref_repo[[1]]) + }) + testthat::test_that(desc = desc, ...) +} diff --git a/tests/testthat/test-00-github_api.R b/tests/testthat/test-00-github_api.R new file mode 100644 index 0000000..be188b5 --- /dev/null +++ b/tests/testthat/test-00-github_api.R @@ -0,0 +1,68 @@ +context("Github API") + +tt_ref_test_that("github_contents returns contents as text", { + license_text <- github_contents("LICENSE") + + expect_is(license_text, "character") + expect_equivalent( + substr(license_text, 0, 76), + "MIT License\n\nCopyright (c) 2018 R for Data Science online learning community") +}) + +tt_ref_test_that("github_contents flips to github_blob and returns contents as text by default when larger than 1MB", { + + tweets_text <- github_contents("static/tidytuesday_tweets.csv") + + expect_is(tweets_text, "character") + expect_true(object.size(tweets_text) > 1000000) + expect_equivalent( + substr(tweets_text, 0, 84), + "\"user_id\",\"status_id\",\"created_at\",\"screen_name\",\"text\",\"pic1\",\"pic2\",\"pic3\",\"pic4\"\n") +}) + +tt_ref_test_that("github_contents returns NULL on failure", { + NULL_contents <- github_contents("static/BAD_ENTRY") + expect_equal(NULL_contents, NULL) +}) + + +tt_ref_test_that("github_html returns contents that can be html as html", { + README <- github_html("README.md") + BADFILE <- github_html("bad_file") + + expect_s3_class(README,"xml_document") + expect_equal(BADFILE, NULL) +}) + +tt_ref_test_that("github_sha get a data.frame with the sha for all files in the directory",{ + SHA <- github_sha("static") + + expect_is(SHA, "data.frame") + expect_equal(colnames(SHA), c("path","sha")) +}) + +tt_ref_test_that("github_sha returns NULL when bad entry",{ + NULL_SHA <- github_sha("bad_file_path") + expect_equal(NULL_SHA, NULL) +}) + +tt_ref_test_that("github_blob gets contents as either text or raw",{ + license_text <- github_blob("LICENSE") + license_raw <- 
github_blob("LICENSE", as_raw = TRUE) + + expect_is(license_text, "character") + expect_equivalent( + substr(license_text, 0, 76), + "MIT License\n\nCopyright (c) 2018 R for Data Science online learning community") + + expect_is(license_raw, "raw") + expect_equivalent( + license_raw[1:76], + charToRaw("MIT License\n\nCopyright (c) 2018 R for Data Science online learning community")) + +}) + +tt_ref_test_that("github_blob retuns NULL on bad entry",{ + NULL_blob <- github_blob("BAD_ENTRY", sha = "BAD_SHA") + expect_equal(NULL_blob, NULL) +}) diff --git a/tests/testthat/test-01-tt_master_file.R b/tests/testthat/test-01-tt_master_file.R new file mode 100644 index 0000000..2fa9726 --- /dev/null +++ b/tests/testthat/test-01-tt_master_file.R @@ -0,0 +1,40 @@ +context("tt_master_file API") + +tt_ref_test_that("`tt_master_file()` will update the masterfile reference if is null", { + + TT_MASTER_ENV$TT_MASTER_FILE <- NULL + + expect_true(is.null(TT_MASTER_ENV$TT_MASTER_FILE)) + + ttmf <- tt_master_file() + + expect_true(!is.null(TT_MASTER_ENV$TT_MASTER_FILE)) + +}) + +tt_ref_test_that("`tt_update_master_file()` will update if the sha is old", { + + setup_df <- data.frame(x=1) + attr(setup_df, ".sha") <- "old sha" + tt_master_file( assign = setup_df ) + + ttmf <- tt_master_file() + + + tt_update_master_file() + updated_ttmf <- tt_master_file() + + expect_true(identical(ttmf, setup_df)) + expect_true(!identical(updated_ttmf, setup_df)) + +}) + + +tt_ref_test_that("`tt_update_master_file()` will update if the sha is old", { + + ttmf <- tt_master_file() + expect_equal(colnames(ttmf), c("Week", "Date", "year","data_files", "data_type", "delim")) + +}) + + diff --git a/tests/testthat/test-02-tt_available.R b/tests/testthat/test-02-tt_available.R new file mode 100644 index 0000000..d20c8a6 --- /dev/null +++ b/tests/testthat/test-02-tt_available.R @@ -0,0 +1,86 @@ +context("tt_available lists available weeks of tidy tuesday") + +tt_ref_test_that("tt_datasets prints to console when rstudio viewer is not available", { + ds <- tt_datasets(2018) + consoleOutput <- data.frame(unclass(ds), stringsAsFactors=FALSE) + expect_equivalent( + rvest::html_table(attr(ds, ".html"))[[1]], + consoleOutput + ) +}) + +tt_ref_test_that("tt_datasets throws errors when asking for invalid years", { + expect_error( + tt_datasets(2017), + "Invalid `year` provided to list available tidytuesday datasets.\n\tUse one of the following years:" + ) +}) + +tt_ref_test_that("printing tt_datasets returns all the values as a printed data.frame if not interactive", { + ds <- tt_datasets(2018) + + printed_ds <- capture.output(print(ds, interactive = FALSE)) + consoleOutput <- capture.output(print(data.frame(unclass(ds), stringsAsFactors=FALSE))) + + expect_equal( + printed_ds, + consoleOutput + ) +}) + + +tt_ref_test_that("tt_available returns object of with all years data available", { + ds <- tt_available() + + testthat::expect_s3_class(ds, "tt_dataset_table_list") + expect_equivalent(names(ds), as.character(rev( tt_years()))) + + ds_content <- as.list(unclass(ds)) + + ds_content_data <- lapply(ds_content, function(x) data.frame(unclass(x), stringsAsFactors=FALSE)) + ds_content_html <- lapply(ds_content, function(x) rvest::html_table(attr(x, ".html"))[[1]]) + + expect_equivalent( + ds_content_html, + ds_content_data + ) + +}) + +tt_ref_test_that("printing tt_available returns all the values as a printed data.frame if not interactive", { + ds <- tt_available() + + printed_ds <- capture.output(print(ds, interactive = FALSE)) + consoleOutput <- 
capture.output(quiet<-lapply(as.list(unclass(ds)), function(x) print(data.frame(unclass(x), stringsAsFactors = FALSE)))) + + printed_ds <- printed_ds[!(grepl("^Year:", printed_ds) | printed_ds == "")] + consoleOutput <- consoleOutput[!(grepl("^Year:", consoleOutput) | consoleOutput == "")] + + expect_equal( + printed_ds, + consoleOutput + ) +}) + + +tt_ref_test_that("tt_dataset_table and tt_dataset_table_list objects can make html outputs",{ + ds_tl <- tt_available() + ds_t <- tt_datasets(2019) + + tmpfile <- tempfile(fileext = ".html") + ds_tl_html <- make_tt_dataset_list_html(ds_tl, file = tmpfile) + + tmpfile2 <- tempfile(fileext = ".html") + ds_t_html <- make_tt_dataset_html(ds_t, file = tmpfile2) + + expect_true(file.exists(tmpfile)) + expect_true(file.exists(tmpfile2)) + expect_equal( + xml2::read_html(tmpfile), + ds_tl_html + ) + expect_equal( + xml2::read_html(tmpfile2), + ds_t_html + ) +}) diff --git a/tests/testthat/test-03-tt_check_date.R b/tests/testthat/test-03-tt_check_date.R new file mode 100644 index 0000000..9c0795b --- /dev/null +++ b/tests/testthat/test-03-tt_check_date.R @@ -0,0 +1,30 @@ +context("Checking supplied date/week") + +tt_ref_test_that("valid dates work", { + tt_date <- tt_check_date("2019-04-02") + expect_equal(tt_date, + as.Date("2019-04-02")) + +}) + +tt_ref_test_that("valid year-week combinations work", { + tt_date_1 <- tt_check_date(2019,14) + tt_date_2 <- tt_check_date("2019",14) + + expect_equal(tt_date_1, as.Date("2019-04-02")) + expect_equal(tt_date_2, as.Date("2019-04-02")) +}) + +tt_ref_test_that("Close dates are suggested if provided date is incorrect", { + expect_error( + tt_check_date("2019-04-04"), + "2019-04-04 is not a date that has TidyTuesday data.\n\tDid you mean: 2019-04-02?", + fixed = TRUE + ) +}) + +tt_ref_test_that("invalid entries are flagged", { + expect_error(tt_check_date("xyz"), + "Entries must render to a valid date or year") +}) + diff --git a/tests/testthat/test-04-tt_compile.R b/tests/testthat/test-04-tt_compile.R new file mode 100644 index 0000000..57a1ebf --- /dev/null +++ b/tests/testthat/test-04-tt_compile.R @@ -0,0 +1,47 @@ +context("Compile Files and Readme for the Week ") + +tt_ref_test_that("Check that tt_compile lists all files for the date", { + + tt_c <- tt_compile("2019-01-15") + + expect_equal( + tt_c$files$data_files, + c("agencies.csv","launches.csv") + ) + + expect_equal( + tt_c$files$data_type, + c("csv","csv") + ) + + expect_equal( + tt_c$files$delim, + c(",",",") + ) + + expect_true( + !is.null(tt_c$readme) + ) + +}) + +tt_ref_test_that("Check that tt_compile returns NULL for missing readme's", { + + tt_c <- tt_compile("2018-04-02") + + expect_equal( + tt_c$files$data_files, + "us_avg_tuition.xlsx" + ) + expect_equal( + tt_c$files$data_type, + "xlsx" + ) + expect_true( + is.na(tt_c$files$delim) + ) + expect_true( + is.null(tt_c$readme) + ) + +}) diff --git a/tests/testthat/test-05-tt_load_gh.R b/tests/testthat/test-05-tt_load_gh.R new file mode 100644 index 0000000..58f015b --- /dev/null +++ b/tests/testthat/test-05-tt_load_gh.R @@ -0,0 +1,109 @@ +context("Load all information from Github") + +ref_repo <- options("tidytuesdayR.tt_repo") +options("tidytuesdayR.tt_repo" = "thebioengineer/tt_ref") +on.exit({ + options("tidytuesdayR.tt_repo" = ref_repo[[1]]) +}) + +# check that correct data are returned +tt_ref_test_that("tt_load_gh returns tt object when provided proper date", { + + tt <- tt_load_gh("2019-01-15") + + testthat::expect_s3_class(tt, "tt") + testthat::expect_equal(attr(tt, ".files")$data_files, 
c("agencies.csv", "launches.csv")) + +}) + +# check that correct data are returned +tt_ref_test_that("tt_load_gh returns tt object when provided proper year and TT week number", { + tt <- tt_load_gh(2019, 3) + + testthat::expect_s3_class(tt, "tt") + testthat::expect_equal(attr(tt, ".files")$data_files, c("agencies.csv", "launches.csv")) +}) + + +# check that errors are returned +tt_ref_test_that("tt_load_gh returns error when incorrect date", { + nullout <- capture.output({ + testthat::expect_error(tt_load_gh("2019-01-16"), "is not a date that has TidyTuesday data") + }) +}) +tt_ref_test_that("tt_load_gh returns error when incorrect years or week number entries", { + testthat::expect_error(tt_load_gh(2018, 92), "Please enter a value for week between 1") + testthat::expect_error(tt_load_gh(2017, 92), "TidyTuesday did not exist for") +}) +# check that error is thrown when requesting data from a week that did not exist for that year +tt_ref_test_that("tt_load_gh returns tt object when provided proper year and TT week number", { + testthat::expect_error(tt_load_gh(2020, 1), "does not have data available for download from github") + +}) + +tt_ref_test_that( + "tt_load_gh returns error when incorrect years or week number entries", + { + expect_error( + tt_load_gh(2018, 92), + "Please enter a value for week between 1" + ) + expect_error( + tt_load_gh(2017, 92), + "TidyTuesday did not exist for" + ) + } +) + +tt_ref_test_that("tt_load_gh returns error when incorrect years or week number entries", { + expect_error( + tt_load_gh(2018, 92), + "Please enter a value for week between 1" + ) + expect_error( + tt_load_gh(2017, 92), + "TidyTuesday did not exist for" + ) +}) + +tt_ref_test_that("tt_load_gh returns error when nothing is entered", { + expect_error( + tt_load_gh(), + "Enter either the year or date of the TidyTuesday Data" + ) +}) + +tt_ref_test_that("tt_load_gh returns error when week is not a valid entry between 1 and n weeks", { + testthat::expect_error( + tt_load_gh(2019, 0), + "Week entry must be a valid positive integer" + ) +}) + +# test driven dev, new feature to add +tt_ref_test_that("Returns simple list of object when no readme.md available", { + tt <- tt_load_gh("2018-04-09") + expect_s3_class(tt, "tt") + expect_true(length(attr(tt, ".readme")) == 0) # object should not exist +}) + +tt_ref_test_that("tt_load_gh ignores extra files/diretory paths", { + tt_obj <- tt_load_gh("2019-04-02") + tt_obj_2 <- tt_load_gh("2019-04-09") + + expect_equal(length(tt_obj),1) + expect_equal(tt_obj[1],"bike_traffic.csv") + + expect_equal(length(tt_obj_2),3) + expect_equal(tt_obj_2[1:3],c("grand_slam_timeline.csv","grand_slams.csv","player_dob.csv")) +}) + +tt_ref_test_that("tt_load_gh finds all the files in the readme", { + tt_obj <- tt_load_gh("2020-04-21") + + expect_equal(length(tt_obj),2) + expect_equal(tt_obj[1:2],c("gdpr_text.tsv", "gdpr_violations.tsv")) + +}) + + diff --git a/tests/testthat/test-06-tt_parse_blob.R b/tests/testthat/test-06-tt_parse_blob.R new file mode 100644 index 0000000..a6ddda5 --- /dev/null +++ b/tests/testthat/test-06-tt_parse_blob.R @@ -0,0 +1,132 @@ +context("parsing blob/text") + +test_that("`tt_parse_text` can parse text", { + + result_comma <- + tt_parse_text( + "col1,col2\nval1,val2\nval3,val4", + func = readr::read_delim, + delim = "," + ) + + result_tab <- + tt_parse_text( + "col1\tcol2\nval1\tval2\nval3\tval4", + func = readr::read_delim, + delim = "\t" + ) + + result_special <- + tt_parse_text( + "col1|col2\nval1|val2\nval3|val4", + func = readr::read_delim, + 
delim = "|" + ) + + expected <- tibble::tribble( ~ col1, ~ col2, + "val1", "val2", + "val3", "val4") + + + expect_equivalent(result_comma,expected) + expect_equivalent(result_tab,expected) + expect_equivalent(result_special,expected) +}) + + +test_that("`tt_parse_binary` can parse raw inputs", { + + input_raw <- serialize("RAW VALUE",connection = NULL) + + result_rds <- + tt_parse_binary( + input_raw, + func = readRDS, + filename = "test_rds.rds" + ) + + expect_equal(result_rds,"RAW VALUE") +}) + + +test_that("`tt_parse_blob` can figure out how to handle text or raw",{ + + input_raw <- serialize("RAW VALUE",connection = NULL) + + expected_text <- tibble::tribble( ~ col1, ~ col2, + "val1", "val2", + "val3", "val4") + + result_text_comma <- + tt_parse_blob( + blob = "col1,col2\nval1,val2\nval3,val4", + file_info = data.frame( + data_file = "text.txt", + data_type = "txt", + delim = ",", + stringsAsFactors = FALSE + ) + ) + + result_text_tab <- + tt_parse_blob( + "col1\tcol2\nval1\tval2\nval3\tval4", + file_info = data.frame( + data_file = "text.txt", + data_type = "txt", + delim = "\t", + stringsAsFactors = FALSE + ) + ) + + result_text_special <- + tt_parse_blob( + "col1|col2\nval1|val2\nval3|val4", + file_info = data.frame(data_file = "text.txt", + data_type = "txt", + delim = "|", stringsAsFactors = FALSE) + ) + + result_raw_rda <- + tt_parse_blob( + input_raw, + file_info = data.frame(data_file = "test_rds.rds", + data_type = "rds", + delim = "") + ) + + + + expect_equivalent(result_text_comma,expected_text) + expect_equivalent(result_text_tab,expected_text) + expect_equivalent(result_text_special,expected_text) + expect_equivalent(result_raw_rda,"RAW VALUE") + +}) + +tt_ref_test_that("tt_parse_blob can handle a xls file",{ + xls_blob <- github_blob("data/2019/2019-11-26/PCA_Report_FY17Q3.xls", + sha = "e2313e902423c398883c01d3ecdfe77ae1b84862", + as_raw = TRUE) + + xls_object <- try(tt_parse_blob( + xls_blob, + file_info = data.frame( + data_files = "PCA_Report_FY17Q3.xls", + data_type = "xls", + stringsAsFactors = FALSE), + skip = 4 + ),silent = TRUE) + + expect_true(!inherits(xls_object,"try-error")) + expect_equal(xls_object[1:4, 1:3], + tibble::tribble( + ~`Agency Name`,~`At Start of Quarter`,~`Added`, + NA,NA,NA, + "Account Control Technology, Inc.",10659143750,0, + "Coast Professional, Inc.",7251296633,0, + "ConServe",10025995146,0 + ) + ) + +}) diff --git a/tests/testthat/test-07-tt_read_data.R b/tests/testthat/test-07-tt_read_data.R new file mode 100644 index 0000000..5f54e39 --- /dev/null +++ b/tests/testthat/test-07-tt_read_data.R @@ -0,0 +1,69 @@ +context("Download data using tt") + +tt_ref_test_that("tt_read_data only works for numeric,integer, or character entries", { + tt_gh_data <- tt_load_gh("2019-01-15") + + numericRead <- tt_download_file(tt_gh_data, 1) + integerRead <- tt_download_file(tt_gh_data, 1L) + characterRead <- tt_download_file(tt_gh_data, "agencies.csv") + + numericRead <- tt_download_file(tt_gh_data, 1) + integerRead <- tt_download_file(tt_gh_data, 1L) + characterRead <- tt_download_file(tt_gh_data, "agencies.csv") + + readURL <- read_csv(github_blob("data/2019/2019-01-15/agencies.csv",as_raw = TRUE)) + + expect_equal(numericRead, readURL) + expect_equal(integerRead, readURL) + expect_equal(characterRead, readURL) + + # fails when not expected class + expect_error( + { + tt_download_file(tt_gh_data, factor("agencies.csv")) + }, + "No method for entry of class:" + ) +}) + +tt_ref_test_that("tt_read_data informs when selection is out of range/not available", 
{ + tt_gh_data <- tt_load_gh("2019-01-15") + + expect_error( + { + tt_download_file(tt_gh_data, "wrong_entry.csv") + }, + "That is not an available file" + ) + expect_error( + { + tt_download_file(tt_gh_data, 45) + }, + "That is not an available index" + ) + expect_error( + { + tt_download_file(tt_gh_data, 45L) + }, + "That is not an available index" + ) +}) + + +tt_ref_test_that("tt_read_data can load RDS files just as easily as text files",{ + tt_gh_data <- tt_load_gh("2019-01-01") + + expect_is( + tt_download_file(tt_gh_data, 1), + c("tbl_df","tbl","data.frame") + ) + +}) + + +tt_ref_test_that("read_rda will not arbitrarily assign the object to the current environment",{ + new_dataset<-read_rda("testfiles/test.rda") + expect_false(exists("testdf")) + expect_equal(data.frame(x=c(1,2,3),y=c("A","B","C")), + new_dataset) +}) diff --git a/tests/testthat/test-08-tt_load.R b/tests/testthat/test-08-tt_load.R new file mode 100644 index 0000000..cf1b502 --- /dev/null +++ b/tests/testthat/test-08-tt_load.R @@ -0,0 +1,46 @@ +context("Load and Download all data from Github") + +ref_repo <- options("tidytuesdayR.tt_repo") +options("tidytuesdayR.tt_repo" = "thebioengineer/tt_ref") +on.exit({ + options("tidytuesdayR.tt_repo" = ref_repo[[1]]) +}) + +tt_ref_test_that("tt_load loads all data available", { + + output <- capture.output({ + tt_obj <- tt_load("2019-01-15") + agencies <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-15/agencies.csv") + launches <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-15/launches.csv") + }) + + expect_equal( + tt_obj$agencies, + agencies + ) + + expect_equal( + tt_obj$launches, + launches + ) + +}) + +tt_ref_test_that("tt_load loads excel files properly", { + output <- capture.output({ + tt_obj <- tt_load("2018-04-02") + + tempExcelFile <- tempfile(fileext = ".xlsx") + utils::download.file( + paste0( + "https://www.github.com/rfordatascience/tidytuesday/raw/master/data/", + "2018/2018-04-02/us_avg_tuition.xlsx?raw=true" + ), + tempExcelFile, + quiet = TRUE, + mode = "wb" + ) + }) + + expect_equal(tt_obj$us_avg_tuition, readxl::read_xlsx(tempExcelFile)) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-10-utils.R similarity index 52% rename from tests/testthat/test-utils.R rename to tests/testthat/test-10-utils.R index f812c98..aabcb10 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-10-utils.R @@ -1,5 +1,3 @@ -context("test-utils") - tt_data <- structure( list( value1 = "value1", @@ -8,21 +6,13 @@ tt_data <- structure( .tt = structure( c("value1.csv", "value2.csv"), .files = c("value1.csv", "value2.csv"), - .url = "fake_url", - .readme = "
<div class=\"contents\">
README contents
</div>
", + .readme = NULL, class = "tt_gh" ), class = "tt_data" ) -test_that("tt_make_html generates a properly formatted html doc", { - enteredValues <- read_html(tt_make_html(attr(tt_data, ".tt"))) %>% - html_nodes(".contents") %>% - as.character() - testthat::expect_equal(enteredValues, "
<div class=\"contents\">
README contents
</div>
") -}) - -test_that("print.tt_data lists the available datasets", { +tt_ref_test_that("print.tt_data lists the available datasets", { tt_data <- structure( list( value1 = "value1", @@ -32,7 +22,7 @@ test_that("print.tt_data lists the available datasets", { c("value1.csv", "value2.csv"), .files = c("value1.csv", "value2.csv"), .url = "fake_url", - .readme = "README", + .readme = NULL, class = "tt_gh" ), class = "tt_data" @@ -48,16 +38,16 @@ test_that("print.tt_data lists the available datasets", { ) }) -test_that("print.tt lists all the available files for the weeks tt",{ +tt_ref_test_that("print.tt lists all the available files for the weeks tt",{ tt_obj <- tt_load_gh(2019, week = 16) capturedOutput <- capture_message({ print(tt_obj) - }) + })$message expect_equal( - capturedOutput$message, - "Available datasets for download:\n\tbrexit.csv \n\tcorbyn.csv \n\tdogs.csv \n\teu_balance.csv \n\tpensions.csv \n\ttrade.csv \n\twomen_research.csv \n\t\n" + capturedOutput, + "Available datasets in this TidyTuesday:\n\tbrexit.csv \n\tcorbyn.csv \n\tdogs.csv \n\teu_balance.csv \n\tpensions.csv \n\ttrade.csv \n\twomen_research.csv \n\t\n" ) }) diff --git a/tests/testthat/test-get_tt_html.R b/tests/testthat/test-get_tt_html.R deleted file mode 100644 index 518e830..0000000 --- a/tests/testthat/test-get_tt_html.R +++ /dev/null @@ -1,10 +0,0 @@ -context("Read URLS Gracefully") - -test_that("Returns html when url is successful", { - google <- get_tt_html("https://www.google.com") - expect_s3_class(google,"xml_document") -}) - -test_that("Returns error when url is unsuccessful", { - expect_error(get_tt_html("https://www.THISISAFAKEURL.com")) -}) diff --git a/tests/testthat/test-identify_delim.R b/tests/testthat/test-identify_delim.R deleted file mode 100644 index 5a7aae5..0000000 --- a/tests/testthat/test-identify_delim.R +++ /dev/null @@ -1,48 +0,0 @@ -context("test-identify_delim") - -test_that("Correctly identify the delimeter", { - delim_file <- tempfile() - writeLines(c("test,the,delim", "this,is,a comma"), delim_file) - expect_equal(identify_delim(delim_file), ",") -}) - -test_that("If multiple possible delimeter exist, pick the `simplest` one", { - delim_file <- tempfile() - writeLines(c("test\t,the\t,delim", "this\t,is\t,a twofer"), delim_file) - - expect_warning( - identify_delim(delim_file), - "Detected multiple possible delimeters:" - ) - suppressWarnings({ - expect_equal( - identify_delim(delim_file), - "\t" - ) - }) -}) - -test_that("If unable to identify a delimeter, give a warning", { - delim_file <- tempfile() - writeLines(c("test\tthe\tdelim", "this,is|a twofer"), delim_file) - expect_warning(identify_delim(delim_file), "Not able to detect delimiter for") - suppressWarnings({ - expect_equal( - identify_delim(delim_file), - " " - ) - }) -}) - -test_that("Can skip lines with comments to find delimeters, or ones identified to skip", { - delim_file <- tempfile() - writeLines(c("#this,line|isskipped", "test,the,delim", "this,is,a comma"), delim_file) - expect_equal(identify_delim(delim_file), ",") - expect_equal(identify_delim(delim_file, skip = 1), ",") -}) - -test_that("Can handle new line values in quotes", { - delim_file <- tempfile() - writeLines(c("test,the,\"delim\nnewline\"", "this,is,\"a comma\nwith a new line\""), delim_file) - expect_equal(identify_delim(delim_file), ",") -}) diff --git a/tests/testthat/test-make_url.R b/tests/testthat/test-make_url.R deleted file mode 100644 index e0997e3..0000000 --- a/tests/testthat/test-make_url.R +++ /dev/null @@ -1,24 +0,0 @@ 
-context("test-make_url") - -test_that("valid dates work", { - url <- tt_make_url("2019-04-02") - expect_equal(basename(url), - "2019-04-02") - -}) - -test_that("valid year-week combinations work", { - url <- tt_make_url(2019,14) - url2 <- tt_make_url("2019",14) - - expect_equal(basename(url), - "2019-04-02") - expect_equal(basename(url2), - "2019-04-02") -}) - -test_that("invalid entries are flagged", { - expect_error(tt_make_url("xyz"), - "Entries must render to a valid date or year") -}) - diff --git a/tests/testthat/test-tt_available.R b/tests/testthat/test-tt_available.R deleted file mode 100644 index 8153b5c..0000000 --- a/tests/testthat/test-tt_available.R +++ /dev/null @@ -1,19 +0,0 @@ -context("test-tt_available") - -test_that("tt_available returns object of class 'tt_dataset_table_list", { - ds <- tt_available() - testthat::expect_s3_class(ds, "tt_dataset_table_list") -}) - -test_that("tt_available returns all years", { - ds <- tt_available() - years <- tt_years() - testthat::expect_equivalent(names(ds), years[order(years, decreasing = TRUE)]) -}) - - -test_that("tt_datasets prints to console when rstudio viewer is not available", { - ds <- tt_datasets(2018) - consoleOutput <- print(ds, printConsole = TRUE) - testthat::expect_equivalent(attr(ds, ".html") %>% rvest::html_table(), consoleOutput) -}) diff --git a/tests/testthat/test-tt_load_gh.R b/tests/testthat/test-tt_load_gh.R deleted file mode 100644 index 901ce67..0000000 --- a/tests/testthat/test-tt_load_gh.R +++ /dev/null @@ -1,131 +0,0 @@ -context("test-tt_load_gh") - -# check that correct data are returned -test_that("tt_load_gh returns tt object when provided proper date", { - tt <- tt_load_gh("2019-01-15") - - testthat::expect_s3_class(tt, "tt") - testthat::expect_equal(attr(tt, ".files"), c("agencies.csv", "launches.csv")) - testthat::expect_equal(attr(tt, ".url"), "https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") -}) - -# check that correct data are returned -test_that("tt_load_gh returns tt object when provided proper year and TT week number", { - tt <- tt_load_gh(2019, 3) - - testthat::expect_s3_class(tt, "tt") - testthat::expect_equal(attr(tt, ".files"), c("agencies.csv", "launches.csv")) - testthat::expect_equal(attr(tt, ".url"), "https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-01-15") -}) - - -# check that errors are returned -test_that("tt_load_gh returns error when incorrect date", { - nullout <- capture.output({ - testthat::expect_error(tt_load_gh("2019-01-16"), "is not a date that has TidyTuesday data") - }) -}) -test_that("tt_load_gh returns error when incorrect years or week number entries", { - testthat::expect_error(tt_load_gh(2018, 92), "Please enter a value for week between 1") - testthat::expect_error(tt_load_gh(2017, 92), "TidyTuesday did not exist for") -}) -# check that error is thrown when requesting data from a week that did not exist for that year -test_that("tt_load_gh returns tt object when provided proper year and TT week number", { - testthat::expect_error(tt_load_gh(2020, 1), "does not have data available for download from github") - -}) - -test_that( - "tt_load_gh returns error when incorrect years or week number entries", - { - expect_error( - tt_load_gh(2018, 92), - "Please enter a value for week between 1" - ) - expect_error( - tt_load_gh(2017, 92), - "TidyTuesday did not exist for" - ) - } -) - -test_that("tt_load_gh returns error when incorrect years or week number entries", { - expect_error( - tt_load_gh(2018, 92), - "Please 
enter a value for week between 1" - ) - expect_error( - tt_load_gh(2017, 92), - "TidyTuesday did not exist for" - ) -}) - -test_that("tt_load_gh returns error when nothing is entered", { - nullout <- capture.output({ - testthat::expect_error(tt_load_gh(), "Enter either the year or date of the TidyTuesday Data") - }) -}) -test_that("tt_load_gh returns error when week is not a valid entry between 1 and n weeks", { - testthat::expect_error( - tt_load_gh(2019, 0), - "Week entry must be a valid positive integer" - ) -}) - -# test driven dev, new feature to add -test_that("Returns simple list of object when no readme.md available", { - tt <- tt_load_gh("2018-04-09") - expect_s3_class(tt, "tt") - expect_true(length(attr(tt, ".readme")) == 0) # object should not exist -}) - - -test_that("tt_load loads all data available", { - tt_obj <- tt_load("2019-01-15") - expect_equal( - tt_obj$agencies, - readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-15/agencies.csv") - ) - expect_equal( - tt_obj$launches, - readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-01-15/launches.csv") - ) -}) - -test_that("tt_load loads excel files properly", { - tt_obj <- tt_load("2018-04-02") - - tempExcelFile <- tempfile(fileext = ".xlsx") - utils::download.file( - paste0( - "https://www.github.com/rfordatascience/tidytuesday/raw/master/data/", - "2018/2018-04-02/us_avg_tuition.xlsx?raw=true" - ), - tempExcelFile, - quiet = TRUE, - mode = "wb" - ) - - expect_equal(tt_obj$us_avg_tuition, readxl::read_xlsx(tempExcelFile)) -}) - -test_that("tt_load_gh ignores extra files/diretory paths", { - tt_obj <- tt_load_gh("2019-04-02") - tt_obj_2 <- tt_load_gh("2019-04-09") - - expect_equal(length(tt_obj),1) - expect_equal(tt_obj[1],"bike_traffic.csv") - - expect_equal(length(tt_obj_2),3) - expect_equal(tt_obj_2[1:3],c("grand_slam_timeline.csv","grand_slams.csv","player_dob.csv")) -}) - -test_that("tt_load_gh finds all the files in the readme", { - tt_obj <- tt_load_gh("2020-04-21") - - expect_equal(length(tt_obj),2) - expect_equal(tt_obj[1:2],c("gdpr_violations.tsv", "gdpr_text.tsv")) - -}) - - diff --git a/tests/testthat/test-tt_read_data.R b/tests/testthat/test-tt_read_data.R deleted file mode 100644 index 8ce71dd..0000000 --- a/tests/testthat/test-tt_read_data.R +++ /dev/null @@ -1,74 +0,0 @@ -context("test-tt_read_data") - -test_that("tt_read_data only works for numeric,integer, or character entries", { - tt_gh_data <- tt_load_gh("2019-01-15") - - numericRead <- tt_read_data(tt_gh_data, 1) - integerRead <- tt_read_data(tt_gh_data, 1L) - characterRead <- tt_read_data(tt_gh_data, "agencies.csv") - - numericRead <- tt_read_data(tt_gh_data, 1) - integerRead <- tt_read_data(tt_gh_data, 1L) - characterRead <- tt_read_data(tt_gh_data, "agencies.csv") - - url <- paste0( - gsub("tree", "blob", file.path(attr(tt_gh_data, ".url"), "agencies.csv")), - "?raw=true" - ) - - readURL <- read_csv(url) - - expect_equal(numericRead, readURL) - expect_equal(integerRead, readURL) - expect_equal(characterRead, readURL) - - # fails when not expected class - expect_error( - { - tt_read_data(tt_gh_data, factor("agencies.csv")) - }, - "No method for entry of class:" - ) -}) - -test_that("tt_read_data informs when selection is out of range/not available", { - tt_gh_data <- tt_load_gh("2019-01-15") - - expect_error( - { - tt_read_data(tt_gh_data, "wrong_entry.csv") - }, - "That is not an available file" - ) - expect_error( - { - tt_read_data(tt_gh_data, 45) - 
}, - "That is not an available index" - ) - expect_error( - { - tt_read_data(tt_gh_data, 45L) - }, - "That is not an available index" - ) -}) - - -test_that("tt_read_data can load RDS files just as easily as text files",{ - tt_gh_data <- tt_load_gh("2019-01-01") - - expect_is( - tt_read_data(tt_gh_data, 1), - c("tbl_df","tbl","data.frame") - ) - -}) - - -test_that("read_rda will not arbitrarily assign the object to the current environment",{ - new_dataset<-read_rda("testfiles/test.rda") - expect_false(exists("testdf")) - expect_equal(data.frame(x=c(1,2,3),y=c("A","B","C")), - new_dataset) -}) From 7d33d478bb9c20585ae5c2cf5f6ec90bd1d622ed Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Sat, 23 May 2020 19:58:42 -0700 Subject: [PATCH 29/64] add auth to downloading sha within tt_download --- R/tt_download.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tt_download.R b/R/tt_download.R index 255f3ae..5205c1c 100644 --- a/R/tt_download.R +++ b/R/tt_download.R @@ -44,7 +44,7 @@ tt_download <- function(tt, files = c("All"), ..., branch = "master", auth = git message("--- Starting Download ---") cat("\n") - tt_sha <- github_sha(file.path("data",tt_year,tt_date)) + tt_sha <- github_sha(file.path("data",tt_year,tt_date), auth = auth) tt_data <- setNames( vector("list", length = length(files)), From a050df347889d0e191150934da776bd284f40c1e Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Sun, 24 May 2020 09:34:02 -0700 Subject: [PATCH 30/64] Looking to remove travis/appveyor dep --- .github/workflows/R-CMD-check.yaml | 63 ++++++++++++++++++++++++++++-- .github/workflows/pr-commands.yaml | 51 ++++++++++++++++++++++++ .travis.yml | 35 ----------------- appveyor.yml | 48 ----------------------- 4 files changed, 111 insertions(+), 86 deletions(-) create mode 100644 .github/workflows/pr-commands.yaml delete mode 100644 .travis.yml delete mode 100644 appveyor.yml diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 24a3ba4..3a53fd7 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -5,20 +5,77 @@ on: pull_request: branches: - master + - dev name: R-CMD-check jobs: R-CMD-check: - runs-on: macOS-latest + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macOS-latest, r: 'release'} + - {os: macOS-latest, r: 'devel'} + - {os: ubuntu-16.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + steps: - uses: actions/checkout@v2 + - uses: r-lib/actions/setup-r@master + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@master + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- + + - name: Install system dependencies + if: runner.os == 'Linux' + env: + RHUB_PLATFORM: linux-x86_64-ubuntu-gcc + run: | + 
Rscript -e "remotes::install_github('r-hub/sysreqs')" + sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") + sudo -s eval "$sysreqs" + - name: Install dependencies run: | - install.packages(c("remotes", "rcmdcheck")) remotes::install_deps(dependencies = TRUE) + remotes::install_cran("rcmdcheck") shell: Rscript {0} + - name: Check - run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error") + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml new file mode 100644 index 0000000..0d3cb71 --- /dev/null +++ b/.github/workflows/pr-commands.yaml @@ -0,0 +1,51 @@ +on: + issue_comment: + types: [created] +name: Commands +jobs: + document: + if: startsWith(github.event.comment.body, '/document') + name: document + runs-on: macOS-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + - uses: r-lib/actions/pr-fetch@master + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: r-lib/actions/setup-r@master + - name: Install dependencies + run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)' + - name: Document + run: Rscript -e 'roxygen2::roxygenise()' + - name: commit + run: | + git add man/\* NAMESPACE + git commit -m 'Document' + - uses: r-lib/actions/pr-push@master + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + style: + if: startsWith(github.event.comment.body, '/style') + name: style + runs-on: macOS-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v2 + - uses: r-lib/actions/pr-fetch@master + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + - uses: r-lib/actions/setup-r@master + - name: Install dependencies + run: Rscript -e 'install.packages("styler")' + - name: Style + run: Rscript -e 'styler::style_pkg()' + - name: commit + run: | + git add \*.R + git commit -m 'Style' + - uses: r-lib/actions/pr-push@master + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 13f63a6..0000000 --- a/.travis.yml +++ /dev/null @@ -1,35 +0,0 @@ -# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r - -language: R -sudo: false -cache: packages - -jobs: - include: - - r: devel - - stage: full - os: osx - - r: release - after_success: - - Rscript -e 'covr::codecov()' - #before_deploy: - #- Rscript -e 'remotes::install_cran("pkgdown")' - #deploy: - # provider: script - # script: Rscript -e 'pkgdown::deploy_site_github()' - # skip_cleanup: true - - r: 3.4 - - r: release - install: - - Rscript -e 'install.packages("devtools")' - - Rscript -e 'devtools::install_version("dplyr", version = "0.8.0",upgrade="never")' - - Rscript -e 'devtools::install_version("readxl", version = "1.0.0",upgrade="never")' - - Rscript -e 'devtools::install_version("rvest", version = "0.3.2",upgrade="never")' - - Rscript -e 'devtools::install_version("lubridate", version = "1.7.0",upgrade="never")' - - Rscript -e 'devtools::install_version("purrr", version = "0.2.5",upgrade="never")' - - Rscript -e 'devtools::install_version("readr", version = "1.0.0",upgrade="never")' - - Rscript -e 
'devtools::install_version("rstudioapi", version = "0.2",upgrade="never")' - - Rscript -e 'devtools::install_version("xml2", version = "1.2.0",upgrade="never")' - -matrix: - fast_finish: true diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 2d43edc..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,48 +0,0 @@ -# DO NOT CHANGE the "init" and "install" sections below - -# Download script file from GitHub -init: - ps: | - $ErrorActionPreference = "Stop" - Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" - Import-Module '..\appveyor-tool.ps1' - -install: - ps: Bootstrap - -cache: - - C:\RLibrary - -# Adapt as necessary starting from here - -environment: - USE_RTOOLS: true - -build_script: - - travis-tool.sh install_deps - -test_script: - - travis-tool.sh run_tests - -on_failure: - - 7z a failure.zip *.Rcheck\* - - appveyor PushArtifact failure.zip - -artifacts: - - path: '*.Rcheck\**\*.log' - name: Logs - - - path: '*.Rcheck\**\*.out' - name: Logs - - - path: '*.Rcheck\**\*.fail' - name: Logs - - - path: '*.Rcheck\**\*.Rout' - name: Logs - - - path: '\*_*.tar.gz' - name: Bits - - - path: '\*_*.zip' - name: Bits From 666ea1628840132b7c561b2247384e30aa6c6474 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Sun, 24 May 2020 20:52:53 -0700 Subject: [PATCH 31/64] update to reference full path and stringsAsFactors not being TRUE by default now. --- tests/testthat/test-07-tt_read_data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-07-tt_read_data.R b/tests/testthat/test-07-tt_read_data.R index 5f54e39..32ab75b 100644 --- a/tests/testthat/test-07-tt_read_data.R +++ b/tests/testthat/test-07-tt_read_data.R @@ -62,8 +62,8 @@ tt_ref_test_that("tt_read_data can load RDS files just as easily as text files", tt_ref_test_that("read_rda will not arbitrarily assign the object to the current environment",{ - new_dataset<-read_rda("testfiles/test.rda") + new_dataset<-read_rda(testthat::test_path("testfiles/test.rda")) expect_false(exists("testdf")) - expect_equal(data.frame(x=c(1,2,3),y=c("A","B","C")), + expect_equal(data.frame(x=c(1,2,3),y=c("A","B","C"), stringsAsFactors = TRUE), new_dataset) }) From 90db303673f2c30d92b5d7982ca5978ab6b6f2ec Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Sun, 24 May 2020 21:10:37 -0700 Subject: [PATCH 32/64] Add token to github_pat function --- R/github_api.R | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/R/github_api.R b/R/github_api.R index 3cf0cf2..136ad85 100644 --- a/R/github_api.R +++ b/R/github_api.R @@ -258,10 +258,14 @@ github_page <- function(page_content){ #' #' @return PAT as a character. 
github_pat <- function (quiet = TRUE) {
-  pat <- Sys.getenv("GITHUB_PAT")
-  if (nchar(pat)) {
+  pat <- Sys.getenv("GITHUB_PAT", "")
+  token <- Sys.getenv("GITHUB_TOKEN", "")
+  if (nchar(pat) | nchar(token)) {
     if (!quiet) {
-      message("Using github PAT from envvar GITHUB_PAT")
+      message("Using github PAT from envvar GITHUB_PAT | GITHUB_TOKEN")
+    }
+    if(!nchar(pat)){
+      pat <- token
     }
     return(pat)
   }

From ae0985ad49fea3e351eff3e0ef0eaf5cd49a3097 Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Sun, 24 May 2020 21:34:56 -0700
Subject: [PATCH 33/64] add GITHUB_PAT as token

---
 .github/workflows/R-CMD-check.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 3a53fd7..3b67292 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -27,6 +27,7 @@ jobs:
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
       RSPM: ${{ matrix.config.rspm }}
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

     steps:
       - uses: actions/checkout@v2

From 11422bf1488a4340c512cc57d2efa8b407281445 Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Sun, 24 May 2020 22:13:02 -0700
Subject: [PATCH 34/64] add travis and appveyor back in

---
 .travis.yml  | 35 +++++++++++++++++++++++++++++++++++
 appveyor.yml | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 .travis.yml
 create mode 100644 appveyor.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..13f63a6
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,35 @@
+# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
+
+language: R
+sudo: false
+cache: packages
+
+jobs:
+  include:
+  - r: devel
+  - stage: full
+    os: osx
+  - r: release
+    after_success:
+    - Rscript -e 'covr::codecov()'
+    #before_deploy:
+    #- Rscript -e 'remotes::install_cran("pkgdown")'
+    #deploy:
+    #  provider: script
+    #  script: Rscript -e 'pkgdown::deploy_site_github()'
+    #  skip_cleanup: true
+  - r: 3.4
+  - r: release
+    install:
+    - Rscript -e 'install.packages("devtools")'
+    - Rscript -e 'devtools::install_version("dplyr", version = "0.8.0",upgrade="never")'
+    - Rscript -e 'devtools::install_version("readxl", version = "1.0.0",upgrade="never")'
+    - Rscript -e 'devtools::install_version("rvest", version = "0.3.2",upgrade="never")'
+    - Rscript -e 'devtools::install_version("lubridate", version = "1.7.0",upgrade="never")'
+    - Rscript -e 'devtools::install_version("purrr", version = "0.2.5",upgrade="never")'
+    - Rscript -e 'devtools::install_version("readr", version = "1.0.0",upgrade="never")'
+    - Rscript -e 'devtools::install_version("rstudioapi", version = "0.2",upgrade="never")'
+    - Rscript -e 'devtools::install_version("xml2", version = "1.2.0",upgrade="never")'
+
+matrix:
+  fast_finish: true
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000..2d43edc
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,48 @@
+# DO NOT CHANGE the "init" and "install" sections below
+
+# Download script file from GitHub
+init:
+  ps: |
+        $ErrorActionPreference = "Stop"
+        Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
+        Import-Module '..\appveyor-tool.ps1'
+
+install:
+  ps: Bootstrap
+
+cache:
+  - C:\RLibrary
+
+# Adapt as necessary starting from here
+
+environment:
+  USE_RTOOLS: true
+
+build_script:
+  - travis-tool.sh install_deps
+
+test_script:
+  - travis-tool.sh run_tests
+
+on_failure:
+  - 7z a failure.zip *.Rcheck\*
+  - appveyor PushArtifact
failure.zip + +artifacts: + - path: '*.Rcheck\**\*.log' + name: Logs + + - path: '*.Rcheck\**\*.out' + name: Logs + + - path: '*.Rcheck\**\*.fail' + name: Logs + + - path: '*.Rcheck\**\*.Rout' + name: Logs + + - path: '\*_*.tar.gz' + name: Bits + + - path: '\*_*.zip' + name: Bits From cc34e340ce2813b75b2b6e90dbd62d9812a9ae49 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Sun, 24 May 2020 22:48:16 -0700 Subject: [PATCH 35/64] hopefully this fixes appveyor --- appveyor.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/appveyor.yml b/appveyor.yml index 2d43edc..1879a9e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -17,6 +17,8 @@ cache: environment: USE_RTOOLS: true + GITHUB_PAT: + secure: 0ZN45U7+LZR36fLVTcHHuGtmpoQl2Rtr/epTN/lppJOToFmYi4k/kvXoT7F0vD8p build_script: - travis-tool.sh install_deps From ab6b7399e750100113c5c9d57428e28aadfe9fdf Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Mon, 25 May 2020 10:18:40 -0700 Subject: [PATCH 36/64] remove travis and move code coverage to github actions (#49) --- .github/workflows/R-CMD-check.yaml | 7 +++++ .travis.yml | 35 --------------------- appveyor.yml | 50 ------------------------------ 3 files changed, 7 insertions(+), 85 deletions(-) delete mode 100644 .travis.yml delete mode 100644 appveyor.yml diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 3b67292..89194d6 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -80,3 +80,10 @@ jobs: with: name: ${{ runner.os }}-r${{ matrix.config.r }}-results path: check + + - name: Test coverage + if: matrix.config.os == 'macOS-latest' && matrix.config.r == '3.6' + run: | + install.packages('covr') + covr::codecov(token = "${{secrets.CODECOV_TOKEN}}") + shell: Rscript {0} diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 13f63a6..0000000 --- a/.travis.yml +++ /dev/null @@ -1,35 +0,0 @@ -# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r - -language: R -sudo: false -cache: packages - -jobs: - include: - - r: devel - - stage: full - os: osx - - r: release - after_success: - - Rscript -e 'covr::codecov()' - #before_deploy: - #- Rscript -e 'remotes::install_cran("pkgdown")' - #deploy: - # provider: script - # script: Rscript -e 'pkgdown::deploy_site_github()' - # skip_cleanup: true - - r: 3.4 - - r: release - install: - - Rscript -e 'install.packages("devtools")' - - Rscript -e 'devtools::install_version("dplyr", version = "0.8.0",upgrade="never")' - - Rscript -e 'devtools::install_version("readxl", version = "1.0.0",upgrade="never")' - - Rscript -e 'devtools::install_version("rvest", version = "0.3.2",upgrade="never")' - - Rscript -e 'devtools::install_version("lubridate", version = "1.7.0",upgrade="never")' - - Rscript -e 'devtools::install_version("purrr", version = "0.2.5",upgrade="never")' - - Rscript -e 'devtools::install_version("readr", version = "1.0.0",upgrade="never")' - - Rscript -e 'devtools::install_version("rstudioapi", version = "0.2",upgrade="never")' - - Rscript -e 'devtools::install_version("xml2", version = "1.2.0",upgrade="never")' - -matrix: - fast_finish: true diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 1879a9e..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,50 +0,0 @@ -# DO NOT CHANGE the "init" and "install" sections below - -# Download script file from GitHub -init: - ps: | - $ErrorActionPreference = "Stop" - Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile 
"..\appveyor-tool.ps1" - Import-Module '..\appveyor-tool.ps1' - -install: - ps: Bootstrap - -cache: - - C:\RLibrary - -# Adapt as necessary starting from here - -environment: - USE_RTOOLS: true - GITHUB_PAT: - secure: 0ZN45U7+LZR36fLVTcHHuGtmpoQl2Rtr/epTN/lppJOToFmYi4k/kvXoT7F0vD8p - -build_script: - - travis-tool.sh install_deps - -test_script: - - travis-tool.sh run_tests - -on_failure: - - 7z a failure.zip *.Rcheck\* - - appveyor PushArtifact failure.zip - -artifacts: - - path: '*.Rcheck\**\*.log' - name: Logs - - - path: '*.Rcheck\**\*.out' - name: Logs - - - path: '*.Rcheck\**\*.fail' - name: Logs - - - path: '*.Rcheck\**\*.Rout' - name: Logs - - - path: '\*_*.tar.gz' - name: Bits - - - path: '\*_*.zip' - name: Bits From daaa884b0a668c6a2ade07fbe8ea84d69d3b3480 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Mon, 25 May 2020 10:30:06 -0700 Subject: [PATCH 37/64] update news, bump major version --- DESCRIPTION | 2 +- NEWS.md | 7 +++++++ codecov.yml | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e5f88f6..65a37ee 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tidytuesdayR Type: Package Title: Access the Weekly TidyTuesday Project Dataset -Version: 0.3.1.900 +Version: 1.0.0 Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index 6588198..439717b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# tidytuesdayR 1.0.0 + +* Massive update to all the internals of tidytuesdayR +* [feature] allow for using authentication using github PAT's +* finer control of downloading files via the `download_files` argument of `tt_load()` +* internal functions all now use GET arguments to use the github API + # tidytuesdayR 0.2.2 * Added a `NEWS.md` file to track changes to the package. diff --git a/codecov.yml b/codecov.yml index 8f36b6c..04c5585 100644 --- a/codecov.yml +++ b/codecov.yml @@ -6,7 +6,9 @@ coverage: default: target: auto threshold: 1% + informational: true patch: default: target: auto threshold: 1% + informational: true From 4045d1c9b83540a24f28a020fff33d9d3b16e238 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Mon, 25 May 2020 16:20:49 -0700 Subject: [PATCH 38/64] add github api limiting feature --- .github/workflows/R-CMD-check.yaml | 2 +- R/github_api.R | 70 +++++++++++++++++++++++- R/zzz.R | 1 + tests/testthat/helper-tt_ref_test_that.R | 6 ++ tests/testthat/test-00-github_api.R | 28 ++++++++++ tests/testthat/test-01-tt_master_file.R | 3 + tests/testthat/test-02-tt_available.R | 6 ++ tests/testthat/test-03-tt_check_date.R | 4 ++ tests/testthat/test-04-tt_compile.R | 2 + tests/testthat/test-05-tt_load_gh.R | 11 ++++ tests/testthat/test-06-tt_parse_blob.R | 2 + tests/testthat/test-07-tt_read_data.R | 4 ++ tests/testthat/test-08-tt_load.R | 2 + 13 files changed, 139 insertions(+), 2 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 89194d6..c9c8763 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -27,7 +27,7 @@ jobs: env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PAT: ${{ secrets.dummy_pat }} steps: - uses: actions/checkout@v2 diff --git a/R/github_api.R b/R/github_api.R index 136ad85..1850f3d 100644 --- a/R/github_api.R +++ b/R/github_api.R @@ -295,7 +295,75 @@ github_GET <- function(url, auth = github_pat(), ...){ headers <- add_headers(...) 
} - GET(url, headers) + rate_limit_check() + + if(exists("headers")){ + get_res <- GET(url, headers) + }else{ + get_res <- GET(url) + } + + rate_limit_update(header_to_rate_info(get_res)) + + get_res + +} + +#' The Master List of Files from TidyTuesday +#' +#' @keywords internal + +TT_GITHUB_ENV <- new.env() +TT_GITHUB_ENV$RATE_LIMIT <- NULL +TT_GITHUB_ENV$RATE_REMAINING <- NULL +TT_GITHUB_ENV$RATE_RESET <- NULL + +rate_limit_update <- function(rate_info = NULL, auth = github_pat()){ + + if (is.null(rate_info)) { + if (!is.null(auth)) { + rate_lim <- GET("https://api.github.com/rate_limit", + add_headers(Authorization = paste("token", auth))) + } else { + rate_lim <- GET("https://api.github.com/rate_limit") + } + rate_info <- GET_json(rate_lim)$rate + } + + TT_GITHUB_ENV$RATE_LIMIT <- rate_info$limit + TT_GITHUB_ENV$RATE_REMAINING <- rate_info$remaining + TT_GITHUB_ENV$RATE_RESET <- as.POSIXct(rate_info$reset, origin = "1970-01-01") } +rate_limit_check <- function(n = 10, quiet = TRUE, silent = FALSE){ + + if(TT_GITHUB_ENV$RATE_REMAINING == 0 & !silent){ + stop("Github API Rate Limit hit. You must wait until ", + format(TT_GITHUB_ENV$RATE_RESET, + "%Y-%m-%d %r %Z"), + " to make calls again!") + } else if (TT_GITHUB_ENV$RATE_REMAINING <= n & !silent & !quiet){ + warning( + paste0( + "Only ", + TT_GITHUB_ENV$RATE_REMAINING, + " Github queries remaining until ", + format(TT_GITHUB_ENV$RATE_RESET, + "%Y-%m-%d %r %Z"), + "." + ) + ) + } + TT_GITHUB_ENV$RATE_REMAINING +} + + +header_to_rate_info <- function(res){ + headers <- res$headers + rate_json <- list() + rate_json$limit <- as.numeric(headers$`x-ratelimit-limit`) + rate_json$remaining <- as.numeric(headers$`x-ratelimit-remaining`) + rate_json$reset <- as.numeric(headers$`x-ratelimit-reset`) + rate_json +} diff --git a/R/zzz.R b/R/zzz.R index 9e5d43a..6a7c708 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,3 +1,4 @@ .onLoad <- function(libname, pkgname) { options("tidytuesdayR.tt_repo" = "rfordatascience/tidytuesday") + rate_limit_update() } diff --git a/tests/testthat/helper-tt_ref_test_that.R b/tests/testthat/helper-tt_ref_test_that.R index 5418195..68ca834 100644 --- a/tests/testthat/helper-tt_ref_test_that.R +++ b/tests/testthat/helper-tt_ref_test_that.R @@ -9,3 +9,9 @@ tt_ref_test_that <- function(desc, ...){ }) testthat::test_that(desc = desc, ...) 
} + +check_api <- function( n = 10){ + if(rate_limit_check(silent = TRUE) <= n ){ + skip("Rate Limit Met") + } +} diff --git a/tests/testthat/test-00-github_api.R b/tests/testthat/test-00-github_api.R index be188b5..341889a 100644 --- a/tests/testthat/test-00-github_api.R +++ b/tests/testthat/test-00-github_api.R @@ -1,6 +1,8 @@ context("Github API") tt_ref_test_that("github_contents returns contents as text", { + check_api() + license_text <- github_contents("LICENSE") expect_is(license_text, "character") @@ -10,6 +12,7 @@ tt_ref_test_that("github_contents returns contents as text", { }) tt_ref_test_that("github_contents flips to github_blob and returns contents as text by default when larger than 1MB", { + check_api() tweets_text <- github_contents("static/tidytuesday_tweets.csv") @@ -21,12 +24,14 @@ tt_ref_test_that("github_contents flips to github_blob and returns contents as t }) tt_ref_test_that("github_contents returns NULL on failure", { + check_api() NULL_contents <- github_contents("static/BAD_ENTRY") expect_equal(NULL_contents, NULL) }) tt_ref_test_that("github_html returns contents that can be html as html", { + check_api() README <- github_html("README.md") BADFILE <- github_html("bad_file") @@ -35,6 +40,7 @@ tt_ref_test_that("github_html returns contents that can be html as html", { }) tt_ref_test_that("github_sha get a data.frame with the sha for all files in the directory",{ + check_api() SHA <- github_sha("static") expect_is(SHA, "data.frame") @@ -42,11 +48,13 @@ tt_ref_test_that("github_sha get a data.frame with the sha for all files in the }) tt_ref_test_that("github_sha returns NULL when bad entry",{ + check_api() NULL_SHA <- github_sha("bad_file_path") expect_equal(NULL_SHA, NULL) }) tt_ref_test_that("github_blob gets contents as either text or raw",{ + check_api() license_text <- github_blob("LICENSE") license_raw <- github_blob("LICENSE", as_raw = TRUE) @@ -63,6 +71,26 @@ tt_ref_test_that("github_blob gets contents as either text or raw",{ }) tt_ref_test_that("github_blob retuns NULL on bad entry",{ + check_api() NULL_blob <- github_blob("BAD_ENTRY", sha = "BAD_SHA") expect_equal(NULL_blob, NULL) }) + +tt_ref_test_that("rate_limit_check returns actual value in environment",{ + val <- rate_limit_check() + expect_equal(val,TT_GITHUB_ENV$RATE_REMAINING) +}) + +tt_ref_test_that("rate_limit_check throws warning when within n of 0",{ + rate_limit_update(list(limit = 50, remaining = 5, reset = 1000)) + on.exit({rate_limit_update()}) + expect_warning(rate_limit_check(n = 10, quiet = FALSE)) +}) + +tt_ref_test_that("rate_limit_check throws error when 0, except when silent = TRUE",{ + rate_limit_update(list(limit = 50, remaining = 0, reset = 1000)) + on.exit({rate_limit_update()}) + expect_error(rate_limit_check(silent = FALSE)) + output <- try(rate_limit_check(n = 10, silent = TRUE), silent = TRUE) + expect_true(!inherits(output,"try-error")) +}) diff --git a/tests/testthat/test-01-tt_master_file.R b/tests/testthat/test-01-tt_master_file.R index 2fa9726..fb7a196 100644 --- a/tests/testthat/test-01-tt_master_file.R +++ b/tests/testthat/test-01-tt_master_file.R @@ -1,6 +1,7 @@ context("tt_master_file API") tt_ref_test_that("`tt_master_file()` will update the masterfile reference if is null", { + check_api() TT_MASTER_ENV$TT_MASTER_FILE <- NULL @@ -13,6 +14,7 @@ tt_ref_test_that("`tt_master_file()` will update the masterfile reference if is }) tt_ref_test_that("`tt_update_master_file()` will update if the sha is old", { + check_api() setup_df <- data.frame(x=1) attr(setup_df, 
".sha") <- "old sha" @@ -31,6 +33,7 @@ tt_ref_test_that("`tt_update_master_file()` will update if the sha is old", { tt_ref_test_that("`tt_update_master_file()` will update if the sha is old", { + check_api() ttmf <- tt_master_file() expect_equal(colnames(ttmf), c("Week", "Date", "year","data_files", "data_type", "delim")) diff --git a/tests/testthat/test-02-tt_available.R b/tests/testthat/test-02-tt_available.R index d20c8a6..2d30c92 100644 --- a/tests/testthat/test-02-tt_available.R +++ b/tests/testthat/test-02-tt_available.R @@ -1,6 +1,7 @@ context("tt_available lists available weeks of tidy tuesday") tt_ref_test_that("tt_datasets prints to console when rstudio viewer is not available", { + check_api() ds <- tt_datasets(2018) consoleOutput <- data.frame(unclass(ds), stringsAsFactors=FALSE) expect_equivalent( @@ -10,6 +11,7 @@ tt_ref_test_that("tt_datasets prints to console when rstudio viewer is not avail }) tt_ref_test_that("tt_datasets throws errors when asking for invalid years", { + check_api() expect_error( tt_datasets(2017), "Invalid `year` provided to list available tidytuesday datasets.\n\tUse one of the following years:" @@ -17,6 +19,7 @@ tt_ref_test_that("tt_datasets throws errors when asking for invalid years", { }) tt_ref_test_that("printing tt_datasets returns all the values as a printed data.frame if not interactive", { + check_api() ds <- tt_datasets(2018) printed_ds <- capture.output(print(ds, interactive = FALSE)) @@ -30,6 +33,7 @@ tt_ref_test_that("printing tt_datasets returns all the values as a printed data. tt_ref_test_that("tt_available returns object of with all years data available", { + check_api() ds <- tt_available() testthat::expect_s3_class(ds, "tt_dataset_table_list") @@ -48,6 +52,7 @@ tt_ref_test_that("tt_available returns object of with all years data available", }) tt_ref_test_that("printing tt_available returns all the values as a printed data.frame if not interactive", { + check_api() ds <- tt_available() printed_ds <- capture.output(print(ds, interactive = FALSE)) @@ -64,6 +69,7 @@ tt_ref_test_that("printing tt_available returns all the values as a printed data tt_ref_test_that("tt_dataset_table and tt_dataset_table_list objects can make html outputs",{ + check_api() ds_tl <- tt_available() ds_t <- tt_datasets(2019) diff --git a/tests/testthat/test-03-tt_check_date.R b/tests/testthat/test-03-tt_check_date.R index 9c0795b..4ca2eb1 100644 --- a/tests/testthat/test-03-tt_check_date.R +++ b/tests/testthat/test-03-tt_check_date.R @@ -1,6 +1,7 @@ context("Checking supplied date/week") tt_ref_test_that("valid dates work", { + check_api() tt_date <- tt_check_date("2019-04-02") expect_equal(tt_date, as.Date("2019-04-02")) @@ -8,6 +9,7 @@ tt_ref_test_that("valid dates work", { }) tt_ref_test_that("valid year-week combinations work", { + check_api() tt_date_1 <- tt_check_date(2019,14) tt_date_2 <- tt_check_date("2019",14) @@ -16,6 +18,7 @@ tt_ref_test_that("valid year-week combinations work", { }) tt_ref_test_that("Close dates are suggested if provided date is incorrect", { + check_api() expect_error( tt_check_date("2019-04-04"), "2019-04-04 is not a date that has TidyTuesday data.\n\tDid you mean: 2019-04-02?", @@ -24,6 +27,7 @@ tt_ref_test_that("Close dates are suggested if provided date is incorrect", { }) tt_ref_test_that("invalid entries are flagged", { + check_api() expect_error(tt_check_date("xyz"), "Entries must render to a valid date or year") }) diff --git a/tests/testthat/test-04-tt_compile.R b/tests/testthat/test-04-tt_compile.R index 
57a1ebf..d90cbc2 100644 --- a/tests/testthat/test-04-tt_compile.R +++ b/tests/testthat/test-04-tt_compile.R @@ -1,6 +1,7 @@ context("Compile Files and Readme for the Week ") tt_ref_test_that("Check that tt_compile lists all files for the date", { + check_api() tt_c <- tt_compile("2019-01-15") @@ -26,6 +27,7 @@ tt_ref_test_that("Check that tt_compile lists all files for the date", { }) tt_ref_test_that("Check that tt_compile returns NULL for missing readme's", { + check_api() tt_c <- tt_compile("2018-04-02") diff --git a/tests/testthat/test-05-tt_load_gh.R b/tests/testthat/test-05-tt_load_gh.R index 58f015b..e96f8bd 100644 --- a/tests/testthat/test-05-tt_load_gh.R +++ b/tests/testthat/test-05-tt_load_gh.R @@ -8,6 +8,7 @@ on.exit({ # check that correct data are returned tt_ref_test_that("tt_load_gh returns tt object when provided proper date", { + check_api() tt <- tt_load_gh("2019-01-15") @@ -18,6 +19,7 @@ tt_ref_test_that("tt_load_gh returns tt object when provided proper date", { # check that correct data are returned tt_ref_test_that("tt_load_gh returns tt object when provided proper year and TT week number", { + check_api() tt <- tt_load_gh(2019, 3) testthat::expect_s3_class(tt, "tt") @@ -27,6 +29,7 @@ tt_ref_test_that("tt_load_gh returns tt object when provided proper year and TT # check that errors are returned tt_ref_test_that("tt_load_gh returns error when incorrect date", { + check_api() nullout <- capture.output({ testthat::expect_error(tt_load_gh("2019-01-16"), "is not a date that has TidyTuesday data") }) @@ -37,6 +40,7 @@ tt_ref_test_that("tt_load_gh returns error when incorrect years or week number e }) # check that error is thrown when requesting data from a week that did not exist for that year tt_ref_test_that("tt_load_gh returns tt object when provided proper year and TT week number", { + check_api() testthat::expect_error(tt_load_gh(2020, 1), "does not have data available for download from github") }) @@ -44,6 +48,7 @@ tt_ref_test_that("tt_load_gh returns tt object when provided proper year and TT tt_ref_test_that( "tt_load_gh returns error when incorrect years or week number entries", { + check_api() expect_error( tt_load_gh(2018, 92), "Please enter a value for week between 1" @@ -56,6 +61,7 @@ tt_ref_test_that( ) tt_ref_test_that("tt_load_gh returns error when incorrect years or week number entries", { + check_api() expect_error( tt_load_gh(2018, 92), "Please enter a value for week between 1" @@ -67,6 +73,7 @@ tt_ref_test_that("tt_load_gh returns error when incorrect years or week number e }) tt_ref_test_that("tt_load_gh returns error when nothing is entered", { + check_api() expect_error( tt_load_gh(), "Enter either the year or date of the TidyTuesday Data" @@ -74,6 +81,7 @@ tt_ref_test_that("tt_load_gh returns error when nothing is entered", { }) tt_ref_test_that("tt_load_gh returns error when week is not a valid entry between 1 and n weeks", { + check_api() testthat::expect_error( tt_load_gh(2019, 0), "Week entry must be a valid positive integer" @@ -82,12 +90,14 @@ tt_ref_test_that("tt_load_gh returns error when week is not a valid entry betwee # test driven dev, new feature to add tt_ref_test_that("Returns simple list of object when no readme.md available", { + check_api() tt <- tt_load_gh("2018-04-09") expect_s3_class(tt, "tt") expect_true(length(attr(tt, ".readme")) == 0) # object should not exist }) tt_ref_test_that("tt_load_gh ignores extra files/diretory paths", { + check_api() tt_obj <- tt_load_gh("2019-04-02") tt_obj_2 <- tt_load_gh("2019-04-09") @@ -99,6 
+109,7 @@ tt_ref_test_that("tt_load_gh ignores extra files/diretory paths", { }) tt_ref_test_that("tt_load_gh finds all the files in the readme", { + check_api() tt_obj <- tt_load_gh("2020-04-21") expect_equal(length(tt_obj),2) diff --git a/tests/testthat/test-06-tt_parse_blob.R b/tests/testthat/test-06-tt_parse_blob.R index a6ddda5..eed5fc2 100644 --- a/tests/testthat/test-06-tt_parse_blob.R +++ b/tests/testthat/test-06-tt_parse_blob.R @@ -105,6 +105,8 @@ test_that("`tt_parse_blob` can figure out how to handle text or raw",{ }) tt_ref_test_that("tt_parse_blob can handle a xls file",{ + + check_api() xls_blob <- github_blob("data/2019/2019-11-26/PCA_Report_FY17Q3.xls", sha = "e2313e902423c398883c01d3ecdfe77ae1b84862", as_raw = TRUE) diff --git a/tests/testthat/test-07-tt_read_data.R b/tests/testthat/test-07-tt_read_data.R index 32ab75b..6f72baf 100644 --- a/tests/testthat/test-07-tt_read_data.R +++ b/tests/testthat/test-07-tt_read_data.R @@ -1,6 +1,7 @@ context("Download data using tt") tt_ref_test_that("tt_read_data only works for numeric,integer, or character entries", { + check_api() tt_gh_data <- tt_load_gh("2019-01-15") numericRead <- tt_download_file(tt_gh_data, 1) @@ -27,6 +28,7 @@ tt_ref_test_that("tt_read_data only works for numeric,integer, or character entr }) tt_ref_test_that("tt_read_data informs when selection is out of range/not available", { + check_api() tt_gh_data <- tt_load_gh("2019-01-15") expect_error( @@ -51,6 +53,7 @@ tt_ref_test_that("tt_read_data informs when selection is out of range/not availa tt_ref_test_that("tt_read_data can load RDS files just as easily as text files",{ + check_api() tt_gh_data <- tt_load_gh("2019-01-01") expect_is( @@ -62,6 +65,7 @@ tt_ref_test_that("tt_read_data can load RDS files just as easily as text files", tt_ref_test_that("read_rda will not arbitrarily assign the object to the current environment",{ + check_api() new_dataset<-read_rda(testthat::test_path("testfiles/test.rda")) expect_false(exists("testdf")) expect_equal(data.frame(x=c(1,2,3),y=c("A","B","C"), stringsAsFactors = TRUE), diff --git a/tests/testthat/test-08-tt_load.R b/tests/testthat/test-08-tt_load.R index cf1b502..09e2145 100644 --- a/tests/testthat/test-08-tt_load.R +++ b/tests/testthat/test-08-tt_load.R @@ -7,6 +7,7 @@ on.exit({ }) tt_ref_test_that("tt_load loads all data available", { + check_api() output <- capture.output({ tt_obj <- tt_load("2019-01-15") @@ -27,6 +28,7 @@ tt_ref_test_that("tt_load loads all data available", { }) tt_ref_test_that("tt_load loads excel files properly", { + check_api() output <- capture.output({ tt_obj <- tt_load("2018-04-02") From 19066493738aabc34989285e71663ba7636922b6 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 3 Jun 2020 22:17:00 -0700 Subject: [PATCH 39/64] Cran release prep v1 (#52) * prepping for CRAN release * Add Cran Comments --- .Rbuildignore | 1 + R/github_api.R | 2 +- cran-comments.md | 15 +++++++++++++++ man/TT_GITHUB_ENV.Rd | 16 ++++++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 cran-comments.md create mode 100644 man/TT_GITHUB_ENV.Rd diff --git a/.Rbuildignore b/.Rbuildignore index f6c559b..d5feabc 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -11,3 +11,4 @@ ^readme\.md$ ^pkgdown$ ^\.github$ +^cran-comments\.md$ diff --git a/R/github_api.R b/R/github_api.R index 1850f3d..3c7bb5f 100644 --- a/R/github_api.R +++ b/R/github_api.R @@ -309,7 +309,7 @@ github_GET <- function(url, auth = github_pat(), ...){ } -#' The Master List of Files from TidyTuesday +#' Environment 
containing state of Github API limits #' #' @keywords internal diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..91981a8 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,15 @@ +## Test environments +* local R installation, R 4.0.0 +* ubuntu 16.04 (on github actions), R 4.0.0 +* mac OS 10.15.4 (on github actions) R-devel, R 4.0.0, +* win-builder (devel) + +## R CMD check results + +0 errors | 0 warnings | 1 note + +* This is a new release. + +## Downstream dependencies + +There are currently no downstream dependencies on this package as it is the first CRAN submission diff --git a/man/TT_GITHUB_ENV.Rd b/man/TT_GITHUB_ENV.Rd new file mode 100644 index 0000000..77de3f5 --- /dev/null +++ b/man/TT_GITHUB_ENV.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/github_api.R +\docType{data} +\name{TT_GITHUB_ENV} +\alias{TT_GITHUB_ENV} +\title{Environment containing state of Github API limits} +\format{ +An object of class \code{environment} of length 3. +} +\usage{ +TT_GITHUB_ENV +} +\description{ +Environment containing state of Github API limits +} +\keyword{internal} From 6c47ac68d238802f27aacc6b3a48fb86418404fc Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Mon, 29 Jun 2020 21:03:59 -0700 Subject: [PATCH 40/64] Increment version number --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 300dc91..e80f401 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tidytuesdayR Type: Package Title: Access the Weekly 'TidyTuesday' Project Dataset -Version: 1.0.0 +Version: 1.0.0.9000 Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index 439717b..5f1d0d7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# tidytuesdayR (development version) + # tidytuesdayR 1.0.0 * Massive update to all the internals of tidytuesdayR From 5043917256b2588baf216e84534b7d1df46fa272 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 8 Jul 2020 20:51:13 -0700 Subject: [PATCH 41/64] Tidytemplates (#61) * Create tidytemplate * update documentation * update for missing imports and partial arg match * complete updates to tidytemplate * wrap tests in tt_ref_test_that and add api check --- DESCRIPTION | 3 +- NAMESPACE | 5 ++ NEWS.md | 2 + R/last_tuesday.R | 40 ++++++++++++ R/tt_check_date.R | 16 +++++ R/use_tidytemplate.R | 39 ++++++++++++ inst/templates/tidytemplate.Rmd | 90 +++++++++++++++++++++++++++ man/TT_GITHUB_ENV.Rd | 16 ----- man/last_tuesday.Rd | 21 +++++++ man/tt_date.Rd | 25 ++++++++ man/use_tidytemplate.Rd | 31 +++++++++ readme.md | 70 +++++++++++---------- tests/testthat/test-11-last_tuesday.R | 46 ++++++++++++++ 13 files changed, 353 insertions(+), 51 deletions(-) create mode 100644 R/last_tuesday.R create mode 100644 R/use_tidytemplate.R create mode 100644 inst/templates/tidytemplate.Rmd delete mode 100644 man/TT_GITHUB_ENV.Rd create mode 100644 man/last_tuesday.Rd create mode 100644 man/tt_date.Rd create mode 100644 man/use_tidytemplate.Rd create mode 100644 tests/testthat/test-11-last_tuesday.R diff --git a/DESCRIPTION b/DESCRIPTION index e80f401..294f3c7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,4 +57,5 @@ Imports: xml2 (>= 1.2.0), httr, jsonlite, - magrittr + magrittr, + usethis diff --git a/NAMESPACE b/NAMESPACE index 1848195..57416c7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ S3method(print,tt_dataset_table) S3method(print,tt_dataset_table_list) export("%>%") export(github_pat) +export(last_tuesday) 
export(rate_limit_check)
 export(readme)
 export(tt_available)
@@ -14,6 +15,7 @@ export(tt_download)
 export(tt_download_file)
 export(tt_load)
 export(tt_load_gh)
+export(use_tidytemplate)
 importFrom(httr,GET)
 importFrom(httr,add_headers)
 importFrom(jsonlite,base64_dec)
@@ -23,6 +25,8 @@ importFrom(lubridate,as_date)
 importFrom(lubridate,day)
 importFrom(lubridate,is.Date)
 importFrom(lubridate,month)
+importFrom(lubridate,today)
+importFrom(lubridate,wday)
 importFrom(lubridate,year)
 importFrom(lubridate,ymd)
 importFrom(magrittr,"%>%")
@@ -42,6 +46,7 @@ importFrom(stats,na.pass)
 importFrom(stats,setNames)
 importFrom(tools,file_ext)
 importFrom(tools,file_path_sans_ext)
+importFrom(usethis,use_template)
 importFrom(utils,URLencode)
 importFrom(utils,browseURL)
 importFrom(utils,read.csv)
diff --git a/NEWS.md b/NEWS.md
index 5f1d0d7..83d04bc 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,7 @@
 # tidytuesdayR (development version)
 
+* [feature] Provide a template Rmd for users, populated with date and proper `tt_load` call
+
 # tidytuesdayR 1.0.0
 
 * Massive update to all the internals of tidytuesdayR
diff --git a/R/last_tuesday.R b/R/last_tuesday.R
new file mode 100644
index 0000000..fdc95de
--- /dev/null
+++ b/R/last_tuesday.R
@@ -0,0 +1,40 @@
+#' Find the most recent tuesday
+#'
+#' Utility function to assist users in identifying the most recent 'tidytuesday' date
+#'
+#' @param date todays date as a date object or character string in
+#' YYYY-MM-DD format.
+#'
+#' @importFrom lubridate wday today
+#' @examples
+#'
+#' last_tuesday() # get last tuesday from todays date
+#' last_tuesday("2020-01-01") # get last tuesday from specified date
+#'
+#' @export
+
+last_tuesday <- function(date = today(tzone = "America/New_York")) {
+
+  stopifnot(inherits(date,"Date") | valid_date(date))
+
+  date <- as.Date(tt_date_format(date))
+
+  diff_tuesday <- 3 - lubridate::wday(date)
+
+  if (diff_tuesday < 0) {
+    diff_tuesday <- diff_tuesday + 7
+  }
+
+  tuesday <- date + diff_tuesday
+
+  # data is usually released on a monday
+  if(tuesday - date > 1){
+    tuesday = tuesday - 7
+  }
+
+  return(tuesday)
+}
+
+tt_date <- function(year, week){
+  tt_check_date(year, week)
+}
diff --git a/R/tt_check_date.R b/R/tt_check_date.R
index 73d55d3..1a26af5 100644
--- a/R/tt_check_date.R
+++ b/R/tt_check_date.R
@@ -1,3 +1,19 @@
+#' @title Get date of Tidytuesday, given the year and week
+#' @description Sometimes we don't know the date we want, but we do know the
+#' week. this function provides the ability to pass the year and week we are
+#' interested in to get the correct date
+#' @param year what year of TidyTuesday to use
+#' @param week what week of Tidytuesday to use
+#' @examples
+#' \donttest{
+#' if(interactive()){
+#' tt_date(2019, week = 42)
+#' }
+#' }
+tt_date <- function(year,week){
+  tt_check_date(year, week)
+}
+
 #' @title given inputs generate valid TidyTuesday URL
 #' @description Given multiple types of inputs, generate
 #' @param x either a string or numeric entry indicating the full date of
diff --git a/R/use_tidytemplate.R b/R/use_tidytemplate.R
new file mode 100644
index 0000000..acf9ae2
--- /dev/null
+++ b/R/use_tidytemplate.R
@@ -0,0 +1,39 @@
+#' @title Call and open the tidytemplate
+#' @description Use the tidytemplate Rmd for starting your analysis with a
+#' leg up for processing
+#' @param name name of your tidytuesday analysis file
+#' @param open should the file be opened after being created
+#' @param ... 
arguments to be passed to \link[usethis]{use_template} +#' @param refdate date to use as reference to determine which 'tidytuesday' to +#' use for the template. Either date object or character string in +#' YYYY-MM-DD format. +#' @importFrom usethis use_template +#' @importFrom lubridate today +#' @examples +#' \donttest{ +#' if(interactive()){ +#' use_tidytemplate(name = "My_Awesome_TidyTuesday.Rmd") +#' } +#' } +#' +#' @export +use_tidytemplate <- + function(name = NULL, + open = interactive(), + ..., + refdate = today()) { + + stopifnot(inherits(refdate,"Date") | valid_date(refdate)) + last_tt <- last_tuesday(refdate) + + if(is.null(name)){ + name <- paste0(format(last_tt,"%Y_%m_%d"),"_tidy_tuesday.Rmd") + } + + use_template("tidytemplate.Rmd", + save_as=name, + data = list( + call_date = today(), + call_tuesday = format(last_tt,"%Y-%m-%d")), + package = "tidytuesdayR", ..., open = open) +} diff --git a/inst/templates/tidytemplate.Rmd b/inst/templates/tidytemplate.Rmd new file mode 100644 index 0000000..36baeeb --- /dev/null +++ b/inst/templates/tidytemplate.Rmd @@ -0,0 +1,90 @@ +--- +title: "TidyTemplate" +date: {{{call_date}}} +output: html_output +--- + +# TidyTuesday + +Join the R4DS Online Learning Community in the weekly #TidyTuesday event! +Every week we post a raw dataset, a chart or article related to that dataset, and ask you to explore the data. +While the dataset will be “tamed”, it will not always be tidy! As such you might need to apply various R for Data Science techniques to wrangle the data into a true tidy format. +The goal of TidyTuesday is to apply your R skills, get feedback, explore other’s work, and connect with the greater #RStats community! +As such we encourage everyone of all skills to participate! + +```{r setup, include=FALSE} + +knitr::opts_chunk$set(echo = TRUE) + +library(tidyverse) +library(tidytuesdayR) + +``` + +# Load the weekly Data + +Dowload the weekly data and make available in the `tt` object. + +```{r Load} + +tt <- tt_load("{{{call_tuesday}}}") + +``` + + +# Readme + +Take a look at the readme for the weekly data to get insight on the dataset. +This includes a data dictionary, source, and a link to an article on the data. + +```{r Readme, eval = interactive()} + +tt + +``` + + +# Glimpse Data + +Take an initial look at the format of the data available. + +```{r Glimpse} + +tt %>% + map(glimpse) + +``` + +# Wrangle + +Explore the data and process it into a nice format for plotting! Access each dataset by name by using a dollarsign after the `tt` object and then the name of the data set. + +```{r Wrangle} + + + +``` + + +# Visualize + +Using your processed dataset, create your unique visualization. + +```{r Visualize} + + + +``` + +# Save Image + +Save your image for sharing. Be sure to use the `#TidyTuesday` hashtag in your post on twitter! + +```{r} + +# This will save your most recent plot +ggsave( + filename = "My TidyTuesday Plot.png", + device = "png") + +``` diff --git a/man/TT_GITHUB_ENV.Rd b/man/TT_GITHUB_ENV.Rd deleted file mode 100644 index 77de3f5..0000000 --- a/man/TT_GITHUB_ENV.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/github_api.R -\docType{data} -\name{TT_GITHUB_ENV} -\alias{TT_GITHUB_ENV} -\title{Environment containing state of Github API limits} -\format{ -An object of class \code{environment} of length 3. 
-} -\usage{ -TT_GITHUB_ENV -} -\description{ -Environment containing state of Github API limits -} -\keyword{internal} diff --git a/man/last_tuesday.Rd b/man/last_tuesday.Rd new file mode 100644 index 0000000..9843835 --- /dev/null +++ b/man/last_tuesday.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/last_tuesday.R +\name{last_tuesday} +\alias{last_tuesday} +\title{Find the most recent tuesday} +\usage{ +last_tuesday(date = today(tzone = "America/New_York")) +} +\arguments{ +\item{date}{todays date as a date object or character string in +YYYY-MM-DD format.} +} +\description{ +Utility function to assist users in identifying the most recent 'tidytuesday' date +} +\examples{ + +last_tuesday() # get last tuesday from todays date +last_tuesday("2020-01-01") # get last tuesday from specified date + +} diff --git a/man/tt_date.Rd b/man/tt_date.Rd new file mode 100644 index 0000000..36d727c --- /dev/null +++ b/man/tt_date.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tt_check_date.R +\name{tt_date} +\alias{tt_date} +\title{Get date of Tidytuesday, given the year and week} +\usage{ +tt_date(year, week) +} +\arguments{ +\item{year}{what year of TidyTuesday to use} + +\item{week}{what week of Tidytuesday to use} +} +\description{ +Sometimes we don't know the date we want, but we do know the +week. this function provides the ability to pass the year and week we are +interested in to get the correct date +} +\examples{ +\donttest{ +if(interactive()){ + tt_date(2019, week = 42) + } +} +} diff --git a/man/use_tidytemplate.Rd b/man/use_tidytemplate.Rd new file mode 100644 index 0000000..1ee6fc8 --- /dev/null +++ b/man/use_tidytemplate.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/use_tidytemplate.R +\name{use_tidytemplate} +\alias{use_tidytemplate} +\title{Call and open the tidytemplate} +\usage{ +use_tidytemplate(name = NULL, open = interactive(), ..., refdate = today()) +} +\arguments{ +\item{name}{name of your tidytuesday analysis file} + +\item{open}{should the file be opened after being created} + +\item{...}{arguments to be passed to \link[usethis]{use_template}} + +\item{refdate}{date to use as reference to determine which 'tidytuesday' to +use for the template. Either date object or character string in +YYYY-MM-DD format.} +} +\description{ +Use the tidytemplate Rmd for starting your analysis with a +leg up for processing +} +\examples{ +\donttest{ +if(interactive()){ + use_tidytemplate(name = "My_Awesome_TidyTuesday.Rmd") +} +} + +} diff --git a/readme.md b/readme.md index ec51822..92cfee6 100644 --- a/readme.md +++ b/readme.md @@ -37,9 +37,9 @@ remotes::install_github("thebioengineer/tidytuesdayR") There are currently two methods to access the data from the respository. -### tt\_load() +### Load the Data! -The first and simplest way is to use the ‘tt\_load()’ function. This +The simplest way is to use the ‘tt\_load()’ function. This function has accepts two types of inputs to determine which data to grab. It can be a date as a string in the YYYY-MM-DD format like below. @@ -64,38 +64,9 @@ tt_data$agencies tt_data[["agencies"]] ``` -### tt\_load\_gh() and tt\_download\_file() - -The second method to access the data from the repository is to use the -combination of `tt_load_gh()` and `tt_download_file()` functions. 
-`tt_load_gh()` takes similar arguments as `tt_load()`, in that either -the date or a combination of year and week can be entered. - -``` r -tt <- tt_load_gh("2019-01-15") -``` - -The `tt` object lists the available files for download. To download the -data, use the `tt_download_file()` function. `tt_download_file()` expects the -first argument to be the `tt` object. The second argument can be a -string indicating the name of the file to download from the repository, -or the index in the `tt` object - -``` r -agencies <- tt %>% - tt_download_file("agencies.csv") - -# The first index of the tt object is `agencies.csv` -# agencies <- tt %>% -# tt_download_file(1) -``` - -## Tidy Tuesday Details - -The tt\_data and tt objects both have a function for showing the readme -for that week called `readme()`. In addition, the print methods for both -objects show the readme in a viewer and the available datasets in the -console. +To view the readme, either print the `tt_data` object or use the `readme()` +function. When you print the `tt_data` object, you also get the available +datasets names printed in the console. ``` r readme(tt_data) @@ -109,6 +80,37 @@ print(tt_data) ## ``` +### TidyTemplate + +As part of the goal of making partcipating in #TidyTuesday easier, {tidytuesdayR} now also provides a template! +To use it, just use the `use_tidytemplate()` function! + +By default, the template will assume to be using the most recent tidytuesday. +However, you can pass a date object or character string in YYYY-MM-DD format +defining a different date you want to use. If you don't recall the exact date, +no worries, you can use the `tt_date()` function to calculate and get the date +for you! + +```r +## this weeks TidyTuesday! +tidytuesdayR::use_tidytemplate() + +## TidyTuesday from Week 42 of 2019 +tidytuesdayR::use_tidytemplate(refdate = "2019-10-15") +tidytuesdayR::use_tidytemplate(refdate = tidytuesdayR::tt_date(2019, week = 42)) + +``` + +Additionally, by default the template will create the new file in your working +directory, using the "YYYY_MM_DD" format per good practices. +However, if you are so inclined, you can rename it to whatever you wish. + +```r +tidytuesdayR::use_tidytemplate(name = "My Super Great TidyTuesday.Rmd") +``` + + + ## Contributing Please note that the ‘tidytuesdayR’ project is released with a diff --git a/tests/testthat/test-11-last_tuesday.R b/tests/testthat/test-11-last_tuesday.R new file mode 100644 index 0000000..745fdfa --- /dev/null +++ b/tests/testthat/test-11-last_tuesday.R @@ -0,0 +1,46 @@ + +tt_ref_test_that("last_tuesday will give you the most recent tuesday", { + check_api() + + ## Look backwards to the last tt + date_1 <- as.Date("2020-01-01") + last_tuesday_1 <- last_tuesday(date_1) + + ## Look forwards to the "next" tt (they can be posted on mondays) + date_2 <- as.Date("2020-01-06") + last_tuesday_2 <- last_tuesday(date_2) + + ## day of returns same day + date_3 <- as.Date("2020-01-07") + last_tuesday_3 <- last_tuesday(date_2) + + expect_equal( + last_tuesday_1, + as.Date("2019-12-31") + ) + expect_equal( + last_tuesday_2, + as.Date("2020-01-07") + ) + expect_equal( + last_tuesday_3, + as.Date("2020-01-07") + ) +}) + + +tt_ref_test_that("tt_date will give you the date of the tuesday", { + check_api() + + ## Look backwards to the last tt + refdate1 <- tt_date(2018, week = 1) + refdate2 <- tt_date(2019, week = 1) + refdate3 <- tt_date(2020, week = 2) # no data available for week 1! 
+
+  expect_equal(refdate1,
+               as.Date("2018-04-02"))
+  expect_equal(refdate2,
+               as.Date("2019-01-01"))
+  expect_equal(refdate3,
+               as.Date("2020-01-07"))
+})

From fb0e889249a64539d0cc032b2f1809f3e4780c54 Mon Sep 17 00:00:00 2001
From: Ellis Hughes
Date: Thu, 9 Jul 2020 08:56:59 -0700
Subject: [PATCH 42/64] html encoding error (#62)

* update to allow for general encoding

* Fix so only specific locale needed updating (CTYPE)
---
 R/github_api.R                           |  3 ++-
 R/tt_available.R                         |  6 ++---
 tests/testthat/helper-tt_ref_test_that.R | 20 ++++++++++++++
 tests/testthat/test-12-encoding.R        | 33 ++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 4 deletions(-)
 create mode 100644 tests/testthat/test-12-encoding.R

diff --git a/R/github_api.R b/R/github_api.R
index 9d34f08..48ddd49 100644
--- a/R/github_api.R
+++ b/R/github_api.R
@@ -240,7 +240,8 @@ github_page <- function(page_content){
   body <- page_content %>%
     html_nodes("body") %>%
-    as.character
+    as.character %>%
+    enc2native()
 
   read_html(paste0(header, body))
 
diff --git a/R/tt_available.R b/R/tt_available.R
index 1919731..984ba17 100644
--- a/R/tt_available.R
+++ b/R/tt_available.R
@@ -213,9 +213,9 @@ make_tt_dataset_list_html <- function(x, file = tempfile(fileext = ".html")){
     ) %>%
     paste(collapse = "")
 
-  readme <- readme %>%
-    paste("
-    <html><body>",
-    paste("
-    <h1>
-    TidyTuesday Datasets
-    </h1>
-    ",readme),"
-    </body></html>") %>%
+  readme <- paste(
+    "
+    <html><body>",
+    paste("
+    <h1>
+    TidyTuesday Datasets
+    </h1>
+    ", readme),"
+    </body></html>
" ) %>% read_html() %>% github_page() diff --git a/tests/testthat/helper-tt_ref_test_that.R b/tests/testthat/helper-tt_ref_test_that.R index 1898396..dad008b 100644 --- a/tests/testthat/helper-tt_ref_test_that.R +++ b/tests/testthat/helper-tt_ref_test_that.R @@ -31,3 +31,23 @@ tt_no_internet_test_that <- function(desc, ...){ }) testthat::test_that(desc = desc, ...) } + +tt_ref_encoding <- function(desc, encoding, ...){ + ref_repo <- getOption("tidytuesdayR.tt_repo") + options("tidytuesdayR.tt_repo" = "thebioengineer/tt_ref") + + ref_local_ctype <- Sys.getlocale(category = "LC_CTYPE") + quiet <- capture.output({ + Sys.setlocale(category = "LC_CTYPE",locale = encoding) + }) + + on.exit({ + options("tidytuesdayR.tt_repo" = ref_repo) + Sys.setlocale("LC_CTYPE",ref_local_ctype) + }) + + if(get_connectivity()){ + testthat::test_that(desc = desc, ...) + } + +} diff --git a/tests/testthat/test-12-encoding.R b/tests/testthat/test-12-encoding.R new file mode 100644 index 0000000..3e26a04 --- /dev/null +++ b/tests/testthat/test-12-encoding.R @@ -0,0 +1,33 @@ + +tt_ref_encoding( + encoding = "Korean", + "Korean: Non-English encodings don't fail reading unicode from github", + { + check_api() + tt_data <- try(tt_datasets("2019"), silent = TRUE) + res <- nrow(data.frame(unclass(tt_data))) + expect_true(!inherits(tt_data, "try-error")) + expect_equal(res, 52) + }) + +tt_ref_encoding( + encoding = "Japanese", + "Japanese: Non-English encodings don't fail reading unicode from github", + { + check_api() + tt_data <- try(tt_datasets("2019"), silent = TRUE) + res <- nrow(data.frame(unclass(tt_data))) + expect_true(!inherits(tt_data, "try-error")) + expect_equal(res, 52) + }) + +tt_ref_encoding( + encoding = "Russian", + "Russian: Non-English encodings don't fail reading unicode from github", + { + check_api() + tt_data <- try(tt_datasets("2019"), silent = TRUE) + res <- nrow(data.frame(unclass(tt_data))) + expect_true(!inherits(tt_data, "try-error")) + expect_equal(res, 52) + }) From 80ba41e571a1057bf8ce5dd1b6e86da08256c9db Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Thu, 9 Jul 2020 09:23:04 -0700 Subject: [PATCH 43/64] Increment version number --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 294f3c7..ffdb530 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tidytuesdayR Type: Package Title: Access the Weekly 'TidyTuesday' Project Dataset -Version: 1.0.0.9000 +Version: 1.0.1 Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index 83d04bc..3653c6a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# tidytuesdayR (development version) +# tidytuesdayR 1.0.1 * [feature] Provide a template Rmd for users, populated with date and proper `tt_load` call From 5327bbc5b4a6bfa5a24cf59824b51ec321a0c3e1 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Thu, 9 Jul 2020 13:03:15 -0700 Subject: [PATCH 44/64] update news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 3653c6a..2b6a47e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # tidytuesdayR 1.0.1 * [feature] Provide a template Rmd for users, populated with date and proper `tt_load` call +* [bug fix] On CRAN Solaris build, the :link:(link) emoji caused issues. Added fix to change encoding to native. +* [bug fix] `tt_available()` printed out twice. This has been corrected. 
# tidytuesdayR 1.0.0 From 8fdfc6d28a3f9908537009581aaa2a2cb89b6ac4 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Thu, 9 Jul 2020 17:46:00 -0700 Subject: [PATCH 45/64] add CRAN comments --- DESCRIPTION | 2 +- cran-comments.md | 21 ++++++++------------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ffdb530..02d8cbc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,7 +39,7 @@ URL: https://github.com/thebioengineer/tidytuesdayR BugReports: https://github.com/thebioengineer/tidytuesdayR/issues Encoding: UTF-8 LazyData: true -RoxygenNote: 7.1.0 +RoxygenNote: 7.1.1 Depends: R (>= 3.4.0) Suggests: testthat (>= 2.1.0), diff --git a/cran-comments.md b/cran-comments.md index 926e0bc..85aa674 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,22 +1,17 @@ -## Resubmission -This is a resubmission. In this version I have: +## Release summary -* I have wrapped all non-essential functions in a "\donttest" instead of "\dontrun" +* Update to address failing solaris build on CRAN, with some additional features -* Functions will now return a message if there is no internet connectivity or no more API calls available rather than erroring with an informative message +* There are no reverse dependencies to check at this time ## Test environments -* local R installation, R 4.0.0 -* ubuntu 16.04 (on github actions), R 4.0.0 -* mac OS 10.15.4 (on github actions) R-devel, R 4.0.0, +* local R installation, R 4.0.2 +* ubuntu 16.04 (on github actions), , R 4.0.2 +* mac OS 10.15.4 (on github actions) R-devel, R 4.0.2, * win-builder (devel) ## R CMD check results -0 errors | 0 warnings | 1 note +0 errors | 0 warnings | 0 note -* This is a new release. - -## Downstream dependencies - -There are currently no downstream dependencies on this package as it is the first CRAN submission +R CMD check succeeded From 042d9a2d326c70b662309eeb41eb4680f68f776d Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Thu, 9 Jul 2020 17:52:43 -0700 Subject: [PATCH 46/64] spelling --- R/last_tuesday.R | 2 +- R/tt_available.R | 2 +- R/tt_check_date.R | 2 +- R/tt_compile.R | 4 ++-- R/tt_load.R | 2 +- R/tt_load_gh.R | 4 ++-- R/tt_master_file.R | 2 +- R/use_tidytemplate.R | 4 ++-- R/utils.R | 6 +++--- man/available.Rd | 2 +- man/last_tuesday.Rd | 2 +- man/printing.Rd | 4 ++-- man/readme.Rd | 2 +- man/tt_date.Rd | 2 +- man/tt_load.Rd | 2 +- man/tt_load_gh.Rd | 4 ++-- man/use_tidytemplate.Rd | 4 ++-- readme.md | 4 ++-- 18 files changed, 27 insertions(+), 27 deletions(-) diff --git a/R/last_tuesday.R b/R/last_tuesday.R index fdc95de..af915f6 100644 --- a/R/last_tuesday.R +++ b/R/last_tuesday.R @@ -1,6 +1,6 @@ #' Find the most recent tuesday #' -#' Utility function to assist users in identifying the most recent 'tidytuesday' date +#' Utility function to assist users in identifying the most recent 'TidyTuesday' date #' #' @param date todays date as a date object or character string in #' YYYY-MM-DD format. diff --git a/R/tt_available.R b/R/tt_available.R index 984ba17..ed415fd 100644 --- a/R/tt_available.R +++ b/R/tt_available.R @@ -28,7 +28,7 @@ #' #' @name available #' -#' @param year numeric entry representing the year of tidytuesday you want the +#' @param year numeric entry representing the year of TidyTuesday you want the #' list of datasets for. Leave empty for most recent year. #' @param auth github Personal Access Token. 
See PAT section for #' more information diff --git a/R/tt_check_date.R b/R/tt_check_date.R index 1a26af5..8c8be31 100644 --- a/R/tt_check_date.R +++ b/R/tt_check_date.R @@ -1,4 +1,4 @@ -#' @title Get date of Tidytuesday, given the year and week +#' @title Get date of TidyTuesday, given the year and week #' @description Sometimes we don't know the date we want, but we do know the #' week. this function provides the ability to pass the year and week we are #' interested in to get the correct date diff --git a/R/tt_compile.R b/R/tt_compile.R index e56146a..b596c30 100644 --- a/R/tt_compile.R +++ b/R/tt_compile.R @@ -1,5 +1,5 @@ -#' @title Get TidyTuesday Readme and list of files and HTML based on the date -#' @param date date of tidytuesday of interest +#' @title Get TidyTuesday readme and list of files and HTML based on the date +#' @param date date of TidyTuesday of interest #' @param auth github Personal Access Token #' #' @importFrom lubridate year diff --git a/R/tt_load.R b/R/tt_load.R index f5d7154..63cb4a0 100644 --- a/R/tt_load.R +++ b/R/tt_load.R @@ -18,7 +18,7 @@ #' to set the PAT. #' #' @return tt_data object, which contains data that can be accessed via `$`, -#' and the readme for the weeks tidytuesday through printing the object or +#' and the readme for the weeks TidyTuesday through printing the object or #' calling `readme()` #' #' @importFrom purrr map diff --git a/R/tt_load_gh.R b/R/tt_load_gh.R index 0cc7886..a0f7ca1 100644 --- a/R/tt_load_gh.R +++ b/R/tt_load_gh.R @@ -1,6 +1,6 @@ #' @title Load TidyTuesday data from Github #' -#' @description Pulls the Readme and URLs of the data from the TidyTuesday +#' @description Pulls the readme and URLs of the data from the TidyTuesday #' github folder based on the date provided #' #' @param x string representation of the date of data to pull, in @@ -18,7 +18,7 @@ #' to set the PAT. #' #' @return a 'tt' object. This contains the files available for the week, -#' readme html, and the date of the tidytuesday. +#' readme html, and the date of the TidyTuesday. #' @export #' @examples #' diff --git a/R/tt_master_file.R b/R/tt_master_file.R index 270ae77..84c5376 100644 --- a/R/tt_master_file.R +++ b/R/tt_master_file.R @@ -1,7 +1,7 @@ #' Get Master List of Files from TidyTuesday #' #' Import or update dataset from github that records the entire list of objects -#' from tidytuesday +#' from TidyTuesday #' #' @param force force the update to occur even if the SHA matches #' @param auth github Personal Access Token. diff --git a/R/use_tidytemplate.R b/R/use_tidytemplate.R index acf9ae2..ed40129 100644 --- a/R/use_tidytemplate.R +++ b/R/use_tidytemplate.R @@ -1,10 +1,10 @@ #' @title Call and open the tidytemplate #' @description Use the tidytemplate Rmd for starting your analysis with a #' leg up for processing -#' @param name name of your tidytuesday analysis file +#' @param name name of your TidyTuesday analysis file #' @param open should the file be opened after being created #' @param ... arguments to be passed to \link[usethis]{use_template} -#' @param refdate date to use as reference to determine which 'tidytuesday' to +#' @param refdate date to use as reference to determine which TidyTuesday to #' use for the template. Either date object or character string in #' YYYY-MM-DD format. 
#' @importFrom usethis use_template diff --git a/R/utils.R b/R/utils.R index 996c5f4..7f36ac0 100644 --- a/R/utils.R +++ b/R/utils.R @@ -3,7 +3,7 @@ #' In tidytuesdayR there are nice print methods for the objects that were used #' to download and store the data from the TidyTuesday repo. They will always #' print the available datasets/files. If there is a readme available, -#' it will try to display the tidytuesday readme. +#' it will try to display the TidyTuesday readme. #' #' @name printing #' @@ -38,7 +38,7 @@ print.tt_data <- function(x, ...) { #' @rdname printing #' @importFrom tools file_path_sans_ext #' @export -#' @return used to show available datasets for the tidytuesday +#' @return used to show available datasets for the TidyTuesday #' print.tt <- function(x,...){ message( @@ -54,7 +54,7 @@ print.tt <- function(x,...){ #' @return NULL #' @export #' @return Does not return anything. Used to show readme of the downloaded -#' tidytuesday dataset in the Viewer. +#' TidyTuesday dataset in the Viewer. #' @examples #' \donttest{ #' tt_output <- tt_load_gh("2019-01-15") diff --git a/man/available.Rd b/man/available.Rd index 1bffecf..f2304db 100644 --- a/man/available.Rd +++ b/man/available.Rd @@ -14,7 +14,7 @@ tt_datasets(year, auth = github_pat()) \item{auth}{github Personal Access Token. See PAT section for more information} -\item{year}{numeric entry representing the year of tidytuesday you want the +\item{year}{numeric entry representing the year of TidyTuesday you want the list of datasets for. Leave empty for most recent year.} } \value{ diff --git a/man/last_tuesday.Rd b/man/last_tuesday.Rd index 9843835..045b59e 100644 --- a/man/last_tuesday.Rd +++ b/man/last_tuesday.Rd @@ -11,7 +11,7 @@ last_tuesday(date = today(tzone = "America/New_York")) YYYY-MM-DD format.} } \description{ -Utility function to assist users in identifying the most recent 'tidytuesday' date +Utility function to assist users in identifying the most recent 'TidyTuesday' date } \examples{ diff --git a/man/printing.Rd b/man/printing.Rd index 943b1a0..9ab17ff 100644 --- a/man/printing.Rd +++ b/man/printing.Rd @@ -18,13 +18,13 @@ \value{ used to show readme and list names of available datasets -used to show available datasets for the tidytuesday +used to show available datasets for the TidyTuesday } \description{ In tidytuesdayR there are nice print methods for the objects that were used to download and store the data from the TidyTuesday repo. They will always print the available datasets/files. If there is a readme available, - it will try to display the tidytuesday readme. + it will try to display the TidyTuesday readme. } \examples{ diff --git a/man/readme.Rd b/man/readme.Rd index a835bd3..95a0fb5 100644 --- a/man/readme.Rd +++ b/man/readme.Rd @@ -11,7 +11,7 @@ readme(tt) } \value{ Does not return anything. Used to show readme of the downloaded - tidytuesday dataset in the Viewer. + TidyTuesday dataset in the Viewer. 
} \description{ Readme HTML maker and Viewer diff --git a/man/tt_date.Rd b/man/tt_date.Rd index 36d727c..d6f1b0b 100644 --- a/man/tt_date.Rd +++ b/man/tt_date.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tt_check_date.R \name{tt_date} \alias{tt_date} -\title{Get date of Tidytuesday, given the year and week} +\title{Get date of TidyTuesday, given the year and week} \usage{ tt_date(year, week) } diff --git a/man/tt_load.Rd b/man/tt_load.Rd index 1848496..59d0522 100644 --- a/man/tt_load.Rd +++ b/man/tt_load.Rd @@ -24,7 +24,7 @@ information} } \value{ tt_data object, which contains data that can be accessed via `$`, - and the readme for the weeks tidytuesday through printing the object or + and the readme for the weeks TidyTuesday through printing the object or calling `readme()` } \description{ diff --git a/man/tt_load_gh.Rd b/man/tt_load_gh.Rd index 97e7c1a..e14541d 100644 --- a/man/tt_load_gh.Rd +++ b/man/tt_load_gh.Rd @@ -18,10 +18,10 @@ information} } \value{ a 'tt' object. This contains the files available for the week, - readme html, and the date of the tidytuesday. + readme html, and the date of the TidyTuesday. } \description{ -Pulls the Readme and URLs of the data from the TidyTuesday +Pulls the readme and URLs of the data from the TidyTuesday github folder based on the date provided } \section{PAT}{ diff --git a/man/use_tidytemplate.Rd b/man/use_tidytemplate.Rd index 1ee6fc8..f6bfc43 100644 --- a/man/use_tidytemplate.Rd +++ b/man/use_tidytemplate.Rd @@ -7,13 +7,13 @@ use_tidytemplate(name = NULL, open = interactive(), ..., refdate = today()) } \arguments{ -\item{name}{name of your tidytuesday analysis file} +\item{name}{name of your TidyTuesday analysis file} \item{open}{should the file be opened after being created} \item{...}{arguments to be passed to \link[usethis]{use_template}} -\item{refdate}{date to use as reference to determine which 'tidytuesday' to +\item{refdate}{date to use as reference to determine which TidyTuesday to use for the template. Either date object or character string in YYYY-MM-DD format.} } diff --git a/readme.md b/readme.md index 92cfee6..5383c9f 100644 --- a/readme.md +++ b/readme.md @@ -82,10 +82,10 @@ print(tt_data) ### TidyTemplate -As part of the goal of making partcipating in #TidyTuesday easier, {tidytuesdayR} now also provides a template! +As part of the goal of making participating in #TidyTuesday easier, {tidytuesdayR} now also provides a template! To use it, just use the `use_tidytemplate()` function! -By default, the template will assume to be using the most recent tidytuesday. +By default, the template will assume to be using the most recent TidyTuesday. However, you can pass a date object or character string in YYYY-MM-DD format defining a different date you want to use. If you don't recall the exact date, no worries, you can use the `tt_date()` function to calculate and get the date From 54c945eb067aa24ec66914d7be508a32d2a12354 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Thu, 9 Jul 2020 23:08:51 -0700 Subject: [PATCH 47/64] cran release --- CRAN-RELEASE | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CRAN-RELEASE b/CRAN-RELEASE index ad5feef..c8019de 100644 --- a/CRAN-RELEASE +++ b/CRAN-RELEASE @@ -1,2 +1,2 @@ -This package was submitted to CRAN on 2020-06-25. -Once it is accepted, delete this file and tag the release (commit e11af077e1). +This package was submitted to CRAN on 2020-07-09. +Once it is accepted, delete this file and tag the release (commit 042d9a2d32). 
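
The next patch (PATCH 48) forwards the `...` arguments of `tt_download_file()` on to the
parsing functions. A minimal sketch of what that passthrough enables, under assumptions:
`col_types` is just one example of a `readr::read_delim()` argument that could be forwarded,
and (per the `tt_download()` docs) anything passed this way is applied to every file parsed
in that call, so it is shown here on a single-file download.

``` r
library(tidytuesdayR)

# Sketch only: assumes the `...` passthrough added in PATCH 48, where
# arguments supplied here reach tt_parse_blob() and, for delimited text
# files, readr::read_delim().
tt <- tt_load_gh("2019-01-15")

agencies <- tt_download_file(
  tt,
  "agencies.csv",
  # read every column as character instead of letting readr guess
  col_types = readr::cols(.default = readr::col_character())
)
```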
From 996f3c59a58d69ed386f49f258522dcd534f23d8 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Tue, 21 Jul 2020 22:27:39 -0700 Subject: [PATCH 48/64] update func to pass args to parsing funcs --- R/tt_download_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tt_download_file.R b/R/tt_download_file.R index 12b9ade..c9dd3c0 100644 --- a/R/tt_download_file.R +++ b/R/tt_download_file.R @@ -80,7 +80,7 @@ tt_download_file.character <- auth = auth ) - tt_parse_blob(blob, file_info = file_info[file_info$data_file == x,]) + tt_parse_blob(blob, file_info = file_info[file_info$data_file == x,], ...) } else { stop(paste0( From 4d6d6086ea09dc519bd7a0b1f752dbf6551efd47 Mon Sep 17 00:00:00 2001 From: thebioengineer Date: Tue, 21 Jul 2020 22:29:09 -0700 Subject: [PATCH 49/64] Increment version number --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 02d8cbc..f41ba40 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: tidytuesdayR Type: Package Title: Access the Weekly 'TidyTuesday' Project Dataset -Version: 1.0.1 +Version: 1.0.1.9000 Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index 2b6a47e..ec61288 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# tidytuesdayR (development version) + # tidytuesdayR 1.0.1 * [feature] Provide a template Rmd for users, populated with date and proper `tt_load` call From 8d9d7dad8e2eca13669feae0ff50294917ea62a6 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Mon, 27 Jul 2020 10:57:33 -0700 Subject: [PATCH 50/64] Update github_api.R (#67) malformed check against github api, for some reason worked everywhere but rstudio cloud? #65 --- R/github_api.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/github_api.R b/R/github_api.R index 48ddd49..6a9c733 100644 --- a/R/github_api.R +++ b/R/github_api.R @@ -492,7 +492,7 @@ check_connectivity <- function(rerun = FALSE){ #if internet connection is not set or is false, lets try again if(!getOption("tidytuesdayR.tt_testing", FALSE)){ if(is.na(internet_connection) | !internet_connection | rerun){ - res <- try(GET("https:/api.github.com"), silent = TRUE) + res <- try(GET("https://api.github.com"), silent = TRUE) if(inherits(res,"try-error")){ options("tidytuesdayR.tt_internet_connectivity" = FALSE) }else{ From 3b0cb1fe6fdf8b12a60a105a3fba08169df9c849 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Wed, 29 Jul 2020 11:41:05 -0700 Subject: [PATCH 51/64] Documentation correction (#70) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update to make missing delim acceptable (#66) * Update to make missing delim acceptable * Update R-CMD-check.yaml * Update R-CMD-check.yaml undo updates 🙄 * Update tt_download.R Correct documentation * update docs * fix order of link and code --- R/tt_download.R | 3 +-- R/tt_parse.R | 7 ++++++- man/tt_download.Rd | 3 +-- tests/testthat/test-06-tt_parse_blob.R | 12 +++++++++++- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/R/tt_download.R b/R/tt_download.R index d479913..5c18d98 100644 --- a/R/tt_download.R +++ b/R/tt_download.R @@ -2,8 +2,7 @@ #' #' Download all or specific files identified in the tt dataset #' -#' @param tt string representation of the date of data to pull, in YYYY-MM-dd -#' format, or just numeric entry for year +#' @param tt a `tt` object, output from \code{\link{tt_load_gh}} #' @param files List the file names to download. Default to asking. #' @param ... 
pass methods to the parsing functions. These will be passed to #' ALL files, so be careful. diff --git a/R/tt_parse.R b/R/tt_parse.R index 0ec7ba7..ca7b230 100644 --- a/R/tt_parse.R +++ b/R/tt_parse.R @@ -21,7 +21,12 @@ tt_parse_blob <- function(blob, ..., file_info) { tt_parse_text( blob = blob, func = readr::read_delim, - delim = file_info[["delim"]], + delim = ifelse(is.na(file_info[["delim"]]), + switch(tolower(file_info$data_type), + csv = ",", + tsv = "\t", + ","), + file_info[["delim"]]), progress = FALSE, ... ) diff --git a/man/tt_download.Rd b/man/tt_download.Rd index 02dbefc..d48bc74 100644 --- a/man/tt_download.Rd +++ b/man/tt_download.Rd @@ -9,8 +9,7 @@ Download all or specific files identified in the tt dataset} tt_download(tt, files = c("All"), ..., branch = "master", auth = github_pat()) } \arguments{ -\item{tt}{string representation of the date of data to pull, in YYYY-MM-dd -format, or just numeric entry for year} +\item{tt}{a `tt` object, output from \code{\link{tt_load_gh}}} \item{files}{List the file names to download. Default to asking.} diff --git a/tests/testthat/test-06-tt_parse_blob.R b/tests/testthat/test-06-tt_parse_blob.R index eed5fc2..de2a46e 100644 --- a/tests/testthat/test-06-tt_parse_blob.R +++ b/tests/testthat/test-06-tt_parse_blob.R @@ -95,12 +95,22 @@ test_that("`tt_parse_blob` can figure out how to handle text or raw",{ delim = "") ) - + result_text_guess <- + tt_parse_blob( + blob = "col1,col2\nval1,val2\nval3,val4", + file_info = data.frame( + data_file = "text.csv", + data_type = "csv", + delim = NA, + stringsAsFactors = FALSE + ) + ) expect_equivalent(result_text_comma,expected_text) expect_equivalent(result_text_tab,expected_text) expect_equivalent(result_text_special,expected_text) expect_equivalent(result_raw_rda,"RAW VALUE") + expect_equivalent(result_text_guess,expected_text) }) From ea803e0550593635296e6a4b994558a65d7e3ba1 Mon Sep 17 00:00:00 2001 From: Frie Date: Tue, 27 Oct 2020 14:12:41 +0100 Subject: [PATCH 52/64] very small typo --- inst/templates/tidytemplate.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/templates/tidytemplate.Rmd b/inst/templates/tidytemplate.Rmd index 36baeeb..844a6d1 100644 --- a/inst/templates/tidytemplate.Rmd +++ b/inst/templates/tidytemplate.Rmd @@ -23,7 +23,7 @@ library(tidytuesdayR) # Load the weekly Data -Dowload the weekly data and make available in the `tt` object. +Download the weekly data and make available in the `tt` object. ```{r Load} From a00dfc752fc8460d2f4012930ad0b9e70ae00503 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Mon, 4 Jan 2021 11:56:49 -0800 Subject: [PATCH 53/64] updates to correct when there are missing weeks in the dataset --- R/tt_check_date.R | 20 +++++------ R/utils.R | 53 +++++++++++++++++++++++++++++ tests/testthat/test-05-tt_load_gh.R | 8 ++--- tests/testthat/test-10-utils.R | 25 ++++++++++++++ 4 files changed, 90 insertions(+), 16 deletions(-) diff --git a/R/tt_check_date.R b/R/tt_check_date.R index 8c8be31..476d232 100644 --- a/R/tt_check_date.R +++ b/R/tt_check_date.R @@ -51,23 +51,18 @@ tt_check_date.year <- function(x, week) { tt_folders <- tt_weeks(x) - if (week > length(tt_folders$week_desc)) { + if (!week %in% tt_folders$week_desc & week > 1) { stop( paste0( - "Only ", - length(tt_folders$week_desc), - " TidyTuesday Weeks exist in ", - x, - ". 
Please enter a value for week between 1 and ", - length(tt_folders$week_desc) + "'",week,"' is not a valid TidyTuesday week entry for ",x,".\n", + "Please enter a valid value for week:\n\t", + contiguous_weeks(tt_folders$week_desc) ) ) } else if (week < 1) { - stop(paste0( - "Week entry must be a valid positive integer between 1 and ", - length(tt_folders$week_desc), - "." - )) + stop( + "Week entry must be a valid positive integer value." + ) } tt_date <- tt_folders$folders[tt_folders$week_desc == week] @@ -156,3 +151,4 @@ tt_closest_date <- function(inputdate, availabledates) { availabledates[ which.min(abs(difftime(inputdate, availabledates, units = "days")))] } + diff --git a/R/utils.R b/R/utils.R index 7f36ac0..6f4df7a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -85,3 +85,56 @@ html_viewer <- function(url, is_interactive = interactive()){ browseURL(url = url) } } + +#' @noRd +contiguous_weeks <- function(week_vctr){ + if(length(week_vctr) == 1){ + text_out <- as.character(week_vctr) + }else{ + is_not_contig <- which(diff(week_vctr) != 1) + if(length(is_not_contig) == 0){ + text_out <- paste0(week_vctr[1], "-",week_vctr[length(week_vctr)]) + }else{ + if(is_not_contig[[1]] == 1){ + text_out <- as.character(week_vctr[1]) + }else{ + text_out <- paste0(week_vctr[1], "-",week_vctr[is_not_contig[[1]]]) + } + contig_split <- 1 + while(contig_split < length(is_not_contig)){ + + if( diff(c(is_not_contig[contig_split], is_not_contig[contig_split+1])) == 1){ + text_out <- paste0( + text_out, ", ", week_vctr[is_not_contig[contig_split]+1] + ) + }else{ + text_out <- paste0( + text_out,", ", paste0(week_vctr[is_not_contig[contig_split]+1], "-",week_vctr[is_not_contig[contig_split+1]]) + ) + } + contig_split %+=% 1 + } + + if(length(week_vctr) == (is_not_contig[contig_split]+1)){ + text_out <- paste0( + text_out, ", ", week_vctr[length(week_vctr)] + ) + }else{ + text_out <- paste0( + text_out,", ", paste0(week_vctr[is_not_contig[contig_split]+1], "-",week_vctr[length(week_vctr)]) + ) + } + }} + return(text_out) +} + +`%+=%` <- function(x,y, env = parent.frame()){ + x_name <- as.character(substitute(x)) + x_new <- x + y + assign( + x = x_name, + value = x_new, + envir = env + ) +} + diff --git a/tests/testthat/test-05-tt_load_gh.R b/tests/testthat/test-05-tt_load_gh.R index 0141613..3a07833 100644 --- a/tests/testthat/test-05-tt_load_gh.R +++ b/tests/testthat/test-05-tt_load_gh.R @@ -45,7 +45,7 @@ tt_ref_test_that( check_api() testthat::expect_error(tt_load_gh(2018, 92), - "Please enter a value for week between 1") + "'92' is not a valid TidyTuesday week entry for") testthat::expect_error(tt_load_gh(2017, 92), "TidyTuesday did not exist for") }) @@ -66,7 +66,7 @@ tt_ref_test_that( check_api() expect_error( tt_load_gh(2018, 92), - "Please enter a value for week between 1" + "'92' is not a valid TidyTuesday week entry for" ) expect_error( tt_load_gh(2017, 92), @@ -80,7 +80,7 @@ tt_ref_test_that( check_api() expect_error( tt_load_gh(2018, 92), - "Please enter a value for week between 1" + "'92' is not a valid TidyTuesday week entry for" ) expect_error( tt_load_gh(2017, 92), @@ -103,7 +103,7 @@ tt_ref_test_that( check_api() testthat::expect_error( tt_load_gh(2019, 0), - "Week entry must be a valid positive integer" + "Week entry must be a valid positive integer value." 
) }) diff --git a/tests/testthat/test-10-utils.R b/tests/testthat/test-10-utils.R index 8a9f217..041827d 100644 --- a/tests/testthat/test-10-utils.R +++ b/tests/testthat/test-10-utils.R @@ -91,3 +91,28 @@ test_that("readme() will attempt to display the contents of the readme attribute ) }) + + +test_that("contiguous_weeks() will attempt to display the contiguous weeks, collapsing continuous weeks",{ + + expect_equal(contiguous_weeks(1),"1") + + expect_equal(contiguous_weeks(c(1,2)),"1-2") + + expect_equal(contiguous_weeks(c(1:5)),"1-5") + + expect_equal(contiguous_weeks(c(1:5, 7)),"1-5, 7") + + expect_equal(contiguous_weeks(c(1:5, 7, 8)),"1-5, 7-8") + + expect_equal(contiguous_weeks(c(1:5, 7, 9)),"1-5, 7, 9") + + expect_equal(contiguous_weeks(c(1:5, 7, 9:11)),"1-5, 7, 9-11") + + expect_equal(contiguous_weeks(c(1:5, 7, 9:11, 15)),"1-5, 7, 9-11, 15") + + expect_equal(contiguous_weeks(c(5, 7, 9:11, 15)),"5, 7, 9-11, 15") + + expect_equal(contiguous_weeks(c(5, 7, 9:11, 15:100)),"5, 7, 9-11, 15-100") + +}) From 9949c2b285baa7e12b439fab7cf46fec98733571 Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Mon, 4 Jan 2021 13:48:55 -0800 Subject: [PATCH 54/64] update github actions --- .github/workflows/R-CMD-check.yaml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index a7bf76e..73f3046 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -20,10 +20,14 @@ jobs: matrix: config: - {os: windows-latest, r: 'release'} + - {os: windows-latest, r: '3.6'} - {os: macOS-latest, r: 'release'} - - {os: macOS-latest, r: 'devel'} - - {os: ubuntu-16.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - + - {os: ubuntu-16.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest", http-user-agent: "R/4.0.0 (ubuntu-16.04) R (4.0.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" } + - {os: ubuntu-16.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} + - {os: ubuntu-16.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} + - {os: ubuntu-16.04, r: '3.5', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} + - {os: ubuntu-16.04, r: '3.4', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} + - {os: ubuntu-16.04, r: '3.3', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} From 2aa63455331ab66c952f3cf0f28bd711a1922c9e Mon Sep 17 00:00:00 2001 From: Ellis Hughes Date: Mon, 4 Jan 2021 14:15:02 -0800 Subject: [PATCH 55/64] remove Rv3.3 matrix from github action --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 73f3046..c1fcb28 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -27,7 +27,7 @@ jobs: - {os: ubuntu-16.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - {os: ubuntu-16.04, r: '3.5', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - {os: ubuntu-16.04, r: '3.4', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} - - {os: ubuntu-16.04, r: '3.3', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"} + env: 
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true RSPM: ${{ matrix.config.rspm }} From a45c190494bfade848098d26131a1f8844e1b2a5 Mon Sep 17 00:00:00 2001 From: Enrico Spinielli Date: Sat, 27 Mar 2021 16:59:28 +0100 Subject: [PATCH 56/64] output needs to be html_document --- inst/templates/tidytemplate.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/templates/tidytemplate.Rmd b/inst/templates/tidytemplate.Rmd index 36baeeb..772dc92 100644 --- a/inst/templates/tidytemplate.Rmd +++ b/inst/templates/tidytemplate.Rmd @@ -1,7 +1,7 @@ --- title: "TidyTemplate" date: {{{call_date}}} -output: html_output +output: html_document --- # TidyTuesday From e150dffc0520e1f4ebd435977afa6e7b3ac41f7a Mon Sep 17 00:00:00 2001 From: Maximilian Girlich Date: Thu, 29 Apr 2021 06:23:57 +0000 Subject: [PATCH 57/64] fix partial matches --- R/tt_download_file.R | 4 ++-- R/tt_parse.R | 6 +++--- tests/testthat/test-06-tt_parse_blob.R | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/R/tt_download_file.R b/R/tt_download_file.R index c9dd3c0..210f25a 100644 --- a/R/tt_download_file.R +++ b/R/tt_download_file.R @@ -67,7 +67,7 @@ tt_download_file.character <- file_info <- attr(tt, ".files") - if (x %in% file_info$data_file) { + if (x %in% file_info$data_files) { tt_date <- attr(tt, ".date") tt_year <- year(tt_date) @@ -80,7 +80,7 @@ tt_download_file.character <- auth = auth ) - tt_parse_blob(blob, file_info = file_info[file_info$data_file == x,], ...) + tt_parse_blob(blob, file_info = file_info[file_info$data_files == x,], ...) } else { stop(paste0( diff --git a/R/tt_parse.R b/R/tt_parse.R index ca7b230..482ef22 100644 --- a/R/tt_parse.R +++ b/R/tt_parse.R @@ -15,9 +15,9 @@ tt_parse_blob <- function(blob, ..., file_info) { "xls" = tt_parse_binary(blob, readxl::read_xls, ..., filename = file_info$data_files), "xlsx" = tt_parse_binary(blob, readxl::read_xlsx, ..., - filename = file_info$data_file), + filename = file_info$data_files), "rds" = tt_parse_binary(blob, readRDS, - filename = file_info$data_file), + filename = file_info$data_files), tt_parse_text( blob = blob, func = readr::read_delim, @@ -34,7 +34,7 @@ tt_parse_blob <- function(blob, ..., file_info) { } # rda option just in case -# "rda" = tt_parse_binary(blob, read_rda, filename = file_info$data_file), +# "rda" = tt_parse_binary(blob, read_rda, filename = file_info$data_files), #' @title utility to assist with parsing the raw binary data diff --git a/tests/testthat/test-06-tt_parse_blob.R b/tests/testthat/test-06-tt_parse_blob.R index de2a46e..9fdb065 100644 --- a/tests/testthat/test-06-tt_parse_blob.R +++ b/tests/testthat/test-06-tt_parse_blob.R @@ -61,7 +61,7 @@ test_that("`tt_parse_blob` can figure out how to handle text or raw",{ tt_parse_blob( blob = "col1,col2\nval1,val2\nval3,val4", file_info = data.frame( - data_file = "text.txt", + data_files = "text.txt", data_type = "txt", delim = ",", stringsAsFactors = FALSE @@ -72,7 +72,7 @@ test_that("`tt_parse_blob` can figure out how to handle text or raw",{ tt_parse_blob( "col1\tcol2\nval1\tval2\nval3\tval4", file_info = data.frame( - data_file = "text.txt", + data_files = "text.txt", data_type = "txt", delim = "\t", stringsAsFactors = FALSE @@ -82,7 +82,7 @@ test_that("`tt_parse_blob` can figure out how to handle text or raw",{ result_text_special <- tt_parse_blob( "col1|col2\nval1|val2\nval3|val4", - file_info = data.frame(data_file = "text.txt", + file_info = data.frame(data_files = "text.txt", data_type = "txt", delim = "|", stringsAsFactors = FALSE) ) @@ -90,7 
@@ -90,7 +90,7 @@ test_that("`tt_parse_blob` can figure out how to handle text or raw",{
 
   result_raw_rda <- tt_parse_blob(
     input_raw,
-    file_info = data.frame(data_file = "test_rds.rds",
+    file_info = data.frame(data_files = "test_rds.rds",
                            data_type = "rds", delim = "")
   )
 
@@ -99,7 +99,7 @@ test_that("`tt_parse_blob` can figure out how to handle text or raw",{
     tt_parse_blob(
       blob = "col1,col2\nval1,val2\nval3,val4",
       file_info = data.frame(
-        data_file = "text.csv",
+        data_files = "text.csv",
         data_type = "csv",
         delim = NA,
         stringsAsFactors = FALSE

From 7f7d41c046542299028c7a54bad713676901fd96 Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Sun, 23 Jan 2022 11:48:22 -0800
Subject: [PATCH 58/64] Add rerun on 502 error and update test to look at a
 different RDS

---
 R/github_api.R                        | 24 ++++++++++++++++++------
 tests/testthat/test-07-tt_read_data.R |  3 ++-
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/R/github_api.R b/R/github_api.R
index 6a9c733..60bca38 100644
--- a/R/github_api.R
+++ b/R/github_api.R
@@ -299,7 +299,7 @@ github_pat <- function (quiet = TRUE) {
 #'
 #' @importFrom httr GET add_headers
 #' @importFrom jsonlite base64_enc
-github_GET <- function(url, auth = github_pat(), ...){
+github_GET <- function(url, auth = github_pat(), ..., times_run = 1){
 
   if(!is.null(auth)){
     headers <- add_headers(
@@ -328,15 +328,24 @@ github_GET <- function(url, auth = github_pat(), ..., times_run = 1){
   if(inherits(get_res,"try-error")){
     check_connectivity(rerun=TRUE)
-    if(!check_connectivity()){
+    if(!get_connectivity()){
       return(no_internet_error())
     }else{
       ## Unexpected issue
       stop(attr(get_res,"condition"))
     }
   }else{
-    rate_limit_update(header_to_rate_info(get_res))
-    return(get_res)
+    if(get_res$status_code == 502){
+      ## rerun when 502 status code - server error, not tidytuesdayR code error
+      if(times_run < 5){
+        github_GET(url, auth = github_pat(), ..., times_run = times_run + 1)
+      }else{
+        tt_gh_error.response(get_res)
+      }
+    }else{
+      rate_limit_update(header_to_rate_info(get_res))
+      return(get_res)
+    }
   }
 }else{
   rate_limit_error()
 }
@@ -397,8 +406,11 @@ rate_limit_update <- function(rate_info = NULL, auth = github_pat()){
     } else {
       rate_lim <- GET("https://api.github.com/rate_limit")
     }
-    rate_info <- GET_json(rate_lim)$rate
-    rate_info$remaining = rate_info$remaining - 1 # we have one less than we think
+
+    if(rate_lim$status_code == 200){
+      rate_info <- GET_json(rate_lim)$rate
+      rate_info$remaining = rate_info$remaining - 1 # we have one less than we think
+    }
   }
 }
diff --git a/tests/testthat/test-07-tt_read_data.R b/tests/testthat/test-07-tt_read_data.R
index c26f8b3..a741c9f 100644
--- a/tests/testthat/test-07-tt_read_data.R
+++ b/tests/testthat/test-07-tt_read_data.R
@@ -58,10 +58,11 @@ tt_ref_test_that(
 tt_ref_test_that(
   "tt_read_data can load RDS files just as easily as text files",{
     check_api()
+    skip_on_cran()
 
     tt_gh_data <- tt_load_gh("2019-01-01")
 
     expect_is(
-      tt_download_file(tt_gh_data, 1),
+      tt_download_file(tt_gh_data, 2),
       c("tbl_df","tbl","data.frame")
     )

From 289d6d9b5714045e5f3e39139dbc3914c0fbfeeb Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Sun, 23 Jan 2022 17:07:09 -0800
Subject: [PATCH 59/64] update ubuntu runners to 18.04

---
 .github/workflows/R-CMD-check.yaml    | 11 ++++++-----
 tests/testthat/test-07-tt_read_data.R |  1 +
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index c1fcb28..301471a 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -22,11 +22,12 @@ jobs:
           - {os: windows-latest, r: 'release'}
           - {os: windows-latest, r: '3.6'}
           - {os: macOS-latest, r: 'release'}
-          - {os: ubuntu-16.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest", http-user-agent: "R/4.0.0 (ubuntu-16.04) R (4.0.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" }
-          - {os: ubuntu-16.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04, r: '3.5', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
-          - {os: ubuntu-16.04, r: '3.4', rspm: "https://packagemanager.rstudio.com/cran/__linux__/xenial/latest"}
+          - {os: ubuntu-18.04, r: 'devel', http-user-agent: 'release'}
+          - {os: ubuntu-18.04, r: 'release'}
+          - {os: ubuntu-18.04, r: 'oldrel-1'}
+          - {os: ubuntu-18.04, r: 'oldrel-2'}
+          - {os: ubuntu-18.04, r: 'oldrel-3'}
+          - {os: ubuntu-18.04, r: 'oldrel-4'}
 
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
diff --git a/tests/testthat/test-07-tt_read_data.R b/tests/testthat/test-07-tt_read_data.R
index a741c9f..a0492fe 100644
--- a/tests/testthat/test-07-tt_read_data.R
+++ b/tests/testthat/test-07-tt_read_data.R
@@ -59,6 +59,7 @@ tt_ref_test_that(
   "tt_read_data can load RDS files just as easily as text files",{
     check_api()
     skip_on_cran()
+
     tt_gh_data <- tt_load_gh("2019-01-01")
 
     expect_is(

From 13728dd6a358c575d49bbdb67d51b9a312edf918 Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Sun, 30 Jan 2022 10:41:53 -0800
Subject: [PATCH 60/64] limit 502 reruns and add rate limit check; skip tests
 when unable to reach github

---
 R/github_api.R                           | 8 ++++++--
 tests/testthat/helper-tt_ref_test_that.R | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/R/github_api.R b/R/github_api.R
index 60bca38..d22887a 100644
--- a/R/github_api.R
+++ b/R/github_api.R
@@ -337,8 +337,12 @@ github_GET <- function(url, auth = github_pat(), ..., times_run = 1){
   }else{
     if(get_res$status_code == 502){
       ## rerun when 502 status code - server error, not tidytuesdayR code error
-      if(times_run < 5){
-        github_GET(url, auth = github_pat(), ..., times_run = times_run + 1)
+      if(times_run < 3){
+        if(rate_limit_check() > 0){
+          github_GET(url, auth = github_pat(), ..., times_run = times_run + 1)
+        }else{
+          rate_limit_error()
+        }
       }else{
         tt_gh_error.response(get_res)
       }
diff --git a/tests/testthat/helper-tt_ref_test_that.R b/tests/testthat/helper-tt_ref_test_that.R
index dad008b..2e03675 100644
--- a/tests/testthat/helper-tt_ref_test_that.R
+++ b/tests/testthat/helper-tt_ref_test_that.R
@@ -13,7 +13,9 @@ tt_ref_test_that <- function(desc, ...){
   }
 }
 
+#' @importFrom testthat skip skip_if_offline
 check_api <- function(n = 30){
+  skip_if_offline("github.com")
   if(rate_limit_check(quiet = TRUE) <= n ){
     skip("Rate Limit Met")
   }

From 84bbeb6c222d47dc50bd93ee4d50e63f8761aaf5 Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Sun, 30 Jan 2022 16:08:34 -0800
Subject: [PATCH 61/64] Check in examples that the session is interactive to
 prevent failures

---
 R/tt_load_gh.R | 2 +-
 R/utils.R      | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/tt_load_gh.R b/R/tt_load_gh.R
index a0f7ca1..b99bfc9 100644
--- a/R/tt_load_gh.R
+++ b/R/tt_load_gh.R
@@ -23,7 +23,7 @@
 #' @examples
 #'
 #' # check to make sure there are requests still available
-#' if(rate_limit_check(quiet = TRUE) > 10){
+#' if(rate_limit_check(quiet = TRUE) > 10 & interactive()){
 #'   tt_gh <- tt_load_gh("2019-01-15")
 #'
 #'   ## readme attempts to open the readme for the weekly dataset
diff --git a/R/utils.R b/R/utils.R
index 6f4df7a..580784e 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -57,9 +57,11 @@ print.tt <- function(x,...){
 #'   TidyTuesday dataset in the Viewer.
 #' @examples
 #' \donttest{
+#' if(interactive()){
 #' tt_output <- tt_load_gh("2019-01-15")
 #' readme(tt_output)
 #' }
+#' }
 readme <- function(tt) {
   if ("tt_data" %in% class(tt)) {
     tt <- attr(tt, ".tt")

From 7474682a5105960a31fe63a55930e94cbc5b5ae0 Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Sun, 30 Jan 2022 17:22:20 -0800
Subject: [PATCH 62/64] have examples check that the api rate limit has not
 been reached.

---
 R/utils.R         | 2 +-
 man/readme.Rd     | 2 ++
 man/tt_load_gh.Rd | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index 580784e..c63a6ad 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -57,7 +57,7 @@ print.tt <- function(x,...){
 #'   TidyTuesday dataset in the Viewer.
 #' @examples
 #' \donttest{
-#' if(interactive()){
+#' if(rate_limit_check(quiet = TRUE) > 10 & interactive()){
 #' tt_output <- tt_load_gh("2019-01-15")
 #' readme(tt_output)
 #' }
diff --git a/man/readme.Rd b/man/readme.Rd
index 95a0fb5..65849f5 100644
--- a/man/readme.Rd
+++ b/man/readme.Rd
@@ -18,7 +18,9 @@ Readme HTML maker and Viewer
 }
 \examples{
 \donttest{
+if(rate_limit_check(quiet = TRUE) > 10 & interactive()){
 tt_output <- tt_load_gh("2019-01-15")
 readme(tt_output)
 }
 }
+}
diff --git a/man/tt_load_gh.Rd b/man/tt_load_gh.Rd
index e14541d..2c8d658 100644
--- a/man/tt_load_gh.Rd
+++ b/man/tt_load_gh.Rd
@@ -36,7 +36,7 @@ from 60 to 5000. Follow instructions from
 \examples{
 
 # check to make sure there are requests still available
-if(rate_limit_check(quiet = TRUE) > 10){
+if(rate_limit_check(quiet = TRUE) > 10 & interactive()){
 tt_gh <- tt_load_gh("2019-01-15")
 
 ## readme attempts to open the readme for the weekly dataset

From 3f3bd38ee5ef1016685213e5a7ed69b91638d667 Mon Sep 17 00:00:00 2001
From: thebioengineer
Date: Mon, 31 Jan 2022 20:23:10 -0800
Subject: [PATCH 63/64] rerun pkgdown and other updates for release 1.0.2

---
 DESCRIPTION                    |  2 +-
 NEWS.md                        |  6 +++++-
 cran-comments.md               | 10 ++++----
 docs/404.html                  |  8 +++----
 docs/CODE_OF_CONDUCT.html      |  8 +++----
 docs/LICENSE-text.html         |  8 +++----
 docs/LICENSE.html              |  8 +++----
 docs/authors.html              |  8 +++----
 docs/index.html                | 44 ++++++++++++++++++++--------------
 docs/news/index.html           |  8 +++----
 docs/pkgdown.css               |  4 ++--
 docs/pkgdown.yml               |  6 ++---
 docs/readme.html               | 44 ++++++++++++++++++++--------------
 docs/reference/index.html      |  8 +++----
 docs/reference/readme.html     | 16 +++++++------
 docs/reference/tt_load.html    | 21 ++++++++--------
 docs/reference/tt_load_gh.html | 27 +++++++++++----------
 17 files changed, 131 insertions(+), 105 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f41ba40..e398e72 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: tidytuesdayR
 Type: Package
 Title: Access the Weekly 'TidyTuesday' Project Dataset
-Version: 1.0.1.9000
+Version: 1.0.2
 Authors@R:
     c(
       person(
diff --git a/NEWS.md b/NEWS.md
index ec61288..add4c19 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,8 @@
-# tidytuesdayR (development version)
+# tidytuesdayR 1.0.2
+
+* [bug fix] During testing it was identified that 502 errors from github servers would cause the code to error out. Now it will retry a few times before giving an error.
+* [bug fix] No internet connection bug on rstudio resolved due to malformed url checks (https).
+* [bug fix] Partial argument matching correction in `tt_download_file.character()`, `tt_parse_blob()`, and in tests. (thanks @mgirlich)
 
 # tidytuesdayR 1.0.1
 
diff --git a/cran-comments.md b/cran-comments.md
index 85aa674..4b39a93 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,13 +1,15 @@
 ## Release summary
 
-* Update to address failing solaris build on CRAN, with some additional features
+* Update to address failing examples build on CRAN causing removal
+* Small bug fixes identified by the community (partial argument matching, malformed url)
 
 * There are no reverse dependencies to check at this time
 
 ## Test environments
 
-* local R installation, R 4.0.2
-* ubuntu 16.04 (on github actions), , R 4.0.2
-* mac OS 10.15.4 (on github actions) R-devel, R 4.0.2,
+* local R installation, R 4.1.0
+* ubuntu 18.04 (on github actions), R-devel, R-release
+* mac OS (on github actions) R-devel, R-release
+* Windows-latest (on github actions) R-devel, R-release
 * win-builder (devel)
 
 ## R CMD check results
diff --git a/docs/404.html b/docs/404.html
index 9dd5669..42a4a37 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -79,7 +79,7 @@
       tidytuesdayR
-      1.0.1
+      1.0.2
@