From 24f4c41829f7d46c212535d6cbfaae0841670ae6 Mon Sep 17 00:00:00 2001 From: Tony ElHabr Date: Mon, 2 Sep 2024 06:49:07 -0500 Subject: [PATCH] use read_html_live, point to correct html table element for fb_league_stats(..., team_or_player = "player") --- DESCRIPTION | 12 ++++----- NAMESPACE | 4 +-- NEWS.md | 17 +++++++------ R/chromote-fbref.R | 61 --------------------------------------------- R/fb_league_stats.R | 24 ++++++++++-------- 5 files changed, 29 insertions(+), 89 deletions(-) delete mode 100644 R/chromote-fbref.R diff --git a/DESCRIPTION b/DESCRIPTION index 6b25d180..0b880663 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: worldfootballR Title: Extract and Clean World Football (Soccer) Data -Version: 0.6.5.0008 +Version: 0.6.6.0000 Authors@R: c( person("Jason", "Zivkovic", , "jaseziv83@gmail.com", role = c("aut", "cre", "cph")), person("Tony", "ElHabr", , "anthonyelhabr@gmail.com", role = "ctb"), @@ -13,14 +13,14 @@ Description: Allow users to obtain clean and tidy number of popular sites, including 'FBref', transfer and valuations data from 'Transfermarkt' and shooting location - and other match stats data from 'Understat'. + and other match stats data from 'Understat'. It gives users the ability to access data more efficiently, rather than - having to export data tables to files before being able to complete their + having to export data tables to files before being able to complete their analysis. License: GPL-3 URL: https://github.com/JaseZiv/worldfootballR BugReports: https://github.com/JaseZiv/worldfootballR/issues -Depends: +Depends: R (>= 4.0.0) Imports: dplyr, @@ -36,7 +36,7 @@ Imports: readr, rlang, rstudioapi, - rvest (>= 1.0.0), + rvest (>= 1.0.4), stats, stringi, stringr, @@ -47,7 +47,7 @@ Imports: xml2, tibble, cli -Suggests: +Suggests: chromote, R6, knitr, diff --git a/NAMESPACE b/NAMESPACE index b6f391de..78dabe42 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -94,7 +94,6 @@ importFrom(magrittr,"%>%") importFrom(progress,progress_bar) importFrom(purrr,insistently) importFrom(purrr,map) -importFrom(purrr,map_chr) importFrom(purrr,map_dfr) importFrom(purrr,pluck) importFrom(purrr,possibly) @@ -104,7 +103,6 @@ importFrom(readr,type_convert) importFrom(rlang,.data) importFrom(rlang,.env) importFrom(rlang,arg_match0) -importFrom(rlang,check_installed) importFrom(rlang,inform) importFrom(rstudioapi,isAvailable) importFrom(rstudioapi,versionInfo) @@ -116,6 +114,7 @@ importFrom(rvest,html_nodes) importFrom(rvest,html_table) importFrom(rvest,html_text) importFrom(rvest,read_html) +importFrom(rvest,read_html_live) importFrom(stats,runif) importFrom(stats,setNames) importFrom(stringi,stri_unescape_unicode) @@ -130,6 +129,5 @@ importFrom(utils,read.csv) importFrom(utils,sessionInfo) importFrom(xml2,read_html) importFrom(xml2,xml_attr) -importFrom(xml2,xml_children) importFrom(xml2,xml_find_all) importFrom(xml2,xml_text) diff --git a/NEWS.md b/NEWS.md index ab332917..0e5124d5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,21 +2,22 @@ ### Bugs +### Breaking Changes + +### Improvements + +*** + +# worldfootballR 0.6.6 + * `fb_league_stats()` not returning `opponent` table. (0.6.5.0001) [#355](https://github.com/JaseZiv/worldfootballR/issues/355) * `tm_player_bio()` not returning values in the `player_valuation`, `max_player_valuation` and `max_player_valuation_date` fields. Unfortunately, `max_player_valuation` and `max_player_valuation_date` fields are no able to be scraped at this release (0.6.5.0002) [#357](https://github.com/JaseZiv/worldfootballR/issues/357) * `fb_league_stats()` not returning `player` table when hidden on page load. (0.6.5.0003) [#351](https://github.com/JaseZiv/worldfootballR/issues/351) * Fix parameter mis-sepcification in fbref vignette. (0.6.5.0005) [#385](https://github.com/JaseZiv/worldfootballR/issues/385) * `fb_season_team_stats()` failing due to change in FBRef table name. (0.6.5.0007, 0.6.5.0008) [#395](https://github.com/JaseZiv/worldfootballR/issues/389) - -### Breaking Changes - * In addressing the issue with `tm_player_injury_history()` in [#375](https://github.com/JaseZiv/worldfootballR/issues/375), the previously names column `club` has been renamed `club_missed_games_for` to better represent that this column will contain the games the player missed games for, as previously this column could have been misunderstood to be who they were playing for when they were injured (0.6.5.0004) - -### Improvements - * `understat_match_players` and `understat_match_stats` added. (0.6.5.0006) [#386](https://github.com/JaseZiv/worldfootballR/issues/386) - -*** +* `fb_league_stats()` unreliable for `team_or_player = "player"`. (0.6.6) [#395](https://github.com/JaseZiv/worldfootballR/issues/395) # worldfootballR 0.6.5 diff --git a/R/chromote-fbref.R b/R/chromote-fbref.R deleted file mode 100644 index 0b4034d9..00000000 --- a/R/chromote-fbref.R +++ /dev/null @@ -1,61 +0,0 @@ - -#' @source -#' @importFrom rlang check_installed -#' @noRd -worldfootballr_chromote_session <- function(url) { - rlang::check_installed(c("chromote", "R6")) - WorldfootballRDynamicPage$new(url) -} - -WorldfootballRDynamicPage <- R6::R6Class("WorldfootballRDynamicPage", public = list( - session = NULL, - root_id = NULL, - - initialize = function(url) { - self$session <- chromote::ChromoteSession$new() - - p <- self$session$Page$loadEventFired(wait_ = FALSE) - self$session$Page$navigate(url, wait_ = FALSE, timeout_ = 5) - self$session$wait_for(p) - - self$root_id <- self$session$DOM$getDocument(0)$root$nodeId - }, - - find_nodes = function(css) { - unlist(self$session$DOM$querySelectorAll(self$root_id, css)$nodeIds) - }, - - call_node_method = function(node_id, method, ...) { - js_fun <- paste0("function() { return this", method, "}") - obj_id <- self$object_id(node_id) - self$session$Runtime$callFunctionOn(js_fun, objectId = obj_id, ...) - }, - - object_id = function(node_id) { - self$session$DOM$resolveNode(node_id)$object$objectId - } - -)) - -#' @importFrom rvest html_table -#' @importFrom purrr map_chr -#' @importFrom xml2 xml_children read_html -#' @noRd -worldfootballr_html_player_table <- function(session) { - stopifnot(identical(class(session), c("WorldfootballRDynamicPage", "R6"))) - - ## find element "above" commented out table - node_id0 <- session$find_nodes("#stats_shooting_sh") - ## skip 1 for the div "placeholder" - node_id <- node_id0 + 2L - - elements <- session$call_node_method(node_id, ".textContent")[["result"]][["value"]] - n_elements <- length(elements) - if (n_elements != 1) { - warning(sprintf("Did not find the expected number of tables on the page (3). Found %s.", n_elements)) - return(NULL) - } - - html <- paste0("", paste0(elements, collapse = "\n"), "") - xml2::read_html(html) -} diff --git a/R/fb_league_stats.R b/R/fb_league_stats.R index 19cf3240..9aa48d36 100644 --- a/R/fb_league_stats.R +++ b/R/fb_league_stats.R @@ -1,10 +1,10 @@ -#' @importFrom rvest html_table +#' @importFrom rvest html_table read_html_live #' @importFrom purrr map_dfr #' @importFrom dplyr mutate #' @importFrom rlang inform #' @importFrom tibble tibble #' @importFrom readr type_convert -.fb_single_league_stats <- function(url, team_or_player) { +.fb_single_league_stats <- function(url, team_or_player, stat_type) { clean_table <- if (team_or_player == "team") { page <- .load_page(url) @@ -32,26 +32,27 @@ } else { rlang::inform( - 'Please be aware that `fb_league_stats(..., team_or_player = "player")` depends on promises, which may not always work.', + 'Please be aware that `fb_league_stats(..., team_or_player = "player")` depends on `rvest::read_html_live` (and chromote), which may not always work.', .frequency = "once", .frequency_id = "fb_league_stats-player" ) - session <- worldfootballr_chromote_session(url) - player_table <- worldfootballr_html_player_table(session) - session$session$close(wait_ = FALSE) + page <- rvest::read_html_live(url) + ## for keepers: although URLs have plural term, div elements have singular term + stat_type <- gsub("keepers", "keeper", stat_type) + player_table_element <- page$html_elements(paste0("#div_stats_", stat_type)) + page$session$close(wait_ = FALSE) - if (is.null(player_table)) { + if (is.null(player_table_element)) { return(tibble::tibble()) } - player_table_elements <- xml2::xml_children(xml2::xml_children(player_table)) - parsed_player_table <- rvest::html_table(player_table_elements) + parsed_player_table <- rvest::html_table(player_table_element) renamed_player_table <- .rename_fb_cols(parsed_player_table[[1]]) renamed_player_table <- renamed_player_table[renamed_player_table$Rk != "Rk", ] renamed_player_table <- .add_player_href( renamed_player_table, - parent_element = player_table_elements, + parent_element = player_table_element, player_xpath = ".//tbody/tr/td[@data-stat='player']/a" ) suppressMessages( @@ -213,6 +214,7 @@ fb_league_stats <- function( fi <- purrr::insistently( .fb_single_league_stats, + rate = purrr::rate_backoff(max_times = 2), quiet = TRUE ) @@ -232,7 +234,7 @@ fb_league_stats <- function( } pb$tick() url <- urls[.x] - res <- fp(url, team_or_player = team_or_player) + res <- fp(url, team_or_player = team_or_player, stat_type = stat_type) res[["url"]] <- url res }