diff --git a/DESCRIPTION b/DESCRIPTION index 8c149cfd..02059662 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: worldfootballR Title: Extract and Clean World Football (Soccer) Data -Version: 0.6.5.0002 +Version: 0.6.5.0003 Authors@R: c( person("Jason", "Zivkovic", , "jaseziv83@gmail.com", role = c("aut", "cre", "cph")), person("Tony", "ElHabr", , "anthonyelhabr@gmail.com", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index 30d3afa3..b36b2f4a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,8 @@ * `fb_league_stats()` not returning `opponent` table. (0.6.5.0001) [#355](https://github.com/JaseZiv/worldfootballR/issues/355) * `tm_player_bio()` not returning values in the `player_valuation`, `max_player_valuation` and `max_player_valuation_date` fields. Unfortunately, `max_player_valuation` and `max_player_valuation_date` fields are no able to be scraped at this release (0.6.5.0002) [#357](https://github.com/JaseZiv/worldfootballR/issues/357) +* `fb_league_stats()` not returning `player` table when hidden on page load. (0.6.5.0003) [#351](https://github.com/JaseZiv/worldfootballR/issues/351) + *** # worldfootballR 0.6.5 diff --git a/R/chromote-fbref.R b/R/chromote-fbref.R index cf788bea..0b4034d9 100644 --- a/R/chromote-fbref.R +++ b/R/chromote-fbref.R @@ -25,10 +25,10 @@ WorldfootballRDynamicPage <- R6::R6Class("WorldfootballRDynamicPage", public = l unlist(self$session$DOM$querySelectorAll(self$root_id, css)$nodeIds) }, - call_node_method = function(node_id) { - js_fun <- paste0("function() { return this.outerHTML}") + call_node_method = function(node_id, method, ...) { + js_fun <- paste0("function() { return this", method, "}") obj_id <- self$object_id(node_id) - self$session$Runtime$callFunctionOn(js_fun, objectId = obj_id) + self$session$Runtime$callFunctionOn(js_fun, objectId = obj_id, ...) }, object_id = function(node_id) { @@ -41,14 +41,21 @@ WorldfootballRDynamicPage <- R6::R6Class("WorldfootballRDynamicPage", public = l #' @importFrom purrr map_chr #' @importFrom xml2 xml_children read_html #' @noRd -worldfootballr_html_page <- function(x) { - stopifnot(identical(class(x), c("WorldfootballRDynamicPage", "R6"))) - nodes <- x$find_nodes("table") - - elements <- purrr::map_chr(nodes, function(node_id) { - json <- x$call_node_method(node_id) - json$result$value - }) +worldfootballr_html_player_table <- function(session) { + stopifnot(identical(class(session), c("WorldfootballRDynamicPage", "R6"))) + + ## find element "above" commented out table + node_id0 <- session$find_nodes("#stats_shooting_sh") + ## skip 1 for the div "placeholder" + node_id <- node_id0 + 2L + + elements <- session$call_node_method(node_id, ".textContent")[["result"]][["value"]] + n_elements <- length(elements) + if (n_elements != 1) { + warning(sprintf("Did not find the expected number of tables on the page (3). Found %s.", n_elements)) + return(NULL) + } + html <- paste0("", paste0(elements, collapse = "\n"), "") xml2::read_html(html) } diff --git a/R/fb_league_stats.R b/R/fb_league_stats.R index 7e34bcc4..19cf3240 100644 --- a/R/fb_league_stats.R +++ b/R/fb_league_stats.R @@ -36,34 +36,33 @@ .frequency = "once", .frequency_id = "fb_league_stats-player" ) + session <- worldfootballr_chromote_session(url) - page <- worldfootballr_html_page(session) + player_table <- worldfootballr_html_player_table(session) session$session$close(wait_ = FALSE) - elements <- xml2::xml_children(xml2::xml_children(page)) - tables <- rvest::html_table(elements) - n_tables <- length(tables) - if (n_tables != 3) { - warning(sprintf("Did not find the expected number of tables on the page (3). Found %s.", n_tables)) + if (is.null(player_table)) { return(tibble::tibble()) } - renamed_table <- .rename_fb_cols(tables[[3]]) - renamed_table <- renamed_table[renamed_table$Rk != "Rk", ] - renamed_table <- .add_player_href( - renamed_table, - parent_element = elements[[3]], + + player_table_elements <- xml2::xml_children(xml2::xml_children(player_table)) + parsed_player_table <- rvest::html_table(player_table_elements) + renamed_player_table <- .rename_fb_cols(parsed_player_table[[1]]) + renamed_player_table <- renamed_player_table[renamed_player_table$Rk != "Rk", ] + renamed_player_table <- .add_player_href( + renamed_player_table, + parent_element = player_table_elements, player_xpath = ".//tbody/tr/td[@data-stat='player']/a" ) - } - - suppressMessages( - readr::type_convert( - clean_table, - guess_integer = TRUE, - na = "", - trim_ws = TRUE + suppressMessages( + readr::type_convert( + renamed_player_table, + guess_integer = TRUE, + na = "", + trim_ws = TRUE + ) ) - ) + } } diff --git a/tests/testthat/test-fbref.R b/tests/testthat/test-fbref.R index a92ab19c..1ea79754 100644 --- a/tests/testthat/test-fbref.R +++ b/tests/testthat/test-fbref.R @@ -406,7 +406,7 @@ test_that("fb_league_stats() for players works", { testthat::skip_on_cran() testthat::skip_on_ci() expected_player_shooting_cols <- c("Rk", "Player", "Player_Href", "Nation", "Pos", "Squad", "Age", "Born", "Mins_Per_90", "Gls_Standard", "Sh_Standard", "SoT_Standard", "SoT_percent_Standard", "Sh_per_90_Standard", "SoT_per_90_Standard", "G_per_Sh_Standard", "G_per_SoT_Standard", "Dist_Standard", "FK_Standard", "PK_Standard", "PKatt_Standard", "xG_Expected", "npxG_Expected", "npxG_per_Sh_Expected", "G_minus_xG_Expected", "np:G_minus_xG_Expected", "Matches", "url") - epl_player_shooting_22 <- fb_league_stats( + single_player_shooting_22 <- fb_league_stats( country = "BRA", gender = "M", season_end_year = 2022, @@ -414,8 +414,8 @@ test_that("fb_league_stats() for players works", { stat_type = "shooting", team_or_player = "player" ) - expect_gt(nrow(epl_player_shooting_22), 0) - expect_setequal(colnames(epl_player_shooting_22), expected_player_shooting_cols) + expect_gt(nrow(single_player_shooting_22), 0) + expect_setequal(colnames(single_player_shooting_22), expected_player_shooting_cols) expected_player_misc_cols <- c("Rk", "Player", "Player_Href", "Nation", "Pos", "Squad", "Age", "Born", "Mins_Per_90", "CrdY", "CrdR", "2CrdY", "Fls", "Fld", "Off", "Crs", "Int", "TklW", "PKwon", "PKcon", "OG", "Recov", "Won_Aerial Duels", "Lost_Aerial Duels", "Won_percent_Aerial Duels", "Matches", "url") ## testing a lot would take too long, so just test multiple years since that is the most likely input param to have multiple values