From 912875a4272bd487f7439f4b1ac21886f8923e7a Mon Sep 17 00:00:00 2001 From: Tony ElHabr Date: Wed, 17 Jan 2024 09:33:57 -0600 Subject: [PATCH 1/5] parse from comment --- R/chromote-fbref.R | 31 ++++++++++++++++++++----------- R/fb_league_stats.R | 39 ++++++++++++++++++--------------------- 2 files changed, 38 insertions(+), 32 deletions(-) diff --git a/R/chromote-fbref.R b/R/chromote-fbref.R index cf788bea..24a6792e 100644 --- a/R/chromote-fbref.R +++ b/R/chromote-fbref.R @@ -25,10 +25,10 @@ WorldfootballRDynamicPage <- R6::R6Class("WorldfootballRDynamicPage", public = l unlist(self$session$DOM$querySelectorAll(self$root_id, css)$nodeIds) }, - call_node_method = function(node_id) { - js_fun <- paste0("function() { return this.outerHTML}") + call_node_method = function(node_id, method, ...) { + js_fun <- paste0("function() { return this", method, "}") obj_id <- self$object_id(node_id) - self$session$Runtime$callFunctionOn(js_fun, objectId = obj_id) + self$session$Runtime$callFunctionOn(js_fun, objectId = obj_id, ...) }, object_id = function(node_id) { @@ -41,14 +41,23 @@ WorldfootballRDynamicPage <- R6::R6Class("WorldfootballRDynamicPage", public = l #' @importFrom purrr map_chr #' @importFrom xml2 xml_children read_html #' @noRd -worldfootballr_html_page <- function(x) { - stopifnot(identical(class(x), c("WorldfootballRDynamicPage", "R6"))) - nodes <- x$find_nodes("table") - - elements <- purrr::map_chr(nodes, function(node_id) { - json <- x$call_node_method(node_id) - json$result$value - }) +worldfootballr_html_player_table <- function(session) { + stopifnot(identical(class(session), c("WorldfootballRDynamicPage", "R6"))) + + ## find element "above" commented out table + node_id1 <- session$find_nodes("#stats_shooting_sh") + ## find element "below" commented out table + node_id2 <- session$find_nodes("#stats_shooting_control") + ## find commented out element in-between + node_id <- round((node_id1 + node_id2) / 2) + + elements <- session$call_node_method(node_id, ".textContent")[['result']][['value']] + n_elements <- length(elements) + if (n_elements != 1) { + warning(sprintf("Did not find the expected number of tables on the page (3). Found %s.", n_elements)) + return(NULL) + } + html <- paste0("", paste0(elements, collapse = "\n"), "") xml2::read_html(html) } diff --git a/R/fb_league_stats.R b/R/fb_league_stats.R index 7e34bcc4..3a32987d 100644 --- a/R/fb_league_stats.R +++ b/R/fb_league_stats.R @@ -37,33 +37,30 @@ .frequency_id = "fb_league_stats-player" ) session <- worldfootballr_chromote_session(url) - page <- worldfootballr_html_page(session) + player_table <- worldfootballr_html_player_table(session) session$session$close(wait_ = FALSE) - elements <- xml2::xml_children(xml2::xml_children(page)) - tables <- rvest::html_table(elements) - - n_tables <- length(tables) - if (n_tables != 3) { - warning(sprintf("Did not find the expected number of tables on the page (3). Found %s.", n_tables)) + if (is.null(player_table)) { return(tibble::tibble()) } - renamed_table <- .rename_fb_cols(tables[[3]]) - renamed_table <- renamed_table[renamed_table$Rk != "Rk", ] - renamed_table <- .add_player_href( - renamed_table, - parent_element = elements[[3]], + + player_table_elements <- xml2::xml_children(xml2::xml_children(player_table)) + parsed_player_table <- rvest::html_table(player_table_elements) + renamed_player_table <- worldfootballR:::.rename_fb_cols(parsed_player_table[[1]]) + renamed_table <- renamed_player_table[renamed_player_table$Rk != "Rk", ] + renamed_player_table <- worldfootballR:::.add_player_href( + renamed_player_table, + parent_element = player_table_elements, player_xpath = ".//tbody/tr/td[@data-stat='player']/a" ) - } - - suppressMessages( - readr::type_convert( - clean_table, - guess_integer = TRUE, - na = "", - trim_ws = TRUE + suppressMessages( + readr::type_convert( + renamed_player_table, + guess_integer = TRUE, + na = "", + trim_ws = TRUE + ) ) - ) + } } From 35888ec7ef5cc288ad5329a774965e790d03f699 Mon Sep 17 00:00:00 2001 From: Tony ElHabr Date: Wed, 17 Jan 2024 09:34:30 -0600 Subject: [PATCH 2/5] update var names in test --- tests/testthat/test-fbref.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-fbref.R b/tests/testthat/test-fbref.R index a92ab19c..1ea79754 100644 --- a/tests/testthat/test-fbref.R +++ b/tests/testthat/test-fbref.R @@ -406,7 +406,7 @@ test_that("fb_league_stats() for players works", { testthat::skip_on_cran() testthat::skip_on_ci() expected_player_shooting_cols <- c("Rk", "Player", "Player_Href", "Nation", "Pos", "Squad", "Age", "Born", "Mins_Per_90", "Gls_Standard", "Sh_Standard", "SoT_Standard", "SoT_percent_Standard", "Sh_per_90_Standard", "SoT_per_90_Standard", "G_per_Sh_Standard", "G_per_SoT_Standard", "Dist_Standard", "FK_Standard", "PK_Standard", "PKatt_Standard", "xG_Expected", "npxG_Expected", "npxG_per_Sh_Expected", "G_minus_xG_Expected", "np:G_minus_xG_Expected", "Matches", "url") - epl_player_shooting_22 <- fb_league_stats( + single_player_shooting_22 <- fb_league_stats( country = "BRA", gender = "M", season_end_year = 2022, @@ -414,8 +414,8 @@ test_that("fb_league_stats() for players works", { stat_type = "shooting", team_or_player = "player" ) - expect_gt(nrow(epl_player_shooting_22), 0) - expect_setequal(colnames(epl_player_shooting_22), expected_player_shooting_cols) + expect_gt(nrow(single_player_shooting_22), 0) + expect_setequal(colnames(single_player_shooting_22), expected_player_shooting_cols) expected_player_misc_cols <- c("Rk", "Player", "Player_Href", "Nation", "Pos", "Squad", "Age", "Born", "Mins_Per_90", "CrdY", "CrdR", "2CrdY", "Fls", "Fld", "Off", "Crs", "Int", "TklW", "PKwon", "PKcon", "OG", "Recov", "Won_Aerial Duels", "Lost_Aerial Duels", "Won_percent_Aerial Duels", "Matches", "url") ## testing a lot would take too long, so just test multiple years since that is the most likely input param to have multiple values From 87f0f4799e39ff36ca4cac059f48a6bc8761794d Mon Sep 17 00:00:00 2001 From: Tony ElHabr Date: Wed, 17 Jan 2024 09:35:49 -0600 Subject: [PATCH 3/5] update news and bump version --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 73a3fafd..02059662 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: worldfootballR Title: Extract and Clean World Football (Soccer) Data -Version: 0.6.5.0001 +Version: 0.6.5.0003 Authors@R: c( person("Jason", "Zivkovic", , "jaseziv83@gmail.com", role = c("aut", "cre", "cph")), person("Tony", "ElHabr", , "anthonyelhabr@gmail.com", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index 9060421c..416fda17 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ * `fb_league_stats()` not returning `opponent` table. (0.6.5.0001) [#355](https://github.com/JaseZiv/worldfootballR/issues/355) +* `fb_league_stats()` not returning `player` table when hidden on page load. (0.6.5.0003) [#351](https://github.com/JaseZiv/worldfootballR/issues/351) + *** # worldfootballR 0.6.5 From 23aa57cebdbfba73d798a4ce112c8f35e9816e7d Mon Sep 17 00:00:00 2001 From: Tony ElHabr Date: Wed, 17 Jan 2024 09:46:10 -0600 Subject: [PATCH 4/5] small fixes --- R/chromote-fbref.R | 10 ++++------ R/fb_league_stats.R | 4 +++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/R/chromote-fbref.R b/R/chromote-fbref.R index 24a6792e..0b4034d9 100644 --- a/R/chromote-fbref.R +++ b/R/chromote-fbref.R @@ -45,13 +45,11 @@ worldfootballr_html_player_table <- function(session) { stopifnot(identical(class(session), c("WorldfootballRDynamicPage", "R6"))) ## find element "above" commented out table - node_id1 <- session$find_nodes("#stats_shooting_sh") - ## find element "below" commented out table - node_id2 <- session$find_nodes("#stats_shooting_control") - ## find commented out element in-between - node_id <- round((node_id1 + node_id2) / 2) + node_id0 <- session$find_nodes("#stats_shooting_sh") + ## skip 1 for the div "placeholder" + node_id <- node_id0 + 2L - elements <- session$call_node_method(node_id, ".textContent")[['result']][['value']] + elements <- session$call_node_method(node_id, ".textContent")[["result"]][["value"]] n_elements <- length(elements) if (n_elements != 1) { warning(sprintf("Did not find the expected number of tables on the page (3). Found %s.", n_elements)) diff --git a/R/fb_league_stats.R b/R/fb_league_stats.R index 3a32987d..98d6a19f 100644 --- a/R/fb_league_stats.R +++ b/R/fb_league_stats.R @@ -36,9 +36,11 @@ .frequency = "once", .frequency_id = "fb_league_stats-player" ) + session <- worldfootballr_chromote_session(url) player_table <- worldfootballr_html_player_table(session) session$session$close(wait_ = FALSE) + if (is.null(player_table)) { return(tibble::tibble()) } @@ -46,7 +48,7 @@ player_table_elements <- xml2::xml_children(xml2::xml_children(player_table)) parsed_player_table <- rvest::html_table(player_table_elements) renamed_player_table <- worldfootballR:::.rename_fb_cols(parsed_player_table[[1]]) - renamed_table <- renamed_player_table[renamed_player_table$Rk != "Rk", ] + renamed_player_table <- renamed_player_table[renamed_player_table$Rk != "Rk", ] renamed_player_table <- worldfootballR:::.add_player_href( renamed_player_table, parent_element = player_table_elements, From b65ea8495590909f405eb2856d7c90ad7bb33295 Mon Sep 17 00:00:00 2001 From: Tony ElHabr Date: Wed, 17 Jan 2024 09:53:15 -0600 Subject: [PATCH 5/5] rm the worldfootballR prefixing --- R/fb_league_stats.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/fb_league_stats.R b/R/fb_league_stats.R index 98d6a19f..19cf3240 100644 --- a/R/fb_league_stats.R +++ b/R/fb_league_stats.R @@ -47,9 +47,9 @@ player_table_elements <- xml2::xml_children(xml2::xml_children(player_table)) parsed_player_table <- rvest::html_table(player_table_elements) - renamed_player_table <- worldfootballR:::.rename_fb_cols(parsed_player_table[[1]]) + renamed_player_table <- .rename_fb_cols(parsed_player_table[[1]]) renamed_player_table <- renamed_player_table[renamed_player_table$Rk != "Rk", ] - renamed_player_table <- worldfootballR:::.add_player_href( + renamed_player_table <- .add_player_href( renamed_player_table, parent_element = player_table_elements, player_xpath = ".//tbody/tr/td[@data-stat='player']/a"