Skip to content

Commit

Permalink
Merge pull request #353 from JaseZiv/feature/tm-absence
Browse files Browse the repository at this point in the history
Feature/tm absence
  • Loading branch information
JaseZiv authored Jan 14, 2024
2 parents 34dc864 + d7728d0 commit 67c7ac3
Show file tree
Hide file tree
Showing 7 changed files with 216 additions and 2 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: worldfootballR
Title: Extract and Clean World Football (Soccer) Data
Version: 0.6.4.0014
Version: 0.6.5
Authors@R: c(
person("Jason", "Zivkovic", , "[email protected]", role = c("aut", "cre", "cph")),
person("Tony", "ElHabr", , "[email protected]", role = "ctb"),
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export(load_understat_league_shots)
export(player_dictionary_mapping)
export(player_transfer_history)
export(tm_expiring_contracts)
export(tm_get_player_absence)
export(tm_league_debutants)
export(tm_league_injuries)
export(tm_league_team_urls)
Expand All @@ -68,18 +69,21 @@ export(understat_team_players_stats)
export(understat_team_season_shots)
export(understat_team_stats_breakdown)
importFrom(cli,cli_alert)
importFrom(dplyr,across)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,filter)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_all)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,transmute)
importFrom(httr,GET)
importFrom(httr,add_headers)
importFrom(httr,content)
importFrom(httr,set_cookies)
importFrom(janitor,clean_names)
importFrom(jsonlite,fromJSON)
importFrom(lubridate,today)
importFrom(lubridate,ymd)
Expand All @@ -103,6 +107,7 @@ importFrom(rstudioapi,isAvailable)
importFrom(rstudioapi,versionInfo)
importFrom(rvest,html_attr)
importFrom(rvest,html_children)
importFrom(rvest,html_elements)
importFrom(rvest,html_node)
importFrom(rvest,html_nodes)
importFrom(rvest,html_table)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# worldfootballR (development version)

***

# worldfootballR 0.6.5

### Bugs

* `fb_league_stats()` failing for `playing_time`. (0.6.4.0001) [#314](https://github.com/JaseZiv/worldfootballR/issues/314)
Expand All @@ -20,6 +24,7 @@
* `fb_team_match_stats()` and `understat_available_teams()` (0.6.4.0004)
* `fb_match_shooting()`, `fb_advanced_match_stats()`, `fb_league_stats(team_or_player = "player")` gain `Player_Href` column (0.6.4.0005)
* `load_fb_advanced_stats()` and `load_fb_match_summary()` added (0.6.4.0007)
* `tm_get_player_absence()` now available to get a list of absences through suspension for players from transfermarkt


***
Expand Down
143 changes: 143 additions & 0 deletions R/tm_player_absence.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#' @importFrom rvest html_elements html_text html_table html_attr
#' @importFrom xml2 read_html
#' @importFrom purrr pluck
#' @importFrom janitor clean_names
#' @importFrom dplyr mutate across mutate_all
.tm_each_absence_page <- function(absence_page_url) {

absence_pg <- xml2::read_html(absence_page_url)

# get the main table, knowing that come columns won't be returned
main_df <- absence_pg |> rvest::html_elements("#yw1 .items") |> rvest::html_table() |> data.frame()

if(nrow(main_df) > 0) {

# create an object of each table row and the col headings
tab_rows <- absence_pg |> rvest::html_elements("#yw1 .items tbody tr")
tab_head <- absence_pg |> rvest::html_elements("#yw1 .items tr th") |> rvest::html_text()

# index of columns we need to get extra html elements for
competition_idx <- grep("competition", tolower(tab_head))
club_missed_idx <- grep("games missed", tolower(tab_head))


# parse competiton name
comp_name <- c()
for(i in 1:length(tab_rows)) {
each <- tab_rows[i] |>
rvest::html_elements("td") |>
purrr::pluck(competition_idx) |>
rvest::html_elements("img") |> rvest::html_attr("title") |>
.replace_empty_na()

comp_name <- c(comp_name, each)
}


# parse team name
club_name <- c()
for(i in 1:length(tab_rows)) {
each <- tab_rows[i] |>
rvest::html_elements("td") |>
purrr::pluck(club_missed_idx) |>
rvest::html_elements("a") |> rvest::html_attr("title") |>
.replace_empty_na()

club_name <- c(club_name, each)
}


main_df$Competition <- comp_name
main_df$club_missed <- club_name

main_df <- main_df |>
dplyr::mutate_all(as.character) |>
dplyr::mutate(dplyr::across(c("from", "until"), .tm_fix_dates)) |>
janitor::clean_names()

}

return(main_df)

}



#' Get Player Absences
#'
#' Returns data frame of a player's absences from suspension from transfermarkt.com
#'
#' @param player_urls player url(s) from transfermarkt
#'
#' @return returns a dataframe
#'
#' @importFrom rlang .data
#'
#' @export
#'
#' @examples
#' \dontrun{
#' try({
#' player_urls <- c("https://www.transfermarkt.com/cristian-romero/profil/spieler/355915",
#' "https://www.transfermarkt.com/micky-van-de-ven/profil/spieler/557459")
#'
#' df <- tm_get_player_absence(player_urls)
#' })
#' }
#'
tm_get_player_absence <- function(player_urls) {


.tm_each_players_absence <- function(player_url) {

pb$tick()

main_url <- "https://www.transfermarkt.com"

# # change the url to point to the absences url
player_url_changed <- gsub("profil", "ausfaelle", player_url)



player_page <- xml2::read_html(player_url_changed)

player_name <- player_page %>% rvest::html_nodes("h1") %>% rvest::html_text() %>% gsub("#[0-9]+ ", "", .) %>% stringr::str_squish()
player_meta <- data.frame(player_name = player_name,
player_url = player_url)

absence_paginated <- player_page |>
rvest::html_elements(".tm-pagination__list-item a") |> rvest::html_attr("href") |>
unique()

if(length(absence_paginated) == 0) {
absence_paginated <- player_url_changed
} else {
absence_paginated <- paste0(main_url, absence_paginated)
}


f_possibly <- purrr::possibly(.tm_each_absence_page, otherwise = data.frame(), quiet = FALSE)
player_out <- purrr::map_dfr(
absence_paginated,
f_possibly
)

player_out <- dplyr::bind_cols(player_meta, player_out)

return(player_out)

}

# create the progress bar with a progress function.
pb <- progress::progress_bar$new(total = length(player_urls))

f_possibly2 <- purrr::possibly(.tm_each_players_absence, otherwise = data.frame(), quiet = FALSE)
purrr::map_dfr(
player_urls,
f_possibly2
)

}



28 changes: 28 additions & 0 deletions man/tm_get_player_absence.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/testthat/test-transfermarkt.R
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,17 @@ test_that("tm_player_injury_history() works", {
expect_false(nrow(hazard_injuries) == 0)

})


test_that("tm_get_player_absence() works", {
testthat::skip_on_cran()

player_absence <- tm_get_player_absence(player_urls = c("https://www.transfermarkt.com/cristian-romero/profil/spieler/355915",
"https://www.transfermarkt.com/micky-van-de-ven/profil/spieler/557459"))
expect_type(player_absence, "list")
expect_equal(ncol(player_absence), 10)
expect_false(nrow(player_absence) == 0)

})


21 changes: 20 additions & 1 deletion vignettes/extract-transfermarkt-data.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ dplyr::glimpse(hazard_injuries)
#----- for multiple players: -----#
# # can make use of a tm helper function:
# burnley_player_urls <- tm_team_player_urls(team_url = "https://www.transfermarkt.com/fc-burnley/startseite/verein/1132/saison_id/2021")
# # then pass all those URLs to the tm_player_bio
# # then pass all those URLs to the tm_player_injury_history
# burnley_player_injuries <- tm_player_injury_history(player_urls = burnley_player_urls)
```

Expand Down Expand Up @@ -325,6 +325,25 @@ dplyr::glimpse(all_leeds_united_players_transfer_history)
```



### Player Absence

To be able to get a player's (or players') absence history as a result of suspensions from transfermarkt, use the `tm_get_player_absence()` function.

```{r player_absence, eval=FALSE}
#----- for a single player: -----#
romero_absence <- tm_get_player_absence(player_urls = "https://www.transfermarkt.com/cristian-romero/profil/spieler/355915")
dplyr::glimpse(romero_absence)
#----- for multiple players: -----#
# # can make use of a tm helper function:
spurs_player_urls <- tm_team_player_urls(team_url = "https://www.transfermarkt.com/tottenham-hotspur/startseite/verein/148")
# # then pass all those URLs to the tm_get_player_absence
spurs_player_absence <- tm_get_player_absence(player_urls = spurs_player_urls)
```



***


Expand Down

0 comments on commit 67c7ac3

Please sign in to comment.