Skip to content

Commit

Permalink
Merge pull request #386 from jruizcabrejos/main
Browse files Browse the repository at this point in the history
Understat match stats and players
  • Loading branch information
JaseZiv authored Jul 9, 2024
2 parents 2b6f273 + 0ce82b4 commit 5de8e32
Show file tree
Hide file tree
Showing 9 changed files with 212 additions and 1 deletion.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: worldfootballR
Title: Extract and Clean World Football (Soccer) Data
Version: 0.6.5.0005
Version: 0.6.5.0006
Authors@R: c(
person("Jason", "Zivkovic", , "[email protected]", role = c("aut", "cre", "cph")),
person("Tony", "ElHabr", , "[email protected]", role = "ctb"),
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ export(tm_team_transfers)
export(understat_available_teams)
export(understat_league_match_results)
export(understat_league_season_shots)
export(understat_match_players)
export(understat_match_shots)
export(understat_match_stats)
export(understat_player_shots)
export(understat_team_meta)
export(understat_team_players_stats)
Expand Down Expand Up @@ -116,9 +118,11 @@ importFrom(rvest,html_text)
importFrom(rvest,read_html)
importFrom(stats,runif)
importFrom(stats,setNames)
importFrom(stringi,stri_unescape_unicode)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_squish)
importFrom(stringr,str_subset)
importFrom(tibble,tibble)
importFrom(tidyr,crossing)
importFrom(tidyselect,vars_select_helpers)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@

* In addressing the issue with `tm_player_injury_history()` in [#375](https://github.com/JaseZiv/worldfootballR/issues/375), the column previously named `club` has been renamed `club_missed_games_for` to better represent that this column contains the club the player missed games for, as previously this column could have been misunderstood to be the club they were playing for when they were injured (0.6.5.0004)

### Improvements

* `understat_match_players` and `understat_match_stats` added. (0.6.5.0006) [#386](https://github.com/JaseZiv/worldfootballR/issues/386)

***

Expand Down
74 changes: 74 additions & 0 deletions R/understat_match_players.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@

#' Get Understat match player data
#'
#' Returns player values for a selected match from Understat.com.
#'
#' @param match_url A `character` string with the URL of the match played.
#'
#' @return returns a `data.frame` with data for all players for the match
#'   (home players first, then away players).
#'
#' @details An error is raised when `match_url` is not a single, non-missing
#'   character string, or when no roster data can be extracted from the page.
#'
#' @importFrom magrittr %>%
#' @importFrom rvest html_elements
#' @importFrom stringi stri_unescape_unicode
#' @importFrom stringr str_subset
#' @importFrom jsonlite fromJSON
#' @importFrom dplyr bind_rows
#'
#' @export

understat_match_players <- function(match_url) {

  # Fail fast with a readable message rather than an obscure error raised
  # somewhere inside the scraping / parsing steps below.
  if (!is.character(match_url) || length(match_url) != 1L || is.na(match_url)) {
    stop("`match_url` must be a single, non-missing character string.", call. = FALSE)
  }

  # The match id is made of every digit found in the URL.
  match_id <- gsub("[^0-9]", "", match_url)

  # Every <script> node of the page, as raw text.
  page_scripts <- .get_understat_json(page_url = match_url) %>%
    rvest::html_elements("script") %>%
    as.character()

  # Keep only the script(s) that assign `rostersData`.
  roster_scripts <- page_scripts[grep("rostersData\t=", page_scripts)]

  roster_json <- stringi::stri_unescape_unicode(roster_scripts)
  # Fixed-width trimming: drop the first 40 and the last 13 characters.
  # NOTE(review): presumably these are the `<script>...JSON.parse('` prefix and
  # the `');...</script>` suffix wrapped around the payload - confirm against a
  # live page, as any change in the page markup silently breaks these offsets.
  roster_json <- substr(roster_json, 41, nchar(roster_json))
  roster_json <- substr(roster_json, 1, nchar(roster_json) - 13)
  roster_json <- paste0("[", roster_json, "]")
  # paste0() turns a zero-length input into "[]"; such empty payloads are
  # discarded here.
  roster_json <- stringr::str_subset(unlist(roster_json), "\\[\\]", negate = TRUE)

  if (length(roster_json) == 0L) {
    stop(
      "No player roster data could be extracted from: ", match_url,
      call. = FALSE
    )
  }

  rosters <- do.call("rbind", lapply(roster_json, jsonlite::fromJSON))

  # `h` and `a` hold one entry per player; stack each side into a data.frame,
  # then put the home rows on top of the away rows.
  home_players <- do.call(rbind.data.frame, rosters$h)
  away_players <- do.call(rbind.data.frame, rosters$a)
  players <- dplyr::bind_rows(home_players, away_players)

  match_players <- data.frame(
    match_id = as.integer(match_id),
    id = as.integer(players[["id"]]),
    team_id = as.integer(players[["team_id"]]),
    home_away = as.character(players[["h_a"]]),
    player_id = as.integer(players[["player_id"]]),
    # NOTE(review): `swap_id` is filled from the same `id` field as the `id`
    # column above - confirm whether another source field was intended.
    swap_id = as.integer(players[["id"]]),
    player = as.character(players[["player"]]),
    position = as.character(players[["position"]]),
    positionOrder = as.integer(players[["positionOrder"]]),
    time_played = as.integer(players[["time"]]),
    goals = as.integer(players[["goals"]]),
    own_goals = as.integer(players[["own_goals"]]),
    shots = as.integer(players[["shots"]]),
    xG = as.numeric(players[["xG"]]),
    yellow_card = as.integer(players[["yellow_card"]]),
    red_card = as.integer(players[["red_card"]]),
    roster_in = as.integer(players[["roster_in"]]),
    roster_out = as.integer(players[["roster_out"]]),
    key_passes = as.integer(players[["key_passes"]]),
    assists = as.integer(players[["assists"]]),
    xA = as.numeric(players[["xA"]]),
    xGChain = as.numeric(players[["xGChain"]]),
    xGBuildup = as.numeric(players[["xGBuildup"]])
  )

  return(match_players)
}


58 changes: 58 additions & 0 deletions R/understat_match_stats.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

#' Get Understat match stats table data
#'
#' Returns the Stats values for a selected match from Understat.com.
#'
#' @param match_url A `character` string with the URL of the match played.
#'
#' @return returns a one-row `data.frame` with data from the stats table for
#'   the match.
#'
#' @details For `draw_chances`, `home_chances` and `away_chances`, values below
#'   10% in the browser will be retrieved as NA (e.g. a "5%" chance will be NA
#'   in the `data.frame`).
#'
#'   An error is raised when `match_url` is not a single, non-missing character
#'   string, or when no stats values can be found on the page.
#'
#' @importFrom magrittr %>%
#' @importFrom rvest html_elements html_text
#'
#' @export

understat_match_stats <- function(match_url) {

  # Fail fast with a readable message rather than an obscure error raised
  # somewhere inside the scraping steps below.
  if (!is.character(match_url) || length(match_url) != 1L || is.na(match_url)) {
    stop("`match_url` must be a single, non-missing character string.", call. = FALSE)
  }

  # Text of every `.progress-value` element inside the stats scheme block, in
  # document order.
  stat_values <- .get_understat_json(page_url = match_url) %>%
    rvest::html_elements("div.scheme-block.is-hide[data-scheme='stats']") %>%
    rvest::html_elements(".progress-value") %>%
    rvest::html_text()

  # Without this guard an empty scrape surfaces as seq()'s
  # "wrong sign in 'by' argument" error, which says nothing about the cause.
  if (length(stat_values) < 2L) {
    stop("No match stats values could be found at: ", match_url, call. = FALSE)
  }

  # Split the values by position (1st, 3rd, 5th, ... / 2nd, 4th, 6th, ...).
  # Neither vector is "the home side" or "the away side": the chances row
  # contributes three values (home, draw, away), so the side that sits on odd
  # positions flips after it. The mapping below is therefore deliberate.
  odd_values <- stat_values[c(TRUE, FALSE)]
  even_values <- stat_values[c(FALSE, TRUE)]

  # "42%" -> 0.42; text without any digit becomes NA.
  pct_to_prob <- function(x) {
    as.integer(gsub("[^0-9]", "", x)) / 100
  }

  match_stats <- data.frame(

    match_id = as.integer(gsub("[^0-9]", "", match_url)),

    home_team = as.character(odd_values[1]),
    home_chances = pct_to_prob(odd_values[2]),
    home_goals = as.integer(even_values[3]),
    home_xG = as.numeric(even_values[4]),
    home_shots = as.integer(even_values[5]),
    home_shot_on_target = as.integer(even_values[6]),
    home_deep = as.integer(even_values[7]),
    home_PPDA = as.numeric(even_values[8]),
    home_xPTS = as.numeric(even_values[9]),

    draw_chances = pct_to_prob(even_values[2]),

    away_team = as.character(even_values[1]),
    away_chances = pct_to_prob(odd_values[3]),
    away_goals = as.integer(odd_values[4]),
    away_xG = as.numeric(odd_values[5]),
    away_shots = as.integer(odd_values[6]),
    away_shot_on_target = as.integer(odd_values[7]),
    away_deep = as.integer(odd_values[8]),
    away_PPDA = as.numeric(odd_values[9]),
    away_xPTS = as.numeric(odd_values[10])

  )

  return(match_stats)
}


17 changes: 17 additions & 0 deletions man/understat_match_players.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/understat_match_stats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-understat.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,19 @@ test_that("understat_team_stats_breakdown() works", {
expect_equal(ncol(team_stats), 11)
expect_gt(nrow(team_stats), 0)
})

test_that("understat_match_players() works", {
  testthat::skip_on_cran()
  match_players <- understat_match_players(match_url = "https://understat.com/match/14789")
  # expect_s3_class() checks inheritance directly and reports the actual class
  # on failure, unlike a hand-rolled comparison against class().
  expect_s3_class(match_players, "data.frame")
  # The function returns a fixed set of 23 columns, one row per player.
  expect_equal(ncol(match_players), 23)
  expect_gt(nrow(match_players), 0)
})

test_that("understat_match_stats() works", {
  testthat::skip_on_cran()
  match_stats <- understat_match_stats(match_url = "https://understat.com/match/14789")
  # expect_s3_class() checks inheritance directly and reports the actual class
  # on failure, unlike a hand-rolled comparison against class().
  expect_s3_class(match_stats, "data.frame")
  # The function returns a single row with a fixed set of 20 columns.
  expect_equal(ncol(match_stats), 20)
  expect_equal(nrow(match_stats), 1)
})
19 changes: 19 additions & 0 deletions vignettes/extract-understat-data.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,25 @@ wba_liv_shots <- understat_match_shots(match_url = "https://understat.com/match/
dplyr::glimpse(wba_liv_shots)
```

### Match Stats

To get the data from the stats table for an individual match, use the `understat_match_stats()` function:

```{r match_stats}
# Scrape the stats table for a single match, identified by its Understat URL
wba_liv_stats <- understat_match_stats(match_url = "https://understat.com/match/14789")
# Print a compact overview of the returned columns
dplyr::glimpse(wba_liv_stats)
```


### Match Players

To get the data for each player in an individual match, use the `understat_match_players()` function:

```{r match_players}
# Scrape the player-level data for a single match, identified by its Understat URL
wba_liv_players <- understat_match_players(match_url = "https://understat.com/match/14789")
# Print a compact overview of the returned columns
dplyr::glimpse(wba_liv_players)
```


***

Expand Down

0 comments on commit 5de8e32

Please sign in to comment.