-
Notifications
You must be signed in to change notification settings - Fork 0
/
CFB EDA.Rmd
107 lines (81 loc) · 2.2 KB
/
CFB EDA.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
---
title: "CFB EDA"
author: "Seth Goldman"
date: "August 18, 2019"
output: html_document
---
```{r setup, include=FALSE}
library(dplyr)
library(ggplot2)
library(inspectdf)
library(forcats)
```
### Data acquisition
The data is queried by the `CFB Data Acquisition.R` script; the resulting `.rds` files are read in below.
```{r read_data}
# Load the three 2018 tables (plays, drives, games) from their .rds files.
# Paths are relative, so the files must sit where the document is knitted.
all_plays_2018  <- readRDS("all_plays_2018.rds")
all_drives_2018 <- readRDS("all_drives_2018.rds")
all_games_2018  <- readRDS("all_games_2018.rds")
```
```{r preview_data}
# Print a compact, column-by-column overview of each table
# (games first, then drives, then plays).
all_games_2018 %>% glimpse()
all_drives_2018 %>% glimpse()
all_plays_2018 %>% glimpse()
```
```{r explore_games}
# Scatter of home vs. away points for every game, colored by week.
ggplot(
  all_games_2018,
  aes(x = home_points, y = away_points, color = week)
) +
  geom_point()

# Distribution of combined points (home + away), split by the
# conference_game flag. coord_flip() lays the boxes out horizontally.
all_games_2018 %>%
  mutate(total_points = home_points + away_points) %>%
  ggplot(aes(x = conference_game, y = total_points)) +
  geom_boxplot() +
  coord_flip() +
  labs(
    y = "total_points",
    title = "total points in conference vs non-conf games"
  )

# Distribution of combined points per home conference, restricted to rows
# flagged as conference games. Conferences are ordered with fct_reorder()
# on the combined points (its default summary function is used).
all_games_2018 %>%
  filter(conference_game == TRUE) %>%
  mutate(
    total_points = home_points + away_points,
    conference = fct_reorder(home_conference, total_points)
  ) %>%
  ggplot(aes(x = conference, y = total_points)) +
  geom_boxplot() +
  coord_flip() +
  labs(
    x = "conference",
    y = "total_points",
    title = "total points by conference",
    subtitle = "conference games only"
  )
```
```{r explore_drives}
# Per-offense summary: number of distinct drive ids and the sum of the
# `plays` column, listed from most to fewest drives.
all_drives_2018 %>%
  group_by(offense) %>%
  summarise(
    total_drives = n_distinct(id),
    total_plays = sum(plays)
  ) %>%
  arrange(desc(total_drives))

# Horizontal bar chart of how often each drive_result value occurs,
# with bars ordered by their count.
all_drives_2018 %>%
  count(drive_result) %>%
  ggplot(aes(x = reorder(drive_result, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "drive result types",
    x = "",
    y = "observations"
  )

# Tabulate the observed (elapsed.minutes, elapsed.seconds) combinations.
count(all_drives_2018, elapsed.minutes, elapsed.seconds)

# Box plots of elapsed.minutes per drive result, limited to the result
# types listed below and to rows where elapsed.minutes is above zero.
all_drives_2018 %>%
  filter(
    drive_result %in% c(
      "PUNT", "TD", "FG", "INT", "DOWNS", "FUMBLE", "MISSED FG"
    ),
    elapsed.minutes > 0
  ) %>%
  ggplot(
    aes(
      x = fct_reorder(drive_result, elapsed.minutes),
      y = elapsed.minutes
    )
  ) +
  geom_boxplot() +
  coord_flip() +
  labs(
    title = "drive minutes by result type",
    x = "",
    y = "drive time elapsed (whole mins)"
  )
```
```{r explore_plays}
# Horizontal bar chart of the number of plays per play_type,
# with bars ordered by their count.
count(all_plays_2018, play_type) %>%
  ggplot(aes(x = fct_reorder(play_type, n), y = n)) +
  geom_col() +
  coord_flip()
```