-
Notifications
You must be signed in to change notification settings - Fork 1
/
210518_tidytuesday_ask_a_manager.Rmd
103 lines (84 loc) · 4.67 KB
/
210518_tidytuesday_ask_a_manager.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
---
title: "210518_tidytuesday_ask_a_manager"
author: "Angela"
date: "5/4/2021"
output: html_document
---
```{r}
# Global chunk defaults: hide code, messages and warnings in the knitted report.
# knitr's option names are singular (`message`, `warning`); the plural
# spellings are not knitr options and therefore suppress nothing.
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
```
```{r}
library(tidytuesdayR)
library(tidyverse)
library(ggResidpanel)
library(ggtext)
library(ggpubr)
```
```{r}
# Fetch the TidyTuesday release for 2021, week 21, and open its readme.
tuesdata <- tidytuesdayR::tt_load(x = 2021, week = 21)
tidytuesdayR::readme(tuesdata)
```
```{r}
# Basic string clean-up of the raw survey answers.
survey <- tuesdata$survey

# Age brackets, in the order used for the factor levels.
age_levels <- c(
  "under 18", "18-24", "25-34", "35-44", "45-54", "55-64", "65 or over"
)

# Experience answers that keep their internal spaces once the unit is removed.
open_ended_ranges <- c("1 or less", "41 or more")

# Remove the "year"/"years" unit from an experience answer, then strip the
# remaining spaces from every answer that is not one of the open-ended ranges.
tidy_experience <- function(answer) {
  without_unit <- str_replace_all(answer, c("year " = "", " years" = ""))
  if_else(
    without_unit %in% open_ended_ranges,
    without_unit,
    str_replace_all(without_unit, " ", "")
  )
}

# Title-cased country spellings that are all recoded to "United States".
us_spellings <- c(
  "California", "San Francisco", "The United States", "The Us", "U. S",
  "U. S.", "U.s", "U.s.", "U.s.a", "U.s.a.", "U.s>", "U.sa",
  "Uniited States", "Unite States", "United States", "United Sates",
  "United Sates Of America", "United Stares", "United State",
  "United State Of America", "United Statea", "United Stated",
  "United Stateds", "United Statees", "United States Of America",
  "United States Of American", "United States Of Americas",
  "United Statesp", "United Statew", "United Statss", "United Stattes",
  "United Statues", "United Status", "United Statws", "United Sttes",
  "Unitedstates", "Uniteed States", "Unitef Stated", "Uniter Statez",
  "Unites States", "Unitied States", "Uniyed States", "Uniyes States",
  "Unted States", "Untied States", "Us", "Us Of A", "Usa", "Usaa"
)

survey <- survey %>%
  mutate(
    how_old_are_you = factor(how_old_are_you, levels = age_levels),
    overall_years_of_professional_experience =
      tidy_experience(overall_years_of_professional_experience),
    years_of_experience_in_field =
      tidy_experience(years_of_experience_in_field),
    # Title-case first so the lookup only has to list one casing per spelling.
    country = str_to_title(country),
    country = if_else(country %in% us_spellings, "United States", country),
    # Merge "Prefer not to answer" into the combined "Other or ..." category;
    # which() skips missing answers so they stay NA.
    gender = replace(
      gender,
      which(gender == "Prefer not to answer"),
      "Other or prefer not to answer"
    )
  )
```
```{r}
# Keep only the rows used in the plot: country "United States", currency
# "USD", a non-missing gender, and an age bracket other than the two excluded
# ones. Then drop every race category with fewer than 500 remaining rows.
excluded_age_brackets <- c("under 18", "65 or over")

survey_reduced <- survey %>%
  filter(country == "United States") %>%
  filter(currency == "USD") %>%
  filter(!is.na(gender)) %>%
  filter(!(how_old_are_you %in% excluded_age_brackets)) %>%
  group_by(race) %>%
  filter(n() >= 500) %>%
  ungroup()
```
```{r}
# Horizontal box plots of annual salary by race, one facet row per age bracket.
boxplot_salaries <- survey_reduced %>%
  ggplot(aes(y = annual_salary, x = race, fill = how_old_are_you)) +
  geom_boxplot(alpha = 0.8, show.legend = FALSE) +
  facet_grid(how_old_are_you ~ .) +
  scale_y_continuous(
    name = "Annual Salary",
    breaks = c(100000, 200000, 300000, 400000, 500000),
    labels = scales::comma
  ) +
  scale_fill_brewer(palette = "Greens") +
  # Zoom to 0-500k on the coordinate system instead of through scale limits:
  # scale limits discard salaries above the cap before the box-plot statistics
  # are computed, whereas coord limits only crop the view. Clipping is left at
  # its default ("on") so points beyond the cap are not drawn outside the panel.
  coord_flip(ylim = c(0, 500000)) +
  theme_pubclean() +
  labs(
    title = "<b><span style = 'font-size:14pt;face:bold'>Manager Salaries in the US: Differences by Race and Age</span></b>",
    x = NULL,
    caption = "Data: Ask A Manager Survey | #TidyTuesday | @Alessine"
  ) +
  theme(
    axis.ticks.y = element_blank(),
    # The title is rendered by ggtext as a filled text box.
    plot.title = element_textbox_simple(
      margin = margin(t = 8, b = 6),
      padding = margin(5.5, 5.5, 5.5, 5.5),
      fill = "lightgray"
    ),
    plot.title.position = "plot",
    # Manually tuned horizontal offset for the caption.
    plot.caption = element_text(hjust = -0.84)
  )

# Show the figure in the knitted document and save a PNG copy.
boxplot_salaries
ggsave("boxplot_salaries.png", boxplot_salaries)
```