-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cd11faf
commit e6b5a9d
Showing
7 changed files
with
327 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,287 @@ | ||
--- | ||
title: 'Lab 4: Exploratory Data Analysis' | ||
format: | ||
html: | ||
self-contained: true | ||
editor: source | ||
knitr: | ||
opts_chunk: | ||
message: false | ||
--- | ||
|
||
```{r} | ||
#| warning: false | ||
#| messages: false | ||
#| document: show | ||
library(tidyverse) | ||
|
||
sqf_url <- "https://www1.nyc.gov/assets/nypd/downloads/zip/analysis_and_planning/stop-question-frisk/sqf-2011-csv.zip" | ||
temp <- tempfile() | ||
download.file(sqf_url, temp) | ||
sqf_zip <- unzip(temp, "2011.csv") | ||
sqf_2011 <- read.csv(sqf_zip, stringsAsFactors = FALSE) | ||
sqf_2011_race_cat <- read.csv("https://raw.githubusercontent.com/lindsaypoirier/STS-101/master/Data/SQF/sqf_race_categories.csv", stringsAsFactors = FALSE) | ||
rm(sqf_url) | ||
rm(temp) | ||
rm(sqf_zip) | ||
``` | ||
|
||
```{r} | ||
#| warning: false | ||
#| messages: false | ||
#| document: show | ||
sqf_2011 <- | ||
sqf_2011 |> | ||
select(pct, race, age, frisked, pistol, riflshot, asltweap, knifcuti, machgun, othrweap, sumissue, arstmade) |> | ||
left_join(sqf_2011_race_cat, by = "race") |> | ||
mutate(across(frisked:arstmade, | ||
~ case_when(. == "Y" ~ 1, . == "N" ~ 0))) | ||
rm(sqf_2011_race_cat) | ||
``` | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
|
||
Add two new columns. The first should indicate whether a weapon was found, and the second should indicate whether an arrest/summons was made. | ||
|
||
::: | ||
|
||
|
||
|
||
```{r} | ||
#| eval: false | ||
sqf_2011 <- | ||
sqf_2011 |> | ||
#Add a variable for weapon found | ||
mutate(wpnfound = case_when(pistol == 1 | | ||
riflshot == 1 | | ||
asltweap == 1 | | ||
knifcuti == 1 | | ||
machgun == 1 | | ||
othrweap == 1 ~ 1, | ||
TRUE ~ 0)) | ||
sqf_2011 <- | ||
sqf_2011 |> | ||
#Add a variable for arrest made or summons issued | ||
mutate(arrestsumm = case_when(sumissue == 1 | | ||
arstmade == 1 ~ 1, | ||
TRUE ~ 0)) | ||
``` | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
|
||
Subset the dataset to the six variables listed in the data dictionary above. | ||
|
||
::: | ||
|
||
|
||
|
||
```{r} | ||
#| eval: false | ||
sqf_2011 <- | ||
sqf_2011 |> | ||
select(pct, arrestsumm, age, wpnfound, race_cat, frisked) | ||
``` | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
Calculate the number of stops in 2011. If you are not sure which function to use below, you may want to refer to the list of Summary functions in the the Data Wrangling cheatsheet. Remember that each row in the data frame is a stop. | ||
::: | ||
|
||
|
||
|
||
```{r} | ||
#| eval: false | ||
total_stops <- | ||
sqf_2011 |> | ||
summarize(Count = n()) |> | ||
pull() | ||
|
||
total_stops | ||
``` | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
How many stops did not result in an arrest or summons in 2011? What percentage of stops did not result in an arrest or summons? | ||
::: | ||
|
||
|
||
|
||
```{r} | ||
#| eval: false | ||
sqf_2011 |> | ||
#Subset to rows where suspect innocent | ||
filter(arrestsumm == 0) |> | ||
#Calculate number of observations | ||
summarise(total_innocent = n(), | ||
percent_innocent = n() / total_stops * 100) | ||
``` | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
|
||
In how many stops were the individuals aged 14-24? In what percentage of stops were the individuals aged 14-24? | ||
::: | ||
|
||
|
||
|
||
```{r} | ||
#| eval: false | ||
sqf_2011 |> | ||
#Subset to rows where suspect age 14-24 | ||
filter(age > 14 & age < 24) |> | ||
#Calculate number of observations and percentage of observations | ||
summarise(total_14_24 = n(), | ||
percent_14_24 = n() / total_stops * 100) | ||
``` | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
|
||
Fix the code below to calculate the correct number of stops for individuals 14-24. | ||
|
||
::: | ||
|
||
|
||
|
||
```{r} | ||
#| eval: false | ||
total_stops_age_recorded <- | ||
sqf_2011 |> | ||
#Subset to rows where age is not 999 | ||
filter(age != 999) |> | ||
summarize(Count = n()) |> | ||
pull() | ||
|
||
sqf_2011 |> | ||
filter(age >= 14 & age <= 24) |> | ||
summarize(total_14_24 = n(), | ||
percent_14_24 = n() / total_stops_age_recorded * 100) | ||
``` | ||
|
||
|
||
This still doesn't match the values we see on the website, but it does match the values we see in the NYCLU's 2011 report on Stop, Question, and Frisk data. This is typically when I would reach out to a representative at the NYCLU to inquire about the discrepancy. | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
|
||
How many stops were there per race in 2011? What percentage of stops per race in 2011? Arrange by number of stops in descending order. | ||
::: | ||
|
||
|
||
|
||
```{r} | ||
#| eval: false | ||
total_stops_race_recorded <- | ||
sqf_2011 |> | ||
#Subset to rows where race_cat is not NA or "OTHER" | ||
filter(!is.na(race_cat) & race_cat != "OTHER") |> | ||
summarize(Count = n()) |> | ||
pull() | ||
|
||
sqf_2011 |> | ||
#Subset to rows where race_cat is not NA or "OTHER" | ||
filter(!is.na(race_cat) & race_cat != "OTHER") |> | ||
#Group by race | ||
group_by(race_cat) |> | ||
#Calculate number of observations | ||
summarise(stops = n(), | ||
percent_stops = n() / total_stops_race_recorded * 100) |> | ||
#Sort by stops in descending order | ||
arrange(desc(stops)) | ||
``` | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
|
||
In how many stops were the individuals identified as Latinx (i.e. "WHITE-HISPANIC" or "BLACK-HISPANIC")? In what percentage of stops were the individuals identified as Latinx? | ||
::: | ||
|
||
|
||
|
||
```{r} | ||
#| eval: false | ||
sqf_2011 |> | ||
#Subset to rows where race_cat is "WHITE-HISPANIC" or "BLACK-HISPANIC" | ||
filter(race_cat %in% c("WHITE-HISPANIC", "BLACK-HISPANIC")) |> | ||
#Calculate number of observations | ||
summarise(stops_Latinx = n(), | ||
percent_Latinx = n() / total_stops_race_recorded * 100) | ||
``` | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
|
||
What percentage of stops in 2011 resulted in a frisk per race? What percentage of stops in 2011 resulted in a weapon found per race? What percentage of stops in 2011 resulted in an arrest or summons per race? In your resulting data table, each row should be a race, and there should be columns for `stops`, `percent_stops`, `percent_frisked`, `percent_wpnfound` , and `percent_arrestsumm`. | ||
::: | ||
|
||
|
||
|
||
```{r} | ||
#| eval: false | ||
# Write code here. | ||
|
||
|
||
sqf_2011_percents <- sqf_2011 |> | ||
filter(race_cat %in% c( | ||
"BLACK","WHITE-HISPANIC", "WHITE", "BLACK-HISPANIC", "ASIAN/PACIFIC ISLANDER", "AMERICAN INDIAN/ALASKAN NATIVE") | ||
) |> | ||
group_by(race_cat) |> | ||
summarise(stops = n(), | ||
percent_stops = n()/total_stops *100, | ||
percent_frisked = sum(frisked)/n() *100, | ||
percent_wpnfound = sum(wpnfound)/n() *100, | ||
percent_arrestsumm = sum(arrestsumm)/n() *100 | ||
) | ||
|
||
sqf_2011_percents | ||
``` | ||
|
||
|
||
###### Question | ||
|
||
::: question | ||
|
||
Below, in 2-3 sentences, summarize what you learn from reviewing the summary statistics from the code above. What does this tell us about the constitutionality of 2011 stop and frisk activity in NYC? | ||
::: | ||
|
||
|
||
|
||
|
||
```{r} | ||
#| document: show | ||
Make a data vizualization | ||
``` | ||
|
||
|
||
# Any lab vizualization is acceptable. | ||
|
||
|
||
```{r} | ||
sqf_2011_percents |> | ||
filter(!is.na(race_cat) & race_cat != "OTHER") |> | ||
ggplot() + | ||
geom_col(aes(x=race_cat , y= percent_frisked)) + | ||
coord_flip() | ||
|
||
``` | ||
|
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.