-
Notifications
You must be signed in to change notification settings - Fork 0
/
analysis.R
83 lines (71 loc) · 2.11 KB
/
analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Analysis file for measuring segregation
#
# Load libraries
library(dplyr)
# Get functions for segregation metrics
source('metric_functions.R')
# Tract-level lookup table: keep only the tract identifier and its land
# area (ALAND, exposed as `area`); it is joined onto each city's data by GEOID
geo.data <- select(read.delim("data/Gaz_tracts_national.txt"),
                   GEOID,
                   area = ALAND)
# Load every prepped city file into a named list of data frames
# help from: http://stackoverflow.com/questions/5319839/,
# http://stackoverflow.com/questions/1105659/
folder.location <- file.path('data/prepped')
# `pattern` is a regular expression, not a shell glob, so anchor on a
# literal ".csv" extension to pick up CSV files only
filenames <- list.files(path = folder.location,
                        pattern = '\\.csv$')
city.data <- lapply(file.path(folder.location, filenames),
                    read.csv,
                    stringsAsFactors = FALSE) %>%
  # name each list element after its file, minus the "_race.csv" suffix
  setNames(gsub('_race.csv', '', filenames, fixed = TRUE)) %>%
  lapply(function(df) {
    df %>%
      # Remove the "-exclude" suffix carried by some GEOIDs and convert to
      # double so the column can serve as the join key against geo.data
      # (assumes geo.data$GEOID is read as numeric -- TODO confirm)
      mutate(GEOID = as.double(gsub('-exclude', '', GEOID, fixed = TRUE))) %>%
      # add geographical data; the inner join keeps only rows whose GEOID
      # also appears in geo.data
      inner_join(geo.data, by = "GEOID")
  })
# Compute metrics for each city
# Display names for the cities. They are attached to the metrics purely by
# position, so they must be listed in the same order as the elements of
# city.data (list.files() returns file names sorted alphabetically).
city.names <- c(
  'Baltimore',
  'Charleston',
  'Chicago',
  'Columbus',
  'Dayton',
  'Denver',
  'Kansas City',
  'Memphis',
  'Milwaukee',
  'Oklahoma City',
  'Pittsburgh',
  'St. Louis',
  'Syracuse',
  'Wichita')
# Fail loudly if a file was added or removed without updating city.names,
# instead of letting the labels be recycled or misaligned
stopifnot(length(city.names) == length(city.data))
# Build the table with data.frame() so the metric columns stay numeric;
# going through cbind() would coerce them to character and back.
# vapply() additionally guarantees each metric is a single number per city.
seg.metrics <- data.frame(
  City = city.names,
  Gini = vapply(city.data, giniCoeff, numeric(1)),
  Correlation = vapply(city.data, correlationRatio, numeric(1)),
  Delta = vapply(city.data, deltaIndex, numeric(1)),
  stringsAsFactors = FALSE)