-
Notifications
You must be signed in to change notification settings - Fork 9
/
01_maps_code.R
292 lines (187 loc) · 7.33 KB
/
01_maps_code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# Install any packages this script needs that are not installed yet.
# tidycensus, viridis and leaflet are loaded with library() further down the
# script, so they are checked here alongside the others.
packages <- c(
  "tidyverse", "stringr", "censusapi", "sf", "tigris",
  "tidycensus", "viridis", "leaflet"
)
# Work out the missing set once and reuse it for both the check and the install
missing_packages <- setdiff(packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages, repos = "http://cran.us.r-project.org")
}
# Plotting (ggplot2) and spatial (sf) packages.
# Not installed yet? Uncomment and run the two install lines below.
# install.packages("ggplot2")
# install.packages("sf")
library(ggplot2)
library(sf)

# On a Mac, uncomment and run the two lines below
# options(device = "X11")
# X11.options(type = "cairo")

## Mapping a simple shape file

# Relative path to the state shapefile, then read it with sf
fifty_location <- "data/cb_2017_us_state_20m/cb_2017_us_state_20m.shp"
fifty_states <- st_read(dsn = fifty_location)

# Open the attribute table in the data viewer
View(fifty_states)

## Map fifty_states
# The layer carries its own data; no aesthetics are mapped yet
ggplot() +
  geom_sf(data = fifty_states)
## Join it to data

# readr not installed yet? Uncomment and run the line below
# install.packages("readr")
library(readr)

populations <- read_csv(file = "data/acs2016_1yr_B02001_04000US55.csv")
View(populations)

## Join data to blank shapefile and map

# Column count before the join, printed for comparison with the count after
ncol(fifty_states)

library(dplyr)

# The key column is "NAME" in the shapefile and "name" in the CSV
fifty_states <- fifty_states %>%
  left_join(populations, by = c(NAME = "name"))

## Did it work?
ncol(fifty_states)
colnames(fifty_states)

## What are the variables

# Keep every row except Hawaii, Alaska and Puerto Rico; filter() requires all
# comma-separated conditions to hold
forty_eight <- filter(
  fifty_states,
  NAME != "Hawaii",
  NAME != "Alaska",
  NAME != "Puerto Rico"
)

# Fill each polygon by the B02001001 column
ggplot(data = forty_eight, mapping = aes(fill = B02001001)) +
  geom_sf() +
  scale_fill_distiller(name = "Population", direction = 1) +
  labs(caption = "Source: US Census", title = "Population of 48 states")
## Downloading shape files directly into R
## Downloading Texas

# If you don't have tigris installed yet, uncomment the line below and run
# install.packages("tigris")
library(tigris)

# Ask tigris to return sf objects
options(tigris_class = "sf")

# cb = TRUE downloads a generalized (1:500k) counties file; the default,
# FALSE, is the most detailed TIGER file. TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
tx <- counties("TX", cb = TRUE)
# tx <- readRDS("backup_data/tx.rds")
View(tx)

## When we imported the file locally
# Note: this re-read replaces fifty_states with the raw shapefile, so any
# columns joined onto it earlier in the script are no longer present.
fifty_location <- "data/cb_2017_us_state_20m/cb_2017_us_state_20m.shp"
fifty_states <- st_read(fifty_location)
View(fifty_states)

## Mapping Texas
ggplot(tx) +
  geom_sf() +
  theme_void() +
  theme(panel.grid.major = element_line(colour = "transparent")) +
  labs(title = "Texas counties")
## Downloading Census data into R

# Set the Census API key for this R session.
# Sys.setenv() does not write to ~/.Renviron; to keep the key across sessions,
# add a CENSUS_KEY=... line to that file yourself.
Sys.setenv(CENSUS_KEY = "YOURKEYHERE")

# Reload ~/.Renviron so a key stored there is picked up. The existence check
# avoids the warning readRenviron() gives when the file cannot be opened.
renviron_path <- path.expand("~/.Renviron")
if (file.exists(renviron_path)) {
  readRenviron(renviron_path)
}

# Check to see that the expected key is output in your R console
Sys.getenv("CENSUS_KEY")

## Load the censusapi library
# If you don't have censusapi installed yet, uncomment the line below and run
# install.packages("censusapi")
library(censusapi)

## Look up Census tables
apis <- listCensusApis()
View(apis)
## Downloading Census data
## Downloading median income

# Request NAME, B19013_001E and B19013_001M for every county ("county:*")
# inside "state:48"
tx_income <- getCensus(
  name = "acs/acs5",
  vintage = 2016,
  vars = c("NAME", "B19013_001E", "B19013_001M"),
  region = "county:*",
  regionin = "state:48"
)
# tx_income <- readRDS("backup_data/tx_income.rds")
head(tx_income)

## Join and map

# NAME is not a usable key: in tx_income it carries "County, Texas" at the end.
# Rather than gsub the suffix away, join on the county code, which is already
# consistent between the two tables even though the column names differ.
tx4ever <- tx %>%
  left_join(tx_income, by = c(COUNTYFP = "county"))

# Fill counties by B19013_001E, with white county outlines
ggplot(data = tx4ever, mapping = aes(fill = B19013_001E)) +
  geom_sf(color = "white") +
  scale_fill_distiller(
    name = "Median income",
    palette = "Oranges",
    direction = 1
  ) +
  theme_void() +
  theme(panel.grid.major = element_line(colour = "transparent")) +
  labs(
    caption = "Source: US Census/ACS5 2016",
    title = "2016 Median income in Texas counties"
  )
## Download Census data and shapefiles together
## Load up tidycensus

# If you don't have tidycensus installed yet, uncomment and run the line below
# install.packages("tidycensus")
library(tidycensus)

# Pass it the census key you set up before
census_api_key("YOUR API KEY GOES HERE")

## Getting unemployment figures

# Request the two variables by bare code. The recode below matches on exactly
# these codes (B23025_005 -> Unemployed, B23025_002 -> Workforce).
# Names are deliberately omitted: get_acs() can substitute a vector's names
# for the codes in `variable`, which would leave the recode nothing to match.
jobs <- c("B23025_005", "B23025_002")

jersey <- get_acs(
  geography = "tract",
  year = 2016,
  variables = jobs,
  county = "Hudson",
  state = "NJ",
  geometry = TRUE
)
# jersey <- readRDS("backup_data/jersey.rds")
head(jersey)

## Transforming and mapping the data
library(tidyr)

jersey %>%
  # Replace the variable codes with readable labels that become column names
  mutate(variable = case_when(
    variable == "B23025_005" ~ "Unemployed",
    variable == "B23025_002" ~ "Workforce"
  )) %>%
  # Drop the margin of error before spreading the two estimates into columns
  select(-moe) %>%
  spread(variable, estimate) %>%
  mutate(percent_unemployed = round(Unemployed / Workforce * 100, 2)) %>%
  ggplot(aes(fill = percent_unemployed)) +
  geom_sf(color = "white") +
  theme_void() +
  theme(panel.grid.major = element_line(colour = "transparent")) +
  scale_fill_distiller(palette = "Reds", direction = 1, name = "Estimate") +
  labs(
    title = "Percent unemployed in Jersey City",
    caption = "Source: US Census/ACS5 2016"
  )
## Faceting maps (Small multiples)

# The names become the facet labels; the values are the variable codes requested
racevars <- c(
  White = "B02001_002",
  Black = "B02001_003",
  Asian = "B02001_005",
  Hispanic = "B03003_003"
)

# summary_var adds a summary_est column, used below as the denominator
harris <- get_acs(
  geography = "tract",
  variables = racevars,
  state = "TX",
  county = "Harris County",
  geometry = TRUE,
  summary_var = "B02001_001",
  year = 2017
)
# harris <- readRDS("backup_data/harris.rds")

## Faceting maps (Small multiples)
head(harris)

## Transforming and mapping the data
library(viridis)

harris %>%
  # Each group's share of the summary variable, as a percentage
  mutate(pct = 100 * (estimate / summary_est)) %>%
  # Outline and fill share one scale so tract borders blend into the fill
  ggplot(aes(fill = pct, color = pct)) +
  facet_wrap(~variable) +
  geom_sf() +
  coord_sf(crs = 26915) +
  scale_fill_viridis(direction = -1) +
  scale_color_viridis(direction = -1) +
  theme_void() +
  theme(panel.grid.major = element_line(colour = "transparent")) +
  # The data is requested through get_acs() with year = 2017, so the caption
  # cites the ACS rather than the 2010 Census
  labs(
    title = "Racial geography of Harris County, Texas",
    caption = "Source: US Census/ACS5 2017"
  )
## Welcome back Alaska and Hawaii

# County-level B17001_002 with B17001_001 as the summary (denominator) variable.
# year is pinned to 2016 so the data matches the caption below instead of
# following whatever default year the installed tidycensus uses.
# NOTE(review): newer tidycensus releases reportedly deprecate shift_geo in
# favour of tigris::shift_geometry() -- confirm against the installed version.
county_pov <- get_acs(
  geography = "county",
  variables = "B17001_002",
  summary_var = "B17001_001",
  year = 2016,
  geometry = TRUE,
  shift_geo = TRUE
) %>%
  mutate(pctpov = 100 * (estimate / summary_est))
# county_pov <- readRDS("backup_data/county_pov.rds")

# color = NA removes county outlines; datum = NA removes the graticule
ggplot(county_pov) +
  geom_sf(aes(fill = pctpov), color = NA) +
  coord_sf(datum = NA) +
  labs(
    title = "Percent of population in poverty by county",
    subtitle = "Alaska and Hawaii are shifted and not to scale",
    caption = "Source: ACS 5-year, 2016",
    fill = "% in poverty"
  ) +
  scale_fill_viridis(direction = -1)
## leaflet map
library(leaflet)

# Basic interactive map of the Texas counties; clicking a county shows its NAME
tx %>%
  leaflet() %>%
  addTiles() %>%
  addPolygons(popup = ~NAME)

# Creating a color palette based on the number range in the B19013_001E column
pal <- colorNumeric("Reds", domain = tx4ever$B19013_001E)

# Setting up the pop up text. Leaflet popups are rendered as HTML, so the line
# break has to be <br>; a "\n" does not start a new line there.
popup_sb <- paste0(
  "Median income in ", tx4ever$NAME.x,
  "<br>$", as.character(tx4ever$B19013_001E)
)

# Mapping it with the new tiles CartoDB.Positron
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-98.807691, 31.45037, zoom = 6) %>%
  addPolygons(
    data = tx4ever,
    # The formula is evaluated against `data`, so the column is named directly
    fillColor = ~pal(B19013_001E),
    fillOpacity = 0.7,
    weight = 0.2,
    smoothFactor = 0.2,
    popup = popup_sb
  ) %>%
  # No data is attached to the map itself, so the legend gets the values
  # as a plain vector
  addLegend(
    pal = pal,
    values = tx4ever$B19013_001E,
    position = "bottomright",
    title = "Median income"
  )