-
Notifications
You must be signed in to change notification settings - Fork 0
/
worksheet-4.R
126 lines (94 loc) · 2.47 KB
/
worksheet-4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
## Tidy Concept
# Inline "wide" example: one row per block, one column per treatment arm
# (drug, control, placebo), parsed from comma-separated text.
trial <- read.csv(
  text = "
block, drug, control, placebo
1, 0.22, 0.58, 0.31
2, 0.12, 0.98, 0.47
3, 0.42, 0.19, 0.40
",
  header = TRUE
)
## Pivot wide to long
library(tidyr)
# Stack every column except the block identifier into treatment/response
# pairs: the old column name goes to `treatment`, its value to `response`.
tidy_trial <- trial %>%
  pivot_longer(
    cols = -block,
    names_to = 'treatment',
    values_to = 'response'
  )
## Pivot long to wide
# Inline "long" example: one row per (participant, attribute) measurement.
# Participant 3 has an age row but no income row.
# strip.white is left at its default, so the attr values keep the space that
# follows each comma in the text below.
survey <- read.csv(
  text = "
participant, attr, val
1 , age, 24
2 , age, 57
3 , age, 13
1 , income, 30
2 , income, 60
",
  header = TRUE
)
# First attempt: one column per attribute. Participant 3 has no income row
# in `survey`, so that cell has no value to take.
tidy_survey <- survey %>%
  pivot_wider(names_from = attr, values_from = val)
# Second attempt (overwrites the first): missing participant/attribute
# combinations are filled with 0 via values_fill.
tidy_survey <- survey %>%
  pivot_wider(names_from = attr, values_from = val, values_fill = 0)
## Sample Data
library(data.table)
# First pass: read the CBP file and let fread guess every column type.
cbp <- fread('../data/cbp15co.csv')
# Second pass (replaces the first): read the same file again, forcing the two
# FIPS code columns to character instead of a guessed type
# (presumably to keep leading zeros in the codes -- confirm against the data).
# The path uses the same '../data/' prefix as every other read in this
# script, so both reads resolve from one working directory.
cbp <- fread(
  '../data/cbp15co.csv',
  colClasses = c(
    FIPSTATE = 'character',
    FIPSCTY = 'character'))
# ACS table, with its FIPS column likewise read as character.
acs <- fread(
  '../data/ACS/sector_ACS_15_5YR_S2413.csv',
  colClasses = c(FIPS = 'character'))
## dplyr Functions
library(dplyr)
# Keep rows whose NAICS value contains a run of four dashes but not a run
# of six.
cbp2 <- cbp %>%
  filter(grepl('----', NAICS) & !grepl('------', NAICS))
library(stringr)
# Alternative (overwrites cbp2): keep rows whose NAICS value contains two
# digits followed by four dashes.
cbp2 <- cbp %>%
  filter(str_detect(NAICS, '[0-9]{2}----'))
# Add a FIPS column by concatenating FIPSTATE and FIPSCTY.
cbp3 <- cbp2 %>%
  mutate(FIPS = str_c(FIPSTATE, FIPSCTY))
# Same again (overwrites cbp3), additionally removing the first run of
# dashes from each NAICS value.
cbp3 <- cbp2 %>%
  mutate(
    FIPS = str_c(FIPSTATE, FIPSCTY),
    NAICS = str_remove(NAICS, '-+'))
# Filter and derive columns in a single pipeline, replacing cbp itself:
# keep the two-digit-plus-four-dash NAICS rows, build FIPS from its two
# parts, and remove the first run of dashes from NAICS.
cbp <- cbp %>%
  filter(str_detect(NAICS, '[0-9]{2}----')) %>%
  mutate(FIPS = str_c(FIPSTATE, FIPSCTY),
         NAICS = str_remove(NAICS, '-+'))
# Print (the result is not assigned) FIPS, NAICS and every column whose name
# starts with 'N'.
select(cbp, FIPS, NAICS, starts_with('N'))
## Join
# Lookup table read with its NAICS column forced to character.
sector <- fread(
  '../data/ACS/sector_naics.csv',
  colClasses = c(NAICS = 'character'))
# No `by` is given, so the join key is whatever column names cbp and sector
# have in common (presumably just NAICS -- confirm against the file).
cbp <- inner_join(cbp, sector)
## Group By
# Mark cbp as grouped by FIPS and Sector; the rows themselves are unchanged.
cbp_grouped <- group_by(cbp, FIPS, Sector)
## Summarize
# Sum every column whose name starts with 'N' (NAICS excluded) within each
# FIPS x Sector group, replacing cbp with the totals.
# across() is used instead of the superseded summarize_all(); selecting the
# columns inside across() also avoids calling select() on grouped data, which
# re-adds the grouping columns with a message.
# .groups = 'drop_last' states the default explicitly: the result stays
# grouped by FIPS, as it was before.
cbp <- cbp %>%
  group_by(FIPS, Sector) %>%
  summarize(
    across(c(starts_with('N'), -NAICS), sum),
    .groups = 'drop_last')
# Attach the ACS table; no `by` is given, so the join uses the column names
# the two tables share.
acs_cbp <- cbp %>%
  inner_join(acs)
#Exercise 1
# Reverse the earlier pivot_wider: stack every column except the participant
# identifier back into attr/val pairs.
# Selecting by exclusion avoids hard-coding the whitespace-padded column
# names (" age", " income") that come from the un-stripped survey text.
# The dangling trailing comma, which passed an empty argument into `...`,
# is removed.
long_survey <- pivot_longer(tidy_survey,
  cols = -participant,
  names_to = 'attr',
  values_to = 'val'
)
#Exercise 2
# Re-read the CBP file, treating empty strings as NA, and keep only the rows
# with NAICS '23----' and the columns whose names start with 'FIPS' or 'AP'.
# The FIPS code columns are read as character, matching the earlier cbp read
# in this script, so the codes are not type-guessed into numbers.
cbp_23 <- fread(
  '../data/cbp15co.csv',
  na.strings = '',
  colClasses = c(
    FIPSTATE = 'character',
    FIPSCTY = 'character')) %>%
  filter(NAICS == '23----') %>%
  select(starts_with('FIPS'), starts_with('AP'))
#Exercise 3