-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinal_project.Rmd
69 lines (58 loc) · 2.5 KB
/
final_project.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
---
title: "final_project"
output: html_document
---
```{r setup, include=FALSE}
# Echo each chunk's source code in the rendered document by default.
knitr::opts_chunk$set(echo = TRUE)
# NOTE(review): plyr is attached after dplyr, so same-named functions (e.g.
# summarise) resolve to plyr's versions; confirm that this order is intended.
library(dplyr)
library(plyr)
library(tidyr)
library(ggplot2)
# Map projection support; presumably needed by coord_map() in the q2 chunk.
library(mapproj)
# Presumably the source of the fifty_states map data used in the q2 chunk.
library(fiftystater)
```
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r summary}
# Load the raw data set into `data`, which the later chunks read from.
# The file is addressed through its full path instead of calling setwd(), so
# the chunk no longer changes the session's working directory as a side effect.
# NOTE(review): the directory is machine-specific; consider a path relative to
# this document so that the report knits on other machines.
data_dir <- "/Users/kevinchen/Documents/stor320"
# as.is = TRUE keeps character columns as character vectors (no factors).
data <- read.csv(file.path(data_dir, "h1data.csv"), as.is = TRUE)
```
```{r q1}
# Q1: how does the prevailing wage vary with the primary education level?

# Keep only the records that have a prevailing wage rate.
wage_data <- data[!is.na(data$PWD_WAGE_RATE), ]

# Mean and standard deviation of the wage rate per education level (printed).
# plyr is namespaced explicitly because dplyr, which is also attached, exports
# a different `summarise`; this keeps the call independent of attach order.
plyr::ddply(
  wage_data,
  ~PRIMARY_EDUCATION_LEVEL,
  plyr::summarise,
  mean = mean(PWD_WAGE_RATE),
  sd = sd(PWD_WAGE_RATE)
)

# Display order of the education levels, from least to most schooling.
level <- c(
  "None", "High School/GED", "Associate's", "Bachelor's", "Master's",
  "Doctorate (PhD)", "Other Degree (JD, MD, etc.)"
)

# Build the ordered factor once and use it for both the x axis and the fill,
# so that the legend lists the levels in the same order as the axis.
# Values that are not listed in `level` become NA.
plot_data <- wage_data
plot_data$education <- factor(plot_data$PRIMARY_EDUCATION_LEVEL, levels = level)

# Axis text is rotated in both plots because the level names are long.
rotated_x_labels <- theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Violin plot: full wage distribution within each education level.
ggplot(plot_data, aes(education, PWD_WAGE_RATE)) +
  geom_violin(aes(fill = education)) +
  ggtitle("Distribution of Wages by Education Level") +
  xlab("Primary Education Level") +
  ylab("Prevailing Wage Rate") +
  labs(fill = "Primary Education Level") +
  rotated_x_labels

# Bar chart: mean wage per education level.
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
ggplot(plot_data, aes(education, PWD_WAGE_RATE)) +
  stat_summary(fun = "mean", geom = "bar", fill = "#20456b") +
  ggtitle("Mean Wages by Education Level") +
  xlab("Primary Education Level") +
  ylab("Prevailing Wage Rate") +
  rotated_x_labels
```
```{r q2}
# Q2: how many records are there per primary worksite state?

# Count the rows per worksite state. The formula interface of aggregate()
# omits rows in which VISA_CLASS or PRIMARY_WORKSITE_STATE is NA.
count <- aggregate(
  cbind(count = VISA_CLASS) ~ PRIMARY_WORKSITE_STATE,
  data = data,
  FUN = length
)

# Keep only the regions that exist in the map data. This replaces dropping
# rows 8, 35, 40 and 48 by position, which silently removes the wrong rows as
# soon as the set of worksite values in the data changes.
# NOTE(review): assumes fifty_states identifies regions by a lower-case `id`
# column; confirm that the retained rows match the set intended originally.
map_regions <- unique(fifty_states$id)
count <- count[tolower(count$PRIMARY_WORKSITE_STATE) %in% map_regions, ]
count

# Lower-case the state names so they can be used as map ids.
count$PRIMARY_WORKSITE_STATE <- tolower(count$PRIMARY_WORKSITE_STATE)

ggplot(count, aes(map_id = PRIMARY_WORKSITE_STATE)) +
  # map points to the fifty_states shape data
  geom_map(aes(fill = count), map = fifty_states) +
  borders("state", colour = "white") +
  scale_fill_gradient(low = "#56B1F7", high = "#132B43", guide = "colorbar") +
  # geom_map() does not set the axis ranges itself, so take them from the map.
  expand_limits(x = fifty_states$long, y = fifty_states$lat) +
  coord_map() +
  # Hide the longitude/latitude breaks.
  scale_x_continuous(breaks = NULL) +
  scale_y_continuous(breaks = NULL) +
  ggtitle("Number of H1B Visa Holders by State") +
  labs(x = "", y = "", fill = "Count") +
  theme(legend.position = "bottom", panel.background = element_blank())
```