-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStructural_Equation_Model_analysis.R
180 lines (172 loc) · 4.9 KB
/
Structural_Equation_Model_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
##Setting our working directory
##setwd("C:\\..")
##Loading our data
df<-read.csv2("..", header=T)
#Part A - Inspection
##Let's take a look at the first rows of our dataframe
head(df)
##Skewness and kurtosis
library("pastecs")# For descriptive statistics
citation("pastecs")
library("lattice")# For density plots
citation("lattice")
##Check the continuous variables
##First from descriptive statistics
stat.desc(df[,2:9], basic = FALSE, desc = FALSE, norm = TRUE)
## Secondly using visualization
densityplot(df$educate)
densityplot(df$income)
densityplot(df$involve)
densityplot(df$school)
densityplot(df$teach)
densityplot(df$accept)
densityplot(df$achieve)
densityplot(df$CITO)
## Although, at first sight, plots look ok, descriptive statistics are clear: only educate is normally distributed
## Look for univariate outliers
standardized <- scale(df[,c(2:9)]) # Standardize variables
standardized<-as.data.frame(standardized)
outliers <- colSums(abs(standardized)>3, na.rm = T) # Count number of cases above/below 3
## Let's see the number of outliers
outliers # We have 9 outliers in total
## Missing values
## Let's see the total number for each variable
summary(df)
## Now let's see the patterns of missingness
library(VIM) # For the patterns of missingness
citation("VIM")
missing <- aggr(df)
summary(missing)
## For checking MCAR
library("BaylorEdPsych")# For MCAR test
library("mvnmle")# For MCAR test
citation("BaylorEdPsych")
citation("mvnmle")
MCAR <- LittleMCAR(df)
MCAR$chi.square
MCAR$df
MCAR$p.value # Our data are not MCAR!
## Test multivariate normality and outliers
library(MVN)
citation("MVN")
## Create subset with complete cases...required for MVN
df_comp <- df[complete.cases(df),]
names(df_comp)
MVdf <- mvn(df_comp[, c(2:9)], mvnTest = "royston", multivariatePlot = "qq",multivariateOutlierMethod = "adj", showOutliers = T, showNewData = T)
MVdf
## Multivariate NON-normal; no outliers; Also, we see that for univariate distributions, only educate variable satisfies the requirements for being normal distributed.
## Now let's check whether the correlations differ depending on listwise or pairwise deletion
library("psych")
citation("psych")
corr.test(df[2:9], df[2:9], use = "complete")
corr.test(df[2:9], df[2:9])# Default pairwise deletion
## No, they are quite similar
#PART B - Let's see the model now
## First the measurement model
library(lavaan)
citation("lavaan")
mdl1 <- ' SQ=~ 1*school + teach + accept
HE =~ 1*educate + income + involve
#variances
educate~~educate
income~~income
involve~~involve
teach~~teach
accept~~ accept
school~~ school
SQ~~SQ
HE~~HE
#Intercepts
educate~1
income~1
involve~1
teach~1
accept~1
school~1
#covariance
HE~~SQ
'
fit1<-lavaan(mdl1,data=df, missing = "fiml.x", estimator='MLR',test='Yuan-Bentler',se= "robust.huber.white")
summary(fit1, fit.measures=T, standardized=T)
fitMeasures(fit1)
## SEM model
mdl2<-' #regression
achieve~SQ+HE
CITO~achieve
#latent variables
SQ=~ 1*school + teach + accept
HE =~ 1*educate + income + involve
#variances
educate~~educate
income~~income
involve~~involve
teach~~teach
accept~~ accept
school~~ school
SQ~~SQ
HE~~HE
CITO~~CITO
achieve~~achieve
#covariance
HE~~SQ
#intercepts
educate~1
income~1
involve~1
teach~1
accept~1
school~1
achieve~1
CITO~1
'
fit2<-lavaan(mdl2,data=df, missing = "fiml.x", estimator='MLR',test='Yuan-Bentler',se= "robust.huber.white")
summary(fit2,fit.measures=T, standardized=T, rsquare=T)
## Let's look for misspecifications
summary(fit2, fit.measures=T, modindices = T)
mod_ind <- modificationindices(fit2)
## Which are the twenty relationships with the higher mi?
head(mod_ind[order(mod_ind$mi, decreasing=TRUE), ], 20)
# Let's visualize our model
library(semPlot)# For semPaths plot
citation("semPlot")
semPaths(fit2, layout = "tree", rotation = 2,
intercepts = F, residuals = F, nCharNodes = 10,
edge.color = "black",sizeMan=10, fade=F, esize=2,asize=2, edge.label.cex=0.5, normalize=T)
## The improved SEM model with CITO~SQ and CITO~HE
mdl3<-' #regression
achieve~SQ+HE
CITO~achieve
CITO~SQ
CITO~HE
#latent variables
SQ=~ 1*school + teach + accept
HE =~ 1*educate + income + involve
#variances
educate~~educate
income~~income
involve~~involve
teach~~teach
accept~~ accept
school~~ school
SQ~~SQ
HE~~HE
CITO~~CITO
achieve~~achieve
#covariance
HE~~SQ
#intercepts
educate~1
income~1
involve~1
teach~1
accept~1
school~1
achieve~1
CITO~1
'
fit3<-lavaan(mdl3,data=df, missing = "fiml.x", estimator='MLR',test='Yuan-Bentler',se= "robust.huber.white" )
summary(fit3,fit.measures=T, standardized=T,rsquare=T)
## Again visualization of our SEM model
semPaths(fit3, what= "col", "std", layout = "tree", rotation = 2,
intercepts = F, residuals = F, curve = 2, nCharNodes = 0,
edge.label.cex = 1, edge.color = "black", sizeMan = 10, nDigits = 3)