# data_filtering.R
# Loading and filtering data =================================
library(dplyr)
library(ESDist)
# The ESDist package includes two data files: ot_dat_raw, which contains all
# effect sizes from the included meta-analyses, and ot_dat, which is the
# filtered version of that data. The code below demonstrates how the raw data
# can be filtered, using the ci_to_se() helper function along the way.

# Load the raw data and create a working copy called ot_dat, which is the
# dataset that gets filtered.
dat <- ot_dat_raw
ot_dat <- dat

# Some effect sizes are converted to Hedges' g later on, so we first store a
# separate raw_es column with the effect sizes as they were reported. It
# starts as a copy of yi, with the sign flipped for rows whose
# favours_oxytocin value is "negative".
# which() drops NA comparisons: a missing favours_oxytocin value would
# otherwise make the subscripted assignment fail. Such rows keep their sign.
ot_dat$raw_es <- ot_dat$yi
reverse_sign <- which(ot_dat$favours_oxytocin == "negative")
ot_dat$raw_es[reverse_sign] <- -1 * ot_dat$raw_es[reverse_sign]

# Give every effect size an ID, so we know which ones are eventually filtered
# out. seq_len() also behaves correctly for a dataset with zero rows.
ot_dat$ID <- seq_len(nrow(ot_dat))

# Where no standard error is available (sei is NA), calculate it from the
# bounds of the 95% CI with ci_to_se(). Existing standard errors are kept.
# raw_se then stores the standard errors before any conversion.
missing_se <- is.na(ot_dat$sei)
ot_dat$sei[missing_se] <- ci_to_se(
  ot_dat$lower[missing_se],
  ot_dat$upper[missing_se]
)
ot_dat$raw_se <- ot_dat$sei
# Next, we convert all effect sizes we can to Hedges' g, based on group sizes.
# We use the simplified bias-correction factor given by Hedges (1981),
# 1 - 3 / (4 * df - 1), where the degrees of freedom depend on study design:
#   Between: n1 + n2 - 2
#   Within:  n_total - 1
# Rows already reported as Hedges' g keep their yi and sei. Finally, all
# effect sizes and standard errors are rounded to three decimal places.

# Row masks. %in% and the explicit is.na() check never yield NA, so rows with
# a missing design or es_type are skipped instead of making the subscripted
# assignments below fail.
is_between <- ot_dat$design %in% "Between"
is_within <- ot_dat$design %in% "Within"
needs_conversion <- !is.na(ot_dat$es_type) & ot_dat$es_type != "Hedges' g"

# Make sure the df column exists at full length before filling parts of it;
# assigning into a missing column could otherwise produce a vector that is
# shorter than the data.
if (!("df" %in% names(ot_dat))) {
  ot_dat$df <- rep(NA_real_, nrow(ot_dat))
}
ot_dat$df[is_between] <- ot_dat$n1[is_between] + ot_dat$n2[is_between] - 2
ot_dat$df[is_within] <- ot_dat$n_total[is_within] - 1
ot_dat$h_factor <- 1 - 3 / (4 * ot_dat$df - 1)

# NOTE(review): converted rows take their value from raw_es, whereas rows
# already reported as Hedges' g keep their existing yi -- confirm that both
# follow the same sign convention.
ot_dat$yi[needs_conversion] <-
  ot_dat$raw_es[needs_conversion] * ot_dat$h_factor[needs_conversion]

# The squared standard error is scaled by the squared correction factor.
ot_dat$sei[needs_conversion] <- sqrt(
  ot_dat$raw_se[needs_conversion]^2 * ot_dat$h_factor[needs_conversion]^2
)

ot_dat$yi <- round(ot_dat$yi, digits = 3)
ot_dat$sei <- round(ot_dat$sei, digits = 3)
# Keep the most precise effect size per study and group (some studies have
# multiple groups): first drop rows without an effect size, then keep the
# row(s) with the lowest SE within each study/group combination. In case
# several effects share that lowest SE, only the effect size(s) closest to
# zero in absolute terms are kept.
# NOTE(review): min(sei) is NA when a group contains a missing sei, in which
# case no row of that group passes the filter -- confirm this is intended.
# The grouped pipeline returns a tibble, so the last step turns the result
# back into a plain data frame.
ot_dat <- ot_dat %>%
  filter(!is.na(yi)) %>%
  group_by(study_doi, group) %>%
  filter(sei == min(sei)) %>%
  filter(abs(yi) == min(abs(yi))) %>%
  ungroup() %>%
  as.data.frame()