-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathinvestigate_pos_roh_ests.R
84 lines (64 loc) · 2.89 KB
/
investigate_pos_roh_ests.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
library(tidyverse)
library(snpStats)
# fitness data
load("data/survival_mods_data.RData")
# gwas
gwas_full <- readRDS("output/gwas_res_oar_roh_fac_full.rds")
# plink name
sheep_plink_name <- "data/sheep_geno_imputed_oar_filt"
# read merged plink data
sheep_bed <- paste0(sheep_plink_name, ".bed")
sheep_bim <- paste0(sheep_plink_name, ".bim")
sheep_fam <- paste0(sheep_plink_name, ".fam")
full_sample <- read.plink(sheep_bed, sheep_bim, sheep_fam)
snps_map <- full_sample$map
pos_ests <- gwas_full %>%
filter(p.value < 0.05/(39149*2)) %>%
filter(estimate >0)
neg_ests <- gwas_full %>%
filter(p.value < 0.05/(39149*2)) %>%
filter(estimate < 0)
table(as(full_sample$genotypes[, "oar3_OAR19_36946184"], "numeric"))
birth_dates <- fitness_data %>%
group_by(id) %>%
mutate(birth_year = as.numeric(as.character(birth_year))) %>%
summarise(birth_year = mean(birth_year))
get_freqs_pos <- function(year) {
ids <- birth_dates %>% filter(birth_year == year) %>% .$id
ids_ind <- rownames(full_sample$genotypes) %in% ids
snp_stats <- col.summary(full_sample$genotypes[ids_ind, pos_ests$snp.name]) %>%
as_tibble(rownames = "snp") %>%
mutate(birth_year = year)
}
all_freqs_pos <- map(sort(unique(birth_dates$birth_year)), get_freqs_pos) %>%
bind_rows()
all_freqs_pos %>%
filter(birth_year > 1990) %>%
ggplot(aes(birth_year, MAF, color = snp)) +
geom_line()
# check wether there is a general trewnd
# check maf of positives
geno_sub <- as_tibble(as(full_sample$genotypes[, pos_ests$snp.name], Class = "numeric"),
rownames = "id")
map(geno_sub[2:ncol(geno_sub)], function(x) as.data.frame(rbind(table(x)))) %>% bind_rows() -> test
snp_stats_sub <- col.summary(full_sample$genotypes[, pos_ests$snp.name])
get_freqs <- function(year, snps_ind) {
ids <- birth_dates %>% filter(birth_year == year) %>% .$id
ids_ind <- rownames(full_sample$genotypes) %in% ids
snp_stats <- col.summary(full_sample$genotypes[ids_ind, snps_ind]) %>%
as_tibble(rownames = "snp") %>%
mutate(birth_year = year)
}
all_snp_stats <- col.summary(full_sample$genotypes)
snps_ind <- which(all_snp_stats$MAF < 0.15 & all_snp_stats$MAF > 0.05) %>%
sample(10)
#snp_ind <- sample(1:417373, 10)
all_freqs <- map(sort(unique(birth_dates$birth_year)), get_freqs, snps_ind ) %>%
bind_rows()
all_freqs %>%
bind_rows(all_freqs_pos, .id = "type") %>%
filter(birth_year > 1990) %>%
ggplot(aes(birth_year, MAF, color = snp, alpha = type)) +
geom_line(size = 1) +
theme(legend.position = "none") +
scale_alpha_manual(values = c(0.2, 1))