Skip to content

Commit e268d2b

Browse files
committed
Add script for computation of wes correction factors
1 parent ece209a commit e268d2b

File tree

2 files changed

+58
-0
lines changed

2 files changed

+58
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/usr/bin/env Rscript
2+
3+
## ---------------------------
4+
##
5+
## Script name: computeCaptureKitCorrectionFactor.R
6+
##
7+
## Purpose of script: Compure correction factors for triplets between WGS and WES pipelines.
8+
## For details you can have a look at the documentation of the R package YAPSA:
9+
## https://www.bioconductor.org/packages/release/bioc/vignettes/YAPSA/inst/doc/vignette_exomes.html#introduction
10+
##
11+
## Author: Niklas Reimer, M.Sc.
12+
##
13+
## Date Created: 2020-11-04
14+
15+
##
16+
## ---------------------------
17+
18+
require(bedr)
19+
require(plyr)
20+
require(stringr)
21+
22+
args = commandArgs(trailingOnly=TRUE)
23+
24+
# read bed file
25+
bed <- read.table(args[1])
26+
bed <- transform(bed, V1 = as.character(V1), V2 = as.numeric(V2), V3 = as.numeric(V3))
27+
bed <- bedr.sort.region(bed)
28+
29+
# extract regions from fasta
30+
fasta <- get.fasta(bed, fasta = args[2])
31+
32+
# load targetCapture_cor_factors
33+
load(args[3])
34+
35+
# read triplets with every possible shift
36+
tripletlist1 <- strsplit(as.character(fasta$sequence), split = "(?<=.{3})", perl = TRUE)
37+
tripletlist2 <- strsplit(as.character(substring(fasta$sequence, 2)), split = "(?<=.{3})", perl = TRUE)
38+
tripletlist3 <- strsplit(as.character(substring(fasta$sequence, 3)), split = "(?<=.{3})", perl = TRUE)
39+
triplets1 <- unlist(tripletlist1)
40+
triplets2 <- unlist(tripletlist2)
41+
triplets3 <- unlist(tripletlist3)
42+
triplets <- c(triplets1, triplets2, triplets3)
43+
44+
# filter by sequence
45+
counts <- count(toupper(triplets))
46+
# remove sequences with length lower than 3
47+
countsFiltered <- counts[str_length(counts$x) == 3,]
48+
49+
# remove ambigious triplets
50+
countsFiltered <- countsFiltered[str_detect(countsFiltered$x, "N", negate = TRUE),]
51+
52+
# calculate correction factors
53+
abs_cor <- targetCapture_cor_factors["hs37d5"]$hs37d5$abs_cor / countsFiltered$freq
54+
rel_cor <- abs_cor / (sum(targetCapture_cor_factors["hs37d5"]$hs37d5$abs_cor) / sum(countsFiltered$freq))
55+
56+
# save computated data
57+
targetCapture_cor_factors[[args[4]]] <- list("rel_cor" = rel_cor, "abs_cor" = abs_cor)
58+
save(targetCapture_cor_factors, file = file.path(args[5]))
Binary file not shown.

0 commit comments

Comments
 (0)