-
-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathquantmerge_gene.R
61 lines (50 loc) · 1.89 KB
/
quantmerge_gene.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
library(tximport)
library(readr)
library(RCurl)
# 2019/03/16 Y.Yasumizu
# usage : Rscript quantmerge_gene.R
# automatically detect salmon output (*/salmon_output_${name}/quant.sf)
# metadataがないときはダウンロードする。
meta.m <- 'gencode.vM19.metadata.MGI.gz'
meta.base.m <- 'ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M19'
meta.h <- 'gencode.v29.metadata.HGNC.gz'
meta.base.h <- 'ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_29'
# quant.sfの検索
files <- list.files('.', recursive=T, pattern='quant.sf')
# sample nameの回収
names <- c()
for (f in files){
n <- (unlist(strsplit(files, 'salmon_output_')[1])[2])
n <- (unlist(strsplit(n, '/quant.sf')[1])[1])
# print(n)
names <- c(names, n)
}
print(names)
names(files) <- names
# 生物種の判定
sample_tx <- read.table(files[1], nrows = 2)[2,'V1']
if (length(grep("^ENSMUST.*", sample_tx)) == 1) {
print('MOUSE')
meta <- meta.m
meta.base <- meta.base.m
} else if (length(grep("^ENST.*", sample_tx)) == 1) {
print('HUMAN')
meta <- meta.h
meta.base <- meta.base.h
} else {
print('unknown transcript id. use mouse or human transcripts annotation by GENCODE.')
}
# metadataの取得。
f.meta <- list.files('.', recursive=T, pattern=meta)
if (length(f.meta) == 0){
print('Downloading metadata ... ')
print(paste0(meta.base,'/',meta))
download.file(paste0(meta.base,'/',meta), destfile = meta)
tx2knownGene <- read_delim(meta, '\t', col_names = c('TXNAME', 'GENEID'))
} else {
tx2knownGene <- read_delim(f.meta, '\t', col_names = c('TXNAME', 'GENEID'))
}
# txi.salmon <- tximport(files, type = "salmon", tx2gene = tx2knownGene)
txi.salmon <- tximport(files, type = "salmon", tx2gene = tx2knownGene, countsFromAbundance="scaledTPM")
# head(txi.salmon$counts)
write.table(txi.salmon$counts, file="scaledTPM.tsv",sep="\t",col.names=NA,row.names=T,quote=F)