Skip to content

Commit e5f6e96

Browse files
committed
Add R-Script for gene panel
1 parent a11b942 commit e5f6e96

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed

genepanels/genepanel.R

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env Rscript
2+
3+
## ---------------------------
4+
##
5+
## Script name: bed2cBioPortalGenePanel.R
6+
##
7+
## Converts a bed file to a gene panel for cBioPortal.
8+
##
9+
## Author: Niklas Reimer, M.Sc.
10+
##
11+
## Date Created: 2020-10-30
12+
13+
##
14+
## ---------------------------
15+
16+
require(tidyr)
17+
require(biomaRt)
18+
19+
# Positional command-line arguments. The first one is the panel directory: the
# script reads meta.csv and target.bed from it and writes panel.txt into it.
args <- commandArgs(trailingOnly = TRUE)
# Fail early with a clear message: without this check a missing argument makes
# args[1] NA and the script later tries to open "NA/meta.csv".
if (length(args) < 1L) {
  stop(
    "Missing argument: path to the panel directory ",
    "containing meta.csv and target.bed",
    call. = FALSE
  )
}
20+
21+
# read genes provided by cBioPortal
22+
# Load the tab-separated cBioPortal gene table (with header row) from the
# current working directory; only double quotes are treated as quoting.
cbiogenes <- read.table(
  file = "cbioportal_genes.txt",
  header = TRUE,
  sep = "\t",
  quote = "\""
)
23+
# read provided metadata for panel
24+
# Load the semicolon-separated panel metadata (with header row) from the panel
# directory given as first command-line argument.
meta <- read.table(
  file = paste0(args[1], "/meta.csv"),
  header = TRUE,
  sep = ";"
)
25+
# read provided bed file
26+
# Load the target regions from the panel directory. The columns are addressed
# further down by read.table's default names V1, V2 and V3.
bed <- read.table(file = paste0(args[1], "/target.bed"))
27+
28+
# remove chr prefix if present
29+
# Build one "<chromosome>:<start>-<end>" string per BED row from columns V1-V3.
# Only a leading "chr" is stripped (anchored sub), so contig names that merely
# contain "chr" somewhere else are left intact.
# NOTE(review): BED start coordinates are conventionally 0-based whereas
# Ensembl regions are 1-based; the start is passed through unchanged here --
# confirm whether a +1 shift is wanted.
regions <- paste(
  sub("^chr", "", bed$V1),
  paste(bed$V2, bed$V3, sep = "-"),
  sep = ":"
)
30+
31+
# setup ensembl
32+
# Open the "ensembl" BioMart with the human gene dataset selected.
m <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
33+
# Ask the mart for the Entrez gene ids belonging to the target regions; all
# region strings are passed as the values of the single chromosomal_region
# filter.
df <- getBM(
  attributes = "entrezgene_id",
  filters = "chromosomal_region",
  values = list(regions),
  mart = m
)
34+
35+
# remove duplicate ids
36+
# Collapse the query result to distinct Entrez ids. NA ids are dropped first:
# the lookup below relies on match(), which treats NA as equal to NA, so a
# surviving NA would select every cBioPortal row whose Entrez id is missing.
entrez <- unique(df$entrezgene_id[!is.na(df$entrezgene_id)])
37+
38+
#get genes using entrez id from cbioportal dataset
39+
# Keep the HUGO symbols of the cBioPortal genes whose Entrez id is among the
# ids returned for the target regions.
genes <- cbiogenes$HUGO_GENE_SYMBOL[cbiogenes$ENTREZ_GENE_ID %in% entrez]
40+
41+
#sort genes
42+
# Put the selected gene symbols into ascending order.
genes <- sort(genes, decreasing = FALSE)
43+
44+
# write to file
45+
# From here on, divert everything printed to standard output into panel.txt
# inside the panel directory, until the closing sink() call.
sink(file = paste0(args[1], "/panel.txt"))
46+
# Print the panel's stable id from the metadata as "stable_id: <value>".
cat(paste0("stable_id: ", meta$stable_id))
47+
# Terminate the stable_id line with a newline.
writeLines("")
48+
# Print the panel description from the metadata as "description: <value>".
cat(paste0("description: ", meta$description))
49+
# Terminate the description line with a newline.
writeLines("")
50+
51+
# Start the gene_list line; no newline is written so the gene symbols follow
# on the same line.
writeLines("gene_list: ", sep = "")
52+
# Print the gene symbols tab-separated on the gene_list line. cat() puts the
# separator only between elements, so the line no longer ends in a stray tab.
# The former outer cat() around writeLines() only received NULL and printed
# nothing, so it is dropped. as.character() keeps this correct should the
# symbols be stored as a factor.
cat(as.character(genes), sep = "\t")
# End the gene_list line so the file is newline-terminated.
cat("\n")
53+
# Stop diverting standard output, which closes the panel file.
sink(file = NULL)

0 commit comments

Comments
 (0)