#!/usr/bin/env Rscript

## ---------------------------
##
## Script name: bed2cBioPortalGenePanel.R
##
## Converts a bed file to a gene panel for cBioPortal.
##
## Usage: bed2cBioPortalGenePanel.R <panel_dir>
##
## Inputs:
##   cbioportal_genes.txt     tab separated gene table, read from the current
##                            working directory; the columns ENTREZ_GENE_ID
##                            and HUGO_GENE_SYMBOL are used
##   <panel_dir>/meta.csv     semicolon separated panel metadata; the columns
##                            stable_id and description are used
##   <panel_dir>/target.bed   target regions (chromosome, start, end in the
##                            first three columns)
##
## Output:
##   <panel_dir>/panel.txt    stable_id, description and tab separated
##                            gene_list
##
## Author: Niklas Reimer, M.Sc.
##
## Date Created: 2020-10-30
##
## ---------------------------

# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with an obscure error.
library(tidyr) # not referenced by the code below; kept as an existing import
library(biomaRt)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L) {
  stop("Usage: bed2cBioPortalGenePanel.R <panel_dir>", call. = FALSE)
}
panel_dir <- args[1]

# read genes provided by cBioPortal
cbiogenes <- read.table(
  "cbioportal_genes.txt",
  header = TRUE, sep = "\t", quote = "\""
)
# read provided metadata for panel
meta <- read.table(file.path(panel_dir, "meta.csv"), header = TRUE, sep = ";")
# read provided bed file
bed <- read.table(file.path(panel_dir, "target.bed"))

# fail early with a clear message when an expected column is absent
missing_meta <- setdiff(c("stable_id", "description"), names(meta))
if (length(missing_meta) > 0L) {
  stop(
    "meta.csv is missing column(s): ", paste(missing_meta, collapse = ", "),
    call. = FALSE
  )
}
if (nrow(meta) < 1L) {
  stop("meta.csv contains no data row", call. = FALSE)
}
if (nrow(meta) > 1L) {
  warning("meta.csv contains several rows; using the first", call. = FALSE)
}

# remove the chr prefix if present (anchored, so only a leading "chr" is
# stripped) and build one "chromosome:start-end" string per bed row
# NOTE(review): bed start coordinates are conventionally 0-based; confirm
# whether the start has to be shifted by one for the Ensembl query.
chromosomes <- sub("^chr", "", bed$V1)
regions <- paste(chromosomes, paste(bed$V2, bed$V3, sep = "-"), sep = ":")

# setup ensembl and fetch the Entrez ids of all genes in the target regions
m <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
df <- getBM(
  mart = m,
  attributes = c("entrezgene_id"),
  filters = c("chromosomal_region"),
  values = list(regions)
)

# remove duplicate ids; NA ids are dropped as well because they would
# otherwise match NA entries in the cBioPortal gene table
entrez <- unique(df$entrezgene_id)
entrez <- entrez[!is.na(entrez)]

# get genes using entrez id from cbioportal dataset, sorted by symbol
in_panel <- cbiogenes$ENTREZ_GENE_ID %in% entrez
genes <- sort(unique(as.character(cbiogenes$HUGO_GENE_SYMBOL[in_panel])))

if (length(genes) == 0L) {
  warning("no cBioPortal gene matched the target regions", call. = FALSE)
}

# write to file: one header line per metadata field followed by the tab
# separated gene list (no trailing tab, terminated by a newline)
panel_lines <- c(
  paste("stable_id", meta$stable_id[1], sep = ": "),
  paste("description", meta$description[1], sep = ": "),
  paste0("gene_list: ", paste(genes, collapse = "\t"))
)
writeLines(panel_lines, file.path(panel_dir, "panel.txt"))