-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit e5da7e5
Showing
25 changed files
with
332 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
venv/* | ||
.pytest_cache/* | ||
Pipfile.lock | ||
.idea/ |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[[source]] | ||
name = "pypi" | ||
url = "https://pypi.org/simple" | ||
verify_ssl = true | ||
|
||
[dev-packages] | ||
|
||
[packages] | ||
pysam = "*" | ||
pytest = "*" | ||
|
||
[requires] | ||
python_version = "3.6" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
TMB (tumor mutation burden) | ||
|
||
### Requirements | ||
* Python >=3.6 | ||
* pip3 | ||
* pipenv (it is the recommended way to rule packages and environments) | ||
* pysam | ||
|
||
Example of installation packages and Python 3.6 in Ubuntu: | ||
`sudo apt install python3.6` | ||
`alias python='usr/bin/python3.6` | ||
`sudo apt install python3-pip` | ||
`pip3 install pipenv` | ||
Run `pipenv install pysam` in project directory |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
1 87 89 gene | ||
1 88 92 gene2 | ||
1 89 94 gene2 | ||
1 90 92 gene3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
1 87 89 | ||
1 88 92 | ||
1 89 94 | ||
1 90 92 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
1 87 89 | ||
1 88 | ||
1 89 94 | ||
1 90 92 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
chr5 665279 665338 GENE |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse | ||
from tmb.config import Config | ||
from tmb.bedreader import BedReader | ||
from tmb.distribution import calculate_tmb | ||
from multiprocessing import Pool | ||
|
||
|
||
def main(): | ||
config = parse_config() | ||
exons = BedReader(config.bed).exonList | ||
|
||
pool = Pool(processes=Config.th) | ||
results = [pool.apply_async(calculate_tmb, args=(config.tumor, config.normal, exon)) for exon in exons] | ||
|
||
print_header() | ||
for output in results: | ||
tmb = output.get() | ||
print(tmb) | ||
|
||
|
||
def parse_config(): | ||
parser = argparse.ArgumentParser() | ||
required = parser.add_argument_group("required") | ||
optional = parser.add_argument_group("optional") | ||
optional.add_argument('--freq', type=float, | ||
help="Minimum difference of frequencies for base to consider site as somatic. " | ||
"Default: 5%% (0.05)") | ||
optional.add_argument('--mapq', type=float, help="Minimum read mapping quality. Default: 10.0") | ||
optional.add_argument('--baseq', type=int, help="Minimum base quiality. Default: 25") | ||
optional.add_argument('--mincov', type=int, | ||
help="Minimum coverage of base for nucleotide to be considered as somatic. " | ||
"Default: 2") | ||
optional.add_argument('--th', type=int, help="Number of threads for multiprocessing mode. Default: 1") | ||
required.add_argument('--normal', required=True, type=str, help="Path to normal SAM/BAM/CRAM file.") | ||
required.add_argument('--tumor', required=True, type=str, help="Path to tumor SAM/BAM/CRAM file.") | ||
required.add_argument('--bed', required=True, type=str, help="Path to BED file.") | ||
|
||
args = parser.parse_args() | ||
return Config(args) | ||
|
||
|
||
def print_header(): | ||
print("\t".join(['Chr', 'Start', 'End', 'Gene', 'SomaticSites', 'TMB'])) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
from tmb.bamreader import BamReader | ||
from tmb.bedreader import BedReader | ||
|
||
import pytest | ||
|
||
|
||
def test_bam_test_read(): | ||
path = '../data/bed_bam_test/test.bam' | ||
bam = BamReader(path).file | ||
bam_iter = bam.fetch('1', 28234090, 28234093) | ||
list1 = [x for x in bam_iter] | ||
assert len(list1) == 4 | ||
|
||
|
||
def test_bed_test_read_correct(): | ||
path = '../data/bed_bam_test/test1.bed' | ||
exons = BedReader(path).exonList | ||
assert len(exons) == 4 | ||
|
||
path = '../data/bed_bam_test/test2.bed' | ||
exons = BedReader(path).exonList | ||
assert len(exons) == 4 | ||
|
||
|
||
def test_bed_test_read_incorrect(): | ||
path = '../data/bed_bam_test/test3.bed' | ||
with pytest.raises(IndexError): | ||
BedReader(path) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from numpy.testing import assert_almost_equal | ||
|
||
from tmb.bedreader import BedReader | ||
from tmb.distribution import calculate_tmb | ||
|
||
import pytest | ||
|
||
|
||
def test_distribution(): | ||
normal = '../data/chr5_665281/normal_chr5_665281.bam' | ||
tumor = '../data/chr5_665281/tumour_chr5_665281.bam' | ||
exons = BedReader('../data/chr5_665281/dist.bed').exonList | ||
|
||
for exon in exons: | ||
tmb = calculate_tmb(tumor, normal, exon) | ||
assert tmb.somatic_sites == [665281, 665308, 665336] | ||
assert_almost_equal(tmb.tmb, 50847.46) | ||
|
||
|
||
def test_panel_indexes(): | ||
tumor = '../data/panel_az_600/Dev_731_GTL_16_5_Pool1-ready.bam' | ||
normal = '../data/panel_az_600/Dev_731_NA12878a_Pool1-ready.bam' | ||
exons = BedReader('../data/panel_az_600/panel_az600_chr7_MET.bed').exonList | ||
|
||
for exon in exons: | ||
tmb = calculate_tmb(tumor, normal, exon) |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import pysam | ||
|
||
|
||
class BamReader: | ||
def __init__(self, path): | ||
extension = path.strip().split('.')[-1] | ||
if extension == 'bam': | ||
self.file = self.read_bam(path) | ||
if extension == 'cram': | ||
self.file = self.read_cram(path) | ||
if extension == 'cram': | ||
self.file = self.read_sam(path) | ||
|
||
def read_bam(self, path): | ||
file = pysam.AlignmentFile(path, "rb") | ||
return file | ||
|
||
def read_sam(self, path): | ||
file = pysam.AlignmentFile(path, "r") | ||
return file | ||
|
||
def read_cram(self, path): | ||
file = pysam.AlignmentFile(path, "rc") | ||
return file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from tmb.exome import Exome | ||
|
||
|
||
class BedReader: | ||
|
||
def __init__(self, path): | ||
extension = path.split('.')[-1] | ||
if extension == 'bed': | ||
self.exonList = self.read_bed(path) | ||
|
||
def read_bed(self, path): | ||
exonlist = [] | ||
|
||
file = open(path, "r") | ||
for x in file: | ||
line = x.split() | ||
if len(line) == 4: | ||
exome = Exome(str(line[0]), int(line[1]), int(line[2]), str(line[3])) | ||
else: | ||
exome = Exome(str(line[0]), int(line[1]), int(line[2]), '.') | ||
exonlist.append(exome) | ||
return exonlist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Configuration parameters for thresholds of quality etc. | ||
|
||
|
||
class Config: | ||
freq = 0.05 | ||
mapq = 10.0 | ||
baseq = 25 | ||
mincov = 2 | ||
th = 1 | ||
|
||
@staticmethod | ||
def set_config(args): | ||
if args.freq: | ||
Config.freq = args.freq | ||
if args.mapq: | ||
Config.mapq = args.mapq | ||
if args.baseq: | ||
Config.baseq = args.baseq | ||
if args.mincov: | ||
Config.mincov = args.mincov | ||
if args.th: | ||
Config.th = args.th | ||
|
||
def __init__(self, args): | ||
self.normal = args.normal | ||
self.tumor = args.tumor | ||
self.bed = args.bed | ||
self.set_config(args) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
from tmb.tmbresult import TMBResult | ||
from tmb.bamreader import BamReader | ||
from tmb.config import Config | ||
|
||
|
||
def calculate_tmb(tumor_path, normal_path, exon): | ||
tumor_distribution, normal_distribution = collect_distributions(tumor_path, normal_path, exon) | ||
tmb, somatic_sites = hypotesis_test(tumor_distribution, normal_distribution, exon) | ||
tmb = TMBResult(exon, tmb, somatic_sites) | ||
return tmb | ||
|
||
|
||
def collect_distributions(tumor_path, normal_path, exon): | ||
tumor = BamReader(tumor_path).file | ||
normal = BamReader(normal_path).file | ||
|
||
tumor_distribution = fill_maps(tumor, exon) | ||
# print(tumor_distribution) | ||
normal_distribution = fill_maps(normal, exon) | ||
# print(normal_distribution) | ||
return tumor_distribution, normal_distribution | ||
|
||
|
||
def fill_maps(bam, exon): | ||
positions_to_acgt = {} | ||
for i in range(exon.start, exon.end): | ||
positions_to_acgt[i] = {"A": 0, "C": 0, "G": 0, "T": 0, "N": 0, "DEL": 0} | ||
|
||
# Pile can get reads that will cover this position in column-like form | ||
pile = bam.pileup(exon.chr, exon.start, exon.end) | ||
for pileupcolumn in pile: | ||
position = pileupcolumn.pos | ||
if position < exon.start or position >= exon.end: | ||
continue | ||
|
||
acgt_to_counts = {"A": 0, "C": 0, "G": 0, "T": 0, "N": 0, "DEL": 0} | ||
for pileupread in pileupcolumn.pileups: | ||
if bad_read(pileupread): | ||
continue | ||
if pileupread.query_position is None: | ||
base = 'DEL' | ||
else: | ||
base_quality = pileupread.alignment.query_qualities[pileupread.query_position] | ||
if base_quality < Config.baseq: | ||
break | ||
base = pileupread.alignment.query_sequence[pileupread.query_position] | ||
acgt_to_counts[base] += 1 | ||
positions_to_acgt[position] = acgt_to_counts | ||
return positions_to_acgt | ||
|
||
|
||
def hypotesis_test(tumor, normal, exon): | ||
exone_len = exon.end - exon.start | ||
somatic_sites = [] | ||
for position in tumor: | ||
tumor_counts = list(tumor[position].values()) | ||
normal_counts = list(normal[position].values()) | ||
|
||
tumor_total_coverage = sum(tumor_counts) | ||
normal_total_coverage = sum(normal_counts) | ||
|
||
if tumor_total_coverage == 0 or normal_total_coverage == 0: | ||
continue | ||
|
||
tumor_percentage = list([x / tumor_total_coverage for x in tumor_counts]) | ||
normal_percentage = list([x / normal_total_coverage for x in normal_counts]) | ||
|
||
for i in range(len(tumor_counts)): | ||
a = abs(tumor_percentage[i] - normal_percentage[i]) | ||
if a > Config.freq and tumor_counts[i] > Config.mincov and normal_counts[i] > Config.mincov: | ||
# Sam is 0-based, extend position: | ||
somatic_sites.append(position + 1) | ||
break | ||
|
||
tmb = round((len(somatic_sites) / exone_len) * 1000000, 2) | ||
return tmb, somatic_sites | ||
|
||
|
||
# Read doesn't fit criteria for quality | ||
def bad_read(read): | ||
if read.alignment.mapping_quality < Config.mapq: | ||
return True | ||
|
||
return False | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
class Exome: | ||
def __init__(self, chr, start, end, gene): | ||
self.chr = chr | ||
self.start = start | ||
self.end = end | ||
self.gene = gene | ||
self.length = end - start | ||
|
||
def __str__(self): | ||
exon_description = ', '.join(['{key}={value}'.format(key=key, value=self.__dict__.get(key)) | ||
for key in self.__dict__]) | ||
return '\n' + exon_description | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
class TMBResult: | ||
def __init__(self, exon, tmb, somatic_sites): | ||
self.exon = exon | ||
self.tmb = tmb | ||
self.somatic_sites = somatic_sites | ||
|
||
def __str__(self): | ||
return "\t".join([str(self.exon.chr), str(self.exon.start), str(self.exon.end), self.exon.gene, | ||
str(len(self.somatic_sites)), str(self.somatic_sites), str(self.tmb)]) |