forked from ctsa/svtools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bedpeToBed12
executable file
·158 lines (127 loc) · 6.15 KB
/
bedpeToBed12
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/env python
import sys
import getopt
import string
from optparse import OptionParser
class BEDPE (object):
def __init__(self, bedList):
self.c1 = bedList[0]
self.s1 = int(bedList[1])
self.e1 = int(bedList[2])
self.c2 = bedList[3]
self.s2 = int(bedList[4])
self.e2 = int(bedList[5])
self.name = bedList[6]
self.score = bedList[7]
self.o1 = bedList[8]
self.o2 = bedList[9]
self.type = bedList[10]
def bedpeToBlockedBed(bedpe, dist):
if (bedpe.c1 == bedpe.c2) and (abs(bedpe.e2-bedpe.s1) <= dist) and bedpe.o1 == "+" and bedpe.o2 == "-": color = "153,0,0" # deletion breakpoints are red
elif (bedpe.c1 == bedpe.c2) and (abs(bedpe.e2-bedpe.s1) <= dist) and bedpe.o1 == "-" and bedpe.o2 == "+": color = "0,102,0" # duplication breakpoints are green
elif (bedpe.c1 == bedpe.c2) and (abs(bedpe.e2-bedpe.s1) <= dist) and bedpe.o1 == "+" and bedpe.o2 == "+": color = "0,51,204" # inversion breakpoints are blue
elif (bedpe.c1 == bedpe.c2) and (abs(bedpe.e2-bedpe.s1) <= dist) and bedpe.o1 == "-" and bedpe.o2 == "-": color = "0,51,204" # inversion breakpoints are blue
elif (bedpe.c1 != bedpe.c2) or (abs(bedpe.e2-bedpe.s1) > dist): color = "204,204,204" # distant breakpoints are gray
if (bedpe.c1 == bedpe.c2) and (abs(bedpe.e2-bedpe.s1) <= dist):
print bedpe.c1 + "\t" + str(bedpe.s1) + "\t" + str(bedpe.e2) + "\t" + bedpe.type + "_" + bedpe.name + \
"\t" + str(bedpe.score) + "\t" + \
"+" + "\t" + str(bedpe.s1) + "\t" + str(bedpe.e2) + "\t" + color + "\t" + "2" + "\t" + \
str(bedpe.e1 - bedpe.s1) + "," + str(bedpe.e2 - bedpe.s2) + "\t" + \
"0," + str(bedpe.s2- bedpe.s1)
# intrachromosomals that exceed dist
elif (bedpe.c1 == bedpe.c2) and (abs(bedpe.e2-bedpe.s1) > dist):
if bedpe.o1 == "+":
print bedpe.c1 + "\t" + str(bedpe.s1) + "\t" + str(bedpe.e1+500) + "\t" + bedpe.type + "_" + bedpe.name + \
"\t" + str(bedpe.score) + "\t" + \
"+" + "\t" + str(bedpe.s1) + "\t" + str(bedpe.e1+500) + "\t" + color + "\t" + "2" + "\t" + \
str(bedpe.e1 - bedpe.s1) + "," + "1" + "\t" + \
"0," + str((bedpe.e1-bedpe.s1)+499)
if bedpe.o1 == "-":
print bedpe.c1 + "\t" + str(bedpe.s1-500) + "\t" + str(bedpe.e1) + "\t" + bedpe.type + "_" + bedpe.name + \
"\t" + str(bedpe.score) + "\t" + \
"-" + "\t" + str(bedpe.s1-500) + "\t" + str(bedpe.e1) + "\t" + color + "\t" + "2" + "\t" + \
"1" + "," + str(bedpe.e1 - bedpe.s1) + "\t" + \
"0," + str(500)
if bedpe.o2 == "+":
print bedpe.c2 + "\t" + str(bedpe.s2) + "\t" + str(bedpe.e2+500) + "\t" + bedpe.type + "_" + bedpe.name + \
"\t" + str(bedpe.score) + "\t" + \
"+" + "\t" + str(bedpe.s2) + "\t" + str(bedpe.e2+500) + "\t" + color + "\t" + "2" + "\t" + \
str(bedpe.e2 - bedpe.s2) + "," + "1" + "\t" + \
"0," + str((bedpe.e2-bedpe.s2)+499)
if bedpe.o2 == "-":
print bedpe.c2 + "\t" + str(bedpe.s2-500) + "\t" + str(bedpe.e2) + "\t" + bedpe.type + "_" + bedpe.name + \
"\t" + str(bedpe.score) + "\t" + \
"-" + "\t" + str(bedpe.s2-500) + "\t" + str(bedpe.e2) + "\t" + color + "\t" + "2" + "\t" + \
"1" + "," + str(bedpe.e2 - bedpe.s2) + "\t" + \
"0," + str(500)
# interchromosomals:
elif (bedpe.c1 != bedpe.c2):
if bedpe.o1 == "+":
print bedpe.c1 + "\t" + str(bedpe.s1) + "\t" + str(bedpe.e1+500) + "\t" + bedpe.type + "_" + bedpe.name + "\t" + str(bedpe.score) + "\t" + \
"+" + "\t" + str(bedpe.s1) + "\t" + str(bedpe.e1+500) + "\t" + color + "\t" + "2" + "\t" + \
str(bedpe.e1 - bedpe.s1) + "," + "1" + "\t" + \
"0," + str((bedpe.e1-bedpe.s1)+499)
if bedpe.o1 == "-":
print bedpe.c1 + "\t" + str(bedpe.s1-500) + "\t" + str(bedpe.e1) + "\t" + bedpe.type + "_" + bedpe.name + "\t" + str(bedpe.score) + "\t" + \
"-" + "\t" + str(bedpe.s1-500) + "\t" + str(bedpe.e1) + "\t" + color + "\t" + "2" + "\t" + \
"1" + "," + str(bedpe.e1 - bedpe.s1) + "\t" + \
"0," + str(500)
if bedpe.o2 == "+":
print bedpe.c2 + "\t" + str(bedpe.s2) + "\t" + str(bedpe.e2+500) + "\t" + bedpe.type + "_" + bedpe.name + "\t" + str(bedpe.score) + "\t" + \
"+" + "\t" + str(bedpe.s2) + "\t" + str(bedpe.e2+500) + "\t" + color + "\t" + "2" + "\t" + \
str(bedpe.e2 - bedpe.s2) + "," + "1" + "\t" + \
"0," + str((bedpe.e2-bedpe.s2)+499)
if bedpe.o2 == "-":
print bedpe.c2 + "\t" + str(bedpe.s2-500) + "\t" + str(bedpe.e2) + "\t" + bedpe.type + "_" + bedpe.name + "\t" + str(bedpe.score) + "\t" + \
"-" + "\t" + str(bedpe.s2-500) + "\t" + str(bedpe.e2) + "\t" + color + "\t" + "2" + "\t" + \
"1" + "," + str(bedpe.e2 - bedpe.s2) + "\t" + \
"0," + str(500)
def processBEDPE(bedpeFile, name, dist):
"""
Process the BEDPE file and convert each entry to SAM.
"""
writeTrackName(name)
if bedpeFile == "stdin":
for line in sys.stdin:
# ignore header
if line[0] == "#":
continue
lineList = line.strip().split()
if (len(lineList) > 0):
bedpe = BEDPE(lineList)
bedpeToBlockedBed(bedpe, dist)
else:
for line in open(bedpeFile, 'r'):
# ignore header
if line[0] == "#":
continue
lineList = line.strip().split()
if (len(lineList) > 0):
bedpe = BEDPE(lineList)
bedpeToBlockedBed(bedpe, dist)
def writeTrackName(name):
print "track name=" + name + " itemRgb=On"
def main():
usage = """%prog -i <file> -n <name> -d <dist>
bedpeToBed12 version 1.0
Author: Aaron Quinlan & Ira Hall
Description: converts BEDPE to BED12 format for viewing in IGV or the UCSC browser.
Last Modified: Nov. 13, 2014
"""
parser = OptionParser(usage)
parser.add_option("-i", "--bedpe", dest="bedpe",
help="BEDPE input file", metavar="FILE")
parser.add_option("-n", "--name", default="BEDPE", dest="name", type="str",
help="The name of the track. Default is 'BEDPE'.",
metavar="STR")
parser.add_option("-d", "--maxdist", dest="dist", default = 1000000, type="int",
help="The minimum distance for drawing intrachromosomal features as if they are interchromosomal (i.e., without a line spanning the two footprints). Default is 1Mb.",
metavar="INT")
(opts, args) = parser.parse_args()
if opts.bedpe is None:
parser.print_help()
print
else:
processBEDPE(opts.bedpe, opts.name, opts.dist)
if __name__ == "__main__":
main()