-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoverlap_graphs.py
152 lines (119 loc) · 3.5 KB
/
overlap_graphs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# -*- coding: utf-8 -*-
from MyGraph import MyGraph
class OverlapGraph(MyGraph):
# def __init__(self, frags):
# MyGraph.__init__(self, {})
# self.create_overlap_graph(frags)
# def __init__(self, frags, reps=False):
# if reps: self.create_overlap_graph_with_reps(frags)
# else: self.create_overlap_graph(frags)
# self.reps = reps
def __init__(self, frags, reps=True):
MyGraph.__init__(self, {})
if reps: self.create_overlap_graph_with_reps(frags)
else: self.create_overlap_graph(frags)
self.reps = reps
# create overlap graph from list of sequences (fragments)
def create_overlap_graph(self, frags):
# add vertices
for seq in frags: self.add_vertex(seq)
# add edges
for seq in frags:
suf = suffix(seq)
for seq2 in frags:
if prefix(seq2) == suf: self.add_edge(seq, seq2)
# esta solução tem um problema! Nas repetições!
# esta solução não resolve o problema das repetições!
def create_overlap_graph_with_reps(self, frags): # caso de replicas de fragmentos
idnum = 1
for seq in frags:
self.add_vertex(seq+'-'+str(idnum))
idnum += 1
idnum = 1
for seq in frags:
suf = suffix(seq)
for seq2 in frags:
if prefix(seq2) == suf:
for x in self.get_instances(seq2): self.add_edge(seq+'-'+str(idnum), x)
idnum += 1
def get_instances(self, seq):
res = []
for k in self.graph.keys():
if seq in k: res.append(k)
return res
def get_seq(self, node):
if node not in self.graph.keys(): return None
if self.reps:
return node.split("-")[0]
else:
return node
def seq_from_path(self, path):
if not self.check_if_hamiltonian_path(path):
return None
seq = self.get_seq(path[0])
for i in range(1, len(path)):
nxt = self.get_seq(path[i])
seq += nxt[-1]
return seq
# auxiliary
def composition(k, seq):
res = []
for i in range(len(seq) - k + 1):
res.append(seq[i:i + k])
res.sort()
return res
def suffix(seq):
return seq[1:]
def prefix(seq):
return seq[:-1]
# testing / mains
def test1():
seq = "CAATCATGATG"
k = 3
print(composition(k, seq))
def test2():
frags = ["ACC", "ATA", "CAT", "CCA", "TAA"]
ovgr = OverlapGraph(frags, False)
ovgr.print_graph()
def test3():
frags = ["ATA", "ACC", "ATG", "ATT", "CAT", "CAT", "CAT", "CCA", "GCA", "GGC", "TAA", "TCA", "TGG", "TTC", "TTT"]
ovgr = OverlapGraph(frags, True)
ovgr.print_graph()
def test4():
frags = ["ATA", "ACC", "ATG", "ATT", "CAT", "CAT", "CAT", "CCA", "GCA", "GGC", "TAA", "TCA", "TGG", "TTC", "TTT"]
ovgr = OverlapGraph(frags, True)
# ovgr.print_graph()
path = ["ACC-2", "CCA-8", "CAT-5", "ATG-3"]
print(ovgr.check_if_valid_path(path))
print(ovgr.check_if_hamiltonian_path(path))
path2 = ["ACC-2", "CCA-8", "CAT-5", "ATG-3", "TGG-13", "GGC-10", "GCA-9", "CAT-6", "ATT-4", "TTT-15", "TTC-14", "TCA-12", "CAT-7", "ATA-1", "TAA-11"]
print(ovgr.check_if_valid_path(path2))
print(ovgr.check_if_hamiltonian_path(path2))
print(ovgr.seq_from_path(path2))
def test5():
frags = ["ATA", "ACC", "ATG", "ATT", "CAT", "CAT", "CAT", "CCA", "GCA", "GGC", "TAA", "TCA", "TGG", "TTC", "TTT"]
ovgr = OverlapGraph(frags, True)
path = ovgr.search_hamiltonian_path()
print(path)
print(ovgr.check_if_hamiltonian_path(path))
print(ovgr.seq_from_path(path))
def test6():
orig_sequence = "CAATCATGATGATGATC"
frags = composition(3, orig_sequence)
print(frags)
ovgr = OverlapGraph(frags, True)
ovgr.print_graph()
path = ovgr.search_hamiltonian_path()
print(path)
print(ovgr.seq_from_path(path))
test1()
print()
test2()
print()
test3()
print()
test4()
print()
test5()
print()
test6()