-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEcoliRestrictions.py
59 lines (45 loc) · 1.47 KB
/
EcoliRestrictions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import re
def find_rc(rc):
    """Return the reverse complement of the sequence ``rc``.

    The sequence is read back to front and every upper-case ``A``, ``T``,
    ``G`` or ``C`` is swapped for its pairing base.  Any other character
    (lower-case letters, ``N``, ...) is copied through unchanged.
    """
    pairing = {"A": "T", "T": "A", "G": "C", "C": "G"}
    complemented = []
    for base in reversed(rc):
        # Unknown symbols fall back to themselves.
        complemented.append(pairing.get(base, base))
    return "".join(complemented)
def get_genome(x):
    """Read a nucleotide sequence from the text file ``x``.

    The file may start with a single description line (for example a FASTA
    ``>`` header).  That line is recognised by containing something other
    than the letters A, C, G, T or N (in either case) and is discarded; a
    first line made only of those letters is kept as sequence data.  All
    remaining lines, including the last one, are joined without line breaks
    and upper-cased.

    Args:
        x: Path of the file to read.

    Returns:
        The sequence as one upper-case string, or ``''`` for an empty file.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(x) as handle:
        stripped = [line.strip() for line in handle]

    # Blank lines carry no sequence data; removing them first also ensures
    # the header test below looks at the first line with real content.
    lines = [line for line in stripped if line]

    # Drop the first line only when it is NOT pure sequence.
    if lines and not set(lines[0].upper()) <= set("ACGTN"):
        del lines[0]

    # A single join is linear; repeated ``+=`` on a genome-sized string is not.
    return "".join(lines).upper()
def find_rest_indexes(x, y):
    """Locate where a restriction site cuts the sequence ``x``.

    Args:
        x: Sequence to search.
        y: Recognition sequence with a ``/`` marking the cut position,
            e.g. ``'G/AATTC'``.  It is interpreted as a regular expression,
            so ``.`` matches any base.  The cut offset is counted in pattern
            characters, so each character should stand for exactly one base.

    Returns:
        A list that starts with ``0`` and is followed, in increasing order,
        by the index in ``x`` at which each occurrence of the site is cut.
        Consecutive entries therefore delimit the fragments as slices of
        ``x``.  Overlapping occurrences of the site are all reported.

    Raises:
        ValueError: If ``y`` contains no ``/`` cut marker.
        re.error: If ``y`` is not a valid regular expression.
    """
    cut_offset = y.find('/')
    if cut_offset == -1:
        # Without a marker there is no defined cut position; guessing one
        # would silently produce wrong (even negative) fragment boundaries.
        raise ValueError("recognition sequence must mark the cut position with '/'")

    site = y.replace('/', '')
    split_indexes = [0]
    if not site:
        # Nothing to search for, so the sequence stays in one piece.
        return split_indexes

    # A zero-width lookahead reports every start position, including
    # overlapping ones, and works for a site of any length.
    site_regex = re.compile('(?={})'.format(site))

    # The cut lies ``cut_offset`` bases into the site: the bases before the
    # '/' stay with the left-hand fragment.
    split_indexes.extend(
        found.start() + cut_offset for found in site_regex.finditer(x)
    )
    return split_indexes
def main():
    """Prompt for a genome file and a recognition sequence, then print the fragments.

    The recognition sequence marks the cut position with ``/`` and may use
    ``N`` for "any base".  Every fragment between consecutive cut positions
    is printed with its number, start index and length, followed by the
    total number of fragments.
    """
    file = input('file name: ')

    # get_genome() returns the genome in upper case, so the recognition
    # sequence is upper-cased too; otherwise lower-case input never matches.
    # 'N' (any base) then becomes the regular-expression wildcard '.'.
    recog_seq = input('recognition sequence: ').strip().upper()
    recog_seq = recog_seq.replace('N', '.')

    genome = get_genome(file)
    output = find_rest_indexes(genome, recog_seq)

    # Fragments bounded by two consecutive cut positions.
    for number, (start, end) in enumerate(zip(output, output[1:]), start=1):
        fragment = genome[start:end]
        print('fragment {}, at {} is {} letters long: {}'.format(number, start, len(fragment), fragment))

    # The final fragment runs from the last cut position to the end of the genome.
    last_fragment = genome[output[-1]:]
    print('fragment {}, at {}, is {} letters long: {}'.format(len(output), output[-1], len(last_fragment), last_fragment))
    print('{} fragments total'.format(len(output)))


# Only prompt when executed as a script, not when the module is imported.
if __name__ == '__main__':
    main()