-
Notifications
You must be signed in to change notification settings - Fork 0
/
.gitignore
81 lines (81 loc) · 3.54 KB
/
.gitignore
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import csv
import sys
def _read_database(path):
    """Load the STR-count database CSV at *path*.

    Returns a ``(str_names, people)`` pair. ``str_names`` lists every header
    column except ``name`` in file order; ``people`` is the list of row dicts
    with those columns converted to ``int``.

    Raises ValueError/TypeError if an STR cell is not an integer.
    """
    with open(path, "r", newline="") as file:
        reader = csv.DictReader(file)
        # Derive the STR columns from the header so both the small and the
        # large database layouts work without hard-coded column names.
        str_names = [
            field for field in (reader.fieldnames or []) if field != "name"
        ]
        people = []
        for row in reader:
            for str_name in str_names:
                row[str_name] = int(row[str_name])
            people.append(row)
    return str_names, people


def _read_sequence(path):
    """Return the DNA sequence stored at *path* as a list of characters."""
    with open(path, "r") as file:
        # Drop all whitespace so trailing newlines or blank lines never end
        # up inside the sequence.
        return list("".join(file.read().split()))


def main():
    """Identify whose STR counts match a DNA sequence.

    Expects ``sys.argv`` to hold the database CSV path and the sequence file
    path. Prints the ``name`` of every database row whose STR counts all
    equal the longest runs found in the sequence, or ``No match`` when no
    row qualifies. Exits with status 1 on incorrect command-line usage.
    """
    if len(sys.argv) != 3:
        print("Usage: python dna.py data.csv sequence.txt")
        sys.exit(1)

    str_names, database = _read_database(sys.argv[1])
    sequence = _read_sequence(sys.argv[2])

    # Longest consecutive run of every STR named in the database header.
    longest = {name: longest_match(sequence, name) for name in str_names}

    found = False
    for person in database:
        # A profile matches only when *every* STR count agrees; requiring at
        # least one STR column keeps a header-only file from matching all.
        if str_names and all(person[name] == longest[name] for name in str_names):
            print(person["name"])
            found = True

    if not found:
        print("No match")
def longest_match(sequence, subsequence):
    """Return the length of the longest run of subsequence in sequence.

    A "run" is the number of back-to-back, non-overlapping repeats of
    ``subsequence`` starting at some position of ``sequence``.

    ``sequence`` may be a string or any sequence of single characters
    (e.g. ``list("AGAT")``). An empty ``subsequence`` yields 0.
    """
    # An empty pattern would match the empty slice forever; define it as 0.
    if not subsequence:
        return 0

    # Compare like with like: a str slice never equals a list, and vice versa.
    if isinstance(sequence, str):
        pattern = subsequence
    else:
        sequence = list(sequence)
        pattern = list(subsequence)

    longest_run = 0
    subsequence_length = len(pattern)
    sequence_length = len(sequence)

    # Check each position in sequence for the most consecutive repeats
    for i in range(sequence_length):
        count = 0

        # Keep sliding the window one pattern-length to the right for as
        # long as it still holds another copy of the pattern.
        while True:
            start = i + count * subsequence_length
            end = start + subsequence_length

            if sequence[start:end] == pattern:
                count += 1
            else:
                break

        longest_run = max(longest_run, count)

    # After checking for runs at each position, return the longest one found
    return longest_run
# Run the program only when executed as a script, not when imported.
if __name__ == "__main__":
    main()