-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtRNAFasta.py
155 lines (131 loc) · 4.81 KB
/
tRNAFasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/env python3
# Name: Bryan Thornlow
# Date: 6/6/2017
# tRNAFasta.py
import sys
import os
import time
import random
import numpy
import gzip
import math
"""
This program extracts the sequence of every feature listed in a .bed file from
a genome .fa file and writes one tab-separated name/sequence line per feature.
"""
import sys, argparse, random, math
class CommandLine(object):
    """Handles the input arguments from the command line. Manages
    the argument parser.

    Other than initialization, no methods are present: the class only
    parses the options and exposes the result.

    Attributes:
        parser: the argparse.ArgumentParser defining the options.
        args: the parsed namespace with inputBed, inputGenomeSeq and
            outputFile (outputFile defaults to the empty string).
    """

    def __init__(self, inOpts=None):
        '''
        CommandLine constructor.

        Implements a parser to interpret the command line input using
        argparse.

        Args:
            inOpts: optional list of argument strings to parse instead of
                the process command line. When None (the default),
                argparse falls back to sys.argv[1:].
        '''
        self.parser = argparse.ArgumentParser()
        self.parser.add_argument("-b", "--inputBed", help="Input .bed"+
            " file to be converted to a fasta.")
        self.parser.add_argument("-g", "--inputGenomeSeq", help="Input .fa containing "+
            "genome sequence to be parsed for sequences corresponding to .bed file.")
        self.parser.add_argument("-o", "--outputFile", help="The path to"+
            " and the filename of the .fa file you are creating", default='')
        # Forward inOpts so callers can supply arguments programmatically;
        # parse_args(None) is equivalent to the previous parse_args().
        self.args = self.parser.parse_args(inOpts)
class fileConverter(object):
    """
    Primary class where the sequences of .bed features are pulled out of a
    genome .fa file.

    Attributes:
        inputBed: path of the tab-separated .bed file to read.
        inputGenomeSeq: path of the genome .fa file to read.
        outputFile: path of the file to write.
    """

    def __init__(self, inputBed, inputGenomeSeq, outputFile):
        self.inputBed = inputBed
        self.inputGenomeSeq = inputGenomeSeq
        self.outputFile = outputFile

    def convertFile(self):
        """Extract every .bed feature from the genome and write the result.

        The output holds one line per feature name, sorted by name, in the
        form name<TAB>sequence. If the same name occurs more than once, the
        entry processed last replaces the earlier ones.
        """
        chromTotRNACoords = self._readBed()

        tRNAToSeq = {}
        # Sequence lines of the chromosome currently being read; collected in
        # a list and joined once instead of growing a string line by line.
        chromSeqParts = []
        # Initialised up front so that lines appearing before the first '>'
        # header are ignored rather than hitting an unassigned name.
        mytRNAs = []
        with open(self.inputGenomeSeq) as genomeFile:
            for line in genomeFile:
                stripLine = line.strip()
                if stripLine.startswith('>'):
                    # A new header ends the previous chromosome: flush it.
                    self._storeSequences(''.join(chromSeqParts), mytRNAs, tRNAToSeq)
                    myChrom = stripLine[1:]
                    if ' ' in myChrom:
                        # Keep only the first whitespace-delimited token.
                        myChrom = (stripLine.split()[0])[1:]
                    chromSeqParts = []
                    mytRNAs = sorted(chromTotRNACoords.get(myChrom, []),
                                     key=lambda x: x[1])
                elif mytRNAs:
                    # Only keep sequence for chromosomes that have features.
                    chromSeqParts.append(stripLine.upper())
        # Flush the last chromosome in the file.
        self._storeSequences(''.join(chromSeqParts), mytRNAs, tRNAToSeq)

        with open(self.outputFile, 'w') as outFile:
            outFile.writelines(name+'\t'+tRNAToSeq[name]+'\n'
                               for name in sorted(tRNAToSeq))

    def _readBed(self):
        """Return {chrom: [[start, end, name, strand], ...]} from inputBed.

        Columns 1-4 and 6 of each tab-separated line are used, so a
        non-blank line needs at least six columns. Blank lines are skipped.
        """
        chromTotRNACoords = {}
        with open(self.inputBed) as bedFile:
            for line in bedFile:
                stripLine = line.strip()
                if not stripLine:
                    continue
                splitLine = stripLine.split('\t')
                chromTotRNACoords.setdefault(splitLine[0], []).append(
                    [int(splitLine[1]), int(splitLine[2]),
                     str(splitLine[3]), str(splitLine[5])])
        return chromTotRNACoords

    def _storeSequences(self, chromSeq, mytRNAs, tRNAToSeq):
        """Slice each feature out of chromSeq and record it in tRNAToSeq.

        Start and end are used directly as Python slice bounds. A strand of
        '+' keeps the slice as is; any other strand value is
        reverse-complemented with revComp. Nothing is recorded when no
        sequence was collected for the chromosome.
        """
        if not chromSeq:
            return
        for myStart, myEnd, myName, myStrand in mytRNAs:
            mySeq = chromSeq[myStart:myEnd]
            if myStrand != '+':
                mySeq = revComp(mySeq)
            tRNAToSeq[str(myName)] = mySeq
# Translation table mapping each nucleotide code to its complement. Covers
# A/C/G/T/N plus the IUPAC ambiguity codes, in upper and lower case.
_COMPLEMENT_TABLE = str.maketrans(
    'ACGTNRYSWKMBDHVacgtnryswkmbdhv',
    'TGCANYRSWMKVHDBtgcanyrswmkvhdb')


def revComp(seq):
    """Return the reverse complement of the nucleotide string seq.

    A/C/G/T/N are complemented as before. IUPAC ambiguity codes
    (R, Y, S, W, K, M, B, D, H, V) and lowercase letters are complemented
    as well, instead of raising KeyError. Any other character is left
    unchanged in the reversed output.
    """
    return seq[::-1].translate(_COMPLEMENT_TABLE)
def joiner(entry):
    """Return the items of entry, each converted with str(), joined by tabs."""
    return '\t'.join(map(str, entry))
def main(myCommandLine=None):
    """
    Passes the command line arguments into a new fileConverter object and
    runs the conversion.

    Args:
        myCommandLine: an already-built CommandLine object. When None (the
            default), a new one is created from the process command line.

    If either input option is missing, the parser's error() is called,
    which prints a usage message and exits. When no output file is given,
    the output path is the input .bed path with everything from the first
    '.' of its file name replaced by '.fa'.
    """
    if myCommandLine is None:
        myCommandLine = CommandLine()
    args = myCommandLine.args

    # Fail with a usage message instead of an unassigned-variable error.
    if not args.inputBed:
        myCommandLine.parser.error("an input .bed file is required (-b/--inputBed)")
    if not args.inputGenomeSeq:
        myCommandLine.parser.error("an input genome .fa file is required (-g/--inputGenomeSeq)")

    outputFile = args.outputFile
    if not outputFile:
        # Split off the directory first so that a '.' in the directory part
        # (e.g. './x.bed') does not truncate the derived name.
        directory, baseName = os.path.split(args.inputBed)
        outputFile = os.path.join(directory, baseName.split('.')[0]+'.fa')

    myFileConverter = fileConverter(args.inputBed, args.inputGenomeSeq, outputFile)
    myFileConverter.convertFile()
if __name__ == "__main__":
    # Calls main when the program is run by the user, then ends the
    # process explicitly.
    main()
    raise SystemExit