Skip to content

Commit 22aa09f

Browse files
committed
Remove Biopython dependency
1 parent a592d6b commit 22aa09f

File tree

1 file changed

+61
-11
lines changed

1 file changed

+61
-11
lines changed

fasta_min_len.py

+61-11
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,71 @@
1-
#!/usr/bin/env python2
1+
#!/usr/bin/env python3
22

33
"""Display length of the shortest sequence of a fasta file.
44
55
Usage:
6-
%program file.fasta"""
6+
<program> input_fasta
7+
"""
78

9+
# Modules
10+
import gzip
811
import sys
912

10-
try:
11-
from Bio import SeqIO
12-
except:
13-
print "This program requires the Biopython library"
14-
sys.exit(0)
13+
# Classes
14+
class Fasta:
    """A single fasta record: a header name plus its sequence string."""

    def __init__(self, name, sequence):
        self.name, self.sequence = name, sequence

    def write_to_file(self, handle):
        """Write the record to `handle` as a `>name` line and a sequence line."""
        header_line = "".join((">", self.name, "\n"))
        sequence_line = "".join((self.sequence, "\n"))
        handle.write(header_line)
        handle.write(sequence_line)

    def __repr__(self):
        # Only the first 31 characters of the sequence are shown.
        preview = self.sequence[:31]
        return " ".join((self.name, preview))
28+
29+
# Defining functions
30+
def myopen(_file, mode="rt"):
    """Open `_file` with gzip.open if its name ends in ".gz", else with open.

    `mode` is passed through unchanged and defaults to text reading ("rt").
    """
    opener = gzip.open if _file.endswith(".gz") else open
    return opener(_file, mode=mode)
36+
37+
def fasta_iterator(input_file):
    """Yield one Fasta object per record of the fasta file `input_file`.

    The file is opened with myopen, so names ending in ".gz" are read
    through gzip. Every line is stripped of surrounding whitespace. A line
    starting with ">" begins a new record whose name is the rest of that
    line; all following lines up to the next header are concatenated into
    the record's sequence. Lines that appear before the first header are
    ignored.
    """
    with myopen(input_file) as f:
        chunks = []
        name = ""
        begun = False

        for line in f:
            line = line.strip()

            if line.startswith(">"):
                if begun:
                    yield Fasta(name, "".join(chunks))

                name = line[1:]
                chunks = []
                begun = True

            elif begun:
                # Collect pieces and join once per record instead of
                # growing a string with repeated concatenation.
                chunks.append(line)

        # Gate on `begun`, not on the name, so a final record with an
        # empty header (a bare ">") is yielded like any earlier one.
        if begun:
            yield Fasta(name, "".join(chunks))
61+
62+
# Parsing user input
1663
try:
    input_fasta = sys.argv[1]
except IndexError:
    # No input file on the command line: show the usage text and fail.
    print(__doc__)
    sys.exit(1)

sequences = fasta_iterator(input_fasta)

# Stream the lengths through min() rather than building a list; `default`
# keeps a file without any record from raising a bare ValueError.
shortest = min((len(s.sequence) for s in sequences), default=None)

if shortest is None:
    # sys.exit with a string prints it to stderr and exits with status 1.
    sys.exit("No sequences found in " + input_fasta)

print(shortest)

0 commit comments

Comments
 (0)