|
1 |
| -#!/usr/bin/env python2 |
| 1 | +#!/usr/bin/env python3 |
2 | 2 |
|
3 | 3 | """Display length of the shortest sequence of a fasta file.
|
4 | 4 |
|
5 | 5 | Usage:
|
6 |
| - %program file.fasta""" |
| 6 | + <program> input_fasta |
| 7 | +""" |
7 | 8 |
|
| 9 | +# Modules |
| 10 | +import gzip |
8 | 11 | import sys
|
9 | 12 |
|
10 |
| -try: |
11 |
| - from Bio import SeqIO |
12 |
| -except: |
13 |
| - print "This program requires the Biopython library" |
14 |
| - sys.exit(0) |
| 13 | +# Classes |
class Fasta:
    """A single fasta record: a sequence name and its sequence string.

    Attributes:
        name: header text of the record, without the leading ">".
        sequence: the sequence as one string.
    """

    def __init__(self, name, sequence):
        self.name = name
        self.sequence = sequence

    def write_to_file(self, handle):
        """Write the record to *handle* as a two-line fasta entry.

        The output is a ">" header line followed by the whole sequence
        on a single line. *handle* only needs a ``write`` method that
        accepts text.
        """
        # One write per record instead of two
        handle.write(">" + self.name + "\n" + self.sequence + "\n")

    def __repr__(self):
        # Only the first 31 characters of the sequence are shown so that
        # long sequences stay readable
        return self.name + " " + self.sequence[:31]
| 28 | + |
| 29 | +# Defining functions |
def myopen(_file, mode="rt"):
    """Open a plain or gzip-compressed file.

    Args:
        _file: path of the file to open, as a string or a path-like
            object such as ``pathlib.Path``.
        mode: mode passed through to the opener; defaults to text
            reading ("rt").

    Returns:
        The object returned by ``gzip.open`` when the path ends with
        ".gz", otherwise the object returned by the builtin ``open``.
    """
    # str() lets path-like objects through the suffix test; they have no
    # str-style endswith method
    if str(_file).endswith(".gz"):
        return gzip.open(_file, mode=mode)

    return open(_file, mode=mode)
| 36 | + |
def fasta_iterator(input_file):
    """Yield the records of a fasta file one at a time.

    The file is opened with ``myopen``, so a path ending in ".gz" is
    read as gzip. Each line is stripped of surrounding whitespace. A
    line starting with ">" begins a new record whose name is the rest
    of that line; every following line is appended to that record's
    sequence until the next header or the end of the file.

    Lines that appear before the first header belong to no record and
    are ignored. A file without any header yields nothing.

    Args:
        input_file: path of the fasta file.

    Yields:
        Fasta: one object per record, in file order.
    """
    with myopen(input_file) as f:
        # None means "no header seen yet"; an empty header (">" alone)
        # is still a record and is yielded like any other
        name = None
        chunks = []

        for line in f:
            line = line.strip()

            if line.startswith(">"):
                if name is not None:
                    yield Fasta(name, "".join(chunks))

                name = line[1:]
                chunks = []

            elif name is not None:
                # Collect pieces and join once per record rather than
                # growing a string line by line
                chunks.append(line)

        if name is not None:
            yield Fasta(name, "".join(chunks))
| 61 | + |
| 62 | +# Parsing user input |
try:
    input_fasta = sys.argv[1]
except IndexError:
    # No input file on the command line: show the usage text and fail
    print(__doc__)
    sys.exit(1)

sequences = fasta_iterator(input_fasta)

# A generator avoids building a list of every length; default=None makes
# a file without any record detectable instead of raising ValueError
shortest = min((len(s.sequence) for s in sequences), default=None)

if shortest is None:
    # sys.exit with a string prints it to stderr and exits with status 1
    sys.exit("No sequences found in " + input_fasta)

print(shortest)
0 commit comments