-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfasta_index.rb
60 lines (53 loc) · 1.19 KB
/
fasta_index.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
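# Purpose:
#   Build an index for a (usually large) multi-sequence FASTA file, so that
#   individual records can be located by byte offset.
# Usage: ruby fasta_index.rb fasta_file > fasta.index
# Output (fasta.index): one line per FASTA record, colon-separated:
#   seq_name:record_start:record_end:seq_start:line_width
#   - seq_name     : header line without the leading '>'
#   - record_start : byte offset of the first byte of the record (its header line)
#   - record_end   : byte offset of the last byte of the record
#   - seq_start    : byte offset of the first byte after the header line
#   - line_width   : width of the first non-empty sequence line of the record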
# Matches a FASTA header line: optional leading whitespace followed by '>'.
FASTA_HEADER = /\A\s*>/.freeze

# Scans a FASTA stream and yields one index line per record.
#
# Each yielded string has the form
#   "name:record_start:record_end:seq_start:line_width"
# where all positions are 0-based byte offsets from the start of +io+:
# * name         - header line without leading whitespace, '>' and line ending
# * record_start - offset of the first byte of the header line
# * record_end   - offset of the last byte belonging to the record
# * seq_start    - offset of the first byte after the header line
# * line_width   - byte length (line ending excluded) of the first non-empty
#                  line after the header; 0 if the record has no such line
#
# Lines that appear before the first header are not part of any record, but
# their bytes are still counted so that later offsets stay correct.
#
# @param io [#each_line] stream positioned at the start of the FASTA data;
#   open files in binary mode so byte offsets match the file on disk
# @yieldparam record [String] one index line, without trailing newline
# @return [void]
def each_fasta_index_record(io)
  name = nil          # nil until the first header has been seen
  record_start = 0
  seq_start = 0
  line_width = 0
  offset = 0          # byte offset of the start of the current line

  io.each_line do |line|
    if line.match?(FASTA_HEADER)
      # A new header closes the previous record, which ends on the byte
      # immediately before this line.
      yield "#{name}:#{record_start}:#{offset - 1}:#{seq_start}:#{line_width}" if name
      name = line.chomp.sub(FASTA_HEADER, '')
      record_start = offset
      seq_start = offset + line.bytesize
      line_width = 0
    elsif name && line_width.zero?
      # chomp handles "\n", "\r\n" and a final line without any terminator.
      line_width = line.chomp.bytesize
    end
    offset += line.bytesize
  end

  # The last record is closed by end of input.
  yield "#{name}:#{record_start}:#{offset - 1}:#{seq_start}:#{line_width}" if name
end

filename = ARGV.shift
if filename.nil?
  # Diagnostics go to stderr so they never end up in a redirected index file.
  warn 'usage: ruby fasta_index.rb fasta_file > fasta.index'
else
  begin
    # Binary mode: offsets are byte positions, so no newline or encoding
    # translation may happen while reading.
    input = File.open(filename, 'rb')
  rescue SystemCallError => e
    abort "file open failed: #{e.message}"
  end

  begin
    each_fasta_index_record(input) { |record| puts record }
  ensure
    input.close
  end
end