-
-
Notifications
You must be signed in to change notification settings - Fork 41
/
index.go
124 lines (106 loc) · 3.08 KB
/
index.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// Copyright ©2015 The bíogo Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package index provides common code for CSI and tabix BGZF indexing.
package index
import (
"errors"
"io"
"github.com/biogo/hts/bgzf"
)
// ErrNoReference is the sentinel error carrying the message
// "index: no reference".
// NOTE(review): nothing visible in this file returns it; presumably it is
// used by index lookups elsewhere in the package — confirm against callers.
var ErrNoReference = errors.New("index: no reference")

// ErrInvalid is the sentinel error carrying the message
// "index: invalid interval".
// NOTE(review): nothing visible in this file returns it; presumably it is
// used when a requested interval is rejected — confirm against callers.
var ErrInvalid = errors.New("index: invalid interval")
// ReferenceStats holds mapping statistics for a genomic reference: the
// BGZF chunk covering its alignments and counts of mapped and unmapped
// reads.
type ReferenceStats struct {
// Chunk is the span of the indexed BGZF
// holding alignments to the reference.
Chunk bgzf.Chunk
// Mapped is the count of mapped reads.
Mapped uint64
// Unmapped is the count of unmapped reads.
Unmapped uint64
}
// ChunkReader wraps a bgzf.Reader to provide a mechanism to read a selection
// of BGZF chunks.
type ChunkReader struct {
// r is the wrapped reader. Close sets it to nil.
r *bgzf.Reader
// wasBlocked records the Blocked setting r had before NewChunkReader
// forced it to true, so that Close can put it back.
wasBlocked bool
// chunks holds the chunks that remain to be read; chunks[0] is the
// chunk currently being read and is dropped by Read once consumed.
chunks []bgzf.Chunk
}
// NewChunkReader returns a ChunkReader to read from r, limiting the reads to
// the provided chunks. The provided bgzf.Reader will be put into Blocked mode.
//
// If chunks is not empty, r is positioned at the beginning of the first
// chunk before returning. If that seek fails, the Blocked setting r had on
// entry is restored and the seek error is returned with a nil ChunkReader.
func NewChunkReader(r *bgzf.Reader, chunks []bgzf.Chunk) (*ChunkReader, error) {
	wasBlocked := r.Blocked
	r.Blocked = true
	if len(chunks) != 0 {
		if err := r.Seek(chunks[0].Begin); err != nil {
			// No ChunkReader is handed back on this path, so the
			// caller has no Close to call; undo the mode change
			// here rather than leaving r silently altered.
			r.Blocked = wasBlocked
			return nil, err
		}
	}
	return &ChunkReader{r: r, wasBlocked: wasBlocked, chunks: chunks}, nil
}
// Read satisfies the io.Reader interface. It reads from the wrapped
// bgzf.Reader into p, truncating p so that a single call does not read past
// the end of the current chunk. When the current chunk has been consumed it
// is dropped and the wrapped reader is moved to the beginning of the next
// one. Read returns io.EOF when no chunks remain.
func (r *ChunkReader) Read(p []byte) (int, error) {
if len(r.chunks) == 0 {
return 0, io.EOF
}
// last is the chunk reported by the wrapped reader before this call
// reads anything; it is compared against the post-read chunk below to
// detect lack of progress.
last := r.r.LastChunk()
// The reader is already at or beyond the end of the current chunk, so
// there is nothing left to deliver.
if vOffset(last.End) >= vOffset(r.chunks[0].End) {
return 0, io.EOF
}
// Ensure the byte slice does not extend beyond the end of
// the current chunk. We do not need to consider reading
// beyond the end of the block because the bgzf.Reader is in
// blocked mode and so will stop there anyway.
want := int(r.chunks[0].End.Block)
if r.chunks[0].End.Block == 0 && r.chunks[0].End.File > last.End.File {
// Special case for when the current end block offset
// is zero: the chunk ends at the very start of a later
// block, so use the value of r.r.BlockLen() as the limit
// (presumably the length of the block being read — confirm
// against the bgzf package).
want = r.r.BlockLen()
}
// cursor is the within-block offset already reached; it stays zero
// unless the previous read ended in the block that holds the chunk end.
var cursor int
if last.End.File == r.chunks[0].End.File {
// Our end is in the same block as the last chunk end
// so set the cursor to the chunk block end to prevent
// reading past the end of the chunk.
cursor = int(last.End.Block)
}
// Read at most want-cursor bytes, and never more than len(p).
n, err := r.r.Read(p[:min(len(p), want-cursor)])
if err != nil {
// Bytes were delivered, so do not report io.EOF alongside them
// on this call.
if n != 0 && err == io.EOF {
err = nil
}
return n, err
}
// Check whether we are at or past the end of the current
// chunk or we have not made progress for reasons other than
// zero length p.
this := r.r.LastChunk()
if (len(p) != 0 && this == last) || vOffset(this.End) >= vOffset(r.chunks[0].End) {
// Finished with the current chunk: drop it and, if another
// remains, seek to its beginning. A seek error is returned
// together with the count of bytes already read.
r.chunks = r.chunks[1:]
if len(r.chunks) == 0 {
return n, io.EOF
}
err = r.r.Seek(r.chunks[0].Begin)
}
return n, err
}
// vOffset packs a bgzf.Offset into a single int64 so that two offsets can be
// compared with ordinary integer operators: the File field is shifted left by
// 16 bits and the Block field is OR-ed into the low bits.
func vOffset(o bgzf.Offset) int64 {
	const blockBits = 16
	high := o.File << blockBits
	low := int64(o.Block)
	return high | low
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// Close returns the bgzf.Reader to its original blocking mode and releases it.
// The bgzf.Reader is not closed. Calling Close again after the reader has been
// released does nothing. Close always returns nil.
func (r *ChunkReader) Close() error {
	if r.r == nil {
		// Already released by an earlier Close; there is no reader
		// left whose mode could be restored.
		return nil
	}
	r.r.Blocked = r.wasBlocked
	r.r = nil
	return nil
}