-
Notifications
You must be signed in to change notification settings - Fork 272
/
reverse-complement-1.cs
147 lines (133 loc) · 5.46 KB
/
reverse-complement-1.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
// Adapted from reverse-complement C# .NET Core program
// http://benchmarksgame.alioth.debian.org/u64q/program.php?test=revcomp&lang=csharpcore&id=1
// aka (as of 2017-09-01) rev 1.2 of https://alioth.debian.org/scm/viewvc.php/benchmarksgame/bench/revcomp/revcomp.csharp?root=benchmarksgame&view=log
// Best-scoring single-threaded C# .NET Core version as of 2017-09-01
/* The Computer Language Benchmarks Game
http://benchmarksgame.alioth.debian.org/
contributed by Robert F. Tobler to process large blocks of byte arrays
*/
using System;
using System.IO;
using System.Collections.Generic;
using BenchmarkDotNet.Attributes;
using MicroBenchmarks;
namespace BenchmarksGame
{
[BenchmarkCategory(Categories.Runtime, Categories.BenchmarksGame, Categories.JIT, Categories.NoWASM)]
public class ReverseComplement_1
{
// One chunk of the input together with a running number that identifies the chunk.
struct Block
{
    // Bytes obtained by the most recent call to Read.
    public byte[] Data;

    // Incremented by every Read; Index values record it to remember which chunk they point into.
    public int Count;

    // Replaces Data with the next chunk (at most 16384 bytes) from r, advances Count,
    // and returns the number of bytes obtained.
    public int Read(BinaryReader r)
    {
        byte[] chunk = r.ReadBytes(16384);
        Data = chunk;
        Count += 1;
        return chunk.Length;
    }

    // Searches Data for b starting at offset o. The returned Index is tagged with the
    // current Count; its Pos is the match offset, or negative when b does not occur.
    public Index IndexOf(byte b, int o)
    {
        int hit = Array.IndexOf(Data, b, o);
        return new Index { Block = Count, Pos = hit };
    }
}
// A position inside the input: the number of the chunk it was found in plus the offset there.
struct Index
{
    // Value of Block.Count at the time the position was recorded.
    public int Block;

    // Offset within that chunk; a negative value means "nothing found".
    public int Pos;

    // Sentinel for "no position".
    public static readonly Index None = new Index { Pos = -1, Block = -1 };

    // True when this position was recorded against the chunk b currently holds.
    public bool InBlock(Block b)
    {
        return b.Count == Block;
    }
}
// Byte value of '>'; Bench searches for it to locate the start of a header line.
const byte Gt = (byte)'>';
// Byte value of the line feed character.
const byte Lf = (byte)'\n';
// Shared helper object; this class reads its InputFile and FileLength members.
static ReverseComplementHelpers helpers = new ReverseComplementHelpers(bigInput: true);
// Backing buffer for the benchmark's output stream, allocated with helpers.FileLength bytes.
byte[] outBytes = new byte[helpers.FileLength];
// Benchmark entry point: runs Bench over the helper-provided input file, writing the
// result into the preallocated outBytes buffer.
[Benchmark(Description = nameof(ReverseComplement_1))]
public void RunBench()
{
    // The input is only ever read. Without an explicit FileAccess, FileMode.Open requests
    // read/write access, which fails on read-only files, so ask for read access only.
    using (var input = new FileStream(helpers.InputFile, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (var output = new MemoryStream(outBytes))
    {
        Bench(input, output);
    }
}
// Reads input block by block. Each header line (from a '>' through the following line feed)
// is copied to output unchanged; the bytes between that line feed and the next '>' (or the
// end of the input) are passed to Reverse, which writes them out reversed and complemented.
// The using statements below dispose both the reader over input and the output stream.
static void Bench(Stream input, Stream output)
{
InitComplements();
// Blocks that hold the sequence currently being collected.
var seq = new List<byte[]>();
// Count starts at -1 so that the first Read brings it to 0.
var b = new Block { Count = -1 };
// line  = position of the '>' that opens the current record,
// start = position of the line feed that ends its header,
// end   = position of the next '>'.
// A negative Pos means the position has not been found yet.
Index line = Index.None, start = Index.None, end = Index.None;
using (var r = new BinaryReader(input))
{
using (var w = output)
{
// Read returns the number of bytes obtained; zero ends the loop.
while (b.Read(r) > 0)
{
seq.Add(b.Data);
// No record open yet: look for its '>' from the beginning of this block.
if (line.Pos < 0) line = b.IndexOf(Gt, 0);
while (line.Pos >= 0)
{
if (start.Pos < 0)
{
// Resume at the '>' when it lies in this block; otherwise the header began in an
// earlier block and continues from offset 0 here.
var off = line.InBlock(b) ? line.Pos : 0;
start = b.IndexOf(Lf, off);
if (start.Pos < 0)
{
// Header is not finished in this block: emit the part seen so far, discard the
// buffered blocks, and go read the next block.
w.Write(b.Data, off, b.Data.Length - off);
seq.Clear(); break;
}
// Emit the (rest of the) header including its line feed.
w.Write(b.Data, off, start.Pos + 1 - off);
}
if (end.Pos < 0)
{
// Look for the next '>' after the header's line feed (or from 0 in a later block).
end = b.IndexOf(Gt, start.InBlock(b) ? start.Pos : 0);
// Not found: the sequence continues in the next block, which stays buffered in seq.
if (end.Pos < 0) break;
}
// Reverse applies start.Pos to the first buffered block and end.Pos to the last one.
Reverse(w, start.Pos, end.Pos, seq);
// Keep only the current block; it contains the '>' of the next record.
if (seq.Count > 1) seq.RemoveRange(0, seq.Count - 1);
// The '>' just found opens the next record.
line = end; end = Index.None; start = Index.None;
}
}
// Input ended inside a sequence: emit it up to the end of the last buffered block.
if (start.Pos >= 0 && end.Pos < 0)
Reverse(w, start.Pos, seq[seq.Count - 1].Length, seq);
}
}
}
// Seq[i] is translated to Rev[i]; lower-case codes map to the upper-case complement.
const string Seq = "ABCDGHKMRTVYabcdghkmrtvy";
const string Rev = "TVGHCDMKYABRTVGHCDMKYABR";
// Byte-to-byte translation table used by Reverse; an entry of 0 means "drop this byte".
static byte[] comp = new byte[256];
// Fills comp: every byte maps to itself, the codes listed in Seq map to their
// counterpart in Rev, and line feed and space map to 0 so that Reverse skips them.
static void InitComplements()
{
    // An int counter is required here: a byte counter cannot reach 256, and stopping at
    // 255 would leave comp[255] at 0, silently dropping byte 0xFF instead of passing it through.
    for (int i = 0; i < comp.Length; i++)
    {
        comp[i] = (byte)i;
    }
    for (int i = 0; i < Seq.Length; i++)
    {
        comp[(byte)Seq[i]] = (byte)Rev[i];
    }
    comp[Lf] = 0;
    comp[(byte)' '] = 0;
}
// Length of one output line including its trailing line feed.
const int LineLen = 61;
// The staging buffer holds a whole number of output lines.
const int BufSize = LineLen * 269;
// Shared staging buffer that Reverse fills before writing to the stream.
static byte[] buf = new byte[BufSize];
// Walks the blocks in bl from the last byte to the first, translating each byte through
// comp and writing the result to w in lines of LineLen - 1 bytes followed by a line feed.
// si is the lowest offset visited in bl[0]; ei is the exclusive upper offset in the last
// block. Bytes whose comp entry is 0 are skipped.
static void Reverse(Stream w, int si, int ei, List<byte[]> bl)
{
    int used = 0;
    int breakAt = LineLen - 1;
    int last = bl.Count - 1;

    for (int blockNo = last; blockNo >= 0; blockNo--)
    {
        byte[] chunk = bl[blockNo];
        int floor = (blockNo == 0) ? si : 0;
        int top = (blockNo == last) ? ei : chunk.Length;

        for (int pos = top - 1; pos >= floor; pos--)
        {
            byte mapped = comp[chunk[pos]];
            if (mapped != 0)
            {
                buf[used++] = mapped;
            }

            // Nothing more to do until the current line is full.
            if (used != breakAt)
            {
                continue;
            }

            buf[used++] = Lf;
            breakAt += LineLen;

            // Buffer completely filled: flush it and start over at the first line.
            if (used == BufSize)
            {
                w.Write(buf, 0, BufSize);
                used = 0;
                breakAt = LineLen - 1;
            }
        }
    }

    // Flush the remainder, terminating a partial last line with a line feed.
    if (used > 0)
    {
        if (buf[used - 1] != Lf)
        {
            buf[used++] = Lf;
        }
        w.Write(buf, 0, used);
    }
}
}
}