-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbz2-block-reader.js
executable file
·188 lines (179 loc) · 4.93 KB
/
bz2-block-reader.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
var fs = require('fs');
var path = require('path');
var util = require('util');
var SeekBzip = require('@arijs/seek-bzip');
var dataSize = require('./datasize');
var StreamingDecoder = require('./streaming-decoder');
/**
 * Formats a 32-bit CRC as exactly eight lowercase hexadecimal digits.
 *
 * The value is coerced to an unsigned 32-bit integer first, so a CRC that
 * arrives as a signed (negative) number still prints as its two's-complement
 * bit pattern instead of a string containing a minus sign.
 *
 * @param {number} crc - CRC value; only its low 32 bits are used.
 * @returns {string} Zero-padded, 8-character hex string.
 */
function printCRC(crc) {
  var unsigned = crc >>> 0;
  return ('00000000' + unsigned.toString(16)).slice(-8);
}
// ---- Script setup: open the input archive and wire the decoding pipeline ----

// Input archive, resolved relative to this script's directory.
var fpath = path.resolve(__dirname, '../planet-latest.osm.bz2');
// Stat up front: the size is printed at startup and compared against
// fopt.fileOffset in bzProcess to decide when the whole file has been read.
var fstat = fs.statSync(fpath);
// Read-only descriptor; it is closed in bzEnd().
var fd = fs.openSync(fpath, 'r');
// Wall-clock start in milliseconds.
// NOTE(review): nothing in the visible code reads tstart.
var tstart = Date.now();
// Sink for decompressed bytes (project-local class from ./streaming-decoder).
// NOTE(review): it is used like a Node stream (on/read/write/end) — confirm.
var strDec = new StreamingDecoder({ highWaterMark: 0 });
strDec.on('readable', function() {
// Drain everything the decoder has ready and discard it; the loop stops at
// the first falsy value returned by read().
var data;
while (data = this.read()) {
// console.log(data.toString('utf8'));
}
});
// Stop request flag: set to true by the SIGINT handler, checked by bzProcess
// and bzEnd.
var proximoDesce = false;
// State object returned by SeekBzip.readBlock and handed back to it on the
// next call; undefined until the first call returns.
var fopt;
// Reader over the open descriptor.
// NOTE(review): 16384 looks like a buffer size and 0 a start offset —
// confirm against @arijs/seek-bzip.
var bzRead = SeekBzip.fdReadFile(fd, 16384, 0);
// Output side of readBlock; decoded buffers are delivered to outPrint (a
// hoisted function declaration, so it is already defined here).
var bzWrite = SeekBzip.readBlock.makeOutStream(outPrint, 16384);
/**
 * Output callback given to SeekBzip.readBlock.makeOutStream: forwards each
 * decoded buffer to the streaming decoder.
 *
 * The return value of strDec.write() is ignored.
 *
 * @param {*} buf - Chunk of decompressed output; passed through unchanged.
 */
function outPrint(buf) {
// Debug aid, disabled: dump small chunks as text.
// if (buf.length <= 1024) {
// console.log(buf.toString('utf8'));
// }
strDec.write(buf);
}
// ---- Script start ----

// Print the archive size, formatted by the project-local dataSize helper.
console.error(dataSize(fstat.size));
// Ctrl-C does not exit immediately: it only raises the flag, and bzProcess
// switches to bzEnd once the read it is doing has returned.
process.on('SIGINT', function() {
proximoDesce = true;
console.error('\n\nParando no próximo ponto...\n\n');
});
// Start the read loop; it reschedules itself until the end of the file, a
// stop request, or an error.
bzProcess();
/**
 * Default size formatter: renders a value as a whole number with no
 * fractional digits.
 *
 * @param {*} x - Value to format; converted with Number().
 * @returns {string} The number rounded to zero decimals ('NaN' when the
 *   value is not numeric).
 */
function formatBytes(x) {
  var asNumber = Number(x);
  return asNumber.toFixed(0);
}
/**
 * Builds the one-line progress summary for a state object returned by
 * SeekBzip.readBlock.
 *
 * Three shapes are distinguished by truthiness:
 *  - `fopt.streamCRC` set: stream summary (file count, file offset, end bit
 *    offset, input bytes, stream CRC);
 *  - otherwise `fopt.blockCRC` set: block summary (file.block counters, byte
 *    and bit offset, input and output bytes, block CRC);
 *  - neither: a shallow util.inspect dump of the state.
 *
 * NOTE(review): a CRC of exactly 0 is falsy and falls through to the next
 * case; kept as in the original because the library's field conventions are
 * not visible here.
 *
 * The original block branch had `fopt.swrite.flush()` after its `return`.
 * That statement could never run, so it was removed; no flush is performed.
 *
 * @param {Object} fopt - State object from SeekBzip.readBlock.
 * @param {function(*): string} [fSize] - Size formatter; defaults to
 *   formatBytes when omitted or falsy.
 * @returns {string} Summary line.
 */
function detailsBlock(fopt, fSize) {
  // Use a local instead of reassigning the parameter.
  var size = fSize || formatBytes;

  if (fopt.streamCRC) {
    return (
      ' ' + fopt.fileCount +
      ' file ' + size(fopt.fileOffset) +
      ' b.' + fopt.bitOffsetEnd +
      ' in ' + size(fopt.bytesInput) +
      ' crc ' + printCRC(fopt.streamCRC)
    );
  }

  if (fopt.blockCRC) {
    return (
      fopt.fileCount + '.' + fopt.blockCount +
      ' block ' + size(fopt.byteOffset) +
      ' b.' + fopt.bitOffset +
      ' in ' + size(fopt.bytesInput) +
      ' out ' + size(fopt.bytesOutput) +
      ' crc ' + printCRC(fopt.blockCRC)
    );
  }

  return 'no block and no stream ' + util.inspect(fopt, {depth: 0});
}
/**
 * One step of the read loop: reads the next unit through SeekBzip.readBlock,
 * prints its summary line, then schedules the following step.
 *
 * The following step is bzProcess again while no stop was requested and
 * fopt.fileOffset is still below the file size; otherwise it is bzEnd.
 * Scheduling goes through process.nextTick, except when fopt.fileCount is a
 * multiple of 10 (or not a number), where setTimeout(..., 0) is used instead.
 * Anything thrown inside the step is handed to bzEnd.
 */
function bzProcess() {
  try {
    fopt = SeekBzip.readBlock(bzRead, bzWrite, fopt);
    console.error(detailsBlock(fopt, formatBytes));

    var keepReading = !proximoDesce && (fopt.fileOffset < fstat.size);
    var following = bzEnd;
    if (keepReading) {
      following = bzProcess;
    }

    var useTimer = !(fopt.fileCount % 10);
    if (useTimer) {
      setTimeout(following, 0);
    } else {
      process.nextTick(following);
    }
  } catch (err) {
    bzEnd(err);
  }
}
/**
 * Diagnostic scan: looks for a hex pattern in the reader's current buffer
 * window at each of the eight possible bit alignments.
 *
 * For every bit offset 0..7 the reader is moved to the start of the window
 * (bzRead.filePos - bzRead.end), then `bzRead.end - 1` bytes are read in
 * chunks of ten times the pattern's byte length. Each pair of consecutive
 * chunks is searched as one hex string, so a match that straddles a chunk
 * boundary is still found. Only the first hit per pair is recorded, and a hit
 * lying entirely inside one chunk can be recorded twice (once as the second
 * chunk of a pair, once as the first chunk of the next pair).
 *
 * Progress is logged to stderr per bit offset. Before returning, the reader is
 * moved back to the position and bit offset it had on entry.
 *
 * NOTE(review): nothing in the visible code calls this function; bzEnd uses
 * SeekBzip.searchHexString instead.
 *
 * @param {string} hexString - Pattern as hexadecimal text.
 * @param {Object} bz - Object whose `reader` provides bitOffset, seek(pos, bit)
 *   and readBytes(n).
 * @param {Object} bzRead - Buffered file reader exposing filePos, pos, end and
 *   lastSeek.
 * @returns {Array<Array<Object>>} Eight arrays (index = bit offset) of match
 *   records {ix, c, s1, s2, o, fs, r}; `o` is the byte offset of the match
 *   from the window start.
 */
function searchHexString(hexString, bz, bzRead) {
  var chunkBytes = Math.ceil(hexString.length * 0.5) * 10;
  var savedBit = bz.reader.bitOffset;
  var savedFilePos = bzRead.filePos;
  var savedPos = bzRead.pos;
  var savedEnd = bzRead.end;
  var windowStart = savedFilePos - savedEnd;
  var results = [];
  var left; // bytes of the window not yet read for the current bit offset

  // Reads the next chunk as hex text, or '' once the window is exhausted.
  function readChunkHex() {
    var count = Math.min(chunkBytes, left);
    if (count > 0) {
      var hex = bz.reader.readBytes(count).toString('hex');
      left -= count;
      return hex;
    }
    return '';
  }

  for (var bit = 0; bit < 8; bit += 1) {
    var found = [];
    bz.reader.seek(windowStart, bit);
    left = bzRead.end - 1;
    var total = left;
    console.error('---- bit '+bit+' ----', {fp: bzRead.filePos, p: bzRead.pos, e: bzRead.end, r: left});
    console.error(bzRead.lastSeek);

    var current = readChunkHex();
    var round = 0;
    var prevStart = 0;            // window offset where `previous` begins
    var nextStart = total - left; // window offset where the next chunk begins
    var previous;
    do {
      previous = current;
      current = readChunkHex();
      var at = (previous + current).indexOf(hexString);
      if (at !== -1) {
        found.push({
          ix: at,
          c: round,
          s1: prevStart,
          s2: nextStart,
          o: at * 0.5 + prevStart,
          fs: total,
          r: left
        });
      }
      round += 1;
      prevStart = nextStart;
      nextStart = total - left;
    } while (left > 0);

    results.push(found);
  }

  // Put the reader back where it was on entry.
  bz.reader.seek(windowStart + savedPos, savedBit);
  return results;
}
/**
 * Final step of the run: reports the outcome and releases resources.
 *
 * With an error whose errorCode is SeekBzip.Err.NOT_BZIP_DATA, and provided a
 * previous read left a usable state (fopt.bz), the current buffer is first
 * searched at every bit alignment for the hex pattern 314159265359 (the bzip2
 * block-header magic) and the matches are printed. After that the state and
 * the error are printed.
 *
 * Changes from the original behaviour:
 *  - the diagnostic search is skipped when fopt (or fopt.bz) is missing, which
 *    happens when the very first read fails; previously that raised a
 *    TypeError from inside this function;
 *  - bzRead.fillBufferDisabled is restored, the file descriptor is closed and
 *    the decoder is ended even if the diagnostic or the logging throws;
 *  - the success message is printed only when there was no error; after an
 *    error the "stopped before the end" message is printed instead.
 *
 * @param {Error} [err] - Error that interrupted the read loop, if any.
 */
function bzEnd(err) {
  try {
    if (err) {
      var canSearch = err.errorCode === SeekBzip.Err.NOT_BZIP_DATA && fopt && fopt.bz;
      if (canSearch) {
        var pi = '314159265359';
        // Presumably stops the reader from refilling its buffer while the
        // search walks it — confirm against @arijs/seek-bzip.
        bzRead.fillBufferDisabled = true;
        try {
          var m = SeekBzip.searchHexString(pi, fopt.bz, bzRead, {
            searchMult: 10,
            onBitStart: function(bit) {
              console.error('---- bit '+bit+' ----',{fp:bzRead.filePos,p:bzRead.pos,e:bzRead.end});
              console.error(bzRead.lastSeek);
            },
            onFound: function(res, bit) {
              console.error('. '.concat(res.c,' ',res.n,' ',res.ix,' ',res.s1,'-',res.s2));
            }
          });
          for (var i = 0; i < m.length; i++) {
            var mil = m[i].length;
            console.error(': bit '+i+(mil ? ' - '+mil+' matches' : ''));
            for (var j = 0; j < mil; j++) {
              console.error(m[i][j]);
            }
          }
        } finally {
          bzRead.fillBufferDisabled = false;
        }
      }
      console.error(util.inspect(fopt, {depth: 0}));
      console.error('Teve um erro', err);
    }
    console.error(
      (proximoDesce || err)
        ? 'Parou antes do final'
        : 'Você chegou no final, parabéns!'
    );
  } finally {
    // Always release the descriptor and finish the decoder.
    fs.closeSync(fd);
    strDec.end();
  }
}