-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathCompressedFile.cc
169 lines (144 loc) · 3.7 KB
/
CompressedFile.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#include "CompressedFile.h"
#include <cstdio>
#include "PathUtils.h"
#include <inttypes.h>
// Out-of-class definition of the CompressedFile::ChunkSize constant.
// NOTE(review): presumably a size in bytes; its users are not visible in
// this file -- confirm against the header/callers.
const size_t CompressedFile::ChunkSize = 4096;
// Report a format problem with this file.
//
// Always throws a FormatException built from mPath and the supplied
// description `s`; this function never returns normally.
void CompressedFile::throwFormat(const std::string& s) const {
    throw FormatException(mPath, s);
}
// Name to use for the destination: PathUtils::basename() applied to
// this file's path().
std::string CompressedFile::destName() const {
    return PathUtils::basename(path());
}
void CompressedFile::checkSizes(uint64_t maxBlock) const {
BlockIterator iter;
try {
iter = findBlock(0);
} catch (std::runtime_error& e) {
throw std::runtime_error("no blocks in file?");
}
for (; !iter.end(); ++iter) {
if (iter->usize > maxBlock) {
fprintf(stderr, "WARNING: %s has blocks too large to cache, "
"operations on it will be slow!\n", path().c_str());
break;
}
}
}
void CompressedFile::dumpBlocks() {
fprintf(stderr, "\nBLOCKS\n");
for (BlockIterator iter = findBlock(0); !iter.end(); ++iter) {
fprintf(stderr, "Block: uoff = %9" PRIu64 ", coff = %9" PRIu64
", usize = %9u, csize = %9u\n", iter->uoff, iter->coff,
iter->usize, iter->csize);
}
}
void BlockListCompFile::initialize(uint64_t maxBlock) {
FileHandle fh(path(), O_RDONLY);
checkFileType(fh);
loadIndex(fh);
checkSizes(maxBlock);
}
// Load the block index for the file open on `fh`. This implementation
// simply delegates to buildIndex(fh).
void BlockListCompFile::loadIndex(FileHandle& fh) {
    buildIndex(fh);
}
namespace {

// Comparator relating a Block to an uncompressed offset, used by findBlock()
// with std::lower_bound over the block list.
//
// Both call operators are const-qualified so the comparator can also be
// invoked through a const object or reference.
struct BlockOffsetOrdering {
    // True when the last byte of `b` (uoff + usize - 1) lies before `off`,
    // i.e. the block ends before the requested offset.
    // NOTE(review): for a block with uoff + usize == 0 the unsigned
    // subtraction wraps, so such a block never compares as "before" any
    // offset -- confirm whether empty blocks can occur.
    bool operator()(const Block* b, off_t off) const {
        return (b->uoff + b->usize - 1) < static_cast<uint64_t>(off);
    }

    // True when `off` lies before the first byte of `b`.
    bool operator()(off_t off, const Block* b) const {
        return static_cast<uint64_t>(off) < b->uoff;
    }
};

}
// Locate the first block that does not end before uncompressed offset
// `off` and return an iterator running from that block to the end of the
// block list.
//
// Throws std::runtime_error("can't find block") when every block ends
// before `off` (including when the list is empty).
BlockListCompFile::BlockIterator BlockListCompFile::findBlock(off_t off) const {
    BlockList::const_iterator hit = std::lower_bound(
            mBlocks.begin(), mBlocks.end(), off, BlockOffsetOrdering());

    if (hit != mBlocks.end())
        return BlockIterator(new Iterator(hit, mBlocks.end()));

    throw std::runtime_error("can't find block");
}
// The block list stores raw Block pointers; delete each one on teardown.
BlockListCompFile::~BlockListCompFile() {
    BlockList::iterator cur = mBlocks.begin();
    while (cur != mBlocks.end()) {
        delete *cur;
        ++cur;
    }
}
off_t BlockListCompFile::uncompressedSize() const {
if (mBlocks.empty())
return 0;
const Block& b = *mBlocks.back();
return b.uoff + b.usize;
}
// Load the block index, preferring a previously saved index file.
//
// First tries to open and read the file at indexPath(). If it cannot be
// opened, the index is rebuilt from `fh` via buildIndex() and then
// written out to indexPath() for next time.
//
// NOTE(review): only a failure to *open* the index is tolerated here; an
// exception thrown by readIndex() propagates to the caller -- confirm that
// is intended for damaged index files.
void IndexedCompFile::loadIndex(FileHandle &fh) {
    bool loaded = false;
    {
        // Scoped so the reader handle is gone before the index is
        // (re)created below.
        FileHandle reader;
        try {
            reader.open(indexPath(), O_RDONLY);
        } catch (const FileHandle::Exception&) {
            // A missing or unreadable index is fine; fall through.
        }
        loaded = reader.open() && readIndex(reader);
    }

    if (loaded)
        return;

    buildIndex(fh);
    FileHandle writer(indexPath(), O_WRONLY | O_CREAT | O_TRUNC, 0664);
    writeIndex(writer);
}
// Path of the on-disk block index, as computed by the constructor.
std::string IndexedCompFile::indexPath() const {
    return mIndexPath;
}
// Read block records from `fh` until readBlock() reports the end marker,
// adding each block to the list with its uncompressed offset set to the
// running sum of the preceding blocks' uncompressed sizes.
//
// Returns true once the end marker is reached. Any exception is rethrown
// after the block being worked on has been freed.
bool IndexedCompFile::readIndex(FileHandle& fh) {
    uint64_t nextOffset = 0;
    for (;;) {
        Block* blk = 0;
        try {
            blk = newBlock();
            if (readBlock(fh, blk)) {
                blk->uoff = nextOffset;
                addBlock(blk);
                nextOffset += blk->usize;
                continue;
            }
        } catch (...) {
            delete blk;
            throw;
        }

        // End marker: the scratch block was never added to the list.
        delete blk;
        return true;
    }
}
// Read one block record from `fh` into `b` using readBE: usize first,
// then csize and coff.
//
// A usize of 0 marks the end of the index; in that case nothing further
// is read and false is returned. Otherwise returns true. b->uoff is not
// stored in the record and is left for the caller to fill in.
bool IndexedCompFile::readBlock(FileHandle& fh, Block *b) {
    fh.readBE(b->usize);

    const bool present = (b->usize != 0);
    if (present) {
        fh.readBE(b->csize);
        fh.readBE(b->coff);
    }
    return present;
}
// Write every block in the list to `fh` via writeBlock(), followed by a
// 32-bit zero that readBlock() recognises as the end marker.
void IndexedCompFile::writeIndex(FileHandle& fh) const {
    BlockList::const_iterator cur = mBlocks.begin();
    while (cur != mBlocks.end()) {
        writeBlock(fh, *cur);
        ++cur;
    }

    uint32_t terminator = 0;
    fh.writeBE(terminator);
}
// Write one block record to `fh` using writeBE, in the same field order
// readBlock() expects: usize, csize, coff. The uncompressed offset is not
// written.
void IndexedCompFile::writeBlock(FileHandle& fh, const Block* b) const {
    fh.writeBE(b->usize);
    fh.writeBE(b->csize);
    fh.writeBE(b->coff);
}
// Construct for the compressed file at `path` and decide where its block
// index lives:
//   - `indexRoot` empty:     "<path>.blockIdx", next to the file itself;
//   - `indexRoot` non-empty: "<indexRoot>/<basename of path>.blockIdx".
IndexedCompFile::IndexedCompFile(const std::string& path, const std::string& indexRoot)
        : BlockListCompFile(path) {
    if (!indexRoot.empty()) {
        mIndexPath = indexRoot + "/" + PathUtils::basename(path) + ".blockIdx";
    } else {
        mIndexPath = path + ".blockIdx";
    }
}