-
Notifications
You must be signed in to change notification settings - Fork 4
/
GSSTreeCreator.h
326 lines (273 loc) · 8.2 KB
/
GSSTreeCreator.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
/*
Pharmit
Copyright (c) David Ryan Koes, University of Pittsburgh and contributors.
All rights reserved.
Pharmit is licensed under both the BSD 3-clause license and the GNU
Public License version 2. Any use of the code that retains its reliance
on the GPL-licensed OpenBabel library is subject to the terms of the GPL2.
Use of the Pharmit code independently of OpenBabel (or any other
GPL2 licensed software) may choose between the BSD or GPL licenses.
See the LICENSE file provided with the distribution for more information.
*/
/*
* GSSTreeCreator.h
*
* Created on: Oct 13, 2011
* Author: dkoes
*
* This class creates a GSS tree on disk. It assumes the input
* does not fit in memory and behaves accordingly.
*
* It takes an iterator over the input data, which must support
* an intersection (with a cube) method and a write to file method.
* This data is then converted to oct tree representations. The
* data gets written to an indexed file while the oct trees get written
* to another file (storing indices to the object data).
*
* The tree file is then clustered to create leaves which are
* appended to a leaf file as they are created.
*
* The leaf nodes are similarly clustered to create a level of
* nodes, which are written to their own file. These nodes are
* clustered into the next level's file and so on.
*
* Clustering involves a top-down O(n) partitioning that splits the
* data until a set is small enough for an O(n^2) bottom-up packing
* to be performed. Clusters are always packed to contain at least 2
* entries (this may be relaxed for leaves).
*
* Once the final level is created, the internal nodes are all laid out
* in a separate file in depth-first order. The leaves are ordered
* sequentially in a separate file.
*/
#ifndef GSSTREECREATOR_H_
#define GSSTREECREATOR_H_
#include <iostream>
#include <fstream>
#include <vector>
#include "GSSTypes.h"
#include "GSSTreeStructures.h"
#include "TopDownPartitioner.h"
#include "packers/Packer.h"
using namespace std;
#include "WorkFile.h"
#include "Timer.h"
// Class for creating levels of the tree. The stock implementation follows
// the CM-tree bulk loading algorithm, but createNextLevel/createNextLevelR
// are virtual so a subclass can implement any arbitrary algorithm.
//
// The partitioner and packer are held as plain pointers and are never
// deleted by this class; the caller keeps responsibility for them.
class GSSLevelCreator
{
protected:
    const TopDownPartitioner *partitioner;
    const Packer *packer;

    // configuration settings
    unsigned nodePack;
    unsigned leafPack;

    // class vars used by createNextLevelR
    unsigned packingSize;
    ostream *outNodes;
    ostream *outTrees;
    vector<file_index> *nodeIndices;
    vector<file_index> *treeIndices;

    virtual void createNextLevelR(TopDownPartitioner *P);

public:
    // Creates an unconfigured level creator; initialize() must be called
    // before it is used (getPack() dereferences packer unconditionally).
    GSSLevelCreator() :
            partitioner(NULL), packer(NULL), nodePack(0), leafPack(0),
            packingSize(0), outNodes(NULL), outTrees(NULL),
            nodeIndices(NULL), treeIndices(NULL)
    {
    }

    // Creates a level creator that uses the given partitioner and packer,
    // with np and lp stored as the node and leaf pack settings.
    GSSLevelCreator(const TopDownPartitioner * part, const Packer *pack,
            unsigned np, unsigned lp) :
            partitioner(part), packer(pack), nodePack(np), leafPack(lp),
            packingSize(0), outNodes(NULL), outTrees(NULL),
            nodeIndices(NULL), treeIndices(NULL)
    {
    }

    // (Re)configures the partitioner, packer and pack settings. The
    // recursion state (packingSize, output streams, index vectors) is left
    // untouched.
    void initialize(const TopDownPartitioner * part, const Packer *pack,
            unsigned np = 32768, unsigned lp = 32768)
    {
        partitioner = part;
        packer = pack;
        nodePack = np;
        leafPack = lp;
    }

    virtual ~GSSLevelCreator()
    {
    }

    // Builds the next level from data; defined out of line.
    // NOTE(review): presumably nodes go to nodefile/nodeindices and trees
    // to treefile/treeindices -- confirm against the implementation.
    virtual void createNextLevel(DataViewer& data, ostream* nodefile,
            vector<file_index>& nodeindices, ostream* treefile,
            vector<file_index>& treeindices);

    // Returns the packer's pack value. Precondition: a packer has been
    // supplied (via the four-argument constructor or initialize()).
    unsigned getPack() const
    {
        return packer->getPack();
    }
};
class GSSTreeCreator
{
WorkFile objects;
WorkFile currenttrees;
vector<file_index> treeindices;
vector<file_index> objindices;
vector<WorkFile> nodes;
boost::filesystem::path dbpath;
GSSLevelCreator *leveler;
float dimension;
float resolution;
unsigned superNodeDepth;
//some bookkeeping for analysis purposes
unsigned numNodes;
unsigned numLeaves;
vector<unsigned> nodeContentDistribution;
vector<unsigned> leafContentDistribution;
file_index optimizeLevelsR(ostream& outnodes, ostream& outleaves,
const GSSNodeCommon *n, unsigned level, file_index& lstart,
file_index& lend);
void optimizeLevels();
void getNodesForSuperNode(const GSSInternalNode* root,
vector<GSSInternalNode*>& newroots, unsigned curlevel,
unsigned stoplevel);
public:
GSSTreeCreator(GSSLevelCreator *l, unsigned sdepth = 3) :
leveler(l), dimension(0), resolution(0), superNodeDepth(sdepth), numNodes(
0), numLeaves(0)
{
}
GSSTreeCreator() :
leveler(NULL), dimension(0), resolution(0), superNodeDepth(3), numNodes(
0), numLeaves(0)
{
}
~GSSTreeCreator()
{
//workfiles must be explicitly cleared
objects.clear();
for (unsigned i = 0, n = nodes.size(); i < n; i++)
{
nodes[i].clear();
}
}
float getDimension() const
{
return dimension;
}
float getResolution() const
{
return resolution;
}
bool create(boost::filesystem::path dir, boost::filesystem::path treedir,
float dim,
float res);
//setup directories
bool initialize(boost::filesystem::path dir, float dim, float res,
GSSLevelCreator* l = NULL)
{
using namespace boost;
dimension = dim;
resolution = res;
if (l)
leveler = l;
//create directory
if (filesystem::exists(dir))
{
cerr << dir << " already exists. Exiting\n";
return false;
}
if (!filesystem::create_directory(dir))
{
cerr << "Unable to create database directory ";
return false;
}
dbpath = dir;
filesystem::path objfile = dbpath / "objs";
string curtreesfile = filesystem::path(dbpath / "trees").string();
//write out objects and trees
objects.set(objfile.string().c_str());
currenttrees.set(curtreesfile.c_str());
treeindices.clear();
objindices.clear();
return true;
}
template<class Object> void addObject(const Object& obj)
{
objindices.push_back((file_index) objects.file->tellp());
obj.write(*objects.file);
//leaf object
treeindices.push_back((file_index) currenttrees.file->tellp());
MappableOctTree *tree = MappableOctTree::create(dimension, resolution,
obj);
tree->write(*currenttrees.file);
delete tree;
}
bool createIndex();
//return true if successful
template<class Object, class ObjectIterator>
bool create(boost::filesystem::path dir, ObjectIterator& itr,
float dim, float res)
{
using namespace boost;
initialize(dir, dim, res);
Timer t;
for (; itr; ++itr)
{
const Object& obj = *itr;
addObject(obj);
}
cout << "Create/write trees\t" << t.elapsed() << "\n";
t.restart();
return createIndex();
}
//write out the object trees to the specified directory, with the object file
//and also indices for reading back in later to save having to regenerate trees
template<class Object, class ObjectIterator>
bool createTreesOnly(boost::filesystem::path dir, ObjectIterator& itr,
float dim,
float res)
{
using namespace boost;
dimension = dim;
resolution = res;
WorkFile currenttrees;
//create directory
if (filesystem::exists(dir))
{
cerr << dir << " already exists. Exiting\n";
return false;
}
if (!filesystem::create_directory(dir))
{
cerr << "Unable to create database directory ";
return false;
}
dbpath = dir;
filesystem::path objfile = dbpath / "objs";
string curtreesfile = filesystem::path(dbpath / "trees").string();
string tipath = filesystem::path(dbpath / "treeindices").string();
string oipath = filesystem::path(dbpath / "objindices").string();
Timer t;
//write out objects and trees
objects.set(objfile.string().c_str());
currenttrees.set(curtreesfile.c_str());
ofstream treeindices(tipath.c_str());
ofstream objindices(oipath.c_str());
if (!treeindices || !objindices)
return false;
unsigned cnt = 0;
for (; itr; ++itr)
{
const Object& obj = *itr;
file_index objindex = (file_index) objects.file->tellp();
objindices.write((char*) &objindex, sizeof(file_index));
obj.write(*objects.file);
//leaf object
file_index treeindex = (file_index) currenttrees.file->tellp();
treeindices.write((char*) &treeindex, sizeof(file_index));
MappableOctTree *tree = MappableOctTree::create(dim, res, obj);
tree->write(*currenttrees.file);
delete tree;
cnt++;
}
currenttrees.clear();
cout << "Create/write trees\t" << t.elapsed() << "\n";
return true;
}
void printStats(ostream& out) const;
};
#endif /* GSSTREECREATOR_H_ */