Skip to content

Commit 167215e

Browse files
committed
Replace STL unordered_map/unordered_set with robin_hood unordered_map/unordered_set (and use 'auto' for some iterator declarations).
1 parent c081d85 commit 167215e

8 files changed

+41
-43
lines changed

LICENSE.txt

+4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ Open Bloom Filter
1717
https://code.google.com/p/bloom/source/browse/trunk/bloom_filter.hpp
1818
Common Public License
1919

20+
Robin_Hood Unordered Map and Set
21+
https://github.com/martinus/robin-hood-hashing
22+
MIT License
23+
2024
COPYRIGHT LICENSE
2125

2226
Copyright © 2015, Battelle National Biodefense Institute (BNBI);

src/mash/CommandFind.cpp

+6-8
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include "kseq.h"
1111
#include <iostream>
1212
#include <set>
13-
#include <unordered_set>
13+
#include "robin_hood.h"
1414
#include "ThreadPool.h"
1515
#include "sketchParameterSetup.h"
1616

@@ -229,11 +229,9 @@ CommandFind::FindOutput * find(CommandFind::FindInput * data)
229229

230230
void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput * output, bool minusStrand)
231231
{
232-
typedef std::unordered_map < uint32_t, std::set<uint32_t> > PositionsBySequence_umap;
233-
234232
bool verbose = false;
235233

236-
Sketch::Hash_set minHashes;
234+
robin_hood::unordered_set<Sketch::hash_t> minHashes;
237235

238236
const Sketch & sketch = input->sketch;
239237
int kmerSize = sketch.getKmerSize();
@@ -302,9 +300,9 @@ void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput
302300
// get sorted lists of positions, per reference sequence, that have
303301
// mutual min-hashes with the query
304302
//
305-
PositionsBySequence_umap hits;
306-
//
307-
for ( Sketch::Hash_set::const_iterator i = minHashes.begin(); i != minHashes.end(); i++ )
303+
robin_hood::unordered_map < uint32_t, std::set<uint32_t> > hits;
304+
305+
for ( auto i = minHashes.begin(); i != minHashes.end(); i++ )
308306
{
309307
Sketch::hash_t hash = *i;
310308

@@ -326,7 +324,7 @@ void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput
326324
}
327325
}
328326

329-
for ( PositionsBySequence_umap::iterator i = hits.begin(); i != hits.end(); i++ )
327+
for ( auto i = hits.begin(); i != hits.end(); i++ )
330328
{
331329
using std::set;
332330

src/mash/CommandScreen.cpp

+12-13
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#include <zlib.h>
1313
#include "ThreadPool.h"
1414
#include <math.h>
15-
#include <set>
15+
#include "robin_hood.h"
1616

1717
#ifdef USE_BOOST
1818
#include <boost/math/distributions/binomial.hpp>
@@ -29,8 +29,6 @@ using std::cout;
2929
using std::endl;
3030
using std::list;
3131
using std::string;
32-
using std::unordered_map;
33-
using std::unordered_set;
3432
using std::vector;
3533

3634
namespace mash {
@@ -93,8 +91,8 @@ int CommandScreen::run() const
9391
parameters.minHashesPerWindow = sketch.getMinHashesPerWindow();
9492

9593
HashTable hashTable;
96-
unordered_map<uint64_t, std::atomic<uint32_t>> hashCounts;
97-
unordered_map<uint64_t, list<uint32_t> > saturationByIndex;
94+
robin_hood::unordered_map<uint64_t, std::atomic<uint32_t>> hashCounts;
95+
robin_hood::unordered_map<uint64_t, list<uint32_t> > saturationByIndex;
9896

9997
cerr << "Loading " << arguments[0] << "..." << endl;
10098

@@ -117,7 +115,7 @@ int CommandScreen::run() const
117115

118116
cerr << " " << hashTable.size() << " distinct hashes." << endl;
119117

120-
unordered_set<MinHashHeap *> minHashHeaps;
118+
robin_hood::unordered_set<MinHashHeap *> minHashHeaps;
121119

122120
bool trans = (alphabet == alphabetProtein);
123121

@@ -289,7 +287,7 @@ int CommandScreen::run() const
289287

290288
MinHashHeap minHashHeap(sketch.getUse64(), sketch.getMinHashesPerWindow());
291289

292-
for ( unordered_set<MinHashHeap *>::const_iterator i = minHashHeaps.begin(); i != minHashHeaps.end(); i++ )
290+
for ( auto i = minHashHeaps.begin(); i != minHashHeaps.end(); i++ )
293291
{
294292
HashList hashList(parameters.use64);
295293

@@ -337,13 +335,13 @@ int CommandScreen::run() const
337335

338336
memset(shared, 0, sizeof(uint64_t) * sketch.getReferenceCount());
339337

340-
for ( unordered_map<uint64_t, std::atomic<uint32_t> >::const_iterator i = hashCounts.begin(); i != hashCounts.end(); i++ )
338+
for ( auto i = hashCounts.begin(); i != hashCounts.end(); i++ )
341339
{
342340
if ( i->second >= minCov )
343341
{
344-
const unordered_set<uint64_t> & indeces = hashTable.at(i->first);
342+
const auto & indeces = hashTable.at(i->first);
345343

346-
for ( unordered_set<uint64_t>::const_iterator k = indeces.begin(); k != indeces.end(); k++ )
344+
for ( auto k = indeces.begin(); k != indeces.end(); k++ )
347345
{
348346
shared[*k]++;
349347
depths[*k].push_back(i->second);
@@ -381,12 +379,12 @@ int CommandScreen::run() const
381379
continue;
382380
}
383381

384-
const unordered_set<uint64_t> & indeces = i->second;
382+
const auto & indeces = i->second;
385383
double maxScore = 0;
386384
uint64_t maxLength = 0;
387385
uint64_t maxIndex;
388386

389-
for ( unordered_set<uint64_t>::const_iterator k = indeces.begin(); k != indeces.end(); k++ )
387+
for ( auto k = indeces.begin(); k != indeces.end(); k++ )
390388
{
391389
if ( scores[*k] > maxScore )
392390
{
@@ -456,6 +454,7 @@ int CommandScreen::run() const
456454
}
457455
}
458456

457+
delete [] depths;
459458
delete [] shared;
460459

461460
return 0;
@@ -809,7 +808,7 @@ char aaFromCodon(const char * codon)
809808
return aa;//(aa == '*') ? 0 : aa;
810809
}
811810

812-
void useThreadOutput(CommandScreen::HashOutput * output, unordered_set<MinHashHeap *> & minHashHeaps)
811+
void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps)
813812
{
814813
minHashHeaps.emplace(output->minHashHeap);
815814
delete output;

src/mash/CommandScreen.h

+8-9
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
#include <string>
1414
#include <vector>
1515
#include <atomic>
16-
#include <unordered_set>
17-
#include <unordered_map>
16+
#include "robin_hood.h"
1817
#include "MinHashHeap.h"
1918

2019
namespace mash {
@@ -24,13 +23,13 @@ struct HashTableEntry
2423
HashTableEntry() : count(0) {}
2524

2625
uint32_t count;
27-
std::unordered_set<uint64_t> indices;
26+
robin_hood::unordered_set<uint64_t> indices;
2827
};
2928

30-
//typedef std::unordered_map< uint64_t, HashTableEntry > HashTable;
31-
typedef std::unordered_map< uint64_t, std::unordered_set<uint64_t> > HashTable;
29+
//typedef robin_hood::unordered_map< uint64_t, HashTableEntry > HashTable;
30+
typedef robin_hood::unordered_map< uint64_t, robin_hood::unordered_set<uint64_t> > HashTable;
3231

33-
static const std::unordered_map< std::string, char > codons =
32+
static const robin_hood::unordered_map< std::string, char > codons =
3433
{
3534
{"AAA", 'K'},
3635
{"AAC", 'N'},
@@ -104,7 +103,7 @@ class CommandScreen : public Command
104103

105104
struct HashInput
106105
{
107-
HashInput(std::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
106+
HashInput(robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCountsNew, MinHashHeap * minHashHeapNew, char * seqNew, uint64_t lengthNew, const Sketch::Parameters & parametersNew, bool transNew)
108107
:
109108
hashCounts(hashCountsNew),
110109
minHashHeap(minHashHeapNew),
@@ -129,7 +128,7 @@ class CommandScreen : public Command
129128
bool trans;
130129

131130
Sketch::Parameters parameters;
132-
std::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
131+
robin_hood::unordered_map<uint64_t, std::atomic<uint32_t> > & hashCounts;
133132
MinHashHeap * minHashHeap;
134133
};
135134

@@ -165,7 +164,7 @@ double estimateIdentity(uint64_t common, uint64_t denom, int kmerSize, double km
165164
CommandScreen::HashOutput * hashSequence(CommandScreen::HashInput * input);
166165
double pValueWithin(uint64_t x, uint64_t setSize, double kmerSpace, uint64_t sketchSize);
167166
void translate(const char * src, char * dst, uint64_t len);
168-
void useThreadOutput(CommandScreen::HashOutput * output, std::unordered_set<MinHashHeap *> & minHashHeaps);
167+
void useThreadOutput(CommandScreen::HashOutput * output, robin_hood::unordered_set<MinHashHeap *> & minHashHeaps);
169168

170169
} // namespace mash
171170

src/mash/HashSet.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,14 @@ void HashSet::toCounts(std::vector<uint32_t> & counts) const
7878
{
7979
if ( use64 )
8080
{
81-
for ( std::unordered_map<hash64_t, uint32_t>::const_iterator i = hashes64.begin(); i != hashes64.end(); i++ )
81+
for ( auto i = hashes64.begin(); i != hashes64.end(); i++ )
8282
{
8383
counts.push_back(i->second);
8484
}
8585
}
8686
else
8787
{
88-
for ( std::unordered_map<hash32_t, uint32_t>::const_iterator i = hashes32.begin(); i != hashes32.end(); i++ )
88+
for ( auto i = hashes32.begin(); i != hashes32.end(); i++ )
8989
{
9090
counts.push_back(i->second);
9191
}
@@ -96,14 +96,14 @@ void HashSet::toHashList(HashList & hashList) const
9696
{
9797
if ( use64 )
9898
{
99-
for ( std::unordered_map<hash64_t, uint32_t>::const_iterator i = hashes64.begin(); i != hashes64.end(); i++ )
99+
for ( auto i = hashes64.begin(); i != hashes64.end(); i++ )
100100
{
101101
hashList.push_back64(i->first);
102102
}
103103
}
104104
else
105105
{
106-
for ( std::unordered_map<hash32_t, uint32_t>::const_iterator i = hashes32.begin(); i != hashes32.end(); i++ )
106+
for ( auto i = hashes32.begin(); i != hashes32.end(); i++ )
107107
{
108108
hashList.push_back32(i->first);
109109
}

src/mash/HashSet.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#define HashSet_h
99

1010
#include "HashList.h"
11-
#include <unordered_map>
11+
#include "robin_hood.h"
1212
#include <vector>
1313

1414
class HashSet
@@ -28,8 +28,8 @@ class HashSet
2828
private:
2929

3030
bool use64;
31-
std::unordered_map<hash32_t, uint32_t> hashes32;
32-
std::unordered_map<hash64_t, uint32_t> hashes64;
31+
robin_hood::unordered_map<hash32_t, uint32_t> hashes32;
32+
robin_hood::unordered_map<hash64_t, uint32_t> hashes64;
3333
};
3434

3535
#endif

src/mash/Sketch.h

+3-6
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
#define Sketch_h
99

1010
#include "mash/capnp/MinHash.capnp.h"
11-
#include <unordered_map>
12-
#include <unordered_set>
11+
#include "robin_hood.h"
1312
#include <map>
1413
#include <vector>
1514
#include <string>
@@ -126,8 +125,6 @@ class Sketch
126125
uint32_t position;
127126
};
128127

129-
typedef std::unordered_set<hash_t> Hash_set;
130-
131128
struct Reference
132129
{
133130
// no sequence for now
@@ -215,9 +212,9 @@ class Sketch
215212
void createIndex();
216213

217214
std::vector<Reference> references;
218-
std::unordered_map<std::string, int> referenceIndecesById;
215+
robin_hood::unordered_map<std::string, int> referenceIndecesById;
219216
std::vector<std::vector<PositionHash>> positionHashesByReference;
220-
std::unordered_map<hash_t, std::vector<Locus>> lociByHash;
217+
robin_hood::unordered_map<hash_t, std::vector<Locus>> lociByHash;
221218

222219
Parameters parameters;
223220
double kmerSpace;

src/mash/robin_hood.h

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/work/mash-profile/robin-hood-hashing/src/include/robin_hood.h

0 commit comments

Comments
 (0)