From b6e5aa9ae669b3a008632d5cfa2ac0dff0410df2 Mon Sep 17 00:00:00 2001 From: Maciej Buszka Date: Fri, 8 Jun 2018 11:34:22 +0200 Subject: [PATCH] FIX #3: Update Annoy version - Update Annoy library to current master - Add Manhattan metric option to wrapper - getDistance now returns proper distance, not squared - fixed tests, added test for manhattan distance --- Makefile | 1 + annoyindexwrapper.cc | 8 +- annoylib.h | 474 ++++++++++++++++++++++++++++------- kissrandom.h | 106 ++++++++ tests/basic-config.js | 2 +- tests/basictests.js | 2 +- tests/smalltest-manhattan.js | 81 ++++++ tests/smalltest.js | 2 +- tests/very-big-config.js | 2 +- 9 files changed, 580 insertions(+), 98 deletions(-) create mode 100644 kissrandom.h create mode 100644 tests/smalltest-manhattan.js diff --git a/Makefile b/Makefile index 8671ce1..66e1c8f 100644 --- a/Makefile +++ b/Makefile @@ -8,6 +8,7 @@ build-wrapper: test: tests/data/text8-vector.json node tests/smalltest.js + node tests/smalltest-manhattan.js node tests/basictests.js basic-config.js big-test: tests/data/GoogleNews-vectors-negative300.json diff --git a/annoyindexwrapper.cc b/annoyindexwrapper.cc index 687e922..ecd62c5 100644 --- a/annoyindexwrapper.cc +++ b/annoyindexwrapper.cc @@ -1,4 +1,5 @@ #include "annoyindexwrapper.h" +#include "kissrandom.h" #include using namespace v8; @@ -10,10 +11,13 @@ AnnoyIndexWrapper::AnnoyIndexWrapper(int dimensions, const char *metricString) : annoyDimensions(dimensions) { if (strcmp(metricString, "Angular") == 0) { - annoyIndex = new AnnoyIndex(dimensions); + annoyIndex = new AnnoyIndex(dimensions); + } + else if (strcmp(metricString, "Manhattan") == 0) { + annoyIndex = new AnnoyIndex(dimensions); } else { - annoyIndex = new AnnoyIndex(dimensions); + annoyIndex = new AnnoyIndex(dimensions); } } diff --git a/annoylib.h b/annoylib.h index d8d4b88..14e35ab 100644 --- a/annoylib.h +++ b/annoylib.h @@ -12,21 +12,30 @@ // License for the specific language governing permissions and limitations under // the License. + #ifndef ANNOYLIB_H #define ANNOYLIB_H #include #include #include +#ifndef _MSC_VER #include +#endif #include #include #include #include #include +#if defined(_MSC_VER) && _MSC_VER == 1500 +typedef unsigned char uint8_t; +typedef signed __int32 int32_t; +#else #include +#endif -#ifdef __MINGW32__ +#ifdef _MSC_VER +#define NOMINMAX #include "mman.h" #include #else @@ -40,6 +49,11 @@ #include #include +#ifdef _MSC_VER +// Needed for Visual Studio to disable runtime checks for mempcy +#pragma runtime_checks("s", off) +#endif + // This allows others to supply their own logger / error printer without // requiring Annoy to import their headers. See RcppAnnoy for a use case. #ifndef __ERROR_PRINTER_OVERRIDE__ @@ -48,9 +62,34 @@ #define showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); } #endif + +#ifndef _MSC_VER +#define popcount __builtin_popcountll +#else +#define popcount __popcnt64 +#endif + +#ifndef NO_MANUAL_VECTORIZATION +#if defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__) +#define USE_AVX +#endif +#endif + +#ifdef USE_AVX +#if defined(_MSC_VER) +#include +#elif defined(__GNUC__) +#include +#endif +#endif + #ifndef ANNOY_NODE_ATTRIBUTE - #define ANNOY_NODE_ATTRIBUTE __attribute__((__packed__)) - // TODO: this is turned on by default, but may not work for all architectures! Need to investigate. + #ifndef _MSC_VER + #define ANNOY_NODE_ATTRIBUTE __attribute__((__packed__)) + // TODO: this is turned on by default, but may not work for all architectures! Need to investigate. + #else + #define ANNOY_NODE_ATTRIBUTE + #endif #endif @@ -60,36 +99,103 @@ using std::pair; using std::numeric_limits; using std::make_pair; -struct RandRandom { - // Default implementation of annoy-specific random number generator that uses rand() from standard library. - // Owned by the AnnoyIndex, passed around to the distance metrics - inline int flip() { - // Draw random 0 or 1 - return rand() & 1; +namespace { + +template +inline T dot(const T* x, const T* y, int f) { + T s = 0; + for (int z = 0; z < f; z++) { + s += (*x) * (*y); + x++; + y++; + } + return s; +} + +template +inline T manhattan_distance(const T* x, const T* y, int f) { + T d = 0.0; + for (int i = 0; i < f; i++) + d += fabs(x[i] - y[i]); + return d; +} + +#ifdef USE_AVX +// Horizontal single sum of 256bit vector. +inline float hsum256_ps_avx(__m256 v) { + const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v)); + const __m128 x64 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); + const __m128 x32 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); + return _mm_cvtss_f32(x32); +} + +template<> +inline float dot(const float* x, const float *y, int f) { + float result = 0; + if (f > 7) { + __m256 d = _mm256_setzero_ps(); + for (; f > 7; f -= 8) { + d = _mm256_add_ps(d, _mm256_mul_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y))); + x += 8; + y += 8; + } + // Sum all floats in dot register. + result += hsum256_ps_avx(d); } - inline size_t index(size_t n) { - // Draw random integer between 0 and n-1 where n is at most the number of data points you have - return rand() % n; + // Don't forget the remaining values. + for (; f > 0; f--) { + result += *x * *y; + x++; + y++; } -}; + return result; +} + +template<> +inline float manhattan_distance(const float* x, const float* y, int f) { + float result = 0; + int i = f; + if (f > 7) { + __m256 manhattan = _mm256_setzero_ps(); + __m256 minus_zero = _mm256_set1_ps(-0.0f); + for (; i > 7; i -= 8) { + const __m256 x_minus_y = _mm256_sub_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y)); + const __m256 distance = _mm256_andnot_ps(minus_zero, x_minus_y); // Absolute value of x_minus_y (forces sign bit to zero) + manhattan = _mm256_add_ps(manhattan, distance); + x += 8; + y += 8; + } + // Sum all floats in manhattan register. + result = hsum256_ps_avx(manhattan); + } + // Don't forget the remaining values. + for (; i > 0; i--) { + result += fabsf(*x - *y); + x++; + y++; + } + return result; +} +#endif + + template inline T get_norm(T* v, int f) { - T sq_norm = 0; - for (int z = 0; z < f; z++) - sq_norm += v[z] * v[z]; - return sqrt(sq_norm); + return sqrt(dot(v, v, f)); } template inline void normalize(T* v, int f) { T norm = get_norm(v, f); - for (int z = 0; z < f; z++) - v[z] /= norm; + if (norm > 0) { + for (int z = 0; z < f; z++) + v[z] /= norm; + } } template -inline void two_means(const vector& nodes, int f, Random& random, bool cosine, T* iv, T* jv) { +inline void two_means(const vector& nodes, int f, Random& random, bool cosine, Node* p, Node* q) { /* This algorithm is a huge heuristic. Empirically it works really well, but I can't motivate it well. The basic idea is to keep two centroids and assign @@ -102,28 +208,36 @@ inline void two_means(const vector& nodes, int f, Random& random, bool co size_t i = random.index(count); size_t j = random.index(count-1); j += (j >= i); // ensure that i != j - std::copy(&nodes[i]->v[0], &nodes[i]->v[f], &iv[0]); - std::copy(&nodes[j]->v[0], &nodes[j]->v[f], &jv[0]); - if (cosine) { normalize(&iv[0], f); normalize(&jv[0], f); } + memcpy(p->v, nodes[i]->v, f * sizeof(T)); + memcpy(q->v, nodes[j]->v, f * sizeof(T)); + if (cosine) { normalize(p->v, f); normalize(q->v, f); } + Distance::init_node(p, f); + Distance::init_node(q, f); int ic = 1, jc = 1; for (int l = 0; l < iteration_steps; l++) { size_t k = random.index(count); - T di = ic * Distance::distance(&iv[0], nodes[k]->v, f), - dj = jc * Distance::distance(&jv[0], nodes[k]->v, f); + T di = ic * Distance::distance(p, nodes[k], f), + dj = jc * Distance::distance(q, nodes[k], f); T norm = cosine ? get_norm(nodes[k]->v, f) : 1.0; + if (!(norm > T(0))) { + continue; + } if (di < dj) { for (int z = 0; z < f; z++) - iv[z] = (iv[z] * ic + nodes[k]->v[z] / norm) / (ic + 1); + p->v[z] = (p->v[z] * ic + nodes[k]->v[z] / norm) / (ic + 1); + Distance::init_node(p, f); ic++; } else if (dj < di) { for (int z = 0; z < f; z++) - jv[z] = (jv[z] * jc + nodes[k]->v[z] / norm) / (jc + 1); + q->v[z] = (q->v[z] * jc + nodes[k]->v[z] / norm) / (jc + 1); + Distance::init_node(q, f); jc++; } } } +} // namespace struct Angular { template @@ -143,30 +257,27 @@ struct Angular { * more memory to be able to fit the vector outside */ S n_descendants; - S children[2]; // Will possibly store more than 2 + union { + S children[2]; // Will possibly store more than 2 + T norm; + }; T v[1]; // We let this one overflow intentionally. Need to allocate at least 1 to make GCC happy }; - template - static inline T distance(const T* x, const T* y, int f) { + template + static inline T distance(const Node* x, const Node* y, int f) { // want to calculate (a/|a| - b/|b|)^2 // = a^2 / a^2 + b^2 / b^2 - 2ab/|a||b| // = 2 - 2cos - T pp = 0, qq = 0, pq = 0; - for (int z = 0; z < f; z++, x++, y++) { - pp += (*x) * (*x); - qq += (*y) * (*y); - pq += (*x) * (*y); - } + T pp = x->norm ? x->norm : dot(x->v, x->v, f); // For backwards compatibility reasons, we need to fall back and compute the norm here + T qq = y->norm ? y->norm : dot(y->v, y->v, f); + T pq = dot(x->v, y->v, f); T ppqq = pp * qq; if (ppqq > 0) return 2.0 - 2.0 * pq / sqrt(ppqq); else return 2.0; // cos is 0 } template static inline T margin(const Node* n, const T* y, int f) { - T dot = 0; - for (int z = 0; z < f; z++) - dot += n->v[z] * y[z]; - return dot; + return dot(n->v, y, f); } template static inline bool side(const Node* n, const T* y, int f, Random& random) { @@ -177,12 +288,15 @@ struct Angular { return random.flip(); } template - static inline void create_split(const vector*>& nodes, int f, Random& random, Node* n) { - vector best_iv(f, 0), best_jv(f, 0); // TODO: avoid allocation - two_means >(nodes, f, random, true, &best_iv[0], &best_jv[0]); + static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { + Node* p = (Node*)malloc(s); // TODO: avoid + Node* q = (Node*)malloc(s); // TODO: avoid + two_means >(nodes, f, random, true, p, q); for (int z = 0; z < f; z++) - n->v[z] = best_iv[z] - best_jv[z]; + n->v[z] = p->v[z] - q->v[z]; normalize(n->v, f); + free(p); + free(q); } template static inline T normalized_distance(T distance) { @@ -191,32 +305,123 @@ struct Angular { // so we have to make sure it's a positive number. return sqrt(std::max(distance, T(0))); } + template + static inline T pq_distance(T distance, T margin, int child_nr) { + if (child_nr == 0) + margin = -margin; + return std::min(distance, margin); + } + template + static inline T pq_initial_value() { + return numeric_limits::infinity(); + } + template + static inline void init_node(Node* n, int f) { + n->norm = dot(n->v, n->v, f); + } static const char* name() { return "angular"; } }; -struct Euclidean { +struct Hamming { template struct ANNOY_NODE_ATTRIBUTE Node { S n_descendants; - T a; // need an extra constant term to determine the offset of the plane S children[2]; T v[1]; }; + + static const size_t max_iterations = 20; + + template + static inline T pq_distance(T distance, T margin, int child_nr) { + return distance - (margin != (unsigned int) child_nr); + } + template - static inline T distance(const T* x, const T* y, int f) { - T d = 0.0; - for (int i = 0; i < f; i++, x++, y++) - d += ((*x) - (*y)) * ((*x) - (*y)); - return d; + static inline T pq_initial_value() { + return numeric_limits::max(); + } + template + static inline T distance(const Node* x, const Node* y, int f) { + size_t dist = 0; + for (int i = 0; i < f; i++) { + dist += popcount(x->v[i] ^ y->v[i]); + } + return dist; } template + static inline bool margin(const Node* n, const T* y, int f) { + static const size_t n_bits = sizeof(T) * 8; + T chunk = n->v[0] / n_bits; + return (y[chunk] & (static_cast(1) << (n_bits - 1 - (n->v[0] % n_bits)))) != 0; + } + template + static inline bool side(const Node* n, const T* y, int f, Random& random) { + return margin(n, y, f); + } + template + static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { + size_t cur_size = 0; + size_t i = 0; + int dim = f * 8 * sizeof(T); + for (; i < max_iterations; i++) { + // choose random position to split at + n->v[0] = random.index(dim); + cur_size = 0; + for (typename vector*>::const_iterator it = nodes.begin(); it != nodes.end(); ++it) { + if (margin(n, (*it)->v, f)) { + cur_size++; + } + } + if (cur_size > 0 && cur_size < nodes.size()) { + break; + } + } + // brute-force search for splitting coordinate + if (i == max_iterations) { + int j = 0; + for (; j < dim; j++) { + n->v[0] = j; + cur_size = 0; + for (typename vector*>::const_iterator it = nodes.begin(); it != nodes.end(); ++it) { + if (margin(n, (*it)->v, f)) { + cur_size++; + } + } + if (cur_size > 0 && cur_size < nodes.size()) { + break; + } + } + } + } + template + static inline T normalized_distance(T distance) { + return distance; + } + template + static inline void init_node(Node* n, int f) { + } + static const char* name() { + return "hamming"; + } +}; + +struct Minkowski { + template + struct ANNOY_NODE_ATTRIBUTE Node { + S n_descendants; + T a; // need an extra constant term to determine the offset of the plane + union { + S children[2]; + T norm; + }; + T v[1]; + }; + template static inline T margin(const Node* n, const T* y, int f) { - T dot = n->a; - for (int z = 0; z < f; z++) - dot += n->v[z] * y[z]; - return dot; + return n->a + dot(n->v, y, f); } template static inline bool side(const Node* n, const T* y, int f, Random& random) { @@ -226,33 +431,94 @@ struct Euclidean { else return random.flip(); } + template + static inline T pq_distance(T distance, T margin, int child_nr) { + if (child_nr == 0) + margin = -margin; + return std::min(distance, margin); + } + template + static inline T pq_initial_value() { + return numeric_limits::infinity(); + } +}; + + +struct Euclidean : Minkowski{ + template + static inline T distance(const Node* x, const Node* y, int f) { + T pp = x->norm ? x->norm : dot(x->v, x->v, f); // For backwards compatibility reasons, we need to fall back and compute the norm here + T qq = y->norm ? y->norm : dot(y->v, y->v, f); + T pq = dot(x->v, y->v, f); + return pp + qq - 2*pq; + } template - static inline void create_split(const vector*>& nodes, int f, Random& random, Node* n) { - vector best_iv(f, 0), best_jv(f, 0); - two_means >(nodes, f, random, false, &best_iv[0], &best_jv[0]); + static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { + Node* p = (Node*)malloc(s); // TODO: avoid + Node* q = (Node*)malloc(s); // TODO: avoid + two_means >(nodes, f, random, false, p, q); for (int z = 0; z < f; z++) - n->v[z] = best_iv[z] - best_jv[z]; + n->v[z] = p->v[z] - q->v[z]; normalize(n->v, f); n->a = 0.0; for (int z = 0; z < f; z++) - n->a += -n->v[z] * (best_iv[z] + best_jv[z]) / 2; + n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2; + free(p); + free(q); } template static inline T normalized_distance(T distance) { return sqrt(std::max(distance, T(0))); } + template + static inline void init_node(Node* n, int f) { + n->norm = dot(n->v, n->v, f); + } static const char* name() { return "euclidean"; } }; +struct Manhattan : Minkowski{ + template + static inline T distance(const Node* x, const Node* y, int f) { + return manhattan_distance(x->v, y->v, f); + } + template + static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { + Node* p = (Node*)malloc(s); // TODO: avoid + Node* q = (Node*)malloc(s); // TODO: avoid + two_means >(nodes, f, random, false, p, q); + + for (int z = 0; z < f; z++) + n->v[z] = p->v[z] - q->v[z]; + normalize(n->v, f); + n->a = 0.0; + for (int z = 0; z < f; z++) + n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2; + free(p); + free(q); + } + template + static inline T normalized_distance(T distance) { + return std::max(distance, T(0)); + } + template + static inline void init_node(Node* n, int f) { + } + static const char* name() { + return "manhattan"; + } +}; + template class AnnoyIndexInterface { public: virtual ~AnnoyIndexInterface() {}; virtual void add_item(S item, const T* w) = 0; virtual void build(int q) = 0; + virtual void unbuild() = 0; virtual bool save(const char* filename) = 0; virtual void unload() = 0; virtual bool load(const char* filename) = 0; @@ -262,6 +528,7 @@ class AnnoyIndexInterface { virtual S get_n_items() = 0; virtual void verbose(bool v) = 0; virtual void get_item(S item, T* v) = 0; + virtual void set_seed(int q) = 0; }; template @@ -319,10 +586,9 @@ template n->children[1] = 0; n->n_descendants = 1; - for (int z = 0; z < _f; z++) { + for (int z = 0; z < _f; z++) n->v[z] = w[z]; - // printf("adding: %f", w[z]); - } + D::init_node(n, _f); if (item >= _n_items) _n_items = item + 1; @@ -343,10 +609,12 @@ template if (_verbose) showUpdate("pass %zd...\n", _roots.size()); vector indices; - for (S i = 0; i < _n_items; i++) - indices.push_back(i); + for (S i = 0; i < _n_items; i++) { + if (_get(i)->n_descendants >= 1) // Issue #223 + indices.push_back(i); + } - _roots.push_back(_make_tree(indices)); + _roots.push_back(_make_tree(indices, true)); } // Also, copy the roots into the last segment of the array // This way we can load them faster without reading the whole file @@ -357,6 +625,16 @@ template if (_verbose) showUpdate("has %d nodes\n", _n_nodes); } + + void unbuild() { + if (_loaded) { + showUpdate("You can't unbuild a loaded index\n"); + return; + } + + _roots.clear(); + _n_nodes = _n_items; + } bool save(const char* filename) { FILE *f = fopen(filename, "wb"); @@ -395,9 +673,11 @@ template } bool load(const char* filename) { - _fd = open(filename, O_RDONLY, (mode_t)0400); - if (_fd == -1) + _fd = open(filename, O_RDONLY, (int)0400); + if (_fd == -1) { + _fd = 0; return false; + } off_t size = lseek(_fd, 0, SEEK_END); #ifdef MAP_POPULATE _nodes = (Node*)mmap( @@ -410,6 +690,7 @@ template _n_nodes = (S)(size / _s); // Find the roots by scanning the end of the file and taking the nodes with most descendants + _roots.clear(); S m = -1; for (S i = _n_nodes - 1; i >= 0; i--) { S k = _get(i)->n_descendants; @@ -430,9 +711,7 @@ template } T get_distance(S i, S j) { - const T* x = _get(i)->v; - const T* y = _get(j)->v; - return D::distance(x, y, _f); + return D::normalized_distance(D::distance(_get(i), _get(j), _f)); } void get_nns_by_item(S item, size_t n, size_t search_k, vector* result, vector* distances) { @@ -452,12 +731,11 @@ template void get_item(S item, T* v) { Node* m = _get(item); - // printf("%s\n", "get_item!"); - // printf("Copying item!"); - // for (int i = 0; i < _f; ++i) { - // printf("%f, ", m->v[i]); - // } - std::copy(&m->v[0], &m->v[_f], v); + memcpy(v, m->v, _f * sizeof(T)); + } + + void set_seed(int seed) { + _random.set_seed(seed); } protected: @@ -477,19 +755,25 @@ template return (Node*)((uint8_t *)_nodes + (_s * i)); } - S _make_tree(const vector& indices) { - if (indices.size() == 1) + S _make_tree(const vector& indices, bool is_root) { + // The basic rule is that if we have <= _K items, then it's a leaf node, otherwise it's a split node. + // There's some regrettable complications caused by the problem that root nodes have to be "special": + // 1. We identify root nodes by the arguable logic that _n_items == n->n_descendants, regardless of how many descendants they actually have + // 2. Root nodes with only 1 child need to be a "dummy" parent + // 3. Due to the _n_items "hack", we need to be careful with the cases where _n_items <= _K or _n_items > _K + if (indices.size() == 1 && !is_root) return indices[0]; - if (indices.size() <= (size_t)_K) { + if (indices.size() <= (size_t)_K && (!is_root || _n_items <= (size_t)_K || indices.size() == 1)) { _allocate_size(_n_nodes + 1); S item = _n_nodes++; Node* m = _get(item); - m->n_descendants = (S)indices.size(); + m->n_descendants = is_root ? _n_items : (S)indices.size(); // Using std::copy instead of a loop seems to resolve issues #3 and #13, // probably because gcc 4.8 goes overboard with optimizations. - std::copy(indices.begin(), indices.end(), m->children); + // Using memcpy instead of std::copy for MSVC compatibility. #235 + memcpy(m->children, &indices[0], indices.size() * sizeof(S)); return item; } @@ -503,7 +787,7 @@ template vector children_indices[2]; Node* m = (Node*)malloc(_s); // TODO: avoid - D::create_split(children, _f, _random, m); + D::create_split(children, _f, _s, _random, m); for (size_t i = 0; i < indices.size(); i++) { S j = indices[i]; @@ -535,10 +819,10 @@ template int flip = (children_indices[0].size() > children_indices[1].size()); - m->n_descendants = (S)indices.size(); + m->n_descendants = is_root ? _n_items : (S)indices.size(); for (int side = 0; side < 2; side++) // run _make_tree for the smallest child first (for cache locality) - m->children[side^flip] = _make_tree(children_indices[side^flip]); + m->children[side^flip] = _make_tree(children_indices[side^flip], false); _allocate_size(_n_nodes + 1); S item = _n_nodes++; @@ -549,31 +833,35 @@ template } void _get_all_nns(const T* v, size_t n, size_t search_k, vector* result, vector* distances) { + Node* v_node = (Node *)malloc(_s); // TODO: avoid + memcpy(v_node->v, v, sizeof(T)*_f); + D::init_node(v_node, _f); + std::priority_queue > q; if (search_k == (size_t)-1) search_k = n * _roots.size(); // slightly arbitrary default value for (size_t i = 0; i < _roots.size(); i++) { - q.push(make_pair(numeric_limits::infinity(), _roots[i])); + q.push(make_pair(Distance::template pq_initial_value(), _roots[i])); } - vector nns; + std::vector nns; while (nns.size() < search_k && !q.empty()) { const pair& top = q.top(); T d = top.first; S i = top.second; Node* nd = _get(i); q.pop(); - if (nd->n_descendants == 1) { + if (nd->n_descendants == 1 && i < _n_items) { nns.push_back(i); } else if (nd->n_descendants <= _K) { const S* dst = nd->children; nns.insert(nns.end(), dst, &dst[nd->n_descendants]); } else { T margin = D::margin(nd, v, _f); - q.push(make_pair(std::min(d, +margin), nd->children[1])); - q.push(make_pair(std::min(d, -margin), nd->children[0])); + q.push(make_pair(D::pq_distance(d, margin, 1), nd->children[1])); + q.push(make_pair(D::pq_distance(d, margin, 0), nd->children[0])); } } @@ -587,17 +875,19 @@ template if (j == last) continue; last = j; - nns_dist.push_back(make_pair(D::distance(v, _get(j)->v, _f), j)); + if (_get(j)->n_descendants == 1) // This is only to guard a really obscure case, #284 + nns_dist.push_back(make_pair(D::distance(v_node, _get(j), _f), j)); } size_t m = nns_dist.size(); size_t p = n < m ? n : m; // Return this many items - std::partial_sort(&nns_dist[0], &nns_dist[p], &nns_dist[m]); + std::partial_sort(nns_dist.begin(), nns_dist.begin() + p, nns_dist.end()); for (size_t i = 0; i < p; i++) { if (distances) distances->push_back(D::normalized_distance(nns_dist[i].first)); result->push_back(nns_dist[i].second); } + free(v_node); } }; diff --git a/kissrandom.h b/kissrandom.h new file mode 100644 index 0000000..c423a7c --- /dev/null +++ b/kissrandom.h @@ -0,0 +1,106 @@ +#ifndef KISSRANDOM_H +#define KISSRANDOM_H + +#if defined(_MSC_VER) && _MSC_VER == 1500 +typedef unsigned __int32 uint32_t; +typedef unsigned __int32 uint64_t; +#else +#include +#endif + +// KISS = "keep it simple, stupid", but high quality random number generator +// http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf -> "Use a good RNG and build it into your code" +// http://mathforum.org/kb/message.jspa?messageID=6627731 +// https://de.wikipedia.org/wiki/KISS_(Zufallszahlengenerator) + +// 32 bit KISS +struct Kiss32Random { + uint32_t x; + uint32_t y; + uint32_t z; + uint32_t c; + + // seed must be != 0 + Kiss32Random(uint32_t seed = 123456789) { + x = seed; + y = 362436000; + z = 521288629; + c = 7654321; + } + + uint32_t kiss() { + // Linear congruence generator + x = 69069 * x + 12345; + + // Xor shift + y ^= y << 13; + y ^= y >> 17; + y ^= y << 5; + + // Multiply-with-carry + uint64_t t = 698769069ULL * z + c; + c = t >> 32; + z = (uint32_t) t; + + return x + y + z; + } + inline int flip() { + // Draw random 0 or 1 + return kiss() & 1; + } + inline size_t index(size_t n) { + // Draw random integer between 0 and n-1 where n is at most the number of data points you have + return kiss() % n; + } + inline void set_seed(uint32_t seed) { + x = seed; + } +}; + +// 64 bit KISS. Use this if you have more than about 2^24 data points ("big data" ;) ) +struct Kiss64Random { + uint64_t x; + uint64_t y; + uint64_t z; + uint64_t c; + + // seed must be != 0 + Kiss64Random(uint64_t seed = 1234567890987654321ULL) { + x = seed; + y = 362436362436362436ULL; + z = 1066149217761810ULL; + c = 123456123456123456ULL; + } + + uint64_t kiss() { + // Linear congruence generator + z = 6906969069LL*z+1234567; + + // Xor shift + y ^= (y<<13); + y ^= (y>>17); + y ^= (y<<43); + + // Multiply-with-carry (uint128_t t = (2^58 + 1) * x + c; c = t >> 64; x = (uint64_t) t) + uint64_t t = (x<<58)+c; + c = (x>>6); + x += t; + c += (x