From 39aa7199a958b270d5209cdd26125a238c856c5f Mon Sep 17 00:00:00 2001 From: Kornesh Kanan Date: Sun, 12 Jul 2020 00:27:02 +0800 Subject: [PATCH] Sync with upstream --- annoylib.h | 721 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 568 insertions(+), 153 deletions(-) diff --git a/annoylib.h b/annoylib.h index 14e35ab..293ee64 100644 --- a/annoylib.h +++ b/annoylib.h @@ -17,7 +17,6 @@ #define ANNOYLIB_H #include -#include #include #ifndef _MSC_VER #include @@ -27,21 +26,31 @@ #include #include #include + #if defined(_MSC_VER) && _MSC_VER == 1500 typedef unsigned char uint8_t; typedef signed __int32 int32_t; +typedef unsigned __int64 uint64_t; +typedef signed __int64 int64_t; #else #include #endif -#ifdef _MSC_VER -#define NOMINMAX -#include "mman.h" -#include +#if defined(_MSC_VER) || defined(__MINGW32__) + // a bit hacky, but override some definitions to support 64 bit + #define off_t int64_t + #define lseek_getsize(fd) _lseeki64(fd, 0, SEEK_END) + #ifndef NOMINMAX + #define NOMINMAX + #endif + #include "mman.h" + #include #else -#include + #include + #define lseek_getsize(fd) lseek(fd, 0, SEEK_END) #endif +#include #include #include #include @@ -62,20 +71,51 @@ typedef signed __int32 int32_t; #define showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); } #endif +// Portable alloc definition, cf Writing R Extensions, Section 1.6.4 +#ifdef __GNUC__ + // Includes GCC, clang and Intel compilers + # undef alloca + # define alloca(x) __builtin_alloca((x)) +#elif defined(__sun) || defined(_AIX) + // this is necessary (and sufficient) for Solaris 10 and AIX 6: + # include +#endif + +inline void set_error_from_errno(char **error, const char* msg) { + showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno); + if (error) { + *error = (char *)malloc(256); // TODO: win doesn't support snprintf + sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno); + } +} + +inline void set_error_from_string(char **error, const char* msg) { + showUpdate("%s\n", msg); + if (error) { + *error = (char *)malloc(strlen(msg) + 1); + strcpy(*error, msg); + } +} + +// We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number. +// Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low. +#define V_ARRAY_SIZE 65536 #ifndef _MSC_VER #define popcount __builtin_popcountll -#else -#define popcount __popcnt64 +#else // See #293, #358 +#define isnan(x) _isnan(x) +#define popcount cole_popcount #endif -#ifndef NO_MANUAL_VECTORIZATION -#if defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__) +#if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402 +#define USE_AVX512 +#elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__) #define USE_AVX -#endif +#else #endif -#ifdef USE_AVX +#if defined(USE_AVX) || defined(USE_AVX512) #if defined(_MSC_VER) #include #elif defined(__GNUC__) @@ -83,24 +123,33 @@ typedef signed __int32 int32_t; #endif #endif -#ifndef ANNOY_NODE_ATTRIBUTE - #ifndef _MSC_VER - #define ANNOY_NODE_ATTRIBUTE __attribute__((__packed__)) - // TODO: this is turned on by default, but may not work for all architectures! Need to investigate. - #else - #define ANNOY_NODE_ATTRIBUTE - #endif -#endif - using std::vector; -using std::string; using std::pair; using std::numeric_limits; using std::make_pair; +inline void* remap_memory(void* _ptr, int _fd, size_t old_size, size_t new_size) { +#ifdef __linux__ + _ptr = mremap(_ptr, old_size, new_size, MREMAP_MAYMOVE); +#else + munmap(_ptr, old_size); +#ifdef MAP_POPULATE + _ptr = mmap(_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0); +#else + _ptr = mmap(_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0); +#endif +#endif + return _ptr; +} + namespace { +template +inline Node* get_node_ptr(const void* _nodes, const size_t _s, const S i) { + return (Node*)((uint8_t *)_nodes + (_s * i)); +} + template inline T dot(const T* x, const T* y, int f) { T s = 0; @@ -120,6 +169,19 @@ inline T manhattan_distance(const T* x, const T* y, int f) { return d; } +template +inline T euclidean_distance(const T* x, const T* y, int f) { + // Don't use dot-product: avoid catastrophic cancellation in #314. + T d = 0.0; + for (int i = 0; i < f; ++i) { + const T tmp=*x - *y; + d += tmp * tmp; + ++x; + ++y; + } + return d; +} + #ifdef USE_AVX // Horizontal single sum of 256bit vector. inline float hsum256_ps_avx(__m256 v) { @@ -177,6 +239,104 @@ inline float manhattan_distance(const float* x, const float* y, int f) { return result; } +template<> +inline float euclidean_distance(const float* x, const float* y, int f) { + float result=0; + if (f > 7) { + __m256 d = _mm256_setzero_ps(); + for (; f > 7; f -= 8) { + const __m256 diff = _mm256_sub_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y)); + d = _mm256_add_ps(d, _mm256_mul_ps(diff, diff)); // no support for fmadd in AVX... + x += 8; + y += 8; + } + // Sum all floats in dot register. + result = hsum256_ps_avx(d); + } + // Don't forget the remaining values. + for (; f > 0; f--) { + float tmp = *x - *y; + result += tmp * tmp; + x++; + y++; + } + return result; +} + +#endif + +#ifdef USE_AVX512 +template<> +inline float dot(const float* x, const float *y, int f) { + float result = 0; + if (f > 15) { + __m512 d = _mm512_setzero_ps(); + for (; f > 15; f -= 16) { + //AVX512F includes FMA + d = _mm512_fmadd_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y), d); + x += 16; + y += 16; + } + // Sum all floats in dot register. + result += _mm512_reduce_add_ps(d); + } + // Don't forget the remaining values. + for (; f > 0; f--) { + result += *x * *y; + x++; + y++; + } + return result; +} + +template<> +inline float manhattan_distance(const float* x, const float* y, int f) { + float result = 0; + int i = f; + if (f > 15) { + __m512 manhattan = _mm512_setzero_ps(); + for (; i > 15; i -= 16) { + const __m512 x_minus_y = _mm512_sub_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y)); + manhattan = _mm512_add_ps(manhattan, _mm512_abs_ps(x_minus_y)); + x += 16; + y += 16; + } + // Sum all floats in manhattan register. + result = _mm512_reduce_add_ps(manhattan); + } + // Don't forget the remaining values. + for (; i > 0; i--) { + result += fabsf(*x - *y); + x++; + y++; + } + return result; +} + +template<> +inline float euclidean_distance(const float* x, const float* y, int f) { + float result=0; + if (f > 15) { + __m512 d = _mm512_setzero_ps(); + for (; f > 15; f -= 16) { + const __m512 diff = _mm512_sub_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y)); + d = _mm512_fmadd_ps(diff, diff, d); + x += 16; + y += 16; + } + // Sum all floats in dot register. + result = _mm512_reduce_add_ps(d); + } + // Don't forget the remaining values. + for (; f > 0; f--) { + float tmp = *x - *y; + result += tmp * tmp; + x++; + y++; + } + return result; +} + #endif @@ -185,15 +345,6 @@ inline T get_norm(T* v, int f) { return sqrt(dot(v, v, f)); } -template -inline void normalize(T* v, int f) { - T norm = get_norm(v, f); - if (norm > 0) { - for (int z = 0; z < f; z++) - v[z] /= norm; - } -} - template inline void two_means(const vector& nodes, int f, Random& random, bool cosine, Node* p, Node* q) { /* @@ -208,9 +359,11 @@ inline void two_means(const vector& nodes, int f, Random& random, bool co size_t i = random.index(count); size_t j = random.index(count-1); j += (j >= i); // ensure that i != j - memcpy(p->v, nodes[i]->v, f * sizeof(T)); - memcpy(q->v, nodes[j]->v, f * sizeof(T)); - if (cosine) { normalize(p->v, f); normalize(q->v, f); } + + Distance::template copy_node(p, nodes[i], f); + Distance::template copy_node(q, nodes[j], f); + + if (cosine) { Distance::template normalize(p, f); Distance::template normalize(q, f); } Distance::init_node(p, f); Distance::init_node(q, f); @@ -219,29 +372,55 @@ inline void two_means(const vector& nodes, int f, Random& random, bool co size_t k = random.index(count); T di = ic * Distance::distance(p, nodes[k], f), dj = jc * Distance::distance(q, nodes[k], f); - T norm = cosine ? get_norm(nodes[k]->v, f) : 1.0; + T norm = cosine ? get_norm(nodes[k]->v, f) : 1; if (!(norm > T(0))) { continue; } if (di < dj) { for (int z = 0; z < f; z++) - p->v[z] = (p->v[z] * ic + nodes[k]->v[z] / norm) / (ic + 1); + p->v[z] = (p->v[z] * ic + nodes[k]->v[z] / norm) / (ic + 1); Distance::init_node(p, f); ic++; } else if (dj < di) { for (int z = 0; z < f; z++) - q->v[z] = (q->v[z] * jc + nodes[k]->v[z] / norm) / (jc + 1); + q->v[z] = (q->v[z] * jc + nodes[k]->v[z] / norm) / (jc + 1); Distance::init_node(q, f); jc++; } } } - } // namespace -struct Angular { +struct Base { + template + static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) { + // Override this in specific metric structs below if you need to do any pre-processing + // on the entire set of nodes passed into this index. + } + + template + static inline void zero_value(Node* dest) { + // Initialize any fields that require sane defaults within this node. + } + + template + static inline void copy_node(Node* dest, const Node* source, const int f) { + memcpy(dest->v, source->v, f * sizeof(T)); + } + + template + static inline void normalize(Node* node, int f) { + T norm = get_norm(node->v, f); + if (norm > 0) { + for (int z = 0; z < f; z++) + node->v[z] /= norm; + } + } +}; + +struct Angular : Base { template - struct ANNOY_NODE_ATTRIBUTE Node { + struct Node { /* * We store a binary tree where each node has two things * - A vector associated with it @@ -261,7 +440,7 @@ struct Angular { S children[2]; // Will possibly store more than 2 T norm; }; - T v[1]; // We let this one overflow intentionally. Need to allocate at least 1 to make GCC happy + T v[V_ARRAY_SIZE]; }; template static inline T distance(const Node* x, const Node* y, int f) { @@ -285,18 +464,16 @@ struct Angular { if (dot != 0) return (dot > 0); else - return random.flip(); + return (bool)random.flip(); } template static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { - Node* p = (Node*)malloc(s); // TODO: avoid - Node* q = (Node*)malloc(s); // TODO: avoid + Node* p = (Node*)alloca(s); + Node* q = (Node*)alloca(s); two_means >(nodes, f, random, true, p, q); for (int z = 0; z < f; z++) n->v[z] = p->v[z] - q->v[z]; - normalize(n->v, f); - free(p); - free(q); + Base::normalize >(n, f); } template static inline T normalized_distance(T distance) { @@ -324,12 +501,125 @@ struct Angular { } }; -struct Hamming { + +struct DotProduct : Angular { + template + struct Node { + /* + * This is an extension of the Angular node with an extra attribute for the scaled norm. + */ + S n_descendants; + S children[2]; // Will possibly store more than 2 + T dot_factor; + T v[V_ARRAY_SIZE]; + }; + + static const char* name() { + return "dot"; + } + template + static inline T distance(const Node* x, const Node* y, int f) { + return -dot(x->v, y->v, f); + } + + template + static inline void zero_value(Node* dest) { + dest->dot_factor = 0; + } + + template + static inline void init_node(Node* n, int f) { + } + + template + static inline void copy_node(Node* dest, const Node* source, const int f) { + memcpy(dest->v, source->v, f * sizeof(T)); + dest->dot_factor = source->dot_factor; + } + + template + static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { + Node* p = (Node*)alloca(s); + Node* q = (Node*)alloca(s); + DotProduct::zero_value(p); + DotProduct::zero_value(q); + two_means >(nodes, f, random, true, p, q); + for (int z = 0; z < f; z++) + n->v[z] = p->v[z] - q->v[z]; + n->dot_factor = p->dot_factor - q->dot_factor; + DotProduct::normalize >(n, f); + } + + template + static inline void normalize(Node* node, int f) { + T norm = sqrt(dot(node->v, node->v, f) + pow(node->dot_factor, 2)); + if (norm > 0) { + for (int z = 0; z < f; z++) + node->v[z] /= norm; + node->dot_factor /= norm; + } + } + + template + static inline T margin(const Node* n, const T* y, int f) { + return dot(n->v, y, f) + (n->dot_factor * n->dot_factor); + } + + template + static inline bool side(const Node* n, const T* y, int f, Random& random) { + T dot = margin(n, y, f); + if (dot != 0) + return (dot > 0); + else + return (bool)random.flip(); + } + + template + static inline T normalized_distance(T distance) { + return -distance; + } + + template + static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) { + // This uses a method from Microsoft Research for transforming inner product spaces to cosine/angular-compatible spaces. + // (Bachrach et al., 2014, see https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/XboxInnerProduct.pdf) + + // Step one: compute the norm of each vector and store that in its extra dimension (f-1) + for (S i = 0; i < node_count; i++) { + Node* node = get_node_ptr(nodes, _s, i); + T norm = sqrt(dot(node->v, node->v, f)); + if (isnan(norm)) norm = 0; + node->dot_factor = norm; + } + + // Step two: find the maximum norm + T max_norm = 0; + for (S i = 0; i < node_count; i++) { + Node* node = get_node_ptr(nodes, _s, i); + if (node->dot_factor > max_norm) { + max_norm = node->dot_factor; + } + } + + // Step three: set each vector's extra dimension to sqrt(max_norm^2 - norm^2) + for (S i = 0; i < node_count; i++) { + Node* node = get_node_ptr(nodes, _s, i); + T node_norm = node->dot_factor; + + T dot_factor = sqrt(pow(max_norm, static_cast(2.0)) - pow(node_norm, static_cast(2.0))); + if (isnan(dot_factor)) dot_factor = 0; + + node->dot_factor = dot_factor; + } + } +}; + +struct Hamming : Base { template - struct ANNOY_NODE_ATTRIBUTE Node { + struct Node { S n_descendants; S children[2]; - T v[1]; + T v[V_ARRAY_SIZE]; }; static const size_t max_iterations = 20; @@ -343,6 +633,17 @@ struct Hamming { static inline T pq_initial_value() { return numeric_limits::max(); } + template + static inline int cole_popcount(T v) { + // Note: Only used with MSVC 9, which lacks intrinsics and fails to + // calculate std::bitset::count for v > 32bit. Uses the generalized + // approach by Eric Cole. + // See https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64 + v = v - ((v >> 1) & (T)~(T)0/3); + v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); + v = (v + (v >> 4)) & (T)~(T)0/255*15; + return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8; + } template static inline T distance(const Node* x, const Node* y, int f) { size_t dist = 0; @@ -385,7 +686,7 @@ struct Hamming { for (; j < dim; j++) { n->v[0] = j; cur_size = 0; - for (typename vector*>::const_iterator it = nodes.begin(); it != nodes.end(); ++it) { + for (typename vector*>::const_iterator it = nodes.begin(); it != nodes.end(); ++it) { if (margin(n, (*it)->v, f)) { cur_size++; } @@ -408,16 +709,14 @@ struct Hamming { } }; -struct Minkowski { + +struct Minkowski : Base { template - struct ANNOY_NODE_ATTRIBUTE Node { + struct Node { S n_descendants; T a; // need an extra constant term to determine the offset of the plane - union { - S children[2]; - T norm; - }; - T v[1]; + S children[2]; + T v[V_ARRAY_SIZE]; }; template static inline T margin(const Node* n, const T* y, int f) { @@ -429,7 +728,7 @@ struct Minkowski { if (dot != 0) return (dot > 0); else - return random.flip(); + return (bool)random.flip(); } template static inline T pq_distance(T distance, T margin, int child_nr) { @@ -444,28 +743,23 @@ struct Minkowski { }; -struct Euclidean : Minkowski{ +struct Euclidean : Minkowski { template static inline T distance(const Node* x, const Node* y, int f) { - T pp = x->norm ? x->norm : dot(x->v, x->v, f); // For backwards compatibility reasons, we need to fall back and compute the norm here - T qq = y->norm ? y->norm : dot(y->v, y->v, f); - T pq = dot(x->v, y->v, f); - return pp + qq - 2*pq; + return euclidean_distance(x->v, y->v, f); } template static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { - Node* p = (Node*)malloc(s); // TODO: avoid - Node* q = (Node*)malloc(s); // TODO: avoid + Node* p = (Node*)alloca(s); + Node* q = (Node*)alloca(s); two_means >(nodes, f, random, false, p, q); for (int z = 0; z < f; z++) n->v[z] = p->v[z] - q->v[z]; - normalize(n->v, f); + Base::normalize >(n, f); n->a = 0.0; for (int z = 0; z < f; z++) n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2; - free(p); - free(q); } template static inline T normalized_distance(T distance) { @@ -473,32 +767,30 @@ struct Euclidean : Minkowski{ } template static inline void init_node(Node* n, int f) { - n->norm = dot(n->v, n->v, f); } static const char* name() { return "euclidean"; } + }; -struct Manhattan : Minkowski{ +struct Manhattan : Minkowski { template static inline T distance(const Node* x, const Node* y, int f) { return manhattan_distance(x->v, y->v, f); } template static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { - Node* p = (Node*)malloc(s); // TODO: avoid - Node* q = (Node*)malloc(s); // TODO: avoid + Node* p = (Node*)alloca(s); + Node* q = (Node*)alloca(s); two_means >(nodes, f, random, false, p, q); for (int z = 0; z < f; z++) n->v[z] = p->v[z] - q->v[z]; - normalize(n->v, f); + Base::normalize >(n, f); n->a = 0.0; for (int z = 0; z < f; z++) n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2; - free(p); - free(q); } template static inline T normalized_distance(T distance) { @@ -515,20 +807,23 @@ struct Manhattan : Minkowski{ template class AnnoyIndexInterface { public: + // Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL virtual ~AnnoyIndexInterface() {}; - virtual void add_item(S item, const T* w) = 0; - virtual void build(int q) = 0; - virtual void unbuild() = 0; - virtual bool save(const char* filename) = 0; + virtual bool add_item(S item, const T* w, char** error=NULL) = 0; + virtual bool build(int q, char** error=NULL) = 0; + virtual bool unbuild(char** error=NULL) = 0; + virtual bool save(const char* filename, bool prefault=false, char** error=NULL) = 0; virtual void unload() = 0; - virtual bool load(const char* filename) = 0; - virtual T get_distance(S i, S j) = 0; - virtual void get_nns_by_item(S item, size_t n, size_t search_k, vector* result, vector* distances) = 0; - virtual void get_nns_by_vector(const T* w, size_t n, size_t search_k, vector* result, vector* distances) = 0; - virtual S get_n_items() = 0; + virtual bool load(const char* filename, bool prefault=false, char** error=NULL) = 0; + virtual T get_distance(S i, S j) const = 0; + virtual void get_nns_by_item(S item, size_t n, int search_k, vector* result, vector* distances) const = 0; + virtual void get_nns_by_vector(const T* w, size_t n, int search_k, vector* result, vector* distances) const = 0; + virtual S get_n_items() const = 0; + virtual S get_n_trees() const = 0; virtual void verbose(bool v) = 0; - virtual void get_item(S item, T* v) = 0; + virtual void get_item(S item, T* v) const = 0; virtual void set_seed(int q) = 0; + virtual bool on_disk_build(const char* filename, char** error=NULL) = 0; }; template @@ -557,12 +852,15 @@ template bool _loaded; bool _verbose; int _fd; + bool _on_disk; + bool _built; public: - AnnoyIndex(int f) : _f(f), _random() { - _s = offsetof(Node, v) + f * sizeof(T); // Size of each node + AnnoyIndex(int f) : _f(f), _random() { + _s = offsetof(Node, v) + _f * sizeof(T); // Size of each node _verbose = false; - _K = (_s - offsetof(Node, children)) / sizeof(S); // Max number of descendants to fit into node + _built = false; + _K = (S) (((size_t) (_s - offsetof(Node, children))) / sizeof(S)); // Max number of descendants to fit into node reinitialize(); // Reset everything } ~AnnoyIndex() { @@ -573,33 +871,70 @@ template return _f; } - void add_item(S item, const T* w) { - add_item_impl(item, w); + bool add_item(S item, const T* w, char** error=NULL) { + return add_item_impl(item, w, error); } template - void add_item_impl(S item, const W& w) { + bool add_item_impl(S item, const W& w, char** error=NULL) { + if (_loaded) { + set_error_from_string(error, "You can't add an item to a loaded index"); + return false; + } _allocate_size(item + 1); Node* n = _get(item); + D::zero_value(n); + n->children[0] = 0; n->children[1] = 0; n->n_descendants = 1; for (int z = 0; z < _f; z++) n->v[z] = w[z]; + D::init_node(n, _f); if (item >= _n_items) _n_items = item + 1; - } - void build(int q) { + return true; + } + + bool on_disk_build(const char* file, char** error=NULL) { + _on_disk = true; + _fd = open(file, O_RDWR | O_CREAT | O_TRUNC, (int) 0600); + if (_fd == -1) { + set_error_from_errno(error, "Unable to open"); + _fd = 0; + return false; + } + _nodes_size = 1; + if (ftruncate(_fd, _s * _nodes_size) == -1) { + set_error_from_errno(error, "Unable to truncate"); + return false; + } +#ifdef MAP_POPULATE + _nodes = (Node*) mmap(0, _s * _nodes_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0); +#else + _nodes = (Node*) mmap(0, _s * _nodes_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0); +#endif + return true; + } + + bool build(int q, char** error=NULL) { if (_loaded) { - // TODO: throw exception - showUpdate("You can't build a loaded index\n"); - return; + set_error_from_string(error, "You can't build a loaded index"); + return false; } + + if (_built) { + set_error_from_string(error, "You can't build a built index"); + return false; + } + + D::template preprocess(_nodes, _s, _n_items, _f); + _n_nodes = _n_items; while (1) { if (q == -1 && _n_nodes >= _n_items * 2) @@ -610,12 +945,13 @@ template vector indices; for (S i = 0; i < _n_items; i++) { - if (_get(i)->n_descendants >= 1) // Issue #223 + if (_get(i)->n_descendants >= 1) // Issue #223 indices.push_back(i); } _roots.push_back(_make_tree(indices, true)); } + // Also, copy the roots into the last segment of the array // This way we can load them faster without reading the whole file _allocate_size(_n_nodes + (S)_roots.size()); @@ -624,28 +960,63 @@ template _n_nodes += _roots.size(); if (_verbose) showUpdate("has %d nodes\n", _n_nodes); + + if (_on_disk) { + _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * _n_nodes); + if (ftruncate(_fd, _s * _n_nodes)) { + // TODO: this probably creates an index in a corrupt state... not sure what to do + set_error_from_errno(error, "Unable to truncate"); + return false; + } + _nodes_size = _n_nodes; + } + _built = true; + return true; } - void unbuild() { + bool unbuild(char** error=NULL) { if (_loaded) { - showUpdate("You can't unbuild a loaded index\n"); - return; + set_error_from_string(error, "You can't unbuild a loaded index"); + return false; } _roots.clear(); _n_nodes = _n_items; + _built = false; + + return true; } - bool save(const char* filename) { - FILE *f = fopen(filename, "wb"); - if (f == NULL) + bool save(const char* filename, bool prefault=false, char** error=NULL) { + if (!_built) { + set_error_from_string(error, "You can't save an index that hasn't been built"); return false; + } + if (_on_disk) { + return true; + } else { + // Delete file if it already exists (See issue #335) + unlink(filename); + + FILE *f = fopen(filename, "wb"); + if (f == NULL) { + set_error_from_errno(error, "Unable to open"); + return false; + } - fwrite(_nodes, _s, _n_nodes, f); - fclose(f); + if (fwrite(_nodes, _s, _n_nodes, f) != (size_t) _n_nodes) { + set_error_from_errno(error, "Unable to write"); + return false; + } - unload(); - return load(filename); + if (fclose(f) == EOF) { + set_error_from_errno(error, "Unable to close"); + return false; + } + + unload(); + return load(filename, prefault, error); + } } void reinitialize() { @@ -655,38 +1026,57 @@ template _n_items = 0; _n_nodes = 0; _nodes_size = 0; + _on_disk = false; _roots.clear(); } void unload() { - if (_fd) { - // we have mmapped data + if (_on_disk && _fd) { close(_fd); - off_t size = _n_nodes * _s; - munmap(_nodes, size); - } else if (_nodes) { - // We have heap allocated data - free(_nodes); + munmap(_nodes, _s * _nodes_size); + } else { + if (_fd) { + // we have mmapped data + close(_fd); + munmap(_nodes, _n_nodes * _s); + } else if (_nodes) { + // We have heap allocated data + free(_nodes); + } } reinitialize(); if (_verbose) showUpdate("unloaded\n"); } - bool load(const char* filename) { + bool load(const char* filename, bool prefault=false, char** error=NULL) { _fd = open(filename, O_RDONLY, (int)0400); if (_fd == -1) { + set_error_from_errno(error, "Unable to open"); _fd = 0; return false; } - off_t size = lseek(_fd, 0, SEEK_END); + off_t size = lseek_getsize(_fd); + if (size == -1) { + set_error_from_errno(error, "Unable to get size"); + return false; + } else if (size == 0) { + set_error_from_errno(error, "Size of file is zero"); + return false; + } else if (size % _s) { + // Something is fishy with this index! + set_error_from_errno(error, "Index size is not a multiple of vector size. Ensure you are opening using the same metric you used to create the index."); + return false; + } + + int flags = MAP_SHARED; + if (prefault) { #ifdef MAP_POPULATE - _nodes = (Node*)mmap( - 0, size, PROT_READ, MAP_SHARED | MAP_POPULATE, _fd, 0); + flags |= MAP_POPULATE; #else - _nodes = (Node*)mmap( - 0, size, PROT_READ, MAP_SHARED, _fd, 0); + showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform"); #endif - + } + _nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0); _n_nodes = (S)(size / _s); // Find the roots by scanning the end of the file and taking the nodes with most descendants @@ -705,33 +1095,42 @@ template if (_roots.size() > 1 && _get(_roots.front())->children[0] == _get(_roots.back())->children[0]) _roots.pop_back(); _loaded = true; + _built = true; _n_items = m; if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m); return true; } - T get_distance(S i, S j) { + T get_distance(S i, S j) const { return D::normalized_distance(D::distance(_get(i), _get(j), _f)); } - void get_nns_by_item(S item, size_t n, size_t search_k, vector* result, vector* distances) { + void get_nns_by_item(S item, size_t n, int search_k, vector* result, vector* distances) const { + // TODO: handle OOB const Node* m = _get(item); _get_all_nns(m->v, n, search_k, result, distances); } - void get_nns_by_vector(const T* w, size_t n, size_t search_k, vector* result, vector* distances) { + void get_nns_by_vector(const T* w, size_t n, int search_k, vector* result, vector* distances) const { _get_all_nns(w, n, search_k, result, distances); } - S get_n_items() { + + S get_n_items() const { return _n_items; } + + S get_n_trees() const { + return (S)_roots.size(); + } + void verbose(bool v) { _verbose = v; } - void get_item(S item, T* v) { + void get_item(S item, T* v) const { + // TODO: handle OOB Node* m = _get(item); - memcpy(v, m->v, _f * sizeof(T)); + memcpy(v, m->v, (_f) * sizeof(T)); } void set_seed(int seed) { @@ -742,17 +1141,25 @@ template void _allocate_size(S n) { if (n > _nodes_size) { const double reallocation_factor = 1.3; - S new_nodes_size = std::max(n, - (S)((_nodes_size + 1) * reallocation_factor)); - if (_verbose) showUpdate("Reallocating to %d nodes\n", new_nodes_size); - _nodes = realloc(_nodes, _s * new_nodes_size); - memset((char *)_nodes + (_nodes_size * _s)/sizeof(char), 0, (new_nodes_size - _nodes_size) * _s); + S new_nodes_size = std::max(n, (S) ((_nodes_size + 1) * reallocation_factor)); + void *old = _nodes; + + if (_on_disk) { + int rc = ftruncate(_fd, _s * new_nodes_size); + if (_verbose && rc) showUpdate("File truncation error\n"); + _nodes = remap_memory(_nodes, _fd, _s * _nodes_size, _s * new_nodes_size); + } else { + _nodes = realloc(_nodes, _s * new_nodes_size); + memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s); + } + _nodes_size = new_nodes_size; + if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes); } } - inline Node* _get(S i) { - return (Node*)((uint8_t *)_nodes + (_s * i)); + inline Node* _get(const S i) const { + return get_node_ptr(_nodes, _s, i); } S _make_tree(const vector& indices, bool is_root) { @@ -764,7 +1171,7 @@ template if (indices.size() == 1 && !is_root) return indices[0]; - if (indices.size() <= (size_t)_K && (!is_root || _n_items <= (size_t)_K || indices.size() == 1)) { + if (indices.size() <= (size_t)_K && (!is_root || (size_t)_n_items <= (size_t)_K || indices.size() == 1)) { _allocate_size(_n_nodes + 1); S item = _n_nodes++; Node* m = _get(item); @@ -773,7 +1180,9 @@ template // Using std::copy instead of a loop seems to resolve issues #3 and #13, // probably because gcc 4.8 goes overboard with optimizations. // Using memcpy instead of std::copy for MSVC compatibility. #235 - memcpy(m->children, &indices[0], indices.size() * sizeof(S)); + // Only copy when necessary to avoid crash in MSVC 9. #293 + if (!indices.empty()) + memcpy(m->children, &indices[0], indices.size() * sizeof(S)); return item; } @@ -786,7 +1195,7 @@ template } vector children_indices[2]; - Node* m = (Node*)malloc(_s); // TODO: avoid + Node* m = (Node*)alloca(_s); D::create_split(children, _f, _s, _random, m); for (size_t i = 0; i < indices.size(); i++) { @@ -795,11 +1204,16 @@ template if (n) { bool side = D::side(m, n->v, _f, _random); children_indices[side].push_back(j); + } else { + showUpdate("No node for index %d?\n", j); } } // If we didn't find a hyperplane, just randomize sides as a last option while (children_indices[0].size() == 0 || children_indices[1].size() == 0) { + if (_verbose) + showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n", + children_indices[0].size(), children_indices[1].size()); if (_verbose && indices.size() > 100000) showUpdate("Failed splitting %lu items\n", indices.size()); @@ -808,7 +1222,7 @@ template // Set the vector to 0.0 for (int z = 0; z < _f; z++) - m->v[z] = 0.0; + m->v[z] = 0; for (size_t i = 0; i < indices.size(); i++) { S j = indices[i]; @@ -820,34 +1234,36 @@ template int flip = (children_indices[0].size() > children_indices[1].size()); m->n_descendants = is_root ? _n_items : (S)indices.size(); - for (int side = 0; side < 2; side++) + for (int side = 0; side < 2; side++) { // run _make_tree for the smallest child first (for cache locality) m->children[side^flip] = _make_tree(children_indices[side^flip], false); + } _allocate_size(_n_nodes + 1); S item = _n_nodes++; memcpy(_get(item), m, _s); - free(m); return item; } - void _get_all_nns(const T* v, size_t n, size_t search_k, vector* result, vector* distances) { - Node* v_node = (Node *)malloc(_s); // TODO: avoid - memcpy(v_node->v, v, sizeof(T)*_f); + void _get_all_nns(const T* v, size_t n, int search_k, vector* result, vector* distances) const { + Node* v_node = (Node *)alloca(_s); + D::template zero_value(v_node); + memcpy(v_node->v, v, sizeof(T) * _f); D::init_node(v_node, _f); std::priority_queue > q; - if (search_k == (size_t)-1) - search_k = n * _roots.size(); // slightly arbitrary default value + if (search_k == -1) { + search_k = n * _roots.size(); + } for (size_t i = 0; i < _roots.size(); i++) { q.push(make_pair(Distance::template pq_initial_value(), _roots[i])); } std::vector nns; - while (nns.size() < search_k && !q.empty()) { + while (nns.size() < (size_t)search_k && !q.empty()) { const pair& top = q.top(); T d = top.first; S i = top.second; @@ -860,14 +1276,14 @@ template nns.insert(nns.end(), dst, &dst[nd->n_descendants]); } else { T margin = D::margin(nd, v, _f); - q.push(make_pair(D::pq_distance(d, margin, 1), nd->children[1])); - q.push(make_pair(D::pq_distance(d, margin, 0), nd->children[0])); + q.push(make_pair(D::pq_distance(d, margin, 1), static_cast(nd->children[1]))); + q.push(make_pair(D::pq_distance(d, margin, 0), static_cast(nd->children[0]))); } } // Get distances for all items // To avoid calculating distance multiple times for any items, sort by id - sort(nns.begin(), nns.end()); + std::sort(nns.begin(), nns.end()); vector > nns_dist; S last = -1; for (size_t i = 0; i < nns.size(); i++) { @@ -876,7 +1292,7 @@ template continue; last = j; if (_get(j)->n_descendants == 1) // This is only to guard a really obscure case, #284 - nns_dist.push_back(make_pair(D::distance(v_node, _get(j), _f), j)); + nns_dist.push_back(make_pair(D::distance(v_node, _get(j), _f), j)); } size_t m = nns_dist.size(); @@ -887,9 +1303,8 @@ template distances->push_back(D::normalized_distance(nns_dist[i].first)); result->push_back(nns_dist[i].second); } - free(v_node); } }; #endif -// vim: tabstop=2 shiftwidth=2 +// vim: tabstop=2 shiftwidth=2 \ No newline at end of file