diff --git a/faiss/python/loader.py b/faiss/python/loader.py
index fa75edb468..977ada1d9f 100644
--- a/faiss/python/loader.py
+++ b/faiss/python/loader.py
@@ -3,13 +3,34 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
-from packaging.version import Version
 import platform
 import subprocess
 import logging
 import os
+import re
 
 
+def Version(v):
+    """
+    Minimal stand-in for packaging.version.Version: turns a version string
+    into a list of ints, so that two versions can be compared with the usual
+    operators, eg. Version("1.19.2") > Version("1.9").
+
+    Only the leading dotted-number part of v is used, so that pre-release or
+    local suffixes (eg. "1.22.0rc1" or "1.20.0.dev0+abc") do not raise.
+    Raises ValueError if v does not start with a number.
+    """
+    try:
+        return [int(x) for x in v.split('.')]
+    except ValueError:
+        pass
+    # not a plain dotted-integer string: keep only the numeric prefix
+    m = re.match(r'\s*(\d+(?:\.\d+)*)', v)
+    if m is None:
+        raise ValueError("cannot parse version string %r" % (v,))
+    return [int(x) for x in m.group(1).split('.')]
+
+
 def supported_instruction_sets():
     """
     Returns the set of supported CPU features, see
diff --git a/faiss/python/swigfaiss.swig b/faiss/python/swigfaiss.swig
index 752b64b55b..4d44fb650b 100644
--- a/faiss/python/swigfaiss.swig
+++ b/faiss/python/swigfaiss.swig
@@ -1245,10 +1245,13 @@ void * cast_integer_to_void_ptr (int64_t x) {
 %}
 
 %inline %{
-    void wait() {
-        // in gdb, use return to get out of this function
-        for(int i = 0; i == 0; i += 0);
-    }
- %}
+
+// SWIG_VERSION is a 6-digit hex number, e.g. version 3.2.1 is encoded as
+// 0x030201
+uint64_t swig_version() {
+    return SWIG_VERSION;
+}
+
+%}
 
 // End of file...
diff --git a/tests/test_build_blocks.py b/tests/test_build_blocks.py index fdf9ad8bd7..a63f62a8d9 100644 --- a/tests/test_build_blocks.py +++ b/tests/test_build_blocks.py @@ -66,43 +66,6 @@ def test_pca_epsilon(self): self.assertTrue(np.all(np.isfinite(y))) -class TestRevSwigPtr(unittest.TestCase): - - def test_rev_swig_ptr(self): - - index = faiss.IndexFlatL2(4) - xb0 = np.vstack([ - i * 10 + np.array([1, 2, 3, 4], dtype='float32') - for i in range(5)]) - index.add(xb0) - xb = faiss.rev_swig_ptr(index.get_xb(), 4 * 5).reshape(5, 4) - self.assertEqual(np.abs(xb0 - xb).sum(), 0) - - -class TestException(unittest.TestCase): - - def test_exception(self): - - index = faiss.IndexFlatL2(10) - - a = np.zeros((5, 10), dtype='float32') - b = np.zeros(5, dtype='int64') - - # an unsupported operation for IndexFlat - self.assertRaises( - RuntimeError, - index.add_with_ids, a, b - ) - # assert 'add_with_ids not implemented' in str(e) - - def test_exception_2(self): - self.assertRaises( - RuntimeError, - faiss.index_factory, 12, 'IVF256,Flat,PQ8' - ) - # assert 'could not parse' in str(e) - - class TestMapLong2Long(unittest.TestCase): def test_maplong2long(self): @@ -380,7 +343,6 @@ def test_rand_vector(self): self.assertLess(ninter, 460) - class TestPairwiseDis(unittest.TestCase): def test_L2(self): @@ -418,142 +380,6 @@ def test_IP(self): dis[i], np.dot(x[ix[i]], y[iy[i]])) -class TestSWIGWrap(unittest.TestCase): - """ various regressions with the SWIG wrapper """ - - def test_size_t_ptr(self): - # issue 1064 - index = faiss.IndexHNSWFlat(10, 32) - - hnsw = index.hnsw - index.add(np.random.rand(100, 10).astype('float32')) - be = np.empty(2, 'uint64') - hnsw.neighbor_range(23, 0, faiss.swig_ptr(be), faiss.swig_ptr(be[1:])) - - def test_id_map_at(self): - # issue 1020 - n_features = 100 - feature_dims = 10 - - features = np.random.random((n_features, feature_dims)).astype(np.float32) - idx = np.arange(n_features).astype(np.int64) - - index = faiss.IndexFlatL2(feature_dims) - 
index = faiss.IndexIDMap2(index) - index.add_with_ids(features, idx) - - [index.id_map.at(int(i)) for i in range(index.ntotal)] - - def test_downcast_Refine(self): - - index = faiss.IndexRefineFlat( - faiss.IndexScalarQuantizer(10, faiss.ScalarQuantizer.QT_8bit) - ) - - # serialize and deserialize - index2 = faiss.deserialize_index( - faiss.serialize_index(index) - ) - - assert isinstance(index2, faiss.IndexRefineFlat) - - def do_test_array_type(self, dtype): - """ tests swig_ptr and rev_swig_ptr for this type of array """ - a = np.arange(12).astype(dtype) - ptr = faiss.swig_ptr(a) - a2 = faiss.rev_swig_ptr(ptr, 12) - np.testing.assert_array_equal(a, a2) - - def test_all_array_types(self): - self.do_test_array_type('float32') - self.do_test_array_type('float64') - self.do_test_array_type('int8') - self.do_test_array_type('uint8') - self.do_test_array_type('int16') - self.do_test_array_type('uint16') - self.do_test_array_type('int32') - self.do_test_array_type('uint32') - self.do_test_array_type('int64') - self.do_test_array_type('uint64') - - def test_int64(self): - # see https://github.com/facebookresearch/faiss/issues/1529 - v = faiss.Int64Vector() - - for i in range(10): - v.push_back(i) - a = faiss.vector_to_array(v) - assert a.dtype == 'int64' - np.testing.assert_array_equal(a, np.arange(10, dtype='int64')) - - # check if it works in an IDMap - idx = faiss.IndexIDMap(faiss.IndexFlatL2(32)) - idx.add_with_ids( - np.random.rand(10, 32).astype('float32'), - np.random.randint(1000, size=10, dtype='int64') - ) - faiss.vector_to_array(idx.id_map) - - -class TestNNDescentKNNG(unittest.TestCase): - - def test_knng_L2(self): - self.subtest(32, 10, faiss.METRIC_L2) - - def test_knng_IP(self): - self.subtest(32, 10, faiss.METRIC_INNER_PRODUCT) - - def subtest(self, d, K, metric): - metric_names = {faiss.METRIC_L1: 'L1', - faiss.METRIC_L2: 'L2', - faiss.METRIC_INNER_PRODUCT: 'IP'} - - nb = 1000 - _, xb, _ = get_dataset_2(d, 0, nb, 0) - - _, knn = faiss.knn(xb, xb, K + 1, 
metric) - knn = knn[:, 1:] - - index = faiss.IndexNNDescentFlat(d, K, metric) - index.nndescent.S = 10 - index.nndescent.R = 32 - index.nndescent.L = K + 20 - index.nndescent.iter = 5 - index.verbose = True - - index.add(xb) - graph = index.nndescent.final_graph - graph = faiss.vector_to_array(graph) - graph = graph.reshape(nb, K) - - recalls = 0 - for i in range(nb): - for j in range(K): - for k in range(K): - if graph[i, j] == knn[i, k]: - recalls += 1 - break - recall = 1.0 * recalls / (nb * K) - assert recall > 0.99 - - def test_small_nndescent(self): - """ building a too small graph used to crash, make sure it raises - an exception instead. - TODO: build the exact knn graph for small cases - """ - d = 32 - K = 10 - index = faiss.IndexNNDescentFlat(d, K, faiss.METRIC_L2) - index.nndescent.S = 10 - index.nndescent.R = 32 - index.nndescent.L = K + 20 - index.nndescent.iter = 5 - index.verbose = True - - xb = np.zeros((78, d), dtype='float32') - self.assertRaises(RuntimeError, index.add, xb) - - class TestResultHeap(unittest.TestCase): def test_keep_min(self): @@ -663,6 +489,7 @@ def test_bucket_sort_inplace_int64(self): def test_bucket_sort_inplace_parallel_int64(self): self.do_test_bucket_sort_inplace(4, dtype='int64') + class TestMergeKNNResults(unittest.TestCase): def do_test(self, ismax, dtype): diff --git a/tests/test_documentation.py b/tests/test_documentation.py index c55f162a9b..2a0e189281 100644 --- a/tests/test_documentation.py +++ b/tests/test_documentation.py @@ -4,7 +4,6 @@ # LICENSE file in the root directory of this source tree. import unittest - import faiss diff --git a/tests/test_doxygen_documentation.py b/tests/test_doxygen_documentation.py deleted file mode 100644 index e6b7c1006b..0000000000 --- a/tests/test_doxygen_documentation.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. 
- -import unittest - -import faiss - - -class TestDocumentation(unittest.TestCase): - - def test_doxygen_comments(self): - maxheap_array = faiss.float_maxheap_array_t() - - self.assertTrue("a template structure for a set of [min|max]-heaps" - in maxheap_array.__doc__) diff --git a/tests/test_graph_based.py b/tests/test_graph_based.py index 19d4349912..81786efdf7 100644 --- a/tests/test_graph_based.py +++ b/tests/test_graph_based.py @@ -3,7 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -""" a few tests for graph-based indices (HNSW and NSG)""" +""" a few tests for graph-based indices (HNSW, nndescent and NSG)""" import numpy as np import unittest @@ -506,3 +506,62 @@ def test_order(self): gt = np.arange(0, k)[np.newaxis, :] # [1, k] gt = np.repeat(gt, nq, axis=0) # [nq, k] np.testing.assert_array_equal(indices, gt) + + +class TestNNDescentKNNG(unittest.TestCase): + + def test_knng_L2(self): + self.subtest(32, 10, faiss.METRIC_L2) + + def test_knng_IP(self): + self.subtest(32, 10, faiss.METRIC_INNER_PRODUCT) + + def subtest(self, d, K, metric): + metric_names = {faiss.METRIC_L1: 'L1', + faiss.METRIC_L2: 'L2', + faiss.METRIC_INNER_PRODUCT: 'IP'} + + nb = 1000 + _, xb, _ = get_dataset_2(d, 0, nb, 0) + + _, knn = faiss.knn(xb, xb, K + 1, metric) + knn = knn[:, 1:] + + index = faiss.IndexNNDescentFlat(d, K, metric) + index.nndescent.S = 10 + index.nndescent.R = 32 + index.nndescent.L = K + 20 + index.nndescent.iter = 5 + index.verbose = True + + index.add(xb) + graph = index.nndescent.final_graph + graph = faiss.vector_to_array(graph) + graph = graph.reshape(nb, K) + + recalls = 0 + for i in range(nb): + for j in range(K): + for k in range(K): + if graph[i, j] == knn[i, k]: + recalls += 1 + break + recall = 1.0 * recalls / (nb * K) + assert recall > 0.99 + + def test_small_nndescent(self): + """ building a too small graph used to crash, make sure it raises + an exception instead. 
+ TODO: build the exact knn graph for small cases + """ + d = 32 + K = 10 + index = faiss.IndexNNDescentFlat(d, K, faiss.METRIC_L2) + index.nndescent.S = 10 + index.nndescent.R = 32 + index.nndescent.L = K + 20 + index.nndescent.iter = 5 + index.verbose = True + + xb = np.zeros((78, d), dtype='float32') + self.assertRaises(RuntimeError, index.add, xb) diff --git a/tests/test_swig_wrapper.py b/tests/test_swig_wrapper.py new file mode 100644 index 0000000000..ab3dd9ab70 --- /dev/null +++ b/tests/test_swig_wrapper.py @@ -0,0 +1,142 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# a few tests of the swig wrapper + +import unittest +import faiss +import numpy as np + + +class TestSWIGWrap(unittest.TestCase): + """ various regressions with the SWIG wrapper """ + + def test_size_t_ptr(self): + # issue 1064 + index = faiss.IndexHNSWFlat(10, 32) + + hnsw = index.hnsw + index.add(np.random.rand(100, 10).astype('float32')) + be = np.empty(2, 'uint64') + hnsw.neighbor_range(23, 0, faiss.swig_ptr(be), faiss.swig_ptr(be[1:])) + + def test_id_map_at(self): + # issue 1020 + n_features = 100 + feature_dims = 10 + + features = np.random.random((n_features, feature_dims)).astype(np.float32) + idx = np.arange(n_features).astype(np.int64) + + index = faiss.IndexFlatL2(feature_dims) + index = faiss.IndexIDMap2(index) + index.add_with_ids(features, idx) + + [index.id_map.at(int(i)) for i in range(index.ntotal)] + + def test_downcast_Refine(self): + + index = faiss.IndexRefineFlat( + faiss.IndexScalarQuantizer(10, faiss.ScalarQuantizer.QT_8bit) + ) + + # serialize and deserialize + index2 = faiss.deserialize_index( + faiss.serialize_index(index) + ) + + assert isinstance(index2, faiss.IndexRefineFlat) + + def do_test_array_type(self, dtype): + """ tests swig_ptr and rev_swig_ptr for this type of array """ + a = np.arange(12).astype(dtype) + ptr = 
faiss.swig_ptr(a) + a2 = faiss.rev_swig_ptr(ptr, 12) + np.testing.assert_array_equal(a, a2) + + def test_all_array_types(self): + self.do_test_array_type('float32') + self.do_test_array_type('float64') + self.do_test_array_type('int8') + self.do_test_array_type('uint8') + self.do_test_array_type('int16') + self.do_test_array_type('uint16') + self.do_test_array_type('int32') + self.do_test_array_type('uint32') + self.do_test_array_type('int64') + self.do_test_array_type('uint64') + + def test_int64(self): + # see https://github.com/facebookresearch/faiss/issues/1529 + v = faiss.Int64Vector() + + for i in range(10): + v.push_back(i) + a = faiss.vector_to_array(v) + assert a.dtype == 'int64' + np.testing.assert_array_equal(a, np.arange(10, dtype='int64')) + + # check if it works in an IDMap + idx = faiss.IndexIDMap(faiss.IndexFlatL2(32)) + idx.add_with_ids( + np.random.rand(10, 32).astype('float32'), + np.random.randint(1000, size=10, dtype='int64') + ) + faiss.vector_to_array(idx.id_map) + + def test_asan(self): + # this test should fail with ASAN + index = faiss.IndexFlatL2(32) + index.this.own(False) # this is a mem leak, should be caught by ASAN + + def test_SWIG_version(self): + self.assertLess(faiss.swig_version(), 0x050000) + + +class TestRevSwigPtr(unittest.TestCase): + + def test_rev_swig_ptr(self): + + index = faiss.IndexFlatL2(4) + xb0 = np.vstack([ + i * 10 + np.array([1, 2, 3, 4], dtype='float32') + for i in range(5)]) + index.add(xb0) + xb = faiss.rev_swig_ptr(index.get_xb(), 4 * 5).reshape(5, 4) + self.assertEqual(np.abs(xb0 - xb).sum(), 0) + + +class TestException(unittest.TestCase): + + def test_exception(self): + + index = faiss.IndexFlatL2(10) + + a = np.zeros((5, 10), dtype='float32') + b = np.zeros(5, dtype='int64') + + # an unsupported operation for IndexFlat + self.assertRaises( + RuntimeError, + index.add_with_ids, a, b + ) + # assert 'add_with_ids not implemented' in str(e) + + def test_exception_2(self): + self.assertRaises( + RuntimeError, 
+ faiss.index_factory, 12, 'IVF256,Flat,PQ8' + ) + # assert 'could not parse' in str(e) + + +@unittest.skipIf(faiss.swig_version() < 0x040000, "swig < 4 does not support Doxygen comments") +class TestDoxygen(unittest.TestCase): + + def test_doxygen_comments(self): + maxheap_array = faiss.float_maxheap_array_t() + + self.assertTrue("a template structure for a set of [min|max]-heaps" + in maxheap_array.__doc__)