-
Notifications
You must be signed in to change notification settings - Fork 4.3k
add skip_storage flag to HNSW #3487
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,8 +5,6 @@ | |
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| // -*- c++ -*- | ||
|
|
||
| #include <faiss/index_io.h> | ||
|
|
||
| #include <faiss/impl/io_macros.h> | ||
|
|
@@ -531,7 +529,11 @@ Index* read_index(IOReader* f, int io_flags) { | |
| Index* idx = nullptr; | ||
| uint32_t h; | ||
| READ1(h); | ||
| if (h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) { | ||
| if (h == fourcc("null")) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When would the caller provide null? |
||
| // denotes a missing index, useful for some cases | ||
| return nullptr; | ||
| } else if ( | ||
| h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) { | ||
| IndexFlat* idxf; | ||
| if (h == fourcc("IxFI")) { | ||
| idxf = new IndexFlatIP(); | ||
|
|
@@ -961,7 +963,7 @@ Index* read_index(IOReader* f, int io_flags) { | |
| read_index_header(idxhnsw, f); | ||
| read_HNSW(&idxhnsw->hnsw, f); | ||
| idxhnsw->storage = read_index(f, io_flags); | ||
| idxhnsw->own_fields = true; | ||
| idxhnsw->own_fields = idxhnsw->storage != nullptr; | ||
| if (h == fourcc("IHNp") && !(io_flags & IO_FLAG_PQ_SKIP_SDC_TABLE)) { | ||
| dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,8 +5,6 @@ | |
| * LICENSE file in the root directory of this source tree. | ||
| */ | ||
|
|
||
| // -*- c++ -*- | ||
|
|
||
| #include <faiss/index_io.h> | ||
|
|
||
| #include <faiss/impl/io.h> | ||
|
|
@@ -390,8 +388,12 @@ static void write_ivf_header(const IndexIVF* ivf, IOWriter* f) { | |
| write_direct_map(&ivf->direct_map, f); | ||
| } | ||
|
|
||
| void write_index(const Index* idx, IOWriter* f) { | ||
| if (const IndexFlat* idxf = dynamic_cast<const IndexFlat*>(idx)) { | ||
| void write_index(const Index* idx, IOWriter* f, int io_flags) { | ||
| if (idx == nullptr) { | ||
| // eg. for a storage component of HNSW that is set to nullptr | ||
| uint32_t h = fourcc("null"); | ||
| WRITE1(h); | ||
| } else if (const IndexFlat* idxf = dynamic_cast<const IndexFlat*>(idx)) { | ||
| uint32_t h = | ||
| fourcc(idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" | ||
| : idxf->metric_type == METRIC_L2 ? "IxF2" | ||
|
|
@@ -765,7 +767,12 @@ void write_index(const Index* idx, IOWriter* f) { | |
| WRITE1(h); | ||
| write_index_header(idxhnsw, f); | ||
| write_HNSW(&idxhnsw->hnsw, f); | ||
| write_index(idxhnsw->storage, f); | ||
| if (io_flags & IO_FLAG_SKIP_STORAGE) { | ||
| uint32_t n4 = fourcc("null"); | ||
| WRITE1(n4); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is it going to output to stdout? |
||
| } else { | ||
| write_index(idxhnsw->storage, f); | ||
| } | ||
| } else if (const IndexNSG* idxnsg = dynamic_cast<const IndexNSG*>(idx)) { | ||
| uint32_t h = dynamic_cast<const IndexNSGFlat*>(idx) ? fourcc("INSf") | ||
| : dynamic_cast<const IndexNSGPQ*>(idx) ? fourcc("INSp") | ||
|
|
@@ -841,14 +848,14 @@ void write_index(const Index* idx, IOWriter* f) { | |
| } | ||
| } | ||
|
|
||
| void write_index(const Index* idx, FILE* f) { | ||
| void write_index(const Index* idx, FILE* f, int io_flags) { | ||
| FileIOWriter writer(f); | ||
| write_index(idx, &writer); | ||
| write_index(idx, &writer, io_flags); | ||
| } | ||
|
|
||
| void write_index(const Index* idx, const char* fname) { | ||
| void write_index(const Index* idx, const char* fname, int io_flags) { | ||
| FileIOWriter writer(fname); | ||
| write_index(idx, &writer); | ||
| write_index(idx, &writer, io_flags); | ||
| } | ||
|
|
||
| void write_VectorTransform(const VectorTransform* vt, const char* fname) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -133,6 +133,42 @@ def test_ndis_stats(self): | |
| Dhnsw, Ihnsw = index.search(self.xq, 1) | ||
| self.assertGreater(stats.ndis, len(self.xq) * index.hnsw.efSearch) | ||
|
|
||
| def test_io_no_storage(self): | ||
| d = self.xq.shape[1] | ||
| index = faiss.IndexHNSWFlat(d, 16) | ||
| index.add(self.xb) | ||
|
|
||
| Dref, Iref = index.search(self.xq, 5) | ||
|
|
||
| # test writing without storage | ||
| index2 = faiss.deserialize_index( | ||
| faiss.serialize_index(index, faiss.IO_FLAG_SKIP_STORAGE) | ||
| ) | ||
| self.assertEquals(index2.storage, None) | ||
| self.assertRaises( | ||
| RuntimeError, | ||
| index2.search, self.xb, 1) | ||
|
|
||
| # make sure we can store an index with empty storage | ||
| index4 = faiss.deserialize_index( | ||
| faiss.serialize_index(index2)) | ||
|
|
||
| # add storage afterwards | ||
| index.storage = faiss.clone_index(index.storage) | ||
| index.own_fields = True | ||
|
|
||
| Dnew, Inew = index.search(self.xq, 5) | ||
| np.testing.assert_array_equal(Dnew, Dref) | ||
| np.testing.assert_array_equal(Inew, Iref) | ||
|
|
||
| if False: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Where do we define the False? |
||
| # test reading without storage | ||
| # not implemented because it is hard to skip over an index | ||
| index3 = faiss.deserialize_index( | ||
| faiss.serialize_index(index), faiss.IO_FLAG_SKIP_STORAGE | ||
| ) | ||
| self.assertEquals(index3.storage, None) | ||
|
|
||
|
|
||
| class TestNSG(unittest.TestCase): | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When do we prefer using IndexHNSW, and when should we use IndexHNSWFlat?