Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions faiss/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ set(FAISS_SRC
impl/io.cpp
impl/kmeans1d.cpp
impl/lattice_Zn.cpp
impl/mapped_io.cpp
impl/pq4_fast_scan.cpp
impl/pq4_fast_scan_search_1.cpp
impl/pq4_fast_scan_search_qbs.cpp
impl/residual_quantizer_encode_steps.cpp
impl/io.cpp
impl/lattice_Zn.cpp
impl/zerocopy_io.cpp
impl/NNDescent.cpp
invlists/BlockInvertedLists.cpp
invlists/DirectMap.cpp
Expand Down
2 changes: 1 addition & 1 deletion faiss/IndexFlatCodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ CodePacker* IndexFlatCodes::get_CodePacker() const {
}

void IndexFlatCodes::permute_entries(const idx_t* perm) {
std::vector<uint8_t> new_codes(codes.size());
MaybeOwnedVector<uint8_t> new_codes(codes.size());

for (idx_t i = 0; i < ntotal; i++) {
memcpy(new_codes.data() + i * code_size,
Expand Down
6 changes: 4 additions & 2 deletions faiss/IndexFlatCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

#pragma once

#include <vector>

#include <faiss/Index.h>
#include <faiss/impl/DistanceComputer.h>
#include <vector>
#include <faiss/impl/maybe_owned_vector.h>

namespace faiss {

Expand All @@ -21,7 +23,7 @@ struct IndexFlatCodes : Index {
size_t code_size;

/// encoded dataset, size ntotal * code_size
std::vector<uint8_t> codes;
MaybeOwnedVector<uint8_t> codes;

IndexFlatCodes();

Expand Down
2 changes: 1 addition & 1 deletion faiss/impl/HNSW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@ void HNSW::permute_entries(const idx_t* map) {
// swap everyone
std::swap(levels, new_levels);
std::swap(offsets, new_offsets);
std::swap(neighbors, new_neighbors);
neighbors = std::move(new_neighbors);
}

/**************************************************************
Expand Down
3 changes: 2 additions & 1 deletion faiss/impl/HNSW.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include <faiss/Index.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/maybe_owned_vector.h>
#include <faiss/impl/platform_macros.h>
#include <faiss/utils/Heap.h>
#include <faiss/utils/random.h>
Expand Down Expand Up @@ -121,7 +122,7 @@ struct HNSW {

/// neighbors[offsets[i]:offsets[i+1]] is the list of neighbors of vector i
/// for all levels. this is where all storage goes.
std::vector<storage_idx_t> neighbors;
MaybeOwnedVector<storage_idx_t> neighbors;

/// entry point in the search structure (one of the points with maximum
/// level
Expand Down
205 changes: 189 additions & 16 deletions faiss/impl/index_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,167 @@
#include <faiss/IndexBinaryHash.h>
#include <faiss/IndexBinaryIVF.h>

// mmap-ing and viewing facilities
#include <faiss/impl/maybe_owned_vector.h>

#include <faiss/impl/mapped_io.h>
#include <faiss/impl/zerocopy_io.h>

namespace faiss {

/*************************************************************
* Mmap-ing and viewing facilities
**************************************************************/

/// Read exactly `size` elements from `f` into `target`.
///
/// If `f` is a ZeroCopyIOReader and `VectorT` is a MaybeOwnedVector, `target`
/// is replaced by a view created from the address returned by
/// ZeroCopyIOReader::get_data_view(). In every other case `target` is resized
/// to `size` and filled through READANDCHECK.
///
/// @param target  vector that receives the data (or becomes a view)
/// @param f       reader to pull the data from
/// @param size    number of elements (not bytes) to read
/// @throws        a Faiss exception (via FAISS_THROW_IF_NOT_FMT) when the
///                reader hands back fewer elements than requested
template <typename VectorT>
void read_vector_with_size(VectorT& target, IOReader* f, size_t size) {
    ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f);
    if (zr != nullptr) {
        if constexpr (is_maybe_owned_vector_v<VectorT>) {
            // create a view
            char* address = nullptr;
            const size_t nread = zr->get_data_view(
                    reinterpret_cast<void**>(&address),
                    sizeof(typename VectorT::value_type),
                    size);

            // nread and size are size_t, hence %zu (not the signed %zd)
            FAISS_THROW_IF_NOT_FMT(
                    nread == (size),
                    "read error in %s: %zu != %zu (%s)",
                    f->name.c_str(),
                    nread,
                    size,
                    strerror(errno));

            target = VectorT::create_view(address, nread);
            return;
        }
    }

    // generic path: allocate and copy
    target.resize(size);
    READANDCHECK(target.data(), size);
}

/// Read a size-prefixed vector from `f` into `target`.
///
/// Three paths, tried in order:
///  1. `f` is a MappedFileIOReader and `VectorT` is a MaybeOwnedVector: the
///     element count is read, then `target` becomes a view over the address
///     returned by MappedFileIOReader::mmap(), created with `mf->mmap_owner`.
///  2. `f` is a ZeroCopyIOReader and `VectorT` is a MaybeOwnedVector: the
///     element count is read, then `target` becomes a view over the address
///     returned by ZeroCopyIOReader::get_data_view(), created with a null
///     owner.
///  3. Otherwise: the READVECTOR macro is used.
///
/// @param target  vector that receives the data (or becomes a view)
/// @param f       reader to pull the data from
/// @throws        a Faiss exception (via FAISS_THROW_IF_NOT_FMT) when either
///                view path yields fewer elements than the size prefix
template <typename VectorT>
void read_vector(VectorT& target, IOReader* f) {
    // is it a mmap-enabled reader?
    MappedFileIOReader* mf = dynamic_cast<MappedFileIOReader*>(f);
    if (mf != nullptr) {
        // check if the use case is right
        if constexpr (is_maybe_owned_vector_v<VectorT>) {
            // read the size
            size_t size = 0;
            READANDCHECK(&size, 1);

            // ok, mmap and check
            char* address = nullptr;
            const size_t nread = mf->mmap(
                    reinterpret_cast<void**>(&address),
                    sizeof(typename VectorT::value_type),
                    size);

            // nread and size are size_t, hence %zu (not the signed %zd)
            FAISS_THROW_IF_NOT_FMT(
                    nread == (size),
                    "read error in %s: %zu != %zu (%s)",
                    f->name.c_str(),
                    nread,
                    size,
                    strerror(errno));

            target = VectorT::create_view(address, nread, mf->mmap_owner);
            return;
        }
    }

    // is it a zero-copy reader?
    ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f);
    if (zr != nullptr) {
        if constexpr (is_maybe_owned_vector_v<VectorT>) {
            // read the size first
            size_t size = 0;
            READANDCHECK(&size, 1);

            // create a view
            char* address = nullptr;
            const size_t nread = zr->get_data_view(
                    reinterpret_cast<void**>(&address),
                    sizeof(typename VectorT::value_type),
                    size);

            // Reject a short view instead of silently building a vector
            // with fewer elements than the size prefix announced; this
            // mirrors the check done on the mmap path above.
            FAISS_THROW_IF_NOT_FMT(
                    nread == (size),
                    "read error in %s: %zu != %zu (%s)",
                    f->name.c_str(),
                    nread,
                    size,
                    strerror(errno));

            target = VectorT::create_view(address, nread, nullptr);
            return;
        }
    }

    // the default case
    READVECTOR(target);
}

/// Read a size-prefixed "xb" vector from `f` into `target`.
///
/// On both view paths the size prefix read from the stream is multiplied by 4
/// before being used as the element count.
///
/// Three paths, tried in order:
///  1. `f` is a MappedFileIOReader and `VectorT` is a MaybeOwnedVector:
///     `target` becomes a view over the address returned by
///     MappedFileIOReader::mmap(), created with `mf->mmap_owner`.
///  2. `f` is a ZeroCopyIOReader and `VectorT` is exactly
///     MaybeOwnedVector<uint8_t>: `target` becomes a view over the address
///     returned by ZeroCopyIOReader::get_data_view(), created with a null
///     owner.
///  3. Otherwise: the READXBVECTOR macro is used.
///
/// NOTE(review): path 1 accepts any MaybeOwnedVector while path 2 is limited
/// to MaybeOwnedVector<uint8_t>; presumably only uint8_t storage is ever
/// passed here — confirm whether the asymmetry is intentional.
///
/// @param target  vector that receives the data (or becomes a view)
/// @param f       reader to pull the data from
/// @throws        a Faiss exception (via FAISS_THROW_IF_NOT_FMT) when either
///                view path yields fewer elements than requested
template <typename VectorT>
void read_xb_vector(VectorT& target, IOReader* f) {
    // is it a mmap-enabled reader?
    MappedFileIOReader* mf = dynamic_cast<MappedFileIOReader*>(f);
    if (mf != nullptr) {
        // check if the use case is right
        if constexpr (is_maybe_owned_vector_v<VectorT>) {
            // read the size
            size_t size = 0;
            READANDCHECK(&size, 1);

            size *= 4;

            // ok, mmap and check
            char* address = nullptr;
            const size_t nread = mf->mmap(
                    reinterpret_cast<void**>(&address),
                    sizeof(typename VectorT::value_type),
                    size);

            // nread and size are size_t, hence %zu (not the signed %zd)
            FAISS_THROW_IF_NOT_FMT(
                    nread == (size),
                    "read error in %s: %zu != %zu (%s)",
                    f->name.c_str(),
                    nread,
                    size,
                    strerror(errno));

            target = VectorT::create_view(address, nread, mf->mmap_owner);
            return;
        }
    }

    // is it a zero-copy reader?
    ZeroCopyIOReader* zr = dynamic_cast<ZeroCopyIOReader*>(f);
    if (zr != nullptr) {
        if constexpr (std::is_same_v<VectorT, MaybeOwnedVector<uint8_t>>) {
            // read the size first
            size_t size = 0;
            READANDCHECK(&size, 1);

            size *= 4;

            // create a view
            char* address = nullptr;
            const size_t nread = zr->get_data_view(
                    reinterpret_cast<void**>(&address),
                    sizeof(typename VectorT::value_type),
                    size);

            // Reject a short view instead of silently building a vector
            // with fewer elements than requested; this mirrors the check
            // done on the mmap path above.
            FAISS_THROW_IF_NOT_FMT(
                    nread == (size),
                    "read error in %s: %zu != %zu (%s)",
                    f->name.c_str(),
                    nread,
                    size,
                    strerror(errno));

            target = VectorT::create_view(address, nread, nullptr);
            return;
        }
    }

    // the default case
    READXBVECTOR(target);
}

/*************************************************************
* Read
**************************************************************/
Expand Down Expand Up @@ -275,7 +434,7 @@ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
aq->search_type == AdditiveQuantizer::ST_norm_cqint4 ||
aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
aq->search_type == AdditiveQuantizer::ST_norm_rq2x4) {
READXBVECTOR(aq->qnorm.codes);
read_xb_vector(aq->qnorm.codes, f);
aq->qnorm.ntotal = aq->qnorm.codes.size() / 4;
aq->qnorm.update_permutation();
}
Expand Down Expand Up @@ -365,7 +524,7 @@ static void read_HNSW(HNSW* hnsw, IOReader* f) {
READVECTOR(hnsw->cum_nneighbor_per_level);
READVECTOR(hnsw->levels);
READVECTOR(hnsw->offsets);
READVECTOR(hnsw->neighbors);
read_vector(hnsw->neighbors, f);

READ1(hnsw->entry_point);
READ1(hnsw->max_level);
Expand Down Expand Up @@ -545,7 +704,7 @@ Index* read_index(IOReader* f, int io_flags) {
}
read_index_header(idxf, f);
idxf->code_size = idxf->d * sizeof(float);
READXBVECTOR(idxf->codes);
read_xb_vector(idxf->codes, f);
FAISS_THROW_IF_NOT(
idxf->codes.size() == idxf->ntotal * idxf->code_size);
// leak!
Expand Down Expand Up @@ -576,7 +735,7 @@ Index* read_index(IOReader* f, int io_flags) {
idxl->rrot = *rrot;
delete rrot;
}
READVECTOR(idxl->codes);
read_vector(idxl->codes, f);
FAISS_THROW_IF_NOT(
idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits);
FAISS_THROW_IF_NOT(
Expand All @@ -589,7 +748,7 @@ Index* read_index(IOReader* f, int io_flags) {
read_index_header(idxp, f);
read_ProductQuantizer(&idxp->pq, f);
idxp->code_size = idxp->pq.code_size;
READVECTOR(idxp->codes);
read_vector(idxp->codes, f);
if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
READ1(idxp->search_type);
READ1(idxp->encode_signs);
Expand All @@ -611,28 +770,28 @@ Index* read_index(IOReader* f, int io_flags) {
read_ResidualQuantizer(&idxr->rq, f, io_flags);
}
READ1(idxr->code_size);
READVECTOR(idxr->codes);
read_vector(idxr->codes, f);
idx = idxr;
} else if (h == fourcc("IxLS")) {
auto idxr = new IndexLocalSearchQuantizer();
read_index_header(idxr, f);
read_LocalSearchQuantizer(&idxr->lsq, f);
READ1(idxr->code_size);
READVECTOR(idxr->codes);
read_vector(idxr->codes, f);
idx = idxr;
} else if (h == fourcc("IxPR")) {
auto idxpr = new IndexProductResidualQuantizer();
read_index_header(idxpr, f);
read_ProductResidualQuantizer(&idxpr->prq, f, io_flags);
READ1(idxpr->code_size);
READVECTOR(idxpr->codes);
read_vector(idxpr->codes, f);
idx = idxpr;
} else if (h == fourcc("IxPL")) {
auto idxpl = new IndexProductLocalSearchQuantizer();
read_index_header(idxpl, f);
read_ProductLocalSearchQuantizer(&idxpl->plsq, f);
READ1(idxpl->code_size);
READVECTOR(idxpl->codes);
read_vector(idxpl->codes, f);
idx = idxpl;
} else if (h == fourcc("ImRQ")) {
ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer();
Expand Down Expand Up @@ -789,7 +948,7 @@ Index* read_index(IOReader* f, int io_flags) {
IndexScalarQuantizer* idxs = new IndexScalarQuantizer();
read_index_header(idxs, f);
read_ScalarQuantizer(&idxs->sq, f);
READVECTOR(idxs->codes);
read_vector(idxs->codes, f);
idxs->code_size = idxs->sq.code_size;
idx = idxs;
} else if (h == fourcc("IxLa")) {
Expand Down Expand Up @@ -947,7 +1106,7 @@ Index* read_index(IOReader* f, int io_flags) {
READ1(idxp->code_size_1);
READ1(idxp->code_size_2);
READ1(idxp->code_size);
READVECTOR(idxp->codes);
read_vector(idxp->codes, f);
idx = idxp;
} else if (
h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
Expand Down Expand Up @@ -1071,14 +1230,28 @@ Index* read_index(IOReader* f, int io_flags) {
}

/// Read an index from an already-open C stream.
///
/// When every bit of IO_FLAG_MMAP_IFC is set in `io_flags`, the stream is
/// wrapped in a MmappedFileMappingOwner and read through a MappedFileIOReader;
/// otherwise a plain FileIOReader is used.
///
/// @param f         open stream to read from
/// @param io_flags  flags forwarded to read_index(IOReader*, int)
/// @return          the index produced by read_index(IOReader*, int)
Index* read_index(FILE* f, int io_flags) {
    if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) {
        // enable mmap-supporting IOReader
        auto owner = std::make_shared<MmappedFileMappingOwner>(f);
        MappedFileIOReader reader(owner);
        return read_index(&reader, io_flags);
    }

    // Regular buffered read. No unconditional early return may precede the
    // flag test above, or the mmap path becomes unreachable.
    FileIOReader reader(f);
    return read_index(&reader, io_flags);
}

/// Read an index from the file named `fname`.
///
/// When every bit of IO_FLAG_MMAP_IFC is set in `io_flags`, the file is
/// wrapped in a MmappedFileMappingOwner and read through a MappedFileIOReader;
/// otherwise a plain FileIOReader is used.
///
/// @param fname     path of the index file
/// @param io_flags  flags forwarded to read_index(IOReader*, int)
/// @return          the index produced by read_index(IOReader*, int)
Index* read_index(const char* fname, int io_flags) {
    if ((io_flags & IO_FLAG_MMAP_IFC) == IO_FLAG_MMAP_IFC) {
        // enable mmap-supporting IOReader
        auto owner = std::make_shared<MmappedFileMappingOwner>(fname);
        MappedFileIOReader reader(owner);
        return read_index(&reader, io_flags);
    }

    // Regular buffered read. The FileIOReader is only constructed on this
    // path, so the file is not opened a second time when mmap is requested.
    FileIOReader reader(fname);
    return read_index(&reader, io_flags);
}

VectorTransform* read_VectorTransform(const char* fname) {
Expand Down
4 changes: 2 additions & 2 deletions faiss/impl/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

#pragma once

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

#include <faiss/Index.h>

namespace faiss {

struct IOReader {
Expand Down
Loading
Loading