diff --git a/include/rocksdb/utilities/secondary_index.h b/include/rocksdb/utilities/secondary_index.h
index c6734cfc872..0e5659d3f24 100644
--- a/include/rocksdb/utilities/secondary_index.h
+++ b/include/rocksdb/utilities/secondary_index.h
@@ -6,10 +6,13 @@
 
 #pragma once
 
+#include <memory>
 #include <optional>
 #include <string>
 #include <variant>
 
+#include "rocksdb/iterator.h"
+#include "rocksdb/options.h"
 #include "rocksdb/rocksdb_namespace.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/status.h"
@@ -96,6 +99,32 @@ class SecondaryIndex {
       const Slice& primary_column_value, const Slice& previous_column_value,
       std::optional<std::variant<Slice, std::string>>* secondary_value)
       const = 0;
+
+  // Create an iterator that can be used by applications to query the index.
+  // This method takes a ReadOptions structure, which can be used by
+  // applications to provide (implementation-specific) query parameters to the
+  // index as well as an underlying iterator over the index's secondary column
+  // family, which the returned iterator is expected to take ownership of and
+  // use to read the actual secondary index entries. (Providing the underlying
+  // iterator this way enables querying the index as of a specific point in time
+  // for example.)
+  //
+  // Querying the index can be performed by calling the returned iterator's
+  // Seek API with a search target, and then using Next (and potentially
+  // Prev) to iterate through the matching index entries. SeekToFirst,
+  // SeekToLast, and SeekForPrev are not expected to be supported by the
+  // iterator. The iterator should expose primary keys, that is, the secondary
+  // key prefix should be stripped from the index entries.
+  //
+  // The exact semantics of the returned iterator depend on the index and are
+  // implementation-specific. For simple indices, the search target might be a
+  // primary column value, and the iterator might return all primary keys that
+  // have the given column value; however, other semantics are also possible.
+  // For vector indices, the search target might be a vector, and the iterator
+  // might return similar vectors from the index.
+  virtual std::unique_ptr<Iterator> NewIterator(
+      const ReadOptions& read_options,
+      std::unique_ptr<Iterator>&& underlying_it) const = 0;
 };
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/secondary_index/faiss_ivf_index.cc b/utilities/secondary_index/faiss_ivf_index.cc
index 0ad11411951..7aa65d21204 100644
--- a/utilities/secondary_index/faiss_ivf_index.cc
+++ b/utilities/secondary_index/faiss_ivf_index.cc
@@ -210,4 +210,11 @@ Status FaissIVFIndex::GetSecondaryValue(
   return Status::OK();
 }
 
+std::unique_ptr<Iterator> FaissIVFIndex::NewIterator(
+    const ReadOptions& /* read_options */,
+    std::unique_ptr<Iterator>&& /* underlying_it */) const {
+  // TODO: implement this
+  return std::unique_ptr<Iterator>(NewErrorIterator(Status::NotSupported()));
+}
+
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/secondary_index/faiss_ivf_index.h b/utilities/secondary_index/faiss_ivf_index.h
index 78463c22cd4..b226503adea 100644
--- a/utilities/secondary_index/faiss_ivf_index.h
+++ b/utilities/secondary_index/faiss_ivf_index.h
@@ -43,6 +43,10 @@ class FaissIVFIndex : public SecondaryIndex {
       std::optional<std::variant<Slice, std::string>>* secondary_value)
       const override;
 
+  std::unique_ptr<Iterator> NewIterator(
+      const ReadOptions& read_options,
+      std::unique_ptr<Iterator>&& underlying_it) const override;
+
  private:
   class Adapter;
 
diff --git a/utilities/secondary_index/secondary_index_iterator.h b/utilities/secondary_index/secondary_index_iterator.h
new file mode 100644
index 00000000000..048cd63f3e3
--- /dev/null
+++ b/utilities/secondary_index/secondary_index_iterator.h
@@ -0,0 +1,137 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+//
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+
+#pragma once
+
+#include <string>
+#include <variant>
+
+#include "rocksdb/iterator.h"
+#include "rocksdb/status.h"
+#include "rocksdb/utilities/secondary_index.h"
+#include "util/overload.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// A simple iterator that can be used to query a secondary index (that is, find
+// the primary keys for a given search target). Can be used as-is or as a
+// building block for more complex iterators.
+class SecondaryIndexIterator : public Iterator {
+ public:
+  SecondaryIndexIterator(const SecondaryIndex* index,
+                         std::unique_ptr<Iterator>&& underlying_it)
+      : index_(index), underlying_it_(std::move(underlying_it)) {
+    assert(index_);
+    assert(underlying_it_);
+  }
+
+  bool Valid() const override {
+    return status_.ok() && underlying_it_->Valid() &&
+           underlying_it_->key().starts_with(prefix_);
+  }
+
+  void SeekToFirst() override {
+    status_ = Status::NotSupported(
+        "SeekToFirst is not supported for secondary index iterators");
+  }
+
+  void SeekToLast() override {
+    status_ = Status::NotSupported(
+        "SeekToLast is not supported for secondary index iterators");
+  }
+
+  void Seek(const Slice& target) override {
+    status_ = Status::OK();
+
+    std::variant<Slice, std::string> prefix;
+
+    const Status s = index_->GetSecondaryKeyPrefix(target, &prefix);
+    if (!s.ok()) {
+      status_ = s;
+      return;
+    }
+
+    prefix_ = std::visit(
+        overload{
+            [](const Slice& value) -> std::string { return value.ToString(); },
+            [](const std::string& value) -> std::string { return value; }},
+        prefix);
+
+    // FIXME: this works for BytewiseComparator but not for all comparators in
+    // general
+    underlying_it_->Seek(prefix_);
+  }
+
+  void SeekForPrev(const Slice& /* target */) override {
+    status_ = Status::NotSupported(
+        "SeekForPrev is not supported for secondary index iterators");
+  }
+
+  void Next() override {
+    assert(Valid());
+
+    underlying_it_->Next();
+  }
+
+  void Prev() override {
+    assert(Valid());
+
+    underlying_it_->Prev();
+  }
+
+  bool PrepareValue() override {
+    assert(Valid());
+
+    return underlying_it_->PrepareValue();
+  }
+
+  Status status() const override {
+    if (!status_.ok()) {
+      return status_;
+    }
+
+    return underlying_it_->status();
+  }
+
+  Slice key() const override {
+    assert(Valid());
+
+    Slice key = underlying_it_->key();
+    key.remove_prefix(prefix_.size());
+
+    return key;
+  }
+
+  Slice value() const override {
+    assert(Valid());
+
+    return underlying_it_->value();
+  }
+
+  const WideColumns& columns() const override {
+    assert(Valid());
+
+    return underlying_it_->columns();
+  }
+
+  Slice timestamp() const override {
+    assert(Valid());
+
+    return underlying_it_->timestamp();
+  }
+
+  Status GetProperty(std::string prop_name, std::string* prop) override {
+    return underlying_it_->GetProperty(std::move(prop_name), prop);
+  }
+
+ private:
+  const SecondaryIndex* index_;
+  std::unique_ptr<Iterator> underlying_it_;
+  Status status_;
+  std::string prefix_;
+};
+
+}  // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc
index fd1715ea0f6..21d95beb609 100644
--- a/utilities/transactions/transaction_test.cc
+++ b/utilities/transactions/transaction_test.cc
@@ -29,7 +29,7 @@
 #include "util/random.h"
 #include "util/string_util.h"
 #include "utilities/merge_operators.h"
-#include "utilities/merge_operators/string_append/stringappend.h"
+#include "utilities/secondary_index/secondary_index_iterator.h"
 #include "utilities/transactions/pessimistic_transaction_db.h"
 
 namespace ROCKSDB_NAMESPACE {
@@ -8083,6 +8083,13 @@ TEST_P(TransactionTest, SecondaryIndex) {
       return Status::OK();
     }
 
+    std::unique_ptr<Iterator> NewIterator(
+        const ReadOptions& /* read_options */,
+        std::unique_ptr<Iterator>&& underlying_it) const override {
+      return std::make_unique<SecondaryIndexIterator>(this,
+                                                      std::move(underlying_it));
+    }
+
    private:
     ColumnFamilyHandle* primary_cfh_{};
     ColumnFamilyHandle* secondary_cfh_{};
@@ -8181,6 +8188,7 @@ TEST_P(TransactionTest, SecondaryIndex) {
   }
 
   {
+    // Read the raw secondary index entries from CF2
     std::unique_ptr<Iterator> it(db->NewIterator(ReadOptions(), cfh2));
 
     it->SeekToFirst();
@@ -8198,6 +8206,58 @@ TEST_P(TransactionTest, SecondaryIndex) {
     ASSERT_OK(it->status());
   }
 
+  {
+    // Query the secondary index
+    std::unique_ptr<Iterator> underlying_it(
+        db->NewIterator(ReadOptions(), cfh2));
+    std::unique_ptr<Iterator> it(
+        index->NewIterator(ReadOptions(), std::move(underlying_it)));
+
+    it->SeekToFirst();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->SeekToLast();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->SeekForPrev("box");
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->Seek("box");  // last character used for indexing: x
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key3");
+    ASSERT_EQ(it->value(), "zab");
+
+    it->Next();
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key4");
+    ASSERT_EQ(it->value(), "xuuq");
+
+    it->Prev();
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key3");
+    ASSERT_EQ(it->value(), "zab");
+
+    it->Next();
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key4");
+    ASSERT_EQ(it->value(), "xuuq");
+
+    it->Next();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_OK(it->status());
+
+    it->Seek("toy");  // last character used for indexing: y
+    ASSERT_FALSE(it->Valid());
+    ASSERT_OK(it->status());
+  }
+
   // Make some updates to the key-values indexed above through the database
   // interface (i.e. using implicit transactions)
 
@@ -8256,6 +8316,7 @@ TEST_P(TransactionTest, SecondaryIndex) {
   }
 
   {
+    // Read the raw secondary index entries from CF2
     std::unique_ptr<Iterator> it(db->NewIterator(ReadOptions(), cfh2));
 
     it->SeekToFirst();
@@ -8272,6 +8333,46 @@ TEST_P(TransactionTest, SecondaryIndex) {
     ASSERT_FALSE(it->Valid());
     ASSERT_OK(it->status());
   }
+
+  {
+    // Query the secondary index
+    std::unique_ptr<Iterator> underlying_it(
+        db->NewIterator(ReadOptions(), cfh2));
+    std::unique_ptr<Iterator> it(
+        index->NewIterator(ReadOptions(), std::move(underlying_it)));
+
+    it->SeekToFirst();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->SeekToLast();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->SeekForPrev("bot");
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->Seek("bot");  // last character used for indexing: t
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key1");
+    ASSERT_EQ(it->value(), "tluarg");
+
+    it->Next();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_OK(it->status());
+
+    it->Seek("toy");  // last character used for indexing: y
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key3");
+    ASSERT_EQ(it->value(), "ylprag");
+
+    it->Next();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_OK(it->status());
+  }
 }
 
 TEST_F(TransactionDBTest, CollapseKey) {