Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log posting list reads. #32950

Merged
merged 2 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
LOG_SETUP(".diskindex.bitvectordictionary");

using search::index::BitVectorDictionaryLookupResult;
using search::index::PostingListFileRange;

namespace search::diskindex {

Expand Down Expand Up @@ -117,4 +118,14 @@ BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_resul
return read_bitvector(lookup_result, read_stats);
}

/*
 * Compute the file range for the bitvector referenced by lookup_result.
 * The start is _vectorSize * idx + _datHeaderLen and the range spans
 * _vectorSize. An invalid lookup result yields the empty range {0, 0}.
 */
PostingListFileRange
BitVectorDictionary::get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const
{
    if (lookup_result.valid()) {
        // Widen to 64 bits before multiplying so the product is computed in uint64_t.
        uint64_t range_start = static_cast<uint64_t>(_vectorSize) * lookup_result.idx + _datHeaderLen;
        return {range_start, range_start + _vectorSize};
    }
    return {0, 0};
}

}
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "bitvectorkeyscope.h"
#include <vespa/searchlib/index/bitvector_dictionary_lookup_result.h>
#include <vespa/searchlib/index/posting_list_file_range.h>
#include <vespa/searchlib/index/bitvectorkeys.h>
#include <vespa/searchlib/common/tunefileinfo.h>
#include <string>
Expand Down Expand Up @@ -70,6 +71,7 @@ class BitVectorDictionary
std::unique_ptr<BitVector> read_bitvector(index::BitVectorDictionaryLookupResult lookup_result,
ReadStats &read_stats);
std::unique_ptr<BitVector> read_bitvector(index::BitVectorDictionaryLookupResult lookup_result);
index::PostingListFileRange get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const;

// Returns the stored _docIdLimit value.
uint32_t getDocIdLimit() const noexcept { return _docIdLimit; }

Expand Down
34 changes: 34 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,46 @@ DiskTermBlueprint::DiskTermBlueprint(const FieldSpec & field,
_lookupRes.counts._numDocs == 0));
}

/*
 * Emit a debug log entry describing a bitvector read. The logged fields
 * are, in order: field name, query term, file id, word number, document
 * count, bitvector index, start offset of the bitvector file range and
 * the size of that range.
 *
 * Callers guard this with LOG_WOULD_LOG(debug); it is marked noinline so
 * the logging code stays out of the callers' hot path.
 */
__attribute__((noinline)) void
DiskTermBlueprint::log_bitvector_read() const
{
    auto range = _field_index.get_bitvector_file_range(_bitvector_lookup_result);
    LOG(debug, "DiskTermBlueprint::fetchPosting "
        "bitvector %s %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu32 " %" PRIu64 " %" PRIu64,
        _field.getName().c_str(), _query_term.c_str(), _field_index.get_file_id(),
        _lookupRes.wordNum, _lookupRes.counts._numDocs,
        _bitvector_lookup_result.idx,
        range.start_offset, range.size());
}

/*
 * Emit a debug log entry describing a posting list read. The logged
 * fields are, in order: field name, query term, file id, word number,
 * document count, bit offset, bit length, start offset of the posting
 * list file range and the size of that range.
 *
 * Callers guard this with LOG_WOULD_LOG(debug); it is marked noinline so
 * the logging code stays out of the callers' hot path.
 */
__attribute__((noinline)) void
DiskTermBlueprint::log_posting_list_read() const
{
    auto range = _field_index.get_posting_list_file_range(_lookupRes);
    LOG(debug, "DiskTermBlueprint::fetchPosting "
        "posting %s %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64,
        _field.getName().c_str(), _query_term.c_str(), _field_index.get_file_id(),
        _lookupRes.wordNum, _lookupRes.counts._numDocs,
        _lookupRes.bitOffset, _lookupRes.counts._bitLength,
        range.start_offset, range.size());
}

void
DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo)
{
(void) execInfo;
if (!_fetchPostingsDone) {
if (_useBitVector && _bitvector_lookup_result.valid()) {
if (LOG_WOULD_LOG(debug)) {
log_bitvector_read();
}
_bitVector = _field_index.read_bit_vector(_bitvector_lookup_result);
}
if (!_bitVector) {
if (LOG_WOULD_LOG(debug)) {
log_posting_list_read();
}
_postingHandle = _field_index.read_posting_list(_lookupRes);
}
}
Expand All @@ -90,6 +121,9 @@ DiskTermBlueprint::get_bitvector() const
}
std::lock_guard guard(_mutex);
if (!_late_bitvector) {
if (LOG_WOULD_LOG(debug)) {
log_bitvector_read();
}
_late_bitvector = _field_index.read_bit_vector(_bitvector_lookup_result);
assert(_late_bitvector);
}
Expand Down
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint
mutable std::shared_ptr<BitVector> _late_bitvector;

const BitVector* get_bitvector() const;
void log_bitvector_read() const;
void log_posting_list_read() const;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe these should be tagged as __attribute__((noinline)) to keep them off the hot path?

public:
/**
* Create a new blueprint.
Expand Down
8 changes: 8 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/field_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
using DiskPostingFile = index::PostingListFileRandRead;
using DiskPostingFileReal = Zc4PosOccRandRead;
using DiskPostingFileDynamicKReal = ZcPosOccRandRead;
using PostingListFileRange = index::PostingListFileRange;

class LockedFieldIndexIoStats {
FieldIndexIoStats _stats;
Expand Down Expand Up @@ -76,17 +77,24 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
bool trim) const;
index::PostingListHandle read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) const override;
index::PostingListHandle read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const;
// File range used by the posting list for lookup_result, as reported by the posting file (_posting_file).
PostingListFileRange get_posting_list_file_range(const search::index::DictionaryLookupResult& lookup_result) const {
    auto& posting_file = *_posting_file;
    return posting_file.get_posting_list_file_range(lookup_result);
}
index::BitVectorDictionaryLookupResult lookup_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const;
std::shared_ptr<BitVector> read_uncached_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const;
std::shared_ptr<BitVector> read(const IPostingListCache::BitVectorKey& key, IPostingListCache::Context& ctx) const override;
std::shared_ptr<BitVector> read_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const;
// File range used by the bitvector for lookup_result, as reported by the bitvector dictionary (_bit_vector_dict).
// NOTE(review): _bit_vector_dict is dereferenced unconditionally - confirm callers only reach this when it is set.
PostingListFileRange get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const {
    auto& bit_vector_dict = *_bit_vector_dict;
    return bit_vector_dict.get_bitvector_file_range(lookup_result);
}
std::unique_ptr<search::queryeval::SearchIterator> create_iterator(const search::index::DictionaryLookupResult& lookup_result,
const index::PostingListHandle& handle,
const search::fef::TermFieldMatchDataArray& tfmda) const;
index::FieldLengthInfo get_field_length_info() const;

// Non-owning pointer to the dictionary held by _dict (whatever _dict.get() yields).
index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); }
FieldIndexStats get_stats(bool clear_disk_io_stats) const;
// Returns the stored _file_id value.
uint64_t get_file_id() const noexcept { return _file_id; }
// Returns the stored _field_id value.
uint32_t get_field_id() const noexcept { return _field_id; }
// Returns the stored _posting_list_cache_enabled flag.
bool is_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; }
};
Expand Down
59 changes: 34 additions & 25 deletions searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ using search::bitcompression::EGPosOccDecodeContext;
using search::bitcompression::EGPosOccDecodeContextCooked;
using search::bitcompression::PosOccFieldsParams;
using search::bitcompression::FeatureDecodeContext;
using search::index::DictionaryLookupResult;
using search::index::FieldLengthInfo;
using search::index::PostingListCounts;
using search::index::PostingListFileRange;
using search::index::PostingListHandle;
using search::ComprFileReadContext;

Expand All @@ -31,6 +33,17 @@ std::string myId4("Zc.4");
std::string myId5("Zc.5");
std::string interleaved_features("interleaved_features");

/*
 * Translate a dictionary lookup result into the byte range of the file
 * that covers the posting list. header_bit_size is added to the bit offset
 * before converting to bytes, and both ends of the range are widened to
 * 64-bit (8 byte) boundaries.
 */
PostingListFileRange get_file_range(const DictionaryLookupResult& lookup_result, uint64_t header_bit_size)
{
    constexpr uint64_t align_mask = 7;
    const uint64_t first_bit = lookup_result.bitOffset + header_bit_size;
    const uint64_t end_bit = first_bit + lookup_result.counts._bitLength;
    // Byte containing the first bit, rounded down to a 64-bit boundary.
    const uint64_t range_start = (first_bit >> 3) & ~align_mask;
    // Whole bytes needed to reach the end bit, rounded up to a 64-bit boundary.
    const uint64_t range_end = (((end_bit + 7) >> 3) + align_mask) & ~align_mask;
    return {range_start, range_end};
}

}

namespace search::diskindex {
Expand Down Expand Up @@ -91,25 +104,18 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
return handle;
}

uint64_t startOffset = (lookup_result.bitOffset + _headerBitSize) >> 3;
// Align start at 64-bit boundary
startOffset -= (startOffset & 7);
uint64_t endOffset = (lookup_result.bitOffset + _headerBitSize +
lookup_result.counts._bitLength + 7) >> 3;
// Align end at 64-bit boundary
endOffset += (-endOffset & 7);

void *mapPtr = _file->MemoryMapPtr(startOffset);
auto file_range = get_file_range(lookup_result, _headerBitSize);
void *mapPtr = _file->MemoryMapPtr(file_range.start_offset);
if (mapPtr != nullptr) {
handle._mem = mapPtr;
size_t pad_before = startOffset - vespalib::round_down_to_page_boundary(startOffset);
handle._read_bytes = vespalib::round_up_to_page_size(pad_before + endOffset - startOffset + decode_prefetch_size);
size_t pad_before = file_range.start_offset - vespalib::round_down_to_page_boundary(file_range.start_offset);
handle._read_bytes = vespalib::round_up_to_page_size(pad_before + file_range.size() + decode_prefetch_size);
} else {
uint64_t vectorLen = endOffset - startOffset;
uint64_t vectorLen = file_range.size();
size_t padBefore;
size_t padAfter;
size_t padExtraAfter; // Decode prefetch space
_file->DirectIOPadding(startOffset, vectorLen, padBefore, padAfter);
_file->DirectIOPadding(file_range.start_offset, vectorLen, padBefore, padAfter);
padExtraAfter = 0;
if (padAfter < decode_prefetch_size) {
padExtraAfter = decode_prefetch_size - padAfter;
Expand All @@ -120,10 +126,10 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
if (mallocLen > 0) {
alignedBuffer = _file->AllocateDirectIOBuffer(mallocLen);
assert(alignedBuffer != nullptr);
assert(endOffset + padAfter + padExtraAfter <= _fileSize);
assert(file_range.end_offset + padAfter + padExtraAfter <= _fileSize);
_file->ReadBuf(alignedBuffer,
padBefore + vectorLen + padAfter,
startOffset - padBefore);
file_range.start_offset - padBefore);
}
// Zero decode prefetch memory to avoid uninitialized reads
if (padExtraAfter > 0) {
Expand All @@ -136,7 +142,7 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
handle._allocSize = mallocLen;
handle._read_bytes = padBefore + vectorLen + padAfter;
}
handle._bitOffsetMem = (startOffset << 3) - _headerBitSize;
handle._bitOffsetMem = (file_range.start_offset << 3) - _headerBitSize;
return handle;
}

Expand All @@ -147,14 +153,8 @@ ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &looku
if (lookup_result.counts._bitLength == 0 || _memoryMapped) {
return;
}
uint64_t start_offset = (lookup_result.bitOffset + _headerBitSize) >> 3;
// Align start at 64-bit boundary
start_offset -= (start_offset & 7);
uint64_t end_offset = (lookup_result.bitOffset + _headerBitSize +
lookup_result.counts._bitLength + 7) >> 3;
// Align end at 64-bit boundary
end_offset += (-end_offset & 7);
size_t malloc_len = end_offset - start_offset + decode_prefetch_size;
auto file_range = get_file_range(lookup_result, _headerBitSize);
size_t malloc_len = file_range.size() + decode_prefetch_size;
if (handle._allocSize == malloc_len) {
assert(handle._allocMem.get() == handle._mem);
return;
Expand All @@ -169,7 +169,16 @@ ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &looku
handle._allocMem = std::shared_ptr<void>(mem, free);
handle._mem = mem;
handle._allocSize = malloc_len;
handle._read_bytes = end_offset - start_offset;
handle._read_bytes = file_range.size();
}

/*
 * Report the file range used by the posting list for lookup_result.
 * A lookup result with zero bit length is reported as the empty range {0, 0};
 * otherwise the range is derived from the lookup result and _headerBitSize.
 */
PostingListFileRange
ZcPosOccRandRead::get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const
{
    const bool has_posting_list = (lookup_result.counts._bitLength != 0);
    if (has_posting_list) {
        return get_file_range(lookup_result, _headerBitSize);
    }
    return {0, 0};
}

bool
Expand Down
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead

using DictionaryLookupResult = index::DictionaryLookupResult;
using PostingListCounts = index::PostingListCounts;
using PostingListFileRange = index::PostingListFileRange;
using PostingListHandle = index::PostingListHandle;

/**
Expand All @@ -46,6 +47,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead
PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override;
void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
double bloat_factor) const override;
PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const override;

bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override;
bool close() override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace search::index {
class BitVectorDictionaryLookupResult {
public:
static constexpr uint32_t invalid = std::numeric_limits<uint32_t>::max();
uint64_t idx;
uint32_t idx;

explicit BitVectorDictionaryLookupResult(uint32_t idx_in) noexcept
: idx(idx_in)
Expand Down
24 changes: 24 additions & 0 deletions searchlib/src/vespa/searchlib/index/posting_list_file_range.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <cstdint>

namespace search::index {

/*
 * Byte range within a posting list file that is used for a posting list.
 * The range might include padding at start and end due to the file format.
 * Offsets are in bytes. size() is end_offset - start_offset, so end_offset
 * is the offset just past the range and an empty range has both offsets equal.
 */
struct PostingListFileRange {
    uint64_t start_offset;
    uint64_t end_offset;

    // Trivial member-wise construction: cannot throw and is usable in constant expressions.
    constexpr PostingListFileRange(uint64_t start_offset_in, uint64_t end_offset_in) noexcept
        : start_offset(start_offset_in),
          end_offset(end_offset_in)
    {
    }
    // Number of bytes covered by the range.
    constexpr uint64_t size() const noexcept { return end_offset - start_offset; }
};

}
6 changes: 6 additions & 0 deletions searchlib/src/vespa/searchlib/index/postinglistfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ PostingListFileRandReadPassThrough::consider_trim_posting_list(const DictionaryL
return _lower->consider_trim_posting_list(lookup_result, handle, bloat_factor);
}

// Pass-through: the file range is whatever the wrapped reader (_lower) reports for lookup_result.
PostingListFileRange
PostingListFileRandReadPassThrough::get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const
{
    const auto& lower = *_lower;
    return lower.get_posting_list_file_range(lookup_result);
}

bool
PostingListFileRandReadPassThrough::open(const std::string &name,
const TuneFileRandRead &tuneFileRead)
Expand Down
4 changes: 4 additions & 0 deletions searchlib/src/vespa/searchlib/index/postinglistfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#pragma once

#include "dictionary_lookup_result.h"
#include "posting_list_file_range.h"
#include "postinglistcounts.h"
#include "postinglisthandle.h"
#include <vespa/searchlib/common/tunefileinfo.h>
Expand Down Expand Up @@ -167,6 +168,8 @@ class PostingListFileRandRead {
virtual void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
double bloat_factor) const = 0;

virtual PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const = 0;

/**
* Open posting list file for random read.
*/
Expand Down Expand Up @@ -206,6 +209,7 @@ class PostingListFileRandReadPassThrough : public PostingListFileRandRead {
PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override;
void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
double bloat_factor) const override;
PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const override;

bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override;
bool close() override;
Expand Down