From c6dbbcbf277ac35f398637980bb57398a4434dbc Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Sun, 20 Aug 2023 11:25:18 +0200 Subject: [PATCH] implemented better analyzer rewinding --- columnar/accessor/accessor.h | 11 ++++++++ columnar/accessor/accessortraits.h | 31 ++++++---------------- columnar/columnar.cpp | 42 +++++++++--------------------- 3 files changed, 32 insertions(+), 52 deletions(-) diff --git a/columnar/accessor/accessor.h b/columnar/accessor/accessor.h index 96e754b2..eff126c0 100644 --- a/columnar/accessor/accessor.h +++ b/columnar/accessor/accessor.h @@ -32,12 +32,23 @@ class MatchingBlocks_c FORCE_INLINE void Add ( int iBlock ) { m_dBlocks.push_back(iBlock); } FORCE_INLINE int GetBlock ( int iBlock ) const { return m_dBlocks[iBlock]; } FORCE_INLINE int GetNumBlocks() const { return (int)m_dBlocks.size(); } + FORCE_INLINE int Find ( int iStartBlock, int iValue ); private: std::vector m_dBlocks; }; +int MatchingBlocks_c::Find ( int iStartBlock, int iValue ) +{ + auto tFound = std::lower_bound ( m_dBlocks.begin()+iStartBlock, m_dBlocks.end(), iValue ); + if ( tFound==m_dBlocks.end() ) + return (int)m_dBlocks.size(); + + return tFound-m_dBlocks.begin(); +} + + using SharedBlocks_c = std::shared_ptr; class Analyzer_i : public common::BlockIterator_i diff --git a/columnar/accessor/accessortraits.h b/columnar/accessor/accessortraits.h index f72b16d7..b6545982 100644 --- a/columnar/accessor/accessortraits.h +++ b/columnar/accessor/accessortraits.h @@ -198,32 +198,17 @@ bool Analyzer_T::MoveToSubblock ( int iSubblock ) template bool Analyzer_T::HintRowID ( uint32_t tRowID ) { - int iNextSubblock = m_iCurSubblock; + int iNextSubblock = m_tSubblockCalc.GetSubblockId(tRowID); + if constexpr ( HAVE_MATCHING_BLOCKS ) + iNextSubblock = m_pMatchingSubblocks->Find ( m_iCurSubblock, iNextSubblock ); - // we assume that we are only advancing forward - while ( iNextSubblockGetBlock(iNextSubblock); - else - iSubblockID = iNextSubblock; - - uint32_t tSubblockStart = m_tSubblockCalc.SubblockId2RowId(iSubblockID); - uint32_t tSubblockEnd = tSubblockStart + m_tSubblockCalc.m_iSubblockSize; - - if ( tRowID=tSubblockStart && tRowID=m_iTotalSubblocks ) + return false; - iNextSubblock++; - } + if ( iNextSubblock>m_iCurSubblock ) + return MoveToSubblock(iNextSubblock); - return false; + return true; } template diff --git a/columnar/columnar.cpp b/columnar/columnar.cpp index dfd05d6e..e7ed8acf 100644 --- a/columnar/columnar.cpp +++ b/columnar/columnar.cpp @@ -230,7 +230,7 @@ class BlockIterator_c : public BlockIterator_i bool WasCutoffHit() const final { return false; } private: - static const int MAX_COLLECTED = 128; + static const int MAX_COLLECTED = 1024; std::shared_ptr m_pMatchingBlocks; std::array m_dCollected; @@ -250,9 +250,10 @@ class BlockIterator_c : public BlockIterator_i int m_iMinMaxLeafShift = 0; int m_iNumLevels = 0; - inline bool SetCurBlock ( int iBlock ); - FORCE_INLINE int GetNumDocs ( int iBlock ) const; - FORCE_INLINE uint32_t MinMaxBlockId2RowId ( int iBlockId ) const; + FORCE_INLINE bool SetCurBlock ( int iBlock ); + FORCE_INLINE int GetNumDocs ( int iBlock ) const; + FORCE_INLINE uint32_t MinMaxBlockId2RowId ( int iBlockId ) const { return iBlockId<>m_iMinMaxLeafShift; } }; @@ -284,35 +285,24 @@ bool BlockIterator_c::Setup ( const std::vector & dHeaders, return true; } + void BlockIterator_c::AddDesc ( std::vector & dDesc ) const { for ( const auto & i : m_dAttrs ) dDesc.push_back ( { i, "prefilter" } ); } + bool BlockIterator_c::HintRowID ( uint32_t tRowID ) { - int iNextBlock = m_iBlock; - int iNumBlocks = m_pMatchingBlocks->GetNumBlocks(); - - // we assume that we are only advancing forward - while ( iNextBlockGetBlock(iNextBlock) ); - uint32_t tSubblockEnd = tSubblockStart + m_iDocsPerBlock; - - if ( tRowID=tSubblockStart && tRowIDFind ( m_iBlock, RowId2MinMaxBlockId(tRowID) ); + if ( iNextBlock>=m_pMatchingBlocks->GetNumBlocks() ) + return false; - iNextBlock++; - } + if ( iNextBlock>m_iBlock ) + SetCurBlock(iNextBlock); - return false; + return true; } @@ -377,12 +367,6 @@ int BlockIterator_c::GetNumDocs ( int iBlock ) const return m_iDocsInLastBlock; } - -uint32_t BlockIterator_c::MinMaxBlockId2RowId ( int iBlockId ) const -{ - return iBlockId<