From b707d5b0eec0383cdae12730d36eb8a25bc26ce2 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Sat, 27 Jul 2024 18:33:10 +0200 Subject: [PATCH] added native exclude filter handling via bitmaps --- secondary/blockreader.cpp | 45 +++++++++++++++++++++------------------ secondary/blockreader.h | 4 ++-- secondary/secondary.cpp | 2 +- util/bitvec.h | 26 ++++++++++++++++------ 4 files changed, 47 insertions(+), 30 deletions(-) diff --git a/secondary/blockreader.cpp b/secondary/blockreader.cpp index bae048f6..6a397d10 100644 --- a/secondary/blockreader.cpp +++ b/secondary/blockreader.cpp @@ -97,7 +97,7 @@ class SplitBitmap_c FORCE_INLINE int Scan ( int iStart ); FORCE_INLINE int GetLength() const { return m_iSize; } - void Invert() { assert ( 0 && "Unsupported by SplitBitmap_c" ); } + void Invert ( int iMinBit=-1, int iMaxBit=-1 ) { assert ( 0 && "Unsupported by SplitBitmap_c" ); } template void Fetch ( int & iIterator, int iBase, RESULT * & pRes, RESULT * pMax ); @@ -203,7 +203,7 @@ class BitmapIterator_i : public BlockIterator_i { public: virtual void Add ( BlockIterator_i * pIterator ) = 0; - virtual void Invert() = 0; + virtual void Invert ( RowidRange_t * pBounds ) = 0; }; template @@ -221,7 +221,7 @@ class BitmapIterator_T : public BitmapIterator_i bool WasCutoffHit() const override { return !m_iRowsLeft; } void Add ( BlockIterator_i * pIterator ) override; - void Invert() override { m_tBitmap.Invert(); } + void Invert ( RowidRange_t * pBounds ) override { m_tBitmap.Invert ( pBounds ? pBounds->m_uMin : -1, pBounds ? pBounds->m_uMax : -1 ); } private: static const int RESULT_BLOCK_SIZE = 1024; @@ -407,7 +407,7 @@ class ReaderTraits_c : public BlockReader_i int m_iCutoff = 0; bool NeedBitmapIterator() const; - BitmapIterator_i * SpawnBitmapIterator ( const RowidRange_t * pBounds = nullptr, const Filter_t * pRange = nullptr ) const; + BitmapIterator_i * SpawnBitmapIterator ( const RowidRange_t * pBounds, bool bExclude ) const; void LoadValueBlockData ( bool bOnlyCount, FileReader_c & tReader ); uint32_t CalcNumBlockValues ( int iBlock ) const; }; @@ -440,10 +440,11 @@ bool ReaderTraits_c::NeedBitmapIterator() const } -BitmapIterator_i * ReaderTraits_c::SpawnBitmapIterator ( const RowidRange_t * pBounds, const Filter_t * pRange ) const +BitmapIterator_i * ReaderTraits_c::SpawnBitmapIterator ( const RowidRange_t * pBounds, bool bExclude ) const { - // force bitmap iterator for IS NULL queries - if ( pRange && pRange->m_eType==common::FilterType_e::NOTNULL && pRange->m_bExclude ) + // force bitmap iterator for exclude filters + // FIXME! make invertable split bitmaps + if ( bExclude ) { if ( pBounds ) return new BitmapIterator_T, true> ( m_sAttr, m_tRsetInfo.m_uRowsCount, pBounds ); @@ -517,8 +518,8 @@ class BlockReader_c : public ReaderTraits_c public: BlockReader_c ( const ReaderFactory_c & tCtx, std::shared_ptr & pCodec ); - void CreateBlocksIterator ( const std::vector & dIt, std::vector & dRes ) override; - void CreateBlocksIterator ( const BlockIter_t & tIt, const Filter_t & tVal, std::vector & dRes ) override { assert ( 0 && "Requesting range iterators from block reader" ); } + void CreateBlocksIterator ( const std::vector & dIt, const Filter_t & tFilter, std::vector & dRes ) override; + void CreateBlocksIterator ( const BlockIter_t & tIt, const Filter_t & tFilter, std::vector & dRes ) override { assert ( 0 && "Requesting range iterators from block reader" ); } uint32_t CalcValueCount ( const std::vector & dIt ) override; uint32_t CalcValueCount ( const BlockIter_t & tIt, const common::Filter_t & tVal ) override { assert ( 0 && "Requesting range iterators from block reader" ); return 0; } @@ -639,17 +640,21 @@ void BlockReader_c::CreateBlocksIterator ( const BlockIter_t & tIt, ADDITERATOR } -void BlockReader_c::CreateBlocksIterator ( const std::vector & dIt, std::vector & dRes ) +void BlockReader_c::CreateBlocksIterator ( const std::vector & dIt, const Filter_t & tFilter, std::vector & dRes ) { // add bitmap iterator as 1st element of dRes on exit std::function fnDeleter = [&]( BitmapIterator_i * pIterator ){ if ( pIterator ) { assert(dRes.empty()); dRes.push_back(pIterator); } }; - std::unique_ptr pBitmapIterator ( SpawnBitmapIterator(), fnDeleter ); + RowidRange_t * pBounds = m_bHaveBounds ? &m_tBounds : nullptr; + std::unique_ptr pBitmapIterator ( SpawnBitmapIterator ( pBounds, tFilter.m_bExclude ), fnDeleter ); if ( pBitmapIterator && m_iCutoff>=0 ) pBitmapIterator->SetCutoff(m_iCutoff); std::unique_ptr pCommonIterator; for ( auto & i : dIt ) CreateBlocksIterator ( i, [this, &dRes, &pBitmapIterator, &pCommonIterator]( int iItem ){ AddIterator ( iItem, dRes, pBitmapIterator.get(), pCommonIterator ); } ); + + if ( tFilter.m_bExclude ) + pBitmapIterator->Invert(pBounds); } @@ -829,8 +834,8 @@ class RangeReader_c : public ReaderTraits_c public: RangeReader_c ( const ReaderFactory_c & tCtx, std::shared_ptr & pCodec ); - void CreateBlocksIterator ( const std::vector & dIt, std::vector & dRes ) override { assert ( 0 && "Requesting block iterators from range reader" ); } - void CreateBlocksIterator ( const BlockIter_t & tIt, const Filter_t & tRange, std::vector & dRes ) override; + void CreateBlocksIterator ( const std::vector & dIt, const Filter_t & tFilter, std::vector & dRes ) override { assert ( 0 && "Requesting block iterators from range reader" ); } + void CreateBlocksIterator ( const BlockIter_t & tIt, const Filter_t & tFilter, std::vector & dRes ) override; uint32_t CalcValueCount ( const std::vector & dIt ) override { assert ( 0 && "Requesting block iterators from range reader" ); return 0; } uint32_t CalcValueCount ( const BlockIter_t & tIt, const common::Filter_t & tRange ) override; @@ -1017,22 +1022,20 @@ void RangeReader_c::CreateBlocksIterator ( const BlockIter_t & tIt, const Filter } -void RangeReader_c::CreateBlocksIterator ( const BlockIter_t & tIt, const Filter_t & tRange, std::vector & dRes ) +void RangeReader_c::CreateBlocksIterator ( const BlockIter_t & tIt, const Filter_t & tFilter, std::vector & dRes ) { // add bitmap iterator as 1st element of dRes on exit std::function fnDeleter = [&]( BitmapIterator_i * pIterator ){ if ( pIterator ) { assert(dRes.empty()); dRes.push_back(pIterator); } }; - std::unique_ptr pBitmapIterator ( SpawnBitmapIterator ( m_bHaveBounds ? &m_tBounds : nullptr, &tRange ), fnDeleter ); + RowidRange_t * pBounds = m_bHaveBounds ? &m_tBounds : nullptr; + std::unique_ptr pBitmapIterator ( SpawnBitmapIterator ( pBounds, tFilter.m_bExclude ), fnDeleter ); if ( pBitmapIterator && m_iCutoff>=0 ) pBitmapIterator->SetCutoff(m_iCutoff); std::unique_ptr pCommonIterator; - CreateBlocksIterator ( tIt, tRange, [this, &dRes, &pBitmapIterator, &pCommonIterator]( int iValCur, bool bLoad ){ return AddIterator ( iValCur, bLoad, dRes, pBitmapIterator.get(), pCommonIterator ); } ); + CreateBlocksIterator ( tIt, tFilter, [this, &dRes, &pBitmapIterator, &pCommonIterator]( int iValCur, bool bLoad ){ return AddIterator ( iValCur, bLoad, dRes, pBitmapIterator.get(), pCommonIterator ); } ); - if ( tRange.m_eType==common::FilterType_e::NOTNULL && tRange.m_bExclude ) - { - assert(pBitmapIterator); - pBitmapIterator->Invert(); - } + if ( tFilter.m_bExclude ) + pBitmapIterator->Invert(pBounds); } diff --git a/secondary/blockreader.h b/secondary/blockreader.h index c3a43212..45fa9532 100644 --- a/secondary/blockreader.h +++ b/secondary/blockreader.h @@ -52,8 +52,8 @@ class BlockReader_i public: virtual ~BlockReader_i() = default; - virtual void CreateBlocksIterator ( const std::vector & dIt, std::vector & dRes ) = 0; - virtual void CreateBlocksIterator ( const BlockIter_t & tIt, const common::Filter_t & tVal, std::vector & dRes ) = 0; + virtual void CreateBlocksIterator ( const std::vector & dIt, const common::Filter_t & tFilter, std::vector & dRes ) = 0; + virtual void CreateBlocksIterator ( const BlockIter_t & tIt, const common::Filter_t & tFilter, std::vector & dRes ) = 0; virtual uint32_t CalcValueCount ( const std::vector & dIt ) = 0; virtual uint32_t CalcValueCount ( const BlockIter_t & tIt, const common::Filter_t & tVal ) = 0; }; diff --git a/secondary/secondary.cpp b/secondary/secondary.cpp index 2164a878..c2956a26 100644 --- a/secondary/secondary.cpp +++ b/secondary/secondary.cpp @@ -338,7 +338,7 @@ int64_t SecondaryIndex_c::GetValsRows ( std::vector * pIterat if ( !pBlockReader ) return 0; - pBlockReader->CreateBlocksIterator ( dBlocksIt, *pIterators ); + pBlockReader->CreateBlocksIterator ( dBlocksIt, tFilter, *pIterators ); return iNumIterators; } diff --git a/util/bitvec.h b/util/bitvec.h index f07ee451..c92f9114 100644 --- a/util/bitvec.h +++ b/util/bitvec.h @@ -47,15 +47,29 @@ class BitVec_T m_dData [ iBit>>SHIFT ] |= ( (T)1 )<<( iBit&MASK ); } - void Invert() + void Invert ( int iMinBit=-1, int iMaxBit=-1 ) { - for ( auto & i : m_dData ) - i = ~i; + if ( iMinBit<0 ) + iMinBit = 0; - if ( m_iSize!=m_iDataLen << SHIFT ) + if ( iMaxBit<0 ) + iMaxBit = m_iSize; + + int iMinId = iMinBit>>SHIFT; + int iMaxId = (iMaxBit+SIZEBITS-1)>>SHIFT; + for ( int i = iMinId; i