From 4223c525aed2cfb704ae9a0b439e5fac034913d0 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Sun, 30 Jun 2024 21:13:15 +0200 Subject: [PATCH] implemented NOTNULL filter type for secondary indexes --- columnar/columnar.h | 2 +- common/filter.h | 3 ++- secondary/blockreader.cpp | 36 ++++++++++++++++++++++++++++++++---- secondary/secondary.cpp | 38 ++++++++++++++++++++++++++------------ secondary/secondary.h | 2 +- util/bitvec.h | 12 ++++++++++++ 6 files changed, 74 insertions(+), 19 deletions(-) diff --git a/columnar/columnar.h b/columnar/columnar.h index 73abca9c..cdfa97f5 100644 --- a/columnar/columnar.h +++ b/columnar/columnar.h @@ -28,7 +28,7 @@ namespace columnar { -static const int LIB_VERSION = 25; +static const int LIB_VERSION = 26; class Iterator_i { diff --git a/common/filter.h b/common/filter.h index 3b149f27..579070e5 100644 --- a/common/filter.h +++ b/common/filter.h @@ -31,7 +31,8 @@ enum class FilterType_e VALUES, RANGE, FLOATRANGE, - STRINGS + STRINGS, + NOTNULL }; diff --git a/secondary/blockreader.cpp b/secondary/blockreader.cpp index 2366d342..bae048f6 100644 --- a/secondary/blockreader.cpp +++ b/secondary/blockreader.cpp @@ -97,6 +97,8 @@ class SplitBitmap_c FORCE_INLINE int Scan ( int iStart ); FORCE_INLINE int GetLength() const { return m_iSize; } + void Invert() { assert ( 0 && "Unsupported by SplitBitmap_c" ); } + template void Fetch ( int & iIterator, int iBase, RESULT * & pRes, RESULT * pMax ); @@ -201,6 +203,7 @@ class BitmapIterator_i : public BlockIterator_i { public: virtual void Add ( BlockIterator_i * pIterator ) = 0; + virtual void Invert() = 0; }; template @@ -218,6 +221,7 @@ class BitmapIterator_T : public BitmapIterator_i bool WasCutoffHit() const override { return !m_iRowsLeft; } void Add ( BlockIterator_i * pIterator ) override; + void Invert() override { m_tBitmap.Invert(); } private: static const int RESULT_BLOCK_SIZE = 1024; @@ -403,7 +407,7 @@ class ReaderTraits_c : public BlockReader_i int m_iCutoff = 0; bool NeedBitmapIterator() const; - BitmapIterator_i * SpawnBitmapIterator ( const RowidRange_t * pBounds = nullptr ) const; + BitmapIterator_i * SpawnBitmapIterator ( const RowidRange_t * pBounds = nullptr, const Filter_t * pRange = nullptr ) const; void LoadValueBlockData ( bool bOnlyCount, FileReader_c & tReader ); uint32_t CalcNumBlockValues ( int iBlock ) const; }; @@ -436,8 +440,17 @@ bool ReaderTraits_c::NeedBitmapIterator() const } -BitmapIterator_i * ReaderTraits_c::SpawnBitmapIterator ( const RowidRange_t * pBounds ) const +BitmapIterator_i * ReaderTraits_c::SpawnBitmapIterator ( const RowidRange_t * pBounds, const Filter_t * pRange ) const { + // force bitmap iterator for IS NULL queries + if ( pRange && pRange->m_eType==common::FilterType_e::NOTNULL && pRange->m_bExclude ) + { + if ( pBounds ) + return new BitmapIterator_T, true> ( m_sAttr, m_tRsetInfo.m_uRowsCount, pBounds ); + else + return new BitmapIterator_T, false> ( m_sAttr, m_tRsetInfo.m_uRowsCount ); + } + if ( !NeedBitmapIterator() ) return nullptr; @@ -785,8 +798,11 @@ FindValueResult_t BlockReader_T::FindValue ( uint64_t uRefVal ) ///////////////////////////////////////////////////////////////////// template -int CmpRange ( T tStart, T tEnd, const Filter_t & tRange ) +static int CmpRange ( T tStart, T tEnd, const Filter_t & tRange ) { + if ( tRange.m_bLeftUnbounded && tRange.m_bRightUnbounded ) + return 0; + Interval_T tIntBlock ( tStart, tEnd ); Interval_T tIntRange; @@ -1005,12 +1021,18 @@ void RangeReader_c::CreateBlocksIterator ( const BlockIter_t & tIt, const Filter { // add bitmap iterator as 1st element of dRes on exit std::function fnDeleter = [&]( BitmapIterator_i * pIterator ){ if ( pIterator ) { assert(dRes.empty()); dRes.push_back(pIterator); } }; - std::unique_ptr pBitmapIterator ( SpawnBitmapIterator ( m_bHaveBounds ? &m_tBounds : nullptr ), fnDeleter ); + std::unique_ptr pBitmapIterator ( SpawnBitmapIterator ( m_bHaveBounds ? &m_tBounds : nullptr, &tRange ), fnDeleter ); if ( pBitmapIterator && m_iCutoff>=0 ) pBitmapIterator->SetCutoff(m_iCutoff); std::unique_ptr pCommonIterator; CreateBlocksIterator ( tIt, tRange, [this, &dRes, &pBitmapIterator, &pCommonIterator]( int iValCur, bool bLoad ){ return AddIterator ( iValCur, bLoad, dRes, pBitmapIterator.get(), pCommonIterator ); } ); + + if ( tRange.m_eType==common::FilterType_e::NOTNULL && tRange.m_bExclude ) + { + assert(pBitmapIterator); + pBitmapIterator->Invert(); + } } @@ -1068,6 +1090,9 @@ class RangeReader_T : public RangeReader_c bool EvalRangeValue ( int iItem, const Filter_t & tRange ) const override { + if ( tRange.m_bLeftUnbounded && tRange.m_bRightUnbounded ) + return true; + if ( std::is_floating_point::value ) return ValueInInterval ( UintToFloat ( m_dValues[iItem] ), tRange ); else @@ -1134,6 +1159,9 @@ BlockReader_i * ReaderFactory_c::CreateRangeReader() case AttrType_e::FLOAT: return new RangeReader_T ( *this, pCodec ); + case AttrType_e::STRING: + return new RangeReader_T ( *this, pCodec ); + case AttrType_e::INT64: case AttrType_e::INT64SET: return new RangeReader_T ( *this, pCodec ); diff --git a/secondary/secondary.cpp b/secondary/secondary.cpp index 882b5670..ff1e865d 100644 --- a/secondary/secondary.cpp +++ b/secondary/secondary.cpp @@ -335,8 +335,18 @@ bool SecondaryIndex_c::PrepareBlocksRange ( const Filter_t & tFilter, ApproxPos_ const bool bFloat = tCol.m_eType==AttrType_e::FLOAT; - tPos = { 0, 0, ( uBlocksCount - 1 ) * m_uValuesPerBlock }; - if ( tFilter.m_bRightUnbounded ) + tPos = { 0, 0, ( uBlocksCount - 1 )*m_uValuesPerBlock }; + + if ( ( tFilter.m_bLeftUnbounded && tFilter.m_bRightUnbounded ) || (!tFilter.m_bLeftUnbounded && !tFilter.m_bRightUnbounded ) ) + { + ApproxPos_t tFoundMin = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMinValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMinValue ) ); + ApproxPos_t tFoundMax = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMaxValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMaxValue ) ); + tPos.m_iLo = std::min ( tFoundMin.m_iLo, tFoundMax.m_iLo ); + tPos.m_iPos = std::min ( tFoundMin.m_iPos, tFoundMax.m_iPos ); + tPos.m_iHi = std::max ( tFoundMin.m_iHi, tFoundMax.m_iHi ); + iNumIterators = tFoundMax.m_iPos-tFoundMin.m_iPos+1; + } + else if ( tFilter.m_bRightUnbounded ) { ApproxPos_t tFound = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMinValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMinValue ) ); tPos.m_iPos = tFound.m_iPos; @@ -350,15 +360,6 @@ bool SecondaryIndex_c::PrepareBlocksRange ( const Filter_t & tFilter, ApproxPos_ tPos.m_iHi = tFound.m_iHi; iNumIterators = tPos.m_iPos-tPos.m_iLo; } - else - { - ApproxPos_t tFoundMin = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMinValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMinValue ) ); - ApproxPos_t tFoundMax = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMaxValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMaxValue ) ); - tPos.m_iLo = std::min ( tFoundMin.m_iLo, tFoundMax.m_iLo ); - tPos.m_iPos = std::min ( tFoundMin.m_iPos, tFoundMax.m_iPos ); - tPos.m_iHi = std::max ( tFoundMin.m_iHi, tFoundMax.m_iHi ); - iNumIterators = tFoundMax.m_iPos-tFoundMin.m_iPos+1; - } iNumIterators = std::max ( iNumIterators, int64_t(0) ); return true; @@ -438,12 +439,22 @@ const ColumnInfo_t * SecondaryIndex_c::GetAttr ( const Filter_t & tFilter, std:: { tFixedFilter = tFilter; FixupFilterSettings ( tFixedFilter, tCol.m_eType ); - if ( tFixedFilter.m_eType==FilterType_e::STRINGS ) + switch ( tFixedFilter.m_eType ) { + case FilterType_e::STRINGS: if ( !tFixedFilter.m_fnCalcStrHash ) return false; tFixedFilter = StringFilterToHashFilter ( tFixedFilter, false ); + break; + + case FilterType_e::NOTNULL: + tFixedFilter.m_bLeftUnbounded = true; + tFixedFilter.m_bRightUnbounded = true; + break; + + default: + break; } return true; @@ -468,6 +479,7 @@ bool SecondaryIndex_c::CreateIterators ( std::vector & dItera case FilterType_e::RANGE: case FilterType_e::FLOATRANGE: + case FilterType_e::NOTNULL: GetRangeRows ( &dIterators, tFixedFilter, pBounds, uMaxValues, iRsetSize, iCutoff ); return true; @@ -506,6 +518,7 @@ bool SecondaryIndex_c::CalcCount ( uint32_t & uCount, const common::Filter_t & t case FilterType_e::RANGE: case FilterType_e::FLOATRANGE: + case FilterType_e::NOTNULL: uCount = CalcRangeRows ( tFixedFilter ); if ( bExclude ) uCount = uMaxValues - uCount; @@ -536,6 +549,7 @@ uint32_t SecondaryIndex_c::GetNumIterators ( const common::Filter_t & tFilter ) case FilterType_e::RANGE: case FilterType_e::FLOATRANGE: + case FilterType_e::NOTNULL: return GetRangeRows ( nullptr, tFixedFilter, nullptr, 0, 0, INT_MAX ); default: diff --git a/secondary/secondary.h b/secondary/secondary.h index bc443790..55720dd2 100644 --- a/secondary/secondary.h +++ b/secondary/secondary.h @@ -38,7 +38,7 @@ namespace common namespace SI { -static const int LIB_VERSION = 15; +static const int LIB_VERSION = 16; static const uint32_t STORAGE_VERSION = 8; class Index_i diff --git a/util/bitvec.h b/util/bitvec.h index 4f96e37d..f07ee451 100644 --- a/util/bitvec.h +++ b/util/bitvec.h @@ -47,6 +47,18 @@ class BitVec_T m_dData [ iBit>>SHIFT ] |= ( (T)1 )<<( iBit&MASK ); } + void Invert() + { + for ( auto & i : m_dData ) + i = ~i; + + if ( m_iSize!=m_iDataLen << SHIFT ) + { + int iFirstBit = (m_iDataLen-1) << SHIFT; + m_dData.back() &= (T(1) << ( m_iSize-iFirstBit )) - 1; + } + } + template void Fetch ( int & iIterator, int iBase, RESULT * & pRes, RESULT * pMax ) {