Skip to content

Commit

Permalink
implemented NOTNULL filter type for secondary indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
glookka committed Jun 30, 2024
1 parent 89ed74a commit 4223c52
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 19 deletions.
2 changes: 1 addition & 1 deletion columnar/columnar.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
namespace columnar
{

static const int LIB_VERSION = 25;
static const int LIB_VERSION = 26;

class Iterator_i
{
Expand Down
3 changes: 2 additions & 1 deletion common/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ enum class FilterType_e
VALUES,
RANGE,
FLOATRANGE,
STRINGS
STRINGS,
NOTNULL
};


Expand Down
36 changes: 32 additions & 4 deletions secondary/blockreader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ class SplitBitmap_c
FORCE_INLINE int Scan ( int iStart );
FORCE_INLINE int GetLength() const { return m_iSize; }

// Inversion is not implemented for this bitmap type: the body only trips an assertion.
// NOTE(review): presumably this stub exists so that templated callers which invoke
// Invert() on their bitmap member still compile when given this type — confirm against
// the instantiations of BitmapIterator_T.
void Invert() { assert ( 0 && "Unsupported by SplitBitmap_c" ); }

template <typename RESULT>
void Fetch ( int & iIterator, int iBase, RESULT * & pRes, RESULT * pMax );

Expand Down Expand Up @@ -201,6 +203,7 @@ class BitmapIterator_i : public BlockIterator_i
{
public:
virtual void Add ( BlockIterator_i * pIterator ) = 0;
virtual void Invert() = 0;
};

template <typename BITMAP, bool ROWID_RANGE>
Expand All @@ -218,6 +221,7 @@ class BitmapIterator_T : public BitmapIterator_i
bool WasCutoffHit() const override { return !m_iRowsLeft; }

void Add ( BlockIterator_i * pIterator ) override;
void Invert() override { m_tBitmap.Invert(); }

private:
static const int RESULT_BLOCK_SIZE = 1024;
Expand Down Expand Up @@ -403,7 +407,7 @@ class ReaderTraits_c : public BlockReader_i
int m_iCutoff = 0;

bool NeedBitmapIterator() const;
BitmapIterator_i * SpawnBitmapIterator ( const RowidRange_t * pBounds = nullptr ) const;
BitmapIterator_i * SpawnBitmapIterator ( const RowidRange_t * pBounds = nullptr, const Filter_t * pRange = nullptr ) const;
void LoadValueBlockData ( bool bOnlyCount, FileReader_c & tReader );
uint32_t CalcNumBlockValues ( int iBlock ) const;
};
Expand Down Expand Up @@ -436,8 +440,17 @@ bool ReaderTraits_c::NeedBitmapIterator() const
}


BitmapIterator_i * ReaderTraits_c::SpawnBitmapIterator ( const RowidRange_t * pBounds ) const
BitmapIterator_i * ReaderTraits_c::SpawnBitmapIterator ( const RowidRange_t * pBounds, const Filter_t * pRange ) const
{
// force bitmap iterator for IS NULL queries
if ( pRange && pRange->m_eType==common::FilterType_e::NOTNULL && pRange->m_bExclude )
{
if ( pBounds )
return new BitmapIterator_T<BitVec_T<uint64_t>, true> ( m_sAttr, m_tRsetInfo.m_uRowsCount, pBounds );
else
return new BitmapIterator_T<BitVec_T<uint64_t>, false> ( m_sAttr, m_tRsetInfo.m_uRowsCount );
}

if ( !NeedBitmapIterator() )
return nullptr;

Expand Down Expand Up @@ -785,8 +798,11 @@ FindValueResult_t BlockReader_T<uint32_t, float>::FindValue ( uint64_t uRefVal )
/////////////////////////////////////////////////////////////////////

template<typename T>
int CmpRange ( T tStart, T tEnd, const Filter_t & tRange )
static int CmpRange ( T tStart, T tEnd, const Filter_t & tRange )
{
if ( tRange.m_bLeftUnbounded && tRange.m_bRightUnbounded )
return 0;

Interval_T<T> tIntBlock ( tStart, tEnd );

Interval_T<T> tIntRange;
Expand Down Expand Up @@ -1005,12 +1021,18 @@ void RangeReader_c::CreateBlocksIterator ( const BlockIter_t & tIt, const Filter
{
// add bitmap iterator as 1st element of dRes on exit
std::function<void( BitmapIterator_i * pIterator )> fnDeleter = [&]( BitmapIterator_i * pIterator ){ if ( pIterator ) { assert(dRes.empty()); dRes.push_back(pIterator); } };
std::unique_ptr<BitmapIterator_i, decltype(fnDeleter)> pBitmapIterator ( SpawnBitmapIterator ( m_bHaveBounds ? &m_tBounds : nullptr ), fnDeleter );
std::unique_ptr<BitmapIterator_i, decltype(fnDeleter)> pBitmapIterator ( SpawnBitmapIterator ( m_bHaveBounds ? &m_tBounds : nullptr, &tRange ), fnDeleter );
if ( pBitmapIterator && m_iCutoff>=0 )
pBitmapIterator->SetCutoff(m_iCutoff);

std::unique_ptr<BlockIteratorWithSetup_i> pCommonIterator;
CreateBlocksIterator ( tIt, tRange, [this, &dRes, &pBitmapIterator, &pCommonIterator]( int iValCur, bool bLoad ){ return AddIterator ( iValCur, bLoad, dRes, pBitmapIterator.get(), pCommonIterator ); } );

if ( tRange.m_eType==common::FilterType_e::NOTNULL && tRange.m_bExclude )
{
assert(pBitmapIterator);
pBitmapIterator->Invert();
}
}


Expand Down Expand Up @@ -1068,6 +1090,9 @@ class RangeReader_T : public RangeReader_c

bool EvalRangeValue ( int iItem, const Filter_t & tRange ) const override
{
if ( tRange.m_bLeftUnbounded && tRange.m_bRightUnbounded )
return true;

if ( std::is_floating_point<DST_VALUE>::value )
return ValueInInterval<float> ( UintToFloat ( m_dValues[iItem] ), tRange );
else
Expand Down Expand Up @@ -1134,6 +1159,9 @@ BlockReader_i * ReaderFactory_c::CreateRangeReader()
case AttrType_e::FLOAT:
return new RangeReader_T<uint32_t, float> ( *this, pCodec );

case AttrType_e::STRING:
return new RangeReader_T<uint64_t, uint64_t> ( *this, pCodec );

case AttrType_e::INT64:
case AttrType_e::INT64SET:
return new RangeReader_T<uint64_t, int64_t> ( *this, pCodec );
Expand Down
38 changes: 26 additions & 12 deletions secondary/secondary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,18 @@ bool SecondaryIndex_c::PrepareBlocksRange ( const Filter_t & tFilter, ApproxPos_

const bool bFloat = tCol.m_eType==AttrType_e::FLOAT;

tPos = { 0, 0, ( uBlocksCount - 1 ) * m_uValuesPerBlock };
if ( tFilter.m_bRightUnbounded )
tPos = { 0, 0, ( uBlocksCount - 1 )*m_uValuesPerBlock };

if ( ( tFilter.m_bLeftUnbounded && tFilter.m_bRightUnbounded ) || (!tFilter.m_bLeftUnbounded && !tFilter.m_bRightUnbounded ) )
{
ApproxPos_t tFoundMin = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMinValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMinValue ) );
ApproxPos_t tFoundMax = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMaxValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMaxValue ) );
tPos.m_iLo = std::min ( tFoundMin.m_iLo, tFoundMax.m_iLo );
tPos.m_iPos = std::min ( tFoundMin.m_iPos, tFoundMax.m_iPos );
tPos.m_iHi = std::max ( tFoundMin.m_iHi, tFoundMax.m_iHi );
iNumIterators = tFoundMax.m_iPos-tFoundMin.m_iPos+1;
}
else if ( tFilter.m_bRightUnbounded )
{
ApproxPos_t tFound = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMinValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMinValue ) );
tPos.m_iPos = tFound.m_iPos;
Expand All @@ -350,15 +360,6 @@ bool SecondaryIndex_c::PrepareBlocksRange ( const Filter_t & tFilter, ApproxPos_
tPos.m_iHi = tFound.m_iHi;
iNumIterators = tPos.m_iPos-tPos.m_iLo;
}
else
{
ApproxPos_t tFoundMin = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMinValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMinValue ) );
ApproxPos_t tFoundMax = ( bFloat ? m_dIdx[iCol]->Search ( FloatToUint ( tFilter.m_fMaxValue ) ) : m_dIdx[iCol]->Search ( tFilter.m_iMaxValue ) );
tPos.m_iLo = std::min ( tFoundMin.m_iLo, tFoundMax.m_iLo );
tPos.m_iPos = std::min ( tFoundMin.m_iPos, tFoundMax.m_iPos );
tPos.m_iHi = std::max ( tFoundMin.m_iHi, tFoundMax.m_iHi );
iNumIterators = tFoundMax.m_iPos-tFoundMin.m_iPos+1;
}

iNumIterators = std::max ( iNumIterators, int64_t(0) );
return true;
Expand Down Expand Up @@ -438,12 +439,22 @@ const ColumnInfo_t * SecondaryIndex_c::GetAttr ( const Filter_t & tFilter, std::
{
tFixedFilter = tFilter;
FixupFilterSettings ( tFixedFilter, tCol.m_eType );
if ( tFixedFilter.m_eType==FilterType_e::STRINGS )
switch ( tFixedFilter.m_eType )
{
case FilterType_e::STRINGS:
if ( !tFixedFilter.m_fnCalcStrHash )
return false;

tFixedFilter = StringFilterToHashFilter ( tFixedFilter, false );
break;

case FilterType_e::NOTNULL:
tFixedFilter.m_bLeftUnbounded = true;
tFixedFilter.m_bRightUnbounded = true;
break;

default:
break;
}

return true;
Expand All @@ -468,6 +479,7 @@ bool SecondaryIndex_c::CreateIterators ( std::vector<BlockIterator_i *> & dItera

case FilterType_e::RANGE:
case FilterType_e::FLOATRANGE:
case FilterType_e::NOTNULL:
GetRangeRows ( &dIterators, tFixedFilter, pBounds, uMaxValues, iRsetSize, iCutoff );
return true;

Expand Down Expand Up @@ -506,6 +518,7 @@ bool SecondaryIndex_c::CalcCount ( uint32_t & uCount, const common::Filter_t & t

case FilterType_e::RANGE:
case FilterType_e::FLOATRANGE:
case FilterType_e::NOTNULL:
uCount = CalcRangeRows ( tFixedFilter );
if ( bExclude )
uCount = uMaxValues - uCount;
Expand Down Expand Up @@ -536,6 +549,7 @@ uint32_t SecondaryIndex_c::GetNumIterators ( const common::Filter_t & tFilter )

case FilterType_e::RANGE:
case FilterType_e::FLOATRANGE:
case FilterType_e::NOTNULL:
return GetRangeRows ( nullptr, tFixedFilter, nullptr, 0, 0, INT_MAX );

default:
Expand Down
2 changes: 1 addition & 1 deletion secondary/secondary.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace common
namespace SI
{

static const int LIB_VERSION = 15;
static const int LIB_VERSION = 16;
static const uint32_t STORAGE_VERSION = 8;

class Index_i
Expand Down
12 changes: 12 additions & 0 deletions util/bitvec.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,18 @@ class BitVec_T
m_dData [ iBit>>SHIFT ] |= ( (T)1 )<<( iBit&MASK );
}

// Flips every bit of the vector in place.
// After complementing whole storage words, any bits of the last word that lie
// beyond m_iSize are cleared again so they do not show up as set bits.
void Invert()
{
	for ( auto & tWord : m_dData )
		tWord = ~tWord;

	// the bit count fills the storage words exactly: no tail to trim
	if ( m_iSize==m_iDataLen << SHIFT )
		return;

	// index of the first bit stored in the last word
	int iFirstBit = (m_iDataLen-1) << SHIFT;

	// number of valid bits in the last word; keep only those
	const auto iTailBits = m_iSize-iFirstBit;
	m_dData.back() &= (T(1) << iTailBits) - 1;
}

template <typename RESULT>
void Fetch ( int & iIterator, int iBase, RESULT * & pRes, RESULT * pMax )
{
Expand Down

0 comments on commit 4223c52

Please sign in to comment.