Skip to content

Commit

Permalink
Merge branch 'fast_plain_count' into 'master'
Browse files Browse the repository at this point in the history
Added a fastpath for count(*) w/o filters

See merge request manticoresearch/dev!385
  • Loading branch information
glookka committed Apr 17, 2023
2 parents 4ad33d0 + 08f7527 commit 2bb9276
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 23 deletions.
5 changes: 3 additions & 2 deletions src/searchd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5554,8 +5554,9 @@ SphQueueSettings_t SearchHandler_c::MakeQueueSettings ( const CSphIndex * pIndex
tQS.m_pHook = pHook;
tQS.m_iMaxMatches = GetMaxMatches ( iMaxMatches, pIndex );
tQS.m_bNeedDocids = m_bNeedDocIDs; // need docids to merge results from indexes
tQS.m_fnGetCountDistinct = [pIndex]( const CSphString & sAttr ){ return pIndex->GetCountDistinct(sAttr); };
tQS.m_fnGetCount = [pIndex]( const CSphFilterSettings & tFilter ){ return pIndex->GetCount(tFilter); };
tQS.m_fnGetCountDistinct = [pIndex]( const CSphString & sAttr ){ return pIndex->GetCountDistinct(sAttr); };
tQS.m_fnGetCountFilter = [pIndex]( const CSphFilterSettings & tFilter ){ return pIndex->GetCountFilter(tFilter); };
tQS.m_fnGetCount = [pIndex](){ return pIndex->GetCount(); };
tQS.m_bEnableFastDistinct = m_dLocal.GetLength()<=1;
tQS.m_bForceSingleThread = bForceSingleThread;
return tQS;
Expand Down
30 changes: 20 additions & 10 deletions src/sphinx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,8 @@ class CSphIndex_VLN : public CSphIndex, public IndexAlterHelper_c, public DebugC
bool CheckEarlyReject ( const CSphVector<CSphFilterSettings> & dFilters, const ISphFilter * pFilter, ESphCollation eCollation, const ISphSchema & tSchema ) const;
int64_t GetPseudoShardingMetric ( const VecTraits_T<const CSphQuery> & dQueries, const VecTraits_T<int64_t> & dMaxCountDistinct, int iThreads, bool & bForceSingleThread ) const override;
int64_t GetCountDistinct ( const CSphString & sAttr ) const override;
int64_t GetCount ( const CSphFilterSettings & tFilter ) const override;
int64_t GetCountFilter ( const CSphFilterSettings & tFilter ) const override;
int64_t GetCount() const override;

private:
static const int MIN_WRITE_BUFFER = 262144; ///< min write buffer size
Expand Down Expand Up @@ -2065,9 +2066,6 @@ static bool DetectNonClonableSorters ( const CSphQuery & tQuery )

static bool DetectPrecalcSorters ( const CSphQuery & tQuery, bool bHasSI )
{
if ( !bHasSI )
return false;

if ( tQuery.m_dItems.any_of ( []( auto & tItem ){ return tItem.m_eAggrFunc!=SPH_AGGR_NONE; } ) )
return false;

Expand All @@ -2078,13 +2076,19 @@ static bool DetectPrecalcSorters ( const CSphQuery & tQuery, bool bHasSI )
return false;

bool bDistinct = !tQuery.m_sGroupDistinct.IsEmpty();
if ( bHasSI )
{
// check for count distinct precalc
if ( bDistinct && tQuery.m_dFilters.IsEmpty() )
return true;

// check for count distinct precalc
if ( bDistinct && tQuery.m_dFilters.IsEmpty() )
return true;
// check for count(*) precalc w/one filter
if ( !bDistinct && tQuery.m_dFilters.GetLength()==1 )
return true;
}

// check for count(*) precalc
if ( !bDistinct && tQuery.m_dFilters.GetLength()==1 )
// check for count(*) w/o filters
if ( !bDistinct && tQuery.m_dFilters.IsEmpty() )
return true;

return false;
Expand Down Expand Up @@ -3128,7 +3132,7 @@ int64_t CSphIndex_VLN::GetCountDistinct ( const CSphString & sAttr ) const
}


int64_t CSphIndex_VLN::GetCount ( const CSphFilterSettings & tFilter ) const
int64_t CSphIndex_VLN::GetCountFilter ( const CSphFilterSettings & tFilter ) const
{
if ( !m_pSIdx.get() || m_tDeadRowMap.HasDead() )
return -1;
Expand All @@ -3150,6 +3154,12 @@ int64_t CSphIndex_VLN::GetCount ( const CSphFilterSettings & tFilter ) const
return uCount;
}


/// fast count(*) w/o filters for a plain index
/// returns m_iDocinfo less the number of dead rows reported by m_tDeadRowMap
/// (presumably m_iDocinfo is the total number of stored rows - TODO confirm)
int64_t CSphIndex_VLN::GetCount() const
{
	const auto iDeadRows = m_tDeadRowMap.GetNumDeads();
	return m_iDocinfo - iDeadRows;
}

/////////////////////////////////////////////////////////////////////////////

struct CmpHit_fn
Expand Down
8 changes: 5 additions & 3 deletions src/sphinx.h
Original file line number Diff line number Diff line change
Expand Up @@ -1114,7 +1114,8 @@ class CSphIndex : public ISphKeywordsStat, public IndexSegment_c, public Docstor
virtual int64_t GetPseudoShardingMetric ( const VecTraits_T<const CSphQuery> & dQueries, const VecTraits_T<int64_t> & dMaxCountDistinct, int iThreads, bool & bForceSingleThread ) const;
virtual bool MustRunInSingleThread ( const VecTraits_T<const CSphQuery> & dQueries, bool bHasSI, const VecTraits_T<int64_t> & dMaxCountDistinct, bool & bForceSingleThread ) const;
virtual int64_t GetCountDistinct ( const CSphString & sAttr ) const { return -1; } // returns values if index has some meta on its attributes
virtual int64_t GetCount ( const CSphFilterSettings & tFilter ) const { return -1; } // returns values if index has some meta on its attributes
virtual int64_t GetCountFilter ( const CSphFilterSettings & tFilter ) const { return -1; } // returns values if index has some meta on its attributes
virtual int64_t GetCount() const { return -1; }

public:
/// build index by indexing given sources
Expand Down Expand Up @@ -1361,8 +1362,9 @@ struct SphQueueSettings_t
const CSphFilterSettings * m_pAggrFilter = nullptr;
int m_iMaxMatches = DEFAULT_MAX_MATCHES;
bool m_bNeedDocids = false;
std::function<int64_t (const CSphString &)> m_fnGetCountDistinct;
std::function<int64_t (const CSphFilterSettings &)> m_fnGetCount;
std::function<int64_t (const CSphString &)> m_fnGetCountDistinct;
std::function<int64_t (const CSphFilterSettings &)> m_fnGetCountFilter;
std::function<int64_t ()> m_fnGetCount;
bool m_bEnableFastDistinct = false;
bool m_bForceSingleThread = false;

Expand Down
30 changes: 27 additions & 3 deletions src/sphinxrt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,8 @@ class RtIndex_c final : public RtIndex_i, public ISphNoncopyable, public ISphWor
int ChunkIDByChunkIdx (int iChunkIdx) const;

int64_t GetCountDistinct ( const CSphString & sAttr ) const override;
int64_t GetCount ( const CSphFilterSettings & tFilter ) const override;
int64_t GetCountFilter ( const CSphFilterSettings & tFilter ) const override;
int64_t GetCount() const override;

// helpers
ConstDiskChunkRefPtr_t MergeDiskChunks ( const char* szParentAction, const ConstDiskChunkRefPtr_t& pChunkA, const ConstDiskChunkRefPtr_t& pChunkB, CSphIndexProgress& tProgress, VecTraits_T<CSphFilterSettings> dFilters );
Expand Down Expand Up @@ -8633,7 +8634,7 @@ int64_t RtIndex_c::GetCountDistinct ( const CSphString & sAttr ) const
}


int64_t RtIndex_c::GetCount ( const CSphFilterSettings & tFilter ) const
int64_t RtIndex_c::GetCountFilter ( const CSphFilterSettings & tFilter ) const
{
// fixme! add code to calculate count(*) in RAM segments
if ( m_tRtChunks.GetRamSegmentsCount() )
Expand All @@ -8646,7 +8647,7 @@ int64_t RtIndex_c::GetCount ( const CSphFilterSettings & tFilter ) const
int64_t iSumCount = 0;
for ( const auto & i : *pDiskChunks )
{
int64_t iCount = i->Cidx().GetCount(tFilter);
int64_t iCount = i->Cidx().GetCountFilter(tFilter);
if ( iCount==-1 )
return -1;

Expand All @@ -8657,6 +8658,29 @@ int64_t RtIndex_c::GetCount ( const CSphFilterSettings & tFilter ) const
}


/// fast count(*) w/o filters for an RT index
/// sums m_tAliveRows over all RAM segments, then adds GetCount() of every disk chunk
/// returns -1 as soon as any disk chunk reports -1 (i.e. it has no count to offer)
int64_t RtIndex_c::GetCount() const
{
	int64_t iTotal = 0;

	// RAM segments part
	auto pRamSegments = m_tRtChunks.RamSegs();
	if ( pRamSegments )
	{
		for ( auto & pSegment : *pRamSegments )
		{
			// const_cast kept as in the original; presumably m_tAliveRows can't be read via a const segment - TODO confirm
			auto * pMutableSegment = const_cast<RtSegment_t*> ( pSegment.Ptr() );
			iTotal += pMutableSegment->m_tAliveRows;
		}
	}

	// disk chunks part
	auto pChunks = m_tRtChunks.DiskChunks();
	if ( pChunks )
	{
		for ( const auto & pChunk : *pChunks )
		{
			const int64_t iFromChunk = pChunk->Cidx().GetCount();
			if ( iFromChunk==-1 )
				return -1;

			iTotal += iFromChunk;
		}
	}

	return iTotal;
}


void RtIndex_c::DropDiskChunk ( int iChunkID, int* pAffected )
{
TRACE_SCHED ( "rt", "RtIndex_c::DropDiskChunk" );
Expand Down
25 changes: 20 additions & 5 deletions src/sphinxsort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4456,10 +4456,10 @@ class FastCountDistinctSorter_c final : public FastBaseSorter_c

// fast count sorter
// works by using a precalculated count(*): either taken from secondary indexes (count with one filter) or reported by the index itself (count w/o filters)
class FastCountSorter_c final : public FastBaseSorter_c
class FastCountFilterSorter_c final : public FastBaseSorter_c
{
public:
FastCountSorter_c ( int iCount, const CSphGroupSorterSettings & tSettings )
FastCountFilterSorter_c ( int iCount, const CSphGroupSorterSettings & tSettings )
: FastBaseSorter_c ( tSettings )
, m_iCount ( iCount )
{}
Expand Down Expand Up @@ -4608,6 +4608,7 @@ static inline ESphSortKeyPart Attr2Keypart ( ESphAttr eType )
/// precalculated aggregate values fetched before a sorter is created
/// every field defaults to -1, which means "no precalculated value available";
/// sphCreateSorter3rd picks a fast sorter for the first field that is not -1
struct Precalculated_t
{
int64_t m_iCountDistinct = -1;	///< result of m_fnGetCountDistinct for the group-distinct attribute
int64_t m_iCountFilter = -1;	///< result of m_fnGetCountFilter for the query's only filter
int64_t m_iCount = -1;			///< result of m_fnGetCount (count(*) w/o filters)
};

Expand All @@ -4634,8 +4635,11 @@ static ISphMatchSorter * sphCreateSorter3rd ( const ISphMatchComparator * pComp,
if ( tPrecalc.m_iCountDistinct!=-1 )
return new FastCountDistinctSorter_c ( tPrecalc.m_iCountDistinct, tSettings );

if ( tPrecalc.m_iCountFilter!=-1 )
return new FastCountFilterSorter_c ( tPrecalc.m_iCountFilter, tSettings );

if ( tPrecalc.m_iCount!=-1 )
return new FastCountSorter_c ( tPrecalc.m_iCount, tSettings );
return new FastCountFilterSorter_c ( tPrecalc.m_iCount, tSettings );

BYTE uSelector3rd = 8*( tSettings.m_bJson ? 1:0 ) + 4*( pQuery->m_iGroupbyLimit>1 ? 1:0 ) + 2*( tSettings.m_bImplicit ? 1:0 ) + ( ( tSettings.m_pGrouper && tSettings.m_pGrouper->IsMultiValue() ) ? 1:0 );
switch ( uSelector3rd )
Expand Down Expand Up @@ -4934,6 +4938,7 @@ class QueueCreator_c
int GetGroupDistinctAttrIndex() const { return GetAliasedAttrIndex ( m_tQuery.m_sGroupDistinct, m_tQuery, *m_pSorterSchema ); }

bool CanCalcFastCountDistinct() const;
bool CanCalcFastCountFilter() const;
bool CanCalcFastCount() const;
Precalculated_t FetchPrecalculatedValues() const;

Expand Down Expand Up @@ -6678,13 +6683,20 @@ bool QueueCreator_c::CanCalcFastCountDistinct() const
}


bool QueueCreator_c::CanCalcFastCount() const
/// checks whether count(*) with exactly one filter may be served from a precalculated value
/// requires: no predicted aggregates, m_bImplicit set, m_bDistinct unset,
/// exactly one filter and an empty m_sQuery
bool QueueCreator_c::CanCalcFastCountFilter() const
{
	// PredictAggregates() is evaluated first and unconditionally
	if ( PredictAggregates() )
		return false;

	if ( !m_tGroupSorterSettings.m_bImplicit || m_tGroupSorterSettings.m_bDistinct )
		return false;

	if ( m_tQuery.m_dFilters.GetLength()!=1 )
		return false;

	return m_tQuery.m_sQuery.IsEmpty();
}


/// checks whether count(*) w/o filters may be served from a precalculated value
/// requires: no predicted aggregates, m_bImplicit set, m_bDistinct unset,
/// no filters at all and an empty m_sQuery
bool QueueCreator_c::CanCalcFastCount() const
{
	// PredictAggregates() is evaluated first and unconditionally
	if ( PredictAggregates() )
		return false;

	if ( !m_tGroupSorterSettings.m_bImplicit || m_tGroupSorterSettings.m_bDistinct )
		return false;

	if ( !m_tQuery.m_dFilters.IsEmpty() )
		return false;

	return m_tQuery.m_sQuery.IsEmpty();
}


Precalculated_t QueueCreator_c::FetchPrecalculatedValues() const
{
Precalculated_t tPrecalc;
Expand All @@ -6695,8 +6707,11 @@ Precalculated_t QueueCreator_c::FetchPrecalculatedValues() const
tPrecalc.m_iCountDistinct = m_tSettings.m_fnGetCountDistinct ? m_tSettings.m_fnGetCountDistinct ( m_pSorterSchema->GetAttr(iCountDistinctAttr).m_sName ) : -1;
}

if ( CanCalcFastCountFilter() )
tPrecalc.m_iCountFilter = m_tSettings.m_fnGetCountFilter ? m_tSettings.m_fnGetCountFilter ( m_tQuery.m_dFilters[0] ) : -1;

if ( CanCalcFastCount() )
tPrecalc.m_iCount = m_tSettings.m_fnGetCount ? m_tSettings.m_fnGetCount ( m_tQuery.m_dFilters[0] ) : -1;
tPrecalc.m_iCount = m_tSettings.m_fnGetCount ? m_tSettings.m_fnGetCount() : -1;

return tPrecalc;
}
Expand Down

0 comments on commit 2bb9276

Please sign in to comment.