Skip to content

Commit

Permalink
Improvement #6872 - Indexed STARTING WITH execution is very slow with UNICODE collation.
Browse files Browse the repository at this point in the history
  • Loading branch information
asfernandes committed Jun 25, 2021
1 parent 84a8e2d commit d680aed
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 29 deletions.
63 changes: 36 additions & 27 deletions src/common/unicode_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1471,29 +1471,53 @@ UnicodeUtil::Utf16Collation* UnicodeUtil::Utf16Collation::create(
icu->ucolSetAttribute(compareCollator, UCOL_STRENGTH, UCOL_SECONDARY, &status);
}

USet* contractions = icu->usetOpen(0, 0);
// status not verified here.
icu->ucolGetContractions(partialCollator, contractions, &status);

Utf16Collation* obj = FB_NEW Utf16Collation();
obj->icu = icu;
obj->tt = tt;
obj->attributes = attributes;
obj->compareCollator = compareCollator;
obj->partialCollator = partialCollator;
obj->sortCollator = sortCollator;
obj->contractions = contractions;
obj->contractionsCount = icu->usetGetItemCount(contractions);
obj->numericSort = isNumericSort;
obj->maxContractionsPrefixLength = 0;

USet* contractions = icu->usetOpen(1, 0);
// status not verified here.
icu->ucolGetContractions(partialCollator, contractions, &status);

int contractionsCount = icu->usetGetItemCount(contractions);

for (int contractionIndex = 0; contractionIndex < contractionsCount; ++contractionIndex)
{
UChar str[10];
UChar32 start, end;

status = U_ZERO_ERROR;
int len = icu->usetGetItem(contractions, contractionIndex, &start, &end, str, sizeof(str), &status);

if (len >= 2)
{
obj->maxContractionsPrefixLength = len - 1 > obj->maxContractionsPrefixLength ?
len - 1 : obj->maxContractionsPrefixLength;

for (int currentLen = 1; currentLen < len; ++currentLen)
{
string s(reinterpret_cast<const char*>(str), currentLen * 2);

if (!obj->contractionsPrefix.exist(s))
obj->contractionsPrefix.push(s);
}
}
}

icu->usetClose(contractions);

return obj;
}


UnicodeUtil::Utf16Collation::~Utf16Collation()
{
icu->usetClose(contractions);

icu->ucolClose(compareCollator);
icu->ucolClose(partialCollator);
icu->ucolClose(sortCollator);
Expand Down Expand Up @@ -1544,30 +1568,16 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src
switch (key_type)
{
case INTL_KEY_PARTIAL:
{
coll = partialCollator;

// Remove last bytes of key if they are start of a contraction
// to correctly find in the index.
ConversionICU& cIcu(getConversionICU());
for (int i = 0; i < contractionsCount; ++i)
{
UChar str[10];
UErrorCode status = U_ZERO_ERROR;
int len = icu->usetGetItem(contractions, i, NULL, NULL, str, sizeof(str), &status);
if (len < 0)
fatal_exception::raiseFmt("uset_getItem() error %d", status);

if (unsigned(len) > srcLenLong) // safe cast - sign checked
len = srcLenLong;
else
--len;

// safe cast - alignment not changed
if (cIcu.u_strCompare(str, len,
reinterpret_cast<const UChar*>(src) + srcLenLong - len, len, true) == 0)
for (int i = MIN(maxContractionsPrefixLength, srcLenLong); i > 0; --i)
{
if (contractionsPrefix.exist(string(reinterpret_cast<const char*>(src + srcLenLong - i), i * 2)))
{
srcLenLong -= len;
srcLenLong -= i;
break;
}
}
Expand All @@ -1588,7 +1598,6 @@ USHORT UnicodeUtil::Utf16Collation::stringToKey(USHORT srcLen, const USHORT* src
}

break;
}

case INTL_KEY_UNIQUE:
coll = compareCollator;
Expand Down
10 changes: 8 additions & 2 deletions src/common/unicode_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "../common/IntlUtil.h"
#include "../common/os/mod_loader.h"
#include "../common/classes/fb_string.h"
#include "../common/classes/objects_array.h"
#include <unicode/ucnv.h>
#include <unicode/ucal.h>

Expand Down Expand Up @@ -184,6 +185,11 @@ class UnicodeUtil
Firebird::IntlUtil::SpecificAttributesMap& specificAttributes,
const Firebird::string& configInfo);

// Default constructor: constructs only contractionsPrefix, passing it the pool
// returned by getDefaultMemoryPool(). The collator pointers,
// maxContractionsPrefixLength and numericSort are not set here;
// Utf16Collation::create() assigns them right after allocating the object.
Utf16Collation()
: contractionsPrefix(*getDefaultMemoryPool())
{
}

~Utf16Collation();

USHORT keyLength(USHORT len) const;
Expand All @@ -206,8 +212,8 @@ class UnicodeUtil
UCollator* compareCollator;
UCollator* partialCollator;
UCollator* sortCollator;
USet* contractions;
int contractionsCount;
Firebird::SortedObjectsArray<Firebird::string> contractionsPrefix; // UTF-16 string
unsigned maxContractionsPrefixLength; // number of characters
bool numericSort;
};

Expand Down

0 comments on commit d680aed

Please sign in to comment.