Skip to content

Commit 78e60fc

Browse files
committed
compress: check more bytes to reduce ZSTD_count calls
Compare 4 bytes instead of 1 byte in ZSTD_noDict mode. This filters out candidates that match at match[ml] but mismatch somewhere in match[ml-3]..match[ml-1], so ZSTD_count is called less often. Signed-off-by: Jun He <[email protected]> Change-Id: I3449ea423d5c8e8344f75341f19a2d1643c703f6
1 parent 43f21a6 commit 78e60fc

File tree

1 file changed

+36
-4
lines changed

1 file changed

+36
-4
lines changed

lib/compress/zstd_lazy.c

+36-4
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,11 @@ size_t ZSTD_HcFindBestMatch(
678678
? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
679679

680680
U32 matchIndex;
681+
U32 ref = 0, lookup = 0, pr = 0;
682+
if (dictMode == ZSTD_noDict) {
683+
/* read initial reference */
684+
ref = MEM_read32(ip);
685+
}
681686

682687
if (dictMode == ZSTD_dedicatedDictSearch) {
683688
const U32* entry = &dms->hashTable[ddsIdx];
@@ -692,8 +697,14 @@ size_t ZSTD_HcFindBestMatch(
692697
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
693698
const BYTE* const match = base + matchIndex;
694699
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
695-
if (match[ml] == ip[ml]) /* potentially better */
696-
currentMl = ZSTD_count(ip, match, iLimit);
700+
if (dictMode == ZSTD_noDict) {
701+
lookup = MEM_read32(match + pr); /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
702+
if (ref == lookup) /* potentially better */
703+
currentMl = ZSTD_count(ip, match, iLimit);
704+
} else {
705+
if (match[ml] == ip[ml]) /* potentially better */
706+
currentMl = ZSTD_count(ip, match, iLimit);
707+
}
697708
} else {
698709
const BYTE* const match = dictBase + matchIndex;
699710
assert(match+4 <= dictEnd);
@@ -706,6 +717,11 @@ size_t ZSTD_HcFindBestMatch(
706717
ml = currentMl;
707718
*offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
708719
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
720+
if (dictMode == ZSTD_noDict) {
721+
/* have a new longer ml, now advance one more byte and read new reference */
722+
pr = (U32)ml - sizeof(ref) + 1;
723+
ref = MEM_read32(ip + pr);
724+
}
709725
}
710726

711727
if (matchIndex <= minChain) break;
@@ -1210,6 +1226,11 @@ size_t ZSTD_RowFindBestMatch(
12101226
size_t numMatches = 0;
12111227
size_t currMatch = 0;
12121228
ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
1229+
U32 ref = 0, lookup = 0, pr = 0;
1230+
if (dictMode == ZSTD_noDict) {
1231+
/* read initial ref */
1232+
ref = MEM_read32(ip);
1233+
}
12131234

12141235
/* Cycle through the matches and prefetch */
12151236
for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
@@ -1244,8 +1265,14 @@ size_t ZSTD_RowFindBestMatch(
12441265
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
12451266
const BYTE* const match = base + matchIndex;
12461267
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
1247-
if (match[ml] == ip[ml]) /* potentially better */
1248-
currentMl = ZSTD_count(ip, match, iLimit);
1268+
if (dictMode == ZSTD_noDict) {
1269+
lookup = MEM_read32(match + pr); /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
1270+
if (ref == lookup) /* potentially better */
1271+
currentMl = ZSTD_count(ip, match, iLimit);
1272+
} else {
1273+
if (match[ml] == ip[ml]) /* potentially better */
1274+
currentMl = ZSTD_count(ip, match, iLimit);
1275+
}
12491276
} else {
12501277
const BYTE* const match = dictBase + matchIndex;
12511278
assert(match+4 <= dictEnd);
@@ -1258,6 +1285,11 @@ size_t ZSTD_RowFindBestMatch(
12581285
ml = currentMl;
12591286
*offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
12601287
if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
1288+
if (dictMode == ZSTD_noDict) {
1289+
/* have a new longer ml, now advance one more byte and read new reference */
1290+
pr = (U32)ml - sizeof(ref) + 1;
1291+
ref = MEM_read32(ip + pr);
1292+
}
12611293
}
12621294
}
12631295
}

0 commit comments

Comments
 (0)