@@ -678,6 +678,11 @@ size_t ZSTD_HcFindBestMatch(
                         ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
 
     U32 matchIndex;
+    U32 ref = 0, lookup = 0, pr = 0;
+    if (dictMode == ZSTD_noDict) {
+        /* read initial reference */
+        ref = MEM_read32(ip);
+    }
 
     if (dictMode == ZSTD_dedicatedDictSearch) {
         const U32* entry = &dms->hashTable[ddsIdx];
@@ -692,8 +697,14 @@ size_t ZSTD_HcFindBestMatch(
         if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
             const BYTE* const match = base + matchIndex;
             assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
-            if (match[ml] == ip[ml])   /* potentially better */
-                currentMl = ZSTD_count(ip, match, iLimit);
+            if (dictMode == ZSTD_noDict) {
+                lookup = MEM_read32(match + pr);   /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
+                if (ref == lookup)   /* potentially better */
+                    currentMl = ZSTD_count(ip, match, iLimit);
+            } else {
+                if (match[ml] == ip[ml])   /* potentially better */
+                    currentMl = ZSTD_count(ip, match, iLimit);
+            }
         } else {
             const BYTE* const match = dictBase + matchIndex;
             assert(match+4 <= dictEnd);
@@ -706,6 +717,11 @@ size_t ZSTD_HcFindBestMatch(
             ml = currentMl;
             *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            if (dictMode == ZSTD_noDict) {
+                /* have a new longer ml, now advance one more byte and read new reference */
+                pr = (U32)ml - sizeof(ref) + 1;
+                ref = MEM_read32(ip + pr);
+            }
         }
 
         if (matchIndex <= minChain) break;
@@ -1210,6 +1226,11 @@ size_t ZSTD_RowFindBestMatch(
         size_t numMatches = 0;
         size_t currMatch = 0;
         ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
+        U32 ref = 0, lookup = 0, pr = 0;
+        if (dictMode == ZSTD_noDict) {
+            /* read initial ref */
+            ref = MEM_read32(ip);
+        }
 
         /* Cycle through the matches and prefetch */
         for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
@@ -1244,8 +1265,14 @@ size_t ZSTD_RowFindBestMatch(
             if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
                 const BYTE* const match = base + matchIndex;
                 assert(matchIndex >= dictLimit);    /* ensures this is true if dictMode != ZSTD_extDict */
-                if (match[ml] == ip[ml])   /* potentially better */
-                    currentMl = ZSTD_count(ip, match, iLimit);
+                if (dictMode == ZSTD_noDict) {
+                    lookup = MEM_read32(match + pr);   /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
+                    if (ref == lookup)   /* potentially better */
+                        currentMl = ZSTD_count(ip, match, iLimit);
+                } else {
+                    if (match[ml] == ip[ml])   /* potentially better */
+                        currentMl = ZSTD_count(ip, match, iLimit);
+                }
             } else {
                 const BYTE* const match = dictBase + matchIndex;
                 assert(match + 4 <= dictEnd);
@@ -1258,6 +1285,11 @@ size_t ZSTD_RowFindBestMatch(
                 ml = currentMl;
                 *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+                if (dictMode == ZSTD_noDict) {
+                    /* have a new longer ml, now advance one more byte and read new reference */
+                    pr = (U32)ml - sizeof(ref) + 1;
+                    ref = MEM_read32(ip + pr);
+                }
             }
         }
     }
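
For context, the diff above replaces the single-byte probe match[ml] == ip[ml] with a 4-byte comparison in the ZSTD_noDict case: ref caches the 4 bytes of ip ending at offset ml, and both ref and the offset pr = ml + 1 - sizeof(U32) are refreshed each time a longer match is found. The snippet below is a minimal, self-contained sketch of that filter idea; the names read32 and tail4_filter are hypothetical, and memcpy stands in for zstd's internal MEM_read32. It illustrates the technique, not the library's code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for zstd's MEM_read32(): an unaligned 4-byte load via memcpy.
 * Byte order is irrelevant here; both sides are read the same way. */
static uint32_t read32(const void* p)
{
    uint32_t v;
    memcpy(&v, p, sizeof(v));
    return v;
}

/* Hypothetical helper illustrating the patch's filter: instead of the
 * single-byte probe match[ml] == ip[ml], compare the 4 bytes that end at
 * offset ml on both sides.  Candidates that fail cannot beat the current
 * best length ml, so the expensive full comparison (ZSTD_count) is skipped.
 * Requires ml >= 3 and at least ml + 1 readable bytes on both sides. */
static int tail4_filter(const char* ip, const char* match, size_t ml)
{
    const size_t pr = ml + 1 - sizeof(uint32_t);   /* == ml - 3 */
    return read32(ip + pr) == read32(match + pr);
}

int main(void)
{
    const char* ip    = "abcdefgh";
    const char* cand1 = "abcdXYZW";   /* diverges from ip before offset 5 */
    const char* cand2 = "abcdefZW";   /* still equal to ip at offset 5 */
    const size_t ml   = 5;            /* current best match length */

    printf("cand1 passes: %d\n", tail4_filter(ip, cand1, ml));  /* prints 0 */
    printf("cand2 passes: %d\n", tail4_filter(ip, cand2, ml));  /* prints 1 */
    return 0;
}

The wider probe rejects more candidates before the full ZSTD_count() walk than the single-byte check did, at the cost of re-reading ref whenever ml grows and of needing the bytes up to offset ml to be readable on both sides.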