@@ -1317,14 +1317,10 @@ size_t ZSTD_RowFindBestMatch(
1317
1317
}
1318
1318
1319
1319
1320
- typedef size_t (* searchMax_f )(
1321
- ZSTD_matchState_t * ms ,
1322
- const BYTE * ip , const BYTE * iLimit , size_t * offsetPtr );
1323
-
1324
1320
/**
1325
- * This struct contains the functions necessary for lazy to search .
1326
- * Currently, that is only searchMax. However, it is still valuable to have the
1327
- * VTable because this makes it easier to add more functions to the VTable later .
1321
+ * Generate search functions templated on (dictMode, mls, rowLog) .
1322
+ * These functions are outlined for code size & compilation time.
1323
+ * ZSTD_searchMax() dispatches to the correct implementation function .
1328
1324
*
1329
1325
* TODO: The start of the search function involves loading and calculating a
1330
1326
* bunch of constants from the ZSTD_matchState_t. These computations could be
@@ -1342,38 +1338,35 @@ typedef size_t (*searchMax_f)(
1342
1338
* the single segment loop. It should go in searchMax instead of its own
1343
1339
* function to avoid having multiple virtual function calls per search.
1344
1340
*/
1345
- typedef struct {
1346
- searchMax_f searchMax ;
1347
- } ZSTD_LazyVTable ;
1348
1341
1349
- #define GEN_ZSTD_BT_VTABLE (dictMode , mls ) \
1350
- static size_t ZSTD_BtFindBestMatch_##dictMode##_##mls( \
1342
+ #define ZSTD_BT_SEARCH_FN (dictMode , mls ) ZSTD_BtFindBestMatch_##dictMode##_##mls
1343
+ #define ZSTD_HC_SEARCH_FN (dictMode , mls ) ZSTD_HcFindBestMatch_##dictMode##_##mls
1344
+ #define ZSTD_ROW_SEARCH_FN (dictMode , mls , rowLog ) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
1345
+
1346
+ #define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
1347
+
1348
+ #define GEN_ZSTD_BT_SEARCH_FN (dictMode , mls ) \
1349
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \
1351
1350
ZSTD_matchState_t* ms, \
1352
1351
const BYTE* ip, const BYTE* const iLimit, \
1353
1352
size_t* offBasePtr) \
1354
1353
{ \
1355
1354
assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1356
1355
return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode);\
1357
1356
} \
1358
- static const ZSTD_LazyVTable ZSTD_BtVTable_##dictMode##_##mls = { \
1359
- ZSTD_BtFindBestMatch_##dictMode##_##mls \
1360
- };
1361
1357
1362
- #define GEN_ZSTD_HC_VTABLE (dictMode , mls ) \
1363
- static size_t ZSTD_HcFindBestMatch_## dictMode##_## mls( \
1358
+ #define GEN_ZSTD_HC_SEARCH_FN (dictMode , mls ) \
1359
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN( dictMode, mls)( \
1364
1360
ZSTD_matchState_t* ms, \
1365
1361
const BYTE* ip, const BYTE* const iLimit, \
1366
1362
size_t* offsetPtr) \
1367
1363
{ \
1368
1364
assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1369
1365
return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
1370
1366
} \
1371
- static const ZSTD_LazyVTable ZSTD_HcVTable_##dictMode##_##mls = { \
1372
- ZSTD_HcFindBestMatch_##dictMode##_##mls \
1373
- };
1374
1367
1375
- #define GEN_ZSTD_ROW_VTABLE (dictMode , mls , rowLog ) \
1376
- static size_t ZSTD_RowFindBestMatch_## dictMode##_## mls##_## rowLog( \
1368
+ #define GEN_ZSTD_ROW_SEARCH_FN (dictMode , mls , rowLog ) \
1369
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN( dictMode, mls, rowLog)( \
1377
1370
ZSTD_matchState_t* ms, \
1378
1371
const BYTE* ip, const BYTE* const iLimit, \
1379
1372
size_t* offsetPtr) \
@@ -1382,9 +1375,6 @@ typedef struct {
1382
1375
assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \
1383
1376
return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
1384
1377
} \
1385
- static const ZSTD_LazyVTable ZSTD_RowVTable_##dictMode##_##mls##_##rowLog = { \
1386
- ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog \
1387
- };
1388
1378
1389
1379
#define ZSTD_FOR_EACH_ROWLOG (X , dictMode , mls ) \
1390
1380
X(dictMode, mls, 4) \
@@ -1407,84 +1397,114 @@ typedef struct {
1407
1397
X(__VA_ARGS__, dictMatchState) \
1408
1398
X(__VA_ARGS__, dedicatedDictSearch)
1409
1399
1410
- /* Generate Row VTables for each combination of (dictMode, mls, rowLog) */
1411
- ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS_ROWLOG , GEN_ZSTD_ROW_VTABLE )
1412
- /* Generate Binary Tree VTables for each combination of (dictMode, mls) */
1413
- ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS , GEN_ZSTD_BT_VTABLE )
1414
- /* Generate Hash Chain VTables for each combination of (dictMode, mls) */
1415
- ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS , GEN_ZSTD_HC_VTABLE )
1416
-
1417
- #define GEN_ZSTD_BT_VTABLE_ARRAY (dictMode ) \
1418
- { \
1419
- &ZSTD_BtVTable_##dictMode##_4, \
1420
- &ZSTD_BtVTable_##dictMode##_5, \
1421
- &ZSTD_BtVTable_##dictMode##_6 \
1422
- }
1423
-
1424
- #define GEN_ZSTD_HC_VTABLE_ARRAY (dictMode ) \
1425
- { \
1426
- &ZSTD_HcVTable_##dictMode##_4, \
1427
- &ZSTD_HcVTable_##dictMode##_5, \
1428
- &ZSTD_HcVTable_##dictMode##_6 \
1429
- }
1400
+ /* Generate row search fns for each combination of (dictMode, mls, rowLog) */
1401
+ ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS_ROWLOG , GEN_ZSTD_ROW_SEARCH_FN )
1402
+ /* Generate binary tree search fns for each combination of (dictMode, mls) */
1403
+ ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS , GEN_ZSTD_BT_SEARCH_FN )
1404
+ /* Generate hash chain search fns for each combination of (dictMode, mls) */
1405
+ ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS , GEN_ZSTD_HC_SEARCH_FN )
1406
+ typedef enum { search_hashChain = 0 , search_binaryTree = 1 , search_rowHash = 2 } searchMethod_e ;
1430
1407
1431
- #define GEN_ZSTD_ROW_VTABLE_ARRAY_ (dictMode , mls ) \
1432
- { \
1433
- &ZSTD_RowVTable_##dictMode##_##mls##_4, \
1434
- &ZSTD_RowVTable_##dictMode##_##mls##_5, \
1435
- &ZSTD_RowVTable_##dictMode##_##mls##_6 \
1408
+ #define GEN_ZSTD_CALL_BT_SEARCH_FN (dictMode , mls ) \
1409
+ return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
1410
+ #define GEN_ZSTD_CALL_HC_SEARCH_FN (dictMode , mls ) \
1411
+ return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
1412
+ #define GEN_ZSTD_CALL_ROW_SEARCH_FN (dictMode , rowLog , mls ) \
1413
+ return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
1414
+
1415
+ #define ZSTD_SWITCH_MLS (X , ...) \
1416
+ switch (mls) { \
1417
+ default: assert(0); \
1418
+ case 4: \
1419
+ X(__VA_ARGS__, 4) \
1420
+ break; \
1421
+ case 5: \
1422
+ X(__VA_ARGS__, 5) \
1423
+ break; \
1424
+ case 6: \
1425
+ X(__VA_ARGS__, 6) \
1426
+ break; \
1436
1427
}
1437
1428
1438
- #define GEN_ZSTD_ROW_VTABLE_ARRAY (dictMode ) \
1439
- { \
1440
- GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 4), \
1441
- GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 5), \
1442
- GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 6) \
1429
+ #define ZSTD_SWITCH_ROW_LOG (X , ...) \
1430
+ switch (rowLog) { \
1431
+ default: assert(0); \
1432
+ case 4: \
1433
+ ZSTD_SWITCH_MLS(X, __VA_ARGS__, 4) \
1434
+ break; \
1435
+ case 5: \
1436
+ ZSTD_SWITCH_MLS(X, __VA_ARGS__, 5) \
1437
+ break; \
1438
+ case 6: \
1439
+ ZSTD_SWITCH_MLS(X, __VA_ARGS__, 6) \
1440
+ break; \
1443
1441
}
1444
1442
1445
- #define GEN_ZSTD_VTABLE_ARRAY (X ) \
1446
- { \
1447
- X(noDict), \
1448
- X(extDict), \
1449
- X(dictMatchState), \
1450
- X(dedicatedDictSearch) \
1443
+ #define ZSTD_SWITCH_SEARCH_METHOD (dictMode ) \
1444
+ switch (searchMethod) { \
1445
+ default: assert(0); \
1446
+ case search_hashChain: \
1447
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
1448
+ break; \
1449
+ case search_binaryTree: \
1450
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
1451
+ break; \
1452
+ case search_rowHash: \
1453
+ ZSTD_SWITCH_ROW_LOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode) \
1454
+ break; \
1451
1455
}
1452
1456
1453
- /* *******************************
1454
- * Common parser - lazy strategy
1455
- *********************************/
1456
- typedef enum { search_hashChain = 0 , search_binaryTree = 1 , search_rowHash = 2 } searchMethod_e ;
1457
-
1458
1457
/**
1459
- * This table is indexed first by the four ZSTD_dictMode_e values, and then
1460
- * by the two searchMethod_e values. NULLs are placed for configurations
1461
- * that should never occur (extDict modes go to the other implementation
1462
- * below and there is no DDSS for binary tree search yet).
1458
+ * Searches for the longest match at @p ip.
1459
+ * Dispatches to the correct implementation function based on the
1460
+ * (searchMethod, dictMode, mls, rowLog). We use switch statements
1461
+ * here instead of using an indirect function call through a function
1462
+ * pointer because after Spectre and Meltdown mitigations, indirect
1463
+ * function calls can be very costly, especially in the kernel.
1464
+ *
1465
+ * NOTE: dictMode and searchMethod should be templated, so those switch
1466
+ * statements should be optimized out. Only the mls & rowLog switches
1467
+ * should be left.
1468
+ *
1469
+ * @param ms The match state.
1470
+ * @param ip The position to search at.
1471
+ * @param iend The end of the input data.
1472
+ * @param[out] offsetPtr Stores the match offset into this pointer.
1473
+ * @param mls The minimum match length to search for (cParams.minMatch bounded to the range [4, 6]).
1474
+ * @param rowLog The row log (if applicable), in the range [4, 6].
1475
+ * @param searchMethod The search method to use (templated).
1476
+ * @param dictMode The dictMode (templated).
1477
+ *
1478
+ * @returns The length of the longest match found, or < mls if no match is found.
1479
+ * If a match is found its offset is stored in @p offsetPtr.
1463
1480
*/
1464
-
1465
- static ZSTD_LazyVTable const *
1466
- ZSTD_selectLazyVTable (ZSTD_matchState_t const * ms , searchMethod_e searchMethod , ZSTD_dictMode_e dictMode )
1481
+ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax (
1482
+ ZSTD_matchState_t * ms ,
1483
+ const BYTE * ip ,
1484
+ const BYTE * iend ,
1485
+ size_t * offsetPtr ,
1486
+ U32 const mls ,
1487
+ U32 const rowLog ,
1488
+ searchMethod_e const searchMethod ,
1489
+ ZSTD_dictMode_e const dictMode )
1467
1490
{
1468
- /* Fill the Hc/Bt VTable arrays with the right functions for the (dictMode, mls) combination. */
1469
- ZSTD_LazyVTable const * const hcVTables [4 ][3 ] = GEN_ZSTD_VTABLE_ARRAY (GEN_ZSTD_HC_VTABLE_ARRAY );
1470
- ZSTD_LazyVTable const * const btVTables [4 ][3 ] = GEN_ZSTD_VTABLE_ARRAY (GEN_ZSTD_BT_VTABLE_ARRAY );
1471
- /* Fill the Row VTable array with the right functions for the (dictMode, mls, rowLog) combination. */
1472
- ZSTD_LazyVTable const * const rowVTables [4 ][3 ][3 ] = GEN_ZSTD_VTABLE_ARRAY (GEN_ZSTD_ROW_VTABLE_ARRAY );
1473
-
1474
- U32 const mls = MAX (4 , MIN (6 , ms -> cParams .minMatch ));
1475
- U32 const rowLog = MAX (4 , MIN (6 , ms -> cParams .searchLog ));
1476
- switch (searchMethod ) {
1477
- case search_hashChain :
1478
- return hcVTables [dictMode ][mls - 4 ];
1479
- case search_binaryTree :
1480
- return btVTables [dictMode ][mls - 4 ];
1481
- case search_rowHash :
1482
- return rowVTables [dictMode ][mls - 4 ][rowLog - 4 ];
1483
- default :
1484
- return NULL ;
1491
+ if (dictMode == ZSTD_noDict ) {
1492
+ ZSTD_SWITCH_SEARCH_METHOD (noDict )
1493
+ } else if (dictMode == ZSTD_extDict ) {
1494
+ ZSTD_SWITCH_SEARCH_METHOD (extDict )
1495
+ } else if (dictMode == ZSTD_dictMatchState ) {
1496
+ ZSTD_SWITCH_SEARCH_METHOD (dictMatchState )
1497
+ } else if (dictMode == ZSTD_dedicatedDictSearch ) {
1498
+ ZSTD_SWITCH_SEARCH_METHOD (dedicatedDictSearch )
1485
1499
}
1500
+ assert (0 );
1501
+ return 0 ;
1486
1502
}
1487
1503
1504
+ /* *******************************
1505
+ * Common parser - lazy strategy
1506
+ *********************************/
1507
+
1488
1508
FORCE_INLINE_TEMPLATE size_t
1489
1509
ZSTD_compressBlock_lazy_generic (
1490
1510
ZSTD_matchState_t * ms , seqStore_t * seqStore ,
@@ -1501,8 +1521,9 @@ ZSTD_compressBlock_lazy_generic(
1501
1521
const BYTE * const base = ms -> window .base ;
1502
1522
const U32 prefixLowestIndex = ms -> window .dictLimit ;
1503
1523
const BYTE * const prefixLowest = base + prefixLowestIndex ;
1524
+ const U32 rowLog = BOUNDED (4 , ms -> cParams .searchLog , 6 );
1525
+ const U32 mls = BOUNDED (4 , ms -> cParams .minMatch , 6 );
1504
1526
1505
- searchMax_f const searchMax = ZSTD_selectLazyVTable (ms , searchMethod , dictMode )-> searchMax ;
1506
1527
U32 offset_1 = rep [0 ], offset_2 = rep [1 ];
1507
1528
U32 offsetSaved1 = 0 , offsetSaved2 = 0 ;
1508
1529
@@ -1519,8 +1540,6 @@ ZSTD_compressBlock_lazy_generic(
1519
1540
0 ;
1520
1541
const U32 dictAndPrefixLength = (U32 )((ip - prefixLowest ) + (dictEnd - dictLowest ));
1521
1542
1522
- assert (searchMax != NULL );
1523
-
1524
1543
DEBUGLOG (5 , "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)" , (U32 )dictMode , (U32 )searchMethod );
1525
1544
ip += (dictAndPrefixLength == 0 );
1526
1545
if (dictMode == ZSTD_noDict ) {
@@ -1538,7 +1557,6 @@ ZSTD_compressBlock_lazy_generic(
1538
1557
}
1539
1558
1540
1559
if (searchMethod == search_rowHash ) {
1541
- const U32 rowLog = MAX (4 , MIN (6 , ms -> cParams .searchLog ));
1542
1560
ZSTD_row_fillHashCache (ms , base , rowLog ,
1543
1561
MIN (ms -> cParams .minMatch , 6 /* mls caps out at 6 */ ),
1544
1562
ms -> nextToUpdate , ilimit );
@@ -1579,7 +1597,7 @@ ZSTD_compressBlock_lazy_generic(
1579
1597
1580
1598
/* first search (depth 0) */
1581
1599
{ size_t offbaseFound = 999999999 ;
1582
- size_t const ml2 = searchMax (ms , ip , iend , & offbaseFound );
1600
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & offbaseFound , mls , rowLog , searchMethod , dictMode );
1583
1601
if (ml2 > matchLength )
1584
1602
matchLength = ml2 , start = ip , offBase = offbaseFound ;
1585
1603
}
@@ -1618,7 +1636,7 @@ ZSTD_compressBlock_lazy_generic(
1618
1636
}
1619
1637
}
1620
1638
{ size_t ofbCandidate = 999999999 ;
1621
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
1639
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , dictMode );
1622
1640
int const gain2 = (int )(ml2 * 4 - ZSTD_highbit32 ((U32 )ofbCandidate )); /* raw approx */
1623
1641
int const gain1 = (int )(matchLength * 4 - ZSTD_highbit32 ((U32 )offBase ) + 4 );
1624
1642
if ((ml2 >= 4 ) && (gain2 > gain1 )) {
@@ -1654,7 +1672,7 @@ ZSTD_compressBlock_lazy_generic(
1654
1672
}
1655
1673
}
1656
1674
{ size_t ofbCandidate = 999999999 ;
1657
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
1675
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , dictMode );
1658
1676
int const gain2 = (int )(ml2 * 4 - ZSTD_highbit32 ((U32 )ofbCandidate )); /* raw approx */
1659
1677
int const gain1 = (int )(matchLength * 4 - ZSTD_highbit32 ((U32 )offBase ) + 7 );
1660
1678
if ((ml2 >= 4 ) && (gain2 > gain1 )) {
@@ -1899,9 +1917,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1899
1917
const BYTE * const dictEnd = dictBase + dictLimit ;
1900
1918
const BYTE * const dictStart = dictBase + ms -> window .lowLimit ;
1901
1919
const U32 windowLog = ms -> cParams .windowLog ;
1902
- const U32 rowLog = ms -> cParams .searchLog < 5 ? 4 : 5 ;
1920
+ const U32 rowLog = BOUNDED (4 , ms -> cParams .searchLog , 6 );
1921
+ const U32 mls = BOUNDED (4 , ms -> cParams .minMatch , 6 );
1903
1922
1904
- searchMax_f const searchMax = ZSTD_selectLazyVTable (ms , searchMethod , ZSTD_extDict )-> searchMax ;
1905
1923
U32 offset_1 = rep [0 ], offset_2 = rep [1 ];
1906
1924
1907
1925
DEBUGLOG (5 , "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)" , (U32 )searchMethod );
@@ -1943,7 +1961,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1943
1961
1944
1962
/* first search (depth 0) */
1945
1963
{ size_t ofbCandidate = 999999999 ;
1946
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
1964
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , ZSTD_extDict );
1947
1965
if (ml2 > matchLength )
1948
1966
matchLength = ml2 , start = ip , offBase = ofbCandidate ;
1949
1967
}
@@ -1978,7 +1996,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1978
1996
1979
1997
/* search match, depth 1 */
1980
1998
{ size_t ofbCandidate = 999999999 ;
1981
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
1999
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , ZSTD_extDict );
1982
2000
int const gain2 = (int )(ml2 * 4 - ZSTD_highbit32 ((U32 )ofbCandidate )); /* raw approx */
1983
2001
int const gain1 = (int )(matchLength * 4 - ZSTD_highbit32 ((U32 )offBase ) + 4 );
1984
2002
if ((ml2 >= 4 ) && (gain2 > gain1 )) {
@@ -2010,7 +2028,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
2010
2028
2011
2029
/* search match, depth 2 */
2012
2030
{ size_t ofbCandidate = 999999999 ;
2013
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
2031
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , ZSTD_extDict );
2014
2032
int const gain2 = (int )(ml2 * 4 - ZSTD_highbit32 ((U32 )ofbCandidate )); /* raw approx */
2015
2033
int const gain1 = (int )(matchLength * 4 - ZSTD_highbit32 ((U32 )offBase ) + 7 );
2016
2034
if ((ml2 >= 4 ) && (gain2 > gain1 )) {
0 commit comments