@@ -1317,14 +1317,10 @@ size_t ZSTD_RowFindBestMatch(
1317
1317
}
1318
1318
1319
1319
1320
- typedef size_t (* searchMax_f )(
1321
- ZSTD_matchState_t * ms ,
1322
- const BYTE * ip , const BYTE * iLimit , size_t * offsetPtr );
1323
-
1324
1320
/**
1325
- * This struct contains the functions necessary for lazy to search .
1326
- * Currently, that is only searchMax. However, it is still valuable to have the
1327
- * VTable because this makes it easier to add more functions to the VTable later .
1321
+ * Generate search functions templated on (dictMode, mls, rowLog) .
1322
+ * These functions are outlined for code size & compilation time.
1323
+ * ZSTD_searchMax() dispatches to the correct implementation function .
1328
1324
*
1329
1325
* TODO: The start of the search function involves loading and calculating a
1330
1326
* bunch of constants from the ZSTD_matchState_t. These computations could be
@@ -1342,38 +1338,35 @@ typedef size_t (*searchMax_f)(
1342
1338
* the single segment loop. It should go in searchMax instead of its own
1343
1339
* function to avoid having multiple virtual function calls per search.
1344
1340
*/
1345
- typedef struct {
1346
- searchMax_f searchMax ;
1347
- } ZSTD_LazyVTable ;
1348
1341
1349
- #define GEN_ZSTD_BT_VTABLE (dictMode , mls ) \
1350
- static size_t ZSTD_BtFindBestMatch_##dictMode##_##mls( \
1351
- ZSTD_matchState_t* ms, \
1352
- const BYTE* ip, const BYTE* const iLimit, \
1353
- size_t* offBasePtr) \
1354
- { \
1355
- assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1356
- return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode);\
1357
- } \
1358
- static const ZSTD_LazyVTable ZSTD_BtVTable_##dictMode##_##mls = { \
1359
- ZSTD_BtFindBestMatch_##dictMode##_##mls \
1360
- };
1342
+ #define ZSTD_BT_SEARCH_FN (dictMode , mls ) ZSTD_BtFindBestMatch_##dictMode##_##mls
1343
+ #define ZSTD_HC_SEARCH_FN (dictMode , mls ) ZSTD_HcFindBestMatch_##dictMode##_##mls
1344
+ #define ZSTD_ROW_SEARCH_FN (dictMode , mls , rowLog ) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
1345
+
1346
+ #define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
1347
+
1348
+ #define GEN_ZSTD_BT_SEARCH_FN (dictMode , mls ) \
1349
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \
1350
+ ZSTD_matchState_t* ms, \
1351
+ const BYTE* ip, const BYTE* const iLimit, \
1352
+ size_t* offBasePtr) \
1353
+ { \
1354
+ assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1355
+ return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \
1356
+ } \
1361
1357
1362
- #define GEN_ZSTD_HC_VTABLE (dictMode , mls ) \
1363
- static size_t ZSTD_HcFindBestMatch_## dictMode##_## mls( \
1358
+ #define GEN_ZSTD_HC_SEARCH_FN (dictMode , mls ) \
1359
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN( dictMode, mls)( \
1364
1360
ZSTD_matchState_t* ms, \
1365
1361
const BYTE* ip, const BYTE* const iLimit, \
1366
1362
size_t* offsetPtr) \
1367
1363
{ \
1368
1364
assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1369
1365
return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
1370
1366
} \
1371
- static const ZSTD_LazyVTable ZSTD_HcVTable_##dictMode##_##mls = { \
1372
- ZSTD_HcFindBestMatch_##dictMode##_##mls \
1373
- };
1374
1367
1375
- #define GEN_ZSTD_ROW_VTABLE (dictMode , mls , rowLog ) \
1376
- static size_t ZSTD_RowFindBestMatch_## dictMode##_## mls##_## rowLog( \
1368
+ #define GEN_ZSTD_ROW_SEARCH_FN (dictMode , mls , rowLog ) \
1369
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN( dictMode, mls, rowLog)( \
1377
1370
ZSTD_matchState_t* ms, \
1378
1371
const BYTE* ip, const BYTE* const iLimit, \
1379
1372
size_t* offsetPtr) \
@@ -1382,9 +1375,6 @@ typedef struct {
1382
1375
assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \
1383
1376
return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
1384
1377
} \
1385
- static const ZSTD_LazyVTable ZSTD_RowVTable_##dictMode##_##mls##_##rowLog = { \
1386
- ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog \
1387
- };
1388
1378
1389
1379
#define ZSTD_FOR_EACH_ROWLOG (X , dictMode , mls ) \
1390
1380
X(dictMode, mls, 4) \
@@ -1407,84 +1397,103 @@ typedef struct {
1407
1397
X(__VA_ARGS__, dictMatchState) \
1408
1398
X(__VA_ARGS__, dedicatedDictSearch)
1409
1399
1410
- /* Generate Row VTables for each combination of (dictMode, mls, rowLog) */
1411
- ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS_ROWLOG , GEN_ZSTD_ROW_VTABLE )
1412
- /* Generate Binary Tree VTables for each combination of (dictMode, mls) */
1413
- ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS , GEN_ZSTD_BT_VTABLE )
1414
- /* Generate Hash Chain VTables for each combination of (dictMode, mls) */
1415
- ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS , GEN_ZSTD_HC_VTABLE )
1416
-
1417
- #define GEN_ZSTD_BT_VTABLE_ARRAY (dictMode ) \
1418
- { \
1419
- &ZSTD_BtVTable_##dictMode##_4, \
1420
- &ZSTD_BtVTable_##dictMode##_5, \
1421
- &ZSTD_BtVTable_##dictMode##_6 \
1422
- }
1423
-
1424
- #define GEN_ZSTD_HC_VTABLE_ARRAY (dictMode ) \
1425
- { \
1426
- &ZSTD_HcVTable_##dictMode##_4, \
1427
- &ZSTD_HcVTable_##dictMode##_5, \
1428
- &ZSTD_HcVTable_##dictMode##_6 \
1429
- }
1430
-
1431
- #define GEN_ZSTD_ROW_VTABLE_ARRAY_ (dictMode , mls ) \
1432
- { \
1433
- &ZSTD_RowVTable_##dictMode##_##mls##_4, \
1434
- &ZSTD_RowVTable_##dictMode##_##mls##_5, \
1435
- &ZSTD_RowVTable_##dictMode##_##mls##_6 \
1436
- }
1400
+ /* Generate row search fns for each combination of (dictMode, mls, rowLog) */
1401
+ ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS_ROWLOG , GEN_ZSTD_ROW_SEARCH_FN )
1402
+ /* Generate binary Tree search fns for each combination of (dictMode, mls) */
1403
+ ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS , GEN_ZSTD_BT_SEARCH_FN )
1404
+ /* Generate hash chain search fns for each combination of (dictMode, mls) */
1405
+ ZSTD_FOR_EACH_DICT_MODE (ZSTD_FOR_EACH_MLS , GEN_ZSTD_HC_SEARCH_FN )
1437
1406
1438
- #define GEN_ZSTD_ROW_VTABLE_ARRAY (dictMode ) \
1439
- { \
1440
- GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 4), \
1441
- GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 5), \
1442
- GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 6) \
1443
- }
1407
+ typedef enum { search_hashChain = 0 , search_binaryTree = 1 , search_rowHash = 2 } searchMethod_e ;
1444
1408
1445
- #define GEN_ZSTD_VTABLE_ARRAY (X ) \
1446
- { \
1447
- X(noDict), \
1448
- X(extDict), \
1449
- X(dictMatchState), \
1450
- X(dedicatedDictSearch) \
1409
+ #define GEN_ZSTD_CALL_BT_SEARCH_FN (dictMode , mls ) \
1410
+ case mls: \
1411
+ return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
1412
+ #define GEN_ZSTD_CALL_HC_SEARCH_FN (dictMode , mls ) \
1413
+ case mls: \
1414
+ return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
1415
+ #define GEN_ZSTD_CALL_ROW_SEARCH_FN (dictMode , mls , rowLog ) \
1416
+ case rowLog: \
1417
+ return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
1418
+
1419
+ #define ZSTD_SWITCH_MLS (X , dictMode ) \
1420
+ switch (mls) { \
1421
+ ZSTD_FOR_EACH_MLS(X, dictMode) \
1451
1422
}
1452
1423
1453
- /* *******************************
1454
- * Common parser - lazy strategy
1455
- *********************************/
1456
- typedef enum { search_hashChain = 0 , search_binaryTree = 1 , search_rowHash = 2 } searchMethod_e ;
1424
+ #define ZSTD_SWITCH_ROWLOG (dictMode , mls ) \
1425
+ case mls: \
1426
+ switch (rowLog) { \
1427
+ ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \
1428
+ } \
1429
+ ZSTD_UNREACHABLE; \
1430
+ break;
1431
+
1432
+ #define ZSTD_SWITCH_SEARCH_METHOD (dictMode ) \
1433
+ switch (searchMethod) { \
1434
+ case search_hashChain: \
1435
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
1436
+ break; \
1437
+ case search_binaryTree: \
1438
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
1439
+ break; \
1440
+ case search_rowHash: \
1441
+ ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \
1442
+ break; \
1443
+ } \
1444
+ ZSTD_UNREACHABLE;
1457
1445
1458
1446
/**
1459
- * This table is indexed first by the four ZSTD_dictMode_e values, and then
1460
- * by the two searchMethod_e values. NULLs are placed for configurations
1461
- * that should never occur (extDict modes go to the other implementation
1462
- * below and there is no DDSS for binary tree search yet).
1447
+ * Searches for the longest match at @p ip.
1448
+ * Dispatches to the correct implementation function based on the
1449
+ * (searchMethod, dictMode, mls, rowLog). We use switch statements
1450
+ * here instead of using an indirect function call through a function
1451
+ * pointer because after Spectre and Meltdown mitigations, indirect
1452
+ * function calls can be very costly, especially in the kernel.
1453
+ *
1454
+ * NOTE: dictMode and searchMethod should be templated, so those switch
1455
+ * statements should be optimized out. Only the mls & rowLog switches
1456
+ * should be left.
1457
+ *
1458
+ * @param ms The match state.
1459
+ * @param ip The position to search at.
1460
+ * @param iend The end of the input data.
1461
+ * @param[out] offsetPtr Stores the match offset into this pointer.
1462
+ * @param mls The minimum search length, in the range [4, 6].
1463
+ * @param rowLog The row log (if applicable), in the range [4, 6].
1464
+ * @param searchMethod The search method to use (templated).
1465
+ * @param dictMode The dictMode (templated).
1466
+ *
1467
+ * @returns The length of the longest match found, or < mls if no match is found.
1468
+ * If a match is found its offset is stored in @p offsetPtr.
1463
1469
*/
1464
-
1465
- static ZSTD_LazyVTable const *
1466
- ZSTD_selectLazyVTable (ZSTD_matchState_t const * ms , searchMethod_e searchMethod , ZSTD_dictMode_e dictMode )
1470
+ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax (
1471
+ ZSTD_matchState_t * ms ,
1472
+ const BYTE * ip ,
1473
+ const BYTE * iend ,
1474
+ size_t * offsetPtr ,
1475
+ U32 const mls ,
1476
+ U32 const rowLog ,
1477
+ searchMethod_e const searchMethod ,
1478
+ ZSTD_dictMode_e const dictMode )
1467
1479
{
1468
- /* Fill the Hc/Bt VTable arrays with the right functions for the (dictMode, mls) combination. */
1469
- ZSTD_LazyVTable const * const hcVTables [4 ][3 ] = GEN_ZSTD_VTABLE_ARRAY (GEN_ZSTD_HC_VTABLE_ARRAY );
1470
- ZSTD_LazyVTable const * const btVTables [4 ][3 ] = GEN_ZSTD_VTABLE_ARRAY (GEN_ZSTD_BT_VTABLE_ARRAY );
1471
- /* Fill the Row VTable array with the right functions for the (dictMode, mls, rowLog) combination. */
1472
- ZSTD_LazyVTable const * const rowVTables [4 ][3 ][3 ] = GEN_ZSTD_VTABLE_ARRAY (GEN_ZSTD_ROW_VTABLE_ARRAY );
1473
-
1474
- U32 const mls = MAX (4 , MIN (6 , ms -> cParams .minMatch ));
1475
- U32 const rowLog = MAX (4 , MIN (6 , ms -> cParams .searchLog ));
1476
- switch (searchMethod ) {
1477
- case search_hashChain :
1478
- return hcVTables [dictMode ][mls - 4 ];
1479
- case search_binaryTree :
1480
- return btVTables [dictMode ][mls - 4 ];
1481
- case search_rowHash :
1482
- return rowVTables [dictMode ][mls - 4 ][rowLog - 4 ];
1483
- default :
1484
- return NULL ;
1480
+ if (dictMode == ZSTD_noDict ) {
1481
+ ZSTD_SWITCH_SEARCH_METHOD (noDict )
1482
+ } else if (dictMode == ZSTD_extDict ) {
1483
+ ZSTD_SWITCH_SEARCH_METHOD (extDict )
1484
+ } else if (dictMode == ZSTD_dictMatchState ) {
1485
+ ZSTD_SWITCH_SEARCH_METHOD (dictMatchState )
1486
+ } else if (dictMode == ZSTD_dedicatedDictSearch ) {
1487
+ ZSTD_SWITCH_SEARCH_METHOD (dedicatedDictSearch )
1485
1488
}
1489
+ ZSTD_UNREACHABLE ;
1490
+ return 0 ;
1486
1491
}
1487
1492
1493
+ /* *******************************
1494
+ * Common parser - lazy strategy
1495
+ *********************************/
1496
+
1488
1497
FORCE_INLINE_TEMPLATE size_t
1489
1498
ZSTD_compressBlock_lazy_generic (
1490
1499
ZSTD_matchState_t * ms , seqStore_t * seqStore ,
@@ -1501,8 +1510,9 @@ ZSTD_compressBlock_lazy_generic(
1501
1510
const BYTE * const base = ms -> window .base ;
1502
1511
const U32 prefixLowestIndex = ms -> window .dictLimit ;
1503
1512
const BYTE * const prefixLowest = base + prefixLowestIndex ;
1513
+ const U32 mls = BOUNDED (4 , ms -> cParams .minMatch , 6 );
1514
+ const U32 rowLog = BOUNDED (4 , ms -> cParams .searchLog , 6 );
1504
1515
1505
- searchMax_f const searchMax = ZSTD_selectLazyVTable (ms , searchMethod , dictMode )-> searchMax ;
1506
1516
U32 offset_1 = rep [0 ], offset_2 = rep [1 ];
1507
1517
U32 offsetSaved1 = 0 , offsetSaved2 = 0 ;
1508
1518
@@ -1519,8 +1529,6 @@ ZSTD_compressBlock_lazy_generic(
1519
1529
0 ;
1520
1530
const U32 dictAndPrefixLength = (U32 )((ip - prefixLowest ) + (dictEnd - dictLowest ));
1521
1531
1522
- assert (searchMax != NULL );
1523
-
1524
1532
DEBUGLOG (5 , "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)" , (U32 )dictMode , (U32 )searchMethod );
1525
1533
ip += (dictAndPrefixLength == 0 );
1526
1534
if (dictMode == ZSTD_noDict ) {
@@ -1538,7 +1546,6 @@ ZSTD_compressBlock_lazy_generic(
1538
1546
}
1539
1547
1540
1548
if (searchMethod == search_rowHash ) {
1541
- const U32 rowLog = MAX (4 , MIN (6 , ms -> cParams .searchLog ));
1542
1549
ZSTD_row_fillHashCache (ms , base , rowLog ,
1543
1550
MIN (ms -> cParams .minMatch , 6 /* mls caps out at 6 */ ),
1544
1551
ms -> nextToUpdate , ilimit );
@@ -1579,7 +1586,7 @@ ZSTD_compressBlock_lazy_generic(
1579
1586
1580
1587
/* first search (depth 0) */
1581
1588
{ size_t offbaseFound = 999999999 ;
1582
- size_t const ml2 = searchMax (ms , ip , iend , & offbaseFound );
1589
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & offbaseFound , mls , rowLog , searchMethod , dictMode );
1583
1590
if (ml2 > matchLength )
1584
1591
matchLength = ml2 , start = ip , offBase = offbaseFound ;
1585
1592
}
@@ -1618,7 +1625,7 @@ ZSTD_compressBlock_lazy_generic(
1618
1625
}
1619
1626
}
1620
1627
{ size_t ofbCandidate = 999999999 ;
1621
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
1628
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , dictMode );
1622
1629
int const gain2 = (int )(ml2 * 4 - ZSTD_highbit32 ((U32 )ofbCandidate )); /* raw approx */
1623
1630
int const gain1 = (int )(matchLength * 4 - ZSTD_highbit32 ((U32 )offBase ) + 4 );
1624
1631
if ((ml2 >= 4 ) && (gain2 > gain1 )) {
@@ -1654,7 +1661,7 @@ ZSTD_compressBlock_lazy_generic(
1654
1661
}
1655
1662
}
1656
1663
{ size_t ofbCandidate = 999999999 ;
1657
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
1664
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , dictMode );
1658
1665
int const gain2 = (int )(ml2 * 4 - ZSTD_highbit32 ((U32 )ofbCandidate )); /* raw approx */
1659
1666
int const gain1 = (int )(matchLength * 4 - ZSTD_highbit32 ((U32 )offBase ) + 7 );
1660
1667
if ((ml2 >= 4 ) && (gain2 > gain1 )) {
@@ -1899,9 +1906,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1899
1906
const BYTE * const dictEnd = dictBase + dictLimit ;
1900
1907
const BYTE * const dictStart = dictBase + ms -> window .lowLimit ;
1901
1908
const U32 windowLog = ms -> cParams .windowLog ;
1902
- const U32 rowLog = ms -> cParams .searchLog < 5 ? 4 : 5 ;
1909
+ const U32 mls = BOUNDED (4 , ms -> cParams .minMatch , 6 );
1910
+ const U32 rowLog = BOUNDED (4 , ms -> cParams .searchLog , 6 );
1903
1911
1904
- searchMax_f const searchMax = ZSTD_selectLazyVTable (ms , searchMethod , ZSTD_extDict )-> searchMax ;
1905
1912
U32 offset_1 = rep [0 ], offset_2 = rep [1 ];
1906
1913
1907
1914
DEBUGLOG (5 , "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)" , (U32 )searchMethod );
@@ -1943,7 +1950,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1943
1950
1944
1951
/* first search (depth 0) */
1945
1952
{ size_t ofbCandidate = 999999999 ;
1946
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
1953
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , ZSTD_extDict );
1947
1954
if (ml2 > matchLength )
1948
1955
matchLength = ml2 , start = ip , offBase = ofbCandidate ;
1949
1956
}
@@ -1978,7 +1985,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1978
1985
1979
1986
/* search match, depth 1 */
1980
1987
{ size_t ofbCandidate = 999999999 ;
1981
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
1988
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , ZSTD_extDict );
1982
1989
int const gain2 = (int )(ml2 * 4 - ZSTD_highbit32 ((U32 )ofbCandidate )); /* raw approx */
1983
1990
int const gain1 = (int )(matchLength * 4 - ZSTD_highbit32 ((U32 )offBase ) + 4 );
1984
1991
if ((ml2 >= 4 ) && (gain2 > gain1 )) {
@@ -2010,7 +2017,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
2010
2017
2011
2018
/* search match, depth 2 */
2012
2019
{ size_t ofbCandidate = 999999999 ;
2013
- size_t const ml2 = searchMax (ms , ip , iend , & ofbCandidate );
2020
+ size_t const ml2 = ZSTD_searchMax (ms , ip , iend , & ofbCandidate , mls , rowLog , searchMethod , ZSTD_extDict );
2014
2021
int const gain2 = (int )(ml2 * 4 - ZSTD_highbit32 ((U32 )ofbCandidate )); /* raw approx */
2015
2022
int const gain1 = (int )(matchLength * 4 - ZSTD_highbit32 ((U32 )offBase ) + 7 );
2016
2023
if ((ml2 >= 4 ) && (gain2 > gain1 )) {
0 commit comments