Skip to content

Commit 18b10d7

Browse files
committed
Cap hashLog & chainLog to ensure that we only use 32 bits of hash
* Cap shortCache chainLog to 24
* Cap row match finder hashLog so that (hashLog - rowLog) <= 24
* Add unit tests to expose all cases. The row match finder unit tests are only run in 64-bit mode, because they allocate ~1GB.

Fixes #3336
1 parent bce0382 commit 18b10d7

File tree

5 files changed

+144
-7
lines changed

5 files changed

+144
-7
lines changed

lib/compress/zstd_compress.c

+36-5
Original file line numberDiff line numberDiff line change
@@ -1412,7 +1412,8 @@ static ZSTD_compressionParameters
14121412
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
14131413
unsigned long long srcSize,
14141414
size_t dictSize,
1415-
ZSTD_cParamMode_e mode)
1415+
ZSTD_cParamMode_e mode,
1416+
ZSTD_paramSwitch_e useRowMatchFinder)
14161417
{
14171418
const U64 minSrcSize = 513; /* (1<<9) + 1 */
14181419
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
@@ -1465,11 +1466,40 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
14651466
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
14661467
cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */
14671468

1469+
/* We can't use more than 32 bits of hash in total, so that means that we require:
1470+
* (hashLog + 8) <= 32 && (chainLog + 8) <= 32
1471+
*/
14681472
if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
14691473
U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
14701474
if (cPar.hashLog > maxShortCacheHashLog) {
14711475
cPar.hashLog = maxShortCacheHashLog;
14721476
}
1477+
if (cPar.chainLog > maxShortCacheHashLog) {
1478+
cPar.chainLog = maxShortCacheHashLog;
1479+
}
1480+
}
1481+
1482+
1483+
/* At this point, we aren't 100% sure if we are using the row match finder.
1484+
* Unless it is explicitly disabled, conservatively assume that it is enabled.
1485+
* In this case it will only be disabled for small sources, so shrinking the
1486+
* hash log a little bit shouldn't result in any ratio loss.
1487+
*/
1488+
if (useRowMatchFinder == ZSTD_ps_auto)
1489+
useRowMatchFinder = ZSTD_ps_enable;
1490+
1491+
/* We can't hash more than 32 bits in total. So that means that we require:
1492+
* (hashLog - rowLog + 8) <= 32
1493+
*/
1494+
if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {
1495+
/* Switch to 32-entry rows if searchLog is 5 (or more) */
1496+
U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);
1497+
U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;
1498+
U32 const maxHashLog = maxRowHashLog + rowLog;
1499+
assert(cPar.hashLog >= rowLog);
1500+
if (cPar.hashLog > maxHashLog) {
1501+
cPar.hashLog = maxHashLog;
1502+
}
14731503
}
14741504

14751505
return cPar;
@@ -1482,7 +1512,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
14821512
{
14831513
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
14841514
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
1485-
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
1515+
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);
14861516
}
14871517

14881518
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
@@ -1513,7 +1543,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
15131543
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
15141544
assert(!ZSTD_checkCParams(cParams));
15151545
/* srcSizeHint == 0 means 0 */
1516-
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
1546+
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
15171547
}
15181548

15191549
static size_t
@@ -2185,7 +2215,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
21852215
}
21862216

21872217
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
2188-
cdict->dictContentSize, ZSTD_cpm_attachDict);
2218+
cdict->dictContentSize, ZSTD_cpm_attachDict,
2219+
params.useRowMatchFinder);
21892220
params.cParams.windowLog = windowLog;
21902221
params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
21912222
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
@@ -6740,7 +6771,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
67406771
cp.targetLength = (unsigned)(-clampedCompressionLevel);
67416772
}
67426773
/* refine parameters based on srcSize & dictSize */
6743-
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
6774+
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
67446775
}
67456776
}
67466777

lib/compress/zstd_lazy.c

-1
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,6 @@ size_t ZSTD_HcFindBestMatch(
759759
***********************************/
760760
/* Constants for row-based hash */
761761
#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
762-
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
763762
#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
764763
#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
765764

lib/compress/zstd_lazy.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ extern "C" {
2525
*/
2626
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
2727

28+
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
29+
2830
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
2931
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
3032

@@ -116,7 +118,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
116118
size_t ZSTD_compressBlock_btlazy2_extDict(
117119
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
118120
void const* src, size_t srcSize);
119-
121+
120122

121123
#if defined (__cplusplus)
122124
}

tests/fuzzer.c

+84
Original file line numberDiff line numberDiff line change
@@ -2832,6 +2832,90 @@ static int basicUnitTests(U32 const seed, double compressibility)
28322832
}
28332833
DISPLAYLEVEL(3, "OK \n");
28342834

2835+
DISPLAYLEVEL(3, "test%3i : ZSTD_fast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++);
2836+
{
2837+
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
2838+
ZSTD_customMem customMem = {NULL, NULL, NULL};
2839+
ZSTD_DCtx* dctx = ZSTD_createDCtx();
2840+
ZSTD_CDict* cdict;
2841+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_fast));
2842+
/* Set windowLog to 25 so hash/chain logs don't get sized down */
2843+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25));
2844+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25));
2845+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25));
2846+
/* Set srcSizeHint to 2^25 so hash/chain logs don't get sized down */
2847+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25));
2848+
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
2849+
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
2850+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
2851+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
2852+
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
2853+
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
2854+
CHECK_Z(cSize);
2855+
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
2856+
ZSTD_freeCDict(cdict);
2857+
ZSTD_freeDCtx(dctx);
2858+
ZSTD_freeCCtxParams(cctxParams);
2859+
}
2860+
DISPLAYLEVEL(3, "OK \n");
2861+
2862+
DISPLAYLEVEL(3, "test%3i : ZSTD_dfast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++);
2863+
{
2864+
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
2865+
ZSTD_customMem customMem = {NULL, NULL, NULL};
2866+
ZSTD_DCtx* dctx = ZSTD_createDCtx();
2867+
ZSTD_CDict* cdict;
2868+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_dfast));
2869+
/* Set windowLog to 25 so hash/chain logs don't get sized down */
2870+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25));
2871+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25));
2872+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25));
2873+
/* Set srcSizeHint to 2^25 so hash/chain logs don't get sized down */
2874+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25));
2875+
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
2876+
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
2877+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
2878+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
2879+
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
2880+
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
2881+
CHECK_Z(cSize);
2882+
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
2883+
ZSTD_freeCDict(cdict);
2884+
ZSTD_freeDCtx(dctx);
2885+
ZSTD_freeCCtxParams(cctxParams);
2886+
}
2887+
DISPLAYLEVEL(3, "OK \n");
2888+
2889+
DISPLAYLEVEL(3, "test%3i : ZSTD_lazy attach dictionary with hashLog = 29 and searchLog = 4 : ", testNb++);
2890+
if (MEM_64bits()) {
2891+
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
2892+
ZSTD_customMem customMem = {NULL, NULL, NULL};
2893+
ZSTD_DCtx* dctx = ZSTD_createDCtx();
2894+
ZSTD_CDict* cdict;
2895+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_lazy));
2896+
/* Force enable row based match finder, and disable dedicated dict search. */
2897+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable));
2898+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, 0));
2899+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_searchLog, 4));
2900+
/* Set windowLog to 29 so hash/chain logs don't get sized down */
2901+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 29));
2902+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 29));
2903+
/* Set srcSizeHint to 2^29 so hash/chain logs don't get sized down */
2904+
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 29));
2905+
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
2906+
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
2907+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
2908+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
2909+
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
2910+
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
2911+
CHECK_Z(cSize);
2912+
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
2913+
ZSTD_freeCDict(cdict);
2914+
ZSTD_freeDCtx(dctx);
2915+
ZSTD_freeCCtxParams(cctxParams);
2916+
}
2917+
DISPLAYLEVEL(3, "OK \n");
2918+
28352919
DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++);
28362920
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
28372921
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,

tests/zstreamtest.c

+21
Original file line numberDiff line numberDiff line change
@@ -1566,6 +1566,27 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
15661566
CHECK(!ZSTD_isError(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, -1)), "Out of range doesn't error");
15671567
DISPLAYLEVEL(3, "OK \n");
15681568

1569+
DISPLAYLEVEL(3, "test%3i : ZSTD_lazy compress with hashLog = 29 and searchLog = 4 : ", testNb++);
1570+
if (MEM_64bits()) {
1571+
ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 };
1572+
ZSTD_inBuffer in = { CNBuffer, CNBufferSize, 0 };
1573+
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
1574+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_strategy, ZSTD_lazy));
1575+
/* Force enable the row based match finder */
1576+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable));
1577+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_searchLog, 4));
1578+
/* Set windowLog to 29 so the hashLog doesn't get sized down */
1579+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_windowLog, 29));
1580+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_hashLog, 29));
1581+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_checksumFlag, 1));
1582+
/* Compress with continue first so the hashLog doesn't get sized down */
1583+
CHECK_Z(ZSTD_compressStream2(zc, &out, &in, ZSTD_e_continue));
1584+
CHECK_Z(ZSTD_compressStream2(zc, &out, &in, ZSTD_e_end));
1585+
cSize = out.pos;
1586+
CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize));
1587+
}
1588+
DISPLAYLEVEL(3, "OK \n");
1589+
15691590
DISPLAYLEVEL(3, "test%3i : Test offset == windowSize : ", testNb++);
15701591
{
15711592
int windowLog;

0 commit comments

Comments
 (0)