Skip to content

Commit

Permalink
Cap hashLog & chainLog to ensure that we only use 32 bits of hash
Browse files Browse the repository at this point in the history
* Cap shortCache chainLog to 24
* Cap row match finder hashLog so that rowLog <= 24
* Add unit tests to expose all cases. The row match finder unit tests
  are only run in 64-bit mode, because they allocate ~1GB.

Fixes #3336
  • Loading branch information
terrelln committed Jan 20, 2023
1 parent bce0382 commit f29bbd1
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 7 deletions.
41 changes: 36 additions & 5 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1412,7 +1412,8 @@ static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
unsigned long long srcSize,
size_t dictSize,
ZSTD_cParamMode_e mode)
ZSTD_cParamMode_e mode,
ZSTD_paramSwitch_e useRowMatchFinder)
{
const U64 minSrcSize = 513; /* (1<<9) + 1 */
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
Expand Down Expand Up @@ -1465,11 +1466,40 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */

/* We can't use more than 32 bits of hash in total, so that means that we require:
* (hashLog + 8) <= 32 && (chainLog + 8) <= 32
*/
if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
if (cPar.hashLog > maxShortCacheHashLog) {
cPar.hashLog = maxShortCacheHashLog;
}
if (cPar.chainLog > maxShortCacheHashLog) {
cPar.chainLog = maxShortCacheHashLog;
}
}


/* At this point, we aren't 100% sure if we are using the row match finder.
* Unless it is explicitly disabled, conservatively assume that it is enabled.
* In this case it will only be disabled for small sources, so shrinking the
* hash log a little bit shouldn't result in any ratio loss.
*/
if (useRowMatchFinder == ZSTD_ps_auto)
useRowMatchFinder = ZSTD_ps_enable;

/* We can't hash more than 32-bits in total. So that means that we require:
* (hashLog - rowLog + 8) <= 32
*/
if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {
/* Switch to 32-entry rows if searchLog is 5 (or more) */
U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);
U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;
U32 const maxHashLog = maxRowHashLog + rowLog;
assert(cPar.hashLog >= rowLog);
if (cPar.hashLog > maxHashLog) {
cPar.hashLog = maxHashLog;
}
}

return cPar;
Expand All @@ -1482,7 +1512,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
{
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);
}

static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
Expand Down Expand Up @@ -1513,7 +1543,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
assert(!ZSTD_checkCParams(cParams));
/* srcSizeHint == 0 means 0 */
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
}

static size_t
Expand Down Expand Up @@ -2185,7 +2215,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
}

params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
cdict->dictContentSize, ZSTD_cpm_attachDict);
cdict->dictContentSize, ZSTD_cpm_attachDict,
params.useRowMatchFinder);
params.cParams.windowLog = windowLog;
params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
Expand Down Expand Up @@ -6740,7 +6771,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
cp.targetLength = (unsigned)(-clampedCompressionLevel);
}
/* refine parameters based on srcSize & dictSize */
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
}
}

Expand Down
1 change: 0 additions & 1 deletion lib/compress/zstd_lazy.c
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,6 @@ size_t ZSTD_HcFindBestMatch(
***********************************/
/* Constants for row-based hash */
#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */

Expand Down
4 changes: 3 additions & 1 deletion lib/compress/zstd_lazy.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ extern "C" {
*/
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2

#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */

U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);

Expand Down Expand Up @@ -116,7 +118,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);


#if defined (__cplusplus)
}
Expand Down
74 changes: 74 additions & 0 deletions tests/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -2832,6 +2832,80 @@ static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : ZSTD_fast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++);
{
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
ZSTD_customMem customMem = {NULL, NULL, NULL};
ZSTD_DCtx* dctx = ZSTD_createDCtx();
ZSTD_CDict* cdict;
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_fast));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25));
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK_Z(cSize);
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
ZSTD_freeCDict(cdict);
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : ZSTD_dfast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++);
{
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
ZSTD_customMem customMem = {NULL, NULL, NULL};
ZSTD_DCtx* dctx = ZSTD_createDCtx();
ZSTD_CDict* cdict;
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_dfast));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25));
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK_Z(cSize);
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
ZSTD_freeCDict(cdict);
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : ZSTD_lazy attach dictionary with hashLog = 29 and searchLog = 4 : ", testNb++);
if (MEM_64bits()) {
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
ZSTD_customMem customMem = {NULL, NULL, NULL};
ZSTD_DCtx* dctx = ZSTD_createDCtx();
ZSTD_CDict* cdict;
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_lazy));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, 0));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_searchLog, 4));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 29));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 29));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 29));
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK_Z(cSize);
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
ZSTD_freeCDict(cdict);
ZSTD_freeDCtx(dctx);
}
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,
Expand Down
18 changes: 18 additions & 0 deletions tests/zstreamtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -1566,6 +1566,24 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
CHECK(!ZSTD_isError(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, -1)), "Out of range doesn't error");
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : ZSTD_lazy compress with hashLog = 29 and searchLog = 4 : ", testNb++);
if (MEM_64bits()) {
ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 };
ZSTD_inBuffer in = { CNBuffer, CNBufferSize, 0 };
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_strategy, ZSTD_lazy));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_searchLog, 4));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_windowLog, 29));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_hashLog, 29));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_compressStream2(zc, &out, &in, ZSTD_e_continue));
CHECK_Z(ZSTD_compressStream2(zc, &out, &in, ZSTD_e_end));
cSize = out.pos;
CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize));
}
DISPLAYLEVEL(3, "OK \n");

DISPLAYLEVEL(3, "test%3i : Test offset == windowSize : ", testNb++);
{
int windowLog;
Expand Down

0 comments on commit f29bbd1

Please sign in to comment.