From f29bbd112b07bc54ac2e2a1eb593d2252b45f0d9 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 19 Jan 2023 16:27:24 -0800 Subject: [PATCH] Cap hashLog & chainLog to ensure that we only use 32 bits of hash * Cap shortCache chainLog to 24 * Cap row match finder hashLog so that rowLog <= 24 * Add unit tests to expose all cases. The row match finder unit tests are only run in 64-bit mode, because they allocate ~1GB. Fixes #3336 --- lib/compress/zstd_compress.c | 41 +++++++++++++++++--- lib/compress/zstd_lazy.c | 1 - lib/compress/zstd_lazy.h | 4 +- tests/fuzzer.c | 74 ++++++++++++++++++++++++++++++++++++ tests/zstreamtest.c | 18 +++++++++ 5 files changed, 131 insertions(+), 7 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 3a48e7dcd48..e0bcbfb165b 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1412,7 +1412,8 @@ static ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize, - ZSTD_cParamMode_e mode) + ZSTD_cParamMode_e mode, + ZSTD_paramSwitch_e useRowMatchFinder) { const U64 minSrcSize = 513; /* (1<<9) + 1 */ const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); @@ -1465,11 +1466,40 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + /* We can't use more than 32 bits of hash in total, so that means that we require: + * (hashLog + 8) <= 32 && (chainLog + 8) <= 32 + */ if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) { U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS; if (cPar.hashLog > maxShortCacheHashLog) { cPar.hashLog = maxShortCacheHashLog; } + if (cPar.chainLog > maxShortCacheHashLog) { + cPar.chainLog = maxShortCacheHashLog; + } + } + + + /* At this point, we aren't 100% sure if we are using the row match finder. + * Unless it is explicitly disabled, conservatively assume that it is enabled. + * In this case it will only be disabled for small sources, so shrinking the + * hash log a little bit shouldn't result in any ratio loss. + */ + if (useRowMatchFinder == ZSTD_ps_auto) + useRowMatchFinder = ZSTD_ps_enable; + + /* We can't hash more than 32-bits in total. So that means that we require: + * (hashLog - rowLog + 8) <= 32 + */ + if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) { + /* Switch to 32-entry rows if searchLog is 5 (or more) */ + U32 const rowLog = BOUNDED(4, cPar.searchLog, 6); + U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS; + U32 const maxHashLog = maxRowHashLog + rowLog; + assert(cPar.hashLog >= rowLog); + if (cPar.hashLog > maxHashLog) { + cPar.hashLog = maxHashLog; + } } return cPar; @@ -1482,7 +1512,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, { cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto); } static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); @@ -1513,7 +1543,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); assert(!ZSTD_checkCParams(cParams)); /* srcSizeHint == 0 means 0 */ - return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder); } static size_t @@ -2185,7 +2215,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, } params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, - cdict->dictContentSize, ZSTD_cpm_attachDict); + cdict->dictContentSize, ZSTD_cpm_attachDict, + params.useRowMatchFinder); params.cParams.windowLog = windowLog; params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, @@ -6740,7 +6771,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, cp.targetLength = (unsigned)(-clampedCompressionLevel); } /* refine parameters based on srcSize & dictSize */ - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto); } } diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 810bf011cfb..a2473427299 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -759,7 +759,6 @@ size_t ZSTD_HcFindBestMatch( ***********************************/ /* Constants for row-based hash */ #define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ -#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */ diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index c24f1c794d3..3bde67331e4 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -25,6 +25,8 @@ extern "C" { */ #define ZSTD_LAZY_DDSS_BUCKET_LOG 2 +#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ + U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); @@ -116,7 +118,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row( size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); - + #if defined (__cplusplus) } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 4a091c8972b..816ed71660c 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2832,6 +2832,80 @@ static int basicUnitTests(U32 const seed, double compressibility) } DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : ZSTD_fast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++); + { + ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams(); + ZSTD_customMem customMem = {NULL, NULL, NULL}; + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + ZSTD_CDict* cdict; + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_fast)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25)); + cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem); + CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); + CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict)); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK_Z(cSize); + CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize)); + ZSTD_freeCDict(cdict); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_dfast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++); + { + ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams(); + ZSTD_customMem customMem = {NULL, NULL, NULL}; + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + ZSTD_CDict* cdict; + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_dfast)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25)); + cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem); + CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); + CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict)); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK_Z(cSize); + CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize)); + ZSTD_freeCDict(cdict); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + + DISPLAYLEVEL(3, "test%3i : ZSTD_lazy attach dictionary with hashLog = 29 and searchLog = 4 : ", testNb++); + if (MEM_64bits()) { + ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams(); + ZSTD_customMem customMem = {NULL, NULL, NULL}; + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + ZSTD_CDict* cdict; + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_lazy)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, 0)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_searchLog, 4)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 29)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 29)); + CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 29)); + cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem); + CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); + CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict)); + cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); + CHECK_Z(cSize); + CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize)); + ZSTD_freeCDict(cdict); + ZSTD_freeDCtx(dctx); + } + DISPLAYLEVEL(3, "OK \n"); + DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++); { U32 u; for (u=0; u