From a08fabd51a4191b9b1a2d580454ae6b7b805ee35 Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Mon, 17 Oct 2022 10:24:29 -0700 Subject: [PATCH 1/8] Rough draft speed optimization --- lib/common/huf.h | 2 +- lib/compress/huf_compress.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/lib/common/huf.h b/lib/common/huf.h index d79acd1d53a..0db6fb36927 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -173,7 +173,7 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, /* **************************************** * HUF detailed API * ****************************************/ -#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra +#define HUF_OPTIMAL_DEPTH_THRESHOLD 3 typedef enum { HUF_depth_fast, /** Use heuristic to find the table depth**/ HUF_depth_optimal /** Test possible table depths to find the one that produces the smallest header + encoded size**/ diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 54b58f1919c..d8bbaa6b4ae 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1258,7 +1258,8 @@ unsigned HUF_minTableLog(size_t srcSize, unsigned symbolCardinality) unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode) { - unsigned optLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); + unsigned optLogGuess = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); + unsigned optLog = optLogGuess; if (depthMode == HUF_depth_optimal) { /** Test valid depths and return optimal **/ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); @@ -1267,10 +1268,11 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS unsigned huffLog; size_t maxBits, hSize, newSize; const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); + const unsigned minTableLog = HUF_minTableLog(srcSize, symbolCardinality); if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog; - for (huffLog = HUF_minTableLog(srcSize, symbolCardinality); huffLog <= maxTableLog; huffLog++) { + for (huffLog = optLogGuess; huffLog >= minTableLog; huffLog--) { maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, huffLog, workSpace, wkspSize); @@ -1282,7 +1284,29 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; - if (newSize < optSize) { + if (newSize > optSize) { + break; + } else { + optSize = newSize; + optLog = huffLog; + } + } + + for (huffLog = optLogGuess + 1; huffLog <= maxTableLog; huffLog++) { + maxBits = HUF_buildCTable_wksp(table, count, + maxSymbolValue, huffLog, + workSpace, wkspSize); + if (ERR_isError(maxBits)) continue; + + hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, + workSpace, wkspSize); + if (ERR_isError(hSize)) continue; + + newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; + + if (newSize > optSize) { + break; + } else { optSize = newSize; optLog = huffLog; } From 401331909e85a5590d03786130187545fe1e12df Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Mon, 24 Oct 2022 12:35:16 -0700 Subject: [PATCH 2/8] Commit for benchmarking --- lib/compress/huf_compress.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/compress/huf_compress.c 
b/lib/compress/huf_compress.c index b6905d3a6e6..4c3af94b75e 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1266,10 +1266,11 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS unsigned huffLog; size_t maxBits, hSize, newSize; const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); - const unsigned minTableLog = HUF_minTableLog(srcSize, symbolCardinality); + const unsigned minTableLog = HUF_minTableLog(symbolCardinality); if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog; + /* Search left of guess until size increases */ for (huffLog = optLogGuess; huffLog >= minTableLog; huffLog--) { maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, huffLog, @@ -1290,6 +1291,7 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS } } + /* Search right of estimate until size increases */ for (huffLog = optLogGuess + 1; huffLog <= maxTableLog; huffLog++) { maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, huffLog, From db74d043d6de8268d7c23c8781c26ecef60a86b7 Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Thu, 27 Oct 2022 10:20:44 -0700 Subject: [PATCH 3/8] Speed optimizations with macro --- contrib/freestanding_lib/freestanding.py | 27 ++- contrib/linux-kernel/Makefile | 7 +- contrib/linux-kernel/linux.mk | 20 +- contrib/linux-kernel/test/Makefile | 1 + .../linux-kernel/test/include/linux/module.h | 2 + contrib/linux-kernel/zstd_common_module.c | 32 +++ lib/common/compiler.h | 6 + lib/common/huf.h | 2 +- lib/common/pool.c | 8 +- lib/compress/huf_compress.c | 59 ++--- lib/compress/zstd_lazy.c | 229 +++++++++--------- lib/decompress/zstd_ddict.c | 2 +- programs/zstd.1 | 2 +- programs/zstd.1.md | 2 +- tests/fuzz/zstd_helpers.c | 5 + 15 files changed, 237 insertions(+), 167 deletions(-) create mode 100644 contrib/linux-kernel/zstd_common_module.c diff --git a/contrib/freestanding_lib/freestanding.py b/contrib/freestanding_lib/freestanding.py index 4a02dea147b..4e0a944f144 100755 --- a/contrib/freestanding_lib/freestanding.py +++ b/contrib/freestanding_lib/freestanding.py @@ -431,7 +431,7 @@ def __init__( external_xxhash: bool, xxh64_state: Optional[str], xxh64_prefix: Optional[str], rewritten_includes: [(str, str)], defs: [(str, Optional[str])], replaces: [(str, str)], - undefs: [str], excludes: [str], seds: [str], + undefs: [str], excludes: [str], seds: [str], spdx: bool, ): self._zstd_deps = zstd_deps self._mem = mem @@ -446,6 +446,7 @@ def __init__( self._undefs = undefs self._excludes = excludes self._seds = seds + self._spdx = spdx def _dst_lib_file_paths(self): """ @@ -640,6 +641,27 @@ def _process_seds(self): for sed in self._seds: self._process_sed(sed) + def _process_spdx(self): + if not self._spdx: + return + self._log("Processing spdx") + SPDX_C = "// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause\n" + SPDX_H_S = "/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */\n" + for filepath in self._dst_lib_file_paths(): + file = FileLines(filepath) + if file.lines[0] == SPDX_C or file.lines[0] == SPDX_H_S: + continue + for line in file.lines: + if "SPDX-License-Identifier" in line: + raise RuntimeError(f"Unexpected SPDX license identifier: {file.filename} {repr(line)}") + if file.filename.endswith(".c"): + file.lines.insert(0, SPDX_C) + elif file.filename.endswith(".h") or file.filename.endswith(".S"): + file.lines.insert(0, SPDX_H_S) + else: + raise RuntimeError(f"Unexpected file extension: {file.filename}") + file.write() + def go(self): @@ 
-651,6 +673,7 @@ def go(self): self._rewrite_includes() self._replace_xxh64_prefix() self._process_seds() + self._process_spdx() def parse_optional_pair(defines: [str]) -> [(str, Optional[str])]: @@ -689,6 +712,7 @@ def main(name, args): parser.add_argument("--xxh64-prefix", default=None, help="Alternate XXH64 function prefix (excluding _) e.g. --xxh64-prefix=xxh64") parser.add_argument("--rewrite-include", default=[], dest="rewritten_includes", action="append", help="Rewrite an include REGEX=NEW (e.g. '=')") parser.add_argument("--sed", default=[], dest="seds", action="append", help="Apply a sed replacement. Format: `s/REGEX/FORMAT/[g]`. REGEX is a Python regex. FORMAT is a Python format string formatted by the regex dict.") + parser.add_argument("--spdx", action="store_true", help="Add SPDX License Identifiers") parser.add_argument("-D", "--define", default=[], dest="defs", action="append", help="Pre-define this macro (can be passed multiple times)") parser.add_argument("-U", "--undefine", default=[], dest="undefs", action="append", help="Pre-undefine this macro (can be passed multiple times)") parser.add_argument("-R", "--replace", default=[], dest="replaces", action="append", help="Pre-define this macro and replace the first ifndef block with its definition") @@ -743,6 +767,7 @@ def main(name, args): args.undefs, args.excludes, args.seds, + args.spdx, ).go() if __name__ == "__main__": diff --git a/contrib/linux-kernel/Makefile b/contrib/linux-kernel/Makefile index 47a4317404b..baa1f24c6a7 100644 --- a/contrib/linux-kernel/Makefile +++ b/contrib/linux-kernel/Makefile @@ -26,6 +26,7 @@ libzstd: --rewrite-include '"(\.\./)?zstd_errors.h"=' \ --sed 's,/\*\*\*,/* *,g' \ --sed 's,/\*\*,/*,g' \ + --spdx \ -DZSTD_NO_INTRINSICS \ -DZSTD_NO_UNUSED_FUNCTIONS \ -DZSTD_LEGACY_SUPPORT=0 \ @@ -55,10 +56,13 @@ libzstd: -DZSTD_HAVE_WEAK_SYMBOLS=0 \ -DZSTD_TRACE=0 \ -DZSTD_NO_TRACE \ + -DZSTD_DISABLE_ASM \ -DZSTD_LINUX_KERNEL + rm linux/lib/zstd/decompress/huf_decompress_amd64.S mv linux/lib/zstd/zstd.h linux/include/linux/zstd_lib.h mv linux/lib/zstd/zstd_errors.h linux/include/linux/ cp linux_zstd.h linux/include/linux/zstd.h + cp zstd_common_module.c linux/lib/zstd cp zstd_compress_module.c linux/lib/zstd cp zstd_decompress_module.c linux/lib/zstd cp decompress_sources.h linux/lib/zstd @@ -102,4 +106,5 @@ test: libzstd .PHONY: clean clean: - $(RM) -rf linux test/test test/static_test + $(RM) -rf linux + $(MAKE) -C test clean diff --git a/contrib/linux-kernel/linux.mk b/contrib/linux-kernel/linux.mk index f6f3a8983d8..20f08c644b7 100644 --- a/contrib/linux-kernel/linux.mk +++ b/contrib/linux-kernel/linux.mk @@ -10,16 +10,10 @@ # ################################################################ obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o - -ccflags-y += -Wno-error=deprecated-declarations +obj-$(CONFIG_ZSTD_COMMON) += zstd_common.o zstd_compress-y := \ zstd_compress_module.o \ - common/debug.o \ - common/entropy_common.o \ - common/error_private.o \ - common/fse_decompress.o \ - common/zstd_common.o \ compress/fse_compress.o \ compress/hist.o \ compress/huf_compress.o \ @@ -35,13 +29,15 @@ zstd_compress-y := \ zstd_decompress-y := \ zstd_decompress_module.o \ + decompress/huf_decompress.o \ + decompress/zstd_ddict.o \ + decompress/zstd_decompress.o \ + decompress/zstd_decompress_block.o \ + +zstd_common-y := \ + zstd_common_module.o \ common/debug.o \ common/entropy_common.o \ common/error_private.o \ common/fse_decompress.o \ common/zstd_common.o \ - 
decompress/huf_decompress.o \ - decompress/huf_decompress_amd64.o \ - decompress/zstd_ddict.o \ - decompress/zstd_decompress.o \ - decompress/zstd_decompress_block.o \ diff --git a/contrib/linux-kernel/test/Makefile b/contrib/linux-kernel/test/Makefile index be82b3fbac8..53b0c2a6596 100644 --- a/contrib/linux-kernel/test/Makefile +++ b/contrib/linux-kernel/test/Makefile @@ -45,4 +45,5 @@ clean: $(RM) -f $(LINUX_ZSTDLIB)/*.o $(RM) -f $(LINUX_ZSTDLIB)/**/*.o $(RM) -f *.o *.a + $(RM) -f static_test $(RM) -f test diff --git a/contrib/linux-kernel/test/include/linux/module.h b/contrib/linux-kernel/test/include/linux/module.h index be6d20daea2..63a28d57b26 100644 --- a/contrib/linux-kernel/test/include/linux/module.h +++ b/contrib/linux-kernel/test/include/linux/module.h @@ -12,6 +12,8 @@ #define EXPORT_SYMBOL(symbol) \ void* __##symbol = symbol +#define EXPORT_SYMBOL_GPL(symbol) \ + void* __##symbol = symbol #define MODULE_LICENSE(license) #define MODULE_DESCRIPTION(description) diff --git a/contrib/linux-kernel/zstd_common_module.c b/contrib/linux-kernel/zstd_common_module.c new file mode 100644 index 00000000000..22686e367e6 --- /dev/null +++ b/contrib/linux-kernel/zstd_common_module.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include + +#include "common/huf.h" +#include "common/fse.h" +#include "common/zstd_internal.h" + +// Export symbols shared by compress and decompress into a common module + +#undef ZSTD_isError /* defined within zstd_internal.h */ +EXPORT_SYMBOL_GPL(FSE_readNCount); +EXPORT_SYMBOL_GPL(HUF_readStats); +EXPORT_SYMBOL_GPL(HUF_readStats_wksp); +EXPORT_SYMBOL_GPL(ZSTD_isError); +EXPORT_SYMBOL_GPL(ZSTD_getErrorName); +EXPORT_SYMBOL_GPL(ZSTD_getErrorCode); +EXPORT_SYMBOL_GPL(ZSTD_customMalloc); +EXPORT_SYMBOL_GPL(ZSTD_customCalloc); +EXPORT_SYMBOL_GPL(ZSTD_customFree); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Common"); diff --git a/lib/common/compiler.h b/lib/common/compiler.h index 6c7100e835a..42f289e0b4c 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -165,6 +165,12 @@ #define UNLIKELY(x) (x) #endif +#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) +# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); } +#else +# define ZSTD_UNREACHABLE { assert(0); } +#endif + /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */ # include /* For Visual 2005 */ diff --git a/lib/common/huf.h b/lib/common/huf.h index dee99da772a..595b2f6db5d 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -173,7 +173,7 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, /* **************************************** * HUF detailed API * ****************************************/ -#define HUF_OPTIMAL_DEPTH_THRESHOLD 3 +#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra typedef enum { HUF_depth_fast, /** Use heuristic to find the table depth**/ HUF_depth_optimal /** Test possible table depths to find the one that produces the smallest header + encoded size**/ diff --git a/lib/common/pool.c b/lib/common/pool.c index 5c1d07d356e..bf21c57ed66 100644 --- 
a/lib/common/pool.c +++ b/lib/common/pool.c @@ -12,7 +12,7 @@ /* ====== Dependencies ======= */ #include "zstd_deps.h" /* size_t */ #include "debug.h" /* assert */ -#include "zstd_internal.h" /* ZSTD_customMalloc, ZSTD_customFree */ +#include "zstd_internal.h" /* ZSTD_customCalloc, ZSTD_customFree */ #include "pool.h" /* ====== Compiler specifics ====== */ @@ -126,7 +126,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, * empty and full queues. */ ctx->queueSize = queueSize + 1; - ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queue = (POOL_job*)ZSTD_customCalloc(ctx->queueSize * sizeof(POOL_job), customMem); ctx->queueHead = 0; ctx->queueTail = 0; ctx->numThreadsBusy = 0; @@ -140,7 +140,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, } ctx->shutdown = 0; /* Allocate space for the thread handles */ - ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threads = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); ctx->threadCapacity = 0; ctx->customMem = customMem; /* Check for errors */ @@ -220,7 +220,7 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) return 0; } /* numThreads > threadCapacity */ - { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); if (!threadPool) return 1; /* replace existing thread pool */ ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 4c3af94b75e..a6a5a257c4c 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1253,63 +1253,54 @@ unsigned HUF_minTableLog(unsigned symbolCardinality) return minBitsSymbols; } +#define ESTIMATE_TOTAL_SIZE(huffLog) {\ + maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, huffLog, workSpace, wkspSize);\ + if (ERR_isError(maxBits)) continue;\ + hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);\ + if (ERR_isError(hSize)) continue;\ + newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;\ + }\ + unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode) { - unsigned optLogGuess = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); - unsigned optLog = optLogGuess; + unsigned optLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); assert(srcSize > 1); /* Not supported, RLE should be used instead */ if (depthMode == HUF_depth_optimal) { /** Test valid depths and return optimal **/ BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp); - size_t optSize = ((size_t) ~0); - unsigned huffLog; size_t maxBits, hSize, newSize; const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); const unsigned minTableLog = HUF_minTableLog(symbolCardinality); + size_t optSize = ((size_t) ~0); + unsigned optLogGuess = optLog; - if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog; - - /* Search left of guess until size increases */ - for (huffLog = optLogGuess; huffLog >= minTableLog; 
huffLog--) { - maxBits = HUF_buildCTable_wksp(table, count, - maxSymbolValue, huffLog, - workSpace, wkspSize); - if (ERR_isError(maxBits)) continue; - - hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, - workSpace, wkspSize); - if (ERR_isError(hSize)) continue; + if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog; /** Assert workspace is large enough **/ - newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; + /* Search below estimate log until size increases */ + for (; optLogGuess >= minTableLog; optLogGuess--) { + ESTIMATE_TOTAL_SIZE(optLogGuess); if (newSize > optSize) { break; - } else { - optSize = newSize; - optLog = huffLog; } + optSize = newSize; + optLog = optLogGuess; } - /* Search right of estimate until size increases */ - for (huffLog = optLogGuess + 1; huffLog <= maxTableLog; huffLog++) { - maxBits = HUF_buildCTable_wksp(table, count, - maxSymbolValue, huffLog, - workSpace, wkspSize); - if (ERR_isError(maxBits)) continue; - - hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, - workSpace, wkspSize); - if (ERR_isError(hSize)) continue; + if (optSize < ((size_t) ~0)) { + return optLog; + } - newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; + /* Search above estimate log until size increases */ + for (; optLogGuess <= maxTableLog; optLogGuess++) { + ESTIMATE_TOTAL_SIZE(optLogGuess); if (newSize > optSize) { break; - } else { - optSize = newSize; - optLog = huffLog; } + optSize = newSize; + optLog = optLogGuess; } } assert(optLog <= HUF_TABLELOG_MAX); diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 3e2ee1dda01..2ee9cf63025 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -1317,14 +1317,10 @@ size_t ZSTD_RowFindBestMatch( } -typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - /** - * This struct contains the functions necessary for lazy to search. - * Currently, that is only searchMax. However, it is still valuable to have the - * VTable because this makes it easier to add more functions to the VTable later. + * Generate search functions templated on (dictMode, mls, rowLog). + * These functions are outlined for code size & compilation time. + * ZSTD_searchMax() dispatches to the correct implementation function. * * TODO: The start of the search function involves loading and calculating a * bunch of constants from the ZSTD_matchState_t. These computations could be @@ -1342,25 +1338,25 @@ typedef size_t (*searchMax_f)( * the single segment loop. It should go in searchMax instead of its own * function to avoid having multiple virtual function calls per search. 
*/ -typedef struct { - searchMax_f searchMax; -} ZSTD_LazyVTable; -#define GEN_ZSTD_BT_VTABLE(dictMode, mls) \ - static size_t ZSTD_BtFindBestMatch_##dictMode##_##mls( \ - ZSTD_matchState_t* ms, \ - const BYTE* ip, const BYTE* const iLimit, \ - size_t* offBasePtr) \ - { \ - assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ - return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode);\ - } \ - static const ZSTD_LazyVTable ZSTD_BtVTable_##dictMode##_##mls = { \ - ZSTD_BtFindBestMatch_##dictMode##_##mls \ - }; +#define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls +#define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls +#define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog + +#define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE + +#define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offBasePtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \ + } \ -#define GEN_ZSTD_HC_VTABLE(dictMode, mls) \ - static size_t ZSTD_HcFindBestMatch_##dictMode##_##mls( \ +#define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \ ZSTD_matchState_t* ms, \ const BYTE* ip, const BYTE* const iLimit, \ size_t* offsetPtr) \ @@ -1368,12 +1364,9 @@ typedef struct { assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \ } \ - static const ZSTD_LazyVTable ZSTD_HcVTable_##dictMode##_##mls = { \ - ZSTD_HcFindBestMatch_##dictMode##_##mls \ - }; -#define GEN_ZSTD_ROW_VTABLE(dictMode, mls, rowLog) \ - static size_t ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog( \ +#define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \ ZSTD_matchState_t* ms, \ const BYTE* ip, const BYTE* const iLimit, \ size_t* offsetPtr) \ @@ -1382,9 +1375,6 @@ typedef struct { assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \ return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \ } \ - static const ZSTD_LazyVTable ZSTD_RowVTable_##dictMode##_##mls##_##rowLog = { \ - ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog \ - }; #define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \ X(dictMode, mls, 4) \ @@ -1407,84 +1397,103 @@ typedef struct { X(__VA_ARGS__, dictMatchState) \ X(__VA_ARGS__, dedicatedDictSearch) -/* Generate Row VTables for each combination of (dictMode, mls, rowLog) */ -ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_VTABLE) -/* Generate Binary Tree VTables for each combination of (dictMode, mls) */ -ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_VTABLE) -/* Generate Hash Chain VTables for each combination of (dictMode, mls) */ -ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_VTABLE) - -#define GEN_ZSTD_BT_VTABLE_ARRAY(dictMode) \ - { \ - &ZSTD_BtVTable_##dictMode##_4, \ - &ZSTD_BtVTable_##dictMode##_5, \ - &ZSTD_BtVTable_##dictMode##_6 \ - } - -#define GEN_ZSTD_HC_VTABLE_ARRAY(dictMode) \ - { \ - &ZSTD_HcVTable_##dictMode##_4, \ - &ZSTD_HcVTable_##dictMode##_5, \ - &ZSTD_HcVTable_##dictMode##_6 \ - } - -#define GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, mls) \ - { \ - &ZSTD_RowVTable_##dictMode##_##mls##_4, \ - 
&ZSTD_RowVTable_##dictMode##_##mls##_5, \ - &ZSTD_RowVTable_##dictMode##_##mls##_6 \ - } +/* Generate row search fns for each combination of (dictMode, mls, rowLog) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN) +/* Generate binary Tree search fns for each combination of (dictMode, mls) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN) +/* Generate hash chain search fns for each combination of (dictMode, mls) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN) -#define GEN_ZSTD_ROW_VTABLE_ARRAY(dictMode) \ - { \ - GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 4), \ - GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 5), \ - GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 6) \ - } +typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e; -#define GEN_ZSTD_VTABLE_ARRAY(X) \ - { \ - X(noDict), \ - X(extDict), \ - X(dictMatchState), \ - X(dedicatedDictSearch) \ +#define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \ + case mls: \ + return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr); +#define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \ + case mls: \ + return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr); +#define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \ + case rowLog: \ + return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr); + +#define ZSTD_SWITCH_MLS(X, dictMode) \ + switch (mls) { \ + ZSTD_FOR_EACH_MLS(X, dictMode) \ } -/* ******************************* -* Common parser - lazy strategy -*********************************/ -typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e; +#define ZSTD_SWITCH_ROWLOG(dictMode, mls) \ + case mls: \ + switch (rowLog) { \ + ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \ + } \ + ZSTD_UNREACHABLE; \ + break; + +#define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \ + switch (searchMethod) { \ + case search_hashChain: \ + ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \ + break; \ + case search_binaryTree: \ + ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \ + break; \ + case search_rowHash: \ + ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \ + break; \ + } \ + ZSTD_UNREACHABLE; /** - * This table is indexed first by the four ZSTD_dictMode_e values, and then - * by the two searchMethod_e values. NULLs are placed for configurations - * that should never occur (extDict modes go to the other implementation - * below and there is no DDSS for binary tree search yet). + * Searches for the longest match at @p ip. + * Dispatches to the correct implementation function based on the + * (searchMethod, dictMode, mls, rowLog). We use switch statements + * here instead of using an indirect function call through a function + * pointer because after Spectre and Meltdown mitigations, indirect + * function calls can be very costly, especially in the kernel. + * + * NOTE: dictMode and searchMethod should be templated, so those switch + * statements should be optimized out. Only the mls & rowLog switches + * should be left. + * + * @param ms The match state. + * @param ip The position to search at. + * @param iend The end of the input data. + * @param[out] offsetPtr Stores the match offset into this pointer. + * @param mls The minimum search length, in the range [4, 6]. + * @param rowLog The row log (if applicable), in the range [4, 6]. + * @param searchMethod The search method to use (templated). + * @param dictMode The dictMode (templated). 
+ * + * @returns The length of the longest match found, or < mls if no match is found. + * If a match is found its offset is stored in @p offsetPtr. */ - -static ZSTD_LazyVTable const* -ZSTD_selectLazyVTable(ZSTD_matchState_t const* ms, searchMethod_e searchMethod, ZSTD_dictMode_e dictMode) +FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax( + ZSTD_matchState_t* ms, + const BYTE* ip, + const BYTE* iend, + size_t* offsetPtr, + U32 const mls, + U32 const rowLog, + searchMethod_e const searchMethod, + ZSTD_dictMode_e const dictMode) { - /* Fill the Hc/Bt VTable arrays with the right functions for the (dictMode, mls) combination. */ - ZSTD_LazyVTable const* const hcVTables[4][3] = GEN_ZSTD_VTABLE_ARRAY(GEN_ZSTD_HC_VTABLE_ARRAY); - ZSTD_LazyVTable const* const btVTables[4][3] = GEN_ZSTD_VTABLE_ARRAY(GEN_ZSTD_BT_VTABLE_ARRAY); - /* Fill the Row VTable array with the right functions for the (dictMode, mls, rowLog) combination. */ - ZSTD_LazyVTable const* const rowVTables[4][3][3] = GEN_ZSTD_VTABLE_ARRAY(GEN_ZSTD_ROW_VTABLE_ARRAY); - - U32 const mls = MAX(4, MIN(6, ms->cParams.minMatch)); - U32 const rowLog = MAX(4, MIN(6, ms->cParams.searchLog)); - switch (searchMethod) { - case search_hashChain: - return hcVTables[dictMode][mls - 4]; - case search_binaryTree: - return btVTables[dictMode][mls - 4]; - case search_rowHash: - return rowVTables[dictMode][mls - 4][rowLog - 4]; - default: - return NULL; + if (dictMode == ZSTD_noDict) { + ZSTD_SWITCH_SEARCH_METHOD(noDict) + } else if (dictMode == ZSTD_extDict) { + ZSTD_SWITCH_SEARCH_METHOD(extDict) + } else if (dictMode == ZSTD_dictMatchState) { + ZSTD_SWITCH_SEARCH_METHOD(dictMatchState) + } else if (dictMode == ZSTD_dedicatedDictSearch) { + ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch) } + ZSTD_UNREACHABLE; + return 0; } +/* ******************************* +* Common parser - lazy strategy +*********************************/ + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, @@ -1501,8 +1510,9 @@ ZSTD_compressBlock_lazy_generic( const BYTE* const base = ms->window.base; const U32 prefixLowestIndex = ms->window.dictLimit; const BYTE* const prefixLowest = base + prefixLowestIndex; + const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); + const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); - searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, dictMode)->searchMax; U32 offset_1 = rep[0], offset_2 = rep[1]; U32 offsetSaved1 = 0, offsetSaved2 = 0; @@ -1519,8 +1529,6 @@ ZSTD_compressBlock_lazy_generic( 0; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); - assert(searchMax != NULL); - DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod); ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { @@ -1538,7 +1546,6 @@ ZSTD_compressBlock_lazy_generic( } if (searchMethod == search_rowHash) { - const U32 rowLog = MAX(4, MIN(6, ms->cParams.searchLog)); ZSTD_row_fillHashCache(ms, base, rowLog, MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), ms->nextToUpdate, ilimit); @@ -1579,7 +1586,7 @@ ZSTD_compressBlock_lazy_generic( /* first search (depth 0) */ { size_t offbaseFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offbaseFound); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode); if (ml2 > matchLength) matchLength = ml2, start = ip, offBase = offbaseFound; } @@ -1618,7 +1625,7 @@ ZSTD_compressBlock_lazy_generic( } } { 
size_t ofbCandidate=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &ofbCandidate); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); if ((ml2 >= 4) && (gain2 > gain1)) { @@ -1654,7 +1661,7 @@ ZSTD_compressBlock_lazy_generic( } } { size_t ofbCandidate=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &ofbCandidate); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); if ((ml2 >= 4) && (gain2 > gain1)) { @@ -1899,9 +1906,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictStart = dictBase + ms->window.lowLimit; const U32 windowLog = ms->cParams.windowLog; - const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; + const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); + const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); - searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, ZSTD_extDict)->searchMax; U32 offset_1 = rep[0], offset_2 = rep[1]; DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); @@ -1943,7 +1950,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* first search (depth 0) */ { size_t ofbCandidate = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &ofbCandidate); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); if (ml2 > matchLength) matchLength = ml2, start = ip, offBase = ofbCandidate; } @@ -1978,7 +1985,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* search match, depth 1 */ { size_t ofbCandidate = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &ofbCandidate); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); if ((ml2 >= 4) && (gain2 > gain1)) { @@ -2010,7 +2017,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( /* search match, depth 2 */ { size_t ofbCandidate = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &ofbCandidate); + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); if ((ml2 >= 4) && (gain2 > gain1)) { diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c index 889764a5e87..6ffa35f6eb9 100644 --- a/lib/decompress/zstd_ddict.c +++ b/lib/decompress/zstd_ddict.c @@ -240,5 +240,5 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) { if (ddict==NULL) return 0; - return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); + return ddict->dictID; } diff --git a/programs/zstd.1 b/programs/zstd.1 index a2bf7fd2f50..5cf3c6bb1c3 100644 --- a/programs/zstd.1 +++ b/programs/zstd.1 @@ -144,7 +144,7 @@ Note: cannot use both this and \-D together Note: \fB\-\-long\fR mode will be au \fB\-\-no\-dictID\fR: do not store 
dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\. . .IP "\(bu" 4 -\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, Zstandard uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i\.e\. you can increase or decrease it)\. +\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, \fBzstd\fR uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i\.e\. you can increase or decrease it)\. . .IP This is also used during compression when using with \-\-patch\-from=\. In this case, this parameter overrides that maximum size allowed for a dictionary\. (128 MB)\. diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 3ab8404ee65..37c2ba1873e 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -183,7 +183,7 @@ the last one takes effect. The decoder will have to rely on implicit knowledge about which dictionary to use, it won't be able to check if it's correct. * `-M#`, `--memory=#`: - Set a memory usage limit. By default, Zstandard uses 128 MB for decompression + Set a memory usage limit. By default, `zstd` uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i.e. you can increase or decrease it). diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 4f8727df96a..b4a6509db18 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -89,8 +89,13 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer setRand(cctx, ZSTD_c_ldmHashRateLog, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX, producer); /* Set misc parameters */ +#ifndef ZSTD_MULTITHREAD + setRand(cctx, ZSTD_c_nbWorkers, 0, 0, producer); + setRand(cctx, ZSTD_c_rsyncable, 0, 0, producer); +#else setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer); setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer); +#endif setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer); setRand(cctx, ZSTD_c_enableDedicatedDictSearch, 0, 1, producer); setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); From c2638212af253e8d9c9161743812b5e39056e0f5 Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Thu, 27 Oct 2022 13:13:17 -0700 Subject: [PATCH 4/8] Change threshold for benchmarking --- lib/common/huf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/common/huf.h b/lib/common/huf.h index 595b2f6db5d..dee99da772a 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -173,7 +173,7 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, /* **************************************** * HUF detailed API * ****************************************/ -#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra +#define HUF_OPTIMAL_DEPTH_THRESHOLD 3 typedef enum { HUF_depth_fast, /** Use heuristic to find the table depth**/ HUF_depth_optimal /** Test possible table depths to find the one that produces the smallest header + encoded size**/ From 482689b995bd6afb4a7b335bb80b9e60f501e3c2 Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Tue, 20 Dec 2022 12:27:38 -0800 Subject: [PATCH 5/8] huf log speed optimization: unidirectional scan of logs + break when 
regressing --- .github/dependabot.yml | 6 ++++ .github/workflows/dev-long-tests.yml | 4 +++ .github/workflows/dev-short-tests.yml | 14 +++++--- .../workflows/publish-release-artifacts.yml | 2 +- .github/workflows/scorecards.yml | 2 +- build/meson/meson.build | 14 ++------ contrib/linux-kernel/test/test.c | 6 +++- lib/common/huf.h | 2 +- lib/compress/huf_compress.c | 32 +++++-------------- lib/compress/zstd_compress.c | 15 +++++---- lib/compress/zstd_lazy.c | 3 +- programs/fileio.c | 16 ++++++++-- programs/util.c | 9 ++++-- programs/zstdcli.c | 6 ++-- tests/cli-tests/compression/window-resize.sh | 9 ++++++ .../window-resize.sh.stderr.ignore | 0 .../compression/window-resize.sh.stdout.glob | 3 ++ tests/fuzz/zstd_helpers.c | 2 +- tests/fuzzer.c | 22 ++++++------- tests/regression/config.c | 4 +-- tests/zstreamtest.c | 2 +- 21 files changed, 96 insertions(+), 77 deletions(-) create mode 100644 .github/dependabot.yml create mode 100755 tests/cli-tests/compression/window-resize.sh create mode 100644 tests/cli-tests/compression/window-resize.sh.stderr.ignore create mode 100644 tests/cli-tests/compression/window-resize.sh.stdout.glob diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000000..5ace4600a1f --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/dev-long-tests.yml b/.github/workflows/dev-long-tests.yml index aadc0ab5499..ac34c7165e5 100644 --- a/.github/workflows/dev-long-tests.yml +++ b/.github/workflows/dev-long-tests.yml @@ -71,7 +71,9 @@ jobs: steps: - uses: actions/checkout@v3 - name: gcc-8 + ASan + UBSan + Test Zstd + # See https://askubuntu.com/a/1428822 run: | + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu focal main universe" | sudo tee -a /etc/apt/sources.list sudo apt-get -qqq update make gcc8install CC=gcc-8 make -j uasan-test-zstd @@ -271,7 +275,7 @@ jobs: qemu-consistency: name: QEMU ${{ matrix.name }} - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: false # 'false' means Don't stop matrix workflows even if some matrix failed. 
matrix: @@ -376,7 +380,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Add MSBuild to PATH - uses: microsoft/setup-msbuild@v1.0.2 + uses: microsoft/setup-msbuild@v1.1.3 - name: Build and run tests working-directory: ${{env.GITHUB_WORKSPACE}} env: diff --git a/.github/workflows/publish-release-artifacts.yml b/.github/workflows/publish-release-artifacts.yml index b5a3ac689bf..4a70fb8c63e 100644 --- a/.github/workflows/publish-release-artifacts.yml +++ b/.github/workflows/publish-release-artifacts.yml @@ -68,7 +68,7 @@ jobs: fi - name: Publish - uses: skx/github-action-publish-binaries@release-1.3 + uses: skx/github-action-publish-binaries@release-2.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 2eae4749437..1c19b48b2cc 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -32,7 +32,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@865b4092859256271290c77adbd10a43f4779972 # tag=v2.0.3 + uses: ossf/scorecard-action@99c53751e09b9529366343771cc321ec74e9bd3d # tag=v2.0.6 with: results_file: results.sarif results_format: sarif diff --git a/build/meson/meson.build b/build/meson/meson.build index f264760a34c..98d556aa3bf 100644 --- a/build/meson/meson.build +++ b/build/meson/meson.build @@ -23,7 +23,9 @@ project('zstd', # so this isn't safe #'werror=true' ], - version: 'DUMMY', + version: run_command( + find_program('GetZstdLibraryVersion.py'), '../../lib/zstd.h', + check: true).stdout().strip(), meson_version: '>=0.48.0') cc = meson.get_compiler('c') @@ -45,16 +47,6 @@ compiler_msvc = 'msvc' zstd_version = meson.project_version() -zstd_h_file = join_paths(meson.current_source_dir(), '../../lib/zstd.h') -GetZstdLibraryVersion_py = find_program('GetZstdLibraryVersion.py', native : true) -r = run_command(GetZstdLibraryVersion_py, zstd_h_file) -if r.returncode() == 0 - zstd_version = r.stdout().strip() - message('Project version is now: @0@'.format(zstd_version)) -else - error('Cannot find project version in @0@'.format(zstd_h_file)) -endif - zstd_libversion = zstd_version # ============================================================================= diff --git a/contrib/linux-kernel/test/test.c b/contrib/linux-kernel/test/test.c index 6cd1730bb3a..67d248e0cbf 100644 --- a/contrib/linux-kernel/test/test.c +++ b/contrib/linux-kernel/test/test.c @@ -186,11 +186,14 @@ static void __attribute__((noinline)) use(void *x) { asm volatile("" : "+r"(x)); } +static void __attribute__((noinline)) fill_stack(void) { + memset(g_stack, 0x33, 8192); +} + static void __attribute__((noinline)) set_stack(void) { char stack[8192]; g_stack = stack; - memset(g_stack, 0x33, 8192); use(g_stack); } @@ -208,6 +211,7 @@ static void __attribute__((noinline)) check_stack(void) { static void test_stack_usage(test_data_t const *data) { set_stack(); + fill_stack(); test_f2fs(); test_btrfs(data); test_decompress_unzstd(data); diff --git a/lib/common/huf.h b/lib/common/huf.h index dee99da772a..595b2f6db5d 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -173,7 +173,7 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, /* **************************************** * HUF detailed API * ****************************************/ -#define HUF_OPTIMAL_DEPTH_THRESHOLD 3 +#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra typedef enum { HUF_depth_fast, /** Use heuristic to find the table depth**/ HUF_depth_optimal /** Test possible 
table depths to find the one that produces the smallest header + encoded size**/ diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index a6a5a257c4c..eb2cb7b94a5 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1253,14 +1253,6 @@ unsigned HUF_minTableLog(unsigned symbolCardinality) return minBitsSymbols; } -#define ESTIMATE_TOTAL_SIZE(huffLog) {\ - maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, huffLog, workSpace, wkspSize);\ - if (ERR_isError(maxBits)) continue;\ - hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);\ - if (ERR_isError(hSize)) continue;\ - newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;\ - }\ - unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode) { unsigned optLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); @@ -1273,28 +1265,20 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); const unsigned minTableLog = HUF_minTableLog(symbolCardinality); size_t optSize = ((size_t) ~0); - unsigned optLogGuess = optLog; if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog; /** Assert workspace is large enough **/ - /* Search below estimate log until size increases */ - for (; optLogGuess >= minTableLog; optLogGuess--) { - ESTIMATE_TOTAL_SIZE(optLogGuess); + /* Search until size increases */ + for (unsigned optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { + maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize); - if (newSize > optSize) { - break; - } - optSize = newSize; - optLog = optLogGuess; - } + if (ERR_isError(maxBits)) continue; - if (optSize < ((size_t) ~0)) { - return optLog; - } + hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize); + + if (ERR_isError(hSize)) continue; - /* Search above estimate log until size increases */ - for (; optLogGuess <= maxTableLog; optLogGuess++) { - ESTIMATE_TOTAL_SIZE(optLogGuess); + newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; if (newSize > optSize) { break; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index adf1f6e7afc..0069a7b1bee 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -265,9 +265,9 @@ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); } -/* Returns 1 if compression parameters are such that we should +/* Returns ZSTD_ps_enable if compression parameters are such that we should * enable long distance matching (wlog >= 27, strategy >= btopt). - * Returns 0 otherwise. + * Returns ZSTD_ps_disable otherwise. 
*/ static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, const ZSTD_compressionParameters* const cParams) { @@ -482,8 +482,8 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) return bounds; case ZSTD_c_enableLongDistanceMatching: - bounds.lowerBound = 0; - bounds.upperBound = 1; + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; return bounds; case ZSTD_c_ldmHashLog: @@ -854,6 +854,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, return (size_t)CCtxParams->enableDedicatedDictSearch; case ZSTD_c_enableLongDistanceMatching : + BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value); CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value; return CCtxParams->ldmParams.enableLdm; @@ -1096,7 +1097,7 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) { - DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize); RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, "Can't set pledgedSrcSize when not in init stage."); cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; @@ -1369,8 +1370,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, } /* resize windowLog if input is small enough, to use less memory */ - if ( (srcSize < maxWindowResize) - && (dictSize < maxWindowResize) ) { + if ( (srcSize <= maxWindowResize) + && (dictSize <= maxWindowResize) ) { U32 const tSize = (U32)(srcSize + dictSize); static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 2ee9cf63025..d5a7b5cbf95 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -1078,7 +1078,7 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGr } # endif /* ZSTD_ARCH_ARM_NEON */ /* SWAR */ - { const size_t chunkSize = sizeof(size_t); + { const int chunkSize = sizeof(size_t); const size_t shiftAmount = ((chunkSize * 8) - chunkSize); const size_t xFF = ~((size_t)0); const size_t x01 = xFF / 0xFF; @@ -2123,7 +2123,6 @@ size_t ZSTD_compressBlock_lazy_extDict_row( size_t ZSTD_compressBlock_lazy2_extDict_row( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) - { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); } diff --git a/programs/fileio.c b/programs/fileio.c index 96cf602a300..e80d370110a 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -2749,6 +2749,7 @@ typedef struct { int numSkippableFrames; int decompUnavailable; int usesCheck; + BYTE checksum[4]; U32 nbFiles; unsigned dictID; } fileInfo_t; @@ -2843,8 +2844,8 @@ FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile) int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; if (contentChecksumFlag) { info->usesCheck = 1; - ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0, - info_frame_error, "Error: could not skip past checksum"); + ERROR_IF(fread(info->checksum, 1, 4, srcFile) != 4, + info_frame_error, "Error: could not read checksum"); } } info->numActualFrames++; } @@ -2936,7 +2937,16 @@ displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel) (unsigned long long)info->decompressedSize); DISPLAYOUT("Ratio: %.4f\n", ratio); } - DISPLAYOUT("Check: %s\n", checkString); + + if (info->usesCheck && 
info->numActualFrames == 1) { + DISPLAYOUT("Check: %s %02x%02x%02x%02x\n", checkString, + info->checksum[3], info->checksum[2], + info->checksum[1], info->checksum[0] + ); + } else { + DISPLAYOUT("Check: %s\n", checkString); + } + DISPLAYOUT("\n"); } } diff --git a/programs/util.c b/programs/util.c index a3af2621143..63b3ae1767f 100644 --- a/programs/util.c +++ b/programs/util.c @@ -569,7 +569,7 @@ UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2) for( idx2=0 ; (idx2 < table2->tableSize) && table2->fileNames[idx2] && (pos < newTotalTableSize) ; ++idx2, ++newTableIdx) { size_t const curLen = strlen(table2->fileNames[idx2]); memcpy(buf+pos, table2->fileNames[idx2], curLen); - assert(newTableIdx <= newTable->tableSize); + assert(newTableIdx < newTable->tableSize); newTable->fileNames[newTableIdx] = buf+pos; pos += curLen+1; } } @@ -693,8 +693,11 @@ static int UTIL_prepareFileList(const char *dirName, ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; assert(newListSize >= 0); *bufStart = (char*)UTIL_realloc(*bufStart, (size_t)newListSize); - *bufEnd = *bufStart + newListSize; - if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + if (*bufStart != NULL) { + *bufEnd = *bufStart + newListSize; + } else { + free(path); closedir(dir); return 0; + } } if (*bufStart + *pos + pathLength < *bufEnd) { memcpy(*bufStart + *pos, path, pathLength + 1); /* with final \0 */ diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 583c8a5919b..362f320a998 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -216,7 +216,6 @@ static void usage_advanced(const char* programName) DISPLAYOUT("\n"); DISPLAYOUT("Advanced compression options :\n"); DISPLAYOUT(" --ultra enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); - DISPLAYOUT(" --long[=#] enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); DISPLAYOUT(" --fast[=#] switch to very fast compression levels (default: %u)\n", 1); #ifdef ZSTD_GZCOMPRESS if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */ @@ -224,9 +223,9 @@ static void usage_advanced(const char* programName) DISPLAYOUT(" --no-name do not store original filename when compressing\n"); } #endif - DISPLAYOUT(" --adapt dynamically adapt compression level to I/O conditions\n"); - DISPLAYOUT(" --[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies\n"); + DISPLAYOUT(" --long[=#] enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog); DISPLAYOUT(" --patch-from=FILE : specify the file to be used as a reference point for zstd's diff engine. 
\n"); + DISPLAYOUT(" --adapt dynamically adapt compression level to I/O conditions\n"); # ifdef ZSTD_MULTITHREAD DISPLAYOUT(" -T# spawn # compression threads (default: 1, 0==# cores) \n"); DISPLAYOUT(" -B# select size of each job (default: 0==automatic) \n"); @@ -240,6 +239,7 @@ static void usage_advanced(const char* programName) DISPLAYOUT(" --target-compressed-block-size=# : generate compressed block of approximately targeted size \n"); DISPLAYOUT(" --no-dictID don't write dictID into header (dictionary compression only)\n"); DISPLAYOUT(" --[no-]compress-literals : force (un)compressed literals\n"); + DISPLAYOUT(" --[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies\n"); DISPLAYOUT(" --format=zstd compress files to the .zst format (default)\n"); #ifdef ZSTD_GZCOMPRESS diff --git a/tests/cli-tests/compression/window-resize.sh b/tests/cli-tests/compression/window-resize.sh new file mode 100755 index 00000000000..3b5e6fe24f3 --- /dev/null +++ b/tests/cli-tests/compression/window-resize.sh @@ -0,0 +1,9 @@ +#!/bin/sh +datagen -g1G > file +zstd --long=31 -1 --single-thread --no-content-size -f file +zstd -l -v file.zst + +# We want to ignore stderr (its outputting "*** zstd command line interface +# 64-bits v1.5.3, by Yann Collet ***") + +rm file file.zst diff --git a/tests/cli-tests/compression/window-resize.sh.stderr.ignore b/tests/cli-tests/compression/window-resize.sh.stderr.ignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/cli-tests/compression/window-resize.sh.stdout.glob b/tests/cli-tests/compression/window-resize.sh.stdout.glob new file mode 100644 index 00000000000..313d216e1e7 --- /dev/null +++ b/tests/cli-tests/compression/window-resize.sh.stdout.glob @@ -0,0 +1,3 @@ +... +Window Size: 1.000 GiB (1073741824 B) +... 
diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index b4a6509db18..08ce70dd7a0 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -80,7 +80,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer setRand(cctx, ZSTD_c_checksumFlag, 0, 1, producer); setRand(cctx, ZSTD_c_dictIDFlag, 0, 1, producer); /* Select long distance matching parameters */ - setRand(cctx, ZSTD_c_enableLongDistanceMatching, 0, 1, producer); + setRand(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_auto, ZSTD_ps_disable, producer); setRand(cctx, ZSTD_c_ldmHashLog, ZSTD_HASHLOG_MIN, 16, producer); setRand(cctx, ZSTD_c_ldmMinMatch, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX, producer); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 81c2d9dba22..879e537bc90 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -851,7 +851,7 @@ static int basicUnitTests(U32 const seed, double compressibility) RDG_genBuffer(dict, size, 0.5, 0.5, seed); RDG_genBuffer(src, size, 0.5, 0.5, seed); - CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable)); assert(!ZSTD_isError(ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, size, dict, size, 3))); ZSTD_freeCCtx(cctx); @@ -875,7 +875,7 @@ static int basicUnitTests(U32 const seed, double compressibility) CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, nbWorkers)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceMaxWindow, 1)); - CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable)); CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, CNBuffSize)); cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize); CHECK_Z(cSize); @@ -900,7 +900,7 @@ static int basicUnitTests(U32 const seed, double compressibility) RDG_genBuffer(CNBuffer, testSize, 0.6, 0.6, seed); memcpy(dict + testSize, CNBuffer, testSize); for (level = 1; level <= 5; ++level) { - for (ldmEnabled = 0; ldmEnabled <= 1; ++ldmEnabled) { + for (ldmEnabled = ZSTD_ps_enable; ldmEnabled <= ZSTD_ps_disable; ++ldmEnabled) { size_t cSize0; XXH64_hash_t compressedChecksum0; @@ -956,7 +956,7 @@ static int basicUnitTests(U32 const seed, double compressibility) /* Enable MT, LDM, and opt parser */ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1)); - CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19)); @@ -995,7 +995,7 @@ static int basicUnitTests(U32 const seed, double compressibility) /* Disable content size to skip single-pass decompression. 
*/ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, (int)kWindowLog)); - CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmMinMatch, 32)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmHashRateLog, 1)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_ldmHashLog, 16)); @@ -1092,7 +1092,7 @@ static int basicUnitTests(U32 const seed, double compressibility) /* compress on level 1 using refPrefix and ldm */ ZSTD_CCtx_refPrefix(cctx, dict, size);; - CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)) + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable)) refPrefixLdmCompressedSize = ZSTD_compress2(cctx, dst, dstSize, src, size); assert(!ZSTD_isError(refPrefixLdmCompressedSize)); @@ -2820,7 +2820,7 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(3, "test%3i : parameters in order : ", testNb++); assert(cctx != NULL); CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) ); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) ); CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) ); { size_t const compressedSize = ZSTD_compress2(cctx, compressedBuffer, ZSTD_compressBound(inputSize), @@ -2836,7 +2836,7 @@ static int basicUnitTests(U32 const seed, double compressibility) { ZSTD_CCtx* cctx = ZSTD_createCCtx(); DISPLAYLEVEL(3, "test%3i : parameters disordered : ", testNb++); CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) ); - CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1) ); + CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) ); CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) ); { size_t const result = ZSTD_compress2(cctx, compressedBuffer, ZSTD_compressBound(inputSize), @@ -3492,7 +3492,7 @@ static int basicUnitTests(U32 const seed, double compressibility) /* Enable MT, LDM, and use refPrefix() for a small dict */ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2)); - CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable)); CHECK_Z(ZSTD_CCtx_refPrefix(cctx, dict, dictSize)); CHECK_Z(ZSTD_compress2(cctx, dst, dstSize, src, srcSize)); @@ -3686,7 +3686,7 @@ static int longUnitTests(U32 const seed, double compressibility) /* Enable checksum to verify round trip. */ CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); - CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19)); /* Round trip once with ldm. 
*/ @@ -3696,7 +3696,7 @@ static int longUnitTests(U32 const seed, double compressibility) ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1)); - CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 0)); + CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_disable)); CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19)); /* Round trip once without ldm. */ diff --git a/tests/regression/config.c b/tests/regression/config.c index 57cd110c6bb..30d0ca5e212 100644 --- a/tests/regression/config.c +++ b/tests/regression/config.c @@ -183,7 +183,7 @@ static config_t no_pledged_src_size_with_dict = { }; static param_value_t const ldm_param_values[] = { - {.param = ZSTD_c_enableLongDistanceMatching, .value = 1}, + {.param = ZSTD_c_enableLongDistanceMatching, .value = ZSTD_ps_enable}, }; static config_t ldm = { @@ -204,7 +204,7 @@ static config_t mt = { static param_value_t const mt_ldm_param_values[] = { {.param = ZSTD_c_nbWorkers, .value = 2}, - {.param = ZSTD_c_enableLongDistanceMatching, .value = 1}, + {.param = ZSTD_c_enableLongDistanceMatching, .value = ZSTD_ps_enable}, }; static config_t mt_ldm = { diff --git a/tests/zstreamtest.c b/tests/zstreamtest.c index ce9020f128c..348f72ed426 100644 --- a/tests/zstreamtest.c +++ b/tests/zstreamtest.c @@ -2281,7 +2281,7 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest, /* mess with long distance matching parameters */ if (bigTests) { - if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_enableLongDistanceMatching, FUZ_rand(&lseed) & 63, opaqueAPI) ); + if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_enableLongDistanceMatching, FUZ_randomClampedLength(&lseed, ZSTD_ps_auto, ZSTD_ps_disable), opaqueAPI) ); if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashLog, FUZ_randomClampedLength(&lseed, ZSTD_HASHLOG_MIN, 23), opaqueAPI) ); if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), opaqueAPI) ); if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX), opaqueAPI) ); From c26f348dc810ae536a63f088b380b5e6b341b45a Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Tue, 20 Dec 2022 12:43:46 -0800 Subject: [PATCH 6/8] fix CI errors --- lib/compress/huf_compress.c | 3 +- tests/regression/results.csv | 60 ++++++++++++++++++------------------ 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index eb2cb7b94a5..d7755e0f266 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1265,11 +1265,12 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); const unsigned minTableLog = HUF_minTableLog(symbolCardinality); size_t optSize = ((size_t) ~0); + unsigned optLogGuess; if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog; /** Assert workspace is large enough **/ /* Search until size increases */ - for (unsigned optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { + for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { maxBits = HUF_buildCTable_wksp(table, 
count, maxSymbolValue, optLogGuess, workSpace, wkspSize); if (ERR_isError(maxBits)) continue; diff --git a/tests/regression/results.csv b/tests/regression/results.csv index dee7d572169..ffa9b22e007 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -12,9 +12,9 @@ silesia.tar, level 7, compress silesia.tar, level 9, compress simple, 4552899 silesia.tar, level 13, compress simple, 4502956 silesia.tar, level 16, compress simple, 4360527 -silesia.tar, level 19, compress simple, 4266970 +silesia.tar, level 19, compress simple, 4267014 silesia.tar, uncompressed literals, compress simple, 4854086 -silesia.tar, uncompressed literals optimal, compress simple, 4266970 +silesia.tar, uncompressed literals optimal, compress simple, 4267014 silesia.tar, huffman literals, compress simple, 6179047 github.tar, level -5, compress simple, 52115 github.tar, level -3, compress simple, 45678 @@ -29,9 +29,9 @@ github.tar, level 7, compress github.tar, level 9, compress simple, 36760 github.tar, level 13, compress simple, 35501 github.tar, level 16, compress simple, 40471 -github.tar, level 19, compress simple, 32149 +github.tar, level 19, compress simple, 32127 github.tar, uncompressed literals, compress simple, 38831 -github.tar, uncompressed literals optimal, compress simple, 32149 +github.tar, uncompressed literals optimal, compress simple, 32127 github.tar, huffman literals, compress simple, 42560 silesia, level -5, compress cctx, 6857372 silesia, level -3, compress cctx, 6503412 @@ -46,7 +46,7 @@ silesia, level 7, compress silesia, level 9, compress cctx, 4543018 silesia, level 13, compress cctx, 4493990 silesia, level 16, compress cctx, 4359864 -silesia, level 19, compress cctx, 4296438 +silesia, level 19, compress cctx, 4296439 silesia, long distance mode, compress cctx, 4842075 silesia, multithreaded, compress cctx, 4842075 silesia, multithreaded long distance mode, compress cctx, 4842075 @@ -55,7 +55,7 @@ silesia, small hash log, compress silesia, small chain log, compress cctx, 4912197 silesia, explicit params, compress cctx, 4794052 silesia, uncompressed literals, compress cctx, 4842075 -silesia, uncompressed literals optimal, compress cctx, 4296438 +silesia, uncompressed literals optimal, compress cctx, 4296439 silesia, huffman literals, compress cctx, 6172202 silesia, multithreaded with advanced params, compress cctx, 4842075 github, level -5, compress cctx, 204407 @@ -110,7 +110,7 @@ silesia, level 7, zstdcli, silesia, level 9, zstdcli, 4543066 silesia, level 13, zstdcli, 4494038 silesia, level 16, zstdcli, 4359912 -silesia, level 19, zstdcli, 4296486 +silesia, level 19, zstdcli, 4296487 silesia, long distance mode, zstdcli, 4833785 silesia, multithreaded, zstdcli, 4842123 silesia, multithreaded long distance mode, zstdcli, 4833785 @@ -135,7 +135,7 @@ silesia.tar, level 7, zstdcli, silesia.tar, level 9, zstdcli, 4552903 silesia.tar, level 13, zstdcli, 4502960 silesia.tar, level 16, zstdcli, 4360531 -silesia.tar, level 19, zstdcli, 4266974 +silesia.tar, level 19, zstdcli, 4267018 silesia.tar, no source size, zstdcli, 4854160 silesia.tar, long distance mode, zstdcli, 4845745 silesia.tar, multithreaded, zstdcli, 4854164 @@ -213,7 +213,7 @@ github.tar, level 13, zstdcli, github.tar, level 13 with dict, zstdcli, 37134 github.tar, level 16, zstdcli, 40475 github.tar, level 16 with dict, zstdcli, 33378 -github.tar, level 19, zstdcli, 32153 +github.tar, level 19, zstdcli, 32131 github.tar, level 19 with dict, zstdcli, 32716 github.tar, no source size, zstdcli, 38832 github.tar, 
no source size with dict, zstdcli, 38004 @@ -249,7 +249,7 @@ silesia, level 12 row 1, advanced silesia, level 12 row 2, advanced one pass, 4503116 silesia, level 13, advanced one pass, 4493990 silesia, level 16, advanced one pass, 4359864 -silesia, level 19, advanced one pass, 4296438 +silesia, level 19, advanced one pass, 4296439 silesia, no source size, advanced one pass, 4842075 silesia, long distance mode, advanced one pass, 4833710 silesia, multithreaded, advanced one pass, 4842075 @@ -283,7 +283,7 @@ silesia.tar, level 12 row 1, advanced silesia.tar, level 12 row 2, advanced one pass, 4513797 silesia.tar, level 13, advanced one pass, 4502956 silesia.tar, level 16, advanced one pass, 4360527 -silesia.tar, level 19, advanced one pass, 4266970 +silesia.tar, level 19, advanced one pass, 4267014 silesia.tar, no source size, advanced one pass, 4854086 silesia.tar, long distance mode, advanced one pass, 4840452 silesia.tar, multithreaded, advanced one pass, 4854160 @@ -527,7 +527,7 @@ github.tar, level 16 with dict dms, advanced github.tar, level 16 with dict dds, advanced one pass, 33206 github.tar, level 16 with dict copy, advanced one pass, 33374 github.tar, level 16 with dict load, advanced one pass, 39081 -github.tar, level 19, advanced one pass, 32149 +github.tar, level 19, advanced one pass, 32127 github.tar, level 19 with dict, advanced one pass, 32712 github.tar, level 19 with dict dms, advanced one pass, 32555 github.tar, level 19 with dict dds, advanced one pass, 32555 @@ -567,7 +567,7 @@ silesia, level 12 row 1, advanced silesia, level 12 row 2, advanced one pass small out, 4503116 silesia, level 13, advanced one pass small out, 4493990 silesia, level 16, advanced one pass small out, 4359864 -silesia, level 19, advanced one pass small out, 4296438 +silesia, level 19, advanced one pass small out, 4296439 silesia, no source size, advanced one pass small out, 4842075 silesia, long distance mode, advanced one pass small out, 4833710 silesia, multithreaded, advanced one pass small out, 4842075 @@ -601,7 +601,7 @@ silesia.tar, level 12 row 1, advanced silesia.tar, level 12 row 2, advanced one pass small out, 4513797 silesia.tar, level 13, advanced one pass small out, 4502956 silesia.tar, level 16, advanced one pass small out, 4360527 -silesia.tar, level 19, advanced one pass small out, 4266970 +silesia.tar, level 19, advanced one pass small out, 4267014 silesia.tar, no source size, advanced one pass small out, 4854086 silesia.tar, long distance mode, advanced one pass small out, 4840452 silesia.tar, multithreaded, advanced one pass small out, 4854160 @@ -845,7 +845,7 @@ github.tar, level 16 with dict dms, advanced github.tar, level 16 with dict dds, advanced one pass small out, 33206 github.tar, level 16 with dict copy, advanced one pass small out, 33374 github.tar, level 16 with dict load, advanced one pass small out, 39081 -github.tar, level 19, advanced one pass small out, 32149 +github.tar, level 19, advanced one pass small out, 32127 github.tar, level 19 with dict, advanced one pass small out, 32712 github.tar, level 19 with dict dms, advanced one pass small out, 32555 github.tar, level 19 with dict dds, advanced one pass small out, 32555 @@ -885,7 +885,7 @@ silesia, level 12 row 1, advanced silesia, level 12 row 2, advanced streaming, 4503116 silesia, level 13, advanced streaming, 4493990 silesia, level 16, advanced streaming, 4359864 -silesia, level 19, advanced streaming, 4296438 +silesia, level 19, advanced streaming, 4296439 silesia, no source size, advanced streaming, 
4842039 silesia, long distance mode, advanced streaming, 4833710 silesia, multithreaded, advanced streaming, 4842075 @@ -919,7 +919,7 @@ silesia.tar, level 12 row 1, advanced silesia.tar, level 12 row 2, advanced streaming, 4513797 silesia.tar, level 13, advanced streaming, 4502956 silesia.tar, level 16, advanced streaming, 4360527 -silesia.tar, level 19, advanced streaming, 4266970 +silesia.tar, level 19, advanced streaming, 4267014 silesia.tar, no source size, advanced streaming, 4859267 silesia.tar, long distance mode, advanced streaming, 4840452 silesia.tar, multithreaded, advanced streaming, 4854160 @@ -1163,7 +1163,7 @@ github.tar, level 16 with dict dms, advanced github.tar, level 16 with dict dds, advanced streaming, 33206 github.tar, level 16 with dict copy, advanced streaming, 33374 github.tar, level 16 with dict load, advanced streaming, 39081 -github.tar, level 19, advanced streaming, 32149 +github.tar, level 19, advanced streaming, 32127 github.tar, level 19 with dict, advanced streaming, 32712 github.tar, level 19 with dict dms, advanced streaming, 32555 github.tar, level 19 with dict dds, advanced streaming, 32555 @@ -1195,10 +1195,10 @@ silesia, level 7, old stre silesia, level 9, old streaming, 4543018 silesia, level 13, old streaming, 4493990 silesia, level 16, old streaming, 4359864 -silesia, level 19, old streaming, 4296438 +silesia, level 19, old streaming, 4296439 silesia, no source size, old streaming, 4842039 silesia, uncompressed literals, old streaming, 4842075 -silesia, uncompressed literals optimal, old streaming, 4296438 +silesia, uncompressed literals optimal, old streaming, 4296439 silesia, huffman literals, old streaming, 6172207 silesia.tar, level -5, old streaming, 6856523 silesia.tar, level -3, old streaming, 6505954 @@ -1213,10 +1213,10 @@ silesia.tar, level 7, old stre silesia.tar, level 9, old streaming, 4552900 silesia.tar, level 13, old streaming, 4502956 silesia.tar, level 16, old streaming, 4360527 -silesia.tar, level 19, old streaming, 4266970 +silesia.tar, level 19, old streaming, 4267014 silesia.tar, no source size, old streaming, 4859267 silesia.tar, uncompressed literals, old streaming, 4859271 -silesia.tar, uncompressed literals optimal, old streaming, 4266970 +silesia.tar, uncompressed literals optimal, old streaming, 4267014 silesia.tar, huffman literals, old streaming, 6179056 github, level -5, old streaming, 204407 github, level -5 with dict, old streaming, 46718 @@ -1277,12 +1277,12 @@ github.tar, level 13, old stre github.tar, level 13 with dict, old streaming, 37130 github.tar, level 16, old streaming, 40471 github.tar, level 16 with dict, old streaming, 33374 -github.tar, level 19, old streaming, 32149 +github.tar, level 19, old streaming, 32127 github.tar, level 19 with dict, old streaming, 32712 github.tar, no source size, old streaming, 38828 github.tar, no source size with dict, old streaming, 38000 github.tar, uncompressed literals, old streaming, 38831 -github.tar, uncompressed literals optimal, old streaming, 32149 +github.tar, uncompressed literals optimal, old streaming, 32127 github.tar, huffman literals, old streaming, 42560 silesia, level -5, old streaming advanced, 6854744 silesia, level -3, old streaming advanced, 6503319 @@ -1297,7 +1297,7 @@ silesia, level 7, old stre silesia, level 9, old streaming advanced, 4543018 silesia, level 13, old streaming advanced, 4493990 silesia, level 16, old streaming advanced, 4359864 -silesia, level 19, old streaming advanced, 4296438 +silesia, level 19, old streaming advanced, 4296439 
silesia, no source size, old streaming advanced, 4842039 silesia, long distance mode, old streaming advanced, 4842075 silesia, multithreaded, old streaming advanced, 4842075 @@ -1307,7 +1307,7 @@ silesia, small hash log, old stre silesia, small chain log, old streaming advanced, 4912197 silesia, explicit params, old streaming advanced, 4795452 silesia, uncompressed literals, old streaming advanced, 4842075 -silesia, uncompressed literals optimal, old streaming advanced, 4296438 +silesia, uncompressed literals optimal, old streaming advanced, 4296439 silesia, huffman literals, old streaming advanced, 6172207 silesia, multithreaded with advanced params, old streaming advanced, 4842075 silesia.tar, level -5, old streaming advanced, 6856523 @@ -1323,7 +1323,7 @@ silesia.tar, level 7, old stre silesia.tar, level 9, old streaming advanced, 4552900 silesia.tar, level 13, old streaming advanced, 4502956 silesia.tar, level 16, old streaming advanced, 4360527 -silesia.tar, level 19, old streaming advanced, 4266970 +silesia.tar, level 19, old streaming advanced, 4267014 silesia.tar, no source size, old streaming advanced, 4859267 silesia.tar, long distance mode, old streaming advanced, 4859271 silesia.tar, multithreaded, old streaming advanced, 4859271 @@ -1333,7 +1333,7 @@ silesia.tar, small hash log, old stre silesia.tar, small chain log, old streaming advanced, 4917021 silesia.tar, explicit params, old streaming advanced, 4806873 silesia.tar, uncompressed literals, old streaming advanced, 4859271 -silesia.tar, uncompressed literals optimal, old streaming advanced, 4266970 +silesia.tar, uncompressed literals optimal, old streaming advanced, 4267014 silesia.tar, huffman literals, old streaming advanced, 6179056 silesia.tar, multithreaded with advanced params, old streaming advanced, 4859271 github, level -5, old streaming advanced, 213265 @@ -1403,7 +1403,7 @@ github.tar, level 13, old stre github.tar, level 13 with dict, old streaming advanced, 35807 github.tar, level 16, old streaming advanced, 40471 github.tar, level 16 with dict, old streaming advanced, 38578 -github.tar, level 19, old streaming advanced, 32149 +github.tar, level 19, old streaming advanced, 32127 github.tar, level 19 with dict, old streaming advanced, 32704 github.tar, no source size, old streaming advanced, 38828 github.tar, no source size with dict, old streaming advanced, 38015 @@ -1415,7 +1415,7 @@ github.tar, small hash log, old stre github.tar, small chain log, old streaming advanced, 41669 github.tar, explicit params, old streaming advanced, 41385 github.tar, uncompressed literals, old streaming advanced, 38831 -github.tar, uncompressed literals optimal, old streaming advanced, 32149 +github.tar, uncompressed literals optimal, old streaming advanced, 32127 github.tar, huffman literals, old streaming advanced, 42560 github.tar, multithreaded with advanced params, old streaming advanced, 38831 github, level -5 with dict, old streaming cdict, 46718 From df714ddb0f2dc076dc841d10c8b72f66ef5af937 Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Tue, 3 Jan 2023 07:20:21 -0800 Subject: [PATCH 7/8] implement suggestions --- lib/compress/huf_compress.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index d7755e0f266..9a4cde6f28e 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -1264,7 +1264,7 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS size_t maxBits, hSize, newSize; 
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); const unsigned minTableLog = HUF_minTableLog(symbolCardinality); - size_t optSize = ((size_t) ~0); + size_t optSize = ((size_t) ~0) - 1; unsigned optLogGuess; if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog; /** Assert workspace is large enough **/ @@ -1275,17 +1275,22 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS if (ERR_isError(maxBits)) continue; + if (maxBits < optLogGuess && optLogGuess > minTableLog) break; + hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize); if (ERR_isError(hSize)) continue; newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; - if (newSize > optSize) { + if (newSize > optSize + 1) { break; } - optSize = newSize; - optLog = optLogGuess; + + if (newSize < optSize) { + optSize = newSize; + optLog = optLogGuess; + } } } assert(optLog <= HUF_TABLELOG_MAX); From 87becc567d20aeae6265ef6cee0f063aafc05bb8 Mon Sep 17 00:00:00 2001 From: Danielle Rozenblit Date: Tue, 3 Jan 2023 08:41:40 -0800 Subject: [PATCH 8/8] update regression results.csv --- tests/regression/results.csv | 60 ++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/tests/regression/results.csv b/tests/regression/results.csv index ffa9b22e007..4a44dbb0048 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -12,9 +12,9 @@ silesia.tar, level 7, compress silesia.tar, level 9, compress simple, 4552899 silesia.tar, level 13, compress simple, 4502956 silesia.tar, level 16, compress simple, 4360527 -silesia.tar, level 19, compress simple, 4267014 +silesia.tar, level 19, compress simple, 4267021 silesia.tar, uncompressed literals, compress simple, 4854086 -silesia.tar, uncompressed literals optimal, compress simple, 4267014 +silesia.tar, uncompressed literals optimal, compress simple, 4267021 silesia.tar, huffman literals, compress simple, 6179047 github.tar, level -5, compress simple, 52115 github.tar, level -3, compress simple, 45678 @@ -29,9 +29,9 @@ github.tar, level 7, compress github.tar, level 9, compress simple, 36760 github.tar, level 13, compress simple, 35501 github.tar, level 16, compress simple, 40471 -github.tar, level 19, compress simple, 32127 +github.tar, level 19, compress simple, 32149 github.tar, uncompressed literals, compress simple, 38831 -github.tar, uncompressed literals optimal, compress simple, 32127 +github.tar, uncompressed literals optimal, compress simple, 32149 github.tar, huffman literals, compress simple, 42560 silesia, level -5, compress cctx, 6857372 silesia, level -3, compress cctx, 6503412 @@ -46,7 +46,7 @@ silesia, level 7, compress silesia, level 9, compress cctx, 4543018 silesia, level 13, compress cctx, 4493990 silesia, level 16, compress cctx, 4359864 -silesia, level 19, compress cctx, 4296439 +silesia, level 19, compress cctx, 4296438 silesia, long distance mode, compress cctx, 4842075 silesia, multithreaded, compress cctx, 4842075 silesia, multithreaded long distance mode, compress cctx, 4842075 @@ -55,7 +55,7 @@ silesia, small hash log, compress silesia, small chain log, compress cctx, 4912197 silesia, explicit params, compress cctx, 4794052 silesia, uncompressed literals, compress cctx, 4842075 -silesia, uncompressed literals optimal, compress cctx, 4296439 +silesia, uncompressed literals optimal, compress cctx, 4296438 silesia, huffman literals, compress cctx, 6172202 silesia, multithreaded with 
advanced params, compress cctx, 4842075 github, level -5, compress cctx, 204407 @@ -110,7 +110,7 @@ silesia, level 7, zstdcli, silesia, level 9, zstdcli, 4543066 silesia, level 13, zstdcli, 4494038 silesia, level 16, zstdcli, 4359912 -silesia, level 19, zstdcli, 4296487 +silesia, level 19, zstdcli, 4296486 silesia, long distance mode, zstdcli, 4833785 silesia, multithreaded, zstdcli, 4842123 silesia, multithreaded long distance mode, zstdcli, 4833785 @@ -135,7 +135,7 @@ silesia.tar, level 7, zstdcli, silesia.tar, level 9, zstdcli, 4552903 silesia.tar, level 13, zstdcli, 4502960 silesia.tar, level 16, zstdcli, 4360531 -silesia.tar, level 19, zstdcli, 4267018 +silesia.tar, level 19, zstdcli, 4267025 silesia.tar, no source size, zstdcli, 4854160 silesia.tar, long distance mode, zstdcli, 4845745 silesia.tar, multithreaded, zstdcli, 4854164 @@ -213,7 +213,7 @@ github.tar, level 13, zstdcli, github.tar, level 13 with dict, zstdcli, 37134 github.tar, level 16, zstdcli, 40475 github.tar, level 16 with dict, zstdcli, 33378 -github.tar, level 19, zstdcli, 32131 +github.tar, level 19, zstdcli, 32153 github.tar, level 19 with dict, zstdcli, 32716 github.tar, no source size, zstdcli, 38832 github.tar, no source size with dict, zstdcli, 38004 @@ -249,7 +249,7 @@ silesia, level 12 row 1, advanced silesia, level 12 row 2, advanced one pass, 4503116 silesia, level 13, advanced one pass, 4493990 silesia, level 16, advanced one pass, 4359864 -silesia, level 19, advanced one pass, 4296439 +silesia, level 19, advanced one pass, 4296438 silesia, no source size, advanced one pass, 4842075 silesia, long distance mode, advanced one pass, 4833710 silesia, multithreaded, advanced one pass, 4842075 @@ -283,7 +283,7 @@ silesia.tar, level 12 row 1, advanced silesia.tar, level 12 row 2, advanced one pass, 4513797 silesia.tar, level 13, advanced one pass, 4502956 silesia.tar, level 16, advanced one pass, 4360527 -silesia.tar, level 19, advanced one pass, 4267014 +silesia.tar, level 19, advanced one pass, 4267021 silesia.tar, no source size, advanced one pass, 4854086 silesia.tar, long distance mode, advanced one pass, 4840452 silesia.tar, multithreaded, advanced one pass, 4854160 @@ -527,7 +527,7 @@ github.tar, level 16 with dict dms, advanced github.tar, level 16 with dict dds, advanced one pass, 33206 github.tar, level 16 with dict copy, advanced one pass, 33374 github.tar, level 16 with dict load, advanced one pass, 39081 -github.tar, level 19, advanced one pass, 32127 +github.tar, level 19, advanced one pass, 32149 github.tar, level 19 with dict, advanced one pass, 32712 github.tar, level 19 with dict dms, advanced one pass, 32555 github.tar, level 19 with dict dds, advanced one pass, 32555 @@ -567,7 +567,7 @@ silesia, level 12 row 1, advanced silesia, level 12 row 2, advanced one pass small out, 4503116 silesia, level 13, advanced one pass small out, 4493990 silesia, level 16, advanced one pass small out, 4359864 -silesia, level 19, advanced one pass small out, 4296439 +silesia, level 19, advanced one pass small out, 4296438 silesia, no source size, advanced one pass small out, 4842075 silesia, long distance mode, advanced one pass small out, 4833710 silesia, multithreaded, advanced one pass small out, 4842075 @@ -601,7 +601,7 @@ silesia.tar, level 12 row 1, advanced silesia.tar, level 12 row 2, advanced one pass small out, 4513797 silesia.tar, level 13, advanced one pass small out, 4502956 silesia.tar, level 16, advanced one pass small out, 4360527 -silesia.tar, level 19, advanced one pass small out, 4267014 
+silesia.tar, level 19, advanced one pass small out, 4267021 silesia.tar, no source size, advanced one pass small out, 4854086 silesia.tar, long distance mode, advanced one pass small out, 4840452 silesia.tar, multithreaded, advanced one pass small out, 4854160 @@ -845,7 +845,7 @@ github.tar, level 16 with dict dms, advanced github.tar, level 16 with dict dds, advanced one pass small out, 33206 github.tar, level 16 with dict copy, advanced one pass small out, 33374 github.tar, level 16 with dict load, advanced one pass small out, 39081 -github.tar, level 19, advanced one pass small out, 32127 +github.tar, level 19, advanced one pass small out, 32149 github.tar, level 19 with dict, advanced one pass small out, 32712 github.tar, level 19 with dict dms, advanced one pass small out, 32555 github.tar, level 19 with dict dds, advanced one pass small out, 32555 @@ -885,7 +885,7 @@ silesia, level 12 row 1, advanced silesia, level 12 row 2, advanced streaming, 4503116 silesia, level 13, advanced streaming, 4493990 silesia, level 16, advanced streaming, 4359864 -silesia, level 19, advanced streaming, 4296439 +silesia, level 19, advanced streaming, 4296438 silesia, no source size, advanced streaming, 4842039 silesia, long distance mode, advanced streaming, 4833710 silesia, multithreaded, advanced streaming, 4842075 @@ -919,7 +919,7 @@ silesia.tar, level 12 row 1, advanced silesia.tar, level 12 row 2, advanced streaming, 4513797 silesia.tar, level 13, advanced streaming, 4502956 silesia.tar, level 16, advanced streaming, 4360527 -silesia.tar, level 19, advanced streaming, 4267014 +silesia.tar, level 19, advanced streaming, 4267021 silesia.tar, no source size, advanced streaming, 4859267 silesia.tar, long distance mode, advanced streaming, 4840452 silesia.tar, multithreaded, advanced streaming, 4854160 @@ -1163,7 +1163,7 @@ github.tar, level 16 with dict dms, advanced github.tar, level 16 with dict dds, advanced streaming, 33206 github.tar, level 16 with dict copy, advanced streaming, 33374 github.tar, level 16 with dict load, advanced streaming, 39081 -github.tar, level 19, advanced streaming, 32127 +github.tar, level 19, advanced streaming, 32149 github.tar, level 19 with dict, advanced streaming, 32712 github.tar, level 19 with dict dms, advanced streaming, 32555 github.tar, level 19 with dict dds, advanced streaming, 32555 @@ -1195,10 +1195,10 @@ silesia, level 7, old stre silesia, level 9, old streaming, 4543018 silesia, level 13, old streaming, 4493990 silesia, level 16, old streaming, 4359864 -silesia, level 19, old streaming, 4296439 +silesia, level 19, old streaming, 4296438 silesia, no source size, old streaming, 4842039 silesia, uncompressed literals, old streaming, 4842075 -silesia, uncompressed literals optimal, old streaming, 4296439 +silesia, uncompressed literals optimal, old streaming, 4296438 silesia, huffman literals, old streaming, 6172207 silesia.tar, level -5, old streaming, 6856523 silesia.tar, level -3, old streaming, 6505954 @@ -1213,10 +1213,10 @@ silesia.tar, level 7, old stre silesia.tar, level 9, old streaming, 4552900 silesia.tar, level 13, old streaming, 4502956 silesia.tar, level 16, old streaming, 4360527 -silesia.tar, level 19, old streaming, 4267014 +silesia.tar, level 19, old streaming, 4267021 silesia.tar, no source size, old streaming, 4859267 silesia.tar, uncompressed literals, old streaming, 4859271 -silesia.tar, uncompressed literals optimal, old streaming, 4267014 +silesia.tar, uncompressed literals optimal, old streaming, 4267021 silesia.tar, huffman literals, 
old streaming, 6179056 github, level -5, old streaming, 204407 github, level -5 with dict, old streaming, 46718 @@ -1277,12 +1277,12 @@ github.tar, level 13, old stre github.tar, level 13 with dict, old streaming, 37130 github.tar, level 16, old streaming, 40471 github.tar, level 16 with dict, old streaming, 33374 -github.tar, level 19, old streaming, 32127 +github.tar, level 19, old streaming, 32149 github.tar, level 19 with dict, old streaming, 32712 github.tar, no source size, old streaming, 38828 github.tar, no source size with dict, old streaming, 38000 github.tar, uncompressed literals, old streaming, 38831 -github.tar, uncompressed literals optimal, old streaming, 32127 +github.tar, uncompressed literals optimal, old streaming, 32149 github.tar, huffman literals, old streaming, 42560 silesia, level -5, old streaming advanced, 6854744 silesia, level -3, old streaming advanced, 6503319 @@ -1297,7 +1297,7 @@ silesia, level 7, old stre silesia, level 9, old streaming advanced, 4543018 silesia, level 13, old streaming advanced, 4493990 silesia, level 16, old streaming advanced, 4359864 -silesia, level 19, old streaming advanced, 4296439 +silesia, level 19, old streaming advanced, 4296438 silesia, no source size, old streaming advanced, 4842039 silesia, long distance mode, old streaming advanced, 4842075 silesia, multithreaded, old streaming advanced, 4842075 @@ -1307,7 +1307,7 @@ silesia, small hash log, old stre silesia, small chain log, old streaming advanced, 4912197 silesia, explicit params, old streaming advanced, 4795452 silesia, uncompressed literals, old streaming advanced, 4842075 -silesia, uncompressed literals optimal, old streaming advanced, 4296439 +silesia, uncompressed literals optimal, old streaming advanced, 4296438 silesia, huffman literals, old streaming advanced, 6172207 silesia, multithreaded with advanced params, old streaming advanced, 4842075 silesia.tar, level -5, old streaming advanced, 6856523 @@ -1323,7 +1323,7 @@ silesia.tar, level 7, old stre silesia.tar, level 9, old streaming advanced, 4552900 silesia.tar, level 13, old streaming advanced, 4502956 silesia.tar, level 16, old streaming advanced, 4360527 -silesia.tar, level 19, old streaming advanced, 4267014 +silesia.tar, level 19, old streaming advanced, 4267021 silesia.tar, no source size, old streaming advanced, 4859267 silesia.tar, long distance mode, old streaming advanced, 4859271 silesia.tar, multithreaded, old streaming advanced, 4859271 @@ -1333,7 +1333,7 @@ silesia.tar, small hash log, old stre silesia.tar, small chain log, old streaming advanced, 4917021 silesia.tar, explicit params, old streaming advanced, 4806873 silesia.tar, uncompressed literals, old streaming advanced, 4859271 -silesia.tar, uncompressed literals optimal, old streaming advanced, 4267014 +silesia.tar, uncompressed literals optimal, old streaming advanced, 4267021 silesia.tar, huffman literals, old streaming advanced, 6179056 silesia.tar, multithreaded with advanced params, old streaming advanced, 4859271 github, level -5, old streaming advanced, 213265 @@ -1403,7 +1403,7 @@ github.tar, level 13, old stre github.tar, level 13 with dict, old streaming advanced, 35807 github.tar, level 16, old streaming advanced, 40471 github.tar, level 16 with dict, old streaming advanced, 38578 -github.tar, level 19, old streaming advanced, 32127 +github.tar, level 19, old streaming advanced, 32149 github.tar, level 19 with dict, old streaming advanced, 32704 github.tar, no source size, old streaming advanced, 38828 github.tar, no source size 
with dict, old streaming advanced, 38015 @@ -1415,7 +1415,7 @@ github.tar, small hash log, old stre github.tar, small chain log, old streaming advanced, 41669 github.tar, explicit params, old streaming advanced, 41385 github.tar, uncompressed literals, old streaming advanced, 38831 -github.tar, uncompressed literals optimal, old streaming advanced, 32127 +github.tar, uncompressed literals optimal, old streaming advanced, 32149 github.tar, huffman literals, old streaming advanced, 42560 github.tar, multithreaded with advanced params, old streaming advanced, 38831 github, level -5 with dict, old streaming cdict, 46718
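
The substance of patches 6-7 above is the revised search loop in HUF_optimalTableLog: walk candidate table logs upward from the smallest viable log, estimate header plus encoded size for each, keep the best result, and stop once the estimate grows by more than one byte past the best seen. Below is a minimal sketch of that strategy only, not the zstd implementation; estimateSizeForLog is a hypothetical callback standing in for the HUF_buildCTable_wksp / HUF_writeCTable_wksp / HUF_estimateCompressedSize sequence.

    #include <stddef.h>

    /* Illustrative sketch of the table-log search strategy (not actual zstd code).
     * estimateSizeForLog is assumed to return the estimated header + encoded size
     * for a candidate table log. */
    static unsigned searchOptimalLog(unsigned minLog, unsigned maxLog,
                                     size_t (*estimateSizeForLog)(unsigned log))
    {
        size_t bestSize = ((size_t)~0) - 1;   /* sentinel, mirroring optSize in the patch */
        unsigned bestLog = minLog;
        unsigned log;
        for (log = minLog; log <= maxLog; log++) {
            size_t const candidate = estimateSizeForLog(log);
            if (candidate > bestSize + 1) break;  /* estimate grew past the best by >1 byte: stop */
            if (candidate < bestSize) { bestSize = candidate; bestLog = log; }
        }
        return bestLog;
    }
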