From 762898f5e4424a72bdbb3d3847abd598c8be1846 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 8 Feb 2022 11:49:31 -0500 Subject: [PATCH] Bugfix and new features for largeNbDicts benchmark --- contrib/largeNbDicts/README.md | 24 ++++++++----- contrib/largeNbDicts/largeNbDicts.c | 52 ++++++++++++++--------------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/contrib/largeNbDicts/README.md b/contrib/largeNbDicts/README.md index f29bcdfe8e3..010102c904f 100644 --- a/contrib/largeNbDicts/README.md +++ b/contrib/largeNbDicts/README.md @@ -14,12 +14,20 @@ Command line : ``` largeNbDicts [Options] filename(s) -Options : --r : recursively load all files in subdirectories (default: off) --B# : split input into blocks of size # (default: no split) --# : use compression level # (default: 3) --D # : use # as a dictionary (default: create one) --i# : nb benchmark rounds (default: 6) ---nbDicts=# : set nb of dictionaries to # (default: one per block) --h : help (this text) +Options : +-z : benchmark compression (default) +-d : benchmark decompression +-r : recursively load all files in subdirectories (default: off) +-B# : split input into blocks of size # (default: no split) +-# : use compression level # (default: 3) +-D # : use # as a dictionary (default: create one) +-i# : nb benchmark rounds (default: 6) +--nbBlocks=#: use # blocks for bench (default: one per file) +--nbDicts=# : create # dictionaries for bench (default: one per block) +-h : help (this text) + +Advanced Options (see zstd.h for documentation) : +--dedicated-dict-search +--dict-content-type=# +--dict-attach-pref=# ``` diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index ddbb3e081be..ae5383d25cb 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -39,6 +39,7 @@ #define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */ #define DICTSIZE (4 KB) #define CLEVEL_DEFAULT 3 +#define DICT_LOAD_METHOD ZSTD_dlm_byCopy #define BENCH_TIME_DEFAULT_S 6 #define RUN_TIME_DEFAULT_MS 1000 @@ -156,19 +157,6 @@ createDictionaryBuffer(const char* dictionaryName, } } -static ZSTD_CDict* createCDictForDedicatedDictSearch(const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_CCtx_params* params = ZSTD_createCCtxParams(); - ZSTD_CCtxParams_init(params, compressionLevel); - ZSTD_CCtxParams_setParameter(params, ZSTD_c_enableDedicatedDictSearch, 1); - ZSTD_CCtxParams_setParameter(params, ZSTD_c_compressionLevel, compressionLevel); - - ZSTD_CDict* cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, params, ZSTD_defaultCMem); - - ZSTD_freeCCtxParams(params); - return cdict; -} - /*! BMK_loadFiles() : * Loads `buffer`, with content from files listed within `fileNamesTable`. * Fills `buffer` entirely. @@ -461,14 +449,12 @@ static void freeCDictCollection(cdict_collection_t cdictc) } /* returns .buffers=NULL if operation fails */ -static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, int cLevel, int dedicatedDictSearch) +static cdict_collection_t createCDictCollection(const void* dictBuffer, size_t dictSize, size_t nbCDict, ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params* cctxParams) { ZSTD_CDict** const cdicts = malloc(nbCDict * sizeof(ZSTD_CDict*)); if (cdicts==NULL) return kNullCDictCollection; for (size_t dictNb=0; dictNb < nbCDict; dictNb++) { - cdicts[dictNb] = dedicatedDictSearch ? - createCDictForDedicatedDictSearch(dictBuffer, dictSize, cLevel) : - ZSTD_createCDict(dictBuffer, dictSize, cLevel); + cdicts[dictNb] = ZSTD_createCDict_advanced2(dictBuffer, dictSize, DICT_LOAD_METHOD, dictContentType, cctxParams, ZSTD_defaultCMem); CONTROL(cdicts[dictNb] != NULL); } cdict_collection_t cdictc; @@ -735,7 +721,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, size_t blockSize, int clevel, unsigned nbDictMax, unsigned nbBlocks, unsigned nbRounds, int benchCompression, - int dedicatedDictSearch) + ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params* cctxParams) { int result = 0; @@ -786,13 +772,11 @@ int bench(const char** fileNameTable, unsigned nbFiles, /* dictionary determination */ buffer_t const dictBuffer = createDictionaryBuffer(dictionary, srcs.buffer.ptr, - srcs.slices.capacities, srcs.slices.nbSlices, + srcSlices.capacities, srcSlices.nbSlices, DICTSIZE); CONTROL(dictBuffer.ptr != NULL); - ZSTD_CDict* const cdict = dedicatedDictSearch ? - createCDictForDedicatedDictSearch(dictBuffer.ptr, dictBuffer.size, clevel) : - ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced2(dictBuffer.ptr, dictBuffer.size, DICT_LOAD_METHOD, dictContentType, cctxParams, ZSTD_defaultCMem); CONTROL(cdict != NULL); size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel); @@ -815,14 +799,14 @@ int bench(const char** fileNameTable, unsigned nbFiles, unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks; - cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, clevel, dedicatedDictSearch); + cdict_collection_t const cdictionaries = createCDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts, dictContentType, cctxParams); CONTROL(cdictionaries.cdicts != NULL); ddict_collection_t const ddictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts); CONTROL(ddictionaries.ddicts != NULL); if (benchCompression) { - size_t const dictMem = ZSTD_estimateCDictSize(dictBuffer.size, ZSTD_dlm_byCopy); + size_t const dictMem = ZSTD_estimateCDictSize(dictBuffer.size, DICT_LOAD_METHOD); size_t const allDictMem = dictMem * nbDicts; DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n", nbDicts, (double)allDictMem / (1 MB)); @@ -836,7 +820,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, freeBufferCollection(resultCollection); } else { - size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy); + size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, DICT_LOAD_METHOD); size_t const allDictMem = dictMem * nbDicts; DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n", nbDicts, (double)allDictMem / (1 MB)); @@ -927,6 +911,11 @@ int usage(const char* exeName) DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n"); DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n"); DISPLAY ("-h : help (this text) \n"); + DISPLAY (" \n"); + DISPLAY ("Advanced Options (see zstd.h for documentation) : \n"); + DISPLAY ("--dedicated-dict-search\n"); + DISPLAY ("--dict-content-type=#\n"); + DISPLAY ("--dict-attach-pref=#\n"); return 0; } @@ -956,6 +945,8 @@ int main (int argc, const char** argv) size_t blockSize = BLOCKSIZE_DEFAULT; unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */ unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */ + ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; + ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach; for (int argNb = 1; argNb < argc ; argNb++) { const char* argument = argv[argNb]; @@ -972,6 +963,8 @@ int main (int argc, const char** argv) if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--clevel=")) { cLevel = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; } + if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; } /* anything that's not a command is a filename */ nameTable[nameIdx++] = argument; @@ -989,10 +982,17 @@ int main (int argc, const char** argv) nameTable = NULL; /* UTIL_createFileNamesTable() takes ownership of nameTable */ } - int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dedicatedDictSearch); + ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams(); + ZSTD_CCtxParams_init(cctxParams, cLevel); + ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch); + ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0); + ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref); + + int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams); UTIL_freeFileNamesTable(filenameTable); free(nameTable); + ZSTD_freeCCtxParams(cctxParams); return result; }