From e0c4863c5c81c7aecdb7e1560e081346747a495b Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Mon, 13 Jun 2022 17:23:33 -0700 Subject: [PATCH 1/4] largeNbDicts bugfix + improvements --- contrib/largeNbDicts/largeNbDicts.c | 30 +++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index ae5383d25cb..034753d3c9f 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -654,7 +654,8 @@ static int benchMem(slice_collection_t dstBlocks, slice_collection_t srcBlocks, ddict_collection_t ddictionaries, cdict_collection_t cdictionaries, - unsigned nbRounds, int benchCompression) + unsigned nbRounds, int benchCompression, + const char* exeName) { assert(dstBlocks.nbSlices == srcBlocks.nbSlices); @@ -703,6 +704,22 @@ static int benchMem(slice_collection_t dstBlocks, } DISPLAY("\n"); + char* csvFileName = malloc(strlen(exeName) + 5); + strcpy(csvFileName, exeName); + strcat(csvFileName, ".csv"); + FILE* csvFile = fopen(csvFileName, "r"); + if (!csvFile) { + csvFile = fopen(csvFileName, "wt"); + assert(csvFile); + fprintf(csvFile, "%s\n", exeName); + } else { + csvFile = fopen(csvFileName, "at"); + assert(csvFile); + } + fprintf(csvFile, "%.1f\n", bestSpeed); + fclose(csvFile); + free(csvFileName); + freeDecompressInstructions(di); freeCompressInstructions(ci); BMK_freeTimedFnState(benchState); @@ -721,7 +738,8 @@ int bench(const char** fileNameTable, unsigned nbFiles, size_t blockSize, int clevel, unsigned nbDictMax, unsigned nbBlocks, unsigned nbRounds, int benchCompression, - ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params* cctxParams) + ZSTD_dictContentType_e dictContentType, ZSTD_CCtx_params* cctxParams, + const char* exeName) { int result = 0; @@ -806,7 +824,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, CONTROL(ddictionaries.ddicts != NULL); if (benchCompression) { - size_t const dictMem = ZSTD_estimateCDictSize(dictBuffer.size, DICT_LOAD_METHOD); + size_t const dictMem = ZSTD_sizeof_CDict(cdictionaries.cdicts[0]); size_t const allDictMem = dictMem * nbDicts; DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n", nbDicts, (double)allDictMem / (1 MB)); @@ -816,7 +834,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices); CONTROL(resultCollection.buffer.ptr != NULL); - result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression); + result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName); freeBufferCollection(resultCollection); } else { @@ -830,7 +848,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices); CONTROL(resultCollection.buffer.ptr != NULL); - result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression); + result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName); freeBufferCollection(resultCollection); } @@ -988,7 +1006,7 @@ int main (int argc, const char** argv) ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref); - int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams); + int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName); UTIL_freeFileNamesTable(filenameTable); free(nameTable); From f7ebbcd0cc090d8012c906a009521558f70245a7 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 14 Jun 2022 14:52:51 -0700 Subject: [PATCH 2/4] Support advanced API so forceCopy/forceAttach works properly --- contrib/largeNbDicts/largeNbDicts.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index 034753d3c9f..9be059a6e2f 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -577,11 +577,12 @@ typedef struct { cdict_collection_t dictionaries; } compressInstructions; -compressInstructions createCompressInstructions(cdict_collection_t dictionaries) +compressInstructions createCompressInstructions(cdict_collection_t dictionaries, ZSTD_CCtx_params* cctxParams) { compressInstructions ci; ci.cctx = ZSTD_createCCtx(); CONTROL(ci.cctx != NULL); + ZSTD_CCtx_setParametersUsingCCtxParams(ci.cctx, cctxParams); ci.nbDicts = dictionaries.nbCDict; ci.dictNb = 0; ci.dictionaries = dictionaries; @@ -622,10 +623,10 @@ size_t compress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, compressInstructions* const ci = (compressInstructions*) payload; (void)dstCapacity; - ZSTD_compress_usingCDict(ci->cctx, - dst, srcSize, - src, srcSize, - ci->dictionaries.cdicts[ci->dictNb]); + ZSTD_CCtx_refCDict(ci->cctx, ci->dictionaries.cdicts[ci->dictNb]); + ZSTD_compress2(ci->cctx, + dst, srcSize, + src, srcSize); ci->dictNb = ci->dictNb + 1; if (ci->dictNb >= ci->nbDicts) ci->dictNb = 0; @@ -655,9 +656,10 @@ static int benchMem(slice_collection_t dstBlocks, ddict_collection_t ddictionaries, cdict_collection_t cdictionaries, unsigned nbRounds, int benchCompression, - const char* exeName) + const char* exeName, ZSTD_CCtx_params* cctxParams) { assert(dstBlocks.nbSlices == srcBlocks.nbSlices); + if (benchCompression) assert(cctxParams); unsigned const ms_per_round = RUN_TIME_DEFAULT_MS; unsigned const total_time_ms = nbRounds * ms_per_round; @@ -668,7 +670,7 @@ static int benchMem(slice_collection_t dstBlocks, BMK_createTimedFnState(total_time_ms, ms_per_round); decompressInstructions di = createDecompressInstructions(ddictionaries); - compressInstructions ci = createCompressInstructions(cdictionaries); + compressInstructions ci = createCompressInstructions(cdictionaries, cctxParams); void* payload = benchCompression ? (void*)&ci : (void*)&di; BMK_benchParams_t const bp = { .benchFn = benchCompression ? compress : decompress, @@ -834,7 +836,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, buffer_collection_t resultCollection = createBufferCollection_fromSliceCollection(srcSlices); CONTROL(resultCollection.buffer.ptr != NULL); - result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName); + result = benchMem(dstSlices, resultCollection.slices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName, cctxParams); freeBufferCollection(resultCollection); } else { @@ -848,7 +850,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices); CONTROL(resultCollection.buffer.ptr != NULL); - result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName); + result = benchMem(resultCollection.slices, dstSlices, ddictionaries, cdictionaries, nbRounds, benchCompression, exeName, NULL); freeBufferCollection(resultCollection); } From 2bbdc9f40e6663311028dec241cdc6ab3b5c7e33 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 14 Jun 2022 14:57:54 -0700 Subject: [PATCH 3/4] Fix FILE handle leak --- contrib/largeNbDicts/largeNbDicts.c | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index 9be059a6e2f..a87220b7920 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -715,6 +715,7 @@ static int benchMem(slice_collection_t dstBlocks, assert(csvFile); fprintf(csvFile, "%s\n", exeName); } else { + free(csvFileName); csvFile = fopen(csvFileName, "at"); assert(csvFile); } From 24364057bcb6b29a65c6ab91a3da6a32add141eb Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 14 Jun 2022 19:18:49 -0400 Subject: [PATCH 4/4] fix typo Co-authored-by: Nick Terrell --- contrib/largeNbDicts/largeNbDicts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index a87220b7920..25b154c309f 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -715,7 +715,7 @@ static int benchMem(slice_collection_t dstBlocks, assert(csvFile); fprintf(csvFile, "%s\n", exeName); } else { - free(csvFileName); + fclose(csvFile); csvFile = fopen(csvFileName, "at"); assert(csvFile); }