Skip to content

Commit bce0382

Browse files
authored
Bugfixes for the External Matchfinder API (#3433)
* external matchfinder bugfixes + tests * small doc fix
1 parent dc1c6cc commit bce0382

File tree

3 files changed

+88
-21
lines changed

3 files changed

+88
-21
lines changed

lib/compress/zstd_compress.c

+23-12
Original file line numberDiff line numberDiff line change
@@ -1599,7 +1599,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
15991599

16001600
size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
16011601
size_t const externalSeqSpace = useExternalMatchFinder
1602-
? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
1602+
? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
16031603
: 0;
16041604

16051605
size_t const neededSpace =
@@ -3158,7 +3158,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
31583158
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
31593159
zc->appliedParams.useRowMatchFinder,
31603160
dictMode);
3161-
assert(zc->externalMatchCtx.mFinder == NULL);
31623161
ms->ldmSeqStore = NULL;
31633162
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
31643163
}
@@ -5945,6 +5944,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
59455944
params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize);
59465945

59475946
#ifdef ZSTD_MULTITHREAD
5947+
/* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
5948+
RETURN_ERROR_IF(
5949+
params.useExternalMatchFinder == 1 && params.nbWorkers >= 1,
5950+
parameter_combination_unsupported,
5951+
"External matchfinder isn't supported with nbWorkers >= 1"
5952+
);
5953+
59485954
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
59495955
params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
59505956
}
@@ -6774,14 +6780,19 @@ void ZSTD_registerExternalMatchFinder(
67746780
ZSTD_CCtx* zc, void* mState,
67756781
ZSTD_externalMatchFinder_F* mFinder
67766782
) {
6777-
ZSTD_externalMatchCtx emctx = {
6778-
mState,
6779-
mFinder,
6780-
6781-
/* seqBuffer is allocated later (from the cwskp) */
6782-
NULL, /* seqBuffer */
6783-
0 /* seqBufferCapacity */
6784-
};
6785-
zc->externalMatchCtx = emctx;
6786-
zc->requestedParams.useExternalMatchFinder = 1;
6783+
if (mFinder != NULL) {
6784+
ZSTD_externalMatchCtx emctx = {
6785+
mState,
6786+
mFinder,
6787+
6788+
/* seqBuffer is allocated later (from the cwskp) */
6789+
NULL, /* seqBuffer */
6790+
0 /* seqBufferCapacity */
6791+
};
6792+
zc->externalMatchCtx = emctx;
6793+
zc->requestedParams.useExternalMatchFinder = 1;
6794+
} else {
6795+
ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
6796+
zc->requestedParams.useExternalMatchFinder = 0;
6797+
}
67876798
}

lib/zstd.h

+18-3
Original file line numberDiff line numberDiff line change
@@ -2840,8 +2840,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc
28402840
* externalMatchState.
28412841
*
28422842
* *** LIMITATIONS ***
2843-
* External matchfinders are compatible with all zstd compression APIs. There are
2844-
* only two limitations.
2843+
* External matchfinders are compatible with all zstd compression APIs which respect
2844+
* advanced parameters. However, there are three limitations:
28452845
*
28462846
* First, the ZSTD_c_enableLongDistanceMatching cParam is not supported.
28472847
* COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an
@@ -2863,7 +2863,11 @@ ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* bloc
28632863
* APIs, work with the external matchfinder, but the external matchfinder won't
28642864
* receive any history from the previous block. Each block is an independent chunk.
28652865
*
2866-
* Long-term, we plan to overcome both limitations. There is no technical blocker to
2866+
* Third, multi-threading within a single compression is not supported. In other words,
2867+
* COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external matchfinder is registered.
2868+
* Multi-threading across compressions is fine: simply create one CCtx per thread.
2869+
*
2870+
* Long-term, we plan to overcome all three limitations. There is no technical blocker to
28672871
* overcoming them. It is purely a question of engineering effort.
28682872
*/
28692873

@@ -2886,6 +2890,17 @@ typedef size_t ZSTD_externalMatchFinder_F (
28862890
* compressions. It will remain set until the user explicitly resets compression
28872891
* parameters.
28882892
*
2893+
* External matchfinder registration is considered to be an "advanced parameter",
2894+
* part of the "advanced API". This means it will only have an effect on
2895+
* compression APIs which respect advanced parameters, such as compress2() and
2896+
* compressStream(). Older compression APIs such as compressCCtx(), which predate
2897+
* the introduction of "advanced parameters", will ignore any external matchfinder
2898+
* setting.
2899+
*
2900+
* The external matchfinder can be "cleared" by registering a NULL external
2901+
* matchfinder function pointer. This removes all limitations described above in
2902+
* the "LIMITATIONS" section of the API docs.
2903+
*
28892904
* The user is strongly encouraged to read the full API documentation (above)
28902905
* before calling this function. */
28912906
ZSTDLIB_STATIC_API void

tests/zstreamtest.c

+47-6
Original file line numberDiff line numberDiff line change
@@ -1846,15 +1846,11 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
18461846

18471847
CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed");
18481848

1849-
ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
1849+
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
18501850

18511851
/* Reference external matchfinder outside the test loop to
18521852
* check that the reference is preserved across compressions */
1853-
ZSTD_registerExternalMatchFinder(
1854-
zc,
1855-
&externalMatchState,
1856-
zstreamExternalMatchFinder
1857-
);
1853+
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
18581854

18591855
for (enableFallback = 0; enableFallback < 1; enableFallback++) {
18601856
size_t testCaseId;
@@ -1916,11 +1912,56 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
19161912
}
19171913

19181914
/* Test that reset clears the external matchfinder */
1915+
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
1916+
externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
1917+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
1918+
CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
1919+
1920+
/* Test that registering mFinder == NULL clears the external matchfinder */
19191921
ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
1922+
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
19201923
externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
19211924
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
1925+
ZSTD_registerExternalMatchFinder(zc, NULL, NULL); /* clear the external matchfinder */
19221926
CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
19231927

1928+
/* Test that external matchfinder doesn't interact with older APIs */
1929+
ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
1930+
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
1931+
externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder is used */
1932+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
1933+
CHECK_Z(ZSTD_compressCCtx(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize, 3));
1934+
1935+
/* Test that compression returns the correct error with LDM */
1936+
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
1937+
{
1938+
size_t res;
1939+
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
1940+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable));
1941+
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
1942+
CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
1943+
CHECK(
1944+
ZSTD_getErrorCode(res) != ZSTD_error_parameter_combination_unsupported,
1945+
"EMF: Wrong error code: %s", ZSTD_getErrorName(res)
1946+
);
1947+
}
1948+
1949+
#ifdef ZSTD_MULTITHREAD
1950+
/* Test that compression returns the correct error with nbWorkers > 0 */
1951+
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
1952+
{
1953+
size_t res;
1954+
ZSTD_registerExternalMatchFinder(zc, &externalMatchState, zstreamExternalMatchFinder);
1955+
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_nbWorkers, 1));
1956+
res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
1957+
CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
1958+
CHECK(
1959+
ZSTD_getErrorCode(res) != ZSTD_error_parameter_combination_unsupported,
1960+
"EMF: Wrong error code: %s", ZSTD_getErrorName(res)
1961+
);
1962+
}
1963+
#endif
1964+
19241965
free(dstBuf);
19251966
free(checkBuf);
19261967
}

0 commit comments

Comments
 (0)