Skip to content

Commit

Permalink
Merge pull request #3127 from embg/repcode_history
Browse files Browse the repository at this point in the history
Correct and clarify repcode offset history logic
  • Loading branch information
embg authored May 12, 2022
2 parents 8af64f4 + 3620a0a commit f349d18
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 26 deletions.
19 changes: 11 additions & 8 deletions lib/compress/zstd_double_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
U32 offsetSaved1 = 0, offsetSaved2 = 0;

size_t mLength;
U32 offset;
Expand Down Expand Up @@ -100,8 +100,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 const current = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
U32 const maxRep = current - windowLow;
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
}

/* Outer Loop: one iteration per match found and stored */
Expand Down Expand Up @@ -175,9 +175,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
} while (ip1 <= ilimit);

_cleanup:
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See the comment in ZSTD_compressBlock_fast_noDict_generic() (zstd_fast.c) for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2 ? offset_2 : offsetSaved2;

/* Return the last literals size */
return (size_t)(iend - anchor);
Expand Down Expand Up @@ -275,7 +279,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;

const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
Expand Down Expand Up @@ -461,8 +464,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
} /* while (ip < ilimit) */

/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = offset_1;
rep[1] = offset_2;

/* Return the last literals size */
return (size_t)(iend - anchor);
Expand Down
43 changes: 31 additions & 12 deletions lib/compress/zstd_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ ZSTD_compressBlock_fast_noDict_generic(

U32 rep_offset1 = rep[0];
U32 rep_offset2 = rep[1];
U32 offsetSaved = 0;
U32 offsetSaved1 = 0, offsetSaved2 = 0;

size_t hash0; /* hash for ip0 */
size_t hash1; /* hash for ip1 */
Expand All @@ -141,8 +141,8 @@ ZSTD_compressBlock_fast_noDict_generic(
{ U32 const curr = (U32)(ip0 - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
U32 const maxRep = curr - windowLow;
if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;
}

/* start each op */
Expand Down Expand Up @@ -281,9 +281,24 @@ ZSTD_compressBlock_fast_noDict_generic(
* However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */

/* When the repcodes are outside of the prefix, we set them to zero before the loop.
* When the offsets are still zero, we need to restore them after the block to have a correct
* repcode history. If only one offset was invalid, it is easy. The tricky case is when both
* offsets were invalid. We need to figure out which offset to refill with.
* - If both offsets are still zero, they are restored in their original order.
* - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
* - If only one is zero, we need to decide which offset to restore.
*   - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
*   - It is impossible for rep_offset2 to be the non-zero one (rep_offset2 != 0 while rep_offset1 == 0).
*
* So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
* set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
*/
offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;

/* save reps for next block */
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;

/* Return the last literals size */
return (size_t)(iend - anchor);
Expand Down Expand Up @@ -410,7 +425,6 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;

const ZSTD_matchState_t* const dms = ms->dictMatchState;
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
Expand Down Expand Up @@ -567,8 +581,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(

_cleanup:
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
rep[0] = offset_1;
rep[1] = offset_2;

/* Return the last literals size */
return (size_t)(iend - anchor);
Expand Down Expand Up @@ -625,6 +639,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved1 = 0, offsetSaved2 = 0;

const BYTE* ip0 = istart;
const BYTE* ip1;
Expand Down Expand Up @@ -657,8 +672,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(

{ U32 const curr = (U32)(ip0 - base);
U32 const maxRep = curr - dictStartIndex;
if (offset_2 >= maxRep) offset_2 = 0;
if (offset_1 >= maxRep) offset_1 = 0;
if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;
}

/* start each op */
Expand Down Expand Up @@ -780,9 +795,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
* However, it seems to be a meaningful performance hit to try to search
* them. So let's not. */

/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See the comment in ZSTD_compressBlock_fast_noDict_generic() for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

/* save reps for next block */
rep[0] = offset_1 ? offset_1 : rep[0];
rep[1] = offset_2 ? offset_2 : rep[1];
rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2 ? offset_2 : offsetSaved2;

/* Return the last literals size */
return (size_t)(iend - anchor);
Expand Down
17 changes: 11 additions & 6 deletions lib/compress/zstd_lazy.c
Original file line number Diff line number Diff line change
Expand Up @@ -1461,7 +1461,8 @@ ZSTD_compressBlock_lazy_generic(
const BYTE* const prefixLowest = base + prefixLowestIndex;

searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, dictMode)->searchMax;
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
U32 offset_1 = rep[0], offset_2 = rep[1];
U32 offsetSaved1 = 0, offsetSaved2 = 0;

const int isDMS = dictMode == ZSTD_dictMatchState;
const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
Expand All @@ -1484,8 +1485,8 @@ ZSTD_compressBlock_lazy_generic(
U32 const curr = (U32)(ip - base);
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
U32 const maxRep = curr - windowLow;
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
}
if (isDxS) {
/* dictMatchState repCode checks don't currently handle repCode == 0
Expand Down Expand Up @@ -1681,9 +1682,13 @@ ZSTD_compressBlock_lazy_generic(
continue; /* faster when present ... (?) */
} } }

/* Save reps for next block */
rep[0] = offset_1 ? offset_1 : savedOffset;
rep[1] = offset_2 ? offset_2 : savedOffset;
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
* rotate saved offsets. See the comment in ZSTD_compressBlock_fast_noDict_generic() (zstd_fast.c) for more context. */
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;

/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved1;
rep[1] = offset_2 ? offset_2 : offsetSaved2;

/* Return the last literals size */
return (size_t)(iend - anchor);
Expand Down

0 comments on commit f349d18

Please sign in to comment.