Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor offset+repcode sumtype #2962

Merged
merged 15 commits into from
Dec 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions lib/common/zstd_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ extern "C" {
#define ZSTD_OPT_NUM (1<<12)

#define ZSTD_REP_NUM 3 /* number of repcodes */
#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };

#define KB *(1 <<10)
Expand Down Expand Up @@ -285,7 +284,7 @@ typedef enum {
* Private declarations
*********************************************/
typedef struct seqDef_s {
U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
U16 litLength;
U16 mlBase; /* mlBase == matchLength - MINMATCH */
} seqDef;
Expand Down
200 changes: 109 additions & 91 deletions lib/compress/zstd_compress.c

Large diffs are not rendered by default.

94 changes: 62 additions & 32 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
*********************************/

typedef struct {
U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */
U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
U32 len; /* Raw length of match */
} ZSTD_match_t;

Expand Down Expand Up @@ -497,31 +497,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}

typedef struct repcodes_s {
U32 rep[3];
} repcodes_t;

MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
{
repcodes_t newReps;
if (offset >= ZSTD_REP_NUM) { /* full offset */
newReps.rep[2] = rep[1];
newReps.rep[1] = rep[0];
newReps.rep[0] = offset - ZSTD_REP_MOVE;
} else { /* repcode */
U32 const repCode = offset + ll0;
if (repCode > 0) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
newReps.rep[1] = rep[0];
newReps.rep[0] = currentOffset;
} else { /* repCode == 0 */
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
}
}
return newReps;
}

/* ZSTD_cParam_withinBounds:
* @return 1 if value is within cParam bounds,
* 0 otherwise */
Expand Down Expand Up @@ -590,7 +565,9 @@ MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxPa
* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
* large copies.
*/
static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
static void
ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
{
assert(iend > ilimit_w);
if (ip <= ilimit_w) {
ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
Expand All @@ -600,14 +577,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
while (ip < iend) *op++ = *ip++;
}

#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
#define STORE_REPCODE_1 STORE_REPCODE(1)
#define STORE_REPCODE_2 STORE_REPCODE(2)
#define STORE_REPCODE_3 STORE_REPCODE(3)
#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this pattern means evaluating the argument must not have any side effects.

Copy link
Contributor Author

@Cyan4973 Cyan4973 Dec 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is a known side effect of macros.
Indeed, these macros shall only be used with scalar variables, not invoking function calls.
And it happens this condition is well respected, throughout the whole code base.

For more defensive properties, a possible alternative would be to swap these macros with inline functions,
although this second solution introduces its own set of potential casting issues.

#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE)
#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

STORE_... and STORED_... are easily confused. Better antonyms of STORE_... might be LOAD_... or RETRIEVE_... or DECODE_....

Copy link
Contributor Author

@Cyan4973 Cyan4973 Dec 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed.
Initially, only the STORE_*() macros were supposed to exist.
The other ones (STORED_*()) were added later, when realizing that the numeric format of the sum type was relied upon in various parts of the code base. So this was a quick way to bring these usages under control. And then, the nb of STORED_*() macros kept increasing...

It's more a temporary situation.
In the following PR, macro names will be updated significantly.

#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */
#define STORED_TO_OFFBASE(o) ((o)+1)
#define OFFBASE_TO_STORED(o) ((o)-1)

/*! ZSTD_storeSeq() :
* Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
* `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
* @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
* @matchLength : must be >= MINMATCH
* Allowed to overread literals up to litLimit.
*/
HINT_INLINE UNUSED_ATTR
void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t matchLength)
HINT_INLINE UNUSED_ATTR void
ZSTD_storeSeq(seqStore_t* seqStorePtr,
size_t litLength, const BYTE* literals, const BYTE* litLimit,
U32 offBase_minus1,
size_t matchLength)
{
BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
BYTE const* const litEnd = literals + litLength;
Expand All @@ -616,7 +609,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
{ U32 const pos = (U32)((const BYTE*)literals - g_start);
DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
pos, (U32)litLength, (U32)matchLength, (U32)offCode);
pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
}
#endif
assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
Expand Down Expand Up @@ -647,7 +640,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
seqStorePtr->sequences[0].litLength = (U16)litLength;

/* match offset */
seqStorePtr->sequences[0].offset = offCode + 1;
seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);

/* match Length */
assert(matchLength >= MINMATCH);
Expand All @@ -663,6 +656,43 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
seqStorePtr->sequences++;
}

/* ZSTD_updateRep() :
* updates in-place @rep (array of repeat offsets)
* @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
*/
MEM_STATIC void
ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{
if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */
rep[2] = rep[1];
rep[1] = rep[0];
rep[0] = STORED_OFFSET(offBase_minus1);
} else { /* repcode */
U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
if (repCode > 0) { /* note : if repCode==0, no change */
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
rep[2] = (repCode >= 2) ? rep[1] : rep[2];
rep[1] = rep[0];
rep[0] = currentOffset;
} else { /* repCode == 0 */
/* nothing to do */
}
}
}

typedef struct repcodes_s {
U32 rep[3];
} repcodes_t;

MEM_STATIC repcodes_t
ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
{
repcodes_t newReps;
ZSTD_memcpy(&newReps, rep, sizeof(newReps));
ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
return newReps;
}


/*-*************************************
* Match length counter
Expand Down
14 changes: 7 additions & 7 deletions lib/compress/zstd_compress_sequences.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,13 +319,13 @@ ZSTD_encodeSequences_body(
U32 const ofBits = ofCodeTable[nbSeq-1];
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
BIT_flushBits(&blockStream);
}
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
ofBits - extraBits);
} else {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
}
BIT_flushBits(&blockStream);

Expand All @@ -340,7 +340,7 @@ ZSTD_encodeSequences_body(
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
(unsigned)sequences[n].litLength,
(unsigned)sequences[n].mlBase + MINMATCH,
(unsigned)sequences[n].offset);
(unsigned)sequences[n].offBase);
/* 32b*/ /* 64b*/
/* (7)*/ /* (7)*/
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
Expand All @@ -356,13 +356,13 @@ ZSTD_encodeSequences_body(
if (longOffsets) {
unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
BIT_flushBits(&blockStream); /* (7)*/
}
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
ofBits - extraBits); /* 31 */
} else {
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */
}
BIT_flushBits(&blockStream); /* (7)*/
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
Expand Down
2 changes: 1 addition & 1 deletion lib/compress/zstd_compress_superblock.c
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
repcodes_t rep;
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
for (seq = sstart; seq < sp; ++seq) {
rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
}
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
}
Expand Down
20 changes: 10 additions & 10 deletions lib/compress/zstd_double_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
goto _match_stored;
}

Expand Down Expand Up @@ -217,7 +217,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
hashLong[hl1] = (U32)(ip1 - base);
}

ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);

_match_stored:
/* match found */
Expand All @@ -243,7 +243,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength);
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
Expand Down Expand Up @@ -328,7 +328,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
goto _match_stored;
}

Expand Down Expand Up @@ -419,7 +419,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
offset_2 = offset_1;
offset_1 = offset;

ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);

_match_stored:
/* match found */
Expand Down Expand Up @@ -448,7 +448,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
Expand Down Expand Up @@ -585,7 +585,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
Expand All @@ -596,7 +596,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);

} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
Expand All @@ -621,7 +621,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);

} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
Expand Down Expand Up @@ -653,7 +653,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2);
ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
Expand Down
Loading