Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Omit empty next slab ID in encoded array data slab #339

Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Omit empty next slab ID in encoded array data slab
Currently, we omit empty next slab ID in encoded root data slabs
because next slab ID is always empty in root data slabs.

However, next slab ID is also empty for non-root data slabs if
the non-root data slab is the last data slab.

This commit sets hasNextSlabID flag during encoding and only encodes
non-empty next slab ID for array data slab.

This change saves 16 bytes in the last non-root data slab of each array.
Also, we no longer special-case the omission of next slab ID in root slabs.

NOTE: omission of empty next slab ID doesn't affect slab size
computation which is used for slab operations, such as splitting and
merging.  This commit is an optimization during slab encoding.
fxamacker committed Sep 5, 2023
commit 23295495f86485f00f12d89eaea864bb0a4d2dce
19 changes: 13 additions & 6 deletions array.go
Original file line number Diff line number Diff line change
@@ -417,16 +417,17 @@ func newArrayDataSlabFromDataV1(
var extraData *ArrayExtraData
var next SlabID

// Decode header
// Decode extra data
if h.isRoot() {
// Decode extra data
extraData, data, err = newArrayExtraDataFromData(data, decMode, decodeTypeInfo)
if err != nil {
// err is categorized already by newArrayExtraDataFromData.
return nil, err
}
} else {
// Decode next slab ID
}

// Decode next slab ID
if h.hasNextSlabID() {
next, err = NewSlabIDFromRawBytes(data)
if err != nil {
// error returned from NewSlabIDFromRawBytes is categorized already.
@@ -516,6 +517,10 @@ func (a *ArrayDataSlab) Encode(enc *Encoder) error {
h.setHasPointers()
}

if a.next != SlabIDUndefined {
h.setHasNextSlabID()
}

if a.extraData != nil {
h.setRoot()
}
@@ -534,8 +539,10 @@ func (a *ArrayDataSlab) Encode(enc *Encoder) error {
// err is already categorized by ArrayExtraData.Encode().
return err
}
} else {
// Encode next slab ID to scratch
}

// Encode next slab ID
if a.next != SlabIDUndefined {
n, err := a.next.ToRawBytes(enc.Scratch[:])
if err != nil {
// Don't need to wrap because err is already categorized by SlabID.ToRawBytes().
75 changes: 49 additions & 26 deletions array_debug.go
Original file line number Diff line number Diff line change
@@ -446,17 +446,15 @@ func validArraySlabSerialization(
}

// Extra check: encoded data size == header.size
encodedExtraDataSize, err := getEncodedArrayExtraDataSize(slab.ExtraData(), cborEncMode)
encodedSlabSize, err := computeSlabSize(data)
if err != nil {
// Don't need to wrap error as external error because err is already categorized by getEncodedArrayExtraDataSize().
// Don't need to wrap error as external error because err is already categorized by computeSlabSize().
return err
}

// Need to exclude extra data size from encoded data size.
encodedSlabSize := uint32(len(data) - encodedExtraDataSize)
if slab.Header().size != encodedSlabSize {
return NewFatalError(fmt.Errorf("slab %d encoded size %d != header.size %d (encoded extra data size %d)",
id, encodedSlabSize, slab.Header().size, encodedExtraDataSize))
if slab.Header().size != uint32(encodedSlabSize) {
return NewFatalError(fmt.Errorf("slab %d encoded size %d != header.size %d",
id, encodedSlabSize, slab.Header().size))
}

// Compare encoded data of original slab with encoded data of decoded slab
@@ -640,25 +638,6 @@ func arrayExtraDataEqual(expected, actual *ArrayExtraData) error {
return nil
}

// getEncodedArrayExtraDataSize returns the number of bytes produced when
// extraData is encoded with an Encoder built from cborEncMode.
//
// A nil extraData yields 0 with no error. Any error from
// ArrayExtraData.Encode is returned unchanged, together with a size of 0.
func getEncodedArrayExtraDataSize(extraData *ArrayExtraData, cborEncMode cbor.EncMode) (int, error) {
	if extraData == nil {
		return 0, nil
	}

	// Encode into a throwaway buffer: only the byte count matters here,
	// the encoded content itself is discarded.
	var scratch bytes.Buffer

	if err := extraData.Encode(NewEncoder(&scratch, cborEncMode)); err != nil {
		// No wrapping needed: err is already categorized by ArrayExtraData.Encode().
		return 0, err
	}

	return scratch.Len(), nil
}

func ValidValueSerialization(
value Value,
cborDecMode cbor.DecMode,
@@ -690,3 +669,47 @@ func ValidValueSerialization(
}
return nil
}

// computeSlabSize derives the slab size (the value expected in the slab
// header) from a slab's encoded bytes.
//
// The result is len(data) with two adjustments:
//   - the size reported by getExtraDataSize is subtracted, so slab extra
//     data is excluded;
//   - slabIDSize is added for a non-root data slab (array data, map data,
//     or map collision group) whose head does not have the next-slab-ID
//     flag set, so an omitted next slab ID is still counted.
//
// It returns a decoding error if data is shorter than versionAndFlagSize or
// if the head cannot be parsed. Errors from getExtraDataSize are returned
// as is.
func computeSlabSize(data []byte) (int, error) {
	if len(data) < versionAndFlagSize {
		return 0, NewDecodingError(fmt.Errorf("data is too short"))
	}

	h, err := newHeadFromData(data[:versionAndFlagSize])
	if err != nil {
		return 0, NewDecodingError(err)
	}

	extraSize, err := getExtraDataSize(h, data[versionAndFlagSize:])
	if err != nil {
		return 0, err
	}

	// Extra data is never part of the computed slab size.
	size := len(data) - extraSize

	// Root slabs, and slabs whose next slab ID is present in the encoding,
	// need no further adjustment.
	if h.isRoot() || h.hasNextSlabID() {
		return size, nil
	}

	// Non-root data slab encoded without a next slab ID: count the
	// omitted ID as well.
	switch {
	case h.getSlabArrayType() == slabArrayData,
		h.getSlabMapType() == slabMapData,
		h.getSlabMapType() == slabMapCollisionGroup:
		size += slabIDSize
	}

	return size, nil
}

// getExtraDataSize returns the length in bytes of the slab extra data found
// at the start of data.
//
// When h is not a root head the result is 0. Otherwise a CBOR stream decoder
// reads data via DecodeRawBytes and the length of the returned bytes is
// reported. A decoder failure is wrapped in a decoding error.
func getExtraDataSize(h head, data []byte) (int, error) {
	if !h.isRoot() {
		return 0, nil
	}

	raw, err := cbor.NewStreamDecoder(bytes.NewBuffer(data)).DecodeRawBytes()
	if err != nil {
		return 0, NewDecodingError(err)
	}

	return len(raw), nil
}
4 changes: 1 addition & 3 deletions array_test.go
Original file line number Diff line number Diff line change
@@ -1899,7 +1899,7 @@ func TestArrayEncodeDecode(t *testing.T) {
// (data slab) next: 3, data: [aaaaaaaaaaaaaaaaaaaaaa ... aaaaaaaaaaaaaaaaaaaaaa]
id2: {
// version
0x10,
0x12,
// array data slab flag
0x00,
// next slab id
@@ -1924,8 +1924,6 @@ func TestArrayEncodeDecode(t *testing.T) {
0x10,
// array data slab flag
0x40,
// next slab id
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// CBOR encoded array head (fixed size 3 byte)
0x99, 0x00, 0x0b,
// CBOR encoded array elements
4 changes: 1 addition & 3 deletions storage_test.go
Original file line number Diff line number Diff line change
@@ -927,7 +927,7 @@ func TestPersistentStorageSlabIterator(t *testing.T) {
// (data slab) next: 3, data: [aaaaaaaaaaaaaaaaaaaaaa ... aaaaaaaaaaaaaaaaaaaaaa]
id2: {
// version
0x10,
0x12,
// array data slab flag
0x00,
// next slab id
@@ -952,8 +952,6 @@ func TestPersistentStorageSlabIterator(t *testing.T) {
0x10,
// array data slab flag
0x40,
// next slab id
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// CBOR encoded array head (fixed size 3 byte)
0x99, 0x00, 0x0b,
// CBOR encoded array elements