Skip to content

Commit

Permalink
[breaking/format] Remove vlen from entry header (#945)
Browse files Browse the repository at this point in the history
This commit removes the value length (vlen) from the entry header stored in
each SST. The header does not need to record the length of the value, because
it can be derived from the entry offsets stored in the footer of each block.

Entries in the table are of the form (Plen and Klen make up the entry header;
Key is only the part of the key that differs from the base key)
+-----------------+-------+------+--------+-----------------+------+
| Plen1 (Point A) | Klen1 | Key1 | Value1 | Plen2 (Point B) | .... |
+-----------------+-------+------+--------+-----------------+------+

We also have the entry index at the end of each block, which stores the
starting offset of every entry.
+---------------------------+--------------------------+
| Entry 1 offset (Point A)  | Entry 2 offset (Point B) |
+---------------------------+--------------------------+

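Using the entry index and the current position in the buffer, we can find
the length of the value: a value ends where the next entry starts (Point B),
so its length is the next entry's offset minus the current position once the
header and key have been read. For the last entry in a block, the value ends
where the entry index starts.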
  • Loading branch information
Ibrahim Jarif authored Aug 7, 2019
1 parent e843141 commit 88d5a3c
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 15 deletions.
2 changes: 1 addition & 1 deletion manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ func (mf *manifestFile) addChanges(changesParam []*pb.ManifestChange) error {
var magicText = [4]byte{'B', 'd', 'g', 'r'}

// The magic version number.
const magicVersion = 6
const magicVersion = 7

func helpRewrite(dir string, m *Manifest) (*os.File, int, error) {
rewritePath := filepath.Join(dir, manifestRewriteFilename)
Expand Down
8 changes: 2 additions & 6 deletions table/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,26 +37,23 @@ func newBuffer(sz int) *bytes.Buffer {
type header struct {
plen uint16 // Overlap with base key.
klen uint16 // Length of the diff.
vlen uint32 // Length of value.
}

// Encode encodes the header.
func (h header) Encode(b []byte) {
binary.BigEndian.PutUint16(b[0:2], h.plen)
binary.BigEndian.PutUint16(b[2:4], h.klen)
binary.BigEndian.PutUint32(b[4:8], h.vlen)
}

// Decode decodes the header.
func (h *header) Decode(buf []byte) int {
h.plen = binary.BigEndian.Uint16(buf[0:2])
h.klen = binary.BigEndian.Uint16(buf[2:4])
h.vlen = binary.BigEndian.Uint32(buf[4:8])
return h.Size()
}

// Size returns size of the header. Currently it's just a constant.
func (h header) Size() int { return 8 }
func (h header) Size() int { return 4 }

// Builder is used in building a table.
type Builder struct {
Expand Down Expand Up @@ -117,15 +114,14 @@ func (b *Builder) addHelper(key []byte, v y.ValueStruct) {
h := header{
plen: uint16(len(key) - len(diffKey)),
klen: uint16(len(diffKey)),
vlen: uint32(v.EncodedSize()),
}

// store current entry's offset
y.AssertTrue(uint32(b.buf.Len()) < math.MaxUint32)
b.entryOffsets = append(b.entryOffsets, uint32(b.buf.Len())-b.baseOffset)

// Layout: header, diffKey, value.
var hbuf [8]byte
var hbuf [4]byte
h.Encode(hbuf[:])
b.buf.Write(hbuf[:])
b.buf.Write(diffKey) // We only need to store the key difference.
Expand Down
27 changes: 19 additions & 8 deletions table/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ type blockIterator struct {
func (itr *blockIterator) Reset() {
itr.pos = 0
itr.err = nil
itr.baseKey = []byte{}
itr.key = []byte{}
itr.val = []byte{}
itr.baseKey = itr.baseKey[:0]
itr.key = itr.key[:0]
itr.val = itr.val[:0]
itr.init = false
itr.currentIdx = -1
}
Expand Down Expand Up @@ -159,13 +159,24 @@ func (itr *blockIterator) parseKV(h header) {
copy(itr.key[h.plen:], itr.data[itr.pos:itr.pos+uint32(h.klen)])
itr.pos += uint32(h.klen)

if itr.pos+uint32(h.vlen) > uint32(len(itr.data)) {
itr.err = errors.Errorf("Value exceeded size of block: %d %d %d %d %v",
itr.pos, h.klen, h.vlen, len(itr.data), h)
var valEndOffset uint32
// We're at the last entry in the block.
if itr.currentIdx == itr.numEntries-1 {
valEndOffset = uint32(itr.entriesIndexStart)
} else {
// Get starting offset of the next entry which is the end of the current entry.
valEndOffset = itr.getOffset(itr.currentIdx + 1)
}

if valEndOffset > uint32(len(itr.data)) {
itr.err = errors.Errorf("Value endoffset exceeded size of block. "+
"Pos:%d Len:%d EndOffset:%d Header:%v", itr.pos, len(itr.data), valEndOffset, h)
return
}
itr.val = y.SafeCopy(itr.val, itr.data[itr.pos:itr.pos+uint32(h.vlen)])
itr.pos += uint32(h.vlen)
// TODO (ibrahim): Can we avoid this copy?
itr.val = y.SafeCopy(itr.val, itr.data[itr.pos:valEndOffset])
// Set pos to the end of current entry.
itr.pos = valEndOffset
}

func (itr *blockIterator) Next() {
Expand Down

0 comments on commit 88d5a3c

Please sign in to comment.