Skip to content

Commit 88d5a3c

Browse files
author
Ibrahim Jarif
authored
[breaking/format] Remove vlen from entry header (#945)
This commit removes the value length (vlen) from the entry header stored in each SST. We don't need vlen to store the length of the value; we can derive it from the entry offsets stored in the footer of each block.

Entries in the table are of the form:

+-----------------+-------+------+--------+-----------------+------+
| Klen1 (Point A) | Plen1 | Key1 | Value1 | Klen2 (Point B) | .... |
+-----------------+-------+------+--------+-----------------+------+

And we have the entry index at the end of each block:

+--------------------------+--------------------------+
| Entry 1 offset (Point A) | Entry 2 offset (Point B) |
+--------------------------+--------------------------+

Using the entry index and the current position in the buffer, we can find the length of the value: it runs from the current position (just past the key) up to the start offset of the next entry, or up to the start of the entry index when the entry is the last one in the block.
1 parent e843141 commit 88d5a3c

File tree

3 files changed

+22
-15
lines changed

3 files changed

+22
-15
lines changed

manifest.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ func (mf *manifestFile) addChanges(changesParam []*pb.ManifestChange) error {
223223
var magicText = [4]byte{'B', 'd', 'g', 'r'}
224224

225225
// The magic version number.
226-
const magicVersion = 6
226+
const magicVersion = 7
227227

228228
func helpRewrite(dir string, m *Manifest) (*os.File, int, error) {
229229
rewritePath := filepath.Join(dir, manifestRewriteFilename)

table/builder.go

+2-6
Original file line numberDiff line numberDiff line change
@@ -37,26 +37,23 @@ func newBuffer(sz int) *bytes.Buffer {
3737
type header struct {
3838
plen uint16 // Overlap with base key.
3939
klen uint16 // Length of the diff.
40-
vlen uint32 // Length of value.
4140
}
4241

4342
// Encode encodes the header.
4443
func (h header) Encode(b []byte) {
4544
binary.BigEndian.PutUint16(b[0:2], h.plen)
4645
binary.BigEndian.PutUint16(b[2:4], h.klen)
47-
binary.BigEndian.PutUint32(b[4:8], h.vlen)
4846
}
4947

5048
// Decode decodes the header.
5149
func (h *header) Decode(buf []byte) int {
5250
h.plen = binary.BigEndian.Uint16(buf[0:2])
5351
h.klen = binary.BigEndian.Uint16(buf[2:4])
54-
h.vlen = binary.BigEndian.Uint32(buf[4:8])
5552
return h.Size()
5653
}
5754

5855
// Size returns size of the header. Currently it's just a constant.
59-
func (h header) Size() int { return 8 }
56+
func (h header) Size() int { return 4 }
6057

6158
// Builder is used in building a table.
6259
type Builder struct {
@@ -117,15 +114,14 @@ func (b *Builder) addHelper(key []byte, v y.ValueStruct) {
117114
h := header{
118115
plen: uint16(len(key) - len(diffKey)),
119116
klen: uint16(len(diffKey)),
120-
vlen: uint32(v.EncodedSize()),
121117
}
122118

123119
// store current entry's offset
124120
y.AssertTrue(uint32(b.buf.Len()) < math.MaxUint32)
125121
b.entryOffsets = append(b.entryOffsets, uint32(b.buf.Len())-b.baseOffset)
126122

127123
// Layout: header, diffKey, value.
128-
var hbuf [8]byte
124+
var hbuf [4]byte
129125
h.Encode(hbuf[:])
130126
b.buf.Write(hbuf[:])
131127
b.buf.Write(diffKey) // We only need to store the key difference.

table/iterator.go

+19-8
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ type blockIterator struct {
4343
func (itr *blockIterator) Reset() {
4444
itr.pos = 0
4545
itr.err = nil
46-
itr.baseKey = []byte{}
47-
itr.key = []byte{}
48-
itr.val = []byte{}
46+
itr.baseKey = itr.baseKey[:0]
47+
itr.key = itr.key[:0]
48+
itr.val = itr.val[:0]
4949
itr.init = false
5050
itr.currentIdx = -1
5151
}
@@ -159,13 +159,24 @@ func (itr *blockIterator) parseKV(h header) {
159159
copy(itr.key[h.plen:], itr.data[itr.pos:itr.pos+uint32(h.klen)])
160160
itr.pos += uint32(h.klen)
161161

162-
if itr.pos+uint32(h.vlen) > uint32(len(itr.data)) {
163-
itr.err = errors.Errorf("Value exceeded size of block: %d %d %d %d %v",
164-
itr.pos, h.klen, h.vlen, len(itr.data), h)
162+
var valEndOffset uint32
163+
// We're at the last entry in the block.
164+
if itr.currentIdx == itr.numEntries-1 {
165+
valEndOffset = uint32(itr.entriesIndexStart)
166+
} else {
167+
// Get starting offset of the next entry which is the end of the current entry.
168+
valEndOffset = itr.getOffset(itr.currentIdx + 1)
169+
}
170+
171+
if valEndOffset > uint32(len(itr.data)) {
172+
itr.err = errors.Errorf("Value endoffset exceeded size of block. "+
173+
"Pos:%d Len:%d EndOffset:%d Header:%v", itr.pos, len(itr.data), valEndOffset, h)
165174
return
166175
}
167-
itr.val = y.SafeCopy(itr.val, itr.data[itr.pos:itr.pos+uint32(h.vlen)])
168-
itr.pos += uint32(h.vlen)
176+
// TODO (ibrahim): Can we avoid this copy?
177+
itr.val = y.SafeCopy(itr.val, itr.data[itr.pos:valEndOffset])
178+
// Set pos to the end of current entry.
179+
itr.pos = valEndOffset
169180
}
170181

171182
func (itr *blockIterator) Next() {

0 commit comments

Comments
 (0)