Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use 8 bytes to store int64 components of database keys #107

Merged
merged 3 commits into from
Sep 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Changelog

## Unreleased

BREAKING CHANGES

- Changed internal database key format to store int64 key components in a full 8-byte fixed width.


IMPROVEMENTS

- Database key format avoids the use of fmt.Sprintf and fmt.Sscanf, leading to a ~10% speedup in the BenchmarkTreeLoadAndDelete benchmark

## 0.10.0

BREAKING CHANGES
Expand Down
144 changes: 144 additions & 0 deletions key_format.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package iavl

import (
"encoding/binary"
"fmt"
)

// KeyFormat provides a fixed-width lexicographically sortable []byte key format.
type KeyFormat struct {
// prefix is the single byte written at the start of every key.
prefix byte
// layout holds the width in bytes of each key segment, in order.
layout []int
// length is the size in bytes of a full key: the prefix byte plus the sum of all segment widths.
length int
}

// NewKeyFormat creates a []byte key format from a single byte prefix and a list of fixed-width key segments, where
// the width in bytes of each segment is given by the corresponding element of layout.
//
// For example, to store keys that index some objects by a version number and their SHA256 hash using the form
// 'c<version uint64><hash [32]byte>', define the KeyFormat with:
//
//	var keyFormat = NewKeyFormat('c', 8, 32)
//
// Then create a key with:
//
//	func ObjectKey(version uint64, objectBytes []byte) []byte {
//		hasher := sha256.New()
//		hasher.Write(objectBytes)
//		return keyFormat.Key(version, hasher.Sum(nil))
//	}
//
// The layout is copied, so later changes to a slice passed as layout do not affect the returned KeyFormat.
func NewKeyFormat(prefix byte, layout ...int) *KeyFormat {
	// Copy the widths so the cached length below can never disagree with the stored layout
	// (append onto a nil slice keeps a nil layout when no widths are given).
	widths := append([]int(nil), layout...)

	// Start at 1 to account for the prefix byte.
	length := 1
	for _, l := range widths {
		length += l
	}
	return &KeyFormat{
		prefix: prefix,
		layout: widths,
		length: length,
	}
}

// KeyBytes formats the byte segments into the key format: the prefix byte followed by each segment written into
// its fixed-width slot. A segment shorter than its slot is left-padded with zero bytes. Fewer segments than the
// layout defines may be given, in which case the returned key ends after the slot of the last segment provided
// (with no segments at all, only the prefix byte is returned).
//
// Panics if more segments are given than the layout defines, or if any segment is longer than its slot.
func (kf *KeyFormat) KeyBytes(segments ...[]byte) []byte {
	// Fail with a descriptive error rather than an index-out-of-range when indexing kf.layout below.
	if len(segments) > len(kf.layout) {
		panic(fmt.Errorf("KeyFormat.KeyBytes() is provided with %d segments but format only has %d segments",
			len(segments), len(kf.layout)))
	}
	key := make([]byte, kf.length)
	key[0] = kf.prefix
	// n tracks the end offset of the slot most recently written.
	n := 1
	for i, s := range segments {
		l := kf.layout[i]
		if len(s) > l {
			panic(fmt.Errorf("length of segment %X provided to KeyFormat.KeyBytes() is longer than the %d bytes "+
				"required by layout for segment %d", s, l, i))
		}
		n += l
		// Big endian so pad on left if not given the full width for this segment
		copy(key[n-len(s):n], s)
	}
	return key[:n]
}

// Key encodes each of args as a key segment and assembles them, in order, into a key via KeyBytes. It panics if
// more args are given than the layout has segments, and the encoding and assembly steps panic on arguments they
// cannot handle. Calling it with no arguments yields just the raw prefix, which is useful as the start element of
// the whole key space when sorted lexicographically.
func (kf *KeyFormat) Key(args ...interface{}) []byte {
	if given, available := len(args), len(kf.layout); given > available {
		panic(fmt.Errorf("KeyFormat.Key() is provided with %d args but format only has %d segments",
			given, available))
	}
	encoded := make([][]byte, 0, len(args))
	for _, arg := range args {
		encoded = append(encoded, format(arg))
	}
	return kf.KeyBytes(encoded...)
}

// ScanBytes splits key into the raw bytes of each segment of the key format, skipping the leading prefix byte.
// If key is too short to hold every segment, only the segments that fit completely are returned. The returned
// slices alias key rather than copying it.
func (kf *KeyFormat) ScanBytes(key []byte) [][]byte {
	out := make([][]byte, 0, len(kf.layout))
	// Segments begin immediately after the prefix byte.
	start := 1
	for _, width := range kf.layout {
		end := start + width
		if end > len(key) {
			break
		}
		out = append(out, key[start:end])
		start = end
	}
	return out
}

// Scan decodes the leading segments of key into the values pointed to by args, in order. Each arg must be a
// pointer to int64, uint64, or []byte, and its width must match the corresponding layout entry. It panics if
// more args are given than there are complete segments in key.
func (kf *KeyFormat) Scan(key []byte, args ...interface{}) {
	available := kf.ScanBytes(key)
	if len(available) < len(args) {
		panic(fmt.Errorf("KeyFormat.Scan() is provided with %d args but format only has %d segments in key %X",
			len(args), len(available), key))
	}
	for idx := range args {
		scan(args[idx], available[idx])
	}
}

// Prefix returns the prefix byte of the key format as a one-byte string.
func (kf *KeyFormat) Prefix() string {
	raw := [1]byte{kf.prefix}
	return string(raw[:])
}

// scan decodes the raw bytes of a single key segment into the value pointed to by a, which must be a *int64,
// *uint64, or *[]byte. Integers are decoded as 8-byte big-endian values; a *[]byte receives value itself, not a
// copy.
//
// Panics if a is of an unsupported type, or if an integer is requested from a segment that is not exactly 8 bytes.
//
// Review note: a reviewer suggested renaming this to scanValue; this is probably just a matter of preference.
func scan(a interface{}, value []byte) {
	switch v := a.(type) {
	case *int64:
		// Negative values will be mapped correctly when read in as uint64 and then type converted
		*v = int64(scanUint64(value))
	case *uint64:
		*v = scanUint64(value)
	case *[]byte:
		*v = value
	default:
		panic(fmt.Errorf("KeyFormat scan() does not support scanning value of type %T: %v", a, a))
	}
}

// scanUint64 decodes value as an 8-byte big-endian unsigned integer. It panics with a descriptive error when value
// is not exactly 8 bytes, instead of hitting an index-out-of-range on short input or silently ignoring the tail of
// long input.
func scanUint64(value []byte) uint64 {
	if len(value) != 8 {
		panic(fmt.Errorf("KeyFormat scan() cannot read a 64-bit integer from a segment of %d bytes (expected 8): %X",
			len(value), value))
	}
	return binary.BigEndian.Uint64(value)
}

// format encodes a single key argument as the bytes of its segment. A []byte is returned unchanged; uint64,
// int64, uint and int values are converted to uint64 and encoded by formatUint64. The plain int and uint forms
// exist as a convenience so callers need not cast their arguments. Any other type causes a panic.
func format(a interface{}) []byte {
	var word uint64
	switch val := a.(type) {
	case []byte:
		return val
	case uint64:
		word = val
	case int64:
		word = uint64(val)
	case uint:
		word = uint64(val)
	case int:
		word = uint64(val)
	default:
		panic(fmt.Errorf("KeyFormat format() does not support formatting value of type %T: %v", a, a))
	}
	return formatUint64(word)
}

// formatUint64 returns v encoded as 8 big-endian bytes.
func formatUint64(v uint64) []byte {
	var buf [8]byte
	binary.BigEndian.PutUint64(buf[:], v)
	return buf[:]
}
70 changes: 70 additions & 0 deletions key_format_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package iavl

import (
"testing"

"github.com/stretchr/testify/assert"
)

// TestKeyFormatBytes checks the keys KeyBytes builds for a short segment, a full-width segment, several segments
// at once, and no segments at all.
func TestKeyFormatBytes(t *testing.T) {
	kf := NewKeyFormat(byte('e'), 8, 8, 8)
	cases := []struct {
		segments [][]byte
		want     []byte
	}{
		{
			segments: [][]byte{{1, 2, 3}},
			want:     []byte{'e', 0, 0, 0, 0, 0, 1, 2, 3},
		},
		{
			segments: [][]byte{{1, 2, 3, 4, 5, 6, 7, 8}},
			want:     []byte{'e', 1, 2, 3, 4, 5, 6, 7, 8},
		},
		{
			segments: [][]byte{{1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 1, 2, 2, 3, 3}},
			want: []byte{'e', 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 1, 1, 2, 2, 3, 3},
		},
		{
			segments: nil,
			want:     []byte{'e'},
		},
	}
	for _, tc := range cases {
		assert.Equal(t, tc.want, kf.KeyBytes(tc.segments...))
	}
}

// TestKeyFormat round-trips three int64 values through Key and Scan, scans the last segment as raw bytes, and
// checks that a key built from only the first two values stops after the second segment.
func TestKeyFormat(t *testing.T) {
	kf := NewKeyFormat(byte('e'), 8, 8, 8)
	first, second, third := int64(100), int64(200), int64(400)
	wantKey := []byte{'e', 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 200, 0, 0, 0, 0, 0, 0, 1, 144}
	assert.Equal(t, wantKey, kf.Key(first, second, third))

	var gotFirst, gotSecond, gotThird int64
	kf.Scan(wantKey, &gotFirst, &gotSecond, &gotThird)
	assert.Equal(t, first, gotFirst)
	assert.Equal(t, second, gotSecond)
	assert.Equal(t, third, gotThird)

	// The final segment can equally be read out as raw bytes.
	var tail []byte
	kf.Scan(wantKey, &gotFirst, &gotSecond, &tail)
	assert.Equal(t, first, gotFirst)
	assert.Equal(t, second, gotSecond)
	assert.Equal(t, []byte{0, 0, 0, 0, 0, 0, 1, 144}, tail)

	assert.Equal(t, []byte{'e', 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 200}, kf.Key(first, second))
}

// TestNegativeKeys checks that negative int64 values are written in their two's complement form and are read
// back unchanged.
func TestNegativeKeys(t *testing.T) {
	kf := NewKeyFormat(byte('e'), 8, 8)

	var first, second int64 = -100, -200
	// One's complement plus one
	lowFirst := byte(0xff + first + 1)
	lowSecond := byte(0xff + second + 1)
	wantKey := []byte{'e',
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, lowFirst,
		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, lowSecond}
	assert.Equal(t, wantKey, kf.Key(first, second))

	var gotFirst, gotSecond int64
	kf.Scan(wantKey, &gotFirst, &gotSecond)
	assert.Equal(t, first, gotFirst)
	assert.Equal(t, second, gotSecond)
}

// TestOverflow checks values near the top of the 64-bit range: an int64 of 1<<62 and a uint64 of 1<<63, the
// latter being read back into an int64 and compared against its int64 conversion.
func TestOverflow(t *testing.T) {
	kf := NewKeyFormat(byte('o'), 8, 8)

	var signed int64 = 1 << 62
	var unsigned uint64 = 1 << 63
	wantKey := []byte{'o',
		0x40, 0, 0, 0, 0, 0, 0, 0,
		0x80, 0, 0, 0, 0, 0, 0, 0,
	}
	assert.Equal(t, wantKey, kf.Key(signed, unsigned))

	var gotSigned, gotUnsigned int64
	kf.Scan(wantKey, &gotSigned, &gotUnsigned)
	assert.Equal(t, signed, gotSigned)
	assert.Equal(t, int64(unsigned), gotUnsigned)
}
65 changes: 32 additions & 33 deletions nodedb.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,29 @@ import (
"sort"
"sync"

"github.com/tendermint/tendermint/crypto/tmhash"
dbm "github.com/tendermint/tendermint/libs/db"
)

const (
int64Size = 8
hashSize = tmhash.Size
)

var (
// All node keys are prefixed with this. This ensures no collision is
// possible with the other keys, and makes them easier to traverse.
nodePrefix = "n/"
nodeKeyFmt = "n/%X"
// All node keys are prefixed with the byte 'n'. This ensures no collision is
// possible with the other keys, and makes them easier to traverse. They are indexed by the node hash.
nodeKeyFormat = NewKeyFormat('n', hashSize) // n<hash>

// Orphans are keyed in the database by their expected lifetime.
// The first number represents the *last* version at which the orphan needs
// to exist, while the second number represents the *earliest* version at
// which it is expected to exist - which starts out by being the version
// of the node being orphaned.
orphanPrefix = "o/"
orphanPrefixFmt = "o/%010d/" // o/<last-version>/
orphanKeyFmt = "o/%010d/%010d/%X" // o/<last-version>/<first-version>/<hash>
orphanKeyFormat = NewKeyFormat('o', int64Size, int64Size, hashSize) // o<last-version><first-version><hash>

// r/<version>
rootPrefix = "r/"
rootPrefixFmt = "r/%010d"
// Root nodes are indexed separately by their version
rootKeyFormat = NewKeyFormat('r', int64Size) // r<version>
)

type nodeDB struct {
Expand Down Expand Up @@ -196,7 +198,7 @@ func (ndb *nodeDB) deleteOrphans(version int64) {

// See comment on `orphanKeyFormat`. Note that here, `version` and
// `toVersion` are always equal.
fmt.Sscanf(string(key), orphanKeyFmt, &toVersion, &fromVersion)
orphanKeyFormat.Scan(key, &toVersion, &fromVersion)

// Delete orphan key and reverse-lookup key.
ndb.batch.Delete(key)
Expand All @@ -218,15 +220,15 @@ func (ndb *nodeDB) deleteOrphans(version int64) {
}

func (ndb *nodeDB) nodeKey(hash []byte) []byte {
return []byte(fmt.Sprintf(nodeKeyFmt, hash))
return nodeKeyFormat.KeyBytes(hash)
}

func (ndb *nodeDB) orphanKey(fromVersion, toVersion int64, hash []byte) []byte {
return []byte(fmt.Sprintf(orphanKeyFmt, toVersion, fromVersion, hash))
return orphanKeyFormat.Key(toVersion, fromVersion, hash)
}

func (ndb *nodeDB) rootKey(version int64) []byte {
return []byte(fmt.Sprintf(rootPrefixFmt, version))
return rootKeyFormat.Key(version)
}

func (ndb *nodeDB) getLatestVersion() int64 {
Expand All @@ -244,20 +246,16 @@ func (ndb *nodeDB) updateLatestVersion(version int64) {

func (ndb *nodeDB) getPreviousVersion(version int64) int64 {
itr := ndb.db.ReverseIterator(
[]byte(fmt.Sprintf(rootPrefixFmt, version-1)),
[]byte(fmt.Sprintf(rootPrefixFmt, 0)),
rootKeyFormat.Key(version-1),
rootKeyFormat.Key(0),
)
defer itr.Close()

pversion := int64(-1)
for ; itr.Valid(); itr.Next() {
k := itr.Key()
_, err := fmt.Sscanf(string(k), rootPrefixFmt, &pversion)
if err != nil {
panic(err)
} else {
return pversion
}
rootKeyFormat.Scan(k, &pversion)
return pversion
}

return 0
Expand All @@ -274,13 +272,12 @@ func (ndb *nodeDB) deleteRoot(version int64) {
}

func (ndb *nodeDB) traverseOrphans(fn func(k, v []byte)) {
ndb.traversePrefix([]byte(orphanPrefix), fn)
ndb.traversePrefix(orphanKeyFormat.Key(), fn)
}

// Traverse orphans ending at a certain version.
func (ndb *nodeDB) traverseOrphansVersion(version int64, fn func(k, v []byte)) {
prefix := fmt.Sprintf(orphanPrefixFmt, version)
ndb.traversePrefix([]byte(prefix), fn)
ndb.traversePrefix(orphanKeyFormat.Key(version), fn)
}

// Traverse all keys.
Expand Down Expand Up @@ -339,9 +336,9 @@ func (ndb *nodeDB) getRoot(version int64) []byte {
func (ndb *nodeDB) getRoots() (map[int64][]byte, error) {
roots := map[int64][]byte{}

ndb.traversePrefix([]byte(rootPrefix), func(k, v []byte) {
ndb.traversePrefix(rootKeyFormat.Key(), func(k, v []byte) {
var version int64
fmt.Sscanf(string(k), rootPrefixFmt, &version)
rootKeyFormat.Scan(k, &version)
roots[version] = v
})
return roots, nil
Expand Down Expand Up @@ -426,12 +423,12 @@ func (ndb *nodeDB) size() int {
func (ndb *nodeDB) traverseNodes(fn func(hash []byte, node *Node)) {
nodes := []*Node{}

ndb.traversePrefix([]byte(nodePrefix), func(key, value []byte) {
ndb.traversePrefix(nodeKeyFormat.Key(), func(key, value []byte) {
node, err := MakeNode(value)
if err != nil {
panic(fmt.Sprintf("Couldn't decode node from database: %v", err))
}
fmt.Sscanf(string(key), nodeKeyFmt, &node.hash)
nodeKeyFormat.Scan(key, &node.hash)
nodes = append(nodes, node)
})

Expand All @@ -448,7 +445,7 @@ func (ndb *nodeDB) String() string {
var str string
index := 0

ndb.traversePrefix([]byte(rootPrefix), func(key, value []byte) {
ndb.traversePrefix(rootKeyFormat.Key(), func(key, value []byte) {
str += fmt.Sprintf("%s: %x\n", string(key), value)
})
str += "\n"
Expand All @@ -462,11 +459,13 @@ func (ndb *nodeDB) String() string {
if len(hash) == 0 {
str += fmt.Sprintf("<nil>\n")
} else if node == nil {
str += fmt.Sprintf("%s%40x: <nil>\n", nodePrefix, hash)
str += fmt.Sprintf("%s%40x: <nil>\n", nodeKeyFormat.Prefix(), hash)
} else if node.value == nil && node.height > 0 {
str += fmt.Sprintf("%s%40x: %s %-16s h=%d version=%d\n", nodePrefix, hash, node.key, "", node.height, node.version)
str += fmt.Sprintf("%s%40x: %s %-16s h=%d version=%d\n",
nodeKeyFormat.Prefix(), hash, node.key, "", node.height, node.version)
} else {
str += fmt.Sprintf("%s%40x: %s = %-16s h=%d version=%d\n", nodePrefix, hash, node.key, node.value, node.height, node.version)
str += fmt.Sprintf("%s%40x: %s = %-16s h=%d version=%d\n",
nodeKeyFormat.Prefix(), hash, node.key, node.value, node.height, node.version)
}
index++
})
Expand Down
Loading