
Tests for unsharding PR (#99)
- Add tests for automatic unsharding
- Modified some internals to be sufficiently extensible for testing
schomatis authored Oct 22, 2021
1 parent efd7822 commit 51cb5fe
Showing 7 changed files with 503 additions and 168 deletions.
41 changes: 30 additions & 11 deletions hamt/hamt.go
@@ -25,18 +25,24 @@ import (
"fmt"
"os"

format "github.com/ipfs/go-unixfs"
"github.com/ipfs/go-unixfs/internal"

bitfield "github.com/ipfs/go-bitfield"
cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
dag "github.com/ipfs/go-merkledag"
format "github.com/ipfs/go-unixfs"
)

const (
// HashMurmur3 is the multiformats identifier for Murmur3
HashMurmur3 uint64 = 0x22
)

func init() {
internal.HAMTHashFunction = murmur3Hash
}

func (ds *Shard) isValueNode() bool {
return ds.key != "" && ds.val != nil
}
@@ -45,17 +51,29 @@ func (ds *Shard) isValueNode() bool {
type Shard struct {
childer *childer

tableSize int
// Entries per node (number of possible children indexed by the partial key).
tableSize int
// Bits needed to encode child indexes (log2 of number of entries). This is
// the number of bits taken from the hash key on each level of the tree.
tableSizeLg2 int

builder cid.Builder
hashFunc uint64

// Format string used to zero-pad the hexadecimal encoding of the child index
// so it always reaches the fixed maxpadlen length.
// Example: maxpadlen = 4 => prefixPadStr: "%04X" (print the number in
// hexadecimal, padded with zeros to always reach 4 characters).
prefixPadStr string
maxpadlen int
// Length in characters of the string that encodes child indexes. Since we
// encode indexes as hexadecimal strings (4 bits per character), this is
// log16 of the number of entries (rounded up).
maxpadlen int

dserv ipld.DAGService

// FIXME: Remove. We don't actually store "value nodes". This confusing
// abstraction just removes the maxpadlen from the link names to extract
// the actual value link the trie is storing.
// leaf node
key string
val *ipld.Link
@@ -68,12 +86,13 @@ func NewShard(dserv ipld.DAGService, size int) (*Shard, error) {
return nil, err
}

// FIXME: Make this at least a static configuration for testing.
ds.hashFunc = HashMurmur3
return ds, nil
}

func makeShard(ds ipld.DAGService, size int) (*Shard, error) {
lg2s, err := logtwo(size)
lg2s, err := Logtwo(size)
if err != nil {
return nil, err
}
@@ -211,7 +230,7 @@ func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error {
// name key in this Shard or its children. It also returns the previous link
// under that name key (if any).
func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) {
hv := &hashBits{b: hash([]byte(name))}
hv := newHashBits(name)
err := ds.dserv.Add(ctx, node)
if err != nil {
return nil, err
@@ -221,6 +240,9 @@ func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node
if err != nil {
return nil, err
}

// FIXME: We don't need to set the name here, it will get overwritten.
// This is confusing, confirm and remove this line.
lnk.Name = ds.linkNamePrefix(0) + name

return ds.setValue(ctx, hv, name, lnk)
@@ -236,13 +258,13 @@ func (ds *Shard) Remove(ctx context.Context, name string) error {
// RemoveAndPrevious is similar to the public Remove but also returns the
// old removed link (if it exists).
func (ds *Shard) RemoveAndPrevious(ctx context.Context, name string) (*ipld.Link, error) {
hv := &hashBits{b: hash([]byte(name))}
hv := newHashBits(name)
return ds.setValue(ctx, hv, name, nil)
}

// Find searches for a child node by 'name' within this hamt
func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) {
hv := &hashBits{b: hash([]byte(name))}
hv := newHashBits(name)

var out *ipld.Link
err := ds.getValue(ctx, hv, name, func(sv *Shard) error {
@@ -489,10 +511,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
return nil, err
}
child.builder = ds.builder
chhv := &hashBits{
b: hash([]byte(grandChild.key)),
consumed: hv.consumed,
}
chhv := newConsumedHashBits(grandChild.key, hv.consumed)

// We explicitly ignore the oldValue returned by the next two insertions
// (which will be nil) to highlight there is no overwrite here: they are
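The prefixPadStr and maxpadlen fields documented above can be derived directly from the shard width. Below is a minimal, self-contained sketch of that computation and of how a child index becomes a link-name prefix; the padParams helper is hypothetical and not part of this package, but the arithmetic mirrors what the comments describe.

package main

import "fmt"

// padParams derives the index-encoding parameters for a shard of the given
// width (assumed to be a power of two, as makeShard requires).
func padParams(width int) (prefixPadStr string, maxpadlen int) {
	maxpadlen = len(fmt.Sprintf("%X", width-1))     // 256 -> len("FF") = 2
	prefixPadStr = fmt.Sprintf("%%0%dX", maxpadlen) // 256 -> "%02X"
	return prefixPadStr, maxpadlen
}

func main() {
	prefix, pad := padParams(256)
	fmt.Println(prefix, pad)                          // "%02X" 2
	fmt.Println(fmt.Sprintf(prefix, 10) + "file.txt") // link name: index "0A" + entry name
}

For the default 256-wide shard, every link name therefore carries a two-character hexadecimal prefix identifying the child slot.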
18 changes: 15 additions & 3 deletions hamt/util.go
@@ -2,9 +2,11 @@ package hamt

import (
"fmt"
"math/bits"

"github.com/ipfs/go-unixfs/internal"

"github.com/spaolacci/murmur3"
"math/bits"
)

// hashBits is a helper that allows the reading of the 'next n bits' as an integer.
@@ -13,6 +15,16 @@ type hashBits struct {
consumed int
}

func newHashBits(val string) *hashBits {
return &hashBits{b: internal.HAMTHashFunction([]byte(val))}
}

func newConsumedHashBits(val string, consumed int) *hashBits {
hv := &hashBits{b: internal.HAMTHashFunction([]byte(val))}
hv.consumed = consumed
return hv
}

func mkmask(n int) byte {
return (1 << uint(n)) - 1
}
@@ -50,7 +62,7 @@ func (hb *hashBits) next(i int) int {
}
}

func logtwo(v int) (int, error) {
func Logtwo(v int) (int, error) {
if v <= 0 {
return 0, fmt.Errorf("hamt size should be a power of two")
}
@@ -61,7 +73,7 @@ func logtwo(v int) (int, error) {
return lg2, nil
}

func hash(val []byte) []byte {
func murmur3Hash(val []byte) []byte {
h := murmur3.New64()
h.Write(val)
return h.Sum(nil)
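The hashBits helper above reads the hash tableSizeLg2 bits at a time, and newConsumedHashBits resumes reading at a given bit offset. The toy bit reader below is an illustrative sketch of that idea, not the package's actual implementation: it consumes a fixed number of bits per tree level to obtain child indexes.

package main

import "fmt"

// bitReader mimics the idea behind hashBits: consume the hash a fixed number
// of bits at a time to pick a child index at each level of the trie.
type bitReader struct {
	b        []byte
	consumed int
}

// next returns the next n bits as an integer, most significant bit first.
func (r *bitReader) next(n int) int {
	out := 0
	for i := 0; i < n; i++ {
		byteIdx := (r.consumed + i) / 8
		bitIdx := 7 - (r.consumed+i)%8
		out = out<<1 | int((r.b[byteIdx]>>uint(bitIdx))&1)
	}
	r.consumed += n
	return out
}

func main() {
	r := &bitReader{b: []byte{0xAB, 0xCD}}
	// With a 256-wide shard (tableSizeLg2 = 8) each byte is one child index.
	fmt.Println(r.next(8), r.next(8)) // 171 205
}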
3 changes: 3 additions & 0 deletions internal/config.go
@@ -0,0 +1,3 @@
package internal

var HAMTHashFunction func(val []byte) []byte
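This package-level function variable is the hook that lets tests replace the production Murmur3 hash (installed by the init in hamt/hamt.go above). A hedged sketch of how a test inside this module could temporarily swap in an identity hash follows; the test name and body are hypothetical, and only internal.HAMTHashFunction comes from this commit.

package io

import (
	"testing"

	"github.com/ipfs/go-unixfs/internal"
)

// TestWithIdentityHash is a hypothetical example of using the new hook.
func TestWithIdentityHash(t *testing.T) {
	orig := internal.HAMTHashFunction
	internal.HAMTHashFunction = func(val []byte) []byte { return val } // identity hash
	defer func() { internal.HAMTHashFunction = orig }()

	// ... build HAMTs whose child indexes now follow directly from the keys ...
}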
95 changes: 95 additions & 0 deletions io/completehamt_test.go
@@ -0,0 +1,95 @@
package io

import (
"context"
"encoding/binary"
"fmt"
"math"
"testing"

mdtest "github.com/ipfs/go-merkledag/test"
"github.com/stretchr/testify/assert"

"github.com/ipfs/go-unixfs"
"github.com/ipfs/go-unixfs/hamt"

ipld "github.com/ipfs/go-ipld-format"
)

// CreateCompleteHAMT creates a HAMT with the following properties:
// * its height (distance/edges from root to deepest node) is specified by treeHeight.
// * all leaf Shard nodes have the same depth (and have only 'value' links).
// * all internal Shard nodes point only to other Shards (and hence have zero 'value' links).
// * the total number of 'value' links (directory entries) is:
//   childsPerNode ^ treeHeight.
// FIXME: HAMTHashFunction needs to be set to idHash by the caller. We depend on
// this simplification for the current logic to work. (HAMTHashFunction is a
// global setting of the package, it is hard-coded in the serialized Shard node
// and not allowed to be changed on a per HAMT/Shard basis.)
// (If we didn't rehash inside setValue then we could just generate
// the fake hash as in io.SetAndPrevious through `newHashBits()` and pass
// it as an argument making the hash independent of tree manipulation; that
// sounds like the correct way to go in general and we wouldn't need this.)
func CreateCompleteHAMT(ds ipld.DAGService, treeHeight int, childsPerNode int) (ipld.Node, error) {
if treeHeight < 1 {
panic("treeHeight < 1")
}
if treeHeight > 8 {
panic("treeHeight > 8: we don't allow a key larger than what can be encoded in a 64-bit word")
}

rootShard, err := hamt.NewShard(ds, childsPerNode)
if err != nil {
return nil, err
}

// Assuming we are using the ID hash function we can just insert all
// the combinations of a byte slice that will reach the desired height.
totalChildren := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
log2ofChilds, err := hamt.Logtwo(childsPerNode)
if err != nil {
return nil, err
}
if log2ofChilds*treeHeight%8 != 0 {
return nil, fmt.Errorf("log2(childsPerNode) * treeHeight should be a multiple of 8")
}
bytesInKey := log2ofChilds * treeHeight / 8
for i := 0; i < totalChildren; i++ {
var hashbuf [8]byte
binary.LittleEndian.PutUint64(hashbuf[:], uint64(i))
var oldLink *ipld.Link
oldLink, err = rootShard.SetAndPrevious(context.Background(), string(hashbuf[:bytesInKey]), unixfs.EmptyFileNode())
if err != nil {
return nil, err
}
if oldLink != nil {
// We shouldn't be overwriting any value, otherwise the tree
// won't be complete.
return nil, fmt.Errorf("we have overwritten entry %s",
oldLink.Cid)
}
}

return rootShard.Node()
}

// idHash returns its input unchanged (identity hash).
func idHash(val []byte) []byte {
return val
}

func TestCreateCompleteShard(t *testing.T) {
ds := mdtest.Mock()
childsPerNode := 16
treeHeight := 2
node, err := CreateCompleteHAMT(ds, treeHeight, childsPerNode)
assert.NoError(t, err)

shard, err := hamt.NewHamtFromDag(ds, node)
assert.NoError(t, err)
links, err := shard.EnumLinks(context.Background())
assert.NoError(t, err)

childNodes := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
assert.Equal(t, childNodes, len(links))
}
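For the parameters used in TestCreateCompleteShard the key sizing works out as follows; this is a standalone worked example mirroring the arithmetic in CreateCompleteHAMT, not code from this commit.

package main

import (
	"fmt"
	"math"
	"math/bits"
)

func main() {
	childsPerNode, treeHeight := 16, 2
	log2ofChilds := bits.TrailingZeros(uint(childsPerNode)) // 4 bits consumed per level
	totalBits := log2ofChilds * treeHeight                  // 8, must be a multiple of 8
	bytesInKey := totalBits / 8                             // 1-byte keys
	totalChildren := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
	fmt.Println(log2ofChilds, bytesInKey, totalChildren) // 4 1 256
}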
42 changes: 18 additions & 24 deletions io/directory.go
@@ -3,14 +3,16 @@ package io
import (
"context"
"fmt"
mdag "github.com/ipfs/go-merkledag"
format "github.com/ipfs/go-unixfs"
"github.com/ipfs/go-unixfs/hamt"
"os"

"github.com/ipfs/go-unixfs/hamt"
"github.com/ipfs/go-unixfs/private/linksize"

"github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
logging "github.com/ipfs/go-log"
mdag "github.com/ipfs/go-merkledag"
format "github.com/ipfs/go-unixfs"
)

var log = logging.Logger("unixfs")
@@ -24,6 +26,7 @@ var log = logging.Logger("unixfs")
var HAMTShardingSize = 0

// DefaultShardWidth is the default value used for hamt sharding width.
// Needs to be a power of two (shard entry table size) and a multiple of 8 (bitfield size).
var DefaultShardWidth = 256

// Directory defines a UnixFS directory. It is used for creating, reading and
@@ -78,7 +81,9 @@ func productionLinkSize(linkName string, linkCid cid.Cid) int {
return len(linkName) + linkCid.ByteLen()
}

var estimatedLinkSize = productionLinkSize
func init() {
linksize.LinkSizeFunction = productionLinkSize
}

// BasicDirectory is the basic implementation of `Directory`. All the entries
// are stored in a single node.
@@ -167,11 +172,11 @@ func (d *BasicDirectory) computeEstimatedSize() {
}

func (d *BasicDirectory) addToEstimatedSize(name string, linkCid cid.Cid) {
d.estimatedSize += estimatedLinkSize(name, linkCid)
d.estimatedSize += linksize.LinkSizeFunction(name, linkCid)
}

func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) {
d.estimatedSize -= estimatedLinkSize(name, linkCid)
d.estimatedSize -= linksize.LinkSizeFunction(name, linkCid)
if d.estimatedSize < 0 {
// Something has gone very wrong. Log an error and recompute the
// size from scratch.
@@ -208,10 +213,10 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node
if err != nil {
return false, err
}
operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid)
operationSizeChange -= linksize.LinkSizeFunction(name, entryToRemove.Cid)
}
if nodeToAdd != nil {
operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid())
operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid())
}

return d.estimatedSize+operationSizeChange >= HAMTShardingSize, nil
@@ -437,11 +442,11 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err
}

func (d *HAMTDirectory) addToSizeChange(name string, linkCid cid.Cid) {
d.sizeChange += estimatedLinkSize(name, linkCid)
d.sizeChange += linksize.LinkSizeFunction(name, linkCid)
}

func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) {
d.sizeChange -= estimatedLinkSize(name, linkCid)
d.sizeChange -= linksize.LinkSizeFunction(name, linkCid)
}

// Evaluate a switch from HAMTDirectory to BasicDirectory in case the size will
@@ -464,12 +469,12 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string
if err != nil {
return false, err
}
operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid)
operationSizeChange -= linksize.LinkSizeFunction(name, entryToRemove.Cid)
}

// For the AddEntry case compute the size addition of the new entry.
if nodeToAdd != nil {
operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid())
operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid())
}

if d.sizeChange+operationSizeChange >= 0 {
@@ -506,7 +511,7 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int)
return false, linkResult.Err
}

partialSize += estimatedLinkSize(linkResult.Link.Name, linkResult.Link.Cid)
partialSize += linksize.LinkSizeFunction(linkResult.Link.Name, linkResult.Link.Cid)
if partialSize+sizeChange >= HAMTShardingSize {
// We have already fetched enough shards to assert we are
// above the threshold, so no need to keep fetching.
@@ -581,17 +586,6 @@ func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipl
return nil
}

func (d *UpgradeableDirectory) getDagService() ipld.DAGService {
switch v := d.Directory.(type) {
case *BasicDirectory:
return v.dserv
case *HAMTDirectory:
return v.dserv
default:
panic("unknown directory type")
}
}

// RemoveChild implements the `Directory` interface. Used in the case where we wrap
// a HAMTDirectory that might need to be downgraded to a BasicDirectory. The
// upgrade path is in AddChild.
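The estimated-size bookkeeping that drives the switch between BasicDirectory and HAMTDirectory accumulates productionLinkSize (len(linkName) + linkCid.ByteLen()) over the entries and compares the total against HAMTShardingSize. The following simplified, dependency-free sketch shows that decision; hamtShardingSize, linkSize, and needsToSwitchToHAMT are hypothetical stand-ins for the real HAMTShardingSize, linksize.LinkSizeFunction, and needsToSwitchToHAMTDir.

package main

import "fmt"

// Assumed example threshold in bytes; the package default is 0 (sharding disabled).
const hamtShardingSize = 256 * 1024

// linkSize models productionLinkSize: name length plus CID byte length.
func linkSize(name string, cidByteLen int) int {
	return len(name) + cidByteLen
}

// needsToSwitchToHAMT reports whether adding one more entry pushes the
// directory's estimated serialized size to the sharding threshold.
func needsToSwitchToHAMT(estimatedSize int, addName string, addCidLen int) bool {
	return estimatedSize+linkSize(addName, addCidLen) >= hamtShardingSize
}

func main() {
	fmt.Println(needsToSwitchToHAMT(262100, "file.txt", 36)) // true: 262100+8+36 >= 262144
}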
