Skip to content
This repository was archived by the owner on Jun 27, 2023. It is now read-only.

Commit 51cb5fe

Browse files
authored
Tests for unsharding PR (#99)
- Add tests for automatic unsharding - Modified some internals to be sufficiently extensible for testing
1 parent efd7822 commit 51cb5fe

File tree

7 files changed

+503
-168
lines changed

7 files changed

+503
-168
lines changed

hamt/hamt.go

+30-11
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,24 @@ import (
2525
"fmt"
2626
"os"
2727

28+
format "github.com/ipfs/go-unixfs"
29+
"github.com/ipfs/go-unixfs/internal"
30+
2831
bitfield "github.com/ipfs/go-bitfield"
2932
cid "github.com/ipfs/go-cid"
3033
ipld "github.com/ipfs/go-ipld-format"
3134
dag "github.com/ipfs/go-merkledag"
32-
format "github.com/ipfs/go-unixfs"
3335
)
3436

3537
const (
3638
// HashMurmur3 is the multiformats identifier for Murmur3
3739
HashMurmur3 uint64 = 0x22
3840
)
3941

42+
func init() {
43+
internal.HAMTHashFunction = murmur3Hash
44+
}
45+
4046
func (ds *Shard) isValueNode() bool {
4147
return ds.key != "" && ds.val != nil
4248
}
@@ -45,17 +51,29 @@ func (ds *Shard) isValueNode() bool {
4551
type Shard struct {
4652
childer *childer
4753

48-
tableSize int
54+
// Entries per node (number of possible children indexed by the partial key).
55+
tableSize int
56+
// Bits needed to encode child indexes (log2 of number of entries). This is
57+
// the number of bits taken from the hash key on each level of the tree.
4958
tableSizeLg2 int
5059

5160
builder cid.Builder
5261
hashFunc uint64
5362

63+
// String format with number of zeros that will be present in the hexadecimal
64+
// encoding of the child index to always reach the fixed maxpadlen chars.
65+
// Example: maxpadlen = 4 => prefixPadStr: "%04X" (print number in hexadecimal
66+
// format padding with zeros to always reach 4 characters).
5467
prefixPadStr string
55-
maxpadlen int
68+
// Length in chars of string that encodes child indexes. We encode indexes
69+
// as hexadecimal strings, so this is log16 of the number of entries.
70+
maxpadlen int
5671

5772
dserv ipld.DAGService
5873

74+
// FIXME: Remove. We don't actually store "value nodes". This confusing
75+
// abstraction just removes the maxpadlen from the link names to extract
76+
// the actual value link the trie is storing.
5977
// leaf node
6078
key string
6179
val *ipld.Link
@@ -68,12 +86,13 @@ func NewShard(dserv ipld.DAGService, size int) (*Shard, error) {
6886
return nil, err
6987
}
7088

89+
// FIXME: Make this at least a static configuration for testing.
7190
ds.hashFunc = HashMurmur3
7291
return ds, nil
7392
}
7493

7594
func makeShard(ds ipld.DAGService, size int) (*Shard, error) {
76-
lg2s, err := logtwo(size)
95+
lg2s, err := Logtwo(size)
7796
if err != nil {
7897
return nil, err
7998
}
@@ -211,7 +230,7 @@ func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error {
211230
// name key in this Shard or its children. It also returns the previous link
212231
// under that name key (if any).
213232
func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node) (*ipld.Link, error) {
214-
hv := &hashBits{b: hash([]byte(name))}
233+
hv := newHashBits(name)
215234
err := ds.dserv.Add(ctx, node)
216235
if err != nil {
217236
return nil, err
@@ -221,6 +240,9 @@ func (ds *Shard) SetAndPrevious(ctx context.Context, name string, node ipld.Node
221240
if err != nil {
222241
return nil, err
223242
}
243+
244+
// FIXME: We don't need to set the name here, it will get overwritten.
245+
// This is confusing, confirm and remove this line.
224246
lnk.Name = ds.linkNamePrefix(0) + name
225247

226248
return ds.setValue(ctx, hv, name, lnk)
@@ -236,13 +258,13 @@ func (ds *Shard) Remove(ctx context.Context, name string) error {
236258
// RemoveAndPrevious is similar to the public Remove but also returns the
237259
// old removed link (if it exists).
238260
func (ds *Shard) RemoveAndPrevious(ctx context.Context, name string) (*ipld.Link, error) {
239-
hv := &hashBits{b: hash([]byte(name))}
261+
hv := newHashBits(name)
240262
return ds.setValue(ctx, hv, name, nil)
241263
}
242264

243265
// Find searches for a child node by 'name' within this hamt
244266
func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) {
245-
hv := &hashBits{b: hash([]byte(name))}
267+
hv := newHashBits(name)
246268

247269
var out *ipld.Link
248270
err := ds.getValue(ctx, hv, name, func(sv *Shard) error {
@@ -489,10 +511,7 @@ func (ds *Shard) setValue(ctx context.Context, hv *hashBits, key string, value *
489511
return nil, err
490512
}
491513
child.builder = ds.builder
492-
chhv := &hashBits{
493-
b: hash([]byte(grandChild.key)),
494-
consumed: hv.consumed,
495-
}
514+
chhv := newConsumedHashBits(grandChild.key, hv.consumed)
496515

497516
// We explicitly ignore the oldValue returned by the next two insertions
498517
// (which will be nil) to highlight there is no overwrite here: they are

hamt/util.go

+15-3
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@ package hamt
22

33
import (
44
"fmt"
5+
"math/bits"
6+
7+
"github.com/ipfs/go-unixfs/internal"
58

69
"github.com/spaolacci/murmur3"
7-
"math/bits"
810
)
911

1012
// hashBits is a helper that allows the reading of the 'next n bits' as an integer.
@@ -13,6 +15,16 @@ type hashBits struct {
1315
consumed int
1416
}
1517

18+
func newHashBits(val string) *hashBits {
19+
return &hashBits{b: internal.HAMTHashFunction([]byte(val))}
20+
}
21+
22+
func newConsumedHashBits(val string, consumed int) *hashBits {
23+
hv := &hashBits{b: internal.HAMTHashFunction([]byte(val))}
24+
hv.consumed = consumed
25+
return hv
26+
}
27+
1628
func mkmask(n int) byte {
1729
return (1 << uint(n)) - 1
1830
}
@@ -50,7 +62,7 @@ func (hb *hashBits) next(i int) int {
5062
}
5163
}
5264

53-
func logtwo(v int) (int, error) {
65+
func Logtwo(v int) (int, error) {
5466
if v <= 0 {
5567
return 0, fmt.Errorf("hamt size should be a power of two")
5668
}
@@ -61,7 +73,7 @@ func logtwo(v int) (int, error) {
6173
return lg2, nil
6274
}
6375

64-
func hash(val []byte) []byte {
76+
func murmur3Hash(val []byte) []byte {
6577
h := murmur3.New64()
6678
h.Write(val)
6779
return h.Sum(nil)

internal/config.go

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
package internal
2+
3+
var HAMTHashFunction func(val []byte) []byte

io/completehamt_test.go

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package io
2+
3+
import (
4+
"context"
5+
"encoding/binary"
6+
"fmt"
7+
"math"
8+
"testing"
9+
10+
mdtest "github.com/ipfs/go-merkledag/test"
11+
"github.com/stretchr/testify/assert"
12+
13+
"github.com/ipfs/go-unixfs"
14+
"github.com/ipfs/go-unixfs/hamt"
15+
16+
ipld "github.com/ipfs/go-ipld-format"
17+
)
18+
19+
// CreateCompleteHAMT creates a HAMT with the following properties:
20+
// * its height (distance/edges from root to deepest node) is specified by treeHeight.
21+
// * all leaf Shard nodes have the same depth (and have only 'value' links).
22+
// * all internal Shard nodes point only to other Shards (and hence have zero 'value' links).
23+
// * the total number of 'value' links (directory entries) is:
24+
// childsPerNode ^ treeHeight.
25+
// FIXME: HAMTHashFunction needs to be set to idHash by the caller. We depend on
26+
// this simplification for the current logic to work. (HAMTHashFunction is a
27+
// global setting of the package, it is hard-coded in the serialized Shard node
28+
// and not allowed to be changed on a per HAMT/Shard basis.)
29+
// (If we didn't rehash inside setValue then we could just generate
30+
// the fake hash as in hamt's Shard.SetAndPrevious through `newHashBits()` and pass
31+
// it as an argument making the hash independent of tree manipulation; that
32+
// sounds like the correct way to go in general and we wouldn't need this.)
33+
func CreateCompleteHAMT(ds ipld.DAGService, treeHeight int, childsPerNode int) (ipld.Node, error) {
34+
if treeHeight < 1 {
35+
panic("treeHeight < 1")
36+
}
37+
if treeHeight > 8 {
38+
panic("treeHeight > 8: we don't allow a key larger than what can be encoded in a 64-bit word")
39+
}
40+
41+
rootShard, err := hamt.NewShard(ds, childsPerNode)
42+
if err != nil {
43+
return nil, err
44+
}
45+
46+
// Assuming we are using the ID hash function we can just insert all
47+
// the combinations of a byte slice that will reach the desired height.
48+
totalChildren := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
49+
log2ofChilds, err := hamt.Logtwo(childsPerNode)
50+
if err != nil {
51+
return nil, err
52+
}
53+
if log2ofChilds*treeHeight%8 != 0 {
54+
return nil, fmt.Errorf("childsPerNode * treeHeight should be multiple of 8")
55+
}
56+
bytesInKey := log2ofChilds * treeHeight / 8
57+
for i := 0; i < totalChildren; i++ {
58+
var hashbuf [8]byte
59+
binary.LittleEndian.PutUint64(hashbuf[:], uint64(i))
60+
var oldLink *ipld.Link
61+
oldLink, err = rootShard.SetAndPrevious(context.Background(), string(hashbuf[:bytesInKey]), unixfs.EmptyFileNode())
62+
if err != nil {
63+
return nil, err
64+
}
65+
if oldLink != nil {
66+
// We shouldn't be overwriting any value, otherwise the tree
67+
// won't be complete.
68+
return nil, fmt.Errorf("we have overwritten entry %s",
69+
oldLink.Cid)
70+
}
71+
}
72+
73+
return rootShard.Node()
74+
}
75+
76+
// Return the same value as the hash.
77+
func idHash(val []byte) []byte {
78+
return val
79+
}
80+
81+
func TestCreateCompleteShard(t *testing.T) {
82+
ds := mdtest.Mock()
83+
childsPerNode := 16
84+
treeHeight := 2
85+
node, err := CreateCompleteHAMT(ds, treeHeight, childsPerNode)
86+
assert.NoError(t, err)
87+
88+
shard, err := hamt.NewHamtFromDag(ds, node)
89+
assert.NoError(t, err)
90+
links, err := shard.EnumLinks(context.Background())
91+
assert.NoError(t, err)
92+
93+
childNodes := int(math.Pow(float64(childsPerNode), float64(treeHeight)))
94+
assert.Equal(t, childNodes, len(links))
95+
}

io/directory.go

+18-24
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@ package io
33
import (
44
"context"
55
"fmt"
6-
mdag "github.com/ipfs/go-merkledag"
7-
format "github.com/ipfs/go-unixfs"
8-
"github.com/ipfs/go-unixfs/hamt"
96
"os"
107

8+
"github.com/ipfs/go-unixfs/hamt"
9+
"github.com/ipfs/go-unixfs/private/linksize"
10+
1111
"github.com/ipfs/go-cid"
1212
ipld "github.com/ipfs/go-ipld-format"
1313
logging "github.com/ipfs/go-log"
14+
mdag "github.com/ipfs/go-merkledag"
15+
format "github.com/ipfs/go-unixfs"
1416
)
1517

1618
var log = logging.Logger("unixfs")
@@ -24,6 +26,7 @@ var log = logging.Logger("unixfs")
2426
var HAMTShardingSize = 0
2527

2628
// DefaultShardWidth is the default value used for hamt sharding width.
29+
// Needs to be a power of two (shard entry size) and multiple of 8 (bitfield size).
2730
var DefaultShardWidth = 256
2831

2932
// Directory defines a UnixFS directory. It is used for creating, reading and
@@ -78,7 +81,9 @@ func productionLinkSize(linkName string, linkCid cid.Cid) int {
7881
return len(linkName) + linkCid.ByteLen()
7982
}
8083

81-
var estimatedLinkSize = productionLinkSize
84+
func init() {
85+
linksize.LinkSizeFunction = productionLinkSize
86+
}
8287

8388
// BasicDirectory is the basic implementation of `Directory`. All the entries
8489
// are stored in a single node.
@@ -167,11 +172,11 @@ func (d *BasicDirectory) computeEstimatedSize() {
167172
}
168173

169174
func (d *BasicDirectory) addToEstimatedSize(name string, linkCid cid.Cid) {
170-
d.estimatedSize += estimatedLinkSize(name, linkCid)
175+
d.estimatedSize += linksize.LinkSizeFunction(name, linkCid)
171176
}
172177

173178
func (d *BasicDirectory) removeFromEstimatedSize(name string, linkCid cid.Cid) {
174-
d.estimatedSize -= estimatedLinkSize(name, linkCid)
179+
d.estimatedSize -= linksize.LinkSizeFunction(name, linkCid)
175180
if d.estimatedSize < 0 {
176181
// Something has gone very wrong. Log an error and recompute the
177182
// size from scratch.
@@ -208,10 +213,10 @@ func (d *BasicDirectory) needsToSwitchToHAMTDir(name string, nodeToAdd ipld.Node
208213
if err != nil {
209214
return false, err
210215
}
211-
operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid)
216+
operationSizeChange -= linksize.LinkSizeFunction(name, entryToRemove.Cid)
212217
}
213218
if nodeToAdd != nil {
214-
operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid())
219+
operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid())
215220
}
216221

217222
return d.estimatedSize+operationSizeChange >= HAMTShardingSize, nil
@@ -437,11 +442,11 @@ func (d *HAMTDirectory) switchToBasic(ctx context.Context) (*BasicDirectory, err
437442
}
438443

439444
func (d *HAMTDirectory) addToSizeChange(name string, linkCid cid.Cid) {
440-
d.sizeChange += estimatedLinkSize(name, linkCid)
445+
d.sizeChange += linksize.LinkSizeFunction(name, linkCid)
441446
}
442447

443448
func (d *HAMTDirectory) removeFromSizeChange(name string, linkCid cid.Cid) {
444-
d.sizeChange -= estimatedLinkSize(name, linkCid)
449+
d.sizeChange -= linksize.LinkSizeFunction(name, linkCid)
445450
}
446451

447452
// Evaluate a switch from HAMTDirectory to BasicDirectory in case the size will
@@ -464,12 +469,12 @@ func (d *HAMTDirectory) needsToSwitchToBasicDir(ctx context.Context, name string
464469
if err != nil {
465470
return false, err
466471
}
467-
operationSizeChange -= estimatedLinkSize(name, entryToRemove.Cid)
472+
operationSizeChange -= linksize.LinkSizeFunction(name, entryToRemove.Cid)
468473
}
469474

470475
// For the AddEntry case compute the size addition of the new entry.
471476
if nodeToAdd != nil {
472-
operationSizeChange += estimatedLinkSize(name, nodeToAdd.Cid())
477+
operationSizeChange += linksize.LinkSizeFunction(name, nodeToAdd.Cid())
473478
}
474479

475480
if d.sizeChange+operationSizeChange >= 0 {
@@ -506,7 +511,7 @@ func (d *HAMTDirectory) sizeBelowThreshold(ctx context.Context, sizeChange int)
506511
return false, linkResult.Err
507512
}
508513

509-
partialSize += estimatedLinkSize(linkResult.Link.Name, linkResult.Link.Cid)
514+
partialSize += linksize.LinkSizeFunction(linkResult.Link.Name, linkResult.Link.Cid)
510515
if partialSize+sizeChange >= HAMTShardingSize {
511516
// We have already fetched enough shards to assert we are
512517
// above the threshold, so no need to keep fetching.
@@ -581,17 +586,6 @@ func (d *UpgradeableDirectory) AddChild(ctx context.Context, name string, nd ipl
581586
return nil
582587
}
583588

584-
func (d *UpgradeableDirectory) getDagService() ipld.DAGService {
585-
switch v := d.Directory.(type) {
586-
case *BasicDirectory:
587-
return v.dserv
588-
case *HAMTDirectory:
589-
return v.dserv
590-
default:
591-
panic("unknown directory type")
592-
}
593-
}
594-
595589
// RemoveChild implements the `Directory` interface. Used in the case where we wrap
596590
// a HAMTDirectory that might need to be downgraded to a BasicDirectory. The
597591
// upgrade path is in AddChild.

0 commit comments

Comments
 (0)