Skip to content

Commit

Permalink
Extract multi{codec,hash} registries better.
Browse files Browse the repository at this point in the history
And, make a package which can be imported to register "all" of the
multihashes.  (Or at least all of them that you would've expected
from go-multihash.)

There are also packages that are split roughly per the transitive
dependency each brings in, so you can pick and choose.

This cascaded into more work than I might've expected.
Turns out a handful of the things we have multihash identifiers for
actually *do not* implement the standard hash.Hash contract at all.
For these, I've made small shims.

Test fixtures across the library switch to using sha2-512.
Previously I had written a bunch of them to use sha3 variants,
but since that is not in the standard library, I'm going to move away
from that so as not to re-bloat the transitive dependency tree
just for the tests and examples.
  • Loading branch information
warpfork committed Feb 25, 2021
1 parent a1482fe commit 8fef531
Show file tree
Hide file tree
Showing 15 changed files with 281 additions and 66 deletions.
6 changes: 3 additions & 3 deletions codec/dagcbor/multicodec.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"github.com/polydawn/refmt/cbor"

"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec"
"github.com/ipld/go-ipld-prime/multicodec"
)

var (
Expand All @@ -15,8 +15,8 @@ var (
)

func init() {
codec.MulticodecEncoderRegistry[0x71] = Encode
codec.MulticodecDecoderRegistry[0x71] = Decode
multicodec.EncoderRegistry[0x71] = Encode
multicodec.DecoderRegistry[0x71] = Decode
}

func Decode(na ipld.NodeAssembler, r io.Reader) error {
Expand Down
2 changes: 1 addition & 1 deletion codec/dagcbor/roundtripCidlink_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func TestRoundtripCidlink(t *testing.T) {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x71,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}
lsys := cidlink.DefaultLinkSystem()
Expand Down
2 changes: 1 addition & 1 deletion codec/dagcbor/roundtrip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func TestRoundtripLinksAndBytes(t *testing.T) {
lnk := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x71,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}.BuildLink([]byte{1, 2, 3, 4}) // dummy value, content does not matter to this test.

Expand Down
6 changes: 3 additions & 3 deletions codec/dagjson/multicodec.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/polydawn/refmt/json"

"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec"
"github.com/ipld/go-ipld-prime/multicodec"
)

var (
Expand All @@ -16,8 +16,8 @@ var (
)

func init() {
codec.MulticodecEncoderRegistry[0x0129] = Encode
codec.MulticodecDecoderRegistry[0x0129] = Decode
multicodec.EncoderRegistry[0x0129] = Encode
multicodec.DecoderRegistry[0x0129] = Decode
}

func Decode(na ipld.NodeAssembler, r io.Reader) error {
Expand Down
4 changes: 2 additions & 2 deletions codec/dagjson/roundtripCidlink_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func TestRoundtripCidlink(t *testing.T) {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x0129,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}
lsys := cidlink.DefaultLinkSystem()
Expand Down Expand Up @@ -48,7 +48,7 @@ func TestUnmarshalTrickyMapContainingLink(t *testing.T) {
lnk := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x71,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}.BuildLink([]byte{1, 2, 3, 4}) // dummy value, content does not matter to this test.

Expand Down
9 changes: 5 additions & 4 deletions linking/cid/linksystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@ import (
"hash"

"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec"
"github.com/ipld/go-ipld-prime/multicodec"
"github.com/ipld/go-ipld-prime/multihash"
)

func DefaultLinkSystem() ipld.LinkSystem {
return ipld.LinkSystem{
EncoderChooser: func(lp ipld.LinkPrototype) (ipld.Encoder, error) {
switch lp2 := lp.(type) {
case LinkPrototype:
fn, ok := codec.MulticodecEncoderRegistry[lp2.GetCodec()]
fn, ok := multicodec.EncoderRegistry[lp2.GetCodec()]
if !ok {
return nil, fmt.Errorf("no encoder registered for multicodec indicator 0x%x", lp2.GetCodec())
}
Expand All @@ -26,7 +27,7 @@ func DefaultLinkSystem() ipld.LinkSystem {
lp := lnk.Prototype()
switch lp2 := lp.(type) {
case LinkPrototype:
fn, ok := codec.MulticodecDecoderRegistry[lp2.GetCodec()]
fn, ok := multicodec.DecoderRegistry[lp2.GetCodec()]
if !ok {
return nil, fmt.Errorf("no decoder registered for multicodec indicator 0x%x", lp2.GetCodec())
}
Expand All @@ -38,7 +39,7 @@ func DefaultLinkSystem() ipld.LinkSystem {
HasherChooser: func(lp ipld.LinkPrototype) (hash.Hash, error) {
switch lp2 := lp.(type) {
case LinkPrototype:
fn, ok := codec.MultihashRegistry[lp2.MhType]
fn, ok := multihash.Registry[lp2.MhType]
if !ok {
return nil, fmt.Errorf("no hasher registered for multihash indicator 0x%x", lp2.MhType)
}
Expand Down
8 changes: 4 additions & 4 deletions linkingExamples_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ func ExampleStoringLink() {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1, // Usually '1'.
Codec: 0x71, // 0x71 means "dag-cbor" -- See the multicodecs table: https://github.com/multiformats/multicodec/
MhType: 0x15, // 0x15 means "sha3-384" -- See the multicodecs table: https://github.com/multiformats/multicodec/
MhLength: 48, // sha3-384 hash has a 48-byte sum.
MhType: 0x13, // 0x13 means "sha2-512" -- See the multicodecs table: https://github.com/multiformats/multicodec/
MhLength: 64, // sha2-512 hash has a 64-byte sum.
}}

// And we need some data to link to! Here's a quick piece of example data:
Expand Down Expand Up @@ -85,13 +85,13 @@ func ExampleStoringLink() {
// We'll pick this data back up again in the example for loading.

// Output:
// link: bafyrkmbukvrgzcs6qlsh4wvkvbe5wp7sclcblfnapnb2xfznisbykpbnlocet2qzley3cpxofoxqrnqgm3ta
// link: bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk
// concrete type: `cidlink.Link`
}

func ExampleLoadingLink() {
// Let's say we want to load this link (it's the same one we just created in the example above).
cid, _ := cid.Decode("bafyrkmbukvrgzcs6qlsh4wvkvbe5wp7sclcblfnapnb2xfznisbykpbnlocet2qzley3cpxofoxqrnqgm3ta")
cid, _ := cid.Decode("bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk")
lnk := cidlink.Link{cid}

// Let's get a LinkSystem. We're going to be working with CID links,
Expand Down
51 changes: 5 additions & 46 deletions codec/multicodecs.go → multicodec/multicodec.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
package codec
package multicodec

import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"hash"

"golang.org/x/crypto/sha3"

"github.com/ipld/go-ipld-prime"
)

// MulticodecEncoderRegistry is a simple map which maps a multicodec indicator number
// EncoderRegistry is a simple map which maps a multicodec indicator number
// to an ipld.Encoder function.
//
// Packages which implement an IPLD codec and have a multicodec number reserved in
Expand All @@ -29,9 +21,9 @@ import (
// If more than one package registers for the same multicodec indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
var MulticodecEncoderRegistry = make(map[uint64]ipld.Encoder)
var EncoderRegistry = make(map[uint64]ipld.Encoder)

// MulticodecDecoderRegistry is a simple map which maps a multicodec indicator number
// DecoderRegistry is a simple map which maps a multicodec indicator number
// to an ipld.Decoder function.
//
// Packages which implement an IPLD codec and have a multicodec number reserved in
Expand All @@ -48,37 +40,4 @@ var MulticodecEncoderRegistry = make(map[uint64]ipld.Encoder)
// If more than one package registers for the same multicodec indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
var MulticodecDecoderRegistry = make(map[uint64]ipld.Decoder)

// MultihashRegistry is a simple map which maps a multihash indicator number
// to a standard golang Hash interface.
//
// Hashers which are available in the golang stdlib are registered here automatically.
// Some hashes from x/crypto are also included out-of-the-box.
//
// Packages which want to register more hashing functions and have a multihash number reserved in
// https://github.com/multiformats/multicodec/blob/master/table.csv
// are encouraged to do so at package init time.
// (Doing this at package init time ensures this map can be accessed without race conditions.)
//
// The linking/cid.DefaultLinkSystem will use this map to find decoders
// to use when deserializing data from storage.
//
// This registry map is only used for default behaviors.
// If you don't want to rely on it, you can always construct your own LinkSystem.
// (For this reason, there's no special effort made to detect conflicting registrations in this map.
// If more than one package registers for the same multicodec indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
var MultihashRegistry = make(map[uint64]func() hash.Hash)

func init() {
MultihashRegistry[0xd5] = md5.New
MultihashRegistry[0x11] = sha1.New
MultihashRegistry[0x12] = sha256.New
MultihashRegistry[0x13] = sha512.New
MultihashRegistry[0x14] = sha3.New512
MultihashRegistry[0x15] = sha3.New384
MultihashRegistry[0x16] = sha3.New256
MultihashRegistry[0x17] = sha3.New224
}
var DecoderRegistry = make(map[uint64]ipld.Decoder)
51 changes: 51 additions & 0 deletions multihash/errata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package multihash

import (
"bytes"
"crypto/sha256"
"hash"
)

// identityMultihash adapts a bytes.Buffer to the hash.Hash interface for the
// "identity" "hash": the digest it reports is simply the bytes written so far.
//
// Write and Reset are promoted from the embedded bytes.Buffer. Those have
// pointer receivers, so only *identityMultihash satisfies hash.Hash; the
// remaining methods use pointer receivers as well so the buffer is never copied.
type identityMultihash struct {
	bytes.Buffer
}

// Compile-time check that the shim really fulfils the hash.Hash contract.
var _ hash.Hash = (*identityMultihash)(nil)

// BlockSize returns a fixed, arbitrary block size.
func (*identityMultihash) BlockSize() int {
	return 32 // A preferred block size is nonsense for the "identity" "hash". An arbitrary but unsurprising and positive nonzero number has been chosen to minimize the odds of fascinating bugs.
}

// Size returns the number of bytes Sum will append, i.e. the number of bytes
// currently held in the buffer.
func (x *identityMultihash) Size() int {
	return x.Len()
}

// Sum appends the buffered bytes to digest and returns the resulting slice,
// as the hash.Hash contract requires. The internal state is not modified, and
// the result never aliases the internal buffer when digest has no spare capacity
// (e.g. when it is nil).
func (x *identityMultihash) Sum(digest []byte) []byte {
	return append(digest, x.Bytes()...)
}

// doubleSha256 is a hash.Hash shim which yields sha256(sha256(data)).
//
// The main field is the first-pass sha256 hasher that absorbs written data;
// the second pass is applied in Sum. The zero value is ready to use: main is
// created lazily on first use, so &doubleSha256{} is a valid hasher.
type doubleSha256 struct {
	main hash.Hash
}

// Compile-time check that the shim really fulfils the hash.Hash contract.
var _ hash.Hash = (*doubleSha256)(nil)

// inner returns the first-pass hasher, creating it if it has not been set yet.
func (x *doubleSha256) inner() hash.Hash {
	if x.main == nil {
		x.main = sha256.New()
	}
	return x.main
}

// Write feeds body into the first-pass hasher.
func (x *doubleSha256) Write(body []byte) (int, error) {
	return x.inner().Write(body)
}

// BlockSize reports the block size of sha256.
func (*doubleSha256) BlockSize() int {
	return sha256.BlockSize
}

// Size reports the digest length, which is that of a single sha256.
func (*doubleSha256) Size() int {
	return sha256.Size
}

// Reset discards everything written so far.
func (x *doubleSha256) Reset() {
	x.inner().Reset()
}

// Sum appends sha256(sha256(data written so far)) to digest and returns the
// resulting slice. The state of the hasher is not changed.
func (x *doubleSha256) Sum(digest []byte) []byte {
	// hash.Hash.Sum *appends* to its argument, so hand it a zero-length slice
	// backed by the scratch array and use the slice it returns. Passing a
	// full-length slice would make the first-pass digest land in a fresh
	// allocation and leave the scratch array all zeroes.
	var scratch [sha256.Size]byte
	first := x.inner().Sum(scratch[:0])
	second := sha256.Sum256(first)
	return append(digest, second[:]...)
}
48 changes: 48 additions & 0 deletions multihash/multihash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package multihash

import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"hash"
)

// Registry is a simple map which maps a multihash indicator number
// to a function that returns a new standard golang hash.Hash.
//
// Multihash indicator numbers are reserved and described in
// https://github.com/multiformats/multicodec/blob/master/table.csv .
// The keys used in this map must match those reservations.
//
// Hashers which are available in the golang stdlib are registered here automatically.
//
// Packages which want to register more hashing functions (and have a multihash number reserved!)
// are encouraged to do so at package init time.
// (Doing this at package init time ensures this map can be accessed without race conditions.)
//
// The linking/cid.DefaultLinkSystem will use this map to find hashers
// to use when serializing data and computing links,
// and when loading data from storage and verifying its integrity.
//
// This registry map is only used for default behaviors.
// If you don't want to rely on it, you can always construct your own LinkSystem.
// (For this reason, there's no special effort made to detect conflicting registrations in this map.
// If more than one package registers for the same multihash indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
// This should never be done to make behavior alterations
// (hash functions are well standardized and so is the multihash indicator table),
// but may be relevant if one is really itching to try out different hash implementations for performance reasons.
var Registry = make(map[uint64]func() hash.Hash)

// init registers the hashers that need nothing beyond the golang standard
// library (plus the small shims defined in this package).
func init() {
	Registry[0x00] = func() hash.Hash { return &identityMultihash{} }
	Registry[0xd5] = md5.New
	Registry[0x11] = sha1.New
	Registry[0x12] = sha256.New
	Registry[0x13] = sha512.New
	// Registry[0x1f] = sha256.New224 // SOON
	// Registry[0x20] = sha512.New384 // SOON
	// doubleSha256 forwards Write/Reset/Sum to its main hasher, so that field
	// must be populated here; a bare &doubleSha256{} would carry a nil main.
	Registry[0x56] = func() hash.Hash { return &doubleSha256{main: sha256.New()} }
}
23 changes: 23 additions & 0 deletions multihash/register/all/multihash_all.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
This package has no purpose except to perform registration of multihashes.
It is meant to be used as a side-effecting import, e.g.
import (
_ "github.com/ipld/go-ipld-prime/multihash/register/all"
)
This package registers many multihashes at once.
Importing it will increase the size of your dependency tree significantly.
It's recommended that you import this package if you're building some
kind of data broker application, which may need to handle many different kinds of hashes;
if you're building an application which you know only handles a specific hash,
importing this package may bloat your builds unnecessarily.
*/
package all

import (
_ "github.com/ipld/go-ipld-prime/multihash/register/blake2"
_ "github.com/ipld/go-ipld-prime/multihash/register/murmur3"
_ "github.com/ipld/go-ipld-prime/multihash/register/sha3"
)
48 changes: 48 additions & 0 deletions multihash/register/blake2/multihash_blake2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
This package has no purpose except to perform registration of multihashes.
It is meant to be used as a side-effecting import, e.g.
import (
_ "github.com/ipld/go-ipld-prime/multihash/register/blake2"
)
This package registers several multihashes for the blake2 family
(both the 's' and the 'b' variants, and in a variety of sizes).
*/
package blake2

import (
"hash"

"github.com/minio/blake2b-simd"
"golang.org/x/crypto/blake2s"

"github.com/ipld/go-ipld-prime/multihash"
)

// Bounds of the multihash indicator ranges handled by this package.
const (
// BLAKE2B_MIN is the first blake2b indicator; init registers it with a 1-byte digest size.
BLAKE2B_MIN = 0xb201
// BLAKE2B_MAX is the last blake2b indicator; init registers it with a 64-byte digest size.
BLAKE2B_MAX = 0xb240
// BLAKE2S_MIN is the first blake2s indicator. Only BLAKE2S_MIN+31 (256 bit) is registered by init.
BLAKE2S_MIN = 0xb241
// BLAKE2S_MAX is the last blake2s indicator.
BLAKE2S_MAX = 0xb260
)

// init adds the blake2 family to multihash.Registry:
// the single 256-bit blake2s variant, and every blake2b indicator from
// BLAKE2B_MIN through BLAKE2B_MAX.
func init() {
	// BLAKE2S
	// This package only enables support for 32byte (256 bit) blake2s.
	multihash.Registry[BLAKE2S_MIN+31] = func() hash.Hash {
		hasher, _ := blake2s.New256(nil)
		return hasher
	}

	// BLAKE2B
	// Each indicator in the range selects a digest size:
	// the first indicator means 1 byte, the next 2 bytes, and so on.
	for code := uint64(BLAKE2B_MIN); code <= BLAKE2B_MAX; code++ {
		// Declared inside the loop body so every closure captures its own copy.
		digestLen := int(code - BLAKE2B_MIN + 1)
		multihash.Registry[code] = func() hash.Hash {
			cfg := blake2b.Config{Size: uint8(digestLen)}
			h, err := blake2b.New(&cfg)
			if err != nil {
				panic(err)
			}
			return h
		}
	}
}
Loading

0 comments on commit 8fef531

Please sign in to comment.