From e424b72eabe151e7e4ffbf7efec02b4c28688c62 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Thu, 12 May 2022 21:00:03 +1000 Subject: [PATCH] feat: introduce UIntNode interface, used within DAG-CBOR codec --- codec/dagcbor/marshal.go | 21 +++++--- codec/dagcbor/roundtrip_test.go | 65 ++++++++++++++++++++++++ codec/dagcbor/unmarshal.go | 8 ++- datamodel/node.go | 15 ++++++ node/basicnode/int.go | 88 +++++++++++++++++++++++++++++++-- 5 files changed, 187 insertions(+), 10 deletions(-) diff --git a/codec/dagcbor/marshal.go b/codec/dagcbor/marshal.go index 608f8a3e..a7ee1620 100644 --- a/codec/dagcbor/marshal.go +++ b/codec/dagcbor/marshal.go @@ -99,13 +99,22 @@ func marshal(n datamodel.Node, tk *tok.Token, sink shared.TokenSink, options Enc _, err = sink.Step(tk) return err case datamodel.Kind_Int: - v, err := n.AsInt() - if err != nil { - return err + if uin, ok := n.(datamodel.UintNode); ok { + v, err := uin.AsUint() + if err != nil { + return err + } + tk.Type = tok.TUint + tk.Uint = v + } else { + v, err := n.AsInt() + if err != nil { + return err + } + tk.Type = tok.TInt + tk.Int = v } - tk.Type = tok.TInt - tk.Int = int64(v) - _, err = sink.Step(tk) + _, err := sink.Step(tk) return err case datamodel.Kind_Float: v, err := n.AsFloat() diff --git a/codec/dagcbor/roundtrip_test.go b/codec/dagcbor/roundtrip_test.go index babce5e5..d897611c 100644 --- a/codec/dagcbor/roundtrip_test.go +++ b/codec/dagcbor/roundtrip_test.go @@ -3,12 +3,15 @@ package dagcbor import ( "bytes" "crypto/rand" + "encoding/hex" + "math" "strings" "testing" qt "github.com/frankban/quicktest" cid "github.com/ipfs/go-cid" + "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/fluent" cidlink "github.com/ipld/go-ipld-prime/linking/cid" "github.com/ipld/go-ipld-prime/node/basicnode" @@ -115,3 +118,65 @@ func TestRoundtripLinksAndBytes(t *testing.T) { reconstructed := nb.Build() qt.Check(t, reconstructed, nodetests.NodeContentEquals, linkByteNode) } + +func TestInts(t *testing.T) 
{ + data := []struct { + name string + hex string + value uint64 + intValue int64 + intErr string + decodeErr string + }{ + {"max uint64", "1bffffffffffffffff", math.MaxUint64, 0, "unsigned integer out of range of int64 type", ""}, + {"max int64", "1b7fffffffffffffff", math.MaxInt64, math.MaxInt64, "", ""}, + {"1", "01", 1, 1, "", ""}, + {"0", "00", 0, 0, "", ""}, + {"-1", "20", 0, -1, "", ""}, + {"min int64", "3b7fffffffffffffff", 0, math.MinInt64, "", ""}, + {"~min uint64", "3bfffffffffffffffe", 0, 0, "", "cbor: negative integer out of rage of int64 type"}, + // TODO: 3bffffffffffffffff isn't properly handled by refmt, it's coerced to zero + // MaxUint64 gets overflowed here: https://github.com/polydawn/refmt/blob/30ac6d18308e584ca6a2e74ba81475559db94c5f/cbor/cborDecoderTerminals.go#L75 + } + + for _, td := range data { + t.Run(td.name, func(t *testing.T) { + buf, err := hex.DecodeString(td.hex) // CBOR-encoded bytes for this case + qt.Assert(t, err, qt.IsNil) + nb := basicnode.Prototype.Any.NewBuilder() + err = Decode(nb, bytes.NewReader(buf)) + if td.decodeErr != "" { + qt.Assert(t, err, qt.IsNotNil) + qt.Assert(t, err.Error(), qt.Equals, td.decodeErr) + return + } + qt.Assert(t, err, qt.IsNil) + n := nb.Build() + + ii, err := n.AsInt() + if td.intErr != "" { + qt.Assert(t, err.Error(), qt.Equals, td.intErr) + } else { + qt.Assert(t, err, qt.IsNil) + qt.Assert(t, ii, qt.Equals, int64(td.intValue)) + } + + // if the number is outside of the positive int64 range, we should be able + // to access it as a UintNode and be able to access the full uint64 range + uin, ok := n.(datamodel.UintNode) + if td.value <= math.MaxInt64 { + qt.Assert(t, ok, qt.IsFalse) + } else { + qt.Assert(t, ok, qt.IsTrue) + val, err := uin.AsUint() + qt.Assert(t, err, qt.IsNil) + qt.Assert(t, val, qt.Equals, uint64(td.value)) + } + + var byts bytes.Buffer + err = Encode(n, &byts) + qt.Assert(t, err, qt.IsNil) + qt.Assert(t, hex.EncodeToString(byts.Bytes()), qt.Equals, td.hex) + }) + } +} diff --git 
a/codec/dagcbor/unmarshal.go b/codec/dagcbor/unmarshal.go index 34912ad1..37d72cc7 100644 --- a/codec/dagcbor/unmarshal.go +++ b/codec/dagcbor/unmarshal.go @@ -13,6 +13,7 @@ import ( "github.com/ipld/go-ipld-prime/datamodel" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/node/basicnode" ) var ( @@ -275,7 +276,12 @@ func unmarshal2(na datamodel.NodeAssembler, tokSrc shared.TokenSource, tk *tok.T if *gas < 0 { return ErrAllocationBudgetExceeded } - return na.AssignInt(int64(tk.Uint)) // FIXME overflow check + // note that this pushes any overflow errors up the stack when AsInt() may + // be called on a UintNode that is too large to cast to an int64 + if tk.Uint > math.MaxInt64 { + return na.AssignNode(basicnode.NewUint(tk.Uint)) + } + return na.AssignInt(int64(tk.Uint)) case tok.TFloat64: *gas -= 1 if *gas < 0 { diff --git a/datamodel/node.go b/datamodel/node.go index 30ae6bc8..625f472d 100644 --- a/datamodel/node.go +++ b/datamodel/node.go @@ -167,6 +167,21 @@ type Node interface { Prototype() NodePrototype } +// UintNode is an optional interface that can be used to represent an Int node +// that provides access to the full uint64 range. +// +// EXPERIMENTAL: this API is experimental and may be changed or removed in a +// future release. A future iteration may replace this with a BigInt interface to +// access a larger range of integers that may be enabled by alternative codecs. +type UintNode interface { + Node + + // AsUint returns a uint64 representing the underlying integer if possible. + // This may return an error if the Node represents a negative integer that + // cannot be represented as a uint64. + AsUint() (uint64, error) +} + // LargeBytesNode is an optional interface extending a Bytes node that allows its // contents to be accessed through an io.ReadSeeker instead of a []byte slice. 
Use of // an io.Reader is encouraged, as it allows for streaming large byte slices diff --git a/node/basicnode/int.go b/node/basicnode/int.go index 5d330226..3ef4653d 100644 --- a/node/basicnode/int.go +++ b/node/basicnode/int.go @@ -1,26 +1,40 @@ package basicnode import ( + "fmt" + "math" + "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/node/mixins" ) var ( _ datamodel.Node = plainInt(0) + _ datamodel.Node = plainUint(0) + _ datamodel.UintNode = plainUint(0) _ datamodel.NodePrototype = Prototype__Int{} _ datamodel.NodeBuilder = &plainInt__Builder{} _ datamodel.NodeAssembler = &plainInt__Assembler{} ) func NewInt(value int64) datamodel.Node { - v := plainInt(value) - return &v + return plainInt(value) +} + +// NewUint creates a new uint64-backed Node which will behave as a plain Int +// node but also conforms to the datamodel.UintNode interface which can access +// the full uint64 range. +// +// EXPERIMENTAL: this API is experimental and may be changed or removed in a +// future release. +func NewUint(value uint64) datamodel.Node { + return plainUint(value) } // plainInt is a simple boxed int that complies with datamodel.Node. 
type plainInt int64 -// -- Node interface methods --> +// -- Node interface methods for plainInt --> func (plainInt) Kind() datamodel.Kind { return datamodel.Kind_Int @@ -74,6 +88,74 @@ func (plainInt) Prototype() datamodel.NodePrototype { return Prototype__Int{} } +// plainUint is a simple boxed uint64 that complies with datamodel.Node, +// allowing representation of the uint64 range above the int64 maximum via the +// UintNode interface +type plainUint uint64 + +// -- Node interface methods for plainUint --> + +func (plainUint) Kind() datamodel.Kind { + return datamodel.Kind_Int +} +func (plainUint) LookupByString(string) (datamodel.Node, error) { + return mixins.Int{TypeName: "int"}.LookupByString("") +} +func (plainUint) LookupByNode(key datamodel.Node) (datamodel.Node, error) { + return mixins.Int{TypeName: "int"}.LookupByNode(nil) +} +func (plainUint) LookupByIndex(idx int64) (datamodel.Node, error) { + return mixins.Int{TypeName: "int"}.LookupByIndex(0) +} +func (plainUint) LookupBySegment(seg datamodel.PathSegment) (datamodel.Node, error) { + return mixins.Int{TypeName: "int"}.LookupBySegment(seg) +} +func (plainUint) MapIterator() datamodel.MapIterator { + return nil +} +func (plainUint) ListIterator() datamodel.ListIterator { + return nil +} +func (plainUint) Length() int64 { + return -1 +} +func (plainUint) IsAbsent() bool { + return false +} +func (plainUint) IsNull() bool { + return false +} +func (plainUint) AsBool() (bool, error) { + return mixins.Int{TypeName: "int"}.AsBool() +} +func (n plainUint) AsInt() (int64, error) { + if uint64(n) > uint64(math.MaxInt64) { + return -1, fmt.Errorf("unsigned integer out of range of int64 type") + } + return int64(n), nil +} +func (plainUint) AsFloat() (float64, error) { + return mixins.Int{TypeName: "int"}.AsFloat() +} +func (plainUint) AsString() (string, error) { + return mixins.Int{TypeName: "int"}.AsString() +} +func (plainUint) AsBytes() ([]byte, error) { + return mixins.Int{TypeName: "int"}.AsBytes() +} 
+func (plainUint) AsLink() (datamodel.Link, error) { + return mixins.Int{TypeName: "int"}.AsLink() +} +func (plainUint) Prototype() datamodel.NodePrototype { + return Prototype__Int{} +} + +// allows plainUint to conform to the datamodel.UintNode interface + +func (n plainUint) AsUint() (uint64, error) { + return uint64(n), nil +} + // -- NodePrototype --> type Prototype__Int struct{}