// subsliceOffset returns the byte offset at which subslice starts within data.
//
// subslice is expected to be a sub-slice of data, i.e. to share its backing
// array. The start addresses of both slices are read through reflect and are
// only compared and subtracted as integers; they are never turned back into
// pointers.
//
// An empty subslice is located like any other one when its address falls
// inside data, so that an empty highlight still reports its real position.
// When an empty subslice cannot be located (nil, or not part of data), 0 is
// returned because there are no bytes to highlight.
//
// The function panics when a non-empty subslice does not lie entirely within
// data.
func subsliceOffset(data []byte, subslice []byte) int {
	dataPtr := reflect.ValueOf(data).Pointer()
	subPtr := reflect.ValueOf(subslice).Pointer()

	// Compare the unsigned addresses before subtracting, so that a subslice
	// starting before data cannot wrap around into a plausible offset.
	inside := subPtr >= dataPtr && subPtr-dataPtr <= uintptr(len(data))

	if len(subslice) == 0 {
		if !inside {
			return 0
		}
		return int(subPtr - dataPtr)
	}

	if !inside {
		panic("subslice is not within data")
	}

	// The difference is at most len(data) here, so it always fits in an int.
	offset := int(subPtr - dataPtr)

	// The end of subslice must not run past the end of data either.
	if len(subslice) > len(data)-offset {
		panic("subslice is not within data")
	}

	return offset
}
+ + doc := `a = 1 +b = 3__4` + + var v map[string]int + err := Unmarshal([]byte(doc), &v) + + var derr *DecodeError + if !errors.As(err, &derr) { + t.Fatal("error not in expected format") + } + + row, col := derr.Position() + // The error should be on line 2 where "3__4" is + if row != 2 { + t.Errorf("expected error on row 2, got row %d", row) + } + // Column should point to the "__" part (after "3") + if col < 5 { + t.Errorf("expected error at column >= 5, got column %d", col) + } +} + +func TestDecodeError_Position(t *testing.T) { + // Test that error positions are correctly reported for various error locations + examples := []struct { + name string + doc string + expectedRow int + minCol int + }{ + { + name: "error on first line", + doc: `a = 1__2`, + expectedRow: 1, + minCol: 5, + }, + { + name: "error on second line", + doc: "a = 1\nb = 2__3", + expectedRow: 2, + minCol: 5, + }, + { + name: "error on third line", + doc: "a = 1\nb = 2\nc = 3__4", + expectedRow: 3, + minCol: 5, + }, + } + + for _, e := range examples { + t.Run(e.name, func(t *testing.T) { + var v map[string]int + err := Unmarshal([]byte(e.doc), &v) + + var derr *DecodeError + if !errors.As(err, &derr) { + t.Fatal("error not in expected format") + } + + row, col := derr.Position() + assert.Equal(t, e.expectedRow, row) + if col < e.minCol { + t.Errorf("expected column >= %d, got %d", e.minCol, col) + } + }) + } +} + func TestStrictErrorUnwrap(t *testing.T) { fo := bytes.NewBufferString(` Missing = 1 diff --git a/internal/danger/danger.go b/internal/danger/danger.go deleted file mode 100644 index c7ac8444..00000000 --- a/internal/danger/danger.go +++ /dev/null @@ -1,64 +0,0 @@ -// Package danger provides optimized unsafe functions. 
-package danger - -import ( - "fmt" - "unsafe" -) - -const maxInt = uintptr(int(^uint(0) >> 1)) - -func SubsliceOffset(data []byte, subslice []byte) int { - datap := uintptr(unsafe.Pointer(unsafe.SliceData(data))) // #nosec G103 - hlp := uintptr(unsafe.Pointer(unsafe.SliceData(subslice))) // #nosec G103 - - if hlp < datap { - panic(fmt.Errorf("subslice address (%d) is before data address (%d)", hlp, datap)) - } - offset := hlp - datap - - if offset > maxInt { - panic(fmt.Errorf("slice offset larger than int (%d)", offset)) - } - - intoffset := int(offset) - - if intoffset > len(data) { - panic(fmt.Errorf("slice offset (%d) is farther than data length (%d)", intoffset, len(data))) - } - - if intoffset+len(subslice) > len(data) { - panic(fmt.Errorf("slice ends (%d+%d) is farther than data length (%d)", intoffset, len(subslice), len(data))) - } - - return intoffset -} - -func BytesRange(start []byte, end []byte) []byte { - if start == nil || end == nil { - panic("cannot call BytesRange with nil") - } - - startp := uintptr(unsafe.Pointer(unsafe.SliceData(start))) // #nosec G103 - endp := uintptr(unsafe.Pointer(unsafe.SliceData(end))) // #nosec G103 - - if startp > endp { - panic(fmt.Errorf("start pointer address (%d) is after end pointer address (%d)", startp, endp)) - } - - l := len(start) - endLen := int(endp-startp) + len(end) - if endLen > l { - l = endLen - } - - if l > cap(start) { - panic("range length is larger than capacity") - } - - return start[:l] -} - -func Stride(ptr unsafe.Pointer, size uintptr, offset int) unsafe.Pointer { - return unsafe.Add(ptr, size*uintptr(offset)) -} diff --git a/internal/danger/danger_test.go b/internal/danger/danger_test.go deleted file mode 100644 index 6569cdba..00000000 --- a/internal/danger/danger_test.go +++ /dev/null @@ -1,176 +0,0 @@ -package danger_test - -import ( - "testing" - "unsafe" - - "github.com/pelletier/go-toml/v2/internal/assert" - "github.com/pelletier/go-toml/v2/internal/danger" -) - -func 
TestSubsliceOffsetValid(t *testing.T) { - examples := []struct { - desc string - test func() ([]byte, []byte) - offset int - }{ - { - desc: "simple", - test: func() ([]byte, []byte) { - data := []byte("hello") - return data, data[1:] - }, - offset: 1, - }, - } - - for _, e := range examples { - t.Run(e.desc, func(t *testing.T) { - d, s := e.test() - offset := danger.SubsliceOffset(d, s) - assert.Equal(t, e.offset, offset) - }) - } -} - -func TestSubsliceOffsetInvalid(t *testing.T) { - examples := []struct { - desc string - test func() ([]byte, []byte) - }{ - { - desc: "unrelated arrays", - test: func() ([]byte, []byte) { - return []byte("one"), []byte("two") - }, - }, - { - desc: "slice starts before data", - test: func() ([]byte, []byte) { - full := []byte("hello world") - return full[5:], full[1:] - }, - }, - { - desc: "slice starts after data", - test: func() ([]byte, []byte) { - full := []byte("hello world") - return full[:3], full[5:] - }, - }, - { - desc: "slice ends after data", - test: func() ([]byte, []byte) { - full := []byte("hello world") - return full[:5], full[3:8] - }, - }, - } - - for _, e := range examples { - t.Run(e.desc, func(t *testing.T) { - d, s := e.test() - assert.Panics(t, func() { - danger.SubsliceOffset(d, s) - }) - }) - } -} - -func TestStride(t *testing.T) { - a := []byte{1, 2, 3, 4} - x := &a[1] - n := (*byte)(danger.Stride(unsafe.Pointer(x), unsafe.Sizeof(byte(0)), 1)) - assert.Equal(t, &a[2], n) - n = (*byte)(danger.Stride(unsafe.Pointer(x), unsafe.Sizeof(byte(0)), -1)) - assert.Equal(t, &a[0], n) -} - -func TestBytesRange(t *testing.T) { - type fn = func() ([]byte, []byte) - examples := []struct { - desc string - test fn - expected []byte - }{ - { - desc: "simple", - test: func() ([]byte, []byte) { - full := []byte("hello world") - return full[1:3], full[6:8] - }, - expected: []byte("ello wo"), - }, - { - desc: "full", - test: func() ([]byte, []byte) { - full := []byte("hello world") - return full[0:1], full[len(full)-1:] - }, - 
expected: []byte("hello world"), - }, - { - desc: "end before start", - test: func() ([]byte, []byte) { - full := []byte("hello world") - return full[len(full)-1:], full[0:1] - }, - }, - { - desc: "nils", - test: func() ([]byte, []byte) { - return nil, nil - }, - }, - { - desc: "nils start", - test: func() ([]byte, []byte) { - return nil, []byte("foo") - }, - }, - { - desc: "nils end", - test: func() ([]byte, []byte) { - return []byte("foo"), nil - }, - }, - { - desc: "start is end", - test: func() ([]byte, []byte) { - full := []byte("hello world") - return full[1:3], full[1:3] - }, - expected: []byte("el"), - }, - { - desc: "end contained in start", - test: func() ([]byte, []byte) { - full := []byte("hello world") - return full[1:7], full[2:4] - }, - expected: []byte("ello w"), - }, - { - desc: "different backing arrays", - test: func() ([]byte, []byte) { - one := []byte("hello world") - two := []byte("hello world") - return one, two - }, - }, - } - - for _, e := range examples { - t.Run(e.desc, func(t *testing.T) { - start, end := e.test() - if e.expected == nil { - assert.Panics(t, func() { - danger.BytesRange(start, end) - }) - } else { - res := danger.BytesRange(start, end) - assert.Equal(t, e.expected, res) - } - }) - } -} diff --git a/internal/danger/typeid.go b/internal/danger/typeid.go deleted file mode 100644 index 4de065fa..00000000 --- a/internal/danger/typeid.go +++ /dev/null @@ -1,23 +0,0 @@ -package danger - -import ( - "reflect" - "unsafe" -) - -// TypeID is used as key in encoder and decoder caches to enable using -// the optimize runtime.mapaccess2_fast64 function instead of the more -// expensive lookup if we were to use reflect.Type as map key. -// -// typeID holds the pointer to the reflect.Type value, which is unique -// in the program. 
-// -// https://github.com/segmentio/encoding/blob/master/json/codec.go#L59-L61 -type TypeID unsafe.Pointer - -func MakeTypeID(t reflect.Type) TypeID { - // reflect.Type has the fields: - // typ unsafe.Pointer - // ptr unsafe.Pointer - return TypeID((*[2]unsafe.Pointer)(unsafe.Pointer(&t))[1]) // #nosec G103 -} diff --git a/internal/tracker/seen_test.go b/internal/tracker/seen_test.go index 086b849a..4543f4d8 100644 --- a/internal/tracker/seen_test.go +++ b/internal/tracker/seen_test.go @@ -1,8 +1,8 @@ package tracker import ( + "reflect" "testing" - "unsafe" "github.com/pelletier/go-toml/v2/internal/assert" ) @@ -12,9 +12,10 @@ func TestEntrySize(t *testing.T) { // performance of unmarshaling documents. Should only be increased with care // and a very good reason. maxExpectedEntrySize := 48 + entrySize := int(reflect.TypeOf(entry{}).Size()) assert.True(t, - int(unsafe.Sizeof(entry{})) <= maxExpectedEntrySize, + entrySize <= maxExpectedEntrySize, "Expected entry to be less than or equal to %d, got: %d", - maxExpectedEntrySize, int(unsafe.Sizeof(entry{})), + maxExpectedEntrySize, entrySize, ) } diff --git a/strict.go b/strict.go index 802e7e4d..2a147c02 100644 --- a/strict.go +++ b/strict.go @@ -1,7 +1,6 @@ package toml import ( - "github.com/pelletier/go-toml/v2/internal/danger" "github.com/pelletier/go-toml/v2/internal/tracker" "github.com/pelletier/go-toml/v2/unstable" ) @@ -13,6 +12,9 @@ type strict struct { key tracker.KeyTracker missing []unstable.ParserError + + // Reference to the document for computing key ranges. 
+ doc []byte } func (s *strict) EnterTable(node *unstable.Node) { @@ -53,7 +55,7 @@ func (s *strict) MissingTable(node *unstable.Node) { } s.missing = append(s.missing, unstable.ParserError{ - Highlight: keyLocation(node), + Highlight: s.keyLocation(node), Message: "missing table", Key: s.key.Key(), }) @@ -65,7 +67,7 @@ func (s *strict) MissingField(node *unstable.Node) { } s.missing = append(s.missing, unstable.ParserError{ - Highlight: keyLocation(node), + Highlight: s.keyLocation(node), Message: "missing field", Key: s.key.Key(), }) @@ -88,7 +90,7 @@ func (s *strict) Error(doc []byte) error { return err } -func keyLocation(node *unstable.Node) []byte { +func (s *strict) keyLocation(node *unstable.Node) []byte { k := node.Key() hasOne := k.Next() @@ -96,12 +98,17 @@ func keyLocation(node *unstable.Node) []byte { panic("should not be called with empty key") } - start := k.Node().Data - end := k.Node().Data + // Get the range from the first key to the last key. + firstRaw := k.Node().Raw + lastRaw := firstRaw for k.Next() { - end = k.Node().Data + lastRaw = k.Node().Raw } - return danger.BytesRange(start, end) + // Compute the slice from the document using the ranges. 
+ start := firstRaw.Offset + end := lastRaw.Offset + lastRaw.Length + + return s.doc[start:end] } diff --git a/unmarshaler.go b/unmarshaler.go index 10022f46..afaa48d5 100644 --- a/unmarshaler.go +++ b/unmarshaler.go @@ -12,7 +12,6 @@ import ( "sync/atomic" "time" - "github.com/pelletier/go-toml/v2/internal/danger" "github.com/pelletier/go-toml/v2/internal/tracker" "github.com/pelletier/go-toml/v2/unstable" ) @@ -123,6 +122,7 @@ func (d *Decoder) Decode(v interface{}) error { dec := decoder{ strict: strict{ Enabled: d.strict, + doc: b, }, unmarshalerInterface: d.unmarshalerInterface, } @@ -1300,13 +1300,13 @@ func fieldByIndex(v reflect.Value, path []int) reflect.Value { type fieldPathsMap = map[string][]int -var globalFieldPathsCache atomic.Value // map[danger.TypeID]fieldPathsMap +var globalFieldPathsCache atomic.Value // map[reflect.Type]fieldPathsMap func structFieldPath(v reflect.Value, name string) ([]int, bool) { t := v.Type() - cache, _ := globalFieldPathsCache.Load().(map[danger.TypeID]fieldPathsMap) - fieldPaths, ok := cache[danger.MakeTypeID(t)] + cache, _ := globalFieldPathsCache.Load().(map[reflect.Type]fieldPathsMap) + fieldPaths, ok := cache[t] if !ok { fieldPaths = map[string][]int{} @@ -1317,8 +1317,8 @@ func structFieldPath(v reflect.Value, name string) ([]int, bool) { fieldPaths[strings.ToLower(name)] = path }) - newCache := make(map[danger.TypeID]fieldPathsMap, len(cache)+1) - newCache[danger.MakeTypeID(t)] = fieldPaths + newCache := make(map[reflect.Type]fieldPathsMap, len(cache)+1) + newCache[t] = fieldPaths for k, v := range cache { newCache[k] = v } diff --git a/unstable/ast.go b/unstable/ast.go index a22e9e1b..34ef628c 100644 --- a/unstable/ast.go +++ b/unstable/ast.go @@ -1,10 +1,8 @@ package unstable import ( + "errors" "fmt" - "unsafe" - - "github.com/pelletier/go-toml/v2/internal/danger" ) // Iterator over a sequence of nodes. 
@@ -19,30 +17,39 @@ import ( // // do something with n // } type Iterator struct { + nodes *[]Node + idx int32 started bool - node *Node } // Next moves the iterator forward and returns true if points to a // node, false otherwise. func (c *Iterator) Next() bool { + if c.nodes == nil { + return false + } if !c.started { c.started = true - } else if c.node.Valid() { - c.node = c.node.Next() + } else if c.idx >= 0 { + c.idx = (*c.nodes)[c.idx].next } - return c.node.Valid() + return c.idx >= 0 && int(c.idx) < len(*c.nodes) } // IsLast returns true if the current node of the iterator is the last // one. Subsequent calls to Next() will return false. func (c *Iterator) IsLast() bool { - return c.node.next == 0 + return c.nodes == nil || c.idx < 0 || (*c.nodes)[c.idx].next < 0 } // Node returns a pointer to the node pointed at by the iterator. func (c *Iterator) Node() *Node { - return c.node + if c.nodes == nil || c.idx < 0 { + return nil + } + n := &(*c.nodes)[c.idx] + n.nodes = c.nodes + return n } // Node in a TOML expression AST. @@ -65,11 +72,12 @@ type Node struct { Raw Range // Raw bytes from the input. Data []byte // Node value (either allocated or referencing the input). - // References to other nodes, as offsets in the backing array - // from this node. References can go backward, so those can be - // negative. - next int // 0 if last element - child int // 0 if no child + // Absolute indices into the backing nodes slice. -1 means none. + next int32 + child int32 + + // Reference to the backing nodes slice for navigation. + nodes *[]Node } // Range of bytes in the document. @@ -80,24 +88,24 @@ type Range struct { // Next returns a pointer to the next node, or nil if there is no next node. 
func (n *Node) Next() *Node { - if n.next == 0 { + if n.next < 0 { return nil } - ptr := unsafe.Pointer(n) // #nosec G103 - size := unsafe.Sizeof(Node{}) - return (*Node)(danger.Stride(ptr, size, n.next)) + next := &(*n.nodes)[n.next] + next.nodes = n.nodes + return next } // Child returns a pointer to the first child node of this node. Other children // can be accessed calling Next on the first child. Returns nil if this Node // has no child. func (n *Node) Child() *Node { - if n.child == 0 { + if n.child < 0 { return nil } - ptr := unsafe.Pointer(n) // #nosec G103 - size := unsafe.Sizeof(Node{}) - return (*Node)(danger.Stride(ptr, size, n.child)) + child := &(*n.nodes)[n.child] + child.nodes = n.nodes + return child } // Valid returns true if the node's kind is set (not to Invalid). @@ -111,15 +119,16 @@ func (n *Node) Valid() bool { func (n *Node) Key() Iterator { switch n.Kind { case KeyValue: - value := n.Child() - if !value.Valid() { - panic("KeyValue should have at least two children") + child := n.child + if child < 0 { + panic(errors.New("KeyValue should have at least two children")) } - return Iterator{node: value.Next()} + valueNode := &(*n.nodes)[child] + return Iterator{nodes: n.nodes, idx: valueNode.next} case Table, ArrayTable: - return Iterator{node: n.Child()} + return Iterator{nodes: n.nodes, idx: n.child} default: - panic(fmt.Errorf("key is not supported on a %s", n.Kind)) + panic(fmt.Errorf("Key() is not supported on a %s", n.Kind)) } } @@ -132,5 +141,5 @@ func (n *Node) Value() *Node { // Children returns an iterator over a node's children. func (n *Node) Children() Iterator { - return Iterator{node: n.Child()} + return Iterator{nodes: n.nodes, idx: n.child} } diff --git a/unstable/builder.go b/unstable/builder.go index 9538e30d..e4354985 100644 --- a/unstable/builder.go +++ b/unstable/builder.go @@ -7,15 +7,6 @@ type root struct { nodes []Node } -// Iterator over the top level nodes. 
-func (r *root) Iterator() Iterator { - it := Iterator{} - if len(r.nodes) > 0 { - it.node = &r.nodes[0] - } - return it -} - func (r *root) at(idx reference) *Node { return &r.nodes[idx] } @@ -33,12 +24,10 @@ type builder struct { lastIdx int } -func (b *builder) Tree() *root { - return &b.tree -} - func (b *builder) NodeAt(ref reference) *Node { - return b.tree.at(ref) + n := b.tree.at(ref) + n.nodes = &b.tree.nodes + return n } func (b *builder) Reset() { @@ -48,24 +37,28 @@ func (b *builder) Reset() { func (b *builder) Push(n Node) reference { b.lastIdx = len(b.tree.nodes) + n.next = -1 + n.child = -1 b.tree.nodes = append(b.tree.nodes, n) return reference(b.lastIdx) } func (b *builder) PushAndChain(n Node) reference { newIdx := len(b.tree.nodes) + n.next = -1 + n.child = -1 b.tree.nodes = append(b.tree.nodes, n) if b.lastIdx >= 0 { - b.tree.nodes[b.lastIdx].next = newIdx - b.lastIdx + b.tree.nodes[b.lastIdx].next = int32(newIdx) //nolint:gosec // TOML ASTs are small } b.lastIdx = newIdx return reference(b.lastIdx) } func (b *builder) AttachChild(parent reference, child reference) { - b.tree.nodes[parent].child = int(child) - int(parent) + b.tree.nodes[parent].child = int32(child) //nolint:gosec // TOML ASTs are small } func (b *builder) Chain(from reference, to reference) { - b.tree.nodes[from].next = int(to) - int(from) + b.tree.nodes[from].next = int32(to) //nolint:gosec // TOML ASTs are small } diff --git a/unstable/parser.go b/unstable/parser.go index b17a32cd..d48e07f3 100644 --- a/unstable/parser.go +++ b/unstable/parser.go @@ -6,7 +6,6 @@ import ( "unicode" "github.com/pelletier/go-toml/v2/internal/characters" - "github.com/pelletier/go-toml/v2/internal/danger" ) // ParserError describes an error relative to the content of the document. @@ -70,11 +69,26 @@ func (p *Parser) Data() []byte { // panics. 
func (p *Parser) Range(b []byte) Range { return Range{ - Offset: uint32(danger.SubsliceOffset(p.data, b)), // #nosec G115 - Length: uint32(len(b)), // #nosec G115 + Offset: uint32(p.subsliceOffset(b)), //nolint:gosec // TOML documents are small + Length: uint32(len(b)), //nolint:gosec // TOML documents are small } } +// rangeOfToken computes the Range of a token given the remaining bytes after the token. +// This is used when the token was extracted from the beginning of some position, +// and 'rest' is what remains after the token. +func (p *Parser) rangeOfToken(token, rest []byte) Range { + offset := len(p.data) - len(token) - len(rest) + return Range{Offset: uint32(offset), Length: uint32(len(token))} //nolint:gosec // TOML documents are small +} + +// subsliceOffset returns the byte offset of subslice b within p.data. +// b must be a suffix (tail) of p.data. +func (p *Parser) subsliceOffset(b []byte) int { + // b is a suffix of p.data, so its offset is len(p.data) - len(b) + return len(p.data) - len(b) +} + // Raw returns the slice corresponding to the bytes in the given range. func (p *Parser) Raw(raw Range) []byte { return p.data[raw.Offset : raw.Offset+raw.Length] @@ -158,9 +172,17 @@ type Shape struct { End Position } -func (p *Parser) position(b []byte) Position { - offset := danger.SubsliceOffset(p.data, b) +// Shape returns the shape of the given range in the input. Will +// panic if the range is not a subslice of the input. +func (p *Parser) Shape(r Range) Shape { + return Shape{ + Start: p.positionAt(int(r.Offset)), + End: p.positionAt(int(r.Offset + r.Length)), + } +} +// positionAt returns the position at the given byte offset in the document. +func (p *Parser) positionAt(offset int) Position { lead := p.data[:offset] return Position{ @@ -170,16 +192,6 @@ func (p *Parser) position(b []byte) Position { } } -// Shape returns the shape of the given range in the input. Will -// panic if the range is not a subslice of the input. 
-func (p *Parser) Shape(r Range) Shape { - raw := p.Raw(r) - return Shape{ - Start: p.position(raw), - End: p.position(raw[r.Length:]), - } -} - func (p *Parser) parseNewline(b []byte) ([]byte, error) { if b[0] == '\n' { return b[1:], nil @@ -199,7 +211,7 @@ func (p *Parser) parseComment(b []byte) (reference, []byte, error) { if p.KeepComments && err == nil { ref = p.builder.Push(Node{ Kind: Comment, - Raw: p.Range(data), + Raw: p.rangeOfToken(data, rest), Data: data, }) } @@ -351,6 +363,7 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) { return ref, b, err } +//nolint:cyclop,funlen func (p *Parser) parseVal(b []byte) (reference, []byte, error) { // val = string / boolean / array / inline-table / date-time / float / integer ref := invalidReference @@ -375,7 +388,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) { if err == nil { ref = p.builder.Push(Node{ Kind: String, - Raw: p.Range(raw), + Raw: p.rangeOfToken(raw, b), Data: v, }) } @@ -393,7 +406,7 @@ func (p *Parser) parseVal(b []byte) (reference, []byte, error) { if err == nil { ref = p.builder.Push(Node{ Kind: String, - Raw: p.Range(raw), + Raw: p.rangeOfToken(raw, b), Data: v, }) } @@ -455,7 +468,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) { // inline-table-keyvals = keyval [ inline-table-sep inline-table-keyvals ] parent := p.builder.Push(Node{ Kind: InlineTable, - Raw: p.Range(b[:1]), + Raw: p.rangeOfToken(b[:1], b[1:]), }) first := true @@ -508,6 +521,7 @@ func (p *Parser) parseInlineTable(b []byte) (reference, []byte, error) { return parent, rest, err } +//nolint:funlen,cyclop func (p *Parser) parseValArray(b []byte) (reference, []byte, error) { // array = array-open [ array-values ] ws-comment-newline array-close // array-open = %x5B ; [ @@ -671,6 +685,7 @@ func (p *Parser) parseMultilineLiteralString(b []byte) ([]byte, []byte, []byte, return token, token[i : len(token)-3], rest, err } +//nolint:funlen,gocognit,cyclop func (p *Parser) 
parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, error) { // ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body // ml-basic-string-delim @@ -716,6 +731,7 @@ func (p *Parser) parseMultilineBasicString(b []byte) ([]byte, []byte, []byte, er for i < len(token)-3 { c := token[i] + //nolint:nestif if c == '\\' { // When the last non-whitespace character on a line is an unescaped \, // it will be trimmed along with all whitespace (including newlines) up @@ -817,7 +833,7 @@ func (p *Parser) parseKey(b []byte) (reference, []byte, error) { ref := p.builder.Push(Node{ Kind: Key, - Raw: p.Range(raw), + Raw: p.rangeOfToken(raw, b), Data: key, }) @@ -833,7 +849,7 @@ func (p *Parser) parseKey(b []byte) (reference, []byte, error) { p.builder.PushAndChain(Node{ Kind: Key, - Raw: p.Range(raw), + Raw: p.rangeOfToken(raw, b), Data: key, }) } else { @@ -865,6 +881,7 @@ func (p *Parser) parseSimpleKey(b []byte) (raw, key, rest []byte, err error) { } } +//nolint:funlen,cyclop func (p *Parser) parseBasicString(b []byte) ([]byte, []byte, []byte, error) { // basic-string = quotation-mark *basic-char quotation-mark // quotation-mark = %x22 ; " @@ -998,6 +1015,7 @@ func (p *Parser) parseWhitespace(b []byte) []byte { return rest } +//nolint:cyclop func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) { switch b[0] { case 'i': @@ -1008,7 +1026,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) return p.builder.Push(Node{ Kind: Float, Data: b[:3], - Raw: p.Range(b[:3]), + Raw: p.rangeOfToken(b[:3], b[3:]), }), b[3:], nil case 'n': if !scanFollowsNan(b) { @@ -1018,7 +1036,7 @@ func (p *Parser) parseIntOrFloatOrDateTime(b []byte) (reference, []byte, error) return p.builder.Push(Node{ Kind: Float, Data: b[:3], - Raw: p.Range(b[:3]), + Raw: p.rangeOfToken(b[:3], b[3:]), }), b[3:], nil case '+', '-': return p.scanIntOrFloat(b) @@ -1113,6 +1131,7 @@ byteLoop: }), b[i:], nil } +//nolint:funlen,gocognit,cyclop func (p 
*Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { i := 0 @@ -1142,7 +1161,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { return p.builder.Push(Node{ Kind: Integer, Data: b[:i], - Raw: p.Range(b[:i]), + Raw: p.rangeOfToken(b[:i], b[i:]), }), b[i:], nil } @@ -1166,7 +1185,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { return p.builder.Push(Node{ Kind: Float, Data: b[:i+3], - Raw: p.Range(b[:i+3]), + Raw: p.rangeOfToken(b[:i+3], b[i+3:]), }), b[i+3:], nil } @@ -1178,7 +1197,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { return p.builder.Push(Node{ Kind: Float, Data: b[:i+3], - Raw: p.Range(b[:i+3]), + Raw: p.rangeOfToken(b[:i+3], b[i+3:]), }), b[i+3:], nil } @@ -1201,7 +1220,7 @@ func (p *Parser) scanIntOrFloat(b []byte) (reference, []byte, error) { return p.builder.Push(Node{ Kind: kind, Data: b[:i], - Raw: p.Range(b[:i]), + Raw: p.rangeOfToken(b[:i], b[i:]), }), b[i:], nil } diff --git a/unstable/parser_test.go b/unstable/parser_test.go index 5edac37c..2f5f9ec6 100644 --- a/unstable/parser_test.go +++ b/unstable/parser_test.go @@ -196,6 +196,7 @@ func compareIterator(t *testing.T, expected []astNode, actual Iterator) { } } +//nolint:funlen func TestParser_AST(t *testing.T) { examples := []struct { desc string @@ -604,6 +605,74 @@ key5 = [ # Next to start of inline array. // 36:1->36:21 (804->824) | Comment [# After array table.] 
} +func TestIterator_IsLast(t *testing.T) { + // Test IsLast on an iterator with multiple elements using public Parser API + doc := `array = [1, 2, 3]` + p := Parser{} + p.Reset([]byte(doc)) + p.NextExpression() + + e := p.Expression() + arr := e.Value() // The array node + + it := arr.Children() + count := 0 + lastCount := 0 + for it.Next() { + count++ + if it.IsLast() { + lastCount++ + } + } + + assert.Equal(t, 3, count) + assert.Equal(t, 1, lastCount) +} + +func TestNodeChaining(t *testing.T) { + // Test that sibling nodes are correctly chained via Next() + // This exercises the internal PushAndChain functionality through public APIs + doc := `a.b.c = 1` + p := Parser{} + p.Reset([]byte(doc)) + p.NextExpression() + + e := p.Expression() + // KeyValue has children: value, then key parts (a, b, c) + keyIt := e.Key() + + // Collect all key parts by following the iterator + var keys []string + for keyIt.Next() { + keys = append(keys, string(keyIt.Node().Data)) + } + + assert.Equal(t, []string{"a", "b", "c"}, keys) +} + +func TestMultipleExpressions(t *testing.T) { + // Test parsing multiple top-level expressions + // This exercises root iteration through public APIs + doc := ` +key1 = "value1" +key2 = "value2" +key3 = "value3" +` + p := Parser{} + p.Reset([]byte(doc)) + + var keys []string + for p.NextExpression() { + e := p.Expression() + keyIt := e.Key() + keyIt.Next() + keys = append(keys, string(keyIt.Node().Data)) + } + + assert.NoError(t, p.Error()) + assert.Equal(t, []string{"key1", "key2", "key3"}, keys) +} + func ExampleParser() { doc := ` hello = "world"