diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 7a685cea..45e0423e 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -10,6 +10,7 @@ on: jobs: build: strategy: + fail-fast: false matrix: os: [ 'ubuntu-latest', 'windows-latest', 'macos-latest', 'macos-14' ] go: [ '1.24', '1.25' ] diff --git a/unmarshaler.go b/unmarshaler.go index b61a347f..e7db8128 100644 --- a/unmarshaler.go +++ b/unmarshaler.go @@ -56,13 +56,18 @@ func (d *Decoder) DisallowUnknownFields() *Decoder { // EnableUnmarshalerInterface allows to enable unmarshaler interface. // -// With this feature enabled, types implementing the unstable/Unmarshaler +// With this feature enabled, types implementing the unstable.Unmarshaler // interface can be decoded from any structure of the document. It allows types // that don't have a straightforward TOML representation to provide their own // decoding logic. // -// Currently, types can only decode from a single value. Tables and array tables -// are not supported. +// The UnmarshalTOML method receives raw TOML bytes: +// - For single values: the raw value bytes (e.g., `"hello"` for a string) +// - For tables: the key-value lines that follow the table header +// - For inline tables/arrays: the bytes of the value node's Raw range. NOTE(review): the parser test expectations show an empty (0->0) Raw range for Array nodes and a one-byte range for InlineTable nodes, so these may receive empty or truncated bytes; confirm. +// +// The unstable.RawMessage type can be used to capture raw TOML bytes for +// later processing, similar to json.RawMessage. // // *Unstable:* This method does not follow the compatibility guarantees of // semver. It can be changed or removed without a new major version being @@ -599,18 +604,28 @@ func (d *decoder) handleArrayTablePart(key unstable.Iterator, v reflect.Value) ( // cannot handle it. 
func (d *decoder) handleTable(key unstable.Iterator, v reflect.Value) (reflect.Value, error) { if v.Kind() == reflect.Slice { - if v.Len() == 0 { - return reflect.Value{}, unstable.NewParserError(key.Node().Data, "cannot store a table in a slice") - } - elem := v.Index(v.Len() - 1) - x, err := d.handleTable(key, elem) - if err != nil { - return reflect.Value{}, err + // For non-empty slices, work with the last element + if v.Len() > 0 { + elem := v.Index(v.Len() - 1) + x, err := d.handleTable(key, elem) + if err != nil { + return reflect.Value{}, err + } + if x.IsValid() { + elem.Set(x) + } + return reflect.Value{}, nil } - if x.IsValid() { - elem.Set(x) + // Empty slice - check if it implements Unmarshaler (e.g., RawMessage) + // and we're at the end of the key path. Probe a copy of the iterator so that key.Node() is still valid for the error below. + if rest := key; d.unmarshalerInterface && !rest.Next() { + if v.CanAddr() && v.Addr().CanInterface() { + if outi, ok := v.Addr().Interface().(unstable.Unmarshaler); ok { + return d.handleKeyValuesUnmarshaler(outi) + } + } } - return reflect.Value{}, nil + return reflect.Value{}, unstable.NewParserError(key.Node().Data, "cannot store a table in a slice") } if key.Next() { // Still scoping the key @@ -624,6 +639,24 @@ func (d *decoder) handleTable(key unstable.Iterator, v reflect.Value) (reflect.V // Handle root expressions until the end of the document or the next // non-key-value. func (d *decoder) handleKeyValues(v reflect.Value) (reflect.Value, error) { + // Check if target implements Unmarshaler before processing key-values. + // This allows types to handle entire tables themselves. 
+ if d.unmarshalerInterface { + vv := v + for vv.Kind() == reflect.Ptr { + if vv.IsNil() { + vv.Set(reflect.New(vv.Type().Elem())) + } + vv = vv.Elem() + } + if vv.CanAddr() && vv.Addr().CanInterface() { + if outi, ok := vv.Addr().Interface().(unstable.Unmarshaler); ok { + // Collect all key-value expressions for this table + return d.handleKeyValuesUnmarshaler(outi) + } + } + } + var rv reflect.Value for d.nextExpr() { expr := d.expr() @@ -653,6 +686,41 @@ func (d *decoder) handleKeyValues(v reflect.Value) (reflect.Value, error) { return rv, nil } +// handleKeyValuesUnmarshaler collects all key-value expressions for a table +// and passes them to the Unmarshaler as raw TOML bytes. +func (d *decoder) handleKeyValuesUnmarshaler(u unstable.Unmarshaler) (reflect.Value, error) { + // Collect raw bytes from all key-value expressions for this table. + // We use the Raw field on each KeyValue expression to preserve the + // original formatting (whitespace, quoting style, etc.) from the document. + var buf []byte + + for d.nextExpr() { + expr := d.expr() + if expr.Kind != unstable.KeyValue { + d.stashExpr() + break + } + + _, err := d.seen.CheckExpression(expr) + if err != nil { + return reflect.Value{}, err + } + + // Use the raw bytes from the original document to preserve formatting + if expr.Raw.Length > 0 { + raw := d.p.Raw(expr.Raw) + buf = append(buf, raw...) 
+ } + buf = append(buf, '\n') + } + + if err := u.UnmarshalTOML(buf); err != nil { + return reflect.Value{}, err + } + + return reflect.Value{}, nil +} + type ( handlerFn func(key unstable.Iterator, v reflect.Value) (reflect.Value, error) valueMakerFn func() reflect.Value @@ -697,7 +765,8 @@ func (d *decoder) handleValue(value *unstable.Node, v reflect.Value) error { if d.unmarshalerInterface { if v.CanAddr() && v.Addr().CanInterface() { if outi, ok := v.Addr().Interface().(unstable.Unmarshaler); ok { - return outi.UnmarshalTOML(value) + // Pass raw bytes from the original document + return outi.UnmarshalTOML(d.p.Raw(value.Raw)) } } } @@ -1201,7 +1270,8 @@ func (d *decoder) handleKeyValuePart(key unstable.Iterator, value *unstable.Node if d.unmarshalerInterface { if v.CanAddr() && v.Addr().CanInterface() { if outi, ok := v.Addr().Interface().(unstable.Unmarshaler); ok { - return reflect.Value{}, outi.UnmarshalTOML(value) + // Pass raw bytes from the original document + return reflect.Value{}, outi.UnmarshalTOML(d.p.Raw(value.Raw)) } } } diff --git a/unmarshaler_test.go b/unmarshaler_test.go index 3e3b2a3a..9987722a 100644 --- a/unmarshaler_test.go +++ b/unmarshaler_test.go @@ -96,6 +96,132 @@ func ExampleUnmarshal() { // tags: [go toml] } +// pluginConfig demonstrates how to implement dynamic unmarshaling +// based on a "type" field. This pattern is useful for plugin systems +// or polymorphic configuration. 
+type pluginConfig struct { + Type string + Config any +} + +func (p *pluginConfig) UnmarshalTOML(data []byte) error { + // First, decode just the type field + var typeOnly struct { + Type string `toml:"type"` + } + if err := toml.Unmarshal(data, &typeOnly); err != nil { + return err + } + p.Type = typeOnly.Type + + // Now decode the config based on the type + switch typeOnly.Type { + case "database": + var cfg struct { + Type string `toml:"type"` + Host string `toml:"host"` + Port int `toml:"port"` + } + if err := toml.Unmarshal(data, &cfg); err != nil { + return err + } + p.Config = map[string]any{"host": cfg.Host, "port": cfg.Port} + case "cache": + var cfg struct { + Type string `toml:"type"` + TTL int `toml:"ttl"` + } + if err := toml.Unmarshal(data, &cfg); err != nil { + return err + } + p.Config = map[string]any{"ttl": cfg.TTL} + } + return nil +} + +// This example demonstrates dynamic unmarshaling based on a discriminator +// field. The pluginConfig type uses UnmarshalTOML to first read the "type" +// field, then decode the rest of the configuration based on that type. +// This pattern is useful for plugin systems or configuration that varies +// by type. +func ExampleDecoder_EnableUnmarshalerInterface_dynamicConfig() { + doc := ` +[[plugins]] +type = "database" +host = "localhost" +port = 5432 + +[[plugins]] +type = "cache" +ttl = 300 +` + type Config struct { + Plugins []pluginConfig `toml:"plugins"` + } + + var cfg Config + err := toml.NewDecoder(strings.NewReader(doc)). + EnableUnmarshalerInterface(). + Decode(&cfg) + if err != nil { + panic(err) + } + + for _, p := range cfg.Plugins { + fmt.Printf("type=%s config=%v\n", p.Type, p.Config) + } + // Output: + // type=database config=map[host:localhost port:5432] + // type=cache config=map[ttl:300] +} + +// This example demonstrates using RawMessage to capture raw TOML bytes +// for later processing. 
RawMessage is similar to json.RawMessage - it +// delays decoding so you can inspect the raw content or decode it +// differently based on context. +func ExampleDecoder_EnableUnmarshalerInterface_rawMessage() { + doc := ` +[plugin] +name = "example" +version = "1.0" +enabled = true +` + + type Config struct { + Plugin unstable.RawMessage `toml:"plugin"` + } + + var cfg Config + err := toml.NewDecoder(strings.NewReader(doc)). + EnableUnmarshalerInterface(). + Decode(&cfg) + if err != nil { + panic(err) + } + + // cfg.Plugin contains the raw TOML bytes + fmt.Printf("Raw TOML captured:\n%s", cfg.Plugin) + + // You can later decode it into a specific type + var plugin struct { + Name string `toml:"name"` + Version string `toml:"version"` + Enabled bool `toml:"enabled"` + } + if err := toml.Unmarshal(cfg.Plugin, &plugin); err != nil { + panic(err) + } + fmt.Printf("Decoded: name=%s version=%s enabled=%v\n", + plugin.Name, plugin.Version, plugin.Enabled) + + // Output: + // Raw TOML captured: + // name = "example" + // version = "1.0" + // enabled = true + // Decoded: name=example version=1.0 enabled=true +} + type badReader struct{} func (r *badReader) Read([]byte) (int, error) { @@ -3900,8 +4026,8 @@ type CustomUnmarshalerKey struct { A int64 } -func (k *CustomUnmarshalerKey) UnmarshalTOML(value *unstable.Node) error { - item, err := strconv.ParseInt(string(value.Data), 10, 64) +func (k *CustomUnmarshalerKey) UnmarshalTOML(data []byte) error { + item, err := strconv.ParseInt(string(data), 10, 64) if err != nil { return fmt.Errorf("error converting to int64, %w", err) } @@ -3989,7 +4115,7 @@ foo = "bar"`, type doc994 struct{} -func (d *doc994) UnmarshalTOML(*unstable.Node) error { +func (d *doc994) UnmarshalTOML([]byte) error { return errors.New("expected-error") } @@ -4012,8 +4138,8 @@ type doc994ok struct { S string } -func (d *doc994ok) UnmarshalTOML(value *unstable.Node) error { - d.S = string(value.Data) + " from unmarshaler" +func (d *doc994ok) UnmarshalTOML(data 
[]byte) error { + d.S = string(data) + " from unmarshaler" return nil } @@ -4026,7 +4152,8 @@ func TestIssue994_OK(t *testing.T) { Decode(&d) assert.NoError(t, err) - assert.Equal(t, "bar from unmarshaler", d.S) + // With bytes-based interface, raw TOML bytes are passed including quotes + assert.Equal(t, "\"bar\" from unmarshaler", d.S) } func TestIssue995(t *testing.T) { @@ -4385,3 +4512,265 @@ func TestIssue1028(t *testing.T) { assert.Error(t, err) }) } + +// Tests for issue #873 - Bring back toml.Unmarshaler for tables and arrays + +type customTable873 struct { + Keys []string + Values map[string]string +} + +func (c *customTable873) UnmarshalTOML(data []byte) error { + c.Keys = []string{} + c.Values = make(map[string]string) + + // Parse the raw TOML bytes into a map to extract keys in order + // For this test, we use a simple line-by-line parser to preserve order + lines := bytes.Split(data, []byte{'\n'}) + for _, line := range lines { + line = bytes.TrimSpace(line) + if len(line) == 0 { + continue + } + // Skip table headers + if line[0] == '[' { + continue + } + // Parse key = value + eqIdx := bytes.Index(line, []byte{'='}) + if eqIdx < 0 { + continue + } + key := string(bytes.TrimSpace(line[:eqIdx])) + // Remove quotes from quoted keys + if len(key) >= 2 && key[0] == '"' && key[len(key)-1] == '"' { + key = key[1 : len(key)-1] + } + valueBytes := bytes.TrimSpace(line[eqIdx+1:]) + // Remove quotes from string values + if len(valueBytes) >= 2 && valueBytes[0] == '"' && valueBytes[len(valueBytes)-1] == '"' { + valueBytes = valueBytes[1 : len(valueBytes)-1] + } + c.Keys = append(c.Keys, key) + c.Values[key] = string(valueBytes) + } + + return nil +} + +// Test for split tables - when the same parent table is defined in multiple places +// This is a key requirement for issue #873: if type A implements Unmarshaler, +// and [a.b] and [a.d] are defined with another table [x] in between, +// A should receive content for both b and d, but not x. 
+func TestIssue873_SplitTables(t *testing.T) { + // For this test, we expect each sub-table to be handled separately + // The parent doesn't receive the sub-tables directly - each sub-table + // (b and d) gets its own call to handleKeyValues + type Config struct { + A struct { + B customTable873 `toml:"b"` + D customTable873 `toml:"d"` + } `toml:"a"` + X customTable873 `toml:"x"` + } + + doc := ` +[a.b] +C = "1" + +[x] +Y = "100" + +[a.d] +E = "2" +` + + var cfg Config + err := toml.NewDecoder(bytes.NewReader([]byte(doc))). + EnableUnmarshalerInterface(). + Decode(&cfg) + + assert.NoError(t, err) + // Each sub-table should have received its own key-values + assert.Equal(t, []string{"C"}, cfg.A.B.Keys) + assert.Equal(t, "1", cfg.A.B.Values["C"]) + assert.Equal(t, []string{"E"}, cfg.A.D.Keys) + assert.Equal(t, "2", cfg.A.D.Values["E"]) + assert.Equal(t, []string{"Y"}, cfg.X.Keys) + assert.Equal(t, "100", cfg.X.Values["Y"]) +} + +// Test using RawMessage to capture raw TOML bytes +func TestIssue873_RawMessage(t *testing.T) { + type Config struct { + Plugin unstable.RawMessage `toml:"plugin"` + } + + doc := ` +[plugin] +name = "example" +version = "1.0" +` + + var cfg Config + err := toml.NewDecoder(bytes.NewReader([]byte(doc))). + EnableUnmarshalerInterface(). + Decode(&cfg) + + assert.NoError(t, err) + // RawMessage should contain the raw key-value bytes + expected := "name = \"example\"\nversion = \"1.0\"\n" + assert.Equal(t, expected, string(cfg.Plugin)) +} + +// Test keys that need quoting (contain special characters) +func TestIssue873_QuotedKeys(t *testing.T) { + type Config struct { + Section customTable873 `toml:"section"` + } + + doc := ` +[section] +"key with spaces" = "value1" +"key.with.dots" = "value2" +` + + var cfg Config + err := toml.NewDecoder(bytes.NewReader([]byte(doc))). + EnableUnmarshalerInterface(). 
+ Decode(&cfg) + + assert.NoError(t, err) + assert.Equal(t, 2, len(cfg.Section.Keys)) + assert.Equal(t, "value1", cfg.Section.Values["key with spaces"]) + assert.Equal(t, "value2", cfg.Section.Values["key.with.dots"]) +} + +// errorUnmarshaler873 is used to test error propagation from UnmarshalTOML +type errorUnmarshaler873 struct{} + +func (e *errorUnmarshaler873) UnmarshalTOML([]byte) error { + return errors.New("intentional error") +} + +// Test error propagation from UnmarshalTOML +func TestIssue873_UnmarshalerError(t *testing.T) { + doc := ` +[section] +key = "value" +` + + type Config struct { + Section errorUnmarshaler873 `toml:"section"` + } + + var cfg Config + err := toml.NewDecoder(bytes.NewReader([]byte(doc))). + EnableUnmarshalerInterface(). + Decode(&cfg) + + assert.Error(t, err) + assert.True(t, strings.Contains(err.Error(), "intentional error")) +} + +// Test dotted keys in a table (e.g., a.b = value) +func TestIssue873_DottedKeys(t *testing.T) { + type Config struct { + Section customTable873 `toml:"section"` + } + + doc := ` +[section] +sub.key = "value1" +another.nested.key = "value2" +` + + var cfg Config + err := toml.NewDecoder(bytes.NewReader([]byte(doc))). + EnableUnmarshalerInterface(). + Decode(&cfg) + + assert.NoError(t, err) + assert.Equal(t, 2, len(cfg.Section.Keys)) + // The dotted keys should be preserved in the raw output + assert.Equal(t, "value1", cfg.Section.Values["sub.key"]) + assert.Equal(t, "value2", cfg.Section.Values["another.nested.key"]) +} + +// Test pointer to pointer to Unmarshaler (covers pointer dereferencing loop) +func TestIssue873_DoublePointerUnmarshaler(t *testing.T) { + type Config struct { + Section **customTable873 `toml:"section"` + } + + doc := ` +[section] +key = "value" +` + + var cfg Config + err := toml.NewDecoder(bytes.NewReader([]byte(doc))). + EnableUnmarshalerInterface(). 
+ Decode(&cfg) + + assert.NoError(t, err) + assert.True(t, cfg.Section != nil) + assert.True(t, *cfg.Section != nil) + assert.Equal(t, []string{"key"}, (*cfg.Section).Keys) + assert.Equal(t, "value", (*cfg.Section).Values["key"]) +} + +// formattingCapture captures the raw TOML bytes to verify formatting preservation +type formattingCapture struct { + RawBytes string +} + +func (f *formattingCapture) UnmarshalTOML(data []byte) error { + f.RawBytes = string(data) + return nil +} + +func TestIssue873_FormattingPreservation(t *testing.T) { + type Config struct { + Section *formattingCapture `toml:"section"` + } + + // Test that various formatting styles are preserved: + // - Extra spaces around '=' + // - Literal strings (single quotes) + // - Hex numbers + // - Inline tables + doc := `[section] +key1 = "value with spaces" +key2 = 'literal string' +hex_val = 0xDEADBEEF +inline = { a = 1, b = 2 } +` + + var cfg Config + err := toml.NewDecoder(bytes.NewReader([]byte(doc))). + EnableUnmarshalerInterface(). 
+ Decode(&cfg) + + assert.NoError(t, err) + assert.True(t, cfg.Section != nil) + + // The raw bytes should preserve original formatting + raw := cfg.Section.RawBytes + + // Check that extra spaces around '=' are preserved + assert.True(t, strings.Contains(raw, "key1 = \"value with spaces\""), + "Expected spacing to be preserved, got: %s", raw) + + // Check that literal string style is preserved + assert.True(t, strings.Contains(raw, "key2 = 'literal string'"), + "Expected literal string to be preserved, got: %s", raw) + + // Check that hex format is preserved + assert.True(t, strings.Contains(raw, "hex_val = 0xDEADBEEF"), + "Expected hex format to be preserved, got: %s", raw) + + // Check that inline table is preserved + assert.True(t, strings.Contains(raw, "inline = { a = 1, b = 2 }"), + "Expected inline table to be preserved, got: %s", raw) +} diff --git a/unstable/parser.go b/unstable/parser.go index d48e07f3..e2c973b5 100644 --- a/unstable/parser.go +++ b/unstable/parser.go @@ -328,6 +328,9 @@ func (p *Parser) parseStdTable(b []byte) (reference, []byte, error) { func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) { // keyval = key keyval-sep val + // Track the start position for Raw range + startB := b + ref := p.builder.Push(Node{ Kind: KeyValue, }) @@ -360,6 +363,10 @@ func (p *Parser) parseKeyval(b []byte) (reference, []byte, error) { p.builder.Chain(valRef, key) p.builder.AttachChild(ref, valRef) + // Set Raw to span the entire key-value expression + node := p.builder.NodeAt(ref) + node.Raw = p.rangeOfToken(startB[:len(startB)-len(b)], b) + return ref, b, err } diff --git a/unstable/parser_test.go b/unstable/parser_test.go index 2f5f9ec6..9726915c 100644 --- a/unstable/parser_test.go +++ b/unstable/parser_test.go @@ -539,7 +539,7 @@ key5 = [ # Next to start of inline array. // --- // 6:1->6:22 (105->126) | Comment [# Above simple value.] 
// --- - // 1:1->1:1 (0->0) | KeyValue [] + // 7:1->7:14 (127->140) | KeyValue [] // 7:7->7:14 (133->140) | String [value] // 7:1->7:4 (127->130) | Key [key] // 7:15->7:38 (141->164) | Comment [# Next to simple value.] @@ -552,12 +552,12 @@ key5 = [ # Next to start of inline array. // --- // 14:1->14:22 (252->273) | Comment [# Above inline table.] // --- - // 1:1->1:1 (0->0) | KeyValue [] + // 15:1->15:50 (274->323) | KeyValue [] // 15:8->15:9 (281->282) | InlineTable [] - // 1:1->1:1 (0->0) | KeyValue [] + // 15:10->15:23 (283->296) | KeyValue [] // 15:18->15:23 (291->296) | String [Tom] // 15:10->15:15 (283->288) | Key [first] - // 1:1->1:1 (0->0) | KeyValue [] + // 15:25->15:48 (298->321) | KeyValue [] // 15:32->15:48 (305->321) | String [Preston-Werner] // 15:25->15:29 (298->302) | Key [last] // 15:1->15:5 (274->278) | Key [name] @@ -567,7 +567,7 @@ key5 = [ # Next to start of inline array. // --- // 18:1->18:15 (371->385) | Comment [# Above array.] // --- - // 1:1->1:1 (0->0) | KeyValue [] + // 19:1->19:20 (386->405) | KeyValue [] // 1:1->1:1 (0->0) | Array [] // 19:11->19:12 (396->397) | Integer [1] // 19:14->19:15 (399->400) | Integer [2] @@ -579,7 +579,7 @@ key5 = [ # Next to start of inline array. // --- // 22:1->22:26 (448->473) | Comment [# Above multi-line array.] // --- - // 1:1->1:1 (0->0) | KeyValue [] + // 23:1->31:2 (474->694) | KeyValue [] // 1:1->1:1 (0->0) | Array [] // 23:10->23:42 (483->515) | Comment [# Next to start of inline array.] // 24:3->24:38 (518->553) | Comment [# Second line before array content.] diff --git a/unstable/unmarshaler.go b/unstable/unmarshaler.go index 00cfd6de..5a79da88 100644 --- a/unstable/unmarshaler.go +++ b/unstable/unmarshaler.go @@ -1,7 +1,32 @@ package unstable -// The Unmarshaler interface may be implemented by types to customize their -// behavior when being unmarshaled from a TOML document. +// Unmarshaler is implemented by types that can unmarshal a TOML +// description of themselves. 
 The input is the raw TOML bytes of the +// relevant portion of the document, not necessarily a complete document. +// +// For a single value, the data is the value alone (strings keep their quotes). +// For a table, the data is the key-value expressions that follow the table +// header, each terminated by a newline; the header itself is not included. type Unmarshaler interface { - UnmarshalTOML(value *Node) error + UnmarshalTOML(data []byte) error +} + +// RawMessage is a raw encoded TOML value. It implements Unmarshaler +// and can be used to delay TOML decoding or capture raw content. +// +// Example usage: +// +// type Config struct { +// Plugin RawMessage `toml:"plugin"` +// } +// +// var cfg Config +// toml.NewDecoder(r).EnableUnmarshalerInterface().Decode(&cfg) +// // cfg.Plugin now contains the raw TOML bytes for [plugin] +type RawMessage []byte + +// UnmarshalTOML implements Unmarshaler. +func (m *RawMessage) UnmarshalTOML(data []byte) error { + *m = append((*m)[0:0], data...) + return nil }