diff --git a/codec.go b/codec.go
index ee5bda1..1e4e9e1 100644
--- a/codec.go
+++ b/codec.go
@@ -198,6 +198,42 @@ func NewCodecForStandardJSONFull(schemaSpecification string) (*Codec, error) {
 	})
 }
 
+// NewCodecForUnambiguousJSON provides full serialization/deserialization
+// for JSON that is unambiguous in terms of what the field will contain.
+// This means that Avro union types containing only a single concrete type,
+// e.g. ["null", "string"], no longer have to specify their type. Unlike
+// NewCodecForStandardJSONFull, ambiguous unions such as ["int", "string"]
+// still need to specify their type as a map. See the following examples:
+//
+//	["null", "string"]        => "some string" || null
+//	["int", "string"]         => {"int": 1} || {"string": "some string"}
+//	["null", "int", "string"] => null || {"int": 1} || {"string": "some string"}
+//
+// This is especially useful when using json.Marshal with structs containing
+// optional types:
+//
+//	type Person struct {
+//		Name *string `json:"name,omitempty"`
+//	}
+//
+// or using json.Marshal with structs containing a union:
+//
+//	type Message struct {
+//		Direction DirectionUnion `json:"DirectionUnion"`
+//	}
+//
+//	type DirectionUnion struct { // only one of the fields can be non-nil
+//		Request  *string `json:"request,omitempty"`
+//		Response *string `json:"response,omitempty"`
+//	}
+func NewCodecForUnambiguousJSON(schemaSpecification string) (*Codec, error) {
+	return NewCodecFrom(schemaSpecification, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+}
+
 func NewCodecFrom(schemaSpecification string, cb *codecBuilder) (*Codec, error) {
 	var schema interface{}
 
diff --git a/union.go b/union.go
index 031e84f..481b4db 100644
--- a/union.go
+++ b/union.go
@@ -20,10 +20,52 @@ import (
 // codecInfo is a set of quick lookups it holds all the lookup info for the
 // all the schemas we need to handle the list of types for this union
 type codecInfo struct {
-	allowedTypes   []string
-	codecFromIndex []*Codec
-	codecFromName  map[string]*Codec
-	indexFromName  map[string]int
+	allowedTypes    []string
+	codecFromIndex  []*Codec
+	codecFromName   map[string]*Codec
+	indexFromName   map[string]int
+	unambiguousMode bool // true for codecs built by buildCodecForTypeDescribedBySliceUnambiguousJSON
+}
+
+// isNullable reports whether "null" is one of the union's member types.
+func (cr codecInfo) isNullable() bool {
+	_, nullable := cr.indexFromName["null"]
+	return nullable
+}
+
+// numConcreteTypes returns the number of member types other than "null".
+func (cr codecInfo) numConcreteTypes() int {
+	n := len(cr.allowedTypes)
+	if cr.isNullable() {
+		n--
+	}
+	return n
+}
+
+// firstConcreteTypeCodec returns the codec of the first non-null member in
+// schema order, or nil when the union has no non-null member. Walking
+// codecFromIndex rather than ranging over a map keeps the result deterministic.
+func (cr codecInfo) firstConcreteTypeCodec() *Codec {
+	nullIndex, nullable := cr.indexFromName["null"]
+	for i, c := range cr.codecFromIndex {
+		if nullable && i == nullIndex {
+			continue
+		}
+		return c
+	}
+	return nil
+}
+
+// binaryFromSoleConcreteType writes the branch index of the union's first
+// non-null member followed by datum encoded with that member's codec. Callers
+// must first check that the union has exactly one non-null member.
+func (cr codecInfo) binaryFromSoleConcreteType(buf []byte, datum interface{}) ([]byte, error) {
+	c := cr.firstConcreteTypeCodec()
+	buf, err := longBinaryFromNative(buf, cr.indexFromName[c.typeName.fullName])
+	if err != nil {
+		return nil, err
+	}
+	return c.binaryFromNative(buf, datum)
 }
 
 // Union wraps a datum value in a map for encoding as a Union, as required by
@@ -124,6 +166,12 @@ func unionBinaryFromNative(cr *codecInfo) func(buf []byte, datum interface{}) ([
 			}
 			return longBinaryFromNative(buf, index)
 		case map[string]interface{}:
+			// With a single concrete member the map is passed through as the
+			// datum itself rather than unwrapped as {"typeName": value}.
+			if cr.unambiguousMode && cr.isNullable() && cr.numConcreteTypes() == 1 {
+				return cr.binaryFromSoleConcreteType(buf, datum)
+			}
+
 			if len(v) != 1 {
 				return nil, fmt.Errorf("cannot encode binary union: non-nil Union values ought to be specified with Go map[string]interface{}, with single key equal to type name, and value equal to datum value: %v; received: %T", cr.allowedTypes, datum)
 			}
@@ -138,6 +186,13 @@ func unionBinaryFromNative(cr *codecInfo) func(buf []byte, datum interface{}) ([
 				return c.binaryFromNative(buf, value)
 			}
 		}
+
+		// datum was not handled by the switch above: in unambiguous mode a bare
+		// value is accepted when there is only one concrete member to encode it.
+		if cr.unambiguousMode && cr.isNullable() && cr.numConcreteTypes() == 1 {
+			return cr.binaryFromSoleConcreteType(buf, datum)
+		}
+
 		return nil, fmt.Errorf("cannot encode binary union: non-nil Union values ought to be specified with Go map[string]interface{}, with single key equal to type name, and value equal to datum value: %v; received: %T", cr.allowedTypes, datum)
 	}
 }
@@ -163,8 +218,7 @@ func unionTextualFromNative(cr *codecInfo) func(buf []byte, datum interface{}) (
 	return func(buf []byte, datum interface{}) ([]byte, error) {
 		switch v := datum.(type) {
 		case nil:
-			_, ok := cr.indexFromName["null"]
-			if !ok {
+			if !cr.isNullable() {
 				return nil, fmt.Errorf("cannot encode textual union: no member schema types support datum: allowed types: %v; received: %T", cr.allowedTypes, datum)
 			}
 			return append(buf, "null"...), nil
@@ -178,19 +232,27 @@ func unionTextualFromNative(cr *codecInfo) func(buf []byte, datum interface{}) (
 				if !ok {
 					return nil, fmt.Errorf("cannot encode textual union: no member schema types support datum: allowed types: %v; received: %T", cr.allowedTypes, datum)
 				}
-				buf = append(buf, '{')
 				var err error
-				buf, err = stringTextualFromNative(buf, key)
-				if err != nil {
-					return nil, fmt.Errorf("cannot encode textual union: %s", err)
+				// Only emit the {"typeName": ...} wrapper when it is needed to
+				// tell members apart; a lone concrete type is written bare.
+				wrapped := !cr.unambiguousMode || cr.numConcreteTypes() > 1
+				if wrapped {
+					buf = append(buf, '{')
+					buf, err = stringTextualFromNative(buf, key)
+					if err != nil {
+						return nil, fmt.Errorf("cannot encode textual union: %s", err)
+					}
+					buf = append(buf, ':')
 				}
-				buf = append(buf, ':')
 				c := cr.codecFromIndex[index]
 				buf, err = c.textualFromNative(buf, value)
 				if err != nil {
 					return nil, fmt.Errorf("cannot encode textual union: %s", err)
 				}
-				return append(buf, '}'), nil
+				if wrapped {
+					buf = append(buf, '}')
+				}
+				return buf, nil
 			}
 		}
 		return nil, fmt.Errorf("cannot encode textual union: non-nil values ought to be specified with Go map[string]interface{}, with single key equal to type name, and value equal to datum value: %v; received: %T", cr.allowedTypes, datum)
@@ -200,8 +262,7 @@ func textualJSONFromNativeAvro(cr *codecInfo) func(buf []byte, datum interface{}
 	return func(buf []byte, datum interface{}) ([]byte, error) {
 		switch v := datum.(type) {
 		case nil:
-			_, ok := cr.indexFromName["null"]
-			if !ok {
+			if !cr.isNullable() {
 				return nil, fmt.Errorf("cannot encode textual union: no member schema types support datum: allowed types: %v; received: %T", cr.allowedTypes, datum)
 			}
 			return append(buf, "null"...), nil
@@ -301,6 +362,37 @@ func buildCodecForTypeDescribedBySliceOneWayJSON(st map[string]*Codec, enclosing
 	}
 	return rv, nil
 }
+
+// buildCodecForTypeDescribedBySliceUnambiguousJSON builds the codec for a
+// union schema (a JSON array of member schemas) with unambiguousMode enabled,
+// so that unions with a single concrete member are read and written without
+// the {"typeName": value} wrapper in their textual form.
+func buildCodecForTypeDescribedBySliceUnambiguousJSON(st map[string]*Codec, enclosingNamespace string, schemaArray []interface{}, cb *codecBuilder) (*Codec, error) {
+	if len(schemaArray) == 0 {
+		return nil, errors.New("Union ought to have one or more members")
+	}
+
+	cr, err := makeCodecInfo(st, enclosingNamespace, schemaArray, cb)
+	if err != nil {
+		return nil, err
+	}
+	cr.unambiguousMode = true
+
+	rv := &Codec{
+		// NOTE: To support record field default values, union schema set to the
+		// type name of first member
+		// TODO: add/change to schemaCanonical below
+		schemaOriginal: cr.codecFromIndex[0].typeName.fullName,
+
+		typeName:          &name{"union", nullNamespace},
+		nativeFromBinary:  unionNativeFromBinary(&cr),
+		binaryFromNative:  unionBinaryFromNative(&cr),
+		nativeFromTextual: nativeAvroFromTextualJSON(&cr),
+		textualFromNative: unionTextualFromNative(&cr),
+	}
+	return rv, nil
+}
+
 func buildCodecForTypeDescribedBySliceTwoWayJSON(st map[string]*Codec, enclosingNamespace string, schemaArray []interface{}, cb *codecBuilder) (*Codec, error) {
 	if len(schemaArray) == 0 {
 		return nil, errors.New("Union ought to have one or more members")
@@ -340,6 +432,11 @@ func checkAll(allowedTypes []string, cr *codecInfo, buf []byte) (interface{}, []
 		if err != nil {
 			continue
 		}
+
+		// in unambiguous mode, don't return the type if only a single concrete type is registered
+		if cr.unambiguousMode && cr.numConcreteTypes() == 1 {
+			return rv, rb, nil
+		}
 		return map[string]interface{}{name: rv}, rb, nil
 	}
 	return nil, buf, fmt.Errorf("could not decode any json data in input %v", string(buf))
@@ -405,11 +502,55 @@ func nativeAvroFromTextualJSON(cr *codecInfo) func(buf []byte) (interface{}, []b
 			sort.Strings(cr.allowedTypes)
 
 		case map[string]interface{}:
+			if cr.unambiguousMode && cr.numConcreteTypes() > 1 {
+				// Ambiguous union: the value must name its member type as
+				// {"typeName": value}.
+				asmap, ok := m.(map[string]interface{}) // we know this cast cannot fail
+				if !ok || len(asmap) != 1 {
+					return nil, buf, fmt.Errorf("expected map with a single key, got: %v", string(buf))
+				}
+
+				var name string
+				var value []byte
+				for k, v := range asmap {
+					name = k
+					var err error
+					value, err = json.Marshal(v)
+					if err != nil {
+						return nil, buf, fmt.Errorf("could not read value of type as []byte: %v", v)
+					}
+				}
+
+				index, ok := cr.indexFromName[name]
+				if !ok {
+					return nil, buf, fmt.Errorf("invalid type: %v", name)
+				}
+
+				c := cr.codecFromIndex[index]
+				rv, _, err := c.NativeFromTextual(value)
+				if err != nil {
+					return nil, buf, fmt.Errorf("could not decode json data in input: %v: %v", string(buf), err)
+				}
+				return map[string]interface{}{name: rv}, buf[dec.InputOffset():], nil
+			}
 
 			// try to decode it as a map
 			// because a map should fail faster than a record
 			// if that fails assume record and return it
 			sort.Strings(cr.allowedTypes)
+		case interface{}:
+			// Values not matched by an earlier case: in unambiguous mode a
+			// union with exactly one concrete member, such as ["null", T], is
+			// decoded directly with T's codec and returned without the
+			// {"typeName": value} wrapper.
+			if cr.unambiguousMode && cr.numConcreteTypes() == 1 {
+				c := cr.firstConcreteTypeCodec()
+				rv, _, err := c.NativeFromTextual(buf)
+				if err != nil {
+					return nil, buf, fmt.Errorf("could not decode json data in input: %v: %v", string(buf), err)
+				}
+				return rv, buf[dec.InputOffset():], nil
+			}
 		}
 
 		return checkAll(allowedTypes, cr, buf)
diff --git a/union_test.go b/union_test.go
index b66884f..0903036 100644
--- a/union_test.go
+++ b/union_test.go
@@ -263,6 +263,218 @@ func ExampleCodec_TextualFromNative_json() {
 	// Output: {"string":"some string"}
 }
 
+// A ["null", T] union in unambiguous mode is written without the type-name wrapper.
+func ExampleCodec_TextualFromNative_unambiguous_primitive() {
+	codec, err := NewCodecFrom(`["null","string"]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	buf, err := codec.TextualFromNative(nil, Union("string", "some string"))
+	if err != nil {
+		fmt.Println(err)
+	}
+	fmt.Println(string(buf))
+	// Output: "some string"
+}
+
+// A ["null", T] union in unambiguous mode is read from a bare JSON value.
+func ExampleCodec_NativeFromTextual_unambiguous_primitive() {
+	codec, err := NewCodecFrom(`["null","string"]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	// send in a legit json string
+	t, _, err := codec.NativeFromTextual([]byte("\"some string\""))
+	if err != nil {
+		fmt.Println(err)
+	}
+	// see it parse directly into string
+	o, ok := t.(string)
+	if !ok {
+		fmt.Printf("its a %T not a string", t)
+	}
+	// pull out the string to show its all good
+	fmt.Println(o)
+	// Output: some string
+}
+
+// A ["null", record] union in unambiguous mode is written as the bare record.
+func ExampleCodec_TextualFromNative_unambiguous_record() {
+	codec, err := NewCodecFrom(`["null",{"type": "record", "name": "Person", "fields": [{"name": "name", "type": "string"}]}]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	buf, err := codec.TextualFromNative(nil, Union("Person", map[string]interface{}{"name": "John Doe"}))
+	if err != nil {
+		fmt.Println(err)
+	}
+	fmt.Println(string(buf))
+	// Output: {"name":"John Doe"}
+}
+
+// A ["null", record] union in unambiguous mode is read from a bare JSON object.
+func ExampleCodec_NativeFromTextual_unambiguous_record() {
+	codec, err := NewCodecFrom(`["null",{"type": "record", "name": "Person", "fields": [{"name": "name", "type": "string"}]}]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	// send in a legit json object
+	t, _, err := codec.NativeFromTextual([]byte("{\"name\": \"John Doe\"}"))
+	if err != nil {
+		fmt.Println(err)
+	}
+	// see it parse directly into a map
+	o, ok := t.(map[string]interface{})
+	if !ok {
+		fmt.Printf("its a %T not a map[string]interface{}", t)
+	}
+	// print the record to show its all good
+	fmt.Println(o)
+	// Output: map[name:John Doe]
+}
+
+// A null member is written as JSON null.
+func ExampleCodec_TextualFromNative_unambiguous_nil() {
+	codec, err := NewCodecFrom(`["null","string"]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	buf, err := codec.TextualFromNative(nil, Union("null", nil))
+	if err != nil {
+		fmt.Println(err)
+	}
+	fmt.Println(string(buf))
+	// Output: null
+}
+
+// JSON null is read back as a nil datum.
+func ExampleCodec_NativeFromTextual_unambiguous_nil() {
+	codec, err := NewCodecFrom(`["null","string"]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	// send in a json null
+	t, _, err := codec.NativeFromTextual([]byte("null"))
+	if err != nil {
+		fmt.Println(err)
+	}
+	// fmt prints a nil interface value as <nil>
+	fmt.Println(t)
+	// Output: <nil>
+}
+
+// A union with two concrete types still writes the type-name wrapper.
+func ExampleCodec_TextualFromNative_ambiguous_primitive() {
+	codec, err := NewCodecFrom(`["int","string"]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	buf, err := codec.TextualFromNative(nil, Union("string", "some string"))
+	if err != nil {
+		fmt.Println(err)
+	}
+	fmt.Println(string(buf))
+	// Output: {"string":"some string"}
+}
+
+// A union with two concrete types is read from its type-name wrapper.
+func ExampleCodec_NativeFromTextual_ambiguous_primitive() {
+	codec, err := NewCodecFrom(`["int","string"]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	// send in a legit json string
+	t, _, err := codec.NativeFromTextual([]byte("{\"string\": \"some string\"}"))
+	if err != nil {
+		fmt.Println(err)
+	}
+	// see it parse into a map like the avro encoder does
+	o, ok := t.(map[string]interface{})
+	if !ok {
+		fmt.Printf("its a %T not a map[string]interface{}", t)
+	}
+	// pull out the string to show its all good
+	v := o["string"]
+	fmt.Println(v)
+	// Output: some string
+}
+
+// An ambiguous union holding a record still writes the type-name wrapper.
+func ExampleCodec_TextualFromNative_ambiguous_record() {
+	codec, err := NewCodecFrom(`["int",{"type": "record", "name": "Person", "fields": [{"name": "name", "type": "string"}]}]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	buf, err := codec.TextualFromNative(nil, Union("Person", map[string]interface{}{"name": "John Doe"}))
+	if err != nil {
+		fmt.Println(err)
+	}
+	fmt.Println(string(buf))
+	// Output: {"Person":{"name":"John Doe"}}
+}
+
+// An ambiguous union holding a record is read from its type-name wrapper.
+func ExampleCodec_NativeFromTextual_ambiguous_record() {
+	codec, err := NewCodecFrom(`["int",{"type": "record", "name": "Person", "fields": [{"name": "name", "type": "string"}]}]`, &codecBuilder{
+		buildCodecForTypeDescribedByMap,
+		buildCodecForTypeDescribedByString,
+		buildCodecForTypeDescribedBySliceUnambiguousJSON,
+	})
+	if err != nil {
+		fmt.Println(err)
+	}
+	// send in a legit json object
+	t, _, err := codec.NativeFromTextual([]byte("{\"Person\": {\"name\": \"John Doe\"}}"))
+	if err != nil {
+		fmt.Println(err)
+	}
+	// see it parse into a map like the avro encoder does
+	o, ok := t.(map[string]interface{})
+	if !ok {
+		fmt.Printf("its a %T not a map[string]interface{}", t)
+	}
+	// pull out the Person to show its all good
+	v := o["Person"]
+	fmt.Println(v)
+	// Output: map[name:John Doe]
+}
+
 func ExampleCodec_NativeFromTextual_json() {
 	codec, err := NewCodecFrom(`["null","string","int"]`, &codecBuilder{
 		buildCodecForTypeDescribedByMap,