Skip to content

Commit

Permalink
proto: revert UTF-8 validation for proto2 (#628)
Browse files Browse the repository at this point in the history
The proto specification officially says that proto2 and proto3 strings should be
validated, but pragmatically, compliance with the spec has been poor.
For example, the Go implementation did not validate either and added strict
validation recently to be compliant. However, this caused significant breakage.

Cases of breakage should be fixed by changing the proto field type from string to bytes.
However, this is not always possible when the field is part of the exposed API.
This tends to be the case for proto2, where some other notable language
implementations (like C++) do not check proto2 strings for valid UTF-8.
However, since most language implementations do validate for UTF-8 in proto3,
we keep that behavior.

Making this change for Go is a little tricky since each field does not necessarily
know whether it is operating under the proto2 or proto3 syntax. Thus, we modify
the generator to emit a "proto3" struct field tag for all fields in proto3.
The implication of this change is that people will need to regenerate their
proto files to have UTF-8 validation.

We expand UTF-8 validation tests to ensure this works for the cross-product of
(proto2, proto3) and (scalar, vector, oneof, and maps) fields with strings.

Fixes #622
  • Loading branch information
dsnet authored Jun 6, 2018
1 parent 64db29d commit 05f48f4
Show file tree
Hide file tree
Showing 25 changed files with 1,088 additions and 620 deletions.
230 changes: 115 additions & 115 deletions conformance/internal/conformance_proto/conformance.pb.go

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions jsonpb/jsonpb_test_proto/more_test_objects.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 49 additions & 10 deletions proto/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"time"

. "github.com/golang/protobuf/proto"
pb3 "github.com/golang/protobuf/proto/proto3_proto"
. "github.com/golang/protobuf/proto/test_proto"
)

Expand Down Expand Up @@ -2250,17 +2251,55 @@ func TestConcurrentMarshal(t *testing.T) {
}

func TestInvalidUTF8(t *testing.T) {
const wire = "\x12\x04\xde\xea\xca\xfe"

var m GoTest
if err := Unmarshal([]byte(wire), &m); err == nil {
t.Errorf("Unmarshal error: got nil, want non-nil")
}
const invalidUTF8 = "\xde\xad\xbe\xef\x80\x00\xff"
tests := []struct {
label string
proto2 Message
proto3 Message
}{{
label: "Scalar",
proto2: &TestUTF8{Scalar: String(invalidUTF8)},
proto3: &pb3.TestUTF8{Scalar: invalidUTF8},
}, {
label: "Vector",
proto2: &TestUTF8{Vector: []string{invalidUTF8}},
proto3: &pb3.TestUTF8{Vector: []string{invalidUTF8}},
}, {
label: "Oneof",
proto2: &TestUTF8{Oneof: &TestUTF8_Field{invalidUTF8}},
proto3: &pb3.TestUTF8{Oneof: &pb3.TestUTF8_Field{invalidUTF8}},
}, {
label: "MapKey",
proto2: &TestUTF8{MapKey: map[string]int64{invalidUTF8: 0}},
proto3: &pb3.TestUTF8{MapKey: map[string]int64{invalidUTF8: 0}},
}, {
label: "MapValue",
proto2: &TestUTF8{MapValue: map[int64]string{0: invalidUTF8}},
proto3: &pb3.TestUTF8{MapValue: map[int64]string{0: invalidUTF8}},
}}

for _, tt := range tests {
// Proto2 should not validate UTF-8.
b, err := Marshal(tt.proto2)
if err != nil {
t.Errorf("Marshal(proto2.%s) = %v, want nil", tt.label, err)
}
tt.proto2.Reset()
err = Unmarshal(b, tt.proto2)
if err != nil {
t.Errorf("Unmarshal(proto2.%s) = %v, want nil", tt.label, err)
}

m.Reset()
m.Table = String(wire[2:])
if _, err := Marshal(&m); err == nil {
t.Errorf("Marshal error: got nil, want non-nil")
// Proto3 should validate UTF-8.
_, err = Marshal(tt.proto3)
if err == nil {
t.Errorf("Marshal(proto3.%s) = %v, want non-nil", tt.label, err)
}
tt.proto3.Reset()
err = Unmarshal(b, tt.proto3)
if err == nil {
t.Errorf("Unmarshal(proto3.%s) = %v, want non-nil", tt.label, err)
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion proto/properties.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ type Properties struct {
Repeated bool
Packed bool // relevant for repeated primitives only
Enum string // set for enum types only
proto3 bool // whether this is known to be a proto3 field; set for []byte only
proto3 bool // whether this is known to be a proto3 field
oneof bool // whether this is a oneof field

Default string // default value
Expand Down
Loading

0 comments on commit 05f48f4

Please sign in to comment.