diff --git a/README.md b/README.md index 7f46bd5..88df460 100644 --- a/README.md +++ b/README.md @@ -46,8 +46,5 @@ Package in Go for parsing Google Protocol Buffers [.proto files version 2 + 3] ( See (https://github.com/emicklei/proto-contrib) for other contributions on top of this package such as protofmt, proto2xsd and proto2gql. -#### known issues - -- the proto2 test file in (https://github.com/emicklei/proto-contrib/cmd/protofmt) folder contains character escape sequences that are currently not accepted by the scanner. See line 537 and 573. © 2017, [ernestmicklei.com](http://ernestmicklei.com). MIT License. Contributions welcome. \ No newline at end of file diff --git a/option.go b/option.go index d40c061..d455238 100644 --- a/option.go +++ b/option.go @@ -82,18 +82,25 @@ func (o *Option) parse(p *Parser) error { return p.unexpected(lit, "option value assignment =", o) } r := p.peekNonWhitespace() - if '{' == r { - p.next() // consume { - return o.parseAggregate(p) - } - // non aggregate - l := new(Literal) - l.Position = pos - if err := l.parse(p); err != nil { - return err - } - o.Constant = *l - return nil + var err error + // values of an option can have illegal escape sequences + // for the standard Go scanner used by this package. + p.ignoreIllegalEscapesWhile(func() { + if '{' == r { + // aggregate + p.next() // consume { + err = o.parseAggregate(p) + } else { + // non aggregate + l := new(Literal) + l.Position = pos + if e := l.parse(p); e != nil { + err = e + } + o.Constant = *l + } + }) + return err } // inlineComment is part of commentInliner. 
diff --git a/option_test.go b/option_test.go index 2352552..c26900a 100644 --- a/option_test.go +++ b/option_test.go @@ -249,6 +249,41 @@ func TestFieldCustomOptions(t *testing.T) { } } +func TestIgnoreIllegalEscapeCharsInAggregatedConstants(t *testing.T) { + src := `syntax = "proto3"; + message Person { + string name = 3 [(validate.rules).string = { + pattern: "^[^\d\s]+( [^\d\s]+)*$", + max_bytes: 256, + }]; + }` + p := newParserOn(src) + d, err := p.Parse() + if err != nil { + t.Fatal(err) + } + f := d.Elements[1].(*Message).Elements[0].(*NormalField) + if got, want := f.Options[0].AggregatedConstants[0].Source, "^[^\\d\\s]+( [^\\d\\s]+)*$"; got != want { + t.Errorf("got [%v] want [%v]", got, want) + } +} + +func TestIgnoreIllegalEscapeCharsInConstant(t *testing.T) { + src := `syntax = "proto2"; + message Person { + optional string cpp_trigraph = 20 [default = "? \? ?? \?? \??? ??/ ?\?-"]; + }` + p := newParserOn(src) + d, err := p.Parse() + if err != nil { + t.Fatal(err) + } + f := d.Elements[1].(*Message).Elements[0].(*NormalField) + if got, want := f.Options[0].Constant.Source, "? \\? ?? \\?? \\??? ??/ ?\\?-"; got != want { + t.Errorf("got [%v] want [%v]", got, want) + } +} + func TestFieldCustomOptionExtendedIdent(t *testing.T) { proto := `Type field = 1 [(validate.rules).enum.defined_only = true];` p := newParserOn(proto) diff --git a/parser.go b/parser.go index fdf646f..9e39042 100644 --- a/parser.go +++ b/parser.go @@ -30,6 +30,7 @@ import ( "io" "runtime" "strconv" + "strings" "text/scanner" ) @@ -64,6 +65,22 @@ func (p *Parser) handleScanError(s *scanner.Scanner, msg string) { fmt.Errorf("go scanner error at %v = %v", s.Position, msg)) } +// ignoreIllegalEscapesWhile is called for scanning constants of an option. +// Such content can have a syntax that is not acceptable by the Go scanner. 
+// This temporarily installs a handler that ignores only one type of error: illegal char escape +func (p *Parser) ignoreIllegalEscapesWhile(block func()) { + // during block call change error handler + p.scanner.Error = func(s *scanner.Scanner, msg string) { + if strings.Contains(msg, "illegal char escape") { // too bad there is no constant for this in scanner pkg + return + } + p.handleScanError(s, msg) + } + block() + // restore + p.scanner.Error = p.handleScanError +} + // Parse parses a proto definition. May return a parse or scanner error. func (p *Parser) Parse() (*Proto, error) { proto := new(Proto)