diff --git a/enum.go b/enum.go
index 95e7828..d852fb0 100644
--- a/enum.go
+++ b/enum.go
@@ -52,9 +52,6 @@ func (e *Enum) parse(p *Parser) error {
 		}
 	}
 done:
-	if tok != tRIGHTCURLY {
-		return p.unexpected(lit, "enum closing }", e)
-	}
 	return nil
 }
 
diff --git a/field.go b/field.go
index 3eb42ad..9ee3559 100644
--- a/field.go
+++ b/field.go
@@ -29,7 +29,12 @@ func (f *NormalField) columns() (cols []aligned) {
 	if f.Repeated {
 		cols = append(cols, leftAligned("repeated "))
 	} else {
-		cols = append(cols, alignedSpace)
+		cols = append(cols, alignedEmpty)
+	}
+	if f.Optional {
+		cols = append(cols, leftAligned("optional "))
+	} else {
+		cols = append(cols, alignedEmpty)
 	}
 	cols = append(cols, rightAligned(f.Type), alignedSpace, leftAligned(f.Name), alignedEquals, rightAligned(strconv.Itoa(f.Sequence)))
 	if len(f.Options) > 0 {
diff --git a/field_test.go b/field_test.go
index d745dfe..bd29e8a 100644
--- a/field_test.go
+++ b/field_test.go
@@ -88,3 +88,13 @@ func TestMapField(t *testing.T) {
 		t.Fatal(err)
 	}
 }
+
+func TestOptionalWithOption(t *testing.T) {
+	proto := `optional int32 default_int32 = 61 [default = 41 ];`
+	p := newParserOn(proto)
+	f := newNormalField()
+	err := f.parse(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/group.go b/group.go
index 68c9b3c..55e14f7 100644
--- a/group.go
+++ b/group.go
@@ -4,6 +4,7 @@ package proto
 // https://developers.google.com/protocol-buffers/docs/reference/proto2-spec#group_field
 type Group struct {
 	Name     string
+	Optional bool
 	Sequence int
 	Elements []Visitee
 }
@@ -19,7 +20,7 @@ func (g *Group) addElement(v Visitee) {
 }
 
 // parse expects:
-// group = label "group" groupName "=" fieldNumber messageBody
+// groupName "=" fieldNumber { messageBody
 func (g *Group) parse(p *Parser) error {
 	tok, lit := p.scanIgnoreWhitespace()
 	if tok != tIDENT {
@@ -37,6 +38,9 @@ func (g *Group) parse(p *Parser) error {
 		return p.unexpected(lit, "group sequence number", g)
 	}
 	g.Sequence = i
+	tok, lit = p.scanIgnoreWhitespace()
+	if tok != tLEFTCURLY {
+		return p.unexpected(lit, "group opening {", g)
+	}
-	parseMessageBody(p, g)
-	return nil
+	return parseMessageBody(p, g)
 }
diff --git a/group_test.go b/group_test.go
new file mode 100644
index 0000000..d491871
--- /dev/null
+++ b/group_test.go
@@ -0,0 +1,22 @@
+package proto
+
+import "testing"
+
+func TestGroup(t *testing.T) {
+	oto := `message M {
+		optional group OptionalGroup = 16 {
+			optional int32 a = 17;
+		}
+	}`
+	p := newParserOn(oto)
+	p.scanIgnoreWhitespace() // consume first token
+	m := new(Message)
+	err := m.parse(p)
+	if err != nil {
+		t.Error(err)
+	}
+	if got, want := len(m.Elements), 1; got != want {
+		t.Errorf("got [%v] want [%v]", got, want)
+	}
+	t.Logf("%#v", m)
+}
diff --git a/message.go b/message.go
index 6987be7..f3a6064 100644
--- a/message.go
+++ b/message.go
@@ -16,6 +16,7 @@ func (m *Message) addElement(v Visitee) {
 	m.Elements = append(m.Elements, v)
 }
 
+// parse expects ident { messageBody
 func (m *Message) parse(p *Parser) error {
 	tok, lit := p.scanIgnoreWhitespace()
 	if tok != tIDENT {
@@ -31,6 +32,7 @@ func (m *Message) parse(p *Parser) error {
 	return parseMessageBody(p, m)
 }
 
+// parseMessageBody parses elements after {. It consumes the closing }
 func parseMessageBody(p *Parser, c elementContainer) error {
 	var (
 		tok token
@@ -77,7 +79,29 @@ func parseMessageBody(p *Parser, c elementContainer) error {
 				return err
 			}
 			c.addElement(r)
-		// BEGIN proto2 only
+		// BEGIN proto2
+		case tOPTIONAL, tREPEATED:
+			// look ahead
+			prevTok := tok
+			tok, lit = p.scanIgnoreWhitespace()
+			if tGROUP == tok {
+				g := new(Group)
+				g.Optional = prevTok == tOPTIONAL
+				if err := g.parse(p); err != nil {
+					return err
+				}
+				c.addElement(g)
+			} else {
+				// not a group, will be tFIELD
+				p.unscan()
+				f := newNormalField()
+				f.Optional = prevTok == tOPTIONAL
+				f.Repeated = prevTok == tREPEATED
+				if err := f.parse(p); err != nil {
+					return err
+				}
+				c.addElement(f)
+			}
 		case tGROUP:
 			g := new(Group)
 			if err := g.parse(p); err != nil {
@@ -85,7 +109,7 @@ func parseMessageBody(p *Parser, c elementContainer) error {
 			}
 			c.addElement(g)
-		// END proto2 only
-		case tRIGHTCURLY:
+		// END proto2
+		case tRIGHTCURLY, tEOF:
 			goto done
 		case tSEMICOLON:
 			// continue
diff --git a/option.go b/option.go
index 6337094..c35cda2 100644
--- a/option.go
+++ b/option.go
@@ -42,24 +42,29 @@ func (o *Option) keyValuePair(embedded bool) (cols []aligned) {
 // ( ident | "(" fullIdent ")" ) { "." ident } "=" constant ";"
 func (o *Option) parse(p *Parser) error {
 	tok, lit := p.scanIgnoreWhitespace()
-	switch tok {
-	case tIDENT:
-		o.Name = lit
-	case tLEFTPAREN:
+	if tLEFTPAREN == tok {
 		tok, lit = p.scanIgnoreWhitespace()
 		if tok != tIDENT {
-			return p.unexpected(lit, "option identifier", o)
+			if !isKeyword(tok) {
+				return p.unexpected(lit, "option full identifier", o)
+			}
 		}
 		o.Name = lit
 		tok, lit = p.scanIgnoreWhitespace()
 		if tok != tRIGHTPAREN {
-			return p.unexpected(lit, "option closing )", o)
+			return p.unexpected(lit, "full identifier closing )", o)
+		}
+	} else {
+		// non full ident
+		if tIDENT != tok {
+			if !isKeyword(tok) {
+				return p.unexpected(lit, "option identifier", o)
+			}
 		}
-	default:
-		return p.unexpected(lit, "option identifier or (", o)
+		o.Name = lit
 	}
 	tok, lit = p.scanIgnoreWhitespace()
-	if tok == tDOT {
+	if tDOT == tok {
 		// extend identifier
 		tok, lit = p.scanIgnoreWhitespace()
 		if tok != tIDENT {
@@ -68,7 +73,7 @@ func (o *Option) parse(p *Parser) error {
 		o.Name = fmt.Sprintf("%s.%s", o.Name, lit)
 		tok, lit = p.scanIgnoreWhitespace()
 	}
-	if tok != tEQUALS {
+	if tEQUALS != tok {
 		return p.unexpected(lit, "option constant =", o)
 	}
 	l := new(Literal)
@@ -93,32 +98,8 @@ func (l Literal) String() string {
 	return l.Source
 }
 
+// parse expects to read a literal constant after =.
 func (l *Literal) parse(p *Parser) error {
-	tok, lit := p.scanIgnoreWhitespace()
-	// stringLiteral?
-	if tok == tQUOTE {
-		ident := p.s.scanUntil('"')
-		if len(ident) == 0 {
-			return p.unexpected(lit, "literal quoted string", l)
-		}
-		l.Source, l.IsString = ident, true
-		return nil
-	}
-	// stringLiteral?
-	if tok == tSINGLEQUOTE {
-		ident := p.s.scanUntil('\'')
-		if len(ident) == 0 {
-			return p.unexpected(lit, "literal single quoted string", l)
-		}
-		l.Source, l.IsString = ident, true
-		return nil
-	}
-	// float, bool or intLit ?
-	if lit == "-" { // TODO token?
-		_, rem := p.s.scanIdent()
-		l.Source = "-" + rem
-		return nil
-	}
-	l.Source = lit
+	l.Source, l.IsString = p.s.scanLiteral()
 	return nil
 }
diff --git a/parser_test.go b/parser_test.go
index e5b1b51..aaa250b 100644
--- a/parser_test.go
+++ b/parser_test.go
@@ -26,3 +26,20 @@ func newParserOn(def string) *Parser {
 	p.debug = true
 	return p
 }
+
+// TEMPORARY tests
+func TestScanIgnoreWhitespace_Digits(t *testing.T) {
+	p := newParserOn("1234")
+	_, lit := p.scanIgnoreWhitespace()
+	if got, want := lit, "1"; got != want {
+		t.Errorf("got [%v] want [%v]", got, want)
+	}
+}
+
+func TestScanIgnoreWhitespace_Minus(t *testing.T) {
+	p := newParserOn("-1234")
+	_, lit := p.scanIgnoreWhitespace()
+	if got, want := lit, "-"; got != want {
+		t.Errorf("got [%v] want [%v]", got, want)
+	}
+}
diff --git a/scanner.go b/scanner.go
index fce2c23..69835ea 100644
--- a/scanner.go
+++ b/scanner.go
@@ -7,6 +7,7 @@ import (
 	"bytes"
 	"fmt"
 	"io"
+	"strings"
 )
 
 // scanner represents a lexical scanner.
@@ -96,6 +97,46 @@ func (s *scanner) scanWhitespace() (tok token, lit string) {
 	return tWS, buf.String()
 }
 
+// scanLiteral skips leading whitespace and returns the next literal together with a flag telling whether it was a quoted string.
+func (s *scanner) scanLiteral() (string, bool) {
+	var ch rune
+	// first skip all whitespace runes
+	for {
+		if ch = s.read(); ch == eof {
+			return "", false
+		}
+		if !isWhitespace(ch) {
+			break
+		}
+	}
+	// is there a single quoted string ahead?
+	if '\'' == ch {
+		return s.scanUntil('\''), true
+	}
+	// is there a double quoted string ahead?
+	if '"' == ch {
+		return s.scanUntil('"'), true
+	}
+	// Create a buffer and read the current character into it.
+	var buf bytes.Buffer
+	buf.WriteRune(ch)
+
+	// Read every subsequent non-literal character into the buffer.
+	// Whitespace characters, EOF and literal terminators will cause the loop to exit.
+	for {
+		if ch := s.read(); ch == eof {
+			break
+		} else if isWhitespace(ch) || strings.ContainsRune("[]();,", ch) { // TODO const?
+			s.unread(ch)
+			break
+		} else {
+			buf.WriteRune(ch)
+		}
+	}
+	return buf.String(), false
+}
+
+// TODO use scanLiteral?
 func (s *scanner) scanInteger() (int, error) {
 	var i int
 	if _, err := fmt.Fscanf(s.r, "%d", &i); err != nil {
@@ -156,11 +197,13 @@ func (s *scanner) scanIdent() (tok token, lit string) {
 		return tONEOF, buf.String()
 	case "reserved":
 		return tRESERVED, buf.String()
-	// proto2
+	// BEGIN proto2
 	case "optional":
 		return tOPTIONAL, buf.String()
+	case "group":
+		return tGROUP, buf.String()
+	// END proto2
 	}
-
 	return tIDENT, buf.String()
 }
 
diff --git a/scanner_test.go b/scanner_test.go
index df80745..f9a7901 100644
--- a/scanner_test.go
+++ b/scanner_test.go
@@ -52,3 +52,41 @@ func TestScanIntegerString(t *testing.T) {
 		t.Errorf("got [%v] want [%v]", got, want)
 	}
 }
+
+func TestScanLiteral_string(t *testing.T) {
+	r := strings.NewReader(` "string" `)
+	s := newScanner(r)
+	v, is := s.scanLiteral()
+	if got, want := v, "string"; got != want {
+		t.Errorf("got [%v] want [%v]", got, want)
+	}
+	if got, want := is, true; got != want {
+		t.Errorf("got [%v] want [%v]", got, want)
+	}
+}
+
+// TODO table driven
+func TestScanLiteral_string2(t *testing.T) {
+	r := strings.NewReader(`'string'`)
+	s := newScanner(r)
+	v, is := s.scanLiteral()
+	if got, want := v, "string"; got != want {
+		t.Errorf("got [%v] want [%v]", got, want)
+	}
+	if got, want := is, true; got != want {
+		t.Errorf("got [%v] want [%v]", got, want)
+	}
+}
+
+// TODO table driven
+func TestScanLiteral_float(t *testing.T) {
+	r := strings.NewReader(`-3.14e10`)
+	s := newScanner(r)
+	v, is := s.scanLiteral()
+	if got, want := v, "-3.14e10"; got != want {
+		t.Errorf("got [%v] want [%v]", got, want)
+	}
+	if got, want := is, false; got != want {
+		t.Errorf("got [%v] want [%v]", got, want)
+	}
+}