diff --git a/gopp.go b/gopp.go
index 0d19e4a..441a1ac 100644
--- a/gopp.go
+++ b/gopp.go
@@ -5,9 +5,11 @@
 package gopp
 
 import (
+	"bytes"
 	"fmt"
 	"regexp"
 	"sort"
+	"strconv"
 	"strings"
 )
 
@@ -100,59 +102,98 @@ type Rule struct {
 	Expr
 }
 
-func (r Rule) String() string {
-	return fmt.Sprintf("Rule(%s:%v)", r.Name, r.Expr)
-}
-
-type Symbol struct {
-	Name    string
-	Pattern string
+// Term is one element of a grammar expression.
+type Term interface {
+	CollectLiterals(literals map[string]bool)
+	Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error)
+	// Repr renders the term in a compact, grammar-like notation.
+	Repr() string
 }
 
-type Expr []Term
+type (
+	Expr []Term
 
-func (e Expr) CollectLiterals(literals map[string]bool) {
-	for _, term := range e {
-		term.CollectLiterals(literals)
+	RepeatZeroTerm struct {
+		Term
 	}
-	return
-}
 
-type Term interface {
-	CollectLiterals(literals map[string]bool)
-	Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error)
-}
+	RepeatOneTerm struct {
+		Term
+	}
 
-type RepeatZeroTerm struct {
-	Term
-}
+	OptionalTerm struct {
+		Expr
+	}
 
-func (rzt RepeatZeroTerm) String() string {
-	return fmt.Sprintf("RepeatZeroTerm(%v)", rzt.Term)
-}
+	GroupTerm struct {
+		Expr
+	}
 
-type RepeatOneTerm struct {
-	Term
-}
+	RuleTerm struct {
+		Name string
+		noLiterals
+	}
 
-func (rot RepeatOneTerm) String() string {
-	return fmt.Sprintf("RepeatOneTerm(%v)", rot.Term)
-}
+	InlineRuleTerm struct {
+		Name string
+		noLiterals
+	}
 
-type OptionalTerm struct {
-	Expr
-}
+	TagTerm struct {
+		Tag string
+		noLiterals
+	}
 
-func (ot OptionalTerm) String() string {
-	return fmt.Sprintf("OptionalTerm(%v)", ot.Expr)
-}
+	LiteralTerm struct {
+		Literal string
+	}
+)
 
-type GroupTerm struct {
-	Expr
+func (r Rule) String() string             { return fmt.Sprintf("Rule(%s:%v)", r.Name, r.Expr) }
+func (rzt RepeatZeroTerm) String() string { return fmt.Sprintf("RepeatZeroTerm(%v)", rzt.Term) }
+func (rot RepeatOneTerm) String() string  { return fmt.Sprintf("RepeatOneTerm(%v)", rot.Term) }
+func (ot OptionalTerm) String() string    { return fmt.Sprintf("OptionalTerm(%v)", ot.Expr) }
+func (gt GroupTerm) String() string       { return fmt.Sprintf("GroupTerm(%v)", gt.Expr) }
+func (rt RuleTerm) String() string        { return fmt.Sprintf("RuleTerm(%s)", rt.Name) }
+func (irt InlineRuleTerm) String() string { return fmt.Sprintf("InlineRuleTerm(%s)", irt.Name) }
+func (tt TagTerm) String() string         { return fmt.Sprintf("TagTerm(%q)", tt.Tag) }
+func (lt LiteralTerm) String() string     { return fmt.Sprintf("LiteralTerm(%q)", lt.Literal) }
+
+// Repr renders the expression as its terms' Repr strings joined by single
+// spaces. TagTerms are skipped. An expression with nothing left to render
+// (empty, or made only of TagTerms) yields "".
+func (e Expr) Repr() string {
+	var b bytes.Buffer
+	first := true
+	for _, t := range e {
+		if _, ok := t.(TagTerm); ok {
+			continue
+		}
+		if !first {
+			b.WriteByte(' ')
+		}
+		first = false
+		b.WriteString(t.Repr())
+	}
+	return b.String()
 }
-
-func (gt GroupTerm) String() string {
-	return fmt.Sprintf("GroupTerm(%v)", gt.Expr)
+
+func (r Rule) Repr() string             { return r.Name }
+func (rzt RepeatZeroTerm) Repr() string { return fmt.Sprintf("%s*", rzt.Term.Repr()) }
+func (rot RepeatOneTerm) Repr() string  { return fmt.Sprintf("%s+", rot.Term.Repr()) }
+func (ot OptionalTerm) Repr() string    { return fmt.Sprintf("[%v]", ot.Expr.Repr()) }
+func (gt GroupTerm) Repr() string       { return fmt.Sprintf("(%v)", gt.Expr.Repr()) }
+func (rt RuleTerm) Repr() string        { return fmt.Sprintf("<<%s>>", rt.Name) }
+func (irt InlineRuleTerm) Repr() string { return fmt.Sprintf("<%s>", irt.Name) }
+func (tt TagTerm) Repr() string         { return fmt.Sprintf("{%s}", quoteBody(tt.Tag)) }
+func (lt LiteralTerm) Repr() string     { return fmt.Sprintf("'%s'", quoteBody(lt.Literal)) }
+
+// quoteBody returns s escaped as strconv.Quote would, without the
+// surrounding double quotes. The end of the slice is taken from the quoted
+// string because escaping can make it longer than s.
+func quoteBody(s string) string {
+	q := strconv.Quote(s)
+	return q[1 : len(q)-1]
 }
 
 type noLiterals struct{}
@@ -161,39 +202,11 @@ func (n noLiterals) CollectLiterals(literals map[string]bool) {
 	return
 }
 
-type RuleTerm struct {
-	Name string
-	noLiterals
-}
-
-func (rt RuleTerm) String() string {
-	return fmt.Sprintf("RuleTerm(%s)", rt.Name)
-}
-
-type InlineRuleTerm struct {
-	Name string
-	noLiterals
-}
-
-func (irt InlineRuleTerm) String() string {
-	return fmt.Sprintf("InlineRuleTerm(%s)", irt.Name)
-}
-
-type TagTerm struct {
-	Tag string
-	noLiterals
-}
-
-func (tt TagTerm) String() string {
-	return fmt.Sprintf("TagTerm(%q)", tt.Tag)
-}
-
-type LiteralTerm struct {
-	Literal string
-}
-
-func (lt LiteralTerm) String() string {
-	return fmt.Sprintf("LiteralTerm(%q)", lt.Literal)
+func (e Expr) CollectLiterals(literals map[string]bool) {
+	for _, term := range e {
+		term.CollectLiterals(literals)
+	}
+	return
 }
 
 func (l LiteralTerm) CollectLiterals(literals map[string]bool) {
@@ -201,6 +214,11 @@ func (l LiteralTerm) CollectLiterals(literals map[string]bool) {
 	return
 }
 
+type Symbol struct {
+	Name    string
+	Pattern string
+}
+
 type AST []Node
 
 type Node interface{}
diff --git a/parse.go b/parse.go
index 605b3d8..de5122f 100644
--- a/parse.go
+++ b/parse.go
@@ -38,12 +38,21 @@ func Parse(g Grammar, startRule string, document []byte) (ast AST, err error) {
 
 	items, remaining, err := start.Parse(g, tokens, pd, []string{})
 	if err != nil {
-		// TODO: use pd to return informative error messages.
-		err = pd.FarthestErrors[0]
+		st := make([]string, len(pd.ParseStack))
+		for i, t := range pd.ParseStack {
+			st[i] = t.Repr()
+		}
+		err = ParseError{pd}
 		return
 	}
-	if len(remaining) != 0 {
-		err = errors.New("Did not parse entire file.")
+	switch len(remaining) {
+	case 0: // do nothing
+	case 1:
+		err = fmt.Errorf("1 token remaining: %v", remaining[0])
+	case 2, 3:
+		err = fmt.Errorf("%d tokens remaining: %v", len(remaining), remaining)
+	default:
+		err = fmt.Errorf("%d tokens remaining: %v...", len(remaining), remaining[:3])
 	}
 
 	ast = items
@@ -65,6 +74,7 @@ type ParseData struct {
 	errored        bool
 	FarthestErrors []error
 	TokensForError []Token
+	ParseStack     []Term
 }
 
 func NewParseData() (pd *ParseData) {
@@ -87,9 +97,43 @@ func (pd *ParseData) ErrorWith(err error, remaining []Token) {
 	pd.errored = true
 }
 
+// Push appends t to pd.ParseStack and returns a function that undoes the
+// push. The returned function truncates the stack back to its length before
+// the push only when *err is nil at the time it runs; when *err is non-nil
+// the stack is left untouched. It is meant to be used as
+//
+//	defer pd.Push(t, &err)()
+//
+// with err being the caller's named error result.
+func (pd *ParseData) Push(t Term, err *error) func() {
+	stackLen := len(pd.ParseStack)
+	pd.ParseStack = append(pd.ParseStack, t)
+	return func() {
+		if *err == nil {
+			pd.ParseStack = pd.ParseStack[:stackLen]
+		}
+	}
+}
+
+// ParseError is the error returned by Parse when the start rule fails. It
+// exposes the ParseData collected during the failed parse.
+type ParseError struct {
+	Pd *ParseData
+}
+
+// Error returns the message of the first entry in Pd.FarthestErrors, or a
+// generic message when no such entry was recorded.
+func (e ParseError) Error() string {
+	if e.Pd == nil || len(e.Pd.FarthestErrors) == 0 {
+		return "parse error"
+	}
+	return e.Pd.FarthestErrors[0].Error()
+}
+
 func (r Rule) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) {
 	rName := fmt.Sprintf("Rule(%q)", r.Name)
 	tr.In(rName, tokens)
+	defer pd.Push(r, &err)()
 	defer func() {
 		if err == nil {
 			tr.Out(rName, items)
@@ -112,6 +156,7 @@ func (r Rule) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []
 func (e Expr) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) {
 	rName := fmt.Sprintf("Expr")
 	tr.In(rName, tokens)
+	defer pd.Push(e, &err)()
 	defer func() {
 		if err == nil {
 			tr.Out(rName, items)
@@ -141,6 +186,7 @@ func (e Expr) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []
 func (t RepeatZeroTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) {
 	rName := fmt.Sprintf("RepeatZeroTerm")
 	tr.In(rName, tokens)
+	defer pd.Push(t, &err)()
 	defer func() {
 		if err == nil {
 			tr.Out(rName, items)
@@ -172,6 +218,7 @@ func (t RepeatZeroTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRu
 func (t RepeatOneTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) {
 	rName := fmt.Sprintf("RepeatOneTerm")
 	tr.In(rName, tokens)
+	defer pd.Push(t, &err)()
 	defer func() {
 		if err == nil {
 			tr.Out(rName, items)
@@ -210,6 +257,7 @@ func (t RepeatOneTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRul
 func (t OptionalTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) {
 	rName := fmt.Sprintf("OptionalTerm")
 	tr.In(rName, tokens)
+	defer pd.Push(t, &err)()
 	defer func() {
 		if err == nil {
 			tr.Out(rName, items)
@@ -231,6 +279,7 @@ func (t OptionalTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRule
 func (t RuleTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) {
 	rName := fmt.Sprintf("RuleTerm(%q)", t.Name)
 	tr.In(rName, tokens)
+	defer pd.Push(t, &err)()
 	defer func() {
 		if err == nil {
 			tr.Out(rName, items)
@@ -266,6 +315,7 @@ func (t RuleTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleName
 func (t InlineRuleTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) {
 	rName := fmt.Sprintf("InlineRuleTerm(%q)", t.Name)
 	tr.In(rName, tokens)
+	defer pd.Push(t, &err)()
 	defer func() {
 		if err == nil {
 			tr.Out(rName, items)
@@ -320,6 +370,7 @@ func (t TagTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames
 func (t LiteralTerm) Parse(g Grammar, tokens []Token, pd *ParseData, parentRuleNames []string) (items []Node, remainingTokens []Token, err error) {
 	rName := fmt.Sprintf("LiteralTerm(%q)", t.Literal)
 	tr.In(rName, tokens)
+	defer pd.Push(t, &err)()
 	defer func() {
 		if err == nil {
 			tr.Out(rName, items)