diff --git a/Makefile b/Makefile
index fceb340b..6128a1e5 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
 # --- Global -------------------------------------------------------------------
 O = out
-COVERAGE = 90
+COVERAGE = 70
 VERSION ?= $(shell git describe --tags --dirty --always)
 
 all: build tiny test test-tiny check-coverage lint frontend ## Build, test, check coverage and lint
diff --git a/docs/syntax_grammar.md b/docs/syntax_grammar.md
index 448be345..a101c4ff 100644
--- a/docs/syntax_grammar.md
+++ b/docs/syntax_grammar.md
@@ -63,27 +63,35 @@ enclosed in double quotes `""`. Comments are fenced by `/* … */`.
 The `evy` source code is UTF-8 encoded. The NUL character `U+0000` is
 not allowed.
 
-    program    = { statements | func | event_handler } .
-    statements = { statement NL } .
-    statement  = assignment | declaration | func_call |
-                 loop | if | return |
-                 BREAK | EMPTY_STATEMENT .
+    program   = { statement | func | event_handler } .
+    statement = empty_stmt |
+                assign_stmt | typed_decl_stmt | inferred_decl_stmt |
+                func_call_stmt |
+                return_stmt | break_stmt |
+                for_stmt | while_stmt | if_stmt .
 
-    EMPTY_STATEMENT = .
-    BREAK = "break" .
+
+    /* --- Statement ---- */
+    empty_stmt = NL .
+
+    assign_stmt        = assignable "=" expr NL .
+    typed_decl_stmt    = typed_decl NL .
+    inferred_decl_stmt = ident ":=" toplevel_expr NL .
+
+    func_call_stmt = func_call NL .
+
+    return_stmt = "return" [ toplevel_expr ] NL .
+    break_stmt  = "break" NL .
 
     /* --- Assignment --- */
-    assignment   = assignable "=" expr .
     assignable   = ident { selector } .
     ident        = LETTER { LETTER | UNICODE_DIGIT } .
     selector     = index | dot_selector .
     index        = "[" expr "]" .
     dot_selector = "." ident .
 
-    /* --- Declarations --- */
-    declaration   = typed_decl | inferred_decl .
-    typed_ident   = ident ":" type .
-    inferred_decl = ident ":=" toplevel_expr .
+    /* --- Type --- */
+    typed_decl = ident ":" type .
 
     type       = BASIC_TYPE | array_type | map_type | "any" .
     BASIC_TYPE = "num" | "string" | "bool" .
@@ -120,36 +128,34 @@ not allowed.
     map_elems = { ident ":" term [NL] } .
 
     /* --- Control flow --- */
-    loop = for | while .
-    for  = "for" range NL
-           statements
-           "end" .
+    for_stmt   = "for" range NL
+                 { statement }
+                 "end" NL .
     range      = ident ( ":=" | "=" ) "range" range_args .
     range_args = term [ term [ term ] ] .
 
-    while = "while" toplevel_expr NL
-            statements
-            "end" .
-
-    if = "if" toplevel_expr NL
-         statements
-         { "else" "if" toplevel_expr NL
-           statements }
-         [ "else" NL
-           statements ]
-         "end" .
+    while_stmt = "while" toplevel_expr NL
+                 { statement }
+                 "end" NL .
+
+    if_stmt = "if" toplevel_expr NL
+              { statement }
+              { "else" "if" toplevel_expr NL
+                { statement } }
+              [ "else" NL
+                { statement } ]
+              "end" NL .
 
     /* --- Functions ---- */
     func = "func" ident func_signature NL
-           statements
-           "end" .
+           { statement }
+           "end" NL .
 
     func_signature = [ ":" type ] params .
     params         = { typed_decl } | variadic_param .
     variadic_param = typed_decl "..." .
-    return         = "return" [ toplevel_expr ] .
 
     event_handler = "on" ident NL
-                    statements
-                    "end" .
+                    { statement }
+                    "end" NL .
 
     /* --- Terminals --- */
     LETTER = UNICODE_LETTER | "_" .
diff --git a/frontend/index.html b/frontend/index.html
index 264b86a8..ca5a5ce5 100644
--- a/frontend/index.html
+++ b/frontend/index.html
@@ -23,16 +23,16 @@
 [hunk body lost in extraction: the removed and added HTML lines of this index.html change are not recoverable]
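
The evaluator introduced below is driven entirely through the Run entry point and a caller-supplied print callback. A minimal usage sketch (hypothetical main package, not part of this change; the import path is taken from the diff):

	package main

	import (
		"fmt"

		"foxygo.at/evy/pkg/evaluator"
	)

	func main() {
		// Run parses and evaluates an evy program; all output arrives
		// through the print callback instead of being written to stdout.
		prog := "x := len \"hello\"\nprint x"
		evaluator.Run(prog, func(s string) { fmt.Println(s) }) // prints "5"
	}

Note that Run currently ignores the parse errors collected by the parser: statements that fail to parse are dropped, and only evaluation errors reach the callback.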
diff --git a/pkg/evaluator/builtin.go b/pkg/evaluator/builtin.go
new file mode 100644
index 00000000..28c8d789
--- /dev/null
+++ b/pkg/evaluator/builtin.go
@@ -0,0 +1,44 @@
+package evaluator
+
+import (
+	"strconv"
+	"strings"
+)
+
+type Builtin func(args []Value) Value
+
+func (b Builtin) Type() ValueType { return BUILTIN }
+func (b Builtin) String() string  { return "builtin function" }
+
+func newBuiltins(e *Evaluator) map[string]Builtin {
+	return map[string]Builtin{
+		"print": Builtin(e.Print),
+		"len":   Builtin(Len),
+	}
+}
+
+func (e *Evaluator) Print(args []Value) Value {
+	argList := make([]string, len(args))
+	for i, arg := range args {
+		argList[i] = arg.String()
+	}
+	e.print(strings.Join(argList, " "))
+	return nil
+}
+
+func Len(args []Value) Value {
+	if len(args) != 1 {
+		return newError("'len' takes 1 argument, not " + strconv.Itoa(len(args)))
+	}
+	switch arg := args[0].(type) {
+	case *Map:
+		return &Num{Val: float64(len(arg.Pairs))}
+	case *Array:
+		return &Num{Val: float64(len(arg.Elements))}
+	case *String:
+		return &Num{Val: float64(len(arg.Val))}
+	default:
+		return newError("'len' takes 1 argument of type 'string', array '[]' or map '{}', not " + args[0].Type().String())
+	}
+}
diff --git a/pkg/evaluator/evaluator.go b/pkg/evaluator/evaluator.go
index 87d862c1..4bb8d419 100644
--- a/pkg/evaluator/evaluator.go
+++ b/pkg/evaluator/evaluator.go
@@ -1,7 +1,101 @@
 package evaluator
 
-import "strings"
+import (
+	"foxygo.at/evy/pkg/parser"
+)
 
 func Run(input string, print func(string)) {
-	print(strings.ToUpper(input))
+	p := parser.New(input)
+	prog := p.Parse()
+	e := &Evaluator{print: print}
+	e.builtins = newBuiltins(e)
+	val := e.Eval(prog, NewScope())
+	if isError(val) {
+		print(val.String())
+	}
+}
+
+type Evaluator struct {
+	print    func(string)
+	builtins map[string]Builtin
+}
+
+func (e *Evaluator) Eval(node parser.Node, scope *Scope) Value {
+	switch node := node.(type) {
+	case *parser.Program:
+		return e.evalProgram(node, scope)
+	case *parser.Declaration:
+		return e.evalDeclaration(node, scope)
+	case *parser.Var:
+		return e.evalVar(node, scope)
+	case *parser.Term:
+		return e.evalTerm(node, scope)
+	case *parser.NumLiteral:
+		return &Num{Val: node.Value}
+	case *parser.StringLiteral:
+		return &String{Val: node.Value}
+	case *parser.Bool:
+		return &Bool{Val: node.Value}
+	case *parser.FunctionCall:
+		return e.evalFunctionCall(node, scope)
+	}
+	return nil
+}
+
+func (e *Evaluator) evalProgram(program *parser.Program, scope *Scope) Value {
+	var result Value
+	for _, statement := range program.Statements {
+		result = e.Eval(statement, scope)
+		if isError(result) {
+			return result
+		}
+	}
+	return result
+}
+
+func (e *Evaluator) evalDeclaration(decl *parser.Declaration, scope *Scope) Value {
+	val := e.Eval(decl.Value, scope)
+	if isError(val) {
+		return val
+	}
+	scope.Set(decl.Var.Name, val)
+	return nil
+}
+
+func (e *Evaluator) evalFunctionCall(funcCall *parser.FunctionCall, scope *Scope) Value {
+	args := e.evalTerms(funcCall.Arguments, scope)
+	if len(args) == 1 && isError(args[0]) {
+		return args[0]
+	}
+	builtin, ok := e.builtins[funcCall.Name]
+	if !ok {
+		return newError("cannot find builtin function " + funcCall.Name)
+	}
+	return builtin(args)
+}
+
+func (e *Evaluator) evalVar(v *parser.Var, scope *Scope) Value {
+	if val, ok := scope.Get(v.Name); ok {
+		return val
+	}
+	return newError("cannot find variable " + v.Name)
+}
+
+func (e *Evaluator) evalTerm(term parser.Node, scope *Scope) Value {
+	return e.Eval(term, scope)
+}
+
+func (e *Evaluator) evalTerms(terms []parser.Node, scope *Scope) []Value {
+	result := make([]Value, len(terms))
+	for i, t := range terms {
+		evaluated := e.Eval(t, scope)
+		if isError(evaluated) {
+			return []Value{evaluated}
+		}
+		result[i] = evaluated
+	}
+	return result
+}
diff --git a/pkg/evaluator/evaluator_test.go b/pkg/evaluator/evaluator_test.go
new file mode 100644
index 00000000..45b3fbb4
--- /dev/null
+++ b/pkg/evaluator/evaluator_test.go
@@ -0,0 +1,53 @@
+package evaluator
+
+import (
+	"bytes"
+	"testing"
+
+	"foxygo.at/evy/pkg/assert"
+)
+
+func TestBasicEval(t *testing.T) {
+	in := "a:=1\n print a 2"
+	want := "1 2"
+	b := bytes.Buffer{}
+	fn := func(s string) { b.WriteString(s) }
+	Run(in, fn)
+	assert.Equal(t, want, b.String())
+}
+
+func TestParseDeclaration(t *testing.T) {
+	tests := map[string]string{
+		"a:=1":          "1",
+		`a:="abc"`:      "abc",
+		`a:=true`:       "true",
+		`a:= len "abc"`: "3",
+	}
+	for in, want := range tests {
+		in, want := in, want
+		t.Run(in, func(t *testing.T) {
+			in += "\n print a"
+			b := bytes.Buffer{}
+			fn := func(s string) { b.WriteString(s) }
+			Run(in, fn)
+			assert.Equal(t, want, b.String())
+		})
+	}
+}
+
+func TestDemo(t *testing.T) {
+	prog := `
+move 10 10
+line 20 20
+
+x := 12
+print "x:" x
+if x > 10
+    print "🍦 big x"
+end`
+	b := bytes.Buffer{}
+	fn := func(s string) { b.WriteString(s) }
+	Run(prog, fn)
+	want := "x: 12"
+	assert.Equal(t, want, b.String())
+}
diff --git a/pkg/evaluator/scope.go b/pkg/evaluator/scope.go
new file mode 100644
index 00000000..11c91043
--- /dev/null
+++ b/pkg/evaluator/scope.go
@@ -0,0 +1,29 @@
+package evaluator
+
+type Scope struct {
+	store map[string]Value
+	outer *Scope
+}
+
+func NewScope() *Scope {
+	return &Scope{store: map[string]Value{}}
+}
+
+func NewEnclosedScope(outer *Scope) *Scope {
+	return &Scope{store: map[string]Value{}, outer: outer}
+}
+
+func (s *Scope) Get(name string) (Value, bool) {
+	if s == nil {
+		return nil, false
+	}
+	if val, ok := s.store[name]; ok {
+		return val, ok
+	}
+	return s.outer.Get(name)
+}
+
+func (s *Scope) Set(name string, val Value) Value {
+	s.store[name] = val
+	return val
+}
diff --git a/pkg/evaluator/value.go b/pkg/evaluator/value.go
new file mode 100644
index 00000000..cbf1ae5f
--- /dev/null
+++ b/pkg/evaluator/value.go
@@ -0,0 +1,117 @@
+package evaluator
+
+import (
+	"strconv"
+	"strings"
+)
+
+type ValueType int
+
+const (
+	ERROR ValueType = iota
+	NUM
+	BOOL
+	STRING
+	ARRAY
+	MAP
+	RETURN_VALUE
+	FUNCTION
+	BUILTIN
+)
+
+var valueTypeStrings = map[ValueType]string{
+	ERROR:        "ERROR",
+	NUM:          "NUM",
+	BOOL:         "BOOL",
+	STRING:       "STRING",
+	ARRAY:        "ARRAY",
+	MAP:          "MAP",
+	RETURN_VALUE: "RETURN_VALUE",
+	FUNCTION:     "FUNCTION",
+	BUILTIN:      "BUILTIN",
+}
+
+func (t ValueType) String() string {
+	if s, ok := valueTypeStrings[t]; ok {
+		return s
+	}
+	return "UNKNOWN"
+}
+
+func (t ValueType) GoString() string {
+	return t.String()
+}
+
+type Value interface {
+	Type() ValueType
+	String() string
+}
+
+type Num struct {
+	Val float64
+}
+
+type Bool struct {
+	Val bool
+}
+
+type String struct {
+	Val string
+}
+
+type Array struct {
+	Elements []Value
+}
+
+type Map struct {
+	Pairs map[string]Value
+}
+
+type ReturnValue struct {
+	Val Value
+}
+
+type Error struct {
+	Message string
+}
+
+func (n *Num) Type() ValueType { return NUM }
+func (n *Num) String() string  { return strconv.FormatFloat(n.Val, 'f', -1, 64) }
+
+func (s *String) Type() ValueType { return STRING }
+func (s *String) String() string  { return s.Val }
+
+func (b *Bool) Type() ValueType { return BOOL }
+func (b *Bool) String() string  { return strconv.FormatBool(b.Val) }
+
+func (r *ReturnValue) Type() ValueType { return RETURN_VALUE }
+func (r *ReturnValue) String() string  { return r.Val.String() }
+
+func (e *Error) Type() ValueType { return ERROR }
+func (e *Error) String() string  { return "ERROR: " + e.Message }
+
+func isError(val Value) bool {
+	return val != nil && val.Type() == ERROR
+}
+
+func newError(msg string) *Error {
+	return &Error{Message: msg}
+}
+
+func (a *Array) Type() ValueType { return ARRAY }
+func (a *Array) String() string {
+	elements := make([]string, len(a.Elements))
+	for i, e := range a.Elements {
+		elements[i] = e.String()
+	}
+	return "[" + strings.Join(elements, ", ") + "]"
+}
+
+func (m *Map) Type() ValueType { return MAP }
+func (m *Map) String() string {
+	pairs := make([]string, 0, len(m.Pairs))
+	for key, value := range m.Pairs {
+		pairs = append(pairs, key+":"+value.String())
+	}
+	return "{" + strings.Join(pairs, ", ") + "}"
+}
diff --git a/pkg/lexer/token.go b/pkg/lexer/token.go
index 085fd8cb..52b85c16 100644
--- a/pkg/lexer/token.go
+++ b/pkg/lexer/token.go
@@ -184,6 +184,10 @@ func (t *Token) SetType(tokenType TokenType) *Token {
 	return t
 }
 
+func (t *Token) TokenType() TokenType {
+	return t.Type
+}
+
 func (t *Token) SetLiteral(literal string) *Token {
 	t.Literal = literal
 	return t
diff --git a/pkg/parser/ast.go b/pkg/parser/ast.go
new file mode 100644
index 00000000..0fad4a50
--- /dev/null
+++ b/pkg/parser/ast.go
@@ -0,0 +1,232 @@
+package parser
+
+import (
+	"strconv"
+	"strings"
+
+	"foxygo.at/evy/pkg/lexer"
+)
+
+type Node interface {
+	String() string
+	Type() *Type
+}
+
+type Program struct {
+	Statements []Node
+}
+
+type FunctionCall struct {
+	Token     *lexer.Token // the IDENT token of the function name
+	Name      string
+	Arguments []Node
+	nType     *Type
+}
+
+type Term struct {
+	Token *lexer.Token
+	Value Node
+	nType *Type
+}
+
+type Declaration struct {
+	Token *lexer.Token
+	Var   *Var
+	Value Node // literal, expression, assignable, ...
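+	// For `x := 1`, Value holds the parsed expression; for a typed
+	// declaration such as `x:num`, it holds the type's zero value
+	// (see zeroValue below).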
+}
+
+type FuncDecl struct {
+	Token         *lexer.Token // the FUNC token
+	Name          string
+	Params        []*Var
+	VariadicParam *Var
+	ReturnType    *Type
+	Body          *BlockStatement
+}
+
+type EventHandler struct {
+	Name string
+	Body *BlockStatement
+}
+
+type Var struct {
+	Token *lexer.Token
+	Name  string
+	nType *Type
+}
+
+type BlockStatement struct {
+	Token      *lexer.Token // the NL before the first statement
+	Statements []Node
+}
+
+type Bool struct {
+	Token *lexer.Token
+	Value bool
+}
+
+type NumLiteral struct {
+	Token *lexer.Token
+	Value float64
+}
+
+type StringLiteral struct {
+	Token *lexer.Token
+	Value string
+}
+
+type ArrayLiteral struct {
+	Token    *lexer.Token
+	Elements []Node
+	nType    *Type
+}
+
+type MapLiteral struct {
+	Token *lexer.Token
+	Pairs map[string]*Term
+	nType *Type
+}
+
+func (p *Program) String() string {
+	return newlineList(p.Statements)
+}
+func (*Program) Type() *Type {
+	return NONE_TYPE
+}
+
+func (f *FunctionCall) String() string {
+	s := make([]string, len(f.Arguments))
+	for i, arg := range f.Arguments {
+		s[i] = arg.String()
+	}
+	args := strings.Join(s, ", ")
+	return f.Name + "(" + args + ")"
+}
+func (f *FunctionCall) Type() *Type {
+	return f.nType
+}
+
+func (t *Term) String() string {
+	return t.Value.String()
+}
+func (t *Term) Type() *Type {
+	return t.nType
+}
+
+func (d *Declaration) String() string {
+	if d.Value == nil {
+		return d.Var.String()
+	}
+	return d.Var.String() + "=" + d.Value.String()
+}
+func (d *Declaration) Type() *Type {
+	return d.Var.nType
+}
+
+func (f *FuncDecl) String() string {
+	s := make([]string, len(f.Params))
+	for i, param := range f.Params {
+		s[i] = param.String()
+	}
+	params := strings.Join(s, ", ")
+	if f.VariadicParam != nil {
+		params += f.VariadicParam.String() + "..."
+	}
+	signature := f.Name + "(" + params + ")"
+	body := f.Body.String()
+	return signature + "{\n" + body + "\n}\n"
+}
+func (f *FuncDecl) Type() *Type {
+	return f.ReturnType
+}
+
+func (e *EventHandler) String() string {
+	body := e.Body.String()
+	return "on " + e.Name + " {\n" + body + "\n}\n"
+}
+func (e *EventHandler) Type() *Type {
+	return NONE_TYPE
+}
+
+func (v *Var) String() string {
+	return v.Name + ":" + v.nType.String()
+}
+func (v *Var) Type() *Type {
+	return v.nType
+}
+
+func (b *BlockStatement) String() string {
+	return newlineList(b.Statements)
+}
+func (b *BlockStatement) Type() *Type {
+	return NONE_TYPE
+}
+
+func (b *Bool) String() string {
+	return strconv.FormatBool(b.Value)
+}
+func (b *Bool) Type() *Type {
+	return BOOL_TYPE
+}
+
+func (n *NumLiteral) String() string {
+	return strconv.FormatFloat(n.Value, 'f', -1, 64)
+}
+func (n *NumLiteral) Type() *Type {
+	return NUM_TYPE
+}
+
+func (s *StringLiteral) String() string {
+	return "'" + s.Value + "'"
+}
+func (s *StringLiteral) Type() *Type {
+	return STRING_TYPE
+}
+
+func (a *ArrayLiteral) String() string {
+	elements := make([]string, len(a.Elements))
+	for i, e := range a.Elements {
+		elements[i] = e.String()
+	}
+	return "[" + strings.Join(elements, ", ") + "]"
+}
+func (a *ArrayLiteral) Type() *Type {
+	return a.nType
+}
+
+func (m *MapLiteral) String() string {
+	pairs := make([]string, 0, len(m.Pairs))
+	for key, val := range m.Pairs {
+		pairs = append(pairs, key+":"+val.String())
+	}
+	return "{" + strings.Join(pairs, ", ") + "}"
+}
+func (m *MapLiteral) Type() *Type {
+	return m.nType
+}
+
+func newlineList(nodes []Node) string {
+	lines := make([]string, len(nodes))
+	for i, n := range nodes {
+		lines[i] = n.String()
+	}
+	return strings.Join(lines, "\n") + "\n"
+}
+
+func zeroValue(t TypeName) Node {
+	switch t {
+	case NUM:
+		return &NumLiteral{Value: 0}
+	case STRING:
+		return &StringLiteral{Value: ""}
+	case BOOL:
+		return &Bool{Value: false}
+	case ANY:
+		return &Bool{Value: false}
+	case ARRAY:
+		return &ArrayLiteral{}
+	case MAP:
+		return &MapLiteral{}
+	}
+	return nil
+}
diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go
index c8c7ea00..3f8bd0a3 100644
--- a/pkg/parser/parser.go
+++ b/pkg/parser/parser.go
@@ -1,5 +1,565 @@
 package parser
 
+import (
+	"strconv"
+	"strings"
+
+	"foxygo.at/evy/pkg/lexer"
+)
+
 func Run(input string) string {
-	return "Parsing:\n" + input
+	parser := New(input)
+	prog := parser.Parse()
+	if len(parser.errors) > 0 {
+		errs := make([]string, len(parser.errors))
+		for i, e := range parser.errors {
+			errs[i] = e.String()
+		}
+		return strings.Join(errs, "\n") + "\n\n" + prog.String()
+	}
+	return prog.String()
+}
+
+type Parser struct {
+	errors []Error
+
+	pos  int          // current position in token slice (points to current token)
+	cur  *lexer.Token // current token under examination
+	peek *lexer.Token // next token after current token
+
+	tokens []*lexer.Token
+	funcs  map[string]*FuncDecl // all function declarations by name
+	vars   map[string]*Var      // all declared variables with type; TODO: needs scoping in block statements
+}
+
+// Error is an Evy parse error.
+type Error struct {
+	message string
+	token   *lexer.Token
+}
+
+func (e Error) String() string {
+	return e.token.Location() + ": " + e.message
+}
+
+func New(input string) *Parser {
+	l := lexer.New(input)
+	p := &Parser{
+		vars:  map[string]*Var{},
+		funcs: builtins(),
+	}
+
+	// Read all tokens and collect the indices of FUNC tokens;
+	// funcs temporarily holds those indices for further processing.
+	var funcs []int
+	var token *lexer.Token
+	for token = l.Next(); token.Type != lexer.EOF; token = l.Next() {
+		p.tokens = append(p.tokens, token)
+		if token.Type == lexer.FUNC { // collect all function declarations
+			funcs = append(funcs, len(p.tokens)-1)
+		}
+	}
+	p.tokens = append(p.tokens, token) // append EOF token
+
+	// Parse all function signatures, prior to proper parsing, to build
+	// a function name and type lookup table because functions can be
+	// called before declaration.
+	for _, i := range funcs {
+		p.advanceTo(i)
+		fd := p.parseFuncDeclSignature()
+		if fd != nil {
+			p.funcs[fd.Name] = fd
+		}
+	}
+	return p
+}
+
+func builtins() map[string]*FuncDecl {
+	return map[string]*FuncDecl{
+		"print": &FuncDecl{
+			Name:          "print",
+			VariadicParam: &Var{Name: "a", nType: ANY_TYPE},
+		},
+		"len": &FuncDecl{
+			Name:       "len",
+			Params:     []*Var{{Name: "a", nType: ANY_TYPE}},
+			ReturnType: NUM_TYPE,
+		},
+	}
+}
+
+func (p *Parser) Parse() *Program {
+	return p.parseProgram()
+}
+
+// Function names matching `parsePRODUCTION` align with production names
+// in the grammar in docs/syntax_grammar.md.
+func (p *Parser) parseProgram() *Program {
+	program := &Program{Statements: []Node{}}
+	p.advanceTo(0)
+	for p.cur.TokenType() != lexer.EOF {
+		var stmt Node
+		switch p.cur.TokenType() {
+		case lexer.FUNC:
+			stmt = p.parseFunc()
+		case lexer.ON:
+			stmt = p.parseEventHandler()
+		default:
+			stmt = p.parseStatement()
+		}
+		if stmt != nil {
+			program.Statements = append(program.Statements, stmt)
+		}
+	}
+	return program
+}
+
+func (p *Parser) parseFunc() Node {
+	p.advance()  // advance past FUNC
+	tok := p.cur // function name IDENT
+
+	p.advancePastNL() // advance past signature, already parsed into p.funcs earlier
+	block := p.parseBlock()
+
+	if tok.TokenType() != lexer.IDENT {
+		return nil
+	}
+	fd := p.funcs[tok.Literal]
+	if fd.Body != nil {
+		p.appendError("redeclaration of function '" + tok.Literal + "'")
+		return nil
+	}
+	fd.Body = block
+	return fd
+}
+
+func (p *Parser) parseEventHandler() Node {
+	e := &EventHandler{}
+	p.advance() // advance past ON
+	if p.assertToken(lexer.IDENT) {
+		e.Name = p.cur.Literal
+		p.advance() // advance past event name IDENT
+		p.assertEOL()
+	}
+	p.advancePastNL() // advance past NL
+	e.Body = p.parseBlock()
+	return e
+}
+
+func (p *Parser) parseStatement() Node {
+	switch p.cur.TokenType() {
+	case lexer.NL, lexer.EOF, lexer.COMMENT: // empty statement
+		p.advancePastNL()
+		return nil
+	case lexer.IDENT:
+		switch p.peek.Type {
+		case lexer.ASSIGN, lexer.LBRACKET, lexer.DOT:
+			return p.parseAssignStatement() // TODO
+		case lexer.COLON:
+			return p.parseTypedDeclStatement()
+		case lexer.DECLARE:
+			return p.parseInferredDeclStatement()
+		}
+		if p.isFuncCall(p.cur) {
+			return p.parseFuncCallStatement()
+		}
+		p.appendError("unknown function '" + p.cur.Literal + "'")
+		p.advancePastNL()
+		return nil
+	case lexer.RETURN:
+		return p.parseReturnStatement() // TODO
+	case lexer.BREAK:
+		return p.parseBreakStatement() // TODO
+	case lexer.FOR:
+		return p.parseForStatement() // TODO
+	case lexer.WHILE:
+		return p.parseWhileStatement() // TODO
+	case lexer.IF:
+		return p.parseIfStatement() // TODO
+	}
+	p.appendError("unexpected token '" + p.cur.Format() + "'")
+	p.advancePastNL()
+	return nil
+}
+
+// TODO: not yet implemented.
+func (p *Parser) parseAssignStatement() Node {
+	p.advancePastNL()
+	return nil
+}
+
+func (p *Parser) parseFuncDeclSignature() *FuncDecl {
+	fd := &FuncDecl{Token: p.cur}
+	p.advance() // advance past FUNC
+	if !p.assertToken(lexer.IDENT) {
+		p.advancePastNL()
+		return nil
+	}
+	fd.Name = p.cur.Literal
+	p.advance() // advance past function name IDENT
+	if p.cur.TokenType() == lexer.COLON {
+		p.advance() // advance past `:` of return type declaration, e.g. in `func rand:num`
+		fd.ReturnType = p.parseType()
+		if fd.ReturnType.Name == ILLEGAL {
+			p.appendErrorForToken("bad return type", fd.Token)
+		}
+	}
+	for !p.isAtEOL() && p.cur.TokenType() != lexer.DOT3 {
+		decl := p.parseTypedDecl().(*Declaration)
+		fd.Params = append(fd.Params, decl.Var)
+	}
+	if p.cur.TokenType() == lexer.DOT3 {
+		if len(fd.Params) == 1 {
+			fd.VariadicParam = fd.Params[0]
+			fd.Params = nil
+		} else {
+			p.appendError("variadic parameters must be used with a single type")
+		}
+		p.advance() // advance past `...`
+	}
+	p.assertEOL()
+	p.advancePastNL()
+	return fd
+}
+
+func (p *Parser) parseTypedDeclStatement() Node {
+	decl := p.parseTypedDecl()
+	if decl.Type().Name != ILLEGAL {
+		p.assertEOL()
+	}
+	p.advancePastNL()
+	return decl
+}
+
+// parseTypedDecl parses declarations like `x:num` or `y:any[]{}`.
+func (p *Parser) parseTypedDecl() Node {
+	ident := p.cur.Literal
+	decl := &Declaration{
+		Token: p.cur,
+		Var:   &Var{Token: p.cur, Name: ident},
+	}
+	p.advance() // advance past IDENT
+	p.advance() // advance past `:`
+	v := p.parseType()
+	decl.Var.nType = v
+	decl.Value = zeroValue(v.Name)
+	if v == ILLEGAL_TYPE {
+		p.appendErrorForToken("bad type declaration", decl.Token)
+	} else {
+		p.vars[ident] = decl.Var
+	}
+	return decl
+}
+
+// parseType parses types like `num[]{}` into `MAP ARRAY NUM`, inverting the order.
+func (p *Parser) parseType() *Type {
+	result := p.parseBasicType()
+	if result == ILLEGAL_TYPE {
+		return result
+	}
+	return p.parseSubType(result)
+}
+
+func (p *Parser) parseBasicType() *Type {
+	tt := p.cur.TokenType()
+	t := basicTypeName(tt)
+	p.advance()
+	if t == ILLEGAL {
+		return ILLEGAL_TYPE
+	}
+	return &Type{Name: t}
+}
+
+func (p *Parser) parseSubType(parent *Type) *Type {
+	tt := p.cur.TokenType()
+	typeName := compositeTypeName(tt)
+	if typeName == ILLEGAL { // we have moved past the type declaration
+		return parent
+	}
+	if !matchParen(tt, p.peek.Type) {
+		return ILLEGAL_TYPE
+	}
+	p.advance() // advance past opening token `[` or `{`
+	p.advance() // advance past closing token `]` or `}`
+	node := &Type{Name: typeName, Sub: parent}
+	return p.parseSubType(node)
+}
+
+func matchParen(t1, t2 lexer.TokenType) bool {
+	return (t1 == lexer.LBRACKET && t2 == lexer.RBRACKET) ||
+		(t1 == lexer.LCURLY && t2 == lexer.RCURLY)
+}
+
+func (p *Parser) parseInferredDeclStatement() Node {
+	ident := p.cur.Literal
+	decl := &Declaration{
+		Token: p.cur,
+		Var:   &Var{Token: p.cur, Name: ident},
+	}
+	p.advance() // advance past IDENT
+	p.advance() // advance past `:=`
+	val := p.parseTopLevelExpression()
+	if val == nil {
+		decl.Var.nType = ILLEGAL_TYPE
+	} else {
+		decl.Value = val
+		decl.Var.nType = val.Type()
+		p.vars[ident] = decl.Var
+		p.assertEOL()
+	}
+	p.advancePastNL()
+	return decl
+}
+
+func (p *Parser) parseTopLevelExpression() Node {
+	tt := p.cur.TokenType()
+	if tt == lexer.IDENT && p.isFuncCall(p.cur) {
+		return p.parseFuncCall()
+	}
+	return p.parseExpression()
+}
+
+func (p *Parser) parseExpression() Node {
+	return p.parseTerm()
+}
+
+// TODO: UNARY_OP term; composite literals; assignable; slice;
+// type_assertion; "(" toplevel_expr ")".
+func (p *Parser) parseTerm() Node {
+	tt := p.cur.TokenType()
+	if tt == lexer.IDENT {
+		ident := p.cur.Literal
+		p.advance()
+		v, ok := p.vars[ident]
+		if !ok {
+			p.appendError("unknown identifier '" + ident + "'")
+			return nil
+		}
+		return v
+	}
+	if p.isLiteral() {
+		return p.parseLiteral()
+	}
+	p.appendError("invalid term")
+	p.advance()
+	return nil
+}
+
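+// isLiteral reports whether the current token starts a literal: a num,
+// string or bool literal, or a composite literal introduced by a basic
+// type name directly followed by `[` or `{`.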
+func (p *Parser) isLiteral() bool {
+	tt := p.cur.TokenType()
+	if tt == lexer.NUM_LIT || tt == lexer.STRING_LIT || tt == lexer.TRUE || tt == lexer.FALSE {
+		return true
+	}
+	if !isBasicType(tt) {
+		return false
+	}
+	peek := p.peek.TokenType()
+	return peek == lexer.LBRACKET || peek == lexer.LCURLY
+}
+
+func (p *Parser) parseLiteral() Node {
+	tok := p.cur
+	tt := tok.TokenType()
+	p.advance()
+	switch tt {
+	case lexer.STRING_LIT:
+		return &StringLiteral{Token: tok, Value: tok.Literal}
+	case lexer.NUM_LIT:
+		val, err := strconv.ParseFloat(tok.Literal, 64)
+		if err != nil {
+			p.appendError(err.Error())
+			return nil
+		}
+		return &NumLiteral{Token: tok, Value: val}
+	case lexer.TRUE, lexer.FALSE:
+		return &Bool{Token: tok, Value: tt == lexer.TRUE}
+	}
+	return nil
+}
+
+func (p *Parser) isFuncCall(tok *lexer.Token) bool {
+	funcName := tok.Literal
+	_, ok := p.funcs[funcName]
+	return ok
+}
+
+func (p *Parser) parseFuncCallStatement() Node {
+	fc := p.parseFuncCall()
+	p.assertEOL()
+	p.advancePastNL()
+	return fc
+}
+
+func (p *Parser) parseFuncCall() Node {
+	funcToken := p.cur
+	funcName := p.cur.Literal
+	decl := p.funcs[funcName]
+	p.advance() // advance past function name IDENT
+	args := p.parseTerms()
+	p.assertArgTypes(decl, args)
+	return &FunctionCall{
+		Name:      funcName,
+		Token:     funcToken,
+		Arguments: args,
+		nType:     decl.ReturnType,
+	}
+}
+
+func (p *Parser) assertArgTypes(decl *FuncDecl, args []Node) {
+	if decl.Params != nil {
+		if len(decl.Params) != len(args) {
+			p.appendError("expected " + strconv.Itoa(len(decl.Params)) + " arguments, found " + strconv.Itoa(len(args)))
+			return
+		}
+		for i := range args {
+			paramType := decl.Params[i].Type()
+			argType := args[i].Type()
+			if !paramType.Accepts(argType) {
+				p.appendError("expected type " + paramType.String() + ", found " + argType.String())
+			}
+		}
+		return
+	}
+	if decl.VariadicParam != nil {
+		paramType := decl.VariadicParam.Type()
+		for _, arg := range args {
+			if !paramType.Accepts(arg.Type()) {
+				p.appendError("expected variadic type " + paramType.String() + ", found " + arg.Type().String())
+			}
+		}
+		return
+	}
+	if len(args) != 0 {
+		p.appendError("expected no arguments")
+	}
+}
+
+func (p *Parser) parseTerms() []Node {
+	var terms []Node
+	for !p.isTermsEnd() {
+		term := p.parseTerm()
+		if term != nil {
+			terms = append(terms, term)
+		}
+	}
+	return terms
+}
+
+func (p *Parser) isTermsEnd() bool {
+	tt := p.cur.TokenType()
+	return p.isAtEOL() || tt == lexer.RBRACKET || tt == lexer.RCURLY || tt == lexer.RPAREN
+}
+
+func (p *Parser) advancePastNL() {
+	tt := p.cur.TokenType()
+	for tt != lexer.NL && tt != lexer.EOF {
+		p.advance()
+		tt = p.cur.TokenType()
+	}
+	if tt == lexer.NL {
+		p.advance()
+	}
+}
+
+func (p *Parser) isAtEOL() bool {
+	tt := p.cur.TokenType()
+	return tt == lexer.NL || tt == lexer.EOF || tt == lexer.COMMENT
+}
+
+func (p *Parser) assertToken(tt lexer.TokenType) bool {
+	if p.cur.TokenType() != tt {
+		p.appendError("expected token type '" + tt.String() + "', got '" + p.cur.TokenType().String() + "'")
+		return false
+	}
+	return true
+}
+
+func (p *Parser) assertEOL() bool {
+	if !p.isAtEOL() {
+		p.appendError("expected end of line, found '" + p.cur.Format() + "'")
+		return false
+	}
+	return true
+}
+
+func (p *Parser) appendError(message string) {
+	p.errors = append(p.errors, Error{message: message, token: p.cur})
+}
+
+func (p *Parser) appendErrorForToken(message string, token *lexer.Token) {
+	p.errors = append(p.errors, Error{message: message, token: token})
+}
+
+func (p *Parser) parseBlock() *BlockStatement {
+	tok := p.cur
+	var stmts []Node
+	for p.cur.TokenType() != lexer.END && p.cur.TokenType() != lexer.EOF {
+		stmt := p.parseStatement()
+		if stmt != nil {
+			stmts = append(stmts, stmt)
+		}
+	}
+	p.advancePastNL()
+	return &BlockStatement{Token: tok, Statements: stmts}
+}
+
+func (p *Parser) advance() {
+	p.pos++
+	p.cur = p.lookAt(p.pos)
+	p.peek = p.lookAt(p.pos + 1)
+}
+
+func (p *Parser) advanceTo(pos int) {
+	p.pos = pos
+	p.cur = p.lookAt(pos)
+	p.peek = p.lookAt(pos + 1)
+}
+
+func (p *Parser) lookAt(pos int) *lexer.Token {
+	if pos >= len(p.tokens) {
+		return p.tokens[len(p.tokens)-1] // EOF token
+	}
+	return p.tokens[pos]
+}
+
+func (p *Parser) errorsString() string {
+	errs := make([]string, len(p.errors))
+	for i, err := range p.errors {
+		errs[i] = err.String()
+	}
+	return strings.Join(errs, "\n")
+}
+
+// TODO: not yet implemented.
+func (p *Parser) parseReturnStatement() Node {
+	p.advancePastNL()
+	return nil
+}
+
+// TODO: not yet implemented.
+func (p *Parser) parseBreakStatement() Node {
+	p.advancePastNL()
+	return nil
+}
+
+// TODO: not yet implemented.
+func (p *Parser) parseForStatement() Node {
+	p.advancePastNL()
+	p.parseBlock()
+	return nil
+}
+
+// TODO: not yet implemented.
+func (p *Parser) parseWhileStatement() Node {
+	p.advancePastNL()
+	p.parseBlock()
+	return nil
+}
+
+// TODO: not yet implemented.
+func (p *Parser) parseIfStatement() Node {
+	p.advancePastNL()
+	p.parseBlock()
+	return nil
+}
diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go
new file mode 100644
index 00000000..bb250c55
--- /dev/null
+++ b/pkg/parser/parser_test.go
@@ -0,0 +1,148 @@
+package parser
+
+import (
+	"strings"
+	"testing"
+
+	"foxygo.at/evy/pkg/assert"
+)
+
+func TestParseDeclaration(t *testing.T) {
+	tests := map[string][]string{
+		"a := 1":     []string{"a:NUM=1"},
+		"b:bool":     []string{"b:BOOL=false"},
+		"\nb:bool\n": []string{"b:BOOL=false"},
+		`a := "abc"
+		b:bool
+		c := true`: []string{"a:STRING='abc'", "b:BOOL=false", "c:BOOL=true"},
+		"a:num[]":     []string{"a:ARRAY NUM=[]"},
+		"a:num[]{}":   []string{"a:MAP ARRAY NUM={}"},
+		"abc:any[]{}": []string{"abc:MAP ARRAY ANY={}"},
+	}
+	for input, wantSlice := range tests {
+		want := strings.Join(wantSlice, "\n") + "\n"
+		parser := New(input)
+		got := parser.Parse()
+		assert.Equal(t, 0, len(parser.errors), "input: %s\nerrors:\n%s", input, parser.errorsString())
+		assert.Equal(t, want, got.String())
+	}
+}
+
+func TestEmptyProgram(t *testing.T) {
+	tests := []string{
+		"",
+		"\n",
+		"\n\n\n",
+		" ",
+		" \n //adf \n",
+		"//blabla",
+		"//blabla\n",
+		" \n //blabla \n",
+		" \n //blabla",
+	}
+	for _, input := range tests {
+		parser := New(input)
+		got := parser.Parse()
+		assert.Equal(t, 0, len(parser.errors), "input: %s\nerrors:\n%s", input, parser.errorsString())
+		assert.Equal(t, "\n", got.String())
+	}
+}
+
+func TestParseDeclarationError(t *testing.T) {
+	tests := map[string][]string{
+		"a :invalid":    []string{"a:ILLEGAL"},
+		"a :":           []string{"a:ILLEGAL"},
+		"a :\n":         []string{"a:ILLEGAL"},
+		"a ://blabla\n": []string{"a:ILLEGAL"},
+		"a :true":       []string{"a:ILLEGAL"},
+		"a :[]":         []string{"a:ILLEGAL"},
+		"a :num num":    []string{"a:NUM=0"},
+		"a :[]num":      []string{"a:ILLEGAL"},
+		"a :()":         []string{"a:ILLEGAL"},
+		"a :num{}num":   []string{"a:MAP NUM={}"},
+		"a ::":          []string{"a:ILLEGAL"},
+		"a :=:":         []string{"a:ILLEGAL"},
+	}
+	for input, wantSlice := range tests {
+		want := strings.Join(wantSlice, "\n") + "\n"
+		parser := New(input)
+		got := parser.Parse()
+		assert.Equal(t, 1, len(parser.errors), "input: %s\nerrors:\n%s", input, parser.errorsString())
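+		// Declarations render as IDENT:TYPE[=VALUE] via Declaration.String.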
+		assert.Equal(t, want, got.String(), "input: %s", input)
+	}
+}
+
+func TestFunctionCall(t *testing.T) {
+	tests := map[string][]string{
+		"print":               []string{"print()"},
+		"print 123":           []string{"print(123)"},
+		`print 123 "abc"`:     []string{"print(123, 'abc')"},
+		"a:=1 \n print a":     []string{"a:NUM=1", "print(a:NUM)"},
+		"a:string \n print a": []string{"a:STRING=''", "print(a:STRING)"},
+		`a:=true
+		b:string
+		print a b`: []string{"a:BOOL=true", "b:STRING=''", "print(a:BOOL, b:STRING)"},
+	}
+	for input, wantSlice := range tests {
+		want := strings.Join(wantSlice, "\n") + "\n"
+		parser := New(input)
+		got := parser.Parse()
+		assert.Equal(t, 0, len(parser.errors), "input: %s\nerrors: %s", input, parser.errorsString())
+		assert.Equal(t, want, got.String())
+	}
+}
+
+func TestBlock(t *testing.T) {
+	tests := map[string][]string{
+		`if true
+			print "TRUE"
+		end`: []string{""},
+		`if true
+			if 12 > 11
+				print "TRUE"
+			end
+		end`: []string{""},
+	}
+	for input, wantSlice := range tests {
+		want := strings.Join(wantSlice, "\n") + "\n"
+		parser := New(input)
+		got := parser.Parse()
+		assert.Equal(t, 0, len(parser.errors), "input: %s\nerrors: %#v", input, parser.errors)
+		assert.Equal(t, want, got.String())
+	}
+}
+
+func TestToplevelExprFuncCall(t *testing.T) {
+	input := `
+x := len "123"
+`
+	parser := New(input)
+	got := parser.Parse()
+	assert.Equal(t, 0, len(parser.errors), "errors: %#v", parser.errors)
+	want := `
+x:NUM=len('123')
+`[1:]
+	assert.Equal(t, want, got.String())
+}
+
+func TestDemo(t *testing.T) {
+	input := `
+move 10 10
+line 20 20
+
+x := 12
+print "x:" x
+if x > 10
+    print "🍦 big x"
+end`
+	parser := New(input)
+	got := parser.Parse()
+	assert.Equal(t, 2, len(parser.errors), "errors: %#v", parser.errors)
+	assert.Equal(t, "line 2 column 1: unknown function 'move'", parser.errors[0].String())
+	assert.Equal(t, "line 3 column 1: unknown function 'line'", parser.errors[1].String())
+	want := `
+x:NUM=12
+print('x:', x:NUM)
+`[1:]
+	assert.Equal(t, want, got.String())
+}
diff --git a/pkg/parser/type.go b/pkg/parser/type.go
new file mode 100644
index 00000000..9724970b
--- /dev/null
+++ b/pkg/parser/type.go
@@ -0,0 +1,112 @@
+package parser
+
+import "foxygo.at/evy/pkg/lexer"
+
+type TypeName int
+
+const (
+	ILLEGAL TypeName = iota
+	NUM
+	STRING
+	BOOL
+	ANY
+	ARRAY
+	MAP
+	NONE // for functions without return value, declaration statements, etc.
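+
+	// Neither ILLEGAL nor NONE is ever accepted by Type.Accepts: ILLEGAL
+	// marks unparseable type declarations, NONE the absence of a value type.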
+)
+
+var (
+	ILLEGAL_TYPE = &Type{Name: ILLEGAL}
+	NUM_TYPE     = &Type{Name: NUM}
+	BOOL_TYPE    = &Type{Name: BOOL}
+	STRING_TYPE  = &Type{Name: STRING}
+	ANY_TYPE     = &Type{Name: ANY}
+	NONE_TYPE    = &Type{Name: NONE}
+)
+
+func isBasicType(t lexer.TokenType) bool {
+	return t == lexer.NUM || t == lexer.STRING || t == lexer.BOOL || t == lexer.ANY
+}
+
+func basicTypeName(t lexer.TokenType) TypeName {
+	switch t {
+	case lexer.NUM:
+		return NUM
+	case lexer.STRING:
+		return STRING
+	case lexer.BOOL:
+		return BOOL
+	case lexer.ANY:
+		return ANY
+	}
+	return ILLEGAL
+}
+
+func compositeTypeName(t lexer.TokenType) TypeName {
+	switch t {
+	case lexer.LBRACKET:
+		return ARRAY
+	case lexer.LCURLY:
+		return MAP
+	}
+	return ILLEGAL
+}
+
+var typeNameStrings = map[TypeName]string{
+	ILLEGAL: "ILLEGAL",
+	NUM:     "NUM",
+	STRING:  "STRING",
+	BOOL:    "BOOL",
+	ANY:     "ANY",
+	ARRAY:   "ARRAY",
+	MAP:     "MAP",
+	NONE:    "NONE",
+}
+
+func (t TypeName) String() string {
+	if s, ok := typeNameStrings[t]; ok {
+		return s
+	}
+	return "UNKNOWN"
+}
+
+func (t TypeName) GoString() string {
+	return t.String()
+}
+
+type Type struct {
+	Name TypeName // num, string, bool; composite types array, map
+	Sub  *Type    // e.g. `num[]`: Type{Name: ARRAY, Sub: &Type{Name: NUM}}
+}
+
+func (t *Type) String() string {
+	if t.Sub == nil {
+		return t.Name.String()
+	}
+	return t.Name.String() + " " + t.Sub.String()
+}
+
+func (t *Type) Accepts(t2 *Type) bool {
+	if t.acceptsStrict(t2) {
+		return true
+	}
+	n, n2 := t.Name, t2.Name
+	if n == ANY && n2 != ILLEGAL && n2 != NONE {
+		return true
+	}
+	return false
+}
+
+// acceptsStrict requires exact structural equality:
+// any[] (ARRAY ANY) DOES NOT accept num[] (ARRAY NUM).
+func (t *Type) acceptsStrict(t2 *Type) bool {
+	n, n2 := t.Name, t2.Name
+	if n == ILLEGAL || n == NONE || n2 == ILLEGAL || n2 == NONE {
+		return false
+	}
+	if n != n2 {
+		return false
+	}
+	if t.Sub == nil || t2.Sub == nil {
+		return t.Sub == nil && t2.Sub == nil
+	}
+	return t.Sub.acceptsStrict(t2.Sub)
+}
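
The assignability rules above are easiest to see in action. A hypothetical test (not part of this change) illustrating the cases called out in the comments on Accepts and acceptsStrict:

	package parser

	import "testing"

	func TestTypeAccepts(t *testing.T) {
		arrayNum := &Type{Name: ARRAY, Sub: NUM_TYPE}
		arrayAny := &Type{Name: ARRAY, Sub: ANY_TYPE}
		if !ANY_TYPE.Accepts(NUM_TYPE) {
			t.Error("ANY should accept NUM")
		}
		if arrayAny.Accepts(arrayNum) {
			t.Error("ARRAY ANY should not accept ARRAY NUM (no covariance)")
		}
		if !arrayNum.Accepts(&Type{Name: ARRAY, Sub: NUM_TYPE}) {
			t.Error("ARRAY NUM should accept ARRAY NUM")
		}
		if NONE_TYPE.Accepts(NONE_TYPE) {
			t.Error("NONE should not accept anything, including NONE")
		}
	}

Keeping ANY assignable only at the top level, rather than through composite element types, keeps Accepts a simple structural recursion.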