2.4 Parser (basic)

cedrickchee · cedrickchee · commit b54dc0a3c69b · 2020-03-27T03:10:21.000+08:00
diff --git a/ast/ast.go b/ast/ast.go
@@ -0,0 +1,76 @@
+package ast
+
+// Package ast implements the Abstract Syntax Tree (AST) that represents the
+// parsed source code before it is passed on to the interpreter for evaluation.
+
+import "github.com/cedrickchee/hou/token"
+
+// Node is the interface implemented by every node in the AST.
+type Node interface {
+	// TokenLiteral returns the literal value of the token the node is
+	// associated with. It is intended for debugging and testing only.
+	TokenLiteral() string
+}
+
+// Statement is the interface implemented by all statement nodes.
+type Statement interface {
+	// Node is embedded, so every Statement also provides TokenLiteral.
+	Node
+	statementNode()
+}
+
+// Expression is the interface implemented by all expression nodes.
+type Expression interface {
+	// Node is embedded, so every Expression also provides TokenLiteral.
+	Node
+	expressionNode()
+}
+
+// =============================================================================
+// Implementation of Node
+// =============================================================================
+
+// Program is the root node of every AST. Every valid program is a series of
+// statements.
+type Program struct {
+	// Statements holds the nodes that make up the program; each element
+	// implements the Statement interface.
+	Statements []Statement
+}
+
+// TokenLiteral returns the token literal of the program's first statement,
+// or the empty string when the program contains no statements.
+func (p *Program) TokenLiteral() string {
+	if len(p.Statements) == 0 {
+		return ""
+	}
+	return p.Statements[0].TokenLiteral()
+}
+
+// LetStatement is the AST node for a `let` statement, which binds the value
+// produced by an expression to an identifier.
+type LetStatement struct {
+	Token token.Token // the token.LET token
+	// Name holds the identifier of the binding and Value holds the expression
+	// that produces the value.
+	Name  *Identifier
+	Value Expression
+}
+
+func (ls *LetStatement) statementNode() {} // no-op; satisfies Statement
+
+// TokenLiteral returns the literal value of the token stored in this node.
+func (ls *LetStatement) TokenLiteral() string { return ls.Token.Literal }
+
+// Identifier is the AST node for an identifier; Value holds its literal text.
+type Identifier struct {
+	Token token.Token // the token.IDENT token
+	Value string
+}
+
+// expressionNode is a no-op that makes Identifier satisfy the Expression
+// interface. The name of a binding, the x in `let x = 5;`, is an Identifier.
+func (i *Identifier) expressionNode() {}
+
+// TokenLiteral returns the literal value of the token stored in this node.
+func (i *Identifier) TokenLiteral() string { return i.Token.Literal }
diff --git a/parser/parser.go b/parser/parser.go
@@ -0,0 +1,112 @@
+package parser
+
+// Package parser implements the parser that takes as input tokens from the
+// lexer and produces as output an AST (Abstract Syntax Tree).
+
+import (
+	"github.com/cedrickchee/hou/ast"
+	"github.com/cedrickchee/hou/lexer"
+	"github.com/cedrickchee/hou/token"
+)
+
+// Parser reads tokens from a lexer and builds AST nodes from them.
+type Parser struct {
+	l *lexer.Lexer // token source; advanced through its NextToken method
+
+	curToken  token.Token // the token currently being examined
+	peekToken token.Token // the token that follows curToken
+}
+
+// New returns a Parser that reads its tokens from the lexer l.
+func New(l *lexer.Lexer) *Parser {
+	pr := &Parser{l: l}
+
+	// Prime the two-token window: the first call fills peekToken, the second
+	// moves it into curToken and fetches a fresh peekToken.
+	pr.nextToken()
+	pr.nextToken()
+	return pr
+}
+
+// nextToken shifts the token window forward by one: the previous peekToken
+// becomes curToken and a new peekToken is read from the lexer.
+func (p *Parser) nextToken() {
+	p.curToken, p.peekToken = p.peekToken, p.l.NextToken()
+}
+
+// ParseProgram is the parser's entry point. It parses statements one after
+// another until the token.EOF token is reached and returns them under a
+// single root node. Tokens for which no statement is produced are skipped.
+func (p *Parser) ParseProgram() *ast.Program {
+	// Root node of the AST; Statements starts out empty but non-nil.
+	root := &ast.Program{Statements: []ast.Statement{}}
+
+	// Advance one token per iteration until the current token is token.EOF.
+	for ; p.curToken.Type != token.EOF; p.nextToken() {
+		parsed := p.parseStatement()
+		if parsed == nil {
+			// Nothing was produced at this position; move to the next token.
+			continue
+		}
+		root.Statements = append(root.Statements, parsed)
+	}
+	return root
+}
+
+// parseStatement parses the statement at curToken; a failed parse yields nil.
+func (p *Parser) parseStatement() ast.Statement {
+	if p.curToken.Type == token.LET {
+		if stmt := p.parseLetStatement(); stmt != nil { // never box a nil pointer
+			return stmt
+		}
+	}
+	return nil
+}
+
+// parseLetStatement parses `let <identifier> = ...;` with curToken on the LET
+// token. It returns nil if the identifier or the equal sign is missing.
+func (p *Parser) parseLetStatement() *ast.LetStatement {
+	// curToken is the token.LET token that introduced the statement.
+	stmt := &ast.LetStatement{Token: p.curToken}
+
+	// The token after `let` must be the identifier being bound.
+	if !p.expectPeek(token.IDENT) {
+		return nil
+	}
+	stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
+
+	// The identifier must be followed by an equal sign.
+	if !p.expectPeek(token.ASSIGN) {
+		return nil
+	}
+
+	// TODO: expressions are not parsed yet, so skip ahead to the semicolon.
+	// Stop one token before EOF as well: without that guard an input with no
+	// terminating semicolon could keep this loop advancing indefinitely, and
+	// ParseProgram's own nextToken call then lands exactly on the EOF token.
+	for !p.curTokenIs(token.SEMICOLON) && !p.peekTokenIs(token.EOF) {
+		p.nextToken()
+	}
+
+	return stmt
+}
+
+// expectPeek is an assertion helper: it advances to the next token only when
+// that token has type t, and reports whether it did so. On a mismatch the
+// parser's position is left untouched.
+func (p *Parser) expectPeek(t token.TokenType) bool {
+	if !p.peekTokenIs(t) {
+		return false
+	}
+
+	p.nextToken()
+	return true
+}
+
+func (p *Parser) peekTokenIs(t token.TokenType) bool {
+	return p.peekToken.Type == t // true when the token after curToken has type t
+}
+
+func (p *Parser) curTokenIs(t token.TokenType) bool {
+	return p.curToken.Type == t // true when the current token has type t
+}
diff --git a/parser/parser_test.go b/parser/parser_test.go
@@ -0,0 +1,67 @@
+package parser
+
+import (
+	"testing"
+
+	"github.com/cedrickchee/hou/ast"
+	"github.com/cedrickchee/hou/lexer"
+)
+
+// TestLetStatements parses three let statements and checks that each one
+// binds the expected identifier.
+func TestLetStatements(t *testing.T) {
+	input := `
+let x = 5;
+let	y = 10;
+let foobar = 838383;
+`
+	pr := New(lexer.New(input))
+	program := pr.ParseProgram()
+	if program == nil {
+		t.Fatalf("ParseProgram() returned nil")
+	}
+	if got := len(program.Statements); got != 3 {
+		t.Fatalf("program.Statements does not contain 3 statements. got=%d", got)
+	}
+
+	// Expected identifier of each statement, in source order.
+	cases := []struct {
+		expectedIdentifier string
+	}{
+		{"x"},
+		{"y"},
+		{"foobar"},
+	}
+
+	for idx, tc := range cases {
+		// testLetStatement has already reported the failure; stop at the first.
+		if !testLetStatement(t, program.Statements[idx], tc.expectedIdentifier) {
+			return
+		}
+	}
+}
+
+// testLetStatement reports whether s is a *ast.LetStatement whose token
+// literal is "let" and whose identifier is name, logging the first mismatch.
+func testLetStatement(t *testing.T, s ast.Statement, name string) bool {
+	t.Helper() // attribute failures to the calling test's line
+	if s.TokenLiteral() != "let" {
+		t.Errorf("s.TokenLiteral not 'let'. got=%q", s.TokenLiteral())
+		return false
+	}
+	letStmt, ok := s.(*ast.LetStatement)
+	if !ok {
+		t.Errorf("s not *ast.LetStatement. got=%T", s)
+		return false
+	}
+	if letStmt.Name.Value != name {
+		t.Errorf("letStmt.Name.Value not '%s'. got=%s", name, letStmt.Name.Value)
+		return false
+	}
+	// %s needs the literal string; *ast.Identifier itself has no String method.
+	if got := letStmt.Name.TokenLiteral(); got != name {
+		t.Errorf("s.Name not '%s'. got=%s", name, got)
+		return false
+	}
+	return true
+}