Skip to content

Commit b54dc0a

Browse files
committed
2.4 Parser (basic)
1 parent 02a5f71 commit b54dc0a

File tree

3 files changed

+255
-0
lines changed

3 files changed

+255
-0
lines changed

ast/ast.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package ast
2+
3+
// Package ast implements the Abstract Syntax Tree (AST) that represents the
4+
// parsed source code before it is passed on to the interpreter for evaluation.
5+
6+
import "github.com/cedrickchee/hou/token"
7+
8+
// Node defines an interface for all nodes in the AST.
9+
type Node interface {
10+
// TokenLiteral returns the literal value of the token the node is
11+
// associated with. It is intended only for debugging and testing.
12+
TokenLiteral() string
13+
}
14+
15+
// Statement defines the interface for all statement nodes.
16+
type Statement interface {
17+
// Node is embedded, so every Statement is also a Node. statementNode is an
// unexported marker method that sets statement nodes apart from expressions.
18+
Node
19+
statementNode()
20+
}
21+
22+
// Expression defines the interface for all expression nodes.
23+
type Expression interface {
24+
// Node is embedded, so every Expression is also a Node. expressionNode is an
// unexported marker method that sets expression nodes apart from statements.
25+
Node
26+
expressionNode()
27+
}
28+
29+
// =============================================================================
30+
// Implementation of Node
31+
// =============================================================================
32+
33+
// Program is the root node of every AST. Every valid program is a series of
34+
// statements.
35+
type Program struct {
36+
// Statements holds the statement nodes that make up the program; each
37+
// element implements the Statement interface.
38+
Statements []Statement
39+
}
40+
41+
// TokenLiteral prints the literal value of the token associated with this node.
42+
func (p *Program) TokenLiteral() string {
43+
if len(p.Statements) > 0 {
44+
return p.Statements[0].TokenLiteral()
45+
} else {
46+
return ""
47+
}
48+
}
49+
50+
// LetStatement is the AST node for a `let` statement, which binds the value
51+
// of an expression to an identifier.
52+
type LetStatement struct {
53+
Token token.Token // the token.LET token
54+
// Name holds the identifier being bound; Value holds the expression that
55+
// produces the bound value.
56+
Name *Identifier
57+
Value Expression
58+
}
59+
60+
// statementNode marks *LetStatement as a Statement; its body is intentionally empty.
func (ls *LetStatement) statementNode() {}
61+
62+
// TokenLiteral returns the literal value of the token associated with this node.
63+
func (ls *LetStatement) TokenLiteral() string {
	// The node's literal is the literal of the token stored in Token.
	tok := ls.Token
	return tok.Literal
}
64+
65+
// Identifier is a node that holds the literal value of an identifier
66+
type Identifier struct {
67+
Token token.Token // the token.IDENT token
68+
Value string // the identifier's name, e.g. the x in `let x = 5;`
69+
}
70+
71+
// expressionNode marks *Identifier as an Expression. Identifier is the node
72+
// that holds the name of a binding, such as the x in `let x = 5;`.
73+
func (i *Identifier) expressionNode() {}
74+
75+
// TokenLiteral returns the literal value of the token associated with this node.
76+
func (i *Identifier) TokenLiteral() string {
	// The node's literal is the literal of the token stored in Token.
	tok := i.Token
	return tok.Literal
}

parser/parser.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
package parser
2+
3+
// Package parser implements the parser that takes as input tokens from the
4+
// lexer and produces as output an AST (Abstract Syntax Tree).
5+
6+
import (
7+
"github.com/cedrickchee/hou/ast"
8+
"github.com/cedrickchee/hou/lexer"
9+
"github.com/cedrickchee/hou/token"
10+
)
11+
12+
// Parser implements the parser.
13+
type Parser struct {
14+
l *lexer.Lexer // source of tokens; read one token at a time by nextToken
15+
16+
curToken token.Token // the token currently being examined
17+
peekToken token.Token // the token that follows curToken (one-token lookahead)
18+
}
19+
20+
// New constructs a new Parser with a Lexer as input.
21+
func New(l *lexer.Lexer) *Parser {
22+
p := &Parser{l: l}
23+
24+
// Read two tokens, so curToken and peekToken are both set.
25+
p.nextToken()
26+
p.nextToken()
27+
28+
return p
29+
}
30+
31+
// Helper method that advances both curToken and peekToken.
32+
func (p *Parser) nextToken() {
33+
p.curToken = p.peekToken
34+
p.peekToken = p.l.NextToken()
35+
}
36+
37+
// ParseProgram starts the parsing process and is the entry point for all other
38+
// sub-parsers that are responsible for other nodes in the AST.
39+
func (p *Parser) ParseProgram() *ast.Program {
40+
// Construct the root node of the AST.
41+
program := &ast.Program{}
42+
program.Statements = []ast.Statement{}
43+
44+
// Iterate over every token in the input until it encounters an token.EOF
45+
// token.
46+
for p.curToken.Type != token.EOF {
47+
stmt := p.parseStatement()
48+
if stmt != nil {
49+
program.Statements = append(program.Statements, stmt)
50+
}
51+
p.nextToken()
52+
}
53+
return program
54+
}
55+
56+
// Parse a statement.
57+
func (p *Parser) parseStatement() ast.Statement {
58+
switch p.curToken.Type {
59+
case token.LET:
60+
return p.parseLetStatement()
61+
default:
62+
return nil
63+
}
64+
}
65+
66+
func (p *Parser) parseLetStatement() *ast.LetStatement {
67+
// Constructs an *ast.LetStatement node with the token it’s currently
68+
// sitting on (a token.LET token).
69+
stmt := &ast.LetStatement{Token: p.curToken}
70+
71+
// Advances the tokens while making assertions about the next token.
72+
if !p.expectPeek(token.IDENT) {
73+
return nil
74+
}
75+
76+
// Use token.IDENT token to construct an *ast.Identifier node.
77+
stmt.Name = &ast.Identifier{Token: p.curToken, Value: p.curToken.Literal}
78+
79+
// Expects an equal sign and jumps over the expression following the
80+
// equal sign.
81+
if !p.expectPeek(token.ASSIGN) {
82+
return nil
83+
}
84+
85+
// TODO: We're skipping the expressions until we
86+
// encounter a semicolon
87+
for !p.curTokenIs(token.SEMICOLON) {
88+
p.nextToken()
89+
}
90+
91+
return stmt
92+
}
93+
94+
// "assertion functions".
95+
// Enforce the correctness of the order of tokens by checking the type of the
96+
// next token.
97+
func (p *Parser) expectPeek(t token.TokenType) bool {
98+
if p.peekTokenIs(t) {
99+
p.nextToken()
100+
return true
101+
} else {
102+
return false
103+
}
104+
}
105+
106+
// peekTokenIs reports whether the lookahead token (peekToken) has type t.
func (p *Parser) peekTokenIs(t token.TokenType) bool {
107+
return p.peekToken.Type == t
108+
}
109+
110+
// curTokenIs reports whether the current token (curToken) has type t.
func (p *Parser) curTokenIs(t token.TokenType) bool {
111+
return p.curToken.Type == t
112+
}

parser/parser_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package parser
2+
3+
import (
4+
"testing"
5+
6+
"github.com/cedrickchee/hou/ast"
7+
"github.com/cedrickchee/hou/lexer"
8+
)
9+
10+
func TestLetStatements(t *testing.T) {
11+
input := `
12+
let x = 5;
13+
let y = 10;
14+
let foobar = 838383;
15+
`
16+
l := lexer.New(input)
17+
p := New(l)
18+
19+
program := p.ParseProgram()
20+
if program == nil {
21+
t.Fatalf("ParseProgram() returned nil")
22+
}
23+
if len(program.Statements) != 3 {
24+
t.Fatalf("program.Statements does not contain 3 statements. got=%d",
25+
len(program.Statements))
26+
}
27+
28+
tests := []struct {
29+
expectedIdentifier string
30+
}{
31+
{"x"},
32+
{"y"},
33+
{"foobar"},
34+
}
35+
36+
for i, tt := range tests {
37+
stmt := program.Statements[i]
38+
if !testLetStatement(t, stmt, tt.expectedIdentifier) {
39+
return
40+
}
41+
}
42+
}
43+
44+
func testLetStatement(t *testing.T, s ast.Statement, name string) bool {
45+
if s.TokenLiteral() != "let" {
46+
t.Errorf("s.TokenLiteral not 'let'. got=%q", s.TokenLiteral())
47+
return false
48+
}
49+
50+
letStmt, ok := s.(*ast.LetStatement)
51+
if !ok {
52+
t.Errorf("s not *ast.LetStatement. got=%T", s)
53+
return false
54+
}
55+
56+
if letStmt.Name.Value != name {
57+
t.Errorf("letStmt.Name.Value not '%s'. got=%s", name, letStmt.Name.Value)
58+
return false
59+
}
60+
61+
if letStmt.Name.TokenLiteral() != name {
62+
t.Errorf("s.Name not '%s'. got=%s", name, letStmt.Name)
63+
return false
64+
}
65+
66+
return true
67+
}

0 commit comments

Comments
 (0)