Skip to content

Commit

Permalink
Merge pull request google#5 from sparkprime/lexer_changes
Browse files Browse the repository at this point in the history
Port lexer changes from google/jsonnet 0c96da7 to 27ddf2c. Fix google#1.
  • Loading branch information
jbeda committed Feb 26, 2016
2 parents 2282fdf + c3f136d commit 04c51f7
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 31 deletions.
70 changes: 41 additions & 29 deletions lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ const (
tokenBraceR
tokenBracketL
tokenBracketR
tokenColon
tokenComma
tokenDollar
tokenDot
Expand Down Expand Up @@ -101,7 +100,6 @@ var tokenKindStrings = []string{
tokenBraceR: "\"}\"",
tokenBracketL: "\"[\"",
tokenBracketR: "\"]\"",
tokenColon: "\":\"",
tokenComma: "\",\"",
tokenDollar: "\"$\"",
tokenDot: "\".\"",
Expand Down Expand Up @@ -197,7 +195,7 @@ func isIdentifier(r rune) bool {

func isSymbol(r rune) bool {
switch r {
case '&', '|', '^', '=', '<', '>', '*', '/', '%', '#':
case '!', '$', ':', '~', '+', '-', '&', '|', '^', '=', '<', '>', '*', '/', '%':
return true
}
return false
Expand Down Expand Up @@ -533,7 +531,7 @@ func (l *lexer) lexIdentifier() {
}

// lexSymbol will lex a token that starts with a symbol. This could be a
// comment, block quote or an operator. This function assumes that the next
// C or C++ comment, block quote or an operator. This function assumes that the next
// rune to be served by the lexer will be the first rune of the new token.
func (l *lexer) lexSymbol() error {
r := l.next()
Expand All @@ -550,16 +548,6 @@ func (l *lexer) lexSymbol() error {
return nil
}

if r == '#' {
l.resetTokenStart() // Throw out the leading #
for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
}
// Leave the '\n' in the lexer to be fodder for the next round
l.backup()
l.addCommentFodder(fodderCommentHash)
return nil
}

if r == '/' && l.peek() == '*' {
commentStartLoc := l.tokenStartLoc
l.next() // consume the '*'
Expand Down Expand Up @@ -640,10 +628,39 @@ func (l *lexer) lexSymbol() error {

// Assume any string of symbols is a single operator.
for r = l.next(); isSymbol(r); r = l.next() {

// The sequence // is not allowed inside an operator (it starts a comment).
if r == '/' && strings.HasPrefix(l.input[l.pos:], "/") {
break
}
// The sequence /* is not allowed inside an operator (it starts a comment).
if r == '/' && strings.HasPrefix(l.input[l.pos:], "*") {
break
}
// The sequence ||| is not allowed inside an operator (it starts a block quote).
if r == '|' && strings.HasPrefix(l.input[l.pos:], "||") {
break
}
}

l.backup()
l.emitToken(tokenOperator)

// Operators are not allowed to end with + - ~ ! unless they are one rune long.
// So, wind it back if we need to, but stop at the first rune.
// This relies on the hack that all operator symbols are ASCII and thus there is
// no need to treat this substring as general UTF-8.
for r = rune(l.input[l.pos - 1]); l.pos > l.tokenStart + 1; l.pos-- {
switch r {
case '+', '-', '~', '!':
continue
}
break
}

if l.input[l.tokenStart:l.pos] == "$" {
l.emitToken(tokenDollar)
} else {
l.emitToken(tokenOperator)
}
return nil
}

Expand All @@ -665,12 +682,8 @@ func lex(fn string, input string) (tokens, error) {
l.emitToken(tokenBracketL)
case ']':
l.emitToken(tokenBracketR)
case ':':
l.emitToken(tokenColon)
case ',':
l.emitToken(tokenComma)
case '$':
l.emitToken(tokenDollar)
case '.':
l.emitToken(tokenDot)
case '(':
Expand All @@ -680,15 +693,6 @@ func lex(fn string, input string) (tokens, error) {
case ';':
l.emitToken(tokenSemicolon)

// Operators
case '!':
if l.peek() == '=' {
_ = l.next()
}
l.emitToken(tokenOperator)
case '~', '+', '-':
l.emitToken(tokenOperator)

case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
l.backup()
err = l.lexNumber()
Expand Down Expand Up @@ -733,6 +737,14 @@ func lex(fn string, input string) (tokens, error) {
r = l.next()
}
}
case '#':
l.resetTokenStart() // Throw out the leading #
for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
}
// Leave the '\n' in the lexer to be fodder for the next round
l.backup()
l.addCommentFodder(fodderCommentHash)

default:
if isIdentifierFirst(r) {
l.backup()
Expand Down
7 changes: 6 additions & 1 deletion lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@ var lexTests = []lexTest{
{"brace R", "}", tokens{{kind: tokenBraceR, data: "}"}}, ""},
{"bracket L", "[", tokens{{kind: tokenBracketL, data: "["}}, ""},
{"bracket R", "]", tokens{{kind: tokenBracketR, data: "]"}}, ""},
{"colon", ":", tokens{{kind: tokenColon, data: ":"}}, ""},
{"colon", ":", tokens{{kind: tokenOperator, data: ":"}}, ""},
{"colon2", "::", tokens{{kind: tokenOperator, data: "::"}}, ""},
{"colon3", ":::", tokens{{kind: tokenOperator, data: ":::"}}, ""},
{"arrow right", "->", tokens{{kind: tokenOperator, data: "->"}}, ""},
{"less than minus", "<-", tokens{{kind: tokenOperator, data: "<"},
{kind: tokenOperator, data: "-"}}, ""},
{"comma", ",", tokens{{kind: tokenComma, data: ","}}, ""},
{"dollar", "$", tokens{{kind: tokenDollar, data: "$"}}, ""},
{"dot", ".", tokens{{kind: tokenDot, data: "."}}, ""},
Expand Down
2 changes: 1 addition & 1 deletion parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func (p *parser) parse(prec precedence) (astNode, error) {
return nil, err
}
var msg astNode
if p.peek().kind == tokenColon {
if p.peek().kind == tokenOperator && p.peek().data == ":" {
p.pop()
msg, err = p.parse(maxPrecedence)
if err != nil {
Expand Down

0 comments on commit 04c51f7

Please sign in to comment.