diff --git a/pkg/lexer/lexer.go b/pkg/lexer/lexer.go index 1b313e2de9..d845de3c0d 100644 --- a/pkg/lexer/lexer.go +++ b/pkg/lexer/lexer.go @@ -18,7 +18,6 @@ import ( type Lexer struct { reader *bufio.Reader buffer *bytes.Buffer - equalsBuffer *bytes.Buffer line int char int charPositionBeforeLineTerminator int @@ -26,13 +25,9 @@ type Lexer struct { // NewLexer initializes a new lexer func NewLexer() *Lexer { - - l := &Lexer{ - equalsBuffer: &bytes.Buffer{}, - buffer: &bytes.Buffer{}, + return &Lexer{ + buffer: &bytes.Buffer{}, } - - return l } // SetInput sets the new reader as input and resets all position stats @@ -63,15 +58,12 @@ func (l *Lexer) Read() (tok token.Token, err error) { return tok, err } - if r != runes.SPACE && - r != runes.TAB && - r != runes.LINETERMINATOR && - r != runes.COMMA { + if !l.runeIsWhitespace(r) { break } } - if tok, matched := l.switchSimpleTokens(pos, r); matched { + if tok, matched := l.matchSingleRuneToken(pos, r); matched { return tok, nil } @@ -91,19 +83,19 @@ func (l *Lexer) Read() (tok token.Token, err error) { return l.readIdent(pos, r) } -func (l *Lexer) swallowWhitespace() error { +func (l *Lexer) swallowWhitespace() (err error) { + + var peeked []byte + for { - next, err := l.reader.Peek(1) + peeked, err = l.reader.Peek(1) if err == io.EOF { return nil } else if err != nil { return err } - if bytes.Equal(next, literal.SPACE) || - bytes.Equal(next, literal.TAB) || - bytes.Equal(next, literal.LINETERMINATOR) || - bytes.Equal(next, literal.COMMA) { + if l.bytesIsWhitespace(peeked) { _, _, err = l.readRune() if err != nil { return err @@ -131,10 +123,10 @@ func (l *Lexer) Peek(ignoreWhitespace bool) (key keyword.Keyword, err error) { return key, err } - return l.keyFromBytes(peeked) + return l.keywordFromBytes(peeked) } -func (l *Lexer) keyFromBytes(b []byte) (key keyword.Keyword, err error) { +func (l *Lexer) keywordFromBytes(b []byte) (key keyword.Keyword, err error) { r, _ := utf8.DecodeRune(b) @@ -208,30 +200,30 @@ func (l *Lexer) peekSpread() (key keyword.Keyword, err error) { } func (l *Lexer) peekIsFloat() (isFloat bool, err error) { - var numBytesToRead int - var bytesPeeked []byte - for err == nil { - numBytesToRead++ - - bytesPeeked, err = l.reader.Peek(numBytesToRead) - if bytes.HasSuffix(bytesPeeked, literal.SPACE) || - bytes.HasSuffix(bytesPeeked, literal.TAB) || - bytes.HasSuffix(bytesPeeked, literal.LINETERMINATOR) || - bytes.HasSuffix(bytesPeeked, literal.COMMA) { - return - } else if bytes.HasSuffix(bytesPeeked, literal.DOT) { - return true, nil - } - } + peeked, err := l.reader.Peek(32) if err == io.EOF { err = nil + } else if err != nil { + return false, err } - return + for pos := range peeked { + r, _ := utf8.DecodeRune(peeked[pos : pos+1]) + + if !isFloat && r == runes.DOT { + isFloat = true + } else if isFloat && r == runes.DOT { + return false, fmt.Errorf("peekIsFloat: invalid input") + } else if !unicode.IsDigit(r) { + break + } + } + + return isFloat, err } -func (l *Lexer) switchSimpleTokens(position position.Position, run rune) (tok token.Token, matched bool) { +func (l *Lexer) matchSingleRuneToken(position position.Position, run rune) (tok token.Token, matched bool) { matched = true @@ -278,32 +270,29 @@ func (l *Lexer) readIdent(position position.Position, beginWith rune) (tok token return tok, err } - var nextRune rune + var peeked []byte + var r rune for { - nextRune, _, err = l.readRune() + peeked, err = l.reader.Peek(1) if err == io.EOF { err = nil break - } - if err != nil { - l.buffer.Reset() + } else if err != nil { return } - if unicode.IsLetter(nextRune) || - unicode.IsDigit(nextRune) || - nextRune == runes.UNDERSCORE || - nextRune == runes.NEGATIVESIGN { - _, err = l.buffer.WriteRune(nextRune) + if l.bytesIsIdent(peeked) { + r, _, err = l.readRune() if err != nil { return tok, err } - } else { - err = l.unreadRune() + + _, err = l.buffer.WriteRune(r) if err != nil { return tok, err } + } else { break } } @@ -312,278 +301,124 @@ func (l *Lexer) readIdent(position position.Position, beginWith rune) (tok token copy(tok.Literal, l.buffer.Bytes()) l.buffer.Reset() - if bytes.Equal(tok.Literal, literal.TRUE) { - tok.Keyword = keyword.TRUE - } else if bytes.Equal(tok.Literal, literal.FALSE) { - tok.Keyword = keyword.FALSE - } else if bytes.Equal(tok.Literal, literal.NULL) { - tok.Keyword = keyword.NULL - } else if bytes.Equal(tok.Literal, literal.ON) { - tok.Keyword = keyword.ON - } else if bytes.Equal(tok.Literal, literal.IMPLEMENTS) { - tok.Keyword = keyword.IMPLEMENTS - } else if bytes.Equal(tok.Literal, literal.SCHEMA) { - tok.Keyword = keyword.SCHEMA - } else if bytes.Equal(tok.Literal, literal.SCALAR) { - tok.Keyword = keyword.SCALAR - } else if bytes.Equal(tok.Literal, literal.TYPE) { - tok.Keyword = keyword.TYPE - } else if bytes.Equal(tok.Literal, literal.INTERFACE) { - tok.Keyword = keyword.INTERFACE - } else if bytes.Equal(tok.Literal, literal.UNION) { - tok.Keyword = keyword.UNION - } else if bytes.Equal(tok.Literal, literal.ENUM) { - tok.Keyword = keyword.ENUM - } else if bytes.Equal(tok.Literal, literal.INPUT) { - tok.Keyword = keyword.INPUT - } else if bytes.Equal(tok.Literal, literal.DIRECTIVE) { - tok.Keyword = keyword.DIRECTIVE - } else if bytes.Equal(tok.Literal, literal.QUERY) { - tok.Keyword = keyword.QUERY - } else if bytes.Equal(tok.Literal, literal.MUTATION) { - tok.Keyword = keyword.MUTATION - } else if bytes.Equal(tok.Literal, literal.SUBSCRIPTION) { - tok.Keyword = keyword.SUBSCRIPTION - } else if bytes.Equal(tok.Literal, literal.FRAGMENT) { - tok.Keyword = keyword.FRAGMENT - } else { - tok.Keyword = keyword.IDENT - } + tok.Keyword = l.identKeywordFromBytes(tok.Literal) return } -func (l *Lexer) isTerminated(input []byte) bool { - return bytes.HasSuffix(input, literal.SPACE) || - bytes.HasSuffix(input, literal.TAB) || - bytes.HasSuffix(input, literal.LINETERMINATOR) || - bytes.HasSuffix(input, literal.COMMA) || - bytes.HasSuffix(input, literal.EQUALS) || - bytes.HasSuffix(input, literal.COLON) || - bytes.HasSuffix(input, literal.CURLYBRACKETOPEN) || - bytes.HasSuffix(input, literal.CURLYBRACKETCLOSE) || - bytes.HasSuffix(input, literal.BRACKETOPEN) || - bytes.HasSuffix(input, literal.BRACKETCLOSE) || - bytes.HasSuffix(input, literal.SQUAREBRACKETOPEN) || - bytes.HasSuffix(input, literal.SQUAREBRACKETCLOSE) || - bytes.HasSuffix(input, literal.PIPE) || - bytes.HasSuffix(input, literal.BANG) || - bytes.HasSuffix(input, literal.AND) || - bytes.HasSuffix(input, literal.DOLLAR) || - bytes.HasSuffix(input, literal.QUOTE) || - bytes.HasSuffix(input, literal.SLASH) || - bytes.HasSuffix(input, literal.BACKSLASH) || - bytes.HasSuffix(input, literal.AT) -} - -func (l *Lexer) peekEOFSafe(n int) ([]byte, error) { - peeked, err := l.reader.Peek(n) - if err == nil || err == io.EOF { - return peeked, nil - } - - return nil, err -} - -func (l *Lexer) peekIdent2() (done bool, k keyword.Keyword, err error) { +const identWantBytes = 13 - peeked, err := l.peekEOFSafe(3) - if err != nil { - return true, k, err - } - - if len(peeked) == 3 && !l.isTerminated(peeked) { - return false, k, err - } - - if bytes.HasPrefix(peeked, literal.ON) { - return true, keyword.ON, nil - } - - return true, keyword.IDENT, nil -} - -func (l *Lexer) peekIdent4() (done bool, k keyword.Keyword, err error) { - - peeked, err := l.peekEOFSafe(5) - if err != nil { - return true, k, err - } - - if len(peeked) == 5 && !l.isTerminated(peeked) { - return false, k, err - } - - if bytes.HasPrefix(peeked, literal.TRUE) { - return true, keyword.TRUE, nil - } else if bytes.HasPrefix(peeked, literal.FALSE) { - return true, keyword.FALSE, nil - } else if bytes.HasPrefix(peeked, literal.NULL) { - return true, keyword.NULL, nil - } else if bytes.HasPrefix(peeked, literal.TYPE) { - return true, keyword.TYPE, nil - } else if bytes.HasPrefix(peeked, literal.ENUM) { - return true, keyword.ENUM, nil - } - - return true, keyword.IDENT, nil -} - -func (l *Lexer) peekIdent5() (done bool, k keyword.Keyword, err error) { +func (l *Lexer) peekIdent() (k keyword.Keyword, err error) { - peeked, err := l.peekEOFSafe(6) + peeked, err := l.peekEOFSafe(identWantBytes) if err != nil { - return true, k, err - } - - if len(peeked) == 6 && !l.isTerminated(peeked) { - return false, k, err - } - - if bytes.HasPrefix(peeked, literal.FALSE) { - return true, keyword.FALSE, nil - } else if bytes.HasPrefix(peeked, literal.UNION) { - return true, keyword.UNION, nil - } else if bytes.HasPrefix(peeked, literal.INPUT) { - return true, keyword.INPUT, nil - } else if bytes.HasPrefix(peeked, literal.QUERY) { - return true, keyword.QUERY, nil + return k, err } - return true, keyword.IDENT, nil -} - -func (l *Lexer) peekIdent6() (done bool, k keyword.Keyword, err error) { - - peeked, err := l.peekEOFSafe(7) - if err != nil { - return true, k, err - } + nonIdentPosition := bytes.IndexFunc(peeked, func(r rune) bool { + return !l.runeIsIdent(r) + }) - if len(peeked) == 7 && !l.isTerminated(peeked) { - return false, k, err + if l.isUnterminatedIdent(identWantBytes, len(peeked), nonIdentPosition) { + return keyword.IDENT, nil } - if bytes.HasPrefix(peeked, literal.SCHEMA) { - return true, keyword.SCHEMA, nil - } else if bytes.HasPrefix(peeked, literal.SCALAR) { - return true, keyword.SCALAR, nil + if !l.isIndexFuncResultUnsatisfied(nonIdentPosition) { + peeked = peeked[:nonIdentPosition] } - return true, keyword.IDENT, nil + return l.identKeywordFromBytes(peeked), nil } -func (l *Lexer) peekIdent8() (done bool, k keyword.Keyword, err error) { - - peeked, err := l.peekEOFSafe(9) - if err != nil { - return true, k, err - } - - if len(peeked) == 9 && !l.isTerminated(peeked) { - return false, k, err - } - - if bytes.HasPrefix(peeked, literal.MUTATION) { - return true, keyword.MUTATION, nil - } else if bytes.HasPrefix(peeked, literal.FRAGMENT) { - return true, keyword.FRAGMENT, nil - } - - return true, keyword.IDENT, nil +func (l *Lexer) isUnterminatedIdent(nWantBytes, nGotBytes, nonIdentPosition int) bool { + return l.isIndexFuncResultUnsatisfied(nonIdentPosition) && nWantBytes == nGotBytes } -func (l *Lexer) peekIdent9() (done bool, k keyword.Keyword, err error) { - - peeked, err := l.peekEOFSafe(10) - if err != nil { - return true, k, err - } - - if len(peeked) == 10 && !l.isTerminated(peeked) { - return false, k, err - } - - if bytes.HasPrefix(peeked, literal.INTERFACE) { - return true, keyword.INTERFACE, nil - } else if bytes.HasPrefix(peeked, literal.DIRECTIVE) { - return true, keyword.DIRECTIVE, nil - } - - return true, keyword.IDENT, nil +func (l *Lexer) isIndexFuncResultUnsatisfied(result int) bool { + return result == -1 } -func (l *Lexer) peekIdent10() (done bool, k keyword.Keyword, err error) { - - peeked, err := l.peekEOFSafe(11) - if err != nil { - return true, k, err - } - - if len(peeked) == 11 && !l.isTerminated(peeked) { - return false, k, err - } - - if bytes.HasPrefix(peeked, literal.IMPLEMENTS) { - return true, keyword.IMPLEMENTS, nil - } - - return true, keyword.IDENT, nil -} - -func (l *Lexer) peekIdent12() (done bool, k keyword.Keyword, err error) { - - peeked, err := l.peekEOFSafe(13) - if err != nil { - return true, k, err - } - - if len(peeked) == 13 && !l.isTerminated(peeked) { - return false, k, err - } - - if bytes.HasPrefix(peeked, literal.SUBSCRIPTION) { - return true, keyword.SUBSCRIPTION, nil +func (l *Lexer) peekEOFSafe(n int) ([]byte, error) { + peeked, err := l.reader.Peek(n) + if err == nil || err == io.EOF { + return peeked, nil } - return true, keyword.IDENT, nil + return nil, err } -func (l *Lexer) peekIdent() (k keyword.Keyword, err error) { - - if done, k, err := l.peekIdent2(); done { - return k, err - } - - if done, k, err := l.peekIdent4(); done { - return k, err - } - - if done, k, err := l.peekIdent5(); done { - return k, err - } - - if done, k, err := l.peekIdent6(); done { - return k, err - } - - if done, k, err := l.peekIdent8(); done { - return k, err - } - - if done, k, err := l.peekIdent9(); done { - return k, err - } - - if done, k, err := l.peekIdent10(); done { - return k, err - } - - if done, k, err := l.peekIdent12(); done { - return k, err +func (l *Lexer) identKeywordFromBytes(ident []byte) (k keyword.Keyword) { + switch len(ident) { + case 2: + if bytes.Equal(ident, literal.ON) { + k = keyword.ON + return + } + case 4: + if bytes.Equal(ident, literal.TRUE) { + k = keyword.TRUE + return + } else if bytes.Equal(ident, literal.NULL) { + k = keyword.NULL + return + } else if bytes.Equal(ident, literal.TYPE) { + k = keyword.TYPE + return + } else if bytes.Equal(ident, literal.ENUM) { + k = keyword.ENUM + return + } + case 5: + if bytes.Equal(ident, literal.FALSE) { + k = keyword.FALSE + return + } else if bytes.Equal(ident, literal.UNION) { + k = keyword.UNION + return + } else if bytes.Equal(ident, literal.INPUT) { + k = keyword.INPUT + return + } else if bytes.Equal(ident, literal.QUERY) { + k = keyword.QUERY + return + } + case 6: + if bytes.Equal(ident, literal.SCHEMA) { + k = keyword.SCHEMA + return + } else if bytes.Equal(ident, literal.SCALAR) { + k = keyword.SCALAR + return + } + case 8: + if bytes.Equal(ident, literal.MUTATION) { + k = keyword.MUTATION + return + } else if bytes.Equal(ident, literal.FRAGMENT) { + k = keyword.FRAGMENT + return + } + case 9: + if bytes.Equal(ident, literal.INTERFACE) { + k = keyword.INTERFACE + return + } else if bytes.Equal(ident, literal.DIRECTIVE) { + k = keyword.DIRECTIVE + return + } + case 10: + if bytes.Equal(ident, literal.IMPLEMENTS) { + k = keyword.IMPLEMENTS + return + } + case 12: + if bytes.Equal(ident, literal.SUBSCRIPTION) { + k = keyword.SUBSCRIPTION + return + } } - return keyword.IDENT, nil + return keyword.IDENT } func (l *Lexer) readVariable(position position.Position) (tok token.Token, err error) { @@ -616,7 +451,7 @@ func (l *Lexer) readSpread(position position.Position) (tok token.Token, err err tok.Position = position - isSpread, err := l.peekEquals([]rune{runes.DOT, runes.DOT}, true, false) + isSpread, err := l.peekEquals([]byte(".."), true, false) if err != nil { return tok, err } @@ -630,103 +465,21 @@ func (l *Lexer) readSpread(position position.Position) (tok token.Token, err err return } -func (l *Lexer) readString(position position.Position) (tok token.Token, err error) { - - tok.Keyword = keyword.STRING - tok.Position = position +func (l *Lexer) readString(pos position.Position) (tok token.Token, err error) { - isMultiLineString, err := l.peekEquals([]rune{runes.QUOTE, runes.QUOTE}, true, true) + isMultiLineString, err := l.peekEquals([]byte(`""`), true, true) if err != nil { return tok, err } if isMultiLineString { - - var escaped bool - - for { - - nextRune, _, err := l.readRune() - if err != nil { - return tok, err - } - - switch nextRune { - case runes.QUOTE: - if escaped { - l.buffer.WriteRune(nextRune) - escaped = false - } else { - - isMultiLineStringEnd, err := l.peekEquals([]rune{runes.QUOTE, runes.QUOTE}, true, true) - if err != nil { - return tok, err - } - - if !isMultiLineStringEnd { - l.buffer.WriteRune(nextRune) - escaped = false - } else { - tok.Literal = make([]byte, l.buffer.Len()) - copy(tok.Literal, l.buffer.Bytes()) - l.buffer.Reset() - tok.Literal = l.trimStartEnd(tok.Literal, literal.LINETERMINATOR) - return tok, nil - } - } - case runes.BACKSLASH: - if escaped { - l.buffer.WriteRune(nextRune) - escaped = false - } else { - escaped = true - } - default: - l.buffer.WriteRune(nextRune) - escaped = false - } - } - - //tok.Literal, err = l.readWriteUntil([]rune{runes.QUOTE, runes.QUOTE, runes.QUOTE}, true) - //tok.Literal = l.trimStartEnd(tok.Literal, literal.LINETERMINATOR) - return + return l.readMultiLineString(pos) } - var escaped bool - - for { - - nextRune, _, err := l.readRune() - if err != nil { - return tok, err - } - - switch nextRune { - case runes.QUOTE: - if escaped { - l.buffer.WriteRune(nextRune) - escaped = false - } else { - tok.Literal = make([]byte, l.buffer.Len()) - copy(tok.Literal, l.buffer.Bytes()) - l.buffer.Reset() - return tok, nil - } - case runes.BACKSLASH: - if escaped { - l.buffer.WriteRune(nextRune) - escaped = false - } else { - escaped = true - } - default: - l.buffer.WriteRune(nextRune) - escaped = false - } - } + return l.readSingleLineString(pos) } -func (l *Lexer) swallow(amount int) error { +func (l *Lexer) swallowAmount(amount int) error { for i := 0; i < amount; i++ { _, _, err := l.readRune() if err != nil { @@ -737,20 +490,10 @@ func (l *Lexer) swallow(amount int) error { return nil } -func (l *Lexer) peekEquals(equals []rune, swallow, returnErrorOnEOF bool) (bool, error) { - - for _, r := range equals { - _, err := l.equalsBuffer.WriteRune(r) - if err != nil { - return false, err - } - } - - equalBytes := l.equalsBuffer.Bytes() - l.equalsBuffer.Reset() +func (l *Lexer) peekEquals(equals []byte, swallow, returnErrorOnEOF bool) (bool, error) { var matches bool - peeked, err := l.reader.Peek(len(equalBytes)) + peeked, err := l.reader.Peek(len(equals)) if !returnErrorOnEOF && err == io.EOF { return false, nil } @@ -759,9 +502,9 @@ func (l *Lexer) peekEquals(equals []rune, swallow, returnErrorOnEOF bool) (bool, return matches, err } - matches = bytes.Equal(equalBytes, peeked) + matches = bytes.Equal(equals, peeked) if swallow && matches { - err = l.swallow(len(equals)) + err = l.swallowAmount(len(equals)) } return matches, err @@ -776,45 +519,13 @@ func (l *Lexer) readDigit(position position.Position, beginWith rune) (tok token return tok, err } - tok.Keyword = keyword.INTEGER - - var totalMatches int - var r rune - - for { - r, _, err = l.readRune() - if err == io.EOF { - err = nil - break - } else if err != nil { - l.buffer.Reset() - return tok, err - } - - if unicode.IsDigit(r) { - _, err = l.buffer.WriteRune(r) - if err != nil { - l.buffer.Reset() - return tok, err - } - - totalMatches++ - - } else { - err = l.unreadRune() - if err != nil { - l.buffer.Reset() - return tok, err - } - break - } + _, err = l.writeNextDigitsToBuffer() + if err != nil { + l.buffer.Reset() + return tok, err } - tok.Literal = make([]byte, l.buffer.Len()) - copy(tok.Literal, l.buffer.Bytes()) - l.buffer.Reset() - - isFloat, err := l.peekEquals([]rune{runes.DOT}, true, false) + isFloat, err := l.peekEquals([]byte("."), true, false) if err != nil { return tok, err } @@ -823,6 +534,12 @@ func (l *Lexer) readDigit(position position.Position, beginWith rune) (tok token return l.readFloat(position, tok.Literal) } + tok.Keyword = keyword.INTEGER + + tok.Literal = make([]byte, l.buffer.Len()) + copy(tok.Literal, l.buffer.Bytes()) + l.buffer.Reset() + return } @@ -830,19 +547,33 @@ func (l *Lexer) readFloat(position position.Position, integerPart []byte) (tok t tok.Position = position - _, err = l.buffer.Write(integerPart) + _, err = l.buffer.WriteRune(runes.DOT) if err != nil { l.buffer.Reset() return tok, err } - _, err = l.buffer.WriteRune(runes.DOT) + totalMatches, err := l.writeNextDigitsToBuffer() if err != nil { l.buffer.Reset() return tok, err } - var totalMatches int + if totalMatches == 0 { + l.buffer.Reset() + return tok, fmt.Errorf("readFloat: expected float part after '.'") + } + + tok.Keyword = keyword.FLOAT + tok.Literal = make([]byte, l.buffer.Len()) + copy(tok.Literal, l.buffer.Bytes()) + l.buffer.Reset() + + return +} + +func (l *Lexer) writeNextDigitsToBuffer() (totalMatches int, err error) { + var r rune for { @@ -851,15 +582,13 @@ func (l *Lexer) readFloat(position position.Position, integerPart []byte) (tok t err = nil break } else if err != nil { - l.buffer.Reset() - return tok, err + return totalMatches, err } if unicode.IsDigit(r) { _, err = l.buffer.WriteRune(r) if err != nil { - l.buffer.Reset() - return tok, err + return totalMatches, err } totalMatches++ @@ -867,22 +596,12 @@ func (l *Lexer) readFloat(position position.Position, integerPart []byte) (tok t } else { err = l.unreadRune() if err != nil { - l.buffer.Reset() - return tok, err + return totalMatches, err } break } } - if totalMatches == 0 { - l.buffer.Reset() - return tok, fmt.Errorf("readFloat: expected float part after '.'") - } - - tok.Keyword = keyword.FLOAT - tok.Literal = make([]byte, l.buffer.Len()) - copy(tok.Literal, l.buffer.Bytes()) - return } @@ -892,10 +611,17 @@ func (l *Lexer) trimStartEnd(input, trim []byte) []byte { func (l *Lexer) readRune() (r rune, position position.Position, err error) { + if l.reader == nil { + return r, position, fmt.Errorf("readRune: reader must not be nil") + } + position.Line = l.line position.Char = l.char r, size, err := l.reader.ReadRune() + if err != nil { + return r, position, err + } if r == runes.LINETERMINATOR { l.charPositionBeforeLineTerminator = l.char @@ -915,7 +641,7 @@ func (l *Lexer) unreadRune() error { return err } - isLineTerminator, err := l.peekEquals([]rune{runes.LINETERMINATOR}, false, false) + isLineTerminator, err := l.peekEquals([]byte("\n"), false, false) if err != nil { return err } @@ -929,3 +655,119 @@ func (l *Lexer) unreadRune() error { return nil } + +func (l *Lexer) runeIsIdent(r rune) bool { + return unicode.IsLetter(r) || + unicode.IsDigit(r) || + r == runes.NEGATIVESIGN || + r == runes.UNDERSCORE +} + +func (l *Lexer) bytesIsIdent(b []byte) bool { + r, _ := utf8.DecodeRune(b) + return l.runeIsIdent(r) +} + +func (l *Lexer) runeIsWhitespace(r rune) bool { + return r == runes.SPACE || + r == runes.TAB || + r == runes.LINETERMINATOR || + r == runes.COMMA +} + +func (l *Lexer) bytesIsWhitespace(b []byte) bool { + return bytes.Equal(b, literal.SPACE) || + bytes.Equal(b, literal.TAB) || + bytes.Equal(b, literal.LINETERMINATOR) || + bytes.Equal(b, literal.COMMA) +} + +func (l *Lexer) readMultiLineString(pos position.Position) (tok token.Token, err error) { + + tok.Keyword = keyword.STRING + tok.Position = pos + + var escaped bool + + for { + + nextRune, _, err := l.readRune() + if err != nil { + return tok, err + } + + switch nextRune { + case runes.QUOTE: + if escaped { + l.buffer.WriteRune(nextRune) + escaped = false + } else { + + isMultiLineStringEnd, err := l.peekEquals([]byte(`""`), true, true) + if err != nil { + return tok, err + } + + if !isMultiLineStringEnd { + l.buffer.WriteRune(nextRune) + escaped = false + } else { + tok.Literal = make([]byte, l.buffer.Len()) + copy(tok.Literal, l.buffer.Bytes()) + l.buffer.Reset() + tok.Literal = l.trimStartEnd(tok.Literal, literal.LINETERMINATOR) + return tok, nil + } + } + case runes.BACKSLASH: + if escaped { + l.buffer.WriteRune(nextRune) + escaped = false + } else { + escaped = true + } + default: + l.buffer.WriteRune(nextRune) + escaped = false + } + } +} + +func (l *Lexer) readSingleLineString(pos position.Position) (tok token.Token, err error) { + + tok.Keyword = keyword.STRING + tok.Position = pos + + var escaped bool + + for { + + nextRune, _, err := l.readRune() + if err != nil { + return tok, err + } + + switch nextRune { + case runes.QUOTE: + if escaped { + l.buffer.WriteRune(nextRune) + escaped = false + } else { + tok.Literal = make([]byte, l.buffer.Len()) + copy(tok.Literal, l.buffer.Bytes()) + l.buffer.Reset() + return tok, nil + } + case runes.BACKSLASH: + if escaped { + l.buffer.WriteRune(nextRune) + escaped = false + } else { + escaped = true + } + default: + l.buffer.WriteRune(nextRune) + escaped = false + } + } +} diff --git a/pkg/lexer/lexer.test b/pkg/lexer/lexer.test new file mode 100755 index 0000000000..4ea5025486 Binary files /dev/null and b/pkg/lexer/lexer.test differ diff --git a/pkg/lexer/lexer_test.go b/pkg/lexer/lexer_test.go index 94820aa4ee..604e2d10bd 100644 --- a/pkg/lexer/lexer_test.go +++ b/pkg/lexer/lexer_test.go @@ -62,6 +62,73 @@ func TestLexerRegressions(t *testing.T) { } } +var _ = Describe("Lexer.Read", func() { + It("should not panic if reader is nil", func() { + lexer := NewLexer() + f := func() { + _, err := lexer.Read() + Expect(err).To(HaveOccurred()) + } + + Expect(f).ShouldNot(Panic()) + }) + It("should read correctly from reader when re-setting input", func() { + lexer := NewLexer() + lexer.SetInput(bytes.NewReader([]byte("x"))) + _, err := lexer.Read() + Expect(err).NotTo(HaveOccurred()) + + lexer.SetInput(bytes.NewReader([]byte("x"))) + x, err := lexer.Read() + Expect(err).NotTo(HaveOccurred()) + Expect(x).To(Equal(token.Token{ + Keyword: keyword.IDENT, + Literal: []byte("x"), + Position: position.Position{ + Line: 1, + Char: 1, + }, + })) + }) + It("should read eof multiple times correctly", func() { + lexer := NewLexer() + lexer.SetInput(bytes.NewReader([]byte("x"))) + + x, err := lexer.Read() + Expect(err).NotTo(HaveOccurred()) + Expect(x).To(Equal(token.Token{ + Keyword: keyword.IDENT, + Literal: []byte("x"), + Position: position.Position{ + Line: 1, + Char: 1, + }, + })) + + eof1, err := lexer.Read() + Expect(err).NotTo(HaveOccurred()) + Expect(eof1).To(Equal(token.Token{ + Keyword: keyword.EOF, + Literal: []byte("eof"), + Position: position.Position{ + Line: 1, + Char: 2, + }, + })) + + eof2, err := lexer.Read() + Expect(err).NotTo(HaveOccurred()) + Expect(eof2).To(Equal(token.Token{ + Keyword: keyword.EOF, + Literal: []byte("eof"), + Position: position.Position{ + Line: 1, + Char: 2, + }, + })) + }) +}) + var _ = Describe("Lexer.Read", func() { type Case struct { @@ -99,6 +166,17 @@ var _ = Describe("Lexer.Read", func() { }, }, }), + Entry("should read integer with comma at the end", Case{ + in: []byte("1337,"), + out: token.Token{ + Keyword: keyword.INTEGER, + Literal: []byte("1337"), + Position: position.Position{ + Line: 1, + Char: 1, + }, + }, + }), Entry("should read float", Case{ in: []byte("13.37"), out: token.Token{ @@ -717,6 +795,20 @@ var _ = Describe("Lexer.Peek()", func() { }, }), }), + Entry("should peek ON with whitespace behind", Case{ + input: []byte("on "), + expectKey: Equal(keyword.ON), + expectErr: BeNil(), + expectNextTokenErr: BeNil(), + expectNextToken: Equal(token.Token{ + Keyword: keyword.ON, + Literal: []byte("on"), + Position: position.Position{ + Line: 1, + Char: 1, + }, + }), + }), Entry("should peek ignore comma", Case{ input: []byte(","), expectKey: Equal(keyword.EOF), @@ -1115,6 +1207,20 @@ var _ = Describe("Lexer.peekIsFloat", func() { ) }) +func BenchmarkPeekIsFloat(b *testing.B) { + input := bytes.NewReader([]byte("13373737.37")) + lexer := NewLexer() + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + input.Seek(0, io.SeekStart) + lexer.SetInput(input) + lexer.peekIsFloat() + } +} + var _ = Describe("Lexer.Read", func() { type Case struct { @@ -1378,6 +1484,35 @@ baz`), }, }, }), + Entry("should read '1,2,3' as three integers", Case{ + in: []byte("1,2,3"), + out: []token.Token{ + { + Keyword: keyword.INTEGER, + Literal: []byte("1"), + Position: position.Position{ + Line: 1, + Char: 1, + }, + }, + { + Keyword: keyword.INTEGER, + Literal: []byte("2"), + Position: position.Position{ + Line: 1, + Char: 3, + }, + }, + { + Keyword: keyword.INTEGER, + Literal: []byte("3"), + Position: position.Position{ + Line: 1, + Char: 5, + }, + }, + }, + }), ) }) diff --git a/pkg/lexer/memprofile.out b/pkg/lexer/memprofile.out new file mode 100644 index 0000000000..982463e78b Binary files /dev/null and b/pkg/lexer/memprofile.out differ diff --git a/pkg/lexer/profile.out b/pkg/lexer/profile.out new file mode 100644 index 0000000000..f53487f3d9 Binary files /dev/null and b/pkg/lexer/profile.out differ diff --git a/pkg/parser/memprofile.out b/pkg/parser/memprofile.out index 568ce40c82..c6f1b6e96b 100644 Binary files a/pkg/parser/memprofile.out and b/pkg/parser/memprofile.out differ diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go index 8aafefbc80..ad4c2dff52 100644 --- a/pkg/parser/parser.go +++ b/pkg/parser/parser.go @@ -32,7 +32,8 @@ func (e errInvalidType) Error() string { // Parser holds the lexer and a buffer for writing literals type Parser struct { - l Lexer + l Lexer + selectionSetBuffers []document.SelectionSet } // Lexer is the interface used by the Parser to lex tokens @@ -45,7 +46,8 @@ type Lexer interface { // NewParser returns a new parser using a buffered runestringer func NewParser() *Parser { return &Parser{ - l: lexer.NewLexer(), + l: lexer.NewLexer(), + selectionSetBuffers: make([]document.SelectionSet, 10), } } @@ -88,3 +90,21 @@ func (p *Parser) peekExpect(expected keyword.Keyword, swallow bool) (matched boo return } + +func (p *Parser) getSelectionSetBuffer() *document.SelectionSet { + + var s document.SelectionSet + + if len(p.selectionSetBuffers) == 0 { + s = make(document.SelectionSet, 10) + } else { + s, p.selectionSetBuffers = p.selectionSetBuffers[0], p.selectionSetBuffers[1:] + s = s[:0] + } + + return &s +} + +func (p *Parser) putSelectionSet(set *document.SelectionSet) { + p.selectionSetBuffers = append(p.selectionSetBuffers, *set) +} diff --git a/pkg/parser/parser.test b/pkg/parser/parser.test index f32026a8df..f347a08606 100755 Binary files a/pkg/parser/parser.test and b/pkg/parser/parser.test differ diff --git a/pkg/parser/profile.out b/pkg/parser/profile.out index b19ca1a0d3..f1d17202c7 100644 Binary files a/pkg/parser/profile.out and b/pkg/parser/profile.out differ diff --git a/pkg/parser/selectionset_parser.go b/pkg/parser/selectionset_parser.go index 99b81268cd..4435d0564e 100644 --- a/pkg/parser/selectionset_parser.go +++ b/pkg/parser/selectionset_parser.go @@ -16,24 +16,34 @@ func (p *Parser) parseSelectionSet() (selectionSet document.SelectionSet, err er return } + buffer := p.getSelectionSetBuffer() + for { next, err := p.l.Peek(true) if err != nil { + p.putSelectionSet(buffer) return selectionSet, err } if next == keyword.CURLYBRACKETCLOSE { _, err = p.l.Read() + + selectionSet = make(document.SelectionSet, len(*buffer)) + copy(selectionSet, *buffer) + + p.putSelectionSet(buffer) + return selectionSet, err } selection, err := p.parseSelection() if err != nil { + p.putSelectionSet(buffer) return selectionSet, err } - selectionSet = append(selectionSet, selection) + *buffer = append(*buffer, selection) } }