fix: raw block not ignoring {% characters, fixes #263
harttle committed Dec 7, 2020
1 parent c8afa39 commit a492d8e
Showing 24 changed files with 153 additions and 81 deletions.
6 changes: 3 additions & 3 deletions bin/character-gen.js
@@ -4,7 +4,7 @@ const isQuote = c => c === '"' || c === "'"
const isOperator = c => '!=<>'.includes(c)
const isNumber = c => c >= '0' && c <= '9'
const isCharacter = c => (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
const isVariable = c => '_-?'.includes(c) || isCharacter(c) || isNumber(c)
const isIdentifier = c => '_-?'.includes(c) || isCharacter(c) || isNumber(c)
const isBlank = c => c === '\n' || c === '\t' || c === ' ' || c === '\r' || c === '\v' || c === '\f'
const isInlineBlank = c => c === '\t' || c === ' ' || c === '\r'
const isSign = c => c === '-' || c === '+'
@@ -15,7 +15,7 @@ const types = []
for (let i = 0; i < 128; i++) {
const c = String.fromCharCode(i)
let n = 0
if (isVariable(c)) n |= 1
if (isIdentifier(c)) n |= 1
if (isOperator(c)) n |= 2
if (isBlank(c)) n |= 4
if (isQuote(c)) n |= 8
@@ -31,7 +31,7 @@ console.log(`
// This file is generated by bin/character-gen.js
// bitmask character types to boost performance
export const TYPES = [${types.join(', ')}]
export const VARIABLE = 1
export const IDENTIFIER = 1
export const OPERATOR = 2
export const BLANK = 4
export const QUOTE = 8
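
Note: the generated TYPES table maps each ASCII code to a bitmask so the tokenizer can classify a character with one array lookup. A minimal standalone sketch of the idea (this is not the generated src/util/character file, just an illustration):

const IDENTIFIER = 1
const OPERATOR = 2
const BLANK = 4
const QUOTE = 8

// Build the lookup table the same way bin/character-gen.js does.
const TYPES: number[] = []
for (let i = 0; i < 128; i++) {
  const c = String.fromCharCode(i)
  let n = 0
  if (/[A-Za-z0-9_?-]/.test(c)) n |= IDENTIFIER
  if ('!=<>'.includes(c)) n |= OPERATOR
  if (' \t\r\n\v\f'.includes(c)) n |= BLANK
  if (c === '"' || c === "'") n |= QUOTE
  TYPES.push(n)
}

// Classification is a single lookup plus a bitwise AND.
console.log(Boolean(TYPES['a'.charCodeAt(0)] & IDENTIFIER)) // true
console.log(Boolean(TYPES['='.charCodeAt(0)] & OPERATOR))   // true
console.log(Boolean(TYPES[' '.charCodeAt(0)] & BLANK))      // true
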
2 changes: 1 addition & 1 deletion src/builtin/tags/assign.ts
@@ -3,7 +3,7 @@ import { Tokenizer, assert, TagImplOptions, TagToken, Context } from '../../type
export default {
parse: function (token: TagToken) {
const tokenizer = new Tokenizer(token.args)
this.key = tokenizer.readWord().content
this.key = tokenizer.readIdentifier().content
tokenizer.skipBlank()
assert(tokenizer.peek() === '=', () => `illegal token ${token.getText()}`)
tokenizer.advance()
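
Note: the assign parse step reads an identifier for the left-hand side, then requires an '='. A rough standalone sketch of that flow — the helpers below are illustrative only, not the real Tokenizer API:

function parseAssign (args: string): { key: string, value: string } {
  let p = 0
  const skipBlank = () => { while (p < args.length && /\s/.test(args[p])) p++ }
  // readIdentifier: consume characters flagged IDENTIFIER (letters, digits, _, -, ?)
  const readIdentifier = () => {
    const begin = p
    while (p < args.length && /[A-Za-z0-9_?-]/.test(args[p])) p++
    return args.slice(begin, p)
  }
  skipBlank()
  const key = readIdentifier()
  skipBlank()
  if (args[p] !== '=') throw new Error(`illegal token assign ${args}`)
  p++ // advance past '='
  return { key, value: args.slice(p).trim() }
}

console.log(parseAssign('foo = "bar"')) // { key: 'foo', value: '"bar"' }
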
2 changes: 1 addition & 1 deletion src/builtin/tags/capture.ts
@@ -25,7 +25,7 @@ export default {
} as TagImplOptions

function readVariableName (tokenizer: Tokenizer) {
const word = tokenizer.readWord().content
const word = tokenizer.readIdentifier().content
if (word) return word
const quoted = tokenizer.readQuoted()
if (quoted) return evalQuotedToken(quoted)
2 changes: 1 addition & 1 deletion src/builtin/tags/decrement.ts
@@ -4,7 +4,7 @@ import { isNumber, stringify } from '../../util/underscore'
export default {
parse: function (token: TagToken) {
const tokenizer = new Tokenizer(token.args)
this.variable = tokenizer.readWord().content
this.variable = tokenizer.readIdentifier().content
},
render: function (context: Context, emitter: Emitter) {
const scope = context.environments
4 changes: 2 additions & 2 deletions src/builtin/tags/for.ts
@@ -8,8 +8,8 @@ export default {
parse: function (token: TagToken, remainTokens: TopLevelToken[]) {
const toknenizer = new Tokenizer(token.args)

const variable = toknenizer.readWord()
const inStr = toknenizer.readWord()
const variable = toknenizer.readIdentifier()
const inStr = toknenizer.readIdentifier()
const collection = toknenizer.readValue()
assert(
variable.size() && inStr.content === 'in' && collection,
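
Note: the assertion above requires exactly an identifier, the keyword `in`, and a value expression, as in `{% for item in products %}`. A simplified, regex-based illustration of that check (standing in for the three tokenizer reads):

const args = 'item in products'
const parsed = args.match(/^\s*([\w?-]+)\s+in\s+(\S.*)$/)
if (!parsed) throw new Error(`illegal tag: for ${args}`)
const [, variable, collection] = parsed
console.log(variable, collection) // item products
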
2 changes: 1 addition & 1 deletion src/builtin/tags/include.ts
@@ -11,7 +11,7 @@ export default {
assert(this.file, () => `illegal argument "${token.args}"`)

const begin = tokenizer.p
const withStr = tokenizer.readWord()
const withStr = tokenizer.readIdentifier()
if (withStr.content === 'with') {
tokenizer.skipBlank()
if (tokenizer.peek() !== ':') {
2 changes: 1 addition & 1 deletion src/builtin/tags/increment.ts
@@ -4,7 +4,7 @@ import { Tokenizer, Emitter, TagToken, Context, TagImplOptions } from '../../typ
export default {
parse: function (token: TagToken) {
const tokenizer = new Tokenizer(token.args)
this.variable = tokenizer.readWord().content
this.variable = tokenizer.readIdentifier().content
},
render: function (context: Context, emitter: Emitter) {
const scope = context.environments
6 changes: 3 additions & 3 deletions src/builtin/tags/render.ts
@@ -15,16 +15,16 @@ export default {
while (!tokenizer.end()) {
tokenizer.skipBlank()
const begin = tokenizer.p
const keyword = tokenizer.readWord()
const keyword = tokenizer.readIdentifier()
if (keyword.content === 'with' || keyword.content === 'for') {
tokenizer.skipBlank()
if (tokenizer.peek() !== ':') {
const value = tokenizer.readValue()
if (value) {
const beforeAs = tokenizer.p
const asStr = tokenizer.readWord()
const asStr = tokenizer.readIdentifier()
let alias
if (asStr.content === 'as') alias = tokenizer.readWord()
if (asStr.content === 'as') alias = tokenizer.readIdentifier()
else tokenizer.p = beforeAs

this[keyword.content] = { value, alias: alias && alias.content }
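
Note: for reference, the `with`/`for` keywords parsed above come from render-tag arguments like the ones below; the partial name 'product' and the products variable are only illustrative. Parsing alone exercises this branch — the partial file is not resolved until render time.

import { Liquid } from 'liquidjs'

const engine = new Liquid()
// bind one value under an alias inside the partial
engine.parse(`{% render 'product' with products[0] as product %}`)
// render the partial once per item in the collection
engine.parse(`{% render 'product' for products as product %}`)
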
4 changes: 2 additions & 2 deletions src/builtin/tags/tablerow.ts
@@ -7,10 +7,10 @@ export default {
parse: function (tagToken: TagToken, remainTokens: TopLevelToken[]) {
const tokenizer = new Tokenizer(tagToken.args)

this.variable = tokenizer.readWord()
this.variable = tokenizer.readIdentifier()
tokenizer.skipBlank()

const tmp = tokenizer.readWord()
const tmp = tokenizer.readIdentifier()
assert(tmp && tmp.content === 'in', () => `illegal tag: ${tagToken.getText()}`)

this.collection = tokenizer.readValue()
4 changes: 2 additions & 2 deletions src/parser/match-operator.ts
@@ -1,4 +1,4 @@
import { VARIABLE } from '../util/character'
import { IDENTIFIER } from '../util/character'

const trie = {
a: { n: { d: { end: true, needBoundary: true } } },
@@ -19,6 +19,6 @@ export function matchOperator (str: string, begin: number, end = str.length) {
if (node['end']) info = node
}
if (!info) return -1
if (info['needBoundary'] && str.charCodeAt(i) & VARIABLE) return -1
if (info['needBoundary'] && str.charCodeAt(i) & IDENTIFIER) return -1
return i
}
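
Note: the `needBoundary` flag guards word operators such as `and` and `contains`: after a match, the next character must not be an identifier character, otherwise e.g. `android` would be read as the operator `and`. A self-contained sketch of that boundary rule (hypothetical helper names, not the matchOperator trie itself):

const isIdentifierChar = (c: string) => /[A-Za-z0-9_?-]/.test(c)

function matchesAndOperator (str: string, begin: number): boolean {
  if (!str.startsWith('and', begin)) return false
  const next = str[begin + 3]
  // needBoundary: the character after the operator must not extend an identifier
  return next === undefined || !isIdentifierChar(next)
}

console.log(matchesAndOperator('a and b', 2)) // true
console.log(matchesAndOperator('android', 0)) // false — `and` is just a prefix of `android`
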
94 changes: 67 additions & 27 deletions src/parser/tokenizer.ts
@@ -1,6 +1,6 @@
import { whiteSpaceCtrl } from './whitespace-ctrl'
import { NumberToken } from '../tokens/number-token'
import { WordToken } from '../tokens/word-token'
import { IdentifierToken } from '../tokens/identifier-token'
import { literalValues } from '../util/literal'
import { LiteralToken } from '../tokens/literal-token'
import { OperatorToken } from '../tokens/operator-token'
@@ -20,12 +20,14 @@ import { ValueToken } from '../tokens/value-token'
import { OutputToken } from '../tokens/output-token'
import { TokenizationError } from '../util/error'
import { NormalizedFullOptions, defaultOptions } from '../liquid-options'
import { TYPES, QUOTE, BLANK, VARIABLE } from '../util/character'
import { TYPES, QUOTE, BLANK, IDENTIFIER } from '../util/character'
import { matchOperator } from './match-operator'

export class Tokenizer {
p = 0
N: number
private rawBeginAt = -1

constructor (
private input: string,
private file: string = ''
@@ -70,7 +72,7 @@ export class Tokenizer {
assert(this.peek() === '|', () => `unexpected token at ${this.snapshot()}`)
this.p++
const begin = this.p
const name = this.readWord()
const name = this.readIdentifier()
if (!name.size()) return null
const args = []
this.skipBlank()
@@ -107,17 +109,18 @@ export class Tokenizer {

readTopLevelToken (options: NormalizedFullOptions): TopLevelToken {
const { tagDelimiterLeft, outputDelimiterLeft } = options
if (this.matchWord(tagDelimiterLeft)) return this.readTagToken(options)
if (this.matchWord(outputDelimiterLeft)) return this.readOutputToken(options)
if (this.rawBeginAt > -1) return this.readEndrawOrRawContent(options)
if (this.match(tagDelimiterLeft)) return this.readTagToken(options)
if (this.match(outputDelimiterLeft)) return this.readOutputToken(options)
return this.readHTMLToken(options)
}

readHTMLToken (options: NormalizedFullOptions): HTMLToken {
const begin = this.p
while (this.p < this.N) {
const { tagDelimiterLeft, outputDelimiterLeft } = options
if (this.matchWord(tagDelimiterLeft)) break
if (this.matchWord(outputDelimiterLeft)) break
if (this.match(tagDelimiterLeft)) break
if (this.match(outputDelimiterLeft)) break
++this.p
}
return new HTMLToken(this.input, begin, this.p, this.file)
@@ -128,34 +131,71 @@ export class Tokenizer {
const { tagDelimiterRight } = options
const begin = this.p
if (this.readTo(tagDelimiterRight) === -1) {
this.mkError(`tag ${this.snapshot(begin)} not closed`, begin)
throw this.mkError(`tag ${this.snapshot(begin)} not closed`, begin)
}
return new TagToken(input, begin, this.p, options, file)
const token = new TagToken(input, begin, this.p, options, file)
if (token.name === 'raw') this.rawBeginAt = begin
return token
}

readOutputToken (options: NormalizedFullOptions): OutputToken {
const { file, input } = this
const { outputDelimiterRight } = options
const begin = this.p
if (this.readTo(outputDelimiterRight) === -1) {
this.mkError(`output ${this.snapshot(begin)} not closed`, begin)
throw this.mkError(`output ${this.snapshot(begin)} not closed`, begin)
}
return new OutputToken(input, begin, this.p, options, file)
}

readEndrawOrRawContent (options: NormalizedFullOptions): HTMLToken | TagToken {
const { tagDelimiterLeft, tagDelimiterRight } = options
const begin = this.p
let leftPos = this.readTo(tagDelimiterLeft) - tagDelimiterLeft.length
while (this.p < this.N) {
if (this.readIdentifier().getText() !== 'endraw') {
leftPos = this.readTo(tagDelimiterLeft) - tagDelimiterLeft.length
continue
}
while (this.p <= this.N) {
if (this.rmatch(tagDelimiterRight)) {
const end = this.p
if (begin === leftPos) {
this.rawBeginAt = -1
return new TagToken(this.input, begin, end, options, this.file)
} else {
this.p = leftPos
return new HTMLToken(this.input, begin, leftPos, this.file)
}
}
if (this.rmatch(tagDelimiterLeft)) break
this.p++
}
}
throw this.mkError(`raw ${this.snapshot(this.rawBeginAt)} not closed`, begin)
}

mkError (msg: string, begin: number) {
throw new TokenizationError(msg, new WordToken(this.input, begin, this.N, this.file))
return new TokenizationError(msg, new IdentifierToken(this.input, begin, this.N, this.file))
}

snapshot (begin: number = this.p) {
return JSON.stringify(ellipsis(this.input.slice(begin), 16))
}

readWord (): WordToken { // rename to identifier
/**
* @deprecated
*/
readWord () {
console.warn('Tokenizer#readWord() will be removed, use #readIdentifier instead')
return this.readIdentifier()
}

readIdentifier (): IdentifierToken {
this.skipBlank()
const begin = this.p
while (this.peekType() & VARIABLE) ++this.p
return new WordToken(this.input, begin, this.p, this.file)
while (this.peekType() & IDENTIFIER) ++this.p
return new IdentifierToken(this.input, begin, this.p, this.file)
}

readHashes () {
@@ -171,7 +211,7 @@ export class Tokenizer {
this.skipBlank()
if (this.peek() === ',') ++this.p
const begin = this.p
const name = this.readWord()
const name = this.readIdentifier()
if (!name.size()) return
let value

@@ -198,7 +238,7 @@ export class Tokenizer {
readTo (end: string): number {
while (this.p < this.N) {
++this.p
if (this.reverseMatchWord(end)) return this.p
if (this.rmatch(end)) return this.p
}
return -1
}
@@ -216,21 +256,21 @@ export class Tokenizer {
return new PropertyAccessToken(prop, [], this.p)
}

const variable = this.readWord()
const variable = this.readIdentifier()
if (!variable.size()) return

let isNumber = variable.isNumber(true)
const props: (QuotedToken | WordToken)[] = []
const props: (QuotedToken | IdentifierToken)[] = []
while (true) {
if (this.peek() === '[') {
isNumber = false
this.p++
const prop = this.readValue() || new WordToken(this.input, this.p, this.p, this.file)
const prop = this.readValue() || new IdentifierToken(this.input, this.p, this.p, this.file)
this.readTo(']')
props.push(prop)
} else if (this.peek() === '.' && this.peek(1) !== '.') { // skip range syntax
this.p++
const prop = this.readWord()
const prop = this.readIdentifier()
if (!prop.size()) break
if (!prop.isNumber()) isNumber = false
props.push(prop)
Expand All @@ -239,7 +279,7 @@ export class Tokenizer {
if (!props.length && literalValues.hasOwnProperty(variable.content)) {
return new LiteralToken(this.input, variable.begin, variable.end, this.file)
}
if (isNumber) return new NumberToken(variable, props[0] as WordToken)
if (isNumber) return new NumberToken(variable, props[0] as IdentifierToken)
return new PropertyAccessToken(variable, props, this.p)
}

@@ -276,22 +316,22 @@ export class Tokenizer {
return new QuotedToken(this.input, begin, this.p, this.file)
}

readFileName (): WordToken {
readFileName (): IdentifierToken {
const begin = this.p
while (!(this.peekType() & BLANK) && this.peek() !== ',' && this.p < this.N) this.p++
return new WordToken(this.input, begin, this.p, this.file)
return new IdentifierToken(this.input, begin, this.p, this.file)
}

matchWord (word: string) {
match (word: string) {
for (let i = 0; i < word.length; i++) {
if (word[i] !== this.input[this.p + i]) return false
}
return true
}

reverseMatchWord (word: string) {
for (let i = 0; i < word.length; i++) {
if (word[word.length - 1 - i] !== this.input[this.p - 1 - i]) return false
rmatch (pattern: string) {
for (let i = 0; i < pattern.length; i++) {
if (pattern[pattern.length - 1 - i] !== this.input[this.p - 1 - i]) return false
}
return true
}
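
Note: end to end, the new rawBeginAt / readEndrawOrRawContent path is what lets `{%` and `{{` pass through a raw block as plain text — the behaviour reported in #263. A usage sketch against the public API:

import { Liquid } from 'liquidjs'

const engine = new Liquid()
const tpl = `{% raw %}{% this is not a tag %} and {{ not an output }}{% endraw %}`

engine.parseAndRender(tpl).then(html => {
  // the inner delimiters are emitted verbatim instead of raising a parse error
  console.log(html) // {% this is not a tag %} and {{ not an output }}
})
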
1 change: 0 additions & 1 deletion src/parser/whitespace-ctrl.ts
@@ -4,7 +4,6 @@ import { NormalizedFullOptions } from '../liquid-options'
import { TYPES, INLINE_BLANK, BLANK } from '../util/character'

export function whiteSpaceCtrl (tokens: Token[], options: NormalizedFullOptions) {
options = { greedy: true, ...options }
let inRaw = false

for (let i = 0; i < tokens.length; i++) {
3 changes: 1 addition & 2 deletions src/template/tag/tag.ts
@@ -1,13 +1,12 @@
import { isFunction } from '../../util/underscore'
import { Liquid } from '../../liquid'
import { TemplateImpl } from '../../template/template-impl'
import { Emitter, Hash, Context, TagImplOptions, TagToken, Template, TopLevelToken } from '../../types'
import { Emitter, Hash, Context, TagToken, Template, TopLevelToken } from '../../types'
import { TagImpl } from './tag-impl'

export class Tag extends TemplateImpl<TagToken> implements Template {
public name: string
private impl: TagImpl
private static impls: { [key: string]: TagImplOptions } = {}

public constructor (token: TagToken, tokens: TopLevelToken[], liquid: Liquid) {
super(token)
4 changes: 2 additions & 2 deletions src/tokens/hash-token.ts
@@ -1,14 +1,14 @@
import { Token } from './token'
import { ValueToken } from './value-token'
import { WordToken } from './word-token'
import { IdentifierToken } from './identifier-token'
import { TokenKind } from '../parser/token-kind'

export class HashToken extends Token {
constructor (
public input: string,
public begin: number,
public end: number,
public name: WordToken,
public name: IdentifierToken,
public value?: ValueToken,
public file?: string
) {
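
Note: a HashToken carries one name/value pair from a tag's named arguments, e.g. `cols:2` in `{% tablerow item in items cols:2 %}`. A toy illustration of the shape such pairs take — comma-separated here for simplicity, and ignoring the quoting and range cases the real Tokenizer handles:

function readHashes (args: string): Record<string, string> {
  const hashes: Record<string, string> = {}
  for (const pair of args.split(',')) {
    const m = pair.match(/^\s*([\w?-]+)\s*:\s*(.+?)\s*$/)
    if (m) hashes[m[1]] = m[2]
  }
  return hashes
}

console.log(readHashes(`cols: 2, limit: 4`)) // { cols: '2', limit: '4' }
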
3 changes: 1 addition & 2 deletions src/tokens/word-token.ts → src/tokens/identifier-token.ts
@@ -2,8 +2,7 @@ import { Token } from './token'
import { NUMBER, TYPES, SIGN } from '../util/character'
import { TokenKind } from '../parser/token-kind'

// a word can be an identifier, a number, a keyword or a single-word-literal
export class WordToken extends Token {
export class IdentifierToken extends Token {
public content: string
constructor (
public input: string,