From e3f26ee53faf804d50045b2d6f7684d045f97e90 Mon Sep 17 00:00:00 2001 From: Eric Knibbe Date: Tue, 10 Sep 2019 15:03:45 -0400 Subject: [PATCH] Rewrite large portion of Liquid lexer (#1327) This commit is an extensive update to the Liquid lexer. It does the following: - supports negative numbers; - supports `tablerow` block; - supports reversed flag in for-loops; - supports whitespace control dashes in tags; - supports hyphens in variable names; - supports `first`, `last` and `size` dot notation; - supports `echo` and `render` tags; - supports objects within an `include` tag; - tokenises `empty` and `blank` aliases as `Name::Builtin`; - tokenises `capture`'s argument as a `Name::Variable`; - tokenises `include`'s filename argument as `Name::Other`; - tokenises `with`, `for` and `in` as `Name::Tag`; - tokenises `nil` as `Keyword::Constant`; - tokenises `increment` and `decrement` as `Name::Tag`; - tokenises for-loop objects as `Name::Builtin`; - stops treating `not` and `!` as operators; and - requires a filter name to follow a pipe. In addition, this commit refactors the lexer and removes several redundant lines and states. --- lib/rouge/demos/liquid | 1 - lib/rouge/lexers/liquid.rb | 190 ++++++++++++++++--------------------- spec/visual/samples/liquid | 49 +++++----- 3 files changed, 108 insertions(+), 132 deletions(-) diff --git a/lib/rouge/demos/liquid b/lib/rouge/demos/liquid index ec57021b3c..19ed3af281 100644 --- a/lib/rouge/demos/liquid +++ b/lib/rouge/demos/liquid @@ -5,7 +5,6 @@ Only {{ product.price | format_as_money }}

{{ product.description | prettyprint | truncate: 200 }}

- {% endfor %} diff --git a/lib/rouge/lexers/liquid.rb b/lib/rouge/lexers/liquid.rb index d2fdac9398..caab00d028 100644 --- a/lib/rouge/lexers/liquid.rb +++ b/lib/rouge/lexers/liquid.rb @@ -12,12 +12,12 @@ class Liquid < RegexLexer state :root do rule %r/[^\{]+/, Text - rule %r/(\{%)(\s*)/ do + rule %r/(\{%-?)(\s*)/ do groups Punctuation, Text::Whitespace push :tag_or_block end - rule %r/(\{\{)(\s*)/ do + rule %r/(\{\{-?)(\s*)/ do groups Punctuation, Text::Whitespace push :output end @@ -27,78 +27,70 @@ class Liquid < RegexLexer state :tag_or_block do # builtin logic blocks - rule %r/(if|unless|elsif|case)(?=\s+)/, Keyword::Reserved, :condition + rule %r/(if|elsif|unless|case)\b/, Keyword::Reserved, :condition + rule %r/(when)\b/, Keyword::Reserved, :when - rule %r/(when)(\s+)/ do - groups Keyword::Reserved, Text::Whitespace - push :when - end - - rule %r/(else)(\s*)(%\})/ do + rule %r/(else)(\s*)(-?%\})/ do groups Keyword::Reserved, Text::Whitespace, Punctuation pop! end # other builtin blocks - rule %r/(capture)(\s+)([^\s%]+)(\s*)(%\})/ do - groups Name::Tag, Text::Whitespace, Name::Attribute, Text::Whitespace, Punctuation + rule %r/(capture|(?:in|de)crement)(\s+)([^\s%]+)(\s*)(-?%\})/ do + groups Name::Tag, Text::Whitespace, Name::Variable, Text::Whitespace, Punctuation pop! end - rule %r/(comment)(\s*)(%\})/ do + rule %r/(comment)(\s*)(-?%\})/ do groups Name::Tag, Text::Whitespace, Punctuation push :comment end - rule %r/(raw)(\s*)(%\})/ do + rule %r/(raw)(\s*)(-?%\})/ do groups Name::Tag, Text::Whitespace, Punctuation push :raw end - rule %r/assign/, Name::Tag, :assign - rule %r/include/, Name::Tag, :include - # end of block - rule %r/(end(?:case|unless|if))(\s*)(%\})/ do + rule %r/(end(?:if|unless|case))(\s*)(-?%\})/ do groups Keyword::Reserved, Text::Whitespace, Punctuation pop! end - rule %r/(end(?:[^\s%]+))(\s*)(%\})/ do + rule %r/(end(?:[^\s%]+))(\s*)(-?%\})/ do groups Name::Tag, Text::Whitespace, Punctuation pop! 
end # builtin tags - rule %r/(cycle)(\s+)(([^\s:]*)(:))?(\s*)/ do |m| - token Name::Tag, m[1] - token Text::Whitespace, m[2] + rule %r/(assign|echo)\b/, Name::Tag, :assign + rule %r/(include|render)\b/, Name::Tag, :include - if m[4] =~ /'[^']*'/ - token Str::Single, m[4] - elsif m[4] =~ /"[^"]*"/ - token Str::Double, m[4] - else - token Name::Attribute, m[4] - end + rule %r/(cycle)(\s+)(?:([^\s:]*)(\s*)(:))?(\s*)/ do |m| + token_class = case m[3] + when %r/'[^']*'/ then Str::Single + when %r/"[^"]*"/ then Str::Double + else + Name::Attribute + end - token Punctuation, m[5] - token Text::Whitespace, m[6] + groups Name::Tag, Text::Whitespace, token_class, + Text::Whitespace, Punctuation, Text::Whitespace push :variable_tag_markup end # iteration rule %r/ - (for)(\s+) + (for|tablerow)(\s+) ([\w-]+)(\s+) (in)(\s+) ( - (?: [^\s,\|'"] | (?:"[^"]*"|'[^']*') )+ + (?: [^\s%,\|'"] | (?:"[^"]*"|'[^']*') )+ )(\s*) /x do |m| groups Name::Tag, Text::Whitespace, Name::Variable, Text::Whitespace, - Keyword::Reserved, Text::Whitespace + Name::Tag, Text::Whitespace token_class = case m[7] when %r/'[^']*'/ then Str::Single @@ -119,105 +111,76 @@ class Liquid < RegexLexer end state :output do - mixin :whitespace - mixin :generic + rule %r/(\|)(\s*)([a-zA-Z_][^\s}\|:]*)/ do + groups Punctuation, Text::Whitespace, Name::Function + push :filters + end - rule %r/\}\}/, Punctuation, :pop! - rule %r/\|/, Punctuation, :filters + mixin :end_of_tag + mixin :generic end state :filters do - mixin :whitespace - - rule(/\}\}/) { token Punctuation; reset_stack } - - rule %r/([^\s\|:]+)(:?)(\s*)/ do - groups Name::Function, Punctuation, Text::Whitespace - push :filter_markup + rule %r/(\|)(\s*)([a-zA-Z_][^\s%}\|:]*)/ do + groups Punctuation, Text::Whitespace, Name::Function end - end - - state :filter_markup do - rule %r/\|/, Punctuation, :pop! 
mixin :end_of_tag mixin :end_of_block - mixin :default_param_markup + mixin :variable_param_markup end state :condition do - mixin :end_of_block - mixin :whitespace - - rule %r/([=!><]=?)/, Operator - - rule %r/\b(?:(!)|(not\b))/ do - groups Operator, Operator::Word - end - - rule %r/(contains)/, Operator::Word + rule %r/([=!]=|[<>]=?)/, Operator + rule %r/(and|or|contains)\b/, Operator::Word + mixin :end_of_block mixin :generic - mixin :whitespace end state :when do mixin :end_of_block - mixin :whitespace mixin :generic end - state :operator do - rule %r/(\s*)((?:=|!|>|<)=?)(\s*)/ do - groups Text::Whitespace, Operator, Text::Whitespace - pop! - end - - rule %r/(\s*)(\bcontains\b)(\s*)/ do - groups Text::Whitespace, Operator::Word, Text::Whitespace - pop! - end - end - state :end_of_tag do - rule(/\}\}/) { token Punctuation; reset_stack } + rule(/-?\}\}/) { token Punctuation; reset_stack } end state :end_of_block do - rule(/%\}/) { token Punctuation; reset_stack } + rule(/-?%\}/) { token Punctuation; reset_stack } end # states for unknown markup state :param_markup do mixin :whitespace + mixin :keyword mixin :string + mixin :number rule %r/([^\s=:]+)(\s*)(=|:)/ do groups Name::Attribute, Text::Whitespace, Operator end - rule %r/(\{\{)(\s*)([^\s\}])(\s*)(\}\})/ do - groups Punctuation, Text::Whitespace, Text, Text::Whitespace, Punctuation - end - - mixin :number - mixin :keyword - - rule %r/,/, Punctuation + rule %r/[,:]/, Punctuation end state :default_param_markup do mixin :param_markup - rule %r/./, Text + + rule %r/\S+/, Text end state :variable_param_markup do mixin :param_markup mixin :variable - rule %r/./, Text + + rule %r/\S+/, Text end state :tag_markup do + rule %r/(reversed)\b/, Name::Attribute + mixin :end_of_block mixin :default_param_markup end @@ -229,12 +192,14 @@ class Liquid < RegexLexer # states for different values types state :keyword do - rule %r/\b(false|true)\b/, Keyword::Constant + rule %r/(false|true|nil)\b/, Keyword::Constant end state 
:variable do - rule %r/\.(?=\w)/, Punctuation - rule %r/[a-zA-Z_]\w*\??/, Name::Variable + rule %r/(empty|blank|forloop\.[^\s%}\|:]+)\b/, Name::Builtin + rule %r/\.(?=\w)|\[|\]/, Punctuation + rule %r/(first|last|size)\b/, Name::Function + rule %r/[a-zA-Z_][\w-]*\??/, Name::Variable end state :string do @@ -243,21 +208,17 @@ class Liquid < RegexLexer end state :number do + rule %r/-/, Operator rule %r/\d+\.\d+/, Num::Float rule %r/\d+/, Num::Integer end - state :array_index do - rule %r/\[/, Punctuation - rule %r/\]/, Punctuation - end - state :generic do - mixin :array_index + mixin :whitespace mixin :keyword mixin :string - mixin :variable mixin :number + mixin :variable end state :whitespace do @@ -265,18 +226,20 @@ class Liquid < RegexLexer end state :comment do - rule %r/(\{%)(\s*)(endcomment)(\s*)(%\})/ do + rule %r/[^\{]+/, Comment + + rule %r/(\{%-?)(\s*)(endcomment)(\s*)(-?%\})/ do groups Punctuation, Text::Whitespace, Name::Tag, Text::Whitespace, Punctuation reset_stack end - rule %r/./, Comment + rule %r/\{/, Comment end state :raw do rule %r/[^\{]+/, Text - rule %r/(\{%)(\s*)(endraw)(\s*)(%\})/ do + rule %r/(\{%-?)(\s*)(endraw)(\s*)(-?%\})/ do groups Punctuation, Text::Whitespace, Name::Tag, Text::Whitespace, Punctuation reset_stack end @@ -285,27 +248,38 @@ class Liquid < RegexLexer end state :assign do - mixin :whitespace - mixin :end_of_block + rule %r/=/, Operator - rule %r/(\s*)(=)(\s*)/ do - groups Text::Whitespace, Operator, Text::Whitespace + rule %r/(\|)(\s*)([a-zA-Z_][^\s%\|:]*)/ do + groups Punctuation, Text::Whitespace, Name::Function + push :filters end - rule %r/\|/, Punctuation, :filters - + mixin :end_of_block mixin :generic end state :include do - mixin :whitespace - - rule %r/([^\.]+)(\.)(html|liquid)/ do - groups Name::Attribute, Punctuation, Name::Attribute + rule %r/(\{\{-?)(\s*)/ do + groups Punctuation, Text::Whitespace + push :output_embed end + rule %r/(with|for)\b/, Name::Tag + rule %r/[^\s\.]+(\.[^\s\.]+)+\b/, Name::Other + 
mixin :variable_tag_markup end + + state :output_embed do + rule %r/(\|)(\s*)([a-zA-Z_][^\s}\|:]*)/ do + groups Punctuation, Text::Whitespace, Name::Function + end + + rule %r/-?\}\}/, Punctuation, :pop! + + mixin :variable_param_markup + end end end end diff --git a/spec/visual/samples/liquid b/spec/visual/samples/liquid index d141d0401b..3c888f45ce 100644 --- a/spec/visual/samples/liquid +++ b/spec/visual/samples/liquid @@ -10,10 +10,10 @@ Just regular text - what happens? {% comment %} My lovely {{comment}} that is split - accross multiple lines {% comment %} + across multiple lines {% comment %} {% endcomment %} -{% custom_tag params: true %} +{% custom_tag param1: true param2 : nil %} {% custom_block my="abc" c = false %} Just usual {{liquid}}. {% endcustom_block %} @@ -23,14 +23,15 @@ Just regular text - what happens? {{ variable | upcase }} {{ var.field | textilize | markdownify }} {{ var.field.property | textilize | markdownify }} +{{ -3.14 | abs }} {{ 'string' | truncate: 100 param='df"g' }} -{{ variable.nil? }} +{{ variable-nil? }} {% capture name %} -{{ title | downcase }} +{{- title | downcase | slice: -3, 2 -}} {% endcapture %} -{% assign life = 'infinite' | upcase %} +{%- assign life = 'infinite' | upcase -%} {% cycle '1', 2, var %} {% cycle 'group1': '1', var, 2 %} @@ -38,12 +39,12 @@ Just regular text - what happens? {% if a == 'B' %} Testing {{ some }} stuff. -{% elsif a == 'C%}' %} -{% elsif c %} +{% elsif a != 'C%}' %} +{% elsif c and d or e == empty %} {% else %} {% endif %} -{% unless not a %} +{% unless a %} Some {{ output }} right here. {% else %} {% endunless %} @@ -57,14 +58,16 @@ Some other {{ output }}! {% endcase %} {% include file.html param = 'value' param2 = object %} -{% include 'snippet' param = 'value' param2 = object %} +{% include 'snippet', param: 'value', param2: object %} +{% include product_page with products[0] %} +{% include {{product_page | split: "." 
| first}} for products %} {% assign page_has_image = false %} {% assign img_tag = '<' | append: 'img' %} {% if link.object.content contains img_tag %} {% assign src = link.object.content | split: 'src="' %} {% assign src = src[1] | split: '"' | first %} - {% if src %} + {% if src.size %} {% assign page_has_image = true %} {% assign image_src = src | replace: '_small', '' | replace: '_compact', '' | replace: '_medium', '' | replace: '_large', '' | replace: '_grande', '' %} @@ -81,45 +84,45 @@ Some other {{ output }}! {% endif %} -{% for page in site.pages %} +{% tablerow page in site.pages %} {% if page.layout == 'home' %} {{ page.excerpt }} {% endif %} -{% endfor %} +{% endtablerow %} {% for i in (1..5) %} - {% if i == 4 %} + {% if i > 4 %} {% break %} {% else %} {% continue %} {% endif %} {% endfor %} -{% for item in array limit:2 reversed %} - {{ item }} +{% for item in array reversed limit:2 %} + Item {{ forloop.index }}: {{ item.name }} {% endfor %} {% for item in array offset:2 %} - {{ item }} + {% increment var %} {% endfor %} {% for item in array reversed %} - {{ item }} + {% decrement var %} {% endfor %} {% for i in (3..5) %} - {{ i }} + {% render "snippet", number: i %} {% endfor %} -{% assign num = 4 %} -{% for i in (1..num) %} - {{ i }} +{% assign num- = 4 %} +{% for i in (1..num-) %} + {%-if forloop.last-%}{{ i }}{%-endif-%} {% endfor %} {% for char in 'The Quick Brown Fox' %} {{ char | upcase }} {% endfor %} -{% for char in "Hello World" %} - {{ char | upcase }} +{% for char in "Hello World" reversed %} + {% echo char | upcase %} {% endfor %}