From 6e491939e29be4b7cddcf9aaa380f92b3d91bf72 Mon Sep 17 00:00:00 2001 From: Marco Concetto Rudilosso Date: Mon, 10 Jun 2024 15:52:52 +0200 Subject: [PATCH] wip --- example/liquid2profile | 1 + lib/liquid.rb | 2 +- lib/liquid/block_body.rb | 6 +- lib/liquid/expression.rb | 28 ++++- lib/liquid/lexer.rb | 196 ++++++++++++++++++++++++++------- lib/liquid/parse_context.rb | 2 +- lib/liquid/parser.rb | 39 ++++--- lib/liquid/parser_switching.rb | 1 + lib/liquid/range_lookup.rb | 6 +- lib/liquid/tags/for.rb | 13 ++- lib/liquid/tags/if.rb | 4 +- lib/liquid/variable.rb | 20 +++- lib/liquid/variable_lookup.rb | 18 ++- 13 files changed, 261 insertions(+), 75 deletions(-) diff --git a/example/liquid2profile b/example/liquid2profile index 0c0cc9ffd..9cff493f1 100755 --- a/example/liquid2profile +++ b/example/liquid2profile @@ -74,6 +74,7 @@ n_runs = options[:n_runs] code = ARGF.read require "liquid" +# require "liquid/c" cpu_time_start = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID) diff --git a/lib/liquid.rb b/lib/liquid.rb index 7bf5aabb0..fa56d2682 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -63,9 +63,9 @@ module Liquid require 'liquid/interrupts' require 'liquid/strainer_template' require 'liquid/strainer_factory' +require 'liquid/parser_switching' require 'liquid/expression' require 'liquid/context' -require 'liquid/parser_switching' require 'liquid/tag' require 'liquid/tag/disabler' require 'liquid/tag/disableable' diff --git a/lib/liquid/block_body.rb b/lib/liquid/block_body.rb index 61096de80..dd48873f3 100644 --- a/lib/liquid/block_body.rb +++ b/lib/liquid/block_body.rb @@ -246,8 +246,10 @@ def render_node(context, output, node) end def create_variable(token, parse_context) - if token =~ ContentOfVariable - markup = Regexp.last_match(1) + if token.end_with?("}}") + start_markup = token[2] == WhitespaceControl ? 3 : 2 + end_markup = token[-3] == WhitespaceControl ? -3 : -2 + markup = token[start_markup...end_markup] return Variable.new(markup, parse_context) end BlockBody.raise_missing_variable_terminator(token, parse_context) diff --git a/lib/liquid/expression.rb b/lib/liquid/expression.rb index a1426732c..39a6e325e 100644 --- a/lib/liquid/expression.rb +++ b/lib/liquid/expression.rb @@ -20,7 +20,31 @@ class Expression # malicious input as described in https://github.com/Shopify/liquid/issues/1357 RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/ - def self.parse(markup) + include ParserSwitching + + def self.parse(markup, parse_context) + new(markup, parse_context) + end + + private_class_method def self.new(markup, parse_context) + obj = allocate + obj.instance_variable_set(:@markup, markup) + obj.instance_variable_set(:@parse_context, parse_context) + if !parse_context.nil? + obj.strict_parse_with_error_mode_fallback(markup) + else + obj.lax_parse(markup) + end + end + + def strict_parse(markup) + return nil unless markup + + p = Parser.new(markup) + p.expression + end + + def lax_parse(markup) return nil unless markup markup = markup.strip @@ -33,7 +57,7 @@ def self.parse(markup) when INTEGERS_REGEX Regexp.last_match(1).to_i when RANGES_REGEX - RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2)) + RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2), parse_context) when FLOATS_REGEX Regexp.last_match(1).to_f else diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb index 4ce2bc7b9..263791987 100644 --- a/lib/liquid/lexer.rb +++ b/lib/liquid/lexer.rb @@ -1,58 +1,180 @@ # frozen_string_literal: true -require "strscan" module Liquid + class StringScanner + def initialize(string) + @len = string.length + @string = string.freeze + @buffer = IO::Buffer.for(@string) + @pos = 0 + end + + def eos? + @pos >= @len + end + + def peek(n = 0) + return if @pos + n >= @len + @buffer.get_value(:U8, @pos + n) + end + + def match(str) + return if @pos + str.length > @len + + if (@buffer.slice(@pos, str.length) <=> IO::Buffer.for(str)) == 0 + advance(str.length) + end + end + + def match_until(char) + pos = 1 + pos += 1 while peek(pos) != char + if peek(pos) == char + advance(pos + 1) + end + end + + def advance(n = 1) + original_pos = @pos + @pos += n + @string[original_pos, n] + end + + def space?(c) + return false unless c + c == 32 || c == 9 || c == 10 || c == 13 + end + + def skip_spaces + @pos += 1 while @pos < @len && space?(@buffer.get_value(:U8, @pos)) + end + end + class Lexer SPECIALS = { - '|' => :pipe, - '.' => :dot, - ':' => :colon, - ',' => :comma, - '[' => :open_square, - ']' => :close_square, - '(' => :open_round, - ')' => :close_round, - '?' => :question, - '-' => :dash, + '|'.ord => :pipe, + '.'.ord => :dot, + ':'.ord => :colon, + ','.ord => :comma, + '['.ord => :open_square, + ']'.ord => :close_square, + '('.ord => :open_round, + ')'.ord => :close_round, + '?'.ord => :question, + '-'.ord => :dash, }.freeze - IDENTIFIER = /[a-zA-Z_][\w-]*\??/ - SINGLE_STRING_LITERAL = /'[^\']*'/ - DOUBLE_STRING_LITERAL = /"[^\"]*"/ - STRING_LITERAL = Regexp.union(SINGLE_STRING_LITERAL, DOUBLE_STRING_LITERAL) - NUMBER_LITERAL = /-?\d+(\.\d+)?/ - DOTDOT = /\.\./ - COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=\s)/ - WHITESPACE_OR_NOTHING = /\s*/ + + LESS_THAN = '<'.ord + GREATER_THAN = '>'.ord + EQUALS = '='.ord + EXCLAMATION = '!'.ord + QUOTE = '"'.ord + APOSTROPHE = "'".ord + DASH = '-'.ord + DOT = '.'.ord + UNDERSCORE = '_'.ord + QUESTION_MARK = '?'.ord def initialize(input) @ss = StringScanner.new(input) end + def digit?(char) + return false unless char + char >= 48 && char <= 57 + end + + def alpha?(char) + return false unless char + char >= 65 && char <= 90 || char >= 97 && char <= 122 + end + + def identifier?(char) + return false unless char + digit?(char) || alpha?(char) || char == UNDERSCORE || char == DASH + end + def tokenize @output = [] until @ss.eos? - @ss.skip(WHITESPACE_OR_NOTHING) + @ss.skip_spaces break if @ss.eos? - tok = if (t = @ss.scan(COMPARISON_OPERATOR)) - [:comparison, t] - elsif (t = @ss.scan(STRING_LITERAL)) - [:string, t] - elsif (t = @ss.scan(NUMBER_LITERAL)) - [:number, t] - elsif (t = @ss.scan(IDENTIFIER)) - [:id, t] - elsif (t = @ss.scan(DOTDOT)) - [:dotdot, t] - else - c = @ss.getch - if (s = SPECIALS[c]) - [s, c] - else - raise SyntaxError, "Unexpected character #{c}" + + next_char = @ss.peek + case next_char + when LESS_THAN + @output << [:comparison, @ss.match("<=") || @ss.match("<>") || @ss.match("<")] + next + when GREATER_THAN + @output << [:comparison, @ss.match(">=") || @ss.match(">")] + next + when EQUALS + if (match = @ss.match("==")) + @output << [:comparison, match] + next + end + when EXCLAMATION + if (match = @ss.match("!=")) + @output << [:comparison, match] + next + end + when DOT + if (match = @ss.match("..")) + @output << [:dotdot, match] + next + end + end + + if (match = @ss.match("contains")) + @output << [:comparison, match] + next + end + + if next_char == APOSTROPHE || next_char == QUOTE + if (str = @ss.match_until(next_char)) + @output << [:string, str] + next end end - @output << tok + + if next_char == DASH || digit?(next_char) + peek = 1 + has_dot = false + while (peeked = @ss.peek(peek)) + if !has_dot && peeked == DOT + has_dot = true + elsif !digit?(peeked) + break + end + peek += 1 + end + peek -= 1 + + if @ss.peek(peek) == DOT + peek -= 1 + end + + if @ss.peek(peek) != DASH + @output << [:number, @ss.advance(peek)] + next + end + end + + if alpha?(next_char) || next_char == UNDERSCORE + peek = 1 + peek += 1 while identifier?(@ss.peek(peek)) + peek += 1 if @ss.peek(peek) == QUESTION_MARK + @output << [:id, @ss.advance(peek)] + next + end + + if (special = SPECIALS[next_char]) + @output << [special, @ss.advance] + next + else + raise SyntaxError, "Unexpected character #{next_char.chr}" + end end @output << [:end_of_string] diff --git a/lib/liquid/parse_context.rb b/lib/liquid/parse_context.rb index 87570ad52..ea17cc062 100644 --- a/lib/liquid/parse_context.rb +++ b/lib/liquid/parse_context.rb @@ -28,7 +28,7 @@ def new_tokenizer(markup, start_line_number: nil, for_liquid_tag: false) end def parse_expression(markup) - Expression.parse(markup) + Expression.parse(markup, self) end def partial=(value) diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb index 609601ac4..32a94abd5 100644 --- a/lib/liquid/parser.rb +++ b/lib/liquid/parser.rb @@ -2,6 +2,9 @@ module Liquid class Parser + Kwarg = Struct.new(:name, :value) + Arg = Struct.new(:value) + def initialize(input) l = Lexer.new(input) @tokens = l.tokenize @@ -50,8 +53,15 @@ def expression token = @tokens[@p] case token[0] when :id - str = consume - str << variable_lookups + name = consume + lookups = variable_lookups + command_flags = 0 + lookups.each_index do |i| + if VariableLookup::COMMAND_METHODS.include?(lookups[i]) + @command_flags |= 1 << i + end + end + VariableLookup.new_with(name, lookups, command_flags) when :open_square str = consume str << expression @@ -72,31 +82,32 @@ def expression end def argument - str = +"" # might be a keyword argument (identifier: expression) if look(:id) && look(:colon, 1) - str << consume << consume << ' ' + name = consume(:id) + consume(:colon) + value = expression + Kwarg.new(name, value) + else + Arg.new(expression) end - - str << expression - str end def variable_lookups - str = +"" + lookups = [] loop do if look(:open_square) - str << consume - str << expression - str << consume(:close_square) + consume + lookups << expression + consume(:close_square) elsif look(:dot) - str << consume - str << consume(:id) + consume + lookups << consume(:id) else break end end - str + lookups end end end diff --git a/lib/liquid/parser_switching.rb b/lib/liquid/parser_switching.rb index 78afd58a9..8cbc83bc2 100644 --- a/lib/liquid/parser_switching.rb +++ b/lib/liquid/parser_switching.rb @@ -33,6 +33,7 @@ def parse_with_selected_parser(markup) def strict_parse_with_error_context(markup) strict_parse(markup) rescue SyntaxError => e + puts e e.line_number = line_number e.markup_context = markup_context(markup) raise e diff --git a/lib/liquid/range_lookup.rb b/lib/liquid/range_lookup.rb index fd208a676..52a407f5a 100644 --- a/lib/liquid/range_lookup.rb +++ b/lib/liquid/range_lookup.rb @@ -2,9 +2,9 @@ module Liquid class RangeLookup - def self.parse(start_markup, end_markup) - start_obj = Expression.parse(start_markup) - end_obj = Expression.parse(end_markup) + def self.parse(start_markup, end_markup, parse_context = nil) + start_obj = Expression.parse(start_markup, parse_context) + end_obj = Expression.parse(end_markup, parse_context) if start_obj.respond_to?(:evaluate) || end_obj.respond_to?(:evaluate) new(start_obj, end_obj) else diff --git a/lib/liquid/tags/for.rb b/lib/liquid/tags/for.rb index 9205fa42f..bb8b57e30 100644 --- a/lib/liquid/tags/for.rb +++ b/lib/liquid/tags/for.rb @@ -92,8 +92,7 @@ def strict_parse(markup) @variable_name = p.consume(:id) raise SyntaxError, options[:locale].t("errors.syntax.for_invalid_in") unless p.id?('in') - collection_name = p.expression - @collection_name = parse_expression(collection_name) + @collection_name = p.expression @name = "#{@variable_name}-#{collection_name}" @reversed = p.id?('reversed') @@ -104,7 +103,7 @@ def strict_parse(markup) raise SyntaxError, options[:locale].t("errors.syntax.for_invalid_attribute") end p.consume(:colon) - set_attribute(attribute, p.expression) + set_attribute(attribute, p.expression, do_parse: false) end p.consume(:end_of_string) end @@ -174,16 +173,18 @@ def render_segment(context, output, segment) output end - def set_attribute(key, expr) + def set_attribute(key, expr, do_parse: true) case key when 'offset' @from = if expr == 'continue' :continue - else + elsif do_parse parse_expression(expr) + else + expr end when 'limit' - @limit = parse_expression(expr) + @limit = do_parse ? parse_expression(expr) : expr end end diff --git a/lib/liquid/tags/if.rb b/lib/liquid/tags/if.rb index 92ed8aa3b..c76d75313 100644 --- a/lib/liquid/tags/if.rb +++ b/lib/liquid/tags/if.rb @@ -120,9 +120,9 @@ def parse_binary_comparisons(p) end def parse_comparison(p) - a = parse_expression(p.expression) + a = p.expression if (op = p.consume?(:comparison)) - b = parse_expression(p.expression) + b = p.expression Condition.new(a, op, b) else Condition.new(a) diff --git a/lib/liquid/variable.rb b/lib/liquid/variable.rb index 1a4c07198..529696091 100644 --- a/lib/liquid/variable.rb +++ b/lib/liquid/variable.rb @@ -65,11 +65,11 @@ def strict_parse(markup) return if p.look(:end_of_string) - @name = parse_context.parse_expression(p.expression) + @name = p.expression while p.consume?(:pipe) filtername = p.consume(:id) filterargs = p.consume?(:colon) ? parse_filterargs(p) : [] - @filters << parse_filter_expressions(filtername, filterargs) + @filters << parse_strict_filter_expressions(filtername, filterargs) end p.consume(:end_of_string) end @@ -132,6 +132,22 @@ def parse_filter_expressions(filter_name, unparsed_args) result end + def parse_strict_filter_expressions(filter_name, args) + filter_args = [] + keyword_args = nil + args.each do |a| + if a.is_a?(Liquid::Parser::Kwarg) + keyword_args ||= {} + keyword_args[a.name] = a.value + else + filter_args << a.value + end + end + result = [filter_name, filter_args] + result << keyword_args if keyword_args + result + end + def evaluate_filter_expressions(context, filter_args, filter_kwargs) parsed_args = filter_args.map { |expr| context.evaluate(expr) } if filter_kwargs diff --git a/lib/liquid/variable_lookup.rb b/lib/liquid/variable_lookup.rb index 1ade30ef9..3b96c92d4 100644 --- a/lib/liquid/variable_lookup.rb +++ b/lib/liquid/variable_lookup.rb @@ -6,16 +6,24 @@ class VariableLookup attr_reader :name, :lookups - def self.parse(markup) - new(markup) + def self.parse(markup, parse_context = nil) + new(markup, parse_context) end - def initialize(markup) + def self.new_with(name, lookups, command_flags) + instance = allocate + instance.instance_variable_set(:@name, name) + instance.instance_variable_set(:@lookups, lookups) + instance.instance_variable_set(:@command_flags, command_flags) + instance + end + + def initialize(markup, parse_context = nil) lookups = markup.scan(VariableParser) name = lookups.shift if name&.start_with?('[') && name&.end_with?(']') - name = Expression.parse(name[1..-2]) + name = Expression.parse(name[1..-2], parse_context) end @name = name @@ -25,7 +33,7 @@ def initialize(markup) @lookups.each_index do |i| lookup = lookups[i] if lookup&.start_with?('[') && lookup&.end_with?(']') - lookups[i] = Expression.parse(lookup[1..-2]) + lookups[i] = Expression.parse(lookup[1..-2], parse_context) elsif COMMAND_METHODS.include?(lookup) @command_flags |= 1 << i end