Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
Maaarcocr committed Jun 10, 2024
1 parent 9230471 commit 6e49193
Show file tree
Hide file tree
Showing 13 changed files with 261 additions and 75 deletions.
1 change: 1 addition & 0 deletions example/liquid2profile
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ n_runs = options[:n_runs]
code = ARGF.read

require "liquid"
# require "liquid/c"

cpu_time_start = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)

Expand Down
2 changes: 1 addition & 1 deletion lib/liquid.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ module Liquid
require 'liquid/interrupts'
require 'liquid/strainer_template'
require 'liquid/strainer_factory'
require 'liquid/parser_switching'
require 'liquid/expression'
require 'liquid/context'
require 'liquid/parser_switching'
require 'liquid/tag'
require 'liquid/tag/disabler'
require 'liquid/tag/disableable'
Expand Down
6 changes: 4 additions & 2 deletions lib/liquid/block_body.rb
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,10 @@ def render_node(context, output, node)
end

def create_variable(token, parse_context)
if token =~ ContentOfVariable
markup = Regexp.last_match(1)
if token.end_with?("}}")
start_markup = token[2] == WhitespaceControl ? 3 : 2
end_markup = token[-3] == WhitespaceControl ? -3 : -2
markup = token[start_markup...end_markup]
return Variable.new(markup, parse_context)
end
BlockBody.raise_missing_variable_terminator(token, parse_context)
Expand Down
28 changes: 26 additions & 2 deletions lib/liquid/expression.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,31 @@ class Expression
# malicious input as described in https://github.com/Shopify/liquid/issues/1357
RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/

def self.parse(markup)
include ParserSwitching

# Parses +markup+ into a Liquid expression value.
#
# @param markup [String, nil] raw expression source.
# @param parse_context [Object, nil] parse context used to pick the parser.
#   It is optional so that existing one-argument callers
#   (`Expression.parse(markup)`) keep working; when it is nil the private
#   constructor falls back to lax parsing. RangeLookup.parse uses the same
#   `parse_context = nil` default.
# @return [Object] whatever the private constructor returns for this markup.
def self.parse(markup, parse_context = nil)
  new(markup, parse_context)
end

# Private factory behind Expression.parse.
#
# Note that this does NOT return the allocated instance: the instance only
# carries @markup / @parse_context while parsing, and the value returned is
# the result of the selected parse method.
#
# @param markup [String, nil] raw expression source.
# @param parse_context [Object, nil] nil selects lax parsing; anything else
#   goes through strict_parse_with_error_mode_fallback.
private_class_method def self.new(markup, parse_context)
  parser = allocate
  parser.instance_variable_set(:@markup, markup)
  parser.instance_variable_set(:@parse_context, parse_context)
  return parser.lax_parse(markup) if parse_context.nil?

  parser.strict_parse_with_error_mode_fallback(markup)
end

# Strict parsing: hands the markup to Parser and returns the result of
# Parser#expression.
#
# @param markup [String, nil] raw expression source.
# @return [Object, nil] nil when +markup+ is nil/false, otherwise the
#   parsed expression.
def strict_parse(markup)
  return unless markup

  Parser.new(markup).expression
end

def lax_parse(markup)
return nil unless markup

markup = markup.strip
Expand All @@ -33,7 +57,7 @@ def self.parse(markup)
when INTEGERS_REGEX
Regexp.last_match(1).to_i
when RANGES_REGEX
RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2))
RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2), parse_context)
when FLOATS_REGEX
Regexp.last_match(1).to_f
else
Expand Down
196 changes: 159 additions & 37 deletions lib/liquid/lexer.rb
Original file line number Diff line number Diff line change
@@ -1,58 +1,180 @@
# frozen_string_literal: true

require "strscan"
module Liquid
# Minimal byte-oriented scanner used by Lexer.
#
# Every position and length handled here is a BYTE offset into the source
# string, and #peek returns byte values (Integers), so callers compare the
# result against values such as '"'.ord. Lengths are therefore taken with
# String#bytesize and slices with String#byteslice; mixing in character
# based String#length / String#[] would truncate or mis-slice any input
# containing multibyte characters.
#
# Byte access goes through String#getbyte, which is constant-time and does
# not need an auxiliary buffer object.
class StringScanner
  # @param string [String] source to scan. It is frozen so that the byte
  #   offsets tracked here cannot be invalidated by later mutation.
  def initialize(string)
    @string = string.freeze
    @len = @string.bytesize
    @pos = 0
  end

  # @return [Boolean] true once the cursor has reached the end of the input.
  def eos?
    @pos >= @len
  end

  # Looks at a byte without consuming it.
  #
  # @param n [Integer] offset, in bytes, relative to the cursor.
  # @return [Integer, nil] the byte value, or nil when the offset lies
  #   outside the input.
  def peek(n = 0)
    offset = @pos + n
    return if offset.negative? || offset >= @len

    @string.getbyte(offset)
  end

  # Consumes +str+ if the input continues with exactly those bytes.
  #
  # @param str [String] literal to match at the cursor.
  # @return [String, nil] the consumed text, or nil (cursor untouched) when
  #   the input does not continue with +str+.
  def match(str)
    size = str.bytesize
    return if @pos + size > @len

    candidate = @string.byteslice(@pos, size)
    return unless candidate == str

    @pos += size
    candidate
  end

  # Consumes from the cursor up to and including the next occurrence of
  # +char+, searching from the byte AFTER the cursor (the byte at the cursor
  # is assumed to be the opening delimiter).
  #
  # @param char [Integer] terminating byte value.
  # @return [String, nil] the consumed text including both delimiters, or
  #   nil (cursor untouched) when the terminator never appears. The loop
  #   stops at end of input instead of spinning forever on an unterminated
  #   literal.
  def match_until(char)
    offset = 1
    while (byte = peek(offset))
      return advance(offset + 1) if byte == char

      offset += 1
    end
    nil
  end

  # Moves the cursor forward unconditionally.
  #
  # @param n [Integer] number of bytes to consume.
  # @return [String, nil] the bytes that were skipped over.
  def advance(n = 1)
    start = @pos
    @pos += n
    @string.byteslice(start, n)
  end

  # @param c [Integer, nil] byte value.
  # @return [Boolean] true for space, tab, line feed and carriage return.
  def space?(c)
    c == 32 || c == 9 || c == 10 || c == 13
  end

  # Advances the cursor past any run of whitespace bytes.
  def skip_spaces
    @pos += 1 while @pos < @len && space?(@string.getbyte(@pos))
  end
end

class Lexer
SPECIALS = {
'|' => :pipe,
'.' => :dot,
':' => :colon,
',' => :comma,
'[' => :open_square,
']' => :close_square,
'(' => :open_round,
')' => :close_round,
'?' => :question,
'-' => :dash,
'|'.ord => :pipe,
'.'.ord => :dot,
':'.ord => :colon,
','.ord => :comma,
'['.ord => :open_square,
']'.ord => :close_square,
'('.ord => :open_round,
')'.ord => :close_round,
'?'.ord => :question,
'-'.ord => :dash,
}.freeze
IDENTIFIER = /[a-zA-Z_][\w-]*\??/
SINGLE_STRING_LITERAL = /'[^\']*'/
DOUBLE_STRING_LITERAL = /"[^\"]*"/
STRING_LITERAL = Regexp.union(SINGLE_STRING_LITERAL, DOUBLE_STRING_LITERAL)
NUMBER_LITERAL = /-?\d+(\.\d+)?/
DOTDOT = /\.\./
COMPARISON_OPERATOR = /==|!=|<>|<=?|>=?|contains(?=\s)/
WHITESPACE_OR_NOTHING = /\s*/

LESS_THAN = '<'.ord
GREATER_THAN = '>'.ord
EQUALS = '='.ord
EXCLAMATION = '!'.ord
QUOTE = '"'.ord
APOSTROPHE = "'".ord
DASH = '-'.ord
DOT = '.'.ord
UNDERSCORE = '_'.ord
QUESTION_MARK = '?'.ord

# Wraps +input+ in a StringScanner; #tokenize reads from it through @ss.
#
# @param input [String] the markup to tokenize.
def initialize(input)
@ss = StringScanner.new(input)
end

# @param char [Integer, nil] byte value (nil means "no byte available").
# @return [Boolean] true when +char+ is an ASCII digit, i.e. lies in
#   48..57 ('0'..'9').
def digit?(char)
  char ? char.between?(48, 57) : false
end

# @param char [Integer, nil] byte value (nil means "no byte available").
# @return [Boolean] true when +char+ is an ASCII letter: 65..90 ('A'..'Z')
#   or 97..122 ('a'..'z').
def alpha?(char)
  return false unless char

  char.between?(65, 90) || char.between?(97, 122)
end

# @param char [Integer, nil] byte value (nil means "no byte available").
# @return [Boolean] true when +char+ may appear inside an identifier:
#   an ASCII letter or digit, UNDERSCORE, or DASH.
def identifier?(char)
  case char
  when nil, false then false
  when UNDERSCORE, DASH then true
  else digit?(char) || alpha?(char)
  end
end

def tokenize
@output = []

until @ss.eos?
@ss.skip(WHITESPACE_OR_NOTHING)
@ss.skip_spaces
break if @ss.eos?
tok = if (t = @ss.scan(COMPARISON_OPERATOR))
[:comparison, t]
elsif (t = @ss.scan(STRING_LITERAL))
[:string, t]
elsif (t = @ss.scan(NUMBER_LITERAL))
[:number, t]
elsif (t = @ss.scan(IDENTIFIER))
[:id, t]
elsif (t = @ss.scan(DOTDOT))
[:dotdot, t]
else
c = @ss.getch
if (s = SPECIALS[c])
[s, c]
else
raise SyntaxError, "Unexpected character #{c}"

next_char = @ss.peek
case next_char
when LESS_THAN
@output << [:comparison, @ss.match("<=") || @ss.match("<>") || @ss.match("<")]
next
when GREATER_THAN
@output << [:comparison, @ss.match(">=") || @ss.match(">")]
next
when EQUALS
if (match = @ss.match("=="))
@output << [:comparison, match]
next
end
when EXCLAMATION
if (match = @ss.match("!="))
@output << [:comparison, match]
next
end
when DOT
if (match = @ss.match(".."))
@output << [:dotdot, match]
next
end
end

if (match = @ss.match("contains"))
@output << [:comparison, match]
next
end

if next_char == APOSTROPHE || next_char == QUOTE
if (str = @ss.match_until(next_char))
@output << [:string, str]
next
end
end
@output << tok

if next_char == DASH || digit?(next_char)
peek = 1
has_dot = false
while (peeked = @ss.peek(peek))
if !has_dot && peeked == DOT
has_dot = true
elsif !digit?(peeked)
break
end
peek += 1
end
peek -= 1

if @ss.peek(peek) == DOT
peek -= 1
end

if @ss.peek(peek) != DASH
@output << [:number, @ss.advance(peek)]
next
end
end

if alpha?(next_char) || next_char == UNDERSCORE
peek = 1
peek += 1 while identifier?(@ss.peek(peek))
peek += 1 if @ss.peek(peek) == QUESTION_MARK
@output << [:id, @ss.advance(peek)]
next
end

if (special = SPECIALS[next_char])
@output << [special, @ss.advance]
next
else
raise SyntaxError, "Unexpected character #{next_char.chr}"
end
end

@output << [:end_of_string]
Expand Down
2 changes: 1 addition & 1 deletion lib/liquid/parse_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def new_tokenizer(markup, start_line_number: nil, for_liquid_tag: false)
end

def parse_expression(markup)
Expression.parse(markup)
Expression.parse(markup, self)
end

def partial=(value)
Expand Down
39 changes: 25 additions & 14 deletions lib/liquid/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

module Liquid
class Parser
Kwarg = Struct.new(:name, :value)
Arg = Struct.new(:value)

def initialize(input)
l = Lexer.new(input)
@tokens = l.tokenize
Expand Down Expand Up @@ -50,8 +53,15 @@ def expression
token = @tokens[@p]
case token[0]
when :id
str = consume
str << variable_lookups
name = consume
lookups = variable_lookups
# Bit i of command_flags is set when lookups[i] is one of
# VariableLookup::COMMAND_METHODS. The flags must accumulate in this local,
# because it is the value handed to VariableLookup.new_with right after the
# loop; writing to an instance variable instead would leave the local at 0.
command_flags = 0
lookups.each_with_index do |lookup, i|
  command_flags |= 1 << i if VariableLookup::COMMAND_METHODS.include?(lookup)
end
VariableLookup.new_with(name, lookups, command_flags)
when :open_square
str = consume
str << expression
Expand All @@ -72,31 +82,32 @@ def expression
end

def argument
str = +""
# might be a keyword argument (identifier: expression)
if look(:id) && look(:colon, 1)
str << consume << consume << ' '
name = consume(:id)
consume(:colon)
value = expression
Kwarg.new(name, value)
else
Arg.new(expression)
end

str << expression
str
end

def variable_lookups
str = +""
lookups = []
loop do
if look(:open_square)
str << consume
str << expression
str << consume(:close_square)
consume
lookups << expression
consume(:close_square)
elsif look(:dot)
str << consume
str << consume(:id)
consume
lookups << consume(:id)
else
break
end
end
str
lookups
end
end
end
1 change: 1 addition & 0 deletions lib/liquid/parser_switching.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def parse_with_selected_parser(markup)
def strict_parse_with_error_context(markup)
strict_parse(markup)
rescue SyntaxError => e
puts e
e.line_number = line_number
e.markup_context = markup_context(markup)
raise e
Expand Down
6 changes: 3 additions & 3 deletions lib/liquid/range_lookup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

module Liquid
class RangeLookup
def self.parse(start_markup, end_markup)
start_obj = Expression.parse(start_markup)
end_obj = Expression.parse(end_markup)
def self.parse(start_markup, end_markup, parse_context = nil)
start_obj = Expression.parse(start_markup, parse_context)
end_obj = Expression.parse(end_markup, parse_context)
if start_obj.respond_to?(:evaluate) || end_obj.respond_to?(:evaluate)
new(start_obj, end_obj)
else
Expand Down
Loading

0 comments on commit 6e49193

Please sign in to comment.