Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions spec/compiler/lexer/lexer_spec.cr
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
require "../../support/syntax"

private def it_lexes(string, type)
private def it_lexes(string, type, *, slash_is_regex : Bool? = nil)
it "lexes #{string.inspect}" do
lexer = Lexer.new string
unless (v = slash_is_regex).nil?
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace simply by if v = slash_is_regex

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not the same. slash_is_regex is Bool? so you need to test for .nil?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok 👍 - the language evaluate if the expression is true and not nil, and we just want to know if it isn't nil.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be abusing that the default value of slash_is_regex is false.

lexer.slash_is_regex = v
end
token = lexer.next_token
token.type.should eq(type)
end
Expand Down Expand Up @@ -99,7 +102,7 @@ end

private def it_lexes_operators(ops)
ops.each do |op|
it_lexes op.to_s, op
it_lexes op.to_s, op, slash_is_regex: false
end
end

Expand Down Expand Up @@ -244,9 +247,9 @@ describe "Lexer" do
it_lexes_char "'\\\\'", '\\'
assert_syntax_error "'", "unterminated char literal"
assert_syntax_error "'\\", "unterminated char literal"
it_lexes_operators [:"=", :"<", :"<=", :">", :">=", :"+", :"-", :"*", :"(", :")",
it_lexes_operators [:"=", :"<", :"<=", :">", :">=", :"+", :"-", :"*", :"/", :"//", :"(", :")",
:"==", :"!=", :"=~", :"!", :",", :".", :"..", :"...", :"&&", :"||",
:"|", :"{", :"}", :"?", :":", :"+=", :"-=", :"*=", :"%=", :"&=",
:"|", :"{", :"}", :"?", :":", :"+=", :"-=", :"*=", :"/=", :"%=", :"//=", :"&=",
:"|=", :"^=", :"**=", :"<<", :">>", :"%", :"&", :"|", :"^", :"**", :"<<=",
:">>=", :"~", :"[]", :"[]=", :"[", :"]", :"::", :"<=>", :"=>", :"||=",
:"&&=", :"===", :";", :"->", :"[]?", :"{%", :"{{", :"%}", :"@[", :"!~",
Expand All @@ -260,7 +263,7 @@ describe "Lexer" do
it_lexes_instance_var "@foo"
it_lexes_class_var "@@foo"
it_lexes_globals ["$foo", "$FOO", "$_foo", "$foo123"]
it_lexes_symbols [":foo", ":foo!", ":foo?", ":foo=", ":\"foo\"", ":かたな", ":+", ":-", ":*", ":/",
it_lexes_symbols [":foo", ":foo!", ":foo?", ":foo=", ":\"foo\"", ":かたな", ":+", ":-", ":*", ":/", "://",
":==", ":<", ":<=", ":>", ":>=", ":!", ":!=", ":=~", ":!~", ":&", ":|",
":^", ":~", ":**", ":>>", ":<<", ":%", ":[]", ":[]?", ":[]=", ":<=>", ":===",
":&+", ":&-", ":&*", ":&**"]
Expand Down Expand Up @@ -497,13 +500,6 @@ describe "Lexer" do
token.value.should eq ("a")
end

it "lexes /=" do
lexer = Lexer.new("/=")
lexer.slash_is_regex = false
token = lexer.next_token
token.type.should eq(:"/=")
end

it "lexes != after identifier (#4815)" do
lexer = Lexer.new("some_method!=5")
token = lexer.next_token
Expand Down
14 changes: 8 additions & 6 deletions spec/compiler/parser/parser_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ module Crystal
it_parses "f.x = Foo.new", Call.new("f".call, "x=", [Call.new("Foo".path, "new")] of ASTNode)
it_parses "f.x = - 1", Call.new("f".call, "x=", [Call.new(1.int32, "-")] of ASTNode)

["+", "-", "*", "/", "%", "|", "&", "^", "**", "<<", ">>", "&+", "&-", "&*"].each do |op|
["+", "-", "*", "/", "//", "%", "|", "&", "^", "**", "<<", ">>", "&+", "&-", "&*"].each do |op|
it_parses "f.x #{op}= 2", OpAssign.new(Call.new("f".call, "x"), op, 2.int32)
end

Expand All @@ -433,10 +433,11 @@ module Crystal
it_parses "def %(); end;", Def.new("%")
it_parses "def /(); end;", Def.new("/")

["<<", "<", "<=", "==", ">>", ">", ">=", "+", "-", "*", "/", "%", "|", "&", "^", "**", "===", "=~", "!~", "&+", "&-", "&*", "&**"].each do |op|
["<<", "<", "<=", "==", ">>", ">", ">=", "+", "-", "*", "/", "//", "%", "|", "&", "^", "**", "===", "=~", "!~", "&+", "&-", "&*", "&**"].each do |op|
it_parses "1 #{op} 2", Call.new(1.int32, op, 2.int32)
it_parses "n #{op} 2", Call.new("n".call, op, 2.int32)
it_parses "def #{op}(); end", Def.new(op)
it_parses "macro #{op};end", Macro.new(op, [] of Arg, Expressions.new)
end

["bar", "+", "-", "*", "/", "<", "<=", "==", ">", ">=", "%", "|", "&", "^", "**", "===", "=~", "!~"].each do |name|
Expand All @@ -445,7 +446,7 @@ module Crystal
it_parses "foo.#{name}(1, 2)", Call.new("foo".call, name, 1.int32, 2.int32)
end

["+", "-", "*", "/", "%", "|", "&", "^", "**", "<<", ">>", "&+", "&-", "&*"].each do |op|
["+", "-", "*", "/", "//", "%", "|", "&", "^", "**", "<<", ">>", "&+", "&-", "&*"].each do |op|
it_parses "a = 1; a #{op}= 1", [Assign.new("a".var, 1.int32), OpAssign.new("a".var, op, 1.int32)]
it_parses "a = 1; a #{op}=\n1", [Assign.new("a".var, 1.int32), OpAssign.new("a".var, op, 1.int32)]
it_parses "a.b #{op}=\n1", OpAssign.new(Call.new("a".call, "b"), op, 1.int32)
Expand Down Expand Up @@ -647,7 +648,7 @@ module Crystal
assert_syntax_error "#{keyword} ? 1 : 2", "void value expression"
assert_syntax_error "+#{keyword}", "void value expression"

["<<", "<", "<=", "==", ">>", ">", ">=", "+", "-", "*", "/", "%", "|",
["<<", "<", "<=", "==", ">>", ">", ">=", "+", "-", "*", "/", "//", "%", "|",
"&", "^", "**", "===", "&+", "&-", "&*", "&**"].each do |op|
assert_syntax_error "#{keyword} #{op} 1", "void value expression"
end
Expand Down Expand Up @@ -785,6 +786,9 @@ module Crystal
it_parses "{% unless 1; 2; end %}", MacroExpression.new(If.new(1.int32, Nop.new, 2.int32), output: false)
it_parses "{%\n1\n2\n3\n%}", MacroExpression.new(Expressions.new([1.int32, 2.int32, 3.int32] of ASTNode), output: false)

it_parses "{{ 1 // 2 }}", MacroExpression.new(Expressions.from([Call.new(1.int32, "//", 2.int32)] of ASTNode))
it_parses "{{ //.options }}", MacroExpression.new(Expressions.from([Call.new(RegexLiteral.new(StringLiteral.new("")), "options")] of ASTNode))

it_parses "[] of Int", ([] of ASTNode).array_of("Int".path)
it_parses "[1, 2] of Int", ([1.int32, 2.int32] of ASTNode).array_of("Int".path)

Expand All @@ -794,8 +798,6 @@ module Crystal

it_parses "macro foo;end", Macro.new("foo", [] of Arg, Expressions.new)
it_parses "macro [];end", Macro.new("[]", [] of Arg, Expressions.new)
it_parses "macro %();end", Macro.new("%", [] of Arg, Expressions.new)
it_parses "macro /();end", Macro.new("/", [] of Arg, Expressions.new)
it_parses %(macro foo; 1 + 2; end), Macro.new("foo", [] of Arg, Expressions.from([" 1 + 2; ".macro_literal] of ASTNode))
it_parses %(macro foo(x); 1 + 2; end), Macro.new("foo", ([Arg.new("x")]), Expressions.from([" 1 + 2; ".macro_literal] of ASTNode))
it_parses %(macro foo(x)\n 1 + 2; end), Macro.new("foo", ([Arg.new("x")]), Expressions.from([" 1 + 2; ".macro_literal] of ASTNode))
Expand Down
3 changes: 3 additions & 0 deletions spec/compiler/parser/to_s_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ describe "ASTNode#to_s" do
expect_to_s "@foo.bar"
expect_to_s %(:foo)
expect_to_s %(:"{")
expect_to_s %(%r())
expect_to_s %(%r()imx)
expect_to_s %(/hello world/)
expect_to_s %(/hello world/imx)
expect_to_s %(/\\s/)
expect_to_s %(/\\?/)
expect_to_s %(/\\(group\\)/)
Expand Down
143 changes: 83 additions & 60 deletions src/compiler/crystal/syntax/lexer.cr
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ module Crystal
@count_whitespace = false
@slash_is_regex = true
@wants_raw = false
@wants_def_or_macro_name = false
@string_pool = string_pool || StringPool.new

# When lexing macro tokens, when we encounter `#{` inside
Expand Down Expand Up @@ -287,8 +288,17 @@ module Crystal
line = @line_number
column = @column_number
char = next_char
if !@slash_is_regex && char == '='
if (@wants_def_or_macro_name || !@slash_is_regex) && char == '/'
case next_char
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can be replaced by a smaller if statement

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using the case is more about following the pattern of the rest of the lexer.
But I don't follow which is the proposed smaller if.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine - this just makes the lexer looks a bit more complex that it is, that's it :)
Here it would be

if next_char == '/'
  next_char :"//="
else
  @token.type = :"//"
end

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought you ment the if (@wants_def_or_macro_name ....
There are some if in the lexer, bust mostly case even single when-ed (?) case. I found them easier to read the lexer if all the structures are the same and not case by case based that we will end up having if your suggestions are applied.

At there is no real change the the actual code.

So case are kept. But thanks.

when '='
next_char :"//="
else
@token.type = :"//"
end
elsif !@slash_is_regex && char == '='
next_char :"/="
elsif @wants_def_or_macro_name
@token.type = :"/"
elsif @slash_is_regex
@token.type = :DELIMITER_START
@token.delimiter_state = Token::DelimiterState.new(:regex, '/', '/')
Expand All @@ -303,65 +313,69 @@ module Crystal
@token.type = :"/"
end
when '%'
case next_char
when '='
next_char :"%="
when '(', '[', '{', '<', '|'
delimited_pair :string, current_char, closing_char, start
when 'i'
case peek_next_char
when '(', '{', '[', '<', '|'
start_char = next_char
next_char :SYMBOL_ARRAY_START
@token.raw = "%i#{start_char}" if @wants_raw
@token.delimiter_state = Token::DelimiterState.new(:symbol_array, start_char, closing_char(start_char))
else
@token.type = :"%"
end
when 'q'
case peek_next_char
when '(', '{', '[', '<', '|'
next_char
delimited_pair :string, current_char, closing_char, start, allow_escapes: false
else
@token.type = :"%"
end
when 'Q'
case peek_next_char
when '(', '{', '[', '<', '|'
next_char
delimited_pair :string, current_char, closing_char, start
else
@token.type = :"%"
end
when 'r'
case next_char
when '(', '[', '{', '<', '|'
delimited_pair :regex, current_char, closing_char, start
else
raise "unknown %r char"
end
when 'x'
if @wants_def_or_macro_name
next_char :"%"
else
case next_char
when '='
next_char :"%="
when '(', '[', '{', '<', '|'
delimited_pair :command, current_char, closing_char, start
else
raise "unknown %x char"
end
when 'w'
case peek_next_char
when '(', '{', '[', '<', '|'
start_char = next_char
next_char :STRING_ARRAY_START
@token.raw = "%w#{start_char}" if @wants_raw
@token.delimiter_state = Token::DelimiterState.new(:string_array, start_char, closing_char(start_char))
delimited_pair :string, current_char, closing_char, start
when 'i'
case peek_next_char
when '(', '{', '[', '<', '|'
start_char = next_char
next_char :SYMBOL_ARRAY_START
@token.raw = "%i#{start_char}" if @wants_raw
@token.delimiter_state = Token::DelimiterState.new(:symbol_array, start_char, closing_char(start_char))
else
@token.type = :"%"
end
when 'q'
case peek_next_char
when '(', '{', '[', '<', '|'
next_char
delimited_pair :string, current_char, closing_char, start, allow_escapes: false
else
@token.type = :"%"
end
when 'Q'
case peek_next_char
when '(', '{', '[', '<', '|'
next_char
delimited_pair :string, current_char, closing_char, start
else
@token.type = :"%"
end
when 'r'
case next_char
when '(', '[', '{', '<', '|'
delimited_pair :regex, current_char, closing_char, start
else
raise "unknown %r char"
end
when 'x'
case next_char
when '(', '[', '{', '<', '|'
delimited_pair :command, current_char, closing_char, start
else
raise "unknown %x char"
end
when 'w'
case peek_next_char
when '(', '{', '[', '<', '|'
start_char = next_char
next_char :STRING_ARRAY_START
@token.raw = "%w#{start_char}" if @wants_raw
@token.delimiter_state = Token::DelimiterState.new(:string_array, start_char, closing_char(start_char))
else
@token.type = :"%"
end
when '}'
next_char :"%}"
else
@token.type = :"%"
end
when '}'
next_char :"%}"
else
@token.type = :"%"
end
when '(' then next_char :"("
when ')' then next_char :")"
Expand Down Expand Up @@ -412,7 +426,12 @@ module Crystal
symbol "*"
end
when '/'
next_char_and_symbol "/"
case next_char
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto ditto :-)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be here

if next_char == '/'
  next_char_and_symbol "//"
else
  symbol "/"
end

when '/'
next_char_and_symbol "//"
else
symbol "/"
end
when '='
case next_char
when '='
Expand Down Expand Up @@ -694,10 +713,14 @@ module Crystal
set_token_raw_from_start(start)
when '"', '`'
delimiter = current_char
next_char
@token.type = :DELIMITER_START
@token.delimiter_state = Token::DelimiterState.new(delimiter == '`' ? :command : :string, delimiter, delimiter)
set_token_raw_from_start(start)
if delimiter == '`' && @wants_def_or_macro_name
next_char :"`"
else
next_char
@token.type = :DELIMITER_START
@token.delimiter_state = Token::DelimiterState.new(delimiter == '`' ? :command : :string, delimiter, delimiter)
set_token_raw_from_start(start)
end
when '0'
scan_zero_number(start)
when '1', '2', '3', '4', '5', '6', '7', '8', '9'
Expand Down
Loading