From ea43502c7872639f4ec815aa125e8c94a8866460 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 12 Nov 2021 23:51:32 +0100 Subject: [PATCH 1/5] Report syntax error for too-long bin/hex/oct integer literals Currently the OverflowError causes a compiler crash. Change the operations to bitwise to remove the actual overflow detection but explicitly deduce that an overflow must have happened, through the digit count. Add the actual number literal to the error message (note: needed to ensure that the literal is fully read before declaring the overflow). --- spec/compiler/lexer/lexer_spec.cr | 40 +++++++++++++++++++++++++--- src/compiler/crystal/syntax/lexer.cr | 37 +++++++++++++++++++++---- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/spec/compiler/lexer/lexer_spec.cr b/spec/compiler/lexer/lexer_spec.cr index 30910c406354..fdc22a97551d 100644 --- a/spec/compiler/lexer/lexer_spec.cr +++ b/spec/compiler/lexer/lexer_spec.cr @@ -311,13 +311,47 @@ describe "Lexer" do assert_syntax_error "18446744073709551616_u128", "18446744073709551616 doesn't fit in an UInt64. UInt128 literals that don't fit in an UInt64 are currently not supported" assert_syntax_error "-1_u128", "Invalid negative value -1 for UInt128" + assert_syntax_error "0123", "octal constants should be prefixed with 0o" + assert_syntax_error "00", "octal constants should be prefixed with 0o" + assert_syntax_error "01_i64", "octal constants should be prefixed with 0o" + assert_syntax_error "0xFF_i8", "255 doesn't fit in an Int8" assert_syntax_error "0o200_i8", "128 doesn't fit in an Int8" assert_syntax_error "0b10000000_i8", "128 doesn't fit in an Int8" - assert_syntax_error "0123", "octal constants should be prefixed with 0o" - assert_syntax_error "00", "octal constants should be prefixed with 0o" - assert_syntax_error "01_i64", "octal constants should be prefixed with 0o" + # 2**31 - 1 + it_lexes_i32 [["0x7fffffff", "2147483647"], ["0o17777777777", "2147483647"], ["0b1111111111111111111111111111111", "2147483647"]] + it_lexes_i32 [["0x7fffffff_i32", "2147483647"], ["0o17777777777_i32", "2147483647"], ["0b1111111111111111111111111111111_i32", "2147483647"]] + # 2**32 - 1 + it_lexes_i64 [["0xffffffff", "4294967295"], ["0o37777777777", "4294967295"], ["0b11111111111111111111111111111111", "4294967295"]] + # 2**32 + it_lexes_i64 [["0x100000000", "4294967296"], ["0o40000000000", "4294967296"], ["0b100000000000000000000000000000000", "4294967296"]] + assert_syntax_error "0x100000000i32", "4294967296 doesn't fit in an Int32" + assert_syntax_error "0o40000000000i32", "4294967296 doesn't fit in an Int32" + assert_syntax_error "0b100000000000000000000000000000000i32", "4294967296 doesn't fit in an Int32" + # 2**63 - 1 + it_lexes_i64 [["0x7fffffffffffffff", "9223372036854775807"], ["0o777777777777777777777", "9223372036854775807"], ["0b111111111111111111111111111111111111111111111111111111111111111", "9223372036854775807"]] + # 2**63 + it_lexes_u64 [["0x8000000000000000", "9223372036854775808"], ["0o1000000000000000000000", "9223372036854775808"], ["0b1000000000000000000000000000000000000000000000000000000000000000", "9223372036854775808"]] + assert_syntax_error "0x8000000000000000i64", "9223372036854775808 doesn't fit in an Int64" + assert_syntax_error "0o1000000000000000000000i64", "9223372036854775808 doesn't fit in an Int64" + assert_syntax_error "0b1000000000000000000000000000000000000000000000000000000000000000i64", "9223372036854775808 doesn't fit in an Int64" + # 2**64 - 1 + it_lexes_u64 [["0xffff_ffff_ffff_ffff", "18446744073709551615"], ["0o177777_77777777_77777777", "18446744073709551615"], ["0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111", "18446744073709551615"]] + it_lexes_u64 [["0x00ffffffffffffffff", "18446744073709551615"], ["0o001777777777777777777777", "18446744073709551615"], ["0b001111111111111111111111111111111111111111111111111111111111111111", "18446744073709551615"]] + # 2**64 + assert_syntax_error "0x10000_0000_0000_0000", "0x10000_0000_0000_0000 doesn't fit in an UInt64" + assert_syntax_error "0o200000_00000000_00000000", "0o200000_00000000_00000000 doesn't fit in an UInt64" + assert_syntax_error "0b100000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000", "0b100000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000 doesn't fit in an UInt64" + # Very large + assert_syntax_error "0x1afafafafafafafafafafaf", "0x1afafafafafafafafafafaf doesn't fit in an UInt64" + assert_syntax_error "0x1afafafafafafafafafafafi32", "0x1afafafafafafafafafafaf doesn't fit in an UInt64" + assert_syntax_error "0o1234567123456712345671234567", "0o1234567123456712345671234567 doesn't fit in an UInt64" + assert_syntax_error "0o12345671234567_12345671234567_i8", "0o12345671234567_12345671234567_ doesn't fit in an UInt64" + assert_syntax_error "0b100000000000000000000000000000000000000000000000000000000000000000", "0b100000000000000000000000000000000000000000000000000000000000000000 doesn't fit in an UInt64" + + it_lexes_i64 [["0o700000000000000000000", "8070450532247928832"]] + it_lexes_u64 [["0o1000000000000000000000", "9223372036854775808"]] assert_syntax_error "4f33", "invalid float suffix" assert_syntax_error "4f65", "invalid float suffix" diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 5efa3490cca8..1f36b7832628 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1780,12 +1780,17 @@ module Crystal next_char num = 0_u64 + num_size = 0 while true case next_char when '0' - num *= 2 + num = num << 1 + if num_size > 0 + num_size += 1 + end when '1' - num = num * 2 + 1 + num = (num << 1) + 1 + num_size += 1 when '_' # Nothing else @@ -1793,6 +1798,9 @@ module Crystal end end + if num_size > 64 + raise_value_doesnt_fit_in "UInt64", string_range_from_pool(start), start + end finish_scan_prefixed_number num, negative, start end @@ -1800,17 +1808,28 @@ module Crystal next_char num = 0_u64 - + num_size = 0 while true char = next_char if '0' <= char <= '7' - num = num * 8 + (char - '0') + num = (num << 3) | (char - '0') + # First digit, if it's 2 through 9, adds a fake increment to the size. + if num_size == 0 && char > '1' + num_size += 1 + end + if num_size > 0 || char != '0' + num_size += 1 + end elsif char == '_' else break end end + # 0o177777_77777777_77777777 is the largest UInt64. + if num_size > 22 # or > 21 with first digit being 2 through 9 + raise_value_doesnt_fit_in "UInt64", string_range_from_pool(start), start + end finish_scan_prefixed_number num, negative, start end @@ -1818,19 +1837,27 @@ module Crystal next_char num = 0_u64 + num_size = 0 while true char = next_char if char == '_' else hex_value = char_to_hex(char) { nil } if hex_value - num = num * 16 + hex_value + num = (num << 4) | hex_value + if num_size > 0 || char != '0' + num_size += 1 + end else break end end end + # 0xFFFF_FFFF_FFFF_FFFF is the longest UInt64. + if num_size > 16 + raise_value_doesnt_fit_in "UInt64", string_range_from_pool(start), start + end finish_scan_prefixed_number num, negative, start end From a8da279ddddfbc0b0f6d241cc5d2700355b0bdce Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 19 Nov 2021 16:09:15 +0100 Subject: [PATCH 2/5] Take the size suffix into account --- spec/compiler/lexer/lexer_spec.cr | 10 ++++---- src/compiler/crystal/syntax/lexer.cr | 35 ++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/spec/compiler/lexer/lexer_spec.cr b/spec/compiler/lexer/lexer_spec.cr index fdc22a97551d..84ff74b4b7cf 100644 --- a/spec/compiler/lexer/lexer_spec.cr +++ b/spec/compiler/lexer/lexer_spec.cr @@ -340,14 +340,14 @@ describe "Lexer" do it_lexes_u64 [["0xffff_ffff_ffff_ffff", "18446744073709551615"], ["0o177777_77777777_77777777", "18446744073709551615"], ["0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111", "18446744073709551615"]] it_lexes_u64 [["0x00ffffffffffffffff", "18446744073709551615"], ["0o001777777777777777777777", "18446744073709551615"], ["0b001111111111111111111111111111111111111111111111111111111111111111", "18446744073709551615"]] # 2**64 - assert_syntax_error "0x10000_0000_0000_0000", "0x10000_0000_0000_0000 doesn't fit in an UInt64" - assert_syntax_error "0o200000_00000000_00000000", "0o200000_00000000_00000000 doesn't fit in an UInt64" - assert_syntax_error "0b100000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000", "0b100000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000 doesn't fit in an UInt64" + assert_syntax_error "0x10000_0000_0000_0000", "0x10000000000000000 doesn't fit in an UInt64" + assert_syntax_error "0o200000_00000000_00000000", "0o2000000000000000000000 doesn't fit in an UInt64" + assert_syntax_error "0b100000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000", "0b10000000000000000000000000000000000000000000000000000000000000000 doesn't fit in an UInt64" # Very large assert_syntax_error "0x1afafafafafafafafafafaf", "0x1afafafafafafafafafafaf doesn't fit in an UInt64" - assert_syntax_error "0x1afafafafafafafafafafafi32", "0x1afafafafafafafafafafaf doesn't fit in an UInt64" + assert_syntax_error "0x1afafafafafafafafafafafi32", "0x1afafafafafafafafafafaf doesn't fit in an Int32" assert_syntax_error "0o1234567123456712345671234567", "0o1234567123456712345671234567 doesn't fit in an UInt64" - assert_syntax_error "0o12345671234567_12345671234567_i8", "0o12345671234567_12345671234567_ doesn't fit in an UInt64" + assert_syntax_error "0o12345671234567_12345671234567_i8", "0o1234567123456712345671234567 doesn't fit in an Int8" assert_syntax_error "0b100000000000000000000000000000000000000000000000000000000000000000", "0b100000000000000000000000000000000000000000000000000000000000000000 doesn't fit in an UInt64" it_lexes_i64 [["0o700000000000000000000", "8070450532247928832"]] diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 1f36b7832628..e4deed443fee 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1798,9 +1798,7 @@ module Crystal end end - if num_size > 64 - raise_value_doesnt_fit_in "UInt64", string_range_from_pool(start), start - end + num = nil if num_size > 64 finish_scan_prefixed_number num, negative, start end @@ -1827,9 +1825,7 @@ module Crystal end # 0o177777_77777777_77777777 is the largest UInt64. - if num_size > 22 # or > 21 with first digit being 2 through 9 - raise_value_doesnt_fit_in "UInt64", string_range_from_pool(start), start - end + num = nil if num_size > 22 # or > 21 with first digit being 2 through 9 finish_scan_prefixed_number num, negative, start end @@ -1855,13 +1851,32 @@ module Crystal end # 0xFFFF_FFFF_FFFF_FFFF is the longest UInt64. - if num_size > 16 - raise_value_doesnt_fit_in "UInt64", string_range_from_pool(start), start - end + num = nil if num_size > 16 finish_scan_prefixed_number num, negative, start end - def finish_scan_prefixed_number(num, negative, start) + def finish_scan_prefixed_number(num : Int?, negative : Bool, start : Int32) + if num.nil? # Doesn't even fit in UInt64 + string_value = string_range_from_pool(start).gsub("_", "") + case current_char + when 'i' + consume_int_suffix + when 'u' + consume_uint_suffix + else + @token.number_kind = :u64 + end + case @token.number_kind + when :i8, :i16, :i32, :i64, :i128 + type_name = "Int" + @token.number_kind.to_s[1..] + when :u8, :u16, :u32, :u64, :u128 + type_name = "UInt" + @token.number_kind.to_s[1..] + else + raise "BUG: Expecting an integer token, got #{@token.number_kind}" + end + raise_value_doesnt_fit_in type_name, string_value, start + end + if negative string_value = (num.to_i64 * -1).to_s else From 4883093873273fc19b73cd56b40aaf6c1c601632 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Fri, 19 Nov 2021 18:07:17 +0100 Subject: [PATCH 3/5] Octal digits go only up to 7... --- src/compiler/crystal/syntax/lexer.cr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index e4deed443fee..404b5e0155a0 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1811,7 +1811,7 @@ module Crystal char = next_char if '0' <= char <= '7' num = (num << 3) | (char - '0') - # First digit, if it's 2 through 9, adds a fake increment to the size. + # First digit, if it's 2 through 7, adds a fake increment to the size. if num_size == 0 && char > '1' num_size += 1 end @@ -1825,7 +1825,7 @@ module Crystal end # 0o177777_77777777_77777777 is the largest UInt64. - num = nil if num_size > 22 # or > 21 with first digit being 2 through 9 + num = nil if num_size > 22 # or > 21 with first digit being 2 through 7 finish_scan_prefixed_number num, negative, start end From e28fb57026a63045ad59972b036c61abfb2d5251 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Sat, 20 Nov 2021 10:09:09 +0100 Subject: [PATCH 4/5] Preserve the original literal, with suffix --- spec/compiler/lexer/lexer_spec.cr | 10 +++++----- src/compiler/crystal/syntax/lexer.cr | 3 +-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/spec/compiler/lexer/lexer_spec.cr b/spec/compiler/lexer/lexer_spec.cr index 84ff74b4b7cf..e549d823261c 100644 --- a/spec/compiler/lexer/lexer_spec.cr +++ b/spec/compiler/lexer/lexer_spec.cr @@ -340,14 +340,14 @@ describe "Lexer" do it_lexes_u64 [["0xffff_ffff_ffff_ffff", "18446744073709551615"], ["0o177777_77777777_77777777", "18446744073709551615"], ["0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111", "18446744073709551615"]] it_lexes_u64 [["0x00ffffffffffffffff", "18446744073709551615"], ["0o001777777777777777777777", "18446744073709551615"], ["0b001111111111111111111111111111111111111111111111111111111111111111", "18446744073709551615"]] # 2**64 - assert_syntax_error "0x10000_0000_0000_0000", "0x10000000000000000 doesn't fit in an UInt64" - assert_syntax_error "0o200000_00000000_00000000", "0o2000000000000000000000 doesn't fit in an UInt64" - assert_syntax_error "0b100000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000", "0b10000000000000000000000000000000000000000000000000000000000000000 doesn't fit in an UInt64" + assert_syntax_error "0x10000_0000_0000_0000", "0x10000_0000_0000_0000 doesn't fit in an UInt64" + assert_syntax_error "0o200000_00000000_00000000", "0o200000_00000000_00000000 doesn't fit in an UInt64" + assert_syntax_error "0b100000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000", "0b100000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000 doesn't fit in an UInt64" # Very large assert_syntax_error "0x1afafafafafafafafafafaf", "0x1afafafafafafafafafafaf doesn't fit in an UInt64" - assert_syntax_error "0x1afafafafafafafafafafafi32", "0x1afafafafafafafafafafaf doesn't fit in an Int32" + assert_syntax_error "0x1afafafafafafafafafafafi32", "0x1afafafafafafafafafafafi32 doesn't fit in an Int32" assert_syntax_error "0o1234567123456712345671234567", "0o1234567123456712345671234567 doesn't fit in an UInt64" - assert_syntax_error "0o12345671234567_12345671234567_i8", "0o1234567123456712345671234567 doesn't fit in an Int8" + assert_syntax_error "0o12345671234567_12345671234567_i8", "0o12345671234567_12345671234567_i8 doesn't fit in an Int8" assert_syntax_error "0b100000000000000000000000000000000000000000000000000000000000000000", "0b100000000000000000000000000000000000000000000000000000000000000000 doesn't fit in an UInt64" it_lexes_i64 [["0o700000000000000000000", "8070450532247928832"]] diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 404b5e0155a0..f02a3a6e526e 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1857,7 +1857,6 @@ module Crystal def finish_scan_prefixed_number(num : Int?, negative : Bool, start : Int32) if num.nil? # Doesn't even fit in UInt64 - string_value = string_range_from_pool(start).gsub("_", "") case current_char when 'i' consume_int_suffix @@ -1874,7 +1873,7 @@ module Crystal else raise "BUG: Expecting an integer token, got #{@token.number_kind}" end - raise_value_doesnt_fit_in type_name, string_value, start + raise_value_doesnt_fit_in type_name, string_range_from_pool(start), start end if negative From 157bf29c5f701fee154aabaccaa5319919b0490a Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Sat, 20 Nov 2021 10:59:35 +0100 Subject: [PATCH 5/5] Refactor the check for first digit --- src/compiler/crystal/syntax/lexer.cr | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index f02a3a6e526e..85d45ca68d3e 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1806,16 +1806,15 @@ module Crystal next_char num = 0_u64 - num_size = 0 + num_size = first_digit = 0 while true char = next_char if '0' <= char <= '7' num = (num << 3) | (char - '0') - # First digit, if it's 2 through 7, adds a fake increment to the size. - if num_size == 0 && char > '1' - num_size += 1 - end - if num_size > 0 || char != '0' + if num_size == 0 + first_digit = num + num_size += 1 if char != '0' + else num_size += 1 end elsif char == '_' @@ -1825,7 +1824,7 @@ module Crystal end # 0o177777_77777777_77777777 is the largest UInt64. - num = nil if num_size > 22 # or > 21 with first digit being 2 through 7 + num = nil if {num_size, first_digit} > {22, 0o1} finish_scan_prefixed_number num, negative, start end