From fdb52dc7c1269e1cf23269cdd112293fee077ee8 Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Fri, 10 Sep 2021 00:04:41 +0200 Subject: [PATCH 01/18] Add int128 parsing, different compiler_rt methods and specs --- spec/compiler/codegen/arithmetics_spec.cr | 21 +- spec/compiler/lexer/lexer_spec.cr | 20 +- .../std/crystal/compiler_rt/divmod128_spec.cr | 88 ++++ spec/std/crystal/compiler_rt/mulosi4_spec.cr | 76 ++++ spec/std/crystal/compiler_rt/muloti4_spec.cr | 151 +++++++ spec/std/int_spec.cr | 58 ++- spec/std/string_spec.cr | 24 + src/compiler/crystal/codegen/codegen.cr | 6 +- src/compiler/crystal/codegen/const.cr | 22 +- src/compiler/crystal/syntax/ast.cr | 18 +- src/compiler/crystal/syntax/lexer.cr | 423 ++++++------------ src/crystal/compiler_rt.cr | 3 +- src/crystal/compiler_rt/divmod128.cr | 205 +++++++++ src/crystal/compiler_rt/mul.cr | 41 ++ src/crystal/compiler_rt/mulodi4.cr | 37 -- src/string.cr | 32 ++ 16 files changed, 828 insertions(+), 397 deletions(-) create mode 100644 spec/std/crystal/compiler_rt/divmod128_spec.cr create mode 100644 spec/std/crystal/compiler_rt/mulosi4_spec.cr create mode 100644 spec/std/crystal/compiler_rt/muloti4_spec.cr create mode 100644 src/crystal/compiler_rt/divmod128.cr create mode 100644 src/crystal/compiler_rt/mul.cr delete mode 100644 src/crystal/compiler_rt/mulodi4.cr diff --git a/spec/compiler/codegen/arithmetics_spec.cr b/spec/compiler/codegen/arithmetics_spec.cr index adc1ccd32ca8..7fc1d2cfb406 100644 --- a/spec/compiler/codegen/arithmetics_spec.cr +++ b/spec/compiler/codegen/arithmetics_spec.cr @@ -1,21 +1,10 @@ require "../../spec_helper" -{% if flag?(:darwin) %} - SupportedInts = [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128] - SupportedIntsConversions = { - to_i8: Int8, to_i16: Int16, to_i32: Int32, to_i64: Int64, to_i128: Int128, - to_u8: UInt8, to_u16: UInt16, to_u32: UInt32, to_u64: UInt64, to_u128: UInt128, - } -{% else %} - # Skip Int128 and UInt128 on linux platforms due to compiler-rt dependency. - # PreviewOverflowFlags includes compiler_rt flag to support Int64 overflow - # detection in 32 bits platforms. - SupportedInts = [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64] - SupportedIntsConversions = { - to_i8: Int8, to_i16: Int16, to_i32: Int32, to_i64: Int64, - to_u8: UInt8, to_u16: UInt16, to_u32: UInt32, to_u64: UInt64, - } -{% end %} +SupportedInts = [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128] +SupportedIntsConversions = { + to_i8: Int8, to_i16: Int16, to_i32: Int32, to_i64: Int64, to_i128: Int128, + to_u8: UInt8, to_u16: UInt16, to_u32: UInt32, to_u64: UInt64, to_u128: UInt128, +} describe "Code gen: arithmetic primitives" do describe "&+ addition" do diff --git a/spec/compiler/lexer/lexer_spec.cr b/spec/compiler/lexer/lexer_spec.cr index 30910c406354..f24c8089e6c4 100644 --- a/spec/compiler/lexer/lexer_spec.cr +++ b/spec/compiler/lexer/lexer_spec.cr @@ -299,16 +299,16 @@ describe "Lexer" do assert_syntax_error "18446744073709551616_i32", "18446744073709551616 doesn't fit in an Int32" assert_syntax_error "9999999999999999999_i32", "9999999999999999999 doesn't fit in an Int32" - assert_syntax_error "-9999999999999999999", "-9999999999999999999 doesn't fit in an Int64" - assert_syntax_error "-99999999999999999999", "-99999999999999999999 doesn't fit in an Int64" - assert_syntax_error "-11111111111111111111", "-11111111111111111111 doesn't fit in an Int64" - assert_syntax_error "-9223372036854775809", "-9223372036854775809 doesn't fit in an Int64" - assert_syntax_error "18446744073709551616", "18446744073709551616 doesn't fit in an UInt64" - - assert_syntax_error "9223372036854775808_i128", "9223372036854775808 doesn't fit in an Int64. Int128 literals that don't fit in an Int64 are currently not supported" - assert_syntax_error "-9223372036854775809_i128", "-9223372036854775809 doesn't fit in an Int64. Int128 literals that don't fit in an Int64 are currently not supported" - assert_syntax_error "118446744073709551616_u128", "118446744073709551616 doesn't fit in an UInt64. UInt128 literals that don't fit in an UInt64 are currently not supported" - assert_syntax_error "18446744073709551616_u128", "18446744073709551616 doesn't fit in an UInt64. UInt128 literals that don't fit in an UInt64 are currently not supported" + assert_syntax_error "-9999999999999999999_i64", "-9999999999999999999 doesn't fit in an Int64" + assert_syntax_error "-99999999999999999999_i64", "-99999999999999999999 doesn't fit in an Int64" + assert_syntax_error "-11111111111111111111_i64", "-11111111111111111111 doesn't fit in an Int64" + assert_syntax_error "-9223372036854775809_i64", "-9223372036854775809 doesn't fit in an Int64" + assert_syntax_error "18446744073709551616_u64", "18446744073709551616 doesn't fit in an UInt64" + + assert_syntax_error "340282366920938463463374607431768211456", "340282366920938463463374607431768211456 doesn't fit in an UInt128" + assert_syntax_error "-170141183460469231731687303715884105729", "-170141183460469231731687303715884105729 doesn't fit in an Int128" + assert_syntax_error "-999999999999999999999999999999999999999", "-999999999999999999999999999999999999999 doesn't fit in an Int128" + assert_syntax_error "-1_u128", "Invalid negative value -1 for UInt128" assert_syntax_error "0xFF_i8", "255 doesn't fit in an Int8" diff --git a/spec/std/crystal/compiler_rt/divmod128_spec.cr b/spec/std/crystal/compiler_rt/divmod128_spec.cr new file mode 100644 index 000000000000..7e4b953ce0df --- /dev/null +++ b/spec/std/crystal/compiler_rt/divmod128_spec.cr @@ -0,0 +1,88 @@ +require "spec" + +# TODO: Remove these helpers in PR part 2 + +private def make_ti(a : Int128, b : Int128) + (a << 64) + b +end +private def make_tu(a : UInt128, b : UInt128) + (a << 64) + b +end + +# Specs ported from compiler-rt + +private def test__divti3(a : Int128, b : Int128, expected : Int128, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual = __divti3(a, b) + actual.should eq(expected), file: file, line: line + end +end + +private def test__modti3(a : Int128, b : Int128, expected : Int128, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual = __modti3(a, b) + actual.should eq(expected), file: file, line: line + end +end + +private def test__udivti3(a : UInt128, b : UInt128, expected : UInt128, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual = __udivti3(a, b) + actual.should eq(expected), file: file, line: line + end +end + +private def test__umodti3(a : UInt128, b : UInt128, expected : UInt128, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual = __umodti3(a, b) + actual.should eq(expected), file: file, line: line + end +end + +describe "__divti3" do + test__divti3(0, 1, 0) + test__divti3(0, -1, 0) + test__divti3(2, 1, 2) + test__divti3(2, -1, -2) + test__divti3(-2, 1, -2) + test__divti3(-2, -1, 2) + test__divti3(make_ti(-9223372036854775808, 0x0), 1, make_ti(-9223372036854775808, 0x0)) + test__divti3(make_ti(-9223372036854775808, 0x0), -1, make_ti(-9223372036854775808, 0x0)) + test__divti3(make_ti(-9223372036854775808, 0x0), -2, make_ti(0x4000000000000000, 0x0)) + test__divti3(make_ti(-9223372036854775808, 0x0), 2, make_ti(-0x4000000000000000, 0x0)) +end + +describe "__modti3" do + test__modti3(0, 1, 0) + test__modti3(0, -1, 0) + + test__modti3(5, 3, 2) + test__modti3(5, -3, 2) + test__modti3(-5, 3, -2) + test__modti3(-5, -3, -2) + + test__modti3(make_ti(-9223372036854775808, 0x0), 1, 0) + test__modti3(make_ti(-9223372036854775808, 0x0), -1, 0) + test__modti3(make_ti(-9223372036854775808, 0x0), 2, 0) + test__modti3(make_ti(-9223372036854775808, 0x0), -2, 0) + test__modti3(make_ti(-9223372036854775808, 0x0), 3, -2) + test__modti3(make_ti(-9223372036854775808, 0x0), -3, -2) +end + +describe "__udivti3" do + test__udivti3(0, 1, 0) + test__udivti3(2, 1, 2) + + test__udivti3(make_tu(0x0, 0x8000000000000000), 1, make_tu(0x0, 0x8000000000000000)) + test__udivti3(make_tu(0x0, 0x8000000000000000), 2, make_tu(0x0, 0x4000000000000000)) + test__udivti3(make_tu(0xffffffffffffffff, 0xffffffffffffffff), 2, make_tu(0x7fffffffffffffff, 0xffffffffffffffff)) +end + +describe "__umodti3" do + test__umodti3(0, 1, 0) + test__umodti3(2, 1, 0) + + test__umodti3(make_tu(0x0, 0x8000000000000000), 1, 0) + test__umodti3(make_tu(0x0, 0x8000000000000000), 2, 0) + test__umodti3(make_tu(0xffffffffffffffff, 0xffffffffffffffff), 2, 1) +end diff --git a/spec/std/crystal/compiler_rt/mulosi4_spec.cr b/spec/std/crystal/compiler_rt/mulosi4_spec.cr new file mode 100644 index 000000000000..cf08b0612598 --- /dev/null +++ b/spec/std/crystal/compiler_rt/mulosi4_spec.cr @@ -0,0 +1,76 @@ +require "spec" + +# Ported from compiler-rt:test/builtins/Unit/mulosi4_test.c + +private def test__mulosi4(a : Int32, b : Int32, expected : Int32, expected_overflow : Int32, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual_overflow : Int32 = 0 + actual = __mulosi4(a, b, pointerof(actual_overflow)) + actual_overflow.should eq(expected_overflow), file: file, line: line + if !expected_overflow + actual.should eq(expected), file: file, line: line + end + end +end + +describe "__mulosi4" do + test__mulosi4(0, 0, 0, 0) + test__mulosi4(0, 1, 0, 0) + test__mulosi4(1, 0, 0, 0) + test__mulosi4(0, 10, 0, 0) + test__mulosi4(10, 0, 0, 0) + test__mulosi4(0, 0x1234567, 0, 0) + test__mulosi4(0x1234567, 0, 0, 0) + + test__mulosi4(0, -1, 0, 0) + test__mulosi4(-1, 0, 0, 0) + test__mulosi4(0, -10, 0, 0) + test__mulosi4(-10, 0, 0, 0) + test__mulosi4(0, 0x1234567, 0, 0) + test__mulosi4(0x1234567, 0, 0, 0) + + test__mulosi4(1, 1, 1, 0) + test__mulosi4(1, 10, 10, 0) + test__mulosi4(10, 1, 10, 0) + test__mulosi4(1, 0x1234567, 0x1234567, 0) + test__mulosi4(0x1234567, 1, 0x1234567, 0) + + test__mulosi4(1, -1, -1, 0) + test__mulosi4(1, -10, -10, 0) + test__mulosi4(-10, 1, -10, 0) + test__mulosi4(1, -0x1234567, -0x1234567, 0) + test__mulosi4(-0x1234567, 1, -0x1234567, 0) + + test__mulosi4(0x7FFFFFFF, -2, -0x7fffffff, 1) + test__mulosi4(-2, 0x7FFFFFFF, -0x7fffffff, 1) + test__mulosi4(0x7FFFFFFF, -1, -0x7fffffff, 0) + test__mulosi4(-1, 0x7FFFFFFF, -0x7fffffff, 0) + test__mulosi4(0x7FFFFFFF, 0, 0, 0) + test__mulosi4(0, 0x7FFFFFFF, 0, 0) + test__mulosi4(0x7FFFFFFF, 1, 0x7FFFFFFF, 0) + test__mulosi4(1, 0x7FFFFFFF, 0x7FFFFFFF, 0) + test__mulosi4(0x7FFFFFFF, 2, -0x7fffffff, 1) + test__mulosi4(2, 0x7FFFFFFF, -0x7fffffff, 1) + + test__mulosi4(-0x80000000, -2, -0x80000000, 1) + test__mulosi4(-2, -0x80000000, -0x80000000, 1) + test__mulosi4(-0x80000000, -1, -0x80000000, 1) + test__mulosi4(-1, -0x80000000, -0x80000000, 1) + test__mulosi4(-0x80000000, 0, 0, 0) + test__mulosi4(0, -0x80000000, 0, 0) + test__mulosi4(-0x80000000, 1, -0x80000000, 0) + test__mulosi4(1, -0x80000000, -0x80000000, 0) + test__mulosi4(-0x80000000, 2, -0x80000000, 1) + test__mulosi4(2, -0x80000000, -0x80000000, 1) + + test__mulosi4(-0x7fffffff, -2, -0x7fffffff, 1) + test__mulosi4(-2, -0x7fffffff, -0x7fffffff, 1) + test__mulosi4(-0x7fffffff, -1, 0x7FFFFFFF, 0) + test__mulosi4(-1, -0x7fffffff, 0x7FFFFFFF, 0) + test__mulosi4(-0x7fffffff, 0, 0, 0) + test__mulosi4(0, -0x7fffffff, 0, 0) + test__mulosi4(-0x7fffffff, 1, -0x7fffffff, 0) + test__mulosi4(1, -0x7fffffff, -0x7fffffff, 0) + test__mulosi4(-0x7fffffff, 2, -0x80000000, 1) + test__mulosi4(2, -0x7fffffff, -0x80000000, 1) +end diff --git a/spec/std/crystal/compiler_rt/muloti4_spec.cr b/spec/std/crystal/compiler_rt/muloti4_spec.cr new file mode 100644 index 000000000000..08d09722c18b --- /dev/null +++ b/spec/std/crystal/compiler_rt/muloti4_spec.cr @@ -0,0 +1,151 @@ +require "spec" + +# Ported from compiler-rt:test/builtins/Unit/muloti4_test.c + +private def test__muloti4(a : Int128, b : Int128, expected : Int128, expected_overflow : Int32, file = __FILE__, line = __LINE__) + it "passes compiler-rt builtins unit tests" do + actual_overflow : Int32 = 0 + actual = __muloti4(a, b, pointerof(actual_overflow)) + actual_overflow.should eq(expected_overflow), file: file, line: line + if !expected_overflow + actual.should eq(expected), file: file, line: line + end + end +end + +# TODO: Remove this helper in PR part 2 + +private def make_ti(a : Int128, b : Int128) + (a << 64) + b +end + +describe "__muloti4" do + test__muloti4(0, 0, 0, 0) + test__muloti4(0, 1, 0, 0) + test__muloti4(1, 0, 0, 0) + test__muloti4(0, 10, 0, 0) + test__muloti4(10, 0, 0, 0) + test__muloti4(0, 81985529216486895, 0, 0) + test__muloti4(81985529216486895, 0, 0, 0) + test__muloti4(0, -1, 0, 0) + test__muloti4(-1, 0, 0, 0) + test__muloti4(0, -10, 0, 0) + test__muloti4(-10, 0, 0, 0) + test__muloti4(0, -81985529216486895, 0, 0) + test__muloti4(-81985529216486895, 0, 0, 0) + test__muloti4(1, 1, 1, 0) + test__muloti4(1, 10, 10, 0) + test__muloti4(10, 1, 10, 0) + test__muloti4(1, 81985529216486895, 81985529216486895, 0) + test__muloti4(81985529216486895, 1, 81985529216486895, 0) + test__muloti4(1, -1, -1, 0) + test__muloti4(1, -10, -10, 0) + test__muloti4(-10, 1, -10, 0) + test__muloti4(1, -81985529216486895, -81985529216486895, 0) + test__muloti4(-81985529216486895, 1, -81985529216486895, 0) + test__muloti4(3037000499, 3037000499, 9223372030926249001, 0) + test__muloti4(-3037000499, 3037000499, -9223372030926249001, 0) + test__muloti4(3037000499, -3037000499, -9223372030926249001, 0) + test__muloti4(-3037000499, -3037000499, 9223372030926249001, 0) + test__muloti4(4398046511103, 2097152, 9223372036852678656, 0) + test__muloti4(-4398046511103, 2097152, -9223372036852678656, 0) + test__muloti4(4398046511103, -2097152, -9223372036852678656, 0) + test__muloti4(-4398046511103, -2097152, 9223372036852678656, 0) + test__muloti4(2097152, 4398046511103, 9223372036852678656, 0) + test__muloti4(-2097152, 4398046511103, -9223372036852678656, 0) + test__muloti4(2097152, -4398046511103, -9223372036852678656, 0) + test__muloti4(-2097152, -4398046511103, 9223372036852678656, 0) + test__muloti4(make_ti(0x00000000000000B5, 0x04F333F9DE5BE000), + make_ti(0x0000000000000000, 0x00B504F333F9DE5B), + make_ti(0x7FFFFFFFFFFFF328, 0xDF915DA296E8A000), 0) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + -2, + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(-2, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + -1, + make_ti(0x8000000000000000, 0x0000000000000001), 0) + test__muloti4(-1, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + make_ti(0x8000000000000000, 0x0000000000000001), 0) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + 0, + 0, 0) + test__muloti4(0, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + 0, 0) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + 1, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0) + test__muloti4(1, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0) + test__muloti4(make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + 2, + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(2, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + -2, + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(-2, + make_ti(0x8000000000000000, 0x0000000000000000), + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + -1, + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(-1, + make_ti(0x8000000000000000, 0x0000000000000000), + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + 0, + 0, 0) + test__muloti4(0, + make_ti(0x8000000000000000, 0x0000000000000000), + 0, 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + 1, + make_ti(0x8000000000000000, 0x0000000000000000), 0) + test__muloti4(1, + make_ti(0x8000000000000000, 0x0000000000000000), + make_ti(0x8000000000000000, 0x0000000000000000), 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000000), + 2, + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(2, + make_ti(0x8000000000000000, 0x0000000000000000), + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + -2, + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(-2, + make_ti(0x8000000000000000, 0x0000000000000001), + make_ti(0x8000000000000000, 0x0000000000000001), 1) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + -1, + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0) + test__muloti4(-1, + make_ti(0x8000000000000000, 0x0000000000000001), + make_ti(0x7FFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + 0, + 0, 0) + test__muloti4(0, + make_ti(0x8000000000000000, 0x0000000000000001), + 0, 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + 1, + make_ti(0x8000000000000000, 0x0000000000000001), 0) + test__muloti4(1, + make_ti(0x8000000000000000, 0x0000000000000001), + make_ti(0x8000000000000000, 0x0000000000000001), 0) + test__muloti4(make_ti(0x8000000000000000, 0x0000000000000001), + 2, + make_ti(0x8000000000000000, 0x0000000000000000), 1) + test__muloti4(2, + make_ti(0x8000000000000000, 0x0000000000000001), + make_ti(0x8000000000000000, 0x0000000000000000), 1) +end diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr index e2aed420ed7d..8520672fa79e 100644 --- a/spec/std/int_spec.cr +++ b/spec/std/int_spec.cr @@ -430,6 +430,12 @@ describe "Int" do Int64.new(" 1 ", whitespace: false) end + Int128.new("1").should be_a(Int128) + Int128.new("1").should eq(1) + expect_raises ArgumentError do + Int128.new(" 1 ", whitespace: false) + end + UInt8.new("1").should be_a(UInt8) UInt8.new("1").should eq(1) expect_raises ArgumentError do @@ -453,6 +459,12 @@ describe "Int" do expect_raises ArgumentError do UInt64.new(" 1 ", whitespace: false) end + + UInt128.new("1").should be_a(UInt128) + UInt128.new("1").should eq(1) + expect_raises ArgumentError do + UInt128.new(" 1 ", whitespace: false) + end end it "fallback overload" do @@ -468,6 +480,9 @@ describe "Int" do Int64.new(1).should be_a(Int64) Int64.new(1).should eq(1) + Int128.new(1).should be_a(Int128) + Int128.new(1).should eq(1) + UInt8.new(1).should be_a(UInt8) UInt8.new(1).should eq(1) @@ -479,6 +494,9 @@ describe "Int" do UInt64.new(1).should be_a(UInt64) UInt64.new(1).should eq(1) + + UInt128.new(1).should be_a(UInt128) + UInt128.new(1).should eq(1) end end @@ -504,6 +522,7 @@ describe "Int" do (Int16::MIN / -1).should eq(-(Int16::MIN.to_f64)) (Int32::MIN / -1).should eq(-(Int32::MIN.to_f64)) (Int64::MIN / -1).should eq(-(Int64::MIN.to_f64)) + (Int128::MIN / -1).should eq(-(Int128::MIN.to_f64)) (UInt8::MIN / -1).should eq(0) end @@ -511,7 +530,7 @@ describe "Int" do describe "floor division //" do it "preserves type of lhs" do - {% for type in [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64] %} + {% for type in [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128] %} ({{type}}.new(7) // 2).should be_a({{type}}) ({{type}}.new(7) // 2.0).should be_a({{type}}) ({{type}}.new(7) // 2.0_f32).should be_a({{type}}) @@ -558,6 +577,7 @@ describe "Int" do expect_raises(ArgumentError) { Int16::MIN // -1 } expect_raises(ArgumentError) { Int32::MIN // -1 } expect_raises(ArgumentError) { Int64::MIN // -1 } + expect_raises(ArgumentError) { Int128::MIN // -1 } (UInt8::MIN // -1).should eq(0) end @@ -583,8 +603,8 @@ describe "Int" do end it "returns 0 when doing IntN::MIN % -1 (#8306)" do - {% for n in [8, 16, 32, 64] %} - (Int{{n}}::MIN % -1_i{{n}}).should eq(0) + {% for n in [8, 16, 32, 64, 128] %} + (Int{{n}}::MIN % -1.to_i{{n}}).should eq(0) {% end %} end @@ -597,8 +617,8 @@ describe "Int" do end it "returns 0 when doing IntN::MIN.remainder(-1) (#8306)" do - {% for n in [8, 16, 32, 64] %} - (Int{{n}}::MIN.remainder(-1_i{{n}})).should eq(0) + {% for n in [8, 16, 32, 64, 128] %} + (Int{{n}}::MIN.remainder(-1.to_i{{n}})).should eq(0) {% end %} end @@ -734,27 +754,32 @@ describe "Int" do it { 5_i64.popcount.should eq(2) } it { 9223372036854775807_i64.popcount.should eq(63) } it { 18446744073709551615_u64.popcount.should eq(64) } + + it { 0_i128.popcount.should eq(0) } + # PENDING + # it { 170141183460469231731687303715884105727_i128.popcount.should eq(127) } + # it { 340282366920938463463374607431768211455_u128.popcount.should eq(128) } end describe "#leading_zeros_count" do - {% for width in %w(8 16 32 64).map(&.id) %} - it { -1_i{{width}}.leading_zeros_count.should eq(0) } - it { 0_i{{width}}.leading_zeros_count.should eq({{width}}) } - it { 0_u{{width}}.leading_zeros_count.should eq({{width}}) } + {% for width in %w(8 16 32 64 128).map(&.id) %} + it { -1.to_i{{width}}.leading_zeros_count.should eq(0) } + it { 0.to_i{{width}}.leading_zeros_count.should eq({{width}}) } + it { 0.to_u{{width}}.leading_zeros_count.should eq({{width}}) } {% end %} end describe "#trailing_zeros_count" do - {% for width in %w(8 16 32 64).map(&.id) %} - it { -2_i{{width}}.trailing_zeros_count.should eq(1) } - it { 2_i{{width}}.trailing_zeros_count.should eq(1) } - it { 2_u{{width}}.trailing_zeros_count.should eq(1) } + {% for width in %w(8 16 32 64 128).map(&.id) %} + it { -2.to_i{{width}}.trailing_zeros_count.should eq(1) } + it { 2.to_i{{width}}.trailing_zeros_count.should eq(1) } + it { 2.to_u{{width}}.trailing_zeros_count.should eq(1) } {% end %} end pending_win32 "compares signed vs. unsigned integers" do - signed_ints = [Int8::MAX, Int16::MAX, Int32::MAX, Int64::MAX, Int8::MIN, Int16::MIN, Int32::MIN, Int64::MIN, 0_i8, 0_i16, 0_i32, 0_i64] - unsigned_ints = [UInt8::MAX, UInt16::MAX, UInt32::MAX, UInt64::MAX, 0_u8, 0_u16, 0_u32, 0_u64] + signed_ints = [Int8::MAX, Int16::MAX, Int32::MAX, Int64::MAX, Int128::MAX, Int8::MIN, Int16::MIN, Int32::MIN, Int64::MIN, Int128::MIN, 0_i8, 0_i16, 0_i32, 0_i64, 0_i128] + unsigned_ints = [UInt8::MAX, UInt16::MAX, UInt32::MAX, UInt64::MAX, UInt128::MAX, 0_u8, 0_u16, 0_u32, 0_u64, 0_u128] big_signed_ints = signed_ints.map &.to_big_i big_unsigned_ints = unsigned_ints.map &.to_big_i @@ -781,7 +806,7 @@ describe "Int" do end it "clones" do - [1_u8, 2_u16, 3_u32, 4_u64, 5_i8, 6_i16, 7_i32, 8_i64].each do |value| + [1_u8, 2_u16, 3_u32, 4_u64, 5.to_u128, 6_i8, 7_i16, 8_i32, 9_i64, 10.to_i128].each do |value| value.clone.should eq(value) end end @@ -839,6 +864,7 @@ describe "Int" do Int32::MAX.digits.should eq(Int32::MAX.to_s.chars.map(&.to_i).reverse) Int64::MAX.digits.should eq(Int64::MAX.to_s.chars.map(&.to_i).reverse) UInt64::MAX.digits.should eq(UInt64::MAX.to_s.chars.map(&.to_i).reverse) + UInt128::MAX.digits.should eq(UInt128::MAX.to_s.chars.map(&.to_i).reverse) end it "works for non-Int32" do diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 325b95a550dc..99bb6e9c92eb 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -396,6 +396,30 @@ describe "String" do it { "18446744073709551616".to_u64 { 0 }.should eq(0) } end + describe "to_i128" do + it { "170141183460469231731687303715884105727".to_i128.should eq(Int128::MAX) } + it { "-170141183460469231731687303715884105728".to_i128.should eq(Int128::MIN) } + it { expect_raises(ArgumentError) { "170141183460469231731687303715884105728".to_i128 } } + it { expect_raises(ArgumentError) { "-170141183460469231731687303715884105729".to_i128 } } + + it { "170141183460469231731687303715884105727".to_i128?.should eq(Int128::MAX) } + it { "170141183460469231731687303715884105728".to_i128?.should be_nil } + it { "170141183460469231731687303715884105728".to_i128 { 0 }.should eq(0) } + + it { expect_raises(ArgumentError) { "340282366920938463463374607431768211456".to_i128 } } + end + + describe "to_u128" do + it { "340282366920938463463374607431768211455".to_u128.should eq(UInt128::MAX) } + it { "0".to_u128.should eq(0) } + it { expect_raises(ArgumentError) { "340282366920938463463374607431768211456".to_u128 } } + it { expect_raises(ArgumentError) { "-1".to_u128 } } + + it { "340282366920938463463374607431768211455".to_u128?.should eq(UInt128::MAX) } + it { "340282366920938463463374607431768211456".to_u128?.should be_nil } + it { "340282366920938463463374607431768211456".to_u128 { 0 }.should eq(0) } + end + it { "1234".to_i32.should eq(1234) } it { "1234123412341234".to_i64.should eq(1234123412341234_i64) } it { "9223372036854775808".to_u64.should eq(9223372036854775808_u64) } diff --git a/src/compiler/crystal/codegen/codegen.cr b/src/compiler/crystal/codegen/codegen.cr index 7756fcb9c3b8..d0539c79bb01 100644 --- a/src/compiler/crystal/codegen/codegen.cr +++ b/src/compiler/crystal/codegen/codegen.cr @@ -475,11 +475,9 @@ module Crystal when :u64 @last = int64(node.value.to_u64) when :i128 - # TODO: implement String#to_i128 and use it - @last = int128(node.value.to_i64) + @last = int128(node.value.to_i128) when :u128 - # TODO: implement String#to_u128 and use it - @last = int128(node.value.to_u64) + @last = int128(node.value.to_u128) when :f32 @last = float32(node.value) when :f64 diff --git a/src/compiler/crystal/codegen/const.cr b/src/compiler/crystal/codegen/const.cr index 80b91f2b7170..bcab40e1bda6 100644 --- a/src/compiler/crystal/codegen/const.cr +++ b/src/compiler/crystal/codegen/const.cr @@ -194,16 +194,18 @@ class Crystal::CodeGenVisitor # We inline constants. Otherwise we use an LLVM const global. @last = case value = const.compile_time_value - when Bool then int1(value ? 1 : 0) - when Char then int32(value.ord) - when Int8 then int8(value) - when Int16 then int16(value) - when Int32 then int32(value) - when Int64 then int64(value) - when UInt8 then int8(value) - when UInt16 then int16(value) - when UInt32 then int32(value) - when UInt64 then int64(value) + when Bool then int1(value ? 1 : 0) + when Char then int32(value.ord) + when Int8 then int8(value) + when Int16 then int16(value) + when Int32 then int32(value) + when Int64 then int64(value) + when Int128 then int128(value) + when UInt8 then int8(value) + when UInt16 then int16(value) + when UInt32 then int32(value) + when UInt64 then int64(value) + when UInt128 then int128(value) else last = read_const_pointer(const) to_lhs last, const.value.type diff --git a/src/compiler/crystal/syntax/ast.cr b/src/compiler/crystal/syntax/ast.cr index a52eebe6da9b..8a4ced97bfce 100644 --- a/src/compiler/crystal/syntax/ast.cr +++ b/src/compiler/crystal/syntax/ast.cr @@ -249,14 +249,16 @@ module Crystal def integer_value case kind - when :i8 then value.to_i8 - when :i16 then value.to_i16 - when :i32 then value.to_i32 - when :i64 then value.to_i64 - when :u8 then value.to_u8 - when :u16 then value.to_u16 - when :u32 then value.to_u32 - when :u64 then value.to_u64 + when :i8 then value.to_i8 + when :i16 then value.to_i16 + when :i32 then value.to_i32 + when :i64 then value.to_i64 + when :i128 then value.to_i128 + when :u8 then value.to_u8 + when :u16 then value.to_u16 + when :u32 then value.to_u32 + when :u64 then value.to_u64 + when :u128 then value.to_u128 else raise "Bug: called 'integer_value' for non-integer literal" end diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 5a7f9fa2d956..45c8c7d9d7c7 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1558,7 +1558,7 @@ module Crystal end_pos = current_pos - suffix_size if end_pos - start == 1 - # For numbers such as 0, 1, 2, 3, etc., we use a string from the poll + # For numbers such as 0, 1, 2, 3, etc., we use a string from the pool string_value = string_range_from_pool(start, end_pos) else string_value = string_range(start, end_pos) @@ -1580,153 +1580,68 @@ module Crystal set_token_raw_from_start(start) end - macro gen_check_int_fits_in_size(type, method, size, *, actual_type = nil) - if num_size >= 20 - {% if actual_type.nil? %} - raise_value_doesnt_fit_in "{{type}}", string_value, start - {% else %} - raise_value_restricted_by "{{actual_type}}", "{{type}}", string_value, start - {% end %} - end - if num_size >= {{size}} - int_value = absolute_integer_value(string_value, negative) - max = {{type}}::MAX.{{method}} - max += 1 if negative - - if int_value > max - {% if actual_type.nil? %} - raise_value_doesnt_fit_in "{{type}}", string_value, start - {% else %} - raise_value_restricted_by "{{actual_type}}", "{{type}}", string_value, start - {% end %} + macro gen_check_int_fits_in_size(unsigned, type, method, size) + {% if unsigned %} + if negative + raise "Invalid negative value #{string_value} for {{type}}" end - end - end + {% end %} - macro gen_check_uint_fits_in_size(type, size) - if negative - raise "Invalid negative value #{string_value} for {{type}}" - end - if num_size >= 20 + if num_size > {{size}} || (num_size == {{size}} && string_value.to_{{method.id}}? == nil) raise_value_doesnt_fit_in "{{type}}", string_value, start end - if num_size >= {{size}} - int_value = absolute_integer_value(string_value, negative) - if int_value > {{type}}::MAX - raise_value_doesnt_fit_in "{{type}}", string_value, start - end - end end def check_integer_literal_fits_in_size(string_value, num_size, negative, start) case @token.number_kind when :i8 - gen_check_int_fits_in_size Int8, to_u8, 3 + gen_check_int_fits_in_size false, Int8, i8, 3 when :u8 - gen_check_uint_fits_in_size UInt8, 3 + gen_check_int_fits_in_size true, UInt8, u8, 3 when :i16 - gen_check_int_fits_in_size Int16, to_u16, 5 + gen_check_int_fits_in_size false, Int16, i16, 5 when :u16 - gen_check_uint_fits_in_size UInt16, 5 + gen_check_int_fits_in_size true, UInt16, u16, 5 when :i32 - gen_check_int_fits_in_size Int32, to_u32, 10 + gen_check_int_fits_in_size false, Int32, i32, 10 when :u32 - gen_check_uint_fits_in_size UInt32, 10 + gen_check_int_fits_in_size true, UInt32, u32, 10 when :i64 - gen_check_int_fits_in_size Int64, to_u64, 19 + gen_check_int_fits_in_size false, Int64, i64, 19 when :u64 - if negative - raise "Invalid negative value #{string_value} for UInt64" - end - - check_value_fits_in_uint64 string_value, num_size, start + gen_check_int_fits_in_size true, UInt64, u64, 20 when :i128 - gen_check_int_fits_in_size Int64, to_u64, 19, actual_type: Int128 + gen_check_int_fits_in_size false, Int128, i128, 39 when :u128 - if negative - raise "Invalid negative value #{string_value} for UInt128" - end - - check_value_fits_in_uint64 string_value, num_size, start, actual_type: UInt128 + gen_check_int_fits_in_size true, UInt128, u128, 39 end end def deduce_integer_kind(string_value, num_size, negative, start) - if negative - check_negative_value_fits_in_int64 string_value, num_size, start - else - check_value_fits_in_uint64 string_value, num_size, start - end - - if num_size >= 10 - int_value = absolute_integer_value(string_value, negative) - - int64max = Int64::MAX.to_u64 - int64max += 1 if negative - - int32max = Int32::MAX.to_u32 - int32max += 1 if negative - - if int_value > int64max - @token.number_kind = :u64 - elsif int_value > int32max - @token.number_kind = :i64 - end - end - end - - def absolute_integer_value(string_value, negative) - if negative - string_value[1..-1].to_u64 - else - string_value.to_u64 - end - end - - def check_negative_value_fits_in_int64(string_value, num_size, start) - if num_size > 19 - raise_value_doesnt_fit_in "Int64", string_value, start - end - - if num_size == 19 - i = 1 # skip '-' - "9223372036854775808".each_byte do |byte| - string_byte = string_value.byte_at(i) - if string_byte > byte - raise_value_doesnt_fit_in "Int64", string_value, start - elsif string_byte < byte - break - end - i += 1 - end - end - end - - def check_value_fits_in_uint64(string_value, num_size, start, actual_type = UInt64) - if num_size > 20 - if actual_type == UInt64 - raise_value_doesnt_fit_in "UInt64", string_value, start - else - raise_value_restricted_by actual_type, "UInt64", string_value, start - end - end - - if num_size == 20 - i = 0 - "18446744073709551615".each_byte do |byte| - string_byte = string_value.byte_at(i) - if string_byte > byte - if actual_type == UInt64 - raise_value_doesnt_fit_in "UInt64", string_value, start - else - raise_value_restricted_by actual_type, "UInt64", string_value, start - end - elsif string_byte < byte - break - end - i += 1 - end - end + @token.number_kind = case num_size + when 0..9 # Keep as i32 + :i32 + when 10 + string_value.to_i32? ? :i32 : :i64 + when 11..18 + :i64 + when 19 + string_value.to_i64? ? :i64 : (negative ? :i128 : :u64) + when 20 + string_value.to_u64? ? :u64 : :i128 + when 20..38 + :i128 + when 39 + if string_value.to_i128? + :i128 + else + raise_value_doesnt_fit_in(Int128, string_value, start) if negative + raise_value_doesnt_fit_in(UInt128, string_value, start) unless string_value.to_u128? + :u128 + end + else + raise_value_doesnt_fit_in Int128, string_value, start + end end def raise_value_doesnt_fit_in(type, string_value, start) @@ -1740,11 +1655,11 @@ module Crystal def scan_zero_number(start, negative = false) case peek_next_char when 'x' - scan_hex_number(start, negative) + scan_base_npow2_number(4, start, negative) when 'o' - scan_octal_number(start, negative) + scan_base_npow2_number(3, start, negative) when 'b' - scan_bin_number(start, negative) + scan_base_npow2_number(1, start, negative) when '.' scan_number(start) when 'i' @@ -1771,78 +1686,54 @@ module Crystal if next_char.ascii_number? raise "octal constants should be prefixed with 0o" else - finish_scan_prefixed_number 0_u64, false, start - end - end - end - - def scan_bin_number(start, negative) - next_char - - num = 0_u64 - while true - case next_char - when '0' - num *= 2 - when '1' - num = num * 2 + 1 - when '_' - # Nothing - else - break + first_byte = @reader.string.byte_at(start) + @token.type = :NUMBER + @token.number_kind = :i32 + @token.value = case first_byte + when '+' then "+0" + when '-' then "-0" + else "0" + end + set_token_raw_from_start(start) end end - - finish_scan_prefixed_number num, negative, start end - def scan_octal_number(start, negative) + def scan_base_npow2_number(bits_per_character, start, negative) next_char - num = 0_u64 - - while true - char = next_char - if '0' <= char <= '7' - num = num * 8 + (char - '0') - elsif char == '_' - else - break - end - end - - finish_scan_prefixed_number num, negative, start - end - - def scan_hex_number(start, negative = false) - next_char + digits = String::CHAR_TO_DIGIT.to_unsafe + digit_count = 0 + base = 2 ** bits_per_character + start_pos = current_pos - num = 0_u64 while true char = next_char - if char == '_' - else - hex_value = char_to_hex(char) { nil } - if hex_value - num = num * 16 + hex_value - else - break - end - end - end - - finish_scan_prefixed_number num, negative, start - end - - def finish_scan_prefixed_number(num, negative, start) - if negative - string_value = (num.to_i64 * -1).to_s + next if char == '_' + digit = digits[char.ord] + break if digit > base || digit < 0 + digit_count += 1 + end + + string = string_range(start_pos + 1, current_pos) + string_value = nil + + case digit_count * bits_per_character + when 0 + raise "numbers cannot end with a prefix" + when 1..32 + string_value = string.to_u32(base: base, underscore: true).to_s + when 33..64 + string_value = string.to_u64(base: base, underscore: true).to_s + when 65..128 + string_value = string.to_u128(base: base, underscore: true).to_s else - string_value = num.to_s + raise_value_doesnt_fit_in "Int128", string, start end + string_value = string_value.not_nil! name_size = string_value.size - name_size -= 1 if negative + string_value = "-#{string_value}" if negative case current_char when 'i' @@ -1857,11 +1748,7 @@ module Crystal end first_byte = @reader.string.byte_at(start) - if first_byte === '+' - string_value = "+#{string_value}" - elsif first_byte === '-' && num == 0 - string_value = "-0" - end + string_value = "+#{string_value}" if first_byte === '+' @token.type = :NUMBER @token.value = string_value @@ -1869,114 +1756,60 @@ module Crystal end def consume_int_suffix - case next_char - when '8' - next_char - @token.number_kind = :i8 - 2 - when '1' - case next_char - when '2' - if next_char == '8' - next_char - @token.number_kind = :i128 - 4 - else - raise "invalid int suffix" - end - when '6' - next_char - @token.number_kind = :i16 - 3 - else - raise "invalid int suffix" - end - when '3' - if next_char == '2' - next_char - @token.number_kind = :i32 - 3 - else - raise "invalid int suffix" - end - when '6' - if next_char == '4' - next_char - @token.number_kind = :i64 - 3 - else - raise "invalid int suffix" - end - else - raise "invalid int suffix" - end + suffix_info = case next_char + when '1' + case next_char + when '2' + {:i128, 4} if next_char == '8' + when '6' + {:i16, 3} + end + when '3' + {:i32, 3} if next_char == '2' + when '6' + {:i64, 3} if next_char == '4' + when '8' + {:i8, 2} + end + raise "invalid int suffix" unless suffix_info + next_char + @token.number_kind = suffix_info[0] + suffix_info[1] end def consume_uint_suffix - case next_char - when '8' - next_char - @token.number_kind = :u8 - 2 - when '1' - case next_char - when '2' - if next_char == '8' - next_char - @token.number_kind = :u128 - 4 - else - raise "invalid uint suffix" - end - when '6' - next_char - @token.number_kind = :u16 - 3 - else - raise "invalid uint suffix" - end - when '3' - if next_char == '2' - next_char - @token.number_kind = :u32 - 3 - else - raise "invalid uint suffix" - end - when '6' - if next_char == '4' - next_char - @token.number_kind = :u64 - 3 - else - raise "invalid uint suffix" - end - else - raise "invalid uint suffix" - end + suffix_info = case next_char + when '1' + case next_char + when '2' + {:u128, 4} if next_char == '8' + when '6' + {:u16, 3} + end + when '3' + {:u32, 3} if next_char == '2' + when '6' + {:u64, 3} if next_char == '4' + when '8' + {:u8, 2} + end + raise "invalid uint suffix" unless suffix_info + next_char + @token.number_kind = suffix_info[0] + suffix_info[1] end def consume_float_suffix - case next_char - when '3' - if next_char == '2' - next_char - @token.number_kind = :f32 - 3 - else - raise "invalid float suffix" - end - when '6' - if next_char == '4' - next_char - @token.number_kind = :f64 - 3 - else - raise "invalid float suffix" - end - else - raise "invalid float suffix" - end + suffix_info = case next_char + when '3' + {:f32, 3} if next_char == '2' + when '6' + {:f64, 3} if next_char == '4' + end + raise "invalid float suffix" unless suffix_info + next_char + @token.number_kind = suffix_info[0] + suffix_info[1] end def next_string_token(delimiter_state) diff --git a/src/crystal/compiler_rt.cr b/src/crystal/compiler_rt.cr index d52e22ba6ebd..e3a557ca595d 100644 --- a/src/crystal/compiler_rt.cr +++ b/src/crystal/compiler_rt.cr @@ -1,3 +1,4 @@ {% skip_file if flag?(:skip_crystal_compiler_rt) %} -require "./compiler_rt/mulodi4.cr" +require "./compiler_rt/mul.cr" +require "./compiler_rt/divmod128.cr" diff --git a/src/crystal/compiler_rt/divmod128.cr b/src/crystal/compiler_rt/divmod128.cr new file mode 100644 index 000000000000..88b946a09ea0 --- /dev/null +++ b/src/crystal/compiler_rt/divmod128.cr @@ -0,0 +1,205 @@ +# This file includes an implementation of (U)Int128 modulo/division operations + +# :nodoc: +fun __divti3(a : Int128, b : Int128) : Int128 + # Ported from llvm/compiler-rt:/lib/builtins/divti3.c + + s_a = a >> 127 # s_a = a < 0 ? -1 : 0 + s_b = b >> 127 # s_b = b < 0 ? -1 : 0 + a = (a ^ s_a) &- s_a # negate if s_a == -1 + b = (b ^ s_b) &- s_b # negate if s_b == -1 + s_a ^= s_b # sign of quotient + quo, _ = _u128_div_rem(a.to_u128!, b.to_u128!) + ((quo ^ s_a) &- s_a).to_i128! # negate if s_a == -1 +end + +# :nodoc: +fun __modti3(a : Int128, b : Int128) : Int128 + # Ported from llvm/compiler-rt:/lib/builtins/modti3.c + + s = b >> 127 # s = b < 0 ? -1 : 0 + b = (b ^ s) &- s # negate if s == -1 + s = a >> 127 # s = a < 0 ? -1 : 0 + a = (a ^ s) &- s # negate if s == -1 + _, rem = _u128_div_rem(a.to_u128!, b.to_u128!) + (rem.to_i128! ^ s) &- s # negate if s == -1 +end + +# :nodoc: +fun __udivti3(a : UInt128, b : UInt128) : UInt128 + # Ported from llvm/compiler-rt:/lib/builtins/udivti3.c + + quo, _ = _u128_div_rem(a, b) + quo +end + +# :nodoc: +fun __umodti3(a : UInt128, b : UInt128) : UInt128 + # Ported from llvm/compiler-rt:/lib/builtins/umodti3.c + + _, rem = _u128_div_rem(a, b) + rem +end + +# :nodoc: +def _carrying_mul(lhs : UInt64, rhs : UInt64) : Tuple(UInt64, UInt64) + # Ported from rust-lang/compiler-builtins + + tmp = lhs.to_u128! &* rhs.to_u128! + {tmp.to_u64!, (tmp >> 64).to_u64!} +end + +# :nodoc: +def _carrying_mul_add(lhs : UInt64, mul : UInt64, add : UInt64) : Tuple(UInt64, UInt64) + # Ported from rust-lang/compiler-builtins + + tmp = lhs.to_u128! + tmp &*= mul.to_u128! + tmp &+= add.to_u128! + {tmp.to_u64!, (tmp >> 64).to_u64!} +end + +# :nodoc: +def _u128_div_rem(duo : UInt128, div : UInt128) : Tuple(UInt128, UInt128) + # Ported from rust-lang/compiler-builtins (trifecta algorithm) + + # Rust also has another algorithm for 128-bit integer division + # for microarchitectures that have slow hardware integer division. + + # This algorithm is called the trifecta algorithm because it uses three main algorithms: + # - short division for small divisors + # - the two possibility algorithm for large divisors + # - an undersubtracting long division algorithm for intermediate cases + + div_lz = div.leading_zeros_count + duo_lz = duo.leading_zeros_count + + if div_lz <= duo_lz + # Resulting quotient is 0 or 1 at this point + # The highest set bit of `duo` needs to be at least one place higher than `div` for the quotient to be more than one. + if duo >= div + return {1_u128, duo - div} + else + return {0_u128, duo} + end + end + + # Use 64-bit integer division if possible + if duo_lz >= 64 + # duo fits in a 64-bit integer + # Because of the previous branch (div_lz <= duo_lz), div will also fit in an 64-bit integer + quo_local1 = duo.to_u64! // div.to_u64! + rem_local1 = duo.to_u64! % div.to_u64! + return {quo_local1.to_u128!, rem_local1.to_u128!} + end + + # Short division branch + if div_lz >= 96 + duo_hi = (duo >> 64).to_u64! + div_0 = div.to_u32!.to_u64! + quo_hi = duo_hi // div_0 + rem_3 = duo_hi % div_0 + + duo_mid = (duo >> 32).to_u32!.to_u64! | (rem_3 << 32) + quo_1 = duo_mid // div_0 + rem_2 = duo_mid % div_0 + + duo_lo = duo.to_u32!.to_u64! | (rem_2 << 32) + quo_0 = duo_lo // div_0 + rem_1 = duo_lo % div_0 + + return {quo_0.to_u128! | (quo_1.to_u128! << 32) | (quo_hi.to_u128! << 64), rem_1.to_u128!} + end + + # Relative leading significant bits (cannot overflow because of above branches) + lz_diff = div_lz - duo_lz + + if lz_diff < 32 + # Two possibility division algorithm + + # The most significant bits of duo and div are within 32 bits of each other. + # If we take the n most significant bits of duo and divide them by the corresponding bits in div, it produces the quotient value quo. + # It happens that quo or quo - 1 will always be the correct quotient for the whole number. + + shift = 64 - duo_lz + duo_sig_n = (duo >> shift).to_u64! + div_sig_n = (div >> shift).to_u64! + quo_local2 = duo_sig_n // div_sig_n + + # The larger quo can overflow, so a manual carrying mul is used with manual overflow checking. + div_lo = div.to_u64! + div_hi = (div >> 64).to_u64! + tmp_lo, carry = _carrying_mul(quo_local2, div_lo) + tmp_hi, overflow = _carrying_mul_add(quo_local2, div_hi, carry) + tmp = tmp_lo.to_u128! | (tmp_hi.to_u128! << 64) + if (overflow != 0) || (duo < tmp) + # In `duo &+ div &- tmp`, both the subtraction and addition can overflow, but the result is always a correct positive number. + return {(quo_local2 - 1).to_u128!, duo &+ div &- tmp} + else + return {quo_local2.to_u128!, duo - tmp} + end + end + + # Undersubtracting long division algorithm. + + quo : UInt128 = 0 + div_extra = 96 - div_lz # Number of lesser significant bits that aren't part of div_sig_32 + div_sig_32 = (div >> div_extra).to_u32! # Most significant 32 bits of div + div_sig_32_add1 = div_sig_32.to_u64! + 1 # This must be a UInt64 because this can overflow + + loop do + duo_extra = 64 - duo_lz # Number of lesser significant bits that aren't part of duo_sig_n + duo_sig_n = (duo >> duo_extra).to_u64! # Most significant 64 bits of duo + + # The two possibility algorithm requires that the difference between most significant bits is less than 32 + if div_extra <= duo_extra + # Undersubtracting long division step + quo_part = (duo_sig_n // div_sig_32_add1).to_u128! + extra_shl = duo_extra - div_extra + + # Addition to the quotient + quo += (quo_part << extra_shl) + + # Subtraction from duo. At least 31 bits are cleared from duo here + duo -= ((div &* quo_part) << extra_shl) + else + # Two possibility algorithm + + shift = 64 - duo_lz + duo_sig_n = (duo >> shift).to_u64! + div_sig_n = (div >> shift).to_u64! + quo_part = duo_sig_n // div_sig_n + div_lo = div.to_u64! + div_hi = (div >> 64).to_u64! + + tmp_lo, carry = _carrying_mul(quo_part, div_lo) + # The undersubtracting long division algorithm has already run once, so overflow beyond 128 bits is impossible + tmp_hi, _ = _carrying_mul_add(quo_part, div_hi, carry) + tmp = tmp_lo.to_u128! | (tmp_hi.to_u128! << 64) + + if duo < tmp + return {quo + (quo_part - 1), duo &+ div &- tmp} + else + return {quo + quo_part, duo - tmp} + end + end + + duo_lz = duo.leading_zeros_count + + if div_lz <= duo_lz + # Quotient can have 0 or 1 added to it + if div <= duo + return {quo + 1, duo - div} + else + return {quo, duo} + end + end + + # This can only happen if div_sd < 64 + if 64 <= duo_lz + quo_local3 = duo.to_u64! // div.to_u64! + rem_local2 = duo.to_u64! % div.to_u64! + return {quo + quo_local3, rem_local2.to_u128!} + end + end +end diff --git a/src/crystal/compiler_rt/mul.cr b/src/crystal/compiler_rt/mul.cr new file mode 100644 index 000000000000..6b954458144d --- /dev/null +++ b/src/crystal/compiler_rt/mul.cr @@ -0,0 +1,41 @@ +# :nodoc: +private macro __mul_impl(name, type, n) + # :nodoc: + fun {{name}}(a : {{type}}, b : {{type}}, overflow : Int32*) : {{type}} + overflow.value = 0 + result = a &* b + if a == {{type}}::MIN + if b != 0 && b != 1 + overflow.value = 1 + end + return result + end + if b == {{type}}::MIN + if a != 0 && a != 1 + overflow.value = 1 + end + return result + end + sa = a >> {{n - 1}} + abs_a = (a ^ sa) &- sa + sb = b >> {{n - 1}} + abs_b = (b ^ sb) &- sb + if abs_a < 2 || abs_b < 2 + return result + end + if sa == sb + if abs_a > ({{type}}::MAX // abs_b) + overflow.value = 1 + end + else + if abs_a > ({{type}}::MIN // -abs_b) + overflow.value = 1 + end + end + return result + end +end + +__mul_impl(__mulosi4, Int32, 32) +__mul_impl(__mulodi4, Int64, 64) +__mul_impl(__muloti4, Int128, 128) diff --git a/src/crystal/compiler_rt/mulodi4.cr b/src/crystal/compiler_rt/mulodi4.cr deleted file mode 100644 index e853b89b13aa..000000000000 --- a/src/crystal/compiler_rt/mulodi4.cr +++ /dev/null @@ -1,37 +0,0 @@ -# :nodoc: -fun __mulodi4(a : Int64, b : Int64, overflow : Int32*) : Int64 - n = 64 - min = Int64::MIN - max = Int64::MAX - overflow.value = 0 - result = a &* b - if a == min - if b != 0 && b != 1 - overflow.value = 1 - end - return result - end - if b == min - if a != 0 && a != 1 - overflow.value = 1 - end - return result - end - sa = a >> (n &- 1) - abs_a = (a ^ sa) &- sa - sb = b >> (n &- 1) - abs_b = (b ^ sb) &- sb - if abs_a < 2 || abs_b < 2 - return result - end - if sa == sb - if abs_a > max // abs_b - overflow.value = 1 - end - else - if abs_a > min // (0i64 &- abs_b) - overflow.value = 1 - end - end - return result -end diff --git a/src/string.cr b/src/string.cr index fa337155e66a..0af9f43808c4 100644 --- a/src/string.cr +++ b/src/string.cr @@ -477,6 +477,38 @@ class String gen_to_ UInt64, UInt64 end + # Same as `#to_i` but returns an `Int128`. + def to_i128(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int128 + to_i128(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid Int128: #{self}") } + end + + # Same as `#to_i` but returns an `Int128` or `nil`. + def to_i128?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : Int128? + to_i128(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } + end + + # Same as `#to_i` but returns an `Int128` or the block's value. + def to_i128(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) + # Because Int128 literals are not supported in the current crystal version, this logic is used to get the minimum + # TODO: Once a crystal version with Int128 support has been released, this can be changed to use literals + gen_to_ Int128, UInt128, Int128::MAX, (UInt128.new(Int128::MAX) + 1) + end + + # Same as `#to_i` but returns an `UInt128`. + def to_u128(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt128 + to_u128(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { raise ArgumentError.new("Invalid UInt128: #{self}") } + end + + # Same as `#to_i` but returns an `UInt128` or `nil`. + def to_u128?(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false) : UInt128? + to_u128(base, whitespace, underscore, prefix, strict, leading_zero_is_octal) { nil } + end + + # Same as `#to_i` but returns an `UInt128` or the block's value. + def to_u128(base : Int = 10, whitespace : Bool = true, underscore : Bool = false, prefix : Bool = false, strict : Bool = true, leading_zero_is_octal : Bool = false, &block) + gen_to_ UInt128, UInt128 + end + # :nodoc: CHAR_TO_DIGIT = begin table = StaticArray(Int8, 256).new(-1_i8) From 8e001b9278df6d788ab1162d31f8eb8bad31e17e Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Fri, 10 Sep 2021 00:16:02 +0200 Subject: [PATCH 02/18] Crystal tool format --- spec/std/crystal/compiler_rt/divmod128_spec.cr | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/std/crystal/compiler_rt/divmod128_spec.cr b/spec/std/crystal/compiler_rt/divmod128_spec.cr index 7e4b953ce0df..1edfc5c94447 100644 --- a/spec/std/crystal/compiler_rt/divmod128_spec.cr +++ b/spec/std/crystal/compiler_rt/divmod128_spec.cr @@ -5,6 +5,7 @@ require "spec" private def make_ti(a : Int128, b : Int128) (a << 64) + b end + private def make_tu(a : UInt128, b : UInt128) (a << 64) + b end From 21a342103d093d4dfa66c95af7569ebf01cfc135 Mon Sep 17 00:00:00 2001 From: David Keller Date: Fri, 10 Sep 2021 13:13:30 +0200 Subject: [PATCH 03/18] Update spec/std/int_spec.cr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include U/Int128 popcount spec Co-authored-by: Johannes Müller --- spec/std/int_spec.cr | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr index 8520672fa79e..8930acd73669 100644 --- a/spec/std/int_spec.cr +++ b/spec/std/int_spec.cr @@ -756,9 +756,8 @@ describe "Int" do it { 18446744073709551615_u64.popcount.should eq(64) } it { 0_i128.popcount.should eq(0) } - # PENDING - # it { 170141183460469231731687303715884105727_i128.popcount.should eq(127) } - # it { 340282366920938463463374607431768211455_u128.popcount.should eq(128) } + it { Int128::MAX.popcount.should eq(127) } + it { UInt128::MAX.popcount.should eq(128) } end describe "#leading_zeros_count" do From 1254065ac85c619394ffeb8e505e1da83da7052c Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Fri, 10 Sep 2021 14:47:27 +0200 Subject: [PATCH 04/18] Make specs compile on windows --- spec/std/int_spec.cr | 26 ++++++++++++++++++++++++-- spec/std/uint_spec.cr | 8 ++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr index 8930acd73669..4092086dd9a1 100644 --- a/spec/std/int_spec.cr +++ b/spec/std/int_spec.cr @@ -183,11 +183,15 @@ describe "Int" do it_converts_to_s 9223372036854775807_i64, "9223372036854775807" it_converts_to_s -9223372036854775808_i64, "-9223372036854775808" + + it_converts_to_s Int128::MAX, "170141183460469231731687303715884105727" + it_converts_to_s -Int128::MIN, "-170141183460469231731687303715884105728" it_converts_to_s 255_u8, "255" it_converts_to_s 65535_u16, "65535" it_converts_to_s 4294967295_u32, "4294967295" it_converts_to_s 18446744073709551615_u64, "18446744073709551615" + it_converts_to_s UInt128::MAX, "340282366920938463463374607431768211455" end context "base and upcase parameters" do @@ -522,15 +526,28 @@ describe "Int" do (Int16::MIN / -1).should eq(-(Int16::MIN.to_f64)) (Int32::MIN / -1).should eq(-(Int32::MIN.to_f64)) (Int64::MIN / -1).should eq(-(Int64::MIN.to_f64)) - (Int128::MIN / -1).should eq(-(Int128::MIN.to_f64)) (UInt8::MIN / -1).should eq(0) end + + pending_win32 "divides Int128::MIN by -1" do + (Int128::MIN / -1).should eq(-(Int128::MIN.to_f64)) + end end describe "floor division //" do it "preserves type of lhs" do - {% for type in [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128] %} + {% for type in [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64] %} + ({{type}}.new(7) // 2).should be_a({{type}}) + ({{type}}.new(7) // 2.0).should be_a({{type}}) + ({{type}}.new(7) // 2.0_f32).should be_a({{type}}) + {% end %} + end + + # Missing symbols: __floattidf, __floatuntidf, __fixdfti, __fixsfti, __fixunsdfti, __fixunssfti, __floatuntisf, __floattisf + # These symbols are all required to convert U/Int128s to Floats + pending_win32 "preserves type of lhs (128-bit)" do + {% for type in [UInt128, Int128] %} ({{type}}.new(7) // 2).should be_a({{type}}) ({{type}}.new(7) // 2.0).should be_a({{type}}) ({{type}}.new(7) // 2.0_f32).should be_a({{type}}) @@ -863,6 +880,11 @@ describe "Int" do Int32::MAX.digits.should eq(Int32::MAX.to_s.chars.map(&.to_i).reverse) Int64::MAX.digits.should eq(Int64::MAX.to_s.chars.map(&.to_i).reverse) UInt64::MAX.digits.should eq(UInt64::MAX.to_s.chars.map(&.to_i).reverse) + end + + # Missing symbol __floatuntidf on windows + pending_win32 "works for u/int128 maximums" do + Int128::MAX.digits.should eq(Int128::MAX.to_s.chars.map(&.to_i).reverse) UInt128::MAX.digits.should eq(UInt128::MAX.to_s.chars.map(&.to_i).reverse) end diff --git a/spec/std/uint_spec.cr b/spec/std/uint_spec.cr index 7de658f63b70..bd69c6ed3974 100644 --- a/spec/std/uint_spec.cr +++ b/spec/std/uint_spec.cr @@ -48,6 +48,14 @@ describe "UInt" do x = &-18446744073709551615_u64 x.should eq(1_u64) x.should be_a(UInt64) + + x = &-1_u128 + x.should eq(UInt128::MAX) # TODO: Change to literal once supported + x.should be_a(UInt128) + + x = &-(UInt128::MAX) # TODO: Change to literal once supported + x.should eq(1_u128) + x.should be_a(UInt128) end end end From 0a77b75dff4ee421d39d3d2777bcf93dd7295ab4 Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Fri, 10 Sep 2021 15:01:18 +0200 Subject: [PATCH 05/18] Fix specs (Int128::MIN is already negative) --- spec/std/int_spec.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr index 4092086dd9a1..f59674cf9589 100644 --- a/spec/std/int_spec.cr +++ b/spec/std/int_spec.cr @@ -185,7 +185,7 @@ describe "Int" do it_converts_to_s -9223372036854775808_i64, "-9223372036854775808" it_converts_to_s Int128::MAX, "170141183460469231731687303715884105727" - it_converts_to_s -Int128::MIN, "-170141183460469231731687303715884105728" + it_converts_to_s Int128::MIN, "-170141183460469231731687303715884105728" it_converts_to_s 255_u8, "255" it_converts_to_s 65535_u16, "65535" From 6adb83e5ab6178a6fe8f3c8ec381d074d96c2069 Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Fri, 10 Sep 2021 15:06:52 +0200 Subject: [PATCH 06/18] Add source of mulo{tds}i implementation --- src/crystal/compiler_rt/mul.cr | 1 + 1 file changed, 1 insertion(+) diff --git a/src/crystal/compiler_rt/mul.cr b/src/crystal/compiler_rt/mul.cr index 6b954458144d..ef1bb53c0c90 100644 --- a/src/crystal/compiler_rt/mul.cr +++ b/src/crystal/compiler_rt/mul.cr @@ -1,5 +1,6 @@ # :nodoc: private macro __mul_impl(name, type, n) + # Ported from llvm/compiler-rt:/lib/builtins/muloti4.c / mulodi4.c / mulosi4.c # :nodoc: fun {{name}}(a : {{type}}, b : {{type}}, overflow : Int32*) : {{type}} overflow.value = 0 From 3b3634dc3150e07658191cfe66f681ca865e8a3c Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Fri, 10 Sep 2021 15:07:49 +0200 Subject: [PATCH 07/18] Crystal tool format --- spec/std/int_spec.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr index f59674cf9589..d8fc5c51b324 100644 --- a/spec/std/int_spec.cr +++ b/spec/std/int_spec.cr @@ -183,7 +183,7 @@ describe "Int" do it_converts_to_s 9223372036854775807_i64, "9223372036854775807" it_converts_to_s -9223372036854775808_i64, "-9223372036854775808" - + it_converts_to_s Int128::MAX, "170141183460469231731687303715884105727" it_converts_to_s Int128::MIN, "-170141183460469231731687303715884105728" From 64187a22f4631098ba773273dd7131f88d54ce64 Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Sat, 11 Sep 2021 13:00:35 +0200 Subject: [PATCH 08/18] Get int signness from type name --- src/compiler/crystal/syntax/lexer.cr | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 45c8c7d9d7c7..880cf37bae01 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1580,8 +1580,8 @@ module Crystal set_token_raw_from_start(start) end - macro gen_check_int_fits_in_size(unsigned, type, method, size) - {% if unsigned %} + macro gen_check_int_fits_in_size(type, method, size) + {% if type.stringify.starts_with? "U" %} if negative raise "Invalid negative value #{string_value} for {{type}}" end @@ -1595,25 +1595,25 @@ module Crystal def check_integer_literal_fits_in_size(string_value, num_size, negative, start) case @token.number_kind when :i8 - gen_check_int_fits_in_size false, Int8, i8, 3 + gen_check_int_fits_in_size Int8, i8, 3 when :u8 - gen_check_int_fits_in_size true, UInt8, u8, 3 + gen_check_int_fits_in_size UInt8, u8, 3 when :i16 - gen_check_int_fits_in_size false, Int16, i16, 5 + gen_check_int_fits_in_size Int16, i16, 5 when :u16 - gen_check_int_fits_in_size true, UInt16, u16, 5 + gen_check_int_fits_in_size UInt16, u16, 5 when :i32 - gen_check_int_fits_in_size false, Int32, i32, 10 + gen_check_int_fits_in_size Int32, i32, 10 when :u32 - gen_check_int_fits_in_size true, UInt32, u32, 10 + gen_check_int_fits_in_size UInt32, u32, 10 when :i64 - gen_check_int_fits_in_size false, Int64, i64, 19 + gen_check_int_fits_in_size Int64, i64, 19 when :u64 - gen_check_int_fits_in_size true, UInt64, u64, 20 + gen_check_int_fits_in_size UInt64, u64, 20 when :i128 - gen_check_int_fits_in_size false, Int128, i128, 39 + gen_check_int_fits_in_size Int128, i128, 39 when :u128 - gen_check_int_fits_in_size true, UInt128, u128, 39 + gen_check_int_fits_in_size UInt128, u128, 39 end end From 2b1ba7bafee7c056215c64740d45a33e8e7d6eac Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Sat, 11 Sep 2021 13:25:05 +0200 Subject: [PATCH 09/18] Change order of u/int suffix consumption --- src/compiler/crystal/syntax/lexer.cr | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 880cf37bae01..41a018d7e6a1 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1757,6 +1757,9 @@ module Crystal def consume_int_suffix suffix_info = case next_char + # This order was chosen to optimize for the most common int suffix (i8) + when '8' + {:i8, 2} when '1' case next_char when '2' @@ -1768,8 +1771,6 @@ module Crystal {:i32, 3} if next_char == '2' when '6' {:i64, 3} if next_char == '4' - when '8' - {:i8, 2} end raise "invalid int suffix" unless suffix_info next_char @@ -1779,6 +1780,9 @@ module Crystal def consume_uint_suffix suffix_info = case next_char + # This order was chosen to optimize for the most common uint suffix (u8) + when '8' + {:u8, 2} when '1' case next_char when '2' @@ -1790,8 +1794,6 @@ module Crystal {:u32, 3} if next_char == '2' when '6' {:u64, 3} if next_char == '4' - when '8' - {:u8, 2} end raise "invalid uint suffix" unless suffix_info next_char From c5e12797f1b25add0ef50e40ce8e2ebd17ab943e Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Sat, 11 Sep 2021 13:28:52 +0200 Subject: [PATCH 10/18] Assign string_value directly from case --- src/compiler/crystal/syntax/lexer.cr | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 41a018d7e6a1..3633e7dbc6f2 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1716,22 +1716,20 @@ module Crystal end string = string_range(start_pos + 1, current_pos) - string_value = nil - - case digit_count * bits_per_character - when 0 - raise "numbers cannot end with a prefix" - when 1..32 - string_value = string.to_u32(base: base, underscore: true).to_s - when 33..64 - string_value = string.to_u64(base: base, underscore: true).to_s - when 65..128 - string_value = string.to_u128(base: base, underscore: true).to_s - else - raise_value_doesnt_fit_in "Int128", string, start - end - string_value = string_value.not_nil! + string_value = case digit_count * bits_per_character + when 0 + raise "numbers cannot end with a prefix" + when 1..32 + string.to_u32(base: base, underscore: true).to_s + when 33..64 + string.to_u64(base: base, underscore: true).to_s + when 65..128 + string.to_u128(base: base, underscore: true).to_s + else + raise_value_doesnt_fit_in "Int128", string, start + end + name_size = string_value.size string_value = "-#{string_value}" if negative From 3c7d7a137c6e0fe62e224c5598f8365a976352c2 Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Sat, 11 Sep 2021 13:35:31 +0200 Subject: [PATCH 11/18] crystal tool format --- src/compiler/crystal/syntax/lexer.cr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 3633e7dbc6f2..4703ff8b6707 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1778,7 +1778,7 @@ module Crystal def consume_uint_suffix suffix_info = case next_char - # This order was chosen to optimize for the most common uint suffix (u8) + # This order was chosen to optimize for the most common uint suffix (u8) when '8' {:u8, 2} when '1' From c8378b721dffe8789ce61f596007879a8529da59 Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Sat, 11 Sep 2021 14:22:45 +0200 Subject: [PATCH 12/18] Make std_specs work on windows --- spec/std/int_spec.cr | 15 ++++++++++++--- spec/std/string_spec.cr | 4 ++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr index d8fc5c51b324..3c3535614075 100644 --- a/spec/std/int_spec.cr +++ b/spec/std/int_spec.cr @@ -13,6 +13,15 @@ private macro it_converts_to_s(num, str, **opts) end end +private macro pending_win32_converts_to_s(num, str, **opts) + pending_win32 {{ "converts #{num} to #{str}" }} do + num = {{ num }} + str = {{ str }} + num.to_s({{ opts.double_splat }}).should eq(str) + String.build { |io| num.to_s(io, {{ opts.double_splat }}) }.should eq(str) + end +end + describe "Int" do describe "**" do it "with positive Int32" do @@ -184,14 +193,14 @@ describe "Int" do it_converts_to_s 9223372036854775807_i64, "9223372036854775807" it_converts_to_s -9223372036854775808_i64, "-9223372036854775808" - it_converts_to_s Int128::MAX, "170141183460469231731687303715884105727" - it_converts_to_s Int128::MIN, "-170141183460469231731687303715884105728" + pending_win32_converts_to_s Int128::MAX, "170141183460469231731687303715884105727" + pending_win32_converts_to_s Int128::MIN, "-170141183460469231731687303715884105728" it_converts_to_s 255_u8, "255" it_converts_to_s 65535_u16, "65535" it_converts_to_s 4294967295_u32, "4294967295" it_converts_to_s 18446744073709551615_u64, "18446744073709551615" - it_converts_to_s UInt128::MAX, "340282366920938463463374607431768211455" + pending_win32_converts_to_s UInt128::MAX, "340282366920938463463374607431768211455" end context "base and upcase parameters" do diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 99bb6e9c92eb..052d7ca0ce20 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -396,7 +396,7 @@ describe "String" do it { "18446744073709551616".to_u64 { 0 }.should eq(0) } end - describe "to_i128" do + pending_win32 "to_i128" do it { "170141183460469231731687303715884105727".to_i128.should eq(Int128::MAX) } it { "-170141183460469231731687303715884105728".to_i128.should eq(Int128::MIN) } it { expect_raises(ArgumentError) { "170141183460469231731687303715884105728".to_i128 } } @@ -409,7 +409,7 @@ describe "String" do it { expect_raises(ArgumentError) { "340282366920938463463374607431768211456".to_i128 } } end - describe "to_u128" do + pending_win32 "to_u128" do it { "340282366920938463463374607431768211455".to_u128.should eq(UInt128::MAX) } it { "0".to_u128.should eq(0) } it { expect_raises(ArgumentError) { "340282366920938463463374607431768211456".to_u128 } } From dae48fb643a52c0b52561c7ad6f39e10c2c54bf3 Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Sat, 11 Sep 2021 14:38:42 +0200 Subject: [PATCH 13/18] remove nested it's in string_spec --- spec/std/string_spec.cr | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 052d7ca0ce20..612018933ed4 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -396,28 +396,28 @@ describe "String" do it { "18446744073709551616".to_u64 { 0 }.should eq(0) } end - pending_win32 "to_i128" do - it { "170141183460469231731687303715884105727".to_i128.should eq(Int128::MAX) } - it { "-170141183460469231731687303715884105728".to_i128.should eq(Int128::MIN) } - it { expect_raises(ArgumentError) { "170141183460469231731687303715884105728".to_i128 } } - it { expect_raises(ArgumentError) { "-170141183460469231731687303715884105729".to_i128 } } + describe "to_i128" do + pending_win32 { "170141183460469231731687303715884105727".to_i128.should eq(Int128::MAX) } + pending_win32 { "-170141183460469231731687303715884105728".to_i128.should eq(Int128::MIN) } + pending_win32 { expect_raises(ArgumentError) { "170141183460469231731687303715884105728".to_i128 } } + pending_win32 { expect_raises(ArgumentError) { "-170141183460469231731687303715884105729".to_i128 } } - it { "170141183460469231731687303715884105727".to_i128?.should eq(Int128::MAX) } - it { "170141183460469231731687303715884105728".to_i128?.should be_nil } - it { "170141183460469231731687303715884105728".to_i128 { 0 }.should eq(0) } + pending_win32 { "170141183460469231731687303715884105727".to_i128?.should eq(Int128::MAX) } + pending_win32 { "170141183460469231731687303715884105728".to_i128?.should be_nil } + pending_win32 { "170141183460469231731687303715884105728".to_i128 { 0 }.should eq(0) } - it { expect_raises(ArgumentError) { "340282366920938463463374607431768211456".to_i128 } } + pending_win32 { expect_raises(ArgumentError) { "340282366920938463463374607431768211456".to_i128 } } end - pending_win32 "to_u128" do - it { "340282366920938463463374607431768211455".to_u128.should eq(UInt128::MAX) } - it { "0".to_u128.should eq(0) } - it { expect_raises(ArgumentError) { "340282366920938463463374607431768211456".to_u128 } } - it { expect_raises(ArgumentError) { "-1".to_u128 } } + describe "to_u128" do + pending_win32 { "340282366920938463463374607431768211455".to_u128.should eq(UInt128::MAX) } + pending_win32 { "0".to_u128.should eq(0) } + pending_win32 { expect_raises(ArgumentError) { "340282366920938463463374607431768211456".to_u128 } } + pending_win32 { expect_raises(ArgumentError) { "-1".to_u128 } } - it { "340282366920938463463374607431768211455".to_u128?.should eq(UInt128::MAX) } - it { "340282366920938463463374607431768211456".to_u128?.should be_nil } - it { "340282366920938463463374607431768211456".to_u128 { 0 }.should eq(0) } + pending_win32 { "340282366920938463463374607431768211455".to_u128?.should eq(UInt128::MAX) } + pending_win32 { "340282366920938463463374607431768211456".to_u128?.should be_nil } + pending_win32 { "340282366920938463463374607431768211456".to_u128 { 0 }.should eq(0) } end it { "1234".to_i32.should eq(1234) } From fd8a77ec454d70a01472d5a9393f7e6a68e1f3fc Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Sat, 11 Sep 2021 15:02:42 +0200 Subject: [PATCH 14/18] Try to fix missing symbols error in win32 compiler specs --- spec/compiler/codegen/arithmetics_spec.cr | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/spec/compiler/codegen/arithmetics_spec.cr b/spec/compiler/codegen/arithmetics_spec.cr index 7fc1d2cfb406..39826bbf4314 100644 --- a/spec/compiler/codegen/arithmetics_spec.cr +++ b/spec/compiler/codegen/arithmetics_spec.cr @@ -6,6 +6,15 @@ SupportedIntsConversions = { to_u8: UInt8, to_u16: UInt16, to_u32: UInt32, to_u64: UInt64, to_u128: UInt128, } +# Int128 and UInt128 specs do not pass on win32 because of missing symbols +{% if flag?(:win32) %} + SupportedInts = [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64] + SupportedIntsConversions = { + to_i8: Int8, to_i16: Int16, to_i32: Int32, to_i64: Int64, + to_u8: UInt8, to_u16: UInt16, to_u32: UInt32, to_u64: UInt64, + } +{% end %} + describe "Code gen: arithmetic primitives" do describe "&+ addition" do {% for type in SupportedInts %} From e3e84586ee4b637c527529bbca7412d5414043cc Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Sat, 11 Sep 2021 15:10:17 +0200 Subject: [PATCH 15/18] Fix arithmetics_spec constants on win32 --- spec/compiler/codegen/arithmetics_spec.cr | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/spec/compiler/codegen/arithmetics_spec.cr b/spec/compiler/codegen/arithmetics_spec.cr index 39826bbf4314..65e2276dcfe5 100644 --- a/spec/compiler/codegen/arithmetics_spec.cr +++ b/spec/compiler/codegen/arithmetics_spec.cr @@ -1,13 +1,13 @@ require "../../spec_helper" -SupportedInts = [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128] -SupportedIntsConversions = { - to_i8: Int8, to_i16: Int16, to_i32: Int32, to_i64: Int64, to_i128: Int128, - to_u8: UInt8, to_u16: UInt16, to_u32: UInt32, to_u64: UInt64, to_u128: UInt128, -} - # Int128 and UInt128 specs do not pass on win32 because of missing symbols -{% if flag?(:win32) %} +{% unless flag?(:win32) %} + SupportedInts = [UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128] + SupportedIntsConversions = { + to_i8: Int8, to_i16: Int16, to_i32: Int32, to_i64: Int64, to_i128: Int128, + to_u8: UInt8, to_u16: UInt16, to_u32: UInt32, to_u64: UInt64, to_u128: UInt128, + } +{% else %} SupportedInts = [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64] SupportedIntsConversions = { to_i8: Int8, to_i16: Int16, to_i32: Int32, to_i64: Int64, From b91eb8c6f4f957715444e510479df37b02ae1aeb Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Mon, 13 Sep 2021 17:56:39 +0200 Subject: [PATCH 16/18] Completely refactor lexer number parsing --- spec/compiler/lexer/lexer_spec.cr | 11 +- src/compiler/crystal/syntax/lexer.cr | 505 +++++++++------------------ 2 files changed, 180 insertions(+), 336 deletions(-) diff --git a/spec/compiler/lexer/lexer_spec.cr b/spec/compiler/lexer/lexer_spec.cr index f24c8089e6c4..34b9ab2eb68d 100644 --- a/spec/compiler/lexer/lexer_spec.cr +++ b/spec/compiler/lexer/lexer_spec.cr @@ -164,7 +164,7 @@ describe "Lexer" do ["+1.0f32", "+1.0"], ["-1.0f32", "-1.0"], ["-0.0f32", "-0.0"], ["1_234.567_890_f32", "1234.567890"]] it_lexes_f64 ["1.0", ["1.0hello", "1.0"], "+1.0", "-1.0", ["1_234.567_890", "1234.567890"]] it_lexes_f32 [["1e+23_f32", "1e+23"], ["1.2e+23_f32", "1.2e+23"]] - it_lexes_f64 ["1e23", "1e-23", "1e+23", "1.2e+23", ["1e23f64", "1e23"], ["1.2e+23_f64", "1.2e+23"]] + it_lexes_f64 ["1e23", "1e-23", "1e+23", "1.2e+23", ["1e23f64", "1e23"], ["1.2e+23_f64", "1.2e+23"], "0e40"] it_lexes_number :i8, ["1i8", "1"] it_lexes_number :i8, ["1_i8", "1"] @@ -310,6 +310,13 @@ describe "Lexer" do assert_syntax_error "-999999999999999999999999999999999999999", "-999999999999999999999999999999999999999 doesn't fit in an Int128" assert_syntax_error "-1_u128", "Invalid negative value -1 for UInt128" + assert_syntax_error "-0_u128", "Invalid negative value -0 for UInt128" + assert_syntax_error "-0u128", "Invalid negative value -0 for UInt128" + + assert_syntax_error "1__1", "trailing '_' in number" + assert_syntax_error "-3_", "trailing '_' in number" + + assert_syntax_error "0_12", "octal constants should be prefixed with 0o" assert_syntax_error "0xFF_i8", "255 doesn't fit in an Int8" assert_syntax_error "0o200_i8", "128 doesn't fit in an Int8" @@ -330,6 +337,8 @@ describe "Lexer" do assert_syntax_error ".42", ".1 style number literal is not supported, put 0 before dot" assert_syntax_error "-.42", ".1 style number literal is not supported, put 0 before dot" + assert_syntax_error "-0_e12", "trailing '_' in number" + it "lexes not instance var" do lexer = Lexer.new "!@foo" token = lexer.next_token diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 4703ff8b6707..17dcc34c99a1 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -287,10 +287,8 @@ module Crystal case next_char when '=' next_char :"+=" - when '0' - scan_zero_number(start) - when '1', '2', '3', '4', '5', '6', '7', '8', '9' - scan_number(start) + when '0'..'9' + scan_number start when '+' raise "postfix increment is not supported, use `exp += 1`" else @@ -303,9 +301,7 @@ module Crystal next_char :"-=" when '>' next_char :"->" - when '0' - scan_zero_number start, negative: true - when '1', '2', '3', '4', '5', '6', '7', '8', '9' + when '0'..'9' scan_number start, negative: true when '-' raise "postfix decrement is not supported, use `exp -= 1`" @@ -777,10 +773,8 @@ module Crystal @token.delimiter_state = Token::DelimiterState.new(delimiter == '`' ? :command : :string, delimiter, delimiter) set_token_raw_from_start(start) end - when '0' - scan_zero_number(start) - when '1', '2', '3', '4', '5', '6', '7', '8', '9' - scan_number current_pos + when '0'..'9' + scan_number start when '@' start = current_pos case next_char @@ -1456,360 +1450,201 @@ module Crystal @token.raw = ":#{value}" if @wants_raw end - def scan_number(start, negative = false) - @token.type = :NUMBER - - has_underscore = false - is_integer = true - has_suffix = true - suffix_size = 0 + macro gen_check_int_fits_in_size(type, method, size) + {% if type.stringify.starts_with? "U" %} + raise "Invalid negative value #{str} for {{type}}", @token, (current_pos - start) if negative + {% end %} - while true - char = next_char - if char.ascii_number? - # Nothing to do - elsif char == '_' - has_underscore = true - else - break - end + if num_size > {{size}} || (num_size == {{size}} && str.to_{{method.id}}? == nil) + raise_value_doesnt_fit_in "{{type}}", str, start end + end - case current_char - when '.' - if peek_next_char.ascii_number? - is_integer = false - - while true - char = next_char - if char.ascii_number? - # Nothing to do - elsif char == '_' - has_underscore = true - else - break - end - end - - if current_char == 'e' || current_char == 'E' - next_char - - if current_char == '+' || current_char == '-' - next_char - end - - while true - if current_char.ascii_number? - # Nothing to do - elsif current_char == '_' - has_underscore = true - else - break - end - next_char - end - end + def raise_value_doesnt_fit_in(type, string_value, start) + raise "#{string_value} doesn't fit in an #{type}", @token, (current_pos - start) + end - if current_char == 'f' - suffix_size = consume_float_suffix - else - @token.number_kind = :f64 - end + private def scan_number(start, negative = false) + @token.type = :NUMBER + base = 10 + suffix_size = 0 + is_decimal = false + is_e_notation = false + underscore_count = 0 + last_is_underscore = false + pos_after_prefix = start + + # Consume prefix + if current_char == '0' + case next_char + when 'b' then base = 2 + when 'o' then base = 8 + when 'x' then base = 16 + when '0'..'9' then raise "octal constants should be prefixed with 0o" + when '_' + raise "octal constants should be prefixed with 0o" if next_char.in? '0'..'9' + last_is_underscore = true + underscore_count = 1 else - @token.number_kind = :i32 - has_suffix = false + # Go back to '0' character to properly process characters 'e', 'u', 'i', 'f' (or to return "0") + self.current_pos -= 1 end - when 'e', 'E' - is_integer = false - next_char - if current_char == '+' || current_char == '-' + # Skip prefix (b, o, x) + unless base == 10 next_char + pos_after_prefix = current_pos + # Disallow underscore after prefix + raise("numeric literal without digits", @token, (current_pos - start)) if current_char == '_' end + end - while true - if current_char.ascii_number? - # Nothing to do - elsif current_char == '_' - has_underscore = true - else - break - end + # Consume number + loop do + while String::CHAR_TO_DIGIT[current_char.ord].to_u8! < base next_char + last_is_underscore = false end - if current_char == 'f' - suffix_size = consume_float_suffix + case current_char + when '_' + raise("trailing '_' in number", @token, (current_pos - start)) if last_is_underscore + last_is_underscore = true + underscore_count += 1 + when '.' + raise("trailing '_' in number", @token, (current_pos - start)) if last_is_underscore + break if is_decimal || base != 10 || !peek_next_char.in?('0'..'9') + is_decimal = true + when 'e', 'E' + raise("trailing '_' in number", @token, (current_pos - start)) if last_is_underscore + break if is_e_notation || base != 10 + is_e_notation = last_is_underscore = is_decimal = true + underscore_count += 1 + next_char if peek_next_char.in?({'+', '-'}) + when 'i' + break if is_decimal + @token.number_kind, suffix_size = consume_int_suffix + next_char + break + when 'u' + break if is_decimal + @token.number_kind, suffix_size = consume_uint_suffix + next_char + break + when 'f' + @token.number_kind, suffix_size = consume_float_suffix + next_char + break else - @token.number_kind = :f64 + raise("trailing '_' in number", @token, (current_pos - start)) if last_is_underscore + break end - when 'f' - is_integer = false - suffix_size = consume_float_suffix - when 'i' - suffix_size = consume_int_suffix - when 'u' - suffix_size = consume_uint_suffix - else - has_suffix = false - @token.number_kind = :i32 - end - - end_pos = current_pos - suffix_size - if end_pos - start == 1 - # For numbers such as 0, 1, 2, 3, etc., we use a string from the pool - string_value = string_range_from_pool(start, end_pos) - else - string_value = string_range(start, end_pos) + next_char end - string_value = string_value.delete('_') if has_underscore - if is_integer - num_size = string_value.size - num_size -= 1 if negative - - if has_suffix - check_integer_literal_fits_in_size string_value, num_size, negative, start - else - deduce_integer_kind string_value, num_size, negative, start - end + # Sanitize string (or convert to decimal unless number is in base 10) + end_pos = current_pos - suffix_size + str = if base == 10 + ret = string_range(start, end_pos) + ret = ret.delete('_') if underscore_count > 0 + ret + else + ret = string_range(pos_after_prefix, end_pos) + required_bytes = base.trailing_zeros_count * (ret.size - underscore_count) + tmp_str = case required_bytes + when 0 then raise("numeric literal without digits", @token, (current_pos - start)) + when 1..32 then ret.to_u32(base: base, underscore: true).to_s + when 33..64 then ret.to_u64(base: base, underscore: true).to_s + when 65..128 then ret.to_u128(base: base, underscore: true).to_s + else raise_value_doesnt_fit_in(UInt128, string_range(start, end_pos), start) + end + first_byte = @reader.string.byte_at(start).chr + tmp_str = first_byte + tmp_str if first_byte.in?({'+', '-'}) + tmp_str + end + @token.value = str + + # No special checks needed for floating values + if is_decimal + @token.number_kind = :f64 if suffix_size == 0 + return end - @token.value = string_value - set_token_raw_from_start(start) - end - - macro gen_check_int_fits_in_size(type, method, size) - {% if type.stringify.starts_with? "U" %} - if negative - raise "Invalid negative value #{string_value} for {{type}}" - end - {% end %} - - if num_size > {{size}} || (num_size == {{size}} && string_value.to_{{method.id}}? == nil) - raise_value_doesnt_fit_in "{{type}}", string_value, start - end - end - - def check_integer_literal_fits_in_size(string_value, num_size, negative, start) - case @token.number_kind - when :i8 - gen_check_int_fits_in_size Int8, i8, 3 - when :u8 - gen_check_int_fits_in_size UInt8, u8, 3 - when :i16 - gen_check_int_fits_in_size Int16, i16, 5 - when :u16 - gen_check_int_fits_in_size UInt16, u16, 5 - when :i32 - gen_check_int_fits_in_size Int32, i32, 10 - when :u32 - gen_check_int_fits_in_size UInt32, u32, 10 - when :i64 - gen_check_int_fits_in_size Int64, i64, 19 - when :u64 - gen_check_int_fits_in_size UInt64, u64, 20 - when :i128 - gen_check_int_fits_in_size Int128, i128, 39 - when :u128 - gen_check_int_fits_in_size UInt128, u128, 39 - end - end - - def deduce_integer_kind(string_value, num_size, negative, start) - @token.number_kind = case num_size - when 0..9 # Keep as i32 - :i32 - when 10 - string_value.to_i32? ? :i32 : :i64 - when 11..18 - :i64 - when 19 - string_value.to_i64? ? :i64 : (negative ? :i128 : :u64) - when 20 - string_value.to_u64? ? :u64 : :i128 - when 20..38 - :i128 - when 39 - if string_value.to_i128? - :i128 + # Check or determine suffix + if suffix_size == 0 + actual_string_size = str.size + actual_string_size -= 1 if negative + @token.number_kind = case actual_string_size + when 0..9 then :i32 + when 10 then str.to_i32? ? :i32 : :i64 + when 11..18 then :i64 + when 19 then str.to_i64? ? :i64 : (negative ? :i128 : :u64) + when 20 then (negative || !str.to_u64?) ? :i128 : :u64 + when 20..38 then :i128 + when 39 + if str.to_i128? + :i128 + else + raise_value_doesnt_fit_in(Int128, str, start) if negative + raise_value_doesnt_fit_in(UInt128, str, start) unless str.to_u128? + :u128 + end else - raise_value_doesnt_fit_in(Int128, string_value, start) if negative - raise_value_doesnt_fit_in(UInt128, string_value, start) unless string_value.to_u128? - :u128 + raise_value_doesnt_fit_in(Int128, str, start) if negative + raise_value_doesnt_fit_in UInt128, str, start end - else - raise_value_doesnt_fit_in Int128, string_value, start - end - end - - def raise_value_doesnt_fit_in(type, string_value, start) - raise "#{string_value} doesn't fit in an #{type}", @token, (current_pos - start) - end - - def raise_value_restricted_by(type, restricted_by_type, string_value, start) - raise "#{string_value} doesn't fit in an #{restricted_by_type}. #{type} literals that don't fit in an #{restricted_by_type} are currently not supported", @token, (current_pos - start) - end - - def scan_zero_number(start, negative = false) - case peek_next_char - when 'x' - scan_base_npow2_number(4, start, negative) - when 'o' - scan_base_npow2_number(3, start, negative) - when 'b' - scan_base_npow2_number(1, start, negative) - when '.' - scan_number(start) - when 'i' - @token.type = :NUMBER - @token.value = "0" - next_char - consume_int_suffix - set_token_raw_from_start(start) - when 'f' - @token.type = :NUMBER - @token.value = "0" - next_char - consume_float_suffix - set_token_raw_from_start(start) - when 'u' - @token.type = :NUMBER - @token.value = "0" - next_char - consume_uint_suffix - set_token_raw_from_start(start) - when '_' - scan_number(start) else - if next_char.ascii_number? - raise "octal constants should be prefixed with 0o" - else - first_byte = @reader.string.byte_at(start) - @token.type = :NUMBER - @token.number_kind = :i32 - @token.value = case first_byte - when '+' then "+0" - when '-' then "-0" - else "0" - end - set_token_raw_from_start(start) + num_size = negative ? str.size - 1 : str.size + case @token.number_kind + when :i8 then gen_check_int_fits_in_size(Int8, :i8, 3) + when :u8 then gen_check_int_fits_in_size(UInt8, :u8, 3) + when :i16 then gen_check_int_fits_in_size(Int16, :i16, 5) + when :u16 then gen_check_int_fits_in_size(UInt16, :u16, 5) + when :i32 then gen_check_int_fits_in_size(Int32, :i32, 10) + when :u32 then gen_check_int_fits_in_size(UInt32, :u32, 10) + when :i64 then gen_check_int_fits_in_size(Int64, :i64, 19) + when :u64 then gen_check_int_fits_in_size(UInt64, :u64, 20) + when :i128 then gen_check_int_fits_in_size(Int128, :i128, 39) + when :u128 then gen_check_int_fits_in_size(UInt128, :u128, 39) + end + end + end + + private def consume_int_suffix : Tuple(Symbol, Int32) + case next_char + when '8' then return {:i8, 2} + when '1' + case next_char + when '2' then return {:i128, 4} if next_char == '8' + when '6' then return {:i16, 3} end + when '3' then return {:i32, 3} if next_char == '2' + when '6' then return {:i64, 3} if next_char == '4' end + raise "invalid int suffix" end - def scan_base_npow2_number(bits_per_character, start, negative) - next_char - - digits = String::CHAR_TO_DIGIT.to_unsafe - digit_count = 0 - base = 2 ** bits_per_character - start_pos = current_pos - - while true - char = next_char - next if char == '_' - digit = digits[char.ord] - break if digit > base || digit < 0 - digit_count += 1 - end - - string = string_range(start_pos + 1, current_pos) - - string_value = case digit_count * bits_per_character - when 0 - raise "numbers cannot end with a prefix" - when 1..32 - string.to_u32(base: base, underscore: true).to_s - when 33..64 - string.to_u64(base: base, underscore: true).to_s - when 65..128 - string.to_u128(base: base, underscore: true).to_s - else - raise_value_doesnt_fit_in "Int128", string, start - end - - name_size = string_value.size - string_value = "-#{string_value}" if negative - - case current_char - when 'i' - consume_int_suffix - check_integer_literal_fits_in_size string_value, name_size, negative, start - when 'u' - consume_uint_suffix - check_integer_literal_fits_in_size string_value, name_size, negative, start - else - @token.number_kind = :i32 - deduce_integer_kind string_value, name_size, negative, start + private def consume_uint_suffix : Tuple(Symbol, Int32) + case next_char + when '8' then return {:u8, 2} + when '1' + case next_char + when '2' then return {:u128, 4} if next_char == '8' + when '6' then return {:u16, 3} + end + when '3' then return {:u32, 3} if next_char == '2' + when '6' then return {:u64, 3} if next_char == '4' end - - first_byte = @reader.string.byte_at(start) - string_value = "+#{string_value}" if first_byte === '+' - - @token.type = :NUMBER - @token.value = string_value - set_token_raw_from_start(start) + raise "invalid uint suffix" end - def consume_int_suffix - suffix_info = case next_char - # This order was chosen to optimize for the most common int suffix (i8) - when '8' - {:i8, 2} - when '1' - case next_char - when '2' - {:i128, 4} if next_char == '8' - when '6' - {:i16, 3} - end - when '3' - {:i32, 3} if next_char == '2' - when '6' - {:i64, 3} if next_char == '4' - end - raise "invalid int suffix" unless suffix_info - next_char - @token.number_kind = suffix_info[0] - suffix_info[1] - end - - def consume_uint_suffix - suffix_info = case next_char - # This order was chosen to optimize for the most common uint suffix (u8) - when '8' - {:u8, 2} - when '1' - case next_char - when '2' - {:u128, 4} if next_char == '8' - when '6' - {:u16, 3} - end - when '3' - {:u32, 3} if next_char == '2' - when '6' - {:u64, 3} if next_char == '4' - end - raise "invalid uint suffix" unless suffix_info - next_char - @token.number_kind = suffix_info[0] - suffix_info[1] - end - - def consume_float_suffix - suffix_info = case next_char - when '3' - {:f32, 3} if next_char == '2' - when '6' - {:f64, 3} if next_char == '4' - end - raise "invalid float suffix" unless suffix_info - next_char - @token.number_kind = suffix_info[0] - suffix_info[1] + private def consume_float_suffix : Tuple(Symbol, Int32) + case next_char + when '3' then return {:f32, 3} if next_char == '2' + when '6' then return {:f64, 3} if next_char == '4' + end + raise "invalid float suffix" end def next_string_token(delimiter_state) From f64d70a3fee47b165521bb7924b770a7182601e7 Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Mon, 13 Sep 2021 19:59:32 +0200 Subject: [PATCH 17/18] set token raw in lexer number parsing --- src/compiler/crystal/syntax/lexer.cr | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 17dcc34c99a1..e76f2d8984e4 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1568,6 +1568,7 @@ module Crystal # No special checks needed for floating values if is_decimal @token.number_kind = :f64 if suffix_size == 0 + set_token_raw_from_start(start) return end @@ -1609,6 +1610,7 @@ module Crystal when :u128 then gen_check_int_fits_in_size(UInt128, :u128, 39) end end + set_token_raw_from_start(start) end private def consume_int_suffix : Tuple(Symbol, Int32) From 5e0fd920b8fd11c1086026c38dd9cbf32d465665 Mon Sep 17 00:00:00 2001 From: BlobCodes Date: Tue, 14 Sep 2021 18:08:51 +0200 Subject: [PATCH 18/18] Fix CI (remove a fix that didn't fix anything) --- src/compiler/crystal/syntax/lexer.cr | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index e76f2d8984e4..3ba853b8c07f 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -1485,9 +1485,6 @@ module Crystal raise "octal constants should be prefixed with 0o" if next_char.in? '0'..'9' last_is_underscore = true underscore_count = 1 - else - # Go back to '0' character to properly process characters 'e', 'u', 'i', 'f' (or to return "0") - self.current_pos -= 1 end # Skip prefix (b, o, x)