From ebbc8a799020243f703c9a318239af467d1d1597 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 11 Sep 2023 14:21:23 -0400 Subject: [PATCH] make "dec" and ryu functions faster and simpler We had some common code in `Ryu.append_c_digits` that can be combined with Base logic for the same thing. But it turns out all of this duplicated code in Ryu seems to just make it run slightly slower in most cases. The old version had many more branches to check, even though often numbers are small, so only the last check is meaningful. But the assumption that it would be faster even if all of them were used also seems to not hold up in practice. Particularly for a function like `append_nine_digits` which unrolls completely, but the complicated version has slightly more data dependencies because of the way it is written. Similarly, we replace `unsafe_copyto!` with `@inbounds[]`, since this is better for the optimizer, which doesn't need to treat this operation as an unknown reference escape. Lastly, we use the append_nine_digits trick from Ryu to make printing of arbitrary big numbers much faster. 
``` julia> @btime string(typemax(Int128)) 402.345 ns (2 allocations: 120 bytes) # before 151.139 ns (2 allocations: 120 bytes) # after ``` --- base/intfuncs.jl | 86 ++++++++++++++---- base/ryu/exp.jl | 70 ++++++++------- base/ryu/fixed.jl | 2 +- base/ryu/shortest.jl | 209 ++++++++++++++++++------------------------- base/ryu/utils.jl | 118 +++--------------------- 5 files changed, 200 insertions(+), 285 deletions(-) diff --git a/base/intfuncs.jl b/base/intfuncs.jl index 1b007700f4331..90dc393a0e9b4 100644 --- a/base/intfuncs.jl +++ b/base/intfuncs.jl @@ -558,7 +558,7 @@ function bit_ndigits0z(x::Base.BitUnsigned64) end function bit_ndigits0z(x::UInt128) n = 0 - while x > 0x8ac7230489e80000 + while x > 0x8ac7230489e80000 # 10e18 x = div(x,0x8ac7230489e80000) n += 19 end @@ -724,7 +724,7 @@ function bin(x::Unsigned, pad::Int, neg::Bool) x >>= 0x1 i -= 1 end - if neg; @inbounds a[1]=0x2d; end + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end @@ -738,29 +738,77 @@ function oct(x::Unsigned, pad::Int, neg::Bool) x >>= 0x3 i -= 1 end - if neg; @inbounds a[1]=0x2d; end + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end # 2-digit decimal characters ("00":"99") -const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] +const _dec_d100 = UInt16[ +# generating expression: UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] +# 0 0, 0 1, 0 2, 0 3, and so on in little-endian + 0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930, + 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931, + 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932, + 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933, + 0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934, + 0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935, + 0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 
0x3936, + 0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937, + 0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938, + 0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939 +] -function dec(x::Unsigned, pad::Int, neg::Bool) - n = neg + ndigits(x, pad=pad) - a = StringVector(n) - i = n - @inbounds while i >= 2 - d, r = divrem(x, 0x64) - d100 = _dec_d100[(r % Int)::Int + 1] - a[i-1] = d100 % UInt8 - a[i] = (d100 >> 0x8) % UInt8 - x = oftype(x, d) +function append_c_digits(olength::Int, digits::Unsigned, buf, pos::Int) + i = olength + while i >= 2 + d, c = divrem(digits, 0x64) + digits = oftype(digits, d) + @inbounds d100 = _dec_d100[(c % Int) + 1] + @inbounds buf[pos + i - 2] = d100 % UInt8 + @inbounds buf[pos + i - 1] = (d100 >> 0x8) % UInt8 i -= 2 end - if i > neg - @inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 + if i == 1 + @inbounds buf[pos] = UInt8('0') + rem(digits, 0xa) % UInt8 + i -= 1 end - if neg; @inbounds a[1]=0x2d; end + return pos + olength +end + +function append_nine_digits(digits::Unsigned, buf, pos::Int) + if digits == 0 + for _ = 1:9 + @inbounds buf[pos] = UInt8('0') + pos += 1 + end + return pos + end + return @inline append_c_digits(9, digits, buf, pos) # force loop-unrolling on the length +end + +function append_c_digits_fast(olength::Int, digits::Unsigned, buf, pos::Int) + i = olength + # n.b. olength may be larger than required to print all of `digits` (and will be padded + # with zeros), but the printed number will be undefined if it is smaller, and may include + # bits of both the high and low bytes. 
+ maxpow10 = 0x3b9aca00 # 1e9 as UInt32 + while i > 9 && digits > typemax(UInt) + # do everything in cheap math chunks, using the processor's native math size + d, c = divrem(digits, maxpow10) + digits = oftype(digits, d) + append_nine_digits(c % UInt32, buf, pos + i - 9) + i -= 9 + end + append_c_digits(i, digits % UInt, buf, pos) + return pos + olength +end + + +function dec(x::Unsigned, pad::Int, neg::Bool) + n = neg + ndigits(x, pad=pad) + a = StringVector(n) + append_c_digits_fast(n, x, a, 1) + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end @@ -781,7 +829,7 @@ function hex(x::Unsigned, pad::Int, neg::Bool) d = (x % UInt8)::UInt8 & 0xf @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30) end - if neg; @inbounds a[1]=0x2d; end + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end @@ -806,7 +854,7 @@ function _base(base::Integer, x::Integer, pad::Int, neg::Bool) end i -= 1 end - if neg; @inbounds a[1]=0x2d; end + neg && (@inbounds a[1] = 0x2d) # UInt8('-') String(a) end diff --git a/base/ryu/exp.jl b/base/ryu/exp.jl index 30291212d014d..4249a9ea1b519 100644 --- a/base/ryu/exp.jl +++ b/base/ryu/exp.jl @@ -8,33 +8,33 @@ function writeexp(buf, pos, v::T, # special cases if x == 0 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 if precision > 0 && !trimtrailingzeros - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 for _ = 1:precision - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end elseif hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end - buf[pos] = expchar - buf[pos + 1] = UInt8('+') - buf[pos + 2] = UInt8('0') - buf[pos + 3] = UInt8('0') + @inbounds buf[pos] = expchar + @inbounds buf[pos + 1] = UInt8('+') + @inbounds buf[pos + 2] = UInt8('0') + @inbounds buf[pos + 3] = UInt8('0') return pos + 4 elseif isnan(x) - buf[pos] = UInt8('N') - buf[pos + 1] = UInt8('a') - buf[pos + 2] = UInt8('N') + @inbounds buf[pos] = UInt8('N') + @inbounds buf[pos + 1] = UInt8('a') + @inbounds buf[pos 
+ 2] = UInt8('N') return pos + 3 elseif !isfinite(x) - buf[pos] = UInt8('I') - buf[pos + 1] = UInt8('n') - buf[pos + 2] = UInt8('f') + @inbounds buf[pos] = UInt8('I') + @inbounds buf[pos + 1] = UInt8('n') + @inbounds buf[pos + 2] = UInt8('f') return pos + 3 end @@ -80,10 +80,10 @@ function writeexp(buf, pos, v::T, if precision > 1 pos = append_d_digits(availableDigits, digits, buf, pos, decchar) else - buf[pos] = UInt8('0') + digits + @inbounds buf[pos] = UInt8('0') + digits pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end end @@ -121,10 +121,10 @@ function writeexp(buf, pos, v::T, if precision > 1 pos = append_d_digits(availableDigits, digits, buf, pos, decchar) else - buf[pos] = UInt8('0') + digits + @inbounds buf[pos] = UInt8('0') + digits pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end end @@ -162,7 +162,7 @@ function writeexp(buf, pos, v::T, if printedDigits != 0 if digits == 0 for _ = 1:maximum - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end else @@ -172,10 +172,10 @@ function writeexp(buf, pos, v::T, if precision > 1 pos = append_d_digits(maximum, digits, buf, pos, decchar) else - buf[pos] = UInt8('0') + digits + @inbounds buf[pos] = UInt8('0') + digits pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end end @@ -184,52 +184,56 @@ function writeexp(buf, pos, v::T, roundPos = pos while true roundPos -= 1 - if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-') || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' ')) - buf[roundPos + 1] = UInt8('1') + if roundPos == (startpos - 1) || (@inbounds buf[roundPos]) == UInt8('-') || (plus && (@inbounds buf[roundPos]) == UInt8('+')) || (space && (@inbounds buf[roundPos]) == UInt8(' ')) + @inbounds buf[roundPos + 1] = UInt8('1') e += 1 break end - c = roundPos > 0 ? buf[roundPos] : 0x00 + c = roundPos > 0 ? 
(@inbounds buf[roundPos]) : 0x00 if c == decchar continue elseif c == UInt8('9') - buf[roundPos] = UInt8('0') + @inbounds buf[roundPos] = UInt8('0') roundUp = 1 continue else if roundUp == 2 && UInt8(c) % 2 == 0 break end - buf[roundPos] = c + 1 + @inbounds buf[roundPos] = c + 1 break end end end if trimtrailingzeros - while buf[pos - 1] == UInt8('0') + while @inbounds buf[pos - 1] == UInt8('0') pos -= 1 end - if buf[pos - 1] == decchar && !hash + if @inbounds buf[pos - 1] == decchar && !hash pos -= 1 end end buf[pos] = expchar pos += 1 if e < 0 - buf[pos] = UInt8('-') + @inbounds buf[pos] = UInt8('-') pos += 1 e = -e else - buf[pos] = UInt8('+') + @inbounds buf[pos] = UInt8('+') pos += 1 end if e >= 100 c = e % 10 - unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * div(e, 10) + 1, 2) - buf[pos + 2] = UInt8('0') + c + @inbounds d100 = DIGIT_TABLE16[div(e, 10) + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 + @inbounds buf[pos + 2] = UInt8('0') + c pos += 3 else - unsafe_copyto!(buf, pos, DIGIT_TABLE, 2 * e + 1, 2) + @inbounds d100 = DIGIT_TABLE16[e + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 pos += 2 end return pos diff --git a/base/ryu/fixed.jl b/base/ryu/fixed.jl index e0085f5c66dab..969dd70665a7e 100644 --- a/base/ryu/fixed.jl +++ b/base/ryu/fixed.jl @@ -59,7 +59,7 @@ function writefixed(buf, pos, v::T, pos = append_nine_digits(digits, buf, pos) elseif digits != 0 olength = decimallength(digits) - pos = append_n_digits(olength, digits, buf, pos) + pos = append_c_digits(olength, digits, buf, pos) nonzero = true end i -= 1 diff --git a/base/ryu/shortest.jl b/base/ryu/shortest.jl index aaa62ba33c703..32aa993467e7a 100644 --- a/base/ryu/shortest.jl +++ b/base/ryu/shortest.jl @@ -232,79 +232,79 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, # special cases if x == 0 if typed && x isa Float16 - buf[pos] = UInt8('F') - buf[pos + 1] = UInt8('l') - buf[pos + 2] = UInt8('o') - 
buf[pos + 3] = UInt8('a') - buf[pos + 4] = UInt8('t') - buf[pos + 5] = UInt8('1') - buf[pos + 6] = UInt8('6') - buf[pos + 7] = UInt8('(') + @inbounds buf[pos] = UInt8('F') + @inbounds buf[pos + 1] = UInt8('l') + @inbounds buf[pos + 2] = UInt8('o') + @inbounds buf[pos + 3] = UInt8('a') + @inbounds buf[pos + 4] = UInt8('t') + @inbounds buf[pos + 5] = UInt8('1') + @inbounds buf[pos + 6] = UInt8('6') + @inbounds buf[pos + 7] = UInt8('(') pos += 8 end pos = append_sign(x, plus, space, buf, pos) - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 end if precision == -1 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 if typed && x isa Float32 - buf[pos] = UInt8('f') - buf[pos + 1] = UInt8('0') + @inbounds buf[pos] = UInt8('f') + @inbounds buf[pos + 1] = UInt8('0') pos += 2 end if typed && x isa Float16 - buf[pos] = UInt8(')') + @inbounds buf[pos] = UInt8(')') pos += 1 end return pos end while hash && precision > 1 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 precision -= 1 end if typed && x isa Float32 - buf[pos] = UInt8('f') - buf[pos + 1] = UInt8('0') + @inbounds buf[pos] = UInt8('f') + @inbounds buf[pos + 1] = UInt8('0') pos += 2 end if typed && x isa Float16 - buf[pos] = UInt8(')') + @inbounds buf[pos] = UInt8(')') pos += 1 end return pos elseif isnan(x) pos = append_sign(x, plus, space, buf, pos) - buf[pos] = UInt8('N') - buf[pos + 1] = UInt8('a') - buf[pos + 2] = UInt8('N') + @inbounds buf[pos] = UInt8('N') + @inbounds buf[pos + 1] = UInt8('a') + @inbounds buf[pos + 2] = UInt8('N') if typed if x isa Float32 - buf[pos + 3] = UInt8('3') - buf[pos + 4] = UInt8('2') + @inbounds buf[pos + 3] = UInt8('3') + @inbounds buf[pos + 4] = UInt8('2') elseif x isa Float16 - buf[pos + 3] = UInt8('1') - buf[pos + 4] = UInt8('6') + @inbounds buf[pos + 3] = UInt8('1') + @inbounds buf[pos + 4] = UInt8('6') end end return pos + 3 + (typed && x isa Union{Float32, 
Float16} ? 2 : 0) elseif !isfinite(x) pos = append_sign(x, plus, space, buf, pos) - buf[pos] = UInt8('I') - buf[pos + 1] = UInt8('n') - buf[pos + 2] = UInt8('f') + @inbounds buf[pos] = UInt8('I') + @inbounds buf[pos + 1] = UInt8('n') + @inbounds buf[pos + 2] = UInt8('f') if typed if x isa Float32 - buf[pos + 3] = UInt8('3') - buf[pos + 4] = UInt8('2') + @inbounds buf[pos + 3] = UInt8('3') + @inbounds buf[pos + 4] = UInt8('2') elseif x isa Float16 - buf[pos + 3] = UInt8('1') - buf[pos + 4] = UInt8('6') + @inbounds buf[pos + 3] = UInt8('1') + @inbounds buf[pos + 4] = UInt8('6') end end return pos + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0) @@ -313,14 +313,14 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, output, nexp = reduce_shortest(x, compact ? 999_999 : nothing) if typed && x isa Float16 - buf[pos] = UInt8('F') - buf[pos + 1] = UInt8('l') - buf[pos + 2] = UInt8('o') - buf[pos + 3] = UInt8('a') - buf[pos + 4] = UInt8('t') - buf[pos + 5] = UInt8('1') - buf[pos + 6] = UInt8('6') - buf[pos + 7] = UInt8('(') + @inbounds buf[pos] = UInt8('F') + @inbounds buf[pos + 1] = UInt8('l') + @inbounds buf[pos + 2] = UInt8('o') + @inbounds buf[pos + 3] = UInt8('a') + @inbounds buf[pos + 4] = UInt8('t') + @inbounds buf[pos + 5] = UInt8('1') + @inbounds buf[pos + 6] = UInt8('6') + @inbounds buf[pos + 7] = UInt8('(') pos += 8 end pos = append_sign(x, plus, space, buf, pos) @@ -332,161 +332,122 @@ function writeshortest(buf::Vector{UInt8}, pos, x::T, !(pt >= olength && abs(mod(x + 0.05, 10^(pt - olength)) - 0.05) > 0.05) exp_form = false if pt <= 0 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 for _ = 1:abs(pt) - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end - # elseif pt >= olength + # elseif pt >= olength # nothing to do at this point - # else + # else # nothing to do at this point end else + # make space for decchar pos += 1 end - i = 0 - ptr = pointer(buf) 
- ptr2 = pointer(DIGIT_TABLE) - if (output >> 32) != 0 - q = output ÷ 100000000 - output2 = (output % UInt32) - UInt32(100000000) * (q % UInt32) - output = q - c = output2 % UInt32(10000) - output2 = div(output2, UInt32(10000)) - d = output2 % UInt32(10000) - c0 = (c % 100) << 1 - c1 = (c ÷ 100) << 1 - d0 = (d % 100) << 1 - d1 = (d ÷ 100) << 1 - memcpy(ptr + pos + olength - 3, ptr2 + c0, 2) - memcpy(ptr + pos + olength - 5, ptr2 + c1, 2) - memcpy(ptr + pos + olength - 7, ptr2 + d0, 2) - memcpy(ptr + pos + olength - 9, ptr2 + d1, 2) - i += 8 - end - output2 = output % UInt32 - while output2 >= 10000 - c = output2 % UInt32(10000) - output2 = div(output2, UInt32(10000)) - c0 = (c % 100) << 1 - c1 = (c ÷ 100) << 1 - memcpy(ptr + pos + olength - i - 3, ptr2 + c0, 2) - memcpy(ptr + pos + olength - i - 5, ptr2 + c1, 2) - i += 4 - end - if output2 >= 100 - c = (output2 % UInt32(100)) << 1 - output2 = div(output2, UInt32(100)) - memcpy(ptr + pos + olength - i - 3, ptr2 + c, 2) - i += 2 - end - if output2 >= 10 - c = output2 << 1 - buf[pos + 1] = DIGIT_TABLE[c + 2] - buf[pos - exp_form] = DIGIT_TABLE[c + 1] - else - buf[pos - exp_form] = UInt8('0') + (output2 % UInt8) - end + append_c_digits(olength, output, buf, pos) if !exp_form if pt <= 0 pos += olength precision -= olength - while hash && precision > 0 - buf[pos] = UInt8('0') - pos += 1 - precision -= 1 - end elseif pt >= olength pos += olength precision -= olength for _ = 1:nexp - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 precision -= 1 end if hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += 1 if precision < 0 - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end - while precision > 0 - buf[pos] = UInt8('0') - pos += 1 - precision -= 1 - end end else pointoff = olength - abs(nexp) + # shift bytes after pointoff to make room for decchar + ptr = pointer(buf) memmove(ptr + pos + pointoff, ptr + pos + pointoff - 1, olength - pointoff + 1) - buf[pos + pointoff] = decchar 
+ @inbounds buf[pos + pointoff] = decchar pos += olength + 1 precision -= olength - while hash && precision > 0 - buf[pos] = UInt8('0') + end + if hash + while precision > 0 + @inbounds buf[pos] = UInt8('0') pos += 1 precision -= 1 end end if typed && x isa Float32 - buf[pos] = UInt8('f') - buf[pos + 1] = UInt8('0') + @inbounds buf[pos] = UInt8('f') + @inbounds buf[pos + 1] = UInt8('0') pos += 2 end else + # move leading digit into place + @inbounds buf[pos - 1] = buf[pos] if olength > 1 || hash - buf[pos] = decchar + @inbounds buf[pos] = decchar pos += olength precision -= olength end - if hash && olength == 1 - buf[pos] = UInt8('0') - pos += 1 - end - while hash && precision > 0 - buf[pos] = UInt8('0') - pos += 1 - precision -= 1 + if hash + if olength == 1 + @inbounds buf[pos] = UInt8('0') + pos += 1 + end + while precision > 0 + @inbounds buf[pos] = UInt8('0') + pos += 1 + precision -= 1 + end end - buf[pos] = expchar + @inbounds buf[pos] = expchar pos += 1 exp2 = nexp + olength - 1 if exp2 < 0 - buf[pos] = UInt8('-') + @inbounds buf[pos] = UInt8('-') pos += 1 exp2 = -exp2 elseif padexp - buf[pos] = UInt8('+') + @inbounds buf[pos] = UInt8('+') pos += 1 end if exp2 >= 100 c = exp2 % 10 - memcpy(ptr + pos - 1, ptr2 + 2 * div(exp2, 10), 2) - buf[pos + 2] = UInt8('0') + (c % UInt8) + @inbounds d100 = DIGIT_TABLE16[(div(exp2, 10) % Int) + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 + @inbounds buf[pos + 2] = UInt8('0') + (c % UInt8) pos += 3 elseif exp2 >= 10 - memcpy(ptr + pos - 1, ptr2 + 2 * exp2, 2) + @inbounds d100 = DIGIT_TABLE16[(exp2 % Int) + 1] + @inbounds buf[pos] = d100 % UInt8 + @inbounds buf[pos + 1] = (d100 >> 0x8) % UInt8 pos += 2 else if padexp - buf[pos] = UInt8('0') + @inbounds buf[pos] = UInt8('0') pos += 1 end - buf[pos] = UInt8('0') + (exp2 % UInt8) + @inbounds buf[pos] = UInt8('0') + (exp2 % UInt8) pos += 1 end end if typed && x isa Float16 - buf[pos] = UInt8(')') + @inbounds buf[pos] = UInt8(')') pos 
+= 1 end diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl index f5a88c057e2b3..2064dfbefcecd 100644 --- a/base/ryu/utils.jl +++ b/base/ryu/utils.jl @@ -134,7 +134,7 @@ end Compute `p = a*b` where `b = bLo + bHi<<64`, returning the result as `pLo, pHi` where `p = pLo + pHi<<128`. """ -function umul256(a, bHi, bLo) +function umul256(a::UInt128, bHi::UInt64, bLo::UInt64) aLo = a % UInt64 aHi = (a >> 64) % UInt64 @@ -164,7 +164,7 @@ end Compute `pHi = (a*b)>>128` where `b = bLo + bHi<<64`. """ -umul256_hi(a, bHi, bLo) = umul256(a, bHi, bLo)[2] +umul256_hi(a::UInt128, bHi::UInt64, bLo::UInt64) = umul256(a, bHi, bLo)[2] """ Ryu.mulshiftmod1e9(m, mula, mulb, mulc, j)::UInt32 @@ -183,7 +183,7 @@ function mulshiftmod1e9(m, mula, mulb, mulc, j) return (v % UInt32) - UInt32(1000000000) * shifted end -function append_sign(x, plus, space, buf, pos) +function append_sign(x, plus::Bool, space::Bool, buf, pos::Int) if signbit(x) && !isnan(x) # suppress minus sign for signaling NaNs buf[pos] = UInt8('-') pos += 1 @@ -197,101 +197,14 @@ function append_sign(x, plus, space, buf, pos) return pos end -function append_n_digits(olength, digits, buf, pos) - i = 0 - while digits >= 10000 - c = digits % 10000 - digits = div(digits, 10000) - c0 = (c % 100) << 1 - c1 = div(c, 100) << 1 - unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c0 + 1, 2) - unsafe_copyto!(buf, pos + olength - i - 4, DIGIT_TABLE, c1 + 1, 2) - i += 4 - end - if digits >= 100 - c = (digits % 100) << 1 - digits = div(digits, 100) - unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - end - if digits >= 10 - c = digits << 1 - unsafe_copyto!(buf, pos + olength - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - else - buf[pos] = UInt8('0') + digits - i += 1 - end - return pos + i -end - -function append_d_digits(olength, digits, buf, pos, decchar) - i = 0 - while digits >= 10000 - c = digits % 10000 - digits = div(digits, 10000) - c0 = (c % 100) << 1 - c1 = div(c, 100) << 1 - unsafe_copyto!(buf, pos 
+ olength + 1 - i - 2, DIGIT_TABLE, c0 + 1, 2) - unsafe_copyto!(buf, pos + olength + 1 - i - 4, DIGIT_TABLE, c1 + 1, 2) - i += 4 - end - if digits >= 100 - c = (digits % 100) << 1 - digits = div(digits, 100) - unsafe_copyto!(buf, pos + olength + 1 - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - end - if digits >= 10 - c = digits << 1 - buf[pos] = DIGIT_TABLE[c + 1] - buf[pos + 1] = decchar - buf[pos + 2] = DIGIT_TABLE[c + 2] - i += 3 - else - buf[pos] = UInt8('0') + digits - buf[pos + 1] = decchar - i += 2 - end - return pos + i -end -function append_c_digits(count, digits, buf, pos) - i = 0 - while i < count - 1 - c = (digits % 100) << 1 - digits = div(digits, 100) - unsafe_copyto!(buf, pos + count - i - 2, DIGIT_TABLE, c + 1, 2) - i += 2 - end - if i < count - buf[pos + count - i - 1] = UInt8('0') + (digits % 10) - i += 1 - end - return pos + i -end +import Base: append_c_digits_fast as append_c_digits, append_nine_digits -function append_nine_digits(digits, buf, pos) - if digits == 0 - for _ = 1:9 - buf[pos] = UInt8('0') - pos += 1 - end - return pos - end - i = 0 - while i < 5 - c = digits % 10000 - digits = div(digits, 10000) - c0 = (c % 100) << 1 - c1 = div(c, 100) << 1 - unsafe_copyto!(buf, pos + 7 - i, DIGIT_TABLE, c0 + 1, 2) - unsafe_copyto!(buf, pos + 5 - i, DIGIT_TABLE, c1 + 1, 2) - i += 4 - end - buf[pos] = UInt8('0') + digits - i += 1 - return pos + i +function append_d_digits(olength::Int, digits::Unsigned, buf, pos::Int, decchar) + newpos = append_c_digits(olength, digits, buf, pos + 1) + @inbounds buf[pos] = buf[pos + 1] + @inbounds buf[pos + 1] = decchar + return newpos # == pos + olength + 1 end const BIG_MASK = (big(1) << 64) - 1 @@ -390,18 +303,7 @@ for T in (Float64, Float32, Float16) @eval pow5split_lookup(::Type{$T}, i) = @inbounds($table_sym[i+1]) end -const DIGIT_TABLE = UInt8[ - '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9', - '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9', - 
'2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9', - '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9', - '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9', - '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9', - '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9', - '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9', - '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9', - '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9' -] +const DIGIT_TABLE16 = Base._dec_d100 const POW10_OFFSET = UInt16[ 0, 2, 5, 8, 12, 16, 21, 26, 32, 39,