@@ -9,70 +9,140 @@ use super::Float;
99/// These are hand-optimized bit twiddling code,
1010/// which unfortunately isn't the easiest kind of code to read.
1111///
12- /// The algorithm is explained here: <https://blog.m-ou.se/floats/>
12+ /// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
13+ /// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros)
14+ /// - Calculate a base mantissa by shifting the integer into mantissa position
15+ /// - Figure out if rounding needs to occour by classifying truncated bits. Some patterns apply
16+ /// here, so they may be "squashed" into smaller numbers to spmplifiy the classification.
1317mod int_to_float {
18+ use super :: * ;
19+
20+ /// Calculate the exponent from the number of leading zeros.
21+ fn exp < I : Int , F : Float < Int : CastFrom < u32 > > > ( n : u32 ) -> F :: Int {
22+ F :: Int :: cast_from ( I :: BITS + F :: EXPONENT_BIAS - 2 - n)
23+ }
24+
25+ /// Shift the integer into the float's mantissa bits. Keep the lowest exponent bit intact.
26+ fn m_base < I : Int , F : Float < Int : CastFrom < I > > > ( i_m : I ) -> F :: Int {
27+ F :: Int :: cast_from ( i_m >> ( ( I :: BITS - F :: BITS ) + F :: EXPONENT_BITS ) )
28+ }
29+
30+ /// Calculate the mantissa in cases where the float size is greater than integer size
31+ fn m_f_gt_i < I : Int , F : Float < Int : CastFrom < I > > > ( i : I , n : u32 ) -> F :: Int {
32+ F :: Int :: cast_from ( i) << ( F :: SIGNIFICAND_BITS - I :: BITS + 1 + n)
33+ }
34+
35+ /// Calculate the mantissa and a dropped bit adjustment when `f` and `i` are equal sizes
36+ fn m_f_eq_i < I : Int + CastInto < F :: Int > , F : Float < Int = I > > ( i : I , n : u32 ) -> ( F :: Int , F :: Int ) {
37+ let base = ( i << n) >> F :: EXPONENT_BITS ;
38+
39+ // Only the lowest `F::EXPONENT_BITS` bits will be truncated. Shift them
40+ // to the highest position
41+ let adj = ( i << n) << ( F :: SIGNIFICAND_BITS + 1 ) ;
42+
43+ ( base, adj)
44+ }
45+
46+ /// Adjust a mantissa with dropped bits
47+ fn m_adj < F : Float > ( m_base : F :: Int , dropped_bits : F :: Int ) -> F :: Int {
48+ // Branchlessly extract a `1` if rounding up should happen
49+ let adj = ( dropped_bits - ( dropped_bits >> ( F :: BITS - 1 ) & !m_base) ) >> ( F :: BITS - 1 ) ;
50+
51+ // Add one when we need to round up. Break ties to even.
52+ m_base + adj
53+ }
54+
55+ /// Combine a final float repr from an exponent and mantissa.
56+ fn repr < F : Float > ( e : F :: Int , m : F :: Int ) -> F :: Int {
57+ // + rather than | so the mantissa can overflow into the exponent
58+ ( e << F :: SIGNIFICAND_BITS ) + m
59+ }
60+
61+ /// Perform a signed operation as unsigned, then add the sign back
62+ pub fn signed < I , F , Conv > ( i : I , conv : Conv ) -> F
63+ where
64+ F : Float ,
65+ I : Int ,
66+ F :: Int : CastFrom < I > ,
67+ Conv : Fn ( I :: UnsignedInt ) -> F :: Int ,
68+ {
69+ let sign_bit = F :: Int :: cast_from ( i >> ( I :: BITS - 1 ) ) << ( F :: BITS - 1 ) ;
70+ F :: from_repr ( conv ( i. unsigned_abs ( ) ) | sign_bit)
71+ }
72+
1473 pub fn u32_to_f32_bits ( i : u32 ) -> u32 {
1574 if i == 0 {
1675 return 0 ;
1776 }
1877 let n = i. leading_zeros ( ) ;
19- let a = ( i << n) >> 8 ; // Significant bits, with bit 24 still in tact.
20- let b = ( i << n) << 24 ; // Insignificant bits, only relevant for rounding.
21- let m = a + ( ( b - ( b >> 31 & !a) ) >> 31 ) ; // Add one when we need to round up. Break ties to even.
22- let e = 157 - n; // Exponent plus 127, minus one.
23- ( e << 23 ) + m // + not |, so the mantissa can overflow into the exponent.
78+ let ( m_base, adj) = m_f_eq_i :: < u32 , f32 > ( i, n) ;
79+ let m = m_adj :: < f32 > ( m_base, adj) ;
80+ let e = exp :: < u32 , f32 > ( n) ;
81+ repr :: < f32 > ( e, m)
2482 }
2583
2684 pub fn u32_to_f64_bits ( i : u32 ) -> u64 {
2785 if i == 0 {
2886 return 0 ;
2987 }
3088 let n = i. leading_zeros ( ) ;
31- let m = ( i as u64 ) << ( 21 + n) ; // Significant bits, with bit 53 still in tact.
32- let e = 1053 - n as u64 ; // Exponent plus 1023, minus one.
33- ( e << 52 ) + m // Bit 53 of m will overflow into e.
89+ let m = m_f_gt_i :: < _ , f64 > ( i , n) ;
90+ let e = exp :: < u32 , f64 > ( n ) ;
91+ repr :: < f64 > ( e , m )
3492 }
3593
3694 pub fn u64_to_f32_bits ( i : u64 ) -> u32 {
3795 let n = i. leading_zeros ( ) ;
38- let y = i. wrapping_shl ( n) ;
39- let a = ( y >> 40 ) as u32 ; // Significant bits, with bit 24 still in tact.
40- let b = ( y >> 8 | y & 0xFFFF ) as u32 ; // Insignificant bits, only relevant for rounding.
41- let m = a + ( ( b - ( b >> 31 & !a) ) >> 31 ) ; // Add one when we need to round up. Break ties to even.
42- let e = if i == 0 { 0 } else { 189 - n } ; // Exponent plus 127, minus one, except for zero.
43- ( e << 23 ) + m // + not |, so the mantissa can overflow into the exponent.
96+ let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
97+ let m_base = m_base :: < _ , f32 > ( i_m) ;
98+ // The entire lower half of `i` will be truncated (masked portion), plus the
99+ // next `EXPONENT_BITS` bits.
100+ let adj = ( i_m >> f32:: EXPONENT_BITS | i_m & 0xFFFF ) as u32 ;
101+ let m = m_adj :: < f32 > ( m_base, adj) ;
102+ let e = if i == 0 { 0 } else { exp :: < u64 , f32 > ( n) } ;
103+ repr :: < f32 > ( e, m)
44104 }
45105
46106 pub fn u64_to_f64_bits ( i : u64 ) -> u64 {
47107 if i == 0 {
48108 return 0 ;
49109 }
50110 let n = i. leading_zeros ( ) ;
51- let a = ( i << n) >> 11 ; // Significant bits, with bit 53 still in tact.
52- let b = ( i << n) << 53 ; // Insignificant bits, only relevant for rounding.
53- let m = a + ( ( b - ( b >> 63 & !a) ) >> 63 ) ; // Add one when we need to round up. Break ties to even.
54- let e = 1085 - n as u64 ; // Exponent plus 1023, minus one.
55- ( e << 52 ) + m // + not |, so the mantissa can overflow into the exponent.
111+ let ( m_base, adj) = m_f_eq_i :: < u64 , f64 > ( i, n) ;
112+ let m = m_adj :: < f64 > ( m_base, adj) ;
113+ let e = exp :: < u64 , f64 > ( n) ;
114+ repr :: < f64 > ( e, m)
56115 }
57116
58117 pub fn u128_to_f32_bits ( i : u128 ) -> u32 {
59118 let n = i. leading_zeros ( ) ;
60- let y = i. wrapping_shl ( n) ;
61- let a = ( y >> 104 ) as u32 ; // Significant bits, with bit 24 still in tact.
62- let b = ( y >> 72 ) as u32 | ( ( y << 32 ) >> 32 != 0 ) as u32 ; // Insignificant bits, only relevant for rounding.
63- let m = a + ( ( b - ( b >> 31 & !a) ) >> 31 ) ; // Add one when we need to round up. Break ties to even.
64- let e = if i == 0 { 0 } else { 253 - n } ; // Exponent plus 127, minus one, except for zero.
65- ( e << 23 ) + m // + not |, so the mantissa can overflow into the exponent.
119+ let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
120+ let m_base = m_base :: < _ , f32 > ( i_m) ;
121+
122+ // Within the upper `F::BITS`, everything except for the signifcand
123+ // gets truncated
124+ let d1: u32 = ( i_m >> ( u128:: BITS - f32:: BITS - f32:: SIGNIFICAND_BITS - 1 ) ) . cast ( ) ;
125+
126+ // The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
127+ // check if it is nonzero.
128+ let d2: u32 = ( i_m << f32:: BITS >> f32:: BITS != 0 ) . into ( ) ;
129+ let adj = d1 | d2;
130+
131+ let m = m_adj :: < f32 > ( m_base, adj) ;
132+ let e = if i == 0 { 0 } else { exp :: < u128 , f32 > ( n) } ;
133+ repr :: < f32 > ( e, m)
66134 }
67135
68136 pub fn u128_to_f64_bits ( i : u128 ) -> u64 {
69137 let n = i. leading_zeros ( ) ;
70- let y = i. wrapping_shl ( n) ;
71- let a = ( y >> 75 ) as u64 ; // Significant bits, with bit 53 still in tact.
72- let b = ( y >> 11 | y & 0xFFFF_FFFF ) as u64 ; // Insignificant bits, only relevant for rounding.
73- let m = a + ( ( b - ( b >> 63 & !a) ) >> 63 ) ; // Add one when we need to round up. Break ties to even.
74- let e = if i == 0 { 0 } else { 1149 - n as u64 } ; // Exponent plus 1023, minus one, except for zero.
75- ( e << 52 ) + m // + not |, so the mantissa can overflow into the exponent.
138+ let i_m = i. wrapping_shl ( n) ; // Mantissa, shifted so the first bit is nonzero
139+ let m_base = m_base :: < _ , f64 > ( i_m) ;
140+ // The entire lower half of `i` will be truncated (masked portion), plus the
141+ // next `EXPONENT_BITS` bits.
142+ let adj = ( i_m >> f64:: EXPONENT_BITS | i_m & 0xFFFF_FFFF ) as u64 ;
143+ let m = m_adj :: < f64 > ( m_base, adj) ;
144+ let e = if i == 0 { 0 } else { exp :: < u128 , f64 > ( n) } ;
145+ repr :: < f64 > ( e, m)
76146 }
77147}
78148
@@ -113,38 +183,32 @@ intrinsics! {
113183intrinsics ! {
114184 #[ arm_aeabi_alias = __aeabi_i2f]
115185 pub extern "C" fn __floatsisf( i: i32 ) -> f32 {
116- let sign_bit = ( ( i >> 31 ) as u32 ) << 31 ;
117- f32 :: from_bits( int_to_float:: u32_to_f32_bits( i. unsigned_abs( ) ) | sign_bit)
186+ int_to_float:: signed( i, int_to_float:: u32_to_f32_bits)
118187 }
119188
120189 #[ arm_aeabi_alias = __aeabi_i2d]
121190 pub extern "C" fn __floatsidf( i: i32 ) -> f64 {
122- let sign_bit = ( ( i >> 31 ) as u64 ) << 63 ;
123- f64 :: from_bits( int_to_float:: u32_to_f64_bits( i. unsigned_abs( ) ) | sign_bit)
191+ int_to_float:: signed( i, int_to_float:: u32_to_f64_bits)
124192 }
125193
126194 #[ arm_aeabi_alias = __aeabi_l2f]
127195 pub extern "C" fn __floatdisf( i: i64 ) -> f32 {
128- let sign_bit = ( ( i >> 63 ) as u32 ) << 31 ;
129- f32 :: from_bits( int_to_float:: u64_to_f32_bits( i. unsigned_abs( ) ) | sign_bit)
196+ int_to_float:: signed( i, int_to_float:: u64_to_f32_bits)
130197 }
131198
132199 #[ arm_aeabi_alias = __aeabi_l2d]
133200 pub extern "C" fn __floatdidf( i: i64 ) -> f64 {
134- let sign_bit = ( ( i >> 63 ) as u64 ) << 63 ;
135- f64 :: from_bits( int_to_float:: u64_to_f64_bits( i. unsigned_abs( ) ) | sign_bit)
201+ int_to_float:: signed( i, int_to_float:: u64_to_f64_bits)
136202 }
137203
138204 #[ cfg_attr( target_os = "uefi" , unadjusted_on_win64) ]
139205 pub extern "C" fn __floattisf( i: i128 ) -> f32 {
140- let sign_bit = ( ( i >> 127 ) as u32 ) << 31 ;
141- f32 :: from_bits( int_to_float:: u128_to_f32_bits( i. unsigned_abs( ) ) | sign_bit)
206+ int_to_float:: signed( i, int_to_float:: u128_to_f32_bits)
142207 }
143208
144209 #[ cfg_attr( target_os = "uefi" , unadjusted_on_win64) ]
145210 pub extern "C" fn __floattidf( i: i128 ) -> f64 {
146- let sign_bit = ( ( i >> 127 ) as u64 ) << 63 ;
147- f64 :: from_bits( int_to_float:: u128_to_f64_bits( i. unsigned_abs( ) ) | sign_bit)
211+ int_to_float:: signed( i, int_to_float:: u128_to_f64_bits)
148212 }
149213}
150214
0 commit comments