Skip to content

Commit

Permalink
Correctly rounded floating point div_euclid.
Browse files Browse the repository at this point in the history
  • Loading branch information
tczajka committed Dec 10, 2024
1 parent 33c245b commit 3deff84
Show file tree
Hide file tree
Showing 19 changed files with 1,367 additions and 65 deletions.
33 changes: 17 additions & 16 deletions library/std/src/f128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
//!
//! Mathematically significant numbers are provided in the `consts` sub-module.
mod div_euclid;
mod u256;

#[cfg(test)]
mod tests;

Expand Down Expand Up @@ -235,10 +238,14 @@ impl f128 {

/// Calculates Euclidean division, the matching method for `rem_euclid`.
///
/// This computes the integer `n` such that
/// In infinite precision this computes the integer `n` such that
/// `self = n * rhs + self.rem_euclid(rhs)`.
/// In other words, the result is `self / rhs` rounded to the integer `n`
/// such that `self >= n * rhs`.
/// In other words, the result is `self / rhs` rounded to the
/// integer `n` such that `self >= n * rhs`.
///
/// However, due to a floating point round-off error the result can be rounded if
/// `self` is much larger in magnitude than `rhs`, such that `n` can't be represented
/// exactly.
///
/// # Precision
///
Expand All @@ -264,29 +271,23 @@ impl f128 {
#[unstable(feature = "f128", issue = "116909")]
#[must_use = "method returns a new number and does not mutate the original value"]
pub fn div_euclid(self, rhs: f128) -> f128 {
let q = (self / rhs).trunc();
if self % rhs < 0.0 {
return if rhs > 0.0 { q - 1.0 } else { q + 1.0 };
}
q
div_euclid::div_euclid(self, rhs)
}

/// Calculates the least nonnegative remainder of `self (mod rhs)`.
///
/// In particular, the return value `r` satisfies `0.0 <= r < rhs.abs()` in
/// most cases. However, due to a floating point round-off error it can
/// result in `r == rhs.abs()`, violating the mathematical definition, if
/// `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
/// This result is not an element of the function's codomain, but it is the
/// closest floating point number in the real numbers and thus fulfills the
/// property `self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)`
/// approximately.
/// In infinite precision the return value `r` satisfies `0 <= r < rhs.abs()`.
///
/// However, due to a floating point round-off error the result can round to
/// `rhs.abs()` if `self` is much smaller than `rhs.abs()` in magnitude and `self < 0.0`.
///
/// # Precision
///
/// The result of this operation is guaranteed to be the rounded
/// infinite-precision result.
///
/// The result is precise when `self >= 0.0`.
///
/// # Examples
///
/// ```
Expand Down
204 changes: 204 additions & 0 deletions library/std/src/f128/div_euclid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#[cfg(test)]
mod tests;

use crate::f128::u256::U256;

/// Software implementation of `f128::div_euclid`.
#[allow(dead_code)]
pub(crate) fn div_euclid(a: f128, b: f128) -> f128 {
if let Some((a_neg, a_exp, a_m)) = normal_form(a)
&& let Some((b_neg, b_exp, b_m)) = normal_form(b)
{
let exp = a_exp - b_exp;
match (a_neg, b_neg) {
(false, false) => div_floor(exp, a_m, b_m),
(true, false) => -div_ceil(exp, a_m, b_m),
(false, true) => -div_floor(exp, a_m, b_m),
(true, true) => div_ceil(exp, a_m, b_m),
}
} else {
// `a` or `b` are +-0.0 or infinity or NaN.
// `a / b` is also +-0.0 or infinity or NaN.
// There is no need to round to an integer.
a / b
}
}

/// Returns `floor((a << exp) / b)`.
///
/// Requires `2^112 <= a, b < 2^113`.
fn div_floor(exp: i32, a: u128, b: u128) -> f128 {
if exp < 0 {
0.0
} else if exp <= 15 {
// aa < (2^113 << 15) = 2^128
let aa = a << exp;
// q < 2^128 / 2^112 = 2^16
let q = (aa / b) as u32;
// We have to use `as` because `From<u32> for f128` is not yet implemented.
q as f128
} else if exp <= 127 {
// aa = a << exp
// aa < (2^113 << 127) = 2^240
let aa = U256::shl_u128(a, exp as u32);
// q < 2^240 / 2^112 = 2^128
let (q, _) = aa.div_rem(b);
q as f128
} else {
// aa >= (2^112 << 127) = 2^239
// aa < (2^113 << 127) = 2^240
let aa = U256::shl_u128(a, 127);
// e > 0
// The result is floor((aa << e) / b).
let e = (exp - 127) as u32;

// aa = q * b + r
// q >= 2^239 / 2^113 = 2^126
// q < 2^239 / 2^112 = 2^128
// 0 <= r < b
let (q, r) = aa.div_rem(b);

// result = floor((aa << e) / b) = (q << e) + floor((r << e) / b)
// 0 <= (r << e) / b < 2^e
//
// There are two cases:
// 1. floor((r << e) / b) = 0
// 2. 0 < floor((r << e) / b) < 2^e
//
// In case 1:
// The result is q << e.
//
// In case 2:
// The result is (q << e) + non-zero low e bits.
// This rounds the same way as (q | 1) << e because rounding beyond
// the 25 most significant bits of q depends only on whether the low-order
// bits are non-zero.
//
// Case 1 happens when:
// (r << e) / b < 1
// (r << e) <= b - 1
// r <= ((b - 1) >> e)
let case_1_bound = if e < 128 { (b - 1) >> e } else { 0 };
let q_adj = if r <= case_1_bound {
// Case 1.
q
} else {
// Case 2.
q | 1
};
q_adj as f128 * pow2(e)
}
}

/// Returns `ceil((a << exp) / b)`.
///
/// Requires `2^112 <= a, b < 2^113`.
fn div_ceil(exp: i32, a: u128, b: u128) -> f128 {
if exp < 0 {
1.0
} else if exp <= 15 {
// aa < (2^113 << 15) = 2^128
let aa = a << exp;
// q < 2^128 / 2^112 + 1 = 2^16 + 1
let q = ((aa - 1) / b) as u32 + 1;
// We have to use `as` because `From<u32> for f128` is not yet implemented.
q as f128
} else if exp <= 127 {
// aa = a << exp
// aa <= ((2^113 - 1) << 127) = 2^240 - 2^127
let aa = U256::shl_u128(a, exp as u32);
// q <= (2^240 - 2^127) / 2^112 + 1 = 2^128 - 2^15 + 1
let (q, _) = (aa - U256::ONE).div_rem(b);
(q + 1) as f128
} else {
// aa >= (2^112 << 127) = 2^239
// aa <= ((2^113 - 1) << 127) = 2^240 - 2^127
let aa = U256::shl_u128(a, 127);
// e > 0
// The result is ceil((aa << e) / b).
let e = (exp - 127) as u32;

// aa = q * b + r
// q >= 2^239 / 2^112 = 2^126
// q <= (2^240 - 2^127) / 2^112 = 2^128 - 2^15
// 0 <= r < b
let (q, r) = aa.div_rem(b);

// result = ceil((aa << e) / b) = (q << e) + ceil((r << e) / b)
// 0 <= (r << e) / b < 2^e
//
// There are three cases:
// 1. ceil((r << e) / b) = 0
// 2. 0 < ceil((r << e) / b) < 2^e
// 3. ceil((r << e) / b) = 2^e
//
// In case 1:
// The result is q << e.
//
// In case 2:
// The result is (q << e) + non-zero low e bits.
// This rounds the same way as (q | 1) << e because rounding beyond
// the 54 most significant bits of q depends only on whether the low-order
// bits are non-zero.
//
// In case 3:
// The result is (q + 1) << e.
//
// Case 1 happens when r = 0.
// Case 3 happens when:
// (r << e) / b > (1 << e) - 1
// (r << e) > (b << e) - b
// ((b - r) << e) <= b - 1
// b - r <= (b - 1) >> e
// r >= b - ((b - 1) >> e)
let case_3_bound = b - if e < 128 { (b - 1) >> e } else { 0 };
let q_adj = if r == 0 {
// Case 1.
q
} else if r < case_3_bound {
// Case 2.
q | 1
} else {
// Case 3.
q + 1
};
q_adj as f128 * pow2(e)
}
}

/// For finite, non-zero numbers returns (sign, exponent, mantissa).
///
/// `x = (-1)^sign * 2^exp * mantissa`
///
/// `2^112 <= mantissa < 2^113`
fn normal_form(x: f128) -> Option<(bool, i32, u128)> {
let bits = x.to_bits();
let sign = bits >> 127 != 0;
let biased_exponent = (bits >> 112 & 0x7fff) as i32;
let significand = bits & ((1 << 112) - 1);
match biased_exponent {
0 if significand == 0 => {
// 0.0
None
}
0 => {
// Subnormal number: 2^(-16382-112) * significand.
// We want mantissa to have exactly 15 leading zeros.
let shift = significand.leading_zeros() - 15;
Some((sign, -16382 - 112 - shift as i32, significand << shift))
}
0x7fff => {
// Infinity or NaN.
None
}
_ => {
// Normal number: 2^(biased_exponent-16383-112) * (2^112 + significand)
Some((sign, biased_exponent - 16383 - 112, 1 << 112 | significand))
}
}
}

/// Returns `2^exp`.
fn pow2(exp: u32) -> f128 {
if exp <= 16383 { f128::from_bits(u128::from(exp + 16383) << 112) } else { f128::INFINITY }
}
23 changes: 23 additions & 0 deletions library/std/src/f128/div_euclid/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#![cfg(reliable_f128_math)]

#[test]
fn test_normal_form() {
use crate::f128::div_euclid::normal_form;

assert_eq!(normal_form(-1.5f128), Some((true, -112, 3 << 111)));
assert_eq!(normal_form(f128::MIN_POSITIVE), Some((false, -16494, 1 << 112)));
assert_eq!(normal_form(f128::MIN_POSITIVE / 2.0), Some((false, -16495, 1 << 112)));
assert_eq!(normal_form(f128::MAX), Some((false, 16271, (1 << 113) - 1)));
assert_eq!(normal_form(0.0), None);
assert_eq!(normal_form(f128::INFINITY), None);
assert_eq!(normal_form(f128::NAN), None);
}

#[test]
fn test_pow2() {
use crate::f128::div_euclid::pow2;

assert_eq!(pow2(0), 1.0f128);
assert_eq!(pow2(4), 16.0f128);
assert_eq!(pow2(16384), f128::INFINITY);
}
Loading

0 comments on commit 3deff84

Please sign in to comment.