Skip to content

Commit

Permalink
Rollup merge of #92425 - calebzulawski:simd-cast, r=workingjubilee
Browse files Browse the repository at this point in the history
Improve SIMD casts

* Allows `simd_cast` intrinsic to take `usize` and `isize`
* Adds `simd_as` intrinsic, which is the same as `simd_cast` except for saturating float-to-int conversions (matching the behavior of `as`).

cc `@workingjubilee`
  • Loading branch information
matthiaskrgr authored Jan 18, 2022
2 parents f372476 + 49d36d7 commit 7889f96
Show file tree
Hide file tree
Showing 8 changed files with 314 additions and 177 deletions.
59 changes: 41 additions & 18 deletions compiler/rustc_codegen_llvm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -731,27 +731,11 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
}

fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
if !self.fptoint_sat_broken_in_llvm() {
let src_ty = self.cx.val_ty(val);
let float_width = self.cx.float_width(src_ty);
let int_width = self.cx.int_width(dest_ty);
let name = format!("llvm.fptoui.sat.i{}.f{}", int_width, float_width);
return Some(self.call_intrinsic(&name, &[val]));
}

None
self.fptoint_sat(false, val, dest_ty)
}

fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
if !self.fptoint_sat_broken_in_llvm() {
let src_ty = self.cx.val_ty(val);
let float_width = self.cx.float_width(src_ty);
let int_width = self.cx.int_width(dest_ty);
let name = format!("llvm.fptosi.sat.i{}.f{}", int_width, float_width);
return Some(self.call_intrinsic(&name, &[val]));
}

None
self.fptoint_sat(true, val, dest_ty)
}

fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
Expand Down Expand Up @@ -1455,4 +1439,43 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
_ => false,
}
}

/// Emits a call to LLVM's saturating float-to-int intrinsic
/// (`llvm.fptosi.sat.*` when `signed`, `llvm.fptoui.sat.*` otherwise),
/// converting `val` to `dest_ty`.
///
/// Works for both scalar and vector operands: when the type of `val` is a
/// vector, the element types are used to compute the widths and the lane
/// count is encoded into the intrinsic name (`vNiW.vNfW`). The source and
/// destination vectors must have the same number of lanes.
///
/// Returns `None` when `fptoint_sat_broken_in_llvm()` reports that the
/// intrinsic cannot be used, so the caller can pick another lowering.
fn fptoint_sat(
    &mut self,
    signed: bool,
    val: &'ll Value,
    dest_ty: &'ll Type,
) -> Option<&'ll Value> {
    // Guard: bail out early if the intrinsic is unusable.
    if self.fptoint_sat_broken_in_llvm() {
        return None;
    }

    let src_ty = self.cx.val_ty(val);
    let is_vector = self.cx.type_kind(src_ty) == TypeKind::Vector;

    // For vectors, widths come from the element types; `lanes` is only set
    // in the vector case and selects the vector form of the intrinsic name.
    let (float_ty, int_ty, lanes) = if is_vector {
        assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
        let float_elem = self.cx.element_type(src_ty);
        let int_elem = self.cx.element_type(dest_ty);
        (float_elem, int_elem, Some(self.cx.vector_length(src_ty)))
    } else {
        (src_ty, dest_ty, None)
    };
    let float_width = self.cx.float_width(float_ty);
    let int_width = self.cx.int_width(int_ty);

    let instr = if signed { "fptosi" } else { "fptoui" };
    let name = match lanes {
        Some(n) => format!("llvm.{}.sat.v{}i{}.v{}f{}", instr, n, int_width, n, float_width),
        None => format!("llvm.{}.sat.i{}.f{}", instr, int_width, float_width),
    };

    // The same function type is used for the declaration and for the call.
    let fn_ty = self.type_func(&[src_ty], dest_ty);
    let f = self.declare_cfn(&name, llvm::UnnamedAddr::No, fn_ty);
    Some(self.call(fn_ty, f, &[val], None))
}
}
30 changes: 21 additions & 9 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1688,7 +1688,7 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
bitwise_red!(simd_reduce_all: vector_reduce_and, true);
bitwise_red!(simd_reduce_any: vector_reduce_or, true);

if name == sym::simd_cast {
if name == sym::simd_cast || name == sym::simd_as {
require_simd!(ret_ty, "return");
let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
require!(
Expand All @@ -1714,14 +1714,26 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
let (in_style, in_width) = match in_elem.kind() {
// vectors of pointer-sized integers should've been
// disallowed before here, so this unwrap is safe.
ty::Int(i) => (Style::Int(true), i.bit_width().unwrap()),
ty::Uint(u) => (Style::Int(false), u.bit_width().unwrap()),
ty::Int(i) => (
Style::Int(true),
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Uint(u) => (
Style::Int(false),
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Float(f) => (Style::Float, f.bit_width()),
_ => (Style::Unsupported, 0),
};
let (out_style, out_width) = match out_elem.kind() {
ty::Int(i) => (Style::Int(true), i.bit_width().unwrap()),
ty::Uint(u) => (Style::Int(false), u.bit_width().unwrap()),
ty::Int(i) => (
Style::Int(true),
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Uint(u) => (
Style::Int(false),
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
),
ty::Float(f) => (Style::Float, f.bit_width()),
_ => (Style::Unsupported, 0),
};
Expand All @@ -1748,10 +1760,10 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
});
}
(Style::Float, Style::Int(out_is_signed)) => {
return Ok(if out_is_signed {
bx.fptosi(args[0].immediate(), llret_ty)
} else {
bx.fptoui(args[0].immediate(), llret_ty)
return Ok(match (out_is_signed, name == sym::simd_as) {
(false, false) => bx.fptoui(args[0].immediate(), llret_ty),
(true, false) => bx.fptosi(args[0].immediate(), llret_ty),
(_, true) => bx.cast_float_to_int(out_is_signed, args[0].immediate(), llret_ty),
});
}
(Style::Float, Style::Float) => {
Expand Down
150 changes: 3 additions & 147 deletions compiler/rustc_codegen_ssa/src/mir/rvalue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ use super::place::PlaceRef;
use super::{FunctionCx, LocalRef};

use crate::base;
use crate::common::{self, IntPredicate, RealPredicate};
use crate::common::{self, IntPredicate};
use crate::traits::*;
use crate::MemFlags;

use rustc_apfloat::{ieee, Float, Round, Status};
use rustc_middle::mir;
use rustc_middle::ty::cast::{CastTy, IntTy};
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
Expand Down Expand Up @@ -368,10 +367,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
bx.inttoptr(usize_llval, ll_t_out)
}
(CastTy::Float, CastTy::Int(IntTy::I)) => {
cast_float_to_int(&mut bx, true, llval, ll_t_in, ll_t_out)
bx.cast_float_to_int(true, llval, ll_t_out)
}
(CastTy::Float, CastTy::Int(_)) => {
cast_float_to_int(&mut bx, false, llval, ll_t_in, ll_t_out)
bx.cast_float_to_int(false, llval, ll_t_out)
}
_ => bug!("unsupported cast: {:?} to {:?}", operand.layout.ty, cast.ty),
};
Expand Down Expand Up @@ -768,146 +767,3 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
// (*) this is only true if the type is suitable
}
}

/// Emits code converting the float value `x` (of type `float_ty`) to the integer type `int_ty`,
/// treating the destination as signed when `signed` is true.
///
/// Three strategies are tried in order:
/// 1. If the `saturating_float_casts` debugging option is explicitly `Some(false)`, a plain
///    `fptosi`/`fptoui` is emitted with no saturation.
/// 2. Otherwise the builder's `fptosi_sat`/`fptoui_sat` is used if it returns `Some`.
/// 3. Otherwise saturation is implemented manually with compares and selects (see the comments
///    in the body).
///
/// Only 32-bit and 64-bit float widths are handled by the manual path; any other width hits
/// `bug!`.
fn cast_float_to_int<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
bx: &mut Bx,
signed: bool,
x: Bx::Value,
float_ty: Bx::Type,
int_ty: Bx::Type,
) -> Bx::Value {
// Saturation explicitly disabled: emit the raw conversion.
if let Some(false) = bx.cx().sess().opts.debugging_opts.saturating_float_casts {
return if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
}

// Prefer the builder's saturating conversion when it is available.
let try_sat_result = if signed { bx.fptosi_sat(x, int_ty) } else { bx.fptoui_sat(x, int_ty) };
if let Some(try_sat_result) = try_sat_result {
return try_sat_result;
}

let int_width = bx.cx().int_width(int_ty);
let float_width = bx.cx().float_width(float_ty);
// LLVM's fpto[su]i returns undef when the input x is infinite, NaN, or does not fit into the
// destination integer type after rounding towards zero. This `undef` value can cause UB in
// safe code (see issue #10184), so we implement a saturating conversion on top of it:
// Semantically, the mathematical value of the input is rounded towards zero to the next
// mathematical integer, and then the result is clamped into the range of the destination
// integer type. Positive and negative infinity are mapped to the maximum and minimum value of
// the destination integer type. NaN is mapped to 0.
//
// Define f_min and f_max as the smallest and largest (finite) floats that are exactly equal to
// a value representable in int_ty.
// They are exactly equal to int_ty::{MIN,MAX} if float_ty has enough significand bits.
// Otherwise, int_ty::MAX must be rounded towards zero, as it is one less than a power of two.
// int_ty::MIN, however, is either zero or a negative power of two and is thus exactly
// representable. Note that this only works if float_ty's exponent range is sufficiently large.
// f16 or 256 bit integers would break this property. Right now the smallest float type is f32
// with exponents ranging up to 127, which is barely enough for i128::MIN = -2^127.
// On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
// we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
// This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
let int_max = |signed: bool, int_width: u64| -> u128 {
let shift_amount = 128 - int_width;
if signed { i128::MAX as u128 >> shift_amount } else { u128::MAX >> shift_amount }
};
let int_min = |signed: bool, int_width: u64| -> i128 {
if signed { i128::MIN >> (128 - int_width) } else { 0 }
};

// Compute (f_min, f_max) as raw bit patterns for f32 ...
let compute_clamp_bounds_single = |signed: bool, int_width: u64| -> (u128, u128) {
let rounded_min = ieee::Single::from_i128_r(int_min(signed, int_width), Round::TowardZero);
assert_eq!(rounded_min.status, Status::OK);
let rounded_max = ieee::Single::from_u128_r(int_max(signed, int_width), Round::TowardZero);
assert!(rounded_max.value.is_finite());
(rounded_min.value.to_bits(), rounded_max.value.to_bits())
};
// ... and for f64.
let compute_clamp_bounds_double = |signed: bool, int_width: u64| -> (u128, u128) {
let rounded_min = ieee::Double::from_i128_r(int_min(signed, int_width), Round::TowardZero);
assert_eq!(rounded_min.status, Status::OK);
let rounded_max = ieee::Double::from_u128_r(int_max(signed, int_width), Round::TowardZero);
assert!(rounded_max.value.is_finite());
(rounded_min.value.to_bits(), rounded_max.value.to_bits())
};

// Materialize a float constant from its bit pattern: build an integer constant of the same
// width and bitcast it to float_ty.
let mut float_bits_to_llval = |bits| {
let bits_llval = match float_width {
32 => bx.cx().const_u32(bits as u32),
64 => bx.cx().const_u64(bits as u64),
n => bug!("unsupported float width {}", n),
};
bx.bitcast(bits_llval, float_ty)
};
let (f_min, f_max) = match float_width {
32 => compute_clamp_bounds_single(signed, int_width),
64 => compute_clamp_bounds_double(signed, int_width),
n => bug!("unsupported float width {}", n),
};
let f_min = float_bits_to_llval(f_min);
let f_max = float_bits_to_llval(f_max);
// To implement saturation, we perform the following steps:
//
// 1. Cast x to an integer with fpto[su]i. This may result in undef.
// 2. Compare x to f_min and f_max, and use the comparison results to select:
// a) int_ty::MIN if x < f_min or x is NaN
// b) int_ty::MAX if x > f_max
// c) the result of fpto[su]i otherwise
// 3. If x is NaN, return 0, otherwise return the result of step 2.
//
// This avoids resulting undef because values in range [f_min, f_max] by definition fit into the
// destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
// undef does not introduce any non-determinism either.
// More importantly, the above procedure correctly implements saturating conversion.
// Proof (sketch):
// If x is NaN, 0 is returned by definition.
// Otherwise, x is finite or infinite and thus can be compared with f_min and f_max.
// This yields three cases to consider:
// (1) if x in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
// saturating conversion for inputs in that range.
// (2) if x > f_max, then x is larger than int_ty::MAX. This holds even if f_max is rounded
// (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
// than int_ty::MAX. Because x is larger than int_ty::MAX, the return value of int_ty::MAX
// is correct.
// (3) if x < f_min, then x is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
// int_ty::MIN and therefore the return value of int_ty::MIN is correct.
// QED.

let int_max = bx.cx().const_uint_big(int_ty, int_max(signed, int_width));
let int_min = bx.cx().const_uint_big(int_ty, int_min(signed, int_width) as u128);
let zero = bx.cx().const_uint(int_ty, 0);

// Step 1 ...
let fptosui_result = if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
let less_or_nan = bx.fcmp(RealPredicate::RealULT, x, f_min);
let greater = bx.fcmp(RealPredicate::RealOGT, x, f_max);

// Step 2: We use two comparisons and two selects, with %s1 being the
// result:
// %less_or_nan = fcmp ult %x, %f_min
// %greater = fcmp ogt %x, %f_max
// %s0 = select %less_or_nan, int_ty::MIN, %fptosui_result
// %s1 = select %greater, int_ty::MAX, %s0
// Note that %less_or_nan uses an *unordered* comparison. This
// comparison is true if the operands are not comparable (i.e., if x is
// NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
// x is NaN.
//
// Performance note: Unordered comparison can be lowered to a "flipped"
// comparison and a negation, and the negation can be merged into the
// select. Therefore, it is not necessarily any more expensive than an
// ordered ("normal") comparison. Whether these optimizations will be
// performed is ultimately up to the backend, but at least x86 does
// perform them.
let s0 = bx.select(less_or_nan, int_min, fptosui_result);
let s1 = bx.select(greater, int_max, s0);

// Step 3: NaN replacement.
// For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
// Therefore we only need to execute this step for signed integer types.
if signed {
// LLVM has no isNaN predicate, so we use (x == x) instead
let cmp = bx.fcmp(RealPredicate::RealOEQ, x, x);
bx.select(cmp, s1, zero)
} else {
s1
}
}
Loading

0 comments on commit 7889f96

Please sign in to comment.