From 194baa820d36926cfa9128211bbd61866d49d501 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 6 Aug 2024 12:25:37 +0200 Subject: [PATCH 1/2] simd_shuffle intrinsic: allow argument to be passed as vector (not just as array) --- .../src/intrinsics/simd.rs | 10 +++++++ .../rustc_codegen_gcc/src/intrinsic/simd.rs | 19 ++++++++----- compiler/rustc_codegen_llvm/src/intrinsic.rs | 19 ++++++++----- library/core/src/intrinsics/simd.rs | 2 +- tests/ui/simd/shuffle.rs | 27 +++++++++++++++++-- 5 files changed, 60 insertions(+), 17 deletions(-) diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index ca910dccb0d06..604a88393fd95 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -191,6 +191,14 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }) .try_into() .unwrap(), + _ if idx_ty.is_simd() + && matches!( + idx_ty.simd_size_and_type(fx.tcx).1.kind(), + ty::Uint(ty::UintTy::U32) + ) => + { + idx_ty.simd_size_and_type(fx.tcx).0.try_into().unwrap() + } _ => { fx.tcx.dcx().span_err( span, @@ -213,6 +221,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( let total_len = lane_count * 2; + // FIXME: this is a terrible abstraction-breaking hack. + // Find a way to reuse `immediate_const_vector` from `codegen_ssa` instead. 
let indexes = { use rustc_middle::mir::interpret::*; let idx_const = match &idx.node { diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs index 8da1df3be1534..96a833ccaf2b6 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs @@ -353,19 +353,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( } if name == sym::simd_shuffle { - // Make sure this is actually an array, since typeck only checks the length-suffixed + // Make sure this is actually an array or SIMD vector, since typeck only checks the length-suffixed // version of this intrinsic. - let n: u64 = match *args[2].layout.ty.kind() { + let idx_ty = args[2].layout.ty; + let n: u64 = match idx_ty.kind() { ty::Array(ty, len) if matches!(*ty.kind(), ty::Uint(ty::UintTy::U32)) => { len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else( || span_bug!(span, "could not evaluate shuffle index array length"), ) } - _ => return_error!(InvalidMonomorphization::SimdShuffle { - span, - name, - ty: args[2].layout.ty - }), + _ if idx_ty.is_simd() + && matches!( + idx_ty.simd_size_and_type(bx.cx.tcx).1.kind(), + ty::Uint(ty::UintTy::U32) + ) => + { + idx_ty.simd_size_and_type(bx.cx.tcx).0 + } + _ => return_error!(InvalidMonomorphization::SimdShuffle { span, name, ty: idx_ty }), }; require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty }); diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index f5558723d11bf..5d32ef0d9d65f 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -1279,19 +1279,24 @@ fn generic_simd_intrinsic<'ll, 'tcx>( } if name == sym::simd_shuffle { - // Make sure this is actually an array, since typeck only checks the length-suffixed + // Make sure this is actually an array or SIMD vector, since typeck only checks the 
length-suffixed // version of this intrinsic. - let n: u64 = match args[2].layout.ty.kind() { + let idx_ty = args[2].layout.ty; + let n: u64 = match idx_ty.kind() { ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => { len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else( || span_bug!(span, "could not evaluate shuffle index array length"), ) } - _ => return_error!(InvalidMonomorphization::SimdShuffle { - span, - name, - ty: args[2].layout.ty - }), + _ if idx_ty.is_simd() + && matches!( + idx_ty.simd_size_and_type(bx.cx.tcx).1.kind(), + ty::Uint(ty::UintTy::U32) + ) => + { + idx_ty.simd_size_and_type(bx.cx.tcx).0 + } + _ => return_error!(InvalidMonomorphization::SimdShuffle { span, name, ty: idx_ty }), }; let (out_len, out_ty) = require_simd!(ret_ty, SimdReturn); diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs index 221724d7b4ae9..5982819809937 100644 --- a/library/core/src/intrinsics/simd.rs +++ b/library/core/src/intrinsics/simd.rs @@ -232,7 +232,7 @@ extern "rust-intrinsic" { /// /// `T` must be a vector. /// - /// `U` must be a **const** array of `i32`s. This means it must either refer to a named + /// `U` must be a **const** array or vector of `u32`s. This means it must either refer to a named /// const or be given as an inline const expression (`const { ... }`). /// /// `V` must be a vector with the same element type as `T` and the same length as `U`. 
diff --git a/tests/ui/simd/shuffle.rs b/tests/ui/simd/shuffle.rs index 09926d95557cd..dc0d688284e3c 100644 --- a/tests/ui/simd/shuffle.rs +++ b/tests/ui/simd/shuffle.rs @@ -6,15 +6,20 @@ #![allow(incomplete_features)] #![feature(adt_const_params)] +use std::marker::ConstParamTy; + extern "rust-intrinsic" { fn simd_shuffle<T, I, U>(a: T, b: T, i: I) -> U; } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, ConstParamTy, PartialEq, Eq)] #[repr(simd)] struct Simd<T, const N: usize>([T; N]); -pub unsafe fn __shuffle_vector16<const IDX: [u32; 16], T, U>(x: T, y: T) -> U { +unsafe fn __shuffle_vector16<const IDX: [u32; 16], T, U>(x: T, y: T) -> U { + simd_shuffle(x, y, IDX) +} +unsafe fn __shuffle_vector16_v2<const IDX: Simd<u32, 16>, T, U>(x: T, y: T) -> U { simd_shuffle(x, y, IDX) } @@ -30,6 +35,17 @@ fn main() { let y: Simd<u8, 2> = simd_shuffle(a, b, I2); assert_eq!(y.0, [1, 5]); } + // Test that we can also use a SIMD vector instead of a normal array for the shuffle. + const I1_SIMD: Simd<u32, 4> = Simd([0, 2, 4, 6]); + const I2_SIMD: Simd<u32, 2> = Simd([1, 5]); + unsafe { + let x: Simd<u8, 4> = simd_shuffle(a, b, I1_SIMD); + assert_eq!(x.0, [0, 2, 4, 6]); + + let y: Simd<u8, 2> = simd_shuffle(a, b, I2_SIMD); + assert_eq!(y.0, [1, 5]); + } + // Test that an indirection (via an unnamed constant) // through a const generic parameter also works. // See https://github.com/rust-lang/rust/issues/113500 for details.
@@ -42,4 +58,11 @@ fn main() { Simd<u8, 16>, >(a, b); } + unsafe { + __shuffle_vector16_v2::< + { Simd([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]) }, + Simd<u8, 16>, + Simd<u8, 16>, + >(a, b); + } } From daedbd4d7abb9132638cb420acc549d198c46c48 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 6 Aug 2024 17:08:50 +0200 Subject: [PATCH 2/2] make the GCC backend compatible with vector shuffle indices --- compiler/rustc_codegen_gcc/src/builder.rs | 44 +++++++++++++++-------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs index 47b378cc1cd82..6ba678e2e7c65 100644 --- a/compiler/rustc_codegen_gcc/src/builder.rs +++ b/compiler/rustc_codegen_gcc/src/builder.rs @@ -1923,15 +1923,11 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { v2: RValue<'gcc>, mask: RValue<'gcc>, ) -> RValue<'gcc> { - let struct_type = mask.get_type().is_struct().expect("mask should be of struct type"); - // TODO(antoyo): use a recursive unqualified() here.
let vector_type = v1.get_type().unqualified().dyncast_vector().expect("vector type"); let element_type = vector_type.get_element_type(); let vec_num_units = vector_type.get_num_units(); - let mask_num_units = struct_type.get_field_count(); - let mut vector_elements = vec![]; let mask_element_type = if element_type.is_integral() { element_type } else { @@ -1942,19 +1938,39 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { #[cfg(not(feature = "master"))] self.int_type }; - for i in 0..mask_num_units { - let field = struct_type.get_field(i as i32); - vector_elements.push(self.context.new_cast( - self.location, - mask.access_field(self.location, field).to_rvalue(), - mask_element_type, - )); - } + + let mut mask_elements = if let Some(vector_type) = mask.get_type().dyncast_vector() { + let mask_num_units = vector_type.get_num_units(); + let mut mask_elements = vec![]; + for i in 0..mask_num_units { + let index = self.context.new_rvalue_from_long(self.cx.type_u32(), i as _); + mask_elements.push(self.context.new_cast( + self.location, + self.extract_element(mask, index).to_rvalue(), + mask_element_type, + )); + } + mask_elements + } else { + let struct_type = mask.get_type().is_struct().expect("mask should be of struct type"); + let mask_num_units = struct_type.get_field_count(); + let mut mask_elements = vec![]; + for i in 0..mask_num_units { + let field = struct_type.get_field(i as i32); + mask_elements.push(self.context.new_cast( + self.location, + mask.access_field(self.location, field).to_rvalue(), + mask_element_type, + )); + } + mask_elements + }; + let mask_num_units = mask_elements.len(); // NOTE: the mask needs to be the same length as the input vectors, so add the missing // elements in the mask if needed. 
for _ in mask_num_units..vec_num_units { - vector_elements.push(self.context.new_rvalue_zero(mask_element_type)); + mask_elements.push(self.context.new_rvalue_zero(mask_element_type)); } let result_type = self.context.new_vector_type(element_type, mask_num_units as u64); @@ -1998,7 +2014,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let new_mask_num_units = std::cmp::max(mask_num_units, vec_num_units); let mask_type = self.context.new_vector_type(mask_element_type, new_mask_num_units as u64); - let mask = self.context.new_rvalue_from_vector(self.location, mask_type, &vector_elements); + let mask = self.context.new_rvalue_from_vector(self.location, mask_type, &mask_elements); let result = self.context.new_rvalue_vector_perm(self.location, v1, v2, mask); if vec_num_units != mask_num_units {