From 2e7d2562805b79647d192a1511f2d616607c565a Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Tue, 3 Sep 2024 16:25:28 -0400 Subject: [PATCH] Add more SIMD intrinsics --- src/intrinsic/llvm.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index 3b9855ddc1894..2d7bb8a2d5481 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -185,7 +185,10 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>( | "__builtin_ia32_vplzcntq_128_mask" | "__builtin_ia32_cvtqq2pd128_mask" | "__builtin_ia32_cvtqq2pd256_mask" - | "__builtin_ia32_cvtqq2ps256_mask" => { + | "__builtin_ia32_cvtqq2ps256_mask" + | "__builtin_ia32_cvtuqq2pd128_mask" + | "__builtin_ia32_cvtuqq2pd256_mask" + | "__builtin_ia32_cvtuqq2ps256_mask" => { let mut new_args = args.to_vec(); // Remove last arg as it doesn't seem to be used in GCC and is always false. new_args.pop(); @@ -381,7 +384,8 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>( ); args = vec![arg.get_address(None)].into(); } - "__builtin_ia32_cvtqq2pd512_mask" | "__builtin_ia32_cvtqq2ps512_mask" => { + "__builtin_ia32_cvtqq2pd512_mask" | "__builtin_ia32_cvtqq2ps512_mask" | "__builtin_ia32_cvtuqq2pd512_mask" | + "__builtin_ia32_cvtuqq2ps512_mask" => { let mut old_args = args.to_vec(); let mut new_args = vec![]; new_args.push(old_args.swap_remove(0)); @@ -493,6 +497,11 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>( let new_args = args.to_vec(); args = vec![new_args[1], new_args[0], new_args[2]].into(); } + "__builtin_ia32_rangesd128_mask_round" | "__builtin_ia32_rangess128_mask_round" + | "__builtin_ia32_reducesd_mask_round" => { + let new_args = args.to_vec(); + args = vec![new_args[0], new_args[1], new_args[4], new_args[2], new_args[3], new_args[5]].into(); + } _ => (), } } @@ -1031,6 +1040,14 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function "llvm.x86.avx512.sitofp.round.v4f64.v4i64" => "__builtin_ia32_cvtqq2pd256_mask", "llvm.x86.avx512.sitofp.round.v8f32.v8i64" => "__builtin_ia32_cvtqq2ps512_mask", "llvm.x86.avx512.sitofp.round.v4f32.v4i64" => "__builtin_ia32_cvtqq2ps256_mask", + "llvm.x86.avx512.uitofp.round.v8f64.v8u64" => "__builtin_ia32_cvtuqq2pd512_mask", + "llvm.x86.avx512.uitofp.round.v2f64.v2u64" => "__builtin_ia32_cvtuqq2pd128_mask", + "llvm.x86.avx512.uitofp.round.v4f64.v4u64" => "__builtin_ia32_cvtuqq2pd256_mask", + "llvm.x86.avx512.uitofp.round.v8f32.v8u64" => "__builtin_ia32_cvtuqq2ps512_mask", + "llvm.x86.avx512.uitofp.round.v4f32.v4u64" => "__builtin_ia32_cvtuqq2ps256_mask", + "llvm.x86.avx512.mask.reduce.pd.512" => "__builtin_ia32_reducepd512_mask_round", + "llvm.x86.avx512.mask.reduce.ps.512" => "__builtin_ia32_reduceps512_mask_round", + "llvm.x86.avx512.mask.reduce.sd" => "__builtin_ia32_reducesd_mask_round", // NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py _ => include!("archs.rs"),