From 40cc89137b22099699d40230973b124e55ca21c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Tue, 10 Oct 2023 18:35:28 +0200 Subject: [PATCH 1/9] Remove from cranelift codegen LLVM intrinsics that are no longer needed --- src/intrinsics/llvm_x86.rs | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 0c9a94e1c231f..559c64bb13bdd 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -32,41 +32,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ret.write_cvalue(fx, CValue::by_val(res, fx.layout_of(fx.tcx.types.i64))); } - // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8` - "llvm.x86.sse2.pmovmskb.128" - | "llvm.x86.avx2.pmovmskb" - | "llvm.x86.sse.movmsk.ps" - | "llvm.x86.sse2.movmsk.pd" => { - intrinsic_args!(fx, args => (a); intrinsic); - - let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); - let lane_ty = fx.clif_type(lane_ty).unwrap(); - assert!(lane_count <= 32); - - let mut res = fx.bcx.ins().iconst(types::I32, 0); - - for lane in (0..lane_count).rev() { - let a_lane = a.value_lane(fx, lane).load_scalar(fx); - - // cast float to int - let a_lane = match lane_ty { - types::F32 => codegen_bitcast(fx, types::I32, a_lane), - types::F64 => codegen_bitcast(fx, types::I64, a_lane), - _ => a_lane, - }; - - // extract sign bit of an int - let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, i64::from(lane_ty.bits() - 1)); - - // shift sign bit into result - let a_lane_sign = clif_intcast(fx, a_lane_sign, types::I32, false); - res = fx.bcx.ins().ishl_imm(res, 1); - res = fx.bcx.ins().bor(res, a_lane_sign); - } - - let res = CValue::by_val(res, fx.layout_of(fx.tcx.types.i32)); - ret.write_cvalue(fx, res); - } "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => { let (x, y, kind) = match args { [x, y, kind] => (x, y, kind), From a302610016bd8299aff79c38f5a6b95ec1c112ba Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 24 Oct 2023 12:22:23 +0000 Subject: [PATCH 2/9] Merge commit '93a5433f17ab5ed48cc88f1e69b0713b16183373' into sync_cg_clif-2023-10-24 --- .vscode/settings.json | 2 +- Cargo.lock | 52 +++---- Cargo.toml | 12 +- Readme.md | 6 +- example/mini_core_hello_world.rs | 11 ++ ...h-gets-miscompiled-with-llvm-sysroot.patch | 25 ---- src/abi/mod.rs | 35 ++--- src/abi/pass_mode.rs | 11 +- src/cast.rs | 6 +- src/common.rs | 19 +++ src/driver/aot.rs | 23 ++- src/inline_asm.rs | 22 +-- src/intrinsics/llvm_x86.rs | 137 ++++++++++++++++++ src/value_and_place.rs | 37 ++--- 14 files changed, 260 insertions(+), 138 deletions(-) delete mode 100644 patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch diff --git a/.vscode/settings.json b/.vscode/settings.json index 60cb51d566362..834a1362caf32 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -33,7 +33,7 @@ ] }, { - "sysroot_src": "./download/sysroot/sysroot_src/library", + "sysroot_src": "./build/stdlib/library", "crates": [ { "root_module": "./example/std_example.rs", diff --git a/Cargo.lock b/Cargo.lock index 8079913cb0ff4..c716d50117312 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -45,18 +45,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cranelift-bforest" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5e1df0da8488dd03b34afc134ba84b754d61862cc465932a9e5d07952f661e" +checksum = "c1512c3bb6b13018e7109fc3ac964bc87b329eaf3a77825d337558d0c7f6f1be" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-codegen" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a17ca4e699a0aaf49a0c88f6311a864f321048aa63f6b787cab20eb5f93f10" +checksum = "16cb8fb9220a6ea7a226705a273ab905309ee546267bdf34948d57932d7f0396" dependencies = [ "bumpalo", "cranelift-bforest", @@ -75,39 +75,39 @@ dependencies = [ [[package]] name = "cranelift-codegen-meta" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022f2793cdade1d37a1f755ac42938a3f832f533eac6cafc8b26b209544c3c06" +checksum = "ab3a8d3b0d4745b183da5ea0792b13d79f5c23d6e69ac04761728e2532b56649" dependencies = [ "cranelift-codegen-shared", ] [[package]] name = "cranelift-codegen-shared" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4d72dbb83c2ad788dec4ad0843070973cb48c35a3ca19b1e7437ac40834fd9c" +checksum = "524141c8e68f2abc2043de4c2b31f6d9dd42432738c246431d0572a1422a4a84" [[package]] name = "cranelift-control" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae07cf26dcc90d546826d747ac63b6c40c916f34b03e92a6ae0422c28d771b8a" +checksum = "97513b57c961c713789a03886a57b43e14ebcd204cbaa8ae50ca6c70a8e716b3" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2fe6b7e49820893691aea497f36257e9d6f52061d8c4758d61d802d5f101a3d" +checksum = "e3f23d3cf3afa7e45f239702612c76d87964f652a55e28d13ed6d7e20f3479dd" [[package]] name = "cranelift-frontend" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f497576ca3674581581601b6a55ccc1b43447217648c880e5bce70db3cf659" +checksum = "554cd4947ec9209b58bf9ae5bf83581b5ddf9128bd967208e334b504a57db54e" dependencies = [ "cranelift-codegen", "log", @@ -117,15 +117,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b96aa02eac00fffee13b0cd37d17874ccdb3d5458983041accd825ef78ce6454" +checksum = "6c1892a439696b6413cb54083806f5fd9fc431768b8de74864b3d9e8b93b124f" [[package]] name = "cranelift-jit" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1d6e0e308c873eefc185745a6b21daec2a10f7554c9fb67e334c2d7d756d979" +checksum = "32209252fb38acaf1662ccd0397907bbe0e92bdb13b6ddbfd2f74e437f83e685" dependencies = [ "anyhow", "cranelift-codegen", @@ -143,9 +143,9 @@ dependencies = [ [[package]] name = "cranelift-module" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1aa8ebb06eced4e478c3f94f1d65d4e7c93493f4640057912b27a3e34b84841" +checksum = "bf42656f5f6df7bfafc4dd7b63a1888b0627c07b43b2cb9aa54e13843fed39eb" dependencies = [ "anyhow", "cranelift-codegen", @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "cranelift-native" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2870170ca44054b202c737626607b87be6e35655084bd94a6ff807a5812ba7df" +checksum = "e0c2d3badd4b9690865f5bb68a71fa94de592fa2df3f3d11a5a062c60c0a107a" dependencies = [ "cranelift-codegen", "libc", @@ -165,9 +165,9 @@ dependencies = [ [[package]] name = "cranelift-object" -version = "0.101.0" +version = "0.101.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20647761742d17dabac8205da958910ede78599550e06418a16711a3ee2fc897" +checksum = "88eca54bbecea3170035168357306e9c779d4a63d8bf036c9e16bd21fdaa69b5" dependencies = [ "anyhow", "cranelift-codegen", @@ -374,9 +374,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasmtime-jit-icache-coherence" -version = "14.0.0" +version = "14.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3a5dda53ad6993f9b0a2d65fb49e0348a7232a27a8794064122870d6ee19eb2" +checksum = "9aaf2fa8fd2d6b65abae9b92edfe69254cc5d6b166e342364036c3e347de8da9" dependencies = [ "cfg-if", "libc", diff --git a/Cargo.toml b/Cargo.toml index 5ad5e0e8140c3..f2ce714e8ff14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,12 +8,12 @@ crate-type = ["dylib"] [dependencies] # These have to be in sync with each other -cranelift-codegen = { version = "0.101", features = ["unwind", "all-arch"] } -cranelift-frontend = { version = "0.101" } -cranelift-module = { version = "0.101" } -cranelift-native = { version = "0.101" } -cranelift-jit = { version = "0.101", optional = true } -cranelift-object = { version = "0.101" } +cranelift-codegen = { version = "0.101.1", features = ["unwind", "all-arch"] } +cranelift-frontend = { version = "0.101.1" } +cranelift-module = { version = "0.101.1" } +cranelift-native = { version = "0.101.1" } +cranelift-jit = { version = "0.101.1", optional = true } +cranelift-object = { version = "0.101.1" } target-lexicon = "0.12.0" gimli = { version = "0.28", default-features = false, features = ["write"]} object = { version = "0.32", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] } diff --git a/Readme.md b/Readme.md index 6f2027be96de9..5664cbe7d4fac 100644 --- a/Readme.md +++ b/Readme.md @@ -8,7 +8,7 @@ If not please open an issue. ## Building and testing ```bash -$ git clone https://github.com/bjorn3/rustc_codegen_cranelift +$ git clone https://github.com/rust-lang/rustc_codegen_cranelift $ cd rustc_codegen_cranelift $ ./y.sh prepare $ ./y.sh build @@ -29,7 +29,7 @@ Extract the `dist` directory in the archive anywhere you want. If you want to use `cargo clif build` instead of having to specify the full path to the `cargo-clif` executable, you can add the `bin` subdirectory of the extracted `dist` directory to your `PATH`. (tutorial [for Windows](https://stackoverflow.com/a/44272417), and [for Linux/MacOS](https://unix.stackexchange.com/questions/26047/how-to-correctly-add-a-path-to-path/26059#26059)). -[releases]: https://github.com/bjorn3/rustc_codegen_cranelift/releases/tag/dev +[releases]: https://github.com/rust-lang/rustc_codegen_cranelift/releases/tag/dev ## Usage @@ -78,7 +78,7 @@ configuration options. * Inline assembly ([no cranelift support](https://github.com/bytecodealliance/wasmtime/issues/1041)) * On UNIX there is support for invoking an external assembler for `global_asm!` and `asm!`. -* SIMD ([tracked here](https://github.com/bjorn3/rustc_codegen_cranelift/issues/171), `std::simd` fully works, `std::arch` is partially supported) +* SIMD ([tracked here](https://github.com/rust-lang/rustc_codegen_cranelift/issues/171), `std::simd` fully works, `std::arch` is partially supported) * Unwinding on panics ([no cranelift support](https://github.com/bytecodealliance/wasmtime/issues/1677), `-Cpanic=abort` is enabled by default) ## License diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs index 91de04d97702f..afc51a47f1407 100644 --- a/example/mini_core_hello_world.rs +++ b/example/mini_core_hello_world.rs @@ -353,6 +353,17 @@ fn main() { let f = V([0.0, 1.0]); let _a = f.0[0]; + + stack_val_align(); +} + +#[inline(never)] +fn stack_val_align() { + #[repr(align(8192))] + struct Foo(u8); + + let a = Foo(0); + assert_eq!(&a as *const Foo as usize % 8192, 0); } #[cfg(all( diff --git a/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch b/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch deleted file mode 100644 index e6ebdcec783af..0000000000000 --- a/patches/0001-regex-Ignore-test-which-gets-miscompiled-with-llvm-sysroot.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 5d4afb8d807d181038b6a004d17ed055a8d191b2 Mon Sep 17 00:00:00 2001 -From: bjorn3 <17426603+bjorn3@users.noreply.github.com> -Date: Mon, 2 Oct 2023 13:59:00 +0000 -Subject: [PATCH] Ignore test which gets miscompiled with llvm sysroot - ---- - regex-automata/src/util/pool.rs | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/regex-automata/src/util/pool.rs b/regex-automata/src/util/pool.rs -index c03d7b0..28b233b 100644 ---- a/regex-automata/src/util/pool.rs -+++ b/regex-automata/src/util/pool.rs -@@ -1081,6 +1081,8 @@ mod tests { - // into the pool. This in turn resulted in this test producing a data race. - #[cfg(feature = "std")] - #[test] -+ // FIXME(rustc_codegen_cranelift#1395) miscompilation of thread::scope with LLVM sysroot -+ #[ignore] - fn thread_owner_sync() { - let pool = Pool::new(|| vec!['a']); - { --- -2.34.1 - diff --git a/src/abi/mod.rs b/src/abi/mod.rs index c75ad852f82d0..c4572e035258d 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -120,32 +120,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> { args: &[Value], ) -> Cow<'_, [Value]> { if self.tcx.sess.target.is_like_windows { - let (mut params, mut args): (Vec<_>, Vec<_>) = - params - .into_iter() - .zip(args) - .map(|(param, &arg)| { - if param.value_type == types::I128 { - let arg_ptr = Pointer::stack_slot(self.bcx.create_sized_stack_slot( - StackSlotData { kind: StackSlotKind::ExplicitSlot, size: 16 }, - )); - arg_ptr.store(self, arg, MemFlags::trusted()); - (AbiParam::new(self.pointer_type), arg_ptr.get_addr(self)) - } else { - (param, arg) - } - }) - .unzip(); + let (mut params, mut args): (Vec<_>, Vec<_>) = params + .into_iter() + .zip(args) + .map(|(param, &arg)| { + if param.value_type == types::I128 { + let arg_ptr = self.create_stack_slot(16, 16); + arg_ptr.store(self, arg, MemFlags::trusted()); + (AbiParam::new(self.pointer_type), arg_ptr.get_addr(self)) + } else { + (param, arg) + } + }) + .unzip(); let indirect_ret_val = returns.len() == 1 && returns[0].value_type == types::I128; if indirect_ret_val { params.insert(0, AbiParam::new(self.pointer_type)); - let ret_ptr = - Pointer::stack_slot(self.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: 16, - })); + let ret_ptr = self.create_stack_slot(16, 16); args.insert(0, ret_ptr.get_addr(self)); self.lib_call_unadjusted(name, params, vec![], &args); return Cow::Owned(vec![ret_ptr.load(self, types::I128, MemFlags::trusted())]); diff --git a/src/abi/pass_mode.rs b/src/abi/pass_mode.rs index 7c9f8c1051cbb..0652267002910 100644 --- a/src/abi/pass_mode.rs +++ b/src/abi/pass_mode.rs @@ -189,16 +189,13 @@ pub(super) fn from_casted_value<'tcx>( let abi_params = cast_target_to_abi_params(cast); let abi_param_size: u32 = abi_params.iter().map(|param| param.value_type.bytes()).sum(); let layout_size = u32::try_from(layout.size.bytes()).unwrap(); - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. + let ptr = fx.create_stack_slot( // Stack slot size may be bigger for example `[u8; 3]` which is packed into an `i32`. // It may also be smaller for example when the type is a wrapper around an integer with a // larger alignment than the integer. - size: (std::cmp::max(abi_param_size, layout_size) + 15) / 16 * 16, - }); - let ptr = Pointer::stack_slot(stack_slot); + std::cmp::max(abi_param_size, layout_size), + u32::try_from(layout.align.pref.bytes()).unwrap(), + ); let mut offset = 0; let mut block_params_iter = block_params.iter().copied(); for param in abi_params { diff --git a/src/cast.rs b/src/cast.rs index 7e027c5f1b309..0b5cb1547fc69 100644 --- a/src/cast.rs +++ b/src/cast.rs @@ -104,11 +104,7 @@ pub(crate) fn clif_int_or_float_cast( &[from], )[0]; // FIXME(bytecodealliance/wasmtime#6104) use bitcast instead of store to get from i64x2 to i128 - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: 16, - }); - let ret_ptr = Pointer::stack_slot(stack_slot); + let ret_ptr = fx.create_stack_slot(16, 16); ret_ptr.store(fx, ret, MemFlags::trusted()); ret_ptr.load(fx, types::I128, MemFlags::trusted()) } else { diff --git a/src/common.rs b/src/common.rs index 7a3ae6ebf52fa..9771f44f62cfb 100644 --- a/src/common.rs +++ b/src/common.rs @@ -383,6 +383,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> { }) } + pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer { + if align <= 16 { + let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData { + kind: StackSlotKind::ExplicitSlot, + // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to + // specify stack slot alignment. + size: (size + 15) / 16 * 16, + }); + Pointer::stack_slot(stack_slot) + } else { + // Alignment is too big to handle using the above hack. Dynamically realign a stack slot + // instead. This wastes some space for the realignment. + let base_ptr = self.create_stack_slot(size + align, 16).get_addr(self); + let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align)); + let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align)); + Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset)) + } + } + pub(crate) fn set_debug_loc(&mut self, source_info: mir::SourceInfo) { if let Some(debug_context) = &mut self.cx.debug_context { let (file, line, column) = diff --git a/src/driver/aot.rs b/src/driver/aot.rs index d3a7decc5430f..11229dd421ecc 100644 --- a/src/driver/aot.rs +++ b/src/driver/aot.rs @@ -361,12 +361,26 @@ pub(crate) fn run_aot( metadata: EncodedMetadata, need_metadata_module: bool, ) -> Box { + // FIXME handle `-Ctarget-cpu=native` + let target_cpu = match tcx.sess.opts.cg.target_cpu { + Some(ref name) => name, + None => tcx.sess.target.cpu.as_ref(), + } + .to_owned(); + let cgus = if tcx.sess.opts.output_types.should_codegen() { tcx.collect_and_partition_mono_items(()).1 } else { // If only `--emit metadata` is used, we shouldn't perform any codegen. // Also `tcx.collect_and_partition_mono_items` may panic in that case. - &[] + return Box::new(OngoingCodegen { + modules: vec![], + allocator_module: None, + metadata_module: None, + metadata, + crate_info: CrateInfo::new(tcx, target_cpu), + concurrency_limiter: ConcurrencyLimiter::new(tcx.sess, 0), + }); }; if tcx.dep_graph.is_fully_enabled() { @@ -481,13 +495,6 @@ pub(crate) fn run_aot( None }; - // FIXME handle `-Ctarget-cpu=native` - let target_cpu = match tcx.sess.opts.cg.target_cpu { - Some(ref name) => name, - None => tcx.sess.target.cpu.as_ref(), - } - .to_owned(); - Box::new(OngoingCodegen { modules, allocator_module, diff --git a/src/inline_asm.rs b/src/inline_asm.rs index ed07723425481..0517c609337b9 100644 --- a/src/inline_asm.rs +++ b/src/inline_asm.rs @@ -878,13 +878,7 @@ fn call_inline_asm<'tcx>( inputs: Vec<(Size, Value)>, outputs: Vec<(Size, CPlace<'tcx>)>, ) { - let stack_slot = fx.bcx.func.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: u32::try_from(slot_size.bytes()).unwrap(), - }); - if fx.clif_comments.enabled() { - fx.add_comment(stack_slot, "inline asm scratch slot"); - } + let stack_slot = fx.create_stack_slot(u32::try_from(slot_size.bytes()).unwrap(), 16); let inline_asm_func = fx .module @@ -904,15 +898,23 @@ fn call_inline_asm<'tcx>( } for (offset, value) in inputs { - fx.bcx.ins().stack_store(value, stack_slot, i32::try_from(offset.bytes()).unwrap()); + stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).store( + fx, + value, + MemFlags::trusted(), + ); } - let stack_slot_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0); + let stack_slot_addr = stack_slot.get_addr(fx); fx.bcx.ins().call(inline_asm_func, &[stack_slot_addr]); for (offset, place) in outputs { let ty = fx.clif_type(place.layout().ty).unwrap(); - let value = fx.bcx.ins().stack_load(ty, stack_slot, i32::try_from(offset.bytes()).unwrap()); + let value = stack_slot.offset(fx, i32::try_from(offset.bytes()).unwrap().into()).load( + fx, + ty, + MemFlags::trusted(), + ); place.write_cvalue(fx, CValue::by_val(value, place.layout())); } } diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index 0c9a94e1c231f..35f144d7dad45 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -310,6 +310,143 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( let val = CValue::by_val_pair(cb_out, c, layout); ret.write_cvalue(fx, val); } + "llvm.x86.sse2.pavg.b" | "llvm.x86.sse2.pavg.w" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + // FIXME use vector instructions when possible + simd_pair_for_each_lane( + fx, + a, + b, + ret, + &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| { + // (a + b + 1) >> 1 + let lane_ty = fx.bcx.func.dfg.value_type(a_lane); + let a_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), a_lane); + let b_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), b_lane); + let sum = fx.bcx.ins().iadd(a_lane, b_lane); + let num_plus_one = fx.bcx.ins().iadd_imm(sum, 1); + let res = fx.bcx.ins().ushr_imm(num_plus_one, 1); + fx.bcx.ins().ireduce(lane_ty, res) + }, + ); + } + "llvm.x86.sse2.psra.w" => { + intrinsic_args!(fx, args => (a, count); intrinsic); + + let count_lane = count.force_stack(fx).0.load(fx, types::I64, MemFlags::trusted()); + let lane_ty = fx.clif_type(a.layout().ty.simd_size_and_type(fx.tcx).1).unwrap(); + let max_count = fx.bcx.ins().iconst(types::I64, i64::from(lane_ty.bits() - 1)); + let saturated_count = fx.bcx.ins().umin(count_lane, max_count); + + // FIXME use vector instructions when possible + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, a_lane| { + fx.bcx.ins().sshr(a_lane, saturated_count) + }); + } + "llvm.x86.sse2.psad.bw" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.u8); + assert_eq!(ret_lane_ty, fx.tcx.types.u64); + assert_eq!(lane_count, ret_lane_count * 8); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.u64); + for out_lane_idx in 0..lane_count / 8 { + let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0); + + for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 { + let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx); + let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx); + + let lane_diff = fx.bcx.ins().isub(a_lane, b_lane); + let abs_lane_diff = fx.bcx.ins().iabs(lane_diff); + let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff); + lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff); + } + + let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } + "llvm.x86.ssse3.pmadd.ub.sw.128" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.u8); + assert_eq!(ret_lane_ty, fx.tcx.types.i16); + assert_eq!(lane_count, ret_lane_count * 2); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.i16); + for out_lane_idx in 0..lane_count / 2 { + let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0); + let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0); + + let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1); + let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1); + + let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0); + let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1); + + let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1); + + let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0); + + let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16)); + let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16)); + + let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min); + let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val); + + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } + "llvm.x86.sse2.pmadd.wd" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + assert_eq!(a.layout(), b.layout()); + let layout = a.layout(); + + let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); + let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); + assert_eq!(lane_ty, fx.tcx.types.i16); + assert_eq!(ret_lane_ty, fx.tcx.types.i32); + assert_eq!(lane_count, ret_lane_count * 2); + + let ret_lane_layout = fx.layout_of(fx.tcx.types.i32); + for out_lane_idx in 0..lane_count / 2 { + let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0); + let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx); + let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0); + + let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1); + let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx); + let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1); + + let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0); + let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1); + + let res_lane = fx.bcx.ins().iadd(mul0, mul1); + let res_lane = CValue::by_val(res_lane, ret_lane_layout); + + ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane); + } + } _ => { fx.tcx .sess diff --git a/src/value_and_place.rs b/src/value_and_place.rs index 3a6a6c9e3f508..5f0aa6c5581dd 100644 --- a/src/value_and_place.rs +++ b/src/value_and_place.rs @@ -132,18 +132,11 @@ impl<'tcx> CValue<'tcx> { (ptr.get_addr(fx), vtable) } CValueInner::ByValPair(data, vtable) => { - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. - size: (u32::try_from(fx.target_config.pointer_type().bytes()).unwrap() + 15) - / 16 - * 16, - }); - let data_ptr = Pointer::stack_slot(stack_slot); - let mut flags = MemFlags::new(); - flags.set_notrap(); - data_ptr.store(fx, data, flags); + let data_ptr = fx.create_stack_slot( + u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(), + u32::try_from(fx.target_config.pointer_type().bytes()).unwrap(), + ); + data_ptr.store(fx, data, MemFlags::trusted()); (data_ptr.get_addr(fx), vtable) } @@ -372,13 +365,11 @@ impl<'tcx> CPlace<'tcx> { .fatal(format!("values of type {} are too big to store on the stack", layout.ty)); } - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. - size: (u32::try_from(layout.size.bytes()).unwrap() + 15) / 16 * 16, - }); - CPlace { inner: CPlaceInner::Addr(Pointer::stack_slot(stack_slot), None), layout } + let stack_slot = fx.create_stack_slot( + u32::try_from(layout.size.bytes()).unwrap(), + u32::try_from(layout.align.pref.bytes()).unwrap(), + ); + CPlace { inner: CPlaceInner::Addr(stack_slot, None), layout } } pub(crate) fn new_var( @@ -543,13 +534,7 @@ impl<'tcx> CPlace<'tcx> { _ if src_ty.is_vector() && dst_ty.is_vector() => codegen_bitcast(fx, dst_ty, data), _ if src_ty.is_vector() || dst_ty.is_vector() => { // FIXME(bytecodealliance/wasmtime#6104) do something more efficient for transmutes between vectors and integers. - let stack_slot = fx.bcx.create_sized_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to - // specify stack slot alignment. - size: (src_ty.bytes() + 15) / 16 * 16, - }); - let ptr = Pointer::stack_slot(stack_slot); + let ptr = fx.create_stack_slot(src_ty.bytes(), src_ty.bytes()); ptr.store(fx, data, MemFlags::trusted()); ptr.load(fx, dst_ty, MemFlags::trusted()) } From 1e39bbf74c69f64f570d378d7cd68b314b1bae93 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Wed, 25 Oct 2023 17:19:49 +0000 Subject: [PATCH 3/9] Update Cranelift to 0.101.2 and disable host-arch feature of cranelift-codegen This ensures that cg_clif can be built for targets that aren't natively supported by Cranelift. It will not be possible to compile for the host in this case, but cross-compilation will still be possible. We won't distribute cg_clif as rustup component for any targets that aren't natively supported by Cranelift, but will still build it if codegen-backends lists "cranelift". --- Cargo.lock | 52 ++++++++++++++++++++++++++-------------------------- Cargo.toml | 12 ++++++------ 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c716d50117312..e8558198b94fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -45,18 +45,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cranelift-bforest" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1512c3bb6b13018e7109fc3ac964bc87b329eaf3a77825d337558d0c7f6f1be" +checksum = "f773437307980ac0f424bf9b9a5d0cd21a0f17248c6860c9a65bec8b5975f3fe" dependencies = [ "cranelift-entity", ] [[package]] name = "cranelift-codegen" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16cb8fb9220a6ea7a226705a273ab905309ee546267bdf34948d57932d7f0396" +checksum = "443c2ac50e97fb7de1a0f862753fce3f27215558811a6fcee508eb0c3747fa79" dependencies = [ "bumpalo", "cranelift-bforest", @@ -75,39 +75,39 @@ dependencies = [ [[package]] name = "cranelift-codegen-meta" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3a8d3b0d4745b183da5ea0792b13d79f5c23d6e69ac04761728e2532b56649" +checksum = "c5b174c411480c79ce0793c55042fa51bec27e486381d103a53cab3b480cb2db" dependencies = [ "cranelift-codegen-shared", ] [[package]] name = "cranelift-codegen-shared" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524141c8e68f2abc2043de4c2b31f6d9dd42432738c246431d0572a1422a4a84" +checksum = "73fa0151a528066a369de6debeea4d4b23a32aba68b5add8c46d3dc8091ff434" [[package]] name = "cranelift-control" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97513b57c961c713789a03886a57b43e14ebcd204cbaa8ae50ca6c70a8e716b3" +checksum = "b8adf1e6398493c9bea1190e37d28a0eb0eca5fddbc80e01e506cda34db92b1f" dependencies = [ "arbitrary", ] [[package]] name = "cranelift-entity" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3f23d3cf3afa7e45f239702612c76d87964f652a55e28d13ed6d7e20f3479dd" +checksum = "4917e2ed3bb5fe87d0ed88395ca6d644018d119a034faedd1f3e1f2c33cd52b2" [[package]] name = "cranelift-frontend" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "554cd4947ec9209b58bf9ae5bf83581b5ddf9128bd967208e334b504a57db54e" +checksum = "9aaadf1e7cf28886bbf046eaf7ef538997bc8a7e020e578ea4957b39da87d5a1" dependencies = [ "cranelift-codegen", "log", @@ -117,15 +117,15 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c1892a439696b6413cb54083806f5fd9fc431768b8de74864b3d9e8b93b124f" +checksum = "a67fda31b9d69eaa1c49a2081939454c45857596a9d45af6744680541c628b4c" [[package]] name = "cranelift-jit" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32209252fb38acaf1662ccd0397907bbe0e92bdb13b6ddbfd2f74e437f83e685" +checksum = "d6bf32710628e7ff298739f1ed80a0bfdafc0c6a3e284c4540b23f18e8889d4b" dependencies = [ "anyhow", "cranelift-codegen", @@ -143,9 +143,9 @@ dependencies = [ [[package]] name = "cranelift-module" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf42656f5f6df7bfafc4dd7b63a1888b0627c07b43b2cb9aa54e13843fed39eb" +checksum = "4d693e93a0fbf56b4bc93cffe6b107c2e52f070e1111950505fc8c83ac440b9d" dependencies = [ "anyhow", "cranelift-codegen", @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "cranelift-native" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c2d3badd4b9690865f5bb68a71fa94de592fa2df3f3d11a5a062c60c0a107a" +checksum = "76fb52ba71be98312f35e798d9e98e45ab2586f27584231bf7c644fa9501e8af" dependencies = [ "cranelift-codegen", "libc", @@ -165,9 +165,9 @@ dependencies = [ [[package]] name = "cranelift-object" -version = "0.101.1" +version = "0.101.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88eca54bbecea3170035168357306e9c779d4a63d8bf036c9e16bd21fdaa69b5" +checksum = "2551b2e185022b89e9efa5e04c0f17f679b86ef73d9f7feabc48b608ff23120d" dependencies = [ "anyhow", "cranelift-codegen", @@ -374,9 +374,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasmtime-jit-icache-coherence" -version = "14.0.1" +version = "14.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aaf2fa8fd2d6b65abae9b92edfe69254cc5d6b166e342364036c3e347de8da9" +checksum = "0980a96b16abbdaf829858d2389697b1d6cfc6a903873fd74b7e47a6b1045584" dependencies = [ "cfg-if", "libc", diff --git a/Cargo.toml b/Cargo.toml index f2ce714e8ff14..b5e6f431123e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,12 +8,12 @@ crate-type = ["dylib"] [dependencies] # These have to be in sync with each other -cranelift-codegen = { version = "0.101.1", features = ["unwind", "all-arch"] } -cranelift-frontend = { version = "0.101.1" } -cranelift-module = { version = "0.101.1" } -cranelift-native = { version = "0.101.1" } -cranelift-jit = { version = "0.101.1", optional = true } -cranelift-object = { version = "0.101.1" } +cranelift-codegen = { version = "0.101.2", default-features = false, features = ["std", "unwind", "all-arch"] } +cranelift-frontend = { version = "0.101.2" } +cranelift-module = { version = "0.101.2" } +cranelift-native = { version = "0.101.2" } +cranelift-jit = { version = "0.101.2", optional = true } +cranelift-object = { version = "0.101.2" } target-lexicon = "0.12.0" gimli = { version = "0.28", default-features = false, features = ["write"]} object = { version = "0.32", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] } From 1cb7bdb757d35d51a39219351d688e4631b86ff9 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Thu, 26 Oct 2023 17:38:52 +0000 Subject: [PATCH 4/9] Update target-lexicon to 0.12.12 This adds support for loongarch and a bunch of other targets --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e8558198b94fc..dcb6cc57584cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -362,9 +362,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "target-lexicon" -version = "0.12.5" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9410d0f6853b1d94f0e519fb95df60f29d2c1eff2d921ffdf01a4c8a3b54f12d" +checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" [[package]] name = "version_check" From 827a6d8d9f0b84fec8fd15c336d845a05435889d Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sun, 29 Oct 2023 16:06:32 +0000 Subject: [PATCH 5/9] Use the LLVM rustc backend as external assembler The LLVM backend is generally available, while the gnu assembler is not on Windows and many other platforms by default. --- src/global_asm.rs | 70 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/global_asm.rs b/src/global_asm.rs index 6692d1b85eac0..7c2138bf0cfda 100644 --- a/src/global_asm.rs +++ b/src/global_asm.rs @@ -134,20 +134,66 @@ pub(crate) fn compile_global_asm( .join("\n"); global_asm.push('\n'); - let output_object_file = config.output_filenames.temp_path(OutputType::Object, Some(cgu_name)); + let global_asm_object_file = add_file_stem_postfix( + config.output_filenames.temp_path(OutputType::Object, Some(cgu_name)), + ".asm", + ); // Assemble `global_asm` - let global_asm_object_file = add_file_stem_postfix(output_object_file, ".asm"); - let mut child = Command::new(&config.assembler) - .arg("-o") - .arg(&global_asm_object_file) - .stdin(Stdio::piped()) - .spawn() - .expect("Failed to spawn `as`."); - child.stdin.take().unwrap().write_all(global_asm.as_bytes()).unwrap(); - let status = child.wait().expect("Failed to wait for `as`."); - if !status.success() { - return Err(format!("Failed to assemble `{}`", global_asm)); + if false { + let mut child = Command::new(&config.assembler) + .arg("-o") + .arg(&global_asm_object_file) + .stdin(Stdio::piped()) + .spawn() + .expect("Failed to spawn `as`."); + child.stdin.take().unwrap().write_all(global_asm.as_bytes()).unwrap(); + let status = child.wait().expect("Failed to wait for `as`."); + if !status.success() { + return Err(format!("Failed to assemble `{}`", global_asm)); + } + } else { + let mut child = Command::new(std::env::current_exe().unwrap()) + .arg("--target") + .arg(&config.target) + .arg("--crate-type") + .arg("staticlib") + .arg("--emit") + .arg("obj") + .arg("-o") + .arg(&global_asm_object_file) + .arg("-") + .arg("-Abad_asm_style") + .stdin(Stdio::piped()) + .spawn() + .expect("Failed to spawn `as`."); + let mut stdin = child.stdin.take().unwrap(); + stdin + .write_all( + br####" + #![feature(decl_macro, no_core, rustc_attrs)] + #![allow(internal_features)] + #![no_core] + #[rustc_builtin_macro] + #[rustc_macro_transparency = "semitransparent"] + macro global_asm() { /* compiler built-in */ } + global_asm!(r###" + "####, + ) + .unwrap(); + stdin.write_all(global_asm.as_bytes()).unwrap(); + stdin + .write_all( + br####" + "###); + "####, + ) + .unwrap(); + std::mem::drop(stdin); + let status = child.wait().expect("Failed to wait for `as`."); + if !status.success() { + return Err(format!("Failed to assemble `{}`", global_asm)); + } } Ok(Some(global_asm_object_file)) From b1e705146a26f5b650360edf2ecf05fec1315c33 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sun, 29 Oct 2023 16:08:16 +0000 Subject: [PATCH 6/9] Unconditionally handle int $$0x29 using a shim --- src/inline_asm.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/inline_asm.rs b/src/inline_asm.rs index 0517c609337b9..b2e27399055e2 100644 --- a/src/inline_asm.rs +++ b/src/inline_asm.rs @@ -45,6 +45,14 @@ pub(crate) fn codegen_inline_asm<'tcx>( ) { // FIXME add .eh_frame unwind info directives + // Used by panic_abort on Windows, but uses a syntax which only happens to work with + // asm!() by accident and breaks with the GNU assembler as well as global_asm!() for + // the LLVM backend. + if template[0] == InlineAsmTemplatePiece::String("int $$0x29".to_string()) { + fx.bcx.ins().trap(TrapCode::User(1)); + return; + } + if !asm_supported(fx.tcx) { if template.is_empty() { let destination_block = fx.get_block(destination.unwrap()); @@ -52,12 +60,6 @@ pub(crate) fn codegen_inline_asm<'tcx>( return; } - // Used by panic_abort - if template[0] == InlineAsmTemplatePiece::String("int $$0x29".to_string()) { - fx.bcx.ins().trap(TrapCode::User(1)); - return; - } - // Used by stdarch if template[0] == InlineAsmTemplatePiece::String("mov ".to_string()) && matches!( From 35453ac0eb2fc906181903ae32a41df070f11458 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sun, 29 Oct 2023 16:09:39 +0000 Subject: [PATCH 7/9] Stabilize inline asm usage on all platforms But exclude sym operands for now as they are somewhat broken. --- Cargo.toml | 4 +- src/global_asm.rs | 42 ++++---- src/inline_asm.rs | 206 ++-------------------------------------- src/intrinsics/cpuid.rs | 74 --------------- src/intrinsics/mod.rs | 2 - 5 files changed, 29 insertions(+), 299 deletions(-) delete mode 100644 src/intrinsics/cpuid.rs diff --git a/Cargo.toml b/Cargo.toml index f2ce714e8ff14..f422d53380fe3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,9 +35,9 @@ smallvec = "1.8.1" [features] # Enable features not ready to be enabled when compiling as part of rustc -unstable-features = ["jit", "inline_asm"] +unstable-features = ["jit", "inline_asm_sym"] jit = ["cranelift-jit", "libloading"] -inline_asm = [] +inline_asm_sym = [] [package.metadata.rust-analyzer] rustc_private = true diff --git a/src/global_asm.rs b/src/global_asm.rs index 7c2138bf0cfda..332a7ca9bf249 100644 --- a/src/global_asm.rs +++ b/src/global_asm.rs @@ -46,6 +46,13 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String, global_asm.push_str(&string); } InlineAsmOperand::SymFn { anon_const } => { + if cfg!(not(feature = "inline_asm_sym")) { + tcx.sess.span_err( + item.span, + "asm! and global_asm! sym operands are not yet supported", + ); + } + let ty = tcx.typeck_body(anon_const.body).node_type(anon_const.hir_id); let instance = match ty.kind() { &ty::FnDef(def_id, args) => Instance::new(def_id, args), @@ -57,6 +64,13 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String, global_asm.push_str(symbol.name); } InlineAsmOperand::SymStatic { path: _, def_id } => { + if cfg!(not(feature = "inline_asm_sym")) { + tcx.sess.span_err( + item.span, + "asm! and global_asm! sym operands are not yet supported", + ); + } + let instance = Instance::mono(tcx, def_id).polymorphize(tcx); let symbol = tcx.symbol_name(instance); global_asm.push_str(symbol.name); @@ -81,22 +95,23 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String, } } -pub(crate) fn asm_supported(tcx: TyCtxt<'_>) -> bool { - cfg!(feature = "inline_asm") && !tcx.sess.target.is_like_windows -} - #[derive(Debug)] pub(crate) struct GlobalAsmConfig { - asm_enabled: bool, assembler: PathBuf, + target: String, pub(crate) output_filenames: Arc, } impl GlobalAsmConfig { pub(crate) fn new(tcx: TyCtxt<'_>) -> Self { GlobalAsmConfig { - asm_enabled: asm_supported(tcx), assembler: crate::toolchain::get_toolchain_binary(tcx.sess, "as"), + target: match &tcx.sess.opts.target_triple { + rustc_target::spec::TargetTriple::TargetTriple(triple) => triple.clone(), + rustc_target::spec::TargetTriple::TargetJson { path_for_rustdoc, .. } => { + path_for_rustdoc.to_str().unwrap().to_owned() + } + }, output_filenames: tcx.output_filenames(()).clone(), } } @@ -111,21 +126,6 @@ pub(crate) fn compile_global_asm( return Ok(None); } - if !config.asm_enabled { - if global_asm.contains("__rust_probestack") { - return Ok(None); - } - - if cfg!(not(feature = "inline_asm")) { - return Err( - "asm! and global_asm! support is disabled while compiling rustc_codegen_cranelift" - .to_owned(), - ); - } else { - return Err("asm! and global_asm! are not yet supported on Windows".to_owned()); - } - } - // Remove all LLVM style comments let mut global_asm = global_asm .lines() diff --git a/src/inline_asm.rs b/src/inline_asm.rs index b2e27399055e2..331649b2ec24f 100644 --- a/src/inline_asm.rs +++ b/src/inline_asm.rs @@ -8,7 +8,6 @@ use rustc_span::sym; use rustc_target::asm::*; use target_lexicon::BinaryFormat; -use crate::global_asm::asm_supported; use crate::prelude::*; enum CInlineAsmOperand<'tcx> { @@ -53,205 +52,6 @@ pub(crate) fn codegen_inline_asm<'tcx>( return; } - if !asm_supported(fx.tcx) { - if template.is_empty() { - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } - - // Used by stdarch - if template[0] == InlineAsmTemplatePiece::String("mov ".to_string()) - && matches!( - template[1], - InlineAsmTemplatePiece::Placeholder { - operand_idx: 0, - modifier: Some('r'), - span: _ - } - ) - && template[2] == InlineAsmTemplatePiece::String(", rbx".to_string()) - && template[3] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[4] == InlineAsmTemplatePiece::String("cpuid".to_string()) - && template[5] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[6] == InlineAsmTemplatePiece::String("xchg ".to_string()) - && matches!( - template[7], - InlineAsmTemplatePiece::Placeholder { - operand_idx: 0, - modifier: Some('r'), - span: _ - } - ) - && template[8] == InlineAsmTemplatePiece::String(", rbx".to_string()) - { - assert_eq!(operands.len(), 4); - let (leaf, eax_place) = match operands[1] { - InlineAsmOperand::InOut { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), - late: _, - ref in_value, - out_place: Some(out_place), - } => ( - crate::base::codegen_operand(fx, in_value).load_scalar(fx), - crate::base::codegen_place(fx, out_place), - ), - _ => unreachable!(), - }; - let ebx_place = match operands[0] { - InlineAsmOperand::Out { - reg: - InlineAsmRegOrRegClass::RegClass(InlineAsmRegClass::X86( - X86InlineAsmRegClass::reg, - )), - late: _, - place: Some(place), - } => crate::base::codegen_place(fx, place), - _ => unreachable!(), - }; - let (sub_leaf, ecx_place) = match operands[2] { - InlineAsmOperand::InOut { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), - late: _, - ref in_value, - out_place: Some(out_place), - } => ( - crate::base::codegen_operand(fx, in_value).load_scalar(fx), - crate::base::codegen_place(fx, out_place), - ), - _ => unreachable!(), - }; - let edx_place = match operands[3] { - InlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), - late: _, - place: Some(place), - } => crate::base::codegen_place(fx, place), - _ => unreachable!(), - }; - - let (eax, ebx, ecx, edx) = crate::intrinsics::codegen_cpuid_call(fx, leaf, sub_leaf); - - eax_place.write_cvalue(fx, CValue::by_val(eax, fx.layout_of(fx.tcx.types.u32))); - ebx_place.write_cvalue(fx, CValue::by_val(ebx, fx.layout_of(fx.tcx.types.u32))); - ecx_place.write_cvalue(fx, CValue::by_val(ecx, fx.layout_of(fx.tcx.types.u32))); - edx_place.write_cvalue(fx, CValue::by_val(edx, fx.layout_of(fx.tcx.types.u32))); - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } - - // Used by compiler-builtins - if fx.tcx.symbol_name(fx.instance).name.starts_with("___chkstk") { - // ___chkstk, ___chkstk_ms and __alloca are only used on Windows - crate::trap::trap_unimplemented(fx, "Stack probes are not supported"); - return; - } else if fx.tcx.symbol_name(fx.instance).name == "__alloca" { - crate::trap::trap_unimplemented(fx, "Alloca is not supported"); - return; - } - - // Used by core::hint::spin_loop() - if template[0] - == InlineAsmTemplatePiece::String(".insn i 0x0F, 0, x0, x0, 0x010".to_string()) - && template.len() == 1 - { - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } - - // Used by measureme - if template[0] == InlineAsmTemplatePiece::String("xor %eax, %eax".to_string()) - && template[1] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[2] == InlineAsmTemplatePiece::String("mov %rbx, ".to_string()) - && matches!( - template[3], - InlineAsmTemplatePiece::Placeholder { - operand_idx: 0, - modifier: Some('r'), - span: _ - } - ) - && template[4] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[5] == InlineAsmTemplatePiece::String("cpuid".to_string()) - && template[6] == InlineAsmTemplatePiece::String("\n".to_string()) - && template[7] == InlineAsmTemplatePiece::String("mov ".to_string()) - && matches!( - template[8], - InlineAsmTemplatePiece::Placeholder { - operand_idx: 0, - modifier: Some('r'), - span: _ - } - ) - && template[9] == InlineAsmTemplatePiece::String(", %rbx".to_string()) - { - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } else if template[0] == InlineAsmTemplatePiece::String("rdpmc".to_string()) { - // Return zero dummy values for all performance counters - match operands[0] { - InlineAsmOperand::In { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)), - value: _, - } => {} - _ => unreachable!(), - }; - let lo = match operands[1] { - InlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)), - late: true, - place: Some(place), - } => crate::base::codegen_place(fx, place), - _ => unreachable!(), - }; - let hi = match operands[2] { - InlineAsmOperand::Out { - reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)), - late: true, - place: Some(place), - } => crate::base::codegen_place(fx, place), - _ => unreachable!(), - }; - - let u32_layout = fx.layout_of(fx.tcx.types.u32); - let zero = fx.bcx.ins().iconst(types::I32, 0); - lo.write_cvalue(fx, CValue::by_val(zero, u32_layout)); - hi.write_cvalue(fx, CValue::by_val(zero, u32_layout)); - - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } else if template[0] == InlineAsmTemplatePiece::String("lock xadd ".to_string()) - && matches!( - template[1], - InlineAsmTemplatePiece::Placeholder { operand_idx: 1, modifier: None, span: _ } - ) - && template[2] == InlineAsmTemplatePiece::String(", (".to_string()) - && matches!( - template[3], - InlineAsmTemplatePiece::Placeholder { operand_idx: 0, modifier: None, span: _ } - ) - && template[4] == InlineAsmTemplatePiece::String(")".to_string()) - { - let destination_block = fx.get_block(destination.unwrap()); - fx.bcx.ins().jump(destination_block, &[]); - return; - } - - if cfg!(not(feature = "inline_asm")) { - fx.tcx.sess.span_err( - span, - "asm! and global_asm! support is disabled while compiling rustc_codegen_cranelift", - ); - } else { - fx.tcx.sess.span_err(span, "asm! and global_asm! are not yet supported on Windows"); - } - return; - } - let operands = operands .into_iter() .map(|operand| match *operand { @@ -282,6 +82,12 @@ pub(crate) fn codegen_inline_asm<'tcx>( CInlineAsmOperand::Const { value } } InlineAsmOperand::SymFn { ref value } => { + if cfg!(not(feature = "inline_asm_sym")) { + fx.tcx + .sess + .span_err(span, "asm! and global_asm! sym operands are not yet supported"); + } + let const_ = fx.monomorphize(value.const_); if let ty::FnDef(def_id, args) = *const_.ty().kind() { let instance = ty::Instance::resolve_for_fn_ptr( diff --git a/src/intrinsics/cpuid.rs b/src/intrinsics/cpuid.rs deleted file mode 100644 index 5120b89c4e8b0..0000000000000 --- a/src/intrinsics/cpuid.rs +++ /dev/null @@ -1,74 +0,0 @@ -//! Emulation of a subset of the cpuid x86 instruction. - -use crate::prelude::*; - -/// Emulates a subset of the cpuid x86 instruction. -/// -/// This emulates an intel cpu with sse and sse2 support, but which doesn't support anything else. -pub(crate) fn codegen_cpuid_call<'tcx>( - fx: &mut FunctionCx<'_, '_, 'tcx>, - leaf: Value, - _sub_leaf: Value, -) -> (Value, Value, Value, Value) { - let leaf_0 = fx.bcx.create_block(); - let leaf_1 = fx.bcx.create_block(); - let leaf_7 = fx.bcx.create_block(); - let leaf_8000_0000 = fx.bcx.create_block(); - let leaf_8000_0001 = fx.bcx.create_block(); - let unsupported_leaf = fx.bcx.create_block(); - - let dest = fx.bcx.create_block(); - let eax = fx.bcx.append_block_param(dest, types::I32); - let ebx = fx.bcx.append_block_param(dest, types::I32); - let ecx = fx.bcx.append_block_param(dest, types::I32); - let edx = fx.bcx.append_block_param(dest, types::I32); - - let mut switch = cranelift_frontend::Switch::new(); - switch.set_entry(0, leaf_0); - switch.set_entry(1, leaf_1); - switch.set_entry(7, leaf_7); - switch.set_entry(0x8000_0000, leaf_8000_0000); - switch.set_entry(0x8000_0001, leaf_8000_0001); - switch.emit(&mut fx.bcx, leaf, unsupported_leaf); - - fx.bcx.switch_to_block(leaf_0); - let max_basic_leaf = fx.bcx.ins().iconst(types::I32, 1); - let vend0 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"Genu"))); - let vend2 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"ineI"))); - let vend1 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"ntel"))); - fx.bcx.ins().jump(dest, &[max_basic_leaf, vend0, vend1, vend2]); - - fx.bcx.switch_to_block(leaf_1); - let cpu_signature = fx.bcx.ins().iconst(types::I32, 0); - let additional_information = fx.bcx.ins().iconst(types::I32, 0); - let ecx_features = fx.bcx.ins().iconst(types::I32, 0); - let edx_features = fx.bcx.ins().iconst(types::I32, 1 << 25 /* sse */ | 1 << 26 /* sse2 */); - fx.bcx.ins().jump(dest, &[cpu_signature, additional_information, ecx_features, edx_features]); - - fx.bcx.switch_to_block(leaf_7); - // This leaf technically has subleaves, but we just return zero for all subleaves. - let zero = fx.bcx.ins().iconst(types::I32, 0); - fx.bcx.ins().jump(dest, &[zero, zero, zero, zero]); - - fx.bcx.switch_to_block(leaf_8000_0000); - let extended_max_basic_leaf = fx.bcx.ins().iconst(types::I32, 0); - let zero = fx.bcx.ins().iconst(types::I32, 0); - fx.bcx.ins().jump(dest, &[extended_max_basic_leaf, zero, zero, zero]); - - fx.bcx.switch_to_block(leaf_8000_0001); - let zero = fx.bcx.ins().iconst(types::I32, 0); - let proc_info_ecx = fx.bcx.ins().iconst(types::I32, 0); - let proc_info_edx = fx.bcx.ins().iconst(types::I32, 0); - fx.bcx.ins().jump(dest, &[zero, zero, proc_info_ecx, proc_info_edx]); - - fx.bcx.switch_to_block(unsupported_leaf); - crate::trap::trap_unimplemented( - fx, - "__cpuid_count arch intrinsic doesn't yet support specified leaf", - ); - - fx.bcx.switch_to_block(dest); - fx.bcx.ins().nop(); - - (eax, ebx, ecx, edx) -} diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index e94091e6a25eb..83d5d53624eb1 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -12,7 +12,6 @@ macro_rules! intrinsic_args { } } -mod cpuid; mod llvm; mod llvm_aarch64; mod llvm_x86; @@ -25,7 +24,6 @@ use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths}; use rustc_middle::ty::GenericArgsRef; use rustc_span::symbol::{kw, sym, Symbol}; -pub(crate) use self::cpuid::codegen_cpuid_call; pub(crate) use self::llvm::codegen_llvm_intrinsic_call; use crate::prelude::*; From 69c6aa560cfc1991822fdab1579153f6a25c392f Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sun, 29 Oct 2023 18:53:38 +0000 Subject: [PATCH 8/9] Avoid infinite recursion when cranelift is the default codegen backend --- scripts/setup_rust_fork.sh | 4 +++- scripts/test_bootstrap.sh | 4 +++- src/global_asm.rs | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/setup_rust_fork.sh b/scripts/setup_rust_fork.sh index 3e48fb006dea4..bbb8a010d965f 100644 --- a/scripts/setup_rust_fork.sh +++ b/scripts/setup_rust_fork.sh @@ -4,7 +4,9 @@ set -e # Compiletest expects all standard library paths to start with /rustc/FAKE_PREFIX. # CG_CLIF_STDLIB_REMAP_PATH_PREFIX will cause cg_clif's build system to pass # --remap-path-prefix to handle this. -CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build +# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we +# the LLVM backend isn't compiled in here. +CG_CLIF_FORCE_GNU_AS=1 CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build echo "[SETUP] Rust fork" git clone https://github.com/rust-lang/rust.git || true diff --git a/scripts/test_bootstrap.sh b/scripts/test_bootstrap.sh index 791d457993de3..a8f6d7a202486 100755 --- a/scripts/test_bootstrap.sh +++ b/scripts/test_bootstrap.sh @@ -11,5 +11,7 @@ rm -r compiler/rustc_codegen_cranelift/{Cargo.*,src} cp ../Cargo.* compiler/rustc_codegen_cranelift/ cp -r ../src compiler/rustc_codegen_cranelift/src -./x.py build --stage 1 library/std +# CG_CLIF_FORCE_GNU_AS will force usage of as instead of the LLVM backend of rustc as we +# the LLVM backend isn't compiled in here. +CG_CLIF_FORCE_GNU_AS=1 ./x.py build --stage 1 library/std popd diff --git a/src/global_asm.rs b/src/global_asm.rs index 332a7ca9bf249..b14007f4e5232 100644 --- a/src/global_asm.rs +++ b/src/global_asm.rs @@ -140,7 +140,7 @@ pub(crate) fn compile_global_asm( ); // Assemble `global_asm` - if false { + if option_env!("CG_CLIF_FORCE_GNU_AS").is_some() { let mut child = Command::new(&config.assembler) .arg("-o") .arg(&global_asm_object_file) @@ -164,6 +164,7 @@ pub(crate) fn compile_global_asm( .arg(&global_asm_object_file) .arg("-") .arg("-Abad_asm_style") + .arg("-Zcodegen-backend=llvm") .stdin(Stdio::piped()) .spawn() .expect("Failed to spawn `as`."); From dde58803fd6cbb270c7a437f36a8a3a29fbef679 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Sun, 29 Oct 2023 20:29:52 +0000 Subject: [PATCH 9/9] Rustup to rustc 1.75.0-nightly (e5cfc5547 2023-10-28) --- patches/stdlib-lock.toml | 17 ----------------- rust-toolchain | 2 +- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/patches/stdlib-lock.toml b/patches/stdlib-lock.toml index f10b4d6b9d4fa..9902bca8eab27 100644 --- a/patches/stdlib-lock.toml +++ b/patches/stdlib-lock.toml @@ -40,22 +40,6 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9" -[[package]] -name = "auxv" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e50430f9beb8effb02399fa81c76eeaa26b05e4f03b09285cad8d079c1af5a3d" -dependencies = [ - "byteorder", - "gcc", -] - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - [[package]] name = "cc" version = "1.0.79" @@ -388,7 +372,6 @@ dependencies = [ name = "std_detect" version = "0.1.5" dependencies = [ - "auxv", "cfg-if", "compiler_builtins", "cupid", diff --git a/rust-toolchain b/rust-toolchain index 3735ac1c17b14..7e3eaacf8ef08 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2023-10-21" +channel = "nightly-2023-10-29" components = ["rust-src", "rustc-dev", "llvm-tools"]