diff --git a/build.rs b/build.rs index 9c7df9690..5198312fa 100644 --- a/build.rs +++ b/build.rs @@ -200,6 +200,12 @@ fn main() { // As of rustc 1.68, target_feature "lse2" is not available on rustc side: // https://github.com/rust-lang/rust/blob/1.68.0/compiler/rustc_codegen_ssa/src/target_features.rs#L58 target_feature_if("lse2", is_macos, &version, None, false); + + // As of Apple M1/M1 Pro, on Apple hardware, CAS loop-based RMW is much slower than LL/SC + // loop-based RMW: https://github.com/taiki-e/portable-atomic/pull/89 + if is_macos || target_os == "ios" || target_os == "tvos" || target_os == "watchos" { + println!("cargo:rustc-cfg=portable_atomic_ll_sc_rmw"); + } } "arm" => { // #[cfg(target_feature = "v7")] and others don't work on stable. diff --git a/src/imp/atomic128/aarch64.rs b/src/imp/atomic128/aarch64.rs index e4b050df7..1f377a8f5 100644 --- a/src/imp/atomic128/aarch64.rs +++ b/src/imp/atomic128/aarch64.rs @@ -13,6 +13,8 @@ // at run-time, otherwise, use LDXP/STXP loop. // If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW. // If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store. +// When portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP +// loop for RMW. (by default, it is set on Apple hardware; see build script for details) // // Note: FEAT_LSE2 doesn't imply FEAT_LSE. // @@ -535,13 +537,19 @@ use self::atomic_compare_exchange as atomic_compare_exchange_weak; #[inline] unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 { - #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] + #[cfg(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ))] // SAFETY: the caller must uphold the safety contract. // cfg guarantee that the CPU supports FEAT_LSE. unsafe { _atomic_swap_casp(dst, val, order) } - #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] + #[cfg(not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )))] // SAFETY: the caller must uphold the safety contract. unsafe { _atomic_swap_ldxp_stxp(dst, val, order) @@ -631,12 +639,14 @@ unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> /// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`) /// - new_lo/new_hi pair: new value that will to stored by sc macro_rules! atomic_rmw_ll_sc_3 { - ($name:ident as $name_no_lse:ident, options($($options:tt)*), $($op:tt)*) => { + ($name:ident as $reexport_name:ident, options($($options:tt)*), $($op:tt)*) => { // If FEAT_LSE is available at compile-time, we use CAS based Atomic RMW // generated by atomic_rmw_by_atomic_update! macro. - #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] - use $name as $name_no_lse; - #[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] + #[cfg(not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )))] + use $name as $reexport_name; #[inline] unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); @@ -672,7 +682,7 @@ macro_rules! atomic_rmw_ll_sc_3 { #[cfg(test)] paste::paste! { // Helper to test $op separately. - unsafe fn [<$name_no_lse _op>](dst: *mut u128, val: u128) -> u128 { + unsafe fn [<$reexport_name _op>](dst: *mut u128, val: u128) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { $name(dst, val, Ordering::Relaxed) @@ -689,7 +699,12 @@ macro_rules! atomic_rmw_ll_sc_3 { /// - x6/x7 pair: previous value loaded (read-only for `$op`) /// - x4/x5 pair: new value that will to stored macro_rules! atomic_rmw_cas_3 { - ($name:ident, $($op:tt)*) => { + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + #[cfg(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ))] + use $name as $reexport_name; #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] #[inline] unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 { @@ -738,102 +753,6 @@ macro_rules! atomic_rmw_cas_3 { } }; } -atomic_rmw_ll_sc_3! { - _atomic_add_ldxp_stxp as atomic_add, - // Do not use `preserves_flags` because ADDS and ADCS modify the condition flags. - options(nostack), - concat!( - "adds ", - select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}") - ), - concat!( - "adc ", - select_le_or_be!("{new_hi}, {prev_hi}, {val_hi}", "{new_lo}, {prev_lo}, {val_lo}") - ), -} -atomic_rmw_cas_3! { - atomic_add, - concat!( - "adds ", - select_le_or_be!("x4, x6, {val_lo}", "x5, x7, {val_hi}") - ), - concat!( - "adc ", - select_le_or_be!("x5, x7, {val_hi}", "x4, x6, {val_lo}") - ), -} -atomic_rmw_ll_sc_3! { - _atomic_sub_ldxp_stxp as atomic_sub, - // Do not use `preserves_flags` because SUBS and SBCS modify the condition flags. - options(nostack), - concat!( - "subs ", - select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}") - ), - concat!( - "sbc ", - select_le_or_be!("{new_hi}, {prev_hi}, {val_hi}", "{new_lo}, {prev_lo}, {val_lo}") - ), -} -atomic_rmw_cas_3! { - atomic_sub, - concat!( - "subs ", - select_le_or_be!("x4, x6, {val_lo}", "x5, x7, {val_hi}") - ), - concat!( - "sbc ", - select_le_or_be!("x5, x7, {val_hi}", "x4, x6, {val_lo}") - ), -} -atomic_rmw_ll_sc_3! { - _atomic_and_ldxp_stxp as atomic_and, - options(nostack, preserves_flags), - "and {new_lo}, {prev_lo}, {val_lo}", - "and {new_hi}, {prev_hi}, {val_hi}", -} -atomic_rmw_cas_3! { - atomic_and, - "and x4, x6, {val_lo}", - "and x5, x7, {val_hi}", -} -atomic_rmw_ll_sc_3! { - _atomic_nand_ldxp_stxp as atomic_nand, - options(nostack, preserves_flags), - "and {new_lo}, {prev_lo}, {val_lo}", - "mvn {new_lo}, {new_lo}", - "and {new_hi}, {prev_hi}, {val_hi}", - "mvn {new_hi}, {new_hi}", -} -atomic_rmw_cas_3! { - atomic_nand, - "and x4, x6, {val_lo}", - "mvn x4, x4", - "and x5, x7, {val_hi}", - "mvn x5, x5", -} -atomic_rmw_ll_sc_3! { - _atomic_or_ldxp_stxp as atomic_or, - options(nostack, preserves_flags), - "orr {new_lo}, {prev_lo}, {val_lo}", - "orr {new_hi}, {prev_hi}, {val_hi}", -} -atomic_rmw_cas_3! { - atomic_or, - "orr x4, x6, {val_lo}", - "orr x5, x7, {val_hi}", -} -atomic_rmw_ll_sc_3! { - _atomic_xor_ldxp_stxp as atomic_xor, - options(nostack, preserves_flags), - "eor {new_lo}, {prev_lo}, {val_lo}", - "eor {new_hi}, {prev_hi}, {val_hi}", -} -atomic_rmw_cas_3! { - atomic_xor, - "eor x4, x6, {val_lo}", - "eor x5, x7, {val_hi}", -} /// Atomic RMW by LL/SC loop (2 arguments) /// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;` @@ -842,12 +761,14 @@ atomic_rmw_cas_3! { /// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`) /// - new_lo/new_hi pair: new value that will to stored by sc macro_rules! atomic_rmw_ll_sc_2 { - ($name:ident as $name_no_lse:ident, options($($options:tt)*), $($op:tt)*) => { + ($name:ident as $reexport_name:ident, options($($options:tt)*), $($op:tt)*) => { // If FEAT_LSE is available at compile-time, we use CAS based Atomic RMW // generated by atomic_rmw_by_atomic_update! macro. - #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))] - use $name as $name_no_lse; - #[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))] + #[cfg(not(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + )))] + use $name as $reexport_name; #[inline] unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 { debug_assert!(dst as usize % 16 == 0); @@ -880,7 +801,7 @@ macro_rules! atomic_rmw_ll_sc_2 { #[cfg(test)] paste::paste! { // Helper to test $op separately. - unsafe fn [<$name_no_lse _op>](dst: *mut u128) -> u128 { + unsafe fn [<$reexport_name _op>](dst: *mut u128) -> u128 { // SAFETY: the caller must uphold the safety contract. unsafe { $name(dst, Ordering::Relaxed) @@ -896,7 +817,12 @@ macro_rules! atomic_rmw_ll_sc_2 { /// - x6/x7 pair: previous value loaded (read-only for `$op`) /// - x4/x5 pair: new value that will to stored macro_rules! atomic_rmw_cas_2 { - ($name:ident, $($op:tt)*) => { + ($name:ident as $reexport_name:ident, $($op:tt)*) => { + #[cfg(all( + any(target_feature = "lse", portable_atomic_target_feature = "lse"), + not(portable_atomic_ll_sc_rmw), + ))] + use $name as $reexport_name; #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))] #[inline] unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 { @@ -942,6 +868,104 @@ macro_rules! atomic_rmw_cas_2 { } }; } + +atomic_rmw_ll_sc_3! { + _atomic_add_ldxp_stxp as atomic_add, + // Do not use `preserves_flags` because ADDS and ADCS modify the condition flags. + options(nostack), + concat!( + "adds ", + select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}") + ), + concat!( + "adc ", + select_le_or_be!("{new_hi}, {prev_hi}, {val_hi}", "{new_lo}, {prev_lo}, {val_lo}") + ), +} +atomic_rmw_cas_3! { + _atomic_add_casp as atomic_add, + concat!( + "adds ", + select_le_or_be!("x4, x6, {val_lo}", "x5, x7, {val_hi}") + ), + concat!( + "adc ", + select_le_or_be!("x5, x7, {val_hi}", "x4, x6, {val_lo}") + ), +} +atomic_rmw_ll_sc_3! { + _atomic_sub_ldxp_stxp as atomic_sub, + // Do not use `preserves_flags` because SUBS and SBCS modify the condition flags. + options(nostack), + concat!( + "subs ", + select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}") + ), + concat!( + "sbc ", + select_le_or_be!("{new_hi}, {prev_hi}, {val_hi}", "{new_lo}, {prev_lo}, {val_lo}") + ), +} +atomic_rmw_cas_3! { + _atomic_sub_casp as atomic_sub, + concat!( + "subs ", + select_le_or_be!("x4, x6, {val_lo}", "x5, x7, {val_hi}") + ), + concat!( + "sbc ", + select_le_or_be!("x5, x7, {val_hi}", "x4, x6, {val_lo}") + ), +} +atomic_rmw_ll_sc_3! { + _atomic_and_ldxp_stxp as atomic_and, + options(nostack, preserves_flags), + "and {new_lo}, {prev_lo}, {val_lo}", + "and {new_hi}, {prev_hi}, {val_hi}", +} +atomic_rmw_cas_3! { + _atomic_and_casp as atomic_and, + "and x4, x6, {val_lo}", + "and x5, x7, {val_hi}", +} +atomic_rmw_ll_sc_3! { + _atomic_nand_ldxp_stxp as atomic_nand, + options(nostack, preserves_flags), + "and {new_lo}, {prev_lo}, {val_lo}", + "mvn {new_lo}, {new_lo}", + "and {new_hi}, {prev_hi}, {val_hi}", + "mvn {new_hi}, {new_hi}", +} +atomic_rmw_cas_3! { + _atomic_nand_casp as atomic_nand, + "and x4, x6, {val_lo}", + "mvn x4, x4", + "and x5, x7, {val_hi}", + "mvn x5, x5", +} +atomic_rmw_ll_sc_3! { + _atomic_or_ldxp_stxp as atomic_or, + options(nostack, preserves_flags), + "orr {new_lo}, {prev_lo}, {val_lo}", + "orr {new_hi}, {prev_hi}, {val_hi}", +} +atomic_rmw_cas_3! { + _atomic_or_casp as atomic_or, + "orr x4, x6, {val_lo}", + "orr x5, x7, {val_hi}", +} +atomic_rmw_ll_sc_3! { + _atomic_xor_ldxp_stxp as atomic_xor, + options(nostack, preserves_flags), + "eor {new_lo}, {prev_lo}, {val_lo}", + "eor {new_hi}, {prev_hi}, {val_hi}", +} +atomic_rmw_cas_3! { + _atomic_xor_casp as atomic_xor, + "eor x4, x6, {val_lo}", + "eor x5, x7, {val_hi}", +} + atomic_rmw_ll_sc_2! { _atomic_not_ldxp_stxp as atomic_not, options(nostack, preserves_flags), @@ -949,7 +973,7 @@ atomic_rmw_ll_sc_2! { "mvn {new_hi}, {prev_hi}", } atomic_rmw_cas_2! { - atomic_not, + _atomic_not_casp as atomic_not, "mvn x4, x6", "mvn x5, x7", } @@ -961,7 +985,7 @@ atomic_rmw_ll_sc_2! { concat!("ngc ", select_le_or_be!("{new_hi}, {prev_hi}", "{new_lo}, {prev_lo}")), } atomic_rmw_cas_2! { - atomic_neg, + _atomic_neg_casp as atomic_neg, concat!("negs ", select_le_or_be!("x4, x6", "x5, x7")), concat!("ngc ", select_le_or_be!("x5, x7", "x4, x6")), } @@ -976,7 +1000,7 @@ atomic_rmw_ll_sc_3! { "csel {new_lo}, {prev_lo}, {val_lo}, lt", // select lo 64-bit } atomic_rmw_cas_3! { - atomic_max, + _atomic_max_casp as atomic_max, select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"), "csel x5, x7, {val_hi}, lt", // select hi 64-bit @@ -993,7 +1017,7 @@ atomic_rmw_ll_sc_3! { "csel {new_lo}, {prev_lo}, {val_lo}, lo", // select lo 64-bit } atomic_rmw_cas_3! { - atomic_umax, + _atomic_umax_casp as atomic_umax, select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"), "csel x5, x7, {val_hi}, lo", // select hi 64-bit @@ -1010,7 +1034,7 @@ atomic_rmw_ll_sc_3! { "csel {new_lo}, {prev_lo}, {val_lo}, ge", // select lo 64-bit } atomic_rmw_cas_3! { - atomic_min, + _atomic_min_casp as atomic_min, select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"), "csel x5, x7, {val_hi}, ge", // select hi 64-bit @@ -1027,7 +1051,7 @@ atomic_rmw_ll_sc_3! { "csel {new_lo}, {prev_lo}, {val_lo}, hs", // select lo 64-bit } atomic_rmw_cas_3! { - atomic_umin, + _atomic_umin_casp as atomic_umin, select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"), select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"), "csel x5, x7, {val_hi}, hs", // select hi 64-bit