Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,12 @@ fn main() {
// As of rustc 1.68, target_feature "lse2" is not available on rustc side:
// https://github.com/rust-lang/rust/blob/1.68.0/compiler/rustc_codegen_ssa/src/target_features.rs#L58
target_feature_if("lse2", is_macos, &version, None, false);

// As of Apple M1/M1 Pro, on Apple hardware, CAS loop-based RMW is much slower than LL/SC
// loop-based RMW: https://github.com/taiki-e/portable-atomic/pull/89
if is_macos || target_os == "ios" || target_os == "tvos" || target_os == "watchos" {
Copy link
Copy Markdown

@teohhanhui teohhanhui Oct 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This misses Asahi Linux...

And is still the case:

let is_apple = is_macos || target_os == "ios" || target_os == "tvos" || target_os == "watchos";

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to know that the code is being compiled against Asahi Linux at build time?

Otherwise, it would be reasonable to set --cfg portable_atomic_ll_sc_rmw on the user's side.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an option to revert 93e6ec5 entirely, but we want to keep the same codegen as LLVM if possible (LLVM uses CASP regardless of whether it is Apple hardware, though), and we also need to make sure that it does not lose performance on hardware where CASP is fast.

Copy link
Copy Markdown
Owner Author

@taiki-e taiki-e Oct 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to know that the code is being compiled against Asahi Linux at build time?

What I can think of is:

  • target_vendor. If custom targets are used.
  • -C target-cpu=apple-*. If the user uses it.

Both cases can be detected at build time, but I would like to know which one Asahi Linux users actually use.

Copy link
Copy Markdown
Owner Author

@taiki-e taiki-e Oct 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've implemented the latter approach in 5c3a43b.

$ RUSTFLAGS='-C target-cpu=apple-m1' cargo build -vv --target aarch64-unknown-linux-gnu | grep portable_atomic_ll_sc_rmw
[portable-atomic 1.4.3] cargo:rustc-cfg=portable_atomic_ll_sc_rmw(build)                                                                                                    

EDIT: Published in 1.5.0.

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@teohhanhui Was the above approach sufficient for you? Or was it not?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not using any RUSTFLAGS, so...

Copy link
Copy Markdown
Owner Author

@taiki-e taiki-e Dec 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear:

  • Those specified in .cargo/config (or .cargo/config.toml) are also taken into account.
  • With the builtin aarch64-unknown-linux-{gnu,musl} target, if you specify neither target-cpu nor target-feature, the default one (LL/SC) will be used and you should get good performance on Apple hardware.

println!("cargo:rustc-cfg=portable_atomic_ll_sc_rmw");
}
}
"arm" => {
// #[cfg(target_feature = "v7")] and others don't work on stable.
Expand Down
256 changes: 140 additions & 116 deletions src/imp/atomic128/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// at run-time, otherwise, use LDXP/STXP loop.
// If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW.
// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
// When portable_atomic_ll_sc_rmw cfg is set, use LDXP/STXP loop instead of CASP
// loop for RMW. (by default, it is set on Apple hardware; see build script for details)
//
// Note: FEAT_LSE2 doesn't imply FEAT_LSE.
//
Expand Down Expand Up @@ -535,13 +537,19 @@ use self::atomic_compare_exchange as atomic_compare_exchange_weak;

#[inline]
unsafe fn atomic_swap(dst: *mut u128, val: u128, order: Ordering) -> u128 {
#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
#[cfg(all(
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
not(portable_atomic_ll_sc_rmw),
))]
// SAFETY: the caller must uphold the safety contract.
// cfg guarantee that the CPU supports FEAT_LSE.
unsafe {
_atomic_swap_casp(dst, val, order)
}
#[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
#[cfg(not(all(
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
not(portable_atomic_ll_sc_rmw),
)))]
// SAFETY: the caller must uphold the safety contract.
unsafe {
_atomic_swap_ldxp_stxp(dst, val, order)
Expand Down Expand Up @@ -631,12 +639,14 @@ unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) ->
/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
/// - new_lo/new_hi pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_3 {
($name:ident as $name_no_lse:ident, options($($options:tt)*), $($op:tt)*) => {
($name:ident as $reexport_name:ident, options($($options:tt)*), $($op:tt)*) => {
// If FEAT_LSE is available at compile-time, we use CAS based Atomic RMW
// generated by atomic_rmw_by_atomic_update! macro.
#[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
use $name as $name_no_lse;
#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
#[cfg(not(all(
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
not(portable_atomic_ll_sc_rmw),
)))]
use $name as $reexport_name;
#[inline]
unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
Expand Down Expand Up @@ -672,7 +682,7 @@ macro_rules! atomic_rmw_ll_sc_3 {
#[cfg(test)]
paste::paste! {
// Helper to test $op separately.
unsafe fn [<$name_no_lse _op>](dst: *mut u128, val: u128) -> u128 {
unsafe fn [<$reexport_name _op>](dst: *mut u128, val: u128) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
$name(dst, val, Ordering::Relaxed)
Expand All @@ -689,7 +699,12 @@ macro_rules! atomic_rmw_ll_sc_3 {
/// - x6/x7 pair: previous value loaded (read-only for `$op`)
/// - x4/x5 pair: new value that will be stored
macro_rules! atomic_rmw_cas_3 {
($name:ident, $($op:tt)*) => {
($name:ident as $reexport_name:ident, $($op:tt)*) => {
#[cfg(all(
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
not(portable_atomic_ll_sc_rmw),
))]
use $name as $reexport_name;
#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
#[inline]
unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
Expand Down Expand Up @@ -738,102 +753,6 @@ macro_rules! atomic_rmw_cas_3 {
}
};
}
atomic_rmw_ll_sc_3! {
_atomic_add_ldxp_stxp as atomic_add,
// Do not use `preserves_flags` because ADDS and ADCS modify the condition flags.
options(nostack),
concat!(
"adds ",
select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}")
),
concat!(
"adc ",
select_le_or_be!("{new_hi}, {prev_hi}, {val_hi}", "{new_lo}, {prev_lo}, {val_lo}")
),
}
atomic_rmw_cas_3! {
atomic_add,
concat!(
"adds ",
select_le_or_be!("x4, x6, {val_lo}", "x5, x7, {val_hi}")
),
concat!(
"adc ",
select_le_or_be!("x5, x7, {val_hi}", "x4, x6, {val_lo}")
),
}
atomic_rmw_ll_sc_3! {
_atomic_sub_ldxp_stxp as atomic_sub,
// Do not use `preserves_flags` because SUBS and SBCS modify the condition flags.
options(nostack),
concat!(
"subs ",
select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}")
),
concat!(
"sbc ",
select_le_or_be!("{new_hi}, {prev_hi}, {val_hi}", "{new_lo}, {prev_lo}, {val_lo}")
),
}
atomic_rmw_cas_3! {
atomic_sub,
concat!(
"subs ",
select_le_or_be!("x4, x6, {val_lo}", "x5, x7, {val_hi}")
),
concat!(
"sbc ",
select_le_or_be!("x5, x7, {val_hi}", "x4, x6, {val_lo}")
),
}
atomic_rmw_ll_sc_3! {
_atomic_and_ldxp_stxp as atomic_and,
options(nostack, preserves_flags),
"and {new_lo}, {prev_lo}, {val_lo}",
"and {new_hi}, {prev_hi}, {val_hi}",
}
atomic_rmw_cas_3! {
atomic_and,
"and x4, x6, {val_lo}",
"and x5, x7, {val_hi}",
}
atomic_rmw_ll_sc_3! {
_atomic_nand_ldxp_stxp as atomic_nand,
options(nostack, preserves_flags),
"and {new_lo}, {prev_lo}, {val_lo}",
"mvn {new_lo}, {new_lo}",
"and {new_hi}, {prev_hi}, {val_hi}",
"mvn {new_hi}, {new_hi}",
}
atomic_rmw_cas_3! {
atomic_nand,
"and x4, x6, {val_lo}",
"mvn x4, x4",
"and x5, x7, {val_hi}",
"mvn x5, x5",
}
atomic_rmw_ll_sc_3! {
_atomic_or_ldxp_stxp as atomic_or,
options(nostack, preserves_flags),
"orr {new_lo}, {prev_lo}, {val_lo}",
"orr {new_hi}, {prev_hi}, {val_hi}",
}
atomic_rmw_cas_3! {
atomic_or,
"orr x4, x6, {val_lo}",
"orr x5, x7, {val_hi}",
}
atomic_rmw_ll_sc_3! {
_atomic_xor_ldxp_stxp as atomic_xor,
options(nostack, preserves_flags),
"eor {new_lo}, {prev_lo}, {val_lo}",
"eor {new_hi}, {prev_hi}, {val_hi}",
}
atomic_rmw_cas_3! {
atomic_xor,
"eor x4, x6, {val_lo}",
"eor x5, x7, {val_hi}",
}

/// Atomic RMW by LL/SC loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
Expand All @@ -842,12 +761,14 @@ atomic_rmw_cas_3! {
/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
/// - new_lo/new_hi pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_2 {
($name:ident as $name_no_lse:ident, options($($options:tt)*), $($op:tt)*) => {
($name:ident as $reexport_name:ident, options($($options:tt)*), $($op:tt)*) => {
// If FEAT_LSE is available at compile-time, we use CAS based Atomic RMW
// generated by atomic_rmw_by_atomic_update! macro.
#[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
use $name as $name_no_lse;
#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
#[cfg(not(all(
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
not(portable_atomic_ll_sc_rmw),
)))]
use $name as $reexport_name;
#[inline]
unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
debug_assert!(dst as usize % 16 == 0);
Expand Down Expand Up @@ -880,7 +801,7 @@ macro_rules! atomic_rmw_ll_sc_2 {
#[cfg(test)]
paste::paste! {
// Helper to test $op separately.
unsafe fn [<$name_no_lse _op>](dst: *mut u128) -> u128 {
unsafe fn [<$reexport_name _op>](dst: *mut u128) -> u128 {
// SAFETY: the caller must uphold the safety contract.
unsafe {
$name(dst, Ordering::Relaxed)
Expand All @@ -896,7 +817,12 @@ macro_rules! atomic_rmw_ll_sc_2 {
/// - x6/x7 pair: previous value loaded (read-only for `$op`)
/// - x4/x5 pair: new value that will be stored
macro_rules! atomic_rmw_cas_2 {
($name:ident, $($op:tt)*) => {
($name:ident as $reexport_name:ident, $($op:tt)*) => {
#[cfg(all(
any(target_feature = "lse", portable_atomic_target_feature = "lse"),
not(portable_atomic_ll_sc_rmw),
))]
use $name as $reexport_name;
#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
#[inline]
unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
Expand Down Expand Up @@ -942,14 +868,112 @@ macro_rules! atomic_rmw_cas_2 {
}
};
}

// Instantiates `atomic_rmw_ll_sc_3!` to define `_atomic_add_ldxp_stxp`, the LL/SC
// (LDXP/STXP) loop-based 128-bit atomic add. The macro re-exports it as `atomic_add`
// unless FEAT_LSE is enabled at compile time and `portable_atomic_ll_sc_rmw` is unset.
// The addition is done in two 64-bit steps: ADDS adds one half and produces the carry
// flag, then ADC adds the other half plus that carry.
// NOTE(review): `select_le_or_be!` presumably picks the first string on little-endian
// targets and the second on big-endian targets — confirm against its definition.
atomic_rmw_ll_sc_3! {
_atomic_add_ldxp_stxp as atomic_add,
// Do not use `preserves_flags` because ADDS modifies the condition flags
// (ADC only consumes the carry flag set by ADDS).
options(nostack),
concat!(
"adds ",
select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}")
),
concat!(
"adc ",
select_le_or_be!("{new_hi}, {prev_hi}, {val_hi}", "{new_lo}, {prev_lo}, {val_lo}")
),
}
// Instantiates `atomic_rmw_cas_3!` to define `_atomic_add_casp`, the CAS-based 128-bit
// atomic add. The macro re-exports it as `atomic_add` only when FEAT_LSE is enabled at
// compile time and `portable_atomic_ll_sc_rmw` is unset.
// Register convention of the macro: x6/x7 hold the previously loaded value (read-only
// here) and x4/x5 receive the new value to be stored.
// ADDS adds one 64-bit half and produces the carry flag; ADC adds the other half plus
// that carry.
atomic_rmw_cas_3! {
_atomic_add_casp as atomic_add,
concat!(
"adds ",
select_le_or_be!("x4, x6, {val_lo}", "x5, x7, {val_hi}")
),
concat!(
"adc ",
select_le_or_be!("x5, x7, {val_hi}", "x4, x6, {val_lo}")
),
}
// Instantiates `atomic_rmw_ll_sc_3!` to define `_atomic_sub_ldxp_stxp`, the LL/SC
// (LDXP/STXP) loop-based 128-bit atomic subtract. The macro re-exports it as
// `atomic_sub` unless FEAT_LSE is enabled at compile time and
// `portable_atomic_ll_sc_rmw` is unset.
// The subtraction is done in two 64-bit steps: SUBS subtracts one half and produces
// the borrow (carry flag), then SBC subtracts the other half using that flag.
// NOTE(review): `select_le_or_be!` presumably picks the first string on little-endian
// targets and the second on big-endian targets — confirm against its definition.
atomic_rmw_ll_sc_3! {
_atomic_sub_ldxp_stxp as atomic_sub,
// Do not use `preserves_flags` because SUBS modifies the condition flags
// (SBC only consumes the carry flag set by SUBS).
options(nostack),
concat!(
"subs ",
select_le_or_be!("{new_lo}, {prev_lo}, {val_lo}", "{new_hi}, {prev_hi}, {val_hi}")
),
concat!(
"sbc ",
select_le_or_be!("{new_hi}, {prev_hi}, {val_hi}", "{new_lo}, {prev_lo}, {val_lo}")
),
}
// Instantiates `atomic_rmw_cas_3!` to define `_atomic_sub_casp`, the CAS-based 128-bit
// atomic subtract. The macro re-exports it as `atomic_sub` only when FEAT_LSE is
// enabled at compile time and `portable_atomic_ll_sc_rmw` is unset.
// Register convention of the macro: x6/x7 hold the previously loaded value (read-only
// here) and x4/x5 receive the new value to be stored.
// SUBS subtracts one 64-bit half and produces the borrow (carry flag); SBC subtracts
// the other half using that flag.
atomic_rmw_cas_3! {
_atomic_sub_casp as atomic_sub,
concat!(
"subs ",
select_le_or_be!("x4, x6, {val_lo}", "x5, x7, {val_hi}")
),
concat!(
"sbc ",
select_le_or_be!("x5, x7, {val_hi}", "x4, x6, {val_lo}")
),
}
// Instantiates `atomic_rmw_ll_sc_3!` to define `_atomic_and_ldxp_stxp`, the LL/SC
// (LDXP/STXP) loop-based 128-bit atomic bitwise AND. The macro re-exports it as
// `atomic_and` unless FEAT_LSE is enabled at compile time and
// `portable_atomic_ll_sc_rmw` is unset.
// Each 64-bit half is ANDed independently; AND does not touch the condition flags,
// so `preserves_flags` can be specified.
atomic_rmw_ll_sc_3! {
_atomic_and_ldxp_stxp as atomic_and,
options(nostack, preserves_flags),
"and {new_lo}, {prev_lo}, {val_lo}",
"and {new_hi}, {prev_hi}, {val_hi}",
}
// Instantiates `atomic_rmw_cas_3!` to define `_atomic_and_casp`, the CAS-based 128-bit
// atomic bitwise AND. The macro re-exports it as `atomic_and` only when FEAT_LSE is
// enabled at compile time and `portable_atomic_ll_sc_rmw` is unset.
// x6/x7 hold the previously loaded value; x4/x5 receive the new value to be stored.
atomic_rmw_cas_3! {
_atomic_and_casp as atomic_and,
"and x4, x6, {val_lo}",
"and x5, x7, {val_hi}",
}
// Instantiates `atomic_rmw_ll_sc_3!` to define `_atomic_nand_ldxp_stxp`, the LL/SC
// (LDXP/STXP) loop-based 128-bit atomic NAND. The macro re-exports it as
// `atomic_nand` unless FEAT_LSE is enabled at compile time and
// `portable_atomic_ll_sc_rmw` is unset.
// For each 64-bit half: new = !(prev & val), computed as AND followed by MVN
// (bitwise NOT) applied in place to the intermediate result.
atomic_rmw_ll_sc_3! {
_atomic_nand_ldxp_stxp as atomic_nand,
options(nostack, preserves_flags),
"and {new_lo}, {prev_lo}, {val_lo}",
"mvn {new_lo}, {new_lo}",
"and {new_hi}, {prev_hi}, {val_hi}",
"mvn {new_hi}, {new_hi}",
}
// Instantiates `atomic_rmw_cas_3!` to define `_atomic_nand_casp`, the CAS-based
// 128-bit atomic NAND. The macro re-exports it as `atomic_nand` only when FEAT_LSE is
// enabled at compile time and `portable_atomic_ll_sc_rmw` is unset.
// x6/x7 hold the previously loaded value; x4/x5 receive the new value to be stored.
// For each 64-bit half: new = !(prev & val), computed as AND followed by MVN in place.
atomic_rmw_cas_3! {
_atomic_nand_casp as atomic_nand,
"and x4, x6, {val_lo}",
"mvn x4, x4",
"and x5, x7, {val_hi}",
"mvn x5, x5",
}
// Instantiates `atomic_rmw_ll_sc_3!` to define `_atomic_or_ldxp_stxp`, the LL/SC
// (LDXP/STXP) loop-based 128-bit atomic bitwise OR. The macro re-exports it as
// `atomic_or` unless FEAT_LSE is enabled at compile time and
// `portable_atomic_ll_sc_rmw` is unset.
// Each 64-bit half is ORed independently with ORR.
atomic_rmw_ll_sc_3! {
_atomic_or_ldxp_stxp as atomic_or,
options(nostack, preserves_flags),
"orr {new_lo}, {prev_lo}, {val_lo}",
"orr {new_hi}, {prev_hi}, {val_hi}",
}
// Instantiates `atomic_rmw_cas_3!` to define `_atomic_or_casp`, the CAS-based 128-bit
// atomic bitwise OR. The macro re-exports it as `atomic_or` only when FEAT_LSE is
// enabled at compile time and `portable_atomic_ll_sc_rmw` is unset.
// x6/x7 hold the previously loaded value; x4/x5 receive the new value to be stored.
atomic_rmw_cas_3! {
_atomic_or_casp as atomic_or,
"orr x4, x6, {val_lo}",
"orr x5, x7, {val_hi}",
}
// Instantiates `atomic_rmw_ll_sc_3!` to define `_atomic_xor_ldxp_stxp`, the LL/SC
// (LDXP/STXP) loop-based 128-bit atomic bitwise XOR. The macro re-exports it as
// `atomic_xor` unless FEAT_LSE is enabled at compile time and
// `portable_atomic_ll_sc_rmw` is unset.
// Each 64-bit half is XORed independently with EOR.
atomic_rmw_ll_sc_3! {
_atomic_xor_ldxp_stxp as atomic_xor,
options(nostack, preserves_flags),
"eor {new_lo}, {prev_lo}, {val_lo}",
"eor {new_hi}, {prev_hi}, {val_hi}",
}
// Instantiates `atomic_rmw_cas_3!` to define `_atomic_xor_casp`, the CAS-based 128-bit
// atomic bitwise XOR. The macro re-exports it as `atomic_xor` only when FEAT_LSE is
// enabled at compile time and `portable_atomic_ll_sc_rmw` is unset.
// x6/x7 hold the previously loaded value; x4/x5 receive the new value to be stored.
atomic_rmw_cas_3! {
_atomic_xor_casp as atomic_xor,
"eor x4, x6, {val_lo}",
"eor x5, x7, {val_hi}",
}

// Instantiates `atomic_rmw_ll_sc_2!` (the two-argument form: no operand value) to
// define `_atomic_not_ldxp_stxp`, the LL/SC (LDXP/STXP) loop-based 128-bit atomic
// bitwise NOT. The macro re-exports it as `atomic_not` unless FEAT_LSE is enabled at
// compile time and `portable_atomic_ll_sc_rmw` is unset.
// Each 64-bit half of the previous value is inverted independently with MVN.
atomic_rmw_ll_sc_2! {
_atomic_not_ldxp_stxp as atomic_not,
options(nostack, preserves_flags),
"mvn {new_lo}, {prev_lo}",
"mvn {new_hi}, {prev_hi}",
}
atomic_rmw_cas_2! {
atomic_not,
_atomic_not_casp as atomic_not,
"mvn x4, x6",
"mvn x5, x7",
}
Expand All @@ -961,7 +985,7 @@ atomic_rmw_ll_sc_2! {
concat!("ngc ", select_le_or_be!("{new_hi}, {prev_hi}", "{new_lo}, {prev_lo}")),
}
atomic_rmw_cas_2! {
atomic_neg,
_atomic_neg_casp as atomic_neg,
concat!("negs ", select_le_or_be!("x4, x6", "x5, x7")),
concat!("ngc ", select_le_or_be!("x5, x7", "x4, x6")),
}
Expand All @@ -976,7 +1000,7 @@ atomic_rmw_ll_sc_3! {
"csel {new_lo}, {prev_lo}, {val_lo}, lt", // select lo 64-bit
}
atomic_rmw_cas_3! {
atomic_max,
_atomic_max_casp as atomic_max,
select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
"csel x5, x7, {val_hi}, lt", // select hi 64-bit
Expand All @@ -993,7 +1017,7 @@ atomic_rmw_ll_sc_3! {
"csel {new_lo}, {prev_lo}, {val_lo}, lo", // select lo 64-bit
}
atomic_rmw_cas_3! {
atomic_umax,
_atomic_umax_casp as atomic_umax,
select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
"csel x5, x7, {val_hi}, lo", // select hi 64-bit
Expand All @@ -1010,7 +1034,7 @@ atomic_rmw_ll_sc_3! {
"csel {new_lo}, {prev_lo}, {val_lo}, ge", // select lo 64-bit
}
atomic_rmw_cas_3! {
atomic_min,
_atomic_min_casp as atomic_min,
select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
"csel x5, x7, {val_hi}, ge", // select hi 64-bit
Expand All @@ -1027,7 +1051,7 @@ atomic_rmw_ll_sc_3! {
"csel {new_lo}, {prev_lo}, {val_lo}, hs", // select lo 64-bit
}
atomic_rmw_cas_3! {
atomic_umin,
_atomic_umin_casp as atomic_umin,
select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
"csel x5, x7, {val_hi}, hs", // select hi 64-bit
Expand Down