From 57bdd9892d0eba5bdd25fc44799235be7b9f5153 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Mon, 25 Jul 2022 15:12:10 +0200
Subject: [PATCH] [ARM] Add target feature to force 32-bit atomics

This adds a +atomics-32 target feature, which instructs LLVM to assume
that lock-free 32-bit atomics are available for this target, even if
they usually wouldn't be.

If only atomic loads/stores are used, then this won't emit libcalls.
If atomic CAS is used, then the user is responsible for providing any
necessary __sync implementations (e.g. by masking interrupts for
single-core privileged use cases).

See https://reviews.llvm.org/D120026#3674333 for context on this
change. The tl;dr is that the thumbv6m target in Rust has historically
made atomic load/store only available, which is incompatible with the
change from D120026, which switched these to use libatomic.

Differential Revision: https://reviews.llvm.org/D130480

(cherry picked from commit b1b1086973d5be26f127540852ace59c5119e90a)
---
 llvm/lib/Target/ARM/ARM.td                 |   9 +
 llvm/lib/Target/ARM/ARMISelLowering.cpp    |   3 +-
 llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll | 203 +++++++++++++++++++++
 3 files changed, 214 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll

diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 73970b9c74c568..71388bc4efa4c7 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -556,6 +556,15 @@ def FeatureAAPCSFrameChainLeaf : SubtargetFeature<"aapcs-frame-chain-leaf",
                                                    "for leaf functions",
                                                    [FeatureAAPCSFrameChain]>;
 
+// Assume that lock-free 32-bit atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureAtomics32 : SubtargetFeature<
+    "atomics-32", "HasForced32BitAtomics", "true",
+    "Assume that lock-free 32-bit atomics are available">;
+
 //===----------------------------------------------------------------------===//
 // ARM architecture class
 //
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 743cca9ff71f2b..4c24d702093255 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1370,7 +1370,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
     // encoding; see ARMISD::MEMBARRIER_MCR.)
     setMaxAtomicSizeInBitsSupported(64);
-  } else if (Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) {
+  } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
+             Subtarget->hasForced32BitAtomics()) {
     // Cortex-M (besides Cortex-M0) have 32-bit atomics.
     setMaxAtomicSizeInBitsSupported(32);
   } else {
diff --git a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
new file mode 100644
index 00000000000000..36ed03a8c384b4
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv6m-none-eabi < %s | FileCheck %s --check-prefixes=CHECK,NO-ATOMIC32
+; RUN: llc -mtriple=thumbv6m-none-eabi -mattr=+atomics-32 < %s | FileCheck %s --check-prefixes=CHECK,ATOMIC32
+
+define i32 @load32(ptr %p) {
+; NO-ATOMIC32-LABEL: load32:
+; NO-ATOMIC32:       @ %bb.0:
+; NO-ATOMIC32-NEXT:    .save {r7, lr}
+; NO-ATOMIC32-NEXT:    push {r7, lr}
+; NO-ATOMIC32-NEXT:    movs r1, #5
+; NO-ATOMIC32-NEXT:    bl __atomic_load_4
+; NO-ATOMIC32-NEXT:    pop {r7, pc}
+;
+; ATOMIC32-LABEL: load32:
+; ATOMIC32:       @ %bb.0:
+; ATOMIC32-NEXT:    ldr r0, [r0]
+; ATOMIC32-NEXT:    dmb sy
+; ATOMIC32-NEXT:    bx lr
+  %v = load atomic i32, ptr %p seq_cst, align 4
+  ret i32 %v
+}
+
+define void @store32(ptr %p) {
+; NO-ATOMIC32-LABEL: store32:
+; NO-ATOMIC32:       @ %bb.0:
+; NO-ATOMIC32-NEXT:    .save {r7, lr}
+; NO-ATOMIC32-NEXT:    push {r7, lr}
+; NO-ATOMIC32-NEXT:    movs r1, #0
+; NO-ATOMIC32-NEXT:    movs r2, #5
+; NO-ATOMIC32-NEXT:    bl __atomic_store_4
+; NO-ATOMIC32-NEXT:    pop {r7, pc}
+;
+; ATOMIC32-LABEL: store32:
+; ATOMIC32:       @ %bb.0:
+; ATOMIC32-NEXT:    dmb sy
+; ATOMIC32-NEXT:    movs r1, #0
+; ATOMIC32-NEXT:    str r1, [r0]
+; ATOMIC32-NEXT:    dmb sy
+; ATOMIC32-NEXT:    bx lr
+  store atomic i32 0, ptr %p seq_cst, align 4
+  ret void
+}
+
+define i32 @rmw32(ptr %p) {
+; NO-ATOMIC32-LABEL: rmw32:
+; NO-ATOMIC32:       @ %bb.0:
+; NO-ATOMIC32-NEXT:    .save {r7, lr}
+; NO-ATOMIC32-NEXT:    push {r7, lr}
+; NO-ATOMIC32-NEXT:    movs r1, #1
+; NO-ATOMIC32-NEXT:    movs r2, #5
+; NO-ATOMIC32-NEXT:    bl __atomic_fetch_add_4
+; NO-ATOMIC32-NEXT:    pop {r7, pc}
+;
+; ATOMIC32-LABEL: rmw32:
+; ATOMIC32:       @ %bb.0:
+; ATOMIC32-NEXT:    .save {r7, lr}
+; ATOMIC32-NEXT:    push {r7, lr}
+; ATOMIC32-NEXT:    dmb sy
+; ATOMIC32-NEXT:    movs r1, #1
+; ATOMIC32-NEXT:    bl __sync_fetch_and_add_4
+; ATOMIC32-NEXT:    dmb sy
+; ATOMIC32-NEXT:    pop {r7, pc}
+  %v = atomicrmw add ptr %p, i32 1 seq_cst, align 4
+  ret i32 %v
+}
+
+define i32 @cmpxchg32(ptr %p) {
+; NO-ATOMIC32-LABEL: cmpxchg32:
+; NO-ATOMIC32:       @ %bb.0:
+; NO-ATOMIC32-NEXT:    .save {r7, lr}
+; NO-ATOMIC32-NEXT:    push {r7, lr}
+; NO-ATOMIC32-NEXT:    .pad #8
+; NO-ATOMIC32-NEXT:    sub sp, #8
+; NO-ATOMIC32-NEXT:    movs r1, #0
+; NO-ATOMIC32-NEXT:    str r1, [sp, #4]
+; NO-ATOMIC32-NEXT:    movs r3, #5
+; NO-ATOMIC32-NEXT:    str r3, [sp]
+; NO-ATOMIC32-NEXT:    add r1, sp, #4
+; NO-ATOMIC32-NEXT:    movs r2, #1
+; NO-ATOMIC32-NEXT:    bl __atomic_compare_exchange_4
+; NO-ATOMIC32-NEXT:    ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT:    add sp, #8
+; NO-ATOMIC32-NEXT:    pop {r7, pc}
+;
+; ATOMIC32-LABEL: cmpxchg32:
+; ATOMIC32:       @ %bb.0:
+; ATOMIC32-NEXT:    .save {r7, lr}
+; ATOMIC32-NEXT:    push {r7, lr}
+; ATOMIC32-NEXT:    dmb sy
+; ATOMIC32-NEXT:    movs r1, #0
+; ATOMIC32-NEXT:    movs r2, #1
+; ATOMIC32-NEXT:    bl __sync_val_compare_and_swap_4
+; ATOMIC32-NEXT:    dmb sy
+; ATOMIC32-NEXT:    pop {r7, pc}
  %res = cmpxchg ptr %p, i32 0, i32 1 seq_cst seq_cst
+  %res.0 = extractvalue { i32, i1 } %res, 0
+  ret i32 %res.0
+}
+
+define i64 @load64(ptr %p) {
+; CHECK-LABEL: load64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    movs r0, #8
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    movs r3, #5
+; CHECK-NEXT:    bl __atomic_load
+; CHECK-NEXT:    ldr r1, [sp, #4]
+; CHECK-NEXT:    ldr r0, [sp]
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r7, pc}
+  %v = load atomic i64, ptr %p seq_cst, align 4
+  ret i64 %v
+}
+
+define void @store64(ptr %p) {
+; CHECK-LABEL: store64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    str r0, [sp, #4]
+; CHECK-NEXT:    str r0, [sp]
+; CHECK-NEXT:    movs r0, #8
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    movs r3, #5
+; CHECK-NEXT:    bl __atomic_store
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r7, pc}
+  store atomic i64 0, ptr %p seq_cst, align 4
+  ret void
+}
+
+define i64 @rmw64(ptr %p) {
+; CHECK-LABEL: rmw64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    ldr r0, [r0]
+; CHECK-NEXT:    ldr r1, [r4, #4]
+; CHECK-NEXT:  .LBB6_1: @ %atomicrmw.start
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    str r0, [sp, #16]
+; CHECK-NEXT:    str r1, [sp, #20]
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    adds r0, r0, #1
+; CHECK-NEXT:    adcs r2, r1
+; CHECK-NEXT:    str r2, [sp, #12]
+; CHECK-NEXT:    str r0, [sp, #8]
+; CHECK-NEXT:    movs r0, #5
+; CHECK-NEXT:    str r0, [sp]
+; CHECK-NEXT:    str r0, [sp, #4]
+; CHECK-NEXT:    movs r0, #8
+; CHECK-NEXT:    add r2, sp, #16
+; CHECK-NEXT:    add r3, sp, #8
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    bl __atomic_compare_exchange
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    ldr r1, [sp, #20]
+; CHECK-NEXT:    ldr r0, [sp, #16]
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    beq .LBB6_1
+; CHECK-NEXT:  @ %bb.2: @ %atomicrmw.end
+; CHECK-NEXT:    add sp, #24
+; CHECK-NEXT:    pop {r4, pc}
+  %v = atomicrmw add ptr %p, i64 1 seq_cst, align 4
+  ret i64 %v
+}
+
+define i64 @cmpxchg64(ptr %p) {
+; CHECK-LABEL: cmpxchg64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .pad #16
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    str r3, [sp, #12]
+; CHECK-NEXT:    str r3, [sp, #8]
+; CHECK-NEXT:    movs r1, #5
+; CHECK-NEXT:    str r1, [sp]
+; CHECK-NEXT:    str r1, [sp, #4]
+; CHECK-NEXT:    add r1, sp, #8
+; CHECK-NEXT:    movs r2, #1
+; CHECK-NEXT:    bl __atomic_compare_exchange_8
+; CHECK-NEXT:    ldr r1, [sp, #12]
+; CHECK-NEXT:    ldr r0, [sp, #8]
+; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    pop {r7, pc}
+  %res = cmpxchg ptr %p, i64 0, i64 1 seq_cst seq_cst
+  %res.0 = extractvalue { i64, i1 } %res, 0
+  ret i64 %res.0
+}
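
Note on supplying the __sync helpers: with +atomics-32, plain atomic loads and stores lower to ldr/str plus dmb as the ATOMIC32 checks above show, but atomicrmw and cmpxchg still call __sync_* functions that the user must provide. Below is a minimal sketch of two such helpers for a single-core Cortex-M0-class device running in privileged mode, masking interrupts via PRIMASK; the helper names (irq_save_and_disable, irq_restore) and the exact prototypes are illustrative assumptions, not part of this patch.

#include <stdint.h>

/* Disable interrupts and return the previous PRIMASK so it can be restored.
   Assumes privileged execution on a single-core ARMv6-M device. */
static inline uint32_t irq_save_and_disable(void) {
  uint32_t primask;
  __asm__ volatile("mrs %0, PRIMASK" : "=r"(primask));
  __asm__ volatile("cpsid i" ::: "memory");
  return primask;
}

/* Restore the PRIMASK state captured by irq_save_and_disable(). */
static inline void irq_restore(uint32_t primask) {
  __asm__ volatile("msr PRIMASK, %0" : : "r"(primask) : "memory");
}

/* Called for 32-bit cmpxchg (see the cmpxchg32 test above). */
uint32_t __sync_val_compare_and_swap_4(volatile uint32_t *p, uint32_t expected,
                                       uint32_t desired) {
  uint32_t primask = irq_save_and_disable();
  uint32_t old = *p;
  if (old == expected)
    *p = desired;
  irq_restore(primask);
  return old;
}

/* Called for 32-bit atomicrmw add (see the rmw32 test above). */
uint32_t __sync_fetch_and_add_4(volatile uint32_t *p, uint32_t val) {
  uint32_t primask = irq_save_and_disable();
  uint32_t old = *p;
  *p = old + val;
  irq_restore(primask);
  return old;
}

The remaining __sync_* entry points the backend may emit follow the same pattern; because interrupts are fully masked around the read-modify-write, each helper is atomic with respect to everything else running on the single core.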