Skip to content

Commit

Permalink
IR: Add atomicrmw uinc_wrap and udec_wrap
Browse files Browse the repository at this point in the history
These are essentially add/sub 1 with a clamping value.

AMDGPU has instructions for these. CUDA/HIP expose these as
atomicInc/atomicDec. Currently we use target intrinsics for these,
but those do not carry the ordering and syncscope. Add these to
atomicrmw so we can carry these and benefit from the regular
legalization processes.
  • Loading branch information
arsenm committed Jan 24, 2023
1 parent e44a305 commit 778cf54
Show file tree
Hide file tree
Showing 44 changed files with 5,326 additions and 71 deletions.
5 changes: 5 additions & 0 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10537,6 +10537,8 @@ operation. The operation must be one of the following keywords:
- fsub
- fmax
- fmin
- uinc_wrap
- udec_wrap

For most of these operations, the type of '<value>' must be an integer
type whose bit width is a power of two greater than or equal to eight
Expand Down Expand Up @@ -10581,6 +10583,9 @@ operation argument:
- fsub: ``*ptr = *ptr - val`` (using floating point arithmetic)
- fmax: ``*ptr = maxnum(*ptr, val)`` (match the ``llvm.maxnum.*`` intrinsic)
- fmin: ``*ptr = minnum(*ptr, val)`` (match the ``llvm.minnum.*`` intrinsic)
- uinc_wrap: ``*ptr = (*ptr u>= val) ? 0 : (*ptr + 1)`` (increment value with wraparound to zero when incremented above input value)
- udec_wrap: ``*ptr = ((*ptr == 0) || (*ptr u> val)) ? val : (*ptr - 1)`` (decrement with wraparound to input value when decremented below zero or when the current value is above input value)


Example:
""""""""
Expand Down
2 changes: 2 additions & 0 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ Changes to the LLVM IR
types that need to be preserved through the optimizer, but otherwise are not
introspectable by target-independent optimizations.

* Added ``uinc_wrap`` and ``udec_wrap`` operations to ``atomicrmw``.

Changes to building LLVM
------------------------

Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/AsmParser/LLToken.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,8 @@ enum Kind {
kw_umin,
kw_fmax,
kw_fmin,
kw_uinc_wrap,
kw_udec_wrap,

// Instruction Opcodes (Opcode in UIntVal).
kw_fneg,
Expand Down
4 changes: 3 additions & 1 deletion llvm/include/llvm/Bitcode/LLVMBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,9 @@ enum RMWOperations {
RMW_FADD = 11,
RMW_FSUB = 12,
RMW_FMAX = 13,
RMW_FMIN = 14
RMW_FMIN = 14,
RMW_UINC_WRAP = 15,
RMW_UDEC_WRAP = 16
};

/// OverflowingBinaryOperatorOptionalFlags - Flags for serializing
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/ISDOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1199,6 +1199,8 @@ enum NodeType {
ATOMIC_LOAD_FSUB,
ATOMIC_LOAD_FMAX,
ATOMIC_LOAD_FMIN,
ATOMIC_LOAD_UINC_WRAP,
ATOMIC_LOAD_UDEC_WRAP,

// Masked load and store - consecutive vector load and store operations
// with additional mask operand that prevents memory accesses to the
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1419,6 +1419,8 @@ class MemSDNode : public SDNode {
case ISD::ATOMIC_LOAD_FSUB:
case ISD::ATOMIC_LOAD_FMAX:
case ISD::ATOMIC_LOAD_FMIN:
case ISD::ATOMIC_LOAD_UINC_WRAP:
case ISD::ATOMIC_LOAD_UDEC_WRAP:
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE:
case ISD::MLOAD:
Expand Down Expand Up @@ -1486,6 +1488,8 @@ class AtomicSDNode : public MemSDNode {
N->getOpcode() == ISD::ATOMIC_LOAD_FSUB ||
N->getOpcode() == ISD::ATOMIC_LOAD_FMAX ||
N->getOpcode() == ISD::ATOMIC_LOAD_FMIN ||
N->getOpcode() == ISD::ATOMIC_LOAD_UINC_WRAP ||
N->getOpcode() == ISD::ATOMIC_LOAD_UDEC_WRAP ||
N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE;
}
Expand Down
12 changes: 10 additions & 2 deletions llvm/include/llvm/IR/Instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -765,8 +765,16 @@ class AtomicRMWInst : public Instruction {
/// \p minnum matches the behavior of \p llvm.minnum.*.
FMin,

/// Increment by one, wrapping around to zero once the old value reaches \p v.
/// *p = (old u>= v) ? 0 : (old + 1)
UIncWrap,

/// Decrement by one, wrapping around to \p v when the old value is zero or
/// is greater than \p v.
/// *p = ((old == 0) || (old u> v)) ? v : (old - 1)
UDecWrap,

FIRST_BINOP = Xchg,
LAST_BINOP = FMin,
LAST_BINOP = UDecWrap,
BAD_BINOP
};

Expand All @@ -778,7 +786,7 @@ class AtomicRMWInst : public Instruction {

template <unsigned Offset>
using BinOpBitfieldElement =
typename Bitfield::Element<BinOp, Offset, 4, BinOp::LAST_BINOP>;
typename Bitfield::Element<BinOp, Offset, 5, BinOp::LAST_BINOP>;

public:
AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, Align Alignment,
Expand Down
4 changes: 3 additions & 1 deletion llvm/include/llvm/Support/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -392,12 +392,14 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_FADD)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_FSUB)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMAX)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_FMIN)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UINC_WRAP)
HANDLE_TARGET_OPCODE(G_ATOMICRMW_UDEC_WRAP)

// Marker for start of Generic AtomicRMW opcodes
HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_START, G_ATOMICRMW_XCHG)

// Marker for end of Generic AtomicRMW opcodes
HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_FMIN)
HANDLE_TARGET_OPCODE_MARKER(GENERIC_ATOMICRMW_OP_END, G_ATOMICRMW_UDEC_WRAP)

// Generic atomic fence
HANDLE_TARGET_OPCODE(G_FENCE)
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,8 @@ def G_ATOMICRMW_FADD : G_ATOMICRMW_OP;
def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP;
def G_ATOMICRMW_FMAX : G_ATOMICRMW_OP;
def G_ATOMICRMW_FMIN : G_ATOMICRMW_OP;
def G_ATOMICRMW_UINC_WRAP : G_ATOMICRMW_OP;
def G_ATOMICRMW_UDEC_WRAP : G_ATOMICRMW_OP;

def G_FENCE : GenericInstruction {
let OutOperandList = (outs);
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd>;
def : GINodeEquiv<G_ATOMICRMW_FSUB, atomic_load_fsub>;
def : GINodeEquiv<G_ATOMICRMW_FMAX, atomic_load_fmax>;
def : GINodeEquiv<G_ATOMICRMW_FMIN, atomic_load_fmin>;
def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap>;
def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap>;
def : GINodeEquiv<G_FENCE, atomic_fence>;

// Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern.
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,10 @@ def atomic_load_fmax : SDNode<"ISD::ATOMIC_LOAD_FMAX", SDTFPAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_fmin : SDNode<"ISD::ATOMIC_LOAD_FMIN", SDTFPAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_uinc_wrap : SDNode<"ISD::ATOMIC_LOAD_UINC_WRAP", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_udec_wrap : SDNode<"ISD::ATOMIC_LOAD_UDEC_WRAP", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;

def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/Transforms/Utils/LowerAtomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ bool lowerAtomicRMWInst(AtomicRMWInst *RMWI);
/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
Value *buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder,
Value *Loaded, Value *Inc);
Value *Loaded, Value *Val);
}

#endif // LLVM_TRANSFORMS_UTILS_LOWERATOMIC_H
2 changes: 2 additions & 0 deletions llvm/lib/AsmParser/LLLexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,8 @@ lltok::Kind LLLexer::LexIdentifier() {

KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
KEYWORD(umin); KEYWORD(fmax); KEYWORD(fmin);
KEYWORD(uinc_wrap);
KEYWORD(udec_wrap);

KEYWORD(vscale);
KEYWORD(x);
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/AsmParser/LLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7736,6 +7736,12 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
case lltok::kw_min: Operation = AtomicRMWInst::Min; break;
case lltok::kw_umax: Operation = AtomicRMWInst::UMax; break;
case lltok::kw_umin: Operation = AtomicRMWInst::UMin; break;
case lltok::kw_uinc_wrap:
Operation = AtomicRMWInst::UIncWrap;
break;
case lltok::kw_udec_wrap:
Operation = AtomicRMWInst::UDecWrap;
break;
case lltok::kw_fadd:
Operation = AtomicRMWInst::FAdd;
IsFP = true;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Bitcode/Reader/BitcodeReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1256,6 +1256,10 @@ static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
case bitc::RMW_FSUB: return AtomicRMWInst::FSub;
case bitc::RMW_FMAX: return AtomicRMWInst::FMax;
case bitc::RMW_FMIN: return AtomicRMWInst::FMin;
case bitc::RMW_UINC_WRAP:
return AtomicRMWInst::UIncWrap;
case bitc::RMW_UDEC_WRAP:
return AtomicRMWInst::UDecWrap;
}
}

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,10 @@ static unsigned getEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::FSub: return bitc::RMW_FSUB;
case AtomicRMWInst::FMax: return bitc::RMW_FMAX;
case AtomicRMWInst::FMin: return bitc::RMW_FMIN;
case AtomicRMWInst::UIncWrap:
return bitc::RMW_UINC_WRAP;
case AtomicRMWInst::UDecWrap:
return bitc::RMW_UDEC_WRAP;
}
}

Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/CodeGen/AtomicExpandPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -833,7 +833,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
case AtomicRMWInst::FMin:
case AtomicRMWInst::FMax: {
case AtomicRMWInst::FMax:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap: {
// Finally, other ops will operate on the full value, so truncate down to
// the original size, and expand out again after doing the
// operation. Bitcasts will be inserted for FP values.
Expand Down Expand Up @@ -1704,6 +1706,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::FMin:
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
// No atomic libcalls are available for these operations.
return {};
}
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2952,6 +2952,12 @@ bool IRTranslator::translateAtomicRMW(const User &U,
case AtomicRMWInst::FMin:
Opcode = TargetOpcode::G_ATOMICRMW_FMIN;
break;
case AtomicRMWInst::UIncWrap:
Opcode = TargetOpcode::G_ATOMICRMW_UINC_WRAP;
break;
case AtomicRMWInst::UDecWrap:
Opcode = TargetOpcode::G_ATOMICRMW_UDEC_WRAP;
break;
}

MIRBuilder.buildAtomicRMW(
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7811,6 +7811,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
Opcode == ISD::ATOMIC_LOAD_FSUB ||
Opcode == ISD::ATOMIC_LOAD_FMAX ||
Opcode == ISD::ATOMIC_LOAD_FMIN ||
Opcode == ISD::ATOMIC_LOAD_UINC_WRAP ||
Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4671,6 +4671,12 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
case AtomicRMWInst::UIncWrap:
NT = ISD::ATOMIC_LOAD_UINC_WRAP;
break;
case AtomicRMWInst::UDecWrap:
NT = ISD::ATOMIC_LOAD_UDEC_WRAP;
break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd";
case ISD::ATOMIC_LOAD_UINC_WRAP:
return "AtomicLoadUIncWrap";
case ISD::ATOMIC_LOAD_UDEC_WRAP:
return "AtomicLoadUDecWrap";
case ISD::ATOMIC_LOAD: return "AtomicLoad";
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4348,6 +4348,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
return Builder.saveIP();
}

// FIXME: Duplicating AtomicExpand
Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
AtomicRMWInst::BinOp RMWOp) {
switch (RMWOp) {
Expand All @@ -4373,6 +4374,8 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
case AtomicRMWInst::UMin:
case AtomicRMWInst::FMax:
case AtomicRMWInst::FMin:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
llvm_unreachable("Unsupported atomic update operation");
}
llvm_unreachable("Unsupported atomic update operation");
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/IR/Instructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1802,6 +1802,10 @@ StringRef AtomicRMWInst::getOperationName(BinOp Op) {
return "fmax";
case AtomicRMWInst::FMin:
return "fmin";
case AtomicRMWInst::UIncWrap:
return "uinc_wrap";
case AtomicRMWInst::UDecWrap:
return "udec_wrap";
case AtomicRMWInst::BAD_BINOP:
return "<invalid operation>";
}
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2753,7 +2753,9 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {

// Since floating-point operation requires a non-trivial set of data
// operations, use CmpXChg to expand.
if (AI->isFloatingPointOperation())
if (AI->isFloatingPointOperation() ||
AI->getOperation() == AtomicRMWInst::UIncWrap ||
AI->getOperation() == AtomicRMWInst::UDecWrap)
return AtomicExpansionKind::CmpXChg;

unsigned Size = AI->getType()->getPrimitiveSizeInBits();
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18343,7 +18343,16 @@ PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicRMWInIR(AI);

switch (AI->getOperation()) {
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
return AtomicExpansionKind::CmpXChg;
default:
return TargetLowering::shouldExpandAtomicRMWInIR(AI);
}

llvm_unreachable("unreachable atomicrmw operation");
}

TargetLowering::AtomicExpansionKind
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13750,7 +13750,9 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
// point operations can't be used in an lr/sc sequence without breaking the
// forward-progress guarantee.
if (AI->isFloatingPointOperation())
if (AI->isFloatingPointOperation() ||
AI->getOperation() == AtomicRMWInst::UIncWrap ||
AI->getOperation() == AtomicRMWInst::UDecWrap)
return AtomicExpansionKind::CmpXChg;

// Don't expand forced atomics, we want to have __sync libcalls instead.
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31845,8 +31845,6 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {

AtomicRMWInst::BinOp Op = AI->getOperation();
switch (Op) {
default:
llvm_unreachable("Unknown atomic operation");
case AtomicRMWInst::Xchg:
return AtomicExpansionKind::None;
case AtomicRMWInst::Add:
Expand All @@ -31870,6 +31868,9 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::FSub:
case AtomicRMWInst::FMax:
case AtomicRMWInst::FMin:
case AtomicRMWInst::UIncWrap:
case AtomicRMWInst::UDecWrap:
default:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
return AtomicExpansionKind::CmpXChg;
Expand Down
Loading

0 comments on commit 778cf54

Please sign in to comment.