diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index e0e79bf07d58e..0c928a4ac5161 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -27815,6 +27815,139 @@ The '``llvm.masked.compressstore``' intrinsic is designed for compressing data i Other targets may support this intrinsic differently, for example, by lowering it into a sequence of branches that guard scalar store operations. +Speculative Load Intrinsics +--------------------------- + +LLVM provides intrinsics for speculatively loading memory that may be +out-of-bounds. These intrinsics enable optimizations like early-exit loop +vectorization where the vectorized loop may read beyond the end of an array, +provided the access is guaranteed to be valid by target-specific checks. + +.. _int_speculative_load: + +'``llvm.speculative.load``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + ; Direct form: number of accessible bytes given as i64 + declare <4 x float> @llvm.speculative.load.v4f32.p0(ptr <ptr>, i1 <from_end>, i64 <num_accessible_bytes>) + declare <8 x i32> @llvm.speculative.load.v8i32.p0(ptr <ptr>, i1 <from_end>, i64 <num_accessible_bytes>) + + ; Oracle form: accessible bytes computed by calling oracle_fn(args...) + declare <4 x float> @llvm.speculative.load.v4f32.p0(ptr <ptr>, i1 <from_end>, ptr <oracle_fn>, ...) + +Overview: +""""""""" + +The '``llvm.speculative.load``' intrinsic loads a value from memory. Unlike a +regular load, the memory access may extend beyond the bounds of the allocated +object, provided the pointer has been verified by +:ref:`llvm.can.load.speculatively <int_can_load_speculatively>` to ensure the +access is valid. + +Arguments: +"""""""""" + +The first argument is a pointer to the memory location to load from. The return +type must be a vector type with a power-of-2 size in bytes. The second argument +is an ``i1`` constant flag ``from_end`` that specifies whether the ``N`` +accessible bytes are counted from the start or the end of the loaded values (see +Semantics). 
The remaining arguments determine the *number of accessible bytes*, +denoted ``N`` below. + +In the **direct form**, the third argument is an ``i64`` specifying ``N`` +directly. In the **oracle form**, the third argument must be a direct +reference to a function returning ``i64`` that may only read memory through its +arguments (indirect function pointers are not permitted); the remaining +arguments are forwarded to it, and its return value is ``N``. + +Semantics: +"""""""""" + +Let ``S`` denote the size of the return type in bytes. The intrinsic performs +a load of ``S`` bytes starting from ``ptr``. + +When ``from_end`` is ``false``, the first ``N`` bytes (offsets ``[0, N)``) +are the stored values read from memory. Bytes at offsets ``[N, S)`` are +``poison``. + +When ``from_end`` is ``true``, the last ``N`` bytes (offsets ``[S - N, S)``) +are the stored values read from memory. Bytes at offsets ``[0, S - N)`` are +``poison``. + +In both cases, the ``N`` accessible bytes must lie within the bounds of an +allocated object that ``ptr`` is :ref:`based <pointeraliasing>` on, and +poison bytes are not considered accessed for the purposes of data races or +``noalias`` constraints. The behavior is undefined if ``N`` exceeds ``S``. + +The behavior is undefined if the speculative load accesses memory that would +fault (i.e., the oracle or ``llvm.can.load.speculatively`` would indicate the +access is not safe). + +.. _int_can_load_speculatively: + +'``llvm.can.load.speculatively``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare i1 @llvm.can.load.speculatively.p0(ptr <ptr>, i64 <num_bytes>) + declare i1 @llvm.can.load.speculatively.p1(ptr addrspace(1) <ptr>, i64 <num_bytes>) + +Overview: +""""""""" + +The '``llvm.can.load.speculatively``' intrinsic returns true if it is safe +to speculatively load ``num_bytes`` bytes starting from ``ptr``, +even if the memory may be beyond the bounds of an allocated object. 
+ +Arguments: +"""""""""" + +The first argument is a pointer to the memory location. + +The second argument is an i64 specifying the size in bytes of the load. +The size must be a positive power of 2. If the size is not a power-of-2, the +result is ``poison``. + +Semantics: +"""""""""" + +This intrinsic has **target-dependent** semantics. It returns ``true`` if +``num_bytes`` bytes starting at ``ptr + I * num_bytes``, for all non-negative +integers ``I`` where the computed address does not wrap around the address +space, can be loaded speculatively, even if the memory is beyond the bounds of +an allocated object. It returns ``false`` otherwise. + +The specific conditions under which this intrinsic returns ``true`` are +determined by the target. For example, a target may check whether the pointer +alignment guarantees all such loads cannot cross a page boundary. + +.. code-block:: llvm + + ; Check if we can safely load 16 bytes from %ptr + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 16) + br i1 %can_load, label %speculative_path, label %safe_path + + speculative_path: + ; Safe to speculatively load from %ptr + %vec = call <4 x i32> @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, i64 16) + ... + + safe_path: + ; Fall back to masked load or scalar operations + ... + + Memory Use Markers ------------------ diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 2e66f574981ab..8cd5f78c1c8e1 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2284,6 +2284,19 @@ class LLVM_ABI TargetLoweringBase { llvm_unreachable("Store conditional unimplemented on this target"); } + /// Emit code to check if a speculative load of the given size from Ptr is + /// safe. Returns a Value* representing the check result (i1), or nullptr + /// to use the default lowering (which returns false). 
Targets can override + /// to provide their own safety check (e.g., alignment-based page boundary + /// check). + /// \param Builder IRBuilder positioned at the intrinsic call site + /// \param Ptr the pointer operand + /// \param Size the size in bytes (constant or runtime value for scalable) + virtual Value *emitCanLoadSpeculatively(IRBuilderBase &Builder, Value *Ptr, + Value *Size) const { + return nullptr; + } + /// Perform a masked atomicrmw using a target-specific intrinsic. This /// represents the core LL/SC loop which will be lowered at a late stage by /// the backend. The target-specific intrinsic returns the loaded value and diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 30e6c8f614a00..4d722c3d65216 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2604,6 +2604,20 @@ def int_experimental_vector_compress: [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], [IntrNoMem]>; +// Speculatively load a value from memory; lowers to a regular aligned load. +def int_speculative_load: + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyptr_ty, llvm_i1_ty, llvm_vararg_ty], + [IntrReadMem, IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, + ImmArg<ArgIndex<1>>]>; + +// Returns true if it's safe to speculatively load 'num_bytes' from 'ptr'. +// The size can be a runtime value to support scalable vectors. +def int_can_load_speculatively: + DefaultAttrsIntrinsic<[llvm_i1_ty], + [llvm_anyptr_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + // Test whether a pointer is associated with a type metadata identifier. 
def int_type_test : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty], [IntrNoMem, IntrSpeculatable]>; diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 9ae3f4e866b42..b1d7634cd2062 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -136,6 +136,39 @@ static bool lowerLoadRelative(Function &F) { return Changed; } +/// Lower @llvm.can.load.speculatively using target-specific expansion. +/// Each target provides its own expansion via +/// TargetLowering::emitCanLoadSpeculatively. +/// The default expansion returns false (conservative). +static bool lowerCanLoadSpeculatively(Function &F, const TargetMachine *TM) { + bool Changed = false; + + for (Use &U : llvm::make_early_inc_range(F.uses())) { + auto *CI = dyn_cast<CallInst>(U.getUser()); + if (!CI || CI->getCalledOperand() != &F) + continue; + + Function *ParentFunc = CI->getFunction(); + const TargetLowering *TLI = + TM->getSubtargetImpl(*ParentFunc)->getTargetLowering(); + + IRBuilder<> Builder(CI); + Value *Ptr = CI->getArgOperand(0); + Value *Size = CI->getArgOperand(1); + + // Ask target for expansion; nullptr means use default (return false) + Value *Result = TLI->emitCanLoadSpeculatively(Builder, Ptr, Size); + if (!Result) + Result = Builder.getFalse(); + + CI->replaceAllUsesWith(Result); + CI->eraseFromParent(); + Changed = true; + } + + return Changed; +} + // ObjCARC has knowledge about whether an obj-c runtime function needs to be // always tail-called or never tail-called. 
static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) { @@ -694,6 +727,9 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const { case Intrinsic::load_relative: Changed |= lowerLoadRelative(F); break; + case Intrinsic::can_load_speculatively: + Changed |= lowerCanLoadSpeculatively(F, TM); + break; case Intrinsic::is_constant: case Intrinsic::objectsize: Changed |= forEachCall(F, [&](CallInst *CI) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 04b17b56b3d49..357d318197f5b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5144,6 +5144,35 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { setValue(&I, Res); } +void SelectionDAGBuilder::visitSpeculativeLoad(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + Value *PtrOperand = I.getArgOperand(0); + // The remaining arguments (num_accessible_bytes or oracle function + args) + // are IR-level semantics only; they are not needed at codegen. + SDValue Ptr = getValue(PtrOperand); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + Align Alignment = I.getParamAlign(0).valueOrOne(); + AAMDNodes AAInfo = I.getAAMetadata(); + TypeSize StoreSize = VT.getStoreSize(); + + SDValue InChain = DAG.getRoot(); + + // Use MOLoad but NOT MODereferenceable - the memory may not be + // fully dereferenceable. + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MOLoad; + LocationSize LocSize = StoreSize.isScalable() + ? 
LocationSize::beforeOrAfterPointer() + : LocationSize::precise(StoreSize); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MMOFlags, LocSize, Alignment, AAInfo); + + SDValue Load = DAG.getLoad(VT, sdl, InChain, Ptr, MMO); + PendingLoads.push_back(Load.getValue(1)); + setValue(&I, Load); +} + void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDLoc sdl = getCurSDLoc(); @@ -6905,6 +6934,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); return; + case Intrinsic::speculative_load: + visitSpeculativeLoad(I); + return; case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index bab0509dd138f..419e6a5fba6eb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -620,6 +620,7 @@ class SelectionDAGBuilder { void visitStore(const StoreInst &I); void visitMaskedLoad(const CallInst &I, bool IsExpanding = false); void visitMaskedStore(const CallInst &I, bool IsCompressing = false); + void visitSpeculativeLoad(const CallInst &I); void visitMaskedGather(const CallInst &I); void visitMaskedScatter(const CallInst &I); void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 5909738b0c903..d413102cfe0a5 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6783,6 +6783,70 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { &Call); break; } + case Intrinsic::speculative_load: { + Type *LoadTy = Call.getType(); + TypeSize Size = DL.getTypeStoreSize(LoadTy); + // For scalable vectors, check the known minimum size is a power of 2. 
+ Check(Size.getKnownMinValue() > 0 && isPowerOf2_64(Size.getKnownMinValue()), + "llvm.speculative.load type must have a power-of-2 size", &Call); + + unsigned NumArgs = Call.arg_size(); + Check(NumArgs >= 3, "llvm.speculative.load requires at least 3 arguments", + &Call); + + Value *PayloadArg = Call.getArgOperand(2); + if (PayloadArg->getType()->isIntegerTy(64)) { + // Direct form: (ptr, i1 from_end, i64 num_accessible_bytes) + Check(NumArgs == 3, + "llvm.speculative.load direct form has too many arguments", &Call); + if (auto *CI = dyn_cast<ConstantInt>(PayloadArg)) { + Check(Size.isScalable() || CI->getZExtValue() <= Size.getFixedValue(), + "llvm.speculative.load num_accessible_bytes must not exceed " + "the result size in bytes", + &Call); + } + } else { + // Oracle form: (ptr, i1 from_end, oracle_fn_ptr, args...) + auto *OracleFn = dyn_cast<Function>(PayloadArg); + Check(OracleFn, + "llvm.speculative.load third argument must be i64 or a direct " + "reference to an oracle function", + &Call); + + Check(OracleFn->onlyReadsMemory() && OracleFn->onlyAccessesArgMemory(), + "llvm.speculative.load oracle function must not have side effects " + "and may only read memory through its arguments", + &Call); + + FunctionType *FTy = OracleFn->getFunctionType(); + Check(FTy->getReturnType()->isIntegerTy(64), + "llvm.speculative.load oracle function must return i64", &Call); + + unsigned OracleArgsStart = 3; + unsigned NumOracleArgs = NumArgs - OracleArgsStart; + Check(FTy->isVarArg() ? NumOracleArgs >= FTy->getNumParams() : NumOracleArgs == FTy->getNumParams(), + "llvm.speculative.load oracle function argument count mismatch", + &Call); + for (unsigned I = 0, E = FTy->getNumParams(); I < E; ++I) { + Check(FTy->getParamType(I) == + Call.getArgOperand(I + OracleArgsStart)->getType(), + "llvm.speculative.load oracle function argument type mismatch", + &Call); + } + } + break; + } + case Intrinsic::can_load_speculatively: { + // If size is a constant, verify it's a positive power of 2. 
+ if (auto *SizeCI = dyn_cast<ConstantInt>(Call.getArgOperand(1))) { + uint64_t Size = SizeCI->getZExtValue(); + Check(Size > 0 && isPowerOf2_64(Size), + "llvm.can.load.speculatively size must be a positive power of 2", + &Call); + } + break; + } + case Intrinsic::vector_insert: { Value *Vec = Call.getArgOperand(0); Value *SubVec = Call.getArgOperand(1); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 38db1ac4a2fb9..79a3f3648db50 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -30996,6 +30996,56 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder, return CI; } +Value *AArch64TargetLowering::emitCanLoadSpeculatively(IRBuilderBase &Builder, + Value *Ptr, + Value *Size) const { + unsigned AS = cast<PointerType>(Ptr->getType())->getAddressSpace(); + // Conservatively only allow speculation for address space 0. + if (AS != 0) + return nullptr; + // For power-of-2 sizes <= 16, emit alignment check: (ptr & (size - 1)) == 0. + // If the pointer is aligned to at least 'size' bytes, loading 'size' bytes + // cannot cross a page boundary, so it's safe to speculate. + // The 16-byte limit ensures correctness with MTE (memory tagging), since + // MTE uses 16-byte tag granules. + // + // The alignment check only works for power-of-2 sizes. For non-power-of-2 + // sizes, we conservatively return false. + const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); + + unsigned PtrBits = DL.getPointerSizeInBits(AS); + Type *IntPtrTy = Builder.getIntNTy(PtrBits); + if (auto *CI = dyn_cast<ConstantInt>(Size)) { + uint64_t SizeVal = CI->getZExtValue(); + assert(isPowerOf2_64(SizeVal) && "size must be power-of-two"); + // For constant sizes > 16, return nullptr (default false). + if (SizeVal > 16) + return nullptr; + + // Power-of-2 constant size <= 16: use fast alignment check. 
+ Value *PtrInt = Builder.CreatePtrToInt(Ptr, IntPtrTy); + Value *Mask = ConstantInt::get(IntPtrTy, SizeVal - 1); + Value *Masked = Builder.CreateAnd(PtrInt, Mask); + return Builder.CreateICmpEQ(Masked, ConstantInt::get(IntPtrTy, 0)); + } + + // Check power-of-2 size <= 16 and alignment. + Value *PtrInt = Builder.CreatePtrToInt(Ptr, IntPtrTy); + Value *SizeExt = Builder.CreateZExtOrTrunc(Size, IntPtrTy); + + Value *SizeLE16 = + Builder.CreateICmpULE(SizeExt, ConstantInt::get(IntPtrTy, 16)); + + // alignment check: (ptr & (size - 1)) == 0 + Value *SizeMinusOne = + Builder.CreateSub(SizeExt, ConstantInt::get(IntPtrTy, 1)); + Value *Masked = Builder.CreateAnd(PtrInt, SizeMinusOne); + Value *AlignCheck = + Builder.CreateICmpEQ(Masked, ConstantInt::get(IntPtrTy, 0)); + + return Builder.CreateAnd(SizeLE16, AlignCheck); +} + bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 49ff76bb2f469..9d64426b35577 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -352,6 +352,8 @@ class AArch64TargetLowering : public TargetLowering { AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; + Value *emitCanLoadSpeculatively(IRBuilderBase &Builder, Value *Ptr, + Value *Size) const override; void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override; diff --git a/llvm/test/CodeGen/AArch64/can-load-speculatively.ll b/llvm/test/CodeGen/AArch64/can-load-speculatively.ll new file mode 100644 index 0000000000000..b6679f22b0989 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/can-load-speculatively.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt 
-mtriple=aarch64-unknown-linux-gnu -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck %s + +; Test that @llvm.can.load.speculatively is lowered to an alignment check +; for power-of-2 sizes <= 16 bytes on AArch64, and returns false for larger sizes. +; The 16-byte limit ensures correctness with MTE (memory tagging). +; Note: non-power-of-2 constant sizes are rejected by the verifier. + +define i1 @can_load_speculatively_16(ptr %ptr) { +; CHECK-LABEL: @can_load_speculatively_16( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 15 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 16) + ret i1 %can_load +} + + +define i1 @can_load_speculatively_8_ptr_aligned(ptr align 8 %ptr) { +; CHECK-LABEL: @can_load_speculatively_8_ptr_aligned( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 15 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 16) + ret i1 %can_load +} + +define i1 @can_load_speculatively_16_ptr_aligned(ptr align 16 %ptr) { +; CHECK-LABEL: @can_load_speculatively_16_ptr_aligned( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 15 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 16) + ret i1 %can_load +} + +define i1 @can_load_speculatively_16_ptr_aligned2(ptr align 16 %ptr) { +; CHECK-LABEL: @can_load_speculatively_16_ptr_aligned2( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 15 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %can_load = call i1 
@llvm.can.load.speculatively.p0(ptr align 16 %ptr, i64 16) + ret i1 %can_load +} + +define i1 @can_load_speculatively_32_ptr_aligned(ptr align 32 %ptr) { +; CHECK-LABEL: @can_load_speculatively_32_ptr_aligned( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 15 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 16) + ret i1 %can_load +} + +; Size > 16 - returns false (may cross MTE tag granule boundary) +define i1 @can_load_speculatively_32(ptr %ptr) { +; CHECK-LABEL: @can_load_speculatively_32( +; CHECK-NEXT: ret i1 false +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 32) + ret i1 %can_load +} + +; Size > 16 - returns false (may cross MTE tag granule boundary) +define i1 @can_load_speculatively_64(ptr %ptr) { +; CHECK-LABEL: @can_load_speculatively_64( +; CHECK-NEXT: ret i1 false +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 64) + ret i1 %can_load +} + +; Test with address space +define i1 @can_load_speculatively_addrspace1(ptr addrspace(1) %ptr) { +; CHECK-LABEL: @can_load_speculatively_addrspace1( +; CHECK-NEXT: ret i1 false +; + %can_load = call i1 @llvm.can.load.speculatively.p1(ptr addrspace(1) %ptr, i64 16) + ret i1 %can_load +} + +; Test size 8 (within limit, power-of-2) +define i1 @can_load_speculatively_8(ptr %ptr) { +; CHECK-LABEL: @can_load_speculatively_8( +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 7 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[TMP3]] +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 8) + ret i1 %can_load +} + +; Test with runtime size - checks size <= 16 and alignment +define i1 @can_load_speculatively_runtime(ptr %ptr, i64 %size) { +; CHECK-LABEL: @can_load_speculatively_runtime( +; CHECK-NEXT: 
[[TMP1:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[SIZE:%.*]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[SIZE]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[TMP5]] +; CHECK-NEXT: ret i1 [[TMP6]] +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 %size) + ret i1 %can_load +} + +declare i1 @llvm.can.load.speculatively.p0(ptr, i64) +declare i1 @llvm.can.load.speculatively.p1(ptr addrspace(1), i64) diff --git a/llvm/test/CodeGen/AArch64/speculative-load-intrinsic.ll b/llvm/test/CodeGen/AArch64/speculative-load-intrinsic.ll new file mode 100644 index 0000000000000..a547cee576864 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/speculative-load-intrinsic.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve < %s | FileCheck %s + +; Test that @llvm.speculative.load is lowered to a regular load +; in SelectionDAG for fixed vectors, scalable vectors, and scalars. + +; Fixed-width vector tests + +define <4 x i32> @speculative_load_v4i32(ptr %ptr) { +; CHECK-LABEL: speculative_load_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr align 16 %ptr, i1 false, i64 12) + ret <4 x i32> %load +} + +define <2 x double> @speculative_load_v2f64(ptr %ptr) { +; CHECK-LABEL: speculative_load_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <2 x double> (ptr, i1, ...) @llvm.speculative.load.v2f64.p0(ptr align 16 %ptr, i1 false, i64 8) + ret <2 x double> %load +} + +; Scalable vector tests + +define <vscale x 4 x i32> @speculative_load_nxv4i32(ptr %ptr) { +; CHECK-LABEL: speculative_load_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x i32> (ptr, i1, ...) 
@llvm.speculative.load.nxv4i32.p0(ptr align 16 %ptr, i1 false, i64 8) + ret <vscale x 4 x i32> %load +} + +define <vscale x 2 x double> @speculative_load_nxv2f64(ptr %ptr) { +; CHECK-LABEL: speculative_load_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 2 x double> (ptr, i1, ...) @llvm.speculative.load.nxv2f64.p0(ptr align 16 %ptr, i1 false, i64 4) + ret <vscale x 2 x double> %load +} + +; Oracle form tests + +declare i64 @oracle(ptr, i64) memory(argmem: read) + +define <4 x i32> @speculative_load_v4i32_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v4i32_oracle: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <4 x i32> %load +} + +define <2 x i64> @speculative_load_v2i64_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v2i64_oracle: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <2 x i64> (ptr, i1, ...) @llvm.speculative.load.v2i64.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <2 x i64> %load +} + +define <8 x i16> @speculative_load_v8i16_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v8i16_oracle: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <8 x i16> (ptr, i1, ...) @llvm.speculative.load.v8i16.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <8 x i16> %load +} + +define <16 x i8> @speculative_load_v16i8_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v16i8_oracle: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <16 x i8> (ptr, i1, ...) 
@llvm.speculative.load.v16i8.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <16 x i8> %load +} + +define <4 x float> @speculative_load_v4f32_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v4f32_oracle: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <4 x float> (ptr, i1, ...) @llvm.speculative.load.v4f32.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <4 x float> %load +} + +define <2 x double> @speculative_load_v2f64_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v2f64_oracle: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <2 x double> (ptr, i1, ...) @llvm.speculative.load.v2f64.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <2 x double> %load +} + +; from_end tests + +define <4 x i32> @speculative_load_v4i32_from_end(ptr %ptr) { +; CHECK-LABEL: speculative_load_v4i32_from_end: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr align 16 %ptr, i1 true, i64 12) + ret <4 x i32> %load +} + +define <vscale x 4 x i32> @speculative_load_nxv4i32_from_end(ptr %ptr) { +; CHECK-LABEL: speculative_load_nxv4i32_from_end: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x i32> (ptr, i1, ...) @llvm.speculative.load.nxv4i32.p0(ptr align 16 %ptr, i1 true, i64 8) + ret <vscale x 4 x i32> %load +} + +define <4 x i32> @speculative_load_v4i32_oracle_from_end(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v4i32_oracle_from_end: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <4 x i32> (ptr, i1, ...) 
@llvm.speculative.load.v4i32.p0(ptr align 16 %ptr, i1 true, ptr @oracle, ptr %ptr, i64 %n) + ret <4 x i32> %load +} + +define <vscale x 4 x i32> @speculative_load_nxv4i32_oracle_from_end(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_nxv4i32_oracle_from_end: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x i32> (ptr, i1, ...) @llvm.speculative.load.nxv4i32.p0(ptr align 16 %ptr, i1 true, ptr @oracle, ptr %ptr, i64 %n) + ret <vscale x 4 x i32> %load +} + +define <vscale x 4 x i32> @speculative_load_nxv4i32_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_nxv4i32_oracle: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ret + %load = call <vscale x 4 x i32> (ptr, i1, ...) @llvm.speculative.load.nxv4i32.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <vscale x 4 x i32> %load +} diff --git a/llvm/test/CodeGen/X86/can-load-speculatively.ll b/llvm/test/CodeGen/X86/can-load-speculatively.ll new file mode 100644 index 0000000000000..f51d3847e921d --- /dev/null +++ b/llvm/test/CodeGen/X86/can-load-speculatively.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck %s + +; Test that @llvm.can.load.speculatively returns false (default) on X86, +; as X86 does not provide a target-specific expansion. 
+ +define i1 @can_load_speculatively_16(ptr %ptr) { +; CHECK-LABEL: @can_load_speculatively_16( +; CHECK-NEXT: ret i1 false +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 16) + ret i1 %can_load +} + +define i1 @can_load_speculatively_32(ptr %ptr) { +; CHECK-LABEL: @can_load_speculatively_32( +; CHECK-NEXT: ret i1 false +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 32) + ret i1 %can_load +} + +define i1 @can_load_speculatively_8(ptr %ptr) { +; CHECK-LABEL: @can_load_speculatively_8( +; CHECK-NEXT: ret i1 false +; + %can_load = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 8) + ret i1 %can_load +} + +declare i1 @llvm.can.load.speculatively.p0(ptr, i64) + diff --git a/llvm/test/CodeGen/X86/speculative-load-intrinsic.ll b/llvm/test/CodeGen/X86/speculative-load-intrinsic.ll new file mode 100644 index 0000000000000..fa345408940d1 --- /dev/null +++ b/llvm/test/CodeGen/X86/speculative-load-intrinsic.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s + +; Test that @llvm.speculative.load is lowered to a regular load +; in SelectionDAG. + +define <4 x i32> @speculative_load_v4i32(ptr %ptr) { +; CHECK-LABEL: speculative_load_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: retq + %load = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr align 16 %ptr, i1 false, i64 8) + ret <4 x i32> %load +} + +define <8 x i32> @speculative_load_v8i32(ptr %ptr) { +; CHECK-LABEL: speculative_load_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: retq + %load = call <8 x i32> (ptr, i1, ...) 
@llvm.speculative.load.v8i32.p0(ptr align 32 %ptr, i1 false, i64 24) + ret <8 x i32> %load +} + +; Oracle form tests +declare i64 @oracle(ptr, i64) memory(argmem: read) + +define <4 x i32> @speculative_load_v4i32_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v4i32_oracle: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: retq + %load = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <4 x i32> %load +} + +define <8 x i32> @speculative_load_v8i32_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v8i32_oracle: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: retq + %load = call <8 x i32> (ptr, i1, ...) @llvm.speculative.load.v8i32.p0(ptr align 32 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <8 x i32> %load +} + +define <2 x i64> @speculative_load_v2i64_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v2i64_oracle: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: retq + %load = call <2 x i64> (ptr, i1, ...) @llvm.speculative.load.v2i64.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <2 x i64> %load +} + +define <4 x i64> @speculative_load_v4i64_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v4i64_oracle: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: retq + %load = call <4 x i64> (ptr, i1, ...) @llvm.speculative.load.v4i64.p0(ptr align 32 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <4 x i64> %load +} + +define <4 x float> @speculative_load_v4f32_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v4f32_oracle: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: retq + %load = call <4 x float> (ptr, i1, ...) 
@llvm.speculative.load.v4f32.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <4 x float> %load +} + +define <2 x double> @speculative_load_v2f64_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v2f64_oracle: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: retq + %load = call <2 x double> (ptr, i1, ...) @llvm.speculative.load.v2f64.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <2 x double> %load +} + +define <8 x i16> @speculative_load_v8i16_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v8i16_oracle: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: retq + %load = call <8 x i16> (ptr, i1, ...) @llvm.speculative.load.v8i16.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <8 x i16> %load +} + +define <16 x i8> @speculative_load_v16i8_oracle(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v16i8_oracle: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: retq + %load = call <16 x i8> (ptr, i1, ...) @llvm.speculative.load.v16i8.p0(ptr align 16 %ptr, i1 false, ptr @oracle, ptr %ptr, i64 %n) + ret <16 x i8> %load +} + +; from_end tests + +define <4 x i32> @speculative_load_v4i32_from_end(ptr %ptr) { +; CHECK-LABEL: speculative_load_v4i32_from_end: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: retq + %load = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr align 16 %ptr, i1 true, i64 8) + ret <4 x i32> %load +} + +define <8 x i32> @speculative_load_v8i32_from_end(ptr %ptr) { +; CHECK-LABEL: speculative_load_v8i32_from_end: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: retq + %load = call <8 x i32> (ptr, i1, ...) 
@llvm.speculative.load.v8i32.p0(ptr align 32 %ptr, i1 true, i64 24) + ret <8 x i32> %load +} + +define <4 x i32> @speculative_load_v4i32_oracle_from_end(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v4i32_oracle_from_end: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %xmm0 +; CHECK-NEXT: retq + %load = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr align 16 %ptr, i1 true, ptr @oracle, ptr %ptr, i64 %n) + ret <4 x i32> %load +} + +define <8 x i32> @speculative_load_v8i32_oracle_from_end(ptr %ptr, i64 %n) { +; CHECK-LABEL: speculative_load_v8i32_oracle_from_end: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovaps (%rdi), %ymm0 +; CHECK-NEXT: retq + %load = call <8 x i32> (ptr, i1, ...) @llvm.speculative.load.v8i32.p0(ptr align 32 %ptr, i1 true, ptr @oracle, ptr %ptr, i64 %n) + ret <8 x i32> %load +} diff --git a/llvm/test/Verifier/can-load-speculatively.ll b/llvm/test/Verifier/can-load-speculatively.ll new file mode 100644 index 0000000000000..d2d69f70cfb60 --- /dev/null +++ b/llvm/test/Verifier/can-load-speculatively.ll @@ -0,0 +1,19 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +declare i1 @llvm.can.load.speculatively.p0(ptr, i64) + +; Test that constant size must be a positive power of 2 + +define i1 @test_size_zero(ptr %ptr) { +; CHECK: llvm.can.load.speculatively size must be a positive power of 2 +; CHECK-NEXT: %res = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 0) + %res = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 0) + ret i1 %res +} + +define i1 @test_non_power_of_2(ptr %ptr) { +; CHECK: llvm.can.load.speculatively size must be a positive power of 2 +; CHECK-NEXT: %res = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 3) + %res = call i1 @llvm.can.load.speculatively.p0(ptr %ptr, i64 3) + ret i1 %res +} diff --git a/llvm/test/Verifier/speculative-load.ll b/llvm/test/Verifier/speculative-load.ll new file mode 100644 index 0000000000000..7fd2d340679aa --- /dev/null +++ 
b/llvm/test/Verifier/speculative-load.ll @@ -0,0 +1,87 @@ +; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s + +declare <4 x i32> @llvm.speculative.load.v4i32.p0(ptr, i1, ...) +declare <3 x i32> @llvm.speculative.load.v3i32.p0(ptr, i1, ...) +declare <vscale x 3 x i32> @llvm.speculative.load.nxv3i32.p0(ptr, i1, ...) + +declare i32 @bad_oracle_ret(ptr, i64) memory(argmem: read) +declare i64 @good_oracle(ptr, i64) memory(argmem: read) +declare i64 @oracle_i32_param(i32) memory(argmem: read) +declare i64 @side_effecting_oracle(ptr, i64) + +define <3 x i32> @test_non_power_of_2_fixed(ptr %ptr) { +; CHECK: llvm.speculative.load type must have a power-of-2 size +; CHECK-NEXT: %res = call <3 x i32> (ptr, i1, ...) @llvm.speculative.load.v3i32.p0(ptr %ptr, i1 false, i64 0) + %res = call <3 x i32> (ptr, i1, ...) @llvm.speculative.load.v3i32.p0(ptr %ptr, i1 false, i64 0) + ret <3 x i32> %res +} + +define <vscale x 3 x i32> @test_non_power_of_2_scalable(ptr %ptr) { +; CHECK: llvm.speculative.load type must have a power-of-2 size +; CHECK-NEXT: %res = call <vscale x 3 x i32> (ptr, i1, ...) @llvm.speculative.load.nxv3i32.p0(ptr %ptr, i1 false, i64 0) + %res = call <vscale x 3 x i32> (ptr, i1, ...) @llvm.speculative.load.nxv3i32.p0(ptr %ptr, i1 false, i64 0) + ret <vscale x 3 x i32> %res +} + +define <4 x i32> @test_too_few_args(ptr %ptr) { +; CHECK: llvm.speculative.load requires at least 3 arguments +; CHECK-NEXT: call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 true) + %res = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 true) + ret <4 x i32> %res +} + +define <4 x i32> @test_direct_form_extra_args(ptr %ptr) { +; CHECK: llvm.speculative.load direct form has too many arguments +; CHECK-NEXT: call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, i64 8, i64 4) + %res = call <4 x i32> (ptr, i1, ...) 
@llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, i64 8, i64 4) + ret <4 x i32> %res +} + +define <4 x i32> @test_oracle_wrong_return_type(ptr %ptr, i64 %n) { +; CHECK: llvm.speculative.load oracle function must return i64 +; CHECK-NEXT: call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @bad_oracle_ret, ptr %ptr, i64 %n) + %res = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @bad_oracle_ret, ptr %ptr, i64 %n) + ret <4 x i32> %res +} + +define <4 x i32> @test_oracle_too_few_args(ptr %ptr) { +; CHECK: llvm.speculative.load oracle function argument count mismatch +; CHECK-NEXT: call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @good_oracle) + %res = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @good_oracle) + ret <4 x i32> %res +} + +define <4 x i32> @test_oracle_too_many_args(ptr %ptr, i64 %n) { +; CHECK: llvm.speculative.load oracle function argument count mismatch +; CHECK-NEXT: call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @good_oracle, ptr %ptr, i64 %n, i64 %n) + %res = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @good_oracle, ptr %ptr, i64 %n, i64 %n) + ret <4 x i32> %res +} + +define <4 x i32> @test_oracle_arg_type_mismatch(ptr %ptr) { +; CHECK: llvm.speculative.load oracle function argument type mismatch +; CHECK-NEXT: call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @oracle_i32_param, i64 42) + %res = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @oracle_i32_param, i64 42) + ret <4 x i32> %res +} + +define <4 x i32> @test_non_function_oracle(ptr %ptr, ptr %not_fn) { +; CHECK: llvm.speculative.load third argument must be i64 or a direct reference to an oracle function +; CHECK-NEXT: call <4 x i32> (ptr, i1, ...) 
@llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr %not_fn) + %res = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr %not_fn) + ret <4 x i32> %res +} + +define <4 x i32> @test_oracle_side_effects(ptr %ptr, i64 %n) { +; CHECK: llvm.speculative.load oracle function must not have side effects and may only read memory through its arguments +; CHECK-NEXT: call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @side_effecting_oracle, ptr %ptr, i64 %n) + %res = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, ptr @side_effecting_oracle, ptr %ptr, i64 %n) + ret <4 x i32> %res +} + +define <4 x i32> @test_num_accessible_bytes_exceeds_size(ptr %ptr) { +; CHECK: llvm.speculative.load num_accessible_bytes must not exceed the result size in bytes +; CHECK-NEXT: call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, i64 32) + %res = call <4 x i32> (ptr, i1, ...) @llvm.speculative.load.v4i32.p0(ptr %ptr, i1 false, i64 32) + ret <4 x i32> %res +}