[RISCV] Recommit "Expand vp.stride.load to splat of a scalar load."
This is a recommit of #98140. It is based on #98205, which changes
the hardware zero-stride optimization feature.

This patch is similar to a214c52, but applies to vp.strided.load:
some targets prefer the pattern (vmv.v.x (load)) over a vlse with zero stride.
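
For illustration, a minimal sketch of the rewrite (the IR is illustrative; it assumes RV64, a legal scalable result type, and an EVL that is provably non-zero):

  %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 0, <vscale x 1 x i1> splat (i1 true), i32 4)

becomes a scalar load feeding a VL-aware splat:

  %val = load i8, ptr %ptr
  %load = call <vscale x 1 x i8> @llvm.riscv.vmv.v.x.nxv1i8.i64(<vscale x 1 x i8> poison, i8 %val, i64 4)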
Yeting Kuo committed Jul 12, 2024
1 parent cb3bc5b commit 7573853
Showing 3 changed files with 98 additions and 0 deletions.
48 changes: 48 additions & 0 deletions llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -18,9 +18,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -35,6 +37,7 @@ namespace {
class RISCVCodeGenPrepare : public FunctionPass,
public InstVisitor<RISCVCodeGenPrepare, bool> {
const DataLayout *DL;
const DominatorTree *DT;
const RISCVSubtarget *ST;

public:
@@ -48,12 +51,14 @@ class RISCVCodeGenPrepare : public FunctionPass,

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
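// The dominator tree feeds isKnownNonZero when proving the EVL is non-zero.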
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetPassConfig>();
}

bool visitInstruction(Instruction &I) { return false; }
bool visitAnd(BinaryOperator &BO);
bool visitIntrinsicInst(IntrinsicInst &I);
bool expandVPStrideLoad(IntrinsicInst &I);
};

} // end anonymous namespace
@@ -128,6 +133,9 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
// Which eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
if (expandVPStrideLoad(I))
return true;

if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
return false;

@@ -155,6 +163,45 @@ bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
return true;
}

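// Try to rewrite a zero-stride, all-ones-masked experimental.vp.strided.load
// into a scalar load of the base pointer followed by a splat. Returns true if
// the intrinsic was replaced.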
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
Value *BasePtr, *VL;

using namespace PatternMatch;
if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
return false;

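// The expansion emits an unconditional scalar load, which is only safe if at
// least one element is active, i.e. the EVL is provably non-zero.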
if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
return false;

auto *VTy = cast<VectorType>(II.getType());

IRBuilder<> Builder(&II);

// Extend VL from i32 to XLen if needed.
if (ST->is64Bit())
VL = Builder.CreateZExt(VL, Builder.getInt64Ty());

Type *STy = VTy->getElementType();
Value *Val = Builder.CreateLoad(STy, BasePtr);
const auto &TLI = *ST->getTargetLowering();
Value *Res;

// TODO: Also support fixed/illegal vector types to splat with evl = vl.
if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
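// The riscv.vmv.v.x/riscv.vfmv.v.f intrinsics take an explicit VL operand,
// so the splat can be emitted with the exact EVL instead of VLMAX.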
unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
: Intrinsic::riscv_vmv_v_x;
Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
{PoisonValue::get(VTy), Val, VL});
} else {
Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
}

II.replaceAllUsesWith(Res);
II.eraseFromParent();
return true;
}

bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -164,6 +211,7 @@ bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
ST = &TM.getSubtarget<RISCVSubtarget>(F);

DL = &F.getDataLayout();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

bool MadeChange = false;
for (auto &BB : F)
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -626,3 +626,29 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
}

declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, <33 x i1>, i32)

; TODO: Use accurate evl.
; Test an unmasked, zero-strided integer load.
define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
; CHECK-LABEL: zero_strided_unmasked_vpload_4i8_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: lbu a0, 0(a0)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: ret
%load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3)
ret <4 x i8> %load
}

; TODO: Use accurate evl.
; Test an unmasked, zero-strided float load.
define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
; CHECK-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: flh fa5, 0(a0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa5
; CHECK-NEXT: ret
%load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
ret <4 x half> %load
}
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -780,3 +780,27 @@ define <vscale x 16 x double> @strided_load_nxv17f64(ptr %ptr, i64 %stride, <vsc
declare <vscale x 17 x double> @llvm.experimental.vp.strided.load.nxv17f64.p0.i64(ptr, i64, <vscale x 17 x i1>, i32)
declare <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %vec, i64 %idx)
declare <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %vec, i64 %idx)

; Test an unmasked, zero-strided integer load.
define <vscale x 1 x i8> @zero_strided_unmasked_vpload_nxv1i8_i8(ptr %ptr) {
; CHECK-LABEL: zero_strided_unmasked_vpload_nxv1i8_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: lbu a0, 0(a0)
; CHECK-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: ret
%load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 0, <vscale x 1 x i1> splat (i1 true), i32 4)
ret <vscale x 1 x i8> %load
}

; Test an unmasked, zero-strided float load.
define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
; CHECK-LABEL: zero_strided_unmasked_vpload_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: flh fa5, 0(a0)
; CHECK-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; CHECK-NEXT: vfmv.v.f v8, fa5
; CHECK-NEXT: ret
%load = call <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 4)
ret <vscale x 1 x half> %load
}
