Merged
llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td (32 changes: 1 addition & 31 deletions)

@@ -41,24 +41,6 @@ multiclass VPatUSLoadStoreSDNode<ValueType type,
(store_instr reg_class:$rs2, GPR:$rs1, avl, log2sew)>;
}

multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type,
int log2sew,
LMULInfo vlmul,
VReg reg_class,
int sew = !shl(1, log2sew)> {
defvar load_instr =
!cast<Instruction>("VL"#!substr(vlmul.MX, 1)#"RE"#sew#"_V");
defvar store_instr =
!cast<Instruction>("VS"#!substr(vlmul.MX, 1)#"R_V");

// Load
def : Pat<(type (load GPR:$rs1)),
(load_instr GPR:$rs1)>;
// Store
def : Pat<(store type:$rs2, GPR:$rs1),
(store_instr reg_class:$rs2, GPR:$rs1)>;
}

multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m> {
defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#m.BX);
defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#m.BX);
@@ -895,23 +877,11 @@ multiclass VPatAVGADD_VV_VX_RM<SDNode vop, int vxrm, string suffix = ""> {
//===----------------------------------------------------------------------===//

// 7.4. Vector Unit-Stride Instructions
foreach vti = !listconcat(FractionalGroupIntegerVectors,
FractionalGroupFloatVectors,
FractionalGroupBFloatVectors) in
foreach vti = AllVectors in
let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.AVL, vti.RegClass>;
foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VBF16M1, VF16M1, VF32M1, VF64M1] in
let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors, GroupBFloatVectors) in
let Predicates = !if(!eq(vti.Scalar, f16), [HasVInstructionsF16Minimal],
GetVTypePredicates<vti>.Predicates) in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
foreach mti = AllMasks in
let Predicates = [HasVInstructions] in
defm : VPatUSLoadStoreMaskSDNode<mti>;
llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp (54 changes: 54 additions & 0 deletions)

@@ -59,6 +59,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {

private:
bool convertToVLMAX(MachineInstr &MI) const;
bool convertToWholeRegister(MachineInstr &MI) const;
bool convertToUnmasked(MachineInstr &MI) const;
bool convertVMergeToVMv(MachineInstr &MI) const;

@@ -155,6 +156,58 @@ bool RISCVVectorPeephole::isAllOnesMask(const MachineInstr *MaskDef) const {
}
}

/// Convert unit-strided unmasked loads and stores to whole-register equivalents
/// to avoid the dependency on $vl and $vtype.
///
/// %x = PseudoVLE8_V_M1 %passthru, %ptr, %vlmax, policy
/// PseudoVSE8_V_M1 %v, %ptr, %vlmax
///
/// ->
///
/// %x = VL1RE8_V %ptr
/// VS1R_V %v, %ptr
bool RISCVVectorPeephole::convertToWholeRegister(MachineInstr &MI) const {
#define CASE_WHOLE_REGISTER_LMUL_SEW(lmul, sew) \
case RISCV::PseudoVLE##sew##_V_M##lmul: \
NewOpc = RISCV::VL##lmul##RE##sew##_V; \
break; \
case RISCV::PseudoVSE##sew##_V_M##lmul: \
NewOpc = RISCV::VS##lmul##R_V; \
break;
#define CASE_WHOLE_REGISTER_LMUL(lmul) \
CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 8) \
CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 16) \
CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 32) \
CASE_WHOLE_REGISTER_LMUL_SEW(lmul, 64)
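// For example, CASE_WHOLE_REGISTER_LMUL_SEW(1, 8) expands to cases mapping
// PseudoVLE8_V_M1 -> VL1RE8_V and PseudoVSE8_V_M1 -> VS1R_V.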

unsigned NewOpc;
switch (MI.getOpcode()) {
CASE_WHOLE_REGISTER_LMUL(1)
CASE_WHOLE_REGISTER_LMUL(2)
CASE_WHOLE_REGISTER_LMUL(4)
CASE_WHOLE_REGISTER_LMUL(8)
default:
return false;
}

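// Only convert when the AVL is the VLMAX immediate sentinel (-1): only then
// is the access known to cover the whole register group.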
MachineOperand &VLOp = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
if (!VLOp.isImm() || VLOp.getImm() != RISCV::VLMaxSentinel)
return false;

// Whole register instructions aren't pseudos so they don't have
// policy/SEW/AVL ops.
[Contributor] passthru?

if (RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags))
MI.removeOperand(RISCVII::getVecPolicyOpNum(MI.getDesc()));
MI.removeOperand(RISCVII::getSEWOpNum(MI.getDesc()));
MI.removeOperand(RISCVII::getVLOpNum(MI.getDesc()));
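// Load pseudos also carry a passthru operand (operand 1, tied to the def);
// drop it as well.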
if (RISCVII::isFirstDefTiedToFirstUse(MI.getDesc()))
MI.removeOperand(1);

MI.setDesc(TII->get(NewOpc));

return true;
}

// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
bool RISCVVectorPeephole::convertVMergeToVMv(MachineInstr &MI) const {
@@ -281,6 +334,7 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr &MI : MBB) {
Changed |= convertToVLMAX(MI);
Changed |= convertToUnmasked(MI);
Changed |= convertToWholeRegister(MI);
Changed |= convertVMergeToVMv(MI);
}
}
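Taken together with the TableGen patterns removed above, the intended flow is: isel now always produces the unit-strided pseudos, and convertToWholeRegister recovers the whole-register forms when the AVL is VLMAX. A minimal sketch of an input that exercises this (hypothetical function, not from the patch):

; Sketch: a full-register-group store. Isel selects PseudoVSE32_V_M8 with the
; VLMAX sentinel AVL; the new peephole then rewrites it to VS8R_V.
define void @whole_reg_store_sketch(<vscale x 16 x i32> %v, ptr %p) {
  store <vscale x 16 x i32> %v, ptr %p
  ret void
}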
llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll (11 changes: 7 additions & 4 deletions)
[Contributor Author] These functions are marked with optnone so the machine SSA optimisation passes aren't run, including RISCVVectorPeephole.
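A minimal sketch of the shape of these functions (attribute group assumed for illustration): optnone, which requires noinline, keeps the machine SSA optimizations from running, so the vsetvli + vse32.v sequence is emitted instead of vs8r.v.

; Sketch, assumed attributes: with optnone the peephole never runs, so the
; unit-strided pseudo survives to emission.
define void @optnone_sketch(<vscale x 16 x i32> %v, ptr %p) #0 {
  store <vscale x 16 x i32> %v, ptr %p
  ret void
}
attributes #0 = { noinline optnone }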

@@ -20,7 +20,8 @@ define dso_local void @lots_args(i32 signext %x0, i32 signext %x1, <vscale x 16
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: sub a0, s0, a0
; CHECK-NEXT: addi a0, a0, -64
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: sw a2, -36(s0)
; CHECK-NEXT: sw a3, -40(s0)
; CHECK-NEXT: sw a4, -44(s0)
@@ -85,7 +86,8 @@ define dso_local signext i32 @main() #0 {
; CHECK-NEXT: slli s1, s1, 3
; CHECK-NEXT: sub s1, s0, s1
; CHECK-NEXT: addi s1, s1, -112
; CHECK-NEXT: vs8r.v v8, (s1)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vse32.v v8, (s1)
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: sw a0, -76(s0)
; CHECK-NEXT: sw a0, -80(s0)
@@ -99,7 +101,7 @@ define dso_local signext i32 @main() #0 {
; CHECK-NEXT: sw a0, -112(s0)
; CHECK-NEXT: lw a0, -76(s0)
; CHECK-NEXT: lw a1, -80(s0)
; CHECK-NEXT: vl8re32.v v8, (s1)
; CHECK-NEXT: vle32.v v8, (s1)
; CHECK-NEXT: lw a2, -84(s0)
; CHECK-NEXT: lw a3, -88(s0)
; CHECK-NEXT: lw a4, -92(s0)
@@ -115,7 +117,8 @@
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: lw a0, -76(s0)
; CHECK-NEXT: lw a1, -80(s0)
; CHECK-NEXT: vl8re32.v v8, (s1)
; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v8, (s1)
; CHECK-NEXT: lw a2, -84(s0)
; CHECK-NEXT: lw a3, -88(s0)
; CHECK-NEXT: lw a4, -92(s0)
llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll (4 changes: 2 additions & 2 deletions)

@@ -17,7 +17,7 @@ define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
; CHECK-NEXT: $v0 = COPY [[COPY1]]
; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size from %ir.p, align 8)
; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store (<vscale x 1 x s64>) into %ir.p)
; CHECK-NEXT: PseudoVSE32_V_M1 killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]], -1, 5 /* e32 */ :: (store (<vscale x 1 x s64>) into %ir.p)
[Contributor Author] This test stops after isel, so this is expected since RISCVVectorPeephole isn't run.
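A sketch of the implied pipeline cut-off (RUN flags assumed, for illustration only):

; RUN: llc -mtriple=riscv64 -mattr=+v -stop-after=finalize-isel < %s | FileCheck %s
; Stopping after isel means RISCVVectorPeephole never sees these instructions.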

; CHECK-NEXT: PseudoRET
%a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
%b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
@@ -36,7 +36,7 @@ define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
; CHECK-NEXT: $v0 = COPY [[COPY1]]
; CHECK-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 1 /* ta, mu */ :: (load unknown-size from %ir.p, align 8)
; CHECK-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store (<vscale x 1 x s64>) into %ir.p)
; CHECK-NEXT: PseudoVSE32_V_M1 killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]], -1, 5 /* e32 */ :: (store (<vscale x 1 x s64>) into %ir.p)
; CHECK-NEXT: PseudoRET
%a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
%b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
llvm/test/CodeGen/RISCV/rvv/vpload.ll (11 changes: 11 additions & 0 deletions)

@@ -536,3 +536,14 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
store <vscale x 1 x double> %hi, ptr %out
ret <vscale x 16 x double> %lo
}

define <vscale x 8 x i8> @vpload_all_active_nxv8i8(ptr %ptr) {
; CHECK-LABEL: vpload_all_active_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vl1r.v v8, (a0)
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale()
%evl = mul i32 %vscale, 8
%load = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x i8> %load
}
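For contrast, a hypothetical case the peephole must leave alone (not part of the patch): an EVL that is not provably VLMAX.

; Hypothetical negative case: %evl is unknown here, so this must stay a
; vsetvli + vle8.v rather than become vl1r.v.
define <vscale x 8 x i8> @vpload_not_all_active_sketch(ptr %ptr, i32 %evl) {
  %load = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 8 x i8> %load
}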
llvm/test/CodeGen/RISCV/rvv/vpstore.ll (11 changes: 11 additions & 0 deletions)

@@ -459,3 +459,14 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
call void @llvm.vp.store.nxv17f64.p0(<vscale x 17 x double> %val, ptr %ptr, <vscale x 17 x i1> %m, i32 %evl)
ret void
}

define void @vpstore_all_active_nxv8i8(<vscale x 8 x i8> %val, ptr %ptr) {
; CHECK-LABEL: vpstore_all_active_nxv8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vs1r.v v8, (a0)
; CHECK-NEXT: ret
%vscale = call i32 @llvm.vscale()
%evl = mul i32 %vscale, 8
call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> %val, ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret void
}
llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll (40 changes: 8 additions & 32 deletions)

@@ -487,42 +487,18 @@ define <vscale x 8 x double> @vfmerge_nzv_nxv8f64(<vscale x 8 x double> %va, <vs
define <vscale x 16 x double> @vselect_combine_regression(<vscale x 16 x i64> %va, <vscale x 16 x double> %vb) {
; CHECK-LABEL: vselect_combine_regression:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v24, v16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a0, a1
; CHECK-NEXT: vl8re64.v v8, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vmseq.vi v24, v16, 0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmseq.vi v0, v16, 0
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, mu
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: vmseq.vi v7, v24, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vle64.v v8, (a0), v0.t
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: vle64.v v16, (a1), v0.t
; CHECK-NEXT: ret
%cond = icmp eq <vscale x 16 x i64> %va, zeroinitializer
%sel = select <vscale x 16 x i1> %cond, <vscale x 16 x double> %vb, <vscale x 16 x double> zeroinitializer