diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 0a66a38f6d5abc..be2e880ecd3a98 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
   auto *VTy = cast<VectorType>(II.getType());
 
   IRBuilder<> Builder(&II);
-
-  // Extend VL from i32 to XLen if needed.
-  if (ST->is64Bit())
-    VL = Builder.CreateZExt(VL, Builder.getInt64Ty());
-
   Type *STy = VTy->getElementType();
   Value *Val = Builder.CreateLoad(STy, BasePtr);
-  const auto &TLI = *ST->getTargetLowering();
-  Value *Res;
-
-  // TODO: Also support fixed/illegal vector types to splat with evl = vl.
-  if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
-    unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
-                                              : Intrinsic::riscv_vmv_v_x;
-    Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
-                                  {PoisonValue::get(VTy), Val, VL});
-  } else {
-    Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
-  }
+  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
+                                       {Val, II.getOperand(2), VL});
 
   II.replaceAllUsesWith(Res);
   II.eraseFromParent();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index b8c7037580c46b..849f98c26f4593 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -638,14 +638,14 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
 define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-OPT-NEXT:    vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-OPT-NEXT:    vlse8.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
 ; CHECK-NO-OPT:       # %bb.0:
 ; CHECK-NO-OPT-NEXT:    lbu a0, 0(a0)
-; CHECK-NO-OPT-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e8, mf4, ta, ma
 ; CHECK-NO-OPT-NEXT:    vmv.v.x v8, a0
 ; CHECK-NO-OPT-NEXT:    ret
   %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3)
@@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
 define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-OPT:       # %bb.0:
-; CHECK-OPT-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
 ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
 ; CHECK-NO-OPT:       # %bb.0:
 ; CHECK-NO-OPT-NEXT:    flh fa5, 0(a0)
-; CHECK-NO-OPT-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
 ; CHECK-NO-OPT-NEXT:    vfmv.v.f v8, fa5
 ; CHECK-NO-OPT-NEXT:    ret
   %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 0010f64a93fd62..14976f21b7dbba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -1,16 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT,CHECK-OPT-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT,CHECK-OPT-RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-RV64
 
 declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, <vscale x 1 x i1>, i32)
 
@@ -823,15 +823,15 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
   ret <vscale x 1 x half> %load
 }
 
-define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
-; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+define <vscale x 1 x i64> @zero_strided_vadd_nxv1i64(<vscale x 1 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv1i64:
 ; CHECK-RV32:       # %bb.0:
 ; CHECK-RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
 ; CHECK-RV32-NEXT:    vlse64.v v9, (a0), zero
 ; CHECK-RV32-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-RV32-NEXT:    ret
 ;
-; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv1i64:
 ; CHECK-RV64:       # %bb.0:
 ; CHECK-RV64-NEXT:    ld a0, 0(a0)
 ; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
@@ -842,3 +842,69 @@ define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr)
   %w = add <vscale x 1 x i64> %v, %load
   ret <vscale x 1 x i64> %w
 }
+
+define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    csrr a1, vlenb
+; CHECK-RV32-NEXT:    srli a2, a1, 3
+; CHECK-RV32-NEXT:    sub a3, a2, a1
+; CHECK-RV32-NEXT:    sltu a4, a2, a3
+; CHECK-RV32-NEXT:    addi a4, a4, -1
+; CHECK-RV32-NEXT:    and a3, a4, a3
+; CHECK-RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v24, (a0), zero
+; CHECK-RV32-NEXT:    bltu a2, a1, .LBB55_2
+; CHECK-RV32-NEXT:  # %bb.1:
+; CHECK-RV32-NEXT:    mv a2, a1
+; CHECK-RV32-NEXT:  .LBB55_2:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vlse64.v v0, (a0), zero
+; CHECK-RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-RV32-NEXT:    vadd.vv v16, v16, v24
+; CHECK-RV32-NEXT:    vadd.vv v8, v8, v0
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    ld a0, 0(a0)
+; CHECK-RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-RV64-NEXT:    vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT:    vadd.vx v16, v16, a0
+; CHECK-RV64-NEXT:    ret
+  %vscale = call i32 @llvm.vscale()
+  %load = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i32(ptr %ptr, i32 0, <vscale x 16 x i1> splat (i1 true), i32 %vscale)
+  %w = add <vscale x 16 x i64> %v, %load
+  ret <vscale x 16 x i64> %w
+}
+
+define <vscale x 1 x ptr> @zero_strided_vadd_nxv1p0(<vscale x 1 x ptr> %v, ptr %ptr) {
+; CHECK-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-OPT-RV32:       # %bb.0:
+; CHECK-OPT-RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-OPT-RV32-NEXT:    vlse32.v v8, (a0), zero
+; CHECK-OPT-RV32-NEXT:    ret
+;
+; CHECK-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-OPT-RV64:       # %bb.0:
+; CHECK-OPT-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-OPT-RV64-NEXT:    vlse64.v v8, (a0), zero
+; CHECK-OPT-RV64-NEXT:    ret
+;
+; CHECK-NO-OPT-RV32-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-NO-OPT-RV32:       # %bb.0:
+; CHECK-NO-OPT-RV32-NEXT:    lw a0, 0(a0)
+; CHECK-NO-OPT-RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NO-OPT-RV32-NEXT:    vmv.v.x v8, a0
+; CHECK-NO-OPT-RV32-NEXT:    ret
+;
+; CHECK-NO-OPT-RV64-LABEL: zero_strided_vadd_nxv1p0:
+; CHECK-NO-OPT-RV64:       # %bb.0:
+; CHECK-NO-OPT-RV64-NEXT:    ld a0, 0(a0)
+; CHECK-NO-OPT-RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NO-OPT-RV64-NEXT:    vmv.v.x v8, a0
+; CHECK-NO-OPT-RV64-NEXT:    ret
+  %vscale = call i32 @llvm.vscale()
+  %load = call <vscale x 1 x ptr> @llvm.experimental.vp.strided.load.nxv1p0.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 %vscale)
+  ret <vscale x 1 x ptr> %load
+}
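
For illustration only, a minimal sketch of the IR-level rewrite expandVPStrideLoad performs after this change; the function name @zero_strided_example, the %evl argument, and the nxv1i64 element type are hypothetical and not taken from the tests above. An unmasked, zero-strided vp.strided.load becomes a scalar load splatted through llvm.experimental.vp.splat, forwarding the load's original mask operand (II.getOperand(2)) and EVL; because vp.splat takes an i32 EVL directly, the old i32-to-XLen zero-extension of VL is dropped. As the CHECK-OPT vs CHECK-NO-OPT runs show, the expansion is only used when the subtarget lacks +optimized-zero-stride-load, where the splat then lowers to a scalar load plus vmv.v.x/vfmv.v.f.

declare <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr, i64, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i64> @llvm.experimental.vp.splat.nxv1i64(i64, <vscale x 1 x i1>, i32)

; Before RISCVCodeGenPrepare: zero stride, all-ones mask, EVL %evl.
define <vscale x 1 x i64> @zero_strided_example(ptr %ptr, i32 zeroext %evl) {
  %load = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr %ptr, i64 0, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i64> %load
}

; Expected form afterwards: a scalar load plus a vp.splat that reuses the
; original mask and EVL.
define <vscale x 1 x i64> @zero_strided_example_expanded(ptr %ptr, i32 zeroext %evl) {
  %val = load i64, ptr %ptr
  %splat = call <vscale x 1 x i64> @llvm.experimental.vp.splat.nxv1i64(i64 %val, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i64> %splat
}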