diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6e97575c167cd..9eede7261b113 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13356,10 +13356,56 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
 }
 
-static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
+// Try to expand a scalar multiply to a faster sequence.
+static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
+                         TargetLowering::DAGCombinerInfo &DCI,
+                         const RISCVSubtarget &Subtarget) {
+
   EVT VT = N->getValueType(0);
-  if (!VT.isVector())
+
+  // LI + MUL is usually smaller than the alternative sequence.
+  if (DAG.getMachineFunction().getFunction().hasMinSize())
+    return SDValue();
+
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  if (VT != Subtarget.getXLenVT())
+    return SDValue();
+
+  if (!Subtarget.hasStdExtZba())
+    return SDValue();
+
+  ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!CNode)
     return SDValue();
+  uint64_t MulAmt = CNode->getZExtValue();
+
+  // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
+  // shXadd. First check that this is a sum of two powers of 2 because that's
+  // easy. Then count the trailing zeros to find the smaller power of 2.
+  if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
+    unsigned ScaleShift = llvm::countr_zero(MulAmt);
+    if (ScaleShift >= 1 && ScaleShift < 4) {
+      unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
+      SDLoc DL(N);
+      SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                   DAG.getConstant(ShiftAmt, DL, VT));
+      SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                   DAG.getConstant(ScaleShift, DL, VT));
+      return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
+    }
+  }
+  return SDValue();
+}
+
+
+static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const RISCVSubtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector())
+    return expandMul(N, DAG, DCI, Subtarget);
 
   SDLoc DL(N);
   SDValue N0 = N->getOperand(0);
@@ -15906,7 +15952,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::MUL:
     if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
       return V;
-    return performMULCombine(N, DAG);
+    return performMULCombine(N, DAG, DCI, Subtarget);
   case ISD::SDIV:
   case ISD::UDIV:
   case ISD::SREM:
diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
index 48fa69e104565..10103f071462c 100644
--- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -551,8 +551,9 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
 ; RV32IMB-NEXT:    sh3add a1, a1, a2
 ; RV32IMB-NEXT:    sh1add a0, a0, a0
 ; RV32IMB-NEXT:    slli a2, a0, 3
-; RV32IMB-NEXT:    addi a0, a2, 2047
-; RV32IMB-NEXT:    addi a0, a0, 1
+; RV32IMB-NEXT:    li a3, 1
+; RV32IMB-NEXT:    slli a3, a3, 11
+; RV32IMB-NEXT:    sh3add a0, a0, a3
 ; RV32IMB-NEXT:    sltu a2, a0, a2
 ; RV32IMB-NEXT:    add a1, a1, a2
 ; RV32IMB-NEXT:    ret
@@ -561,8 +562,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 86
 ; RV64IMB-NEXT:    sh1add a0, a0, a0
-; RV64IMB-NEXT:    li a1, -16
-; RV64IMB-NEXT:    sh3add a0, a0, a1
+; RV64IMB-NEXT:    slli a0, a0, 3
+; RV64IMB-NEXT:    addi a0, a0, -16
 ; RV64IMB-NEXT:    ret
   %tmp0 = mul i64 %x, 24
   %tmp1 = add i64 %tmp0, 2048
diff --git a/llvm/test/CodeGen/RISCV/rv32zba.ll b/llvm/test/CodeGen/RISCV/rv32zba.ll
index 0908a393338c5..cc632a09c8054 100644
--- a/llvm/test/CodeGen/RISCV/rv32zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zba.ll
@@ -271,31 +271,49 @@ define i32 @mul288(i32 %a) {
 }
 
 define i32 @mul258(i32 %a) {
-; CHECK-LABEL: mul258:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 258
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: mul258:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 258
+; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBA-LABEL: mul258:
+; RV32ZBA:       # %bb.0:
+; RV32ZBA-NEXT:    slli a1, a0, 8
+; RV32ZBA-NEXT:    sh1add a0, a0, a1
+; RV32ZBA-NEXT:    ret
   %c = mul i32 %a, 258
   ret i32 %c
 }
 
 define i32 @mul260(i32 %a) {
-; CHECK-LABEL: mul260:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 260
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: mul260:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 260
+; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBA-LABEL: mul260:
+; RV32ZBA:       # %bb.0:
+; RV32ZBA-NEXT:    slli a1, a0, 8
+; RV32ZBA-NEXT:    sh2add a0, a0, a1
+; RV32ZBA-NEXT:    ret
   %c = mul i32 %a, 260
   ret i32 %c
 }
 
 define i32 @mul264(i32 %a) {
-; CHECK-LABEL: mul264:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 264
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: mul264:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    li a1, 264
+; RV32I-NEXT:    mul a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBA-LABEL: mul264:
+; RV32ZBA:       # %bb.0:
+; RV32ZBA-NEXT:    slli a1, a0, 8
+; RV32ZBA-NEXT:    sh3add a0, a0, a1
+; RV32ZBA-NEXT:    ret
   %c = mul i32 %a, 264
   ret i32 %c
 }
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
index 90cfb1fdcb779..ee9b73ca82f21 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
@@ -811,31 +811,49 @@ define i64 @adduw_imm(i32 signext %0) nounwind {
 }
 
 define i64 @mul258(i64 %a) {
-; CHECK-LABEL: mul258:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 258
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul258:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 258
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul258:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 8
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 258
   ret i64 %c
 }
 
 define i64 @mul260(i64 %a) {
-; CHECK-LABEL: mul260:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 260
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul260:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 260
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul260:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 8
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 260
   ret i64 %c
 }
 
 define i64 @mul264(i64 %a) {
-; CHECK-LABEL: mul264:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 264
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul264:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 264
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul264:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 8
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 264
   ret i64 %c
 }
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 7e32253c8653f..d334109ec9bf8 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -834,31 +834,49 @@ define i64 @adduw_imm(i32 signext %0) nounwind {
 }
 
 define i64 @mul258(i64 %a) {
-; CHECK-LABEL: mul258:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 258
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul258:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 258
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul258:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 8
+; RV64ZBA-NEXT:    sh1add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 258
   ret i64 %c
 }
 
 define i64 @mul260(i64 %a) {
-; CHECK-LABEL: mul260:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 260
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul260:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 260
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul260:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 8
+; RV64ZBA-NEXT:    sh2add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 260
   ret i64 %c
 }
 
 define i64 @mul264(i64 %a) {
-; CHECK-LABEL: mul264:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 264
-; CHECK-NEXT:    mul a0, a0, a1
-; CHECK-NEXT:    ret
+; RV64I-LABEL: mul264:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    li a1, 264
+; RV64I-NEXT:    mul a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: mul264:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    slli a1, a0, 8
+; RV64ZBA-NEXT:    sh3add a0, a0, a1
+; RV64ZBA-NEXT:    ret
   %c = mul i64 %a, 264
   ret i64 %c
 }
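
---

As a sanity check on the arithmetic behind this patch (not part of the change itself), the sketch below re-implements expandMul's bit tests in plain C++20 and verifies the rewrite on the multipliers exercised by the tests above. The helper name matchShiftShxadd is invented for illustration; only the bit manipulation mirrors the patch.

```cpp
// Minimal standalone sketch (not part of the patch): mirrors the bit tests
// in expandMul and checks that x * (2^N + 2^S) == (x << N) + (x << S).
#include <bit>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical helper for illustration. Succeeds when MulAmt has exactly two
// set bits and the lower one is 2, 4, or 8 (so the add folds into shXadd).
static bool matchShiftShxadd(uint64_t MulAmt, unsigned &ShiftAmt,
                             unsigned &ScaleShift) {
  uint64_t High = MulAmt & (MulAmt - 1); // clear the lowest set bit
  if (!std::has_single_bit(High))
    return false; // need exactly two set bits in MulAmt
  ScaleShift = std::countr_zero(MulAmt); // position of the lower set bit
  if (ScaleShift < 1 || ScaleShift >= 4)
    return false; // lower term must be 2, 4, or 8
  ShiftAmt = std::bit_width(High) - 1; // position of the higher set bit
  return true;
}

int main() {
  // 258/260/264 are the multipliers from the tests above; 2050 = 2048 + 2.
  for (uint64_t MulAmt : {258, 260, 264, 2050}) {
    unsigned ShiftAmt = 0, ScaleShift = 0;
    assert(matchShiftShxadd(MulAmt, ShiftAmt, ScaleShift));
    for (uint64_t X : {1, 3, 12345}) {
      // Models "slli t, x, ShiftAmt" followed by "shXadd x, x, t".
      assert((X << ShiftAmt) + (X << ScaleShift) == X * MulAmt);
    }
    std::printf("%llu = (1 << %u) + (1 << %u)\n",
                (unsigned long long)MulAmt, ShiftAmt, ScaleShift);
  }
  return 0;
}
```

For mul258, for example, the match yields ShiftAmt = 8 and ScaleShift = 1, which is exactly the `slli a1, a0, 8` / `sh1add a0, a0, a1` pair the updated RV32ZBA/RV64ZBA checks expect.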