Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Expand mul to shNadd x, (slli x, c) in DAGCombine #88524

Merged
merged 3 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13356,10 +13356,56 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG) {
// Try to expand a scalar multiply to a faster sequence.
//
// Handles a MUL whose second operand is a constant with exactly two bits set,
// where the lower set bit is bit 1, 2 or 3 (i.e. 2^k + 2/4/8). Such a multiply
// is rewritten as (x << k) + (x << 1/2/3); e.g. for x * 258 the Zba tests
// expect `slli` by 8 followed by `sh1add`.
//
// \param N         The ISD::MUL node being combined.
// \param DAG       SelectionDAG used to build the replacement nodes.
// \param DCI       Combiner state; used to check the legalization phase.
// \param Subtarget Queried for the XLen value type and for Zba support.
// \returns The replacement ADD node, or an empty SDValue() if any guard below
//          rejects the node or the constant does not have the required shape.
static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {

EVT VT = N->getValueType(0);
// NOTE(review): the `if` below has no statement of its own, so as written it
// guards the hasMinSize() check that follows. It looks like a leftover line
// from the pre-change function in the diff rendering -- confirm against the
// merged file.
if (!VT.isVector())

// LI + MUL is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();

// Do nothing when running before legalization or when invoked by the
// legalizer itself.
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();

// Only the native XLen-wide integer type is handled.
if (VT != Subtarget.getXLenVT())
return SDValue();

// The expansion targets shXadd, so it is only attempted when Zba is present.
if (!Subtarget.hasStdExtZba())
return SDValue();

// Only a constant multiplier in operand 1 is considered.
ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!CNode)
return SDValue();
uint64_t MulAmt = CNode->getZExtValue();

// If this is a power of 2 + 2/4/8, we can use a shift followed by a single
// shXadd. First check if this is a sum of two powers of 2 because that's
// easy. Then count how many zeros there are up to the first set bit.
// (MulAmt & (MulAmt - 1)) clears the lowest set bit of MulAmt; if what remains
// is a power of two, MulAmt had exactly two bits set.
if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
// Position of the low set bit; 1, 2 or 3 corresponds to the +2/+4/+8 term.
unsigned ScaleShift = llvm::countr_zero(MulAmt);
if (ScaleShift >= 1 && ScaleShift < 4) {
// Position of the high set bit, i.e. the plain shift amount.
unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
SDLoc DL(N);
// Build (x << ShiftAmt) + (x << ScaleShift).
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(ShiftAmt, DL, VT));
SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(ScaleShift, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
}
}
// No profitable expansion found; leave the MUL as it is.
return SDValue();
}


static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return expandMul(N, DAG, DCI, Subtarget);

SDLoc DL(N);
SDValue N0 = N->getOperand(0);
Expand Down Expand Up @@ -15906,7 +15952,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MUL:
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
return V;
return performMULCombine(N, DAG);
return performMULCombine(N, DAG, DCI, Subtarget);
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
Expand Down
9 changes: 5 additions & 4 deletions llvm/test/CodeGen/RISCV/addimm-mulimm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,9 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
; RV32IMB-NEXT: sh3add a1, a1, a2
; RV32IMB-NEXT: sh1add a0, a0, a0
; RV32IMB-NEXT: slli a2, a0, 3
; RV32IMB-NEXT: addi a0, a2, 2047
; RV32IMB-NEXT: addi a0, a0, 1
; RV32IMB-NEXT: li a3, 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this worse?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(a) This test is really checking for a combine loop; (b) Zbs would produce a bset here, and Zba without Zbs is a mildly odd configuration; and (c) the overall better result would probably be (x*3+256)*8 (SH1ADD, ADDI, SHL), so we're not optimal either way.

So yes, but I don't think we care.

; RV32IMB-NEXT: slli a3, a3, 11
; RV32IMB-NEXT: sh3add a0, a0, a3
; RV32IMB-NEXT: sltu a2, a0, a2
; RV32IMB-NEXT: add a1, a1, a2
; RV32IMB-NEXT: ret
Expand All @@ -561,8 +562,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
; RV64IMB: # %bb.0:
; RV64IMB-NEXT: addi a0, a0, 86
; RV64IMB-NEXT: sh1add a0, a0, a0
; RV64IMB-NEXT: li a1, -16
; RV64IMB-NEXT: sh3add a0, a0, a1
; RV64IMB-NEXT: slli a0, a0, 3
; RV64IMB-NEXT: addi a0, a0, -16
; RV64IMB-NEXT: ret
%tmp0 = mul i64 %x, 24
%tmp1 = add i64 %tmp0, 2048
Expand Down
48 changes: 33 additions & 15 deletions llvm/test/CodeGen/RISCV/rv32zba.ll
Original file line number Diff line number Diff line change
Expand Up @@ -271,31 +271,49 @@ define i32 @mul288(i32 %a) {
}

define i32 @mul258(i32 %a) {
; CHECK-LABEL: mul258:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 258
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
; RV32I-LABEL: mul258:
; RV32I: # %bb.0:
; RV32I-NEXT: li a1, 258
; RV32I-NEXT: mul a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBA-LABEL: mul258:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: slli a1, a0, 8
; RV32ZBA-NEXT: sh1add a0, a0, a1
; RV32ZBA-NEXT: ret
%c = mul i32 %a, 258
ret i32 %c
}

define i32 @mul260(i32 %a) {
; CHECK-LABEL: mul260:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 260
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
; RV32I-LABEL: mul260:
; RV32I: # %bb.0:
; RV32I-NEXT: li a1, 260
; RV32I-NEXT: mul a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBA-LABEL: mul260:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: slli a1, a0, 8
; RV32ZBA-NEXT: sh2add a0, a0, a1
; RV32ZBA-NEXT: ret
%c = mul i32 %a, 260
ret i32 %c
}

define i32 @mul264(i32 %a) {
; CHECK-LABEL: mul264:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 264
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
; RV32I-LABEL: mul264:
; RV32I: # %bb.0:
; RV32I-NEXT: li a1, 264
; RV32I-NEXT: mul a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBA-LABEL: mul264:
; RV32ZBA: # %bb.0:
; RV32ZBA-NEXT: slli a1, a0, 8
; RV32ZBA-NEXT: sh3add a0, a0, a1
; RV32ZBA-NEXT: ret
%c = mul i32 %a, 264
ret i32 %c
}
Expand Down
48 changes: 33 additions & 15 deletions llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll
Original file line number Diff line number Diff line change
Expand Up @@ -811,31 +811,49 @@ define i64 @adduw_imm(i32 signext %0) nounwind {
}

define i64 @mul258(i64 %a) {
; CHECK-LABEL: mul258:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 258
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: mul258:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 258
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: mul258:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a0, 8
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
%c = mul i64 %a, 258
ret i64 %c
}

define i64 @mul260(i64 %a) {
; CHECK-LABEL: mul260:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 260
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: mul260:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 260
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: mul260:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a0, 8
; RV64ZBA-NEXT: sh2add a0, a0, a1
; RV64ZBA-NEXT: ret
%c = mul i64 %a, 260
ret i64 %c
}

define i64 @mul264(i64 %a) {
; CHECK-LABEL: mul264:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 264
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: mul264:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 264
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: mul264:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a0, 8
; RV64ZBA-NEXT: sh3add a0, a0, a1
; RV64ZBA-NEXT: ret
%c = mul i64 %a, 264
ret i64 %c
}
Expand Down
48 changes: 33 additions & 15 deletions llvm/test/CodeGen/RISCV/rv64zba.ll
Original file line number Diff line number Diff line change
Expand Up @@ -834,31 +834,49 @@ define i64 @adduw_imm(i32 signext %0) nounwind {
}

define i64 @mul258(i64 %a) {
; CHECK-LABEL: mul258:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 258
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: mul258:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 258
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: mul258:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a0, 8
; RV64ZBA-NEXT: sh1add a0, a0, a1
; RV64ZBA-NEXT: ret
%c = mul i64 %a, 258
ret i64 %c
}

define i64 @mul260(i64 %a) {
; CHECK-LABEL: mul260:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 260
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: mul260:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 260
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: mul260:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a0, 8
; RV64ZBA-NEXT: sh2add a0, a0, a1
; RV64ZBA-NEXT: ret
%c = mul i64 %a, 260
ret i64 %c
}

define i64 @mul264(i64 %a) {
; CHECK-LABEL: mul264:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 264
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: ret
; RV64I-LABEL: mul264:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, 264
; RV64I-NEXT: mul a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: mul264:
; RV64ZBA: # %bb.0:
; RV64ZBA-NEXT: slli a1, a0, 8
; RV64ZBA-NEXT: sh3add a0, a0, a1
; RV64ZBA-NEXT: ret
%c = mul i64 %a, 264
ret i64 %c
}
Expand Down
Loading