From 5ed1a190aaf0d5f4c11f912dca7ce52f5e745296 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Tue, 16 Apr 2024 10:18:12 -0700
Subject: [PATCH] [RISCV] Avoid matching 3/5/9 * 2^N as 2^N + 2/4/8 (e.g. 24)

The former is better as a zero extend can be folded into the sll,
whereas the latter currently produces a separate zext.w due to bad
interactions with other combines.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  6 ++++++
 llvm/test/CodeGen/RISCV/addimm-mulimm.ll    |  9 ++++-----
 llvm/test/CodeGen/RISCV/rv64zba.ll          | 22 +++++++++++++++++++++
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 765838aafb58d..de2ad639f0d6c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13416,6 +13416,12 @@ static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
     return SDValue();
   uint64_t MulAmt = CNode->getZExtValue();
 
+  // 3/5/9 * 2^N -> shXadd (sll X, C), (sll X, C)
+  // Matched in tablegen, avoid perturbing patterns.
+  for (uint64_t Divisor : {3, 5, 9})
+    if (MulAmt % Divisor == 0 && isPowerOf2_64(MulAmt / Divisor))
+      return SDValue();
+
   // If this is a power 2 + 2/4/8, we can use a shift followed by a single
   // shXadd. First check if this a sum of two power of 2s because that's
   // easy. Then count how many zeros are up to the first bit.
diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
index 10103f071462c..48fa69e104565 100644
--- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -551,9 +551,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
 ; RV32IMB-NEXT:    sh3add a1, a1, a2
 ; RV32IMB-NEXT:    sh1add a0, a0, a0
 ; RV32IMB-NEXT:    slli a2, a0, 3
-; RV32IMB-NEXT:    li a3, 1
-; RV32IMB-NEXT:    slli a3, a3, 11
-; RV32IMB-NEXT:    sh3add a0, a0, a3
+; RV32IMB-NEXT:    addi a0, a2, 2047
+; RV32IMB-NEXT:    addi a0, a0, 1
 ; RV32IMB-NEXT:    sltu a2, a0, a2
 ; RV32IMB-NEXT:    add a1, a1, a2
 ; RV32IMB-NEXT:    ret
@@ -562,8 +561,8 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) {
 ; RV64IMB:       # %bb.0:
 ; RV64IMB-NEXT:    addi a0, a0, 86
 ; RV64IMB-NEXT:    sh1add a0, a0, a0
-; RV64IMB-NEXT:    slli a0, a0, 3
-; RV64IMB-NEXT:    addi a0, a0, -16
+; RV64IMB-NEXT:    li a1, -16
+; RV64IMB-NEXT:    sh3add a0, a0, a1
 ; RV64IMB-NEXT:    ret
   %tmp0 = mul i64 %x, 24
   %tmp1 = add i64 %tmp0, 2048
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index a84b9e5e7962f..c3c757656be93 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -2490,3 +2490,25 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) {
   %gep2 = getelementptr i64, ptr %gep1, i64 %a1
   ret ptr %gep2
 }
+
+define i64 @regression(i32 signext %x, i32 signext %y) {
+; RV64I-LABEL: regression:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    subw a0, a0, a1
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    li a1, 3
+; RV64I-NEXT:    slli a1, a1, 35
+; RV64I-NEXT:    mulhu a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: regression:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    subw a0, a0, a1
+; RV64ZBA-NEXT:    slli.uw a0, a0, 3
+; RV64ZBA-NEXT:    sh1add a0, a0, a0
+; RV64ZBA-NEXT:    ret
+  %sub = sub i32 %x, %y
+  %ext = zext i32 %sub to i64
+  %res = mul nuw nsw i64 %ext, 24
+  ret i64 %res
+}
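
Illustrative note (not part of the patch): a minimal standalone C++ sketch of the
new early-exit check from the RISCVISelLowering.cpp hunk above, assuming the same
semantics; isPowerOf2_64 is approximated with a plain bit trick and the driver
values are illustrative so the snippet builds and runs outside of LLVM.

#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Stand-in for llvm::isPowerOf2_64, for this sketch only.
static bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

// True when MulAmt has the form 3/5/9 * 2^N, i.e. the multipliers the patch
// now leaves to the existing tablegen shNadd/slli patterns instead of the
// "power of 2 + 2/4/8" expansion in expandMul.
static bool leftToTablegen(uint64_t MulAmt) {
  for (uint64_t Divisor : {3, 5, 9})
    if (MulAmt % Divisor == 0 && isPow2(MulAmt / Divisor))
      return true;
  return false;
}

int main() {
  // 24 = 3*8, 40 = 5*8, 72 = 9*8, 20 = 5*4 take the tablegen path; 44 does not.
  for (uint64_t M : {24, 40, 72, 20, 44})
    std::printf("%llu -> %s\n", (unsigned long long)M,
                leftToTablegen(M) ? "tablegen" : "expandMul");
  return 0;
}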