[AArch64] Combine subtract with borrow to SBC. #165271
Merged
Conversation
Specifically, this patch adds the following combines: (1) SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b); (2) SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b). The CSET may be preceded by a ZEXT.
Member
|
@llvm/pr-subscribers-backend-aarch64 Author: Ricardo Jesus (rj-jesus) Changes: Specifically, this patch adds the following combines: (1) SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b); (2) SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b). The CSET may be preceded by a ZEXT. Fixes #164748, but please let me know if anyone has a better suggestion. Full diff: https://github.com/llvm/llvm-project/pull/165271.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d16b11686e3c1..f7cdfd00d84ec 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22328,6 +22328,37 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
}
+// Attempt to fold a subtract of a borrow bit into a single SBC:
+//   SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
+//   SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
+// The CSET may be preceded by a ZEXT. Returns an empty SDValue on no match.
+static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() != ISD::SUB) // Reached via performAddSubCombine; SUB only.
+ return SDValue();
+ // Only i32 and i64 result types are handled.
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+ // Operand 1 is the borrow candidate; look through a single-use ZERO_EXTEND.
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse())
+ N1 = N1.getOperand(0);
+ if (!N1.hasOneUse() || getCSETCondCode(N1) != AArch64CC::LO) // one-use CSET LO
+ return SDValue();
+ // Operand 3 of the CSET node is used as its flags input; require a SUBS.
+ SDValue Flags = N1.getOperand(3);
+ if (Flags.getOpcode() != AArch64ISD::SUBS)
+ return SDValue();
+ // Emit SBC on those flags. The inner SUB is not checked for a single use.
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ if (N0->getOpcode() != ISD::SUB)
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
+ Flags); // SUB x, borrow -> SBC x, 0
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), Flags); // SUB (SUB x, y), borrow -> SBC x, y
+}
+
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Try to change sum of two reductions.
@@ -22349,6 +22380,8 @@ static SDValue performAddSubCombine(SDNode *N,
return Val;
if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG))
return Val;
+ if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG))
+ return Val;
if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
diff --git a/llvm/test/CodeGen/AArch64/sbc.ll b/llvm/test/CodeGen/AArch64/sbc.ll
new file mode 100644
index 0000000000000..fff63c1709218
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sbc.ll
@@ -0,0 +1,392 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck --check-prefixes=CHECK,CHECK-SD %s
+; RUN: llc < %s -global-isel | FileCheck --check-prefixes=CHECK,CHECK-GI %s
+
+target triple = "aarch64-none-linux-gnu"
+
+define i32 @test_basic_i32(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_basic_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_basic_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i64 @test_basic_i64(i64 %a, i64 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_basic_i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, x1
+; CHECK-SD-NEXT: sbc x0, x2, x3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_basic_i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp x0, x1
+; CHECK-GI-NEXT: sub x9, x2, x3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub x0, x9, x8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+define i64 @test_mixed_i32_i64(i32 %a, i32 %b, i64 %x, i64 %y) {
+; CHECK-SD-LABEL: test_mixed_i32_i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc x0, x2, x3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_mixed_i32_i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub x9, x2, x3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub x0, x9, x8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i64
+ %sub = sub i64 %x, %y
+ %res = sub i64 %sub, %carry
+ ret i64 %res
+}
+
+define i32 @test_mixed_i64_i32(i64 %a, i64 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_mixed_i64_i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp x0, x1
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_mixed_i64_i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp x0, x1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i64 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_only_borrow(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_only_borrow:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, wzr
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_only_borrow:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w2, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ ret i32 %res
+}
+
+define i32 @test_sext_add(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_sext_add:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sext_add:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sbfx w8, w8, #0, #1
+; CHECK-GI-NEXT: add w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = sext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = add i32 %sub, %carry
+ ret i32 %res
+}
+
+; FIXME: This case could be supported with reversed operands to the CMP.
+define i32 @test_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_ugt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, hi
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_ugt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ugt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_unsupported_cc_slt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_slt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, lt
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_slt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp slt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_unsupported_cc_sgt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_unsupported_cc_sgt:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cset w9, gt
+; CHECK-SD-NEXT: sub w0, w8, w9
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_unsupported_cc_sgt:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp sgt i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ ret i32 %res
+}
+
+define i32 @test_multiple_setcc_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_setcc_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: sub w19, w2, w0
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_setcc_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w19, w2
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: mov w0, w20
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i1 %cc)
+ ret i32 %res
+}
+
+define i32 @test_multiple_carry_uses(i32 %a, i32 %b, i32 %x) {
+; CHECK-SD-LABEL: test_multiple_carry_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: sub w19, w2, w0
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_carry_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w19, w2
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: mov w0, w20
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %res = sub i32 %x, %carry
+ tail call void @use(i32 %carry)
+ ret i32 %res
+}
+
+define i32 @test_multiple_sub_uses(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-SD-LABEL: test_multiple_sub_uses:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w19, -8
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: sub w8, w2, w3
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: mov w0, w8
+; CHECK-SD-NEXT: sbc w19, w2, w3
+; CHECK-SD-NEXT: bl use
+; CHECK-SD-NEXT: mov w0, w19
+; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_multiple_sub_uses:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: .cfi_offset w19, -8
+; CHECK-GI-NEXT: .cfi_offset w20, -16
+; CHECK-GI-NEXT: .cfi_offset w30, -32
+; CHECK-GI-NEXT: sub w19, w2, w3
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: mov w0, w19
+; CHECK-GI-NEXT: cset w20, lo
+; CHECK-GI-NEXT: bl use
+; CHECK-GI-NEXT: sub w0, w19, w20
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i32 %a, %b
+ %carry = zext i1 %cc to i32
+ %sub = sub i32 %x, %y
+ %res = sub i32 %sub, %carry
+ tail call void @use(i32 %sub)
+ ret i32 %res
+}
+
+define i8 @test_i8(i8 %a, i8 %b, i8 %x, i8 %y) {
+; CHECK-SD-LABEL: test_i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: cmp w8, w1, uxtb
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xff
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cmp w8, w1, uxtb
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i8 %a, %b
+ %carry = zext i1 %cc to i8
+ %sub = sub i8 %x, %y
+ %res = sub i8 %sub, %carry
+ ret i8 %res
+}
+
+define i16 @test_i16(i16 %a, i16 %b, i16 %x, i16 %y) {
+; CHECK-SD-LABEL: test_i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xffff
+; CHECK-SD-NEXT: cmp w8, w1, uxth
+; CHECK-SD-NEXT: sbc w0, w2, w3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xffff
+; CHECK-GI-NEXT: sub w9, w2, w3
+; CHECK-GI-NEXT: cmp w8, w1, uxth
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: sub w0, w9, w8
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult i16 %a, %b
+ %carry = zext i1 %cc to i16
+ %sub = sub i16 %x, %y
+ %res = sub i16 %sub, %carry
+ ret i16 %res
+}
+
+define <4 x i32> @test_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-SD-LABEL: test_v4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub v2.4s, v2.4s, v3.4s
+; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-SD-NEXT: add v0.4s, v2.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_v4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v4.4s, #1
+; CHECK-GI-NEXT: cmhi v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: sub v1.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v4.16b
+; CHECK-GI-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: ret
+ %cc = icmp ult <4 x i32> %a, %b
+ %carry = zext <4 x i1> %cc to <4 x i32>
+ %sub = sub <4 x i32> %x, %y
+ %res = sub <4 x i32> %sub, %carry
+ ret <4 x i32> %res
+}
+
+declare void @use()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
|
rj-jesus
commented
Oct 27, 2025
Contributor
|
Question: does ADC have this problem too? |
Contributor
Author
I believe so. I can look into extending this PR with ADC once it lands (or I can do so during review if that's more practical). |
paulwalker-arm
approved these changes
Nov 6, 2025
ckoparkar
added a commit
to ckoparkar/llvm-project
that referenced
this pull request
Nov 10, 2025
* main: (1028 commits) [clang][DebugInfo] Attach `DISubprogram` to additional call variants (llvm#166202) [C2y] Claim nonconformance to WG14 N3348 (llvm#166966) [X86] 2012-01-10-UndefExceptionEdge.ll - regenerate test checks (llvm#167307) Remove unused standard headers: <string>, <optional>, <numeric>, <tuple> (llvm#167232) [DebugInfo] Add Verifier check for incorrectly-scoped retainedNodes (llvm#166855) [VPlan] Don't apply predication discount to non-originally-predicated blocks (llvm#160449) [libc++] Avoid overloaded `operator,` for (`T`, `Iter`) cases (llvm#161049) [tools][llc] Make save-stats.ll test target independent (llvm#167238) [AArch64] Fallback to PRFUM for PRFM with negative or unaligned offset (llvm#166756) [X86] ldexp-avx512.ll - add v8f16/v16f16/v32f16 test coverage for llvm#165694 (llvm#167294) [DropAssumes] Drop dereferenceable assumptions after vectorization. (llvm#166947) [VPlan] Simplify branch-cond with getVectorTripCount (llvm#155604) Remove unused <algorithm> inclusion (llvm#166942) [AArch64] Combine subtract with borrow to SBC. (llvm#165271) [AArch64][SVE] Avoid redundant extend of unsigned i8/i16 extracts. (llvm#165863) [SPIRV] Fix failing assertion in SPIRVAsmPrinter (llvm#166909) [libc++] Merge insert/emplace(const_iterator, Args...) implementations (llvm#166470) [libc++] Replace __libcpp_is_final with a variable template (llvm#167137) [gn build] Port 152bda7 [libc++] Replace the last uses of __tuple_types with __type_list (llvm#167214) ...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Specifically, this patch adds the following combines:
SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
The CSET may be preceded by a ZEXT.
Fixes #164748, but please let me know if anyone has a better suggestion.