From 6e89500d88bbce00406d50dd6931872e3e4be9d8 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 19 Mar 2024 11:14:57 +0100 Subject: [PATCH] [SystemZ] Fix overflow flag for i128 USUBO We use the VSCBIQ/VSBIQ/VSBCBIQ family of instructions to implement USUBO/USUBO_CARRY for the i128 data type. However, these instructions use an inverted sense of the borrow indication flag (a value of 1 indicates *no* borrow, while a value of 0 indicates borrow). This does not match the semantics of the boolean "overflow" flag of the USUBO/USUBO_CARRY ISD nodes. Fix this by generating code to explicitly invert the flag. These cancel out if the result of USUBO feeds into a USUBO_CARRY. To avoid unnecessary zero-extend operations, also improve the DAGCombine handling of ZERO_EXTEND to optimize (zext (xor (trunc))) sequences where appropriate. Fixes: https://github.com/llvm/llvm-project/issues/83268 --- .../Target/SystemZ/SystemZISelLowering.cpp | 34 +++++++++++++++++++ llvm/test/CodeGen/SystemZ/int-usub-12.ll | 22 ++++++++++++ llvm/test/CodeGen/SystemZ/int-usub-13.ll | 2 ++ 3 files changed, 58 insertions(+) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 5db04a8bef824a..5e0b0594b0a421 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -4252,6 +4252,7 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, if (N->getValueType(0) == MVT::i128) { unsigned BaseOp = 0; unsigned FlagOp = 0; + bool IsBorrow = false; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown instruction!"); case ISD::UADDO: @@ -4261,6 +4262,7 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, case ISD::USUBO: BaseOp = ISD::SUB; FlagOp = SystemZISD::VSCBI; + IsBorrow = true; break; } SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS); @@ -4268,6 +4270,9 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, Flag = DAG.getNode(ISD::AssertZext, DL, 
MVT::i128, Flag, DAG.getValueType(MVT::i1)); Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); + if (IsBorrow) + Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), + Flag, DAG.getConstant(1, DL, Flag.getValueType())); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); } @@ -4340,6 +4345,7 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op, if (VT == MVT::i128) { unsigned BaseOp = 0; unsigned FlagOp = 0; + bool IsBorrow = false; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown instruction!"); case ISD::UADDO_CARRY: @@ -4349,14 +4355,21 @@ SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op, case ISD::USUBO_CARRY: BaseOp = SystemZISD::VSBI; FlagOp = SystemZISD::VSBCBI; + IsBorrow = true; break; } + if (IsBorrow) + Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(), + Carry, DAG.getConstant(1, DL, Carry.getValueType())); Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128); SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry); SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry); Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, DAG.getValueType(MVT::i1)); Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); + if (IsBorrow) + Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), + Flag, DAG.getConstant(1, DL, Flag.getValueType())); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); } @@ -6611,6 +6624,27 @@ SDValue SystemZTargetLowering::combineZERO_EXTEND( return NewSelect; } } + // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size + // of the result is smaller than the size of X and all the truncated bits + // of X are already zero. 
+ if (N0.getOpcode() == ISD::XOR && + N0.hasOneUse() && N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant) { + SDValue X = N0.getOperand(0).getOperand(0); + if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) { + KnownBits Known = DAG.computeKnownBits(X); + APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(), + N0.getValueSizeInBits(), + VT.getSizeInBits()); + if (TruncatedBits.isSubsetOf(Known.Zero)) { + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); + return DAG.getNode(ISD::XOR, SDLoc(N0), VT, + X, DAG.getConstant(Mask, SDLoc(N0), VT)); + } + } + } return SDValue(); } diff --git a/llvm/test/CodeGen/SystemZ/int-usub-12.ll b/llvm/test/CodeGen/SystemZ/int-usub-12.ll index c39a6da37048d3..147fbfd920a9dc 100644 --- a/llvm/test/CodeGen/SystemZ/int-usub-12.ll +++ b/llvm/test/CodeGen/SystemZ/int-usub-12.ll @@ -11,6 +11,7 @@ define zeroext i1 @f1(i128 %a, i128 %b, ptr %res) { ; CHECK-NEXT: vscbiq %v2, %v1, %v0 ; CHECK-NEXT: vlgvg %r2, %v2, 1 ; CHECK-NEXT: vsq %v0, %v1, %v0 +; CHECK-NEXT: xilf %r2, 1 ; CHECK-NEXT: vst %v0, 0(%r4), 3 ; CHECK-NEXT: br %r14 %t = call {i128, i1} @llvm.usub.with.overflow.i128(i128 %a, i128 %b) @@ -27,6 +28,7 @@ define zeroext i1 @f2(i128 %a, i128 %b) { ; CHECK-NEXT: vl %v1, 0(%r2), 3 ; CHECK-NEXT: vscbiq %v0, %v1, %v0 ; CHECK-NEXT: vlgvg %r2, %v0, 1 +; CHECK-NEXT: xilf %r2, 1 ; CHECK-NEXT: br %r14 %t = call {i128, i1} @llvm.usub.with.overflow.i128(i128 %a, i128 %b) %obit = extractvalue {i128, i1} %t, 1 @@ -46,5 +48,25 @@ define i128 @f3(i128 %a, i128 %b) { ret i128 %val } +define i128 @f4(i128 %a, i128 %b) { +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vscbiq %v2, %v1, %v0 +; CHECK-NEXT: vlgvf %r0, %v2, 3 +; CHECK-NEXT: vgbm %v2, 0 +; CHECK-NEXT: xilf %r0, 1 +; CHECK-NEXT: jl .LBB3_2 +; 
CHECK-NEXT: # %bb.1: +; CHECK-NEXT: vsq %v2, %v1, %v0 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: vst %v2, 0(%r2), 3 +; CHECK-NEXT: br %r14 + %val = call i128 @llvm.usub.sat.i128(i128 %a, i128 %b) + ret i128 %val +} + declare {i128, i1} @llvm.usub.with.overflow.i128(i128, i128) nounwind readnone +declare i128 @llvm.usub.sat.i128(i128, i128) nounwind readnone diff --git a/llvm/test/CodeGen/SystemZ/int-usub-13.ll b/llvm/test/CodeGen/SystemZ/int-usub-13.ll index 637e1a81de996f..794af3b73fbe2a 100644 --- a/llvm/test/CodeGen/SystemZ/int-usub-13.ll +++ b/llvm/test/CodeGen/SystemZ/int-usub-13.ll @@ -15,6 +15,7 @@ define zeroext i1 @f1(i256 %a, i256 %b, ptr %res) { ; CHECK-NEXT: vlgvg %r2, %v5, 1 ; CHECK-NEXT: vsbiq %v0, %v1, %v0, %v4 ; CHECK-NEXT: vsq %v1, %v3, %v2 +; CHECK-NEXT: xilf %r2, 1 ; CHECK-NEXT: vst %v1, 16(%r4), 3 ; CHECK-NEXT: vst %v0, 0(%r4), 3 ; CHECK-NEXT: br %r14 @@ -35,6 +36,7 @@ define zeroext i1 @f2(i256 %a, i256 %b) { ; CHECK-NEXT: vscbiq %v2, %v3, %v2 ; CHECK-NEXT: vsbcbiq %v0, %v1, %v0, %v2 ; CHECK-NEXT: vlgvg %r2, %v0, 1 +; CHECK-NEXT: xilf %r2, 1 ; CHECK-NEXT: br %r14 %t = call {i256, i1} @llvm.usub.with.overflow.i256(i256 %a, i256 %b) %obit = extractvalue {i256, i1} %t, 1