[AArch64] Add lowering for NEON saturating shift intrinsics #171485
base: main
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: None (Lukacma)

Changes

This patch extends the work done in #161840 and adds bitcast-based lowering for the NEON saturating shift intrinsics.

Patch is 23.05 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/171485.diff

5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d1441a744eee8..815ed15ad4d1e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4561,7 +4561,8 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
}
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool IsLastInt = false) {
SDLoc DL(Op);
auto getFloatVT = [](EVT VT) {
assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
@@ -4570,11 +4571,18 @@ static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
auto bitcastToFloat = [&](SDValue Val) {
return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
};
+
+ const unsigned NumOps = Op.getNumOperands();
+ const unsigned LastOpIdx = NumOps - 1;
SmallVector<SDValue, 2> NewOps;
- NewOps.reserve(Op.getNumOperands() - 1);
+ NewOps.reserve(NumOps - 1);
- for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I)
+ // Skip first operand as it is intrinsic ID.
+ for (unsigned I = 1, E = LastOpIdx; I < E; ++I)
NewOps.push_back(bitcastToFloat(Op.getOperand(I)));
+ SDValue LastOp = IsLastInt ? Op.getOperand(LastOpIdx)
+ : bitcastToFloat(Op.getOperand(LastOpIdx));
+ NewOps.push_back(LastOp);
EVT OrigVT = Op.getValueType();
SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps);
return DAG.getBitcast(OrigVT, OpNode);
@@ -6390,42 +6398,42 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRUN, DAG, true);
case Intrinsic::aarch64_neon_uqshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VLSHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRUN, DAG, true);
case Intrinsic::aarch64_neon_uqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::URSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshl:
if (Op.getValueType().isVector())
return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4d2e740779961..1bdf37dc4b2c0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -10177,7 +10177,7 @@ multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,
def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
FPR32, FPR64, vecshiftR32, asm,
- [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> {
+ [(set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn), vecshiftR32:$imm))]> {
let Inst{20-16} = imm{4-0};
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7ee094ad4ac87..819b84b48f7cf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1035,9 +1035,12 @@ def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>;
def AArch64sqdmull: SDNode<"AArch64ISD::SQDMULL",
SDTypeProfile<1, 2, [ SDTCisSameAs<1, 2>,
SDTCisFP<0>, SDTCisFP<1>]>>;
-
-//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
-
+def AArch64sqshrun: SDNode<"AArch64ISD::SQSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqrshrun: SDNode<"AArch64ISD::SQRSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqshrn: SDNode<"AArch64ISD::SQSHRN", SDTFPTruncRoundOp>;
+def AArch64uqshrn: SDNode<"AArch64ISD::UQSHRN", SDTFPTruncRoundOp>;
+def AArch64sqrshrn: SDNode<"AArch64ISD::SQRSHRN", SDTFPTruncRoundOp>;
+def AArch64uqrshrn: SDNode<"AArch64ISD::UQRSHRN", SDTFPTruncRoundOp>;
// Vector immediate ops
def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
@@ -8902,15 +8905,15 @@ def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>;
defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
- int_aarch64_neon_sqrshrn>;
+ AArch64sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
- int_aarch64_neon_sqrshrun>;
+ AArch64sqrshrun>;
defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
- int_aarch64_neon_sqshrn>;
+ AArch64sqshrn>;
defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
- int_aarch64_neon_sqshrun>;
+ AArch64sqshrun>;
defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri", AArch64vsri>;
defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
@@ -8921,10 +8924,10 @@ defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
- int_aarch64_neon_uqrshrn>;
+ AArch64uqrshrn>;
defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
- int_aarch64_neon_uqshrn>;
+ AArch64uqshrn>;
defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
index e8ae8a3e53c9b..9b530534d00f7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -3,7 +3,13 @@
; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for test_uqadd_s32
+; CHECK-GI: warning: Instruction selection used fallback path for test_sqshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshrun_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrun_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64
@@ -113,6 +119,90 @@ entry:
ret i64 %res
}
+define void @test_sqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrun_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqshrun s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_uqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqrshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqrshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrun_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqrshrun s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_uqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqrshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqrshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
define i32 @test_sqadd_s32(float noundef %a) {
; CHECK-LABEL: test_sqadd_s32:
; CHECK: // %bb.0: // %entry
@@ -227,4 +317,4 @@ define i64 @test_sqdmulls_scalar(float %A){
%cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
%prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %cvt, i32 %cvt)
ret i64 %prod
-}
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.s b/llvm/test/CodeGen/AArch64/arm64-int-neon.s
new file mode 100644
index 0000000000000..4599c60e82703
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.s
@@ -0,0 +1,325 @@
+ .file "arm64-int-neon.ll"
+ .text
+ .globl test_sqrshl_s32 // -- Begin function test_sqrshl_s32
+ .p2align 2
+ .type test_sqrshl_s32,@function
+test_sqrshl_s32: // @test_sqrshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ sqrshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end0:
+ .size test_sqrshl_s32, .Lfunc_end0-test_sqrshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqrshl_s64 // -- Begin function test_sqrshl_s64
+ .p2align 2
+ .type test_sqrshl_s64,@function
+test_sqrshl_s64: // @test_sqrshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqrshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end1:
+ .size test_sqrshl_s64, .Lfunc_end1-test_sqrshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshl_s32 // -- Begin function test_sqshl_s32
+ .p2align 2
+ .type test_sqshl_s32,@function
+test_sqshl_s32: // @test_sqshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ sqshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end2:
+ .size test_sqshl_s32, .Lfunc_end2-test_sqshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshl_s64 // -- Begin function test_sqshl_s64
+ .p2align 2
+ .type test_sqshl_s64,@function
+test_sqshl_s64: // @test_sqshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end3:
+ .size test_sqshl_s64, .Lfunc_end3-test_sqshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_uqrshl_s32 // -- Begin function test_uqrshl_s32
+ .p2align 2
+ .type test_uqrshl_s32,@function
+test_uqrshl_s32: // @test_uqrshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ uqrshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end4:
+ .size test_uqrshl_s32, .Lfunc_end4-test_uqrshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqrshl_s64 // -- Begin function test_uqrshl_s64
+ .p2align 2
+ .type test_uqrshl_s64,@function
+test_uqrshl_s64: // @test_uqrshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqrshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end5:
+ .size test_uqrshl_s64, .Lfunc_end5-test_uqrshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_uqshl_s32 // -- Begin function test_uqshl_s32
+ .p2align 2
+ .type test_uqshl_s32,@function
+test_uqshl_s32: // @test_uqshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ uqshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end6:
+ .size test_uqshl_s32, .Lfunc_end6-test_uqshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqshl_s64 // -- Begin function test_uqshl_s64
+ .p2align 2
+ .type test_uqshl_s64,@function
+test_uqshl_s64: // @test_uqshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end7:
+ .size test_uqshl_s64, .Lfunc_end7-test_uqshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshrn_s32 // -- Begin function test_sqshrn_s32
+ .p2align 2
+ .type test_sqshrn_s32,@function
+test_sqshrn_s32: // @test_sqshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end8:
+ .size test_sqshrn_s32, .Lfunc_end8-test_sqshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshrun_s32 // -- Begin function test_sqshrun_s32
+ .p2align 2
+ .type test_sqshrun_s32,@function
+test_sqshrun_s32: // @test_sqshrun_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqshrun s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end9:
+ .size test_sqshrun_s32, .Lfunc_end9-test_sqshrun_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqshrn_s32 // -- Begin function test_uqshrn_s32
+ .p2align 2
+ .type test_uqshrn_s32,@function
+test_uqshrn_s32: // @test_uqshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end10:
+ .size test_uqshrn_s32, .Lfunc_end10-test_uqshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqrshrn_s32 // -- Begin function test_sqrshrn_s32
+ .p2align 2
+ .type test_sqrshrn_s32,@function
+test_sqrshrn_s32: // @test_sqrshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqrshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end11:
+ .size test_sqrshrn_s32, .Lfunc_end11-test_sqrshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqrshrun_s32 // -- Begin function test_sqrshrun_s32
+ .p2align 2
+ .type test_sqrshrun_s32,@function
+test_sqrshrun_s32: // @test_sqrshrun_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqrshrun s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end12:
+ .size test_sqrshrun_s32, .Lfunc_end12-test_sqrshrun_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqrshrn_s32 // -- Begin function test_uqrshrn_s32
+ .p2align 2
+ .type test_uqrshrn_s32,@function
+test_uqrshrn_s32: // @test_uqrshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqrshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end13:
+ .size test_uqrshrn_s32, .Lfunc_end13-test_uqrshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqadd_s32 // -- Begin function test_sqadd_s32
+ .p2align 2
+ .type test_sqadd_s32,@function
+test_sqadd_s32: // @test_sqadd_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ sqadd s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end14:
+ .size test_sqadd_s32, .Lfunc_end14-test_sqadd_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqadd_s64 // -- Begin function test_sqadd_s64
+ .p2align 2
+ .type test_sqadd_s64,@function
+test_sqadd_s64: // @test_sqadd_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqadd d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end15:
+ .size test_sqadd_s64, .Lfunc_end15-test_sqadd_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_sqsub_s32 // -- B...
[truncated]
🪟 Windows x64 Test Results

Failed Tests:
  LLVM :: CodeGen/AArch64/arm64-int-neon.s

If these failures are unrelated to your changes (for example, tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the …

🐧 Linux x64 Test Results

Failed Tests:
  LLVM :: CodeGen/AArch64/arm64-int-neon.s

If these failures are unrelated to your changes (for example, tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the …
Review comment on llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:

      const unsigned LastOpIdx = NumOps - 1;
      SmallVector<SDValue, 2> NewOps;
-     NewOps.reserve(Op.getNumOperands() - 1);
+     NewOps.reserve(NumOps - 1);
Suggested change:
-     NewOps.reserve(NumOps - 1);
+     NewOps.reserve(LastOpIdx);
Review comment on llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:

      for (unsigned I = 1, E = LastOpIdx; I < E; ++I)
        NewOps.push_back(bitcastToFloat(Op.getOperand(I)));
      SDValue LastOp = IsLastInt ? Op.getOperand(LastOpIdx)
                                 : bitcastToFloat(Op.getOperand(LastOpIdx));
Could this instead check whether the last operand needs a bitcast based on its type, without passing the extra IsLastInt parameter? i.e.

Suggested change:
-                                 : bitcastToFloat(Op.getOperand(LastOpIdx));
+     SDValue LastOp = Op.getOperand(LastOpIdx);
+     LastOp = isa<ConstantSDNode>(LastOp) ? LastOp : bitcastToFloat(LastOp);
Review comment on llvm/lib/Target/AArch64/AArch64InstrInfo.td:

  defm SLI     : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>;
  defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
-                                     int_aarch64_neon_sqrshrn>;
+                                     AArch64sqrshrn>;
Can you make these into PatFrags that match either the node or the intrinsic? That should allow GISel to keep working and not take a step backwards.
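For reference, a minimal sketch of what such a fragment could look like, modeled on the multi-alternative PatFrags already used elsewhere in the AArch64 backend. This is not part of the patch: the name AArch64sqrshrn_pats is hypothetical, and a real definition would also have to reconcile the integer-typed intrinsic with the FP-typed node this patch introduces.

  // Hypothetical sketch only: a PatFrags that matches either the new SDNode or
  // the original intrinsic, so the scalar instruction patterns can be selected
  // from both SelectionDAG and GlobalISel.
  def AArch64sqrshrn_pats : PatFrags<(ops node:$Rn, node:$imm),
                                     [(AArch64sqrshrn node:$Rn, node:$imm),
                                      (int_aarch64_neon_sqrshrn node:$Rn, node:$imm)]>;

The instruction definitions would then reference the fragment instead of the bare node, e.g. defm SQRSHRN : SIMDScalarRShiftBHS<0, 0b10011, "sqrshrn", AArch64sqrshrn_pats>;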