[AArch64] Add lowering for NEON saturating shift intrinsics #171485
base: main
Conversation
@llvm/pr-subscribers-backend-aarch64

Author: None (Lukacma)

Changes

This patch extends the work done in #161840 and adds bitcast-based lowering for the NEON saturating shift intrinsics.

Patch is 23.05 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/171485.diff

5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d1441a744eee8..815ed15ad4d1e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4561,7 +4561,8 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
}
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ bool IsLastInt = false) {
SDLoc DL(Op);
auto getFloatVT = [](EVT VT) {
assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
@@ -4570,11 +4571,18 @@ static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
auto bitcastToFloat = [&](SDValue Val) {
return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
};
+
+ const unsigned NumOps = Op.getNumOperands();
+ const unsigned LastOpIdx = NumOps - 1;
SmallVector<SDValue, 2> NewOps;
- NewOps.reserve(Op.getNumOperands() - 1);
+ NewOps.reserve(NumOps - 1);
- for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I)
+ // Skip first operand as it is intrinsic ID.
+ for (unsigned I = 1, E = LastOpIdx; I < E; ++I)
NewOps.push_back(bitcastToFloat(Op.getOperand(I)));
+ SDValue LastOp = IsLastInt ? Op.getOperand(LastOpIdx)
+ : bitcastToFloat(Op.getOperand(LastOpIdx));
+ NewOps.push_back(LastOp);
EVT OrigVT = Op.getValueType();
SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps);
return DAG.getBitcast(OrigVT, OpNode);
@@ -6390,42 +6398,42 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRUN, DAG, true);
case Intrinsic::aarch64_neon_uqshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VLSHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRUN, DAG, true);
case Intrinsic::aarch64_neon_uqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::URSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
- return SDValue();
+ return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshl:
if (Op.getValueType().isVector())
return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4d2e740779961..1bdf37dc4b2c0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -10177,7 +10177,7 @@ multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,
def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
FPR32, FPR64, vecshiftR32, asm,
- [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> {
+ [(set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn), vecshiftR32:$imm))]> {
let Inst{20-16} = imm{4-0};
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7ee094ad4ac87..819b84b48f7cf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1035,9 +1035,12 @@ def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>;
def AArch64sqdmull: SDNode<"AArch64ISD::SQDMULL",
SDTypeProfile<1, 2, [ SDTCisSameAs<1, 2>,
SDTCisFP<0>, SDTCisFP<1>]>>;
-
-//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
-
+def AArch64sqshrun: SDNode<"AArch64ISD::SQSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqrshrun: SDNode<"AArch64ISD::SQRSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqshrn: SDNode<"AArch64ISD::SQSHRN", SDTFPTruncRoundOp>;
+def AArch64uqshrn: SDNode<"AArch64ISD::UQSHRN", SDTFPTruncRoundOp>;
+def AArch64sqrshrn: SDNode<"AArch64ISD::SQRSHRN", SDTFPTruncRoundOp>;
+def AArch64uqrshrn: SDNode<"AArch64ISD::UQRSHRN", SDTFPTruncRoundOp>;
// Vector immediate ops
def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
@@ -8902,15 +8905,15 @@ def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>;
defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
- int_aarch64_neon_sqrshrn>;
+ AArch64sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
- int_aarch64_neon_sqrshrun>;
+ AArch64sqrshrun>;
defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
- int_aarch64_neon_sqshrn>;
+ AArch64sqshrn>;
defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
- int_aarch64_neon_sqshrun>;
+ AArch64sqshrun>;
defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri", AArch64vsri>;
defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
@@ -8921,10 +8924,10 @@ defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
- int_aarch64_neon_uqrshrn>;
+ AArch64uqrshrn>;
defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
- int_aarch64_neon_uqshrn>;
+ AArch64uqshrn>;
defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
index e8ae8a3e53c9b..9b530534d00f7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -3,7 +3,13 @@
; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for test_uqadd_s32
+; CHECK-GI: warning: Instruction selection used fallback path for test_sqshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshrun_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrun_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64
@@ -113,6 +119,90 @@ entry:
ret i64 %res
}
+define void @test_sqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrun_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqshrun s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_uqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqrshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_sqrshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrun_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: sqrshrun s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
+define void @test_uqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqrshrn_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, s0
+; CHECK-NEXT: uqrshrn s0, d0, #1
+; CHECK-NEXT: str s0, [x0]
+; CHECK-NEXT: ret
+entry:
+ %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+ %res = tail call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %cvt, i32 1)
+ store i32 %res, ptr %dst, align 4
+ ret void
+}
+
define i32 @test_sqadd_s32(float noundef %a) {
; CHECK-LABEL: test_sqadd_s32:
; CHECK: // %bb.0: // %entry
@@ -227,4 +317,4 @@ define i64 @test_sqdmulls_scalar(float %A){
%cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
%prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %cvt, i32 %cvt)
ret i64 %prod
-}
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.s b/llvm/test/CodeGen/AArch64/arm64-int-neon.s
new file mode 100644
index 0000000000000..4599c60e82703
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.s
@@ -0,0 +1,325 @@
+ .file "arm64-int-neon.ll"
+ .text
+ .globl test_sqrshl_s32 // -- Begin function test_sqrshl_s32
+ .p2align 2
+ .type test_sqrshl_s32,@function
+test_sqrshl_s32: // @test_sqrshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ sqrshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end0:
+ .size test_sqrshl_s32, .Lfunc_end0-test_sqrshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqrshl_s64 // -- Begin function test_sqrshl_s64
+ .p2align 2
+ .type test_sqrshl_s64,@function
+test_sqrshl_s64: // @test_sqrshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqrshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end1:
+ .size test_sqrshl_s64, .Lfunc_end1-test_sqrshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshl_s32 // -- Begin function test_sqshl_s32
+ .p2align 2
+ .type test_sqshl_s32,@function
+test_sqshl_s32: // @test_sqshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ sqshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end2:
+ .size test_sqshl_s32, .Lfunc_end2-test_sqshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshl_s64 // -- Begin function test_sqshl_s64
+ .p2align 2
+ .type test_sqshl_s64,@function
+test_sqshl_s64: // @test_sqshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end3:
+ .size test_sqshl_s64, .Lfunc_end3-test_sqshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_uqrshl_s32 // -- Begin function test_uqrshl_s32
+ .p2align 2
+ .type test_uqrshl_s32,@function
+test_uqrshl_s32: // @test_uqrshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ uqrshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end4:
+ .size test_uqrshl_s32, .Lfunc_end4-test_uqrshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqrshl_s64 // -- Begin function test_uqrshl_s64
+ .p2align 2
+ .type test_uqrshl_s64,@function
+test_uqrshl_s64: // @test_uqrshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqrshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end5:
+ .size test_uqrshl_s64, .Lfunc_end5-test_uqrshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_uqshl_s32 // -- Begin function test_uqshl_s32
+ .p2align 2
+ .type test_uqshl_s32,@function
+test_uqshl_s32: // @test_uqshl_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ uqshl s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end6:
+ .size test_uqshl_s32, .Lfunc_end6-test_uqshl_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqshl_s64 // -- Begin function test_uqshl_s64
+ .p2align 2
+ .type test_uqshl_s64,@function
+test_uqshl_s64: // @test_uqshl_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqshl d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end7:
+ .size test_uqshl_s64, .Lfunc_end7-test_uqshl_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshrn_s32 // -- Begin function test_sqshrn_s32
+ .p2align 2
+ .type test_sqshrn_s32,@function
+test_sqshrn_s32: // @test_sqshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end8:
+ .size test_sqshrn_s32, .Lfunc_end8-test_sqshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqshrun_s32 // -- Begin function test_sqshrun_s32
+ .p2align 2
+ .type test_sqshrun_s32,@function
+test_sqshrun_s32: // @test_sqshrun_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqshrun s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end9:
+ .size test_sqshrun_s32, .Lfunc_end9-test_sqshrun_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqshrn_s32 // -- Begin function test_uqshrn_s32
+ .p2align 2
+ .type test_uqshrn_s32,@function
+test_uqshrn_s32: // @test_uqshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end10:
+ .size test_uqshrn_s32, .Lfunc_end10-test_uqshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqrshrn_s32 // -- Begin function test_sqrshrn_s32
+ .p2align 2
+ .type test_sqrshrn_s32,@function
+test_sqrshrn_s32: // @test_sqrshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqrshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end11:
+ .size test_sqrshrn_s32, .Lfunc_end11-test_sqrshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqrshrun_s32 // -- Begin function test_sqrshrun_s32
+ .p2align 2
+ .type test_sqrshrun_s32,@function
+test_sqrshrun_s32: // @test_sqrshrun_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqrshrun s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end12:
+ .size test_sqrshrun_s32, .Lfunc_end12-test_sqrshrun_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_uqrshrn_s32 // -- Begin function test_uqrshrn_s32
+ .p2align 2
+ .type test_uqrshrn_s32,@function
+test_uqrshrn_s32: // @test_uqrshrn_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ uqrshrn s0, d0, #1
+ str s0, [x0]
+ ret
+.Lfunc_end13:
+ .size test_uqrshrn_s32, .Lfunc_end13-test_uqrshrn_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqadd_s32 // -- Begin function test_sqadd_s32
+ .p2align 2
+ .type test_sqadd_s32,@function
+test_sqadd_s32: // @test_sqadd_s32
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs s0, s0
+ sqadd s0, s0, s0
+ fmov w0, s0
+ ret
+.Lfunc_end14:
+ .size test_sqadd_s32, .Lfunc_end14-test_sqadd_s32
+ .cfi_endproc
+ // -- End function
+ .globl test_sqadd_s64 // -- Begin function test_sqadd_s64
+ .p2align 2
+ .type test_sqadd_s64,@function
+test_sqadd_s64: // @test_sqadd_s64
+ .cfi_startproc
+// %bb.0: // %entry
+ fcvtzs d0, s0
+ sqadd d0, d0, d0
+ fmov x0, d0
+ ret
+.Lfunc_end15:
+ .size test_sqadd_s64, .Lfunc_end15-test_sqadd_s64
+ .cfi_endproc
+ // -- End function
+ .globl test_sqsub_s32 // -- B...
[truncated]
🪟 Windows x64 Test Results

Failed Tests:
  LLVM :: CodeGen/AArch64/arm64-int-neon.s

If these failures are unrelated to your changes (for example, tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the …

🐧 Linux x64 Test Results

Failed Tests:
  LLVM :: CodeGen/AArch64/arm64-int-neon.s

If these failures are unrelated to your changes (for example, tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the …
Review comment on llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:

      const unsigned LastOpIdx = NumOps - 1;
      SmallVector<SDValue, 2> NewOps;
-     NewOps.reserve(Op.getNumOperands() - 1);
+     NewOps.reserve(NumOps - 1);
Suggested change:
-     NewOps.reserve(NumOps - 1);
+     NewOps.reserve(LastOpIdx);
Review comment on llvm/lib/Target/AArch64/AArch64ISelLowering.cpp:

      for (unsigned I = 1, E = LastOpIdx; I < E; ++I)
        NewOps.push_back(bitcastToFloat(Op.getOperand(I)));
      SDValue LastOp = IsLastInt ? Op.getOperand(LastOpIdx)
                                 : bitcastToFloat(Op.getOperand(LastOpIdx));
Could this instead check whether the last operand needs a bitcast based on its type, without passing the extra IsLastInt parameter? i.e.

Suggested change:
-                                 : bitcastToFloat(Op.getOperand(LastOpIdx));
+     SDValue LastOp = Op.getOperand(LastOpIdx);
+     LastOp = isa<ConstantSDNode>(LastOp) ? LastOp : bitcastToFloat(LastOp);
Review comment on llvm/lib/Target/AArch64/AArch64InstrInfo.td:

  defm SLI     : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>;
  defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
-                                     int_aarch64_neon_sqrshrn>;
+                                     AArch64sqrshrn>;
Can you make these into PatFrags that match either the node or the intrinsic? That should allow GISel to keep working and not take a step backwards.
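For reference, a minimal sketch of what such a fragment could look like, modeled on the multi-alternative PatFrags already used elsewhere in the AArch64 backend. This is not part of the patch: the name AArch64sqrshrn_pats is hypothetical, and a real definition would also have to reconcile the integer-typed intrinsic with the FP-typed node this patch introduces.

  // Hypothetical sketch only: a PatFrags that matches either the new SDNode or
  // the original intrinsic, so the scalar instruction patterns can be selected
  // from both SelectionDAG and GlobalISel.
  def AArch64sqrshrn_pats : PatFrags<(ops node:$Rn, node:$imm),
                                     [(AArch64sqrshrn node:$Rn, node:$imm),
                                      (int_aarch64_neon_sqrshrn node:$Rn, node:$imm)]>;

The instruction definitions would then reference the fragment instead of the bare node, e.g. defm SQRSHRN : SIMDScalarRShiftBHS<0, 0b10011, "sqrshrn", AArch64sqrshrn_pats>;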