
Conversation

@Lukacma (Contributor) commented Dec 9, 2025

This patch extends the work done in #161840 and adds bitcast-based lowering for the saturating shift intrinsics.

@llvmbot (Member) commented Dec 9, 2025

@llvm/pr-subscribers-backend-aarch64

Author: None (Lukacma)

Changes

This patch extends the work done in #161840 and adds bitcast-based lowering for the saturating shift intrinsics.


Patch is 23.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171485.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+17-9)
  • (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+1-1)
  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+12-9)
  • (modified) llvm/test/CodeGen/AArch64/arm64-int-neon.ll (+92-2)
  • (added) llvm/test/CodeGen/AArch64/arm64-int-neon.s (+325)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d1441a744eee8..815ed15ad4d1e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4561,7 +4561,8 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
 }
 
 static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
-                                     SelectionDAG &DAG) {
+                                     SelectionDAG &DAG,
+                                     bool IsLastInt = false) {
   SDLoc DL(Op);
   auto getFloatVT = [](EVT VT) {
     assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
@@ -4570,11 +4571,18 @@ static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
   auto bitcastToFloat = [&](SDValue Val) {
     return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
   };
+
+  const unsigned NumOps = Op.getNumOperands();
+  const unsigned LastOpIdx = NumOps - 1;
   SmallVector<SDValue, 2> NewOps;
-  NewOps.reserve(Op.getNumOperands() - 1);
+  NewOps.reserve(NumOps - 1);
 
-  for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I)
+  // Skip first operand as it is intrinsic ID.
+  for (unsigned I = 1, E = LastOpIdx; I < E; ++I)
     NewOps.push_back(bitcastToFloat(Op.getOperand(I)));
+  SDValue LastOp = IsLastInt ? Op.getOperand(LastOpIdx)
+                             : bitcastToFloat(Op.getOperand(LastOpIdx));
+  NewOps.push_back(LastOp);
   EVT OrigVT = Op.getValueType();
   SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps);
   return DAG.getBitcast(OrigVT, OpNode);
@@ -6390,42 +6398,42 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                          DAG.getNode(AArch64ISD::VASHR, DL,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRN, DAG, true);
   case Intrinsic::aarch64_neon_sqshrun:
     if (Op.getValueType().isVector())
       return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
                          DAG.getNode(AArch64ISD::VASHR, DL,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRUN, DAG, true);
   case Intrinsic::aarch64_neon_uqshrn:
     if (Op.getValueType().isVector())
       return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
                          DAG.getNode(AArch64ISD::VLSHR, DL,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHRN, DAG, true);
   case Intrinsic::aarch64_neon_sqrshrn:
     if (Op.getValueType().isVector())
       return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(),
                          DAG.getNode(AArch64ISD::SRSHR_I, DL,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRN, DAG, true);
   case Intrinsic::aarch64_neon_sqrshrun:
     if (Op.getValueType().isVector())
       return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
                          DAG.getNode(AArch64ISD::SRSHR_I, DL,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRUN, DAG, true);
   case Intrinsic::aarch64_neon_uqrshrn:
     if (Op.getValueType().isVector())
       return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
                          DAG.getNode(AArch64ISD::URSHR_I, DL,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHRN, DAG, true);
   case Intrinsic::aarch64_neon_sqrshl:
     if (Op.getValueType().isVector())
       return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4d2e740779961..1bdf37dc4b2c0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -10177,7 +10177,7 @@ multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,
 
   def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
                               FPR32, FPR64, vecshiftR32, asm,
-    [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> {
+    [(set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn), vecshiftR32:$imm))]> {
     let Inst{20-16} = imm{4-0};
   }
 }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7ee094ad4ac87..819b84b48f7cf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1035,9 +1035,12 @@ def AArch64uqsub:   SDNode<"AArch64ISD::UQSUB",   SDTFPBinOp>;
 def AArch64sqdmull: SDNode<"AArch64ISD::SQDMULL", 
                            SDTypeProfile<1, 2, [ SDTCisSameAs<1, 2>, 
                            SDTCisFP<0>, SDTCisFP<1>]>>;
-
-//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;
-
+def AArch64sqshrun: SDNode<"AArch64ISD::SQSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqrshrun: SDNode<"AArch64ISD::SQRSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqshrn: SDNode<"AArch64ISD::SQSHRN", SDTFPTruncRoundOp>;
+def AArch64uqshrn: SDNode<"AArch64ISD::UQSHRN", SDTFPTruncRoundOp>;
+def AArch64sqrshrn: SDNode<"AArch64ISD::SQRSHRN", SDTFPTruncRoundOp>;
+def AArch64uqrshrn: SDNode<"AArch64ISD::UQRSHRN", SDTFPTruncRoundOp>;
 // Vector immediate ops
 def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
 def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
@@ -8902,15 +8905,15 @@ def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
 defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
 defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>;
 defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
-                                     int_aarch64_neon_sqrshrn>;
+                                     AArch64sqrshrn>;
 defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
-                                     int_aarch64_neon_sqrshrun>;
+                                     AArch64sqrshrun>;
 defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
 defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
 defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
-                                     int_aarch64_neon_sqshrn>;
+                                     AArch64sqshrn>;
 defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
-                                     int_aarch64_neon_sqshrun>;
+                                     AArch64sqshrun>;
 defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri", AArch64vsri>;
 defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
 defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
@@ -8921,10 +8924,10 @@ defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
     TriOpFrag<(add_and_or_is_add node:$LHS,
                    (AArch64vashr node:$MHS, node:$RHS))>>;
 defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
-                                     int_aarch64_neon_uqrshrn>;
+                                     AArch64uqrshrn>;
 defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
 defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
-                                     int_aarch64_neon_uqshrn>;
+                                     AArch64uqshrn>;
 defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
 defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
     TriOpFrag<(add node:$LHS,
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
index e8ae8a3e53c9b..9b530534d00f7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -3,7 +3,13 @@
 ; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 
-; CHECK-GI:  warning: Instruction selection used fallback path for test_uqadd_s32
+; CHECK-GI:  warning: Instruction selection used fallback path for test_sqshrn_s32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_sqshrun_s32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_uqshrn_s32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_sqrshrn_s32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_sqrshrun_s32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_uqrshrn_s32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_uqadd_s32
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_uqadd_s64
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_uqsub_s32
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for test_uqsub_s64
@@ -113,6 +119,90 @@ entry:
   ret i64 %res
 }
 
+define void @test_sqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrn_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqshrn s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_sqshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrun_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqshrun s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_uqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqshrn_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    uqshrn s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_sqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrn_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqrshrn s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_sqrshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrun_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqrshrun s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_uqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqrshrn_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    uqrshrn s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
 define i32 @test_sqadd_s32(float noundef %a) {
 ; CHECK-LABEL: test_sqadd_s32:
 ; CHECK:       // %bb.0: // %entry
@@ -227,4 +317,4 @@ define i64 @test_sqdmulls_scalar(float %A){
   %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
   %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32  %cvt, i32  %cvt)
   ret i64 %prod
-}
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.s b/llvm/test/CodeGen/AArch64/arm64-int-neon.s
new file mode 100644
index 0000000000000..4599c60e82703
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.s
@@ -0,0 +1,325 @@
+	.file	"arm64-int-neon.ll"
+	.text
+	.globl	test_sqrshl_s32                 // -- Begin function test_sqrshl_s32
+	.p2align	2
+	.type	test_sqrshl_s32,@function
+test_sqrshl_s32:                        // @test_sqrshl_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	s0, s0
+	sqrshl	s0, s0, s0
+	fmov	w0, s0
+	ret
+.Lfunc_end0:
+	.size	test_sqrshl_s32, .Lfunc_end0-test_sqrshl_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqrshl_s64                 // -- Begin function test_sqrshl_s64
+	.p2align	2
+	.type	test_sqrshl_s64,@function
+test_sqrshl_s64:                        // @test_sqrshl_s64
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	sqrshl	d0, d0, d0
+	fmov	x0, d0
+	ret
+.Lfunc_end1:
+	.size	test_sqrshl_s64, .Lfunc_end1-test_sqrshl_s64
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqshl_s32                  // -- Begin function test_sqshl_s32
+	.p2align	2
+	.type	test_sqshl_s32,@function
+test_sqshl_s32:                         // @test_sqshl_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	s0, s0
+	sqshl	s0, s0, s0
+	fmov	w0, s0
+	ret
+.Lfunc_end2:
+	.size	test_sqshl_s32, .Lfunc_end2-test_sqshl_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqshl_s64                  // -- Begin function test_sqshl_s64
+	.p2align	2
+	.type	test_sqshl_s64,@function
+test_sqshl_s64:                         // @test_sqshl_s64
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	sqshl	d0, d0, d0
+	fmov	x0, d0
+	ret
+.Lfunc_end3:
+	.size	test_sqshl_s64, .Lfunc_end3-test_sqshl_s64
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_uqrshl_s32                 // -- Begin function test_uqrshl_s32
+	.p2align	2
+	.type	test_uqrshl_s32,@function
+test_uqrshl_s32:                        // @test_uqrshl_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	s0, s0
+	uqrshl	s0, s0, s0
+	fmov	w0, s0
+	ret
+.Lfunc_end4:
+	.size	test_uqrshl_s32, .Lfunc_end4-test_uqrshl_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_uqrshl_s64                 // -- Begin function test_uqrshl_s64
+	.p2align	2
+	.type	test_uqrshl_s64,@function
+test_uqrshl_s64:                        // @test_uqrshl_s64
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	uqrshl	d0, d0, d0
+	fmov	x0, d0
+	ret
+.Lfunc_end5:
+	.size	test_uqrshl_s64, .Lfunc_end5-test_uqrshl_s64
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_uqshl_s32                  // -- Begin function test_uqshl_s32
+	.p2align	2
+	.type	test_uqshl_s32,@function
+test_uqshl_s32:                         // @test_uqshl_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	s0, s0
+	uqshl	s0, s0, s0
+	fmov	w0, s0
+	ret
+.Lfunc_end6:
+	.size	test_uqshl_s32, .Lfunc_end6-test_uqshl_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_uqshl_s64                  // -- Begin function test_uqshl_s64
+	.p2align	2
+	.type	test_uqshl_s64,@function
+test_uqshl_s64:                         // @test_uqshl_s64
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	uqshl	d0, d0, d0
+	fmov	x0, d0
+	ret
+.Lfunc_end7:
+	.size	test_uqshl_s64, .Lfunc_end7-test_uqshl_s64
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqshrn_s32                 // -- Begin function test_sqshrn_s32
+	.p2align	2
+	.type	test_sqshrn_s32,@function
+test_sqshrn_s32:                        // @test_sqshrn_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	sqshrn	s0, d0, #1
+	str	s0, [x0]
+	ret
+.Lfunc_end8:
+	.size	test_sqshrn_s32, .Lfunc_end8-test_sqshrn_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqshrun_s32                // -- Begin function test_sqshrun_s32
+	.p2align	2
+	.type	test_sqshrun_s32,@function
+test_sqshrun_s32:                       // @test_sqshrun_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	sqshrun	s0, d0, #1
+	str	s0, [x0]
+	ret
+.Lfunc_end9:
+	.size	test_sqshrun_s32, .Lfunc_end9-test_sqshrun_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_uqshrn_s32                 // -- Begin function test_uqshrn_s32
+	.p2align	2
+	.type	test_uqshrn_s32,@function
+test_uqshrn_s32:                        // @test_uqshrn_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	uqshrn	s0, d0, #1
+	str	s0, [x0]
+	ret
+.Lfunc_end10:
+	.size	test_uqshrn_s32, .Lfunc_end10-test_uqshrn_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqrshrn_s32                // -- Begin function test_sqrshrn_s32
+	.p2align	2
+	.type	test_sqrshrn_s32,@function
+test_sqrshrn_s32:                       // @test_sqrshrn_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	sqrshrn	s0, d0, #1
+	str	s0, [x0]
+	ret
+.Lfunc_end11:
+	.size	test_sqrshrn_s32, .Lfunc_end11-test_sqrshrn_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqrshrun_s32               // -- Begin function test_sqrshrun_s32
+	.p2align	2
+	.type	test_sqrshrun_s32,@function
+test_sqrshrun_s32:                      // @test_sqrshrun_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	sqrshrun	s0, d0, #1
+	str	s0, [x0]
+	ret
+.Lfunc_end12:
+	.size	test_sqrshrun_s32, .Lfunc_end12-test_sqrshrun_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_uqrshrn_s32                // -- Begin function test_uqrshrn_s32
+	.p2align	2
+	.type	test_uqrshrn_s32,@function
+test_uqrshrn_s32:                       // @test_uqrshrn_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	uqrshrn	s0, d0, #1
+	str	s0, [x0]
+	ret
+.Lfunc_end13:
+	.size	test_uqrshrn_s32, .Lfunc_end13-test_uqrshrn_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqadd_s32                  // -- Begin function test_sqadd_s32
+	.p2align	2
+	.type	test_sqadd_s32,@function
+test_sqadd_s32:                         // @test_sqadd_s32
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	s0, s0
+	sqadd	s0, s0, s0
+	fmov	w0, s0
+	ret
+.Lfunc_end14:
+	.size	test_sqadd_s32, .Lfunc_end14-test_sqadd_s32
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqadd_s64                  // -- Begin function test_sqadd_s64
+	.p2align	2
+	.type	test_sqadd_s64,@function
+test_sqadd_s64:                         // @test_sqadd_s64
+	.cfi_startproc
+// %bb.0:                               // %entry
+	fcvtzs	d0, s0
+	sqadd	d0, d0, d0
+	fmov	x0, d0
+	ret
+.Lfunc_end15:
+	.size	test_sqadd_s64, .Lfunc_end15-test_sqadd_s64
+	.cfi_endproc
+                                        // -- End function
+	.globl	test_sqsub_s32                  // -- B...
[truncated]

@github-actions bot commented Dec 9, 2025

🪟 Windows x64 Test Results

  • 128485 tests passed
  • 2794 tests skipped
  • 1 test failed

Failed Tests

LLVM

LLVM.CodeGen/AArch64/arm64-int-neon.s
Test has no 'RUN:' line

If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the infrastructure label.

@github-actions bot commented Dec 9, 2025

🐧 Linux x64 Test Results

  • 166960 tests passed
  • 2921 tests skipped
  • 1 test failed

Failed Tests

LLVM

LLVM.CodeGen/AArch64/arm64-int-neon.s
Test has no 'RUN:' line

If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the infrastructure label.

const unsigned LastOpIdx = NumOps - 1;
SmallVector<SDValue, 2> NewOps;
NewOps.reserve(Op.getNumOperands() - 1);
NewOps.reserve(NumOps - 1);
Contributor

Suggested change
NewOps.reserve(NumOps - 1);
NewOps.reserve(LastOpIdx);

for (unsigned I = 1, E = LastOpIdx; I < E; ++I)
NewOps.push_back(bitcastToFloat(Op.getOperand(I)));
SDValue LastOp = IsLastInt ? Op.getOperand(LastOpIdx)
: bitcastToFloat(Op.getOperand(LastOpIdx));
Contributor

Could this instead check whether the last operand needs a bitcast based on the type, without passing the extra IsLastInt parameter?

i.e.

Suggested change
: bitcastToFloat(Op.getOperand(LastOpIdx));
SDValue LastOp = Op.getOperand(LastOpIdx);
LastOp = isa<ConstantSDNode>(LastOp) ? LastOp : bitcastToFloat(LastOp);

defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>;
defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
int_aarch64_neon_sqrshrn>;
AArch64sqrshrn>;
Collaborator

Can you make these PatFrags that match either the node or the intrinsic? That should allow GISel to keep working and not take a step backwards.
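
For illustration only (not part of the patch): one way to read this request is a PatFrags whose alternatives are the new FP-typed node and the original intrinsic, so the instruction pattern stays selectable from GlobalISel, which still produces the intrinsic. A minimal, hypothetical sketch with an illustrative *_any name; the differing operand/result types of the two forms would still need to be reconciled:

	// Hypothetical fragment: match either the new SDNode or the original intrinsic.
	def AArch64sqrshrn_any : PatFrags<(ops node:$Rn, node:$imm),
	                                  [(AArch64sqrshrn node:$Rn, node:$imm),
	                                   (int_aarch64_neon_sqrshrn node:$Rn, node:$imm)]>;

	// The instruction definition would then take the fragment instead of either form directly.
	defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", AArch64sqrshrn_any>;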
