From 61a974f805530b4d3a542a753415bebbdf5b39bd Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Tue, 9 Dec 2025 18:09:21 +0000 Subject: [PATCH 1/4] [AArch64] Add lowering for NEON saturating shift intrinsics --- .../Target/AArch64/AArch64ISelLowering.cpp | 26 +- .../lib/Target/AArch64/AArch64InstrFormats.td | 2 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 21 +- llvm/test/CodeGen/AArch64/arm64-int-neon.ll | 94 ++++- llvm/test/CodeGen/AArch64/arm64-int-neon.s | 325 ++++++++++++++++++ 5 files changed, 447 insertions(+), 21 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/arm64-int-neon.s diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d1441a744eee8..815ed15ad4d1e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4561,7 +4561,8 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG, } static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode, - SelectionDAG &DAG) { + SelectionDAG &DAG, + bool IsLastInt = false) { SDLoc DL(Op); auto getFloatVT = [](EVT VT) { assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT"); @@ -4570,11 +4571,18 @@ static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode, auto bitcastToFloat = [&](SDValue Val) { return DAG.getBitcast(getFloatVT(Val.getValueType()), Val); }; + + const unsigned NumOps = Op.getNumOperands(); + const unsigned LastOpIdx = NumOps - 1; SmallVector<SDValue> NewOps; - NewOps.reserve(Op.getNumOperands() - 1); + NewOps.reserve(NumOps - 1); - for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I) + // Skip first operand as it is intrinsic ID. + for (unsigned I = 1, E = LastOpIdx; I < E; ++I) NewOps.push_back(bitcastToFloat(Op.getOperand(I))); + SDValue LastOp = IsLastInt ?
Op.getOperand(LastOpIdx) + : bitcastToFloat(Op.getOperand(LastOpIdx)); + NewOps.push_back(LastOp); EVT OrigVT = Op.getValueType(); SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps); return DAG.getBitcast(OrigVT, OpNode); @@ -6390,42 +6398,42 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getNode(AArch64ISD::VASHR, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return SDValue(); + return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRN, DAG, true); case Intrinsic::aarch64_neon_sqshrun: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(), DAG.getNode(AArch64ISD::VASHR, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return SDValue(); + return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRUN, DAG, true); case Intrinsic::aarch64_neon_uqshrn: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(), DAG.getNode(AArch64ISD::VLSHR, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return SDValue(); + return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHRN, DAG, true); case Intrinsic::aarch64_neon_sqrshrn: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(), DAG.getNode(AArch64ISD::SRSHR_I, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return SDValue(); + return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRN, DAG, true); case Intrinsic::aarch64_neon_sqrshrun: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(), DAG.getNode(AArch64ISD::SRSHR_I, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return SDValue(); + return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRUN, DAG, true); case Intrinsic::aarch64_neon_uqrshrn: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(), DAG.getNode(AArch64ISD::URSHR_I, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return SDValue(); + return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHRN, DAG, true); case Intrinsic::aarch64_neon_sqrshl: if (Op.getValueType().isVector()) return SDValue(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 4d2e740779961..1bdf37dc4b2c0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -10177,7 +10177,7 @@ multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm, def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?}, FPR32, FPR64, vecshiftR32, asm, - [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), (i32 vecshiftR32:$imm)))]> { + [(set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn), vecshiftR32:$imm))]> { let Inst{20-16} = imm{4-0}; } } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 7ee094ad4ac87..819b84b48f7cf 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1035,9 +1035,12 @@ def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>; def AArch64sqdmull: SDNode<"AArch64ISD::SQDMULL", SDTypeProfile<1, 2, [ SDTCisSameAs<1, 2>, SDTCisFP<0>, SDTCisFP<1>]>>; - -//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>; - +def AArch64sqshrun: SDNode<"AArch64ISD::SQSHRUN", SDTFPTruncRoundOp>; +def AArch64sqrshrun: SDNode<"AArch64ISD::SQRSHRUN", SDTFPTruncRoundOp>; +def AArch64sqshrn: SDNode<"AArch64ISD::SQSHRN", SDTFPTruncRoundOp>; +def AArch64uqshrn: SDNode<"AArch64ISD::UQSHRN", SDTFPTruncRoundOp>; +def
AArch64sqrshrn: SDNode<"AArch64ISD::SQRSHRN", SDTFPTruncRoundOp>; +def AArch64uqrshrn: SDNode<"AArch64ISD::UQRSHRN", SDTFPTruncRoundOp>; // Vector immediate ops def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; @@ -8902,15 +8905,15 @@ def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))), defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>; defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", - int_aarch64_neon_sqrshrn>; + AArch64sqrshrn>; defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", - int_aarch64_neon_sqrshrun>; + AArch64sqrshrun>; defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", - int_aarch64_neon_sqshrn>; + AArch64sqshrn>; defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", - int_aarch64_neon_sqshrun>; + AArch64sqshrun>; defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri", AArch64vsri>; defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", @@ -8921,10 +8924,10 @@ defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", - int_aarch64_neon_uqrshrn>; + AArch64uqrshrn>; defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", - int_aarch64_neon_uqshrn>; + AArch64uqshrn>; defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", TriOpFrag<(add node:$LHS, diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll index e8ae8a3e53c9b..9b530534d00f7 100644 --- a/llvm/test/CodeGen/AArch64/arm64-int-neon.ll +++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.ll @@ -3,7 +3,13 @@ ; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for test_uqadd_s32 +; CHECK-GI: warning: Instruction selection used fallback path for test_sqshrn_s32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshrun_s32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshrn_s32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrn_s32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrun_s32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshrn_s32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64 @@ -113,6 +119,90 @@ entry: ret i64 %res } +define void @test_sqshrn_s32(float noundef %a, ptr %dst) { +; CHECK-LABEL: test_sqshrn_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: sqshrn s0, d0, #1 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret +entry: + %cvt = tail call 
i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a) + %res = tail call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %cvt, i32 1) + store i32 %res, ptr %dst, align 4 + ret void +} + +define void @test_sqshrun_s32(float noundef %a, ptr %dst) { +; CHECK-LABEL: test_sqshrun_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: sqshrun s0, d0, #1 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret +entry: + %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a) + %res = tail call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %cvt, i32 1) + store i32 %res, ptr %dst, align 4 + ret void +} + +define void @test_uqshrn_s32(float noundef %a, ptr %dst) { +; CHECK-LABEL: test_uqshrn_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: uqshrn s0, d0, #1 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret +entry: + %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a) + %res = tail call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %cvt, i32 1) + store i32 %res, ptr %dst, align 4 + ret void +} + +define void @test_sqrshrn_s32(float noundef %a, ptr %dst) { +; CHECK-LABEL: test_sqrshrn_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: sqrshrn s0, d0, #1 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret +entry: + %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a) + %res = tail call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %cvt, i32 1) + store i32 %res, ptr %dst, align 4 + ret void +} + +define void @test_sqrshrun_s32(float noundef %a, ptr %dst) { +; CHECK-LABEL: test_sqrshrun_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: sqrshrun s0, d0, #1 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret +entry: + %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a) + %res = tail call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %cvt, i32 1) + store i32 %res, ptr %dst, align 4 + ret void +} + +define void @test_uqrshrn_s32(float noundef %a, ptr %dst) { +; CHECK-LABEL: test_uqrshrn_s32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: uqrshrn s0, d0, #1 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret +entry: + %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a) + %res = tail call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %cvt, i32 1) + store i32 %res, ptr %dst, align 4 + ret void +} + define i32 @test_sqadd_s32(float noundef %a) { ; CHECK-LABEL: test_sqadd_s32: ; CHECK: // %bb.0: // %entry @@ -227,4 +317,4 @@ define i64 @test_sqdmulls_scalar(float %A){ %cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A) %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %cvt, i32 %cvt) ret i64 %prod -} +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.s b/llvm/test/CodeGen/AArch64/arm64-int-neon.s new file mode 100644 index 0000000000000..4599c60e82703 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-int-neon.s @@ -0,0 +1,325 @@ + .file "arm64-int-neon.ll" + .text + .globl test_sqrshl_s32 // -- Begin function test_sqrshl_s32 + .p2align 2 + .type test_sqrshl_s32,@function +test_sqrshl_s32: // @test_sqrshl_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs s0, s0 + sqrshl s0, s0, s0 + fmov w0, s0 + ret +.Lfunc_end0: + .size test_sqrshl_s32, .Lfunc_end0-test_sqrshl_s32 + .cfi_endproc + // -- End function + .globl test_sqrshl_s64 // -- Begin function test_sqrshl_s64 + .p2align 2 + .type test_sqrshl_s64,@function +test_sqrshl_s64: // @test_sqrshl_s64 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + sqrshl d0, d0, d0 + fmov x0, d0 + ret 
+.Lfunc_end1: + .size test_sqrshl_s64, .Lfunc_end1-test_sqrshl_s64 + .cfi_endproc + // -- End function + .globl test_sqshl_s32 // -- Begin function test_sqshl_s32 + .p2align 2 + .type test_sqshl_s32,@function +test_sqshl_s32: // @test_sqshl_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs s0, s0 + sqshl s0, s0, s0 + fmov w0, s0 + ret +.Lfunc_end2: + .size test_sqshl_s32, .Lfunc_end2-test_sqshl_s32 + .cfi_endproc + // -- End function + .globl test_sqshl_s64 // -- Begin function test_sqshl_s64 + .p2align 2 + .type test_sqshl_s64,@function +test_sqshl_s64: // @test_sqshl_s64 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + sqshl d0, d0, d0 + fmov x0, d0 + ret +.Lfunc_end3: + .size test_sqshl_s64, .Lfunc_end3-test_sqshl_s64 + .cfi_endproc + // -- End function + .globl test_uqrshl_s32 // -- Begin function test_uqrshl_s32 + .p2align 2 + .type test_uqrshl_s32,@function +test_uqrshl_s32: // @test_uqrshl_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs s0, s0 + uqrshl s0, s0, s0 + fmov w0, s0 + ret +.Lfunc_end4: + .size test_uqrshl_s32, .Lfunc_end4-test_uqrshl_s32 + .cfi_endproc + // -- End function + .globl test_uqrshl_s64 // -- Begin function test_uqrshl_s64 + .p2align 2 + .type test_uqrshl_s64,@function +test_uqrshl_s64: // @test_uqrshl_s64 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + uqrshl d0, d0, d0 + fmov x0, d0 + ret +.Lfunc_end5: + .size test_uqrshl_s64, .Lfunc_end5-test_uqrshl_s64 + .cfi_endproc + // -- End function + .globl test_uqshl_s32 // -- Begin function test_uqshl_s32 + .p2align 2 + .type test_uqshl_s32,@function +test_uqshl_s32: // @test_uqshl_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs s0, s0 + uqshl s0, s0, s0 + fmov w0, s0 + ret +.Lfunc_end6: + .size test_uqshl_s32, .Lfunc_end6-test_uqshl_s32 + .cfi_endproc + // -- End function + .globl test_uqshl_s64 // -- Begin function test_uqshl_s64 + .p2align 2 + .type test_uqshl_s64,@function +test_uqshl_s64: // @test_uqshl_s64 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + uqshl d0, d0, d0 + fmov x0, d0 + ret +.Lfunc_end7: + .size test_uqshl_s64, .Lfunc_end7-test_uqshl_s64 + .cfi_endproc + // -- End function + .globl test_sqshrn_s32 // -- Begin function test_sqshrn_s32 + .p2align 2 + .type test_sqshrn_s32,@function +test_sqshrn_s32: // @test_sqshrn_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + sqshrn s0, d0, #1 + str s0, [x0] + ret +.Lfunc_end8: + .size test_sqshrn_s32, .Lfunc_end8-test_sqshrn_s32 + .cfi_endproc + // -- End function + .globl test_sqshrun_s32 // -- Begin function test_sqshrun_s32 + .p2align 2 + .type test_sqshrun_s32,@function +test_sqshrun_s32: // @test_sqshrun_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + sqshrun s0, d0, #1 + str s0, [x0] + ret +.Lfunc_end9: + .size test_sqshrun_s32, .Lfunc_end9-test_sqshrun_s32 + .cfi_endproc + // -- End function + .globl test_uqshrn_s32 // -- Begin function test_uqshrn_s32 + .p2align 2 + .type test_uqshrn_s32,@function +test_uqshrn_s32: // @test_uqshrn_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + uqshrn s0, d0, #1 + str s0, [x0] + ret +.Lfunc_end10: + .size test_uqshrn_s32, .Lfunc_end10-test_uqshrn_s32 + .cfi_endproc + // -- End function + .globl test_sqrshrn_s32 // -- Begin function test_sqrshrn_s32 + .p2align 2 + .type test_sqrshrn_s32,@function +test_sqrshrn_s32: // @test_sqrshrn_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + sqrshrn s0, d0, #1 + str s0, [x0] + ret +.Lfunc_end11: + .size test_sqrshrn_s32, .Lfunc_end11-test_sqrshrn_s32 + .cfi_endproc + // -- End function + .globl 
test_sqrshrun_s32 // -- Begin function test_sqrshrun_s32 + .p2align 2 + .type test_sqrshrun_s32,@function +test_sqrshrun_s32: // @test_sqrshrun_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + sqrshrun s0, d0, #1 + str s0, [x0] + ret +.Lfunc_end12: + .size test_sqrshrun_s32, .Lfunc_end12-test_sqrshrun_s32 + .cfi_endproc + // -- End function + .globl test_uqrshrn_s32 // -- Begin function test_uqrshrn_s32 + .p2align 2 + .type test_uqrshrn_s32,@function +test_uqrshrn_s32: // @test_uqrshrn_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + uqrshrn s0, d0, #1 + str s0, [x0] + ret +.Lfunc_end13: + .size test_uqrshrn_s32, .Lfunc_end13-test_uqrshrn_s32 + .cfi_endproc + // -- End function + .globl test_sqadd_s32 // -- Begin function test_sqadd_s32 + .p2align 2 + .type test_sqadd_s32,@function +test_sqadd_s32: // @test_sqadd_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs s0, s0 + sqadd s0, s0, s0 + fmov w0, s0 + ret +.Lfunc_end14: + .size test_sqadd_s32, .Lfunc_end14-test_sqadd_s32 + .cfi_endproc + // -- End function + .globl test_sqadd_s64 // -- Begin function test_sqadd_s64 + .p2align 2 + .type test_sqadd_s64,@function +test_sqadd_s64: // @test_sqadd_s64 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + sqadd d0, d0, d0 + fmov x0, d0 + ret +.Lfunc_end15: + .size test_sqadd_s64, .Lfunc_end15-test_sqadd_s64 + .cfi_endproc + // -- End function + .globl test_sqsub_s32 // -- Begin function test_sqsub_s32 + .p2align 2 + .type test_sqsub_s32,@function +test_sqsub_s32: // @test_sqsub_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs s0, s0 + sqsub s0, s0, s0 + fmov w0, s0 + ret +.Lfunc_end16: + .size test_sqsub_s32, .Lfunc_end16-test_sqsub_s32 + .cfi_endproc + // -- End function + .globl test_sqsub_s64 // -- Begin function test_sqsub_s64 + .p2align 2 + .type test_sqsub_s64,@function +test_sqsub_s64: // @test_sqsub_s64 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + sqsub d0, d0, d0 + fmov x0, d0 + ret +.Lfunc_end17: + .size test_sqsub_s64, .Lfunc_end17-test_sqsub_s64 + .cfi_endproc + // -- End function + .globl test_uqadd_s32 // -- Begin function test_uqadd_s32 + .p2align 2 + .type test_uqadd_s32,@function +test_uqadd_s32: // @test_uqadd_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs s0, s0 + uqadd s0, s0, s0 + fmov w0, s0 + ret +.Lfunc_end18: + .size test_uqadd_s32, .Lfunc_end18-test_uqadd_s32 + .cfi_endproc + // -- End function + .globl test_uqadd_s64 // -- Begin function test_uqadd_s64 + .p2align 2 + .type test_uqadd_s64,@function +test_uqadd_s64: // @test_uqadd_s64 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + uqadd d0, d0, d0 + fmov x0, d0 + ret +.Lfunc_end19: + .size test_uqadd_s64, .Lfunc_end19-test_uqadd_s64 + .cfi_endproc + // -- End function + .globl test_uqsub_s32 // -- Begin function test_uqsub_s32 + .p2align 2 + .type test_uqsub_s32,@function +test_uqsub_s32: // @test_uqsub_s32 + .cfi_startproc +// %bb.0: // %entry + fcvtzs s0, s0 + uqsub s0, s0, s0 + fmov w0, s0 + ret +.Lfunc_end20: + .size test_uqsub_s32, .Lfunc_end20-test_uqsub_s32 + .cfi_endproc + // -- End function + .globl test_uqsub_s64 // -- Begin function test_uqsub_s64 + .p2align 2 + .type test_uqsub_s64,@function +test_uqsub_s64: // @test_uqsub_s64 + .cfi_startproc +// %bb.0: // %entry + fcvtzs d0, s0 + uqsub d0, d0, d0 + fmov x0, d0 + ret +.Lfunc_end21: + .size test_uqsub_s64, .Lfunc_end21-test_uqsub_s64 + .cfi_endproc + // -- End function + .globl test_sqdmulls_scalar // -- Begin function test_sqdmulls_scalar + .p2align 2 + .type test_sqdmulls_scalar,@function 
+test_sqdmulls_scalar: // @test_sqdmulls_scalar + .cfi_startproc +// %bb.0: + fcvtzs s0, s0 + sqdmull d0, s0, s0 + fmov x0, d0 + ret +.Lfunc_end22: + .size test_sqdmulls_scalar, .Lfunc_end22-test_sqdmulls_scalar + .cfi_endproc + // -- End function + .section ".note.GNU-stack","",@progbits From c70d915509e455ee1939fbc382d696720f0ca013 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Tue, 16 Dec 2025 12:24:06 +0000 Subject: [PATCH 2/4] address review comments --- .../Target/AArch64/AArch64ISelLowering.cpp | 30 ++++++++----------- .../lib/Target/AArch64/AArch64InstrFormats.td | 9 ++++-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 12 ++++---- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 815ed15ad4d1e..cf608e39c5075 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4561,9 +4561,9 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG, } static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode, - SelectionDAG &DAG, - bool IsLastInt = false) { + SelectionDAG &DAG) { SDLoc DL(Op); + SmallVector<SDValue> NewOps; auto getFloatVT = [](EVT VT) { assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT"); return VT == MVT::i32 ? MVT::f32 : MVT::f64; }; @@ -4572,17 +4572,11 @@ static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode, return DAG.getBitcast(getFloatVT(Val.getValueType()), Val); }; - const unsigned NumOps = Op.getNumOperands(); - const unsigned LastOpIdx = NumOps - 1; - SmallVector<SDValue> NewOps; - NewOps.reserve(NumOps - 1); - // Skip first operand as it is intrinsic ID. - for (unsigned I = 1, E = LastOpIdx; I < E; ++I) - NewOps.push_back(bitcastToFloat(Op.getOperand(I))); - SDValue LastOp = IsLastInt ? Op.getOperand(LastOpIdx) - : bitcastToFloat(Op.getOperand(LastOpIdx)); - NewOps.push_back(LastOp); + // Skip first operand as it is intrinsic ID. + for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I){ + SDValue Val = Op.getOperand(I); + NewOps.push_back(isa<ConstantSDNode>(Val.getNode()) ?
Val : bitcastToFloat(Val)); + } EVT OrigVT = Op.getValueType(); SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps); return DAG.getBitcast(OrigVT, OpNode); @@ -6398,42 +6392,42 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getNode(AArch64ISD::VASHR, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRN, DAG, true); + return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRN, DAG); case Intrinsic::aarch64_neon_sqshrun: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(), DAG.getNode(AArch64ISD::VASHR, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRUN, DAG, true); + return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRUN, DAG); case Intrinsic::aarch64_neon_uqshrn: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(), DAG.getNode(AArch64ISD::VLSHR, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHRN, DAG, true); + return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHRN, DAG); case Intrinsic::aarch64_neon_sqrshrn: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(), DAG.getNode(AArch64ISD::SRSHR_I, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRN, DAG, true); + return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRN, DAG); case Intrinsic::aarch64_neon_sqrshrun: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(), DAG.getNode(AArch64ISD::SRSHR_I, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRUN, DAG, true); + return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRUN, DAG); case Intrinsic::aarch64_neon_uqrshrn: if (Op.getValueType().isVector()) return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(), DAG.getNode(AArch64ISD::URSHR_I, DL, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2))); - return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHRN, DAG, true); + return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHRN, DAG); case Intrinsic::aarch64_neon_sqrshl: if (Op.getValueType().isVector()) return SDValue(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 1bdf37dc4b2c0..9f495c36fa128 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -10162,9 +10162,10 @@ multiclass SIMDScalarLShiftDTied<bit U, bits<5> opc, string asm, } } -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm, - SDPatternOperator OpNode = null_frag> { + SDPatternOperator OpNode = null_frag, + SDPatternOperator GOpNode = null_frag> { + let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in { def b : BaseSIMDScalarShift { let Inst{18-16} = imm{2-0}; } @@ -10180,6 +10181,10 @@ multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm, [(set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn), vecshiftR32:$imm))]> { let Inst{20-16} = imm{4-0}; } + } + + def: Pat<(i32 (GOpNode (i64 FPR64:$Rd), vecshiftR32:$imm)), + (!cast<Instruction>(NAME # "s") FPR64:$Rd, vecshiftR32:$imm)>; } multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 819b84b48f7cf..034eb82cecf58 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8905,15 +8905,15 @@ def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))), defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>; defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", - AArch64sqrshrn>; + AArch64sqrshrn, int_aarch64_neon_sqrshrn>; defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", - AArch64sqrshrun>; + AArch64sqrshrun, int_aarch64_neon_sqrshrun>; defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", - AArch64sqshrn>; + AArch64sqshrn, int_aarch64_neon_sqshrn>; defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", - AArch64sqshrun>; + AArch64sqshrun, int_aarch64_neon_sqshrun>; defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri", AArch64vsri>; defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", @@ -8924,10 +8924,10 @@ defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", - AArch64uqrshrn>; + AArch64uqrshrn, int_aarch64_neon_uqrshrn>; defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", - AArch64uqshrn>; + AArch64uqshrn, int_aarch64_neon_uqshrn>; defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", TriOpFrag<(add node:$LHS, From be2507a0697dcbd64212df6c5903c36aae8978b7 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Tue, 16 Dec 2025 12:26:47 +0000 Subject: [PATCH 3/4] fix formatting --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index cf608e39c5075..b0a324e5973d0 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4573,9 +4573,10 @@ static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode, }; // Skip first operand as it is intrinsic ID. - for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I){ + for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I) { SDValue Val = Op.getOperand(I); - NewOps.push_back(isa<ConstantSDNode>(Val.getNode()) ? Val : bitcastToFloat(Val)); + NewOps.push_back(isa<ConstantSDNode>(Val.getNode()) ?
Val + : bitcastToFloat(Val)); } EVT OrigVT = Op.getValueType(); SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps); From 9a0bd8553b0191efcbed03cc0c81424184a35424 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Tue, 16 Dec 2025 12:47:53 +0000 Subject: [PATCH 4/4] fix test --- llvm/test/CodeGen/AArch64/arm64-int-neon.s | 325 --------------------- llvm/test/CodeGen/AArch64/arm64-vshift.ll | 50 +--- 2 files changed, 16 insertions(+), 359 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/arm64-int-neon.s diff --git a/llvm/test/CodeGen/AArch64/arm64-int-neon.s b/llvm/test/CodeGen/AArch64/arm64-int-neon.s deleted file mode 100644 index 4599c60e82703..0000000000000 --- a/llvm/test/CodeGen/AArch64/arm64-int-neon.s +++ /dev/null @@ -1,325 +0,0 @@ - .file "arm64-int-neon.ll" - .text - .globl test_sqrshl_s32 // -- Begin function test_sqrshl_s32 - .p2align 2 - .type test_sqrshl_s32,@function -test_sqrshl_s32: // @test_sqrshl_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs s0, s0 - sqrshl s0, s0, s0 - fmov w0, s0 - ret -.Lfunc_end0: - .size test_sqrshl_s32, .Lfunc_end0-test_sqrshl_s32 - .cfi_endproc - // -- End function - .globl test_sqrshl_s64 // -- Begin function test_sqrshl_s64 - .p2align 2 - .type test_sqrshl_s64,@function -test_sqrshl_s64: // @test_sqrshl_s64 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - sqrshl d0, d0, d0 - fmov x0, d0 - ret -.Lfunc_end1: - .size test_sqrshl_s64, .Lfunc_end1-test_sqrshl_s64 - .cfi_endproc - // -- End function - .globl test_sqshl_s32 // -- Begin function test_sqshl_s32 - .p2align 2 - .type test_sqshl_s32,@function -test_sqshl_s32: // @test_sqshl_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs s0, s0 - sqshl s0, s0, s0 - fmov w0, s0 - ret -.Lfunc_end2: - .size test_sqshl_s32, .Lfunc_end2-test_sqshl_s32 - .cfi_endproc - // -- End function - .globl test_sqshl_s64 // -- Begin function test_sqshl_s64 - .p2align 2 - .type test_sqshl_s64,@function -test_sqshl_s64: // @test_sqshl_s64 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - sqshl d0, d0, d0 - fmov x0, d0 - ret -.Lfunc_end3: - .size test_sqshl_s64, .Lfunc_end3-test_sqshl_s64 - .cfi_endproc - // -- End function - .globl test_uqrshl_s32 // -- Begin function test_uqrshl_s32 - .p2align 2 - .type test_uqrshl_s32,@function -test_uqrshl_s32: // @test_uqrshl_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs s0, s0 - uqrshl s0, s0, s0 - fmov w0, s0 - ret -.Lfunc_end4: - .size test_uqrshl_s32, .Lfunc_end4-test_uqrshl_s32 - .cfi_endproc - // -- End function - .globl test_uqrshl_s64 // -- Begin function test_uqrshl_s64 - .p2align 2 - .type test_uqrshl_s64,@function -test_uqrshl_s64: // @test_uqrshl_s64 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - uqrshl d0, d0, d0 - fmov x0, d0 - ret -.Lfunc_end5: - .size test_uqrshl_s64, .Lfunc_end5-test_uqrshl_s64 - .cfi_endproc - // -- End function - .globl test_uqshl_s32 // -- Begin function test_uqshl_s32 - .p2align 2 - .type test_uqshl_s32,@function -test_uqshl_s32: // @test_uqshl_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs s0, s0 - uqshl s0, s0, s0 - fmov w0, s0 - ret -.Lfunc_end6: - .size test_uqshl_s32, .Lfunc_end6-test_uqshl_s32 - .cfi_endproc - // -- End function - .globl test_uqshl_s64 // -- Begin function test_uqshl_s64 - .p2align 2 - .type test_uqshl_s64,@function -test_uqshl_s64: // @test_uqshl_s64 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - uqshl d0, d0, d0 - fmov x0, d0 - ret -.Lfunc_end7: - .size test_uqshl_s64, .Lfunc_end7-test_uqshl_s64 - .cfi_endproc - // -- End function - .globl 
test_sqshrn_s32 // -- Begin function test_sqshrn_s32 - .p2align 2 - .type test_sqshrn_s32,@function -test_sqshrn_s32: // @test_sqshrn_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - sqshrn s0, d0, #1 - str s0, [x0] - ret -.Lfunc_end8: - .size test_sqshrn_s32, .Lfunc_end8-test_sqshrn_s32 - .cfi_endproc - // -- End function - .globl test_sqshrun_s32 // -- Begin function test_sqshrun_s32 - .p2align 2 - .type test_sqshrun_s32,@function -test_sqshrun_s32: // @test_sqshrun_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - sqshrun s0, d0, #1 - str s0, [x0] - ret -.Lfunc_end9: - .size test_sqshrun_s32, .Lfunc_end9-test_sqshrun_s32 - .cfi_endproc - // -- End function - .globl test_uqshrn_s32 // -- Begin function test_uqshrn_s32 - .p2align 2 - .type test_uqshrn_s32,@function -test_uqshrn_s32: // @test_uqshrn_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - uqshrn s0, d0, #1 - str s0, [x0] - ret -.Lfunc_end10: - .size test_uqshrn_s32, .Lfunc_end10-test_uqshrn_s32 - .cfi_endproc - // -- End function - .globl test_sqrshrn_s32 // -- Begin function test_sqrshrn_s32 - .p2align 2 - .type test_sqrshrn_s32,@function -test_sqrshrn_s32: // @test_sqrshrn_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - sqrshrn s0, d0, #1 - str s0, [x0] - ret -.Lfunc_end11: - .size test_sqrshrn_s32, .Lfunc_end11-test_sqrshrn_s32 - .cfi_endproc - // -- End function - .globl test_sqrshrun_s32 // -- Begin function test_sqrshrun_s32 - .p2align 2 - .type test_sqrshrun_s32,@function -test_sqrshrun_s32: // @test_sqrshrun_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - sqrshrun s0, d0, #1 - str s0, [x0] - ret -.Lfunc_end12: - .size test_sqrshrun_s32, .Lfunc_end12-test_sqrshrun_s32 - .cfi_endproc - // -- End function - .globl test_uqrshrn_s32 // -- Begin function test_uqrshrn_s32 - .p2align 2 - .type test_uqrshrn_s32,@function -test_uqrshrn_s32: // @test_uqrshrn_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - uqrshrn s0, d0, #1 - str s0, [x0] - ret -.Lfunc_end13: - .size test_uqrshrn_s32, .Lfunc_end13-test_uqrshrn_s32 - .cfi_endproc - // -- End function - .globl test_sqadd_s32 // -- Begin function test_sqadd_s32 - .p2align 2 - .type test_sqadd_s32,@function -test_sqadd_s32: // @test_sqadd_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs s0, s0 - sqadd s0, s0, s0 - fmov w0, s0 - ret -.Lfunc_end14: - .size test_sqadd_s32, .Lfunc_end14-test_sqadd_s32 - .cfi_endproc - // -- End function - .globl test_sqadd_s64 // -- Begin function test_sqadd_s64 - .p2align 2 - .type test_sqadd_s64,@function -test_sqadd_s64: // @test_sqadd_s64 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - sqadd d0, d0, d0 - fmov x0, d0 - ret -.Lfunc_end15: - .size test_sqadd_s64, .Lfunc_end15-test_sqadd_s64 - .cfi_endproc - // -- End function - .globl test_sqsub_s32 // -- Begin function test_sqsub_s32 - .p2align 2 - .type test_sqsub_s32,@function -test_sqsub_s32: // @test_sqsub_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs s0, s0 - sqsub s0, s0, s0 - fmov w0, s0 - ret -.Lfunc_end16: - .size test_sqsub_s32, .Lfunc_end16-test_sqsub_s32 - .cfi_endproc - // -- End function - .globl test_sqsub_s64 // -- Begin function test_sqsub_s64 - .p2align 2 - .type test_sqsub_s64,@function -test_sqsub_s64: // @test_sqsub_s64 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - sqsub d0, d0, d0 - fmov x0, d0 - ret -.Lfunc_end17: - .size test_sqsub_s64, .Lfunc_end17-test_sqsub_s64 - .cfi_endproc - // -- End function - .globl test_uqadd_s32 // -- Begin function test_uqadd_s32 - .p2align 2 - .type 
test_uqadd_s32,@function -test_uqadd_s32: // @test_uqadd_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs s0, s0 - uqadd s0, s0, s0 - fmov w0, s0 - ret -.Lfunc_end18: - .size test_uqadd_s32, .Lfunc_end18-test_uqadd_s32 - .cfi_endproc - // -- End function - .globl test_uqadd_s64 // -- Begin function test_uqadd_s64 - .p2align 2 - .type test_uqadd_s64,@function -test_uqadd_s64: // @test_uqadd_s64 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - uqadd d0, d0, d0 - fmov x0, d0 - ret -.Lfunc_end19: - .size test_uqadd_s64, .Lfunc_end19-test_uqadd_s64 - .cfi_endproc - // -- End function - .globl test_uqsub_s32 // -- Begin function test_uqsub_s32 - .p2align 2 - .type test_uqsub_s32,@function -test_uqsub_s32: // @test_uqsub_s32 - .cfi_startproc -// %bb.0: // %entry - fcvtzs s0, s0 - uqsub s0, s0, s0 - fmov w0, s0 - ret -.Lfunc_end20: - .size test_uqsub_s32, .Lfunc_end20-test_uqsub_s32 - .cfi_endproc - // -- End function - .globl test_uqsub_s64 // -- Begin function test_uqsub_s64 - .p2align 2 - .type test_uqsub_s64,@function -test_uqsub_s64: // @test_uqsub_s64 - .cfi_startproc -// %bb.0: // %entry - fcvtzs d0, s0 - uqsub d0, d0, d0 - fmov x0, d0 - ret -.Lfunc_end21: - .size test_uqsub_s64, .Lfunc_end21-test_uqsub_s64 - .cfi_endproc - // -- End function - .globl test_sqdmulls_scalar // -- Begin function test_sqdmulls_scalar - .p2align 2 - .type test_sqdmulls_scalar,@function -test_sqdmulls_scalar: // @test_sqdmulls_scalar - .cfi_startproc -// %bb.0: - fcvtzs s0, s0 - sqdmull d0, s0, s0 - fmov x0, d0 - ret -.Lfunc_end22: - .size test_sqdmulls_scalar, .Lfunc_end22-test_sqdmulls_scalar - .cfi_endproc - // -- End function - .section ".note.GNU-stack","",@progbits diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index 9743639d99d9b..7e796fe72928d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -927,23 +927,14 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind { } define i64 @sqrshl_scalar_constant(ptr %A) nounwind { -; CHECK-SD-LABEL: sqrshl_scalar_constant: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov x8, #1 // =0x1 -; CHECK-SD-NEXT: ldr d0, [x0] -; CHECK-SD-NEXT: fmov d1, x8 -; CHECK-SD-NEXT: sqrshl d0, d0, d1 -; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: sqrshl_scalar_constant: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov w8, #1 // =0x1 -; CHECK-GI-NEXT: ldr d0, [x0] -; CHECK-GI-NEXT: fmov d1, x8 -; CHECK-GI-NEXT: sqrshl d0, d0, d1 -; CHECK-GI-NEXT: fmov x0, d0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: sqrshl_scalar_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: sqrshl d0, d0, d1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1) ret i64 %tmp3 @@ -1042,23 +1033,14 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind { } define i64 @uqrshl_scalar_constant(ptr %A) nounwind { -; CHECK-SD-LABEL: uqrshl_scalar_constant: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov x8, #1 // =0x1 -; CHECK-SD-NEXT: ldr d0, [x0] -; CHECK-SD-NEXT: fmov d1, x8 -; CHECK-SD-NEXT: uqrshl d0, d0, d1 -; CHECK-SD-NEXT: fmov x0, d0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: uqrshl_scalar_constant: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov w8, #1 // =0x1 -; CHECK-GI-NEXT: ldr d0, [x0] -; CHECK-GI-NEXT: fmov d1, x8 -; CHECK-GI-NEXT: uqrshl d0, d0, d1 -; CHECK-GI-NEXT: fmov x0, d0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: 
uqrshl_scalar_constant: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: uqrshl d0, d0, d1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ret %tmp1 = load i64, ptr %A %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1) ret i64 %tmp3