Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4563,18 +4563,21 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
SelectionDAG &DAG) {
SDLoc DL(Op);
SmallVector<SDValue, 2> NewOps;
auto getFloatVT = [](EVT VT) {
assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
return VT == MVT::i32 ? MVT::f32 : MVT::f64;
};
auto bitcastToFloat = [&](SDValue Val) {
return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
};
SmallVector<SDValue, 2> NewOps;
NewOps.reserve(Op.getNumOperands() - 1);

for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I)
NewOps.push_back(bitcastToFloat(Op.getOperand(I)));
// Skip the first operand, as it is the intrinsic ID.
for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I) {
SDValue Val = Op.getOperand(I);
NewOps.push_back(isa<ConstantSDNode>(Val.getNode()) ? Val
: bitcastToFloat(Val));
}
EVT OrigVT = Op.getValueType();
SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps);
return DAG.getBitcast(OrigVT, OpNode);
Expand Down Expand Up @@ -6390,42 +6393,42 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRN, DAG);
case Intrinsic::aarch64_neon_sqshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRUN, DAG);
case Intrinsic::aarch64_neon_uqshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VLSHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHRN, DAG);
case Intrinsic::aarch64_neon_sqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRN, DAG);
case Intrinsic::aarch64_neon_sqrshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRUN, DAG);
case Intrinsic::aarch64_neon_uqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::URSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
return SDValue();
return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHRN, DAG);
case Intrinsic::aarch64_neon_sqrshl:
if (Op.getValueType().isVector())
return SDValue();
Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -10162,9 +10162,10 @@ multiclass SIMDScalarLShiftDTied<bit U, bits<5> opc, string asm,
}
}

let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
SDPatternOperator OpNode = null_frag,
SDPatternOperator GOpNode = null_frag> {
let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in {
def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
FPR8, FPR16, vecshiftR8, asm, []> {
let Inst{18-16} = imm{2-0};
Expand All @@ -10177,9 +10178,13 @@ multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,

def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
FPR32, FPR64, vecshiftR32, asm,
[(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> {
[(set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn), vecshiftR32:$imm))]> {
let Inst{20-16} = imm{4-0};
}
}

def: Pat<(i32 (GOpNode (i64 FPR64:$Rd), vecshiftR32:$imm)),
(!cast<Instruction>(NAME # "s") FPR64:$Rd, vecshiftR32:$imm)>;
}

multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm,
Expand Down
21 changes: 12 additions & 9 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1035,9 +1035,12 @@ def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>;
def AArch64sqdmull: SDNode<"AArch64ISD::SQDMULL",
SDTypeProfile<1, 2, [ SDTCisSameAs<1, 2>,
SDTCisFP<0>, SDTCisFP<1>]>>;

//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;

def AArch64sqshrun: SDNode<"AArch64ISD::SQSHRUN", SDTFPTruncRoundOp>;
def AArch64sqrshrun: SDNode<"AArch64ISD::SQRSHRUN", SDTFPTruncRoundOp>;
def AArch64sqshrn: SDNode<"AArch64ISD::SQSHRN", SDTFPTruncRoundOp>;
def AArch64uqshrn: SDNode<"AArch64ISD::UQSHRN", SDTFPTruncRoundOp>;
def AArch64sqrshrn: SDNode<"AArch64ISD::SQRSHRN", SDTFPTruncRoundOp>;
def AArch64uqrshrn: SDNode<"AArch64ISD::UQRSHRN", SDTFPTruncRoundOp>;
// Vector immediate ops
def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
Expand Down Expand Up @@ -8902,15 +8905,15 @@ def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>;
defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
int_aarch64_neon_sqrshrn>;
AArch64sqrshrn, int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
int_aarch64_neon_sqrshrun>;
AArch64sqrshrun, int_aarch64_neon_sqrshrun>;
defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
int_aarch64_neon_sqshrn>;
AArch64sqshrn, int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
int_aarch64_neon_sqshrun>;
AArch64sqshrun, int_aarch64_neon_sqshrun>;
defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri", AArch64vsri>;
defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
Expand All @@ -8921,10 +8924,10 @@ defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;
AArch64uqrshrn, int_aarch64_neon_uqrshrn>;
defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
int_aarch64_neon_uqshrn>;
AArch64uqshrn, int_aarch64_neon_uqshrn>;
defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,
Expand Down
94 changes: 92 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-int-neon.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI


; CHECK-GI: warning: Instruction selection used fallback path for test_uqadd_s32
; CHECK-GI: warning: Instruction selection used fallback path for test_sqshrn_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshrun_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshrn_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrn_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrun_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshrn_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64
Expand Down Expand Up @@ -113,6 +119,90 @@ entry:
ret i64 %res
}

; Scalar lowering of llvm.aarch64.neon.sqshrn.i32 with an immediate shift of 1.
; The i64 input comes from fcvtzs, so the expected assembly feeds d0 straight
; into sqshrn and stores the narrowed result from s0, with no intervening
; register moves.
define void @test_sqshrn_s32(float noundef %a, ptr %dst) {
; CHECK-LABEL: test_sqshrn_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs d0, s0
; CHECK-NEXT: sqshrn s0, d0, #1
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
%res = tail call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %cvt, i32 1)
store i32 %res, ptr %dst, align 4
ret void
}

; Scalar lowering of llvm.aarch64.neon.sqshrun.i32 with an immediate shift of 1.
; The i64 input comes from fcvtzs, so the expected assembly feeds d0 straight
; into sqshrun and stores the narrowed result from s0, with no intervening
; register moves.
define void @test_sqshrun_s32(float noundef %a, ptr %dst) {
; CHECK-LABEL: test_sqshrun_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs d0, s0
; CHECK-NEXT: sqshrun s0, d0, #1
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
%res = tail call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %cvt, i32 1)
store i32 %res, ptr %dst, align 4
ret void
}

; Scalar lowering of llvm.aarch64.neon.uqshrn.i32 with an immediate shift of 1.
; The i64 input comes from fcvtzs, so the expected assembly feeds d0 straight
; into uqshrn and stores the narrowed result from s0, with no intervening
; register moves.
define void @test_uqshrn_s32(float noundef %a, ptr %dst) {
; CHECK-LABEL: test_uqshrn_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs d0, s0
; CHECK-NEXT: uqshrn s0, d0, #1
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
%res = tail call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %cvt, i32 1)
store i32 %res, ptr %dst, align 4
ret void
}

; Scalar lowering of llvm.aarch64.neon.sqrshrn.i32 with an immediate shift of 1.
; The i64 input comes from fcvtzs, so the expected assembly feeds d0 straight
; into sqrshrn and stores the narrowed result from s0, with no intervening
; register moves.
define void @test_sqrshrn_s32(float noundef %a, ptr %dst) {
; CHECK-LABEL: test_sqrshrn_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs d0, s0
; CHECK-NEXT: sqrshrn s0, d0, #1
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
%res = tail call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %cvt, i32 1)
store i32 %res, ptr %dst, align 4
ret void
}

; Scalar lowering of llvm.aarch64.neon.sqrshrun.i32 with an immediate shift of 1.
; The i64 input comes from fcvtzs, so the expected assembly feeds d0 straight
; into sqrshrun and stores the narrowed result from s0, with no intervening
; register moves.
define void @test_sqrshrun_s32(float noundef %a, ptr %dst) {
; CHECK-LABEL: test_sqrshrun_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs d0, s0
; CHECK-NEXT: sqrshrun s0, d0, #1
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
%res = tail call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %cvt, i32 1)
store i32 %res, ptr %dst, align 4
ret void
}

; Scalar lowering of llvm.aarch64.neon.uqrshrn.i32 with an immediate shift of 1.
; The i64 input comes from fcvtzs, so the expected assembly feeds d0 straight
; into uqrshrn and stores the narrowed result from s0, with no intervening
; register moves.
define void @test_uqrshrn_s32(float noundef %a, ptr %dst) {
; CHECK-LABEL: test_uqrshrn_s32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs d0, s0
; CHECK-NEXT: uqrshrn s0, d0, #1
; CHECK-NEXT: str s0, [x0]
; CHECK-NEXT: ret
entry:
%cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
%res = tail call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %cvt, i32 1)
store i32 %res, ptr %dst, align 4
ret void
}

define i32 @test_sqadd_s32(float noundef %a) {
; CHECK-LABEL: test_sqadd_s32:
; CHECK: // %bb.0: // %entry
Expand Down Expand Up @@ -227,4 +317,4 @@ define i64 @test_sqdmulls_scalar(float %A){
%cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
%prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %cvt, i32 %cvt)
ret i64 %prod
}
}
50 changes: 16 additions & 34 deletions llvm/test/CodeGen/AArch64/arm64-vshift.ll
Original file line number Diff line number Diff line change
Expand Up @@ -927,23 +927,14 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind {
}

define i64 @sqrshl_scalar_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: sqrshl_scalar_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov x8, #1 // =0x1
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: fmov d1, x8
; CHECK-SD-NEXT: sqrshl d0, d0, d1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sqrshl_scalar_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: sqrshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
; CHECK-LABEL: sqrshl_scalar_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: sqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1)
ret i64 %tmp3
Expand Down Expand Up @@ -1042,23 +1033,14 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind {
}

define i64 @uqrshl_scalar_constant(ptr %A) nounwind {
; CHECK-SD-LABEL: uqrshl_scalar_constant:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: mov x8, #1 // =0x1
; CHECK-SD-NEXT: ldr d0, [x0]
; CHECK-SD-NEXT: fmov d1, x8
; CHECK-SD-NEXT: uqrshl d0, d0, d1
; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uqrshl_scalar_constant:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: ldr d0, [x0]
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: uqrshl d0, d0, d1
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
; CHECK-LABEL: uqrshl_scalar_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: uqrshl d0, d0, d1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%tmp1 = load i64, ptr %A
%tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1)
ret i64 %tmp3
Expand Down