26 changes: 17 additions & 9 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4561,7 +4561,8 @@ static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
}

static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
-                                     SelectionDAG &DAG) {
+                                     SelectionDAG &DAG,
+                                     bool IsLastInt = false) {
SDLoc DL(Op);
auto getFloatVT = [](EVT VT) {
assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
@@ -4570,11 +4571,18 @@ static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
auto bitcastToFloat = [&](SDValue Val) {
return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
};

+  const unsigned NumOps = Op.getNumOperands();
+  const unsigned LastOpIdx = NumOps - 1;
  SmallVector<SDValue, 2> NewOps;
-  NewOps.reserve(Op.getNumOperands() - 1);
+  NewOps.reserve(NumOps - 1);
Contributor:
Suggested change:
-  NewOps.reserve(NumOps - 1);
+  NewOps.reserve(LastOpIdx);

Contributor Author:
Rewrote this whole part in the end.

-  for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I)
+  // Skip first operand as it is intrinsic ID.
+  for (unsigned I = 1, E = LastOpIdx; I < E; ++I)
    NewOps.push_back(bitcastToFloat(Op.getOperand(I)));
+  SDValue LastOp = IsLastInt ? Op.getOperand(LastOpIdx)
+                             : bitcastToFloat(Op.getOperand(LastOpIdx));
Contributor:
Could this instead check whether the last operand needs a bitcast based on the type, without passing the extra IsLastInt parameter? i.e.:

Suggested change:
-                             : bitcastToFloat(Op.getOperand(LastOpIdx));
+  SDValue LastOp = Op.getOperand(LastOpIdx);
+  LastOp = isa<ConstantSDNode>(LastOp) ? LastOp : bitcastToFloat(LastOp);

Contributor Author:
Thanks, that makes way more sense!

(See the sketch after this hunk.)

+  NewOps.push_back(LastOp);
EVT OrigVT = Op.getValueType();
SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps);
return DAG.getBitcast(OrigVT, OpNode);
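For reference, a minimal sketch of the helper with the reviewer's suggestion applied. This is an assumed final shape, not the committed code (the author notes the part was rewritten); the body of getFloatVT is elided in the diff and is assumed here to map i32 to f32 and i64 to f64, which the assert implies.

// Sketch only: bitcast each non-constant integer operand to the equivalent
// FP type, leaving constants (e.g. the shift immediate) untouched.
static SDValue lowerIntNeonIntrinsic(SDValue Op, unsigned Opcode,
                                     SelectionDAG &DAG) {
  SDLoc DL(Op);
  auto getFloatVT = [](EVT VT) {
    assert((VT == MVT::i32 || VT == MVT::i64) && "Unexpected VT");
    return VT == MVT::i32 ? MVT::f32 : MVT::f64;
  };
  auto bitcastToFloat = [&](SDValue Val) {
    return DAG.getBitcast(getFloatVT(Val.getValueType()), Val);
  };
  SmallVector<SDValue, 2> NewOps;
  // Operand 0 is the intrinsic ID, so start at operand 1.
  for (unsigned I = 1, E = Op.getNumOperands(); I < E; ++I) {
    SDValue V = Op.getOperand(I);
    NewOps.push_back(isa<ConstantSDNode>(V) ? V : bitcastToFloat(V));
  }
  EVT OrigVT = Op.getValueType();
  SDValue OpNode = DAG.getNode(Opcode, DL, getFloatVT(OrigVT), NewOps);
  return DAG.getBitcast(OrigVT, OpNode);
}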
@@ -6390,42 +6398,42 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VASHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQSHRUN, DAG, true);
case Intrinsic::aarch64_neon_uqshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::VLSHR, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::UQSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshrun:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::SRSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::SQRSHRUN, DAG, true);
case Intrinsic::aarch64_neon_uqrshrn:
if (Op.getValueType().isVector())
return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
DAG.getNode(AArch64ISD::URSHR_I, DL,
Op.getOperand(1).getValueType(),
Op.getOperand(1), Op.getOperand(2)));
-    return SDValue();
+    return lowerIntNeonIntrinsic(Op, AArch64ISD::UQRSHRN, DAG, true);
case Intrinsic::aarch64_neon_sqrshl:
if (Op.getValueType().isVector())
return SDValue();
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -10177,7 +10177,7 @@ multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,

def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
FPR32, FPR64, vecshiftR32, asm,
-    [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> {
+    [(set (f32 FPR32:$Rd), (OpNode (f64 FPR64:$Rn), vecshiftR32:$imm))]> {
let Inst{20-16} = imm{4-0};
}
}
21 changes: 12 additions & 9 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1035,9 +1035,12 @@ def AArch64uqsub: SDNode<"AArch64ISD::UQSUB", SDTFPBinOp>;
def AArch64sqdmull: SDNode<"AArch64ISD::SQDMULL",
SDTypeProfile<1, 2, [ SDTCisSameAs<1, 2>,
SDTCisFP<0>, SDTCisFP<1>]>>;

-//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;

+def AArch64sqshrun: SDNode<"AArch64ISD::SQSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqrshrun: SDNode<"AArch64ISD::SQRSHRUN", SDTFPTruncRoundOp>;
+def AArch64sqshrn: SDNode<"AArch64ISD::SQSHRN", SDTFPTruncRoundOp>;
+def AArch64uqshrn: SDNode<"AArch64ISD::UQSHRN", SDTFPTruncRoundOp>;
+def AArch64sqrshrn: SDNode<"AArch64ISD::SQRSHRN", SDTFPTruncRoundOp>;
+def AArch64uqrshrn: SDNode<"AArch64ISD::UQRSHRN", SDTFPTruncRoundOp>;
// Vector immediate ops
def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
@@ -8902,15 +8905,15 @@ def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli", AArch64vsli>;
defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
-                                     int_aarch64_neon_sqrshrn>;
+                                     AArch64sqrshrn>;
Collaborator:
Can you make these PatFrags that match the node or the intrinsics? That should allow GISel to keep working and not take a step backwards.

Contributor Author:
I added them back, but as you can see from the test, there is no GlobalISel support for these nodes, so I am not sure if it is okay to have these untested patterns there.

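For illustration, the PatFrags idiom the reviewer is pointing at looks roughly like this. This is a hypothetical sketch, not code from the PR; the record name is invented, and since the intrinsic is integer-typed while the new node is FP-typed, type inference may in practice force separate patterns instead.

// Hypothetical sketch: one fragment that matches either the new SDNode or
// the original intrinsic, so patterns written against it serve both the
// SelectionDAG path and GlobalISel (which still sees the raw intrinsic).
def AArch64sqrshrn_frag : PatFrags<(ops node:$Rn, node:$imm),
                                   [(AArch64sqrshrn node:$Rn, node:$imm),
                                    (int_aarch64_neon_sqrshrn node:$Rn, node:$imm)]>;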
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
-                                     int_aarch64_neon_sqrshrun>;
+                                     AArch64sqrshrun>;
defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
-                                     int_aarch64_neon_sqshrn>;
+                                     AArch64sqshrn>;
defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
-                                     int_aarch64_neon_sqshrun>;
+                                     AArch64sqshrun>;
defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri", AArch64vsri>;
defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
@@ -8921,10 +8924,10 @@ defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
-                                     int_aarch64_neon_uqrshrn>;
+                                     AArch64uqrshrn>;
defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
-                                     int_aarch64_neon_uqshrn>;
+                                     AArch64uqshrn>;
defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,
94 changes: 92 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-int-neon.ll
@@ -3,7 +3,13 @@
; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI


-; CHECK-GI: warning: Instruction selection used fallback path for test_uqadd_s32
+; CHECK-GI: warning: Instruction selection used fallback path for test_sqshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqshrun_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_sqrshrun_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqrshrn_s32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqadd_s64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s32
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_uqsub_s64
@@ -113,6 +119,90 @@ entry:
ret i64 %res
}

+define void @test_sqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrn_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqshrn s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_sqshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqshrun_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqshrun s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_uqshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqshrn_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    uqshrn s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_sqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrn_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqrshrn s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_sqrshrun_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_sqrshrun_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    sqrshrun s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}
+
+define void @test_uqrshrn_s32(float noundef %a, ptr %dst) {
+; CHECK-LABEL: test_uqrshrn_s32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs d0, s0
+; CHECK-NEXT:    uqrshrn s0, d0, #1
+; CHECK-NEXT:    str s0, [x0]
+; CHECK-NEXT:    ret
+entry:
+  %cvt = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %a)
+  %res = tail call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %cvt, i32 1)
+  store i32 %res, ptr %dst, align 4
+  ret void
+}

define i32 @test_sqadd_s32(float noundef %a) {
; CHECK-LABEL: test_sqadd_s32:
; CHECK: // %bb.0: // %entry
@@ -227,4 +317,4 @@ define i64 @test_sqdmulls_scalar(float %A){
%cvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %A)
%prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %cvt, i32 %cvt)
ret i64 %prod
-}
+}