Skip to content

Commit

Permalink
[AArch64][GlobalISel] Prefer to use Vector Truncate (#105692)
Browse files Browse the repository at this point in the history
Tries to combine scalarised truncates into vector truncate operations

EXAMPLE:
`%a(i32), %b(i32) = G_UNMERGE %src(<2 x i32>)`
`%T_a(i16) = G_TRUNC %a(i32)`
`%T_b(i16) = G_TRUNC %b(i32)`
`%Imp(i16) = G_IMPLICIT_DEF(i16)`
`%dst(v4i16) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Imp(i16),
%Imp(i16)`

===>
`%Imp(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)`
`%Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Imp(<2 x i32>)`
`%dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)`
  • Loading branch information
chuongg3 authored Sep 23, 2024
1 parent f4eeae1 commit b0dc7b5
Show file tree
Hide file tree
Showing 15 changed files with 178 additions and 238 deletions.
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,9 @@ class CombinerHelper {
bool matchRotateOutOfRange(MachineInstr &MI);
void applyRotateOutOfRange(MachineInstr &MI);

/// Match a G_BUILD_VECTOR \p MI whose leading sources are G_TRUNCs of values
/// defined by a single G_UNMERGE_VALUES and whose remaining sources are all
/// G_IMPLICIT_DEF. On success \p MatchInfo holds the source register of that
/// G_UNMERGE_VALUES.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo);
/// Replace the matched G_BUILD_VECTOR \p MI with a vector G_TRUNC of
/// \p MatchInfo, first padding it with undef through G_CONCAT_VECTORS when the
/// destination has more elements than \p MatchInfo.
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo);

/// \returns true if a G_ICMP instruction \p MI can be replaced with a true
/// or false constant based off of KnownBits information.
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo);
Expand Down
10 changes: 9 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -1505,6 +1505,13 @@ def insert_vector_elt_oob : GICombineRule<
[{ return Helper.matchInsertVectorElementOOB(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

// Combine a G_BUILD_VECTOR whose leading sources are scalar G_TRUNCs of the
// results of one G_UNMERGE_VALUES, and whose trailing sources are undef, into a
// vector G_TRUNC of the unmerge source. For example:
// Combine v8i8 (buildvector i8 (trunc(unmerge)), i8 (trunc), i8 (trunc), i8 (trunc), undef, undef, undef, undef)
// $matchinfo carries the register found by the matcher to the apply step.
def combine_use_vector_truncate : GICombineRule<
(defs root:$root, register_matchinfo:$matchinfo),
(match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
[{ return Helper.matchUseVectorTruncate(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyUseVectorTruncate(*${root}, ${matchinfo}); }])>;

def add_of_vscale : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_VSCALE $left, $imm1),
Expand Down Expand Up @@ -1912,7 +1919,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
sub_add_reg, select_to_minmax,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
combine_concat_vector, match_addos,
sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat,
combine_use_vector_truncate]>;

// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
Expand Down
106 changes: 106 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3320,6 +3320,112 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
isConstTrueVal(TLI, Cst, IsVector, IsFP);
}

// This combine tries to reduce the number of scalarised G_TRUNC instructions by
// using vector truncates instead
//
// EXAMPLE:
//   %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
//   %T_a(i16) = G_TRUNC %a(i32)
//   %T_b(i16) = G_TRUNC %b(i32)
//   %Undef(i16) = G_IMPLICIT_DEF(i16)
//   %dst(v4i16) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
//
// ===>
//   %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
//   %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
//   %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
//
// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs.
//
// The I-th G_TRUNC must read the I-th result of the G_UNMERGE_VALUES, and the
// unmerge must split a vector into its individual elements. Otherwise lane I
// of the vector G_TRUNC would not hold the same value as source I of the
// G_BUILD_VECTOR.
//
// On success, MatchInfo is set to the source register of the G_UNMERGE_VALUES.
bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,
                                            Register &MatchInfo) {
  auto BuildMI = cast<GBuildVector>(&MI);
  unsigned NumOperands = BuildMI->getNumSources();
  LLT DstTy = MRI.getType(BuildMI->getReg(0));

  // Check the G_BUILD_VECTOR sources
  unsigned I;
  MachineInstr *UnmergeMI = nullptr;

  // Check all leading source TRUNCs come from the same UNMERGE instruction,
  // in the same order as the UNMERGE results.
  for (I = 0; I < NumOperands; ++I) {
    auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
    if (SrcMI->getOpcode() != TargetOpcode::G_TRUNC)
      break;

    Register TruncSrcReg = SrcMI->getOperand(1).getReg();
    if (!UnmergeMI) {
      UnmergeMI = MRI.getVRegDef(TruncSrcReg);
      if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
        return false;
    }

    // All operands of G_UNMERGE_VALUES except the last one are results.
    // Requiring result I to feed truncate I rejects truncates of a different
    // instruction as well as permuted or repeated lanes.
    unsigned NumUnmergeDefs = UnmergeMI->getNumOperands() - 1;
    if (I >= NumUnmergeDefs ||
        UnmergeMI->getOperand(I).getReg() != TruncSrcReg)
      return false;
  }
  if (I < 2)
    return false;

  // Check the remaining source elements are only G_IMPLICIT_DEF
  for (; I < NumOperands; ++I) {
    auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
    auto SrcMIOpc = SrcMI->getOpcode();

    if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
      return false;
  }

  // Check the unmerge splits a vector into its elements. A scalar source, or
  // results that are not exactly one source element each, cannot be expressed
  // as a lane-wise vector truncate.
  MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
  LLT UnmergeSrcTy = MRI.getType(MatchInfo);
  if (!UnmergeSrcTy.isVector())
    return false;
  LLT UnmergeDstTy = MRI.getType(UnmergeMI->getOperand(0).getReg());
  if (UnmergeSrcTy.getElementType() != UnmergeDstTy)
    return false;

  // Check the size of unmerge source
  if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
    return false;

  // Only generate legal instructions post-legalizer
  if (!IsPreLegalize) {
    LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());

    // The concat is only emitted when the source has to be padded.
    if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
        !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
      return false;

    if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
      return false;
  }

  return true;
}

// Rewrites the G_BUILD_VECTOR MI as a vector G_TRUNC of MatchInfo. When the
// destination has more elements than MatchInfo, MatchInfo is first widened
// with undef vectors through a G_CONCAT_VECTORS.
void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI,
                                            Register &MatchInfo) {
  Register DstReg = cast<GBuildVector>(&MI)->getReg(0);
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(MatchInfo);

  // Number of source-sized vectors needed to cover the destination.
  unsigned NumPieces = DstTy.getNumElements() / SrcTy.getNumElements();

  // If the element counts already agree, the source is truncated directly.
  Register TruncSrc = MatchInfo;
  if (NumPieces != 1) {
    // The source supplies the low lanes; every other piece is undef.
    Register PadReg = Builder.buildUndef(SrcTy).getReg(0);
    SmallVector<Register> Pieces;
    Pieces.push_back(MatchInfo);
    for (unsigned Remaining = NumPieces; Remaining > 1; --Remaining)
      Pieces.push_back(PadReg);

    LLT WideTy = DstTy.changeElementType(SrcTy.getScalarType());
    TruncSrc = Builder.buildConcatVectors(WideTy, Pieces).getReg(0);
  }

  Builder.buildTrunc(DstReg, TruncSrc);
  MI.eraseFromParent();
}

bool CombinerHelper::matchNotCmp(MachineInstr &MI,
SmallVectorImpl<Register> &RegsToNegate) {
assert(MI.getOpcode() == TargetOpcode::G_XOR);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -330,5 +330,5 @@ def AArch64PostLegalizerCombiner
select_to_minmax, or_to_bsp, combine_concat_vector,
commute_constant_to_rhs,
push_freeze_to_prevent_poison_from_propagating,
combine_mul_cmlt]> {
combine_mul_cmlt, combine_use_vector_truncate]> {
}
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
.legalFor({p0, s8, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
.legalFor({v16s8, v8s16, v4s32, v2s64, v2p0, v8s8, v4s16, v2s32, v4s8,
v2s16, v2s8})
.widenScalarToNextPow2(0)
.clampScalar(0, s8, s64)
.moreElementsToNextPow2(0)
Expand Down
24 changes: 6 additions & 18 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
Original file line number Diff line number Diff line change
Expand Up @@ -159,25 +159,13 @@ body: |
; CHECK-LABEL: name: test_freeze_v3s8
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[UV4]]
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s8>) = G_FREEZE [[DEF]]
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[FREEZE]](<4 x s8>)
; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: %ext0:_(s32) = G_AND [[ANYEXT1]], [[C]]
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16)
; CHECK-NEXT: %ext1:_(s32) = G_AND [[ANYEXT2]], [[C]]
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16)
; CHECK-NEXT: %ext2:_(s32) = G_AND [[ANYEXT3]], [[C]]
; CHECK-NEXT: %ext0:_(s32) = G_ZEXT [[UV]](s8)
; CHECK-NEXT: %ext1:_(s32) = G_ZEXT [[UV1]](s8)
; CHECK-NEXT: %ext2:_(s32) = G_ZEXT [[UV2]](s8)
; CHECK-NEXT: %res:_(<4 x s32>) = G_BUILD_VECTOR %ext0(s32), %ext1(s32), %ext2(s32), %undef(s32)
; CHECK-NEXT: $q0 = COPY %res(<4 x s32>)
%x:_(<3 x s8>) = G_IMPLICIT_DEF
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,13 +248,10 @@ body: |
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s16)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF2]](<4 x s8>)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<16 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, undef, undef, undef, undef)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[SHUF]](<16 x s8>)
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<4 x s32>) = G_UITOFP [[BITCAST]](<4 x s32>)
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/AArch64/bswap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,7 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %a){
;
; CHECK-GI-LABEL: bswap_v2i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov w8, v0.s[1]
; CHECK-GI-NEXT: mov v0.h[1], w8
; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: rev16 v0.8b, v0.8b
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
Expand Down
16 changes: 5 additions & 11 deletions llvm/test/CodeGen/AArch64/concat-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,12 @@ define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
;
; CHECK-GI-LABEL: concat_v8s16_v2s16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr h1, [x0]
; CHECK-GI-NEXT: ldr h2, [x0, #2]
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
; CHECK-GI-NEXT: xtn v2.4h, v0.4s
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: ldr h0, [x0]
; CHECK-GI-NEXT: ldr h1, [x0, #2]
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: mov v0.s[0], w8
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: mov v0.s[3], w8
Expand All @@ -209,10 +206,7 @@ define <16 x i8> @concat_v16s8_v4s8(ptr %ptr) {
;
; CHECK-GI-LABEL: concat_v16s8_v4s8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: dup v0.8h, w8
; CHECK-GI-NEXT: xtn v1.8b, v0.8h
; CHECK-GI-NEXT: ldr s0, [x0]
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: mov v0.s[2], w8
; CHECK-GI-NEXT: mov v0.s[3], w8
Expand Down
22 changes: 4 additions & 18 deletions llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,10 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
; CHECK-SD-LABEL: interleave2_v4f16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: zip1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: interleave2_v4f16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: dup v2.4s, w8
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: xtn v0.4h, v2.4s
; CHECK-GI-NEXT: mov v1.s[0], w8
; CHECK-GI-NEXT: mov v2.s[0], w9
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: mov v1.s[1], w8
; CHECK-GI-NEXT: mov v2.s[1], w8
; CHECK-GI-NEXT: zip1 v0.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: ret
; CHECK-LABEL: interleave2_v4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%retval = call <4 x half> @llvm.vector.interleave2.v4f16(<2 x half> %vec0, <2 x half> %vec1)
ret <4 x half> %retval
}
Expand Down
52 changes: 12 additions & 40 deletions llvm/test/CodeGen/AArch64/fptoi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3172,42 +3172,22 @@ entry:
}

define <3 x i16> @fptos_v3f32_v3i16(<3 x float> %a) {
; CHECK-SD-LABEL: fptos_v3f32_v3i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s
; CHECK-SD-NEXT: xtn v0.4h, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptos_v3f32_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcvtzs v0.4s, v0.4s
; CHECK-GI-NEXT: mov w8, v0.s[1]
; CHECK-GI-NEXT: mov w9, v0.s[2]
; CHECK-GI-NEXT: mov v0.h[1], w8
; CHECK-GI-NEXT: mov v0.h[2], w9
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
; CHECK-LABEL: fptos_v3f32_v3i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%c = fptosi <3 x float> %a to <3 x i16>
ret <3 x i16> %c
}

define <3 x i16> @fptou_v3f32_v3i16(<3 x float> %a) {
; CHECK-SD-LABEL: fptou_v3f32_v3i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-SD-NEXT: xtn v0.4h, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptou_v3f32_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-GI-NEXT: mov w8, v0.s[1]
; CHECK-GI-NEXT: mov w9, v0.s[2]
; CHECK-GI-NEXT: mov v0.h[1], w8
; CHECK-GI-NEXT: mov v0.h[2], w9
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
; CHECK-LABEL: fptou_v3f32_v3i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%c = fptoui <3 x float> %a to <3 x i16>
ret <3 x i16> %c
Expand Down Expand Up @@ -6077,11 +6057,7 @@ define <3 x i16> @fptos_v3f16_v3i16(<3 x half> %a) {
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: mov w8, v0.s[1]
; CHECK-GI-NOFP16-NEXT: mov w9, v0.s[2]
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w8
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w9
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i16:
Expand Down Expand Up @@ -6110,11 +6086,7 @@ define <3 x i16> @fptou_v3f16_v3i16(<3 x half> %a) {
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: mov w8, v0.s[1]
; CHECK-GI-NOFP16-NEXT: mov w9, v0.s[2]
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w8
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w9
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i16:
Expand Down
11 changes: 3 additions & 8 deletions llvm/test/CodeGen/AArch64/itofp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7450,9 +7450,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
;
; CHECK-GI-FP16-LABEL: stofp_v2i16_v2f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], w8
; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
Expand Down Expand Up @@ -7493,9 +7491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
;
; CHECK-GI-FP16-LABEL: utofp_v2i16_v2f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], w8
; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
Expand Down Expand Up @@ -8059,8 +8055,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], w8
; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
Expand Down
Loading

0 comments on commit b0dc7b5

Please sign in to comment.