Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Additional instructions available with z17.
if (Subtarget.hasVectorEnhancements3()) {
setOperationAction(ISD::ABS, MVT::i128, Legal);

setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX},
MVT::i128, Legal);
}
}

Expand Down Expand Up @@ -492,6 +495,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
// and inverting the result as necessary.
setOperationAction(ISD::SETCC, VT, Custom);

setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, VT,
Legal);
}
}

Expand Down
119 changes: 55 additions & 64 deletions llvm/lib/Target/SystemZ/SystemZInstrVector.td
Original file line number Diff line number Diff line change
Expand Up @@ -680,41 +680,41 @@ let Predicates = [FeatureVector] in {
let isCommutable = 1 in {
// Maximum.
def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>;
def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
def VMXB : BinaryVRRc<"vmxb", 0xE7FF, smax, v128b, v128b, 0>;
def VMXH : BinaryVRRc<"vmxh", 0xE7FF, smax, v128h, v128h, 1>;
def VMXF : BinaryVRRc<"vmxf", 0xE7FF, smax, v128f, v128f, 2>;
def VMXG : BinaryVRRc<"vmxg", 0xE7FF, smax, v128g, v128g, 3>;
let Predicates = [FeatureVectorEnhancements3] in
def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, null_frag, v128q, v128q, 4>;
def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, smax, v128q, v128q, 4>;

// Maximum logical.
def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, umax, v128b, v128b, 0>;
def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, umax, v128h, v128h, 1>;
def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, umax, v128f, v128f, 2>;
def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, umax, v128g, v128g, 3>;
let Predicates = [FeatureVectorEnhancements3] in
def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, null_frag, v128q, v128q, 4>;
def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, umax, v128q, v128q, 4>;
}

let isCommutable = 1 in {
// Minimum.
def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>;
def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
def VMNB : BinaryVRRc<"vmnb", 0xE7FE, smin, v128b, v128b, 0>;
def VMNH : BinaryVRRc<"vmnh", 0xE7FE, smin, v128h, v128h, 1>;
def VMNF : BinaryVRRc<"vmnf", 0xE7FE, smin, v128f, v128f, 2>;
def VMNG : BinaryVRRc<"vmng", 0xE7FE, smin, v128g, v128g, 3>;
let Predicates = [FeatureVectorEnhancements3] in
def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, null_frag, v128q, v128q, 4>;
def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, smin, v128q, v128q, 4>;

// Minimum logical.
def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, umin, v128b, v128b, 0>;
def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, umin, v128h, v128h, 1>;
def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, umin, v128f, v128f, 2>;
def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, umin, v128g, v128g, 3>;
let Predicates = [FeatureVectorEnhancements3] in
def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, null_frag, v128q, v128q, 4>;
def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, umin, v128q, v128q, 4>;
}

let isCommutable = 1 in {
Expand Down Expand Up @@ -1250,54 +1250,45 @@ defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>;
defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>;
defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>;

// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the
// signed or unsigned "set if greater than" comparison instruction and
// MIN and MAX are the associated minimum and maximum instructions.
multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph,
Instruction min, Instruction max> {
let Predicates = [FeatureVector] in {
def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)),
(max VR128:$x, VR128:$y)>;
def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)),
(min VR128:$x, VR128:$y)>;
def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
VR128:$x, VR128:$y)),
(min VR128:$x, VR128:$y)>;
def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
VR128:$y, VR128:$x)),
(max VR128:$x, VR128:$y)>;
}
// Instantiate packs/packu: recognize a saturating truncation and convert
// into the corresponding packs/packu instruction.
//
// SignedSaturatingTruncate matches a z_pack of two INPUT-typed vectors that
// have each been clamped between ssat_trunc_min_vec and ssat_trunc_max_vec,
// and selects the single instruction PACKS on the unclamped operands.
//   input  - vector type of the two source operands ($a and $b).
//   output - vector type produced by the z_pack node.
//   packs  - instruction emitted in place of the clamp + pack sequence.
multiclass SignedSaturatingTruncate<ValueType input, ValueType output,
Instruction packs> {
// Clamp written as smin(smax(x, MIN), MAX): lower bound applied first.
def : Pat<
(output (z_pack
(smin (smax (input VR128:$a), ssat_trunc_min_vec), ssat_trunc_max_vec),
(smin (smax (input VR128:$b), ssat_trunc_min_vec), ssat_trunc_max_vec)
)),
(packs VR128:$a, VR128:$b)
>;

// Same clamp written as smax(smin(x, MAX), MIN): upper bound applied first.
def : Pat<
(output (z_pack
(smax (smin (input VR128:$a), ssat_trunc_max_vec), ssat_trunc_min_vec),
(smax (smin (input VR128:$b), ssat_trunc_max_vec), ssat_trunc_min_vec)
)),
(packs VR128:$a, VR128:$b)
>;
}

// Signed min/max.
defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>;
defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>;
defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>;
defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>;

let Predicates = [FeatureVectorEnhancements3] in {
def : Pat<(i128 (or (and VR128:$x, (z_vicmph VR128:$x, VR128:$y)),
(and VR128:$y, (not (z_vicmph VR128:$x, VR128:$y))))),
(VMXQ VR128:$x, VR128:$y)>;
def : Pat<(i128 (or (and VR128:$y, (z_vicmph VR128:$x, VR128:$y)),
(and VR128:$x, (not (z_vicmph VR128:$x, VR128:$y))))),
(VMNQ VR128:$x, VR128:$y)>;
defm : SignedSaturatingTruncate<v8i16, v16i8, VPKSH>;
defm : SignedSaturatingTruncate<v4i32, v8i16, VPKSF>;
defm : SignedSaturatingTruncate<v2i64, v4i32, VPKSG>;

// UnsignedSaturatingTruncate matches a z_pack of two INPUT-typed vectors that
// have each been clamped from above with umin against usat_trunc_max_vec, and
// selects the single instruction PACKU on the unclamped operands.
//   input  - vector type of the two source operands ($a and $b).
//   output - vector type produced by the z_pack node.
//   packu  - instruction emitted in place of the clamp + pack sequence.
multiclass UnsignedSaturatingTruncate<ValueType input, ValueType output,
Instruction packu> {
// Only an upper bound is matched; no lower clamp appears in the pattern.
def : Pat<
(output (z_pack
(umin (input VR128:$a), usat_trunc_max_vec),
(umin (input VR128:$b), usat_trunc_max_vec)
)),
(packu VR128:$a, VR128:$b)
>;
}

// Unsigned min/max.
defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>;
defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;

let Predicates = [FeatureVectorEnhancements3] in {
def : Pat<(i128 (or (and VR128:$x, (z_vicmphl VR128:$x, VR128:$y)),
(and VR128:$y, (not (z_vicmphl VR128:$x, VR128:$y))))),
(VMXLQ VR128:$x, VR128:$y)>;
def : Pat<(i128 (or (and VR128:$y, (z_vicmphl VR128:$x, VR128:$y)),
(and VR128:$x, (not (z_vicmphl VR128:$x, VR128:$y))))),
(VMNLQ VR128:$x, VR128:$y)>;
}
defm : UnsignedSaturatingTruncate<v8i16, v16i8, VPKLSH>;
defm : UnsignedSaturatingTruncate<v4i32, v8i16, VPKLSF>;
defm : UnsignedSaturatingTruncate<v2i64, v4i32, VPKLSG>;

// Instantiate comparison patterns to recognize VACC/VSCBI for TYPE.
multiclass IntegerComputeCarryOrBorrow<ValueType type,
Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZOperators.td
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,31 @@ def vsplat_imm_eq_1 : PatFrag<(ops), (build_vector), [{
}]>;
def z_vzext1 : PatFrag<(ops node:$x), (and node:$x, vsplat_imm_eq_1)>;

// Vector constants for saturating truncation, containing the minimum and
// maximum value for the integer type that is half of the element width.
// Matches a constant-splat build_vector whose splatted value equals the
// signed minimum of the half-width integer type, sign-extended to the full
// element width (for example -128 when the elements are 16 bits wide).
def ssat_trunc_min_vec: PatFrag<(ops), (build_vector), [{
  const unsigned EltBits =
      N->getValueType(0).getVectorElementType().getSizeInBits();
  const APInt Expected = APInt::getSignedMinValue(EltBits / 2).sext(EltBits);
  APInt SplatVal;
  if (!ISD::isConstantSplatVector(N, SplatVal))
    return false;
  return APInt::isSameValue(SplatVal, Expected);
}]>;
// Matches a constant-splat build_vector whose splatted value equals the
// signed maximum of the half-width integer type, sign-extended to the full
// element width (for example 127 when the elements are 16 bits wide).
def ssat_trunc_max_vec: PatFrag<(ops), (build_vector), [{
  const unsigned EltBits =
      N->getValueType(0).getVectorElementType().getSizeInBits();
  const APInt Expected = APInt::getSignedMaxValue(EltBits / 2).sext(EltBits);
  APInt SplatVal;
  if (!ISD::isConstantSplatVector(N, SplatVal))
    return false;
  return APInt::isSameValue(SplatVal, Expected);
}]>;

// Matches a constant-splat build_vector whose splatted value equals the
// unsigned maximum of the half-width integer type, zero-extended to the full
// element width (for example 255 when the elements are 16 bits wide).
def usat_trunc_max_vec: PatFrag<(ops), (build_vector), [{
  const unsigned EltBits =
      N->getValueType(0).getVectorElementType().getSizeInBits();
  const APInt Expected = APInt::getMaxValue(EltBits / 2).zext(EltBits);
  APInt SplatVal;
  if (!ISD::isConstantSplatVector(N, SplatVal))
    return false;
  return APInt::isSameValue(SplatVal, Expected);
}]>;

// Signed "integer greater than zero" on vectors.
def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>;

Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/SystemZ/int-max-02.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
define i128 @f1(i128 %val1, i128 %val2) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vl %v1, 0(%r4), 3
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v1, 0(%r3), 3
; CHECK-NEXT: vmxq %v0, %v1, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
Expand Down Expand Up @@ -49,8 +49,8 @@ define i128 @f3(i128 %val1, i128 %val2) {
define i128 @f4(i128 %val1, i128 %val2) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vl %v1, 0(%r4), 3
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v1, 0(%r3), 3
; CHECK-NEXT: vmxq %v0, %v1, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
Expand All @@ -63,8 +63,8 @@ define i128 @f4(i128 %val1, i128 %val2) {
define i128 @f5(i128 %val1, i128 %val2) {
; CHECK-LABEL: f5:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vl %v1, 0(%r4), 3
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v1, 0(%r3), 3
; CHECK-NEXT: vmxlq %v0, %v1, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
Expand Down Expand Up @@ -105,8 +105,8 @@ define i128 @f7(i128 %val1, i128 %val2) {
define i128 @f8(i128 %val1, i128 %val2) {
; CHECK-LABEL: f8:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vl %v1, 0(%r4), 3
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v1, 0(%r3), 3
; CHECK-NEXT: vmxlq %v0, %v1, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/SystemZ/int-min-02.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
define i128 @f1(i128 %val1, i128 %val2) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v1, 0(%r3), 3
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vl %v1, 0(%r4), 3
; CHECK-NEXT: vmnq %v0, %v1, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
Expand Down Expand Up @@ -49,8 +49,8 @@ define i128 @f3(i128 %val1, i128 %val2) {
define i128 @f4(i128 %val1, i128 %val2) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v1, 0(%r3), 3
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vl %v1, 0(%r4), 3
; CHECK-NEXT: vmnq %v0, %v1, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
Expand All @@ -63,8 +63,8 @@ define i128 @f4(i128 %val1, i128 %val2) {
define i128 @f5(i128 %val1, i128 %val2) {
; CHECK-LABEL: f5:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v1, 0(%r3), 3
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vl %v1, 0(%r4), 3
; CHECK-NEXT: vmnlq %v0, %v1, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
Expand Down Expand Up @@ -105,8 +105,8 @@ define i128 @f7(i128 %val1, i128 %val2) {
define i128 @f8(i128 %val1, i128 %val2) {
; CHECK-LABEL: f8:
; CHECK: # %bb.0:
; CHECK-NEXT: vl %v0, 0(%r4), 3
; CHECK-NEXT: vl %v1, 0(%r3), 3
; CHECK-NEXT: vl %v0, 0(%r3), 3
; CHECK-NEXT: vl %v1, 0(%r4), 3
; CHECK-NEXT: vmnlq %v0, %v1, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
Expand Down
95 changes: 95 additions & 0 deletions llvm/test/CodeGen/SystemZ/saturating-truncation.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s

declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2

; Signed saturating truncation i16 -> i8: the concatenation of %a and %b is
; clamped to [-128, 127] (smax first, then smin) and truncated. The expected
; output is a single vpksh.
define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: i16_signed:
; CHECK: # %bb.0: # %bb2
; CHECK-NEXT: vpksh %v24, %v24, %v26
; CHECK-NEXT: br %r14
bb2:
  %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
  %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
  %3 = trunc nsw <16 x i16> %2 to <16 x i8>
  ret <16 x i8> %3
}

; Signed saturating truncation i32 -> i16: the concatenation of %a and %b is
; clamped to [-32768, 32767] (smax first, then smin) and truncated. The
; expected output is a single vpksf.
define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_signed:
; CHECK: # %bb.0: # %bb2
; CHECK-NEXT: vpksf %v24, %v24, %v26
; CHECK-NEXT: br %r14
bb2:
%0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768))
%2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767))
%3 = trunc nsw <8 x i32> %2 to <8 x i16>
ret <8 x i16> %3
}

; Signed saturating truncation i64 -> i32: the concatenation of %a and %b is
; clamped to [-2147483648, 2147483647] (smax first, then smin) and truncated.
; The expected output is a single vpksg.
define <4 x i32> @i64_signed(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: i64_signed:
; CHECK: # %bb.0: # %bb2
; CHECK-NEXT: vpksg %v24, %v24, %v26
; CHECK-NEXT: br %r14
bb2:
%0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%1 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %0, <4 x i64> splat (i64 -2147483648))
%2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> splat (i64 2147483647))
%3 = trunc nsw <4 x i64> %2 to <4 x i32>
ret <4 x i32> %3
}

; Variant of i64_signed with the clamp applied in the opposite order (smin
; first, then smax) and with the constant passed as the first intrinsic
; operand. The expected output is still a single vpksg.
define <4 x i32> @i64_signed_flipped(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: i64_signed_flipped:
; CHECK: # %bb.0: # %bb2
; CHECK-NEXT: vpksg %v24, %v24, %v26
; CHECK-NEXT: br %r14
bb2:
%0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%1 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> splat (i64 2147483647), <4 x i64> %0)
%2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> splat (i64 -2147483648), <4 x i64> %1)
%3 = trunc nsw <4 x i64> %2 to <4 x i32>
ret <4 x i32> %3
}

; Unsigned saturating truncation i16 -> i8: the concatenation of %a and %b is
; clamped from above to 255 with umin and truncated. The expected output is a
; single vpklsh.
define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: i16_unsigned:
; CHECK: # %bb.0: # %bb2
; CHECK-NEXT: vpklsh %v24, %v24, %v26
; CHECK-NEXT: br %r14
bb2:
%0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255))
%2 = trunc nuw <16 x i16> %1 to <16 x i8>
ret <16 x i8> %2
}

; Unsigned saturating truncation i32 -> i16: the concatenation of %a and %b is
; clamped from above to 65535 with umin and truncated. The expected output is
; a single vpklsf.
define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: i32_unsigned:
; CHECK: # %bb.0: # %bb2
; CHECK-NEXT: vpklsf %v24, %v24, %v26
; CHECK-NEXT: br %r14
bb2:
  %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
  ; The clamped value is in [0, 65535], so the truncation drops only zero
  ; bits: the flag must be nuw. With nsw, values in 32768..65535 are poison.
  %2 = trunc nuw <8 x i32> %1 to <8 x i16>
  ret <8 x i16> %2
}

; Unsigned saturating truncation i64 -> i32: the concatenation of %a and %b is
; clamped from above to 4294967295 with umin and truncated. The expected
; output is a single vpklsg.
define <4 x i32> @i64_unsigned(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: i64_unsigned:
; CHECK: # %bb.0: # %bb2
; CHECK-NEXT: vpklsg %v24, %v24, %v26
; CHECK-NEXT: br %r14
bb2:
%0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%1 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %0, <4 x i64> splat (i64 4294967295))
%2 = trunc nuw <4 x i64> %1 to <4 x i32>
ret <4 x i32> %2
}
Loading