Skip to content

Commit 5586572

Browse files
authored
s390x: pattern match saturated truncation (#155377)
Simplify min/max instruction matching by making the related SelectionDAG operations legal. Add patterns to match (signed and unsigned) saturated truncation based on open-coded min/max patterns. Fixes #153655
1 parent 2677728 commit 5586572

File tree

6 files changed

+197
-80
lines changed

6 files changed

+197
-80
lines changed

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
287287
// Additional instructions available with z17.
288288
if (Subtarget.hasVectorEnhancements3()) {
289289
setOperationAction(ISD::ABS, MVT::i128, Legal);
290+
291+
setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX},
292+
MVT::i128, Legal);
290293
}
291294
}
292295

@@ -492,6 +495,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
492495
// Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
493496
// and inverting the result as necessary.
494497
setOperationAction(ISD::SETCC, VT, Custom);
498+
499+
setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, VT,
500+
Legal);
495501
}
496502
}
497503

llvm/lib/Target/SystemZ/SystemZInstrVector.td

Lines changed: 55 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -680,41 +680,41 @@ let Predicates = [FeatureVector] in {
680680
let isCommutable = 1 in {
681681
// Maximum.
682682
def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>;
683-
def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
684-
def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
685-
def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
686-
def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
683+
def VMXB : BinaryVRRc<"vmxb", 0xE7FF, smax, v128b, v128b, 0>;
684+
def VMXH : BinaryVRRc<"vmxh", 0xE7FF, smax, v128h, v128h, 1>;
685+
def VMXF : BinaryVRRc<"vmxf", 0xE7FF, smax, v128f, v128f, 2>;
686+
def VMXG : BinaryVRRc<"vmxg", 0xE7FF, smax, v128g, v128g, 3>;
687687
let Predicates = [FeatureVectorEnhancements3] in
688-
def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, null_frag, v128q, v128q, 4>;
688+
def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, smax, v128q, v128q, 4>;
689689

690690
// Maximum logical.
691691
def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
692-
def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
693-
def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
694-
def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
695-
def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
692+
def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, umax, v128b, v128b, 0>;
693+
def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, umax, v128h, v128h, 1>;
694+
def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, umax, v128f, v128f, 2>;
695+
def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, umax, v128g, v128g, 3>;
696696
let Predicates = [FeatureVectorEnhancements3] in
697-
def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, null_frag, v128q, v128q, 4>;
697+
def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, umax, v128q, v128q, 4>;
698698
}
699699

700700
let isCommutable = 1 in {
701701
// Minimum.
702702
def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>;
703-
def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
704-
def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
705-
def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
706-
def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
703+
def VMNB : BinaryVRRc<"vmnb", 0xE7FE, smin, v128b, v128b, 0>;
704+
def VMNH : BinaryVRRc<"vmnh", 0xE7FE, smin, v128h, v128h, 1>;
705+
def VMNF : BinaryVRRc<"vmnf", 0xE7FE, smin, v128f, v128f, 2>;
706+
def VMNG : BinaryVRRc<"vmng", 0xE7FE, smin, v128g, v128g, 3>;
707707
let Predicates = [FeatureVectorEnhancements3] in
708-
def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, null_frag, v128q, v128q, 4>;
708+
def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, smin, v128q, v128q, 4>;
709709

710710
// Minimum logical.
711711
def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
712-
def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
713-
def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
714-
def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
715-
def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
712+
def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, umin, v128b, v128b, 0>;
713+
def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, umin, v128h, v128h, 1>;
714+
def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, umin, v128f, v128f, 2>;
715+
def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, umin, v128g, v128g, 3>;
716716
let Predicates = [FeatureVectorEnhancements3] in
717-
def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, null_frag, v128q, v128q, 4>;
717+
def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, umin, v128q, v128q, 4>;
718718
}
719719

720720
let isCommutable = 1 in {
@@ -1250,54 +1250,45 @@ defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>;
12501250
defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>;
12511251
defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>;
12521252

1253-
// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the
1254-
// signed or unsigned "set if greater than" comparison instruction and
1255-
// MIN and MAX are the associated minimum and maximum instructions.
1256-
multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph,
1257-
Instruction min, Instruction max> {
1258-
let Predicates = [FeatureVector] in {
1259-
def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)),
1260-
(max VR128:$x, VR128:$y)>;
1261-
def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)),
1262-
(min VR128:$x, VR128:$y)>;
1263-
def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
1264-
VR128:$x, VR128:$y)),
1265-
(min VR128:$x, VR128:$y)>;
1266-
def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
1267-
VR128:$y, VR128:$x)),
1268-
(max VR128:$x, VR128:$y)>;
1269-
}
1253+
// Instantiate packs/packu: recognize a saturating truncation and convert
1254+
// into the corresponding packs/packu instruction.
1255+
// INPUT is the wide source vector type, OUTPUT the packed result type and
// PACKS the instruction selected for the match. Both z_pack operands must
// be clamped between ssat_trunc_min_vec and ssat_trunc_max_vec; the two
// patterns below differ only in the order of the clamp operations.
multiclass SignedSaturatingTruncate<ValueType input, ValueType output,
1256+
Instruction packs> {
1257+
// Clamp written as smin(smax(x, min), max).
def : Pat<
1258+
(output (z_pack
1259+
(smin (smax (input VR128:$a), ssat_trunc_min_vec), ssat_trunc_max_vec),
1260+
(smin (smax (input VR128:$b), ssat_trunc_min_vec), ssat_trunc_max_vec)
1261+
)),
1262+
(packs VR128:$a, VR128:$b)
1263+
>;
1264+
1265+
// Clamp written as smax(smin(x, max), min).
def : Pat<
1266+
(output (z_pack
1267+
(smax (smin (input VR128:$a), ssat_trunc_max_vec), ssat_trunc_min_vec),
1268+
(smax (smin (input VR128:$b), ssat_trunc_max_vec), ssat_trunc_min_vec)
1269+
)),
1270+
(packs VR128:$a, VR128:$b)
1271+
>;
12701272
}
12711273

1272-
// Signed min/max.
1273-
defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>;
1274-
defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>;
1275-
defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>;
1276-
defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>;
1277-
1278-
let Predicates = [FeatureVectorEnhancements3] in {
1279-
def : Pat<(i128 (or (and VR128:$x, (z_vicmph VR128:$x, VR128:$y)),
1280-
(and VR128:$y, (not (z_vicmph VR128:$x, VR128:$y))))),
1281-
(VMXQ VR128:$x, VR128:$y)>;
1282-
def : Pat<(i128 (or (and VR128:$y, (z_vicmph VR128:$x, VR128:$y)),
1283-
(and VR128:$x, (not (z_vicmph VR128:$x, VR128:$y))))),
1284-
(VMNQ VR128:$x, VR128:$y)>;
1274+
defm : SignedSaturatingTruncate<v8i16, v16i8, VPKSH>;
1275+
defm : SignedSaturatingTruncate<v4i32, v8i16, VPKSF>;
1276+
defm : SignedSaturatingTruncate<v2i64, v4i32, VPKSG>;
1277+
1278+
// INPUT is the wide source vector type, OUTPUT the packed result type and
// PACKU the instruction selected for the match. Both z_pack operands must
// be a umin of the source against usat_trunc_max_vec; no lower clamp is
// matched.
multiclass UnsignedSaturatingTruncate<ValueType input, ValueType output,
1279+
Instruction packu> {
1280+
def : Pat<
1281+
(output (z_pack
1282+
(umin (input VR128:$a), usat_trunc_max_vec),
1283+
(umin (input VR128:$b), usat_trunc_max_vec)
1284+
)),
1285+
(packu VR128:$a, VR128:$b)
1286+
>;
12851287
}
12861288

1287-
// Unsigned min/max.
1288-
defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>;
1289-
defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
1290-
defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
1291-
defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
1292-
1293-
let Predicates = [FeatureVectorEnhancements3] in {
1294-
def : Pat<(i128 (or (and VR128:$x, (z_vicmphl VR128:$x, VR128:$y)),
1295-
(and VR128:$y, (not (z_vicmphl VR128:$x, VR128:$y))))),
1296-
(VMXLQ VR128:$x, VR128:$y)>;
1297-
def : Pat<(i128 (or (and VR128:$y, (z_vicmphl VR128:$x, VR128:$y)),
1298-
(and VR128:$x, (not (z_vicmphl VR128:$x, VR128:$y))))),
1299-
(VMNLQ VR128:$x, VR128:$y)>;
1300-
}
1289+
defm : UnsignedSaturatingTruncate<v8i16, v16i8, VPKLSH>;
1290+
defm : UnsignedSaturatingTruncate<v4i32, v8i16, VPKLSF>;
1291+
defm : UnsignedSaturatingTruncate<v2i64, v4i32, VPKLSG>;
13011292

13021293
// Instantiate comparison patterns to recognize VACC/VSCBI for TYPE.
13031294
multiclass IntegerComputeCarryOrBorrow<ValueType type,

llvm/lib/Target/SystemZ/SystemZOperators.td

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,31 @@ def vsplat_imm_eq_1 : PatFrag<(ops), (build_vector), [{
10671067
}]>;
10681068
def z_vzext1 : PatFrag<(ops node:$x), (and node:$x, vsplat_imm_eq_1)>;
10691069

1070+
// Vector constants for saturating truncation, containing the minimum and
1071+
// maximum value for the integer type that is half of the element width.
1072+
// Operand-less fragment that matches a build_vector only when it is a
// constant splat whose value equals the signed minimum of the half-width
// integer, sign-extended to the element width (e.g. -128 for 16-bit
// elements).
def ssat_trunc_min_vec: PatFrag<(ops), (build_vector), [{
1073+
APInt Imm;
1074+
EVT EltTy = N->getValueType(0).getVectorElementType();
1075+
unsigned SizeInBits = EltTy.getSizeInBits();
1076+
APInt min = APInt::getSignedMinValue(SizeInBits / 2).sext(SizeInBits);
1077+
return ISD::isConstantSplatVector(N, Imm) && APInt::isSameValue(Imm, min);
1078+
}]>;
1079+
// Operand-less fragment that matches a build_vector only when it is a
// constant splat whose value equals the signed maximum of the half-width
// integer, sign-extended to the element width (e.g. 127 for 16-bit
// elements).
def ssat_trunc_max_vec: PatFrag<(ops), (build_vector), [{
1080+
APInt Imm;
1081+
EVT EltTy = N->getValueType(0).getVectorElementType();
1082+
unsigned SizeInBits = EltTy.getSizeInBits();
1083+
APInt max = APInt::getSignedMaxValue(SizeInBits / 2).sext(SizeInBits);
1084+
return ISD::isConstantSplatVector(N, Imm) && APInt::isSameValue(Imm, max);
1085+
}]>;
1086+
1087+
// Operand-less fragment that matches a build_vector only when it is a
// constant splat whose value equals the unsigned maximum of the half-width
// integer, zero-extended to the element width (e.g. 255 for 16-bit
// elements).
def usat_trunc_max_vec: PatFrag<(ops), (build_vector), [{
1088+
APInt Imm;
1089+
EVT EltTy = N->getValueType(0).getVectorElementType();
1090+
unsigned SizeInBits = EltTy.getSizeInBits();
1091+
APInt max = APInt::getMaxValue(SizeInBits / 2).zext(SizeInBits);
1092+
return ISD::isConstantSplatVector(N, Imm) && APInt::isSameValue(Imm, max);
1093+
}]>;
1094+
10701095
// Signed "integer greater than zero" on vectors.
10711096
def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>;
10721097

llvm/test/CodeGen/SystemZ/int-max-02.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
define i128 @f1(i128 %val1, i128 %val2) {
88
; CHECK-LABEL: f1:
99
; CHECK: # %bb.0:
10-
; CHECK-NEXT: vl %v0, 0(%r3), 3
11-
; CHECK-NEXT: vl %v1, 0(%r4), 3
10+
; CHECK-NEXT: vl %v0, 0(%r4), 3
11+
; CHECK-NEXT: vl %v1, 0(%r3), 3
1212
; CHECK-NEXT: vmxq %v0, %v1, %v0
1313
; CHECK-NEXT: vst %v0, 0(%r2), 3
1414
; CHECK-NEXT: br %r14
@@ -49,8 +49,8 @@ define i128 @f3(i128 %val1, i128 %val2) {
4949
define i128 @f4(i128 %val1, i128 %val2) {
5050
; CHECK-LABEL: f4:
5151
; CHECK: # %bb.0:
52-
; CHECK-NEXT: vl %v0, 0(%r3), 3
53-
; CHECK-NEXT: vl %v1, 0(%r4), 3
52+
; CHECK-NEXT: vl %v0, 0(%r4), 3
53+
; CHECK-NEXT: vl %v1, 0(%r3), 3
5454
; CHECK-NEXT: vmxq %v0, %v1, %v0
5555
; CHECK-NEXT: vst %v0, 0(%r2), 3
5656
; CHECK-NEXT: br %r14
@@ -63,8 +63,8 @@ define i128 @f4(i128 %val1, i128 %val2) {
6363
define i128 @f5(i128 %val1, i128 %val2) {
6464
; CHECK-LABEL: f5:
6565
; CHECK: # %bb.0:
66-
; CHECK-NEXT: vl %v0, 0(%r3), 3
67-
; CHECK-NEXT: vl %v1, 0(%r4), 3
66+
; CHECK-NEXT: vl %v0, 0(%r4), 3
67+
; CHECK-NEXT: vl %v1, 0(%r3), 3
6868
; CHECK-NEXT: vmxlq %v0, %v1, %v0
6969
; CHECK-NEXT: vst %v0, 0(%r2), 3
7070
; CHECK-NEXT: br %r14
@@ -105,8 +105,8 @@ define i128 @f7(i128 %val1, i128 %val2) {
105105
define i128 @f8(i128 %val1, i128 %val2) {
106106
; CHECK-LABEL: f8:
107107
; CHECK: # %bb.0:
108-
; CHECK-NEXT: vl %v0, 0(%r3), 3
109-
; CHECK-NEXT: vl %v1, 0(%r4), 3
108+
; CHECK-NEXT: vl %v0, 0(%r4), 3
109+
; CHECK-NEXT: vl %v1, 0(%r3), 3
110110
; CHECK-NEXT: vmxlq %v0, %v1, %v0
111111
; CHECK-NEXT: vst %v0, 0(%r2), 3
112112
; CHECK-NEXT: br %r14

llvm/test/CodeGen/SystemZ/int-min-02.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
define i128 @f1(i128 %val1, i128 %val2) {
88
; CHECK-LABEL: f1:
99
; CHECK: # %bb.0:
10-
; CHECK-NEXT: vl %v0, 0(%r4), 3
11-
; CHECK-NEXT: vl %v1, 0(%r3), 3
10+
; CHECK-NEXT: vl %v0, 0(%r3), 3
11+
; CHECK-NEXT: vl %v1, 0(%r4), 3
1212
; CHECK-NEXT: vmnq %v0, %v1, %v0
1313
; CHECK-NEXT: vst %v0, 0(%r2), 3
1414
; CHECK-NEXT: br %r14
@@ -49,8 +49,8 @@ define i128 @f3(i128 %val1, i128 %val2) {
4949
define i128 @f4(i128 %val1, i128 %val2) {
5050
; CHECK-LABEL: f4:
5151
; CHECK: # %bb.0:
52-
; CHECK-NEXT: vl %v0, 0(%r4), 3
53-
; CHECK-NEXT: vl %v1, 0(%r3), 3
52+
; CHECK-NEXT: vl %v0, 0(%r3), 3
53+
; CHECK-NEXT: vl %v1, 0(%r4), 3
5454
; CHECK-NEXT: vmnq %v0, %v1, %v0
5555
; CHECK-NEXT: vst %v0, 0(%r2), 3
5656
; CHECK-NEXT: br %r14
@@ -63,8 +63,8 @@ define i128 @f4(i128 %val1, i128 %val2) {
6363
define i128 @f5(i128 %val1, i128 %val2) {
6464
; CHECK-LABEL: f5:
6565
; CHECK: # %bb.0:
66-
; CHECK-NEXT: vl %v0, 0(%r4), 3
67-
; CHECK-NEXT: vl %v1, 0(%r3), 3
66+
; CHECK-NEXT: vl %v0, 0(%r3), 3
67+
; CHECK-NEXT: vl %v1, 0(%r4), 3
6868
; CHECK-NEXT: vmnlq %v0, %v1, %v0
6969
; CHECK-NEXT: vst %v0, 0(%r2), 3
7070
; CHECK-NEXT: br %r14
@@ -105,8 +105,8 @@ define i128 @f7(i128 %val1, i128 %val2) {
105105
define i128 @f8(i128 %val1, i128 %val2) {
106106
; CHECK-LABEL: f8:
107107
; CHECK: # %bb.0:
108-
; CHECK-NEXT: vl %v0, 0(%r4), 3
109-
; CHECK-NEXT: vl %v1, 0(%r3), 3
108+
; CHECK-NEXT: vl %v0, 0(%r3), 3
109+
; CHECK-NEXT: vl %v1, 0(%r4), 3
110110
; CHECK-NEXT: vmnlq %v0, %v1, %v0
111111
; CHECK-NEXT: vst %v0, 0(%r2), 3
112112
; CHECK-NEXT: br %r14
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z17 | FileCheck %s
4+
5+
declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2
6+
declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2
7+
8+
define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
9+
; CHECK-LABEL: i16_signed:
10+
; CHECK: # %bb.0: # %bb2
11+
; CHECK-NEXT: vpksh %v24, %v24, %v26
12+
; CHECK-NEXT: br %r14
13+
bb2:
14+
%0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15+
%1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
16+
%2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
17+
%3 = trunc nsw <16 x i16> %2 to <16 x i8>
18+
ret <16 x i8> %3
19+
ret <16 x i8> %3
20+
}
21+
22+
define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
23+
; CHECK-LABEL: i32_signed:
24+
; CHECK: # %bb.0: # %bb2
25+
; CHECK-NEXT: vpksf %v24, %v24, %v26
26+
; CHECK-NEXT: br %r14
27+
bb2:
28+
%0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
29+
%1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768))
30+
%2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767))
31+
%3 = trunc nsw <8 x i32> %2 to <8 x i16>
32+
ret <8 x i16> %3
33+
}
34+
35+
define <4 x i32> @i64_signed(<2 x i64> %a, <2 x i64> %b) {
36+
; CHECK-LABEL: i64_signed:
37+
; CHECK: # %bb.0: # %bb2
38+
; CHECK-NEXT: vpksg %v24, %v24, %v26
39+
; CHECK-NEXT: br %r14
40+
bb2:
41+
%0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
42+
%1 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> %0, <4 x i64> splat (i64 -2147483648))
43+
%2 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %1, <4 x i64> splat (i64 2147483647))
44+
%3 = trunc nsw <4 x i64> %2 to <4 x i32>
45+
ret <4 x i32> %3
46+
}
47+
48+
define <4 x i32> @i64_signed_flipped(<2 x i64> %a, <2 x i64> %b) {
49+
; CHECK-LABEL: i64_signed_flipped:
50+
; CHECK: # %bb.0: # %bb2
51+
; CHECK-NEXT: vpksg %v24, %v24, %v26
52+
; CHECK-NEXT: br %r14
53+
bb2:
54+
%0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
55+
%1 = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> splat (i64 2147483647), <4 x i64> %0)
56+
%2 = tail call <4 x i64> @llvm.smax.v4i64(<4 x i64> splat (i64 -2147483648), <4 x i64> %1)
57+
%3 = trunc nsw <4 x i64> %2 to <4 x i32>
58+
ret <4 x i32> %3
59+
}
60+
61+
define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
62+
; CHECK-LABEL: i16_unsigned:
63+
; CHECK: # %bb.0: # %bb2
64+
; CHECK-NEXT: vpklsh %v24, %v24, %v26
65+
; CHECK-NEXT: br %r14
66+
bb2:
67+
%0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
68+
%1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255))
69+
%2 = trunc nuw <16 x i16> %1 to <16 x i8>
70+
ret <16 x i8> %2
71+
}
72+
73+
; Unsigned saturating truncation from 32-bit to 16-bit elements: both inputs
; are concatenated, clamped with umin to 65535 and truncated. The truncation
; is flagged nuw (not nsw): the clamp guarantees the dropped high bits are
; zero, but values 32768..65535 do wrap in the signed sense.
define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
74+
; CHECK-LABEL: i32_unsigned:
75+
; CHECK: # %bb.0: # %bb2
76+
; CHECK-NEXT: vpklsf %v24, %v24, %v26
77+
; CHECK-NEXT: br %r14
78+
bb2:
79+
%0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
80+
%1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
81+
%2 = trunc nuw <8 x i32> %1 to <8 x i16>
82+
ret <8 x i16> %2
83+
}
84+
85+
define <4 x i32> @i64_unsigned(<2 x i64> %a, <2 x i64> %b) {
86+
; CHECK-LABEL: i64_unsigned:
87+
; CHECK: # %bb.0: # %bb2
88+
; CHECK-NEXT: vpklsg %v24, %v24, %v26
89+
; CHECK-NEXT: br %r14
90+
bb2:
91+
%0 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
92+
%1 = tail call <4 x i64> @llvm.umin.v4i64(<4 x i64> %0, <4 x i64> splat (i64 4294967295))
93+
%2 = trunc nuw <4 x i64> %1 to <4 x i32>
94+
ret <4 x i32> %2
95+
}

0 commit comments

Comments
 (0)