11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F,X86-AVX512F
33; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F,X64-AVX512F
4- ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
5- ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
4+ ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW,X86-AVX512BW
5+ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW,X64-AVX512BW
66
77define <16 x i32 > @shuffle_v8i64 (<16 x i32 > %t0 , <16 x i32 > %t1 ) {
8- ; AVX512F-LABEL: shuffle_v8i64:
9- ; AVX512F: # %bb.0: # %entry
10- ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm2
11- ; AVX512F-NEXT: vpsubd %zmm1, %zmm0, %zmm0
12- ; AVX512F-NEXT: movb $-86, %al
13- ; AVX512F-NEXT: kmovw %eax, %k1
14- ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
15- ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
16- ; AVX512F-NEXT: ret{{[l|q]}}
17- ;
18- ; AVX512BW-LABEL: shuffle_v8i64:
19- ; AVX512BW: # %bb.0: # %entry
20- ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm2
21- ; AVX512BW-NEXT: vpsubd %zmm1, %zmm0, %zmm0
22- ; AVX512BW-NEXT: movb $-86, %al
23- ; AVX512BW-NEXT: kmovd %eax, %k1
24- ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
25- ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
26- ; AVX512BW-NEXT: ret{{[l|q]}}
8+ ; CHECK-LABEL: shuffle_v8i64:
9+ ; CHECK: # %bb.0: # %entry
10+ ; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2
11+ ; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
12+ ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm2[0,1],zmm0[2,3],zmm2[4,5],zmm0[6,7],zmm2[8,9],zmm0[10,11],zmm2[12,13],zmm0[14,15]
13+ ; CHECK-NEXT: ret{{[l|q]}}
2714entry:
2815 %t2 = add nsw <16 x i32 > %t0 , %t1
2916 %t3 = sub nsw <16 x i32 > %t0 , %t1
@@ -96,15 +83,24 @@ define <64 x i8> @addb_selectw_64xi8(<64 x i8> %t0, <64 x i8> %t1) {
9683; X64-AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
9784; X64-AVX512F-NEXT: retq
9885;
99- ; AVX512BW-LABEL: addb_selectw_64xi8:
100- ; AVX512BW: # %bb.0:
101- ; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm2
102- ; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
103- ; AVX512BW-NEXT: movl $1, %eax
104- ; AVX512BW-NEXT: kmovd %eax, %k1
105- ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1}
106- ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
107- ; AVX512BW-NEXT: ret{{[l|q]}}
86+ ; X86-AVX512BW-LABEL: addb_selectw_64xi8:
87+ ; X86-AVX512BW: # %bb.0:
88+ ; X86-AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm2
89+ ; X86-AVX512BW-NEXT: movl $3, %eax
90+ ; X86-AVX512BW-NEXT: kmovd %eax, %k0
91+ ; X86-AVX512BW-NEXT: kmovd %k0, %k1
92+ ; X86-AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2 {%k1}
93+ ; X86-AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
94+ ; X86-AVX512BW-NEXT: retl
95+ ;
96+ ; X64-AVX512BW-LABEL: addb_selectw_64xi8:
97+ ; X64-AVX512BW: # %bb.0:
98+ ; X64-AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm2
99+ ; X64-AVX512BW-NEXT: movl $3, %eax
100+ ; X64-AVX512BW-NEXT: kmovq %rax, %k1
101+ ; X64-AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm2 {%k1}
102+ ; X64-AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
103+ ; X64-AVX512BW-NEXT: retq
108104 %t2 = add nsw <64 x i8 > %t0 , %t1
109105 %t3 = sub nsw <64 x i8 > %t0 , %t1
110106 %t4 = shufflevector <64 x i8 > %t2 , <64 x i8 > %t3 , <64 x i32 > <i32 64 , i32 65 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 , i32 32 , i32 33 , i32 34 , i32 35 , i32 36 , i32 37 , i32 38 , i32 39 , i32 40 , i32 41 , i32 42 , i32 43 , i32 44 , i32 45 , i32 46 , i32 47 , i32 48 , i32 49 , i32 50 , i32 51 , i32 52 , i32 53 , i32 54 , i32 55 , i32 56 , i32 57 , i32 58 , i32 59 , i32 60 , i32 61 , i32 62 , i32 63 >
@@ -169,10 +165,9 @@ define <32 x i16> @addw_selectd_32xi16(<32 x i16> %t0, <32 x i16> %t1) {
169165; AVX512BW-LABEL: addw_selectd_32xi16:
170166; AVX512BW: # %bb.0:
171167; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm2
172- ; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm0
173- ; AVX512BW-NEXT: movw $1, %ax
168+ ; AVX512BW-NEXT: movl $3, %eax
174169; AVX512BW-NEXT: kmovd %eax, %k1
175- ; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm2 {%k1}
170+ ; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm2 {%k1}
176171; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
177172; AVX512BW-NEXT: ret{{[l|q]}}
178173 %t2 = add nsw <32 x i16 > %t0 , %t1
@@ -198,20 +193,18 @@ define <16 x i32> @addd_selectq_16xi32(<16 x i32> %t0, <16 x i32> %t1) {
198193; AVX512F-LABEL: addd_selectq_16xi32:
199194; AVX512F: # %bb.0:
200195; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm2
201- ; AVX512F-NEXT: vpsubd %zmm1, %zmm0, %zmm0
202- ; AVX512F-NEXT: movb $1, %al
196+ ; AVX512F-NEXT: movw $3, %ax
203197; AVX512F-NEXT: kmovw %eax, %k1
204- ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
198+ ; AVX512F-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
205199; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0
206200; AVX512F-NEXT: ret{{[l|q]}}
207201;
208202; AVX512BW-LABEL: addd_selectq_16xi32:
209203; AVX512BW: # %bb.0:
210204; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm2
211- ; AVX512BW-NEXT: vpsubd %zmm1, %zmm0, %zmm0
212- ; AVX512BW-NEXT: movb $1, %al
205+ ; AVX512BW-NEXT: movw $3, %ax
213206; AVX512BW-NEXT: kmovd %eax, %k1
214- ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
207+ ; AVX512BW-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
215208; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0
216209; AVX512BW-NEXT: ret{{[l|q]}}
217210 %t2 = add nsw <16 x i32 > %t0 , %t1
0 commit comments