Skip to content

Commit 3bb147e

Browse files
committed
[X86] combine-sse41-intrinsics.ll - add AVX2 test coverage
1 parent: c39df49 · commit: 3bb147e

File tree

1 file changed

+36
-16
lines changed

1 file changed

+36
-16
lines changed

llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll

+36 -16
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
45

56

67
define <2 x double> @test_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) {
@@ -146,12 +147,18 @@ define <16 x i8> @demandedelts_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>
146147
; SSE-NEXT: movdqa %xmm3, %xmm0
147148
; SSE-NEXT: retq
148149
;
149-
; AVX-LABEL: demandedelts_pblendvb:
150-
; AVX: # %bb.0:
151-
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
152-
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
153-
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
154-
; AVX-NEXT: retq
150+
; AVX1-LABEL: demandedelts_pblendvb:
151+
; AVX1: # %bb.0:
152+
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
153+
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
154+
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
155+
; AVX1-NEXT: retq
156+
;
157+
; AVX2-LABEL: demandedelts_pblendvb:
158+
; AVX2: # %bb.0:
159+
; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
160+
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
161+
; AVX2-NEXT: retq
155162
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
156163
%2 = shufflevector <16 x i8> %a1, <16 x i8> undef, <16 x i32> zeroinitializer
157164
%3 = shufflevector <16 x i8> %a2, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -193,15 +200,28 @@ define <4 x float> @demandedbits_uitofp_blendvps(<4 x float> %a0, <4 x float> %a
193200
; SSE-NEXT: movaps %xmm3, %xmm0
194201
; SSE-NEXT: retq
195202
;
196-
; AVX-LABEL: demandedbits_uitofp_blendvps:
197-
; AVX: # %bb.0:
198-
; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0],mem[1],xmm2[2],mem[3],xmm2[4],mem[5],xmm2[6],mem[7]
199-
; AVX-NEXT: vpsrld $16, %xmm2, %xmm2
200-
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],mem[1],xmm2[2],mem[3],xmm2[4],mem[5],xmm2[6],mem[7]
201-
; AVX-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
202-
; AVX-NEXT: vaddps %xmm2, %xmm3, %xmm2
203-
; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
204-
; AVX-NEXT: retq
203+
; AVX1-LABEL: demandedbits_uitofp_blendvps:
204+
; AVX1: # %bb.0:
205+
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0],mem[1],xmm2[2],mem[3],xmm2[4],mem[5],xmm2[6],mem[7]
206+
; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2
207+
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],mem[1],xmm2[2],mem[3],xmm2[4],mem[5],xmm2[6],mem[7]
208+
; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
209+
; AVX1-NEXT: vaddps %xmm2, %xmm3, %xmm2
210+
; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
211+
; AVX1-NEXT: retq
212+
;
213+
; AVX2-LABEL: demandedbits_uitofp_blendvps:
214+
; AVX2: # %bb.0:
215+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [1258291200,1258291200,1258291200,1258291200]
216+
; AVX2-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
217+
; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2
218+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928]
219+
; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
220+
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm4 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
221+
; AVX2-NEXT: vsubps %xmm4, %xmm2, %xmm2
222+
; AVX2-NEXT: vaddps %xmm2, %xmm3, %xmm2
223+
; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
224+
; AVX2-NEXT: retq
205225
%cvt = uitofp <4 x i32> %a2 to <4 x float>
206226
%sel = tail call noundef <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %cvt)
207227
ret <4 x float> %sel

0 commit comments

Comments
 (0)