|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
2 | 2 | ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
|
3 |
| -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX |
| 3 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1 |
| 4 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2 |
4 | 5 |
|
5 | 6 |
|
6 | 7 | define <2 x double> @test_x86_sse41_blend_pd(<2 x double> %a0, <2 x double> %a1) {
|
@@ -146,12 +147,18 @@ define <16 x i8> @demandedelts_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>
|
146 | 147 | ; SSE-NEXT: movdqa %xmm3, %xmm0
|
147 | 148 | ; SSE-NEXT: retq
|
148 | 149 | ;
|
149 |
| -; AVX-LABEL: demandedelts_pblendvb: |
150 |
| -; AVX: # %bb.0: |
151 |
| -; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 |
152 |
| -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
153 |
| -; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 |
154 |
| -; AVX-NEXT: retq |
| 150 | +; AVX1-LABEL: demandedelts_pblendvb: |
| 151 | +; AVX1: # %bb.0: |
| 152 | +; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 |
| 153 | +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| 154 | +; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 |
| 155 | +; AVX1-NEXT: retq |
| 156 | +; |
| 157 | +; AVX2-LABEL: demandedelts_pblendvb: |
| 158 | +; AVX2: # %bb.0: |
| 159 | +; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 |
| 160 | +; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 |
| 161 | +; AVX2-NEXT: retq |
155 | 162 | %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
|
156 | 163 | %2 = shufflevector <16 x i8> %a1, <16 x i8> undef, <16 x i32> zeroinitializer
|
157 | 164 | %3 = shufflevector <16 x i8> %a2, <16 x i8> undef, <16 x i32> zeroinitializer
|
@@ -193,15 +200,28 @@ define <4 x float> @demandedbits_uitofp_blendvps(<4 x float> %a0, <4 x float> %a
|
193 | 200 | ; SSE-NEXT: movaps %xmm3, %xmm0
|
194 | 201 | ; SSE-NEXT: retq
|
195 | 202 | ;
|
196 |
| -; AVX-LABEL: demandedbits_uitofp_blendvps: |
197 |
| -; AVX: # %bb.0: |
198 |
| -; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0],mem[1],xmm2[2],mem[3],xmm2[4],mem[5],xmm2[6],mem[7] |
199 |
| -; AVX-NEXT: vpsrld $16, %xmm2, %xmm2 |
200 |
| -; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],mem[1],xmm2[2],mem[3],xmm2[4],mem[5],xmm2[6],mem[7] |
201 |
| -; AVX-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 |
202 |
| -; AVX-NEXT: vaddps %xmm2, %xmm3, %xmm2 |
203 |
| -; AVX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 |
204 |
| -; AVX-NEXT: retq |
| 203 | +; AVX1-LABEL: demandedbits_uitofp_blendvps: |
| 204 | +; AVX1: # %bb.0: |
| 205 | +; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0],mem[1],xmm2[2],mem[3],xmm2[4],mem[5],xmm2[6],mem[7] |
| 206 | +; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 |
| 207 | +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],mem[1],xmm2[2],mem[3],xmm2[4],mem[5],xmm2[6],mem[7] |
| 208 | +; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 |
| 209 | +; AVX1-NEXT: vaddps %xmm2, %xmm3, %xmm2 |
| 210 | +; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 |
| 211 | +; AVX1-NEXT: retq |
| 212 | +; |
| 213 | +; AVX2-LABEL: demandedbits_uitofp_blendvps: |
| 214 | +; AVX2: # %bb.0: |
| 215 | +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [1258291200,1258291200,1258291200,1258291200] |
| 216 | +; AVX2-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7] |
| 217 | +; AVX2-NEXT: vpsrld $16, %xmm2, %xmm2 |
| 218 | +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928] |
| 219 | +; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7] |
| 220 | +; AVX2-NEXT: vbroadcastss {{.*#+}} xmm4 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] |
| 221 | +; AVX2-NEXT: vsubps %xmm4, %xmm2, %xmm2 |
| 222 | +; AVX2-NEXT: vaddps %xmm2, %xmm3, %xmm2 |
| 223 | +; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 |
| 224 | +; AVX2-NEXT: retq |
205 | 225 | %cvt = uitofp <4 x i32> %a2 to <4 x float>
|
206 | 226 | %sel = tail call noundef <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %cvt)
|
207 | 227 | ret <4 x float> %sel
|
|
0 commit comments