22; RUN: llc < %s -mtriple=x86_64-- -mcpu=nehalem | FileCheck %s --check-prefixes=NHM
33; RUN: llc < %s -mtriple=x86_64-- -mcpu=sandybridge | FileCheck %s --check-prefixes=FAST-SCALAR,SNB
44; RUN: llc < %s -mtriple=x86_64-- -mcpu=broadwell | FileCheck %s --check-prefixes=FAST-SCALAR,BDW
5- ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=FAST-SCALAR,SKL
6- ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=SLOW-SCALAR,ZN1
7- ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=SLOW-SCALAR,ZN3
5+ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
6+ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
7+ ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
88
99define float @f32_no_daz(float %f) #0 {
1010; NHM-LABEL: f32_no_daz:
@@ -26,19 +26,6 @@ define float @f32_no_daz(float %f) #0 {
2626; FAST-SCALAR: # %bb.0:
2727; FAST-SCALAR-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
2828; FAST-SCALAR-NEXT: retq
29- ;
30- ; SLOW-SCALAR-LABEL: f32_no_daz:
31- ; SLOW-SCALAR: # %bb.0:
32- ; SLOW-SCALAR-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
33- ; SLOW-SCALAR-NEXT: vbroadcastss {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
34- ; SLOW-SCALAR-NEXT: vmulss %xmm1, %xmm0, %xmm2
35- ; SLOW-SCALAR-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
36- ; SLOW-SCALAR-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
37- ; SLOW-SCALAR-NEXT: vandps %xmm3, %xmm0, %xmm0
38- ; SLOW-SCALAR-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
39- ; SLOW-SCALAR-NEXT: vmulss %xmm1, %xmm2, %xmm1
40- ; SLOW-SCALAR-NEXT: vandnps %xmm1, %xmm0, %xmm0
41- ; SLOW-SCALAR-NEXT: retq
4229 %call = tail call fast float @llvm.sqrt.f32(float %f) #2
4330 ret float %call
4431}
@@ -91,42 +78,10 @@ define <4 x float> @v4f32_no_daz(<4 x float> %f) #0 {
9178; BDW-NEXT: vandps %xmm1, %xmm0, %xmm0
9279; BDW-NEXT: retq
9380;
94- ; SKL-LABEL: v4f32_no_daz:
95- ; SKL: # %bb.0:
96- ; SKL-NEXT: vsqrtps %xmm0, %xmm0
97- ; SKL-NEXT: retq
98- ;
99- ; ZN1-LABEL: v4f32_no_daz:
100- ; ZN1: # %bb.0:
101- ; ZN1-NEXT: vrsqrtps %xmm0, %xmm1
102- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
103- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
104- ; ZN1-NEXT: vmulps %xmm1, %xmm0, %xmm2
105- ; ZN1-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3
106- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
107- ; ZN1-NEXT: vandps %xmm4, %xmm0, %xmm0
108- ; ZN1-NEXT: vmulps %xmm1, %xmm2, %xmm1
109- ; ZN1-NEXT: vmulps %xmm3, %xmm1, %xmm1
110- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
111- ; ZN1-NEXT: vcmpleps %xmm0, %xmm3, %xmm0
112- ; ZN1-NEXT: vandps %xmm1, %xmm0, %xmm0
113- ; ZN1-NEXT: retq
114- ;
115- ; ZN3-LABEL: v4f32_no_daz:
116- ; ZN3: # %bb.0:
117- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
118- ; ZN3-NEXT: vrsqrtps %xmm0, %xmm1
119- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
120- ; ZN3-NEXT: vmulps %xmm1, %xmm0, %xmm2
121- ; ZN3-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3
122- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
123- ; ZN3-NEXT: vandps %xmm4, %xmm0, %xmm0
124- ; ZN3-NEXT: vmulps %xmm1, %xmm2, %xmm1
125- ; ZN3-NEXT: vmulps %xmm3, %xmm1, %xmm1
126- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
127- ; ZN3-NEXT: vcmpleps %xmm0, %xmm3, %xmm0
128- ; ZN3-NEXT: vandps %xmm1, %xmm0, %xmm0
129- ; ZN3-NEXT: retq
81+ ; FAST-VECTOR-LABEL: v4f32_no_daz:
82+ ; FAST-VECTOR: # %bb.0:
83+ ; FAST-VECTOR-NEXT: vsqrtps %xmm0, %xmm0
84+ ; FAST-VECTOR-NEXT: retq
13085 %call = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %f) #2
13186 ret <4 x float> %call
13287}
@@ -194,42 +149,10 @@ define <8 x float> @v8f32_no_daz(<8 x float> %f) #0 {
194149; BDW-NEXT: vandps %ymm1, %ymm0, %ymm0
195150; BDW-NEXT: retq
196151;
197- ; SKL-LABEL: v8f32_no_daz:
198- ; SKL: # %bb.0:
199- ; SKL-NEXT: vsqrtps %ymm0, %ymm0
200- ; SKL-NEXT: retq
201- ;
202- ; ZN1-LABEL: v8f32_no_daz:
203- ; ZN1: # %bb.0:
204- ; ZN1-NEXT: vrsqrtps %ymm0, %ymm1
205- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
206- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
207- ; ZN1-NEXT: vmulps %ymm1, %ymm0, %ymm2
208- ; ZN1-NEXT: vandps %ymm4, %ymm0, %ymm0
209- ; ZN1-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm2 * ymm1) + ymm3
210- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
211- ; ZN1-NEXT: vmulps %ymm1, %ymm2, %ymm1
212- ; ZN1-NEXT: vmulps %ymm3, %ymm1, %ymm1
213- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
214- ; ZN1-NEXT: vcmpleps %ymm0, %ymm3, %ymm0
215- ; ZN1-NEXT: vandps %ymm1, %ymm0, %ymm0
216- ; ZN1-NEXT: retq
217- ;
218- ; ZN3-LABEL: v8f32_no_daz:
219- ; ZN3: # %bb.0:
220- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
221- ; ZN3-NEXT: vrsqrtps %ymm0, %ymm1
222- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
223- ; ZN3-NEXT: vmulps %ymm1, %ymm0, %ymm2
224- ; ZN3-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm2 * ymm1) + ymm3
225- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
226- ; ZN3-NEXT: vandps %ymm4, %ymm0, %ymm0
227- ; ZN3-NEXT: vmulps %ymm1, %ymm2, %ymm1
228- ; ZN3-NEXT: vmulps %ymm3, %ymm1, %ymm1
229- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
230- ; ZN3-NEXT: vcmpleps %ymm0, %ymm3, %ymm0
231- ; ZN3-NEXT: vandps %ymm1, %ymm0, %ymm0
232- ; ZN3-NEXT: retq
152+ ; FAST-VECTOR-LABEL: v8f32_no_daz:
153+ ; FAST-VECTOR: # %bb.0:
154+ ; FAST-VECTOR-NEXT: vsqrtps %ymm0, %ymm0
155+ ; FAST-VECTOR-NEXT: retq
233156 %call = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %f) #2
234157 ret <8 x float> %call
235158}
@@ -256,18 +179,6 @@ define float @f32_daz(float %f) #1 {
256179; FAST-SCALAR: # %bb.0:
257180; FAST-SCALAR-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
258181; FAST-SCALAR-NEXT: retq
259- ;
260- ; SLOW-SCALAR-LABEL: f32_daz:
261- ; SLOW-SCALAR: # %bb.0:
262- ; SLOW-SCALAR-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
263- ; SLOW-SCALAR-NEXT: vmulss %xmm1, %xmm0, %xmm2
264- ; SLOW-SCALAR-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
265- ; SLOW-SCALAR-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
266- ; SLOW-SCALAR-NEXT: vmulss %xmm1, %xmm2, %xmm1
267- ; SLOW-SCALAR-NEXT: vxorps %xmm2, %xmm2, %xmm2
268- ; SLOW-SCALAR-NEXT: vcmpeqss %xmm2, %xmm0, %xmm0
269- ; SLOW-SCALAR-NEXT: vandnps %xmm1, %xmm0, %xmm0
270- ; SLOW-SCALAR-NEXT: retq
271182 %call = tail call fast float @llvm.sqrt.f32(float %f) #2
272183 ret float %call
273184}
@@ -315,38 +226,10 @@ define <4 x float> @v4f32_daz(<4 x float> %f) #1 {
315226; BDW-NEXT: vandps %xmm1, %xmm0, %xmm0
316227; BDW-NEXT: retq
317228;
318- ; SKL-LABEL: v4f32_daz:
319- ; SKL: # %bb.0:
320- ; SKL-NEXT: vsqrtps %xmm0, %xmm0
321- ; SKL-NEXT: retq
322- ;
323- ; ZN1-LABEL: v4f32_daz:
324- ; ZN1: # %bb.0:
325- ; ZN1-NEXT: vrsqrtps %xmm0, %xmm1
326- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
327- ; ZN1-NEXT: vmulps %xmm1, %xmm0, %xmm2
328- ; ZN1-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3
329- ; ZN1-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
330- ; ZN1-NEXT: vmulps %xmm1, %xmm2, %xmm1
331- ; ZN1-NEXT: vxorps %xmm2, %xmm2, %xmm2
332- ; ZN1-NEXT: vcmpneqps %xmm2, %xmm0, %xmm0
333- ; ZN1-NEXT: vmulps %xmm3, %xmm1, %xmm1
334- ; ZN1-NEXT: vandps %xmm1, %xmm0, %xmm0
335- ; ZN1-NEXT: retq
336- ;
337- ; ZN3-LABEL: v4f32_daz:
338- ; ZN3: # %bb.0:
339- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
340- ; ZN3-NEXT: vrsqrtps %xmm0, %xmm1
341- ; ZN3-NEXT: vmulps %xmm1, %xmm0, %xmm2
342- ; ZN3-NEXT: vfmadd231ps {{.*#+}} xmm3 = (xmm2 * xmm1) + xmm3
343- ; ZN3-NEXT: vbroadcastss {{.*#+}} xmm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
344- ; ZN3-NEXT: vmulps %xmm1, %xmm2, %xmm1
345- ; ZN3-NEXT: vxorps %xmm2, %xmm2, %xmm2
346- ; ZN3-NEXT: vcmpneqps %xmm2, %xmm0, %xmm0
347- ; ZN3-NEXT: vmulps %xmm3, %xmm1, %xmm1
348- ; ZN3-NEXT: vandps %xmm1, %xmm0, %xmm0
349- ; ZN3-NEXT: retq
229+ ; FAST-VECTOR-LABEL: v4f32_daz:
230+ ; FAST-VECTOR: # %bb.0:
231+ ; FAST-VECTOR-NEXT: vsqrtps %xmm0, %xmm0
232+ ; FAST-VECTOR-NEXT: retq
350233 %call = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %f) #2
351234 ret <4 x float> %call
352235}
@@ -405,38 +288,10 @@ define <8 x float> @v8f32_daz(<8 x float> %f) #1 {
405288; BDW-NEXT: vandps %ymm1, %ymm0, %ymm0
406289; BDW-NEXT: retq
407290;
408- ; SKL-LABEL: v8f32_daz:
409- ; SKL: # %bb.0:
410- ; SKL-NEXT: vsqrtps %ymm0, %ymm0
411- ; SKL-NEXT: retq
412- ;
413- ; ZN1-LABEL: v8f32_daz:
414- ; ZN1: # %bb.0:
415- ; ZN1-NEXT: vrsqrtps %ymm0, %ymm1
416- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
417- ; ZN1-NEXT: vmulps %ymm1, %ymm0, %ymm2
418- ; ZN1-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm2 * ymm1) + ymm3
419- ; ZN1-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
420- ; ZN1-NEXT: vmulps %ymm1, %ymm2, %ymm1
421- ; ZN1-NEXT: vxorps %xmm2, %xmm2, %xmm2
422- ; ZN1-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
423- ; ZN1-NEXT: vmulps %ymm3, %ymm1, %ymm1
424- ; ZN1-NEXT: vandps %ymm1, %ymm0, %ymm0
425- ; ZN1-NEXT: retq
426- ;
427- ; ZN3-LABEL: v8f32_daz:
428- ; ZN3: # %bb.0:
429- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
430- ; ZN3-NEXT: vrsqrtps %ymm0, %ymm1
431- ; ZN3-NEXT: vmulps %ymm1, %ymm0, %ymm2
432- ; ZN3-NEXT: vfmadd231ps {{.*#+}} ymm3 = (ymm2 * ymm1) + ymm3
433- ; ZN3-NEXT: vbroadcastss {{.*#+}} ymm1 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1]
434- ; ZN3-NEXT: vmulps %ymm1, %ymm2, %ymm1
435- ; ZN3-NEXT: vxorps %xmm2, %xmm2, %xmm2
436- ; ZN3-NEXT: vcmpneqps %ymm2, %ymm0, %ymm0
437- ; ZN3-NEXT: vmulps %ymm3, %ymm1, %ymm1
438- ; ZN3-NEXT: vandps %ymm1, %ymm0, %ymm0
439- ; ZN3-NEXT: retq
291+ ; FAST-VECTOR-LABEL: v8f32_daz:
292+ ; FAST-VECTOR: # %bb.0:
293+ ; FAST-VECTOR-NEXT: vsqrtps %ymm0, %ymm0
294+ ; FAST-VECTOR-NEXT: retq
440295 %call = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %f) #2
441296 ret <8 x float> %call
442297}
0 commit comments