[X86] mayFoldIntoStore - peek through oneuse bitcast users to find a store node (#123366)

mayFoldIntoStore currently only checks the direct (oneuse) user of an
SDValue to see whether it is stored, which prevents handling cases where
we bitcast the value before storing it (usually the bitcast is removed
later).

This patch peeks through a chain of oneuse BITCAST nodes to see whether
the value is eventually stored.
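
As a minimal illustration (a sketch modelled on the store_cvt_f32_to_i16 test updated below; the function name is invented and this is not code from the commit), this is the kind of IR where a oneuse bitcast sits between the stored value and the store - at the SelectionDAG level this typically shows up as a oneuse ISD::BITCAST feeding a normal store:

define void @store_f16_through_bitcast(float %a0, ptr %p0) {
  %h = fptrunc float %a0 to half   ; value that ends up being stored
  %b = bitcast half %h to i16      ; oneuse bitcast between the value and the store
  store i16 %b, ptr %p0            ; normal store, now visible through the bitcast
  ret void
}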

The main use of mayFoldIntoStore is v8i16 EXTRACT_VECTOR_ELT lowering,
which will only use PEXTRW/PEXTRB for index-0 extractions (vs. the faster
MOVD) if the extracted value will be folded into a store on SSE41+
targets.
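
For context, here is a minimal sketch of the index-0 case that decision governs (illustrative only; the function name is invented): when the extracted element's only use is a store, folding into a PEXTRW-to-memory is preferred over MOVD to a GPR followed by a scalar store, and the peek-through above lets that still apply when a bitcast (e.g. between i16 and half) sits between the extract and the store.

define void @extract0_store(<8 x i16> %v, ptr %p) {
  %e = extractelement <8 x i16> %v, i64 0   ; index-0 extraction
  store i16 %e, ptr %p                      ; sole use is a store
  ret void
}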

Fixes #107086
RKSimon authored Jan 18, 2025
1 parent c3a935e commit 67c3f2b
Showing 11 changed files with 50 additions and 79 deletions.
11 changes: 10 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2812,7 +2812,16 @@ bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
 }
 
 bool X86::mayFoldIntoStore(SDValue Op) {
-  return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->user_begin());
+  if (!Op.hasOneUse())
+    return false;
+  // Peek through (oneuse) bitcast users
+  SDNode *User = *Op->user_begin();
+  while (User->getOpcode() == ISD::BITCAST) {
+    if (!User->hasOneUse())
+      return false;
+    User = *User->user_begin();
+  }
+  return ISD::isNormalStore(User);
 }
 
 bool X86::mayFoldIntoZeroExtend(SDValue Op) {
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll
@@ -53,8 +53,7 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind {
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: movw %ax, (%rdi)
; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512-NEXT: retq
entry:
%val = load half, half addrspace(1)* %out
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/cvt16.ll
@@ -34,8 +34,7 @@ define void @test1(float %src, ptr %dest) nounwind {
; F16C-LABEL: test1:
; F16C: # %bb.0:
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; SOFTFLOAT-LABEL: test1:
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/fp-strict-scalar-fp16.ll
@@ -316,8 +316,7 @@ define void @fptrunc_float_to_f16(ptr %val, ptr%ret) nounwind strictfp {
; AVX: # %bb.0:
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: movw %ax, (%rsi)
; AVX-NEXT: vpextrw $0, %xmm0, (%rsi)
; AVX-NEXT: retq
;
; X86-LABEL: fptrunc_float_to_f16:
@@ -411,8 +410,7 @@ define void @fsqrt_f16(ptr %a) nounwind strictfp {
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: movw %ax, (%rdi)
; AVX-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX-NEXT: retq
;
; X86-LABEL: fsqrt_f16:
60 changes: 20 additions & 40 deletions llvm/test/CodeGen/X86/fp16-libcalls.ll
@@ -12,8 +12,7 @@ define void @test_half_ceil(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_ceil:
@@ -108,8 +107,7 @@ define void @test_half_cos(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq cosf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -167,8 +165,7 @@ define void @test_half_exp(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq expf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -226,8 +223,7 @@ define void @test_half_exp2(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq exp2f@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -285,8 +281,7 @@ define void @test_half_exp10(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq exp10f@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -342,8 +337,7 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_fabs:
@@ -383,8 +377,7 @@ define void @test_half_floor(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_floor:
@@ -438,8 +431,7 @@ define void @test_half_fma(half %a0, half %a1, half %a2, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm2, %xmm2
; F16C-NEXT: callq fmaf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -525,8 +517,7 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_fneg:
@@ -568,8 +559,7 @@ define void @test_half_log(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq logf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -627,8 +617,7 @@ define void @test_half_log2(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq log2f@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -686,8 +675,7 @@ define void @test_half_log10(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq log10f@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -743,8 +731,7 @@ define void @test_half_nearbyint(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_nearbyint:
@@ -797,8 +784,7 @@ define void @test_half_pow(half %a0, half %a1, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; F16C-NEXT: callq powf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -876,8 +862,7 @@ define void @test_half_powi(half %a0, i32 %a1, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq __powisf2@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -943,8 +928,7 @@ define void @test_half_rint(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_rint:
@@ -996,8 +980,7 @@ define void @test_half_sin(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq sinf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -1053,8 +1036,7 @@ define void @test_half_sqrt(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_sqrt:
@@ -1107,8 +1089,7 @@ define void @test_half_tan(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: callq tanf@PLT
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rbx)
; F16C-NEXT: vpextrw $0, %xmm0, (%rbx)
; F16C-NEXT: popq %rbx
; F16C-NEXT: retq
;
@@ -1164,8 +1145,7 @@ define void @test_half_trunc(half %a0, ptr %p0) nounwind {
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; F16C-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; FP16-LABEL: test_half_trunc:
16 changes: 6 additions & 10 deletions llvm/test/CodeGen/X86/half-constrained.ll
@@ -176,8 +176,7 @@ define void @float_to_half(float %0) strictfp {
; X86-F16C: # %bb.0:
; X86-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; X86-F16C-NEXT: vmovd %xmm0, %eax
; X86-F16C-NEXT: movw %ax, a
; X86-F16C-NEXT: vpextrw $0, %xmm0, a
; X86-F16C-NEXT: retl
;
; X64-NOF16C-LABEL: float_to_half:
@@ -197,9 +196,8 @@ define void @float_to_half(float %0) strictfp {
; X64-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT: vmovd %xmm0, %eax
; X64-F16C-NEXT: movq a@GOTPCREL(%rip), %rcx
; X64-F16C-NEXT: movw %ax, (%rcx)
; X64-F16C-NEXT: movq a@GOTPCREL(%rip), %rax
; X64-F16C-NEXT: vpextrw $0, %xmm0, (%rax)
; X64-F16C-NEXT: retq
%2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
store half %2, ptr @a, align 2
@@ -354,8 +352,7 @@ define void @add() strictfp {
; X86-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X86-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; X86-F16C-NEXT: vmovd %xmm0, %eax
; X86-F16C-NEXT: movw %ax, c
; X86-F16C-NEXT: vpextrw $0, %xmm0, c
; X86-F16C-NEXT: retl
;
; X64-NOF16C-LABEL: add:
@@ -392,9 +389,8 @@ define void @add() strictfp {
; X64-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT: vmovd %xmm0, %eax
; X64-F16C-NEXT: movq c@GOTPCREL(%rip), %rcx
; X64-F16C-NEXT: movw %ax, (%rcx)
; X64-F16C-NEXT: movq c@GOTPCREL(%rip), %rax
; X64-F16C-NEXT: vpextrw $0, %xmm0, (%rax)
; X64-F16C-NEXT: retq
%1 = load half, ptr @a, align 2
%2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/half-darwin.ll
@@ -16,8 +16,7 @@ define void @truncsfhf(float %in, ptr %ptr) nounwind {
; CHECK-F16C-LABEL: truncsfhf:
; CHECK-F16C: ## %bb.0:
; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-F16C-NEXT: vmovd %xmm0, %eax
; CHECK-F16C-NEXT: movw %ax, (%rdi)
; CHECK-F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; CHECK-F16C-NEXT: retq
;
; CHECK-FP16-LABEL: truncsfhf:
@@ -108,8 +107,7 @@ define void @strict_truncsfhf(float %in, ptr %ptr) nounwind strictfp {
; CHECK-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-F16C-NEXT: vmovd %xmm0, %eax
; CHECK-F16C-NEXT: movw %ax, (%rdi)
; CHECK-F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; CHECK-F16C-NEXT: retq
;
; CHECK-FP16-LABEL: strict_truncsfhf:
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/X86/half.ll
@@ -146,8 +146,7 @@ define void @test_trunc32(float %in, ptr %addr) #0 {
; BWON-F16C-LABEL: test_trunc32:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: movw %ax, (%rdi)
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_trunc32:
@@ -265,8 +264,7 @@ define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: movw %ax, (%rsi)
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_sitofp_i64:
@@ -398,8 +396,7 @@ define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT: .LBB10_3:
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: movw %ax, (%rsi)
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_uitofp_i64:
@@ -1075,8 +1072,7 @@ define void @main.158() #0 {
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; BWON-F16C-NEXT: .LBB20_2: # %entry
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: movw %ax, (%rax)
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rax)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: main.158:
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/pr91005.ll
@@ -16,8 +16,7 @@ define void @PR91005(ptr %0) minsize {
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, (%rdi)
; CHECK-NEXT: vpextrw $0, %xmm0, (%rdi)
; CHECK-NEXT: .LBB0_2: # %common.ret
; CHECK-NEXT: retq
%2 = bitcast <2 x half> poison to <2 x i16>
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/pr95278.ll
@@ -8,8 +8,7 @@ define void @PR95278(ptr %p0, ptr %p1) {
; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: movw %ax, (%rsi)
; CHECK-NEXT: vpextrw $0, %xmm0, (%rsi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%load = load <1024 x half>, ptr %p0, align 2
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -2596,15 +2596,13 @@ define void @store_cvt_f32_to_i16(float %a0, ptr %a1) nounwind {
; F16C-LABEL: store_cvt_f32_to_i16:
; F16C: # %bb.0:
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; F16C-NEXT: vmovd %xmm0, %eax
; F16C-NEXT: movw %ax, (%rdi)
; F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; F16C-NEXT: retq
;
; AVX512-LABEL: store_cvt_f32_to_i16:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: movw %ax, (%rdi)
; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512-NEXT: retq
%1 = fptrunc float %a0 to half
%2 = bitcast half %1 to i16