Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2694,7 +2694,9 @@
;; above that we have to handle properly. (Conversely, if they
;; don't differ, then the native instruction's answer is the
;; right one per CLIF semantics.)
(let ((min1 Xmm (x64_minps x y))
(let ((x Xmm x) ;; force x/y into registers and disallow load sinking
(y Xmm y)
(min1 Xmm (x64_minps x y))
(min2 Xmm (x64_minps y x))
;; Compute the OR of the two. Note that NaNs have an
;; exponent field of all-ones (0xFF for F32), so if either
Expand Down Expand Up @@ -2732,7 +2734,9 @@
;; Likewise for F64 lanes, except that the right-shift is by 13 bits
;; (1 sign, 11 exponent, 1 QNaN bit).
(rule (lower (has_type $F64X2 (fmin x y)))
(let ((min1 Xmm (x64_minpd x y))
(let ((x Xmm x) ;; force x/y into registers and disallow load sinking
(y Xmm y)
(min1 Xmm (x64_minpd x y))
(min2 Xmm (x64_minpd y x))
(min_or Xmm (x64_orpd min1 min2))
(is_nan_mask Xmm (x64_cmppd min1 min2 (FcmpImm.Unordered)))
Expand All @@ -2757,7 +2761,9 @@
;; above that we have to handle properly. (Conversely, if they
;; don't differ, then the native instruction's answer is the
;; right one per CLIF semantics.)
(let ((max1 Xmm (x64_maxps x y))
(let ((x Xmm x) ;; force x/y into registers and disallow load sinking
(y Xmm y)
(max1 Xmm (x64_maxps x y))
(max2 Xmm (x64_maxps y x))
;; Compute the XOR of the two maxima. In the case
;; where we don't have a +/-0 mismatch or NaNs, then
Expand Down Expand Up @@ -2800,7 +2806,9 @@
;; above that we have to handle properly. (Conversely, if they
;; don't differ, then the native instruction's answer is the
;; right one per CLIF semantics.)
(let ((max1 Xmm (x64_maxpd x y))
(let ((x Xmm x) ;; force x/y into registers and disallow load sinking
(y Xmm y)
(max1 Xmm (x64_maxpd x y))
(max2 Xmm (x64_maxpd y x))
;; Compute the XOR of the two maxima. In the case
;; where we don't have a +/-0 mismatch or NaNs, then
Expand Down
187 changes: 187 additions & 0 deletions cranelift/filetests/filetests/isa/x64/simd-float-min-max.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
test compile precise-output
target x86_64 sse41

function %fmax_f32x4(i64, f32x4) -> f32x4 {
;; Loads an f32x4 from the address in v0 and returns the fmax of v1 and the
;; loaded vector, so one fmax operand is produced directly by a load.
;; NOTE(review): the `;;` prefix is meant to keep these notes out of the
;; precise-output expectation that follows this function -- confirm.
block0(v0: i64, v1: f32x4):
v2 = load.f32x4 v0
v3 = fmax v1, v2
return v3
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movups 0(%rdi), %xmm4
; movdqa %xmm0, %xmm6
; maxps %xmm0, %xmm4, %xmm0
; maxps %xmm4, %xmm6, %xmm4
; movdqa %xmm0, %xmm1
; xorps %xmm1, %xmm4, %xmm1
; orps %xmm0, %xmm1, %xmm0
; movdqa %xmm0, %xmm4
; subps %xmm4, %xmm1, %xmm4
; cmpps $3, %xmm0, %xmm0, %xmm0
; psrld %xmm0, $10, %xmm0
; andnps %xmm0, %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movups (%rdi), %xmm4 ; trap: heap_oob
; movdqa %xmm0, %xmm6
; maxps %xmm4, %xmm0
; maxps %xmm6, %xmm4
; movdqa %xmm0, %xmm1
; xorps %xmm4, %xmm1
; orps %xmm1, %xmm0
; movdqa %xmm0, %xmm4
; subps %xmm1, %xmm4
; cmpunordps %xmm0, %xmm0
; psrld $0xa, %xmm0
; andnps %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %fmin_f32x4(i64, f32x4) -> f32x4 {
;; Loads an f32x4 from the address in v0 and returns the fmin of v1 and the
;; loaded vector, so one fmin operand is produced directly by a load.
;; NOTE(review): the `;;` prefix is meant to keep these notes out of the
;; precise-output expectation that follows this function -- confirm.
block0(v0: i64, v1: f32x4):
v2 = load.f32x4 v0
v3 = fmin v1, v2
return v3
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movups 0(%rdi), %xmm4
; movdqa %xmm0, %xmm1
; minps %xmm1, %xmm4, %xmm1
; minps %xmm4, %xmm0, %xmm4
; orps %xmm1, %xmm4, %xmm1
; movdqa %xmm1, %xmm0
; cmpps $3, %xmm0, %xmm4, %xmm0
; orps %xmm1, %xmm0, %xmm1
; psrld %xmm0, $10, %xmm0
; andnps %xmm0, %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movups (%rdi), %xmm4 ; trap: heap_oob
; movdqa %xmm0, %xmm1
; minps %xmm4, %xmm1
; minps %xmm0, %xmm4
; orps %xmm4, %xmm1
; movdqa %xmm1, %xmm0
; cmpunordps %xmm4, %xmm0
; orps %xmm0, %xmm1
; psrld $0xa, %xmm0
; andnps %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %fmax_f64x2(i64, f64x2) -> f64x2 {
;; Loads an f64x2 from the address in v0 and returns the fmax of v1 and the
;; loaded vector, so one fmax operand is produced directly by a load.
;; NOTE(review): the `;;` prefix is meant to keep these notes out of the
;; precise-output expectation that follows this function -- confirm.
block0(v0: i64, v1: f64x2):
v2 = load.f64x2 v0
v3 = fmax v1, v2
return v3
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movupd 0(%rdi), %xmm4
; movdqa %xmm0, %xmm6
; maxpd %xmm0, %xmm4, %xmm0
; maxpd %xmm4, %xmm6, %xmm4
; movdqa %xmm0, %xmm1
; xorpd %xmm1, %xmm4, %xmm1
; orpd %xmm0, %xmm1, %xmm0
; movdqa %xmm0, %xmm4
; subpd %xmm4, %xmm1, %xmm4
; cmppd $3, %xmm0, %xmm0, %xmm0
; psrlq %xmm0, $13, %xmm0
; andnpd %xmm0, %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movupd (%rdi), %xmm4 ; trap: heap_oob
; movdqa %xmm0, %xmm6
; maxpd %xmm4, %xmm0
; maxpd %xmm6, %xmm4
; movdqa %xmm0, %xmm1
; xorpd %xmm4, %xmm1
; orpd %xmm1, %xmm0
; movdqa %xmm0, %xmm4
; subpd %xmm1, %xmm4
; cmpunordpd %xmm0, %xmm0
; psrlq $0xd, %xmm0
; andnpd %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %fmin_f64x2(i64, f64x2) -> f64x2 {
;; Loads an f64x2 from the address in v0 and returns the fmin of v1 and the
;; loaded vector, so one fmin operand is produced directly by a load.
;; NOTE(review): the `;;` prefix is meant to keep these notes out of the
;; precise-output expectation that follows this function -- confirm.
block0(v0: i64, v1: f64x2):
v2 = load.f64x2 v0
v3 = fmin v1, v2
return v3
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movupd 0(%rdi), %xmm4
; movdqa %xmm0, %xmm5
; minpd %xmm0, %xmm4, %xmm0
; minpd %xmm4, %xmm5, %xmm4
; movdqa %xmm0, %xmm2
; orpd %xmm2, %xmm4, %xmm2
; cmppd $3, %xmm0, %xmm4, %xmm0
; orpd %xmm2, %xmm0, %xmm2
; psrlq %xmm0, $13, %xmm0
; andnpd %xmm0, %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movupd (%rdi), %xmm4 ; trap: heap_oob
; movdqa %xmm0, %xmm5
; minpd %xmm4, %xmm0
; minpd %xmm5, %xmm4
; movdqa %xmm0, %xmm2
; orpd %xmm4, %xmm2
; cmpunordpd %xmm4, %xmm0
; orpd %xmm0, %xmm2
; psrlq $0xd, %xmm0
; andnpd %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq