Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -4489,6 +4489,8 @@
(convert SyntheticAmode XmmMem synthetic_amode_to_xmm_mem)
(convert Amode XmmMemAligned amode_to_xmm_mem_aligned)
(convert SyntheticAmode XmmMemAligned synthetic_amode_to_xmm_mem_aligned)
;; Implicit conversions from a `VCodeConstant`: these let a constant be written
;; directly where a `SyntheticAmode` or an `XmmMem` operand is expected, using
;; `const_to_synthetic_amode` and `const_to_xmm_mem` respectively.
(convert VCodeConstant SyntheticAmode const_to_synthetic_amode)
(convert VCodeConstant XmmMem const_to_xmm_mem)

(convert IntCC CC intcc_to_cc)
(convert AtomicRmwOp MachAtomicRmwOp atomic_rmw_op_to_mach_atomic_rmw_op)
Expand Down Expand Up @@ -4537,6 +4539,8 @@
(synthetic_amode_to_reg_mem amode))
;; Build a `SyntheticAmode` from a `VCodeConstant`. This is declared as an
;; extern constructor, so its implementation lives outside this ISLE file.
(decl const_to_synthetic_amode (VCodeConstant) SyntheticAmode)
(extern constructor const_to_synthetic_amode const_to_synthetic_amode)
;; Build an `XmmMem` operand from a `VCodeConstant` by delegating to
;; `const_to_synthetic_amode`.
;; NOTE(review): the rule body yields a `SyntheticAmode` while the declared
;; result is `XmmMem`; presumably the `synthetic_amode_to_xmm_mem` implicit
;; conversion bridges the two -- confirm.
(decl const_to_xmm_mem (VCodeConstant) XmmMem)
(rule (const_to_xmm_mem c) (const_to_synthetic_amode c))

;; Turn an `Xmm` register into an `XmmMemAligned` operand by delegating to
;; `xmm_mem_to_xmm_mem_aligned`.
;; NOTE(review): `reg` is an `Xmm`, while the helper's name suggests it takes
;; an `XmmMem`; presumably an implicit `Xmm` -> `XmmMem` conversion applies
;; here -- confirm.
(decl xmm_to_xmm_mem_aligned (Xmm) XmmMemAligned)
(rule (xmm_to_xmm_mem_aligned reg) (xmm_mem_to_xmm_mem_aligned reg))
Expand Down
40 changes: 18 additions & 22 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1908,7 +1908,7 @@
(rule (lower (has_type $I8X16
(popcnt src)))
(let ((nibble_table_const VCodeConstant (popcount_4bit_table))
(low_mask Xmm (x64_xmm_load_const $I8X16 (popcount_low_mask)))
(low_mask XmmMem (popcount_low_mask))
(low_nibbles Xmm (sse_and $I8X16 src low_mask))
;; Note that this is a 16x8 shift, but that's OK; we mask
;; off anything that traverses from one byte to the next
Expand Down Expand Up @@ -2990,9 +2990,9 @@
;; every value of the mantissa represents a corresponding uint32 number.
;; When we subtract 0x1.0p52 we are left with double(src).
(rule 1 (lower (has_type $F64X2 (fcvt_from_uint (uwiden_low val @ (value_type $I32X4)))))
(let ((uint_mask Xmm (x64_xmm_load_const $I32X4 (fcvt_uint_mask_const)))
(let ((uint_mask XmmMem (fcvt_uint_mask_const))
(res Xmm (x64_unpcklps val uint_mask))
(uint_mask_high Xmm (x64_xmm_load_const $I32X4 (fcvt_uint_mask_high_const))))
(uint_mask_high XmmMem (fcvt_uint_mask_high_const)))
(x64_subpd res uint_mask_high)))

;; When AVX512VL and AVX512F are available,
Expand Down Expand Up @@ -3190,27 +3190,27 @@
(has_type $I32X4 (iadd_pairwise
(swiden_low val @ (value_type $I16X8))
(swiden_high val))))
(let ((mul_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_mul_const_32))))
(let ((mul_const XmmMem (iadd_pairwise_mul_const_32)))
(x64_pmaddwd val mul_const)))

(rule (lower
(has_type $I16X8 (iadd_pairwise
(uwiden_low val @ (value_type $I8X16))
(uwiden_high val))))
(let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16))))
(let ((mul_const XmmMem (iadd_pairwise_mul_const_16)))
(x64_pmaddubsw val mul_const)))

(rule (lower
(has_type $I32X4 (iadd_pairwise
(uwiden_low val @ (value_type $I16X8))
(uwiden_high val))))
(let ((xor_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_xor_const_32)))
(let ((xor_const XmmMem (iadd_pairwise_xor_const_32))
(dst Xmm (x64_pxor val xor_const))

(madd_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_mul_const_32)))
(madd_const XmmMem (iadd_pairwise_mul_const_32))
(dst Xmm (x64_pmaddwd dst madd_const))

(addd_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_addd_const_32))))
(addd_const XmmMem (iadd_pairwise_addd_const_32)))
(x64_paddd dst addd_const)))

;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down Expand Up @@ -3288,7 +3288,7 @@
;; CVTTPD2DQ xmm_y, xmm_y

(tmp1 Xmm (x64_cmppd a a (FcmpImm.Equal)))
(umax_mask Xmm (x64_xmm_load_const $F64X2 (snarrow_umax_mask)))
(umax_mask XmmMem (snarrow_umax_mask))

;; ANDPD xmm_y, [wasm_f64x2_splat(2147483647.0)]
(tmp1 Xmm (x64_andps tmp1 umax_mask))
Expand Down Expand Up @@ -3504,17 +3504,15 @@
;; indices (may not be completely necessary: verification could fail incorrect
;; mask values) and fix the indexes to all point to the `dst` vector.
(rule 3 (lower (shuffle a a (vec_mask_from_immediate mask)))
(x64_pshufb a (x64_xmm_load_const $I8X16 (shuffle_0_31_mask mask))))
(x64_pshufb a (shuffle_0_31_mask mask)))

;; For the case where the shuffle mask contains out-of-bounds values (values
;; greater than 31) we must mask off those resulting values in the result of
;; `vpermi2b`.
(rule 2 (lower (has_type (and (avx512vl_enabled $true) (avx512vbmi_enabled $true))
(shuffle a b (vec_mask_from_immediate
(perm_from_mask_with_zeros mask zeros)))))
(x64_andps
(x64_xmm_load_const $I8X16 zeros)
(x64_vpermi2b b a (x64_xmm_load_const $I8X16 mask))))
(x64_andps (x64_vpermi2b b a (x64_xmm_load_const $I8X16 mask)) zeros))

;; However, if the shuffle mask contains no out-of-bounds values, we can use
;; `vpermi2b` without any masking.
Expand All @@ -3527,8 +3525,8 @@
;; above, we build the `constructed_mask` for each case statically.
(rule (lower (shuffle a b (vec_mask_from_immediate mask)))
(x64_por
(x64_pshufb a (x64_xmm_load_const $I8X16 (shuffle_0_15_mask mask)))
(x64_pshufb b (x64_xmm_load_const $I8X16 (shuffle_16_31_mask mask)))))
(x64_pshufb a (shuffle_0_15_mask mask))
(x64_pshufb b (shuffle_16_31_mask mask))))

;; Rules for `swizzle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand All @@ -3539,9 +3537,7 @@
;; Wasm SIMD semantics for this instruction. The instruction format maps to
;; variables like: %dst = swizzle %src, %mask
(rule (lower (swizzle src mask))
(let ((mask Xmm (x64_paddusb
mask
(x64_xmm_load_const $I8X16 (swizzle_zero_mask)))))
(let ((mask Xmm (x64_paddusb mask (swizzle_zero_mask))))
(x64_pshufb src mask)))

;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand Down Expand Up @@ -3716,9 +3712,9 @@
(let ((src1 Xmm qx)
(src2 Xmm qy)

(mask Xmm (x64_xmm_load_const $I16X8 (sqmul_round_sat_mask)))
(mask XmmMem (sqmul_round_sat_mask))
(dst Xmm (x64_pmulhrsw src1 src2))
(cmp Xmm (x64_pcmpeqw mask dst)))
(cmp Xmm (x64_pcmpeqw dst mask)))
(x64_pxor dst cmp)))

;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
Expand All @@ -3744,7 +3740,7 @@
(zeros Xmm (xmm_zero $F64X2))
(dst Xmm (x64_maxpd src zeros))

(umax_mask Xmm (x64_xmm_load_const $F64X2 (uunarrow_umax_mask)))
(umax_mask XmmMem (uunarrow_umax_mask))

;; MINPD xmm_y, [wasm_f64x2_splat(4294967295.0)]
(dst Xmm (x64_minpd dst umax_mask))
Expand All @@ -3753,7 +3749,7 @@
(dst Xmm (x64_roundpd dst (RoundImm.RoundZero)))

;; ADDPD xmm_y, [wasm_f64x2_splat(0x1.0p+52)]
(uint_mask Xmm (x64_xmm_load_const $F64X2 (uunarrow_uint_mask)))
(uint_mask XmmMem (uunarrow_uint_mask))
(dst Xmm (x64_addpd dst uint_mask)))

;; SHUFPS xmm_y, xmm_tmp, 0x88
Expand Down
16 changes: 8 additions & 8 deletions cranelift/filetests/filetests/isa/x64/fcvt.clif
Original file line number Diff line number Diff line change
Expand Up @@ -304,10 +304,8 @@ block0(v0: i32x4):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqu const(0), %xmm2
; unpcklps %xmm0, %xmm2, %xmm0
; movdqu const(1), %xmm6
; subpd %xmm0, %xmm6, %xmm0
; unpcklps %xmm0, const(0), %xmm0
; subpd %xmm0, const(1), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand All @@ -317,14 +315,16 @@ block0(v0: i32x4):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movdqu 0x14(%rip), %xmm2
; unpcklps %xmm2, %xmm0
; movdqu 0x19(%rip), %xmm6
; subpd %xmm6, %xmm0
; unpcklps 0x15(%rip), %xmm0
; subpd 0x1d(%rip), %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; xorb %al, (%rbx)
; addb %dh, (%rax)
; addb %al, (%r8)
Expand Down
21 changes: 7 additions & 14 deletions cranelift/filetests/filetests/isa/x64/float-avx.clif
Original file line number Diff line number Diff line change
Expand Up @@ -566,10 +566,9 @@ block0(v0: f64x2):
; movq %rsp, %rbp
; block0:
; vcmppd $0 %xmm0, %xmm0, %xmm2
; movupd const(0), %xmm4
; vandps %xmm2, %xmm4, %xmm6
; vminpd %xmm0, %xmm6, %xmm8
; vcvttpd2dq %xmm8, %xmm0
; vandps %xmm2, const(0), %xmm4
; vminpd %xmm0, %xmm4, %xmm6
; vcvttpd2dq %xmm6, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand All @@ -580,19 +579,13 @@ block0(v0: f64x2):
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcmpeqpd %xmm0, %xmm0, %xmm2
; movupd 0x1f(%rip), %xmm4
; vandps %xmm4, %xmm2, %xmm6
; vminpd %xmm6, %xmm0, %xmm8
; vcvttpd2dq %xmm8, %xmm0
; vandps 0xf(%rip), %xmm2, %xmm4
; vminpd %xmm4, %xmm0, %xmm6
; vcvttpd2dq %xmm6, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, %al
; sarb $0xff, %bh

31 changes: 16 additions & 15 deletions cranelift/filetests/filetests/isa/x64/narrowing.clif
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,12 @@ block0(v0: f64x2):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm0, %xmm4
; cmppd $0, %xmm4, %xmm0, %xmm4
; movupd const(0), %xmm5
; andps %xmm4, %xmm5, %xmm4
; movdqa %xmm0, %xmm8
; minpd %xmm8, %xmm4, %xmm8
; cvttpd2dq %xmm8, %xmm0
; movdqa %xmm0, %xmm3
; cmppd $0, %xmm3, %xmm0, %xmm3
; andps %xmm3, const(0), %xmm3
; movdqa %xmm0, %xmm6
; minpd %xmm6, %xmm3, %xmm6
; cvttpd2dq %xmm6, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand All @@ -79,20 +78,22 @@ block0(v0: f64x2):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movdqa %xmm0, %xmm4
; cmpeqpd %xmm0, %xmm4
; movupd 0x1b(%rip), %xmm5
; andps %xmm5, %xmm4
; movdqa %xmm0, %xmm8
; minpd %xmm4, %xmm8
; cvttpd2dq %xmm8, %xmm0
; movdqa %xmm0, %xmm3
; cmpeqpd %xmm0, %xmm3
; andps 0x1c(%rip), %xmm3
; movdqa %xmm0, %xmm6
; minpd %xmm3, %xmm6
; cvttpd2dq %xmm6, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; sarb $0xff, %bh
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, %al

function %f4(i16x8, i16x8) -> i8x16 {
block0(v0: i16x8, v1: i16x8):
Expand Down
26 changes: 13 additions & 13 deletions cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,11 @@ block0(v0: i8x16, v1: i8x16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm0, %xmm7
; movdqu const(1), %xmm0
; movdqu const(0), %xmm6
; movdqa %xmm7, %xmm9
; vpermi2b %xmm1, %xmm9, %xmm6, %xmm6
; andps %xmm0, %xmm6, %xmm0
; movdqa %xmm0, %xmm6
; movdqu const(0), %xmm0
; movdqa %xmm6, %xmm7
; vpermi2b %xmm1, %xmm7, %xmm0, %xmm0
; andps %xmm0, const(1), %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand All @@ -70,12 +69,11 @@ block0(v0: i8x16, v1: i8x16):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movdqa %xmm0, %xmm7
; movdqu 0x30(%rip), %xmm0
; movdqu 0x18(%rip), %xmm6
; movdqa %xmm7, %xmm9
; vpermi2b %xmm1, %xmm9, %xmm6
; andps %xmm6, %xmm0
; movdqa %xmm0, %xmm6
; movdqu 0x20(%rip), %xmm0
; movdqa %xmm6, %xmm7
; vpermi2b %xmm1, %xmm7, %xmm0
; andps 0x1f(%rip), %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
Expand All @@ -89,7 +87,9 @@ block0(v0: i8x16, v1: i8x16):
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, (%rax)
; cmpb $0xff, %bh
; addb %al, (%rax)
; addb %al, (%rax)
; addb %al, -1(%rax)

function %f3(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
Expand Down
Loading