bytecodealliance · saulecabrera · Sep 10, 2023 · Sep 8, 2023 · Sep 9, 2023 · Sep 9, 2023
@@ -370,7 +370,11 @@ fn winch_supports_module(module: &[u8]) -> bool {
                         | Unreachable { .. }
                         | Return { .. }
                         | F32Const { .. }
-                        | F64Const { .. } => {}
+                        | F64Const { .. }
+                        | F32Abs { .. }
+                        | F64Abs { .. }
+                        | F32Neg { .. }
+                        | F64Neg { .. } => {}
                         _ => {
                             supported = false;
                             break 'main;

@@ -0,0 +1,50 @@
+;; Test the f32 `abs` and `neg` bitwise operators on major boundary values and
+;; all special values.
+
+(module
+  (func (export "abs") (param $x f32) (result f32) (f32.abs (local.get $x)))
+  (func (export "neg") (param $x f32) (result f32) (f32.neg (local.get $x)))
+)
+
+(assert_return (invoke "abs" (f32.const -0x0p+0)) (f32.const 0x0p+0))
+(assert_return (invoke "abs" (f32.const 0x0p+0)) (f32.const 0x0p+0))
+(assert_return (invoke "abs" (f32.const -0x1p-149)) (f32.const 0x1p-149))
+(assert_return (invoke "abs" (f32.const 0x1p-149)) (f32.const 0x1p-149))
+(assert_return (invoke "abs" (f32.const -0x1p-126)) (f32.const 0x1p-126))
+(assert_return (invoke "abs" (f32.const 0x1p-126)) (f32.const 0x1p-126))
+(assert_return (invoke "abs" (f32.const -0x1p-1)) (f32.const 0x1p-1))
+(assert_return (invoke "abs" (f32.const 0x1p-1)) (f32.const 0x1p-1))
+(assert_return (invoke "abs" (f32.const -0x1p+0)) (f32.const 0x1p+0))
+(assert_return (invoke "abs" (f32.const 0x1p+0)) (f32.const 0x1p+0))
+(assert_return (invoke "abs" (f32.const -0x1.921fb6p+2)) (f32.const 0x1.921fb6p+2))
+(assert_return (invoke "abs" (f32.const 0x1.921fb6p+2)) (f32.const 0x1.921fb6p+2))
+(assert_return (invoke "abs" (f32.const -0x1.fffffep+127)) (f32.const 0x1.fffffep+127))
+(assert_return (invoke "abs" (f32.const 0x1.fffffep+127)) (f32.const 0x1.fffffep+127))
+(assert_return (invoke "abs" (f32.const -inf)) (f32.const inf))
+(assert_return (invoke "abs" (f32.const inf)) (f32.const inf))
+(assert_return (invoke "abs" (f32.const -nan)) (f32.const nan))
+(assert_return (invoke "abs" (f32.const nan)) (f32.const nan))
+(assert_return (invoke "neg" (f32.const -0x0p+0)) (f32.const 0x0p+0))
+(assert_return (invoke "neg" (f32.const 0x0p+0)) (f32.const -0x0p+0))
+(assert_return (invoke "neg" (f32.const -0x1p-149)) (f32.const 0x1p-149))
+(assert_return (invoke "neg" (f32.const 0x1p-149)) (f32.const -0x1p-149))
+(assert_return (invoke "neg" (f32.const -0x1p-126)) (f32.const 0x1p-126))
+(assert_return (invoke "neg" (f32.const 0x1p-126)) (f32.const -0x1p-126))
+(assert_return (invoke "neg" (f32.const -0x1p-1)) (f32.const 0x1p-1))
+(assert_return (invoke "neg" (f32.const 0x1p-1)) (f32.const -0x1p-1))
+(assert_return (invoke "neg" (f32.const -0x1p+0)) (f32.const 0x1p+0))
+(assert_return (invoke "neg" (f32.const 0x1p+0)) (f32.const -0x1p+0))
+(assert_return (invoke "neg" (f32.const -0x1.921fb6p+2)) (f32.const 0x1.921fb6p+2))
+(assert_return (invoke "neg" (f32.const 0x1.921fb6p+2)) (f32.const -0x1.921fb6p+2))
+(assert_return (invoke "neg" (f32.const -0x1.fffffep+127)) (f32.const 0x1.fffffep+127))
+(assert_return (invoke "neg" (f32.const 0x1.fffffep+127)) (f32.const -0x1.fffffep+127))
+(assert_return (invoke "neg" (f32.const -inf)) (f32.const inf))
+(assert_return (invoke "neg" (f32.const inf)) (f32.const -inf))
+(assert_return (invoke "neg" (f32.const -nan)) (f32.const nan))
+(assert_return (invoke "neg" (f32.const nan)) (f32.const -nan))
+
+
+;; Type check
+
+(assert_invalid (module (func (result f32) (f32.abs (i64.const 0)))) "type mismatch")
+(assert_invalid (module (func (result f32) (f32.neg (i64.const 0)))) "type mismatch")
@@ -0,0 +1,50 @@
+;; Test the f64 `abs` and `neg` bitwise operators on major boundary values and
+;; all special values.
+
+(module
+  (func (export "abs") (param $x f64) (result f64) (f64.abs (local.get $x)))
+  (func (export "neg") (param $x f64) (result f64) (f64.neg (local.get $x)))
+)
+
+(assert_return (invoke "abs" (f64.const -0x0p+0)) (f64.const 0x0p+0))
+(assert_return (invoke "abs" (f64.const 0x0p+0)) (f64.const 0x0p+0))
+(assert_return (invoke "abs" (f64.const -0x0.0000000000001p-1022)) (f64.const 0x0.0000000000001p-1022))
+(assert_return (invoke "abs" (f64.const 0x0.0000000000001p-1022)) (f64.const 0x0.0000000000001p-1022))
+(assert_return (invoke "abs" (f64.const -0x1p-1022)) (f64.const 0x1p-1022))
+(assert_return (invoke "abs" (f64.const 0x1p-1022)) (f64.const 0x1p-1022))
+(assert_return (invoke "abs" (f64.const -0x1p-1)) (f64.const 0x1p-1))
+(assert_return (invoke "abs" (f64.const 0x1p-1)) (f64.const 0x1p-1))
+(assert_return (invoke "abs" (f64.const -0x1p+0)) (f64.const 0x1p+0))
+(assert_return (invoke "abs" (f64.const 0x1p+0)) (f64.const 0x1p+0))
+(assert_return (invoke "abs" (f64.const -0x1.921fb54442d18p+2)) (f64.const 0x1.921fb54442d18p+2))
+(assert_return (invoke "abs" (f64.const 0x1.921fb54442d18p+2)) (f64.const 0x1.921fb54442d18p+2))
+(assert_return (invoke "abs" (f64.const -0x1.fffffffffffffp+1023)) (f64.const 0x1.fffffffffffffp+1023))
+(assert_return (invoke "abs" (f64.const 0x1.fffffffffffffp+1023)) (f64.const 0x1.fffffffffffffp+1023))
+(assert_return (invoke "abs" (f64.const -inf)) (f64.const inf))
+(assert_return (invoke "abs" (f64.const inf)) (f64.const inf))
+(assert_return (invoke "abs" (f64.const -nan)) (f64.const nan))
+(assert_return (invoke "abs" (f64.const nan)) (f64.const nan))
+(assert_return (invoke "neg" (f64.const -0x0p+0)) (f64.const 0x0p+0))
+(assert_return (invoke "neg" (f64.const 0x0p+0)) (f64.const -0x0p+0))
+(assert_return (invoke "neg" (f64.const -0x0.0000000000001p-1022)) (f64.const 0x0.0000000000001p-1022))
+(assert_return (invoke "neg" (f64.const 0x0.0000000000001p-1022)) (f64.const -0x0.0000000000001p-1022))
+(assert_return (invoke "neg" (f64.const -0x1p-1022)) (f64.const 0x1p-1022))
+(assert_return (invoke "neg" (f64.const 0x1p-1022)) (f64.const -0x1p-1022))
+(assert_return (invoke "neg" (f64.const -0x1p-1)) (f64.const 0x1p-1))
+(assert_return (invoke "neg" (f64.const 0x1p-1)) (f64.const -0x1p-1))
+(assert_return (invoke "neg" (f64.const -0x1p+0)) (f64.const 0x1p+0))
+(assert_return (invoke "neg" (f64.const 0x1p+0)) (f64.const -0x1p+0))
+(assert_return (invoke "neg" (f64.const -0x1.921fb54442d18p+2)) (f64.const 0x1.921fb54442d18p+2))
+(assert_return (invoke "neg" (f64.const 0x1.921fb54442d18p+2)) (f64.const -0x1.921fb54442d18p+2))
+(assert_return (invoke "neg" (f64.const -0x1.fffffffffffffp+1023)) (f64.const 0x1.fffffffffffffp+1023))
+(assert_return (invoke "neg" (f64.const 0x1.fffffffffffffp+1023)) (f64.const -0x1.fffffffffffffp+1023))
+(assert_return (invoke "neg" (f64.const -inf)) (f64.const inf))
+(assert_return (invoke "neg" (f64.const inf)) (f64.const -inf))
+(assert_return (invoke "neg" (f64.const -nan)) (f64.const nan))
+(assert_return (invoke "neg" (f64.const nan)) (f64.const -nan))
+
+
+;; Type check
+
+(assert_invalid (module (func (result f64) (f64.abs (i64.const 0)))) "type mismatch")
+(assert_invalid (module (func (result f64) (f64.neg (i64.const 0)))) "type mismatch")
@@ -228,6 +228,14 @@ impl Masm for MacroAssembler {
         }
     }
 
+    // Not implemented for this assembler yet; any call panics via `todo!()`.
+    fn float_neg(&mut self, _dst: Reg, _src: RegImm, _size: OperandSize) {
+        todo!()
+    }
+
+    // Not implemented for this assembler yet; any call panics via `todo!()`.
+    fn float_abs(&mut self, _dst: Reg, _src: RegImm, _size: OperandSize) {
+        todo!()
+    }
+
     fn and(&mut self, _dst: RegImm, _lhs: RegImm, _rhs: RegImm, _size: OperandSize) {
         todo!()
     }

@@ -1,7 +1,7 @@
 //! Assembler library implementation for x64.
 
 use crate::{
-    isa::reg::Reg,
+    isa::reg::{Reg, RegClass},
     masm::{CalleeKind, CmpKind, DivKind, OperandSize, RemKind, ShiftKind},
 };
 use cranelift_codegen::{
@@ -410,18 +410,36 @@ impl Assembler {
 
     /// "and" two registers.
     pub fn and_rr(&mut self, src: Reg, dst: Reg, size: OperandSize) {
-        self.emit(Inst::AluRmiR {
-            size: size.into(),
-            op: AluRmiROpcode::And,
-            src1: dst.into(),
-            src2: src.into(),
-            dst: dst.into(),
-        });
+        // The instruction to emit depends on the register class of `dst`.
+        match dst.class() {
+            RegClass::Int => {
+                // `dst` is both the first source and the destination, so the
+                // result overwrites `dst`.
+                self.emit(Inst::AluRmiR {
+                    size: size.into(),
+                    op: AluRmiROpcode::And,
+                    src1: dst.into(),
+                    src2: src.into(),
+                    dst: dst.into(),
+                });
+            }
+            RegClass::Float => {
+                // Pick the SSE "and" opcode matching the operand size; 128-bit
+                // operands are not expected here.
+                let op = match size {
+                    OperandSize::S32 => SseOpcode::Andps,
+                    OperandSize::S64 => SseOpcode::Andpd,
+                    OperandSize::S128 => unreachable!(),
+                };
+
+                // As in the integer case, `dst` is both the first source and
+                // the destination.
+                self.emit(Inst::XmmRmR {
+                    op,
+                    src1: dst.into(),
+                    src2: XmmMemAligned::from(Xmm::from(src)),
+                    dst: dst.into(),
+                });
+            }
+            RegClass::Vector => unreachable!(),
+        }
     }
 
     pub fn and_ir(&mut self, imm: i32, dst: Reg, size: OperandSize) {
         let imm = RegMemImm::imm(imm as u32);
-
         self.emit(Inst::AluRmiR {
             size: size.into(),
             op: AluRmiROpcode::And,
@@ -431,6 +449,21 @@ impl Assembler {
         });
     }
 
+    /// Emits a move from `src` into `dst` using a `GprToXmm` instruction.
+    ///
+    /// The opcode is `Movd` for 32-bit operands and `Movq` for 64-bit
+    /// operands; a 128-bit operand size is not expected and hits
+    /// `unreachable!()`.
+    pub fn gpr_to_xmm(&mut self, src: Reg, dst: Reg, size: OperandSize) {
+        let op = match size {
+            OperandSize::S32 => SseOpcode::Movd,
+            OperandSize::S64 => SseOpcode::Movq,
+            OperandSize::S128 => unreachable!(),
+        };
+
+        self.emit(Inst::GprToXmm {
+            op,
+            src: src.into(),
+            dst: dst.into(),
+            src_size: size.into(),
+        })
+    }
+
     pub fn or_rr(&mut self, src: Reg, dst: Reg, size: OperandSize) {
         self.emit(Inst::AluRmiR {
             size: size.into(),
@@ -455,13 +488,32 @@ impl Assembler {
 
     /// Logical exclusive or with registers.
     pub fn xor_rr(&mut self, src: Reg, dst: Reg, size: OperandSize) {
-        self.emit(Inst::AluRmiR {
-            size: size.into(),
-            op: AluRmiROpcode::Xor,
-            src1: dst.into(),
-            src2: src.into(),
-            dst: dst.into(),
-        });
+        // The instruction to emit depends on the register class of `dst`.
+        match dst.class() {
+            RegClass::Int => {
+                // `dst` is both the first source and the destination, so the
+                // result overwrites `dst`.
+                self.emit(Inst::AluRmiR {
+                    size: size.into(),
+                    op: AluRmiROpcode::Xor,
+                    src1: dst.into(),
+                    src2: src.into(),
+                    dst: dst.into(),
+                });
+            }
+            RegClass::Float => {
+                // Pick the SSE "xor" opcode matching the operand size; 128-bit
+                // operands are not expected here.
+                let op = match size {
+                    OperandSize::S32 => SseOpcode::Xorps,
+                    OperandSize::S64 => SseOpcode::Xorpd,
+                    OperandSize::S128 => unreachable!(),
+                };
+
+                // As in the integer case, `dst` is both the first source and
+                // the destination.
+                self.emit(Inst::XmmRmR {
+                    op,
+                    src1: dst.into(),
+                    src2: XmmMemAligned::from(Xmm::from(src)),
+                    dst: dst.into(),
+                });
+            }
+            // Vector registers are not handled yet.
+            RegClass::Vector => todo!(),
+        }
     }
 
     pub fn xor_ir(&mut self, imm: i32, dst: Reg, size: OperandSize) {

@@ -265,6 +265,36 @@ impl Masm for MacroAssembler {
         }
     }
 
+    /// Negates the float held in `dst` by xor-ing it with a mask that has only
+    /// the most significant bit (the sign bit) set for the given `size`.
+    ///
+    /// Panics if `dst` is not a float-class register.
+    fn float_neg(&mut self, dst: Reg, src: RegImm, size: OperandSize) {
+        // NOTE(review): presumably verifies that `src` names the same register
+        // as `dst` -- confirm against `ensure_two_argument_form`.
+        Self::ensure_two_argument_form(&dst.into(), &src);
+        assert_eq!(dst.class(), RegClass::Float);
+        // Only the top bit is set, so the xor below flips that bit and leaves
+        // every other bit untouched.
+        let mask = match size {
+            OperandSize::S32 => I::I32(0x80000000),
+            OperandSize::S64 => I::I64(0x8000000000000000),
+            OperandSize::S128 => unreachable!(),
+        };
+        // Build the mask in the scratch GPR, move it into the scratch xmm
+        // register, then xor it into `dst`.
+        let scratch_gpr = regs::scratch();
+        self.load_constant(&mask, scratch_gpr, size);
+        let scratch_xmm = regs::scratch_xmm();
+        self.asm.gpr_to_xmm(scratch_gpr, scratch_xmm, size);
+        self.asm.xor_rr(scratch_xmm, dst, size);
+    }
+
+    /// Computes the absolute value of the float held in `dst` by and-ing it
+    /// with a mask that has every bit set except the most significant one
+    /// (the sign bit) for the given `size`.
+    ///
+    /// Panics if `dst` is not a float-class register.
+    fn float_abs(&mut self, dst: Reg, src: RegImm, size: OperandSize) {
+        // NOTE(review): presumably verifies that `src` names the same register
+        // as `dst` -- confirm against `ensure_two_argument_form`.
+        Self::ensure_two_argument_form(&dst.into(), &src);
+        assert_eq!(dst.class(), RegClass::Float);
+        // Every bit but the top one is set, so the and below clears that bit
+        // and leaves every other bit untouched.
+        let mask = match size {
+            OperandSize::S32 => I::I32(0x7fffffff),
+            OperandSize::S64 => I::I64(0x7fffffffffffffff),
+            OperandSize::S128 => unreachable!(),
+        };
+        // Build the mask in the scratch GPR, move it into the scratch xmm
+        // register, then and it into `dst`.
+        let scratch_gpr = regs::scratch();
+        self.load_constant(&mask, scratch_gpr, size);
+        let scratch_xmm = regs::scratch_xmm();
+        self.asm.gpr_to_xmm(scratch_gpr, scratch_xmm, size);
+        self.asm.and_rr(scratch_xmm, dst, size);
+    }
+
     fn and(&mut self, dst: RegImm, lhs: RegImm, rhs: RegImm, size: OperandSize) {
         Self::ensure_two_argument_form(&dst, &lhs);
         match (rhs, dst) {

@@ -166,6 +166,10 @@ pub(crate) fn xmm15() -> Reg {
     fpr(15)
 }
 
+/// Returns the floating point scratch register, which is `xmm15`.
+pub(crate) fn scratch_xmm() -> Reg {
+    xmm15()
+}
+
 const GPR: u32 = 16;
 const FPR: u32 = 16;
 const ALLOCATABLE_GPR: u32 = (1 << GPR) - 1;
@@ -174,12 +178,15 @@ const ALLOCATABLE_FPR: u32 = (1 << FPR) - 1;
 // R14: Is a pinned register, used as the instance register.
 const NON_ALLOCATABLE_GPR: u32 = (1 << ENC_RBP) | (1 << ENC_RSP) | (1 << ENC_R11) | (1 << ENC_R14);
 
+// xmm15: Is used as the scratch register.
+const NON_ALLOCATABLE_FPR: u32 = 1 << 15;
+
 /// Bitmask to represent the available general purpose registers.
 pub(crate) const ALL_GPR: u32 = ALLOCATABLE_GPR & !NON_ALLOCATABLE_GPR;
 /// Bitmask to represent the available floating point registers.
 // Note: xmm15 is reserved as the floating point scratch register, so it is
 // excluded from the allocatable set.
-pub(crate) const ALL_FPR: u32 = ALLOCATABLE_FPR;
+pub(crate) const ALL_FPR: u32 = ALLOCATABLE_FPR & !NON_ALLOCATABLE_FPR;
 
 /// Returns the callee-saved registers according to a particular calling
 /// convention.

@@ -317,6 +317,12 @@ pub(crate) trait MacroAssembler {
     /// Perform multiplication operation.
     fn mul(&mut self, dst: RegImm, lhs: RegImm, rhs: RegImm, size: OperandSize);
 
+    /// Perform a floating point abs operation.
+    fn float_abs(&mut self, dst: Reg, src: RegImm, size: OperandSize);
+
+    /// Perform a floating point negation operation.
+    fn float_neg(&mut self, dst: Reg, src: RegImm, size: OperandSize);
+
     /// Perform logical and operation.
     fn and(&mut self, dst: RegImm, lhs: RegImm, rhs: RegImm, size: OperandSize);
 

@@ -39,6 +39,10 @@ macro_rules! def_unsupported {
     (emit I64Const $($rest:tt)*) => {};
     (emit F32Const $($rest:tt)*) => {};
     (emit F64Const $($rest:tt)*) => {};
+    (emit F32Abs $($rest:tt)*) => {};
+    (emit F64Abs $($rest:tt)*) => {};
+    (emit F32Neg $($rest:tt)*) => {};
+    (emit F64Neg $($rest:tt)*) => {};
     (emit I32Add $($rest:tt)*) => {};
     (emit I64Add $($rest:tt)*) => {};
     (emit I32Sub $($rest:tt)*) => {};
@@ -142,6 +146,34 @@ where
         self.context.stack.push(Val::f64(val));
     }
 
+    /// Handles `f32.abs`: calls `float_abs` in place (same register as source
+    /// and destination) on the register handed to the closure by `unop`, with
+    /// a 32-bit operand size.
+    fn visit_f32_abs(&mut self) {
+        self.context
+            .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| {
+                masm.float_abs(reg, RegImm::Reg(reg), size);
+            });
+    }
+
+    /// Handles `f64.abs`: calls `float_abs` in place (same register as source
+    /// and destination) on the register handed to the closure by `unop`, with
+    /// a 64-bit operand size.
+    fn visit_f64_abs(&mut self) {
+        self.context
+            .unop(self.masm, OperandSize::S64, &mut |masm, reg, size| {
+                masm.float_abs(reg, RegImm::Reg(reg), size);
+            });
+    }
+
+    /// Handles `f32.neg`: calls `float_neg` in place (same register as source
+    /// and destination) on the register handed to the closure by `unop`, with
+    /// a 32-bit operand size.
+    fn visit_f32_neg(&mut self) {
+        self.context
+            .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| {
+                masm.float_neg(reg, RegImm::Reg(reg), size);
+            });
+    }
+
+    /// Handles `f64.neg`: calls `float_neg` in place (same register as source
+    /// and destination) on the register handed to the closure by `unop`, with
+    /// a 64-bit operand size.
+    fn visit_f64_neg(&mut self) {
+        self.context
+            .unop(self.masm, OperandSize::S64, &mut |masm, reg, size| {
+                masm.float_neg(reg, RegImm::Reg(reg), size);
+            });
+    }
+
     fn visit_i32_add(&mut self) {
         self.context.i32_binop(self.masm, |masm, dst, src, size| {
             masm.add(dst, dst, src, size);

@@ -0,0 +1,25 @@
+;;! target = "x86_64"
+
+(module
+    (func (result f32)
+        (f32.const -1.32)
+        (f32.abs)
+    )
+)
+;;    0:	 55                   	push	rbp
+;;    1:	 4889e5               	mov	rbp, rsp
+;;    4:	 4883ec08             	sub	rsp, 8
+;;    8:	 4c893424             	mov	qword ptr [rsp], r14
+;;    c:	 f30f10051c000000     	movss	xmm0, dword ptr [rip + 0x1c]
+;;   14:	 41bbffffff7f         	mov	r11d, 0x7fffffff
+;;   1a:	 66450f6efb           	movd	xmm15, r11d
+;;   1f:	 410f54c7             	andps	xmm0, xmm15
+;;   23:	 4883c408             	add	rsp, 8
+;;   27:	 5d                   	pop	rbp
+;;   28:	 c3                   	ret	
+;;   29:	 0000                 	add	byte ptr [rax], al
+;;   2b:	 0000                 	add	byte ptr [rax], al
+;;   2d:	 0000                 	add	byte ptr [rax], al
+;;   2f:	 00c3                 	add	bl, al
+;;   31:	 f5                   	cmc	
+;;   32:	 a8bf                 	test	al, 0xbf