diff --git a/crates/test-util/src/wast.rs b/crates/test-util/src/wast.rs index 7047c00e0a8f..259eda8155a6 100644 --- a/crates/test-util/src/wast.rs +++ b/crates/test-util/src/wast.rs @@ -622,16 +622,6 @@ impl WastTest { #[cfg(target_arch = "x86_64")] { - let unsupported = [ - // externref/reference-types related - // simd-related failures - "misc_testsuite/simd/canonicalize-nan.wast", - ]; - - if unsupported.iter().any(|part| self.path.ends_with(part)) { - return true; - } - // SIMD on Winch requires AVX instructions. #[cfg(target_arch = "x86_64")] if !(std::is_x86_feature_detected!("avx") && std::is_x86_feature_detected!("avx2")) diff --git a/tests/disas/winch/x64/f32_add/nan_canon.wat b/tests/disas/winch/x64/f32_add/nan_canon.wat new file mode 100644 index 000000000000..9aa0ed26395f --- /dev/null +++ b/tests/disas/winch/x64/f32_add/nan_canon.wat @@ -0,0 +1,40 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = "-Wnan-canonicalization" + +(module + (func (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.add + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x18(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x69 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movss %xmm0, 0xc(%rsp) +;; movss %xmm1, 8(%rsp) +;; movss 8(%rsp), %xmm0 +;; movss 0xc(%rsp), %xmm1 +;; addss %xmm0, %xmm1 +;; ucomiss %xmm1, %xmm1 +;; jnp 0x5d +;; 55: movss 0x13(%rip), %xmm1 +;; movaps %xmm1, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 69: ud2 +;; 6b: addb %al, (%rax) +;; 6d: addb %al, (%rax) +;; 6f: addb %al, (%rax) +;; 71: addb %al, %al diff --git a/tests/disas/winch/x64/f32x4_add/nan_canon.wat b/tests/disas/winch/x64/f32x4_add/nan_canon.wat new file mode 100644 index 000000000000..89f1878a80a0 --- /dev/null +++ b/tests/disas/winch/x64/f32x4_add/nan_canon.wat @@ -0,0 +1,42 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = ["-Wnan-canonicalization", "-Ccranelift-has-avx"] + +(module + (func (param v128 v128) (result v128) + local.get 0 + local.get 1 + f32x4.add + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x18(%r11), %r11 +;; addq $0x30, %r11 +;; cmpq %rsp, %r11 +;; ja 0x6c +;; 1c: movq %rdi, %r14 +;; subq $0x30, %rsp +;; movq %rdi, 0x28(%rsp) +;; movq %rsi, 0x20(%rsp) +;; movdqu %xmm0, 0x10(%rsp) +;; movdqu %xmm1, (%rsp) +;; movdqu (%rsp), %xmm0 +;; movdqu 0x10(%rsp), %xmm1 +;; vaddps %xmm0, %xmm1, %xmm1 +;; vcmpunordps %xmm1, %xmm1, %xmm15 +;; vandnps %xmm1, %xmm15, %xmm1 +;; vandps 0x15(%rip), %xmm15, %xmm15 +;; vorps %xmm1, %xmm15, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x30, %rsp +;; popq %rbp +;; retq +;; 6c: ud2 +;; 6e: addb %al, (%rax) +;; 70: addb %al, (%rax) +;; 72: sarb $0, (%rdi) +;; 76: sarb $0, (%rdi) +;; 7a: sarb $0, (%rdi) diff --git a/tests/disas/winch/x64/f64_div/nan_canon.wat b/tests/disas/winch/x64/f64_div/nan_canon.wat new file mode 100644 index 000000000000..cf0285da39fd --- /dev/null +++ b/tests/disas/winch/x64/f64_div/nan_canon.wat @@ -0,0 +1,43 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! 
flags = "-Wnan-canonicalization" + +(module + (func (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.div + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x18(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x68 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movsd %xmm0, 8(%rsp) +;; movsd %xmm1, (%rsp) +;; movsd (%rsp), %xmm0 +;; movsd 8(%rsp), %xmm1 +;; divsd %xmm0, %xmm1 +;; ucomisd %xmm1, %xmm1 +;; jnp 0x5c +;; 54: movsd 0x14(%rip), %xmm1 +;; movaps %xmm1, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 68: ud2 +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) +;; 70: addb %al, (%rax) +;; 72: addb %al, (%rax) +;; 74: addb %al, (%rax) +;; 76: clc diff --git a/tests/misc_testsuite/canonicalize-nan-scalar.wast b/tests/misc_testsuite/canonicalize-nan-scalar.wast new file mode 100644 index 000000000000..8b019cdec803 --- /dev/null +++ b/tests/misc_testsuite/canonicalize-nan-scalar.wast @@ -0,0 +1,153 @@ +;;! nan_canonicalization = true + +;; Scalar counterpart to simd/canonicalize-nan.wast. 
+ +(module + (func (export "f32.add") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.add) + (func (export "f32.sub") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.sub) + (func (export "f32.mul") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.mul) + (func (export "f32.div") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.div) + (func (export "f32.min") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.min) + (func (export "f32.max") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.max) + (func (export "f32.sqrt") (param f32) (result f32) + local.get 0 + f32.sqrt) + (func (export "f32.ceil") (param f32) (result f32) + local.get 0 + f32.ceil) + (func (export "f32.floor") (param f32) (result f32) + local.get 0 + f32.floor) + (func (export "f32.trunc") (param f32) (result f32) + local.get 0 + f32.trunc) + (func (export "f32.nearest") (param f32) (result f32) + local.get 0 + f32.nearest) + + (func (export "f64.add") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.add) + (func (export "f64.sub") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.sub) + (func (export "f64.mul") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.mul) + (func (export "f64.div") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.div) + (func (export "f64.min") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.min) + (func (export "f64.max") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.max) + (func (export "f64.sqrt") (param f64) (result f64) + local.get 0 + f64.sqrt) + (func (export "f64.ceil") (param f64) (result f64) + local.get 0 + f64.ceil) + (func (export "f64.floor") (param f64) (result f64) + local.get 0 + f64.floor) + (func (export "f64.trunc") (param f64) (result f64) + local.get 0 + f64.trunc) + (func (export "f64.nearest") (param f64) (result f64) + local.get 0 + f64.nearest) + + (func (export 
"reinterpret-and-demote") (param i64) (result i32) + local.get 0 + f64.reinterpret_i64 + f32.demote_f64 + i32.reinterpret_f32) + (func (export "reinterpret-and-promote") (param i32) (result i64) + local.get 0 + f32.reinterpret_i32 + f64.promote_f32 + i64.reinterpret_f64) + + ;; Expose raw bits of 0/0 to verify exact canonical NaN bit patterns. + (func (export "f32.div-nan-bits") (result i32) + f32.const 0 + f32.const 0 + f32.div + i32.reinterpret_f32) + (func (export "f64.div-nan-bits") (result i64) + f64.const 0 + f64.const 0 + f64.div + i64.reinterpret_f64) +) + +;; Exact bit patterns: canonical f32 NaN = 0x7fc00000, f64 = 0x7ff8000000000000 +(assert_return (invoke "f32.div-nan-bits") (i32.const 0x7fc00000)) +(assert_return (invoke "f64.div-nan-bits") (i64.const 0x7ff8000000000000)) + +;; NaN-producing operations +(assert_return (invoke "f32.div" (f32.const 0) (f32.const 0)) (f32.const nan:0x400000)) +(assert_return (invoke "f64.div" (f64.const 0) (f64.const 0)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f32.sqrt" (f32.const -1)) (f32.const nan:0x400000)) +(assert_return (invoke "f64.sqrt" (f64.const -1)) (f64.const nan:0x8000000000000)) + +;; NaN propagation through f32 arithmetic +(assert_return (invoke "f32.add" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.sub" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.mul" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.min" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.max" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) + +;; NaN propagation through f64 arithmetic +(assert_return (invoke "f64.add" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.sub" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.mul" (f64.const nan) (f64.const 1)) (f64.const 
nan:0x8000000000000)) +(assert_return (invoke "f64.min" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.max" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000)) + +;; Rounding NaN (f32) +(assert_return (invoke "f32.ceil" (f32.const nan)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.floor" (f32.const nan)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.trunc" (f32.const nan)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.nearest" (f32.const nan)) (f32.const nan:0x400000)) + +;; Rounding NaN (f64) +(assert_return (invoke "f64.ceil" (f64.const nan)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.floor" (f64.const nan)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.trunc" (f64.const nan)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.nearest" (f64.const nan)) (f64.const nan:0x8000000000000)) + +;; Demote/promote with non-canonical NaN bit patterns +(assert_return (invoke "reinterpret-and-demote" (i64.const 0xfffefdfccccdcecf)) (i32.const 0x7fc00000)) +(assert_return (invoke "reinterpret-and-promote" (i32.const 0xfffefdfc)) (i64.const 0x7ff8000000000000)) + +;; Normal values pass through unchanged +(assert_return (invoke "f32.add" (f32.const 1) (f32.const 2)) (f32.const 3)) +(assert_return (invoke "f64.div" (f64.const 10) (f64.const 2)) (f64.const 5)) +(assert_return (invoke "f32.sqrt" (f32.const 4)) (f32.const 2)) diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 26948ed15fe7..cf8324489cfc 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -59,6 +59,8 @@ pub(crate) struct MacroAssembler { ptr_size: OperandSize, /// Scratch register scope. scratch_scope: RegAlloc, + /// Shared flags. 
+ shared_flags: settings::Flags, } impl MacroAssembler { @@ -71,10 +73,11 @@ impl MacroAssembler { Ok(Self { sp_max: 0, stack_max_use_add: None, - asm: Assembler::new(shared_flags, isa_flags), + asm: Assembler::new(shared_flags.clone(), isa_flags), sp_offset: 0u32, ptr_size: ptr_type_from_ptr_size(ptr_size.size()).try_into()?, scratch_scope: RegAlloc::from(scratch_gpr_bitset(), scratch_fpr_bitset()), + shared_flags, }) } @@ -713,6 +716,50 @@ impl Masm for MacroAssembler { Ok(()) } + fn maybe_canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()> { + if !self.shared_flags.enable_nan_canonicalization() { + return Ok(()); + } + + let done_label = self.asm.buffer_mut().get_label(); + + self.asm.fcmp(reg.to_reg(), reg.to_reg(), size); + self.asm.jmp_if(Cond::Vc, done_label); + + let canonical_nan = match size { + OperandSize::S32 => crate::masm::CANONICAL_NAN_F32, + OperandSize::S64 => crate::masm::CANONICAL_NAN_F64, + _ => bail!(CodeGenError::unexpected_operand_size()), + }; + let constant = self.asm.add_constant(canonical_nan); + self.asm.uload( + inst::AMode::Const { addr: constant }, + reg, + size, + TRUSTED_FLAGS, + ); + + self.asm + .buffer_mut() + .bind_label(done_label, &mut Default::default()); + Ok(()) + } + + fn maybe_canonicalize_v128_nan( + &mut self, + _reg: WritableReg, + _lane_size: OperandSize, + ) -> Result<()> { + // This is a no-op unless NaN canonicalization is enabled, matching + // the scalar variant above. Only the enabled case is unimplemented + // on this backend. + if !self.shared_flags.enable_nan_canonicalization() { + return Ok(()); + } + + bail!(CodeGenError::unimplemented_masm_instruction()) + } + fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> { match (rhs, lhs, dst) { (RegImm::Imm(v), rn, rd) => { diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 7902d3339b31..2294ad8c2109 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -686,6 +686,68 @@ impl Masm for MacroAssembler { Ok(()) } + fn maybe_canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()> { + if !self.shared_flags.enable_nan_canonicalization() { + 
return Ok(()); + } + + let done_label = self.asm.buffer_mut().get_label(); + + self.asm.ucomis(reg.to_reg(), reg.to_reg(), size); + self.asm.jmp_if(CC::NP, done_label); + + let canonical_nan = match size { + OperandSize::S32 => crate::masm::CANONICAL_NAN_F32, + OperandSize::S64 => crate::masm::CANONICAL_NAN_F64, + _ => bail!(CodeGenError::unexpected_operand_size()), + }; + self.asm.load_fp_const(reg, canonical_nan, size); + + self.asm + .buffer_mut() + .bind_label(done_label, &mut Default::default()); + Ok(()) + } + + fn maybe_canonicalize_v128_nan( + &mut self, + reg: WritableReg, + lane_size: OperandSize, + ) -> Result<()> { + if !self.shared_flags.enable_nan_canonicalization() { + return Ok(()); + } + + self.ensure_has_avx()?; + + self.with_scratch::(|masm, scratch| { + // scratch = NaN mask (all-1s for NaN lanes) + masm.asm.xmm_vcmpp_rrr( + scratch.writable(), + reg.to_reg(), + reg.to_reg(), + lane_size, + VcmpKind::Unord, + ); + // reg = ~mask & original (zero out NaN lanes, keep non-NaN) + masm.asm + .xmm_vandnp_rrr(scratch.inner(), reg.to_reg(), reg, lane_size); + // scratch = mask & splatted canonical NaN = canonical NaN in NaN lanes only + let canon_nan = match lane_size { + OperandSize::S32 => &crate::masm::CANONICAL_NAN_F32X4[..], + OperandSize::S64 => &crate::masm::CANONICAL_NAN_F64X2[..], + _ => bail!(CodeGenError::unexpected_operand_size()), + }; + let addr = masm.asm.add_constant(canon_nan); + masm.asm + .xmm_vandp_rrm(scratch.inner(), &addr, scratch.writable(), lane_size); + // reg = non-NaN values | canonical NaN for NaN lanes + masm.asm + .xmm_vorp_rrr(scratch.inner(), reg.to_reg(), reg, lane_size); + Ok(()) + }) + } + fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> { Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?; match (rhs, dst) { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 0955bf46197e..1dc68778e706 100644 --- a/winch/codegen/src/masm.rs +++ 
b/winch/codegen/src/masm.rs @@ -13,6 +13,28 @@ use cranelift_codegen::{ use std::{fmt::Debug, ops::Range}; use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType}; +pub(crate) const CANONICAL_NAN_F32: &[u8] = &0x7FC00000u32.to_le_bytes(); +pub(crate) const CANONICAL_NAN_F64: &[u8] = &0x7FF8000000000000u64.to_le_bytes(); + +const NAN32: [u8; 4] = 0x7FC00000u32.to_le_bytes(); +const NAN64: [u8; 8] = 0x7FF8000000000000u64.to_le_bytes(); + +pub(crate) const CANONICAL_NAN_F32X4: [u8; 16] = { + let n = NAN32; + [ + n[0], n[1], n[2], n[3], n[0], n[1], n[2], n[3], n[0], n[1], n[2], n[3], n[0], n[1], n[2], + n[3], + ] +}; + +pub(crate) const CANONICAL_NAN_F64X2: [u8; 16] = { + let n = NAN64; + [ + n[0], n[1], n[2], n[3], n[4], n[5], n[6], n[7], n[0], n[1], n[2], n[3], n[4], n[5], n[6], + n[7], + ] +}; + pub(crate) use cranelift_codegen::ir::TrapCode; #[derive(Eq, PartialEq)] @@ -1692,6 +1714,17 @@ pub(crate) trait MacroAssembler { /// Perform a floating point square root operation. fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>; + /// Canonicalize NaN values in `reg` if the setting is enabled. + fn maybe_canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()>; + + /// Canonicalize NaN lanes in a v128 register if the setting is enabled. + /// `lane_size` is S32 for f32x4 or S64 for f64x2. + fn maybe_canonicalize_v128_nan( + &mut self, + reg: WritableReg, + lane_size: OperandSize, + ) -> Result<()>; + /// Perform logical and operation. 
fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>; diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 4d643266ed48..934628bb9ed0 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -594,6 +594,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_add(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -605,6 +606,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_add(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -616,6 +618,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_sub(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -627,6 +630,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_sub(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -638,6 +642,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_mul(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -649,6 +654,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_mul(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -660,6 +666,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_div(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -671,6 +678,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_div(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -682,6 +690,7 @@ 
where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_min(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -693,6 +702,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_min(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -704,6 +714,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_max(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -715,6 +726,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_max(writable!(dst), dst, src, size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -780,7 +792,12 @@ where let builtin = env.builtins.floor_f32::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64_floor(&mut self) -> Self::Output { @@ -793,7 +810,12 @@ where let builtin = env.builtins.floor_f64::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32_ceil(&mut self) -> Self::Output { @@ -806,7 +828,12 @@ where let builtin = env.builtins.ceil_f32::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64_ceil(&mut self) -> Self::Output { @@ -819,7 +846,12 @@ 
where let builtin = env.builtins.ceil_f64::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32_nearest(&mut self) -> Self::Output { @@ -832,7 +864,12 @@ where let builtin = env.builtins.nearest_f32::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64_nearest(&mut self) -> Self::Output { @@ -845,7 +882,12 @@ where let builtin = env.builtins.nearest_f64::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32_trunc(&mut self) -> Self::Output { @@ -858,7 +900,12 @@ where let builtin = env.builtins.trunc_f32::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64_trunc(&mut self) -> Self::Output { @@ -871,12 +918,18 @@ where let builtin = env.builtins.trunc_f64::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.float_sqrt(writable!(reg), reg, OperandSize::S32)?; + 
masm.maybe_canonicalize_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::f32(reg)) }) } @@ -884,6 +937,7 @@ where fn visit_f64_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.float_sqrt(writable!(reg), reg, OperandSize::S64)?; + masm.maybe_canonicalize_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::f64(reg)) }) } @@ -1097,6 +1151,7 @@ where fn visit_f32_demote_f64(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.demote(writable!(reg), reg)?; + masm.maybe_canonicalize_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::f32(reg)) }) } @@ -1104,6 +1159,7 @@ where fn visit_f64_promote_f32(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.promote(writable!(reg), reg)?; + masm.maybe_canonicalize_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::f64(reg)) }) } @@ -3724,6 +3780,7 @@ where fn visit_f32x4_demote_f64x2_zero(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_demote(reg, writable!(reg))?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::v128(reg)) }) } @@ -3731,6 +3788,7 @@ where fn visit_f64x2_promote_low_f32x4(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_promote(reg, writable!(reg))?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } @@ -4381,6 +4439,7 @@ where self.context .binop(self.masm, OperandSize::S32, |masm, dst, src, _size| { masm.v128_add(dst, src, writable!(dst), V128AddKind::F32x4)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4389,6 +4448,7 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, _size| { masm.v128_add(dst, src, writable!(dst), V128AddKind::F64x2)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) } @@ -4397,6 +4457,7 @@ where self.context 
.binop(self.masm, OperandSize::S32, |masm, dst, src, _size| { masm.v128_sub(dst, src, writable!(dst), V128SubKind::F32x4)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4405,22 +4466,34 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, _size| { masm.v128_sub(dst, src, writable!(dst), V128SubKind::F64x2)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) } fn visit_f32x4_mul(&mut self) -> Self::Output { - self.masm.v128_mul(&mut self.context, V128MulKind::F32x4) + self.masm.v128_mul(&mut self.context, V128MulKind::F32x4)?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64x2_mul(&mut self) -> Self::Output { - self.masm.v128_mul(&mut self.context, V128MulKind::F64x2) + self.masm.v128_mul(&mut self.context, V128MulKind::F64x2)?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32x4_div(&mut self) -> Self::Output { self.context .binop(self.masm, OperandSize::S32, |masm, dst, src, size| { masm.v128_div(dst, src, writable!(dst), size)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4429,6 +4502,7 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, size| { masm.v128_div(dst, src, writable!(dst), size)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) } @@ -4443,6 +4517,7 @@ where fn visit_f32x4_ceil(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_ceil(reg, writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; 
Ok(TypedReg::v128(reg)) }) } @@ -4457,6 +4532,7 @@ where fn visit_f64x2_ceil(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_ceil(reg, writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } @@ -4464,6 +4540,7 @@ where fn visit_f32x4_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_sqrt(reg, writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::v128(reg)) }) } @@ -4471,6 +4548,7 @@ where fn visit_f32x4_floor(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_floor(reg, writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::v128(reg)) }) } @@ -4478,6 +4556,7 @@ where fn visit_f64x2_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_sqrt(reg, writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } @@ -4485,6 +4564,7 @@ where fn visit_f64x2_floor(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_floor(reg, writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } @@ -4492,6 +4572,7 @@ where fn visit_f32x4_nearest(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_nearest(reg, writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::v128(reg)) }) } @@ -4499,18 +4580,29 @@ where fn visit_f64x2_nearest(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_nearest(reg, writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } fn 
visit_f32x4_trunc(&mut self) -> Self::Output { self.masm - .v128_trunc(&mut self.context, V128TruncKind::F32x4) + .v128_trunc(&mut self.context, V128TruncKind::F32x4)?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64x2_trunc(&mut self) -> Self::Output { self.masm - .v128_trunc(&mut self.context, V128TruncKind::F64x2) + .v128_trunc(&mut self.context, V128TruncKind::F64x2)?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm + .maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_v128_load32_zero(&mut self, memarg: MemArg) -> Self::Output { @@ -4565,6 +4657,7 @@ where self.context .binop(self.masm, OperandSize::S32, |masm, dst, src, _size| { masm.v128_min(dst, src, writable!(dst), V128MinKind::F32x4)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4573,6 +4666,7 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, _size| { masm.v128_min(dst, src, writable!(dst), V128MinKind::F64x2)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) } @@ -4581,6 +4675,7 @@ where self.context .binop(self.masm, OperandSize::S32, |masm, dst, src, _size| { masm.v128_max(dst, src, writable!(dst), V128MaxKind::F32x4)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4589,6 +4684,7 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, _size| { masm.v128_max(dst, src, writable!(dst), V128MaxKind::F64x2)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) }