From 0c5e168aaee1fcc5a196473c4433c6253c6164c1 Mon Sep 17 00:00:00 2001 From: r-near <163825889+r-near@users.noreply.github.com> Date: Wed, 1 Apr 2026 21:24:32 -0700 Subject: [PATCH 1/7] winch: respect the enable_nan_canonicalization setting --- .../canonicalize-nan-scalar.wast | 153 ++++++++++++++++++ winch/codegen/src/isa/aarch64/masm.rs | 34 +++- winch/codegen/src/isa/x64/masm.rs | 23 +++ winch/codegen/src/masm.rs | 3 + winch/codegen/src/visitor.rs | 47 +++++- 5 files changed, 251 insertions(+), 9 deletions(-) create mode 100644 tests/misc_testsuite/canonicalize-nan-scalar.wast diff --git a/tests/misc_testsuite/canonicalize-nan-scalar.wast b/tests/misc_testsuite/canonicalize-nan-scalar.wast new file mode 100644 index 000000000000..8b019cdec803 --- /dev/null +++ b/tests/misc_testsuite/canonicalize-nan-scalar.wast @@ -0,0 +1,153 @@ +;;! nan_canonicalization = true + +;; Scalar counterpart to simd/canonicalize-nan.wast. + +(module + (func (export "f32.add") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.add) + (func (export "f32.sub") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.sub) + (func (export "f32.mul") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.mul) + (func (export "f32.div") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.div) + (func (export "f32.min") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.min) + (func (export "f32.max") (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.max) + (func (export "f32.sqrt") (param f32) (result f32) + local.get 0 + f32.sqrt) + (func (export "f32.ceil") (param f32) (result f32) + local.get 0 + f32.ceil) + (func (export "f32.floor") (param f32) (result f32) + local.get 0 + f32.floor) + (func (export "f32.trunc") (param f32) (result f32) + local.get 0 + f32.trunc) + (func (export "f32.nearest") (param f32) (result f32) + local.get 0 + f32.nearest) + + (func (export "f64.add") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.add) + (func (export "f64.sub") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.sub) + (func (export "f64.mul") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.mul) + (func (export "f64.div") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.div) + (func (export "f64.min") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.min) + (func (export "f64.max") (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.max) + (func (export "f64.sqrt") (param f64) (result f64) + local.get 0 + f64.sqrt) + (func (export "f64.ceil") (param f64) (result f64) + local.get 0 + f64.ceil) + (func (export "f64.floor") (param f64) (result f64) + local.get 0 + f64.floor) + (func (export "f64.trunc") (param f64) (result f64) + local.get 0 + f64.trunc) + (func (export "f64.nearest") (param f64) (result f64) + local.get 0 + f64.nearest) + + (func (export "reinterpret-and-demote") (param i64) (result i32) + local.get 0 + f64.reinterpret_i64 + f32.demote_f64 + i32.reinterpret_f32) + (func (export "reinterpret-and-promote") (param i32) (result i64) + local.get 0 + f32.reinterpret_i32 + f64.promote_f32 + i64.reinterpret_f64) + + ;; Expose raw bits of 0/0 to verify exact canonical NaN bit patterns. + (func (export "f32.div-nan-bits") (result i32) + f32.const 0 + f32.const 0 + f32.div + i32.reinterpret_f32) + (func (export "f64.div-nan-bits") (result i64) + f64.const 0 + f64.const 0 + f64.div + i64.reinterpret_f64) +) + +;; Exact bit patterns: canonical f32 NaN = 0x7fc00000, f64 = 0x7ff8000000000000 +(assert_return (invoke "f32.div-nan-bits") (i32.const 0x7fc00000)) +(assert_return (invoke "f64.div-nan-bits") (i64.const 0x7ff8000000000000)) + +;; NaN-producing operations +(assert_return (invoke "f32.div" (f32.const 0) (f32.const 0)) (f32.const nan:0x400000)) +(assert_return (invoke "f64.div" (f64.const 0) (f64.const 0)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f32.sqrt" (f32.const -1)) (f32.const nan:0x400000)) +(assert_return (invoke "f64.sqrt" (f64.const -1)) (f64.const nan:0x8000000000000)) + +;; NaN propagation through f32 arithmetic +(assert_return (invoke "f32.add" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.sub" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.mul" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.min" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.max" (f32.const nan) (f32.const 1)) (f32.const nan:0x400000)) + +;; NaN propagation through f64 arithmetic +(assert_return (invoke "f64.add" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.sub" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.mul" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.min" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.max" (f64.const nan) (f64.const 1)) (f64.const nan:0x8000000000000)) + +;; Rounding NaN (f32) +(assert_return (invoke "f32.ceil" (f32.const nan)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.floor" (f32.const nan)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.trunc" (f32.const nan)) (f32.const nan:0x400000)) +(assert_return (invoke "f32.nearest" (f32.const nan)) (f32.const nan:0x400000)) + +;; Rounding NaN (f64) +(assert_return (invoke "f64.ceil" (f64.const nan)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.floor" (f64.const nan)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.trunc" (f64.const nan)) (f64.const nan:0x8000000000000)) +(assert_return (invoke "f64.nearest" (f64.const nan)) (f64.const nan:0x8000000000000)) + +;; Demote/promote with non-canonical NaN bit patterns +(assert_return (invoke "reinterpret-and-demote" (i64.const 0xfffefdfccccdcecf)) (i32.const 0x7fc00000)) +(assert_return (invoke "reinterpret-and-promote" (i32.const 0xfffefdfc)) (i64.const 0x7ff8000000000000)) + +;; Normal values pass through unchanged +(assert_return (invoke "f32.add" (f32.const 1) (f32.const 2)) (f32.const 3)) +(assert_return (invoke "f64.div" (f64.const 10) (f64.const 2)) (f64.const 5)) +(assert_return (invoke "f32.sqrt" (f32.const 4)) (f32.const 2)) diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 26948ed15fe7..d73f9ba7f982 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -59,6 +59,8 @@ pub(crate) struct MacroAssembler { ptr_size: OperandSize, /// Scratch register scope. scratch_scope: RegAlloc, + /// Shared flags. + shared_flags: settings::Flags, } impl MacroAssembler { @@ -71,10 +73,11 @@ impl MacroAssembler { Ok(Self { sp_max: 0, stack_max_use_add: None, - asm: Assembler::new(shared_flags, isa_flags), + asm: Assembler::new(shared_flags.clone(), isa_flags), sp_offset: 0u32, ptr_size: ptr_type_from_ptr_size(ptr_size.size()).try_into()?, scratch_scope: RegAlloc::from(scratch_gpr_bitset(), scratch_fpr_bitset()), + shared_flags, }) } @@ -713,6 +716,35 @@ impl Masm for MacroAssembler { Ok(()) } + fn canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()> { + if !self.shared_flags.enable_nan_canonicalization() { + return Ok(()); + } + + let done_label = self.asm.buffer_mut().get_label(); + + self.asm.fcmp(reg.to_reg(), reg.to_reg(), size); + self.asm.jmp_if(Cond::Vc, done_label); + + let canonical_nan: &[u8] = match size { + OperandSize::S32 => &0x7FC00000u32.to_le_bytes(), + OperandSize::S64 => &0x7FF8000000000000u64.to_le_bytes(), + _ => bail!(CodeGenError::unexpected_operand_size()), + }; + let constant = self.asm.add_constant(canonical_nan); + self.asm.uload( + inst::AMode::Const { addr: constant }, + reg, + size, + TRUSTED_FLAGS, + ); + + self.asm + .buffer_mut() + .bind_label(done_label, &mut Default::default()); + Ok(()) + } + fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> { match (rhs, lhs, dst) { (RegImm::Imm(v), rn, rd) => { diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 7902d3339b31..37b20173b6e0 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -686,6 +686,29 @@ impl Masm for MacroAssembler { Ok(()) } + fn canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()> { + if !self.shared_flags.enable_nan_canonicalization() { + return Ok(()); + } + + let done_label = self.asm.buffer_mut().get_label(); + + self.asm.ucomis(reg.to_reg(), reg.to_reg(), size); + self.asm.jmp_if(CC::NP, done_label); + + let canonical_nan: &[u8] = match size { + OperandSize::S32 => &0x7FC00000u32.to_le_bytes(), + OperandSize::S64 => &0x7FF8000000000000u64.to_le_bytes(), + _ => bail!(CodeGenError::unexpected_operand_size()), + }; + self.asm.load_fp_const(reg, canonical_nan, size); + + self.asm + .buffer_mut() + .bind_label(done_label, &mut Default::default()); + Ok(()) + } + fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> { Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?; match (rhs, dst) { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 0955bf46197e..64de3927cf09 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -1692,6 +1692,9 @@ pub(crate) trait MacroAssembler { /// Perform a floating point square root operation. fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>; + /// Canonicalize NaN values in `reg` if the setting is enabled. + fn canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()>; + /// Perform logical and operation. fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>; diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 4d643266ed48..9cdb04cf9fcb 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -594,6 +594,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_add(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -605,6 +606,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_add(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -616,6 +618,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_sub(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -627,6 +630,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_sub(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -638,6 +642,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_mul(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -649,6 +654,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_mul(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -660,6 +666,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_div(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -671,6 +678,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_div(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -682,6 +690,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_min(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -693,6 +702,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_min(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -704,6 +714,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_max(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -715,6 +726,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_max(writable!(dst), dst, src, size)?; + masm.canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -780,7 +792,8 @@ where let builtin = env.builtins.floor_f32::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + self.canonicalize_nan_for_round(OperandSize::S32) } fn visit_f64_floor(&mut self) -> Self::Output { @@ -793,7 +806,8 @@ where let builtin = env.builtins.floor_f64::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + self.canonicalize_nan_for_round(OperandSize::S64) } fn visit_f32_ceil(&mut self) -> Self::Output { @@ -806,7 +820,8 @@ where let builtin = env.builtins.ceil_f32::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + self.canonicalize_nan_for_round(OperandSize::S32) } fn visit_f64_ceil(&mut self) -> Self::Output { @@ -819,7 +834,8 @@ where let builtin = env.builtins.ceil_f64::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + self.canonicalize_nan_for_round(OperandSize::S64) } fn visit_f32_nearest(&mut self) -> Self::Output { @@ -832,7 +848,8 @@ where let builtin = env.builtins.nearest_f32::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + self.canonicalize_nan_for_round(OperandSize::S32) } fn visit_f64_nearest(&mut self) -> Self::Output { @@ -845,7 +862,8 @@ where let builtin = env.builtins.nearest_f64::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + self.canonicalize_nan_for_round(OperandSize::S64) } fn visit_f32_trunc(&mut self) -> Self::Output { @@ -858,7 +876,8 @@ where let builtin = env.builtins.trunc_f32::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + self.canonicalize_nan_for_round(OperandSize::S32) } fn visit_f64_trunc(&mut self) -> Self::Output { @@ -871,12 +890,14 @@ where let builtin = env.builtins.trunc_f64::()?; FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, - ) + )?; + self.canonicalize_nan_for_round(OperandSize::S64) } fn visit_f32_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.float_sqrt(writable!(reg), reg, OperandSize::S32)?; + masm.canonicalize_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::f32(reg)) }) } @@ -884,6 +905,7 @@ where fn visit_f64_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.float_sqrt(writable!(reg), reg, OperandSize::S64)?; + masm.canonicalize_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::f64(reg)) }) } @@ -1097,6 +1119,7 @@ where fn visit_f32_demote_f64(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.demote(writable!(reg), reg)?; + masm.canonicalize_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::f32(reg)) }) } @@ -1104,6 +1127,7 @@ where fn visit_f64_promote_f32(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.promote(writable!(reg), reg)?; + masm.canonicalize_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::f64(reg)) }) } @@ -4600,6 +4624,13 @@ impl<'a, 'translation, 'data, M> CodeGen<'a, 'translation, 'data, M, Emission> where M: MacroAssembler, { + fn canonicalize_nan_for_round(&mut self, size: OperandSize) -> Result<()> { + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.canonicalize_nan(writable!(result.into()), size)?; + self.context.stack.push(result.into()); + Ok(()) + } + fn cmp_i32s(&mut self, kind: IntCmpKind) -> Result<()> { self.context.i32_binop(self.masm, |masm, dst, src, size| { masm.cmp_with_set(writable!(dst), src, kind, size)?; From c476a6af0adbea68e1f822e75faec039ae7b4ca7 Mon Sep 17 00:00:00 2001 From: r-near <163825889+r-near@users.noreply.github.com> Date: Thu, 2 Apr 2026 09:56:16 -0700 Subject: [PATCH 2/7] add disas tests for NaN canonicalization --- tests/disas/winch/x64/f32_add/nan_canon.wat | 40 +++++++++++++++++++ tests/disas/winch/x64/f64_div/nan_canon.wat | 43 +++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 tests/disas/winch/x64/f32_add/nan_canon.wat create mode 100644 tests/disas/winch/x64/f64_div/nan_canon.wat diff --git a/tests/disas/winch/x64/f32_add/nan_canon.wat b/tests/disas/winch/x64/f32_add/nan_canon.wat new file mode 100644 index 000000000000..9aa0ed26395f --- /dev/null +++ b/tests/disas/winch/x64/f32_add/nan_canon.wat @@ -0,0 +1,40 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = "-Wnan-canonicalization" + +(module + (func (param f32 f32) (result f32) + local.get 0 + local.get 1 + f32.add + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x18(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x69 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movss %xmm0, 0xc(%rsp) +;; movss %xmm1, 8(%rsp) +;; movss 8(%rsp), %xmm0 +;; movss 0xc(%rsp), %xmm1 +;; addss %xmm0, %xmm1 +;; ucomiss %xmm1, %xmm1 +;; jnp 0x5d +;; 55: movss 0x13(%rip), %xmm1 +;; movaps %xmm1, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 69: ud2 +;; 6b: addb %al, (%rax) +;; 6d: addb %al, (%rax) +;; 6f: addb %al, (%rax) +;; 71: addb %al, %al diff --git a/tests/disas/winch/x64/f64_div/nan_canon.wat b/tests/disas/winch/x64/f64_div/nan_canon.wat new file mode 100644 index 000000000000..cf0285da39fd --- /dev/null +++ b/tests/disas/winch/x64/f64_div/nan_canon.wat @@ -0,0 +1,43 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = "-Wnan-canonicalization" + +(module + (func (param f64 f64) (result f64) + local.get 0 + local.get 1 + f64.div + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x18(%r11), %r11 +;; addq $0x20, %r11 +;; cmpq %rsp, %r11 +;; ja 0x68 +;; 1c: movq %rdi, %r14 +;; subq $0x20, %rsp +;; movq %rdi, 0x18(%rsp) +;; movq %rsi, 0x10(%rsp) +;; movsd %xmm0, 8(%rsp) +;; movsd %xmm1, (%rsp) +;; movsd (%rsp), %xmm0 +;; movsd 8(%rsp), %xmm1 +;; divsd %xmm0, %xmm1 +;; ucomisd %xmm1, %xmm1 +;; jnp 0x5c +;; 54: movsd 0x14(%rip), %xmm1 +;; movaps %xmm1, %xmm0 +;; addq $0x20, %rsp +;; popq %rbp +;; retq +;; 68: ud2 +;; 6a: addb %al, (%rax) +;; 6c: addb %al, (%rax) +;; 6e: addb %al, (%rax) +;; 70: addb %al, (%rax) +;; 72: addb %al, (%rax) +;; 74: addb %al, (%rax) +;; 76: clc From 7cdd58eadfac53fea387494408869caf34e6b042 Mon Sep 17 00:00:00 2001 From: r-near <163825889+r-near@users.noreply.github.com> Date: Mon, 6 Apr 2026 11:05:09 -0700 Subject: [PATCH 3/7] rename canonicalize_nan to maybe_canonicalize_nan --- winch/codegen/src/isa/aarch64/masm.rs | 2 +- winch/codegen/src/isa/x64/masm.rs | 2 +- winch/codegen/src/masm.rs | 2 +- winch/codegen/src/visitor.rs | 34 +++++++++++++-------------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index d73f9ba7f982..b62498a81de6 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -716,7 +716,7 @@ impl Masm for MacroAssembler { Ok(()) } - fn canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()> { + fn maybe_canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()> { if !self.shared_flags.enable_nan_canonicalization() { return Ok(()); } diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 37b20173b6e0..1566e65f264d 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -686,7 +686,7 @@ impl Masm for MacroAssembler { Ok(()) } - fn canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()> { + fn maybe_canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()> { if !self.shared_flags.enable_nan_canonicalization() { return Ok(()); } diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 64de3927cf09..1992525bf926 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -1693,7 +1693,7 @@ pub(crate) trait MacroAssembler { fn float_sqrt(&mut self, dst: WritableReg, src: Reg, size: OperandSize) -> Result<()>; /// Canonicalize NaN values in `reg` if the setting is enabled. - fn canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()>; + fn maybe_canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()>; /// Perform logical and operation. fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>; diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 9cdb04cf9fcb..0d085aea4d1a 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -594,7 +594,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_add(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -606,7 +606,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_add(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -618,7 +618,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_sub(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -630,7 +630,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_sub(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -642,7 +642,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_mul(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -654,7 +654,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_mul(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -666,7 +666,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_div(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -678,7 +678,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_div(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -690,7 +690,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_min(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -702,7 +702,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_min(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -714,7 +714,7 @@ where OperandSize::S32, &mut |masm: &mut M, dst, src, size| { masm.float_max(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f32(dst)) }, ) @@ -726,7 +726,7 @@ where OperandSize::S64, &mut |masm: &mut M, dst, src, size| { masm.float_max(writable!(dst), dst, src, size)?; - masm.canonicalize_nan(writable!(dst), size)?; + masm.maybe_canonicalize_nan(writable!(dst), size)?; Ok(TypedReg::f64(dst)) }, ) @@ -897,7 +897,7 @@ where fn visit_f32_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.float_sqrt(writable!(reg), reg, OperandSize::S32)?; - masm.canonicalize_nan(writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::f32(reg)) }) } @@ -905,7 +905,7 @@ where fn visit_f64_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.float_sqrt(writable!(reg), reg, OperandSize::S64)?; - masm.canonicalize_nan(writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::f64(reg)) }) } @@ -1119,7 +1119,7 @@ where fn visit_f32_demote_f64(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.demote(writable!(reg), reg)?; - masm.canonicalize_nan(writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::f32(reg)) }) } @@ -1127,7 +1127,7 @@ where fn visit_f64_promote_f32(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.promote(writable!(reg), reg)?; - masm.canonicalize_nan(writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::f64(reg)) }) } @@ -4626,7 +4626,7 @@ where { fn canonicalize_nan_for_round(&mut self, size: OperandSize) -> Result<()> { let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.canonicalize_nan(writable!(result.into()), size)?; + self.masm.maybe_canonicalize_nan(writable!(result.into()), size)?; self.context.stack.push(result.into()); Ok(()) } From 75cecea996194437e58c8f3b3b14102bd84b308e Mon Sep 17 00:00:00 2001 From: r-near <163825889+r-near@users.noreply.github.com> Date: Mon, 6 Apr 2026 11:05:46 -0700 Subject: [PATCH 4/7] extract canonical NaN constants to shared masm module --- winch/codegen/src/isa/aarch64/masm.rs | 6 +++--- winch/codegen/src/isa/x64/masm.rs | 6 +++--- winch/codegen/src/masm.rs | 3 +++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index b62498a81de6..6911a2559490 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -726,9 +726,9 @@ impl Masm for MacroAssembler { self.asm.fcmp(reg.to_reg(), reg.to_reg(), size); self.asm.jmp_if(Cond::Vc, done_label); - let canonical_nan: &[u8] = match size { - OperandSize::S32 => &0x7FC00000u32.to_le_bytes(), - OperandSize::S64 => &0x7FF8000000000000u64.to_le_bytes(), + let canonical_nan = match size { + OperandSize::S32 => crate::masm::CANONICAL_NAN_F32, + OperandSize::S64 => crate::masm::CANONICAL_NAN_F64, _ => bail!(CodeGenError::unexpected_operand_size()), }; let constant = self.asm.add_constant(canonical_nan); diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 1566e65f264d..c8b703544ff8 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -696,9 +696,9 @@ impl Masm for MacroAssembler { self.asm.ucomis(reg.to_reg(), reg.to_reg(), size); self.asm.jmp_if(CC::NP, done_label); - let canonical_nan: &[u8] = match size { - OperandSize::S32 => &0x7FC00000u32.to_le_bytes(), - OperandSize::S64 => &0x7FF8000000000000u64.to_le_bytes(), + let canonical_nan = match size { + OperandSize::S32 => crate::masm::CANONICAL_NAN_F32, + OperandSize::S64 => crate::masm::CANONICAL_NAN_F64, _ => bail!(CodeGenError::unexpected_operand_size()), }; self.asm.load_fp_const(reg, canonical_nan, size); diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 1992525bf926..bb7305799a2b 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -13,6 +13,9 @@ use cranelift_codegen::{ use std::{fmt::Debug, ops::Range}; use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType}; +pub(crate) const CANONICAL_NAN_F32: &[u8] = &0x7FC00000u32.to_le_bytes(); +pub(crate) const CANONICAL_NAN_F64: &[u8] = &0x7FF8000000000000u64.to_le_bytes(); + pub(crate) use cranelift_codegen::ir::TrapCode; #[derive(Eq, PartialEq)] From 172280745fc784d0f59c1bc7a5c06b973fd2fe12 Mon Sep 17 00:00:00 2001 From: r-near <163825889+r-near@users.noreply.github.com> Date: Mon, 6 Apr 2026 11:06:26 -0700 Subject: [PATCH 5/7] remove canonicalize_nan_for_round, inline at call sites --- winch/codegen/src/visitor.rs | 47 ++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 0d085aea4d1a..de7b7b1b39c7 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -793,7 +793,10 @@ where FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, )?; - self.canonicalize_nan_for_round(OperandSize::S32) + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64_floor(&mut self) -> Self::Output { @@ -807,7 +810,10 @@ where FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, )?; - self.canonicalize_nan_for_round(OperandSize::S64) + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32_ceil(&mut self) -> Self::Output { @@ -821,7 +827,10 @@ where FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, )?; - self.canonicalize_nan_for_round(OperandSize::S32) + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64_ceil(&mut self) -> Self::Output { @@ -835,7 +844,10 @@ where FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, )?; - self.canonicalize_nan_for_round(OperandSize::S64) + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32_nearest(&mut self) -> Self::Output { @@ -849,7 +861,10 @@ where FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, )?; - self.canonicalize_nan_for_round(OperandSize::S32) + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64_nearest(&mut self) -> Self::Output { @@ -863,7 +878,10 @@ where FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, )?; - self.canonicalize_nan_for_round(OperandSize::S64) + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32_trunc(&mut self) -> Self::Output { @@ -877,7 +895,10 @@ where FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, )?; - self.canonicalize_nan_for_round(OperandSize::S32) + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64_trunc(&mut self) -> Self::Output { @@ -891,7 +912,10 @@ where FnCall::emit::(env, masm, cx, Callee::Builtin(builtin)) }, )?; - self.canonicalize_nan_for_round(OperandSize::S64) + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32_sqrt(&mut self) -> Self::Output { @@ -4624,13 +4648,6 @@ impl<'a, 'translation, 'data, M> CodeGen<'a, 'translation, 'data, M, Emission> where M: MacroAssembler, { - fn canonicalize_nan_for_round(&mut self, size: OperandSize) -> Result<()> { - let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_nan(writable!(result.into()), size)?; - self.context.stack.push(result.into()); - Ok(()) - } - fn cmp_i32s(&mut self, kind: IntCmpKind) -> Result<()> { self.context.i32_binop(self.masm, |masm, dst, src, size| { masm.cmp_with_set(writable!(dst), src, kind, size)?; From cdef987aabb1682748559e747f6432b95f900768 Mon Sep 17 00:00:00 2001 From: r-near <163825889+r-near@users.noreply.github.com> Date: Mon, 6 Apr 2026 11:15:19 -0700 Subject: [PATCH 6/7] implement SIMD NaN canonicalization for x64 --- crates/test-util/src/wast.rs | 10 ----- tests/disas/winch/x64/f32x4_add/nan_canon.wat | 42 ++++++++++++++++++ winch/codegen/src/isa/aarch64/masm.rs | 8 ++++ winch/codegen/src/isa/x64/masm.rs | 39 ++++++++++++++++ winch/codegen/src/masm.rs | 27 ++++++++++++ winch/codegen/src/visitor.rs | 44 +++++++++++++++++-- 6 files changed, 156 insertions(+), 14 deletions(-) create mode 100644 tests/disas/winch/x64/f32x4_add/nan_canon.wat diff --git a/crates/test-util/src/wast.rs b/crates/test-util/src/wast.rs index 7047c00e0a8f..259eda8155a6 100644 --- a/crates/test-util/src/wast.rs +++ b/crates/test-util/src/wast.rs @@ -622,16 +622,6 @@ impl WastTest { #[cfg(target_arch = "x86_64")] { - let unsupported = [ - // externref/reference-types related - // simd-related failures - "misc_testsuite/simd/canonicalize-nan.wast", - ]; - - if unsupported.iter().any(|part| self.path.ends_with(part)) { - return true; - } - // SIMD on Winch requires AVX instructions. #[cfg(target_arch = "x86_64")] if !(std::is_x86_feature_detected!("avx") && std::is_x86_feature_detected!("avx2")) diff --git a/tests/disas/winch/x64/f32x4_add/nan_canon.wat b/tests/disas/winch/x64/f32x4_add/nan_canon.wat new file mode 100644 index 000000000000..89f1878a80a0 --- /dev/null +++ b/tests/disas/winch/x64/f32x4_add/nan_canon.wat @@ -0,0 +1,42 @@ +;;! target = "x86_64" +;;! test = "winch" +;;! flags = ["-Wnan-canonicalization", "-Ccranelift-has-avx"] + +(module + (func (param v128 v128) (result v128) + local.get 0 + local.get 1 + f32x4.add + ) +) +;; wasm[0]::function[0]: +;; pushq %rbp +;; movq %rsp, %rbp +;; movq 8(%rdi), %r11 +;; movq 0x18(%r11), %r11 +;; addq $0x30, %r11 +;; cmpq %rsp, %r11 +;; ja 0x6c +;; 1c: movq %rdi, %r14 +;; subq $0x30, %rsp +;; movq %rdi, 0x28(%rsp) +;; movq %rsi, 0x20(%rsp) +;; movdqu %xmm0, 0x10(%rsp) +;; movdqu %xmm1, (%rsp) +;; movdqu (%rsp), %xmm0 +;; movdqu 0x10(%rsp), %xmm1 +;; vaddps %xmm0, %xmm1, %xmm1 +;; vcmpunordps %xmm1, %xmm1, %xmm15 +;; vandnps %xmm1, %xmm15, %xmm1 +;; vandps 0x15(%rip), %xmm15, %xmm15 +;; vorps %xmm1, %xmm15, %xmm1 +;; movdqa %xmm1, %xmm0 +;; addq $0x30, %rsp +;; popq %rbp +;; retq +;; 6c: ud2 +;; 6e: addb %al, (%rax) +;; 70: addb %al, (%rax) +;; 72: sarb $0, (%rdi) +;; 76: sarb $0, (%rdi) +;; 7a: sarb $0, (%rdi) diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 6911a2559490..cf8324489cfc 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -745,6 +745,14 @@ impl Masm for MacroAssembler { Ok(()) } + fn maybe_canonicalize_v128_nan( + &mut self, + _reg: WritableReg, + _lane_size: OperandSize, + ) -> Result<()> { + bail!(CodeGenError::unimplemented_masm_instruction()) + } + fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> { match (rhs, lhs, dst) { (RegImm::Imm(v), rn, rd) => { diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index c8b703544ff8..2294ad8c2109 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -709,6 +709,45 @@ impl Masm for MacroAssembler { Ok(()) } + fn maybe_canonicalize_v128_nan( + &mut self, + reg: WritableReg, + lane_size: OperandSize, + ) -> Result<()> { + if !self.shared_flags.enable_nan_canonicalization() { + return Ok(()); + } + + self.ensure_has_avx()?; + + self.with_scratch::(|masm, scratch| { + // scratch = NaN mask (all-1s for NaN lanes) + masm.asm.xmm_vcmpp_rrr( + scratch.writable(), + reg.to_reg(), + reg.to_reg(), + lane_size, + VcmpKind::Unord, + ); + // reg = ~mask & original (zero out NaN lanes, keep non-NaN) + masm.asm + .xmm_vandnp_rrr(scratch.inner(), reg.to_reg(), reg, lane_size); + // scratch = mask & splatted canonical NaN = canonical NaN in NaN lanes only + let canon_nan = match lane_size { + OperandSize::S32 => &crate::masm::CANONICAL_NAN_F32X4[..], + OperandSize::S64 => &crate::masm::CANONICAL_NAN_F64X2[..], + _ => bail!(CodeGenError::unexpected_operand_size()), + }; + let addr = masm.asm.add_constant(canon_nan); + masm.asm + .xmm_vandp_rrm(scratch.inner(), &addr, scratch.writable(), lane_size); + // reg = non-NaN values | canonical NaN for NaN lanes + masm.asm + .xmm_vorp_rrr(scratch.inner(), reg.to_reg(), reg, lane_size); + Ok(()) + }) + } + fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()> { Self::ensure_two_argument_form(&dst.to_reg(), &lhs)?; match (rhs, dst) { diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index bb7305799a2b..df573fab00d0 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -16,6 +16,25 @@ use wasmtime_environ::{PtrSize, WasmHeapType, WasmRefType, WasmValType}; pub(crate) const CANONICAL_NAN_F32: &[u8] = &0x7FC00000u32.to_le_bytes(); pub(crate) const CANONICAL_NAN_F64: &[u8] = &0x7FF8000000000000u64.to_le_bytes(); +const NAN32: [u8; 4] = 0x7FC00000u32.to_le_bytes(); +const NAN64: [u8; 8] = 0x7FF8000000000000u64.to_le_bytes(); + +pub(crate) const CANONICAL_NAN_F32X4: [u8; 16] = { + let n = NAN32; + [ + n[0], n[1], n[2], n[3], n[0], n[1], n[2], n[3], + n[0], n[1], n[2], n[3], n[0], n[1], n[2], n[3], + ] +}; + +pub(crate) const CANONICAL_NAN_F64X2: [u8; 16] = { + let n = NAN64; + [ + n[0], n[1], n[2], n[3], n[4], n[5], n[6], n[7], + n[0], n[1], n[2], n[3], n[4], n[5], n[6], n[7], + ] +}; + pub(crate) use cranelift_codegen::ir::TrapCode; #[derive(Eq, PartialEq)] @@ -1698,6 +1717,14 @@ pub(crate) trait MacroAssembler { /// Canonicalize NaN values in `reg` if the setting is enabled. fn maybe_canonicalize_nan(&mut self, reg: WritableReg, size: OperandSize) -> Result<()>; + /// Canonicalize NaN lanes in a v128 register if the setting is enabled. + /// `lane_size` is S32 for f32x4 or S64 for f64x2. + fn maybe_canonicalize_v128_nan( + &mut self, + reg: WritableReg, + lane_size: OperandSize, + ) -> Result<()>; + /// Perform logical and operation. fn and(&mut self, dst: WritableReg, lhs: Reg, rhs: RegImm, size: OperandSize) -> Result<()>; diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index de7b7b1b39c7..80b67994849f 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -3772,6 +3772,7 @@ where fn visit_f32x4_demote_f64x2_zero(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_demote(reg, writable!(reg))?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::v128(reg)) }) } @@ -3779,6 +3780,7 @@ where fn visit_f64x2_promote_low_f32x4(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_promote(reg, writable!(reg))?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } @@ -4429,6 +4431,7 @@ where self.context .binop(self.masm, OperandSize::S32, |masm, dst, src, _size| { masm.v128_add(dst, src, writable!(dst), V128AddKind::F32x4)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4437,6 +4440,7 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, _size| { masm.v128_add(dst, src, writable!(dst), V128AddKind::F64x2)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) } @@ -4445,6 +4449,7 @@ where self.context .binop(self.masm, OperandSize::S32, |masm, dst, src, _size| { masm.v128_sub(dst, src, writable!(dst), V128SubKind::F32x4)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4453,22 +4458,32 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, _size| { masm.v128_sub(dst, src, writable!(dst), V128SubKind::F64x2)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) } fn visit_f32x4_mul(&mut self) -> Self::Output { - self.masm.v128_mul(&mut self.context, V128MulKind::F32x4) + self.masm.v128_mul(&mut self.context, V128MulKind::F32x4)?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64x2_mul(&mut self) -> Self::Output { - self.masm.v128_mul(&mut self.context, V128MulKind::F64x2) + self.masm.v128_mul(&mut self.context, V128MulKind::F64x2)?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f32x4_div(&mut self) -> Self::Output { self.context .binop(self.masm, OperandSize::S32, |masm, dst, src, size| { masm.v128_div(dst, src, writable!(dst), size)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4477,6 +4492,7 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, size| { masm.v128_div(dst, src, writable!(dst), size)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) } @@ -4491,6 +4507,7 @@ where fn visit_f32x4_ceil(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_ceil(reg, writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::v128(reg)) }) } @@ -4505,6 +4522,7 @@ where fn visit_f64x2_ceil(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_ceil(reg, writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } @@ -4512,6 +4530,7 @@ where fn visit_f32x4_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_sqrt(reg, writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::v128(reg)) }) } @@ -4519,6 +4538,7 @@ where fn visit_f32x4_floor(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_floor(reg, writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::v128(reg)) }) } @@ -4526,6 +4546,7 @@ where fn visit_f64x2_sqrt(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_sqrt(reg, writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } @@ -4533,6 +4554,7 @@ where fn visit_f64x2_floor(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_floor(reg, writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } @@ -4540,6 +4562,7 @@ where fn visit_f32x4_nearest(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_nearest(reg, writable!(reg), OperandSize::S32)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S32)?; Ok(TypedReg::v128(reg)) }) } @@ -4547,18 +4570,27 @@ where fn visit_f64x2_nearest(&mut self) -> Self::Output { self.context.unop(self.masm, |masm, reg| { masm.v128_nearest(reg, writable!(reg), OperandSize::S64)?; + masm.maybe_canonicalize_v128_nan(writable!(reg), OperandSize::S64)?; Ok(TypedReg::v128(reg)) }) } fn visit_f32x4_trunc(&mut self) -> Self::Output { self.masm - .v128_trunc(&mut self.context, V128TruncKind::F32x4) + .v128_trunc(&mut self.context, V128TruncKind::F32x4)?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S32)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_f64x2_trunc(&mut self) -> Self::Output { self.masm - .v128_trunc(&mut self.context, V128TruncKind::F64x2) + .v128_trunc(&mut self.context, V128TruncKind::F64x2)?; + let result = self.context.pop_to_reg(self.masm, None)?; + self.masm.maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S64)?; + self.context.stack.push(result.into()); + Ok(()) } fn visit_v128_load32_zero(&mut self, memarg: MemArg) -> Self::Output { @@ -4613,6 +4645,7 @@ where self.context .binop(self.masm, OperandSize::S32, |masm, dst, src, _size| { masm.v128_min(dst, src, writable!(dst), V128MinKind::F32x4)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4621,6 +4654,7 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, _size| { masm.v128_min(dst, src, writable!(dst), V128MinKind::F64x2)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) } @@ -4629,6 +4663,7 @@ where self.context .binop(self.masm, OperandSize::S32, |masm, dst, src, _size| { masm.v128_max(dst, src, writable!(dst), V128MaxKind::F32x4)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S32)?; Ok(TypedReg::v128(dst)) }) } @@ -4637,6 +4672,7 @@ where self.context .binop(self.masm, OperandSize::S64, |masm, dst, src, _size| { masm.v128_max(dst, src, writable!(dst), V128MaxKind::F64x2)?; + masm.maybe_canonicalize_v128_nan(writable!(dst), OperandSize::S64)?; Ok(TypedReg::v128(dst)) }) } From 3649937f3921eb5fe14c442dcd80696aaa47e636 Mon Sep 17 00:00:00 2001 From: r-near <163825889+r-near@users.noreply.github.com> Date: Mon, 6 Apr 2026 11:15:36 -0700 Subject: [PATCH 7/7] cargo fmt --- winch/codegen/src/masm.rs | 8 ++++---- winch/codegen/src/visitor.rs | 36 ++++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index df573fab00d0..1dc68778e706 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -22,16 +22,16 @@ const NAN64: [u8; 8] = 0x7FF8000000000000u64.to_le_bytes(); pub(crate) const CANONICAL_NAN_F32X4: [u8; 16] = { let n = NAN32; [ - n[0], n[1], n[2], n[3], n[0], n[1], n[2], n[3], - n[0], n[1], n[2], n[3], n[0], n[1], n[2], n[3], + n[0], n[1], n[2], n[3], n[0], n[1], n[2], n[3], n[0], n[1], n[2], n[3], n[0], n[1], n[2], + n[3], ] }; pub(crate) const CANONICAL_NAN_F64X2: [u8; 16] = { let n = NAN64; [ - n[0], n[1], n[2], n[3], n[4], n[5], n[6], n[7], - n[0], n[1], n[2], n[3], n[4], n[5], n[6], n[7], + n[0], n[1], n[2], n[3], n[4], n[5], n[6], n[7], n[0], n[1], n[2], n[3], n[4], n[5], n[6], + n[7], ] }; diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 80b67994849f..934628bb9ed0 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -794,7 +794,8 @@ where }, )?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; self.context.stack.push(result.into()); Ok(()) } @@ -811,7 +812,8 @@ where }, )?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; self.context.stack.push(result.into()); Ok(()) } @@ -828,7 +830,8 @@ where }, )?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; self.context.stack.push(result.into()); Ok(()) } @@ -845,7 +848,8 @@ where }, )?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; self.context.stack.push(result.into()); Ok(()) } @@ -862,7 +866,8 @@ where }, )?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; self.context.stack.push(result.into()); Ok(()) } @@ -879,7 +884,8 @@ where }, )?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; self.context.stack.push(result.into()); Ok(()) } @@ -896,7 +902,8 @@ where }, )?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S32)?; self.context.stack.push(result.into()); Ok(()) } @@ -913,7 +920,8 @@ where }, )?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; + self.masm + .maybe_canonicalize_nan(writable!(result.into()), OperandSize::S64)?; self.context.stack.push(result.into()); Ok(()) } @@ -4466,7 +4474,8 @@ where fn visit_f32x4_mul(&mut self) -> Self::Output { self.masm.v128_mul(&mut self.context, V128MulKind::F32x4)?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S32)?; + self.masm + .maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S32)?; self.context.stack.push(result.into()); Ok(()) } @@ -4474,7 +4483,8 @@ where fn visit_f64x2_mul(&mut self) -> Self::Output { self.masm.v128_mul(&mut self.context, V128MulKind::F64x2)?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S64)?; + self.masm + .maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S64)?; self.context.stack.push(result.into()); Ok(()) } @@ -4579,7 +4589,8 @@ where self.masm .v128_trunc(&mut self.context, V128TruncKind::F32x4)?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S32)?; + self.masm + .maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S32)?; self.context.stack.push(result.into()); Ok(()) } @@ -4588,7 +4599,8 @@ where self.masm .v128_trunc(&mut self.context, V128TruncKind::F64x2)?; let result = self.context.pop_to_reg(self.masm, None)?; - self.masm.maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S64)?; + self.masm + .maybe_canonicalize_v128_nan(writable!(result.into()), OperandSize::S64)?; self.context.stack.push(result.into()); Ok(()) }