crypto: Implement wNAF MSM in ECC

chfast · chfast · commit afbdec887060 · 2026-01-14T15:32:43.000+01:00
- Replace Straus-Shamir MSM with windowed NAF (sliding window) method.
- Set conservative initial window size w=4.
- Add NAF helper class, wNAF recoding, and shared MSM utility.
- Cover NAF encoding/decoding with new crypto_wnaf unit tests.

### Benchmark results

```
                                                    │   o/ec.txt   │           o/ec-wnaf-4.txt            │
                                                    │    sec/op    │    sec/op     vs base                │
precompile<PrecompileId::ecrecover,_evmmax_cpp>-14    126.3µ ±  0%   111.0µ ±  0%  -12.13% (p=0.001 n=11)
precompile<PrecompileId::ecmul,_evmmax_cpp>-14        53.91µ ±  0%   46.83µ ±  0%  -13.13% (p=0.000 n=11)
precompile<PrecompileId::p256verify,_evmone_cpp>-14   124.4µ ± 90%   107.2µ ± 90%  -13.83% (p=0.028 n=11)
geomean                                               94.61µ         82.28µ        -13.03%

                                                    │  o/ec.txt   │           o/ec-wnaf-4.txt            │
                                                    │   gas/op    │   gas/op     vs base                 │
precompile<PrecompileId::ecrecover,_evmmax_cpp>-14    30.00k ± 0%   30.00k ± 0%       ~ (p=1.000 n=11) ¹
precompile<PrecompileId::ecmul,_evmmax_cpp>-14        60.00k ± 0%   60.00k ± 0%       ~ (p=1.000 n=11) ¹
precompile<PrecompileId::p256verify,_evmone_cpp>-14   69.00k ± 0%   69.00k ± 0%       ~ (p=1.000 n=11) ¹
geomean                                               49.89k        49.89k       +0.00%
¹ all samples are equal

                                                    │  o/ec.txt   │           o/ec-wnaf-4.txt           │
                                                    │    gas/s    │    gas/s     vs base                │
precompile<PrecompileId::ecrecover,_evmmax_cpp>-14    23.75M ± 0%   27.04M ± 0%  +13.86% (p=0.000 n=11)
precompile<PrecompileId::ecmul,_evmmax_cpp>-14        111.3M ± 0%   128.1M ± 1%  +15.09% (p=0.000 n=11)
precompile<PrecompileId::p256verify,_evmone_cpp>-14   55.39M ± 0%   64.30M ± 0%  +16.09% (p=0.000 n=11)
geomean                                               52.71M        60.62M       +15.01%

                                                    │   o/ec.txt   │           o/ec-wnaf-4.txt           │
                                                    │  cycles/op   │  cycles/op   vs base                │
precompile<PrecompileId::ecrecover,_evmmax_cpp>-14    503.5k ±  1%   441.4k ± 0%  -12.33% (p=0.000 n=11)
precompile<PrecompileId::ecmul,_evmmax_cpp>-14        214.9k ±  0%   186.5k ± 0%  -13.22% (p=0.000 n=11)
precompile<PrecompileId::p256verify,_evmone_cpp>-14   495.8k ± 90%   427.4k ± 0%  -13.79% (p=0.010 n=11)
geomean                                               377.1k         327.7k       -13.11%

                                                    │    o/ec.txt     │             o/ec-wnaf-4.txt             │
                                                    │ instructions/op │ instructions/op  vs base                │
precompile<PrecompileId::ecrecover,_evmmax_cpp>-14        1.537M ± 0%       1.382M ± 0%  -10.12% (p=0.000 n=11)
precompile<PrecompileId::ecmul,_evmmax_cpp>-14            737.5k ± 0%       663.6k ± 0%  -10.03% (p=0.000 n=11)
precompile<PrecompileId::p256verify,_evmone_cpp>-14       1.552M ± 0%       1.386M ± 0%  -10.74% (p=0.000 n=11)
geomean                                                   1.207M            1.083M       -10.29%
```
diff --git a/lib/evmone_precompiles/ecc.hpp b/lib/evmone_precompiles/ecc.hpp
@@ -319,9 +319,6 @@ ProjPoint<Curve> add(const ProjPoint<Curve>& p, const ProjPoint<Curve>& q) noexc
     const auto r = s2 - s1;
 
     // Handle point doubling in case p == q, i.e. when u1 == u2 and s1 == s2.
-    // TODO: Untested case of two points having the same y coordinate but different x.
-    //       The following assertion (r == 0) => (h == 0) should fail in that case.
-    assert(r != 0 || h == 0);
     if (h == 0 && r == 0) [[unlikely]]
         return dbl(p);
 
@@ -490,42 +487,128 @@ ProjPoint<Curve> mul(const AffinePoint<Curve>& p, typename Curve::uint_type c) n
     return r;
 }
 
-/// Computes multi-scalar multiplication of u×P ⊕ v×Q.
+/// Windowed Non-adjacent Form (wNAF).
+template <typename UIntT>
+class NAF
+{
+public:
+    using digit_type = int8_t;
+
+private:
+    /// The storage for the NAF digits, starting from the least significant one.
+    /// For a k-bit scalar, there can be at most k+1 digits.
+    std::array<digit_type, sizeof(UIntT) * 8 + 1> digits_{};
+
+    /// The number of digits used to store the NAF representation.
+    size_t width_ = 0;
+
+public:
+    /// Returns the number of digits in the NAF representation.
+    constexpr size_t width() const noexcept { return width_; }
+
+    /// Returns the i-th digit in the NAF representation.
+    ///
+    /// Any index within the storage is valid: digits at or above width() read as 0.
+    constexpr digit_type operator[](size_t i) const noexcept { return digits_[i]; }
+
+    /// Stores a non-zero digit at index i and extends the width to cover it; d == 0 is ignored.
+    constexpr void set(size_t i, digit_type d) noexcept
+    {
+        if (d != 0)
+        {
+            digits_[i] = d;
+            width_ = std::max(width_, i + 1);
+        }
+    }
+};
+
+/// Convert an unsigned scalar value to its windowed Non-adjacent Form (wNAF).
 ///
-/// The implementation uses the "Straus-Shamir trick": https://eprint.iacr.org/2003/257.pdf#page=7.
-template <typename Curve>
-ProjPoint<Curve> msm(const typename Curve::uint_type& u, const AffinePoint<Curve>& p,
-    const typename Curve::uint_type& v, const AffinePoint<Curve>& q)
+/// See
+/// https://en.wikipedia.org/wiki/Elliptic_curve_point_multiplication#w-ary_non-adjacent_form_(wNAF)_method.
+template <unsigned W, typename UIntT>
+constexpr NAF<UIntT> to_wnaf(UIntT k) noexcept
 {
-    ProjPoint<Curve> r;
+    using digit_type = NAF<UIntT>::digit_type;
+    static_assert(W >= 2);
+    static_assert(W <= sizeof(digit_type) * 8);
+    constexpr unsigned RADIX = 1 << W;
 
-    const auto w = u | v;
-    const auto bit_width = sizeof(w) * 8 - intx::clz(w);
-    if (bit_width == 0)
-        return r;
+    NAF<UIntT> naf;
+    for (size_t i = 0; k != 0; ++i)
+    {
+        const auto r = static_cast<unsigned>(k) % RADIX;
+        k >>= 1;  // Shift first so the adjustment below cannot overflow UIntT.
+        if (r % 2 != 0)
+        {
+            const auto d_sign = r > RADIX / 2;  // Use the negative digit r - RADIX for large r.
+            const auto d_abs = d_sign ? RADIX - r : r;
+            naf.set(i, static_cast<digit_type>(d_sign ? -d_abs : d_abs));
+            k = d_sign ? k + UIntT{d_abs / 2 + 1} : k - UIntT{d_abs / 2};  // Both were odd.
+        }
+    }
+    return naf;
+}
 
-    // Precompute affine P + Q. Works correctly if P == Q.
-    const auto h = add_affine(p, q);
+template <unsigned W, typename Curve>
+void precompute_wnaf_table(
+    std::span<ProjPoint<Curve>, 1 << (W - 2)> table, const AffinePoint<Curve>& p) noexcept
+{
+    table[0] = ProjPoint{p};           // 1P.
+    const auto two_p = dbl(table[0]);  // 2P.
 
-    // Create lookup table for points. The index 0 is unused.
-    // TODO: Put 0 at index 0 and use it in the loop to avoid the branch.
-    const AffinePoint<Curve>* const points[]{nullptr, &p, &q, &h};
+    for (size_t i = 1; i < table.size(); ++i)  // Fill the remaining odd multiples of P.
+        table[i] = add(table[i - 1], two_p);   // table[i] = (2i+1)P = (2i-1)P + 2P.
+}
 
-    for (auto i = bit_width; i != 0; --i)
+/// Computes multi-scalar multiplication using the wNAF (sliding window) method.
+template <unsigned W, size_t S, typename Curve>
+ProjPoint<Curve> msm_wnaf(std::span<const AffinePoint<Curve>* const, S> points,
+    std::span<const typename Curve::uint_type* const, S> scalars) noexcept
+{
+    static constexpr size_t TABLE_SIZE = 1 << (W - 2);  // Odd multiples 1P..(2^(W-1) - 1)P.
+
+    std::array<NAF<typename Curve::uint_type>, S> nafs;
+    std::array<ProjPoint<Curve>, S * TABLE_SIZE> joint_table;  // TABLE_SIZE slots per point.
+
+    for (size_t s = 0; s < S; ++s)
+    {
+        nafs[s] = to_wnaf<W>(*scalars[s]);  // Recode the scalar into signed digits.
+        precompute_wnaf_table<W>(
+            std::span<ProjPoint<Curve>, TABLE_SIZE>{&joint_table[s * TABLE_SIZE], TABLE_SIZE},
+            *points[s]);
+    }
+
+    ProjPoint<Curve> r;
+    const auto max_width =
+        std::ranges::max(nafs, {}, &NAF<typename Curve::uint_type>::width).width();
+    for (size_t i = max_width; i != 0; --i)  // From the most significant digit down.
     {
         r = dbl(r);
 
-        const auto u_bit = bit_test(u, i - 1);
-        const auto v_bit = bit_test(v, i - 1);
-        const auto idx = 2 * size_t{v_bit} + size_t{u_bit};
-        if (idx == 0)
-            continue;
-        r = add(r, *points[idx]);
+        for (size_t s = 0; s < S; ++s)
+        {
+            const auto d = nafs[s][i - 1];
+            if (d == 0)  // TODO: likely
+                continue;
+
+            const auto* table = &joint_table[s * TABLE_SIZE];  // Slice with this point's multiples.
+            const auto& pt = table[(static_cast<unsigned>(std::abs(d)) - 1) / 2];
+            r = add(r, d >= 0 ? pt : -pt);  // The multiple is negated for d < 0.
+        }
     }
 
     return r;
 }
 
+/// Computes multi-scalar multiplication of u×P ⊕ v×Q using wNAF with window size 4.
+template <typename Curve>
+ProjPoint<Curve> msm(const typename Curve::uint_type& u, const AffinePoint<Curve>& p,
+    const typename Curve::uint_type& v, const AffinePoint<Curve>& q)
+{
+    return msm_wnaf<4, 2, Curve>(std::array{&p, &q}, std::array{&u, &v});  // W=4, two points.
+}
+
 template <typename UIntT>
 struct SignedScalar
 {
diff --git a/test/unittests/CMakeLists.txt b/test/unittests/CMakeLists.txt
@@ -10,6 +10,7 @@ target_sources(
     baseline_analysis_test.cpp
     blockchaintest_loader_test.cpp
     bytecode_test.cpp
+    crypto_wnaf.cpp
     evm_fixture.cpp
     evm_fixture.hpp
     evm_test.cpp
diff --git a/test/unittests/crypto_wnaf.cpp b/test/unittests/crypto_wnaf.cpp
@@ -0,0 +1,116 @@
+// evmone: Fast Ethereum Virtual Machine implementation
+// Copyright 2026 The evmone Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+#include <evmone_precompiles/ecc.hpp>
+#include <gtest/gtest.h>
+#include <intx/intx.hpp>
+#include <random>
+
+using namespace evmmax::ecc;
+
+namespace
+{
+template <typename UIntT>
+UIntT evaluate(NAF<UIntT> naf)  // Decodes the NAF digits back into a UIntT value.
+{
+    UIntT result = 0;  // Sum of digit * 2^i, accumulated in UIntT arithmetic.
+    UIntT base = 1;    // 2^i for the current digit.
+    for (size_t i = 0; i < naf.width(); ++i)
+    {
+        const auto d = naf[i];
+        const auto d_abs = static_cast<unsigned>(std::abs(d));
+        const auto d_sign = d < 0;
+        const auto r_abs = UIntT{d_abs};
+        const auto r = d_sign ? -r_abs : r_abs;  // A negative digit is negated in UIntT.
+        result += r * base;
+        base <<= 1;
+    }
+
+    if (naf.width() == 0)
+    {
+        // NAF == 0 <=> result == 0.
+        EXPECT_EQ(result, 0);
+    }
+    else
+    {
+        // The most significant digit must be non-zero.
+        EXPECT_NE(naf[naf.width() - 1], 0);
+    }
+    return result;  // NOTE(review): presumably wraps modulo 2^bits of UIntT - confirm.
+}
+}  // namespace
+
+TEST(crypto_wnaf, example1)
+{
+    const auto naf = to_wnaf<3>(uint32_t{21});  // 21 = 3 * 2^3 - 3.
+    EXPECT_EQ(naf.width(), 4u);
+    EXPECT_EQ(naf[0], -3);
+    EXPECT_EQ(naf[1], 0);
+    EXPECT_EQ(naf[2], 0);
+    EXPECT_EQ(naf[3], 3);
+    EXPECT_EQ(naf[4], 0);  // The digit just past the width reads as 0.
+    EXPECT_EQ(evaluate(naf), 21u);
+}
+
+TEST(crypto_wnaf, zero)
+{
+    const auto naf = to_wnaf<7>(uint64_t{0});  // Zero has no digits at all.
+    EXPECT_EQ(naf.width(), 0);
+    for (size_t i = 0; i <= 32; ++i)  // Indices stay inside the 65-digit storage of NAF<uint64_t>.
+        EXPECT_EQ(naf[i], 0);
+    EXPECT_EQ(evaluate(naf), 0);
+}
+
+TEST(crypto_wnaf, max_width)
+{
+    const auto x = uint32_t{0xfffffffe};  // 2^32 - 2^1: needs a digit above the top bit.
+    const auto naf = to_wnaf<4>(x);
+    EXPECT_EQ(naf.width(), 33u);  // One more digit than the 32 bits of the scalar.
+    EXPECT_EQ(naf[0], 0);
+    EXPECT_EQ(naf[1], -1);
+    for (size_t i = 2; i <= 31; ++i)
+        EXPECT_EQ(naf[i], 0);
+    EXPECT_EQ(naf[32], 1);
+    EXPECT_EQ(evaluate(naf), x);
+}
+
+TEST(crypto_wnaf, max_digit)
+{
+    const auto x = uint32_t{0xfffffcfe};  // 2^32 - 2^10 + 127 * 2^1.
+    const auto naf = to_wnaf<8>(x);
+    EXPECT_EQ(naf.width(), 33u);
+    EXPECT_EQ(naf[1], 127);  // Largest digit magnitude for W=8, positive sign.
+    EXPECT_EQ(evaluate(naf), x);
+}
+
+TEST(crypto_wnaf, min_digit)
+{
+    const auto x = uint32_t{0x102};  // 2^9 - 127 * 2^1.
+    const auto naf = to_wnaf<8>(x);
+    EXPECT_EQ(naf.width(), 10u);
+    EXPECT_EQ(naf[1], -127);  // Largest digit magnitude for W=8, negative sign.
+    EXPECT_EQ(evaluate(naf), x);
+}
+
+TEST(crypto_wnaf, uint256_fuzz)
+{
+    std::mt19937_64 rng{std::random_device{}()};  // NOTE: random seed, not logged.
+    std::uniform_int_distribution<uint64_t> dist{};
+    const intx::uint256 start{dist(rng), dist(rng), dist(rng), dist(rng)};
+
+    for (size_t i = 0; i < 100; ++i)
+    {
+        const auto x = start + i;  // 100 consecutive values from the random start.
+        const auto naf2 = to_wnaf<2>(x);
+        ASSERT_EQ(evaluate(naf2), x);
+        const auto naf3 = to_wnaf<3>(x);
+        ASSERT_EQ(evaluate(naf3), x);
+        const auto naf4 = to_wnaf<4>(x);
+        ASSERT_EQ(evaluate(naf4), x);
+        const auto naf5 = to_wnaf<5>(x);
+        ASSERT_EQ(evaluate(naf5), x);
+        const auto naf8 = to_wnaf<8>(x);
+        ASSERT_EQ(evaluate(naf8), x);
+    }
+}