Skip to content

Commit afbdec8

Browse files
committed
crypto: Implement wNAF MSM in ECC
- Replace Straus-Shamir MSM with windowed NAF (sliding window) method. - Set conservative initial windows size w=4. - Add NAF helper class, wNAF recoding, and shared MSM utility. - Cover NAF encoding/decoding with new crypto_wnaf unit tests. ### Benchmark results ``` │ o/ec.txt │ o/ec-wnaf-4.txt │ │ sec/op │ sec/op vs base │ precompile<PrecompileId::ecrecover,_evmmax_cpp>-14 126.3µ ± 0% 111.0µ ± 0% -12.13% (p=0.001 n=11) precompile<PrecompileId::ecmul,_evmmax_cpp>-14 53.91µ ± 0% 46.83µ ± 0% -13.13% (p=0.000 n=11) precompile<PrecompileId::p256verify,_evmone_cpp>-14 124.4µ ± 90% 107.2µ ± 90% -13.83% (p=0.028 n=11) geomean 94.61µ 82.28µ -13.03% │ o/ec.txt │ o/ec-wnaf-4.txt │ │ gas/op │ gas/op vs base │ precompile<PrecompileId::ecrecover,_evmmax_cpp>-14 30.00k ± 0% 30.00k ± 0% ~ (p=1.000 n=11) ¹ precompile<PrecompileId::ecmul,_evmmax_cpp>-14 60.00k ± 0% 60.00k ± 0% ~ (p=1.000 n=11) ¹ precompile<PrecompileId::p256verify,_evmone_cpp>-14 69.00k ± 0% 69.00k ± 0% ~ (p=1.000 n=11) ¹ geomean 49.89k 49.89k +0.00% ¹ all samples are equal │ o/ec.txt │ o/ec-wnaf-4.txt │ │ gas/s │ gas/s vs base │ precompile<PrecompileId::ecrecover,_evmmax_cpp>-14 23.75M ± 0% 27.04M ± 0% +13.86% (p=0.000 n=11) precompile<PrecompileId::ecmul,_evmmax_cpp>-14 111.3M ± 0% 128.1M ± 1% +15.09% (p=0.000 n=11) precompile<PrecompileId::p256verify,_evmone_cpp>-14 55.39M ± 0% 64.30M ± 0% +16.09% (p=0.000 n=11) geomean 52.71M 60.62M +15.01% │ o/ec.txt │ o/ec-wnaf-4.txt │ │ cycles/op │ cycles/op vs base │ precompile<PrecompileId::ecrecover,_evmmax_cpp>-14 503.5k ± 1% 441.4k ± 0% -12.33% (p=0.000 n=11) precompile<PrecompileId::ecmul,_evmmax_cpp>-14 214.9k ± 0% 186.5k ± 0% -13.22% (p=0.000 n=11) precompile<PrecompileId::p256verify,_evmone_cpp>-14 495.8k ± 90% 427.4k ± 0% -13.79% (p=0.010 n=11) geomean 377.1k 327.7k -13.11% │ o/ec.txt │ o/ec-wnaf-4.txt │ │ instructions/op │ instructions/op vs base │ precompile<PrecompileId::ecrecover,_evmmax_cpp>-14 1.537M ± 0% 1.382M ± 0% -10.12% (p=0.000 n=11) 
precompile<PrecompileId::ecmul,_evmmax_cpp>-14 737.5k ± 0% 663.6k ± 0% -10.03% (p=0.000 n=11) precompile<PrecompileId::p256verify,_evmone_cpp>-14 1.552M ± 0% 1.386M ± 0% -10.74% (p=0.000 n=11) geomean 1.207M 1.083M -10.29% ```
1 parent 67adeb2 commit afbdec8

3 files changed

Lines changed: 225 additions & 25 deletions

File tree

lib/evmone_precompiles/ecc.hpp

Lines changed: 108 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -319,9 +319,6 @@ ProjPoint<Curve> add(const ProjPoint<Curve>& p, const ProjPoint<Curve>& q) noexc
319319
const auto r = s2 - s1;
320320

321321
// Handle point doubling in case p == q, i.e. when u1 == u2 and s1 == s2.
322-
// TODO: Untested case of two points having the same y coordinate but different x.
323-
// The following assertion (r == 0) => (h == 0) should fail in that case.
324-
assert(r != 0 || h == 0);
325322
if (h == 0 && r == 0) [[unlikely]]
326323
return dbl(p);
327324

@@ -490,42 +487,128 @@ ProjPoint<Curve> mul(const AffinePoint<Curve>& p, typename Curve::uint_type c) n
490487
return r;
491488
}
492489

493-
/// Computes multi-scalar multiplication of u×P ⊕ v×Q.
490+
/// Windowed Non-adjacent Form (wNAF).
491+
/// Fixed-capacity container for the signed digits of a (windowed) NAF recoding of a scalar
/// of type UIntT. Digits are stored least significant first.
///
/// All members are constexpr so that constexpr producers (such as a constexpr recoding
/// function) can actually be evaluated at compile time.
template <typename UIntT>
class NAF
{
public:
    /// The type of a single signed digit.
    using digit_type = int8_t;

private:
    /// The storage for the NAF digits, starting from the least significant one.
    /// For a k-bit scalar, there can be at most k+1 digits.
    /// Value-initialized, so every digit that has not been set reads as 0.
    std::array<digit_type, sizeof(UIntT) * 8 + 1> digits_{};

    /// The number of digits used: index of the most significant non-zero digit plus one,
    /// or 0 if no non-zero digit has been set.
    size_t width_ = 0;

public:
    /// Returns the number of digits in the NAF representation.
    constexpr size_t width() const noexcept { return width_; }

    /// Returns the i-th digit in the NAF representation.
    ///
    /// Indices at or beyond width() read as 0, as long as they stay within the digit
    /// storage (sizeof(UIntT) * 8 + 1 entries). The access is not bounds-checked.
    constexpr digit_type operator[](size_t i) const noexcept { return digits_[i]; }

    /// Sets the i-th digit in the NAF representation and updates the width accordingly.
    ///
    /// A zero digit is ignored: it neither overwrites a stored digit nor changes the width.
    /// The index must be within the digit storage; it is not bounds-checked.
    constexpr void set(size_t i, digit_type d) noexcept
    {
        if (d == 0)
            return;

        digits_[i] = d;
        width_ = std::max(width_, i + 1);
    }
};
524+
525+
/// Convert an unsigned scalar value to its windowed Non-adjacent Form (wNAF).
494526
///
495-
/// The implementation uses the "Straus-Shamir trick": https://eprint.iacr.org/2003/257.pdf#page=7.
496-
template <typename Curve>
497-
ProjPoint<Curve> msm(const typename Curve::uint_type& u, const AffinePoint<Curve>& p,
498-
const typename Curve::uint_type& v, const AffinePoint<Curve>& q)
527+
/// See
528+
/// https://en.wikipedia.org/wiki/Elliptic_curve_point_multiplication#w-ary_non-adjacent_form_(wNAF)_method.
529+
template <unsigned W, typename UIntT>
530+
constexpr NAF<UIntT> to_wnaf(UIntT k) noexcept
499531
{
500-
ProjPoint<Curve> r;
532+
using digit_type = NAF<UIntT>::digit_type;
533+
static_assert(W >= 2);
534+
static_assert(W <= sizeof(digit_type) * 8);
535+
constexpr unsigned RADIX = 1 << W;
501536

502-
const auto w = u | v;
503-
const auto bit_width = sizeof(w) * 8 - intx::clz(w);
504-
if (bit_width == 0)
505-
return r;
537+
NAF<UIntT> naf;
538+
for (size_t i = 0; k != 0; ++i, k >>= 1)
539+
{
540+
const auto r = static_cast<unsigned>(k) % RADIX;
541+
if (r % 2 != 0)
542+
{
543+
const auto d_sign = r > RADIX / 2;
544+
const auto d_abs = d_sign ? RADIX - r : r;
545+
const auto d = d_sign ? -d_abs : d_abs;
546+
naf.set(i, static_cast<digit_type>(d));
547+
k -= d_sign ? -UIntT{d_abs} : UIntT{d_abs}; // intx lacks sign extending conversion.
548+
}
549+
}
550+
return naf;
551+
}
506552

507-
// Precompute affine P + Q. Works correctly if P == Q.
508-
const auto h = add_affine(p, q);
553+
template <unsigned W, typename Curve>
554+
void precompute_wnaf_table(
555+
std::span<ProjPoint<Curve>, 1 << (W - 2)> table, const AffinePoint<Curve>& p) noexcept
556+
{
557+
table[0] = ProjPoint{p}; // 1P.
558+
const auto two_p = dbl(table[0]); // 2P.
509559

510-
// Create lookup table for points. The index 0 is unused.
511-
// TODO: Put 0 at index 0 and use it in the loop to avoid the branch.
512-
const AffinePoint<Curve>* const points[]{nullptr, &p, &q, &h};
560+
for (size_t i = 1; i < table.size(); ++i)
561+
table[i] = add(table[i - 1], two_p); // (2i+3)P = (2i+1)P + 2P.
562+
}
513563

514-
for (auto i = bit_width; i != 0; --i)
564+
/// Computes multi-scalar multiplication using the wNAF (sliding window) method.
565+
template <unsigned W, size_t S, typename Curve>
566+
ProjPoint<Curve> msm_wnaf(std::span<const AffinePoint<Curve>* const, S> points,
567+
std::span<const typename Curve::uint_type* const, S> scalars) noexcept
568+
{
569+
static constexpr size_t TABLE_SIZE = 1 << (W - 2);
570+
571+
std::array<NAF<typename Curve::uint_type>, S> nafs;
572+
std::array<ProjPoint<Curve>, S * TABLE_SIZE> joint_table;
573+
574+
for (size_t s = 0; s < S; ++s)
575+
{
576+
nafs[s] = to_wnaf<W>(*scalars[s]);
577+
precompute_wnaf_table<W>(
578+
std::span<ProjPoint<Curve>, TABLE_SIZE>{&joint_table[s * TABLE_SIZE], TABLE_SIZE},
579+
*points[s]);
580+
}
581+
582+
ProjPoint<Curve> r;
583+
const auto max_width =
584+
std::ranges::max(nafs, {}, &NAF<typename Curve::uint_type>::width).width();
585+
for (size_t i = max_width; i != 0; --i)
515586
{
516587
r = dbl(r);
517588

518-
const auto u_bit = bit_test(u, i - 1);
519-
const auto v_bit = bit_test(v, i - 1);
520-
const auto idx = 2 * size_t{v_bit} + size_t{u_bit};
521-
if (idx == 0)
522-
continue;
523-
r = add(r, *points[idx]);
589+
for (size_t s = 0; s < S; ++s)
590+
{
591+
const auto d = nafs[s][i - 1];
592+
if (d == 0) // TODO: likely
593+
continue;
594+
595+
const auto* table = &joint_table[s * TABLE_SIZE];
596+
const auto& pt = table[(static_cast<unsigned>(std::abs(d)) - 1) / 2];
597+
r = add(r, d >= 0 ? pt : -pt);
598+
}
524599
}
525600

526601
return r;
527602
}
528603

604+
/// Computes multi-scalar multiplication of u×P ⊕ v×Q.
605+
template <typename Curve>
606+
ProjPoint<Curve> msm(const typename Curve::uint_type& u, const AffinePoint<Curve>& p,
607+
const typename Curve::uint_type& v, const AffinePoint<Curve>& q)
608+
{
609+
return msm_wnaf<4, 2, Curve>(std::array{&p, &q}, std::array{&u, &v});
610+
}
611+
529612
template <typename UIntT>
530613
struct SignedScalar
531614
{

test/unittests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ target_sources(
1010
baseline_analysis_test.cpp
1111
blockchaintest_loader_test.cpp
1212
bytecode_test.cpp
13+
crypto_wnaf.cpp
1314
evm_fixture.cpp
1415
evm_fixture.hpp
1516
evm_test.cpp

test/unittests/crypto_wnaf.cpp

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// evmone: Fast Ethereum Virtual Machine implementation
2+
// Copyright 2026 The evmone Authors.
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include <evmone_precompiles/ecc.hpp>
6+
#include <gtest/gtest.h>
7+
#include <intx/intx.hpp>
8+
#include <random>
9+
10+
using namespace evmmax::ecc;
11+
12+
namespace
13+
{
14+
template <typename UIntT>
15+
UIntT evaluate(NAF<UIntT> naf)
16+
{
17+
UIntT result = 0;
18+
UIntT base = 1;
19+
for (size_t i = 0; i < naf.width(); ++i)
20+
{
21+
const auto d = naf[i];
22+
const auto d_abs = static_cast<unsigned>(std::abs(d));
23+
const auto d_sign = d < 0;
24+
const auto r_abs = UIntT{d_abs};
25+
const auto r = d_sign ? -r_abs : r_abs;
26+
result += r * base;
27+
base <<= 1;
28+
}
29+
30+
if (naf.width() == 0)
31+
{
32+
// NAF == 0 <=> result == 0.
33+
EXPECT_EQ(result, 0);
34+
}
35+
else
36+
{
37+
// The most significant digit must be non-zero.
38+
EXPECT_NE(naf[naf.width() - 1], 0);
39+
}
40+
return result;
41+
}
42+
} // namespace
43+
44+
TEST(crypto_wnaf, example1)
{
    // 21 = 3·2^3 - 3, so the width-3 NAF digits (least significant first) are -3, 0, 0, 3.
    const auto naf = to_wnaf<3>(uint32_t{21});
    EXPECT_EQ(naf.width(), 4u);

    // One extra position past the width is checked to read as 0.
    size_t pos = 0;
    for (const int expected : {-3, 0, 0, 3, 0})
    {
        EXPECT_EQ(naf[pos], expected);
        ++pos;
    }

    EXPECT_EQ(evaluate(naf), 21u);
}
55+
56+
TEST(crypto_wnaf, zero)
{
    // The zero scalar has an empty recoding: no digits, and every probed position reads 0.
    constexpr uint64_t zero_scalar = 0;
    constexpr size_t last_probed = 32;

    const auto naf = to_wnaf<7>(zero_scalar);
    EXPECT_EQ(naf.width(), 0);

    size_t i = 0;
    while (i <= last_probed)
    {
        EXPECT_EQ(naf[i], 0);
        ++i;
    }

    EXPECT_EQ(evaluate(naf), 0);
}
64+
65+
TEST(crypto_wnaf, max_width)
{
    // 0xfffffffe = 2^32 - 2: a 32-bit scalar whose recoding needs all 33 digit slots
    // (digit -1 at position 1 and digit +1 at position 32).
    constexpr uint32_t x = 0xfffffffe;
    const auto naf = to_wnaf<4>(x);
    EXPECT_EQ(naf.width(), 33u);

    for (size_t i = 0; i <= 32; ++i)
    {
        const int expected = (i == 1) ? -1 : (i == 32) ? 1 : 0;
        EXPECT_EQ(naf[i], expected);
    }

    EXPECT_EQ(evaluate(naf), x);
}
77+
78+
TEST(crypto_wnaf, max_digit)
{
    // With W = 8 the largest possible digit is 127; this scalar produces it at position 1.
    constexpr uint32_t x = 0xfffffcfe;
    constexpr int largest_digit = 127;

    const auto naf = to_wnaf<8>(x);
    EXPECT_EQ(naf.width(), 33u);
    EXPECT_EQ(naf[1], largest_digit);
    EXPECT_EQ(evaluate(naf), x);
}
86+
87+
TEST(crypto_wnaf, min_digit)
{
    // With W = 8 the smallest possible digit is -127; 0x102 = 2^9 - 127·2 produces it at
    // position 1.
    constexpr uint32_t x = 0x102;
    constexpr int smallest_digit = -127;

    const auto naf = to_wnaf<8>(x);
    EXPECT_EQ(naf.width(), 10u);
    EXPECT_EQ(naf[1], smallest_digit);
    EXPECT_EQ(evaluate(naf), x);
}
95+
96+
TEST(crypto_wnaf, uint256_fuzz)
{
    // Pick a random 256-bit starting point (seeded from std::random_device, so every run
    // covers different values) and round-trip 100 consecutive scalars through the recoding
    // for several window sizes.
    std::mt19937_64 rng{std::random_device{}()};
    std::uniform_int_distribution<uint64_t> dist{};
    const intx::uint256 start{dist(rng), dist(rng), dist(rng), dist(rng)};

    constexpr size_t num_rounds = 100;
    for (size_t offset = 0; offset != num_rounds; ++offset)
    {
        const auto x = start + offset;

        ASSERT_EQ(evaluate(to_wnaf<2>(x)), x);
        ASSERT_EQ(evaluate(to_wnaf<3>(x)), x);
        ASSERT_EQ(evaluate(to_wnaf<4>(x)), x);
        ASSERT_EQ(evaluate(to_wnaf<5>(x)), x);
        ASSERT_EQ(evaluate(to_wnaf<8>(x)), x);
    }
}

0 commit comments

Comments
 (0)