Skip to content

Commit

Permalink
Support fast field inversion for P-256, P-384, and P-521
Browse files Browse the repository at this point in the history
For P-256 this speeds up projective->affine conversion by about 5%,
saving ~2K cycles.

For P-384 it speeds up conversion by 25%, saving 8K cycles.

For P-521 it speeds up conversion by 20%, saving 33K cycles.

The savings are most pronounced with ECDSA signing (since it is the
fastest operation), improving by 2%, 4%, and 2% resp.

GH #4027
  • Loading branch information
randombit committed Jul 15, 2024
1 parent 55007af commit 71afb7b
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 13 deletions.
14 changes: 10 additions & 4 deletions src/lib/math/pcurves/pcurves_impl/pcurves_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,14 @@ class IntMod final {
return Self(Rep::redc(z));
}

void square_n(size_t n) {
std::array<W, 2 * N> z;
for(size_t i = 0; i != n; ++i) {
comba_sqr<N>(z.data(), this->data());
m_val = Rep::redc(z);
}
}

// Negation modulo p
constexpr Self negate() const {
auto x_is_zero = CT::all_zeros(this->data(), N);
Expand Down Expand Up @@ -213,9 +221,7 @@ class IntMod final {
}

for(size_t i = 1; i != Windows; ++i) {
for(size_t j = 0; j != WindowBits; ++j) {
r = r.square();
}
r.square_n(WindowBits);

const size_t w = read_window_bits<WindowBits>(std::span{exp}, (Windows - i - 1) * WindowBits);

Expand Down Expand Up @@ -1381,7 +1387,7 @@ inline auto map_to_curve_sswu(const typename C::FieldElement& u) -> typename C::
x1.conditional_assign(tv1.is_zero(), C::SSWU_C2());
const auto gx1 = C::AffinePoint::x3_ax_b(x1);

const auto x2 = C::SSWU_Z * u.square() * x1;
const auto x2 = z_u2 * x1;
const auto gx2 = C::AffinePoint::x3_ax_b(x2);

// Will be zero if gx1 is not a square
Expand Down
33 changes: 30 additions & 3 deletions src/lib/math/pcurves/pcurves_impl/pcurves_wrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

namespace Botan::PCurve {

template <typename C>
concept curve_supports_fe_invert2 = requires(const typename C::FieldElement& fe) {
{ C::fe_invert2(fe) } -> std::same_as<typename C::FieldElement>;
};

template <typename C>
class PrimeOrderCurveImpl final : public PrimeOrderCurve {
public:
Expand All @@ -28,6 +33,28 @@ class PrimeOrderCurveImpl final : public PrimeOrderCurve {
WindowedMul2Table<C, WindowBits> m_table;
};

static typename C::AffinePoint curve_point_to_affine(const typename C::ProjectivePoint& pt) {
if constexpr(curve_supports_fe_invert2<C>) {
if(pt.is_identity().as_bool()) {
return C::AffinePoint::identity();
}

const auto z2_inv = C::fe_invert2(pt.z());
const auto z3_inv = z2_inv.square() * pt.z();
return typename C::AffinePoint(pt.x() * z2_inv, pt.y() * z3_inv);
} else {
return pt.to_affine();
}
}

static typename C::FieldElement curve_point_to_affine_x(const typename C::ProjectivePoint& pt) {
if constexpr(curve_supports_fe_invert2<C>) {
return pt.x() * C::fe_invert2(pt.z());
} else {
return pt.to_affine().x();
}
}

static_assert(C::OrderBits <= PrimeOrderCurve::MaximumBitLength);
static_assert(C::PrimeFieldBits <= PrimeOrderCurve::MaximumBitLength);

Expand Down Expand Up @@ -78,7 +105,7 @@ class PrimeOrderCurveImpl final : public PrimeOrderCurve {
return {};
}
std::array<uint8_t, C::FieldElement::BYTES> x_bytes;
pt.to_affine().x().serialize_to(std::span{x_bytes});
curve_point_to_affine_x(pt).serialize_to(std::span{x_bytes});
return stash(C::Scalar::from_wide_bytes(std::span<const uint8_t, C::FieldElement::BYTES>{x_bytes}));
} catch(std::bad_cast&) {
throw Invalid_Argument("Curve mismatch");
Expand All @@ -88,14 +115,14 @@ class PrimeOrderCurveImpl final : public PrimeOrderCurve {
Scalar base_point_mul_x_mod_order(const Scalar& scalar, RandomNumberGenerator& rng) const override {
auto pt = m_mul_by_g.mul(from_stash(scalar), rng);
std::array<uint8_t, C::FieldElement::BYTES> x_bytes;
pt.to_affine().x().serialize_to(std::span{x_bytes});
curve_point_to_affine_x(pt).serialize_to(std::span{x_bytes});
return stash(C::Scalar::from_wide_bytes(std::span<const uint8_t, C::FieldElement::BYTES>{x_bytes}));
}

AffinePoint generator() const override { return stash(C::G); }

AffinePoint point_to_affine(const ProjectivePoint& pt) const override {
return stash(from_stash(pt).to_affine());
return stash(curve_point_to_affine(from_stash(pt)));
}

ProjectivePoint point_to_projective(const AffinePoint& pt) const override {
Expand Down
44 changes: 42 additions & 2 deletions src/lib/math/pcurves/pcurves_secp256r1/pcurves_secp256r1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,51 @@ class Params final : public EllipticCurveParameters<
#if BOTAN_MP_WORD_BITS == 32
// Secp256r1Rep works for 64 bit also, but is at best marginally faster at least
// on compilers/CPUs tested so far
class Curve final : public EllipticCurve<Params, Secp256r1Rep> {};
typedef EllipticCurve<Params, Secp256r1Rep> Secp256r1Base;
#else
class Curve final : public EllipticCurve<Params> {};
typedef EllipticCurve<Params> Secp256r1Base;
#endif

class Curve final : public Secp256r1Base {
public:
// Return the square of the inverse of x
static FieldElement fe_invert2(const FieldElement& x) {
FieldElement r = x.square();
r *= x;

const auto p2 = r;
r.square_n(2);
r *= p2;
const auto p4 = r;
r.square_n(4);
r *= p4;
const auto p8 = r;
r.square_n(8);
r *= p8;
const auto p16 = r;
r.square_n(16);
r *= p16;
const auto p32 = r;
r.square_n(32);
r *= x;
r.square_n(128);
r *= p32;
r.square_n(32);
r *= p32;
r.square_n(16);
r *= p16;
r.square_n(8);
r *= p8;
r.square_n(4);
r *= p4;
r.square_n(2);
r *= p2;
r.square_n(2);

return r;
}
};

} // namespace secp256r1

} // namespace
Expand Down
49 changes: 46 additions & 3 deletions src/lib/math/pcurves/pcurves_secp384r1/pcurves_secp384r1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,54 @@ class Params final : public EllipticCurveParameters<
-12> {
};

class Curve final : public EllipticCurve<Params, Secp384r1Rep> {};
// clang-format on

}
class Curve final : public EllipticCurve<Params, Secp384r1Rep> {
public:
// Return the square of the inverse of x
static FieldElement fe_invert2(const FieldElement& x) {
// From https://briansmith.org/ecc-inversion-addition-chains-01

FieldElement r = x.square();
r *= x;
const auto x2 = r;
r = r.square();
r *= x;
const auto x3 = r;
r.square_n(3);
r *= x3;
auto rl = r;
r.square_n(6);
r *= rl;
r.square_n(3);
r *= x3;
const auto x15 = r;
r.square_n(15);
r *= x15;
const auto x30 = r;
r.square_n(30);
r *= x30;
rl = r;
r.square_n(60);
r *= rl;
rl = r;
r.square_n(120);
r *= rl;
r.square_n(15);
r *= x15;
r.square_n(31);
r *= x30;
r.square_n(2);
r *= x2;
r.square_n(94);
r *= x30;
r.square_n(2);

// clang-format on
return r;
}
};

} // namespace secp384r1

} // namespace

Expand Down
44 changes: 43 additions & 1 deletion src/lib/math/pcurves/pcurves_secp521r1/pcurves_secp521r1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,49 @@ class Params final : public EllipticCurveParameters<

// clang-format on

class Curve final : public EllipticCurve<Params, P521Rep> {};
class Curve final : public EllipticCurve<Params, P521Rep> {
public:
// Return the square of the inverse of x
static FieldElement fe_invert2(const FieldElement& x) {
// Addition chain from https://eprint.iacr.org/2014/852.pdf page 6

FieldElement r = x.square();
r *= x;
r = r.square();
r *= x;
FieldElement rl = r;
r.square_n(3);
r *= rl;
r.square_n(1);
r *= x;
const auto a7 = r;
r.square_n(1);
r *= x;
rl = r;
r.square_n(8);
r *= rl;
rl = r;
r.square_n(16);
r *= rl;
rl = r;
r.square_n(32);
r *= rl;
rl = r;
r.square_n(64);
r *= rl;
rl = r;
r.square_n(128);
r *= rl;
rl = r;
r.square_n(256);
r *= rl;
r.square_n(7);
r *= a7;
r.square_n(2);

return r;
}
};

} // namespace secp521r1

Expand Down

0 comments on commit 71afb7b

Please sign in to comment.