Skip to content

Commit

Permalink
[EC] Unify point doubling for P-256/384/521 (#1567)
Browse files Browse the repository at this point in the history
Implement and use a single version of point doubling
for implementations of NIST curves P-384, P-521, and
Fiat-crypto based implementation of P-256. The change
does not affect performance.

Point addition will be unified in a subsequent change.

I verified the performance was not affected on Graviton 3, Intel, and M1
CPUs. Example for M1:
```
Before
Did 2882750 EC POINT P-384 dbl operations in 1000082us (2882513.6 ops/sec)
Did 1600000 EC POINT P-384 add operations in 1000497us (1599205.2 ops/sec)
Did 7051 EC POINT P-384 mul operations in 1078289us (6539.1 ops/sec)
Did 28000 EC POINT P-384 mul base operations in 1000115us (27996.8 ops/sec)
Did 5632 EC POINT P-384 mul public operations in 1062456us (5300.9 ops/sec)
Did 2685500 EC POINT P-521 dbl operations in 1000037us (2685400.6 ops/sec)
Did 1435000 EC POINT P-521 add operations in 1000129us (1434814.9 ops/sec)
Did 4928 EC POINT P-521 mul operations in 1055318us (4669.7 ops/sec)
Did 19000 EC POINT P-521 mul base operations in 1022199us (18587.4 ops/sec)
Did 3850 EC POINT P-521 mul public operations in 1036809us (3713.3 ops/sec)

After:
Did 2888250 EC POINT P-384 dbl operations in 1000028us (2888169.1 ops/sec)
Did 1593000 EC POINT P-384 add operations in 1000405us (1592355.1 ops/sec)
Did 6875 EC POINT P-384 mul operations in 1054301us (6520.9 ops/sec)
Did 28000 EC POINT P-384 mul base operations in 1000818us (27977.1 ops/sec)
Did 5555 EC POINT P-384 mul public operations in 1056370us (5258.6 ops/sec)
Did 2775250 EC POINT P-521 dbl operations in 1000021us (2775191.7 ops/sec)
Did 1435000 EC POINT P-521 add operations in 1000085us (1434878.0 ops/sec)
Did 4840 EC POINT P-521 mul operations in 1044164us (4635.3 ops/sec)
Did 19000 EC POINT P-521 mul base operations in 1027887us (18484.5 ops/sec)
Did 3883 EC POINT P-521 mul public operations in 1051447us (3693.0 ops/sec)
```
  • Loading branch information
dkostic authored May 20, 2024
1 parent a83bcb5 commit fc06ecb
Show file tree
Hide file tree
Showing 9 changed files with 244 additions and 253 deletions.
2 changes: 1 addition & 1 deletion crypto/fipsmodule/bcm.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
// to control the order. $b section will place bcm in between the start/end markers
// which are in $a and $z.
#if defined(BORINGSSL_FIPS) && defined(OPENSSL_WINDOWS)

#pragma code_seg(".fipstx$b")
#pragma data_seg(".fipsda$b")
#pragma const_seg(".fipsco$b")
Expand Down Expand Up @@ -93,6 +92,7 @@
#include "ec/ec.c"
#include "ec/ec_key.c"
#include "ec/ec_montgomery.c"
#include "ec/ec_nistp.c"
#include "ec/felem.c"
#include "ec/oct.c"
#include "ec/p224-64.c"
Expand Down
112 changes: 112 additions & 0 deletions crypto/fipsmodule/ec/ec_nistp.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC

// In this file we will implement elliptic curve point operations for
// NIST curves P-256, P-384, and P-521. The idea is to implement the operations
// in a generic way such that the code can be reused instead of having
// a separate implementation for each of the curves. We implement:
// 1. point addition,
// 2. point doubling,
// 3. scalar multiplication of a base point,
// 4. scalar multiplication of an arbitrary point,
// 5. scalar multiplication of a base and an arbitrary point.
//
// Matrix of what has been done so far:
//
// | op | P-521 | P-384 | P-256 |
// |----------------------------|
// | 1. | | | |
// | 2. | x | x | x* |
// | 3. | | | |
// | 4. | | | |
// | 5. | | | |
// * For P-256, only the Fiat-crypto implementation in p256.c is replaced.

#include "ec_nistp.h"

// Some of the functions below need temporary field element variables.
// To avoid dynamic allocation we define the ec_nistp_felem type to have the
// maximum size possible (which is currently that of the P-521 curve). The
// values are hard-coded for the moment; this will be fixed when we migrate
// the whole P-521 implementation to ec_nistp.c.
#if defined(EC_NISTP_USE_64BIT_LIMB)
#define NISTP_FELEM_MAX_NUM_OF_LIMBS (9)
#else
#define NISTP_FELEM_MAX_NUM_OF_LIMBS (19)
#endif
typedef ec_nistp_felem_limb ec_nistp_felem[NISTP_FELEM_MAX_NUM_OF_LIMBS];

// Group operations
// ----------------
//
// Building on top of the field operations we have the operations on the
// elliptic curve group itself. Points on the curve are represented in Jacobian
// coordinates.
//
// ec_nistp_point_double calculates 2*(x_in, y_in, z_in)
//
// The method is based on:
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
// for which there is a Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
//
// However, we slightly changed the computation for efficiency (see the full
// explanation within the function body), which makes the Coq proof above
// not applicable to our implementation.
// TODO(awslc): Write a Coq correctness proof for our version of the algorithm.
//
// Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
// Other overlaps are not supported and not tested; in particular, x_out must
// not alias y_in or z_in, since x_out is written before they are last read.
void ec_nistp_point_double(const ec_nistp_felem_meth *ctx,
ec_nistp_felem_limb *x_out,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const ec_nistp_felem_limb *x_in,
const ec_nistp_felem_limb *y_in,
const ec_nistp_felem_limb *z_in) {
// All field arithmetic goes through |ctx|, so the same code serves every
// curve that provides a method table. The temporaries use the ec_nistp_felem
// type, which is sized for the largest supported field element so that no
// dynamic allocation is needed.
//
// The statement order below is significant: each output is first written
// only after the last read of its corresponding input, which is what makes
// x_out == x_in, y_out == y_in and z_out == z_in safe.
ec_nistp_felem delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
// delta = z^2
ctx->sqr(delta, z_in);
// gamma = y^2
ctx->sqr(gamma, y_in);
// beta = x*gamma
ctx->mul(beta, x_in, gamma);

// alpha = 3*(x-delta)*(x+delta)
// ftmp = x - delta, ftmp2 = x + delta
ctx->sub(ftmp, x_in, delta);
ctx->add(ftmp2, x_in, delta);

// tmptmp = 2*(x + delta), then ftmp2 = 3*(x + delta).
// Note that the second add uses ftmp2 as both an input and the output.
ctx->add(tmptmp, ftmp2, ftmp2);
ctx->add(ftmp2, ftmp2, tmptmp);
ctx->mul(alpha, ftmp, ftmp2);

// x' = alpha^2 - 8*beta
// x_in is not read past this point, so x_out may be written even when it
// aliases x_in. fourbeta (4*beta) is kept because y' needs it below;
// tmptmp is reused to hold 8*beta.
ctx->sqr(x_out, alpha);
ctx->add(fourbeta, beta, beta);
ctx->add(fourbeta, fourbeta, fourbeta);
ctx->add(tmptmp, fourbeta, fourbeta);
ctx->sub(x_out, x_out, tmptmp);

// z' = (y + z)^2 - gamma - delta
// The following calculation differs from the Coq proof cited above.
// The proof is for:
// add(delta, gamma, delta);
// add(ftmp, y_in, z_in);
// square(z_out, ftmp);
// sub(z_out, z_out, delta);
// Our operations sequence is a bit more efficient because it saves us
// a certain number of conditional moves.
// The add below is the last read of y_in and z_in, so z_out may be written
// afterwards even when it aliases z_in. Unlike the sequence in the proof,
// gamma and delta are left unmodified here; gamma is still needed for y'.
ctx->add(ftmp, y_in, z_in);
ctx->sqr(z_out, ftmp);
ctx->sub(z_out, z_out, gamma);
ctx->sub(z_out, z_out, delta);

// y' = alpha*(4*beta - x') - 8*gamma^2
// y_out first holds 4*beta - x' and is then multiplied by alpha in place.
// gamma is transformed in place: 2*gamma -> (2*gamma)^2 = 4*gamma^2 ->
// 8*gamma^2.
ctx->sub(y_out, fourbeta, x_out);
ctx->add(gamma, gamma, gamma);
ctx->sqr(gamma, gamma);
ctx->mul(y_out, alpha, y_out);
ctx->add(gamma, gamma, gamma);
ctx->sub(y_out, y_out, gamma);
}
65 changes: 65 additions & 0 deletions crypto/fipsmodule/ec/ec_nistp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
#ifndef EC_NISTP_H
#define EC_NISTP_H

#include <openssl/target.h>

#include <stdint.h>

// We have two implementations of the field arithmetic for NIST curves:
// - Fiat-crypto
// - s2n-bignum
// Both Fiat-crypto and s2n-bignum implementations are formally verified.
// Fiat-crypto implementation is fully portable C code, while s2n-bignum
// implements the operations in assembly for x86_64 and aarch64 platforms.
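// If (1) the target is aarch64, or x86_64 with MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX
// not defined, (2) the OS is linux or apple, and (3) OPENSSL_NO_ASM is not
// set, the s2n-bignum implementation is selected (EC_NISTP_USE_S2N_BIGNUM).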
#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \
((defined(OPENSSL_X86_64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \
defined(OPENSSL_AARCH64))
# define EC_NISTP_USE_S2N_BIGNUM
# define EC_NISTP_USE_64BIT_LIMB
#else
// Fiat-crypto has both 64-bit and 32-bit implementation.
# if defined(BORINGSSL_HAS_UINT128)
# define EC_NISTP_USE_64BIT_LIMB
# endif
#endif

#if defined(EC_NISTP_USE_64BIT_LIMB)
typedef uint64_t ec_nistp_felem_limb;
#else
typedef uint32_t ec_nistp_felem_limb;
#endif

// ec_nistp_felem_meth is a struct that holds pointers to implementations of field
// arithmetic functions for specific curves. It is meant to be used
// in higher level functions like this:
// void point_double(const ec_nistp_felem_meth *ctx, ...) {
// ctx->add(...);
// ctx->mul(...);
// }
// This makes the functions reusable between different curves by simply
// providing an appropriate methods object.
typedef struct {
  // Field addition: c = a + b.
  void (*add)(ec_nistp_felem_limb *c,
              const ec_nistp_felem_limb *a,
              const ec_nistp_felem_limb *b);

  // Field subtraction: c = a - b.
  void (*sub)(ec_nistp_felem_limb *c,
              const ec_nistp_felem_limb *a,
              const ec_nistp_felem_limb *b);

  // Field multiplication: c = a * b.
  void (*mul)(ec_nistp_felem_limb *c,
              const ec_nistp_felem_limb *a,
              const ec_nistp_felem_limb *b);

  // Field squaring: c = a^2.
  void (*sqr)(ec_nistp_felem_limb *c,
              const ec_nistp_felem_limb *a);
} ec_nistp_felem_meth;

const ec_nistp_felem_meth *p256_felem_methods(void);
const ec_nistp_felem_meth *p384_felem_methods(void);
const ec_nistp_felem_meth *p521_felem_methods(void);

// ec_nistp_point_double computes the doubling of the point (x_in, y_in, z_in)
// and stores the result in (x_out, y_out, z_out). All field arithmetic is
// performed through the function pointers in |ctx|, so the caller selects the
// curve by passing the matching method table (e.g. p384_felem_methods()).
//
// An output may alias its corresponding input (e.g. x_out == x_in); other
// overlaps between outputs and inputs are not supported.
void ec_nistp_point_double(const ec_nistp_felem_meth *ctx,
ec_nistp_felem_limb *x_out,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const ec_nistp_felem_limb *x_in,
const ec_nistp_felem_limb *y_in,
const ec_nistp_felem_limb *z_in);
#endif // EC_NISTP_H

6 changes: 3 additions & 3 deletions crypto/fipsmodule/ec/make_tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ func writeP384Table(path string) error {
// is based on the generation method in:
// https://gitlab.com/nisec/ecckiila/-/blob/master/main.py#L296
#if defined(P384_USE_64BIT_LIMBS_FELEM)`
#if defined(EC_NISTP_USE_64BIT_LIMB)`

table_def_str := fmt.Sprintf("static const p384_felem p384_g_pre_comp[%d][%d][2] = ", num_subtables, pts_per_subtable)

Expand Down Expand Up @@ -462,7 +462,7 @@ func writeP521Table(path string) error {
// is based on the generation method in:
// https://gitlab.com/nisec/ecckiila/-/blob/master/main.py#L296
#if defined(P521_USE_S2N_BIGNUM_FIELD_ARITH)`
#if defined(EC_NISTP_USE_S2N_BIGNUM)`

table_def_str := fmt.Sprintf("static const p521_felem p521_g_pre_comp[%d][%d][2] = ", num_subtables, pts_per_subtable)

Expand All @@ -472,7 +472,7 @@ func writeP521Table(path string) error {
if err := writeTables(w, curve, tables, writeU64, nil); err != nil {
return err
}
if _, err := io.WriteString(w, ";\n#else\n#if defined(P521_USE_64BIT_LIMBS_FELEM)\n" + table_def_str); err != nil {
if _, err := io.WriteString(w, ";\n#else\n#if defined(EC_NISTP_USE_64BIT_LIMB)\n" + table_def_str); err != nil {
return err
}
// P-521 Fiat-crypto implementation for 64-bit systems represents a field
Expand Down
74 changes: 11 additions & 63 deletions crypto/fipsmodule/ec/p256.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "../../internal.h"
#include "../delocate.h"
#include "./internal.h"
#include "ec_nistp.h"

#if defined(BORINGSSL_HAS_UINT128)
#define BORINGSSL_NISTP256_64BIT 1
Expand Down Expand Up @@ -166,73 +167,20 @@ static void fiat_p256_inv_square(fiat_p256_felem out,
fiat_p256_square(out, ret); // 2^256 - 2^224 + 2^192 + 2^96 - 2^2
}

// Group operations
// ----------------
//
// Building on top of the field operations we have the operations on the
// elliptic curve group itself. Points on the curve are represented in Jacobian
// coordinates.
//
// Both operations were transcribed to Coq and proven to correspond to naive
// implementations using Affine coordinates, for all suitable fields. In the
// Coq proofs, issues of constant-time execution and memory layout (aliasing)
// conventions were not considered. Specification of affine coordinates:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Spec/WeierstrassCurve.v#L28>
// As a sanity check, a proof that these points form a commutative group:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/AffineProofs.v#L33>

// fiat_p256_point_double calculates 2*(x_in, y_in, z_in)
//
// The method is taken from:
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
//
// Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
//
// Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
// while x_out == y_in is not (maybe this works, but it's not tested).
static void fiat_p256_point_double(fiat_p256_felem x_out, fiat_p256_felem y_out,
// Method table that routes the generic ec_nistp field operations to the
// Fiat-crypto P-256 routines defined in this file.
DEFINE_METHOD_FUNCTION(ec_nistp_felem_meth, p256_felem_methods) {
  out->sqr = fiat_p256_square;
  out->mul = fiat_p256_mul;
  out->sub = fiat_p256_sub;
  out->add = fiat_p256_add;
}

static void fiat_p256_point_double(fiat_p256_felem x_out,
fiat_p256_felem y_out,
fiat_p256_felem z_out,
const fiat_p256_felem x_in,
const fiat_p256_felem y_in,
const fiat_p256_felem z_in) {
fiat_p256_felem delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
// delta = z^2
fiat_p256_square(delta, z_in);
// gamma = y^2
fiat_p256_square(gamma, y_in);
// beta = x*gamma
fiat_p256_mul(beta, x_in, gamma);

// alpha = 3*(x-delta)*(x+delta)
fiat_p256_sub(ftmp, x_in, delta);
fiat_p256_add(ftmp2, x_in, delta);

fiat_p256_add(tmptmp, ftmp2, ftmp2);
fiat_p256_add(ftmp2, ftmp2, tmptmp);
fiat_p256_mul(alpha, ftmp, ftmp2);

// x' = alpha^2 - 8*beta
fiat_p256_square(x_out, alpha);
fiat_p256_add(fourbeta, beta, beta);
fiat_p256_add(fourbeta, fourbeta, fourbeta);
fiat_p256_add(tmptmp, fourbeta, fourbeta);
fiat_p256_sub(x_out, x_out, tmptmp);

// z' = (y + z)^2 - gamma - delta
fiat_p256_add(delta, gamma, delta);
fiat_p256_add(ftmp, y_in, z_in);
fiat_p256_square(z_out, ftmp);
fiat_p256_sub(z_out, z_out, delta);

// y' = alpha*(4*beta - x') - 8*gamma^2
fiat_p256_sub(y_out, fourbeta, x_out);
fiat_p256_add(gamma, gamma, gamma);
fiat_p256_square(gamma, gamma);
fiat_p256_mul(y_out, alpha, y_out);
fiat_p256_add(gamma, gamma, gamma);
fiat_p256_sub(y_out, y_out, gamma);
ec_nistp_point_double(p256_felem_methods(), x_out, y_out, z_out, x_in, y_in, z_in);
}

// fiat_p256_point_add calculates (x1, y1, z1) + (x2, y2, z2)
Expand Down
Loading

0 comments on commit fc06ecb

Please sign in to comment.