Skip to content

Commit

Permalink
[EC] Unify scalar_mul_public for ec_nistp curves (#2004)
Browse files Browse the repository at this point in the history
Added unified scalar_mul_public implemented in ec_nistp.
This is a refactor of the algorithm in p384.c and p521.c
that makes it generic. The implementations in p384.c, p521.c,
as well as in p256.c, are substituted with this new unified
implementation.
  • Loading branch information
dkostic authored Nov 25, 2024
1 parent 1dc8f2a commit 7c04616
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 326 deletions.
137 changes: 135 additions & 2 deletions crypto/fipsmodule/ec/ec_nistp.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
// | 2. | x | x | x* |
// | 3. | x | x | |
// | 4. | x | x | x* |
// | 5. | | | |
// | 5. | x | x | |
// * For P-256, only the Fiat-crypto implementation in p256.c is replaced.

#include "ec_nistp.h"
Expand Down Expand Up @@ -317,9 +317,13 @@ static void scalar_rwnaf(int16_t *out, size_t window_size,
#define SCALAR_MUL_TABLE_MAX_NUM_FELEM_LIMBS \
(SCALAR_MUL_TABLE_NUM_POINTS * 3 * FELEM_MAX_NUM_OF_LIMBS)

// The maximum number of bits for a scalar.
#define SCALAR_MUL_MAX_SCALAR_BITS (521)

// Maximum number of windows (digits) for a scalar encoding which is
// determined by the maximum scalar bit size -- 521 bits in our case.
#define SCALAR_MUL_MAX_NUM_WINDOWS DIV_AND_CEIL(521, SCALAR_MUL_WINDOW_SIZE)
#define SCALAR_MUL_MAX_NUM_WINDOWS \
DIV_AND_CEIL(SCALAR_MUL_MAX_SCALAR_BITS, SCALAR_MUL_WINDOW_SIZE)

// Generate table of multiples of the input point P = (x_in, y_in, z_in):
// table <-- [2i + 1]P for i in [0, SCALAR_MUL_TABLE_NUM_POINTS - 1].
Expand Down Expand Up @@ -636,3 +640,132 @@ void ec_nistp_scalar_mul_base(const ec_nistp_meth *ctx,
cmovznz(y_out, ctx->felem_num_limbs, t, y_tmp, y_res);
cmovznz(z_out, ctx->felem_num_limbs, t, z_tmp, z_res);
}

// Computes [g_scalar]G + [p_scalar]P, where G is the base point of the curve
// curve, and P is the given point (x_p, y_p, z_p).
//
// Both scalar products are computed by the same "textbook" wNAF method,
// with w = 5 for g_scalar and w = 5 for p_scalar.
// For the base point G product we use the first sub-table of the precomputed
// table, while for P we generate the table on-the-fly. The tables hold the
// first 16 odd multiples of G or P:
// g_pre_comp = {[1]G, [3]G, ..., [31]G},
// p_pre_comp = {[1]P, [3]P, ..., [31]P}.
// Computing the negation of a point P = (x, y) is relatively easy:
// -P = (x, -y).
// So we may assume that we also have the negatives of the points in the tables.
//
// The scalars are recoded by the textbook wNAF method to digits, where a digit
// is either a zero or an odd integer in [-31, 31]. The method guarantees that
// each non-zero digit is followed by at least four zeroes.
//
// The result [g_scalar]G + [p_scalar]P is computed by the following algorithm:
// 1. Initialize the accumulator with the point-at-infinity.
// 2. For i starting from 521 down to 0:
// 3. Double the accumulator (doubling can be skipped while the
// accumulator is equal to the point-at-infinity).
// 4. Read from |p_pre_comp| the point corresponding to the i-th digit of
// p_scalar, negate it if the digit is negative, and add it to the
// accumulator.
// 5. Read from |g_pre_comp| the point corresponding to the i-th digit of
// g_scalar, negate it if the digit is negative, and add it to the
// accumulator.
// Note: this function is NOT constant-time.
void ec_nistp_scalar_mul_public(const ec_nistp_meth *ctx,
ec_nistp_felem_limb *x_out,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const EC_SCALAR *g_scalar,
const ec_nistp_felem_limb *x_p,
const ec_nistp_felem_limb *y_p,
const ec_nistp_felem_limb *z_p,
const EC_SCALAR *p_scalar) {

const size_t felem_num_bytes = ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb);

// Table of multiples of P.
ec_nistp_felem_limb p_table[SCALAR_MUL_TABLE_MAX_NUM_FELEM_LIMBS];
generate_table(ctx, p_table, x_p, y_p, z_p);
const size_t p_point_num_limbs = 3 * ctx->felem_num_limbs; // Projective.

// Table of multiples of G.
const ec_nistp_felem_limb *g_table = ctx->scalar_mul_base_table;
const size_t g_point_num_limbs = 2 * ctx->felem_num_limbs; // Affine.

// Recode the scalars.
int8_t p_wnaf[SCALAR_MUL_MAX_SCALAR_BITS + 1] = {0};
int8_t g_wnaf[SCALAR_MUL_MAX_SCALAR_BITS + 1] = {0};
ec_compute_wNAF(p_wnaf, p_scalar, ctx->felem_num_bits, SCALAR_MUL_WINDOW_SIZE);
ec_compute_wNAF(g_wnaf, g_scalar, ctx->felem_num_bits, SCALAR_MUL_WINDOW_SIZE);

// In the beginning res is set to point-at-infinity, so we set the flag.
int16_t res_is_inf = 1;
int16_t d, is_neg, idx;
ec_nistp_felem ftmp;

for (int i = ctx->felem_num_bits; i >= 0; i--) {

// If |res| is point-at-infinity there is no point in doubling so skip it.
if (!res_is_inf) {
ctx->point_dbl(x_out, y_out, z_out, x_out, y_out, z_out);
}

// Process the p_scalar digit.
d = p_wnaf[i];
if (d != 0) {
is_neg = d < 0 ? 1 : 0;
idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;

if (res_is_inf) {
// If |res| is point-at-infinity there is no need to add the new point,
// we can simply copy it.
const size_t table_idx = idx * p_point_num_limbs;
OPENSSL_memcpy(x_out, &p_table[table_idx], felem_num_bytes);
OPENSSL_memcpy(y_out, &p_table[table_idx + ctx->felem_num_limbs], felem_num_bytes);
OPENSSL_memcpy(z_out, &p_table[table_idx + ctx->felem_num_limbs * 2], felem_num_bytes);
res_is_inf = 0;
} else {
// Otherwise, add to the accumulator either the point at position idx
// in the table or its negation.
const ec_nistp_felem_limb *y_tmp;
y_tmp = &p_table[idx * p_point_num_limbs + ctx->felem_num_limbs];
if (is_neg) {
ctx->felem_neg(ftmp, y_tmp);
y_tmp = ftmp;
}
ctx->point_add(x_out, y_out, z_out, x_out, y_out, z_out, 0,
&p_table[idx * p_point_num_limbs],
y_tmp,
&p_table[idx * p_point_num_limbs + ctx->felem_num_limbs * 2]);
}
}

/* // Process the g_scalar digit. */
d = g_wnaf[i];
if (d != 0) {
is_neg = d < 0 ? 1 : 0;
idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;

if (res_is_inf) {
// If |res| is point-at-infinity there is no need to add the new point,
// we can simply copy it.
const size_t table_idx = idx * g_point_num_limbs;
OPENSSL_memcpy(x_out, &g_table[table_idx], felem_num_bytes);
OPENSSL_memcpy(y_out, &g_table[table_idx + ctx->felem_num_limbs], felem_num_bytes);
OPENSSL_memcpy(z_out, ctx->felem_one, felem_num_bytes);
res_is_inf = 0;
} else {
// Otherwise, add to the accumulator either the point at position idx
// in the table or its negation.
const ec_nistp_felem_limb *y_tmp ;
y_tmp = &g_table[idx * g_point_num_limbs + ctx->felem_num_limbs];
if (is_neg) {
ctx->felem_neg(ftmp, &g_table[idx * g_point_num_limbs + ctx->felem_num_limbs]);
y_tmp = ftmp;
}
ctx->point_add(x_out, y_out, z_out, x_out, y_out, z_out, 1,
&g_table[idx * g_point_num_limbs], y_tmp, ctx->felem_one);
}
}
}
}
10 changes: 10 additions & 0 deletions crypto/fipsmodule/ec/ec_nistp.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,5 +114,15 @@ void ec_nistp_scalar_mul_base(const ec_nistp_meth *ctx,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const EC_SCALAR *scalar);

void ec_nistp_scalar_mul_public(const ec_nistp_meth *ctx,
ec_nistp_felem_limb *x_out,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const EC_SCALAR *g_scalar,
const ec_nistp_felem_limb *x_p,
const ec_nistp_felem_limb *y_p,
const ec_nistp_felem_limb *z_p,
const EC_SCALAR *p_scalar);
#endif // EC_NISTP_H

3 changes: 1 addition & 2 deletions crypto/fipsmodule/ec/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -697,8 +697,7 @@ void ec_GFp_mont_felem_exp(const EC_GROUP *group, EC_FELEM *out,
// where at most one of any w+1 consecutive digits is non-zero
// with the exception that the most significant digit may be only
// w-1 zeros away from that next non-zero digit.
void ec_compute_wNAF(const EC_GROUP *group, int8_t *out,
const EC_SCALAR *scalar, size_t bits, int w);
void ec_compute_wNAF(int8_t *out, const EC_SCALAR *scalar, size_t bits, int w);

int ec_GFp_mont_mul_public_batch(const EC_GROUP *group, EC_JACOBIAN *r,
const EC_SCALAR *g_scalar,
Expand Down
2 changes: 1 addition & 1 deletion crypto/fipsmodule/ec/p256.c
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,

// Set up the coefficients for |p_scalar|.
int8_t p_wNAF[257];
ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);
ec_compute_wNAF(p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);

// Set |ret| to the point at infinity.
int skip = 1; // Save some point operations.
Expand Down
162 changes: 5 additions & 157 deletions crypto/fipsmodule/ec/p384.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,6 @@ static p384_limb_t p384_felem_nz(const p384_limb_t in1[P384_NLIMBS]) {
#endif // EC_NISTP_USE_S2N_BIGNUM


static void p384_felem_copy(p384_limb_t out[P384_NLIMBS],
const p384_limb_t in1[P384_NLIMBS]) {
for (size_t i = 0; i < P384_NLIMBS; i++) {
out[i] = in1[i];
}
}

static void p384_from_generic(p384_felem out, const EC_FELEM *in) {
#ifdef OPENSSL_BIG_ENDIAN
uint8_t tmp[P384_EC_FELEM_BYTES];
Expand Down Expand Up @@ -470,26 +463,6 @@ static int ec_GFp_nistp384_cmp_x_coordinate(const EC_GROUP *group,
//
// For detailed analysis of different window sizes see the bottom of this file.

// Constants for scalar encoding in the scalar multiplication functions.
#define P384_MUL_WSIZE (5) // window size w
// Assert the window size is 5 because the pre-computed table in |p384_table.h|
// is generated for window size 5.
OPENSSL_STATIC_ASSERT(P384_MUL_WSIZE == 5,
p384_scalar_mul_window_size_is_not_equal_to_five)

#define P384_MUL_TWO_TO_WSIZE (1 << P384_MUL_WSIZE)

// Number of |P384_MUL_WSIZE|-bit windows in a 384-bit value
#define P384_MUL_NWINDOWS ((384 + P384_MUL_WSIZE - 1)/P384_MUL_WSIZE)

// For the public point in |ec_GFp_nistp384_point_mul_public| function
// we use window size w = 5.
#define P384_MUL_PUB_WSIZE (5)

// We keep only odd multiples in tables, hence the table size is (2^w)/2
#define P384_MUL_TABLE_SIZE (P384_MUL_TWO_TO_WSIZE >> 1)
#define P384_MUL_PUB_TABLE_SIZE (1 << (P384_MUL_PUB_WSIZE - 1))

// Multiplication of an arbitrary point by a scalar, r = [scalar]P.
static void ec_GFp_nistp384_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
const EC_JACOBIAN *p,
Expand Down Expand Up @@ -523,146 +496,21 @@ static void ec_GFp_nistp384_point_mul_base(const EC_GROUP *group,

// Computes [g_scalar]G + [p_scalar]P, where G is the base point of the P-384
// curve, and P is the given point |p|.
//
// Both scalar products are computed by the same "textbook" wNAF method,
// with w = 5 for g_scalar and w = 5 for p_scalar.
// For the base point G product we use the first sub-table of the precomputed
// table |p384_g_pre_comp| from |p384_table.h| file, while for P we generate
// |p_pre_comp| table on-the-fly. The tables hold the first 16 odd multiples
// of G or P:
// g_pre_comp = {[1]G, [3]G, ..., [31]G},
// p_pre_comp = {[1]P, [3]P, ..., [31]P}.
// Computing the negation of a point P = (x, y) is relatively easy:
// -P = (x, -y).
// So we may assume that we also have the negatives of the points in the tables.
//
// The 384-bit scalars are recoded by the textbook wNAF method to 385 digits,
// where a digit is either a zero or an odd integer in [-31, 31]. The method
// guarantees that each non-zero digit is followed by at least four
// zeroes.
//
// The result [g_scalar]G + [p_scalar]P is computed by the following algorithm:
// 1. Initialize the accumulator with the point-at-infinity.
// 2. For i starting from 384 down to 0:
// 3. Double the accumulator (doubling can be skipped while the
// accumulator is equal to the point-at-infinity).
// 4. Read from |p_pre_comp| the point corresponding to the i-th digit of
// p_scalar, negate it if the digit is negative, and add it to the
// accumulator.
// 5. Read from |g_pre_comp| the point corresponding to the i-th digit of
// g_scalar, negate it if the digit is negative, and add it to the
// accumulator.
//
// Note: this function is NOT constant-time.
static void ec_GFp_nistp384_point_mul_public(const EC_GROUP *group,
EC_JACOBIAN *r,
const EC_SCALAR *g_scalar,
const EC_JACOBIAN *p,
const EC_SCALAR *p_scalar) {

p384_felem res[3] = {{0}, {0}, {0}}, two_p[3] = {{0}, {0}, {0}}, ftmp;

// Table of multiples of P: [2i + 1]P for i in [0, 15].
p384_felem p_pre_comp[P384_MUL_PUB_TABLE_SIZE][3];

// Set the first point in the table to P.
p384_from_generic(p_pre_comp[0][0], &p->X);
p384_from_generic(p_pre_comp[0][1], &p->Y);
p384_from_generic(p_pre_comp[0][2], &p->Z);

// Compute two_p = [2]P.
p384_point_double(two_p[0], two_p[1], two_p[2],
p_pre_comp[0][0], p_pre_comp[0][1], p_pre_comp[0][2]);

// Generate the remaining 15 multiples of P.
for (size_t i = 1; i < P384_MUL_PUB_TABLE_SIZE; i++) {
p384_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
two_p[0], two_p[1], two_p[2], 0 /* both Jacobian */,
p_pre_comp[i - 1][0],
p_pre_comp[i - 1][1],
p_pre_comp[i - 1][2]);
}

// Recode the scalars.
int8_t p_wnaf[385] = {0}, g_wnaf[385] = {0};
ec_compute_wNAF(group, p_wnaf, p_scalar, 384, P384_MUL_PUB_WSIZE);
ec_compute_wNAF(group, g_wnaf, g_scalar, 384, P384_MUL_WSIZE);

// In the beginning res is set to point-at-infinity, so we set the flag.
int16_t res_is_inf = 1;
int16_t d, is_neg, idx;

for (int i = 384; i >= 0; i--) {

// If |res| is point-at-infinity there is no point in doubling so skip it.
if (!res_is_inf) {
p384_point_double(res[0], res[1], res[2], res[0], res[1], res[2]);
}
p384_felem res[3] = {{0}, {0}, {0}}, tmp[3] = {{0}, {0}, {0}};

// Process the p_scalar digit.
d = p_wnaf[i];
if (d != 0) {
is_neg = d < 0 ? 1 : 0;
idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;

if (res_is_inf) {
// If |res| is point-at-infinity there is no need to add the new point,
// we can simply copy it.
p384_felem_copy(res[0], p_pre_comp[idx][0]);
p384_felem_copy(res[1], p_pre_comp[idx][1]);
p384_felem_copy(res[2], p_pre_comp[idx][2]);
res_is_inf = 0;
} else {
// Otherwise, add to the accumulator either the point at position idx
// in the table or its negation.
if (is_neg) {
p384_felem_opp(ftmp, p_pre_comp[idx][1]);
} else {
p384_felem_copy(ftmp, p_pre_comp[idx][1]);
}
p384_point_add(res[0], res[1], res[2],
res[0], res[1], res[2],
0 /* both Jacobian */,
p_pre_comp[idx][0], ftmp, p_pre_comp[idx][2]);
}
}
p384_from_generic(tmp[0], &p->X);
p384_from_generic(tmp[1], &p->Y);
p384_from_generic(tmp[2], &p->Z);

// Process the g_scalar digit.
d = g_wnaf[i];
if (d != 0) {
is_neg = d < 0 ? 1 : 0;
idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;

if (res_is_inf) {
// If |res| is point-at-infinity there is no need to add the new point,
// we can simply copy it.
p384_felem_copy(res[0], p384_g_pre_comp[0][idx][0]);
p384_felem_copy(res[1], p384_g_pre_comp[0][idx][1]);
p384_felem_copy(res[2], p384_felem_one);
res_is_inf = 0;
} else {
// Otherwise, add to the accumulator either the point at position idx
// in the table or its negation.
if (is_neg) {
p384_felem_opp(ftmp, p384_g_pre_comp[0][idx][1]);
} else {
p384_felem_copy(ftmp, p384_g_pre_comp[0][idx][1]);
}
// Add the point to the accumulator |res|.
// Note that the points in the pre-computed table are given with affine
// coordinates. The point addition function computes a sum of two points,
// either both given in projective, or one in projective and one in
// affine coordinates. The |mixed| flag indicates the latter option,
// in which case we set the third coordinate of the second point to one.
p384_point_add(res[0], res[1], res[2],
res[0], res[1], res[2],
1 /* mixed */,
p384_g_pre_comp[0][idx][0], ftmp, p384_felem_one);
}
}
}
ec_nistp_scalar_mul_public(p384_methods(), res[0], res[1], res[2], g_scalar, tmp[0], tmp[1], tmp[2], p_scalar);

// Copy the result to the output.
p384_to_generic(&r->X, res[0]);
p384_to_generic(&r->Y, res[1]);
p384_to_generic(&r->Z, res[2]);
Expand Down
Loading

0 comments on commit 7c04616

Please sign in to comment.