[EC] Unify scalar_mul_public for ec_nistp curves (#2004)

Added unified scalar_mul_public implemented in ec_nistp. This is a refactor of the algorithm in p384.c and p521.c that makes it generic. The implementations in p384.c, p521.c, as well as in p256.c, are substituted with this new unified implementation.
aws · Nov 25, 2024 · 7c04616 · 7c04616
1 parent 1dc8f2a
commit 7c04616
Show file tree

Hide file tree

Showing 7 changed files with 163 additions and 326 deletions.
diff --git a/crypto/fipsmodule/ec/ec_nistp.c b/crypto/fipsmodule/ec/ec_nistp.c
@@ -19,7 +19,7 @@
 // | 2. |   x   |   x   |   x*  |
 // | 3. |   x   |   x   |       |
 // | 4. |   x   |   x   |   x*  |
-// | 5. |       |       |       |
+// | 5. |   x   |   x   |       |
 //  * For P-256, only the Fiat-crypto implementation in p256.c is replaced. 
 
 #include "ec_nistp.h"
@@ -317,9 +317,13 @@ static void scalar_rwnaf(int16_t *out, size_t window_size,
 #define SCALAR_MUL_TABLE_MAX_NUM_FELEM_LIMBS \
                 (SCALAR_MUL_TABLE_NUM_POINTS * 3 * FELEM_MAX_NUM_OF_LIMBS)
 
+// The maximum number of bits for a scalar.
+#define SCALAR_MUL_MAX_SCALAR_BITS (521)
+
 // Maximum number of windows (digits) for a scalar encoding which is
 // determined by the maximum scalar bit size -- 521 bits in our case.
-#define SCALAR_MUL_MAX_NUM_WINDOWS DIV_AND_CEIL(521, SCALAR_MUL_WINDOW_SIZE)
+#define SCALAR_MUL_MAX_NUM_WINDOWS \
+                DIV_AND_CEIL(SCALAR_MUL_MAX_SCALAR_BITS, SCALAR_MUL_WINDOW_SIZE)
 
 // Generate table of multiples of the input point P = (x_in, y_in, z_in):
 //  table <-- [2i + 1]P for i in [0, SCALAR_MUL_TABLE_NUM_POINTS - 1].
@@ -636,3 +640,132 @@ void ec_nistp_scalar_mul_base(const ec_nistp_meth *ctx,
   cmovznz(y_out, ctx->felem_num_limbs, t, y_tmp, y_res);
   cmovznz(z_out, ctx->felem_num_limbs, t, z_tmp, z_res);
 }
+
+// Computes [g_scalar]G + [p_scalar]P, where G is the base point of the curve
+// curve, and P is the given point (x_p, y_p, z_p).
+//
+// Both scalar products are computed by the same "textbook" wNAF method,
+// with w = 5 for g_scalar and w = 5 for p_scalar.
+// For the base point G product we use the first sub-table of the precomputed
+// table, while for P we generate the table on-the-fly. The tables hold the
+// first 16 odd multiples of G or P:
+//     g_pre_comp = {[1]G, [3]G, ..., [31]G},
+//     p_pre_comp = {[1]P, [3]P, ..., [31]P}.
+// Computing the negation of a point P = (x, y) is relatively easy:
+//     -P = (x, -y).
+// So we may assume that we also have the negatives of the points in the tables.
+//
+// The scalars are recoded by the textbook wNAF method to digits, where a digit
+// is either a zero or an odd integer in [-31, 31]. The method guarantees that
+// each non-zero digit is followed by at least four zeroes.
+//
+// The result [g_scalar]G + [p_scalar]P is computed by the following algorithm:
+//     1. Initialize the accumulator with the point-at-infinity.
+//     2. For i starting from 521 down to 0:
+//     3.   Double the accumulator (doubling can be skipped while the
+//          accumulator is equal to the point-at-infinity).
+//     4.   Read from |p_pre_comp| the point corresponding to the i-th digit of
+//          p_scalar, negate it if the digit is negative, and add it to the
+//          accumulator.
+//     5.   Read from |g_pre_comp| the point corresponding to the i-th digit of
+//          g_scalar, negate it if the digit is negative, and add it to the
+//          accumulator.
+// Note: this function is NOT constant-time.
+void ec_nistp_scalar_mul_public(const ec_nistp_meth *ctx,
+                                ec_nistp_felem_limb *x_out,
+                                ec_nistp_felem_limb *y_out,
+                                ec_nistp_felem_limb *z_out,
+                                const EC_SCALAR *g_scalar,
+                                const ec_nistp_felem_limb *x_p,
+                                const ec_nistp_felem_limb *y_p,
+                                const ec_nistp_felem_limb *z_p,
+                                const EC_SCALAR *p_scalar) {
+
+  const size_t felem_num_bytes = ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb);
+
+  // Table of multiples of P.
+  ec_nistp_felem_limb p_table[SCALAR_MUL_TABLE_MAX_NUM_FELEM_LIMBS];
+  generate_table(ctx, p_table, x_p, y_p, z_p);
+  const size_t p_point_num_limbs = 3 * ctx->felem_num_limbs; // Projective.
+
+  // Table of multiples of G.
+  const ec_nistp_felem_limb *g_table = ctx->scalar_mul_base_table;
+  const size_t g_point_num_limbs = 2 * ctx->felem_num_limbs; // Affine.
+
+  // Recode the scalars.
+  int8_t p_wnaf[SCALAR_MUL_MAX_SCALAR_BITS + 1] = {0};
+  int8_t g_wnaf[SCALAR_MUL_MAX_SCALAR_BITS + 1] = {0};
+  ec_compute_wNAF(p_wnaf, p_scalar, ctx->felem_num_bits, SCALAR_MUL_WINDOW_SIZE);
+  ec_compute_wNAF(g_wnaf, g_scalar, ctx->felem_num_bits, SCALAR_MUL_WINDOW_SIZE);
+
+  // In the beginning res is set to point-at-infinity, so we set the flag.
+  int16_t res_is_inf = 1;
+  int16_t d, is_neg, idx;
+  ec_nistp_felem ftmp;
+
+  for (int i = ctx->felem_num_bits; i >= 0; i--) {
+
+    // If |res| is point-at-infinity there is no point in doubling so skip it.
+    if (!res_is_inf) {
+      ctx->point_dbl(x_out, y_out, z_out, x_out, y_out, z_out);
+    }
+
+    // Process the p_scalar digit.
+    d = p_wnaf[i];
+    if (d != 0) {
+      is_neg = d < 0 ? 1 : 0;
+      idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;
+
+      if (res_is_inf) {
+        // If |res| is point-at-infinity there is no need to add the new point,
+        // we can simply copy it.
+        const size_t table_idx = idx * p_point_num_limbs;
+        OPENSSL_memcpy(x_out, &p_table[table_idx], felem_num_bytes);
+        OPENSSL_memcpy(y_out, &p_table[table_idx + ctx->felem_num_limbs], felem_num_bytes);
+        OPENSSL_memcpy(z_out, &p_table[table_idx + ctx->felem_num_limbs * 2], felem_num_bytes);
+        res_is_inf = 0;
+      } else {
+        // Otherwise, add to the accumulator either the point at position idx
+        // in the table or its negation.
+        const ec_nistp_felem_limb *y_tmp;
+        y_tmp = &p_table[idx * p_point_num_limbs + ctx->felem_num_limbs];
+        if (is_neg) {
+          ctx->felem_neg(ftmp, y_tmp);
+          y_tmp = ftmp;
+        }
+        ctx->point_add(x_out, y_out, z_out, x_out, y_out, z_out, 0,
+                       &p_table[idx * p_point_num_limbs],
+                       y_tmp,
+                       &p_table[idx * p_point_num_limbs + ctx->felem_num_limbs * 2]);
+      }
+    }
+
+    /* // Process the g_scalar digit. */
+    d = g_wnaf[i];
+    if (d != 0) {
+      is_neg = d < 0 ? 1 : 0;
+      idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;
+
+      if (res_is_inf) {
+        // If |res| is point-at-infinity there is no need to add the new point,
+        // we can simply copy it.
+        const size_t table_idx = idx * g_point_num_limbs;
+        OPENSSL_memcpy(x_out, &g_table[table_idx], felem_num_bytes);
+        OPENSSL_memcpy(y_out, &g_table[table_idx + ctx->felem_num_limbs], felem_num_bytes);
+        OPENSSL_memcpy(z_out, ctx->felem_one, felem_num_bytes);
+        res_is_inf = 0;
+      } else {
+        // Otherwise, add to the accumulator either the point at position idx
+        // in the table or its negation.
+        const ec_nistp_felem_limb *y_tmp ;
+        y_tmp = &g_table[idx * g_point_num_limbs + ctx->felem_num_limbs];
+        if (is_neg) {
+          ctx->felem_neg(ftmp, &g_table[idx * g_point_num_limbs + ctx->felem_num_limbs]);
+          y_tmp = ftmp;
+        }
+        ctx->point_add(x_out, y_out, z_out, x_out, y_out, z_out, 1,
+                       &g_table[idx * g_point_num_limbs], y_tmp, ctx->felem_one);
+      }
+    }
+  }
+}
diff --git a/crypto/fipsmodule/ec/ec_nistp.h b/crypto/fipsmodule/ec/ec_nistp.h
@@ -114,5 +114,15 @@ void ec_nistp_scalar_mul_base(const ec_nistp_meth *ctx,
                               ec_nistp_felem_limb *y_out,
                               ec_nistp_felem_limb *z_out,
                               const EC_SCALAR *scalar);
+
+void ec_nistp_scalar_mul_public(const ec_nistp_meth *ctx,
+                                ec_nistp_felem_limb *x_out,
+                                ec_nistp_felem_limb *y_out,
+                                ec_nistp_felem_limb *z_out,
+                                const EC_SCALAR *g_scalar,
+                                const ec_nistp_felem_limb *x_p,
+                                const ec_nistp_felem_limb *y_p,
+                                const ec_nistp_felem_limb *z_p,
+                                const EC_SCALAR *p_scalar);
 #endif // EC_NISTP_H
 
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
@@ -697,8 +697,7 @@ void ec_GFp_mont_felem_exp(const EC_GROUP *group, EC_FELEM *out,
 // where at most one of any  w+1  consecutive digits is non-zero
 // with the exception that the most significant digit may be only
 // w-1 zeros away from that next non-zero digit.
-void ec_compute_wNAF(const EC_GROUP *group, int8_t *out,
-                     const EC_SCALAR *scalar, size_t bits, int w);
+void ec_compute_wNAF(int8_t *out, const EC_SCALAR *scalar, size_t bits, int w);
 
 int ec_GFp_mont_mul_public_batch(const EC_GROUP *group, EC_JACOBIAN *r,
                                  const EC_SCALAR *g_scalar,

diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c
@@ -380,7 +380,7 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group,
 
   // Set up the coefficients for |p_scalar|.
   int8_t p_wNAF[257];
-  ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);
+  ec_compute_wNAF(p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC);
 
   // Set |ret| to the point at infinity.
   int skip = 1;  // Save some point operations.

diff --git a/crypto/fipsmodule/ec/p384.c b/crypto/fipsmodule/ec/p384.c
@@ -84,13 +84,6 @@ static p384_limb_t p384_felem_nz(const p384_limb_t in1[P384_NLIMBS]) {
 #endif // EC_NISTP_USE_S2N_BIGNUM
 
 
-static void p384_felem_copy(p384_limb_t out[P384_NLIMBS],
-                           const p384_limb_t in1[P384_NLIMBS]) {
-  for (size_t i = 0; i < P384_NLIMBS; i++) {
-    out[i] = in1[i];
-  }
-}
-
 static void p384_from_generic(p384_felem out, const EC_FELEM *in) {
 #ifdef OPENSSL_BIG_ENDIAN
   uint8_t tmp[P384_EC_FELEM_BYTES];
@@ -470,26 +463,6 @@ static int ec_GFp_nistp384_cmp_x_coordinate(const EC_GROUP *group,
 //
 // For detailed analysis of different window sizes see the bottom of this file.
 
-// Constants for scalar encoding in the scalar multiplication functions.
-#define P384_MUL_WSIZE        (5) // window size w
-// Assert the window size is 5 because the pre-computed table in |p384_table.h|
-// is generated for window size 5.
-OPENSSL_STATIC_ASSERT(P384_MUL_WSIZE == 5,
-    p384_scalar_mul_window_size_is_not_equal_to_five)
-
-#define P384_MUL_TWO_TO_WSIZE (1 << P384_MUL_WSIZE)
-
-// Number of |P384_MUL_WSIZE|-bit windows in a 384-bit value
-#define P384_MUL_NWINDOWS     ((384 + P384_MUL_WSIZE - 1)/P384_MUL_WSIZE)
-
-// For the public point in |ec_GFp_nistp384_point_mul_public| function
-// we use window size w = 5.
-#define P384_MUL_PUB_WSIZE    (5)
-
-// We keep only odd multiples in tables, hence the table size is (2^w)/2
-#define P384_MUL_TABLE_SIZE     (P384_MUL_TWO_TO_WSIZE >> 1)
-#define P384_MUL_PUB_TABLE_SIZE (1 << (P384_MUL_PUB_WSIZE - 1))
-
 // Multiplication of an arbitrary point by a scalar, r = [scalar]P.
 static void ec_GFp_nistp384_point_mul(const EC_GROUP *group, EC_JACOBIAN *r,
                                       const EC_JACOBIAN *p,
@@ -523,146 +496,21 @@ static void ec_GFp_nistp384_point_mul_base(const EC_GROUP *group,
 
 // Computes [g_scalar]G + [p_scalar]P, where G is the base point of the P-384
 // curve, and P is the given point |p|.
-//
-// Both scalar products are computed by the same "textbook" wNAF method,
-// with w = 5 for g_scalar and w = 5 for p_scalar.
-// For the base point G product we use the first sub-table of the precomputed
-// table |p384_g_pre_comp| from |p384_table.h| file, while for P we generate
-// |p_pre_comp| table on-the-fly. The tables hold the first 16 odd multiples
-// of G or P:
-//     g_pre_comp = {[1]G, [3]G, ..., [31]G},
-//     p_pre_comp = {[1]P, [3]P, ..., [31]P}.
-// Computing the negation of a point P = (x, y) is relatively easy:
-//     -P = (x, -y).
-// So we may assume that we also have the negatives of the points in the tables.
-//
-// The 384-bit scalars are recoded by the textbook wNAF method to 385 digits,
-// where a digit is either a zero or an odd integer in [-31, 31]. The method
-// guarantees that each non-zero digit is followed by at least four
-// zeroes.
-//
-// The result [g_scalar]G + [p_scalar]P is computed by the following algorithm:
-//     1. Initialize the accumulator with the point-at-infinity.
-//     2. For i starting from 384 down to 0:
-//     3.   Double the accumulator (doubling can be skipped while the
-//          accumulator is equal to the point-at-infinity).
-//     4.   Read from |p_pre_comp| the point corresponding to the i-th digit of
-//          p_scalar, negate it if the digit is negative, and add it to the
-//          accumulator.
-//     5.   Read from |g_pre_comp| the point corresponding to the i-th digit of
-//          g_scalar, negate it if the digit is negative, and add it to the
-//          accumulator.
-//
 // Note: this function is NOT constant-time.
 static void ec_GFp_nistp384_point_mul_public(const EC_GROUP *group,
                                              EC_JACOBIAN *r,
                                              const EC_SCALAR *g_scalar,
                                              const EC_JACOBIAN *p,
                                              const EC_SCALAR *p_scalar) {
 
-  p384_felem res[3] = {{0}, {0}, {0}}, two_p[3] = {{0}, {0}, {0}}, ftmp;
-
-  // Table of multiples of P:  [2i + 1]P for i in [0, 15].
-  p384_felem p_pre_comp[P384_MUL_PUB_TABLE_SIZE][3];
-
-  // Set the first point in the table to P.
-  p384_from_generic(p_pre_comp[0][0], &p->X);
-  p384_from_generic(p_pre_comp[0][1], &p->Y);
-  p384_from_generic(p_pre_comp[0][2], &p->Z);
-
-  // Compute two_p = [2]P.
-  p384_point_double(two_p[0], two_p[1], two_p[2],
-                    p_pre_comp[0][0], p_pre_comp[0][1], p_pre_comp[0][2]);
-
-  // Generate the remaining 15 multiples of P.
-  for (size_t i = 1; i < P384_MUL_PUB_TABLE_SIZE; i++) {
-    p384_point_add(p_pre_comp[i][0], p_pre_comp[i][1], p_pre_comp[i][2],
-                   two_p[0], two_p[1], two_p[2], 0 /* both Jacobian */,
-                   p_pre_comp[i - 1][0],
-                   p_pre_comp[i - 1][1],
-                   p_pre_comp[i - 1][2]);
-  }
-
-  // Recode the scalars.
-  int8_t p_wnaf[385] = {0}, g_wnaf[385] = {0};
-  ec_compute_wNAF(group, p_wnaf, p_scalar, 384, P384_MUL_PUB_WSIZE);
-  ec_compute_wNAF(group, g_wnaf, g_scalar, 384, P384_MUL_WSIZE);
-
-  // In the beginning res is set to point-at-infinity, so we set the flag.
-  int16_t res_is_inf = 1;
-  int16_t d, is_neg, idx;
-
-  for (int i = 384; i >= 0; i--) {
-
-    // If |res| is point-at-infinity there is no point in doubling so skip it.
-    if (!res_is_inf) {
-      p384_point_double(res[0], res[1], res[2], res[0], res[1], res[2]);
-    }
+  p384_felem res[3] = {{0}, {0}, {0}}, tmp[3] = {{0}, {0}, {0}};
 
-    // Process the p_scalar digit.
-    d = p_wnaf[i];
-    if (d != 0) {
-      is_neg = d < 0 ? 1 : 0;
-      idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;
-
-      if (res_is_inf) {
-        // If |res| is point-at-infinity there is no need to add the new point,
-        // we can simply copy it.
-        p384_felem_copy(res[0], p_pre_comp[idx][0]);
-        p384_felem_copy(res[1], p_pre_comp[idx][1]);
-        p384_felem_copy(res[2], p_pre_comp[idx][2]);
-        res_is_inf = 0;
-      } else {
-        // Otherwise, add to the accumulator either the point at position idx
-        // in the table or its negation.
-        if (is_neg) {
-          p384_felem_opp(ftmp, p_pre_comp[idx][1]);
-        } else {
-          p384_felem_copy(ftmp, p_pre_comp[idx][1]);
-        }
-        p384_point_add(res[0], res[1], res[2],
-                       res[0], res[1], res[2],
-                       0 /* both Jacobian */,
-                       p_pre_comp[idx][0], ftmp, p_pre_comp[idx][2]);
-      }
-    }
+  p384_from_generic(tmp[0], &p->X);
+  p384_from_generic(tmp[1], &p->Y);
+  p384_from_generic(tmp[2], &p->Z);
 
-    // Process the g_scalar digit.
-    d = g_wnaf[i];
-    if (d != 0) {
-      is_neg = d < 0 ? 1 : 0;
-      idx = (is_neg) ? (-d - 1) >> 1 : (d - 1) >> 1;
-
-      if (res_is_inf) {
-        // If |res| is point-at-infinity there is no need to add the new point,
-        // we can simply copy it.
-        p384_felem_copy(res[0], p384_g_pre_comp[0][idx][0]);
-        p384_felem_copy(res[1], p384_g_pre_comp[0][idx][1]);
-        p384_felem_copy(res[2], p384_felem_one);
-        res_is_inf = 0;
-      } else {
-        // Otherwise, add to the accumulator either the point at position idx
-        // in the table or its negation.
-        if (is_neg) {
-          p384_felem_opp(ftmp, p384_g_pre_comp[0][idx][1]);
-        } else {
-          p384_felem_copy(ftmp, p384_g_pre_comp[0][idx][1]);
-        }
-        // Add the point to the accumulator |res|.
-        // Note that the points in the pre-computed table are given with affine
-        // coordinates. The point addition function computes a sum of two points,
-        // either both given in projective, or one in projective and one in
-        // affine coordinates. The |mixed| flag indicates the latter option,
-        // in which case we set the third coordinate of the second point to one.
-        p384_point_add(res[0], res[1], res[2],
-                       res[0], res[1], res[2],
-                       1 /* mixed */,
-                       p384_g_pre_comp[0][idx][0], ftmp, p384_felem_one);
-      }
-    }
-  }
+  ec_nistp_scalar_mul_public(p384_methods(), res[0], res[1], res[2], g_scalar, tmp[0], tmp[1], tmp[2], p_scalar);
 
-  // Copy the result to the output.
   p384_to_generic(&r->X, res[0]);
   p384_to_generic(&r->Y, res[1]);
   p384_to_generic(&r->Z, res[2]);