Skip to content

Commit 51ae4b1

Browse files
authored
[EC] Use s2n-bignum's modular inversion for P-256/384/521 (#2057)
Use s2n-bignum's inversion modulo the field characteristic for curves P-256/384/521.

This gives us the following performance improvements:
```
_____Apple M1____| before | after | speedup |
P-256 ECDH       |  22724 | 23419 |  1.03x  |
P-256 ECDSA sign |  60677 | 69731 |  1.15x  |
P-384 ECDH       |   5863 |  6217 |  1.06x  |
P-384 ECDSA sign |  13232 | 15011 |  1.13x  |
P-521 ECDH       |   4041 |  4163 |  1.03x  |
P-521 ECDSA sign |   7079 |  7584 |  1.07x  |

______x86_64_____| before | after | speedup |
P-256 ECDH       |  19410 | 20408 |  1.05x  |
P-256 ECDSA sign |  54477 | 63617 |  1.17x  |
P-384 ECDH       |   5309 |  5599 |  1.05x  |
P-384 ECDSA sign |  12087 | 13780 |  1.14x  |
P-521 ECDH       |   3539 |  3677 |  1.04x  |
P-521 ECDSA sign |   6584 |  7068 |  1.07x  |

_______GV4_______| before | after | speedup |
P-256 ECDH       |  16642 | 17491 |  1.05x  |
P-256 ECDSA sign |  51527 | 61108 |  1.18x  |
P-384 ECDH       |   4208 |  4453 |  1.06x  |
P-384 ECDSA sign |   9848 | 11308 |  1.15x  |
P-521 ECDH       |   2668 |  2811 |  1.05x  |
P-521 ECDSA sign |   5092 |  5626 |  1.10x  |
```
1 parent b090db7 commit 51ae4b1

File tree

9 files changed

+46
-12
lines changed

9 files changed

+46
-12
lines changed

crypto/fipsmodule/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR
207207

208208
p256/p256_montjscalarmul.S
209209
p256/p256_montjscalarmul_alt.S
210+
p256/bignum_montinv_p256.S
210211

211212
p384/bignum_add_p384.S
212213
p384/bignum_sub_p384.S
@@ -223,6 +224,7 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR
223224
p384/p384_montjdouble_alt.S
224225
p384/p384_montjscalarmul.S
225226
p384/p384_montjscalarmul_alt.S
227+
p384/bignum_montinv_p384.S
226228

227229
p521/bignum_add_p521.S
228230
p521/bignum_sub_p521.S
@@ -237,6 +239,7 @@ if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) OR
237239
p521/p521_jdouble_alt.S
238240
p521/p521_jscalarmul.S
239241
p521/p521_jscalarmul_alt.S
242+
p521/bignum_inv_p521.S
240243

241244
curve25519/bignum_mod_n25519.S
242245
curve25519/bignum_neg_p25519.S

crypto/fipsmodule/ec/p256-nistz.c

+6
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,11 @@ static BN_ULONG is_not_zero(BN_ULONG in) {
126126
// the Montgomery domain.
127127
static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG r[P256_LIMBS],
128128
const BN_ULONG in[P256_LIMBS]) {
129+
#if defined(EC_NISTP_USE_S2N_BIGNUM)
130+
ec_nistp_felem_limb in_sqr[P256_LIMBS];
131+
ecp_nistz256_sqr_mont(in_sqr, in);
132+
bignum_montinv_p256(r, in_sqr);
133+
#else
129134
// This implements the addition chain described in
130135
// https://briansmith.org/ecc-inversion-addition-chains-01#p256_field_inversion
131136
BN_ULONG x2[P256_LIMBS], x3[P256_LIMBS], x6[P256_LIMBS], x12[P256_LIMBS],
@@ -188,6 +193,7 @@ static void ecp_nistz256_mod_inverse_sqr_mont(BN_ULONG r[P256_LIMBS],
188193

189194
ecp_nistz256_sqr_mont(ret, ret);
190195
ecp_nistz256_sqr_mont(r, ret); // 2^256 - 2^224 + 2^192 + 2^96 - 2^2
196+
#endif
191197
}
192198

193199
// r = p * p_scalar

crypto/fipsmodule/ec/p384.c

+6
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,11 @@ static void p384_from_scalar(p384_felem out, const EC_SCALAR *in) {
131131
// ffffffff 00000000 00000000 fffffffc
132132
static void p384_inv_square(p384_felem out,
133133
const p384_felem in) {
134+
#if defined(EC_NISTP_USE_S2N_BIGNUM)
135+
ec_nistp_felem_limb in_sqr[P384_NLIMBS];
136+
p384_felem_sqr(in_sqr, in);
137+
bignum_montinv_p384(out, in_sqr);
138+
#else
134139
// This implements the addition chain described in
135140
// https://briansmith.org/ecc-inversion-addition-chains-01#p384_field_inversion
136141
// The side comments show the value of the exponent:
@@ -222,6 +227,7 @@ static void p384_inv_square(p384_felem out,
222227

223228
p384_felem_sqr(ret, ret);
224229
p384_felem_sqr(out, ret); // 2^384 - 2^128 - 2^96 + 2^32 - 2^2 = p - 3
230+
#endif
225231
}
226232

227233
static void p384_point_double(p384_felem x_out,

crypto/fipsmodule/ec/p521.c

+4
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ static void p521_to_generic(EC_FELEM *out, const p521_felem in) {
183183
// The code is autogenerated by the ECCKiila project:
184184
// https://arxiv.org/abs/2007.11481
185185
static void p521_felem_inv(p521_felem output, const p521_felem t1) {
186+
#if defined(EC_NISTP_USE_S2N_BIGNUM)
187+
bignum_inv_p521(output, t1);
188+
#else
186189
/* temporary variables */
187190
p521_felem acc, t2, t4, t8, t16, t32, t64;
188191
p521_felem t128, t256, t512, t516, t518, t519;
@@ -240,6 +243,7 @@ static void p521_felem_inv(p521_felem output, const p521_felem t1) {
240243
p521_felem_sqr(acc, t519);
241244
p521_felem_sqr(acc, acc);
242245
p521_felem_mul(output, acc, t1);
246+
#endif
243247
}
244248

245249
static void p521_point_double(p521_felem x_out,

third_party/s2n-bignum/arm/curve25519/bignum_mod_n25519.S

+2-2
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ S2N_BN_SYMBOL(bignum_mod_n25519):
110110

111111
cbz k, writeback
112112

113-
loop:
113+
bignum_mod_n25519_loop:
114114

115115
// Assume that the new 5-digit x is 2^64 * previous_x + next_digit.
116116
// Get the quotient estimate q = max (floor(x/2^252)) (2^64 - 1)
@@ -154,7 +154,7 @@ loop:
154154
adcs m2, t2, xzr
155155
adc m3, t3, m3
156156

157-
cbnz k, loop
157+
cbnz k, bignum_mod_n25519_loop
158158

159159
// Finally write back [m3;m2;m1;m0] and return
160160

third_party/s2n-bignum/arm/p521/bignum_inv_p521.S

+4-4
Original file line numberDiff line numberDiff line change
@@ -789,9 +789,9 @@ S2N_BN_SYMBOL(bignum_inv_p521):
789789

790790
mov i, #21
791791
mov d, #1
792-
b midloop
792+
b bignum_inv_p521_midloop
793793

794-
loop:
794+
bignum_inv_p521_loop:
795795

796796
// Separate the matrix elements into sign-magnitude pairs
797797

@@ -1424,7 +1424,7 @@ loop:
14241424
adc x2, x2, x6
14251425
str x2, [v+64]
14261426

1427-
midloop:
1427+
bignum_inv_p521_midloop:
14281428

14291429
mov x1, d
14301430
ldr x2, [f]
@@ -1435,7 +1435,7 @@ midloop:
14351435
// Next iteration
14361436

14371437
subs i, i, #1
1438-
bne loop
1438+
bne bignum_inv_p521_loop
14391439

14401440
// The 21st and last iteration does not need anything except the
14411441
// u value and the sign of f; the latter can be obtained from the

third_party/s2n-bignum/include/s2n-bignum_aws-lc.h

+15
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@ static inline void p256_montjscalarmul_selector(uint64_t res[S2N_BIGNUM_STATIC 1
6363
else { p256_montjscalarmul(res, scalar, point); }
6464
}
6565

66+
// Montgomery inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1
67+
// z = 2^512 * x^-1 mod p_256, i.e. x * z == 2^512 (mod p_256): the inverse of x in the Montgomery domain (R = 2^256).
68+
// The function is constant-time.
69+
extern void bignum_montinv_p256(uint64_t z[S2N_BIGNUM_STATIC 4], const uint64_t x[S2N_BIGNUM_STATIC 4]);
70+
6671
// Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced
6772
// Inputs x[6], y[6]; output z[6]
6873
extern void bignum_add_p384(uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6], const uint64_t y[S2N_BIGNUM_STATIC 6]);
@@ -124,6 +129,11 @@ static inline void p384_montjscalarmul_selector(uint64_t res[S2N_BIGNUM_STATIC 1
124129
else { p384_montjscalarmul(res, scalar, point); }
125130
}
126131

132+
// Montgomery inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1
133+
// z = 2^768 * x^-1 mod p_384, i.e. x * z == 2^768 (mod p_384): the inverse of x in the Montgomery domain (R = 2^384).
134+
// The function is constant-time.
135+
extern void bignum_montinv_p384(uint64_t z[S2N_BIGNUM_STATIC 6], const uint64_t x[S2N_BIGNUM_STATIC 6]);
136+
127137
// Convert 6-digit (384-bit) bignum from little-endian form
128138
// Input x[6]; output z[6]
129139
extern void bignum_fromlebytes_6(uint64_t z[S2N_BIGNUM_STATIC 6], const uint8_t x[S2N_BIGNUM_STATIC 48]);
@@ -185,6 +195,11 @@ static inline void p521_jscalarmul_selector(uint64_t res[S2N_BIGNUM_STATIC 27],
185195
else { p521_jscalarmul(res, scalar, point); }
186196
}
187197

198+
// Modular inverse modulo p_521 = 2^521 - 1
199+
// z = x^-1 mod p_521.
200+
// The function is constant-time.
201+
extern void bignum_inv_p521(uint64_t z[S2N_BIGNUM_STATIC 9], const uint64_t x[S2N_BIGNUM_STATIC 9]);
202+
188203
// curve25519_x25519_byte and curve25519_x25519_byte_alt computes the x25519
189204
// function specified in https://www.rfc-editor.org/rfc/rfc7748. |scalar| is the
190205
// scalar, |point| is the u-coordinate of the elliptic curve

third_party/s2n-bignum/x86_att/curve25519/bignum_mod_n25519.S

+2-2
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ S2N_BN_SYMBOL(bignum_mod_n25519):
121121
testq k, k
122122
jz writeback
123123

124-
loop:
124+
bignum_mod_n25519_loop:
125125

126126
// Assume that the new 5-digit x is 2^64 * previous_x + next_digit.
127127
// Get the quotient estimate q = max (floor(x/2^252)) (2^64 - 1)
@@ -183,7 +183,7 @@ loop:
183183
movq d, m0
184184

185185
decq k
186-
jnz loop
186+
jnz bignum_mod_n25519_loop
187187

188188
// Write back
189189

third_party/s2n-bignum/x86_att/p521/bignum_inv_p521.S

+4-4
Original file line numberDiff line numberDiff line change
@@ -1095,9 +1095,9 @@ S2N_BN_SYMBOL(bignum_inv_p521):
10951095

10961096
movq $21, i
10971097
movq $1, d
1098-
jmp midloop
1098+
jmp bignum_inv_p521_midloop
10991099

1100-
loop:
1100+
bignum_inv_p521_loop:
11011101

11021102
// Separate out the matrix into sign-magnitude pairs
11031103

@@ -1775,15 +1775,15 @@ loop:
17751775
adcq %rax, %rbp
17761776
movq %rbp, V+8*N(%rsp)
17771777

1778-
midloop:
1778+
bignum_inv_p521_midloop:
17791779

17801780
divstep59(d,ff,gg)
17811781
movq %rsi, d
17821782

17831783
// Next iteration
17841784

17851785
decq i
1786-
jnz loop
1786+
jnz bignum_inv_p521_loop
17871787

17881788
// The 21st and last iteration does not need anything except the
17891789
// u value and the sign of f; the latter can be obtained from the

0 commit comments

Comments
 (0)