diff --git a/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519.S b/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519.S index 8b884d2480..0171271872 100644 --- a/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519.S +++ b/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519.S @@ -1,5 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 OR ISC +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 // ---------------------------------------------------------------------------- // Multiply-add modulo the order of the curve25519/edwards25519 basepoint @@ -280,5 +280,5 @@ S2N_BN_SYMBOL(bignum_madd_n25519): ret #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stacz,"",%progbits +.section .note.GNU-stack,"",%progbits #endif diff --git a/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519_alt.S b/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519_alt.S index 0745386eb8..d1cdfb2c3b 100644 --- a/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519_alt.S +++ b/third_party/s2n-bignum/arm/curve25519/bignum_madd_n25519_alt.S @@ -1,5 +1,5 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 OR ISC +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 // ---------------------------------------------------------------------------- // Multiply-add modulo the order of the curve25519/edwards25519 basepoint @@ -206,5 +206,5 @@ S2N_BN_SYMBOL(bignum_madd_n25519_alt): ret #if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stacz,"",%progbits +.section .note.GNU-stack,"",%progbits #endif diff --git a/third_party/s2n-bignum/arm/p256/bignum_montinv_p256.S b/third_party/s2n-bignum/arm/p256/bignum_montinv_p256.S new file mode 100644 index 0000000000..1a5a7a0ffc --- /dev/null +++ b/third_party/s2n-bignum/arm/p256/bignum_montinv_p256.S @@ -0,0 +1,1303 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1 +// Input x[4]; output z[4] +// +// extern void bignum_montinv_p256(uint64_t z[static 4],uint64_t x[static 4]); +// +// If the 4-digit input x is coprime to p_256, i.e. is not divisible +// by it, returns z < p_256 such that x * z == 2^512 (mod p_256). This +// is effectively "Montgomery inverse" because if we consider x and z as +// Montgomery forms of X and Z, i.e. x == 2^256 * X and z == 2^256 * Z +// (both mod p_256) then X * Z == 1 (mod p_256). That is, this function +// gives the analog of the modular inverse bignum_inv_p256 but with both +// input and output in the Montgomery domain. Note that x does not need +// to be reduced modulo p_256, but the output always is. If the input +// is divisible (i.e. is 0 or p_256), then there can be no solution to +// the congruence x * z == 2^512 (mod p_256), and z = 0 is returned. 
+// +// Standard ARM ABI: X0 = z, X1 = x +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montinv_p256) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montinv_p256) + + .text + .balign 4 + +// Size in bytes of a 64-bit word + +#define N 8 + +// Used for the return pointer + +#define res x20 + +// Loop counter and d = 2 * delta value for divstep + +#define i x21 +#define d x22 + +// Registers used for matrix element magnitudes and signs + +#define m00 x10 +#define m01 x11 +#define m10 x12 +#define m11 x13 +#define s00 x14 +#define s01 x15 +#define s10 x16 +#define s11 x17 + +// Initial carries for combinations + +#define car0 x9 +#define car1 x19 + +// Input and output, plain registers treated according to pattern + +#define reg0 x0, #0 +#define reg1 x1, #0 +#define reg2 x2, #0 +#define reg3 x3, #0 +#define reg4 x4, #0 + +#define x x1, #0 +#define z x0, #0 + +// Pointer-offset pairs for temporaries on stack + +#define f sp, #0 +#define g sp, #(6*N) +#define u sp, #(12*N) +#define v sp, #(16*N) + +// Total size to reserve on the stack + +#define NSPACE #(20*N) + +// Loading large constants + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +// --------------------------------------------------------------------------- +// Core signed almost-Montgomery reduction macro. Takes input in +// [d4;d3;d2;d1;d0] and returns result in [d4;d3;d2;d1], adding to +// the existing [d4;d3;d2;d1], and re-using d0 as a temporary internally +// as well as t0, t1, t2. This is almost-Montgomery, i.e. the result fits +// in 4 digits but is not necessarily strictly reduced mod p_256. +// --------------------------------------------------------------------------- + +#define amontred(d4,d3,d2,d1,d0, t2,t1,t0) \ +/* We only know the input is -2^316 < x < 2^316. To do traditional */ \ +/* unsigned Montgomery reduction, start by adding 2^61 * p_256. */ \ + mov t0, #0xe000000000000000; \ + adds d0, d0, t0; \ + sbcs d1, d1, xzr; \ + mov t1, #0x000000001fffffff; \ + adcs d2, d2, t1; \ + mov t2, #0x2000000000000000; \ + adcs d3, d3, t2; \ + mov t0, #0x1fffffffe0000000; \ + adc d4, d4, t0; \ +/* Let w = d0, the original word we use as offset; d0 gets recycled */ \ +/* First let [t2;t1] = 2^32 * w */ \ +/* then let [d0;t0] = (2^64 - 2^32 + 1) * w (overwrite old d0) */ \ + lsl t1, d0, #32; \ + subs t0, d0, t1; \ + lsr t2, d0, #32; \ + sbc d0, d0, t2; \ +/* Hence basic [d4;d3;d2;d1] += (2^256 - 2^224 + 2^192 + 2^96) * w */ \ + adds d1, d1, t1; \ + adcs d2, d2, t2; \ + adcs d3, d3, t0; \ + adcs d4, d4, d0; \ +/* Now capture top carry and subtract p_256 if set (almost-Montgomery) */ \ + mov t0, #0xffffffffffffffff; \ + mov t1, #0x00000000ffffffff; \ + mov t2, #0xffffffff00000001; \ + csel t0, t0, xzr, cs; \ + csel t1, t1, xzr, cs; \ + csel t2, t2, xzr, cs; \ + subs d1, d1, t0; \ + sbcs d2, d2, t1; \ + sbcs d3, d3, xzr; \ + sbc d4, d4, t2 + +// Very similar to a subroutine call to the s2n-bignum word_divstep59. 
+// But different in register usage and returning the final matrix in +// registers as follows +// +// [ m00 m01] +// [ m10 m11] + +#define divstep59() \ + and x4, x2, #0xfffff; \ + orr x4, x4, #0xfffffe0000000000; \ + and x5, x3, #0xfffff; \ + orr x5, x5, #0xc000000000000000; \ + tst x5, #0x1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, 
xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + asr x5, x5, #1; \ + add x8, x4, #0x100, lsl #12; \ + sbfx x8, x8, #21, #21; \ + mov x11, #0x100000; \ + add x11, x11, x11, lsl #21; \ + add x9, x4, x11; \ + asr x9, x9, #42; \ + add x10, x5, #0x100, lsl #12; \ + sbfx x10, x10, #21, #21; \ + add x11, x5, x11; \ + asr x11, x11, #42; \ + mul x6, x8, x2; \ + mul x7, x9, x3; \ + mul x2, x10, x2; \ + mul x3, x11, x3; \ + add x4, x6, x7; \ + add x5, x2, x3; \ + asr x2, x4, #20; \ + asr x3, x5, #20; \ + and x4, x2, #0xfffff; \ + orr x4, x4, #0xfffffe0000000000; \ + and x5, x3, #0xfffff; \ + orr x5, x5, #0xc000000000000000; \ + tst x5, #0x1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, 
ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + asr x5, x5, #1; \ + add x12, x4, #0x100, lsl #12; \ + sbfx x12, x12, #21, #21; \ + mov x15, #0x100000; \ + add x15, x15, x15, lsl #21; \ + add x13, x4, x15; \ + asr x13, x13, #42; \ + add x14, x5, #0x100, lsl #12; \ + sbfx x14, x14, #21, #21; \ + add x15, x5, x15; \ + asr x15, x15, #42; \ + mul x6, x12, x2; \ + mul x7, x13, x3; \ + mul x2, x14, x2; \ + mul x3, x15, x3; \ + add x4, x6, x7; \ + add x5, x2, x3; \ + asr x2, x4, #20; \ + asr x3, x5, #20; \ + and x4, x2, #0xfffff; \ + orr x4, x4, #0xfffffe0000000000; \ + and x5, x3, #0xfffff; \ + orr x5, x5, #0xc000000000000000; \ + tst x5, #0x1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, 
#1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + mul x2, x12, x8; \ + mul x3, x12, x9; \ + mul x6, x14, x8; \ + mul x7, x14, x9; \ + madd x8, x13, x10, x2; \ + madd x9, x13, x11, x3; \ + madd x16, x15, x10, x6; \ + madd x17, x15, x11, x7; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + asr x5, x5, #1; \ + add x12, x4, #0x100, lsl #12; \ + sbfx x12, x12, #22, #21; \ + mov x15, #0x100000; \ + add x15, x15, x15, lsl #21; \ + add x13, x4, x15; \ + asr x13, x13, #43; \ + add x14, x5, #0x100, lsl #12; \ + sbfx x14, x14, #22, #21; \ + add x15, x5, x15; \ + asr x15, x15, #43; \ + mneg x2, x12, x8; \ + mneg x3, x12, x9; \ + mneg x4, x14, x8; \ + mneg x5, x14, x9; \ + msub m00, x13, x16, x2; \ + msub m01, x13, x17, x3; \ + msub m10, x15, x16, x4; \ + msub m11, x15, x17, x5 + +S2N_BN_SYMBOL(bignum_montinv_p256): + +// Save registers and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! 
+ sub sp, sp, NSPACE + +// Save the return pointer for the end so we can overwrite x0 later + + mov res, x0 + +// Copy the prime and input into the main f and g variables respectively. +// Make sure x is reduced so that g <= f as assumed in the bound proof. + + mov x10, #0xffffffffffffffff + mov x11, #0x00000000ffffffff + mov x13, #0xffffffff00000001 + stp x10, x11, [f] + stp xzr, x13, [f+2*N] + str xzr, [f+4*N] + + ldp x2, x3, [x1] + subs x10, x2, x10 + sbcs x11, x3, x11 + ldp x4, x5, [x1, #(2*N)] + sbcs x12, x4, xzr + sbcs x13, x5, x13 + + csel x2, x2, x10, cc + csel x3, x3, x11, cc + csel x4, x4, x12, cc + csel x5, x5, x13, cc + + stp x2, x3, [g] + stp x4, x5, [g+2*N] + str xzr, [g+4*N] + +// Also maintain reduced < 2^256 vector [u,v] such that +// [f,g] == x * 2^{5*i-562} * [u,v] (mod p_256) +// starting with [p_256,x] == x * 2^{5*0-562} * [0,2^562] (mod p_256) +// The weird-looking 5*i modifications come in because we are doing +// 64-bit word-sized Montgomery reductions at each stage, which is +// 5 bits more than the 59-bit requirement to keep things stable. +// After the 10th and last iteration and sign adjustment, when +// f == 1 for in-scope cases, we have x * 2^{50-562} * u == 1, i.e. +// x * u == 2^512 as required. + + stp xzr, xzr, [u] + stp xzr, xzr, [u+2*N] + +// The starting constant 2^562 mod p_256 is +// 0x000bffffffebffff:fffbffffffefffff:ffe8000000000000:000c000000140000 +// where colons separate 64-bit subwords, least significant at the right. +// Only word number 1, value 0xffe8000000000000, is a single ARM move. + + mov x10, #0x0000000000140000 + orr x10, x10, #0x000c000000000000 + + mov x11, #0xffe8000000000000 + + movbig(x13, #0x000b, #0xffff, #0xffef, #0xffff) + orr x12, x13, #0xfff0000000000000 + and x13, x13, #0xfffffffffffbffff + + stp x10, x11, [v] + stp x12, x13, [v+2*N] + +// Start of main loop. We jump into the middle so that the divstep +// portion is common to the special tenth iteration after a uniform +// first 9. + + mov i, #10 + mov d, #1 + b bignum_montinv_p256_midloop + +bignum_montinv_p256_loop: + +// Separate the matrix elements into sign-magnitude pairs + + cmp m00, xzr + csetm s00, mi + cneg m00, m00, mi + + cmp m01, xzr + csetm s01, mi + cneg m01, m01, mi + + cmp m10, xzr + csetm s10, mi + cneg m10, m10, mi + + cmp m11, xzr + csetm s11, mi + cneg m11, m11, mi + +// Adjust the initial values to allow for complement instead of negation +// This initial offset is the same for [f,g] and [u,v] compositions. +// Save it in stable registers for the [u,v] part and do [f,g] first. + + and x0, m00, s00 + and x1, m01, s01 + add car0, x0, x1 + + and x0, m10, s10 + and x1, m11, s11 + add car1, x0, x1 + +// Now the computation of the updated f and g values. This maintains a +// 2-word carry between stages so we can conveniently insert the shift +// right by 59 before storing back, and not overwrite digits we need +// again of the old f and g values. 
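+//
+// Concretely, with the signed matrix [m00,m01;m10,m11] from divstep59, this
+// computes f := (m00 * f + m01 * g) >> 59 and g := (m10 * f + m11 * g) >> 59.
+// Each signed product is formed in sign-magnitude style as |m| * (w XOR s)
+// with a one-time correction |m| AND s at the lowest digit, which is exactly
+// what car0 and car1 above accumulate.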
+// +// Digit 0 of [f,g] + + ldr x7, [f] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, car0, x0 + adc x2, xzr, x1 + ldr x8, [g] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + adc x2, x2, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, car1, x0 + adc x3, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + adc x3, x3, x1 + +// Digit 1 of [f,g] + + ldr x7, [f+N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [g+N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [f] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [g] + +// Digit 2 of [f,g] + + ldr x7, [f+2*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [g+2*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [f+N] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [g+N] + +// Digits 3 and 4 of [f,g] + + ldr x7, [f+3*N] + eor x1, x7, s00 + ldr x23, [f+4*N] + eor x3, x23, s00 + and x3, x3, m00 + neg x3, x3 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [g+3*N] + eor x1, x8, s01 + ldr x24, [g+4*N] + eor x0, x24, s01 + and x0, x0, m01 + sub x3, x3, x0 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [f+2*N] + extr x5, x3, x5, #59 + str x5, [f+3*N] + asr x3, x3, #59 + str x3, [f+4*N] + + eor x1, x7, s10 + eor x5, x23, s10 + and x5, x5, m10 + neg x5, x5 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, s11 + eor x0, x24, s11 + and x0, x0, m11 + sub x5, x5, x0 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x2, x2, x0 + adc x5, x5, x1 + extr x4, x2, x4, #59 + str x4, [g+2*N] + extr x2, x5, x2, #59 + str x2, [g+3*N] + asr x5, x5, #59 + str x5, [g+4*N] + +// Now the computation of the updated u and v values and their +// Montgomery reductions. A very similar accumulation except that +// the top words of u and v are unsigned and we don't shift. 
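+//
+// In other words u := (m00 * u + m01 * v) * 2^-64 and
+// v := (m10 * u + m11 * v) * 2^-64 (mod p_256), the multiplications by 2^-64
+// being the amontred calls below. Relative to the f,g update, the shift right
+// by 59 is replaced by a word-level Montgomery reduction by 2^64, which is
+// where the extra factor of 2^5 per iteration in the invariant above arises.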
+// +// Digit 0 of [u,v] + + ldr x7, [u] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, car0, x0 + adc x2, xzr, x1 + ldr x8, [v] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u] + adc x2, x2, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, car1, x0 + adc x3, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + str x5, [v] + adc x3, x3, x1 + +// Digit 1 of [u,v] + + ldr x7, [u+N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [v+N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x2, x2, x0 + str x2, [u+N] + adc x6, x6, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x3, x3, x0 + str x3, [v+N] + adc x4, x4, x1 + +// Digit 2 of [u,v] + + ldr x7, [u+2*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [v+2*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x6, x6, x0 + str x6, [u+2*N] + adc x5, x5, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x4, x4, x0 + str x4, [v+2*N] + adc x2, x2, x1 + +// Digits 3 and 4 of u (top is unsigned) + + ldr x7, [u+3*N] + eor x1, x7, s00 + and x3, s00, m00 + neg x3, x3 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [v+3*N] + eor x1, x8, s01 + and x0, s01, m01 + sub x3, x3, x0 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x5, x5, x0 + adc x3, x3, x1 + +// Montgomery reduction of u + + ldp x0, x1, [u] + ldr x6, [u+2*N] + amontred(x3,x5,x6,x1,x0, x10,x11,x14) + stp x1, x6, [u] + stp x5, x3, [u+16] + +// Digits 3 and 4 of v (top is unsigned) + + eor x1, x7, s10 + and x5, s10, m10 + neg x5, x5 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x2, x2, x0 + adc x5, x5, x1 + eor x1, x8, s11 + and x0, s11, m11 + sub x5, x5, x0 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x2, x2, x0 + adc x5, x5, x1 + +// Montgomery reduction of v + + ldp x0, x1, [v] + ldr x3, [v+2*N] + amontred(x5,x2,x3,x1,x0, x10,x11,x14) + stp x1, x3, [v] + stp x2, x5, [v+16] + +bignum_montinv_p256_midloop: + + mov x1, d + ldr x2, [f] + ldr x3, [g] + divstep59() + mov d, x1 + +// Next iteration + + subs i, i, #1 + bne bignum_montinv_p256_loop + +// The 10th and last iteration does not need anything except the +// u value and the sign of f; the latter can be obtained from the +// lowest word of f. So it's done differently from the main loop. +// Find the sign of the new f. For this we just need one digit +// since we know (for in-scope cases) that f is either +1 or -1. +// We don't explicitly shift right by 59 either, but looking at +// bit 63 (or any bit >= 60) of the unshifted result is enough +// to distinguish -1 from +1; this is then made into a mask. + + ldr x0, [f] + ldr x1, [g] + mul x0, x0, m00 + madd x1, x1, m01, x0 + asr x0, x1, #63 + +// Now separate out the matrix into sign-magnitude pairs +// and adjust each one based on the sign of f. +// +// Note that at this point we expect |f|=1 and we got its +// sign above, so then since [f,0] == x * 2^{-512} [u,v] (mod p_256) +// we want to flip the sign of u according to that of f. 
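+//
+// Equivalently, since the final f is +1 or -1, x * (sign(f) * u) == 2^512
+// (mod p_256), so sign(f) * u is the result we want to store. The flip is
+// implemented below by XORing each sign mask with the mask x0 computed just
+// above from the sign of f.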
+ + cmp m00, xzr + csetm s00, mi + cneg m00, m00, mi + eor s00, s00, x0 + + cmp m01, xzr + csetm s01, mi + cneg m01, m01, mi + eor s01, s01, x0 + + cmp m10, xzr + csetm s10, mi + cneg m10, m10, mi + eor s10, s10, x0 + + cmp m11, xzr + csetm s11, mi + cneg m11, m11, mi + eor s11, s11, x0 + +// Adjust the initial value to allow for complement instead of negation + + and x0, m00, s00 + and x1, m01, s01 + add car0, x0, x1 + +// Digit 0 of [u] + + ldr x7, [u] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, car0, x0 + adc x2, xzr, x1 + ldr x8, [v] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u] + adc x2, x2, x1 + +// Digit 1 of [u] + + ldr x7, [u+N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [v+N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x2, x2, x0 + str x2, [u+N] + adc x6, x6, x1 + +// Digit 2 of [u] + + ldr x7, [u+2*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [v+2*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x6, x6, x0 + str x6, [u+2*N] + adc x5, x5, x1 + +// Digits 3 and 4 of u (top is unsigned) + + ldr x7, [u+3*N] + eor x1, x7, s00 + and x3, s00, m00 + neg x3, x3 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x5, x5, x0 + adc x3, x3, x1 + ldr x8, [v+3*N] + eor x1, x8, s01 + and x0, s01, m01 + sub x3, x3, x0 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x5, x5, x0 + adc x3, x3, x1 + +// Montgomery reduction of u. This needs to be strict not "almost" +// so it is followed by an optional subtraction of p_256 + + ldp x0, x1, [u] + ldr x2, [u+2*N] + amontred(x3,x5,x2,x1,x0, x10,x11,x14) + + mov x10, #0xffffffffffffffff + subs x10, x1, x10 + mov x11, #0x00000000ffffffff + sbcs x11, x2, x11 + mov x13, #0xffffffff00000001 + sbcs x12, x5, xzr + sbcs x13, x3, x13 + + csel x10, x1, x10, cc + csel x11, x2, x11, cc + csel x12, x5, x12, cc + csel x13, x3, x13, cc + +// Store it back to the final output + + stp x10, x11, [res] + stp x12, x13, [res, #16] + +// Restore stack and registers + + add sp, sp, NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/arm/p256/p256_montjscalarmul.S b/third_party/s2n-bignum/arm/p256/p256_montjscalarmul.S new file mode 100644 index 0000000000..246421ff37 --- /dev/null +++ b/third_party/s2n-bignum/arm/p256/p256_montjscalarmul.S @@ -0,0 +1,5017 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery-Jacobian form scalar multiplication for P-256 +// Input scalar[4], point[12]; output res[12] +// +// extern void p256_montjscalarmul +// (uint64_t res[static 12], +// uint64_t scalar[static 4], +// uint64_t point[static 12]); +// +// This function is a variant of its affine point version p256_scalarmul. +// Here, input and output points are assumed to be in Jacobian form with +// their coordinates in the Montgomery domain. Thus, if priming indicates +// Montgomery form, x' = (2^256 * x) mod p_256 etc., each point argument +// is a triple (x',y',z') representing the affine point (x/z^2,y/z^3) when +// z' is nonzero or the point at infinity (group identity) if z' = 0. 
+// +// Given scalar = n and point = P, assumed to be on the NIST elliptic +// curve P-256, returns a representation of n * P. If the result is the +// point at infinity (either because the input point was or because the +// scalar was a multiple of p_256) then the output is guaranteed to +// represent the point at infinity, i.e. to have its z coordinate zero. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p256_montjscalarmul) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p256_montjscalarmul) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Safe copies of inputs (res lasts the whole code, point not so long) +// and additional values in variables, with some aliasing + +#define res x19 +#define sgn x20 +#define j x20 +#define point x21 + +// Intermediate variables on the stack. + +#define scalarb sp, #(0*NUMSIZE) +#define acc sp, #(1*NUMSIZE) +#define tabent sp, #(4*NUMSIZE) + +#define tab sp, #(7*NUMSIZE) + +#define NSPACE #(31*NUMSIZE) + +// Avoid using .rep for the sake of the BoringSSL/AWS-LC delocator, +// which doesn't accept repetitions, assembler macros etc. + +#define selectblock(I) \ + cmp x14, #(1*I); \ + ldp x12, x13, [x15]; \ + csel x0, x12, x0, eq; \ + csel x1, x13, x1, eq; \ + ldp x12, x13, [x15, #16]; \ + csel x2, x12, x2, eq; \ + csel x3, x13, x3, eq; \ + ldp x12, x13, [x15, #32]; \ + csel x4, x12, x4, eq; \ + csel x5, x13, x5, eq; \ + ldp x12, x13, [x15, #48]; \ + csel x6, x12, x6, eq; \ + csel x7, x13, x7, eq; \ + ldp x12, x13, [x15, #64]; \ + csel x8, x12, x8, eq; \ + csel x9, x13, x9, eq; \ + ldp x12, x13, [x15, #80]; \ + csel x10, x12, x10, eq; \ + csel x11, x13, x11, eq; \ + add x15, x15, #96 + +// Loading large constants + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +S2N_BN_SYMBOL(p256_montjscalarmul): + + stp x19, x20, [sp, #-16]! + stp x21, x30, [sp, #-16]! + sub sp, sp, NSPACE + +// Preserve the "res" and "point" input arguments. We load and process the +// scalar immediately so we don't bother preserving that input argument. +// Also, "point" is only needed early on and so its register gets re-used. + + mov res, x0 + mov point, x2 + +// Load the digits of group order n_256 = [x12;x13;x14;x15] + + movbig(x12, #0xf3b9, #0xcac2, #0xfc63, #0x2551) + movbig(x13, #0xbce6, #0xfaad, #0xa717, #0x9e84) + mov x14, #0xffffffffffffffff + mov x15, #0xffffffff00000000 + +// First, reduce the input scalar mod n_256, i.e. conditionally subtract n_256 + + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + + subs x6, x2, x12 + sbcs x7, x3, x13 + sbcs x8, x4, x14 + sbcs x9, x5, x15 + + csel x2, x2, x6, cc + csel x3, x3, x7, cc + csel x4, x4, x8, cc + csel x5, x5, x9, cc + +// Now if the top bit of the reduced scalar is set, negate it mod n_256, +// i.e. do n |-> n_256 - n. Remember the sign as "sgn" so we can +// correspondingly negate the point below. + + subs x6, x12, x2 + sbcs x7, x13, x3 + sbcs x8, x14, x4 + sbc x9, x15, x5 + + tst x5, #0x8000000000000000 + csel x2, x2, x6, eq + csel x3, x3, x7, eq + csel x4, x4, x8, eq + csel x5, x5, x9, eq + cset sgn, ne + +// In either case then add the recoding constant 0x08888...888 to allow +// signed digits. 
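+//
+// With C = 0x0888...888 = 8 * (16^0 + 16^1 + ... + 16^62), writing n + C in
+// base 16 as digits d_63..d_0 gives n = d_63 * 16^63 + sum_i (d_i - 8) * 16^i
+// over i = 0..62. So the top window is used as-is below, while every other
+// window yields a signed digit in the range -8..7 whose absolute value
+// (at most 8) selects from the precomputed table of 1 * P, ..., 8 * P.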
+ + mov x6, 0x8888888888888888 + adds x2, x2, x6 + adcs x3, x3, x6 + bic x7, x6, #0xF000000000000000 + adcs x4, x4, x6 + adc x5, x5, x7 + + stp x2, x3, [scalarb] + stp x4, x5, [scalarb+16] + +// Set the tab[0] table entry to the input point = 1 * P, except +// that we negate it if the top bit of the scalar was set. This +// negation takes care over the y = 0 case to maintain all the +// coordinates < p_256 throughout, even though triples (x,y,z) +// with y = 0 can only represent a point on the curve when z = 0 +// and it represents the point at infinity regardless of x and y. + + ldp x0, x1, [point] + stp x0, x1, [tab] + ldp x2, x3, [point, #16] + stp x2, x3, [tab+16] + + ldp x4, x5, [point, #32] + ldp x6, x7, [point, #48] + + mov x0, 0xffffffffffffffff + subs x0, x0, x4 + mov x1, 0x00000000ffffffff + sbcs x1, x1, x5 + mov x3, 0xffffffff00000001 + sbcs x2, xzr, x6 + sbc x3, x3, x7 + + orr x8, x4, x5 + orr x9, x6, x7 + orr x8, x8, x9 + cmp x8, xzr + ccmp sgn, xzr, #4, ne + csel x4, x0, x4, ne + csel x5, x1, x5, ne + csel x6, x2, x6, ne + csel x7, x3, x7, ne + + stp x4, x5, [tab+32] + stp x6, x7, [tab+48] + + ldp x0, x1, [point, #64] + stp x0, x1, [tab+64] + ldp x2, x3, [point, #80] + stp x2, x3, [tab+80] + +// Compute and record tab[1] = 2 * p, ..., tab[7] = 8 * P + + add x0, tab+96*1 + add x1, tab + bl p256_montjscalarmul_p256_montjdouble + + add x0, tab+96*2 + add x1, tab+96*1 + add x2, tab + bl p256_montjscalarmul_p256_montjadd + + add x0, tab+96*3 + add x1, tab+96*1 + bl p256_montjscalarmul_p256_montjdouble + + add x0, tab+96*4 + add x1, tab+96*3 + add x2, tab + bl p256_montjscalarmul_p256_montjadd + + add x0, tab+96*5 + add x1, tab+96*2 + bl p256_montjscalarmul_p256_montjdouble + + add x0, tab+96*6 + add x1, tab+96*5 + add x2, tab + bl p256_montjscalarmul_p256_montjadd + + add x0, tab+96*7 + add x1, tab+96*3 + bl p256_montjscalarmul_p256_montjdouble + +// Initialize the accumulator as a table entry for top 4 bits (unrecoded) + + ldr x14, [scalarb+24] + lsr x14, x14, #60 + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + add x15, tab + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + + stp x0, x1, [acc] + stp x2, x3, [acc+16] + stp x4, x5, [acc+32] + stp x6, x7, [acc+48] + stp x8, x9, [acc+64] + stp x10, x11, [acc+80] + + mov j, #252 + +// Main loop over size-4 bitfields: double 4 times then add signed digit + +p256_montjscalarmul_mainloop: + sub j, j, #4 + + add x0, acc + add x1, acc + bl p256_montjscalarmul_p256_montjdouble + + add x0, acc + add x1, acc + bl p256_montjscalarmul_p256_montjdouble + + add x0, acc + add x1, acc + bl p256_montjscalarmul_p256_montjdouble + + add x0, acc + add x1, acc + bl p256_montjscalarmul_p256_montjdouble + + lsr x2, j, #6 + ldr x14, [sp, x2, lsl #3] // Exploits scalarb = sp exactly + lsr x14, x14, j + and x14, x14, #15 + + subs x14, x14, #8 + cset x16, lo // x16 = sign of digit (1 = negative) + cneg x14, x14, lo // x14 = absolute value of digit + +// Conditionally select the table entry tab[i-1] = i * P in constant time + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + add x15, tab + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + +// 
Store it to "tabent" with the y coordinate optionally negated +// Again, do it carefully to give coordinates < p_256 even in +// the degenerate case y = 0 (when z = 0 for points on the curve). + + stp x0, x1, [tabent] + stp x2, x3, [tabent+16] + + mov x0, 0xffffffffffffffff + subs x0, x0, x4 + mov x1, 0x00000000ffffffff + sbcs x1, x1, x5 + mov x3, 0xffffffff00000001 + sbcs x2, xzr, x6 + sbc x3, x3, x7 + + orr x12, x4, x5 + orr x13, x6, x7 + orr x12, x12, x13 + cmp x12, xzr + ccmp x16, xzr, #4, ne + csel x4, x0, x4, ne + csel x5, x1, x5, ne + csel x6, x2, x6, ne + csel x7, x3, x7, ne + + stp x4, x5, [tabent+32] + stp x6, x7, [tabent+48] + stp x8, x9, [tabent+64] + stp x10, x11, [tabent+80] + + add x0, acc + add x1, acc + add x2, tabent + bl p256_montjscalarmul_p256_montjadd + + cbnz j, p256_montjscalarmul_mainloop + +// That's the end of the main loop, and we just need to copy the +// result in "acc" to the output. + + ldp x0, x1, [acc] + stp x0, x1, [res] + ldp x0, x1, [acc+16] + stp x0, x1, [res, #16] + ldp x0, x1, [acc+32] + stp x0, x1, [res, #32] + ldp x0, x1, [acc+48] + stp x0, x1, [res, #48] + ldp x0, x1, [acc+64] + stp x0, x1, [res, #64] + ldp x0, x1, [acc+80] + stp x0, x1, [res, #80] + +// Restore stack and registers and return + + add sp, sp, NSPACE + ldp x21, x30, [sp], 16 + ldp x19, x20, [sp], 16 + ret + +// Local copies of subroutines, complete clones at the moment + +p256_montjscalarmul_p256_montjadd: + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x27, x30, [sp, #-16]! + sub sp, sp, #0xe0 + mov x21, x0 + mov x22, x1 + mov x23, x2 + mov x0, sp + ldr q19, [x22, #64] + ldp x9, x13, [x22, #64] + ldr q23, [x22, #80] + ldr q0, [x22, #64] + ldp x1, x10, [x22, #80] + uzp2 v29.4s, v19.4s, v19.4s + xtn v4.2s, v19.2d + umulh x8, x9, x13 + rev64 v20.4s, v23.4s + umull v16.2d, v19.2s, v19.2s + umull v1.2d, v29.2s, v4.2s + mul v20.4s, v20.4s, v0.4s + subs x14, x9, x13 + umulh x15, x9, x1 + mov x16, v16.d[1] + umull2 v4.2d, v19.4s, v19.4s + mov x4, v16.d[0] + uzp1 v17.4s, v23.4s, v0.4s + uaddlp v19.2d, v20.4s + lsr x7, x8, #63 + mul x11, x9, x13 + mov x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x10, x1 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4s, v0.4s, v0.4s + shl v19.2d, v19.2d, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2d, v20.2s, v17.2s + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x13, x10 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x1, x10 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x1, x10 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn x2, #0x1 + mul x17, x10, x10 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x10, x10 + adc x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x1, x1 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl 
x5, x14, #32 + umulh x13, x1, x1 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x19, x3, x16, cs + csel x14, x8, x14, cs + csel x12, x11, x12, cs + csel x20, x5, x2, cs + stp x14, x12, [x0, #16] + stp x19, x20, [x0] + ldr q19, [x23, #64] + ldp x9, x13, [x23, #64] + ldr q23, [x23, #80] + ldr q0, [x23, #64] + ldp x1, x10, [x23, #80] + uzp2 v29.4s, v19.4s, v19.4s + xtn v4.2s, v19.2d + umulh x8, x9, x13 + rev64 v20.4s, v23.4s + umull v16.2d, v19.2s, v19.2s + umull v1.2d, v29.2s, v4.2s + mul v20.4s, v20.4s, v0.4s + subs x14, x9, x13 + umulh x15, x9, x1 + mov x16, v16.d[1] + umull2 v4.2d, v19.4s, v19.4s + mov x4, v16.d[0] + uzp1 v17.4s, v23.4s, v0.4s + uaddlp v19.2d, v20.4s + lsr x7, x8, #63 + mul x11, x9, x13 + mov x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x10, x1 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4s, v0.4s, v0.4s + shl v19.2d, v19.2d, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2d, v20.2s, v17.2s + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x13, x10 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x1, x10 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x1, x10 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn x2, #0x1 + mul x17, x10, x10 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x10, x10 + adc x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x1, x1 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl x5, x14, #32 + umulh x13, x1, x1 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + 
sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x16, x3, x16, cs + csel x14, x8, x14, cs + csel x12, x11, x12, cs + csel x2, x5, x2, cs + stp x14, x12, [sp, #176] + stp x16, x2, [sp, #160] + ldr q20, [x22, #32] + ldp x7, x17, [x23, #64] + ldr q0, [x23, #64] + ldp x6, x10, [x22, #32] + ldp x11, x15, [x23, #80] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x7 + ldr q20, [x22, #48] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x7, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [x23, #80] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [x22, #48] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x24, x3, x13 + adcs x25, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x24 + adcs x15, x16, x25 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds 
x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x11, x11, x13 + and x1, x1, x13 + adcs x4, x4, x1 + and x1, x12, x13 + stp x11, x4, [sp, #192] + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [sp, #208] + ldr q20, [x23, #32] + ldp x7, x17, [x22, #64] + ldr q0, [x22, #64] + ldp x6, x10, [x23, #32] + ldp x11, x15, [x22, #80] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x7 + ldr q20, [x23, #48] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x7, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [x22, #80] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [x23, #48] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x24, x3, x13 + adcs x25, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x24 + adcs x15, 
x16, x25 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x24, x11, x13 + and x1, x1, x13 + adcs x25, x4, x1 + and x1, x12, x13 + stp x24, x25, [sp, #32] + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [sp, #48] + mov x1, sp + ldr q20, [x23] + ldr q0, [x1] + ldp x6, x10, [x23] + ldp x11, x15, [x1, #16] + rev64 v16.4s, v20.4s + subs x4, x19, x20 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x20, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x19 + ldr q20, [x23, #16] + sbcs x5, x15, x20 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x19, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [x1, #16] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [x23, #16] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x19, x3, x13 + adcs x20, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, 
x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x19 + adcs x15, x16, x20 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x11, x11, x13 + and x1, x1, x13 + adcs x4, x4, x1 + and x1, x12, x13 + stp x11, x4, [sp, #64] + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [sp, #80] + ldr q20, [x22] + ldp x7, x17, [sp, #160] + ldr q0, [sp, #160] + ldp x6, x10, [x22] + ldp x11, x15, [sp, #176] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x7 + ldr q20, [x22, #16] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x7, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #176] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [x22, #16] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x19, x3, x13 + adcs x20, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s 
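+// (Continuation of the inlined Montgomery multiplication modulo p_256 begun
+// above: the NEON lanes form the upper 2x2 partial product while the scalar
+// code accumulates the signed cross terms; the usual Montgomery reduction and
+// writeback of the 4-word result follow.)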
+ csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x19 + adcs x15, x16, x20 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x11, x11, x13 + and x1, x1, x13 + adcs x4, x4, x1 + and x1, x12, x13 + stp x11, x4, [sp, #128] + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [sp, #144] + mov x1, sp + ldr q20, [sp, #32] + ldp x7, x17, [x1] + ldr q0, [x1] + ldp x11, x15, [x1, #16] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x25 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x7 + ldr q20, [sp, #48] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x7, x24 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x25, x24 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [x1, #16] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [sp, #48] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x24, x7 + sbcs x9, x25, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, 
x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x19, x3, x13 + adcs x20, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x24, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x25, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x19 + adcs x15, x16, x20 + eor x5, x17, x4 + adcs x9, x1, x24 + eor x1, x10, x5 + adcs x16, x2, x25 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x19, x11, x13 + and x1, x1, x13 + adcs x20, x4, x1 + and x1, x12, x13 + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [sp, #48] + ldr q20, [sp, #192] + ldp x7, x17, [sp, #160] + ldr q0, [sp, #160] + ldp x6, x10, [sp, #192] + ldp x11, x15, [sp, #176] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x7 + ldr q20, [sp, #208] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x7, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #176] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + 
adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [sp, #208] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x24, x3, x13 + adcs x25, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x24 + adcs x15, x16, x25 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x9, x11, x13 + and x1, x1, x13 + adcs x10, x4, x1 + and x1, x12, x13 + stp x9, x10, [sp, #192] + adcs x11, x7, xzr + adc x12, x17, x1 + stp x11, x12, [sp, #208] + ldp x5, x6, [sp, #64] + ldp x4, x3, [sp, #128] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #80] + ldp x4, x3, [sp, #144] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x13, x5, x3 + and x4, x3, #0xffffffff + adcs x24, x6, x4 + adcs x25, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x26, x8, x4 + stp x13, x24, [sp, #160] + stp x25, x26, [sp, #176] + subs x5, x19, x9 + 
sbcs x6, x20, x10 + ldp x7, x8, [sp, #48] + sbcs x7, x7, x11 + sbcs x8, x8, x12 + csetm x3, cc + adds x19, x5, x3 + and x4, x3, #0xffffffff + adcs x20, x6, x4 + adcs x7, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x8, x8, x4 + stp x19, x20, [sp, #32] + stp x7, x8, [sp, #48] + ldr q19, [sp, #160] + ldr q23, [sp, #176] + ldr q0, [sp, #160] + uzp2 v29.4s, v19.4s, v19.4s + xtn v4.2s, v19.2d + umulh x8, x13, x24 + rev64 v20.4s, v23.4s + umull v16.2d, v19.2s, v19.2s + umull v1.2d, v29.2s, v4.2s + mul v20.4s, v20.4s, v0.4s + subs x14, x13, x24 + umulh x15, x13, x25 + mov x16, v16.d[1] + umull2 v4.2d, v19.4s, v19.4s + mov x4, v16.d[0] + uzp1 v17.4s, v23.4s, v0.4s + uaddlp v19.2d, v20.4s + lsr x7, x8, #63 + mul x11, x13, x24 + mov x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x26, x25 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4s, v0.4s, v0.4s + shl v19.2d, v19.2d, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2d, v20.2s, v17.2s + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x24, x26 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x25, x26 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x25, x26 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn x2, #0x1 + mul x17, x26, x26 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x26, x26 + adc x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x25, x25 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl x5, x14, #32 + umulh x13, x25, x25 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x24, x3, x16, cs + csel x25, x8, x14, cs + csel x26, x11, x12, cs + csel x27, x5, x2, cs + stp x25, x26, [sp, #112] + stp x24, x27, [sp, #96] + mov x0, sp + ldr q19, [sp, #32] + ldr q23, [sp, #48] + ldr q0, [sp, #32] + ldp x1, x10, [sp, #48] + uzp2 v29.4s, v19.4s, v19.4s + xtn v4.2s, v19.2d + umulh x8, x19, x20 + rev64 v20.4s, v23.4s + umull v16.2d, v19.2s, v19.2s + umull v1.2d, v29.2s, v4.2s + mul v20.4s, v20.4s, v0.4s + subs x14, x19, x20 + umulh x15, x19, x1 + mov x16, v16.d[1] + umull2 v4.2d, v19.4s, v19.4s + mov x4, v16.d[0] + uzp1 v17.4s, v23.4s, v0.4s + uaddlp v19.2d, v20.4s + lsr x7, x8, #63 + mul x11, x19, x20 + mov 
x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x10, x1 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4s, v0.4s, v0.4s + shl v19.2d, v19.2d, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2d, v20.2s, v17.2s + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x20, x10 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x1, x10 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x1, x10 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn x2, #0x1 + mul x17, x10, x10 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x10, x10 + adc x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x1, x1 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl x5, x14, #32 + umulh x13, x1, x1 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x16, x3, x16, cs + csel x14, x8, x14, cs + csel x12, x11, x12, cs + csel x2, x5, x2, cs + stp x14, x12, [x0, #16] + stp x16, x2, [x0] + ldr q20, [sp, #128] + ldr q0, [sp, #96] + ldp x6, x10, [sp, #128] + rev64 v16.4s, v20.4s + subs x4, x24, x27 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x27, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x25, x24 + ldr q20, [sp, #144] + sbcs x5, x26, x27 + ngc x17, xzr + subs x8, x25, x26 + uaddlp v27.2d, v16.4s + umulh x4, x24, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #112] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [sp, #144] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, 
x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x19, x3, x13 + adcs x20, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x25, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x26, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x19 + adcs x15, x16, x20 + eor x5, x17, x4 + adcs x9, x1, x25 + eor x1, x10, x5 + adcs x16, x2, x26 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x19, x11, x13 + and x1, x1, x13 + adcs x20, x4, x1 + and x1, x12, x13 + stp x19, x20, [sp, #128] + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [sp, #144] + ldr q20, [sp, #64] + ldr q0, [sp, #96] + ldp x6, x10, [sp, #64] + ldp x11, x15, [sp, #112] + rev64 v16.4s, v20.4s + subs x4, x24, x27 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x27, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x24 + ldr q20, [sp, #80] + sbcs x5, x15, x27 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x24, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #112] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, 
v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [sp, #80] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x24, x3, x13 + adcs x25, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x24 + adcs x15, x16, x25 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x9, x11, x13 + and x1, x1, x13 + adcs x10, x4, x1 + and x1, x12, x13 + stp x9, x10, [sp, #64] + adcs x11, x7, xzr + adc x12, x17, x1 + stp x11, x12, [sp, #80] + mov x0, sp + mov x1, sp + ldp x5, x6, [x1] + subs x5, x5, x19 + sbcs x6, x6, x20 + ldp x7, x8, [x1, #16] + ldp x4, x3, [sp, #144] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x24, x5, x3 + and x4, x3, #0xffffffff + adcs x25, x6, x4 + adcs x7, x7, xzr + and x4, x3, 
#0xffffffff00000001 + adc x8, x8, x4 + stp x7, x8, [x0, #16] + subs x5, x9, x19 + sbcs x6, x10, x20 + ldp x4, x3, [sp, #144] + sbcs x7, x11, x4 + sbcs x8, x12, x3 + csetm x3, cc + adds x5, x5, x3 + and x4, x3, #0xffffffff + adcs x6, x6, x4 + adcs x7, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x8, x8, x4 + stp x5, x6, [sp, #96] + stp x7, x8, [sp, #112] + ldr q20, [x22, #64] + ldp x7, x17, [sp, #160] + ldr q0, [sp, #160] + ldp x6, x10, [x22, #64] + ldp x11, x15, [sp, #176] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x7 + ldr q20, [x22, #80] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x7, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #176] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [x22, #80] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x19, x3, x13 + adcs x20, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x19 + adcs x15, x16, x20 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, 
x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x11, x11, x13 + and x1, x1, x13 + adcs x4, x4, x1 + and x1, x12, x13 + stp x11, x4, [sp, #160] + adcs x19, x7, xzr + adc x20, x17, x1 + stp x19, x20, [sp, #176] + mov x0, sp + mov x1, sp + ldp x4, x3, [sp, #64] + subs x5, x24, x4 + sbcs x6, x25, x3 + ldp x7, x8, [x1, #16] + ldp x4, x3, [sp, #80] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x9, x5, x3 + and x4, x3, #0xffffffff + adcs x10, x6, x4 + adcs x11, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x3, x8, x4 + stp x9, x10, [x0] + stp x11, x3, [x0, #16] + ldp x5, x6, [sp, #128] + subs x5, x5, x9 + sbcs x6, x6, x10 + ldp x7, x8, [sp, #144] + sbcs x7, x7, x11 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + and x4, x3, #0xffffffff + adcs x6, x6, x4 + adcs x7, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x8, x8, x4 + stp x5, x6, [sp, #128] + stp x7, x8, [sp, #144] + ldr q20, [sp, #192] + ldp x7, x17, [sp, #96] + ldr q0, [sp, #96] + ldp x6, x10, [sp, #192] + ldp x11, x15, [sp, #112] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x7 + ldr q20, [sp, #208] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x7, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #112] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [sp, #208] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x24, x3, x13 + adcs x25, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp 
v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x24 + adcs x15, x16, x25 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x11, x11, x13 + and x1, x1, x13 + adcs x4, x4, x1 + and x1, x12, x13 + stp x11, x4, [sp, #96] + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [sp, #112] + ldr q20, [x23, #64] + ldp x7, x17, [sp, #160] + ldr q0, [sp, #160] + ldp x6, x10, [x23, #64] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x19, x7 + ldr q20, [x23, #80] + sbcs x5, x20, x17 + ngc x17, xzr + subs x8, x19, x20 + uaddlp v27.2d, v16.4s + umulh x4, x7, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #176] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [x23, #80] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + 
subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x19, x3, x13 + adcs x20, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x24, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x25, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x19 + adcs x15, x16, x20 + eor x5, x17, x4 + adcs x9, x1, x24 + eor x1, x10, x5 + adcs x16, x2, x25 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x19, x11, x13 + and x1, x1, x13 + adcs x20, x4, x1 + and x1, x12, x13 + stp x19, x20, [sp, #160] + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [sp, #176] + ldr q20, [sp, #128] + ldp x7, x17, [sp, #32] + ldr q0, [sp, #32] + ldp x6, x10, [sp, #128] + ldp x11, x15, [sp, #48] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x7 + ldr q20, [sp, #144] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x7, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #48] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [sp, #144] + adcs x12, 
x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x24, x3, x13 + adcs x25, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x24 + adcs x15, x16, x25 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x5, x11, x13 + and x1, x1, x13 + adcs x6, x4, x1 + and x1, x12, x13 + adcs x7, x7, xzr + adc x9, x17, x1 + ldp x4, x3, [sp, #96] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x4, x3, [sp, #112] + sbcs x7, x7, x4 + sbcs x8, x9, x3 + csetm x3, cc + adds x15, x5, x3 + and x4, x3, #0xffffffff + adcs x24, x6, x4 + adcs x25, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x26, x8, x4 + stp x15, x24, [sp, #128] + stp x25, x26, [sp, #144] + ldp x0, x1, [x22, #64] + ldp x2, x3, [x22, #80] + orr x12, x0, x1 + orr x13, x2, x3 + orr x12, x12, x13 + cmp x12, xzr + cset x12, ne + ldp x4, x5, [x23, #64] + ldp x6, x7, [x23, #80] + orr x13, x4, x5 + orr x14, x6, 
x7 + orr x13, x13, x14 + cmp x13, xzr + cset x13, ne + cmp x13, x12 + csel x8, x0, x19, cc + csel x9, x1, x20, cc + csel x8, x4, x8, hi + csel x9, x5, x9, hi + ldp x10, x11, [sp, #176] + csel x10, x2, x10, cc + csel x11, x3, x11, cc + csel x10, x6, x10, hi + csel x11, x7, x11, hi + ldp x12, x13, [x22] + ldp x0, x1, [sp] + csel x0, x12, x0, cc + csel x1, x13, x1, cc + ldp x12, x13, [x23] + csel x0, x12, x0, hi + csel x1, x13, x1, hi + ldp x12, x13, [x22, #16] + ldp x2, x3, [sp, #16] + csel x2, x12, x2, cc + csel x3, x13, x3, cc + ldp x12, x13, [x23, #16] + csel x2, x12, x2, hi + csel x3, x13, x3, hi + ldp x12, x13, [x22, #32] + csel x4, x12, x15, cc + csel x5, x13, x24, cc + ldp x12, x13, [x23, #32] + csel x4, x12, x4, hi + csel x5, x13, x5, hi + ldp x12, x13, [x22, #48] + csel x6, x12, x25, cc + csel x7, x13, x26, cc + ldp x12, x13, [x23, #48] + csel x6, x12, x6, hi + csel x7, x13, x7, hi + stp x0, x1, [x21] + stp x2, x3, [x21, #16] + stp x4, x5, [x21, #32] + stp x6, x7, [x21, #48] + stp x8, x9, [x21, #64] + stp x10, x11, [x21, #80] + add sp, sp, #0xe0 + ldp x27, x30, [sp], #16 + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 + ret + +p256_montjscalarmul_p256_montjdouble: + sub sp, sp, #0x110 + stp x19, x20, [sp, #192] + stp x21, x22, [sp, #208] + stp x23, x24, [sp, #224] + stp x25, x26, [sp, #240] + stp x27, xzr, [sp, #256] + mov x19, x0 + mov x20, x1 + mov x0, sp + ldr q19, [x20, #64] + ldp x9, x13, [x20, #64] + ldr q23, [x20, #80] + ldr q0, [x20, #64] + ldp x1, x10, [x20, #80] + uzp2 v29.4s, v19.4s, v19.4s + xtn v4.2s, v19.2d + umulh x8, x9, x13 + rev64 v20.4s, v23.4s + umull v16.2d, v19.2s, v19.2s + umull v1.2d, v29.2s, v4.2s + mul v20.4s, v20.4s, v0.4s + subs x14, x9, x13 + umulh x15, x9, x1 + mov x16, v16.d[1] + umull2 v4.2d, v19.4s, v19.4s + mov x4, v16.d[0] + uzp1 v17.4s, v23.4s, v0.4s + uaddlp v19.2d, v20.4s + lsr x7, x8, #63 + mul x11, x9, x13 + mov x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x10, x1 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4s, v0.4s, v0.4s + shl v19.2d, v19.2d, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2d, v20.2s, v17.2s + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x13, x10 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x1, x10 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x1, x10 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn x2, #0x1 + mul x17, x10, x10 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x10, x10 + adc x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x1, x1 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl x5, x14, #32 + umulh x13, x1, x1 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc 
x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x21, x3, x16, cs + csel x22, x8, x14, cs + csel x23, x11, x12, cs + csel x24, x5, x2, cs + stp x22, x23, [x0, #16] + stp x21, x24, [x0] + ldr q19, [x20, #32] + ldp x9, x13, [x20, #32] + ldr q23, [x20, #48] + ldr q0, [x20, #32] + ldp x1, x10, [x20, #48] + uzp2 v29.4s, v19.4s, v19.4s + xtn v4.2s, v19.2d + umulh x8, x9, x13 + rev64 v20.4s, v23.4s + umull v16.2d, v19.2s, v19.2s + umull v1.2d, v29.2s, v4.2s + mul v20.4s, v20.4s, v0.4s + subs x14, x9, x13 + umulh x15, x9, x1 + mov x16, v16.d[1] + umull2 v4.2d, v19.4s, v19.4s + mov x4, v16.d[0] + uzp1 v17.4s, v23.4s, v0.4s + uaddlp v19.2d, v20.4s + lsr x7, x8, #63 + mul x11, x9, x13 + mov x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x10, x1 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4s, v0.4s, v0.4s + shl v19.2d, v19.2d, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2d, v20.2s, v17.2s + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x13, x10 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x1, x10 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x1, x10 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn x2, #0x1 + mul x17, x10, x10 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x10, x10 + adc x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x1, x1 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl x5, x14, #32 + umulh x13, x1, x1 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x16, x3, x16, cs + csel x14, x8, x14, cs + csel x12, x11, x12, cs + csel x2, x5, x2, cs + stp x14, x12, [sp, #48] + stp x16, x2, [sp, #32] + ldp x5, x6, [x20] + 
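+// (The squares z^2 and y^2 have now been written to [sp] and [sp, #32]; the
+// additions/subtractions that follow form x - z^2 and x + z^2 from the input
+// x coordinate, with the z^2 limbs still live in x21, x24, x22 and x23, each
+// result being reduced modulo p_256.)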
subs x5, x5, x21 + sbcs x6, x6, x24 + ldp x7, x8, [x20, #16] + sbcs x7, x7, x22 + sbcs x8, x8, x23 + csetm x3, cc + adds x10, x5, x3 + and x4, x3, #0xffffffff + adcs x25, x6, x4 + adcs x26, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x27, x8, x4 + stp x10, x25, [sp, #96] + stp x26, x27, [sp, #112] + ldp x5, x6, [x20] + adds x5, x5, x21 + adcs x6, x6, x24 + ldp x7, x8, [x20, #16] + adcs x7, x7, x22 + adcs x8, x8, x23 + csetm x3, cs + subs x9, x5, x3 + and x1, x3, #0xffffffff + sbcs x5, x6, x1 + sbcs x7, x7, xzr + and x2, x3, #0xffffffff00000001 + sbc x8, x8, x2 + stp x9, x5, [sp, #64] + stp x7, x8, [sp, #80] + ldr q20, [sp, #96] + ldr q0, [sp, #64] + rev64 v16.4s, v20.4s + subs x4, x9, x5 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x5, x25 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x7, x9 + ldr q20, [sp, #112] + sbcs x5, x8, x5 + ngc x17, xzr + subs x8, x7, x8 + uaddlp v27.2d, v16.4s + umulh x4, x9, x10 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x25, x10 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #80] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x10, x26 + sbcs x9, x25, x27 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x27, x26 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x21, x3, x13 + adcs x22, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x23, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x24, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x21 + adcs x15, x16, x22 + eor x5, x17, x4 + adcs x9, x1, x23 + eor x1, x10, x5 + adcs x16, x2, x24 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, 
x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x21, x11, x13 + and x1, x1, x13 + adcs x22, x4, x1 + and x1, x12, x13 + stp x21, x22, [sp, #96] + adcs x23, x7, xzr + adc x24, x17, x1 + stp x23, x24, [sp, #112] + ldp x4, x5, [x20, #32] + ldp x8, x9, [x20, #64] + adds x4, x4, x8 + adcs x5, x5, x9 + ldp x6, x7, [x20, #48] + ldp x10, x11, [x20, #80] + adcs x6, x6, x10 + adcs x7, x7, x11 + adc x3, xzr, xzr + adds x8, x4, #0x1 + mov x9, #0xffffffff + sbcs x9, x5, x9 + sbcs x10, x6, xzr + mov x11, #0xffffffff00000001 + sbcs x11, x7, x11 + sbcs x3, x3, xzr + csel x4, x4, x8, cc + csel x5, x5, x9, cc + csel x6, x6, x10, cc + csel x7, x7, x11, cc + stp x4, x5, [sp, #64] + stp x6, x7, [sp, #80] + ldr q20, [sp, #32] + ldp x7, x17, [x20] + ldr q0, [x20] + ldp x6, x10, [sp, #32] + ldp x11, x15, [x20, #16] + rev64 v16.4s, v20.4s + subs x4, x7, x17 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x17, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x11, x7 + ldr q20, [sp, #48] + sbcs x5, x15, x17 + ngc x17, xzr + subs x8, x11, x15 + uaddlp v27.2d, v16.4s + umulh x4, x7, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [x20, #16] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [sp, #48] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x20, x3, x13 + adcs x25, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x26, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x27, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b 
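+// (Still inside the inlined Montgomery multiplication of the cached y^2 at
+// [sp, #32] by the input x coordinate at [x20], i.e. the x*y^2 term of the
+// doubling; its reduction and store to [sp, #128] follow below.)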
+ umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x20 + adcs x15, x16, x25 + eor x5, x17, x4 + adcs x9, x1, x26 + eor x1, x10, x5 + adcs x16, x2, x27 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x20, x11, x13 + and x1, x1, x13 + adcs x25, x4, x1 + and x1, x12, x13 + stp x20, x25, [sp, #128] + adcs x4, x7, xzr + adc x1, x17, x1 + stp x4, x1, [sp, #144] + ldr q19, [sp, #96] + ldr q23, [sp, #112] + ldr q0, [sp, #96] + uzp2 v29.4s, v19.4s, v19.4s + xtn v4.2s, v19.2d + umulh x8, x21, x22 + rev64 v20.4s, v23.4s + umull v16.2d, v19.2s, v19.2s + umull v1.2d, v29.2s, v4.2s + mul v20.4s, v20.4s, v0.4s + subs x14, x21, x22 + umulh x15, x21, x23 + mov x16, v16.d[1] + umull2 v4.2d, v19.4s, v19.4s + mov x4, v16.d[0] + uzp1 v17.4s, v23.4s, v0.4s + uaddlp v19.2d, v20.4s + lsr x7, x8, #63 + mul x11, x21, x22 + mov x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x24, x23 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4s, v0.4s, v0.4s + shl v19.2d, v19.2d, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2d, v20.2s, v17.2s + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x22, x24 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x23, x24 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x23, x24 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn 
x2, #0x1 + mul x17, x24, x24 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x24, x24 + adc x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x23, x23 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl x5, x14, #32 + umulh x13, x23, x23 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x21, x3, x16, cs + csel x22, x8, x14, cs + csel x23, x11, x12, cs + csel x24, x5, x2, cs + ldr q19, [sp, #64] + ldp x9, x13, [sp, #64] + ldr q23, [sp, #80] + ldr q0, [sp, #64] + ldp x1, x10, [sp, #80] + uzp2 v29.4s, v19.4s, v19.4s + xtn v4.2s, v19.2d + umulh x8, x9, x13 + rev64 v20.4s, v23.4s + umull v16.2d, v19.2s, v19.2s + umull v1.2d, v29.2s, v4.2s + mul v20.4s, v20.4s, v0.4s + subs x14, x9, x13 + umulh x15, x9, x1 + mov x16, v16.d[1] + umull2 v4.2d, v19.4s, v19.4s + mov x4, v16.d[0] + uzp1 v17.4s, v23.4s, v0.4s + uaddlp v19.2d, v20.4s + lsr x7, x8, #63 + mul x11, x9, x13 + mov x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x10, x1 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4s, v0.4s, v0.4s + shl v19.2d, v19.2d, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2d, v20.2s, v17.2s + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x13, x10 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x1, x10 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x1, x10 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn x2, #0x1 + mul x17, x10, x10 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x10, x10 + adc x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x1, x1 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl x5, x14, #32 + umulh x13, x1, x1 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, 
x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x13, x3, x16, cs + csel x14, x8, x14, cs + csel x15, x11, x12, cs + csel x26, x5, x2, cs + mov x1, #0x9 + mov x2, #0xffffffffffffffff + subs x9, x2, x21 + mov x2, #0xffffffff + sbcs x10, x2, x24 + ngcs x11, x22 + mov x2, #0xffffffff00000001 + sbc x12, x2, x23 + mul x3, x1, x9 + mul x4, x1, x10 + mul x5, x1, x11 + mul x6, x1, x12 + umulh x9, x1, x9 + umulh x10, x1, x10 + umulh x11, x1, x11 + umulh x7, x1, x12 + adds x4, x4, x9 + adcs x5, x5, x10 + adcs x6, x6, x11 + adc x7, x7, xzr + mov x1, #0xc + mul x8, x20, x1 + umulh x9, x20, x1 + adds x3, x3, x8 + mul x8, x25, x1 + umulh x10, x25, x1 + adcs x4, x4, x8 + ldp x11, x12, [sp, #144] + mul x8, x11, x1 + umulh x11, x11, x1 + adcs x5, x5, x8 + mul x8, x12, x1 + umulh x12, x12, x1 + adcs x6, x6, x8 + adc x7, x7, xzr + adds x4, x4, x9 + adcs x5, x5, x10 + adcs x6, x6, x11 + adc x7, x7, x12 + add x8, x7, #0x1 + lsl x10, x8, #32 + adds x6, x6, x10 + adc x7, x7, xzr + neg x9, x8 + sub x10, x10, #0x1 + subs x3, x3, x9 + sbcs x4, x4, x10 + sbcs x5, x5, xzr + sbcs x6, x6, x8 + sbc x8, x7, x8 + adds x20, x3, x8 + and x9, x8, #0xffffffff + adcs x21, x4, x9 + adcs x22, x5, xzr + neg x10, x9 + adc x23, x6, x10 + stp x20, x21, [sp, #160] + stp x22, x23, [sp, #176] + mov x2, sp + ldp x4, x3, [x2] + subs x5, x13, x4 + sbcs x6, x26, x3 + ldp x4, x3, [x2, #16] + sbcs x7, x14, x4 + sbcs x8, x15, x3 + csetm x3, cc + adds x5, x5, x3 + and x4, x3, #0xffffffff + adcs x6, x6, x4 + adcs x7, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x8, x8, x4 + stp x5, x6, [sp, #64] + stp x7, x8, [sp, #80] + mov x0, sp + ldr q19, [sp, #32] + ldp x9, x13, [sp, #32] + ldr q23, [sp, #48] + ldr q0, [sp, #32] + ldp x1, x10, [sp, #48] + uzp2 v29.4s, v19.4s, v19.4s + xtn v4.2s, v19.2d + umulh x8, x9, x13 + rev64 v20.4s, v23.4s + umull v16.2d, v19.2s, v19.2s + umull v1.2d, v29.2s, v4.2s + mul v20.4s, v20.4s, v0.4s + subs x14, x9, x13 + umulh x15, x9, x1 + mov x16, v16.d[1] + umull2 v4.2d, v19.4s, v19.4s + mov x4, v16.d[0] + uzp1 v17.4s, v23.4s, v0.4s + uaddlp v19.2d, v20.4s + lsr x7, x8, #63 + mul x11, x9, x13 + mov x12, v1.d[0] + csetm x5, cc + cneg x6, x14, cc + mov x3, v4.d[1] + mov x14, v4.d[0] + subs x2, x10, x1 + mov x9, v1.d[1] + cneg x17, x2, cc + cinv x2, x5, cc + adds x5, x4, x12, lsl #33 + extr x4, x8, x11, #63 + lsr x8, x12, #31 + uzp1 v20.4s, v0.4s, v0.4s + shl v19.2d, v19.2d, #32 + adc x16, x16, x8 + adds x8, x14, x9, lsl #33 + lsr x14, x9, #31 + lsl x9, x5, #32 + umlal v19.2d, v20.2s, v17.2s + adc x14, x3, x14 + adds x16, x16, x11, lsl #1 + lsr x3, x5, #32 + umulh x12, x6, x17 + adcs x4, x8, x4 + adc x11, x14, x7 + subs x8, x5, x9 + sbc x5, x5, x3 + adds x16, x16, x9 + mov x14, v19.d[0] + mul x17, x6, x17 + adcs x3, x4, x3 + lsl x7, x16, #32 + umulh x13, x13, x10 + adcs x11, x11, x8 + lsr x8, x16, #32 + adc x5, x5, xzr + subs x9, x16, x7 + sbc x16, x16, x8 + adds x7, x3, x7 + mov x3, v19.d[1] + adcs x6, x11, x8 + umulh x11, x1, x10 + adcs x5, x5, x9 + eor x8, x12, x2 + adc x9, x16, xzr + adds x16, x14, x15 + adc x15, x15, xzr + adds x12, x16, x3 + eor x16, x17, x2 + mul x4, x1, x10 + adcs x15, x15, x13 + adc x17, x13, xzr + adds x15, x15, x3 + adc x3, x17, xzr + cmn x2, #0x1 + mul x17, x10, x10 + adcs x12, x12, x16 + adcs x16, x15, x8 + umulh x10, x10, x10 + adc 
x2, x3, x2 + adds x14, x14, x14 + adcs x12, x12, x12 + adcs x16, x16, x16 + adcs x2, x2, x2 + adc x15, xzr, xzr + adds x14, x14, x7 + mul x3, x1, x1 + adcs x12, x12, x6 + lsr x7, x14, #32 + adcs x16, x16, x5 + lsl x5, x14, #32 + umulh x13, x1, x1 + adcs x2, x2, x9 + mov x6, #0xffffffff + adc x15, x15, xzr + adds x8, x4, x4 + adcs x1, x11, x11 + mov x11, #0xffffffff00000001 + adc x4, xzr, xzr + subs x9, x14, x5 + sbc x14, x14, x7 + adds x12, x12, x5 + adcs x16, x16, x7 + lsl x5, x12, #32 + lsr x7, x12, #32 + adcs x2, x2, x9 + adcs x14, x15, x14 + adc x15, xzr, xzr + subs x9, x12, x5 + sbc x12, x12, x7 + adds x16, x16, x5 + adcs x2, x2, x7 + adcs x14, x14, x9 + adcs x12, x15, x12 + adc x15, xzr, xzr + adds x16, x16, x3 + adcs x2, x2, x13 + adcs x14, x14, x17 + adcs x12, x12, x10 + adc x15, x15, xzr + adds x2, x2, x8 + adcs x14, x14, x1 + adcs x12, x12, x4 + adcs x15, x15, xzr + adds x3, x16, #0x1 + sbcs x5, x2, x6 + sbcs x8, x14, xzr + sbcs x11, x12, x11 + sbcs xzr, x15, xzr + csel x24, x3, x16, cs + csel x25, x8, x14, cs + csel x26, x11, x12, cs + csel x27, x5, x2, cs + stp x25, x26, [x0, #16] + stp x24, x27, [x0] + ldr q20, [sp, #96] + ldr q0, [sp, #160] + ldp x6, x10, [sp, #96] + rev64 v16.4s, v20.4s + subs x4, x20, x21 + csetm x3, cc + cneg x13, x4, cc + mul v16.4s, v16.4s, v0.4s + umulh x12, x21, x10 + uzp1 v28.4s, v20.4s, v0.4s + subs x14, x22, x20 + ldr q20, [sp, #112] + sbcs x5, x23, x21 + ngc x17, xzr + subs x8, x22, x23 + uaddlp v27.2d, v16.4s + umulh x4, x20, x6 + uzp1 v21.4s, v0.4s, v0.4s + cneg x11, x8, cc + shl v17.2d, v27.2d, #32 + csetm x15, cc + subs x9, x10, x6 + eor x7, x14, x17 + umlal v17.2d, v21.2s, v28.2s + cneg x8, x9, cc + cinv x9, x3, cc + cmn x17, #0x1 + ldr q28, [sp, #176] + adcs x14, x7, xzr + mul x7, x13, x8 + eor x1, x5, x17 + adcs x5, x1, xzr + xtn v1.2s, v20.2d + mov x1, v17.d[0] + mov x3, v17.d[1] + uzp2 v16.4s, v20.4s, v20.4s + umulh x16, x13, x8 + eor x13, x7, x9 + adds x8, x1, x3 + adcs x7, x4, x12 + xtn v0.2s, v28.2d + adcs x12, x12, xzr + adds x8, x4, x8 + adcs x3, x3, x7 + ldp x7, x2, [sp, #112] + adcs x12, x12, xzr + cmn x9, #0x1 + adcs x8, x8, x13 + eor x13, x16, x9 + adcs x16, x3, x13 + lsl x3, x1, #32 + adc x13, x12, x9 + subs x12, x6, x7 + sbcs x9, x10, x2 + lsr x10, x1, #32 + ngc x4, xzr + subs x6, x2, x7 + cinv x2, x15, cc + cneg x6, x6, cc + subs x7, x1, x3 + eor x9, x9, x4 + sbc x1, x1, x10 + adds x15, x8, x3 + adcs x3, x16, x10 + mul x16, x11, x6 + adcs x8, x13, x7 + eor x13, x12, x4 + adc x10, x1, xzr + cmn x4, #0x1 + umulh x6, x11, x6 + adcs x11, x13, xzr + adcs x1, x9, xzr + lsl x13, x15, #32 + subs x12, x15, x13 + lsr x7, x15, #32 + sbc x15, x15, x7 + adds x20, x3, x13 + adcs x21, x8, x7 + umulh x8, x14, x11 + umull v21.2d, v0.2s, v1.2s + adcs x22, x10, x12 + umull v3.2d, v0.2s, v16.2s + adc x23, x15, xzr + rev64 v24.4s, v20.4s + movi v2.2d, #0xffffffff + mul x10, x14, x11 + mul v4.4s, v24.4s, v28.4s + subs x13, x14, x5 + uzp2 v19.4s, v28.4s, v28.4s + csetm x15, cc + usra v3.2d, v21.2d, #32 + mul x7, x5, x1 + umull v21.2d, v19.2s, v16.2s + cneg x13, x13, cc + uaddlp v5.2d, v4.4s + subs x11, x1, x11 + and v16.16b, v3.16b, v2.16b + umulh x5, x5, x1 + shl v24.2d, v5.2d, #32 + cneg x11, x11, cc + umlal v16.2d, v19.2s, v1.2s + cinv x12, x15, cc + umlal v24.2d, v0.2s, v1.2s + adds x15, x10, x7 + mul x14, x13, x11 + eor x1, x6, x2 + adcs x6, x8, x5 + usra v21.2d, v3.2d, #32 + adcs x9, x5, xzr + umulh x11, x13, x11 + adds x15, x8, x15 + adcs x7, x7, x6 + eor x8, x14, x12 + usra v21.2d, v16.2d, #32 + adcs x13, x9, xzr + cmn x12, #0x1 + mov x9, 
v24.d[1] + adcs x14, x15, x8 + eor x6, x11, x12 + adcs x6, x7, x6 + mov x5, v24.d[0] + mov x11, v21.d[1] + mov x7, v21.d[0] + adc x3, x13, x12 + adds x12, x5, x9 + adcs x13, x7, x11 + adcs x11, x11, xzr + adds x12, x7, x12 + eor x16, x16, x2 + adcs x7, x9, x13 + adcs x11, x11, xzr + cmn x2, #0x1 + adcs x16, x12, x16 + adcs x1, x7, x1 + adc x2, x11, x2 + adds x7, x5, x20 + adcs x15, x16, x21 + eor x5, x17, x4 + adcs x9, x1, x22 + eor x1, x10, x5 + adcs x16, x2, x23 + adc x2, xzr, xzr + cmn x5, #0x1 + eor x13, x14, x5 + adcs x14, x1, x7 + eor x1, x6, x5 + adcs x6, x13, x15 + adcs x10, x1, x9 + eor x4, x3, x5 + mov x1, #0xffffffff + adcs x8, x4, x16 + lsr x13, x14, #32 + adcs x17, x2, x5 + adcs x11, x5, xzr + adc x4, x5, xzr + adds x12, x10, x7 + adcs x7, x8, x15 + adcs x5, x17, x9 + adcs x9, x11, x16 + lsl x11, x14, #32 + adc x10, x4, x2 + subs x17, x14, x11 + sbc x4, x14, x13 + adds x11, x6, x11 + adcs x12, x12, x13 + lsl x15, x11, #32 + adcs x17, x7, x17 + lsr x7, x11, #32 + adc x13, x4, xzr + subs x4, x11, x15 + sbc x11, x11, x7 + adds x8, x12, x15 + adcs x15, x17, x7 + adcs x4, x13, x4 + adc x11, x11, xzr + adds x7, x5, x4 + adcs x17, x9, x11 + adc x13, x10, xzr + add x12, x13, #0x1 + neg x11, x12 + lsl x4, x12, #32 + adds x17, x17, x4 + sub x4, x4, #0x1 + adc x13, x13, xzr + subs x11, x8, x11 + sbcs x4, x15, x4 + sbcs x7, x7, xzr + sbcs x17, x17, x12 + sbcs x13, x13, x12 + mov x12, #0xffffffff00000001 + adds x14, x11, x13 + and x1, x1, x13 + adcs x15, x4, x1 + and x1, x12, x13 + stp x14, x15, [sp, #96] + adcs x13, x7, xzr + adc x20, x17, x1 + stp x13, x20, [sp, #112] + ldp x5, x6, [sp, #64] + ldp x4, x3, [sp, #32] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #80] + ldp x4, x3, [sp, #48] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + and x4, x3, #0xffffffff + adcs x6, x6, x4 + adcs x7, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x8, x8, x4 + stp x5, x6, [x19, #64] + stp x7, x8, [x19, #80] + ldp x1, x2, [sp, #128] + lsl x0, x1, #2 + ldp x6, x7, [sp, #160] + subs x0, x0, x6 + extr x1, x2, x1, #62 + sbcs x1, x1, x7 + ldp x3, x4, [sp, #144] + extr x2, x3, x2, #62 + ldp x6, x7, [sp, #176] + sbcs x2, x2, x6 + extr x3, x4, x3, #62 + sbcs x3, x3, x7 + lsr x4, x4, #62 + sbc x4, x4, xzr + add x5, x4, #0x1 + lsl x8, x5, #32 + negs x6, x8 + ngcs x7, xzr + sbc x8, x8, x5 + adds x0, x0, x5 + adcs x1, x1, x6 + adcs x2, x2, x7 + adcs x3, x3, x8 + csetm x5, cc + adds x0, x0, x5 + and x6, x5, #0xffffffff + adcs x1, x1, x6 + adcs x2, x2, xzr + neg x7, x6 + adc x3, x3, x7 + stp x0, x1, [x19] + stp x2, x3, [x19, #16] + mov x2, #0xffffffffffffffff + subs x9, x2, x24 + mov x2, #0xffffffff + sbcs x10, x2, x27 + ngcs x11, x25 + mov x2, #0xffffffff00000001 + sbc x12, x2, x26 + lsl x3, x9, #3 + extr x4, x10, x9, #61 + extr x5, x11, x10, #61 + extr x6, x12, x11, #61 + lsr x7, x12, #61 + mov x1, #0x3 + mul x8, x14, x1 + umulh x9, x14, x1 + adds x3, x3, x8 + mul x8, x15, x1 + umulh x10, x15, x1 + adcs x4, x4, x8 + mul x8, x13, x1 + umulh x11, x13, x1 + adcs x5, x5, x8 + mul x8, x20, x1 + umulh x12, x20, x1 + adcs x6, x6, x8 + adc x7, x7, xzr + adds x4, x4, x9 + adcs x5, x5, x10 + adcs x6, x6, x11 + adc x7, x7, x12 + add x8, x7, #0x1 + lsl x10, x8, #32 + adds x6, x6, x10 + adc x7, x7, xzr + neg x9, x8 + sub x10, x10, #0x1 + subs x3, x3, x9 + sbcs x4, x4, x10 + sbcs x5, x5, xzr + sbcs x6, x6, x8 + sbc x8, x7, x8 + adds x3, x3, x8 + and x9, x8, #0xffffffff + adcs x4, x4, x9 + adcs x5, x5, xzr + neg x10, x9 + adc x6, x6, x10 + stp x3, x4, [x19, #32] + stp x5, x6, [x19, #48] + ldp 
x27, xzr, [sp, #256] + ldp x25, x26, [sp, #240] + ldp x23, x24, [sp, #224] + ldp x21, x22, [sp, #208] + ldp x19, x20, [sp, #192] + add sp, sp, #0x110 + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/arm/p256/p256_montjscalarmul_alt.S b/third_party/s2n-bignum/arm/p256/p256_montjscalarmul_alt.S new file mode 100644 index 0000000000..8ac5806a72 --- /dev/null +++ b/third_party/s2n-bignum/arm/p256/p256_montjscalarmul_alt.S @@ -0,0 +1,3357 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery-Jacobian form scalar multiplication for P-256 +// Input scalar[4], point[12]; output res[12] +// +// extern void p256_montjscalarmul_alt +// (uint64_t res[static 12], +// uint64_t scalar[static 4], +// uint64_t point[static 12]); +// +// This function is a variant of its affine point version p256_scalarmul_alt. +// Here, input and output points are assumed to be in Jacobian form with +// their coordinates in the Montgomery domain. Thus, if priming indicates +// Montgomery form, x' = (2^256 * x) mod p_256 etc., each point argument +// is a triple (x',y',z') representing the affine point (x/z^2,y/z^3) when +// z' is nonzero or the point at infinity (group identity) if z' = 0. +// +// Given scalar = n and point = P, assumed to be on the NIST elliptic +// curve P-256, returns a representation of n * P. If the result is the +// point at infinity (either because the input point was or because the +// scalar was a multiple of p_256) then the output is guaranteed to +// represent the point at infinity, i.e. to have its z coordinate zero. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p256_montjscalarmul_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p256_montjscalarmul_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Safe copies of inputs (res lasts the whole code, point not so long) +// and additional values in variables, with some aliasing + +#define res x19 +#define sgn x20 +#define j x20 +#define point x21 + +// Intermediate variables on the stack. + +#define scalarb sp, #(0*NUMSIZE) +#define acc sp, #(1*NUMSIZE) +#define tabent sp, #(4*NUMSIZE) + +#define tab sp, #(7*NUMSIZE) + +#define NSPACE #(31*NUMSIZE) + +// Avoid using .rep for the sake of the BoringSSL/AWS-LC delocator, +// which doesn't accept repetitions, assembler macros etc. + +#define selectblock(I) \ + cmp x14, #(1*I); \ + ldp x12, x13, [x15]; \ + csel x0, x12, x0, eq; \ + csel x1, x13, x1, eq; \ + ldp x12, x13, [x15, #16]; \ + csel x2, x12, x2, eq; \ + csel x3, x13, x3, eq; \ + ldp x12, x13, [x15, #32]; \ + csel x4, x12, x4, eq; \ + csel x5, x13, x5, eq; \ + ldp x12, x13, [x15, #48]; \ + csel x6, x12, x6, eq; \ + csel x7, x13, x7, eq; \ + ldp x12, x13, [x15, #64]; \ + csel x8, x12, x8, eq; \ + csel x9, x13, x9, eq; \ + ldp x12, x13, [x15, #80]; \ + csel x10, x12, x10, eq; \ + csel x11, x13, x11, eq; \ + add x15, x15, #96 + +// Loading large constants + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +S2N_BN_SYMBOL(p256_montjscalarmul_alt): + + stp x19, x20, [sp, #-16]! + stp x21, x30, [sp, #-16]! 
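For reference, the coordinate convention spelled out in the header comment of p256_montjscalarmul_alt above can be checked with a small Python sketch (names such as jacobian_to_affine are illustrative only, not part of the vendored source): each coordinate x' is the Montgomery form 2^256 * x mod p_256, and a triple (x', y', z') with z' nonzero stands for the affine point (x/z^2, y/z^3).

    # Illustrative sketch, not part of the patched file.
    p256 = 2**256 - 2**224 + 2**192 + 2**96 - 1   # the P-256 field prime
    R = 2**256                                    # Montgomery radix used here

    def to_mont(a):
        # Montgomery form: a' = (R * a) mod p_256
        return (a * R) % p256

    def from_mont(a):
        # leave Montgomery form: a = (a' * R^-1) mod p_256
        return (a * pow(R, -1, p256)) % p256

    def jacobian_to_affine(xm, ym, zm):
        # (x', y', z') with z' != 0 is the affine point (x/z^2, y/z^3);
        # z' = 0 encodes the point at infinity (the group identity).
        x, y, z = from_mont(xm), from_mont(ym), from_mont(zm)
        if z == 0:
            return None
        zi = pow(z, -1, p256)
        return (x * zi * zi) % p256, (y * zi * zi * zi) % p256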
+ sub sp, sp, NSPACE + +// Preserve the "res" and "point" input arguments. We load and process the +// scalar immediately so we don't bother preserving that input argument. +// Also, "point" is only needed early on and so its register gets re-used. + + mov res, x0 + mov point, x2 + +// Load the digits of group order n_256 = [x12;x13;x14;x15] + + movbig(x12, #0xf3b9, #0xcac2, #0xfc63, #0x2551) + movbig(x13, #0xbce6, #0xfaad, #0xa717, #0x9e84) + mov x14, #0xffffffffffffffff + mov x15, #0xffffffff00000000 + +// First, reduce the input scalar mod n_256, i.e. conditionally subtract n_256 + + ldp x2, x3, [x1] + ldp x4, x5, [x1, #16] + + subs x6, x2, x12 + sbcs x7, x3, x13 + sbcs x8, x4, x14 + sbcs x9, x5, x15 + + csel x2, x2, x6, cc + csel x3, x3, x7, cc + csel x4, x4, x8, cc + csel x5, x5, x9, cc + +// Now if the top bit of the reduced scalar is set, negate it mod n_256, +// i.e. do n |-> n_256 - n. Remember the sign as "sgn" so we can +// correspondingly negate the point below. + + subs x6, x12, x2 + sbcs x7, x13, x3 + sbcs x8, x14, x4 + sbc x9, x15, x5 + + tst x5, #0x8000000000000000 + csel x2, x2, x6, eq + csel x3, x3, x7, eq + csel x4, x4, x8, eq + csel x5, x5, x9, eq + cset sgn, ne + +// In either case then add the recoding constant 0x08888...888 to allow +// signed digits. + + mov x6, 0x8888888888888888 + adds x2, x2, x6 + adcs x3, x3, x6 + bic x7, x6, #0xF000000000000000 + adcs x4, x4, x6 + adc x5, x5, x7 + + stp x2, x3, [scalarb] + stp x4, x5, [scalarb+16] + +// Set the tab[0] table entry to the input point = 1 * P, except +// that we negate it if the top bit of the scalar was set. This +// negation takes care over the y = 0 case to maintain all the +// coordinates < p_256 throughout, even though triples (x,y,z) +// with y = 0 can only represent a point on the curve when z = 0 +// and it represents the point at infinity regardless of x and y. 
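The scalar preparation above (reduce mod n_256, conditionally negate while remembering the sign, then add the 0x0888...888 bias) can be summarized in plain integer arithmetic. A minimal Python sketch follows, with the helper name recode_scalar chosen only for illustration: after the bias, the top 4-bit window is used as an unsigned index in 0..8, and every lower window w stands for the signed digit w - 8 in [-8, 7], which is exactly what the main loop decodes later.

    # Illustrative sketch, not part of the patched file.
    n256 = 0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551
    BIAS = int("8" * 63, 16)             # 0x0888...888, the recoding constant

    def recode_scalar(n):
        n %= n256
        sgn = (n >> 255) & 1             # top bit of the reduced scalar
        if sgn:
            n = n256 - n                 # negate mod n_256; the point is negated instead
        b = n + BIAS
        top = b >> 252                   # top window, used unrecoded (0..8)
        digits = [((b >> (4 * i)) & 0xF) - 8 for i in range(63)]   # signed digits
        # the recoded form still represents the (possibly negated) scalar
        assert (top << 252) + sum(d << (4 * i) for i, d in enumerate(digits)) == n
        return sgn, top, digits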
+ + ldp x0, x1, [point] + stp x0, x1, [tab] + ldp x2, x3, [point, #16] + stp x2, x3, [tab+16] + + ldp x4, x5, [point, #32] + ldp x6, x7, [point, #48] + + mov x0, 0xffffffffffffffff + subs x0, x0, x4 + mov x1, 0x00000000ffffffff + sbcs x1, x1, x5 + mov x3, 0xffffffff00000001 + sbcs x2, xzr, x6 + sbc x3, x3, x7 + + orr x8, x4, x5 + orr x9, x6, x7 + orr x8, x8, x9 + cmp x8, xzr + ccmp sgn, xzr, #4, ne + csel x4, x0, x4, ne + csel x5, x1, x5, ne + csel x6, x2, x6, ne + csel x7, x3, x7, ne + + stp x4, x5, [tab+32] + stp x6, x7, [tab+48] + + ldp x0, x1, [point, #64] + stp x0, x1, [tab+64] + ldp x2, x3, [point, #80] + stp x2, x3, [tab+80] + +// Compute and record tab[1] = 2 * p, ..., tab[7] = 8 * P + + add x0, tab+96*1 + add x1, tab + bl p256_montjscalarmul_alt_p256_montjdouble + + add x0, tab+96*2 + add x1, tab+96*1 + add x2, tab + bl p256_montjscalarmul_alt_p256_montjadd + + add x0, tab+96*3 + add x1, tab+96*1 + bl p256_montjscalarmul_alt_p256_montjdouble + + add x0, tab+96*4 + add x1, tab+96*3 + add x2, tab + bl p256_montjscalarmul_alt_p256_montjadd + + add x0, tab+96*5 + add x1, tab+96*2 + bl p256_montjscalarmul_alt_p256_montjdouble + + add x0, tab+96*6 + add x1, tab+96*5 + add x2, tab + bl p256_montjscalarmul_alt_p256_montjadd + + add x0, tab+96*7 + add x1, tab+96*3 + bl p256_montjscalarmul_alt_p256_montjdouble + +// Initialize the accumulator as a table entry for top 4 bits (unrecoded) + + ldr x14, [scalarb+24] + lsr x14, x14, #60 + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + add x15, tab + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + + stp x0, x1, [acc] + stp x2, x3, [acc+16] + stp x4, x5, [acc+32] + stp x6, x7, [acc+48] + stp x8, x9, [acc+64] + stp x10, x11, [acc+80] + + mov j, #252 + +// Main loop over size-4 bitfields: double 4 times then add signed digit + +p256_montjscalarmul_alt_mainloop: + sub j, j, #4 + + add x0, acc + add x1, acc + bl p256_montjscalarmul_alt_p256_montjdouble + + add x0, acc + add x1, acc + bl p256_montjscalarmul_alt_p256_montjdouble + + add x0, acc + add x1, acc + bl p256_montjscalarmul_alt_p256_montjdouble + + add x0, acc + add x1, acc + bl p256_montjscalarmul_alt_p256_montjdouble + + lsr x2, j, #6 + ldr x14, [sp, x2, lsl #3] // Exploits scalarb = sp exactly + lsr x14, x14, j + and x14, x14, #15 + + subs x14, x14, #8 + cset x16, lo // x16 = sign of digit (1 = negative) + cneg x14, x14, lo // x14 = absolute value of digit + +// Conditionally select the table entry tab[i-1] = i * P in constant time + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + add x15, tab + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + +// Store it to "tabent" with the y coordinate optionally negated +// Again, do it carefully to give coordinates < p_256 even in +// the degenerate case y = 0 (when z = 0 for points on the curve). 
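In plain arithmetic, the table lookup and conditional negation described in the comment above amount to the sketch below; the name select_and_negate and the branching form are illustrative only, since the assembly performs the same selection branch-free over all eight table entries. A digit of 0 selects all-zero coordinates, i.e. the point at infinity, and a negative digit flips y to p_256 - y except when y = 0, so every coordinate stays strictly below p_256.

    # Illustrative sketch, not part of the patched file.
    p256 = 2**256 - 2**224 + 2**192 + 2**96 - 1

    def select_and_negate(tab, d):
        # tab[i-1] holds i * P as a Montgomery-Jacobian triple (x, y, z); d in [-8, 7]
        sign, mag = (1, -d) if d < 0 else (0, d)
        if mag == 0:
            return (0, 0, 0)             # zero selection: the point at infinity
        x, y, z = tab[mag - 1]
        if sign and y != 0:              # negate y, but map y = 0 to 0, not p_256
            y = p256 - y
        return (x, y, z)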
+ + stp x0, x1, [tabent] + stp x2, x3, [tabent+16] + + mov x0, 0xffffffffffffffff + subs x0, x0, x4 + mov x1, 0x00000000ffffffff + sbcs x1, x1, x5 + mov x3, 0xffffffff00000001 + sbcs x2, xzr, x6 + sbc x3, x3, x7 + + orr x12, x4, x5 + orr x13, x6, x7 + orr x12, x12, x13 + cmp x12, xzr + ccmp x16, xzr, #4, ne + csel x4, x0, x4, ne + csel x5, x1, x5, ne + csel x6, x2, x6, ne + csel x7, x3, x7, ne + + stp x4, x5, [tabent+32] + stp x6, x7, [tabent+48] + stp x8, x9, [tabent+64] + stp x10, x11, [tabent+80] + + add x0, acc + add x1, acc + add x2, tabent + bl p256_montjscalarmul_alt_p256_montjadd + + cbnz j, p256_montjscalarmul_alt_mainloop + +// That's the end of the main loop, and we just need to copy the +// result in "acc" to the output. + + ldp x0, x1, [acc] + stp x0, x1, [res] + ldp x0, x1, [acc+16] + stp x0, x1, [res, #16] + ldp x0, x1, [acc+32] + stp x0, x1, [res, #32] + ldp x0, x1, [acc+48] + stp x0, x1, [res, #48] + ldp x0, x1, [acc+64] + stp x0, x1, [res, #64] + ldp x0, x1, [acc+80] + stp x0, x1, [res, #80] + +// Restore stack and registers and return + + add sp, sp, NSPACE + ldp x21, x30, [sp], 16 + ldp x19, x20, [sp], 16 + ret + +// Local copies of subroutines, complete clones at the moment + +p256_montjscalarmul_alt_p256_montjadd: + sub sp, sp, #0xe0 + mov x15, x0 + mov x16, x1 + mov x17, x2 + ldp x2, x3, [x16, #64] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [x16, #80] + mul x11, x2, x5 + umulh x12, x2, x5 + mul x6, x2, x4 + umulh x7, x2, x4 + adds x10, x10, x6 + adcs x11, x11, x7 + mul x6, x3, x4 + umulh x7, x3, x4 + adc x7, x7, xzr + adds x11, x11, x6 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x7 + mul x6, x3, x5 + umulh x7, x3, x5 + adc x7, x7, xzr + adds x12, x12, x6 + adcs x13, x13, x7 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x7, cs + umulh x6, x2, x2 + mul x8, x2, x2 + adds x9, x9, x6 + mul x6, x3, x3 + adcs x10, x10, x6 + umulh x6, x3, x3 + adcs x11, x11, x6 + mul x6, x4, x4 + adcs x12, x12, x6 + umulh x6, x4, x4 + adcs x13, x13, x6 + mul x6, x5, x5 + adcs x14, x14, x6 + umulh x6, x5, x5 + adc x7, x7, x6 + adds x9, x9, x8, lsl #32 + lsr x3, x8, #32 + adcs x10, x10, x3 + mov x3, #0xffffffff00000001 + mul x2, x8, x3 + umulh x8, x8, x3 + adcs x11, x11, x2 + adc x8, x8, xzr + adds x10, x10, x9, lsl #32 + lsr x3, x9, #32 + adcs x11, x11, x3 + mov x3, #0xffffffff00000001 + mul x2, x9, x3 + umulh x9, x9, x3 + adcs x8, x8, x2 + adc x9, x9, xzr + adds x11, x11, x10, lsl #32 + lsr x3, x10, #32 + adcs x8, x8, x3 + mov x3, #0xffffffff00000001 + mul x2, x10, x3 + umulh x10, x10, x3 + adcs x9, x9, x2 + adc x10, x10, xzr + adds x8, x8, x11, lsl #32 + lsr x3, x11, #32 + adcs x9, x9, x3 + mov x3, #0xffffffff00000001 + mul x2, x11, x3 + umulh x11, x11, x3 + adcs x10, x10, x2 + adc x11, x11, xzr + adds x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + adcs x11, x11, x7 + mov x2, #0xffffffffffffffff + csel x2, xzr, x2, cc + mov x3, #0xffffffff + csel x3, xzr, x3, cc + mov x5, #0xffffffff00000001 + csel x5, xzr, x5, cc + subs x8, x8, x2 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbc x11, x11, x5 + stp x8, x9, [sp] + stp x10, x11, [sp, #16] + ldp x2, x3, [x17, #64] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [x17, #80] + mul x11, x2, x5 + umulh x12, x2, x5 + mul x6, x2, x4 + umulh x7, x2, x4 + adds x10, x10, x6 + adcs x11, x11, x7 + mul x6, x3, x4 + umulh x7, x3, x4 + adc x7, x7, xzr + adds x11, x11, x6 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x7 + 
mul x6, x3, x5 + umulh x7, x3, x5 + adc x7, x7, xzr + adds x12, x12, x6 + adcs x13, x13, x7 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x7, cs + umulh x6, x2, x2 + mul x8, x2, x2 + adds x9, x9, x6 + mul x6, x3, x3 + adcs x10, x10, x6 + umulh x6, x3, x3 + adcs x11, x11, x6 + mul x6, x4, x4 + adcs x12, x12, x6 + umulh x6, x4, x4 + adcs x13, x13, x6 + mul x6, x5, x5 + adcs x14, x14, x6 + umulh x6, x5, x5 + adc x7, x7, x6 + adds x9, x9, x8, lsl #32 + lsr x3, x8, #32 + adcs x10, x10, x3 + mov x3, #0xffffffff00000001 + mul x2, x8, x3 + umulh x8, x8, x3 + adcs x11, x11, x2 + adc x8, x8, xzr + adds x10, x10, x9, lsl #32 + lsr x3, x9, #32 + adcs x11, x11, x3 + mov x3, #0xffffffff00000001 + mul x2, x9, x3 + umulh x9, x9, x3 + adcs x8, x8, x2 + adc x9, x9, xzr + adds x11, x11, x10, lsl #32 + lsr x3, x10, #32 + adcs x8, x8, x3 + mov x3, #0xffffffff00000001 + mul x2, x10, x3 + umulh x10, x10, x3 + adcs x9, x9, x2 + adc x10, x10, xzr + adds x8, x8, x11, lsl #32 + lsr x3, x11, #32 + adcs x9, x9, x3 + mov x3, #0xffffffff00000001 + mul x2, x11, x3 + umulh x11, x11, x3 + adcs x10, x10, x2 + adc x11, x11, xzr + adds x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + adcs x11, x11, x7 + mov x2, #0xffffffffffffffff + csel x2, xzr, x2, cc + mov x3, #0xffffffff + csel x3, xzr, x3, cc + mov x5, #0xffffffff00000001 + csel x5, xzr, x5, cc + subs x8, x8, x2 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbc x11, x11, x5 + stp x8, x9, [sp, #160] + stp x10, x11, [sp, #176] + ldp x3, x4, [x17, #64] + ldp x7, x8, [x16, #32] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [x16, #48] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [x17, #80] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + 
adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #192] + stp x14, x0, [sp, #208] + ldp x3, x4, [x16, #64] + ldp x7, x8, [x17, #32] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [x17, #48] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [x16, #80] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #32] + stp x14, x0, [sp, #48] + ldp x3, x4, [sp] + ldp x7, x8, [x17] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [x17, #16] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #16] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 
+ adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #64] + stp x14, x0, [sp, #80] + ldp x3, x4, [sp, #160] + ldp x7, x8, [x16] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [x16, #16] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #176] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + 
csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #128] + stp x14, x0, [sp, #144] + ldp x3, x4, [sp] + ldp x7, x8, [sp, #32] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [sp, #48] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #16] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #32] + stp x14, x0, [sp, #48] + ldp x3, x4, [sp, #160] + ldp x7, x8, [sp, #192] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [sp, #208] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #176] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, 
xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #192] + stp x14, x0, [sp, #208] + ldp x5, x6, [sp, #64] + ldp x4, x3, [sp, #128] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #80] + ldp x4, x3, [sp, #144] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + mov x4, #0xffffffff + and x4, x4, x3 + adcs x6, x6, x4 + adcs x7, x7, xzr + mov x4, #0xffffffff00000001 + and x4, x4, x3 + adc x8, x8, x4 + stp x5, x6, [sp, #160] + stp x7, x8, [sp, #176] + ldp x5, x6, [sp, #32] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #48] + ldp x4, x3, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + mov x4, #0xffffffff + and x4, x4, x3 + adcs x6, x6, x4 + adcs x7, x7, xzr + mov x4, #0xffffffff00000001 + and x4, x4, x3 + adc x8, x8, x4 + stp x5, x6, [sp, #32] + stp x7, x8, [sp, #48] + ldp x2, x3, [sp, #160] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #176] + mul x11, x2, x5 + umulh x12, x2, x5 + mul x6, x2, x4 + umulh x7, x2, x4 + adds x10, x10, x6 + adcs x11, x11, x7 + mul x6, x3, x4 + umulh x7, x3, x4 + adc x7, x7, xzr + adds x11, x11, x6 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x7 + mul x6, x3, x5 + umulh x7, x3, x5 + adc x7, x7, xzr + adds x12, x12, x6 + adcs x13, x13, x7 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x7, cs + umulh x6, x2, x2 + mul x8, x2, x2 + adds x9, x9, x6 + mul x6, x3, x3 + adcs x10, x10, x6 + umulh x6, x3, x3 + adcs x11, x11, x6 + mul x6, x4, x4 + adcs x12, x12, x6 + umulh x6, x4, x4 + adcs x13, x13, x6 + mul x6, x5, x5 + adcs x14, x14, x6 + umulh x6, x5, x5 + adc x7, x7, x6 + adds x9, x9, x8, lsl #32 + lsr x3, x8, #32 + adcs x10, x10, x3 + mov x3, #0xffffffff00000001 + mul x2, x8, x3 + umulh x8, x8, x3 + adcs x11, x11, x2 + adc x8, x8, xzr + adds x10, x10, x9, lsl #32 + lsr x3, x9, #32 + adcs x11, x11, x3 + mov x3, #0xffffffff00000001 + mul x2, x9, x3 + umulh x9, x9, x3 + adcs x8, x8, x2 + adc x9, x9, xzr + adds x11, x11, x10, lsl #32 + lsr x3, x10, #32 + adcs x8, x8, x3 + mov x3, #0xffffffff00000001 + mul x2, x10, x3 + umulh x10, x10, x3 + adcs x9, x9, x2 + adc x10, x10, xzr + adds x8, x8, x11, lsl #32 + lsr x3, x11, #32 + adcs x9, x9, x3 + mov x3, #0xffffffff00000001 + mul x2, x11, x3 + umulh x11, x11, x3 + adcs x10, x10, x2 + adc x11, x11, xzr + adds x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + adcs x11, x11, x7 + mov 
x2, #0xffffffffffffffff + csel x2, xzr, x2, cc + mov x3, #0xffffffff + csel x3, xzr, x3, cc + mov x5, #0xffffffff00000001 + csel x5, xzr, x5, cc + subs x8, x8, x2 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbc x11, x11, x5 + stp x8, x9, [sp, #96] + stp x10, x11, [sp, #112] + ldp x2, x3, [sp, #32] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #48] + mul x11, x2, x5 + umulh x12, x2, x5 + mul x6, x2, x4 + umulh x7, x2, x4 + adds x10, x10, x6 + adcs x11, x11, x7 + mul x6, x3, x4 + umulh x7, x3, x4 + adc x7, x7, xzr + adds x11, x11, x6 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x7 + mul x6, x3, x5 + umulh x7, x3, x5 + adc x7, x7, xzr + adds x12, x12, x6 + adcs x13, x13, x7 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x7, cs + umulh x6, x2, x2 + mul x8, x2, x2 + adds x9, x9, x6 + mul x6, x3, x3 + adcs x10, x10, x6 + umulh x6, x3, x3 + adcs x11, x11, x6 + mul x6, x4, x4 + adcs x12, x12, x6 + umulh x6, x4, x4 + adcs x13, x13, x6 + mul x6, x5, x5 + adcs x14, x14, x6 + umulh x6, x5, x5 + adc x7, x7, x6 + adds x9, x9, x8, lsl #32 + lsr x3, x8, #32 + adcs x10, x10, x3 + mov x3, #0xffffffff00000001 + mul x2, x8, x3 + umulh x8, x8, x3 + adcs x11, x11, x2 + adc x8, x8, xzr + adds x10, x10, x9, lsl #32 + lsr x3, x9, #32 + adcs x11, x11, x3 + mov x3, #0xffffffff00000001 + mul x2, x9, x3 + umulh x9, x9, x3 + adcs x8, x8, x2 + adc x9, x9, xzr + adds x11, x11, x10, lsl #32 + lsr x3, x10, #32 + adcs x8, x8, x3 + mov x3, #0xffffffff00000001 + mul x2, x10, x3 + umulh x10, x10, x3 + adcs x9, x9, x2 + adc x10, x10, xzr + adds x8, x8, x11, lsl #32 + lsr x3, x11, #32 + adcs x9, x9, x3 + mov x3, #0xffffffff00000001 + mul x2, x11, x3 + umulh x11, x11, x3 + adcs x10, x10, x2 + adc x11, x11, xzr + adds x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + adcs x11, x11, x7 + cset x2, cs + mov x3, #0xffffffff + mov x5, #0xffffffff00000001 + adds x12, x8, #0x1 + sbcs x13, x9, x3 + sbcs x14, x10, xzr + sbcs x7, x11, x5 + sbcs xzr, x2, xzr + csel x8, x8, x12, cc + csel x9, x9, x13, cc + csel x10, x10, x14, cc + csel x11, x11, x7, cc + stp x8, x9, [sp] + stp x10, x11, [sp, #16] + ldp x3, x4, [sp, #96] + ldp x7, x8, [sp, #128] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [sp, #144] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #112] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, 
xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #128] + stp x14, x0, [sp, #144] + ldp x3, x4, [sp, #96] + ldp x7, x8, [sp, #64] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [sp, #80] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #112] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #64] + stp x14, x0, [sp, #80] + ldp x5, x6, [sp] + ldp x4, x3, [sp, #128] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #16] + ldp x4, x3, [sp, #144] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + mov x4, #0xffffffff + and x4, x4, x3 + adcs x6, x6, x4 + adcs x7, x7, xzr + mov x4, #0xffffffff00000001 + 
and x4, x4, x3 + adc x8, x8, x4 + stp x5, x6, [sp] + stp x7, x8, [sp, #16] + ldp x5, x6, [sp, #64] + ldp x4, x3, [sp, #128] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #80] + ldp x4, x3, [sp, #144] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + mov x4, #0xffffffff + and x4, x4, x3 + adcs x6, x6, x4 + adcs x7, x7, xzr + mov x4, #0xffffffff00000001 + and x4, x4, x3 + adc x8, x8, x4 + stp x5, x6, [sp, #96] + stp x7, x8, [sp, #112] + ldp x3, x4, [sp, #160] + ldp x7, x8, [x16, #64] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [x16, #80] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #176] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #160] + stp x14, x0, [sp, #176] + ldp x5, x6, [sp] + ldp x4, x3, [sp, #64] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #16] + ldp x4, x3, [sp, #80] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + mov x4, #0xffffffff + and x4, x4, x3 + adcs x6, x6, x4 + adcs x7, x7, xzr + mov x4, #0xffffffff00000001 + and x4, x4, x3 + adc x8, x8, x4 + stp x5, x6, [sp] + stp x7, x8, [sp, #16] + ldp x5, x6, [sp, #128] + ldp x4, x3, [sp] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #144] + ldp x4, x3, [sp, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + mov x4, #0xffffffff + and x4, x4, x3 + adcs x6, x6, x4 + adcs x7, x7, xzr + mov x4, #0xffffffff00000001 + and x4, x4, x3 + adc x8, x8, x4 + stp x5, x6, [sp, #128] + stp x7, x8, [sp, #144] + ldp x3, x4, [sp, 
#96] + ldp x7, x8, [sp, #192] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [sp, #208] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #112] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #96] + stp x14, x0, [sp, #112] + ldp x3, x4, [sp, #160] + ldp x7, x8, [x17, #64] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [x17, #80] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #176] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 
+ mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #160] + stp x14, x0, [sp, #176] + ldp x3, x4, [sp, #32] + ldp x7, x8, [sp, #128] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [sp, #144] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #48] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #128] + stp x14, x0, [sp, #144] + ldp x5, x6, [sp, #128] + ldp x4, x3, [sp, #96] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #144] + ldp x4, x3, [sp, #112] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + mov x4, #0xffffffff 
+ and x4, x4, x3 + adcs x6, x6, x4 + adcs x7, x7, xzr + mov x4, #0xffffffff00000001 + and x4, x4, x3 + adc x8, x8, x4 + stp x5, x6, [sp, #128] + stp x7, x8, [sp, #144] + ldp x0, x1, [x16, #64] + ldp x2, x3, [x16, #80] + orr x12, x0, x1 + orr x13, x2, x3 + orr x12, x12, x13 + cmp x12, xzr + cset x12, ne + ldp x4, x5, [x17, #64] + ldp x6, x7, [x17, #80] + orr x13, x4, x5 + orr x14, x6, x7 + orr x13, x13, x14 + cmp x13, xzr + cset x13, ne + cmp x13, x12 + ldp x8, x9, [sp, #160] + csel x8, x0, x8, cc + csel x9, x1, x9, cc + csel x8, x4, x8, hi + csel x9, x5, x9, hi + ldp x10, x11, [sp, #176] + csel x10, x2, x10, cc + csel x11, x3, x11, cc + csel x10, x6, x10, hi + csel x11, x7, x11, hi + ldp x12, x13, [x16] + ldp x0, x1, [sp] + csel x0, x12, x0, cc + csel x1, x13, x1, cc + ldp x12, x13, [x17] + csel x0, x12, x0, hi + csel x1, x13, x1, hi + ldp x12, x13, [x16, #16] + ldp x2, x3, [sp, #16] + csel x2, x12, x2, cc + csel x3, x13, x3, cc + ldp x12, x13, [x17, #16] + csel x2, x12, x2, hi + csel x3, x13, x3, hi + ldp x12, x13, [x16, #32] + ldp x4, x5, [sp, #128] + csel x4, x12, x4, cc + csel x5, x13, x5, cc + ldp x12, x13, [x17, #32] + csel x4, x12, x4, hi + csel x5, x13, x5, hi + ldp x12, x13, [x16, #48] + ldp x6, x7, [sp, #144] + csel x6, x12, x6, cc + csel x7, x13, x7, cc + ldp x12, x13, [x17, #48] + csel x6, x12, x6, hi + csel x7, x13, x7, hi + stp x0, x1, [x15] + stp x2, x3, [x15, #16] + stp x4, x5, [x15, #32] + stp x6, x7, [x15, #48] + stp x8, x9, [x15, #64] + stp x10, x11, [x15, #80] + add sp, sp, #0xe0 + ret + +p256_montjscalarmul_alt_p256_montjdouble: + sub sp, sp, #0xc0 + mov x15, x0 + mov x16, x1 + ldp x2, x3, [x16, #64] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [x16, #80] + mul x11, x2, x5 + umulh x12, x2, x5 + mul x6, x2, x4 + umulh x7, x2, x4 + adds x10, x10, x6 + adcs x11, x11, x7 + mul x6, x3, x4 + umulh x7, x3, x4 + adc x7, x7, xzr + adds x11, x11, x6 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x7 + mul x6, x3, x5 + umulh x7, x3, x5 + adc x7, x7, xzr + adds x12, x12, x6 + adcs x13, x13, x7 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x7, cs + umulh x6, x2, x2 + mul x8, x2, x2 + adds x9, x9, x6 + mul x6, x3, x3 + adcs x10, x10, x6 + umulh x6, x3, x3 + adcs x11, x11, x6 + mul x6, x4, x4 + adcs x12, x12, x6 + umulh x6, x4, x4 + adcs x13, x13, x6 + mul x6, x5, x5 + adcs x14, x14, x6 + umulh x6, x5, x5 + adc x7, x7, x6 + mov x5, #0xffffffff00000001 + adds x9, x9, x8, lsl #32 + lsr x3, x8, #32 + adcs x10, x10, x3 + mul x2, x8, x5 + umulh x8, x8, x5 + adcs x11, x11, x2 + adc x8, x8, xzr + adds x10, x10, x9, lsl #32 + lsr x3, x9, #32 + adcs x11, x11, x3 + mul x2, x9, x5 + umulh x9, x9, x5 + adcs x8, x8, x2 + adc x9, x9, xzr + adds x11, x11, x10, lsl #32 + lsr x3, x10, #32 + adcs x8, x8, x3 + mul x2, x10, x5 + umulh x10, x10, x5 + adcs x9, x9, x2 + adc x10, x10, xzr + adds x8, x8, x11, lsl #32 + lsr x3, x11, #32 + adcs x9, x9, x3 + mul x2, x11, x5 + umulh x11, x11, x5 + adcs x10, x10, x2 + adc x11, x11, xzr + adds x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + adcs x11, x11, x7 + cset x2, cs + mov x3, #0xffffffff + adds x12, x8, #0x1 + sbcs x13, x9, x3 + sbcs x14, x10, xzr + sbcs x7, x11, x5 + sbcs xzr, x2, xzr + csel x8, x8, x12, cc + csel x9, x9, x13, cc + csel x10, x10, x14, cc + csel x11, x11, x7, cc + stp x8, x9, [sp] + stp x10, x11, [sp, #16] + ldp x2, x3, [x16, #32] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [x16, #48] + mul x11, x2, x5 + umulh 
x12, x2, x5 + mul x6, x2, x4 + umulh x7, x2, x4 + adds x10, x10, x6 + adcs x11, x11, x7 + mul x6, x3, x4 + umulh x7, x3, x4 + adc x7, x7, xzr + adds x11, x11, x6 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x7 + mul x6, x3, x5 + umulh x7, x3, x5 + adc x7, x7, xzr + adds x12, x12, x6 + adcs x13, x13, x7 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x7, cs + umulh x6, x2, x2 + mul x8, x2, x2 + adds x9, x9, x6 + mul x6, x3, x3 + adcs x10, x10, x6 + umulh x6, x3, x3 + adcs x11, x11, x6 + mul x6, x4, x4 + adcs x12, x12, x6 + umulh x6, x4, x4 + adcs x13, x13, x6 + mul x6, x5, x5 + adcs x14, x14, x6 + umulh x6, x5, x5 + adc x7, x7, x6 + mov x5, #0xffffffff00000001 + adds x9, x9, x8, lsl #32 + lsr x3, x8, #32 + adcs x10, x10, x3 + mul x2, x8, x5 + umulh x8, x8, x5 + adcs x11, x11, x2 + adc x8, x8, xzr + adds x10, x10, x9, lsl #32 + lsr x3, x9, #32 + adcs x11, x11, x3 + mul x2, x9, x5 + umulh x9, x9, x5 + adcs x8, x8, x2 + adc x9, x9, xzr + adds x11, x11, x10, lsl #32 + lsr x3, x10, #32 + adcs x8, x8, x3 + mul x2, x10, x5 + umulh x10, x10, x5 + adcs x9, x9, x2 + adc x10, x10, xzr + adds x8, x8, x11, lsl #32 + lsr x3, x11, #32 + adcs x9, x9, x3 + mul x2, x11, x5 + umulh x11, x11, x5 + adcs x10, x10, x2 + adc x11, x11, xzr + adds x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + adcs x11, x11, x7 + cset x2, cs + mov x3, #0xffffffff + adds x12, x8, #0x1 + sbcs x13, x9, x3 + sbcs x14, x10, xzr + sbcs x7, x11, x5 + sbcs xzr, x2, xzr + csel x8, x8, x12, cc + csel x9, x9, x13, cc + csel x10, x10, x14, cc + csel x11, x11, x7, cc + stp x8, x9, [sp, #32] + stp x10, x11, [sp, #48] + ldp x5, x6, [x16] + ldp x4, x3, [sp] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [x16, #16] + ldp x4, x3, [sp, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + and x4, x3, #0xffffffff + adcs x6, x6, x4 + adcs x7, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x8, x8, x4 + stp x5, x6, [sp, #96] + stp x7, x8, [sp, #112] + ldp x5, x6, [x16] + ldp x4, x3, [sp] + adds x5, x5, x4 + adcs x6, x6, x3 + ldp x7, x8, [x16, #16] + ldp x4, x3, [sp, #16] + adcs x7, x7, x4 + adcs x8, x8, x3 + csetm x3, cs + subs x5, x5, x3 + and x1, x3, #0xffffffff + sbcs x6, x6, x1 + sbcs x7, x7, xzr + and x2, x3, #0xffffffff00000001 + sbc x8, x8, x2 + stp x5, x6, [sp, #64] + stp x7, x8, [sp, #80] + ldp x3, x4, [sp, #64] + ldp x7, x8, [sp, #96] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [sp, #112] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #80] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh 
x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #96] + stp x14, x0, [sp, #112] + ldp x5, x6, [x16, #32] + ldp x4, x3, [x16, #64] + adds x5, x5, x4 + adcs x6, x6, x3 + ldp x7, x8, [x16, #48] + ldp x4, x3, [x16, #80] + adcs x7, x7, x4 + adcs x8, x8, x3 + adc x3, xzr, xzr + cmn x5, #0x1 + mov x4, #0xffffffff + sbcs xzr, x6, x4 + sbcs xzr, x7, xzr + mov x4, #0xffffffff00000001 + sbcs xzr, x8, x4 + adcs x3, x3, xzr + csetm x3, ne + subs x5, x5, x3 + and x4, x3, #0xffffffff + sbcs x6, x6, x4 + sbcs x7, x7, xzr + and x4, x3, #0xffffffff00000001 + sbc x8, x8, x4 + stp x5, x6, [sp, #64] + stp x7, x8, [sp, #80] + ldp x3, x4, [x16] + ldp x7, x8, [sp, #32] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [sp, #48] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [x16, #16] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul 
x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #128] + stp x14, x0, [sp, #144] + ldp x2, x3, [sp, #96] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #112] + mul x11, x2, x5 + umulh x12, x2, x5 + mul x6, x2, x4 + umulh x7, x2, x4 + adds x10, x10, x6 + adcs x11, x11, x7 + mul x6, x3, x4 + umulh x7, x3, x4 + adc x7, x7, xzr + adds x11, x11, x6 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x7 + mul x6, x3, x5 + umulh x7, x3, x5 + adc x7, x7, xzr + adds x12, x12, x6 + adcs x13, x13, x7 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x7, cs + umulh x6, x2, x2 + mul x8, x2, x2 + adds x9, x9, x6 + mul x6, x3, x3 + adcs x10, x10, x6 + umulh x6, x3, x3 + adcs x11, x11, x6 + mul x6, x4, x4 + adcs x12, x12, x6 + umulh x6, x4, x4 + adcs x13, x13, x6 + mul x6, x5, x5 + adcs x14, x14, x6 + umulh x6, x5, x5 + adc x7, x7, x6 + mov x5, #0xffffffff00000001 + adds x9, x9, x8, lsl #32 + lsr x3, x8, #32 + adcs x10, x10, x3 + mul x2, x8, x5 + umulh x8, x8, x5 + adcs x11, x11, x2 + adc x8, x8, xzr + adds x10, x10, x9, lsl #32 + lsr x3, x9, #32 + adcs x11, x11, x3 + mul x2, x9, x5 + umulh x9, x9, x5 + adcs x8, x8, x2 + adc x9, x9, xzr + adds x11, x11, x10, lsl #32 + lsr x3, x10, #32 + adcs x8, x8, x3 + mul x2, x10, x5 + umulh x10, x10, x5 + adcs x9, x9, x2 + adc x10, x10, xzr + adds x8, x8, x11, lsl #32 + lsr x3, x11, #32 + adcs x9, x9, x3 + mul x2, x11, x5 + umulh x11, x11, x5 + adcs x10, x10, x2 + adc x11, x11, xzr + adds x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + adcs x11, x11, x7 + cset x2, cs + mov x3, #0xffffffff + adds x12, x8, #0x1 + sbcs x13, x9, x3 + sbcs x14, x10, xzr + sbcs x7, x11, x5 + sbcs xzr, x2, xzr + csel x8, x8, x12, cc + csel x9, x9, x13, cc + csel x10, x10, x14, cc + csel x11, x11, x7, cc + stp x8, x9, [sp, #160] + stp x10, x11, [sp, #176] + ldp x2, x3, [sp, #64] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #80] + mul x11, x2, x5 + umulh x12, x2, x5 + mul x6, x2, x4 + umulh x7, x2, x4 + adds x10, x10, x6 + adcs x11, x11, x7 + mul x6, x3, x4 + umulh x7, x3, x4 + adc x7, x7, xzr + adds x11, x11, x6 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x7 + mul x6, x3, x5 + umulh x7, x3, x5 + adc x7, x7, xzr + adds x12, x12, x6 + adcs x13, x13, x7 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x7, cs + umulh x6, x2, x2 + mul x8, x2, x2 + adds x9, x9, x6 + mul x6, x3, x3 + adcs x10, x10, x6 + umulh x6, x3, x3 + adcs x11, x11, x6 + mul x6, x4, x4 + adcs x12, x12, x6 + umulh x6, x4, x4 + adcs x13, x13, x6 + mul x6, x5, x5 + adcs x14, x14, x6 + umulh x6, x5, x5 + adc x7, x7, x6 + mov x5, #0xffffffff00000001 + adds x9, x9, x8, lsl #32 + lsr x3, x8, #32 + adcs x10, x10, x3 + mul x2, x8, x5 + umulh x8, x8, x5 + adcs x11, x11, x2 + adc x8, x8, xzr + adds x10, x10, x9, lsl #32 + lsr x3, x9, #32 + adcs x11, x11, x3 + mul x2, x9, x5 + umulh x9, x9, x5 + adcs x8, x8, x2 + adc x9, x9, xzr + adds x11, x11, x10, lsl #32 + lsr x3, x10, #32 + adcs x8, x8, x3 + mul x2, x10, x5 + umulh x10, x10, x5 + adcs x9, x9, x2 + adc 
x10, x10, xzr + adds x8, x8, x11, lsl #32 + lsr x3, x11, #32 + adcs x9, x9, x3 + mul x2, x11, x5 + umulh x11, x11, x5 + adcs x10, x10, x2 + adc x11, x11, xzr + adds x8, x8, x12 + adcs x9, x9, x13 + adcs x10, x10, x14 + adcs x11, x11, x7 + cset x2, cs + mov x3, #0xffffffff + adds x12, x8, #0x1 + sbcs x13, x9, x3 + sbcs x14, x10, xzr + sbcs x7, x11, x5 + sbcs xzr, x2, xzr + csel x8, x8, x12, cc + csel x9, x9, x13, cc + csel x10, x10, x14, cc + csel x11, x11, x7, cc + stp x8, x9, [sp, #64] + stp x10, x11, [sp, #80] + mov x1, #0x9 + mov x2, #0xffffffffffffffff + ldp x9, x10, [sp, #160] + subs x9, x2, x9 + mov x2, #0xffffffff + sbcs x10, x2, x10 + ldp x11, x12, [sp, #176] + ngcs x11, x11 + mov x2, #0xffffffff00000001 + sbc x12, x2, x12 + mul x3, x1, x9 + mul x4, x1, x10 + mul x5, x1, x11 + mul x6, x1, x12 + umulh x9, x1, x9 + umulh x10, x1, x10 + umulh x11, x1, x11 + umulh x7, x1, x12 + adds x4, x4, x9 + adcs x5, x5, x10 + adcs x6, x6, x11 + adc x7, x7, xzr + mov x1, #0xc + ldp x9, x10, [sp, #128] + mul x8, x9, x1 + umulh x9, x9, x1 + adds x3, x3, x8 + mul x8, x10, x1 + umulh x10, x10, x1 + adcs x4, x4, x8 + ldp x11, x12, [sp, #144] + mul x8, x11, x1 + umulh x11, x11, x1 + adcs x5, x5, x8 + mul x8, x12, x1 + umulh x12, x12, x1 + adcs x6, x6, x8 + adc x7, x7, xzr + adds x4, x4, x9 + adcs x5, x5, x10 + adcs x6, x6, x11 + adc x7, x7, x12 + add x8, x7, #0x1 + lsl x10, x8, #32 + adds x6, x6, x10 + adc x7, x7, xzr + neg x9, x8 + sub x10, x10, #0x1 + subs x3, x3, x9 + sbcs x4, x4, x10 + sbcs x5, x5, xzr + sbcs x6, x6, x8 + sbc x8, x7, x8 + adds x3, x3, x8 + and x9, x8, #0xffffffff + adcs x4, x4, x9 + adcs x5, x5, xzr + neg x10, x9 + adc x6, x6, x10 + stp x3, x4, [sp, #160] + stp x5, x6, [sp, #176] + ldp x5, x6, [sp, #64] + ldp x4, x3, [sp] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #80] + ldp x4, x3, [sp, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + and x4, x3, #0xffffffff + adcs x6, x6, x4 + adcs x7, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x8, x8, x4 + stp x5, x6, [sp, #64] + stp x7, x8, [sp, #80] + ldp x2, x3, [sp, #32] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #48] + mul x11, x2, x5 + umulh x12, x2, x5 + mul x6, x2, x4 + umulh x7, x2, x4 + adds x10, x10, x6 + adcs x11, x11, x7 + mul x6, x3, x4 + umulh x7, x3, x4 + adc x7, x7, xzr + adds x11, x11, x6 + mul x13, x4, x5 + umulh x14, x4, x5 + adcs x12, x12, x7 + mul x6, x3, x5 + umulh x7, x3, x5 + adc x7, x7, xzr + adds x12, x12, x6 + adcs x13, x13, x7 + adc x14, x14, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + cset x7, cs + umulh x6, x2, x2 + mul x8, x2, x2 + adds x9, x9, x6 + mul x6, x3, x3 + adcs x10, x10, x6 + umulh x6, x3, x3 + adcs x11, x11, x6 + mul x6, x4, x4 + adcs x12, x12, x6 + umulh x6, x4, x4 + adcs x13, x13, x6 + mul x6, x5, x5 + adcs x14, x14, x6 + umulh x6, x5, x5 + adc x7, x7, x6 + mov x5, #0xffffffff00000001 + adds x9, x9, x8, lsl #32 + lsr x3, x8, #32 + adcs x10, x10, x3 + mul x2, x8, x5 + umulh x8, x8, x5 + adcs x11, x11, x2 + adc x8, x8, xzr + adds x10, x10, x9, lsl #32 + lsr x3, x9, #32 + adcs x11, x11, x3 + mul x2, x9, x5 + umulh x9, x9, x5 + adcs x8, x8, x2 + adc x9, x9, xzr + adds x11, x11, x10, lsl #32 + lsr x3, x10, #32 + adcs x8, x8, x3 + mul x2, x10, x5 + umulh x10, x10, x5 + adcs x9, x9, x2 + adc x10, x10, xzr + adds x8, x8, x11, lsl #32 + lsr x3, x11, #32 + adcs x9, x9, x3 + mul x2, x11, x5 + umulh x11, x11, x5 + adcs x10, x10, x2 + adc x11, x11, xzr + adds x8, x8, x12 + 
adcs x9, x9, x13 + adcs x10, x10, x14 + adcs x11, x11, x7 + cset x2, cs + mov x3, #0xffffffff + adds x12, x8, #0x1 + sbcs x13, x9, x3 + sbcs x14, x10, xzr + sbcs x7, x11, x5 + sbcs xzr, x2, xzr + csel x8, x8, x12, cc + csel x9, x9, x13, cc + csel x10, x10, x14, cc + csel x11, x11, x7, cc + stp x8, x9, [sp] + stp x10, x11, [sp, #16] + ldp x3, x4, [sp, #160] + ldp x7, x8, [sp, #96] + mul x12, x3, x7 + umulh x13, x3, x7 + mul x11, x3, x8 + umulh x14, x3, x8 + adds x13, x13, x11 + ldp x9, x10, [sp, #112] + mul x11, x3, x9 + umulh x0, x3, x9 + adcs x14, x14, x11 + mul x11, x3, x10 + umulh x1, x3, x10 + adcs x0, x0, x11 + adc x1, x1, xzr + ldp x5, x6, [sp, #176] + mul x11, x4, x7 + adds x13, x13, x11 + mul x11, x4, x8 + adcs x14, x14, x11 + mul x11, x4, x9 + adcs x0, x0, x11 + mul x11, x4, x10 + adcs x1, x1, x11 + umulh x3, x4, x10 + adc x3, x3, xzr + umulh x11, x4, x7 + adds x14, x14, x11 + umulh x11, x4, x8 + adcs x0, x0, x11 + umulh x11, x4, x9 + adcs x1, x1, x11 + adc x3, x3, xzr + mul x11, x5, x7 + adds x14, x14, x11 + mul x11, x5, x8 + adcs x0, x0, x11 + mul x11, x5, x9 + adcs x1, x1, x11 + mul x11, x5, x10 + adcs x3, x3, x11 + umulh x4, x5, x10 + adc x4, x4, xzr + umulh x11, x5, x7 + adds x0, x0, x11 + umulh x11, x5, x8 + adcs x1, x1, x11 + umulh x11, x5, x9 + adcs x3, x3, x11 + adc x4, x4, xzr + mul x11, x6, x7 + adds x0, x0, x11 + mul x11, x6, x8 + adcs x1, x1, x11 + mul x11, x6, x9 + adcs x3, x3, x11 + mul x11, x6, x10 + adcs x4, x4, x11 + umulh x5, x6, x10 + adc x5, x5, xzr + mov x10, #0xffffffff00000001 + adds x13, x13, x12, lsl #32 + lsr x11, x12, #32 + adcs x14, x14, x11 + mul x11, x12, x10 + umulh x12, x12, x10 + adcs x0, x0, x11 + adc x12, x12, xzr + umulh x11, x6, x7 + adds x1, x1, x11 + umulh x11, x6, x8 + adcs x3, x3, x11 + umulh x11, x6, x9 + adcs x4, x4, x11 + adc x5, x5, xzr + adds x14, x14, x13, lsl #32 + lsr x11, x13, #32 + adcs x0, x0, x11 + mul x11, x13, x10 + umulh x13, x13, x10 + adcs x12, x12, x11 + adc x13, x13, xzr + adds x0, x0, x14, lsl #32 + lsr x11, x14, #32 + adcs x12, x12, x11 + mul x11, x14, x10 + umulh x14, x14, x10 + adcs x13, x13, x11 + adc x14, x14, xzr + adds x12, x12, x0, lsl #32 + lsr x11, x0, #32 + adcs x13, x13, x11 + mul x11, x0, x10 + umulh x0, x0, x10 + adcs x14, x14, x11 + adc x0, x0, xzr + adds x12, x12, x1 + adcs x13, x13, x3 + adcs x14, x14, x4 + adcs x0, x0, x5 + cset x8, cs + mov x11, #0xffffffff + adds x1, x12, #0x1 + sbcs x3, x13, x11 + sbcs x4, x14, xzr + sbcs x5, x0, x10 + sbcs xzr, x8, xzr + csel x12, x12, x1, cc + csel x13, x13, x3, cc + csel x14, x14, x4, cc + csel x0, x0, x5, cc + stp x12, x13, [sp, #96] + stp x14, x0, [sp, #112] + ldp x5, x6, [sp, #64] + ldp x4, x3, [sp, #32] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #80] + ldp x4, x3, [sp, #48] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + csetm x3, cc + adds x5, x5, x3 + and x4, x3, #0xffffffff + adcs x6, x6, x4 + adcs x7, x7, xzr + and x4, x3, #0xffffffff00000001 + adc x8, x8, x4 + stp x5, x6, [x15, #64] + stp x7, x8, [x15, #80] + ldp x1, x2, [sp, #128] + lsl x0, x1, #2 + ldp x6, x7, [sp, #160] + subs x0, x0, x6 + extr x1, x2, x1, #62 + sbcs x1, x1, x7 + ldp x3, x4, [sp, #144] + extr x2, x3, x2, #62 + ldp x6, x7, [sp, #176] + sbcs x2, x2, x6 + extr x3, x4, x3, #62 + sbcs x3, x3, x7 + lsr x4, x4, #62 + sbc x4, x4, xzr + add x5, x4, #0x1 + lsl x8, x5, #32 + negs x6, x8 + ngcs x7, xzr + sbc x8, x8, x5 + adds x0, x0, x5 + adcs x1, x1, x6 + adcs x2, x2, x7 + adcs x3, x3, x8 + csetm x5, cc + adds x0, x0, x5 + and x6, x5, #0xffffffff + adcs x1, x1, x6 + adcs x2, x2, xzr + neg x7, 
x6 + adc x3, x3, x7 + stp x0, x1, [x15] + stp x2, x3, [x15, #16] + mov x1, #0x8 + mov x2, #0xffffffffffffffff + ldp x9, x10, [sp] + subs x9, x2, x9 + mov x2, #0xffffffff + sbcs x10, x2, x10 + ldp x11, x12, [sp, #16] + ngcs x11, x11 + mov x2, #0xffffffff00000001 + sbc x12, x2, x12 + lsl x3, x9, #3 + extr x4, x10, x9, #61 + extr x5, x11, x10, #61 + extr x6, x12, x11, #61 + lsr x7, x12, #61 + mov x1, #0x3 + ldp x9, x10, [sp, #96] + mul x8, x9, x1 + umulh x9, x9, x1 + adds x3, x3, x8 + mul x8, x10, x1 + umulh x10, x10, x1 + adcs x4, x4, x8 + ldp x11, x12, [sp, #112] + mul x8, x11, x1 + umulh x11, x11, x1 + adcs x5, x5, x8 + mul x8, x12, x1 + umulh x12, x12, x1 + adcs x6, x6, x8 + adc x7, x7, xzr + adds x4, x4, x9 + adcs x5, x5, x10 + adcs x6, x6, x11 + adc x7, x7, x12 + add x8, x7, #0x1 + lsl x10, x8, #32 + adds x6, x6, x10 + adc x7, x7, xzr + neg x9, x8 + sub x10, x10, #0x1 + subs x3, x3, x9 + sbcs x4, x4, x10 + sbcs x5, x5, xzr + sbcs x6, x6, x8 + sbc x8, x7, x8 + adds x3, x3, x8 + and x9, x8, #0xffffffff + adcs x4, x4, x9 + adcs x5, x5, xzr + neg x10, x9 + adc x6, x6, x10 + stp x3, x4, [x15, #32] + stp x5, x6, [x15, #48] + add sp, sp, #0xc0 + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/arm/p384/Makefile b/third_party/s2n-bignum/arm/p384/Makefile index 60687fb7c1..5d64426750 100644 --- a/third_party/s2n-bignum/arm/p384/Makefile +++ b/third_party/s2n-bignum/arm/p384/Makefile @@ -28,11 +28,13 @@ OBJ = bignum_add_p384.o \ bignum_demont_p384.o \ bignum_double_p384.o \ bignum_half_p384.o \ + bignum_inv_p384.o \ bignum_littleendian_6.o \ bignum_mod_n384.o \ bignum_mod_n384_6.o \ bignum_mod_p384.o \ bignum_mod_p384_6.o \ + bignum_montinv_p384.o \ bignum_montmul_p384.o \ bignum_montmul_p384_alt.o \ bignum_montmul_p384_neon.o \ @@ -51,7 +53,9 @@ OBJ = bignum_add_p384.o \ p384_montjdouble.o \ p384_montjdouble_alt.o \ p384_montjmixadd.o \ - p384_montjmixadd_alt.o + p384_montjmixadd_alt.o \ + p384_montjscalarmul.o \ + p384_montjscalarmul_alt.o %.o : %.S ; $(CC) -E -I../../include $< | $(GAS) -o $@ - diff --git a/third_party/s2n-bignum/arm/p384/bignum_inv_p384.S b/third_party/s2n-bignum/arm/p384/bignum_inv_p384.S new file mode 100644 index 0000000000..085224172e --- /dev/null +++ b/third_party/s2n-bignum/arm/p384/bignum_inv_p384.S @@ -0,0 +1,1469 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Modular inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1 +// Input x[6]; output z[6] +// +// extern void bignum_inv_p384(uint64_t z[static 6],uint64_t x[static 6]); +// +// If the 6-digit input x is coprime to p_384, i.e. is not divisible +// by it, returns z < p_384 such that x * z == 1 (mod p_384). Note that +// x does not need to be reduced modulo p_384, but the output always is. +// If the input is divisible (i.e. is 0 or p_384), then there can be no +// modular inverse and z = 0 is returned. 
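+//
+// A minimal illustrative call (not part of the original header); the
+// variable names are hypothetical and the comments merely restate the
+// postcondition x * z == 1 (mod p_384):
+//
+//   uint64_t x[6] = {2, 0, 0, 0, 0, 0};  // 2 is coprime to the odd p_384
+//   uint64_t z[6];
+//   bignum_inv_p384(z, x);               // z == (p_384 + 1) / 2, so 2 * z == 1 (mod p_384)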
+// +// Standard ARM ABI: X0 = z, X1 = x +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_inv_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_inv_p384) + + .text + .balign 4 + +// Size in bytes of a 64-bit word + +#define N 8 + +// Used for the return pointer + +#define res x20 + +// Loop counter and d = 2 * delta value for divstep + +#define i x21 +#define d x22 + +// Registers used for matrix element magnitudes and signs + +#define m00 x10 +#define m01 x11 +#define m10 x12 +#define m11 x13 +#define s00 x14 +#define s01 x15 +#define s10 x16 +#define s11 x17 + +// Initial carries for combinations + +#define car0 x9 +#define car1 x19 + +// Input and output, plain registers treated according to pattern + +#define reg0 x0, #0 +#define reg1 x1, #0 +#define reg2 x2, #0 +#define reg3 x3, #0 +#define reg4 x4, #0 + +#define x x1, #0 +#define z x0, #0 + +// Pointer-offset pairs for temporaries on stack +// The u and v variables are 6 words each as expected, but the f and g +// variables are 8 words each -- they need to have at least one extra +// word for a sign word, and to preserve alignment we "round up" to 8. +// In fact, we currently keep an extra word in u and v as well. + +#define f sp, #0 +#define g sp, #(8*N) +#define u sp, #(16*N) +#define v sp, #(24*N) + +// Total size to reserve on the stack + +#define NSPACE #(32*N) + +// --------------------------------------------------------------------------- +// Core signed almost-Montgomery reduction macro. Takes input in +// [d6;d5;d4;d3;d2;d1;d0] and returns result in [d6;d5d4;d3;d2;d1], adding +// to the existing [d6;d5;d4;d3;d2;d1], and re-using d0 as a temporary +// internally as well as t0, t1, t2. This is almost-Montgomery, i.e. the +// result fits in 6 digits but is not necessarily strictly reduced mod p_384. +// --------------------------------------------------------------------------- + +#define amontred(d6,d5,d4,d3,d2,d1,d0, t3,t2,t1) \ +/* We only know the input is -2^444 < x < 2^444. To do traditional */ \ +/* unsigned Montgomery reduction, start by adding 2^61 * p_384. */ \ + mov t1, #0xe000000000000000; \ + adds d0, d0, t1; \ + mov t2, #0x000000001fffffff; \ + adcs d1, d1, t2; \ + mov t3, #0xffffffffe0000000; \ + bic t3, t3, #0x2000000000000000; \ + adcs d2, d2, t3; \ + sbcs d3, d3, xzr; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ + mov t1, #0x1fffffffffffffff; \ + adc d6, d6, t1; \ +/* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ +/* Store it back into d0 since we no longer need that digit. 
*/ \ + add d0, d0, d0, lsl #32; \ +/* Now let [t3;t2;t1;-] = (2^384 - p_384) * w */ \ +/* We know the lowest word will cancel d0 so we don't need it */ \ + mov t1, #0xffffffff00000001; \ + umulh t1, t1, d0; \ + mov t2, #0x00000000ffffffff; \ + mul t3, t2, d0; \ + umulh t2, t2, d0; \ + adds t1, t1, t3; \ + adcs t2, t2, d0; \ + cset t3, cs; \ +/* Now x + p_384 * w = (x + 2^384 * w) - (2^384 - p_384) * w */ \ +/* We catch the net top carry from add-subtract in the digit d0 */ \ + adds d6, d6, d0; \ + cset d0, cs; \ + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ + sbcs d6, d6, xzr; \ + sbcs d0, d0, xzr; \ +/* Now if d0 is nonzero we subtract p_384 (almost-Montgomery) */ \ + neg d0, d0; \ + and t1, d0, #0x00000000ffffffff; \ + and t2, d0, #0xffffffff00000000; \ + and t3, d0, #0xfffffffffffffffe; \ + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, d0; \ + sbcs d5, d5, d0; \ + sbc d6, d6, d0 + +// Very similar to a subroutine call to the s2n-bignum word_divstep59. +// But different in register usage and returning the final matrix in +// registers as follows +// +// [ m00 m01] +// [ m10 m11] + +#define divstep59() \ + and x4, x2, #0xfffff; \ + orr x4, x4, #0xfffffe0000000000; \ + and x5, x3, #0xfffff; \ + orr x5, x5, #0xc000000000000000; \ + tst x5, #0x1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr 
x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + asr x5, x5, #1; \ + add x8, x4, #0x100, lsl #12; \ + sbfx x8, x8, #21, #21; \ + mov x11, #0x100000; \ + add x11, x11, x11, lsl #21; \ + add x9, x4, x11; \ + asr x9, x9, #42; \ + add x10, x5, #0x100, lsl #12; \ + sbfx x10, x10, #21, #21; \ + add x11, x5, x11; \ + asr x11, x11, #42; \ + mul x6, x8, x2; \ + mul x7, x9, x3; \ + mul x2, x10, x2; \ + mul x3, x11, x3; \ + add x4, x6, x7; \ + add x5, x2, x3; \ + asr x2, x4, #20; \ + asr x3, x5, #20; \ + and x4, x2, #0xfffff; \ + orr x4, x4, #0xfffffe0000000000; \ + and x5, x3, #0xfffff; \ + orr x5, x5, #0xc000000000000000; \ + tst x5, #0x1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst 
x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + asr x5, x5, #1; \ + add x12, x4, #0x100, lsl #12; \ + sbfx x12, x12, #21, #21; \ + mov x15, #0x100000; \ + add x15, x15, x15, lsl #21; \ + add x13, x4, x15; \ + asr x13, x13, #42; \ + add x14, x5, #0x100, lsl #12; \ + sbfx x14, x14, #21, #21; \ + add x15, x5, x15; \ + asr x15, x15, #42; \ + mul x6, x12, x2; \ + mul x7, x13, x3; \ + mul x2, x14, x2; \ + mul x3, x15, x3; \ + add x4, x6, x7; \ + add x5, x2, x3; \ + asr x2, x4, #20; \ + asr x3, x5, #20; \ + and x4, x2, #0xfffff; \ + orr x4, x4, #0xfffffe0000000000; \ + and x5, x3, #0xfffff; \ + orr x5, x5, #0xc000000000000000; \ + tst x5, #0x1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + 
add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + mul x2, x12, x8; \ + mul x3, x12, x9; \ + mul x6, x14, x8; \ + mul x7, x14, x9; \ + madd x8, x13, x10, x2; \ + madd x9, x13, x11, x3; \ + madd x16, x15, x10, x6; \ + madd x17, x15, x11, x7; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, 
x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + asr x5, x5, #1; \ + add x12, x4, #0x100, lsl #12; \ + sbfx x12, x12, #22, #21; \ + mov x15, #0x100000; \ + add x15, x15, x15, lsl #21; \ + add x13, x4, x15; \ + asr x13, x13, #43; \ + add x14, x5, #0x100, lsl #12; \ + sbfx x14, x14, #22, #21; \ + add x15, x5, x15; \ + asr x15, x15, #43; \ + mneg x2, x12, x8; \ + mneg x3, x12, x9; \ + mneg x4, x14, x8; \ + mneg x5, x14, x9; \ + msub m00, x13, x16, x2; \ + msub m01, x13, x17, x3; \ + msub m10, x15, x16, x4; \ + msub m11, x15, x17, x5 + +S2N_BN_SYMBOL(bignum_inv_p384): + +// Save registers and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, NSPACE + +// Save the return pointer for the end so we can overwrite x0 later + + mov res, x0 + +// Copy the prime and input into the main f and g variables respectively. +// Make sure x is reduced so that g <= f as assumed in the bound proof. + + mov x10, #0x00000000ffffffff + mov x11, #0xffffffff00000000 + mov x12, #0xfffffffffffffffe + mov x15, #0xffffffffffffffff + stp x10, x11, [f] + stp x12, x15, [f+2*N] + stp x15, x15, [f+4*N] + str xzr, [f+6*N] + + ldp x2, x3, [x1] + subs x10, x2, x10 + sbcs x11, x3, x11 + ldp x4, x5, [x1, #(2*N)] + sbcs x12, x4, x12 + sbcs x13, x5, x15 + ldp x6, x7, [x1, #(4*N)] + sbcs x14, x6, x15 + sbcs x15, x7, x15 + + csel x2, x2, x10, cc + csel x3, x3, x11, cc + csel x4, x4, x12, cc + csel x5, x5, x13, cc + csel x6, x6, x14, cc + csel x7, x7, x15, cc + + stp x2, x3, [g] + stp x4, x5, [g+2*N] + stp x6, x7, [g+4*N] + str xzr, [g+6*N] + +// Also maintain reduced < 2^384 vector [u,v] such that +// [f,g] == x * 2^{5*i-75} * [u,v] (mod p_384) +// starting with [p_384,x] == x * 2^{5*0-75} * [0,2^75] (mod p_384) +// The weird-looking 5*i modifications come in because we are doing +// 64-bit word-sized Montgomery reductions at each stage, which is +// 5 bits more than the 59-bit requirement to keep things stable. + + stp xzr, xzr, [u] + stp xzr, xzr, [u+2*N] + stp xzr, xzr, [u+4*N] + + mov x10, #2048 + stp xzr, x10, [v] + stp xzr, xzr, [v+2*N] + stp xzr, xzr, [v+4*N] + +// Start of main loop. We jump into the middle so that the divstep +// portion is common to the special fifteenth iteration after a uniform +// first 14. + + mov i, #15 + mov d, #1 + b midloop + +loop: + +// Separate the matrix elements into sign-magnitude pairs + + cmp m00, xzr + csetm s00, mi + cneg m00, m00, mi + + cmp m01, xzr + csetm s01, mi + cneg m01, m01, mi + + cmp m10, xzr + csetm s10, mi + cneg m10, m10, mi + + cmp m11, xzr + csetm s11, mi + cneg m11, m11, mi + +// Adjust the initial values to allow for complement instead of negation +// This initial offset is the same for [f,g] and [u,v] compositions. +// Save it in stable registers for the [u,v] part and do [f,g] first. + + and x0, m00, s00 + and x1, m01, s01 + add car0, x0, x1 + + and x0, m10, s10 + and x1, m11, s11 + add car1, x0, x1 + +// Now the computation of the updated f and g values. This maintains a +// 2-word carry between stages so we can conveniently insert the shift +// right by 59 before storing back, and not overwrite digits we need +// again of the old f and g values. 
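+//
+// A hedged reference sketch (not in the original source): viewed over
+// arbitrary-precision signed integers, one pass below performs the
+// simultaneous update
+//
+//     f, g := (m00*f + m01*g) / 2^59, (m10*f + m11*g) / 2^59
+//
+// with both divisions exact by construction of divstep59; the
+// digit-by-digit code simply interleaves the two dot products with the
+// right shift by 59 before each store.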
+// +// Digit 0 of [f,g] + + ldr x7, [f] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, car0, x0 + adc x2, xzr, x1 + ldr x8, [g] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + adc x2, x2, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, car1, x0 + adc x3, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + adc x3, x3, x1 + +// Digit 1 of [f,g] + + ldr x7, [f+N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [g+N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [f] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [g] + +// Digit 2 of [f,g] + + ldr x7, [f+2*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [g+2*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [f+N] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [g+N] + +// Digit 3 of [f,g] + + ldr x7, [f+3*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x5, x5, x0 + adc x3, xzr, x1 + ldr x8, [g+3*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [f+2*N] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x2, x2, x0 + adc x6, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [g+2*N] + +// Digit 4 of [f,g] + + ldr x7, [f+4*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x3, x3, x0 + adc x4, xzr, x1 + ldr x8, [g+4*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [f+3*N] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x6, x6, x0 + adc x5, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [g+3*N] + +// Digits 5 and 6 of [f,g] + + ldr x7, [f+5*N] + eor x1, x7, s00 + ldr x23, [f+6*N] + eor x2, x23, s00 + and x2, x2, m00 + neg x2, x2 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, x4, x0 + adc x2, x2, x1 + ldr x8, [g+5*N] + eor x1, x8, s01 + ldr x24, [g+6*N] + eor x0, x24, s01 + and x0, x0, m01 + sub x2, x2, x0 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [f+4*N] + extr x4, x2, x4, #59 + str x4, [f+5*N] + asr x2, x2, #59 + str x2, [f+6*N] + + eor x1, x7, s10 + eor x4, x23, s10 + and x4, x4, m10 + neg x4, x4 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, x5, x0 + adc x4, x4, x1 + eor x1, x8, s11 + eor x0, x24, s11 + and x0, x0, m11 + sub x4, x4, x0 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + adc x4, x4, x1 + extr x6, x5, x6, #59 + str x6, [g+4*N] + extr x5, x4, x5, #59 + str x5, [g+5*N] + asr x4, x4, #59 + str x4, [g+6*N] + +// Now the computation of the updated u and v values and their +// Montgomery reductions. 
A very similar accumulation except that +// the top words of u and v are unsigned and we don't shift. +// +// Digit 0 of [u,v] + + ldr x7, [u] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, car0, x0 + adc x2, xzr, x1 + ldr x8, [v] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u] + adc x2, x2, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, car1, x0 + adc x3, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + str x5, [v] + adc x3, x3, x1 + +// Digit 1 of [u,v] + + ldr x7, [u+N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [v+N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x2, x2, x0 + str x2, [u+N] + adc x6, x6, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x3, x3, x0 + str x3, [v+N] + adc x4, x4, x1 + +// Digit 2 of [u,v] + + ldr x7, [u+2*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [v+2*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x6, x6, x0 + str x6, [u+2*N] + adc x5, x5, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x4, x4, x0 + str x4, [v+2*N] + adc x2, x2, x1 + +// Digit 3 of [u,v] + + ldr x7, [u+3*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x5, x5, x0 + adc x3, xzr, x1 + ldr x8, [v+3*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x5, x5, x0 + str x5, [u+3*N] + adc x3, x3, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x2, x2, x0 + adc x6, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x2, x2, x0 + str x2, [v+3*N] + adc x6, x6, x1 + +// Digit 4 of [u,v] + + ldr x7, [u+4*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x3, x3, x0 + adc x4, xzr, x1 + ldr x8, [v+4*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x3, x3, x0 + str x3, [u+4*N] + adc x4, x4, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x6, x6, x0 + adc x5, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x6, x6, x0 + str x6, [v+4*N] + adc x5, x5, x1 + +// Digits 5 and 6 of [u,v] (top is unsigned) + + ldr x7, [u+5*N] + eor x1, x7, s00 + and x2, s00, m00 + neg x2, x2 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, x4, x0 + adc x2, x2, x1 + ldr x8, [v+5*N] + eor x1, x8, s01 + and x0, s01, m01 + sub x2, x2, x0 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u+5*N] + adc x2, x2, x1 + str x2, [u+6*N] + + eor x1, x7, s10 + and x4, s10, m10 + neg x4, x4 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, x5, x0 + adc x4, x4, x1 + eor x1, x8, s11 + and x0, s11, m11 + sub x4, x4, x0 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + str x5, [v+5*N] + adc x4, x4, x1 + str x4, [v+6*N] + +// Montgomery reduction of u + + ldp x0, x1, [u] + ldp x2, x3, [u+16] + ldp x4, x5, [u+32] + ldr x6, [u+48] + amontred(x6,x5,x4,x3,x2,x1,x0, x9,x8,x7) + stp x1, x2, [u] + stp x3, x4, [u+16] + stp x5, x6, [u+32] + +// Montgomery reduction of v + + ldp x0, x1, [v] + ldp x2, x3, [v+16] + ldp x4, x5, [v+32] + ldr x6, [v+48] + amontred(x6,x5,x4,x3,x2,x1,x0, x9,x8,x7) + stp x1, x2, [v] + stp x3, x4, [v+16] + stp x5, x6, [v+32] + +midloop: + + mov x1, d + ldr x2, [f] + ldr x3, [g] + 
divstep59() + mov d, x1 + +// Next iteration + + subs i, i, #1 + bne loop + +// The 15th and last iteration does not need anything except the +// u value and the sign of f; the latter can be obtained from the +// lowest word of f. So it's done differently from the main loop. +// Find the sign of the new f. For this we just need one digit +// since we know (for in-scope cases) that f is either +1 or -1. +// We don't explicitly shift right by 59 either, but looking at +// bit 63 (or any bit >= 60) of the unshifted result is enough +// to distinguish -1 from +1; this is then made into a mask. + + ldr x0, [f] + ldr x1, [g] + mul x0, x0, m00 + madd x1, x1, m01, x0 + asr x0, x1, #63 + +// Now separate out the matrix into sign-magnitude pairs +// and adjust each one based on the sign of f. +// +// Note that at this point we expect |f|=1 and we got its +// sign above, so then since [f,0] == x * [u,v] (mod p_384) +// we want to flip the sign of u according to that of f. + + cmp m00, xzr + csetm s00, mi + cneg m00, m00, mi + eor s00, s00, x0 + + cmp m01, xzr + csetm s01, mi + cneg m01, m01, mi + eor s01, s01, x0 + + cmp m10, xzr + csetm s10, mi + cneg m10, m10, mi + eor s10, s10, x0 + + cmp m11, xzr + csetm s11, mi + cneg m11, m11, mi + eor s11, s11, x0 + +// Adjust the initial value to allow for complement instead of negation + + and x0, m00, s00 + and x1, m01, s01 + add car0, x0, x1 + +// Digit 0 of [u] + + ldr x7, [u] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, car0, x0 + adc x2, xzr, x1 + ldr x8, [v] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u] + adc x2, x2, x1 + +// Digit 1 of [u] + + ldr x7, [u+N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [v+N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x2, x2, x0 + str x2, [u+N] + adc x6, x6, x1 + +// Digit 2 of [u] + + ldr x7, [u+2*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [v+2*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x6, x6, x0 + str x6, [u+2*N] + adc x5, x5, x1 + +// Digit 3 of [u] + + ldr x7, [u+3*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x5, x5, x0 + adc x3, xzr, x1 + ldr x8, [v+3*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x5, x5, x0 + str x5, [u+3*N] + adc x3, x3, x1 + +// Digit 4 of [u] + + ldr x7, [u+4*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x3, x3, x0 + adc x4, xzr, x1 + ldr x8, [v+4*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x3, x3, x0 + str x3, [u+4*N] + adc x4, x4, x1 + +// Digits 5 and 6 of [u] (top is unsigned) + + ldr x7, [u+5*N] + eor x1, x7, s00 + and x2, s00, m00 + neg x2, x2 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, x4, x0 + adc x2, x2, x1 + ldr x8, [v+5*N] + eor x1, x8, s01 + and x0, s01, m01 + sub x2, x2, x0 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u+5*N] + adc x2, x2, x1 + str x2, [u+6*N] + +// Montgomery reduction of u. 
This needs to be strict not "almost" +// so it is followed by an optional subtraction of p_384 + + ldp x10, x0, [u] + ldp x1, x2, [u+16] + ldp x3, x4, [u+32] + ldr x5, [u+48] + amontred(x5,x4,x3,x2,x1,x0,x10, x9,x8,x7) + + mov x10, #0x00000000ffffffff + subs x10, x0, x10 + mov x11, #0xffffffff00000000 + sbcs x11, x1, x11 + mov x12, #0xfffffffffffffffe + sbcs x12, x2, x12 + mov x15, #0xffffffffffffffff + sbcs x13, x3, x15 + sbcs x14, x4, x15 + sbcs x15, x5, x15 + + csel x0, x0, x10, cc + csel x1, x1, x11, cc + csel x2, x2, x12, cc + csel x3, x3, x13, cc + csel x4, x4, x14, cc + csel x5, x5, x15, cc + +// Store it back to the final output + + stp x0, x1, [res] + stp x2, x3, [res, #16] + stp x4, x5, [res, #32] + +// Restore stack and registers + + add sp, sp, NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/arm/p384/bignum_montinv_p384.S b/third_party/s2n-bignum/arm/p384/bignum_montinv_p384.S new file mode 100644 index 0000000000..79d5978119 --- /dev/null +++ b/third_party/s2n-bignum/arm/p384/bignum_montinv_p384.S @@ -0,0 +1,1487 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1 +// Input x[6]; output z[6] +// +// extern void bignum_montinv_p384(uint64_t z[static 6],uint64_t x[static 6]); +// +// If the 6-digit input x is coprime to p_384, i.e. is not divisible +// by it, returns z < p_384 such that x * z == 2^768 (mod p_384). This +// is effectively "Montgomery inverse" because if we consider x and z as +// Montgomery forms of X and Z, i.e. x == 2^384 * X and z == 2^384 * Z +// (both mod p_384) then X * Z == 1 (mod p_384). That is, this function +// gives the analog of the modular inverse bignum_inv_p384 but with both +// input and output in the Montgomery domain. Note that x does not need +// to be reduced modulo p_384, but the output always is. If the input +// is divisible (i.e. is 0 or p_384), then there can be no solution to +// the congruence x * z == 2^768 (mod p_384), and z = 0 is returned. 
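+//
+// As an illustrative reference model of this contract (a Python 3.8+
+// sketch, e.g. for checking test vectors; not part of the s2n-bignum code):
+//
+//     p_384 = 2**384 - 2**128 - 2**96 + 2**32 - 1
+//     def montinv_p384_model(x):
+//         if x % p_384 == 0:
+//             return 0
+//         return (pow(x, -1, p_384) * 2**768) % p_384
+//
+// since x * (x^-1 * 2^768) == 2^768 (mod p_384), and for x == 2^384 * X
+// the result is 2^384 * X^-1, the Montgomery form of the inverse.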
+// +// Standard ARM ABI: X0 = z, X1 = x +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montinv_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montinv_p384) + + .text + .balign 4 + +// Size in bytes of a 64-bit word + +#define N 8 + +// Used for the return pointer + +#define res x20 + +// Loop counter and d = 2 * delta value for divstep + +#define i x21 +#define d x22 + +// Registers used for matrix element magnitudes and signs + +#define m00 x10 +#define m01 x11 +#define m10 x12 +#define m11 x13 +#define s00 x14 +#define s01 x15 +#define s10 x16 +#define s11 x17 + +// Initial carries for combinations + +#define car0 x9 +#define car1 x19 + +// Input and output, plain registers treated according to pattern + +#define reg0 x0, #0 +#define reg1 x1, #0 +#define reg2 x2, #0 +#define reg3 x3, #0 +#define reg4 x4, #0 + +#define x x1, #0 +#define z x0, #0 + +// Pointer-offset pairs for temporaries on stack +// The u and v variables are 6 words each as expected, but the f and g +// variables are 8 words each -- they need to have at least one extra +// word for a sign word, and to preserve alignment we "round up" to 8. +// In fact, we currently keep an extra word in u and v as well. + +#define f sp, #0 +#define g sp, #(8*N) +#define u sp, #(16*N) +#define v sp, #(24*N) + +// Total size to reserve on the stack + +#define NSPACE #(32*N) + +// --------------------------------------------------------------------------- +// Core signed almost-Montgomery reduction macro. Takes input in +// [d6;d5;d4;d3;d2;d1;d0] and returns result in [d6;d5d4;d3;d2;d1], adding +// to the existing [d6;d5;d4;d3;d2;d1], and re-using d0 as a temporary +// internally as well as t0, t1, t2. This is almost-Montgomery, i.e. the +// result fits in 6 digits but is not necessarily strictly reduced mod p_384. +// --------------------------------------------------------------------------- + +#define amontred(d6,d5,d4,d3,d2,d1,d0, t3,t2,t1) \ +/* We only know the input is -2^444 < x < 2^444. To do traditional */ \ +/* unsigned Montgomery reduction, start by adding 2^61 * p_384. */ \ + mov t1, #0xe000000000000000; \ + adds d0, d0, t1; \ + mov t2, #0x000000001fffffff; \ + adcs d1, d1, t2; \ + mov t3, #0xffffffffe0000000; \ + bic t3, t3, #0x2000000000000000; \ + adcs d2, d2, t3; \ + sbcs d3, d3, xzr; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ + mov t1, #0x1fffffffffffffff; \ + adc d6, d6, t1; \ +/* Our correction multiplier is w = [d0 + (d0<<32)] mod 2^64 */ \ +/* Store it back into d0 since we no longer need that digit. 
*/ \ + add d0, d0, d0, lsl #32; \ +/* Now let [t3;t2;t1;-] = (2^384 - p_384) * w */ \ +/* We know the lowest word will cancel d0 so we don't need it */ \ + mov t1, #0xffffffff00000001; \ + umulh t1, t1, d0; \ + mov t2, #0x00000000ffffffff; \ + mul t3, t2, d0; \ + umulh t2, t2, d0; \ + adds t1, t1, t3; \ + adcs t2, t2, d0; \ + cset t3, cs; \ +/* Now x + p_384 * w = (x + 2^384 * w) - (2^384 - p_384) * w */ \ +/* We catch the net top carry from add-subtract in the digit d0 */ \ + adds d6, d6, d0; \ + cset d0, cs; \ + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, xzr; \ + sbcs d5, d5, xzr; \ + sbcs d6, d6, xzr; \ + sbcs d0, d0, xzr; \ +/* Now if d0 is nonzero we subtract p_384 (almost-Montgomery) */ \ + neg d0, d0; \ + and t1, d0, #0x00000000ffffffff; \ + and t2, d0, #0xffffffff00000000; \ + and t3, d0, #0xfffffffffffffffe; \ + subs d1, d1, t1; \ + sbcs d2, d2, t2; \ + sbcs d3, d3, t3; \ + sbcs d4, d4, d0; \ + sbcs d5, d5, d0; \ + sbc d6, d6, d0 + +// Very similar to a subroutine call to the s2n-bignum word_divstep59. +// But different in register usage and returning the final matrix in +// registers as follows +// +// [ m00 m01] +// [ m10 m11] + +#define divstep59() \ + and x4, x2, #0xfffff; \ + orr x4, x4, #0xfffffe0000000000; \ + and x5, x3, #0xfffff; \ + orr x5, x5, #0xc000000000000000; \ + tst x5, #0x1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr 
x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + asr x5, x5, #1; \ + add x8, x4, #0x100, lsl #12; \ + sbfx x8, x8, #21, #21; \ + mov x11, #0x100000; \ + add x11, x11, x11, lsl #21; \ + add x9, x4, x11; \ + asr x9, x9, #42; \ + add x10, x5, #0x100, lsl #12; \ + sbfx x10, x10, #21, #21; \ + add x11, x5, x11; \ + asr x11, x11, #42; \ + mul x6, x8, x2; \ + mul x7, x9, x3; \ + mul x2, x10, x2; \ + mul x3, x11, x3; \ + add x4, x6, x7; \ + add x5, x2, x3; \ + asr x2, x4, #20; \ + asr x3, x5, #20; \ + and x4, x2, #0xfffff; \ + orr x4, x4, #0xfffffe0000000000; \ + and x5, x3, #0xfffff; \ + orr x5, x5, #0xc000000000000000; \ + tst x5, #0x1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst 
x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + asr x5, x5, #1; \ + add x12, x4, #0x100, lsl #12; \ + sbfx x12, x12, #21, #21; \ + mov x15, #0x100000; \ + add x15, x15, x15, lsl #21; \ + add x13, x4, x15; \ + asr x13, x13, #42; \ + add x14, x5, #0x100, lsl #12; \ + sbfx x14, x14, #21, #21; \ + add x15, x5, x15; \ + asr x15, x15, #42; \ + mul x6, x12, x2; \ + mul x7, x13, x3; \ + mul x2, x14, x2; \ + mul x3, x15, x3; \ + add x4, x6, x7; \ + add x5, x2, x3; \ + asr x2, x4, #20; \ + asr x3, x5, #20; \ + and x4, x2, #0xfffff; \ + orr x4, x4, #0xfffffe0000000000; \ + and x5, x3, #0xfffff; \ + orr x5, x5, #0xc000000000000000; \ + tst x5, #0x1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + 
add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + mul x2, x12, x8; \ + mul x3, x12, x9; \ + mul x6, x14, x8; \ + mul x7, x14, x9; \ + madd x8, x13, x10, x2; \ + madd x9, x13, x11, x3; \ + madd x16, x15, x10, x6; \ + madd x17, x15, x11, x7; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, 
x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + tst x5, #0x2; \ + asr x5, x5, #1; \ + csel x6, x4, xzr, ne; \ + ccmp x1, xzr, #0x8, ne; \ + cneg x1, x1, ge; \ + cneg x6, x6, ge; \ + csel x4, x5, x4, ge; \ + add x5, x5, x6; \ + add x1, x1, #0x2; \ + asr x5, x5, #1; \ + add x12, x4, #0x100, lsl #12; \ + sbfx x12, x12, #22, #21; \ + mov x15, #0x100000; \ + add x15, x15, x15, lsl #21; \ + add x13, x4, x15; \ + asr x13, x13, #43; \ + add x14, x5, #0x100, lsl #12; \ + sbfx x14, x14, #22, #21; \ + add x15, x5, x15; \ + asr x15, x15, #43; \ + mneg x2, x12, x8; \ + mneg x3, x12, x9; \ + mneg x4, x14, x8; \ + mneg x5, x14, x9; \ + msub m00, x13, x16, x2; \ + msub m01, x13, x17, x3; \ + msub m10, x15, x16, x4; \ + msub m11, x15, x17, x5 + +S2N_BN_SYMBOL(bignum_montinv_p384): + +// Save registers and make room for temporaries + + stp x19, x20, [sp, -16]! + stp x21, x22, [sp, -16]! + stp x23, x24, [sp, -16]! + sub sp, sp, NSPACE + +// Save the return pointer for the end so we can overwrite x0 later + + mov res, x0 + +// Copy the prime and input into the main f and g variables respectively. +// Make sure x is reduced so that g <= f as assumed in the bound proof. + + mov x10, #0x00000000ffffffff + mov x11, #0xffffffff00000000 + mov x12, #0xfffffffffffffffe + mov x15, #0xffffffffffffffff + stp x10, x11, [f] + stp x12, x15, [f+2*N] + stp x15, x15, [f+4*N] + str xzr, [f+6*N] + + ldp x2, x3, [x1] + subs x10, x2, x10 + sbcs x11, x3, x11 + ldp x4, x5, [x1, #(2*N)] + sbcs x12, x4, x12 + sbcs x13, x5, x15 + ldp x6, x7, [x1, #(4*N)] + sbcs x14, x6, x15 + sbcs x15, x7, x15 + + csel x2, x2, x10, cc + csel x3, x3, x11, cc + csel x4, x4, x12, cc + csel x5, x5, x13, cc + csel x6, x6, x14, cc + csel x7, x7, x15, cc + + stp x2, x3, [g] + stp x4, x5, [g+2*N] + stp x6, x7, [g+4*N] + str xzr, [g+6*N] + +// Also maintain reduced < 2^384 vector [u,v] such that +// [f,g] == x * 2^{5*i-843} * [u,v] (mod p_384) +// starting with [p_384,x] == x * 2^{5*0-843} * [0,2^843] (mod p_384) +// The weird-looking 5*i modifications come in because we are doing +// 64-bit word-sized Montgomery reductions at each stage, which is +// 5 bits more than the 59-bit requirement to keep things stable. +// After the 15th and last iteration and sign adjustment, when +// f == 1 for in-scope cases, we have x * 2^{75-843} * u == 1, i.e. +// x * u == 2^768 as required. + + stp xzr, xzr, [u] + stp xzr, xzr, [u+2*N] + stp xzr, xzr, [u+4*N] + +// The starting constant 2^843 mod p_384 is +// 0x0000000000000800:00001000000007ff:fffff00000000000 +// :00001000000007ff:fffff00000000800:0000000000000000 +// where colons separate 64-bit subwords, least significant at the right. +// Not all of these are single loads on ARM so this is a bit dynamic + + mov x12, #0xfffff00000000000 + orr x10, x12, #0x0000000000000800 + stp xzr, x10, [v] + mov x11, #0x00000000000007ff + orr x11, x11, #0x0000100000000000 + stp x11, x12, [v+2*N] + mov x12, #0x0000000000000800 + stp x11, x12, [v+4*N] + +// Start of main loop. We jump into the middle so that the divstep +// portion is common to the special fifteenth iteration after a uniform +// first 14. 
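+// As a cross-check of the starting constant above, in illustrative Python
+// (words listed least significant first, matching the stp sequence above):
+//
+//     p_384 = 2**384 - 2**128 - 2**96 + 2**32 - 1
+//     words = [0x0000000000000000, 0xfffff00000000800,
+//              0x00001000000007ff, 0xfffff00000000000,
+//              0x00001000000007ff, 0x0000000000000800]
+//     assert sum(w << (64*i) for i, w in enumerate(words)) == pow(2, 843, p_384)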
+ + mov i, #15 + mov d, #1 + b bignum_montinv_p384_midloop + +bignum_montinv_p384_loop: + +// Separate the matrix elements into sign-magnitude pairs + + cmp m00, xzr + csetm s00, mi + cneg m00, m00, mi + + cmp m01, xzr + csetm s01, mi + cneg m01, m01, mi + + cmp m10, xzr + csetm s10, mi + cneg m10, m10, mi + + cmp m11, xzr + csetm s11, mi + cneg m11, m11, mi + +// Adjust the initial values to allow for complement instead of negation +// This initial offset is the same for [f,g] and [u,v] compositions. +// Save it in stable registers for the [u,v] part and do [f,g] first. + + and x0, m00, s00 + and x1, m01, s01 + add car0, x0, x1 + + and x0, m10, s10 + and x1, m11, s11 + add car1, x0, x1 + +// Now the computation of the updated f and g values. This maintains a +// 2-word carry between stages so we can conveniently insert the shift +// right by 59 before storing back, and not overwrite digits we need +// again of the old f and g values. +// +// Digit 0 of [f,g] + + ldr x7, [f] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, car0, x0 + adc x2, xzr, x1 + ldr x8, [g] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + adc x2, x2, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, car1, x0 + adc x3, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + adc x3, x3, x1 + +// Digit 1 of [f,g] + + ldr x7, [f+N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [g+N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [f] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [g] + +// Digit 2 of [f,g] + + ldr x7, [f+2*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [g+2*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [f+N] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [g+N] + +// Digit 3 of [f,g] + + ldr x7, [f+3*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x5, x5, x0 + adc x3, xzr, x1 + ldr x8, [g+3*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x5, x5, x0 + adc x3, x3, x1 + extr x6, x5, x6, #59 + str x6, [f+2*N] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x2, x2, x0 + adc x6, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x2, x2, x0 + adc x6, x6, x1 + extr x4, x2, x4, #59 + str x4, [g+2*N] + +// Digit 4 of [f,g] + + ldr x7, [f+4*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x3, x3, x0 + adc x4, xzr, x1 + ldr x8, [g+4*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x3, x3, x0 + adc x4, x4, x1 + extr x5, x3, x5, #59 + str x5, [f+3*N] + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x6, x6, x0 + adc x5, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x6, x6, x0 + adc x5, x5, x1 + extr x2, x6, x2, #59 + str x2, [g+3*N] + +// Digits 5 and 6 of [f,g] + + ldr x7, [f+5*N] + eor x1, x7, s00 + ldr x23, [f+6*N] + eor x2, x23, s00 + and x2, x2, m00 + neg x2, x2 + mul x0, x1, 
m00 + umulh x1, x1, m00 + adds x4, x4, x0 + adc x2, x2, x1 + ldr x8, [g+5*N] + eor x1, x8, s01 + ldr x24, [g+6*N] + eor x0, x24, s01 + and x0, x0, m01 + sub x2, x2, x0 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + adc x2, x2, x1 + extr x3, x4, x3, #59 + str x3, [f+4*N] + extr x4, x2, x4, #59 + str x4, [f+5*N] + asr x2, x2, #59 + str x2, [f+6*N] + + eor x1, x7, s10 + eor x4, x23, s10 + and x4, x4, m10 + neg x4, x4 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, x5, x0 + adc x4, x4, x1 + eor x1, x8, s11 + eor x0, x24, s11 + and x0, x0, m11 + sub x4, x4, x0 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + adc x4, x4, x1 + extr x6, x5, x6, #59 + str x6, [g+4*N] + extr x5, x4, x5, #59 + str x5, [g+5*N] + asr x4, x4, #59 + str x4, [g+6*N] + +// Now the computation of the updated u and v values and their +// Montgomery reductions. A very similar accumulation except that +// the top words of u and v are unsigned and we don't shift. +// +// Digit 0 of [u,v] + + ldr x7, [u] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, car0, x0 + adc x2, xzr, x1 + ldr x8, [v] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u] + adc x2, x2, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, car1, x0 + adc x3, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + str x5, [v] + adc x3, x3, x1 + +// Digit 1 of [u,v] + + ldr x7, [u+N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [v+N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x2, x2, x0 + str x2, [u+N] + adc x6, x6, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x3, x3, x0 + adc x4, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x3, x3, x0 + str x3, [v+N] + adc x4, x4, x1 + +// Digit 2 of [u,v] + + ldr x7, [u+2*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [v+2*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x6, x6, x0 + str x6, [u+2*N] + adc x5, x5, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x4, x4, x0 + adc x2, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x4, x4, x0 + str x4, [v+2*N] + adc x2, x2, x1 + +// Digit 3 of [u,v] + + ldr x7, [u+3*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x5, x5, x0 + adc x3, xzr, x1 + ldr x8, [v+3*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x5, x5, x0 + str x5, [u+3*N] + adc x3, x3, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x2, x2, x0 + adc x6, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x2, x2, x0 + str x2, [v+3*N] + adc x6, x6, x1 + +// Digit 4 of [u,v] + + ldr x7, [u+4*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x3, x3, x0 + adc x4, xzr, x1 + ldr x8, [v+4*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x3, x3, x0 + str x3, [u+4*N] + adc x4, x4, x1 + + eor x1, x7, s10 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x6, x6, x0 + adc x5, xzr, x1 + eor x1, x8, s11 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x6, x6, x0 + str x6, [v+4*N] + adc x5, x5, x1 + +// Digits 5 and 6 of [u,v] (top is unsigned) + + ldr x7, [u+5*N] + eor x1, x7, s00 + and x2, s00, m00 + neg x2, x2 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, x4, x0 + adc x2, x2, x1 + ldr x8, [v+5*N] + eor x1, x8, s01 + and x0, s01, m01 + sub x2, x2, x0 + mul x0, x1, m01 + 
umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u+5*N] + adc x2, x2, x1 + str x2, [u+6*N] + + eor x1, x7, s10 + and x4, s10, m10 + neg x4, x4 + mul x0, x1, m10 + umulh x1, x1, m10 + adds x5, x5, x0 + adc x4, x4, x1 + eor x1, x8, s11 + and x0, s11, m11 + sub x4, x4, x0 + mul x0, x1, m11 + umulh x1, x1, m11 + adds x5, x5, x0 + str x5, [v+5*N] + adc x4, x4, x1 + str x4, [v+6*N] + +// Montgomery reduction of u + + ldp x0, x1, [u] + ldp x2, x3, [u+16] + ldp x4, x5, [u+32] + ldr x6, [u+48] + amontred(x6,x5,x4,x3,x2,x1,x0, x9,x8,x7) + stp x1, x2, [u] + stp x3, x4, [u+16] + stp x5, x6, [u+32] + +// Montgomery reduction of v + + ldp x0, x1, [v] + ldp x2, x3, [v+16] + ldp x4, x5, [v+32] + ldr x6, [v+48] + amontred(x6,x5,x4,x3,x2,x1,x0, x9,x8,x7) + stp x1, x2, [v] + stp x3, x4, [v+16] + stp x5, x6, [v+32] + +bignum_montinv_p384_midloop: + + mov x1, d + ldr x2, [f] + ldr x3, [g] + divstep59() + mov d, x1 + +// Next iteration + + subs i, i, #1 + bne bignum_montinv_p384_loop + +// The 15th and last iteration does not need anything except the +// u value and the sign of f; the latter can be obtained from the +// lowest word of f. So it's done differently from the main loop. +// Find the sign of the new f. For this we just need one digit +// since we know (for in-scope cases) that f is either +1 or -1. +// We don't explicitly shift right by 59 either, but looking at +// bit 63 (or any bit >= 60) of the unshifted result is enough +// to distinguish -1 from +1; this is then made into a mask. + + ldr x0, [f] + ldr x1, [g] + mul x0, x0, m00 + madd x1, x1, m01, x0 + asr x0, x1, #63 + +// Now separate out the matrix into sign-magnitude pairs +// and adjust each one based on the sign of f. +// +// Note that at this point we expect |f|=1 and we got its +// sign above, so then since [f,0] == x * 2^{-768} [u,v] (mod p_384) +// we want to flip the sign of u according to that of f. 
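+//
+// Concretely: f == x * 2^{-768} * u (mod p_384) with f = +1 or -1, so
+// x * (sign(f) * u) == 2^768 (mod p_384), which is exactly the required
+// result; the eor of s00 and s01 with that sign mask below performs the flip.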
+ + cmp m00, xzr + csetm s00, mi + cneg m00, m00, mi + eor s00, s00, x0 + + cmp m01, xzr + csetm s01, mi + cneg m01, m01, mi + eor s01, s01, x0 + + cmp m10, xzr + csetm s10, mi + cneg m10, m10, mi + eor s10, s10, x0 + + cmp m11, xzr + csetm s11, mi + cneg m11, m11, mi + eor s11, s11, x0 + +// Adjust the initial value to allow for complement instead of negation + + and x0, m00, s00 + and x1, m01, s01 + add car0, x0, x1 + +// Digit 0 of [u] + + ldr x7, [u] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, car0, x0 + adc x2, xzr, x1 + ldr x8, [v] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u] + adc x2, x2, x1 + +// Digit 1 of [u] + + ldr x7, [u+N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x2, x2, x0 + adc x6, xzr, x1 + ldr x8, [v+N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x2, x2, x0 + str x2, [u+N] + adc x6, x6, x1 + +// Digit 2 of [u] + + ldr x7, [u+2*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x6, x6, x0 + adc x5, xzr, x1 + ldr x8, [v+2*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x6, x6, x0 + str x6, [u+2*N] + adc x5, x5, x1 + +// Digit 3 of [u] + + ldr x7, [u+3*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x5, x5, x0 + adc x3, xzr, x1 + ldr x8, [v+3*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x5, x5, x0 + str x5, [u+3*N] + adc x3, x3, x1 + +// Digit 4 of [u] + + ldr x7, [u+4*N] + eor x1, x7, s00 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x3, x3, x0 + adc x4, xzr, x1 + ldr x8, [v+4*N] + eor x1, x8, s01 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x3, x3, x0 + str x3, [u+4*N] + adc x4, x4, x1 + +// Digits 5 and 6 of [u] (top is unsigned) + + ldr x7, [u+5*N] + eor x1, x7, s00 + and x2, s00, m00 + neg x2, x2 + mul x0, x1, m00 + umulh x1, x1, m00 + adds x4, x4, x0 + adc x2, x2, x1 + ldr x8, [v+5*N] + eor x1, x8, s01 + and x0, s01, m01 + sub x2, x2, x0 + mul x0, x1, m01 + umulh x1, x1, m01 + adds x4, x4, x0 + str x4, [u+5*N] + adc x2, x2, x1 + str x2, [u+6*N] + +// Montgomery reduction of u. This needs to be strict not "almost" +// so it is followed by an optional subtraction of p_384 + + ldp x10, x0, [u] + ldp x1, x2, [u+16] + ldp x3, x4, [u+32] + ldr x5, [u+48] + amontred(x5,x4,x3,x2,x1,x0,x10, x9,x8,x7) + + mov x10, #0x00000000ffffffff + subs x10, x0, x10 + mov x11, #0xffffffff00000000 + sbcs x11, x1, x11 + mov x12, #0xfffffffffffffffe + sbcs x12, x2, x12 + mov x15, #0xffffffffffffffff + sbcs x13, x3, x15 + sbcs x14, x4, x15 + sbcs x15, x5, x15 + + csel x0, x0, x10, cc + csel x1, x1, x11, cc + csel x2, x2, x12, cc + csel x3, x3, x13, cc + csel x4, x4, x14, cc + csel x5, x5, x15, cc + +// Store it back to the final output + + stp x0, x1, [res] + stp x2, x3, [res, #16] + stp x4, x5, [res, #32] + +// Restore stack and registers + + add sp, sp, NSPACE + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/arm/p384/p384_montjadd.S b/third_party/s2n-bignum/arm/p384/p384_montjadd.S index 3b65363162..3604313273 100644 --- a/third_party/s2n-bignum/arm/p384/p384_montjadd.S +++ b/third_party/s2n-bignum/arm/p384/p384_montjadd.S @@ -15,6 +15,14 @@ // ---------------------------------------------------------------------------- #include "_internal_s2n_bignum.h" +// This is functionally equivalent to p384_montjadd in unopt/p384_montjadd.S. 
+// This is the result of doing the following sequence of optimizations: +// 1. Function inlining +// 2. Eliminating redundant load/store instructions +// 3. Folding (add addr, const) + load/store +// Function inlining is done manually. The second and third optimizations are +// done by a script. + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjadd) S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjadd) .text @@ -24,731 +32,8 @@ #define NUMSIZE 48 -// Stable homes for input arguments during main code sequence - -#define input_z x24 -#define input_x x25 -#define input_y x26 - -// Pointer-offset pairs for inputs and outputs - -#define x_1 input_x, #0 -#define y_1 input_x, #NUMSIZE -#define z_1 input_x, #(2*NUMSIZE) - -#define x_2 input_y, #0 -#define y_2 input_y, #NUMSIZE -#define z_2 input_y, #(2*NUMSIZE) - -#define x_3 input_z, #0 -#define y_3 input_z, #NUMSIZE -#define z_3 input_z, #(2*NUMSIZE) - -// Pointer-offset pairs for temporaries, with some aliasing -// NSPACE is the total stack needed for these temporaries - -#define z1sq sp, #(NUMSIZE*0) -#define ww sp, #(NUMSIZE*0) -#define resx sp, #(NUMSIZE*0) - -#define yd sp, #(NUMSIZE*1) -#define y2a sp, #(NUMSIZE*1) - -#define x2a sp, #(NUMSIZE*2) -#define zzx2 sp, #(NUMSIZE*2) - -#define zz sp, #(NUMSIZE*3) -#define t1 sp, #(NUMSIZE*3) - -#define t2 sp, #(NUMSIZE*4) -#define x1a sp, #(NUMSIZE*4) -#define zzx1 sp, #(NUMSIZE*4) -#define resy sp, #(NUMSIZE*4) - -#define xd sp, #(NUMSIZE*5) -#define z2sq sp, #(NUMSIZE*5) -#define resz sp, #(NUMSIZE*5) - -#define y1a sp, #(NUMSIZE*6) - #define NSPACE (NUMSIZE*7) -// Corresponds to bignum_montmul_p384 except x24 -> x0 - -#define montmul_p384(P0,P1,P2) \ - ldp x3, x4, [P1]; \ - ldp x5, x6, [P1+16]; \ - ldp x7, x8, [P1+32]; \ - ldp x9, x10, [P2]; \ - ldp x11, x12, [P2+16]; \ - ldp x13, x14, [P2+32]; \ - mul x15, x3, x9; \ - mul x21, x4, x10; \ - mul x22, x5, x11; \ - umulh x23, x3, x9; \ - umulh x0, x4, x10; \ - umulh x1, x5, x11; \ - adds x23, x23, x21; \ - adcs x0, x0, x22; \ - adc x1, x1, xzr; \ - adds x16, x23, x15; \ - adcs x17, x0, x23; \ - adcs x19, x1, x0; \ - adc x20, x1, xzr; \ - adds x17, x17, x15; \ - adcs x19, x19, x23; \ - adcs x20, x20, x0; \ - adc x1, x1, xzr; \ - subs x0, x3, x4; \ - cneg x0, x0, lo; \ - csetm x23, lo; \ - subs x22, x10, x9; \ - cneg x22, x22, lo; \ - mul x21, x0, x22; \ - umulh x22, x0, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x16, x16, x21; \ - adcs x17, x17, x22; \ - adcs x19, x19, x23; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x0, x3, x5; \ - cneg x0, x0, lo; \ - csetm x23, lo; \ - subs x22, x11, x9; \ - cneg x22, x22, lo; \ - mul x21, x0, x22; \ - umulh x22, x0, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x17, x17, x21; \ - adcs x19, x19, x22; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x0, x4, x5; \ - cneg x0, x0, lo; \ - csetm x23, lo; \ - subs x22, x11, x10; \ - cneg x22, x22, lo; \ - mul x21, x0, x22; \ - umulh x22, x0, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x19, x19, x21; \ - adcs x20, x20, x22; \ - adc x1, x1, x23; \ - lsl x23, x15, #32; \ - add x15, x23, x15; \ - lsr x23, x15, #32; \ - subs x23, x23, x15; \ - sbc x22, x15, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x15; \ - adc x21, xzr, xzr; \ - subs x16, x16, x23; \ - sbcs x17, x17, x22; \ - sbcs x19, x19, x21; \ - sbcs x20, x20, xzr; \ - sbcs x1, x1, xzr; \ - sbc x15, x15, xzr; \ - lsl 
x23, x16, #32; \ - add x16, x23, x16; \ - lsr x23, x16, #32; \ - subs x23, x23, x16; \ - sbc x22, x16, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x16; \ - adc x21, xzr, xzr; \ - subs x17, x17, x23; \ - sbcs x19, x19, x22; \ - sbcs x20, x20, x21; \ - sbcs x1, x1, xzr; \ - sbcs x15, x15, xzr; \ - sbc x16, x16, xzr; \ - lsl x23, x17, #32; \ - add x17, x23, x17; \ - lsr x23, x17, #32; \ - subs x23, x23, x17; \ - sbc x22, x17, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x17; \ - adc x21, xzr, xzr; \ - subs x19, x19, x23; \ - sbcs x20, x20, x22; \ - sbcs x1, x1, x21; \ - sbcs x15, x15, xzr; \ - sbcs x16, x16, xzr; \ - sbc x17, x17, xzr; \ - stp x19, x20, [P0]; \ - stp x1, x15, [P0+16]; \ - stp x16, x17, [P0+32]; \ - mul x15, x6, x12; \ - mul x21, x7, x13; \ - mul x22, x8, x14; \ - umulh x23, x6, x12; \ - umulh x0, x7, x13; \ - umulh x1, x8, x14; \ - adds x23, x23, x21; \ - adcs x0, x0, x22; \ - adc x1, x1, xzr; \ - adds x16, x23, x15; \ - adcs x17, x0, x23; \ - adcs x19, x1, x0; \ - adc x20, x1, xzr; \ - adds x17, x17, x15; \ - adcs x19, x19, x23; \ - adcs x20, x20, x0; \ - adc x1, x1, xzr; \ - subs x0, x6, x7; \ - cneg x0, x0, lo; \ - csetm x23, lo; \ - subs x22, x13, x12; \ - cneg x22, x22, lo; \ - mul x21, x0, x22; \ - umulh x22, x0, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x16, x16, x21; \ - adcs x17, x17, x22; \ - adcs x19, x19, x23; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x0, x6, x8; \ - cneg x0, x0, lo; \ - csetm x23, lo; \ - subs x22, x14, x12; \ - cneg x22, x22, lo; \ - mul x21, x0, x22; \ - umulh x22, x0, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x17, x17, x21; \ - adcs x19, x19, x22; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x0, x7, x8; \ - cneg x0, x0, lo; \ - csetm x23, lo; \ - subs x22, x14, x13; \ - cneg x22, x22, lo; \ - mul x21, x0, x22; \ - umulh x22, x0, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x19, x19, x21; \ - adcs x20, x20, x22; \ - adc x1, x1, x23; \ - subs x6, x6, x3; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x5; \ - ngc x3, xzr; \ - cmn x3, #1; \ - eor x6, x6, x3; \ - adcs x6, x6, xzr; \ - eor x7, x7, x3; \ - adcs x7, x7, xzr; \ - eor x8, x8, x3; \ - adc x8, x8, xzr; \ - subs x9, x9, x12; \ - sbcs x10, x10, x13; \ - sbcs x11, x11, x14; \ - ngc x14, xzr; \ - cmn x14, #1; \ - eor x9, x9, x14; \ - adcs x9, x9, xzr; \ - eor x10, x10, x14; \ - adcs x10, x10, xzr; \ - eor x11, x11, x14; \ - adc x11, x11, xzr; \ - eor x14, x3, x14; \ - ldp x21, x22, [P0]; \ - adds x15, x15, x21; \ - adcs x16, x16, x22; \ - ldp x21, x22, [P0+16]; \ - adcs x17, x17, x21; \ - adcs x19, x19, x22; \ - ldp x21, x22, [P0+32]; \ - adcs x20, x20, x21; \ - adcs x1, x1, x22; \ - adc x2, xzr, xzr; \ - stp x15, x16, [P0]; \ - stp x17, x19, [P0+16]; \ - stp x20, x1, [P0+32]; \ - mul x15, x6, x9; \ - mul x21, x7, x10; \ - mul x22, x8, x11; \ - umulh x23, x6, x9; \ - umulh x0, x7, x10; \ - umulh x1, x8, x11; \ - adds x23, x23, x21; \ - adcs x0, x0, x22; \ - adc x1, x1, xzr; \ - adds x16, x23, x15; \ - adcs x17, x0, x23; \ - adcs x19, x1, x0; \ - adc x20, x1, xzr; \ - adds x17, x17, x15; \ - adcs x19, x19, x23; \ - adcs x20, x20, x0; \ - adc x1, x1, xzr; \ - subs x0, x6, x7; \ - cneg x0, x0, lo; \ - csetm x23, lo; \ - subs x22, x10, x9; \ - cneg x22, x22, lo; \ - mul x21, x0, x22; \ - umulh x22, x0, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ 
- eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x16, x16, x21; \ - adcs x17, x17, x22; \ - adcs x19, x19, x23; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x0, x6, x8; \ - cneg x0, x0, lo; \ - csetm x23, lo; \ - subs x22, x11, x9; \ - cneg x22, x22, lo; \ - mul x21, x0, x22; \ - umulh x22, x0, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x17, x17, x21; \ - adcs x19, x19, x22; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x0, x7, x8; \ - cneg x0, x0, lo; \ - csetm x23, lo; \ - subs x22, x11, x10; \ - cneg x22, x22, lo; \ - mul x21, x0, x22; \ - umulh x22, x0, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x19, x19, x21; \ - adcs x20, x20, x22; \ - adc x1, x1, x23; \ - ldp x3, x4, [P0]; \ - ldp x5, x6, [P0+16]; \ - ldp x7, x8, [P0+32]; \ - cmn x14, #1; \ - eor x15, x15, x14; \ - adcs x15, x15, x3; \ - eor x16, x16, x14; \ - adcs x16, x16, x4; \ - eor x17, x17, x14; \ - adcs x17, x17, x5; \ - eor x19, x19, x14; \ - adcs x19, x19, x6; \ - eor x20, x20, x14; \ - adcs x20, x20, x7; \ - eor x1, x1, x14; \ - adcs x1, x1, x8; \ - adcs x9, x14, x2; \ - adcs x10, x14, xzr; \ - adcs x11, x14, xzr; \ - adc x12, x14, xzr; \ - adds x19, x19, x3; \ - adcs x20, x20, x4; \ - adcs x1, x1, x5; \ - adcs x9, x9, x6; \ - adcs x10, x10, x7; \ - adcs x11, x11, x8; \ - adc x12, x12, x2; \ - lsl x23, x15, #32; \ - add x15, x23, x15; \ - lsr x23, x15, #32; \ - subs x23, x23, x15; \ - sbc x22, x15, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x15; \ - adc x21, xzr, xzr; \ - subs x16, x16, x23; \ - sbcs x17, x17, x22; \ - sbcs x19, x19, x21; \ - sbcs x20, x20, xzr; \ - sbcs x1, x1, xzr; \ - sbc x15, x15, xzr; \ - lsl x23, x16, #32; \ - add x16, x23, x16; \ - lsr x23, x16, #32; \ - subs x23, x23, x16; \ - sbc x22, x16, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x16; \ - adc x21, xzr, xzr; \ - subs x17, x17, x23; \ - sbcs x19, x19, x22; \ - sbcs x20, x20, x21; \ - sbcs x1, x1, xzr; \ - sbcs x15, x15, xzr; \ - sbc x16, x16, xzr; \ - lsl x23, x17, #32; \ - add x17, x23, x17; \ - lsr x23, x17, #32; \ - subs x23, x23, x17; \ - sbc x22, x17, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x17; \ - adc x21, xzr, xzr; \ - subs x19, x19, x23; \ - sbcs x20, x20, x22; \ - sbcs x1, x1, x21; \ - sbcs x15, x15, xzr; \ - sbcs x16, x16, xzr; \ - sbc x17, x17, xzr; \ - adds x9, x9, x15; \ - adcs x10, x10, x16; \ - adcs x11, x11, x17; \ - adc x12, x12, xzr; \ - add x22, x12, #1; \ - lsl x21, x22, #32; \ - subs x0, x22, x21; \ - sbc x21, x21, xzr; \ - adds x19, x19, x0; \ - adcs x20, x20, x21; \ - adcs x1, x1, x22; \ - adcs x9, x9, xzr; \ - adcs x10, x10, xzr; \ - adcs x11, x11, xzr; \ - csetm x22, lo; \ - mov x23, #4294967295; \ - and x23, x23, x22; \ - adds x19, x19, x23; \ - eor x23, x23, x22; \ - adcs x20, x20, x23; \ - mov x23, #-2; \ - and x23, x23, x22; \ - adcs x1, x1, x23; \ - adcs x9, x9, x22; \ - adcs x10, x10, x22; \ - adc x11, x11, x22; \ - stp x19, x20, [P0]; \ - stp x1, x9, [P0+16]; \ - stp x10, x11, [P0+32] - -// Corresponds exactly to bignum_montsqr_p384 - -#define montsqr_p384(P0,P1) \ - ldp x2, x3, [P1]; \ - ldp x4, x5, [P1+16]; \ - ldp x6, x7, [P1+32]; \ - mul x14, x2, x3; \ - mul x15, x2, x4; \ - mul x16, x3, x4; \ - mul x8, x2, x2; \ - mul x10, x3, x3; \ - mul x12, x4, x4; \ - umulh x17, x2, x3; \ - adds x15, x15, x17; \ - umulh x17, x2, x4; \ - adcs x16, x16, x17; \ - umulh x17, x3, x4; \ - adcs x17, 
x17, xzr; \ - umulh x9, x2, x2; \ - umulh x11, x3, x3; \ - umulh x13, x4, x4; \ - adds x14, x14, x14; \ - adcs x15, x15, x15; \ - adcs x16, x16, x16; \ - adcs x17, x17, x17; \ - adc x13, x13, xzr; \ - adds x9, x9, x14; \ - adcs x10, x10, x15; \ - adcs x11, x11, x16; \ - adcs x12, x12, x17; \ - adc x13, x13, xzr; \ - lsl x16, x8, #32; \ - add x8, x16, x8; \ - lsr x16, x8, #32; \ - subs x16, x16, x8; \ - sbc x15, x8, xzr; \ - extr x16, x15, x16, #32; \ - lsr x15, x15, #32; \ - adds x15, x15, x8; \ - adc x14, xzr, xzr; \ - subs x9, x9, x16; \ - sbcs x10, x10, x15; \ - sbcs x11, x11, x14; \ - sbcs x12, x12, xzr; \ - sbcs x13, x13, xzr; \ - sbc x8, x8, xzr; \ - lsl x16, x9, #32; \ - add x9, x16, x9; \ - lsr x16, x9, #32; \ - subs x16, x16, x9; \ - sbc x15, x9, xzr; \ - extr x16, x15, x16, #32; \ - lsr x15, x15, #32; \ - adds x15, x15, x9; \ - adc x14, xzr, xzr; \ - subs x10, x10, x16; \ - sbcs x11, x11, x15; \ - sbcs x12, x12, x14; \ - sbcs x13, x13, xzr; \ - sbcs x8, x8, xzr; \ - sbc x9, x9, xzr; \ - lsl x16, x10, #32; \ - add x10, x16, x10; \ - lsr x16, x10, #32; \ - subs x16, x16, x10; \ - sbc x15, x10, xzr; \ - extr x16, x15, x16, #32; \ - lsr x15, x15, #32; \ - adds x15, x15, x10; \ - adc x14, xzr, xzr; \ - subs x11, x11, x16; \ - sbcs x12, x12, x15; \ - sbcs x13, x13, x14; \ - sbcs x8, x8, xzr; \ - sbcs x9, x9, xzr; \ - sbc x10, x10, xzr; \ - stp x11, x12, [P0]; \ - stp x13, x8, [P0+16]; \ - stp x9, x10, [P0+32]; \ - mul x8, x2, x5; \ - mul x14, x3, x6; \ - mul x15, x4, x7; \ - umulh x16, x2, x5; \ - umulh x17, x3, x6; \ - umulh x1, x4, x7; \ - adds x16, x16, x14; \ - adcs x17, x17, x15; \ - adc x1, x1, xzr; \ - adds x9, x16, x8; \ - adcs x10, x17, x16; \ - adcs x11, x1, x17; \ - adc x12, x1, xzr; \ - adds x10, x10, x8; \ - adcs x11, x11, x16; \ - adcs x12, x12, x17; \ - adc x13, x1, xzr; \ - subs x17, x2, x3; \ - cneg x17, x17, lo; \ - csetm x14, lo; \ - subs x15, x6, x5; \ - cneg x15, x15, lo; \ - mul x16, x17, x15; \ - umulh x15, x17, x15; \ - cinv x14, x14, lo; \ - eor x16, x16, x14; \ - eor x15, x15, x14; \ - cmn x14, #1; \ - adcs x9, x9, x16; \ - adcs x10, x10, x15; \ - adcs x11, x11, x14; \ - adcs x12, x12, x14; \ - adc x13, x13, x14; \ - subs x17, x2, x4; \ - cneg x17, x17, lo; \ - csetm x14, lo; \ - subs x15, x7, x5; \ - cneg x15, x15, lo; \ - mul x16, x17, x15; \ - umulh x15, x17, x15; \ - cinv x14, x14, lo; \ - eor x16, x16, x14; \ - eor x15, x15, x14; \ - cmn x14, #1; \ - adcs x10, x10, x16; \ - adcs x11, x11, x15; \ - adcs x12, x12, x14; \ - adc x13, x13, x14; \ - subs x17, x3, x4; \ - cneg x17, x17, lo; \ - csetm x14, lo; \ - subs x15, x7, x6; \ - cneg x15, x15, lo; \ - mul x16, x17, x15; \ - umulh x15, x17, x15; \ - cinv x14, x14, lo; \ - eor x16, x16, x14; \ - eor x15, x15, x14; \ - cmn x14, #1; \ - adcs x11, x11, x16; \ - adcs x12, x12, x15; \ - adc x13, x13, x14; \ - adds x8, x8, x8; \ - adcs x9, x9, x9; \ - adcs x10, x10, x10; \ - adcs x11, x11, x11; \ - adcs x12, x12, x12; \ - adcs x13, x13, x13; \ - adc x17, xzr, xzr; \ - ldp x2, x3, [P0]; \ - adds x8, x8, x2; \ - adcs x9, x9, x3; \ - ldp x2, x3, [P0+16]; \ - adcs x10, x10, x2; \ - adcs x11, x11, x3; \ - ldp x2, x3, [P0+32]; \ - adcs x12, x12, x2; \ - adcs x13, x13, x3; \ - adc x17, x17, xzr; \ - lsl x4, x8, #32; \ - add x8, x4, x8; \ - lsr x4, x8, #32; \ - subs x4, x4, x8; \ - sbc x3, x8, xzr; \ - extr x4, x3, x4, #32; \ - lsr x3, x3, #32; \ - adds x3, x3, x8; \ - adc x2, xzr, xzr; \ - subs x9, x9, x4; \ - sbcs x10, x10, x3; \ - sbcs x11, x11, x2; \ - sbcs x12, x12, xzr; \ - sbcs x13, x13, xzr; \ - sbc x8, x8, xzr; \ 
- lsl x4, x9, #32; \ - add x9, x4, x9; \ - lsr x4, x9, #32; \ - subs x4, x4, x9; \ - sbc x3, x9, xzr; \ - extr x4, x3, x4, #32; \ - lsr x3, x3, #32; \ - adds x3, x3, x9; \ - adc x2, xzr, xzr; \ - subs x10, x10, x4; \ - sbcs x11, x11, x3; \ - sbcs x12, x12, x2; \ - sbcs x13, x13, xzr; \ - sbcs x8, x8, xzr; \ - sbc x9, x9, xzr; \ - lsl x4, x10, #32; \ - add x10, x4, x10; \ - lsr x4, x10, #32; \ - subs x4, x4, x10; \ - sbc x3, x10, xzr; \ - extr x4, x3, x4, #32; \ - lsr x3, x3, #32; \ - adds x3, x3, x10; \ - adc x2, xzr, xzr; \ - subs x11, x11, x4; \ - sbcs x12, x12, x3; \ - sbcs x13, x13, x2; \ - sbcs x8, x8, xzr; \ - sbcs x9, x9, xzr; \ - sbc x10, x10, xzr; \ - adds x17, x17, x8; \ - adcs x8, x9, xzr; \ - adcs x9, x10, xzr; \ - adcs x10, xzr, xzr; \ - mul x1, x5, x5; \ - adds x11, x11, x1; \ - mul x14, x6, x6; \ - mul x15, x7, x7; \ - umulh x1, x5, x5; \ - adcs x12, x12, x1; \ - umulh x1, x6, x6; \ - adcs x13, x13, x14; \ - adcs x17, x17, x1; \ - umulh x1, x7, x7; \ - adcs x8, x8, x15; \ - adcs x9, x9, x1; \ - adc x10, x10, xzr; \ - mul x1, x5, x6; \ - mul x14, x5, x7; \ - mul x15, x6, x7; \ - umulh x16, x5, x6; \ - adds x14, x14, x16; \ - umulh x16, x5, x7; \ - adcs x15, x15, x16; \ - umulh x16, x6, x7; \ - adc x16, x16, xzr; \ - adds x1, x1, x1; \ - adcs x14, x14, x14; \ - adcs x15, x15, x15; \ - adcs x16, x16, x16; \ - adc x5, xzr, xzr; \ - adds x12, x12, x1; \ - adcs x13, x13, x14; \ - adcs x17, x17, x15; \ - adcs x8, x8, x16; \ - adcs x9, x9, x5; \ - adc x10, x10, xzr; \ - mov x1, #-4294967295; \ - mov x14, #4294967295; \ - mov x15, #1; \ - cmn x11, x1; \ - adcs xzr, x12, x14; \ - adcs xzr, x13, x15; \ - adcs xzr, x17, xzr; \ - adcs xzr, x8, xzr; \ - adcs xzr, x9, xzr; \ - adc x10, x10, xzr; \ - neg x10, x10; \ - and x1, x1, x10; \ - adds x11, x11, x1; \ - and x14, x14, x10; \ - adcs x12, x12, x14; \ - and x15, x15, x10; \ - adcs x13, x13, x15; \ - adcs x17, x17, xzr; \ - adcs x8, x8, xzr; \ - adc x9, x9, xzr; \ - stp x11, x12, [P0]; \ - stp x13, x17, [P0+16]; \ - stp x8, x9, [P0+32] - -// Corresponds exactly to bignum_sub_p384 - -#define sub_p384(P0,P1,P2) \ - ldp x5, x6, [P1]; \ - ldp x4, x3, [P2]; \ - subs x5, x5, x4; \ - sbcs x6, x6, x3; \ - ldp x7, x8, [P1+16]; \ - ldp x4, x3, [P2+16]; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x3; \ - ldp x9, x10, [P1+32]; \ - ldp x4, x3, [P2+32]; \ - sbcs x9, x9, x4; \ - sbcs x10, x10, x3; \ - csetm x3, lo; \ - mov x4, #4294967295; \ - and x4, x4, x3; \ - adds x5, x5, x4; \ - eor x4, x4, x3; \ - adcs x6, x6, x4; \ - mov x4, #-2; \ - and x4, x4, x3; \ - adcs x7, x7, x4; \ - adcs x8, x8, x3; \ - adcs x9, x9, x3; \ - adc x10, x10, x3; \ - stp x5, x6, [P0]; \ - stp x7, x8, [P0+16]; \ - stp x9, x10, [P0+32] - S2N_BN_SYMBOL(p384_montjadd): // Save regs and make room on stack for temporary variables @@ -757,165 +42,6385 @@ S2N_BN_SYMBOL(p384_montjadd): stp x21, x22, [sp, #-16]! stp x23, x24, [sp, #-16]! stp x25, x26, [sp, #-16]! + stp x27, xzr, [sp, #-16]! 
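+// x27 is saved above as well so the inlined code below can use it as an
+// extra temporary; the xzr in the second slot just keeps sp 16-byte aligned.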
sub sp, sp, NSPACE -// Move the input arguments to stable places - - mov input_z, x0 - mov input_x, x1 - mov input_y, x2 - -// Main code, just a sequence of basic field operations -// 8 * multiply + 3 * square + 7 * subtract - - montsqr_p384(z1sq,z_1) - montsqr_p384(z2sq,z_2) - - montmul_p384(y1a,z_2,y_1) - montmul_p384(y2a,z_1,y_2) - - montmul_p384(x2a,z1sq,x_2) - montmul_p384(x1a,z2sq,x_1) - montmul_p384(y2a,z1sq,y2a) - montmul_p384(y1a,z2sq,y1a) - - sub_p384(xd,x2a,x1a) - sub_p384(yd,y2a,y1a) - - montsqr_p384(zz,xd) - montsqr_p384(ww,yd) - - montmul_p384(zzx1,zz,x1a) - montmul_p384(zzx2,zz,x2a) - - sub_p384(resx,ww,zzx1) - sub_p384(t1,zzx2,zzx1) - - montmul_p384(xd,xd,z_1) - - sub_p384(resx,resx,zzx2) - - sub_p384(t2,zzx1,resx) - - montmul_p384(t1,t1,y1a) - montmul_p384(resz,xd,z_2) - montmul_p384(t2,yd,t2) - - sub_p384(resy,t2,t1) - -// Load in the z coordinates of the inputs to check for P1 = 0 and P2 = 0 -// The condition codes get set by a comparison (P2 != 0) - (P1 != 0) -// So "HI" <=> CF /\ ~ZF <=> P1 = 0 /\ ~(P2 = 0) -// and "LO" <=> ~CF <=> ~(P1 = 0) /\ P2 = 0 - - ldp x0, x1, [z_1] - ldp x2, x3, [z_1+16] - ldp x4, x5, [z_1+32] - - orr x20, x0, x1 - orr x21, x2, x3 - orr x22, x4, x5 - orr x20, x20, x21 - orr x20, x20, x22 - cmp x20, xzr - cset x20, ne - - ldp x6, x7, [z_2] - ldp x8, x9, [z_2+16] - ldp x10, x11, [z_2+32] - - orr x21, x6, x7 - orr x22, x8, x9 - orr x23, x10, x11 - orr x21, x21, x22 - orr x21, x21, x23 - cmp x21, xzr - cset x21, ne - - cmp x21, x20 - -// Multiplex the outputs accordingly, re-using the z's in registers - - ldp x12, x13, [resz] - csel x12, x0, x12, lo - csel x13, x1, x13, lo - csel x12, x6, x12, hi - csel x13, x7, x13, hi - ldp x14, x15, [resz+16] - csel x14, x2, x14, lo - csel x15, x3, x15, lo - csel x14, x8, x14, hi - csel x15, x9, x15, hi - ldp x16, x17, [resz+32] - csel x16, x4, x16, lo - csel x17, x5, x17, lo - csel x16, x10, x16, hi - csel x17, x11, x17, hi - - ldp x20, x21, [x_1] - ldp x0, x1, [resx] - csel x0, x20, x0, lo - csel x1, x21, x1, lo - ldp x20, x21, [x_2] - csel x0, x20, x0, hi - csel x1, x21, x1, hi - - ldp x20, x21, [x_1+16] - ldp x2, x3, [resx+16] - csel x2, x20, x2, lo - csel x3, x21, x3, lo - ldp x20, x21, [x_2+16] - csel x2, x20, x2, hi - csel x3, x21, x3, hi - - ldp x20, x21, [x_1+32] - ldp x4, x5, [resx+32] - csel x4, x20, x4, lo - csel x5, x21, x5, lo - ldp x20, x21, [x_2+32] - csel x4, x20, x4, hi - csel x5, x21, x5, hi - - ldp x20, x21, [y_1] - ldp x6, x7, [resy] - csel x6, x20, x6, lo - csel x7, x21, x7, lo - ldp x20, x21, [y_2] - csel x6, x20, x6, hi - csel x7, x21, x7, hi - - ldp x20, x21, [y_1+16] - ldp x8, x9, [resy+16] - csel x8, x20, x8, lo - csel x9, x21, x9, lo - ldp x20, x21, [y_2+16] - csel x8, x20, x8, hi - csel x9, x21, x9, hi - - ldp x20, x21, [y_1+32] - ldp x10, x11, [resy+32] - csel x10, x20, x10, lo - csel x11, x21, x11, lo - ldp x20, x21, [y_2+32] - csel x10, x20, x10, hi - csel x11, x21, x11, hi - -// Finally store back the multiplexed values - - stp x0, x1, [x_3] - stp x2, x3, [x_3+16] - stp x4, x5, [x_3+32] - stp x6, x7, [y_3] - stp x8, x9, [y_3+16] - stp x10, x11, [y_3+32] - stp x12, x13, [z_3] - stp x14, x15, [z_3+16] - stp x16, x17, [z_3+32] + mov x24, x0 + mov x25, x1 + mov x26, x2 + mov x0, sp + ldr q1, [x25, #96] + ldp x9, x2, [x25, #96] + ldr q0, [x25, #96] + ldp x4, x6, [x25, #112] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [x25, #128] + xtn v30.2s, v0.2d + ldr q1, [x25, #128] + uzp2 v31.4s, v0.4s, v0.4s + 
ldp x5, x10, [x25, #128] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [x0] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [x0, #16] + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [x0, #32] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [x0, #16] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [x0] + eor x12, 
x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [x0, #32] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [x0] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [x0, #16] + adc x17, x14, xzr + stp x2, x17, [x0, #32] + ldr q1, [x26, #96] + ldp x9, x2, [x26, #96] + ldr q0, [x26, #96] + ldp x4, x6, [x26, #112] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [x26, #128] + xtn v30.2s, v0.2d + ldr q1, [x26, #128] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [x26, #128] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, 
x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #240] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [sp, #256] + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [sp, #272] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [sp, #256] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #240] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #272] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, 
x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #240] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #256] + adc x17, x14, xzr + stp x2, x17, [sp, #272] + stp x23, x24, [sp, #-48] + ldr q3, [x26, #96] + ldr q25, [x25, #48] + ldp x13, x23, [x25, #48] + ldp x3, x21, [x26, #96] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x25, #80] + ldp x8, x24, [x26, #112] + subs x6, x3, x21 + ldr q0, [x26, #128] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [x25, #64] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, 
[x25, #80] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x26, #128] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #288] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #304] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #320] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #288] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #304] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #320] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #288] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #304] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #320] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, 
x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #288] + ldp x21, x12, [sp, #304] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #320] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #288] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #304] + adc x12, x15, x23 + stp x21, x12, [sp, #320] + ldr q3, [x25, #96] + ldr q25, [x26, #48] + ldp x13, x23, [x26, #48] + ldp x3, x21, [x25, #96] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x26, #80] + ldp x8, x24, [x25, #112] + subs x6, x3, x21 + ldr q0, [x25, #128] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [x26, #64] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 
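+// (Part of the inlined block that multiplies the first point's z coordinate,
+// loaded from x25+96, by the second point's y coordinate at x26+48, i.e. what
+// the montmul_p384(y2a,z_1,y_2) step computed in the original sequence; the
+// result is stored to sp+48..sp+95 further down.)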
+ mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x26, #80] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x25, #128] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #48] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #64] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #80] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + 
adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #48] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #64] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #80] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #48] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #64] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #80] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #48] + ldp x21, x12, [sp, #64] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #80] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor 
x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #48] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #64] + adc x12, x15, x23 + stp x21, x12, [sp, #80] + mov x1, sp + ldr q3, [x1] + ldr q25, [x26, #0] + ldp x13, x23, [x26, #0] + ldp x3, x21, [x1] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x26, #32] + ldp x8, x24, [x1, #16] + subs x6, x3, x21 + ldr q0, [x1, #32] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [x26, #16] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x26, #32] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x1, #32] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #96] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #112] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #128] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, 
x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #96] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #112] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #128] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #96] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #112] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #128] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #96] + ldp x21, x12, [sp, #112] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #128] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc 
x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #96] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #112] + adc x12, x15, x23 + stp x21, x12, [sp, #128] + ldr q3, [sp, #240] + ldr q25, [x25, #0] + ldp x13, x23, [x25, #0] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x25, #32] + ldp x8, x24, [sp, #256] + subs x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [x25, #16] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x25, #32] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + 
adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #192] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #208] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #224] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #192] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #208] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #224] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #192] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #208] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #224] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, 
x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #192] + ldp x21, x12, [sp, #208] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #224] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #192] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #208] + adc x12, x15, x23 + stp x21, x12, [sp, #224] + mov x1, sp + ldr q3, [x1] + ldr q25, [sp, #48] + ldp x13, x23, [sp, #48] + ldp x3, x21, [x1] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #80] + ldp x8, x24, [x1, #16] + subs x6, x3, x21 + ldr q0, [x1, #32] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [sp, #64] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, 
x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #80] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x1, #32] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #48] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #64] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #80] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #48] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #64] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, 
[sp, #80] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #48] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #64] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #80] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #48] + ldp x21, x12, [sp, #64] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #80] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #48] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #64] + adc x12, x15, x23 + stp x21, x12, [sp, #80] + ldr q3, [sp, #240] + ldr q25, [sp, #288] + ldp x13, x23, [sp, #288] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, 
v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #320] + ldp x8, x24, [sp, #256] + subs x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [sp, #304] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #320] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #288] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #304] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #320] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc 
x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #288] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #304] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #320] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #288] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #304] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #320] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #288] + ldp x21, x12, [sp, #304] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #320] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs 
x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x2, x24, x11 + stp x22, x5, [sp, #288] + adcs x11, x13, x23 + adcs x12, x8, x23 + stp x2, x11, [sp, #304] + adc x13, x15, x23 + stp x12, x13, [sp, #320] + ldp x5, x6, [sp, #96] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #112] + ldp x4, x3, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #128] + ldp x4, x3, [sp, #224] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + ldp x5, x6, [sp, #48] + ldp x4, x3, [sp, #288] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #64] + sbcs x7, x7, x2 + sbcs x8, x8, x11 + ldp x9, x10, [sp, #80] + sbcs x9, x9, x12 + sbcs x10, x10, x13 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #48] + stp x7, x8, [sp, #64] + stp x9, x10, [sp, #80] + ldr q1, [sp, #240] + ldp x9, x2, [sp, #240] + ldr q0, [sp, #240] + ldp x4, x6, [sp, #256] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #272] + xtn v30.2s, v0.2d + ldr q1, [sp, #272] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #272] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, 
v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #144] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [sp, #160] + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [sp, #176] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [sp, #160] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #144] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #176] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, 
#32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #144] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #160] + adc x17, x14, xzr + stp x2, x17, [sp, #176] + mov x0, sp + ldr q1, [sp, #48] + ldp x9, x2, [sp, #48] + ldr q0, [sp, #48] + ldp x4, x6, [sp, #64] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #80] + xtn v30.2s, v0.2d + ldr q1, [sp, #80] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #80] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, 
x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [x0] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [x0, #16] + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [x0, #32] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [x0, #16] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [x0] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [x0, #32] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, 
x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [x0] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [x0, #16] + adc x17, x14, xzr + stp x2, x17, [x0, #32] + ldr q3, [sp, #144] + ldr q25, [sp, #192] + ldp x13, x23, [sp, #192] + ldp x3, x21, [sp, #144] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #224] + ldp x8, x24, [sp, #160] + subs x6, x3, x21 + ldr q0, [sp, #176] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [sp, #208] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #224] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #176] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #192] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #208] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, 
v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #224] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #192] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #208] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #224] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #192] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #208] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #224] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #192] + ldp x21, x12, [sp, #208] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #224] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs 
x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #192] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #208] + adc x12, x15, x23 + stp x21, x12, [sp, #224] + ldr q3, [sp, #144] + ldr q25, [sp, #96] + ldp x13, x23, [sp, #96] + ldp x3, x21, [sp, #144] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #128] + ldp x8, x24, [sp, #160] + subs x6, x3, x21 + ldr q0, [sp, #176] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [sp, #112] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #128] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #176] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + 
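+// The sequence in flight here appears to be another interleaved NEON/scalar Montgomery
+// multiplication of the 6-limb buffers at sp+144 and sp+96; judging by the stores further
+// below, the reduced product is written back over the buffer at sp+96.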
adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #96] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #112] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #128] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #96] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #112] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #128] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #96] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #112] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #128] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + 
adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #96] + ldp x21, x12, [sp, #112] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #128] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x2, x24, x11 + stp x22, x5, [sp, #96] + adcs x11, x13, x23 + adcs x12, x8, x23 + stp x2, x11, [sp, #112] + adc x13, x15, x23 + stp x12, x13, [sp, #128] + mov x0, sp + mov x1, sp + ldp x5, x6, [x1] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [x1, #16] + ldp x4, x3, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [x1, #32] + ldp x4, x3, [sp, #224] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [x0] + stp x7, x8, [x0, #16] + stp x9, x10, [x0, #32] + ldp x5, x6, [sp, #96] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x4, x3, [sp, #208] + sbcs x7, x2, x4 + sbcs x8, x11, x3 + ldp x4, x3, [sp, #224] + sbcs x9, x12, x4 + sbcs x10, x13, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #144] + stp x7, x8, [sp, #160] 
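+// The unrolled code above and below appears to repeat three building blocks over 6-limb
+// (384-bit) operands: an interleaved NEON/scalar Montgomery multiplication, a Montgomery
+// squaring of the same shape, and a subtract-with-borrow followed by a masked add-back of
+// p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1. The next multiplication appears to draw one
+// operand from the buffer at sp+240 and the other from offset 96 of the structure
+// addressed by x25.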
+ stp x9, x10, [sp, #176] + ldr q3, [sp, #240] + ldr q25, [x25, #96] + ldp x13, x23, [x25, #96] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x25, #128] + ldp x8, x24, [sp, #256] + subs x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [x25, #112] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x25, #128] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #240] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #256] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #272] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, 
x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #240] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #256] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #272] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #240] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #256] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #272] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #240] + ldp x21, x12, [sp, #256] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #272] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + 
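+// This stretch looks like the usual word-by-word Montgomery reduction for p_384: each step
+// forms the quotient word from the bottom limb via the (2^32 + 1) multiplication trick
+// (lsl #32 then add), then folds in the matching multiple of p_384 using only shifts,
+// extr and carry chains.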
sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #240] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #256] + adc x12, x15, x23 + stp x21, x12, [sp, #272] + mov x0, sp + mov x1, sp + ldp x5, x6, [x1] + ldp x4, x3, [sp, #96] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [x1, #16] + ldp x4, x3, [sp, #112] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [x1, #32] + ldp x4, x3, [sp, #128] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x2, x5, x4 + eor x4, x4, x3 + adcs x11, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x4, x7, x4 + adcs x12, x8, x3 + adcs x13, x9, x3 + adc x3, x10, x3 + stp x2, x11, [x0] + stp x4, x12, [x0, #16] + stp x13, x3, [x0, #32] + ldp x5, x6, [sp, #192] + subs x5, x5, x2 + sbcs x6, x6, x11 + ldp x7, x8, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x12 + ldp x9, x10, [sp, #224] + sbcs x9, x9, x13 + sbcs x10, x10, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #192] + stp x7, x8, [sp, #208] + stp x9, x10, [sp, #224] + ldr q3, [sp, #144] + ldr q25, [sp, #288] + ldp x13, x23, [sp, #288] + ldp x3, x21, [sp, #144] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #320] + ldp x8, x24, [sp, #160] + subs x6, x3, x21 + ldr q0, [sp, #176] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [sp, #304] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + 
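+// The subtract-then-masked-add sequences just above appear to implement modular subtraction
+// of 6-limb field elements: the final borrow sets an all-ones mask that decides whether
+// p_384 is added back, with the constants 0xffffffff and 0xfffffffffffffffe (plus the mask
+// itself) reconstructing p_384's limbs.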
adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #320] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #176] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #144] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #160] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #176] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #144] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #160] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #176] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #144] + adcs x12, x12, 
x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #160] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #176] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #144] + ldp x21, x12, [sp, #160] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #176] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #144] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #160] + adc x12, x15, x23 + stp x21, x12, [sp, #176] + ldr q3, [sp, #240] + ldr q25, [x26, #96] + ldp x13, x23, [x26, #96] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x26, #128] + ldp x8, x24, [sp, #256] + subs 
x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [x26, #112] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x26, #128] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #240] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #256] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #272] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, 
x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #240] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #256] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #272] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #240] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #256] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #272] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #240] + ldp x21, x12, [sp, #256] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #272] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, 
xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #240] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #256] + adc x12, x15, x23 + stp x21, x12, [sp, #272] + ldp x2, x27, [sp, #-48] + ldr q3, [sp, #48] + ldr q25, [sp, #192] + ldp x13, x23, [sp, #192] + ldp x3, x21, [sp, #48] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #224] + ldp x8, x24, [sp, #64] + subs x6, x3, x21 + ldr q0, [sp, #80] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [sp, #208] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #224] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #80] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, 
x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #192] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #208] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #224] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #192] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #208] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #224] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #192] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #208] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #224] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #192] + ldp x21, x12, [sp, #208] + adcs x22, x20, x23 + eor x23, x22, x9 
+ adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #224] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x2, x6, x20 + eor x3, x20, x23 + adcs x6, x7, x3 + adcs x7, x24, x11 + adcs x9, x13, x23 + adcs x10, x8, x23 + adc x11, x15, x23 + ldp x4, x3, [sp, #144] + subs x5, x2, x4 + sbcs x6, x6, x3 + ldp x4, x3, [sp, #160] + sbcs x7, x7, x4 + sbcs x8, x9, x3 + ldp x4, x3, [sp, #176] + sbcs x9, x10, x4 + sbcs x10, x11, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x19, x5, x4 + eor x4, x4, x3 + adcs x24, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x7, x8, [sp, #208] + stp x9, x10, [sp, #224] + ldp x0, x1, [x25, #96] + ldp x2, x3, [x25, #112] + ldp x4, x5, [x25, #128] + orr x20, x0, x1 + orr x21, x2, x3 + orr x22, x4, x5 + orr x20, x20, x21 + orr x20, x20, x22 + cmp x20, xzr + cset x20, ne + ldp x6, x7, [x26, #96] + ldp x8, x9, [x26, #112] + ldp x10, x11, [x26, #128] + orr x21, x6, x7 + orr x22, x8, x9 + orr x23, x10, x11 + orr x21, x21, x22 + orr x21, x21, x23 + cmp x21, xzr + cset x21, ne + cmp x21, x20 + ldp x12, x13, [sp, #240] + csel x12, x0, x12, cc + csel x13, x1, x13, cc + csel x12, x6, x12, hi + csel x13, x7, x13, hi + ldp x14, x15, [sp, #256] + csel x14, x2, x14, cc + csel x15, x3, x15, cc + csel x14, x8, x14, hi + csel x15, x9, x15, hi + ldp x16, x17, [sp, #272] + csel x16, x4, x16, cc + csel x17, x5, x17, cc + csel x16, x10, x16, hi + csel x17, x11, x17, hi + ldp x20, x21, [x25] + ldp x0, x1, [sp, #0] + csel x0, x20, x0, cc + csel x1, x21, x1, cc + ldp x20, x21, [x26] + csel x0, x20, x0, hi + csel x1, x21, x1, hi + ldp x20, x21, [x25, #16] + ldp x2, x3, [sp, #16] + csel x2, x20, x2, cc + csel x3, x21, x3, cc + ldp x20, x21, [x26, #16] + csel x2, x20, x2, hi + csel x3, x21, x3, hi + ldp x20, x21, [x25, #32] + ldp x4, x5, [sp, #32] + csel x4, x20, x4, cc + csel x5, x21, x5, cc + ldp x20, x21, [x26, #32] + 
csel x4, x20, x4, hi + csel x5, x21, x5, hi + ldp x20, x21, [x25, #48] + csel x6, x20, x19, cc + csel x7, x21, x24, cc + ldp x20, x21, [x26, #48] + csel x6, x20, x6, hi + csel x7, x21, x7, hi + ldp x20, x21, [x25, #64] + ldp x8, x9, [sp, #208] + csel x8, x20, x8, cc + csel x9, x21, x9, cc + ldp x20, x21, [x26, #64] + csel x8, x20, x8, hi + csel x9, x21, x9, hi + ldp x20, x21, [x25, #80] + ldp x10, x11, [sp, #224] + csel x10, x20, x10, cc + csel x11, x21, x11, cc + ldp x20, x21, [x26, #80] + csel x10, x20, x10, hi + csel x11, x21, x11, hi + stp x0, x1, [x27] + stp x2, x3, [x27, #16] + stp x4, x5, [x27, #32] + stp x6, x7, [x27, #48] + stp x8, x9, [x27, #64] + stp x10, x11, [x27, #80] + stp x12, x13, [x27, #96] + stp x14, x15, [x27, #112] + stp x16, x17, [x27, #128] // Restore stack and registers add sp, sp, NSPACE + ldp x27, xzr, [sp], 16 ldp x25, x26, [sp], 16 ldp x23, x24, [sp], 16 ldp x21, x22, [sp], 16 diff --git a/third_party/s2n-bignum/arm/p384/p384_montjdouble.S b/third_party/s2n-bignum/arm/p384/p384_montjdouble.S index 3f92103cad..3a3b109d37 100644 --- a/third_party/s2n-bignum/arm/p384/p384_montjdouble.S +++ b/third_party/s2n-bignum/arm/p384/p384_montjdouble.S @@ -15,6 +15,14 @@ // ---------------------------------------------------------------------------- #include "_internal_s2n_bignum.h" +// This is functionally equivalent to p384_montjdouble in unopt/p384_montjdouble.S. +// This is the result of doing the following sequence of optimizations: +// 1. Function inlining +// 2. Eliminating redundant load/store instructions +// 3. Folding (add addr, const) + load/store +// Function inlining is done manually. The second and third optimizations are +// done by a script. + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjdouble) S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjdouble) .text @@ -24,1069 +32,3105 @@ #define NUMSIZE 48 -// Stable homes for input arguments during main code sequence - -#define input_z x25 -#define input_x x26 - -// Pointer-offset pairs for inputs and outputs - -#define x_1 input_x, #0 -#define y_1 input_x, #NUMSIZE -#define z_1 input_x, #(2*NUMSIZE) - -#define x_3 input_z, #0 -#define y_3 input_z, #NUMSIZE -#define z_3 input_z, #(2*NUMSIZE) - -// Pointer-offset pairs for temporaries, with some aliasing -// NSPACE is the total stack needed for these temporaries - -#define z2 sp, #(NUMSIZE*0) -#define y2 sp, #(NUMSIZE*1) -#define x2p sp, #(NUMSIZE*2) -#define xy2 sp, #(NUMSIZE*3) - -#define y4 sp, #(NUMSIZE*4) -#define t2 sp, #(NUMSIZE*4) - -#define dx2 sp, #(NUMSIZE*5) -#define t1 sp, #(NUMSIZE*5) - -#define d sp, #(NUMSIZE*6) -#define x4p sp, #(NUMSIZE*6) - #define NSPACE #(NUMSIZE*7) -// Corresponds exactly to bignum_montmul_p384 - -#define montmul_p384(P0,P1,P2) \ - ldp x3, x4, [P1]; \ - ldp x5, x6, [P1+16]; \ - ldp x7, x8, [P1+32]; \ - ldp x9, x10, [P2]; \ - ldp x11, x12, [P2+16]; \ - ldp x13, x14, [P2+32]; \ - mul x15, x3, x9; \ - mul x21, x4, x10; \ - mul x22, x5, x11; \ - umulh x23, x3, x9; \ - umulh x24, x4, x10; \ - umulh x1, x5, x11; \ - adds x23, x23, x21; \ - adcs x24, x24, x22; \ - adc x1, x1, xzr; \ - adds x16, x23, x15; \ - adcs x17, x24, x23; \ - adcs x19, x1, x24; \ - adc x20, x1, xzr; \ - adds x17, x17, x15; \ - adcs x19, x19, x23; \ - adcs x20, x20, x24; \ - adc x1, x1, xzr; \ - subs x24, x3, x4; \ - cneg x24, x24, lo; \ - csetm x23, lo; \ - subs x22, x10, x9; \ - cneg x22, x22, lo; \ - mul x21, x24, x22; \ - umulh x22, x24, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x16, x16, x21; \ - adcs x17, x17, 
x22; \ - adcs x19, x19, x23; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x24, x3, x5; \ - cneg x24, x24, lo; \ - csetm x23, lo; \ - subs x22, x11, x9; \ - cneg x22, x22, lo; \ - mul x21, x24, x22; \ - umulh x22, x24, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x17, x17, x21; \ - adcs x19, x19, x22; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x24, x4, x5; \ - cneg x24, x24, lo; \ - csetm x23, lo; \ - subs x22, x11, x10; \ - cneg x22, x22, lo; \ - mul x21, x24, x22; \ - umulh x22, x24, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x19, x19, x21; \ - adcs x20, x20, x22; \ - adc x1, x1, x23; \ - lsl x23, x15, #32; \ - add x15, x23, x15; \ - lsr x23, x15, #32; \ - subs x23, x23, x15; \ - sbc x22, x15, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x15; \ - adc x21, xzr, xzr; \ - subs x16, x16, x23; \ - sbcs x17, x17, x22; \ - sbcs x19, x19, x21; \ - sbcs x20, x20, xzr; \ - sbcs x1, x1, xzr; \ - sbc x15, x15, xzr; \ - lsl x23, x16, #32; \ - add x16, x23, x16; \ - lsr x23, x16, #32; \ - subs x23, x23, x16; \ - sbc x22, x16, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x16; \ - adc x21, xzr, xzr; \ - subs x17, x17, x23; \ - sbcs x19, x19, x22; \ - sbcs x20, x20, x21; \ - sbcs x1, x1, xzr; \ - sbcs x15, x15, xzr; \ - sbc x16, x16, xzr; \ - lsl x23, x17, #32; \ - add x17, x23, x17; \ - lsr x23, x17, #32; \ - subs x23, x23, x17; \ - sbc x22, x17, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x17; \ - adc x21, xzr, xzr; \ - subs x19, x19, x23; \ - sbcs x20, x20, x22; \ - sbcs x1, x1, x21; \ - sbcs x15, x15, xzr; \ - sbcs x16, x16, xzr; \ - sbc x17, x17, xzr; \ - stp x19, x20, [P0]; \ - stp x1, x15, [P0+16]; \ - stp x16, x17, [P0+32]; \ - mul x15, x6, x12; \ - mul x21, x7, x13; \ - mul x22, x8, x14; \ - umulh x23, x6, x12; \ - umulh x24, x7, x13; \ - umulh x1, x8, x14; \ - adds x23, x23, x21; \ - adcs x24, x24, x22; \ - adc x1, x1, xzr; \ - adds x16, x23, x15; \ - adcs x17, x24, x23; \ - adcs x19, x1, x24; \ - adc x20, x1, xzr; \ - adds x17, x17, x15; \ - adcs x19, x19, x23; \ - adcs x20, x20, x24; \ - adc x1, x1, xzr; \ - subs x24, x6, x7; \ - cneg x24, x24, lo; \ - csetm x23, lo; \ - subs x22, x13, x12; \ - cneg x22, x22, lo; \ - mul x21, x24, x22; \ - umulh x22, x24, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x16, x16, x21; \ - adcs x17, x17, x22; \ - adcs x19, x19, x23; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x24, x6, x8; \ - cneg x24, x24, lo; \ - csetm x23, lo; \ - subs x22, x14, x12; \ - cneg x22, x22, lo; \ - mul x21, x24, x22; \ - umulh x22, x24, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x17, x17, x21; \ - adcs x19, x19, x22; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x24, x7, x8; \ - cneg x24, x24, lo; \ - csetm x23, lo; \ - subs x22, x14, x13; \ - cneg x22, x22, lo; \ - mul x21, x24, x22; \ - umulh x22, x24, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x19, x19, x21; \ - adcs x20, x20, x22; \ - adc x1, x1, x23; \ - subs x6, x6, x3; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x5; \ - ngc x3, xzr; \ - cmn x3, #1; \ - eor x6, x6, x3; \ - adcs x6, x6, xzr; \ - eor x7, x7, x3; \ - adcs x7, x7, xzr; \ - eor x8, x8, x3; \ - adc x8, x8, xzr; \ - subs x9, x9, x12; \ - sbcs x10, x10, x13; \ - sbcs 
x11, x11, x14; \ - ngc x14, xzr; \ - cmn x14, #1; \ - eor x9, x9, x14; \ - adcs x9, x9, xzr; \ - eor x10, x10, x14; \ - adcs x10, x10, xzr; \ - eor x11, x11, x14; \ - adc x11, x11, xzr; \ - eor x14, x3, x14; \ - ldp x21, x22, [P0]; \ - adds x15, x15, x21; \ - adcs x16, x16, x22; \ - ldp x21, x22, [P0+16]; \ - adcs x17, x17, x21; \ - adcs x19, x19, x22; \ - ldp x21, x22, [P0+32]; \ - adcs x20, x20, x21; \ - adcs x1, x1, x22; \ - adc x2, xzr, xzr; \ - stp x15, x16, [P0]; \ - stp x17, x19, [P0+16]; \ - stp x20, x1, [P0+32]; \ - mul x15, x6, x9; \ - mul x21, x7, x10; \ - mul x22, x8, x11; \ - umulh x23, x6, x9; \ - umulh x24, x7, x10; \ - umulh x1, x8, x11; \ - adds x23, x23, x21; \ - adcs x24, x24, x22; \ - adc x1, x1, xzr; \ - adds x16, x23, x15; \ - adcs x17, x24, x23; \ - adcs x19, x1, x24; \ - adc x20, x1, xzr; \ - adds x17, x17, x15; \ - adcs x19, x19, x23; \ - adcs x20, x20, x24; \ - adc x1, x1, xzr; \ - subs x24, x6, x7; \ - cneg x24, x24, lo; \ - csetm x23, lo; \ - subs x22, x10, x9; \ - cneg x22, x22, lo; \ - mul x21, x24, x22; \ - umulh x22, x24, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x16, x16, x21; \ - adcs x17, x17, x22; \ - adcs x19, x19, x23; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x24, x6, x8; \ - cneg x24, x24, lo; \ - csetm x23, lo; \ - subs x22, x11, x9; \ - cneg x22, x22, lo; \ - mul x21, x24, x22; \ - umulh x22, x24, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x17, x17, x21; \ - adcs x19, x19, x22; \ - adcs x20, x20, x23; \ - adc x1, x1, x23; \ - subs x24, x7, x8; \ - cneg x24, x24, lo; \ - csetm x23, lo; \ - subs x22, x11, x10; \ - cneg x22, x22, lo; \ - mul x21, x24, x22; \ - umulh x22, x24, x22; \ - cinv x23, x23, lo; \ - eor x21, x21, x23; \ - eor x22, x22, x23; \ - cmn x23, #1; \ - adcs x19, x19, x21; \ - adcs x20, x20, x22; \ - adc x1, x1, x23; \ - ldp x3, x4, [P0]; \ - ldp x5, x6, [P0+16]; \ - ldp x7, x8, [P0+32]; \ - cmn x14, #1; \ - eor x15, x15, x14; \ - adcs x15, x15, x3; \ - eor x16, x16, x14; \ - adcs x16, x16, x4; \ - eor x17, x17, x14; \ - adcs x17, x17, x5; \ - eor x19, x19, x14; \ - adcs x19, x19, x6; \ - eor x20, x20, x14; \ - adcs x20, x20, x7; \ - eor x1, x1, x14; \ - adcs x1, x1, x8; \ - adcs x9, x14, x2; \ - adcs x10, x14, xzr; \ - adcs x11, x14, xzr; \ - adc x12, x14, xzr; \ - adds x19, x19, x3; \ - adcs x20, x20, x4; \ - adcs x1, x1, x5; \ - adcs x9, x9, x6; \ - adcs x10, x10, x7; \ - adcs x11, x11, x8; \ - adc x12, x12, x2; \ - lsl x23, x15, #32; \ - add x15, x23, x15; \ - lsr x23, x15, #32; \ - subs x23, x23, x15; \ - sbc x22, x15, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x15; \ - adc x21, xzr, xzr; \ - subs x16, x16, x23; \ - sbcs x17, x17, x22; \ - sbcs x19, x19, x21; \ - sbcs x20, x20, xzr; \ - sbcs x1, x1, xzr; \ - sbc x15, x15, xzr; \ - lsl x23, x16, #32; \ - add x16, x23, x16; \ - lsr x23, x16, #32; \ - subs x23, x23, x16; \ - sbc x22, x16, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x16; \ - adc x21, xzr, xzr; \ - subs x17, x17, x23; \ - sbcs x19, x19, x22; \ - sbcs x20, x20, x21; \ - sbcs x1, x1, xzr; \ - sbcs x15, x15, xzr; \ - sbc x16, x16, xzr; \ - lsl x23, x17, #32; \ - add x17, x23, x17; \ - lsr x23, x17, #32; \ - subs x23, x23, x17; \ - sbc x22, x17, xzr; \ - extr x23, x22, x23, #32; \ - lsr x22, x22, #32; \ - adds x22, x22, x17; \ - adc x21, xzr, xzr; \ - subs x19, x19, x23; \ - sbcs x20, x20, x22; \ - sbcs x1, x1, x21; \ - sbcs x15, x15, 
xzr; \ - sbcs x16, x16, xzr; \ - sbc x17, x17, xzr; \ - adds x9, x9, x15; \ - adcs x10, x10, x16; \ - adcs x11, x11, x17; \ - adc x12, x12, xzr; \ - add x22, x12, #1; \ - lsl x21, x22, #32; \ - subs x24, x22, x21; \ - sbc x21, x21, xzr; \ - adds x19, x19, x24; \ - adcs x20, x20, x21; \ - adcs x1, x1, x22; \ - adcs x9, x9, xzr; \ - adcs x10, x10, xzr; \ - adcs x11, x11, xzr; \ - csetm x22, lo; \ - mov x23, #4294967295; \ - and x23, x23, x22; \ - adds x19, x19, x23; \ - eor x23, x23, x22; \ - adcs x20, x20, x23; \ - mov x23, #-2; \ - and x23, x23, x22; \ - adcs x1, x1, x23; \ - adcs x9, x9, x22; \ - adcs x10, x10, x22; \ - adc x11, x11, x22; \ - stp x19, x20, [P0]; \ - stp x1, x9, [P0+16]; \ - stp x10, x11, [P0+32] - -// Corresponds exactly to bignum_montsqr_p384 - -#define montsqr_p384(P0,P1) \ - ldp x2, x3, [P1]; \ - ldp x4, x5, [P1+16]; \ - ldp x6, x7, [P1+32]; \ - mul x14, x2, x3; \ - mul x15, x2, x4; \ - mul x16, x3, x4; \ - mul x8, x2, x2; \ - mul x10, x3, x3; \ - mul x12, x4, x4; \ - umulh x17, x2, x3; \ - adds x15, x15, x17; \ - umulh x17, x2, x4; \ - adcs x16, x16, x17; \ - umulh x17, x3, x4; \ - adcs x17, x17, xzr; \ - umulh x9, x2, x2; \ - umulh x11, x3, x3; \ - umulh x13, x4, x4; \ - adds x14, x14, x14; \ - adcs x15, x15, x15; \ - adcs x16, x16, x16; \ - adcs x17, x17, x17; \ - adc x13, x13, xzr; \ - adds x9, x9, x14; \ - adcs x10, x10, x15; \ - adcs x11, x11, x16; \ - adcs x12, x12, x17; \ - adc x13, x13, xzr; \ - lsl x16, x8, #32; \ - add x8, x16, x8; \ - lsr x16, x8, #32; \ - subs x16, x16, x8; \ - sbc x15, x8, xzr; \ - extr x16, x15, x16, #32; \ - lsr x15, x15, #32; \ - adds x15, x15, x8; \ - adc x14, xzr, xzr; \ - subs x9, x9, x16; \ - sbcs x10, x10, x15; \ - sbcs x11, x11, x14; \ - sbcs x12, x12, xzr; \ - sbcs x13, x13, xzr; \ - sbc x8, x8, xzr; \ - lsl x16, x9, #32; \ - add x9, x16, x9; \ - lsr x16, x9, #32; \ - subs x16, x16, x9; \ - sbc x15, x9, xzr; \ - extr x16, x15, x16, #32; \ - lsr x15, x15, #32; \ - adds x15, x15, x9; \ - adc x14, xzr, xzr; \ - subs x10, x10, x16; \ - sbcs x11, x11, x15; \ - sbcs x12, x12, x14; \ - sbcs x13, x13, xzr; \ - sbcs x8, x8, xzr; \ - sbc x9, x9, xzr; \ - lsl x16, x10, #32; \ - add x10, x16, x10; \ - lsr x16, x10, #32; \ - subs x16, x16, x10; \ - sbc x15, x10, xzr; \ - extr x16, x15, x16, #32; \ - lsr x15, x15, #32; \ - adds x15, x15, x10; \ - adc x14, xzr, xzr; \ - subs x11, x11, x16; \ - sbcs x12, x12, x15; \ - sbcs x13, x13, x14; \ - sbcs x8, x8, xzr; \ - sbcs x9, x9, xzr; \ - sbc x10, x10, xzr; \ - stp x11, x12, [P0]; \ - stp x13, x8, [P0+16]; \ - stp x9, x10, [P0+32]; \ - mul x8, x2, x5; \ - mul x14, x3, x6; \ - mul x15, x4, x7; \ - umulh x16, x2, x5; \ - umulh x17, x3, x6; \ - umulh x1, x4, x7; \ - adds x16, x16, x14; \ - adcs x17, x17, x15; \ - adc x1, x1, xzr; \ - adds x9, x16, x8; \ - adcs x10, x17, x16; \ - adcs x11, x1, x17; \ - adc x12, x1, xzr; \ - adds x10, x10, x8; \ - adcs x11, x11, x16; \ - adcs x12, x12, x17; \ - adc x13, x1, xzr; \ - subs x17, x2, x3; \ - cneg x17, x17, lo; \ - csetm x14, lo; \ - subs x15, x6, x5; \ - cneg x15, x15, lo; \ - mul x16, x17, x15; \ - umulh x15, x17, x15; \ - cinv x14, x14, lo; \ - eor x16, x16, x14; \ - eor x15, x15, x14; \ - cmn x14, #1; \ - adcs x9, x9, x16; \ - adcs x10, x10, x15; \ - adcs x11, x11, x14; \ - adcs x12, x12, x14; \ - adc x13, x13, x14; \ - subs x17, x2, x4; \ - cneg x17, x17, lo; \ - csetm x14, lo; \ - subs x15, x7, x5; \ - cneg x15, x15, lo; \ - mul x16, x17, x15; \ - umulh x15, x17, x15; \ - cinv x14, x14, lo; \ - eor x16, x16, x14; \ - eor x15, x15, x14; \ - cmn x14, #1; 
\ - adcs x10, x10, x16; \ - adcs x11, x11, x15; \ - adcs x12, x12, x14; \ - adc x13, x13, x14; \ - subs x17, x3, x4; \ - cneg x17, x17, lo; \ - csetm x14, lo; \ - subs x15, x7, x6; \ - cneg x15, x15, lo; \ - mul x16, x17, x15; \ - umulh x15, x17, x15; \ - cinv x14, x14, lo; \ - eor x16, x16, x14; \ - eor x15, x15, x14; \ - cmn x14, #1; \ - adcs x11, x11, x16; \ - adcs x12, x12, x15; \ - adc x13, x13, x14; \ - adds x8, x8, x8; \ - adcs x9, x9, x9; \ - adcs x10, x10, x10; \ - adcs x11, x11, x11; \ - adcs x12, x12, x12; \ - adcs x13, x13, x13; \ - adc x17, xzr, xzr; \ - ldp x2, x3, [P0]; \ - adds x8, x8, x2; \ - adcs x9, x9, x3; \ - ldp x2, x3, [P0+16]; \ - adcs x10, x10, x2; \ - adcs x11, x11, x3; \ - ldp x2, x3, [P0+32]; \ - adcs x12, x12, x2; \ - adcs x13, x13, x3; \ - adc x17, x17, xzr; \ - lsl x4, x8, #32; \ - add x8, x4, x8; \ - lsr x4, x8, #32; \ - subs x4, x4, x8; \ - sbc x3, x8, xzr; \ - extr x4, x3, x4, #32; \ - lsr x3, x3, #32; \ - adds x3, x3, x8; \ - adc x2, xzr, xzr; \ - subs x9, x9, x4; \ - sbcs x10, x10, x3; \ - sbcs x11, x11, x2; \ - sbcs x12, x12, xzr; \ - sbcs x13, x13, xzr; \ - sbc x8, x8, xzr; \ - lsl x4, x9, #32; \ - add x9, x4, x9; \ - lsr x4, x9, #32; \ - subs x4, x4, x9; \ - sbc x3, x9, xzr; \ - extr x4, x3, x4, #32; \ - lsr x3, x3, #32; \ - adds x3, x3, x9; \ - adc x2, xzr, xzr; \ - subs x10, x10, x4; \ - sbcs x11, x11, x3; \ - sbcs x12, x12, x2; \ - sbcs x13, x13, xzr; \ - sbcs x8, x8, xzr; \ - sbc x9, x9, xzr; \ - lsl x4, x10, #32; \ - add x10, x4, x10; \ - lsr x4, x10, #32; \ - subs x4, x4, x10; \ - sbc x3, x10, xzr; \ - extr x4, x3, x4, #32; \ - lsr x3, x3, #32; \ - adds x3, x3, x10; \ - adc x2, xzr, xzr; \ - subs x11, x11, x4; \ - sbcs x12, x12, x3; \ - sbcs x13, x13, x2; \ - sbcs x8, x8, xzr; \ - sbcs x9, x9, xzr; \ - sbc x10, x10, xzr; \ - adds x17, x17, x8; \ - adcs x8, x9, xzr; \ - adcs x9, x10, xzr; \ - adcs x10, xzr, xzr; \ - mul x1, x5, x5; \ - adds x11, x11, x1; \ - mul x14, x6, x6; \ - mul x15, x7, x7; \ - umulh x1, x5, x5; \ - adcs x12, x12, x1; \ - umulh x1, x6, x6; \ - adcs x13, x13, x14; \ - adcs x17, x17, x1; \ - umulh x1, x7, x7; \ - adcs x8, x8, x15; \ - adcs x9, x9, x1; \ - adc x10, x10, xzr; \ - mul x1, x5, x6; \ - mul x14, x5, x7; \ - mul x15, x6, x7; \ - umulh x16, x5, x6; \ - adds x14, x14, x16; \ - umulh x16, x5, x7; \ - adcs x15, x15, x16; \ - umulh x16, x6, x7; \ - adc x16, x16, xzr; \ - adds x1, x1, x1; \ - adcs x14, x14, x14; \ - adcs x15, x15, x15; \ - adcs x16, x16, x16; \ - adc x5, xzr, xzr; \ - adds x12, x12, x1; \ - adcs x13, x13, x14; \ - adcs x17, x17, x15; \ - adcs x8, x8, x16; \ - adcs x9, x9, x5; \ - adc x10, x10, xzr; \ - mov x1, #-4294967295; \ - mov x14, #4294967295; \ - mov x15, #1; \ - cmn x11, x1; \ - adcs xzr, x12, x14; \ - adcs xzr, x13, x15; \ - adcs xzr, x17, xzr; \ - adcs xzr, x8, xzr; \ - adcs xzr, x9, xzr; \ - adc x10, x10, xzr; \ - neg x10, x10; \ - and x1, x1, x10; \ - adds x11, x11, x1; \ - and x14, x14, x10; \ - adcs x12, x12, x14; \ - and x15, x15, x10; \ - adcs x13, x13, x15; \ - adcs x17, x17, xzr; \ - adcs x8, x8, xzr; \ - adc x9, x9, xzr; \ - stp x11, x12, [P0]; \ - stp x13, x17, [P0+16]; \ - stp x8, x9, [P0+32] - -// Corresponds exactly to bignum_sub_p384 - -#define sub_p384(P0,P1,P2) \ - ldp x5, x6, [P1]; \ - ldp x4, x3, [P2]; \ - subs x5, x5, x4; \ - sbcs x6, x6, x3; \ - ldp x7, x8, [P1+16]; \ - ldp x4, x3, [P2+16]; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x3; \ - ldp x9, x10, [P1+32]; \ - ldp x4, x3, [P2+32]; \ - sbcs x9, x9, x4; \ - sbcs x10, x10, x3; \ - csetm x3, lo; \ - mov x4, #4294967295; \ - and 
x4, x4, x3; \ - adds x5, x5, x4; \ - eor x4, x4, x3; \ - adcs x6, x6, x4; \ - mov x4, #-2; \ - and x4, x4, x3; \ - adcs x7, x7, x4; \ - adcs x8, x8, x3; \ - adcs x9, x9, x3; \ - adc x10, x10, x3; \ - stp x5, x6, [P0]; \ - stp x7, x8, [P0+16]; \ - stp x9, x10, [P0+32] - -// Corresponds exactly to bignum_add_p384 - -#define add_p384(P0,P1,P2) \ - ldp x5, x6, [P1]; \ - ldp x4, x3, [P2]; \ - adds x5, x5, x4; \ - adcs x6, x6, x3; \ - ldp x7, x8, [P1+16]; \ - ldp x4, x3, [P2+16]; \ - adcs x7, x7, x4; \ - adcs x8, x8, x3; \ - ldp x9, x10, [P1+32]; \ - ldp x4, x3, [P2+32]; \ - adcs x9, x9, x4; \ - adcs x10, x10, x3; \ - adc x3, xzr, xzr; \ - mov x4, #0xffffffff; \ - cmp x5, x4; \ - mov x4, #0xffffffff00000000; \ - sbcs xzr, x6, x4; \ - mov x4, #0xfffffffffffffffe; \ - sbcs xzr, x7, x4; \ - adcs xzr, x8, xzr; \ - adcs xzr, x9, xzr; \ - adcs xzr, x10, xzr; \ - adcs x3, x3, xzr; \ - csetm x3, ne; \ - mov x4, #0xffffffff; \ - and x4, x4, x3; \ - subs x5, x5, x4; \ - eor x4, x4, x3; \ - sbcs x6, x6, x4; \ - mov x4, #0xfffffffffffffffe; \ - and x4, x4, x3; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x3; \ - sbcs x9, x9, x3; \ - sbc x10, x10, x3; \ - stp x5, x6, [P0]; \ - stp x7, x8, [P0+16]; \ - stp x9, x10, [P0+32] - -// P0 = 4 * P1 - P2 - -#define cmsub41_p384(P0,P1,P2) \ - ldp x1, x2, [P1]; \ - ldp x3, x4, [P1+16]; \ - ldp x5, x6, [P1+32]; \ - lsl x0, x1, #2; \ - ldp x7, x8, [P2]; \ - subs x0, x0, x7; \ - extr x1, x2, x1, #62; \ - sbcs x1, x1, x8; \ - ldp x7, x8, [P2+16]; \ - extr x2, x3, x2, #62; \ - sbcs x2, x2, x7; \ - extr x3, x4, x3, #62; \ - sbcs x3, x3, x8; \ - extr x4, x5, x4, #62; \ - ldp x7, x8, [P2+32]; \ - sbcs x4, x4, x7; \ - extr x5, x6, x5, #62; \ - sbcs x5, x5, x8; \ - lsr x6, x6, #62; \ - adc x6, x6, xzr; \ - lsl x7, x6, #32; \ - subs x8, x6, x7; \ - sbc x7, x7, xzr; \ - adds x0, x0, x8; \ - adcs x1, x1, x7; \ - adcs x2, x2, x6; \ - adcs x3, x3, xzr; \ - adcs x4, x4, xzr; \ - adcs x5, x5, xzr; \ - csetm x8, cc; \ - mov x9, #0xffffffff; \ - and x9, x9, x8; \ - adds x0, x0, x9; \ - eor x9, x9, x8; \ - adcs x1, x1, x9; \ - mov x9, #0xfffffffffffffffe; \ - and x9, x9, x8; \ - adcs x2, x2, x9; \ - adcs x3, x3, x8; \ - adcs x4, x4, x8; \ - adc x5, x5, x8; \ - stp x0, x1, [P0]; \ - stp x2, x3, [P0+16]; \ - stp x4, x5, [P0+32] - -// P0 = C * P1 - D * P2 - -#define cmsub_p384(P0,C,P1,D,P2) \ - ldp x0, x1, [P2]; \ - mov x6, #0x00000000ffffffff; \ - subs x6, x6, x0; \ - mov x7, #0xffffffff00000000; \ - sbcs x7, x7, x1; \ - ldp x0, x1, [P2+16]; \ - mov x8, #0xfffffffffffffffe; \ - sbcs x8, x8, x0; \ - mov x13, #0xffffffffffffffff; \ - sbcs x9, x13, x1; \ - ldp x0, x1, [P2+32]; \ - sbcs x10, x13, x0; \ - sbc x11, x13, x1; \ - mov x12, D; \ - mul x0, x12, x6; \ - mul x1, x12, x7; \ - mul x2, x12, x8; \ - mul x3, x12, x9; \ - mul x4, x12, x10; \ - mul x5, x12, x11; \ - umulh x6, x12, x6; \ - umulh x7, x12, x7; \ - umulh x8, x12, x8; \ - umulh x9, x12, x9; \ - umulh x10, x12, x10; \ - umulh x12, x12, x11; \ - adds x1, x1, x6; \ - adcs x2, x2, x7; \ - adcs x3, x3, x8; \ - adcs x4, x4, x9; \ - adcs x5, x5, x10; \ - mov x6, #1; \ - adc x6, x12, x6; \ - ldp x8, x9, [P1]; \ - ldp x10, x11, [P1+16]; \ - ldp x12, x13, [P1+32]; \ - mov x14, C; \ - mul x15, x14, x8; \ - umulh x8, x14, x8; \ - adds x0, x0, x15; \ - mul x15, x14, x9; \ - umulh x9, x14, x9; \ - adcs x1, x1, x15; \ - mul x15, x14, x10; \ - umulh x10, x14, x10; \ - adcs x2, x2, x15; \ - mul x15, x14, x11; \ - umulh x11, x14, x11; \ - adcs x3, x3, x15; \ - mul x15, x14, x12; \ - umulh x12, x14, x12; \ - adcs x4, x4, x15; \ - mul x15, x14, x13; \ - umulh 
x13, x14, x13; \ - adcs x5, x5, x15; \ - adc x6, x6, xzr; \ - adds x1, x1, x8; \ - adcs x2, x2, x9; \ - adcs x3, x3, x10; \ - adcs x4, x4, x11; \ - adcs x5, x5, x12; \ - adcs x6, x6, x13; \ - lsl x7, x6, #32; \ - subs x8, x6, x7; \ - sbc x7, x7, xzr; \ - adds x0, x0, x8; \ - adcs x1, x1, x7; \ - adcs x2, x2, x6; \ - adcs x3, x3, xzr; \ - adcs x4, x4, xzr; \ - adcs x5, x5, xzr; \ - csetm x6, cc; \ - mov x7, #0xffffffff; \ - and x7, x7, x6; \ - adds x0, x0, x7; \ - eor x7, x7, x6; \ - adcs x1, x1, x7; \ - mov x7, #0xfffffffffffffffe; \ - and x7, x7, x6; \ - adcs x2, x2, x7; \ - adcs x3, x3, x6; \ - adcs x4, x4, x6; \ - adc x5, x5, x6; \ - stp x0, x1, [P0]; \ - stp x2, x3, [P0+16]; \ - stp x4, x5, [P0+32] - -// A weak version of add that only guarantees sum in 6 digits - -#define weakadd_p384(P0,P1,P2) \ - ldp x5, x6, [P1]; \ - ldp x4, x3, [P2]; \ - adds x5, x5, x4; \ - adcs x6, x6, x3; \ - ldp x7, x8, [P1+16]; \ - ldp x4, x3, [P2+16]; \ - adcs x7, x7, x4; \ - adcs x8, x8, x3; \ - ldp x9, x10, [P1+32]; \ - ldp x4, x3, [P2+32]; \ - adcs x9, x9, x4; \ - adcs x10, x10, x3; \ - csetm x3, cs; \ - mov x4, #0xffffffff; \ - and x4, x4, x3; \ - subs x5, x5, x4; \ - eor x4, x4, x3; \ - sbcs x6, x6, x4; \ - mov x4, #0xfffffffffffffffe; \ - and x4, x4, x3; \ - sbcs x7, x7, x4; \ - sbcs x8, x8, x3; \ - sbcs x9, x9, x3; \ - sbc x10, x10, x3; \ - stp x5, x6, [P0]; \ - stp x7, x8, [P0+16]; \ - stp x9, x10, [P0+32] - -// P0 = 3 * P1 - 8 * P2 - -#define cmsub38_p384(P0,P1,P2) \ - ldp x0, x1, [P2]; \ - mov x6, #0x00000000ffffffff; \ - subs x6, x6, x0; \ - mov x7, #0xffffffff00000000; \ - sbcs x7, x7, x1; \ - ldp x0, x1, [P2+16]; \ - mov x8, #0xfffffffffffffffe; \ - sbcs x8, x8, x0; \ - mov x13, #0xffffffffffffffff; \ - sbcs x9, x13, x1; \ - ldp x0, x1, [P2+32]; \ - sbcs x10, x13, x0; \ - sbc x11, x13, x1; \ - lsl x0, x6, #3; \ - extr x1, x7, x6, #61; \ - extr x2, x8, x7, #61; \ - extr x3, x9, x8, #61; \ - extr x4, x10, x9, #61; \ - extr x5, x11, x10, #61; \ - lsr x6, x11, #61; \ - add x6, x6, #1; \ - ldp x8, x9, [P1]; \ - ldp x10, x11, [P1+16]; \ - ldp x12, x13, [P1+32]; \ - mov x14, 3; \ - mul x15, x14, x8; \ - umulh x8, x14, x8; \ - adds x0, x0, x15; \ - mul x15, x14, x9; \ - umulh x9, x14, x9; \ - adcs x1, x1, x15; \ - mul x15, x14, x10; \ - umulh x10, x14, x10; \ - adcs x2, x2, x15; \ - mul x15, x14, x11; \ - umulh x11, x14, x11; \ - adcs x3, x3, x15; \ - mul x15, x14, x12; \ - umulh x12, x14, x12; \ - adcs x4, x4, x15; \ - mul x15, x14, x13; \ - umulh x13, x14, x13; \ - adcs x5, x5, x15; \ - adc x6, x6, xzr; \ - adds x1, x1, x8; \ - adcs x2, x2, x9; \ - adcs x3, x3, x10; \ - adcs x4, x4, x11; \ - adcs x5, x5, x12; \ - adcs x6, x6, x13; \ - lsl x7, x6, #32; \ - subs x8, x6, x7; \ - sbc x7, x7, xzr; \ - adds x0, x0, x8; \ - adcs x1, x1, x7; \ - adcs x2, x2, x6; \ - adcs x3, x3, xzr; \ - adcs x4, x4, xzr; \ - adcs x5, x5, xzr; \ - csetm x6, cc; \ - mov x7, #0xffffffff; \ - and x7, x7, x6; \ - adds x0, x0, x7; \ - eor x7, x7, x6; \ - adcs x1, x1, x7; \ - mov x7, #0xfffffffffffffffe; \ - and x7, x7, x6; \ - adcs x2, x2, x7; \ - adcs x3, x3, x6; \ - adcs x4, x4, x6; \ - adc x5, x5, x6; \ - stp x0, x1, [P0]; \ - stp x2, x3, [P0+16]; \ - stp x4, x5, [P0+32] - S2N_BN_SYMBOL(p384_montjdouble): // Save regs and make room on stack for temporary variables - sub sp, sp, NSPACE+64 + sub sp, sp, NSPACE+80 stp x19, x20, [sp, NSPACE] stp x21, x22, [sp, NSPACE+16] stp x23, x24, [sp, NSPACE+32] stp x25, x26, [sp, NSPACE+48] + stp x27, xzr, [sp, NSPACE+64] -// Move the input arguments to stable places - - mov input_z, x0 - mov 
input_x, x1 - -// Main code, just a sequence of basic field operations - -// z2 = z^2 -// y2 = y^2 - - montsqr_p384(z2,z_1) - montsqr_p384(y2,y_1) - -// x2p = x^2 - z^4 = (x + z^2) * (x - z^2) - - weakadd_p384(t1,x_1,z2) - sub_p384(t2,x_1,z2) - montmul_p384(x2p,t1,t2) - -// t1 = y + z -// x4p = x2p^2 -// xy2 = x * y^2 - - add_p384(t1,y_1,z_1) - montsqr_p384(x4p,x2p) - montmul_p384(xy2,x_1,y2) - -// t2 = (y + z)^2 - - montsqr_p384(t2,t1) - -// d = 12 * xy2 - 9 * x4p -// t1 = y^2 + 2 * y * z - - cmsub_p384(d,12,xy2,9,x4p) - sub_p384(t1,t2,z2) - -// y4 = y^4 - - montsqr_p384(y4,y2) - -// z_3' = 2 * y * z -// dx2 = d * x2p - - sub_p384(z_3,t1,y2) - montmul_p384(dx2,d,x2p) - -// x' = 4 * xy2 - d - - cmsub41_p384(x_3,xy2,d) - -// y' = 3 * dx2 - 8 * y4 - - cmsub38_p384(y_3,dx2,y4) + mov x25, x0 + mov x26, x1 + mov x0, sp + ldr q1, [x26, #96] + ldp x9, x2, [x26, #96] + ldr q0, [x26, #96] + ldp x4, x6, [x26, #112] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [x26, #128] + xtn v30.2s, v0.2d + ldr q1, [x26, #128] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [x26, #128] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [x0] + sbc x14, 
x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [x0, #16] + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [x0, #32] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [x0, #16] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [x0] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [x0, #32] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [x0] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [x0, #16] + adc x17, x14, xzr + stp x2, x17, [x0, #32] + ldr q1, [x26, #48] + ldp x9, x2, [x26, 
#48] + ldr q0, [x26, #48] + ldp x4, x6, [x26, #64] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [x26, #80] + xtn v30.2s, v0.2d + ldr q1, [x26, #80] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [x26, #80] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #48] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [sp, #64] + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [sp, #80] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor 
x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [sp, #64] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #48] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #80] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #48] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #64] + adc x17, x14, xzr + stp x2, x17, [sp, #80] + ldp x5, x6, [x26] + ldp x4, x3, [sp] + adds x5, x5, x4 + adcs x6, x6, x3 + ldp x7, x8, [x26, #16] + ldp x4, x3, [sp, #16] + adcs x7, x7, x4 + adcs x8, x8, x3 + ldp x9, x10, [x26, #32] + ldp x4, x3, [sp, #32] + adcs x9, x9, x4 + adcs x10, x10, x3 + csetm x3, cs + mov x4, #0xffffffff + and x4, x4, x3 + subs x5, x5, x4 + eor x4, x4, x3 + sbcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + sbcs x7, x7, x4 + sbcs x8, x8, x3 + sbcs x9, x9, x3 + sbc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + mov x2, sp + ldp x5, x6, [x26, #0] + ldp x4, x3, [x2] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [x26, #16] + ldp x4, x3, [x2, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [x26, #32] + ldp x4, x3, [x2, #32] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x13, x5, x4 + eor x4, 
x4, x3 + adcs x23, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x13, x23, [sp, #192] + stp x7, x8, [sp, #208] + stp x9, x10, [sp, #224] + ldr q3, [sp, #240] + ldr q25, [sp, #192] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #224] + ldp x8, x24, [sp, #256] + subs x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [sp, #208] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #224] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #96] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #112] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #128] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, 
x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #96] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #112] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #128] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #96] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #112] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #128] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #96] + ldp x21, x12, [sp, #112] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #128] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds 
x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #96] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #112] + adc x12, x15, x23 + stp x21, x12, [sp, #128] + ldp x5, x6, [x26, #48] + ldp x4, x3, [x26, #96] + adds x5, x5, x4 + adcs x6, x6, x3 + ldp x7, x8, [x26, #64] + ldp x4, x3, [x26, #112] + adcs x7, x7, x4 + adcs x8, x8, x3 + ldp x9, x10, [x26, #80] + ldp x4, x3, [x26, #128] + adcs x9, x9, x4 + adcs x10, x10, x3 + adc x3, xzr, xzr + mov x4, #0xffffffff + cmp x5, x4 + mov x4, #0xffffffff00000000 + sbcs xzr, x6, x4 + mov x4, #0xfffffffffffffffe + sbcs xzr, x7, x4 + adcs xzr, x8, xzr + adcs xzr, x9, xzr + adcs xzr, x10, xzr + adcs x3, x3, xzr + csetm x3, ne + mov x4, #0xffffffff + and x4, x4, x3 + subs x5, x5, x4 + eor x4, x4, x3 + sbcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + sbcs x7, x7, x4 + sbcs x8, x8, x3 + sbcs x9, x9, x3 + sbc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + ldr q1, [sp, #96] + ldp x9, x2, [sp, #96] + ldr q0, [sp, #96] + ldp x4, x6, [sp, #112] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #128] + xtn v30.2s, v0.2d + ldr q1, [sp, #128] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #128] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, 
#0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #288] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [sp, #304] + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [sp, #320] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [sp, #304] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #288] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #320] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds 
x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #288] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #304] + adc x17, x14, xzr + stp x2, x17, [sp, #320] + ldr q3, [x26, #0] + ldr q25, [sp, #48] + ldp x13, x23, [sp, #48] + ldp x3, x21, [x26, #0] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #80] + ldp x8, x24, [x26, #16] + subs x6, x3, x21 + ldr q0, [x26, #32] + movi v23.2d, #0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [sp, #64] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #80] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x26, #32] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 
+ sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x26, x4, x16 + mov x4, v27.d[0] + sbcs x27, x20, x11 + sbcs x20, x9, x12 + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #160] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #176] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #160] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #176] + adds x20, x22, x26 + mul x10, x13, x14 + adcs x11, x11, x27 + eor x9, x8, x21 + adcs x26, x19, x17 + stp x20, x11, [sp, #144] + adcs x27, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #176] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, 
x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #144] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #176] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x26 + eor x1, x22, x9 + adcs x24, x23, x27 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x26 + adcs x15, x17, x27 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #144] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #160] + adc x12, x15, x23 + stp x21, x12, [sp, #176] + ldr q1, [sp, #240] + ldp x9, x2, [sp, #240] + ldr q0, [sp, #240] + ldp x4, x6, [sp, #256] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #272] + xtn v30.2s, v0.2d + ldr q1, [sp, #272] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #272] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull 
v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x19, x3, x17 + sbcs x20, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #192] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [sp, #224] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #192] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #224] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x19 + adcs x1, x1, x20 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs 
x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x19, x13, x1 + and x13, x4, x9 + adcs x20, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #208] + adc x17, x14, xzr + stp x2, x17, [sp, #224] + ldp x0, x1, [sp, #288] + mov x6, #0xffffffff + subs x6, x6, x0 + mov x7, #0xffffffff00000000 + sbcs x7, x7, x1 + ldp x0, x1, [sp, #304] + mov x8, #0xfffffffffffffffe + sbcs x8, x8, x0 + mov x13, #0xffffffffffffffff + sbcs x9, x13, x1 + ldp x0, x1, [sp, #320] + sbcs x10, x13, x0 + sbc x11, x13, x1 + mov x12, #0x9 + mul x0, x12, x6 + mul x1, x12, x7 + mul x2, x12, x8 + mul x3, x12, x9 + mul x4, x12, x10 + mul x5, x12, x11 + umulh x6, x12, x6 + umulh x7, x12, x7 + umulh x8, x12, x8 + umulh x9, x12, x9 + umulh x10, x12, x10 + umulh x12, x12, x11 + adds x1, x1, x6 + adcs x2, x2, x7 + adcs x3, x3, x8 + adcs x4, x4, x9 + adcs x5, x5, x10 + mov x6, #0x1 + adc x6, x12, x6 + ldp x8, x9, [sp, #144] + ldp x10, x11, [sp, #160] + ldp x12, x13, [sp, #176] + mov x14, #0xc + mul x15, x14, x8 + umulh x8, x14, x8 + adds x0, x0, x15 + mul x15, x14, x9 + umulh x9, x14, x9 + adcs x1, x1, x15 + mul x15, x14, x10 + umulh x10, x14, x10 + adcs x2, x2, x15 + mul x15, x14, x11 + umulh x11, x14, x11 + adcs x3, x3, x15 + mul x15, x14, x12 + umulh x12, x14, x12 + adcs x4, x4, x15 + mul x15, x14, x13 + umulh x13, x14, x13 + adcs x5, x5, x15 + adc x6, x6, xzr + adds x1, x1, x8 + adcs x2, x2, x9 + adcs x3, x3, x10 + adcs x4, x4, x11 + adcs x5, x5, x12 + adcs x6, x6, x13 + lsl x7, x6, #32 + subs x8, x6, x7 + sbc x7, x7, xzr + adds x0, x0, x8 + adcs x1, x1, x7 + adcs x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adcs x5, x5, xzr + csetm x6, cc + mov x7, #0xffffffff + and x7, x7, x6 + adds x0, x0, x7 + eor x7, x7, x6 + adcs x1, x1, x7 + mov x7, #0xfffffffffffffffe + and x7, x7, x6 + adcs x2, x2, x7 + adcs x3, x3, x6 + adcs x4, x4, x6 + adc x5, x5, x6 + stp x0, x1, [sp, #288] + stp x2, x3, [sp, #304] + stp x4, x5, [sp, #320] + mov x2, sp + ldp x4, x3, [x2] + subs x5, x19, x4 + sbcs x6, x20, x3 + ldp x7, x8, [sp, #208] + ldp x4, x3, [x2, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #224] + ldp x4, x3, [x2, #32] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, 
#272] + ldr q1, [sp, #48] + ldp x9, x2, [sp, #48] + ldr q0, [sp, #48] + ldp x4, x6, [sp, #64] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #80] + xtn v30.2s, v0.2d + ldr q1, [sp, #80] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #80] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x19, x3, x17 + sbcs x20, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #192] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [sp, #224] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + 
cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #192] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #224] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x19 + adcs x1, x1, x20 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #192] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #208] + adc x17, x14, xzr + stp x2, x17, [sp, #224] + ldp x5, x6, [sp, #240] + ldp x4, x3, [sp, #48] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #256] + ldp x4, x3, [sp, #64] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #272] + ldp x4, x3, [sp, #80] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [x25, #96] + stp x7, x8, [x25, #112] + stp x9, x10, [x25, #128] + ldr q3, [sp, #288] + ldr q25, [sp, #96] + ldp x13, x23, [sp, #96] + ldp x3, x21, [sp, #288] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #128] + ldp x8, x24, [sp, #304] + subs x6, x3, x21 + ldr q0, [sp, #320] + movi v23.2d, 
#0xffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc + ldp x6, x14, [sp, #112] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #128] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #320] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x26, x4, x16 + mov x4, v27.d[0] + sbcs x27, x20, x11 + sbcs x20, x9, x12 + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #256] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [sp, #272] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + 
subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #256] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #272] + adds x20, x22, x26 + mul x10, x13, x14 + adcs x11, x11, x27 + eor x9, x8, x21 + adcs x26, x19, x17 + stp x20, x11, [sp, #240] + adcs x27, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #272] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #240] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #272] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x26 + eor x1, x22, x9 + adcs x24, x23, x27 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x26 + adcs x15, x17, x27 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, 
x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #240] + adcs x5, x13, x23 + adcs x12, x8, x23 + stp x14, x5, [sp, #256] + adc x19, x15, x23 + ldp x1, x2, [sp, #144] + ldp x3, x4, [sp, #160] + ldp x5, x6, [sp, #176] + lsl x0, x1, #2 + ldp x7, x8, [sp, #288] + subs x0, x0, x7 + extr x1, x2, x1, #62 + sbcs x1, x1, x8 + ldp x7, x8, [sp, #304] + extr x2, x3, x2, #62 + sbcs x2, x2, x7 + extr x3, x4, x3, #62 + sbcs x3, x3, x8 + extr x4, x5, x4, #62 + ldp x7, x8, [sp, #320] + sbcs x4, x4, x7 + extr x5, x6, x5, #62 + sbcs x5, x5, x8 + lsr x6, x6, #62 + adc x6, x6, xzr + lsl x7, x6, #32 + subs x8, x6, x7 + sbc x7, x7, xzr + adds x0, x0, x8 + adcs x1, x1, x7 + adcs x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adcs x5, x5, xzr + csetm x8, cc + mov x9, #0xffffffff + and x9, x9, x8 + adds x0, x0, x9 + eor x9, x9, x8 + adcs x1, x1, x9 + mov x9, #0xfffffffffffffffe + and x9, x9, x8 + adcs x2, x2, x9 + adcs x3, x3, x8 + adcs x4, x4, x8 + adc x5, x5, x8 + stp x0, x1, [x25] + stp x2, x3, [x25, #16] + stp x4, x5, [x25, #32] + ldp x0, x1, [sp, #192] + mov x6, #0xffffffff + subs x6, x6, x0 + mov x7, #0xffffffff00000000 + sbcs x7, x7, x1 + ldp x0, x1, [sp, #208] + mov x8, #0xfffffffffffffffe + sbcs x8, x8, x0 + mov x13, #0xffffffffffffffff + sbcs x9, x13, x1 + ldp x0, x1, [sp, #224] + sbcs x10, x13, x0 + sbc x11, x13, x1 + lsl x0, x6, #3 + extr x1, x7, x6, #61 + extr x2, x8, x7, #61 + extr x3, x9, x8, #61 + extr x4, x10, x9, #61 + extr x5, x11, x10, #61 + lsr x6, x11, #61 + add x6, x6, #0x1 + ldp x8, x9, [sp, #240] + ldp x10, x11, [sp, #256] + mov x14, #0x3 + mul x15, x14, x8 + umulh x8, x14, x8 + adds x0, x0, x15 + mul x15, x14, x9 + umulh x9, x14, x9 + adcs x1, x1, x15 + mul x15, x14, x10 + umulh x10, x14, x10 + adcs x2, x2, x15 + mul x15, x14, x11 + umulh x11, x14, x11 + adcs x3, x3, x15 + mul x15, x14, x12 + umulh x12, x14, x12 + adcs x4, x4, x15 + mul x15, x14, x19 + umulh x13, x14, x19 + adcs x5, x5, x15 + adc x6, x6, xzr + adds x1, x1, x8 + adcs x2, x2, x9 + adcs x3, x3, x10 + adcs x4, x4, x11 + adcs x5, x5, x12 + adcs x6, x6, x13 + lsl x7, x6, #32 + subs x8, x6, x7 + sbc x7, x7, xzr + adds x0, x0, x8 + adcs x1, x1, x7 + adcs x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adcs x5, x5, xzr + csetm x6, cc + mov x7, #0xffffffff + and x7, x7, x6 + adds x0, x0, x7 + eor x7, x7, x6 + adcs x1, x1, x7 + mov x7, #0xfffffffffffffffe + and x7, x7, x6 + adcs x2, x2, x7 + adcs x3, x3, x6 + adcs x4, x4, x6 + adc x5, x5, x6 + stp x0, x1, [x25, #48] + stp x2, x3, [x25, #64] + stp x4, x5, [x25, #80] // Restore stack and registers @@ -1094,7 +3138,8 @@ S2N_BN_SYMBOL(p384_montjdouble): ldp x21, x22, [sp, NSPACE+16] ldp x23, x24, [sp, NSPACE+32] ldp x25, x26, [sp, NSPACE+48] - add sp, sp, NSPACE+64 + ldp x27, xzr, [sp, NSPACE+64] + add sp, sp, NSPACE+80 ret #if defined(__linux__) && defined(__ELF__) diff --git a/third_party/s2n-bignum/arm/p384/p384_montjscalarmul.S b/third_party/s2n-bignum/arm/p384/p384_montjscalarmul.S new file mode 100644 index 0000000000..11b5215b4c --- /dev/null +++ 
b/third_party/s2n-bignum/arm/p384/p384_montjscalarmul.S @@ -0,0 +1,9988 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery-Jacobian form scalar multiplication for P-384 +// Input scalar[6], point[18]; output res[18] +// +// extern void p384_montjscalarmul +// (uint64_t res[static 18], +// uint64_t scalar[static 6], +// uint64_t point[static 18]); +// +// This function is a variant of its affine point version p384_scalarmul. +// Here, input and output points are assumed to be in Jacobian form with +// their coordinates in the Montgomery domain. Thus, if priming indicates +// Montgomery form, x' = (2^384 * x) mod p_384 etc., each point argument +// is a triple (x',y',z') representing the affine point (x/z^2,y/z^3) when +// z' is nonzero or the point at infinity (group identity) if z' = 0. +// +// Given scalar = n and point = P, assumed to be on the NIST elliptic +// curve P-384, returns a representation of n * P. If the result is the +// point at infinity (either because the input point was or because the +// scalar was a multiple of p_384) then the output is guaranteed to +// represent the point at infinity, i.e. to have its z coordinate zero. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjscalarmul) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjscalarmul) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 +#define JACSIZE (3*NUMSIZE) + +// Safe copies of input res and additional values in variables. + +#define bf x22 +#define sgn x23 +#define j x24 +#define res x25 + +// Intermediate variables on the stack. +// The table is 16 entries, each of size JACSIZE = 3 * NUMSIZE + +#define scalarb sp, #(0*NUMSIZE) +#define acc sp, #(1*NUMSIZE) +#define tabent sp, #(4*NUMSIZE) + +#define tab sp, #(7*NUMSIZE) + +#define NSPACE #(55*NUMSIZE) + +// Avoid using .rep for the sake of the BoringSSL/AWS-LC delocator, +// which doesn't accept repetitions, assembler macros etc. + +#define selectblock(I) \ + cmp bf, #(1*I); \ + ldp x20, x21, [x19]; \ + csel x0, x20, x0, eq; \ + csel x1, x21, x1, eq; \ + ldp x20, x21, [x19, #16]; \ + csel x2, x20, x2, eq; \ + csel x3, x21, x3, eq; \ + ldp x20, x21, [x19, #32]; \ + csel x4, x20, x4, eq; \ + csel x5, x21, x5, eq; \ + ldp x20, x21, [x19, #48]; \ + csel x6, x20, x6, eq; \ + csel x7, x21, x7, eq; \ + ldp x20, x21, [x19, #64]; \ + csel x8, x20, x8, eq; \ + csel x9, x21, x9, eq; \ + ldp x20, x21, [x19, #80]; \ + csel x10, x20, x10, eq; \ + csel x11, x21, x11, eq; \ + ldp x20, x21, [x19, #96]; \ + csel x12, x20, x12, eq; \ + csel x13, x21, x13, eq; \ + ldp x20, x21, [x19, #112]; \ + csel x14, x20, x14, eq; \ + csel x15, x21, x15, eq; \ + ldp x20, x21, [x19, #128]; \ + csel x16, x20, x16, eq; \ + csel x17, x21, x17, eq; \ + add x19, x19, #JACSIZE + +// Loading large constants + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +S2N_BN_SYMBOL(p384_montjscalarmul): + + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x30, [sp, #-16]! + sub sp, sp, NSPACE + +// Preserve the "res" input argument; others get processed early. 
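(Editorial aside, not part of the patch.) The header comment above describes the Montgomery-Jacobian encoding in words; the short Python sketch below spells out the same correspondence, assuming R = 2^384 and the standard P-384 prime. The helper names are invented for illustration and do not exist in s2n-bignum; pow(a, -1, m) needs Python 3.8+.

    # Illustrative only: how an affine P-384 point relates to the
    # Montgomery-Jacobian triple (x', y', z') consumed by this function.
    p_384 = 2**384 - 2**128 - 2**96 + 2**32 - 1
    R = 2**384                        # Montgomery radix used by the "priming"

    def to_montgomery_jacobian(x, y):
        # Affine (x, y) with implicit z = 1; each coordinate is scaled by R mod p_384.
        return (x * R % p_384, y * R % p_384, R % p_384)

    def to_affine(xm, ym, zm):
        # Strip the Montgomery factor, then divide by z^2 and z^3.
        r_inv = pow(R, -1, p_384)
        x, y, z = xm * r_inv % p_384, ym * r_inv % p_384, zm * r_inv % p_384
        if z == 0:
            return None               # z' = 0 encodes the point at infinity
        z_inv = pow(z, -1, p_384)
        return (x * z_inv**2 % p_384, y * z_inv**3 % p_384)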
+ + mov res, x0 + +// Reduce the input scalar mod n_384, i.e. conditionally subtract n_384. +// Store it to "scalarb". + + ldp x3, x4, [x1] + movbig(x15, #0xecec, #0x196a, #0xccc5, #0x2973) + ldp x5, x6, [x1, #16] + movbig(x16, #0x581a, #0x0db2, #0x48b0, #0xa77a) + ldp x7, x8, [x1, #32] + movbig(x17, #0xc763, #0x4d81, #0xf437, #0x2ddf) + + subs x9, x3, x15 + sbcs x10, x4, x16 + sbcs x11, x5, x17 + adcs x12, x6, xzr + adcs x13, x7, xzr + adcs x14, x8, xzr + + csel x3, x3, x9, cc + csel x4, x4, x10, cc + csel x5, x5, x11, cc + csel x6, x6, x12, cc + csel x7, x7, x13, cc + csel x8, x8, x14, cc + + stp x3, x4, [scalarb] + stp x5, x6, [scalarb+16] + stp x7, x8, [scalarb+32] + +// Set the tab[0] table entry to the input point = 1 * P + + ldp x10, x11, [x2] + stp x10, x11, [tab] + ldp x12, x13, [x2, #16] + stp x12, x13, [tab+16] + ldp x14, x15, [x2, #32] + stp x14, x15, [tab+32] + + ldp x10, x11, [x2, #48] + stp x10, x11, [tab+48] + ldp x12, x13, [x2, #64] + stp x12, x13, [tab+64] + ldp x14, x15, [x2, #80] + stp x14, x15, [tab+80] + + ldp x10, x11, [x2, #96] + stp x10, x11, [tab+96] + ldp x12, x13, [x2, #112] + stp x12, x13, [tab+112] + ldp x14, x15, [x2, #128] + stp x14, x15, [tab+128] + +// Compute and record tab[1] = 2 * p, ..., tab[15] = 16 * P + + add x0, tab+JACSIZE*1 + add x1, tab + bl p384_montjscalarmul_p384_montjdouble + + add x0, tab+JACSIZE*2 + add x1, tab+JACSIZE*1 + add x2, tab + bl p384_montjscalarmul_p384_montjadd + + add x0, tab+JACSIZE*3 + add x1, tab+JACSIZE*1 + bl p384_montjscalarmul_p384_montjdouble + + add x0, tab+JACSIZE*4 + add x1, tab+JACSIZE*3 + add x2, tab + bl p384_montjscalarmul_p384_montjadd + + add x0, tab+JACSIZE*5 + add x1, tab+JACSIZE*2 + bl p384_montjscalarmul_p384_montjdouble + + add x0, tab+JACSIZE*6 + add x1, tab+JACSIZE*5 + add x2, tab + bl p384_montjscalarmul_p384_montjadd + + add x0, tab+JACSIZE*7 + add x1, tab+JACSIZE*3 + bl p384_montjscalarmul_p384_montjdouble + + add x0, tab+JACSIZE*8 + add x1, tab+JACSIZE*7 + add x2, tab + bl p384_montjscalarmul_p384_montjadd + + add x0, tab+JACSIZE*9 + add x1, tab+JACSIZE*4 + bl p384_montjscalarmul_p384_montjdouble + + add x0, tab+JACSIZE*10 + add x1, tab+JACSIZE*9 + add x2, tab + bl p384_montjscalarmul_p384_montjadd + + add x0, tab+JACSIZE*11 + add x1, tab+JACSIZE*5 + bl p384_montjscalarmul_p384_montjdouble + + add x0, tab+JACSIZE*12 + add x1, tab+JACSIZE*11 + add x2, tab + bl p384_montjscalarmul_p384_montjadd + + add x0, tab+JACSIZE*13 + add x1, tab+JACSIZE*6 + bl p384_montjscalarmul_p384_montjdouble + + add x0, tab+JACSIZE*14 + add x1, tab+JACSIZE*13 + add x2, tab + bl p384_montjscalarmul_p384_montjadd + + add x0, tab+JACSIZE*15 + add x1, tab+JACSIZE*7 + bl p384_montjscalarmul_p384_montjdouble + +// Add the recoding constant sum_i(16 * 32^i) to the scalar to allow signed +// digits. The digits of the constant, in lowest-to-highest order, are as +// follows; they are generated dynamically since none is a simple ARM load. 
+// +// 0x0842108421084210 +// 0x1084210842108421 +// 0x2108421084210842 +// 0x4210842108421084 +// 0x8421084210842108 +// 0x0842108421084210 + + ldp x0, x1, [scalarb] + ldp x2, x3, [scalarb+16] + ldp x4, x5, [scalarb+32] + movbig(x8, #0x1084, #0x2108, #0x4210, #0x8421) + adds x0, x0, x8, lsr #1 + adcs x1, x1, x8 + lsl x8, x8, #1 + adcs x2, x2, x8 + lsl x8, x8, #1 + adcs x3, x3, x8 + lsl x8, x8, #1 + adcs x4, x4, x8 + lsr x8, x8, #4 + adcs x5, x5, x8 + cset x6, cs + +// Record the top bitfield then shift the whole scalar left 4 bits +// to align the top of the next bitfield with the MSB (bits 379..383). + + extr bf, x6, x5, #60 + extr x5, x5, x4, #60 + extr x4, x4, x3, #60 + extr x3, x3, x2, #60 + extr x2, x2, x1, #60 + extr x1, x1, x0, #60 + lsl x0, x0, #4 + stp x0, x1, [scalarb] + stp x2, x3, [scalarb+16] + stp x4, x5, [scalarb+32] + +// Initialize the accumulator to the corresponding entry using constant-time +// lookup in the table. This top digit, uniquely, is not recoded so there is +// no sign adjustment to make. + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + mov x16, xzr + mov x17, xzr + + add x19, tab + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + selectblock(9) + selectblock(10) + selectblock(11) + selectblock(12) + selectblock(13) + selectblock(14) + selectblock(15) + selectblock(16) + + stp x0, x1, [acc] + stp x2, x3, [acc+16] + stp x4, x5, [acc+32] + stp x6, x7, [acc+48] + stp x8, x9, [acc+64] + stp x10, x11, [acc+80] + stp x12, x13, [acc+96] + stp x14, x15, [acc+112] + stp x16, x17, [acc+128] + + mov j, #380 + +// Main loop over size-5 bitfields: double 5 times then add signed digit +// At each stage we shift the scalar left by 5 bits so we can simply pick +// the top 5 bits as the bitfield, saving some fiddle over indexing. 
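(Editorial aside, not part of the patch.) The recoding described in the comments above, adding sum_i(16 * 32^i) and then consuming 5-bit windows from the top, can be checked with a short Python sketch. The function name and the window count of 76 are assumptions for illustration; 76 matches the loop counter below starting at 380 = 5 * 76.

    # Illustrative only: signed-digit recoding of a 384-bit scalar n.
    def recode(n, windows=76):
        c = sum(16 * 32**i for i in range(windows))     # the constant shown above
        t = n + c
        top = t >> (5 * windows)                        # bits 380 and up, used unsigned
        digits = [((t >> (5 * i)) & 31) - 16            # each window becomes -16..15
                  for i in range(windows)]
        # The recoded form reconstructs the original scalar exactly.
        assert top * 32**windows + sum(d * 32**i for i, d in enumerate(digits)) == n
        return top, digits[::-1]                        # high digit first, as consumed

Every non-top digit lies in [-16, 15], so the constant-time table of 1*P .. 16*P plus an optional negation of the y coordinate covers every case; a zero digit selects no table entry, leaving z = 0, i.e. the point at infinity.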
+ +p384_montjscalarmul_mainloop: + sub j, j, #5 + + add x0, acc + add x1, acc + bl p384_montjscalarmul_p384_montjdouble + + add x0, acc + add x1, acc + bl p384_montjscalarmul_p384_montjdouble + + add x0, acc + add x1, acc + bl p384_montjscalarmul_p384_montjdouble + + add x0, acc + add x1, acc + bl p384_montjscalarmul_p384_montjdouble + + add x0, acc + add x1, acc + bl p384_montjscalarmul_p384_montjdouble + +// Choose the bitfield and adjust it to sign and magnitude + + ldp x0, x1, [scalarb] + ldp x2, x3, [scalarb+16] + ldp x4, x5, [scalarb+32] + lsr bf, x5, #59 + extr x5, x5, x4, #59 + extr x4, x4, x3, #59 + extr x3, x3, x2, #59 + extr x2, x2, x1, #59 + extr x1, x1, x0, #59 + lsl x0, x0, #5 + stp x0, x1, [scalarb] + stp x2, x3, [scalarb+16] + stp x4, x5, [scalarb+32] + + subs bf, bf, #16 + cset sgn, lo // sgn = sign of digit (1 = negative) + cneg bf, bf, lo // bf = absolute value of digit + +// Conditionally select the table entry tab[i-1] = i * P in constant time + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + mov x16, xzr + mov x17, xzr + + add x19, tab + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + selectblock(9) + selectblock(10) + selectblock(11) + selectblock(12) + selectblock(13) + selectblock(14) + selectblock(15) + selectblock(16) + +// Store it to "tabent" with the y coordinate optionally negated. +// This is done carefully to give coordinates < p_384 even in +// the degenerate case y = 0 (when z = 0 for points on the curve). + + stp x0, x1, [tabent] + stp x2, x3, [tabent+16] + stp x4, x5, [tabent+32] + + stp x12, x13, [tabent+96] + stp x14, x15, [tabent+112] + stp x16, x17, [tabent+128] + + mov x0, #0x00000000ffffffff + subs x0, x0, x6 + orr x12, x6, x7 + mov x1, #0xffffffff00000000 + sbcs x1, x1, x7 + orr x13, x8, x9 + mov x2, #0xfffffffffffffffe + sbcs x2, x2, x8 + orr x14, x10, x11 + mov x5, #0xffffffffffffffff + sbcs x3, x5, x9 + orr x12, x12, x13 + sbcs x4, x5, x10 + orr x12, x12, x14 + sbcs x5, x5, x11 + + cmp sgn, xzr + ccmp x12, xzr, #4, ne + + csel x6, x0, x6, ne + csel x7, x1, x7, ne + csel x8, x2, x8, ne + csel x9, x3, x9, ne + csel x10, x4, x10, ne + csel x11, x5, x11, ne + + stp x6, x7, [tabent+48] + stp x8, x9, [tabent+64] + stp x10, x11, [tabent+80] + +// Add to the accumulator + + add x0, acc + add x1, acc + add x2, tabent + bl p384_montjscalarmul_p384_montjadd + + cbnz j, p384_montjscalarmul_mainloop + +// That's the end of the main loop, and we just need to copy the +// result in "acc" to the output. + + ldp x0, x1, [acc] + stp x0, x1, [res] + ldp x0, x1, [acc+16] + stp x0, x1, [res, #16] + ldp x0, x1, [acc+32] + stp x0, x1, [res, #32] + ldp x0, x1, [acc+48] + stp x0, x1, [res, #48] + ldp x0, x1, [acc+64] + stp x0, x1, [res, #64] + ldp x0, x1, [acc+80] + stp x0, x1, [res, #80] + ldp x0, x1, [acc+96] + stp x0, x1, [res, #96] + ldp x0, x1, [acc+112] + stp x0, x1, [res, #112] + ldp x0, x1, [acc+128] + stp x0, x1, [res, #128] + +// Restore stack and registers and return + + add sp, sp, NSPACE + ldp x25, x30, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + ret + +// Local copies of subroutines, complete clones at the moment + +p384_montjscalarmul_p384_montjadd: + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! 
+ stp x25, x26, [sp, #-16]! + stp x27, xzr, [sp, #-16]! + sub sp, sp, #0x150 + mov x24, x0 + mov x25, x1 + mov x26, x2 + mov x0, sp + ldr q1, [x25, #96] + ldp x9, x2, [x25, #96] + ldr q0, [x25, #96] + ldp x4, x6, [x25, #112] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [x25, #128] + xtn v30.2s, v0.2d + ldr q1, [x25, #128] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [x25, #128] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [x0] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [x0, #16] + csetm x15, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + stp x11, x14, [x0, #32] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc // cc = lo, ul, last + cinv x16, x15, cc // cc = lo, ul, last + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x4, x2, x4 + 
cneg x4, x4, cc // cc = lo, ul, last + csetm x7, cc // cc = lo, ul, last + subs x2, x10, x6 + cinv x8, x8, cc // cc = lo, ul, last + cneg x2, x2, cc // cc = lo, ul, last + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [x0, #16] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [x0] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [x0, #32] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff // #4294967295 + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 // #-4294967295 + adcs x14, x14, x2 + mov x2, #0x1 // #1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [x0] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [x0, #16] + adc x17, x14, xzr + stp x2, x17, [x0, #32] + ldr q1, [x26, #96] + ldp x9, x2, [x26, #96] + ldr q0, [x26, #96] + ldp x4, x6, [x26, #112] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [x26, #128] + xtn v30.2s, v0.2d + ldr q1, [x26, #128] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [x26, #128] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, 
v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #240] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [sp, #256] + csetm x15, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + stp x11, x14, [sp, #272] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc // cc = lo, ul, last + cinv x16, x15, cc // cc = lo, ul, last + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x4, x2, x4 + cneg x4, x4, cc // cc = lo, ul, last + csetm x7, cc // cc = lo, ul, last + subs x2, x10, x6 + cinv x8, x8, cc // cc = lo, ul, last + cneg x2, x2, cc // cc = lo, ul, last + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [sp, #256] 
+ umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #240] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #272] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff // #4294967295 + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 // #-4294967295 + adcs x14, x14, x2 + mov x2, #0x1 // #1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #240] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #256] + adc x17, x14, xzr + stp x2, x17, [sp, #272] + stp x23, x24, [sp, #-48] + ldr q3, [x26, #96] + ldr q25, [x25, #48] + ldp x13, x23, [x25, #48] + ldp x3, x21, [x26, #96] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x25, #80] + ldp x8, x24, [x26, #112] + subs x6, x3, x21 + ldr q0, [x26, #128] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [x25, #64] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, 
last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x25, #80] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x26, #128] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #288] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #304] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #320] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc 
// cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #288] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #304] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #320] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #288] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #304] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #320] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #288] + ldp x21, x12, [sp, #304] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #320] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + 
sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #288] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #304] + adc x12, x15, x23 + stp x21, x12, [sp, #320] + ldr q3, [x25, #96] + ldr q25, [x26, #48] + ldp x13, x23, [x26, #48] + ldp x3, x21, [x25, #96] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x26, #80] + ldp x8, x24, [x25, #112] + subs x6, x3, x21 + ldr q0, [x25, #128] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [x26, #64] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x26, #80] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x25, #128] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, 
#32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #48] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #64] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #80] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #48] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #64] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #80] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #48] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #64] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #80] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, 
cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #48] + ldp x21, x12, [sp, #64] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #80] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #48] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #64] + adc x12, x15, x23 + stp x21, x12, [sp, #80] + mov x1, sp + ldr q3, [x1] + ldr q25, [x26] + ldp x13, x23, [x26] + ldp x3, x21, [x1] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x26, #32] + ldp x8, x24, [x1, #16] + subs x6, x3, x21 + ldr q0, [x1, #32] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [x26, #16] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, 
v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x26, #32] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x1, #32] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #96] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #112] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #128] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, 
xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #96] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #112] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #128] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #96] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #112] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #128] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #96] + ldp x21, x12, [sp, #112] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #128] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc 
x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #96] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #112] + adc x12, x15, x23 + stp x21, x12, [sp, #128] + ldr q3, [sp, #240] + ldr q25, [x25] + ldp x13, x23, [x25] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x25, #32] + ldp x8, x24, [sp, #256] + subs x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [x25, #16] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x25, #32] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, 
x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #192] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #208] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #224] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #192] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #208] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #224] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #192] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #208] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #224] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + 
csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #192] + ldp x21, x12, [sp, #208] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #224] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #192] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #208] + adc x12, x15, x23 + stp x21, x12, [sp, #224] + mov x1, sp + ldr q3, [x1] + ldr q25, [sp, #48] + ldp x13, x23, [sp, #48] + ldp x3, x21, [x1] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #80] + ldp x8, x24, [x1, #16] + subs x6, x3, x21 + ldr q0, [x1, #32] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, 
x7, cc // cc = lo, ul, last + ldp x6, x14, [sp, #64] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #80] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x1, #32] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #48] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #64] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #80] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = 
lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #48] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #64] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #80] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #48] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #64] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #80] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #48] + ldp x21, x12, [sp, #64] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #80] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + 
adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #48] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #64] + adc x12, x15, x23 + stp x21, x12, [sp, #80] + ldr q3, [sp, #240] + ldr q25, [sp, #288] + ldp x13, x23, [sp, #288] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #320] + ldp x8, x24, [sp, #256] + subs x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [sp, #304] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #320] + 
add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #288] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #304] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #320] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #288] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #304] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #320] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #288] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #304] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #320] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = 
lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #288] + ldp x21, x12, [sp, #304] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #320] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x2, x24, x11 + stp x22, x5, [sp, #288] + adcs x11, x13, x23 + adcs x12, x8, x23 + stp x2, x11, [sp, #304] + adc x13, x15, x23 + stp x12, x13, [sp, #320] + ldp x5, x6, [sp, #96] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #112] + ldp x4, x3, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #128] + ldp x4, x3, [sp, #224] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds 
x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + ldp x5, x6, [sp, #48] + ldp x4, x3, [sp, #288] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #64] + sbcs x7, x7, x2 + sbcs x8, x8, x11 + ldp x9, x10, [sp, #80] + sbcs x9, x9, x12 + sbcs x10, x10, x13 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #48] + stp x7, x8, [sp, #64] + stp x9, x10, [sp, #80] + ldr q1, [sp, #240] + ldp x9, x2, [sp, #240] + ldr q0, [sp, #240] + ldp x4, x6, [sp, #256] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #272] + xtn v30.2s, v0.2d + ldr q1, [sp, #272] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #272] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #144] + sbc x14, x7, xzr + mul x7, x4, 
x10 + subs x1, x9, x2 + stp x15, x3, [sp, #160] + csetm x15, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + stp x11, x14, [sp, #176] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc // cc = lo, ul, last + cinv x16, x15, cc // cc = lo, ul, last + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x4, x2, x4 + cneg x4, x4, cc // cc = lo, ul, last + csetm x7, cc // cc = lo, ul, last + subs x2, x10, x6 + cinv x8, x8, cc // cc = lo, ul, last + cneg x2, x2, cc // cc = lo, ul, last + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [sp, #160] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #144] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #176] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff // #4294967295 + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 // #-4294967295 + adcs x14, x14, x2 + mov x2, #0x1 // #1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and 
x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #144] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #160] + adc x17, x14, xzr + stp x2, x17, [sp, #176] + mov x0, sp + ldr q1, [sp, #48] + ldp x9, x2, [sp, #48] + ldr q0, [sp, #48] + ldp x4, x6, [sp, #64] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #80] + xtn v30.2s, v0.2d + ldr q1, [sp, #80] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #80] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [x0] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [x0, #16] + csetm x15, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + stp x11, x14, [x0, #32] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc // cc = lo, ul, last + cinv x16, x15, cc // cc = lo, ul, last + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + 
subs x9, x9, x4 + csetm x8, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x4, x2, x4 + cneg x4, x4, cc // cc = lo, ul, last + csetm x7, cc // cc = lo, ul, last + subs x2, x10, x6 + cinv x8, x8, cc // cc = lo, ul, last + cneg x2, x2, cc // cc = lo, ul, last + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [x0, #16] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [x0] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [x0, #32] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff // #4294967295 + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 // #-4294967295 + adcs x14, x14, x2 + mov x2, #0x1 // #1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [x0] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [x0, #16] + adc x17, x14, xzr + stp x2, x17, [x0, #32] + ldr q3, [sp, #144] + ldr q25, [sp, #192] + ldp x13, x23, [sp, #192] + ldp x3, x21, [sp, #144] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #224] + ldp x8, x24, [sp, #160] + subs x6, x3, x21 + ldr q0, [sp, #176] + movi v23.2d, 
#0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [sp, #208] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #224] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #176] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #192] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #208] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #224] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, 
x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #192] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #208] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #224] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #192] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #208] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #224] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #192] + ldp x21, x12, [sp, #208] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #224] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs 
x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #192] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #208] + adc x12, x15, x23 + stp x21, x12, [sp, #224] + ldr q3, [sp, #144] + ldr q25, [sp, #96] + ldp x13, x23, [sp, #96] + ldp x3, x21, [sp, #144] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #128] + ldp x8, x24, [sp, #160] + subs x6, x3, x21 + ldr q0, [sp, #176] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [sp, #112] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 
+ cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #128] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #176] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #96] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #112] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #128] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #96] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #112] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #128] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #96] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #112] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + 
umulh x2, x16, x6 + stp x3, x12, [sp, #128] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #96] + ldp x21, x12, [sp, #112] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #128] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x2, x24, x11 + stp x22, x5, [sp, #96] + adcs x11, x13, x23 + adcs x12, x8, x23 + stp x2, x11, [sp, #112] + adc x13, x15, x23 + stp x12, x13, [sp, #128] + mov x0, sp + mov x1, sp + ldp x5, x6, [x1] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, 
x8, [x1, #16] + ldp x4, x3, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [x1, #32] + ldp x4, x3, [sp, #224] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [x0] + stp x7, x8, [x0, #16] + stp x9, x10, [x0, #32] + ldp x5, x6, [sp, #96] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x4, x3, [sp, #208] + sbcs x7, x2, x4 + sbcs x8, x11, x3 + ldp x4, x3, [sp, #224] + sbcs x9, x12, x4 + sbcs x10, x13, x3 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #144] + stp x7, x8, [sp, #160] + stp x9, x10, [sp, #176] + ldr q3, [sp, #240] + ldr q25, [x25, #96] + ldp x13, x23, [x25, #96] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x25, #128] + ldp x8, x24, [sp, #256] + subs x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [x25, #112] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x25, #128] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, 
x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #240] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #256] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #272] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #240] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #256] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #272] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #240] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #256] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #272] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, 
x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #240] + ldp x21, x12, [sp, #256] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #272] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #240] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #256] + adc x12, x15, x23 + stp x21, x12, [sp, #272] + mov x0, sp + mov x1, sp + ldp x5, x6, [x1] + ldp x4, x3, [sp, #96] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [x1, #16] + ldp x4, x3, [sp, #112] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [x1, #32] + ldp x4, x3, [sp, #128] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds x2, x5, x4 + eor x4, x4, x3 + adcs x11, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x4, x7, x4 + adcs x12, x8, x3 + adcs x13, x9, x3 + adc x3, x10, x3 + stp x2, x11, [x0] + stp x4, x12, [x0, #16] + stp x13, 
x3, [x0, #32] + ldp x5, x6, [sp, #192] + subs x5, x5, x2 + sbcs x6, x6, x11 + ldp x7, x8, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x12 + ldp x9, x10, [sp, #224] + sbcs x9, x9, x13 + sbcs x10, x10, x3 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #192] + stp x7, x8, [sp, #208] + stp x9, x10, [sp, #224] + ldr q3, [sp, #144] + ldr q25, [sp, #288] + ldp x13, x23, [sp, #288] + ldp x3, x21, [sp, #144] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #320] + ldp x8, x24, [sp, #160] + subs x6, x3, x21 + ldr q0, [sp, #176] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [sp, #304] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #320] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #176] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + 
sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #144] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #160] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #176] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #144] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #160] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #176] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #144] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #160] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #176] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn 
x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #144] + ldp x21, x12, [sp, #160] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #176] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #144] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #160] + adc x12, x15, x23 + stp x21, x12, [sp, #176] + ldr q3, [sp, #240] + ldr q25, [x26, #96] + ldp x13, x23, [x26, #96] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [x26, #128] + ldp x8, x24, [sp, #256] + subs x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [x26, #112] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + 
adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x26, #128] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #240] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #256] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #272] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc 
x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #240] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #256] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #272] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #240] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #256] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #272] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #240] + ldp x21, x12, [sp, #256] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #272] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, 
xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #240] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #256] + adc x12, x15, x23 + stp x21, x12, [sp, #272] + ldp x2, x27, [sp, #-48] + ldr q3, [sp, #48] + ldr q25, [sp, #192] + ldp x13, x23, [sp, #192] + ldp x3, x21, [sp, #48] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #224] + ldp x8, x24, [sp, #64] + subs x6, x3, x21 + ldr q0, [sp, #80] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [sp, #208] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #224] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #80] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, 
x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #192] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #208] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #224] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #192] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #208] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #224] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #192] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #208] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #224] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 
+ adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #192] + ldp x21, x12, [sp, #208] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #224] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x2, x6, x20 + eor x3, x20, x23 + adcs x6, x7, x3 + adcs x7, x24, x11 + adcs x9, x13, x23 + adcs x10, x8, x23 + adc x11, x15, x23 + ldp x4, x3, [sp, #144] + subs x5, x2, x4 + sbcs x6, x6, x3 + ldp x4, x3, [sp, #160] + sbcs x7, x7, x4 + sbcs x8, x9, x3 + ldp x4, x3, [sp, #176] + sbcs x9, x10, x4 + sbcs x10, x11, x3 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds x19, x5, x4 + eor x4, x4, x3 + adcs x24, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x7, x8, [sp, #208] + stp x9, x10, [sp, #224] + ldp x0, x1, [x25, #96] + ldp x2, x3, [x25, #112] + ldp x4, x5, [x25, #128] + orr x20, x0, x1 + orr x21, x2, x3 + orr x22, x4, x5 + orr x20, x20, x21 + orr x20, x20, x22 + cmp x20, xzr + cset x20, ne // ne = any + ldp x6, x7, [x26, #96] + ldp x8, x9, [x26, #112] + ldp x10, x11, [x26, #128] + orr x21, x6, x7 + orr x22, x8, x9 + orr x23, x10, x11 + orr x21, x21, x22 + orr x21, x21, x23 + cmp x21, xzr + cset x21, ne // ne = any + cmp x21, x20 + 
+        ldp x12, x13, [sp, #240]
+        csel x12, x0, x12, cc // cc = lo, ul, last
+        csel x13, x1, x13, cc // cc = lo, ul, last
+        csel x12, x6, x12, hi // hi = pmore
+        csel x13, x7, x13, hi // hi = pmore
+        ldp x14, x15, [sp, #256]
+        csel x14, x2, x14, cc // cc = lo, ul, last
+        csel x15, x3, x15, cc // cc = lo, ul, last
+        csel x14, x8, x14, hi // hi = pmore
+        csel x15, x9, x15, hi // hi = pmore
+        ldp x16, x17, [sp, #272]
+        csel x16, x4, x16, cc // cc = lo, ul, last
+        csel x17, x5, x17, cc // cc = lo, ul, last
+        csel x16, x10, x16, hi // hi = pmore
+        csel x17, x11, x17, hi // hi = pmore
+        ldp x20, x21, [x25]
+        ldp x0, x1, [sp]
+        csel x0, x20, x0, cc // cc = lo, ul, last
+        csel x1, x21, x1, cc // cc = lo, ul, last
+        ldp x20, x21, [x26]
+        csel x0, x20, x0, hi // hi = pmore
+        csel x1, x21, x1, hi // hi = pmore
+        ldp x20, x21, [x25, #16]
+        ldp x2, x3, [sp, #16]
+        csel x2, x20, x2, cc // cc = lo, ul, last
+        csel x3, x21, x3, cc // cc = lo, ul, last
+        ldp x20, x21, [x26, #16]
+        csel x2, x20, x2, hi // hi = pmore
+        csel x3, x21, x3, hi // hi = pmore
+        ldp x20, x21, [x25, #32]
+        ldp x4, x5, [sp, #32]
+        csel x4, x20, x4, cc // cc = lo, ul, last
+        csel x5, x21, x5, cc // cc = lo, ul, last
+        ldp x20, x21, [x26, #32]
+        csel x4, x20, x4, hi // hi = pmore
+        csel x5, x21, x5, hi // hi = pmore
+        ldp x20, x21, [x25, #48]
+        csel x6, x20, x19, cc // cc = lo, ul, last
+        csel x7, x21, x24, cc // cc = lo, ul, last
+        ldp x20, x21, [x26, #48]
+        csel x6, x20, x6, hi // hi = pmore
+        csel x7, x21, x7, hi // hi = pmore
+        ldp x20, x21, [x25, #64]
+        ldp x8, x9, [sp, #208]
+        csel x8, x20, x8, cc // cc = lo, ul, last
+        csel x9, x21, x9, cc // cc = lo, ul, last
+        ldp x20, x21, [x26, #64]
+        csel x8, x20, x8, hi // hi = pmore
+        csel x9, x21, x9, hi // hi = pmore
+        ldp x20, x21, [x25, #80]
+        ldp x10, x11, [sp, #224]
+        csel x10, x20, x10, cc // cc = lo, ul, last
+        csel x11, x21, x11, cc // cc = lo, ul, last
+        ldp x20, x21, [x26, #80]
+        csel x10, x20, x10, hi // hi = pmore
+        csel x11, x21, x11, hi // hi = pmore
+        stp x0, x1, [x27]
+        stp x2, x3, [x27, #16]
+        stp x4, x5, [x27, #32]
+        stp x6, x7, [x27, #48]
+        stp x8, x9, [x27, #64]
+        stp x10, x11, [x27, #80]
+        stp x12, x13, [x27, #96]
+        stp x14, x15, [x27, #112]
+        stp x16, x17, [x27, #128]
+        add sp, sp, #0x150
+        ldp x27, xzr, [sp], #16
+        ldp x25, x26, [sp], #16
+        ldp x23, x24, [sp], #16
+        ldp x21, x22, [sp], #16
+        ldp x19, x20, [sp], #16
+        ret
+
+p384_montjscalarmul_p384_montjdouble:
+        sub sp, sp, #0x1a0
+        stp x19, x20, [sp, #336]
+        stp x21, x22, [sp, #352]
+        stp x23, x24, [sp, #368]
+        stp x25, x26, [sp, #384]
+        stp x27, xzr, [sp, #400]
+        mov x25, x0
+        mov x26, x1
+        mov x0, sp
+        ldr q1, [x26, #96]
+        ldp x9, x2, [x26, #96]
+        ldr q0, [x26, #96]
+        ldp x4, x6, [x26, #112]
+        rev64 v21.4s, v1.4s
+        uzp2 v28.4s, v1.4s, v1.4s
+        umulh x7, x9, x2
+        xtn v17.2s, v1.2d
+        mul v27.4s, v21.4s, v0.4s
+        ldr q20, [x26, #128]
+        xtn v30.2s, v0.2d
+        ldr q1, [x26, #128]
+        uzp2 v31.4s, v0.4s, v0.4s
+        ldp x5, x10, [x26, #128]
+        umulh x8, x9, x4
+        uaddlp v3.2d, v27.4s
+        umull v16.2d, v30.2s, v17.2s
+        mul x16, x9, x4
+        umull v27.2d, v30.2s, v28.2s
+        shrn v0.2s, v20.2d, #32
+        xtn v7.2s, v20.2d
+        shl v20.2d, v3.2d, #32
+        umull v3.2d, v31.2s, v28.2s
+        mul x3, x2, x4
+        umlal v20.2d, v30.2s, v17.2s
+        umull v22.2d, v7.2s, v0.2s
+        usra v27.2d, v16.2d, #32
+        umulh x11, x2, x4
+        movi v21.2d, #0xffffffff
+        uzp2 v28.4s, v1.4s, v1.4s
+        adds x15, x16, x7
+        and v5.16b, v27.16b, v21.16b
+        adcs x3, x3, x8
+        usra v3.2d, v27.2d, #32
+        dup v29.2d, x6
+        adcs x16, x11, xzr
+        mov x14, v20.d[0]
+        umlal v5.2d, v31.2s, v17.2s
+        mul x8, x9, x2
mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [x0] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [x0, #16] + csetm x15, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + stp x11, x14, [x0, #32] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc // cc = lo, ul, last + cinv x16, x15, cc // cc = lo, ul, last + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x4, x2, x4 + cneg x4, x4, cc // cc = lo, ul, last + csetm x7, cc // cc = lo, ul, last + subs x2, x10, x6 + cinv x8, x8, cc // cc = lo, ul, last + cneg x2, x2, cc // cc = lo, ul, last + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [x0, #16] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [x0] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [x0, #32] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + 
adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff // #4294967295 + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 // #-4294967295 + adcs x14, x14, x2 + mov x2, #0x1 // #1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [x0] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [x0, #16] + adc x17, x14, xzr + stp x2, x17, [x0, #32] + ldr q1, [x26, #48] + ldp x9, x2, [x26, #48] + ldr q0, [x26, #48] + ldp x4, x6, [x26, #64] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [x26, #80] + xtn v30.2s, v0.2d + ldr q1, [x26, #80] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [x26, #80] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra 
v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #48] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [sp, #64] + csetm x15, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + stp x11, x14, [sp, #80] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc // cc = lo, ul, last + cinv x16, x15, cc // cc = lo, ul, last + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x4, x2, x4 + cneg x4, x4, cc // cc = lo, ul, last + csetm x7, cc // cc = lo, ul, last + subs x2, x10, x6 + cinv x8, x8, cc // cc = lo, ul, last + cneg x2, x2, cc // cc = lo, ul, last + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [sp, #64] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #48] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #80] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, 
x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff // #4294967295 + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 // #-4294967295 + adcs x14, x14, x2 + mov x2, #0x1 // #1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #48] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #64] + adc x17, x14, xzr + stp x2, x17, [sp, #80] + ldp x5, x6, [x26] + ldp x4, x3, [sp] + adds x5, x5, x4 + adcs x6, x6, x3 + ldp x7, x8, [x26, #16] + ldp x4, x3, [sp, #16] + adcs x7, x7, x4 + adcs x8, x8, x3 + ldp x9, x10, [x26, #32] + ldp x4, x3, [sp, #32] + adcs x9, x9, x4 + adcs x10, x10, x3 + csetm x3, cs // cs = hs, nlast + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + subs x5, x5, x4 + eor x4, x4, x3 + sbcs x6, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + sbcs x7, x7, x4 + sbcs x8, x8, x3 + sbcs x9, x9, x3 + sbc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + mov x2, sp + ldp x5, x6, [x26] + ldp x4, x3, [x2] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [x26, #16] + ldp x4, x3, [x2, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [x26, #32] + ldp x4, x3, [x2, #32] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds x13, x5, x4 + eor x4, x4, x3 + adcs x23, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x13, x23, [sp, #192] + stp x7, x8, [sp, #208] + stp x9, x10, [sp, #224] + ldr q3, [sp, #240] + ldr q25, [sp, #192] + ldp x3, x21, [sp, #240] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #224] + ldp x8, x24, [sp, #256] + subs x6, x3, x21 + ldr q0, [sp, #272] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [sp, #208] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + 
umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #224] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [sp, #272] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [sp, #96] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #112] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #128] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, 
x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [sp, #96] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #112] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #128] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [sp, #96] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [sp, #112] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #128] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #96] + ldp x21, x12, [sp, #112] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #128] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + 
subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #96] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #112] + adc x12, x15, x23 + stp x21, x12, [sp, #128] + ldp x5, x6, [x26, #48] + ldp x4, x3, [x26, #96] + adds x5, x5, x4 + adcs x6, x6, x3 + ldp x7, x8, [x26, #64] + ldp x4, x3, [x26, #112] + adcs x7, x7, x4 + adcs x8, x8, x3 + ldp x9, x10, [x26, #80] + ldp x4, x3, [x26, #128] + adcs x9, x9, x4 + adcs x10, x10, x3 + adc x3, xzr, xzr + mov x4, #0xffffffff // #4294967295 + cmp x5, x4 + mov x4, #0xffffffff00000000 // #-4294967296 + sbcs xzr, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + sbcs xzr, x7, x4 + adcs xzr, x8, xzr + adcs xzr, x9, xzr + adcs xzr, x10, xzr + adcs x3, x3, xzr + csetm x3, ne // ne = any + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + subs x5, x5, x4 + eor x4, x4, x3 + sbcs x6, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + sbcs x7, x7, x4 + sbcs x8, x8, x3 + sbcs x9, x9, x3 + sbc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + ldr q1, [sp, #96] + ldp x9, x2, [sp, #96] + ldr q0, [sp, #96] + ldp x4, x6, [sp, #112] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #128] + xtn v30.2s, v0.2d + ldr q1, [sp, #128] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #128] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and 
v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #288] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [sp, #304] + csetm x15, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + stp x11, x14, [sp, #320] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc // cc = lo, ul, last + cinv x16, x15, cc // cc = lo, ul, last + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x4, x2, x4 + cneg x4, x4, cc // cc = lo, ul, last + csetm x7, cc // cc = lo, ul, last + subs x2, x10, x6 + cinv x8, x8, cc // cc = lo, ul, last + cneg x2, x2, cc // cc = lo, ul, last + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [sp, #304] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #288] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #320] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc 
x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff // #4294967295 + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 // #-4294967295 + adcs x14, x14, x2 + mov x2, #0x1 // #1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #288] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #304] + adc x17, x14, xzr + stp x2, x17, [sp, #320] + ldr q3, [x26] + ldr q25, [sp, #48] + ldp x13, x23, [sp, #48] + ldp x3, x21, [x26] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #80] + ldp x8, x24, [x26, #16] + subs x6, x3, x21 + ldr q0, [x26, #32] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [sp, #64] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #80] + add x2, x12, x7 + adc x7, x5, 
x10 + ldp x5, x10, [x26, #32] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x26, x4, x16 + mov x4, v27.d[0] + sbcs x27, x20, x11 + sbcs x20, x9, x12 + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #160] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #176] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #160] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #176] + adds x20, x22, x26 + mul x10, x13, x14 + adcs x11, x11, x27 + eor x9, x8, x21 + adcs x26, x19, x17 + stp x20, x11, [sp, #144] + adcs x27, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #176] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, 
last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #144] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #176] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x26 + eor x1, x22, x9 + adcs x24, x23, x27 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x26 + adcs x15, x17, x27 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #144] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [sp, #160] + adc x12, x15, x23 + stp x21, x12, [sp, #176] + ldr q1, [sp, #240] + ldp x9, x2, [sp, #240] + ldr q0, [sp, #240] + ldp x4, x6, [sp, #256] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #272] + xtn v30.2s, v0.2d + ldr q1, [sp, #272] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #272] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + 
shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x19, x3, x17 + sbcs x20, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #192] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + csetm x15, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + stp x11, x14, [sp, #224] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc // cc = lo, ul, last + cinv x16, x15, cc // cc = lo, ul, last + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x4, x2, x4 + cneg x4, x4, cc // cc = lo, ul, last + csetm x7, cc // cc = lo, ul, last + subs x2, x10, x6 + cinv x8, x8, cc // cc = lo, ul, last + cneg x2, x2, cc // cc = lo, ul, last + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, 
[sp, #192] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #224] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x19 + adcs x1, x1, x20 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff // #4294967295 + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 // #-4294967295 + adcs x14, x14, x2 + mov x2, #0x1 // #1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x19, x13, x1 + and x13, x4, x9 + adcs x20, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #208] + adc x17, x14, xzr + stp x2, x17, [sp, #224] + ldp x0, x1, [sp, #288] + mov x6, #0xffffffff // #4294967295 + subs x6, x6, x0 + mov x7, #0xffffffff00000000 // #-4294967296 + sbcs x7, x7, x1 + ldp x0, x1, [sp, #304] + mov x8, #0xfffffffffffffffe // #-2 + sbcs x8, x8, x0 + mov x13, #0xffffffffffffffff // #-1 + sbcs x9, x13, x1 + ldp x0, x1, [sp, #320] + sbcs x10, x13, x0 + sbc x11, x13, x1 + mov x12, #0x9 // #9 + mul x0, x12, x6 + mul x1, x12, x7 + mul x2, x12, x8 + mul x3, x12, x9 + mul x4, x12, x10 + mul x5, x12, x11 + umulh x6, x12, x6 + umulh x7, x12, x7 + umulh x8, x12, x8 + umulh x9, x12, x9 + umulh x10, x12, x10 + umulh x12, x12, x11 + adds x1, x1, x6 + adcs x2, x2, x7 + adcs x3, x3, x8 + adcs x4, x4, x9 + adcs x5, x5, x10 + mov x6, #0x1 // #1 + adc x6, x12, x6 + ldp x8, x9, [sp, #144] + ldp x10, x11, [sp, #160] + ldp x12, x13, [sp, #176] + mov x14, #0xc // #12 + mul x15, x14, x8 + umulh x8, x14, x8 + adds x0, x0, x15 + mul x15, x14, x9 + umulh x9, x14, x9 + adcs x1, x1, x15 + mul x15, x14, x10 + umulh x10, x14, x10 + adcs x2, x2, x15 + mul x15, x14, x11 + umulh x11, x14, x11 + adcs x3, x3, x15 + mul x15, x14, x12 + umulh x12, x14, x12 + adcs x4, x4, 
x15 + mul x15, x14, x13 + umulh x13, x14, x13 + adcs x5, x5, x15 + adc x6, x6, xzr + adds x1, x1, x8 + adcs x2, x2, x9 + adcs x3, x3, x10 + adcs x4, x4, x11 + adcs x5, x5, x12 + adcs x6, x6, x13 + lsl x7, x6, #32 + subs x8, x6, x7 + sbc x7, x7, xzr + adds x0, x0, x8 + adcs x1, x1, x7 + adcs x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adcs x5, x5, xzr + csetm x6, cc // cc = lo, ul, last + mov x7, #0xffffffff // #4294967295 + and x7, x7, x6 + adds x0, x0, x7 + eor x7, x7, x6 + adcs x1, x1, x7 + mov x7, #0xfffffffffffffffe // #-2 + and x7, x7, x6 + adcs x2, x2, x7 + adcs x3, x3, x6 + adcs x4, x4, x6 + adc x5, x5, x6 + stp x0, x1, [sp, #288] + stp x2, x3, [sp, #304] + stp x4, x5, [sp, #320] + mov x2, sp + ldp x4, x3, [x2] + subs x5, x19, x4 + sbcs x6, x20, x3 + ldp x7, x8, [sp, #208] + ldp x4, x3, [x2, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #224] + ldp x4, x3, [x2, #32] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + ldr q1, [sp, #48] + ldp x9, x2, [sp, #48] + ldr q0, [sp, #48] + ldp x4, x6, [sp, #64] + rev64 v21.4s, v1.4s + uzp2 v28.4s, v1.4s, v1.4s + umulh x7, x9, x2 + xtn v17.2s, v1.2d + mul v27.4s, v21.4s, v0.4s + ldr q20, [sp, #80] + xtn v30.2s, v0.2d + ldr q1, [sp, #80] + uzp2 v31.4s, v0.4s, v0.4s + ldp x5, x10, [sp, #80] + umulh x8, x9, x4 + uaddlp v3.2d, v27.4s + umull v16.2d, v30.2s, v17.2s + mul x16, x9, x4 + umull v27.2d, v30.2s, v28.2s + shrn v0.2s, v20.2d, #32 + xtn v7.2s, v20.2d + shl v20.2d, v3.2d, #32 + umull v3.2d, v31.2s, v28.2s + mul x3, x2, x4 + umlal v20.2d, v30.2s, v17.2s + umull v22.2d, v7.2s, v0.2s + usra v27.2d, v16.2d, #32 + umulh x11, x2, x4 + movi v21.2d, #0xffffffff + uzp2 v28.4s, v1.4s, v1.4s + adds x15, x16, x7 + and v5.16b, v27.16b, v21.16b + adcs x3, x3, x8 + usra v3.2d, v27.2d, #32 + dup v29.2d, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2d, v31.2s, v17.2s + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2d, v22.2d, #33 + xtn v25.2s, v29.2d + rev64 v31.4s, v1.4s + lsl x13, x14, #32 + uzp2 v6.4s, v29.4s, v29.4s + umlal v19.2d, v7.2s, v7.2s + usra v3.2d, v5.2d, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4s, v31.4s, v29.4s + xtn v4.2s, v1.2d + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2d, v25.2s, v28.2s + adcs x11, x16, x16 + umull v21.2d, v25.2s, v4.2s + mov x17, v3.d[0] + umull v18.2d, v6.2s, v28.2s + adc x16, x8, xzr + uaddlp v16.2d, v17.4s + movi v1.2d, #0xffffffff + subs x13, x13, x12 + usra v31.2d, v21.2d, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2d, v16.2d, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16b, v31.16b, v1.16b + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2d, v6.2s, v4.2s + usra v18.2d, v31.2d, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2d, v25.2s, v4.2s + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2d, v3.2d, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + 
sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x19, x3, x17 + sbcs x20, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [sp, #192] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + csetm x15, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + stp x11, x14, [sp, #224] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc // cc = lo, ul, last + cinv x16, x15, cc // cc = lo, ul, last + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x4, x2, x4 + cneg x4, x4, cc // cc = lo, ul, last + csetm x7, cc // cc = lo, ul, last + subs x2, x10, x6 + cinv x8, x8, cc // cc = lo, ul, last + cneg x2, x2, cc // cc = lo, ul, last + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc // cc = lo, ul, last + cneg x1, x1, cc // cc = lo, ul, last + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [sp, #192] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [sp, #224] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x19 + adcs x1, x1, x20 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff // #4294967295 + 
adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 // #-4294967295 + adcs x14, x14, x2 + mov x2, #0x1 // #1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [sp, #192] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [sp, #208] + adc x17, x14, xzr + stp x2, x17, [sp, #224] + ldp x5, x6, [sp, #240] + ldp x4, x3, [sp, #48] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #256] + ldp x4, x3, [sp, #64] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #272] + ldp x4, x3, [sp, #80] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc // cc = lo, ul, last + mov x4, #0xffffffff // #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe // #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [x25, #96] + stp x7, x8, [x25, #112] + stp x9, x10, [x25, #128] + ldr q3, [sp, #288] + ldr q25, [sp, #96] + ldp x13, x23, [sp, #96] + ldp x3, x21, [sp, #288] + rev64 v23.4s, v25.4s + uzp1 v17.4s, v25.4s, v3.4s + umulh x15, x3, x13 + mul v6.4s, v23.4s, v3.4s + uzp1 v3.4s, v3.4s, v3.4s + ldr q27, [sp, #128] + ldp x8, x24, [sp, #304] + subs x6, x3, x21 + ldr q0, [sp, #320] + movi v23.2d, #0xffffffff + csetm x10, cc // cc = lo, ul, last + umulh x19, x21, x23 + rev64 v4.4s, v27.4s + uzp2 v25.4s, v27.4s, v27.4s + cneg x4, x6, cc // cc = lo, ul, last + subs x7, x23, x13 + xtn v22.2s, v0.2d + xtn v24.2s, v27.2d + cneg x20, x7, cc // cc = lo, ul, last + ldp x6, x14, [sp, #112] + mul v27.4s, v4.4s, v0.4s + uaddlp v20.2d, v6.4s + cinv x5, x10, cc // cc = lo, ul, last + mul x16, x4, x20 + uzp2 v6.4s, v0.4s, v0.4s + umull v21.2d, v22.2s, v25.2s + shl v0.2d, v20.2d, #32 + umlal v0.2d, v3.2s, v17.2s + mul x22, x8, x6 + umull v1.2d, v6.2s, v25.2s + subs x12, x3, x8 + umull v20.2d, v22.2s, v24.2s + cneg x17, x12, cc // cc = lo, ul, last + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc // cc = lo, ul, last + usra v21.2d, v20.2d, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2d, v21.2d, #32 + adds x22, x15, x7 + and v26.16b, v21.16b, v23.16b + adcs x16, x12, x15 + uaddlp v25.2d, v27.4s + adcs x9, x19, x12 + umlal v26.2d, v6.2s, v24.2s + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2d, v25.2d, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc // cc = lo, ul, last + cinv x10, x10, cc // cc = lo, ul, last + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc // cc = lo, ul, last + eor x19, x19, x10 + csetm x4, cc // cc = lo, ul, last + subs x16, x6, x23 + cneg x16, x16, cc // cc = lo, ul, last + umlal v27.2d, v22.2s, v24.2s + mul x15, x20, x16 + cinv x4, x4, cc // cc = lo, ul, last + cmn x10, #0x1 + usra v1.2d, v26.2d, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [sp, #128] + add x2, x12, x7 + adc x7, x5, 
x10 + ldp x5, x10, [sp, #320] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x26, x4, x16 + mov x4, v27.d[0] + sbcs x27, x20, x11 + sbcs x20, x9, x12 + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [sp, #256] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc // cc = lo, ul, last + csetm x2, cc // cc = lo, ul, last + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc // cc = lo, ul, last + cneg x19, x19, cc // cc = lo, ul, last + stp x9, x20, [sp, #272] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc // cc = lo, ul, last + csetm x12, cc // cc = lo, ul, last + subs x9, x17, x14 + cinv x12, x12, cc // cc = lo, ul, last + cneg x9, x9, cc // cc = lo, ul, last + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc // cc = lo, ul, last + cneg x24, x10, cc // cc = lo, ul, last + subs x10, x17, x15 + cinv x7, x7, cc // cc = lo, ul, last + cneg x10, x10, cc // cc = lo, ul, last + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [sp, #256] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [sp, #272] + adds x20, x22, x26 + mul x10, x13, x14 + adcs x11, x11, x27 + eor x9, x8, x21 + adcs x26, x19, x17 + stp x20, x11, [sp, #240] + adcs x27, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [sp, #272] + cneg x3, x21, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc // cc = lo, ul, last + csetm x16, cc // cc = lo, ul, last + subs x21, x6, x15 + cneg x22, x21, cc // cc = lo, ul, last + cinv x21, x24, cc // cc = lo, ul, 
last + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc // cc = lo, ul, last + csetm x24, cc // cc = lo, ul, last + subs x20, x14, x15 + cinv x24, x24, cc // cc = lo, ul, last + mul x22, x3, x22 + cneg x3, x20, cc // cc = lo, ul, last + subs x13, x6, x14 + cneg x20, x13, cc // cc = lo, ul, last + cinv x15, x16, cc // cc = lo, ul, last + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe // #-2 + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [sp, #240] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [sp, #272] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x26 + eor x1, x22, x9 + adcs x24, x23, x27 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x26 + adcs x15, x17, x27 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff // #4294967295 + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc // cc = lo, ul, last + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [sp, #240] + adcs x5, x13, x23 + adcs x12, x8, x23 + stp x14, x5, [sp, #256] + adc x19, x15, x23 + ldp x1, x2, [sp, #144] + ldp x3, x4, [sp, #160] + ldp x5, x6, [sp, #176] + lsl x0, x1, #2 + ldp x7, x8, [sp, #288] + subs x0, x0, x7 + extr x1, x2, x1, #62 + sbcs x1, x1, x8 + ldp x7, x8, [sp, #304] + extr x2, x3, x2, #62 + sbcs x2, x2, x7 + extr x3, x4, x3, #62 + sbcs x3, x3, x8 + extr x4, x5, x4, #62 + ldp x7, x8, [sp, #320] + sbcs x4, x4, x7 + extr x5, x6, x5, #62 + sbcs x5, x5, x8 + lsr x6, x6, #62 + adc x6, x6, xzr + lsl x7, x6, #32 + subs x8, x6, x7 + sbc x7, x7, xzr + adds x0, x0, x8 + adcs x1, x1, x7 + adcs 
x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adcs x5, x5, xzr + csetm x8, cc // cc = lo, ul, last + mov x9, #0xffffffff // #4294967295 + and x9, x9, x8 + adds x0, x0, x9 + eor x9, x9, x8 + adcs x1, x1, x9 + mov x9, #0xfffffffffffffffe // #-2 + and x9, x9, x8 + adcs x2, x2, x9 + adcs x3, x3, x8 + adcs x4, x4, x8 + adc x5, x5, x8 + stp x0, x1, [x25] + stp x2, x3, [x25, #16] + stp x4, x5, [x25, #32] + ldp x0, x1, [sp, #192] + mov x6, #0xffffffff // #4294967295 + subs x6, x6, x0 + mov x7, #0xffffffff00000000 // #-4294967296 + sbcs x7, x7, x1 + ldp x0, x1, [sp, #208] + mov x8, #0xfffffffffffffffe // #-2 + sbcs x8, x8, x0 + mov x13, #0xffffffffffffffff // #-1 + sbcs x9, x13, x1 + ldp x0, x1, [sp, #224] + sbcs x10, x13, x0 + sbc x11, x13, x1 + lsl x0, x6, #3 + extr x1, x7, x6, #61 + extr x2, x8, x7, #61 + extr x3, x9, x8, #61 + extr x4, x10, x9, #61 + extr x5, x11, x10, #61 + lsr x6, x11, #61 + add x6, x6, #0x1 + ldp x8, x9, [sp, #240] + ldp x10, x11, [sp, #256] + mov x14, #0x3 // #3 + mul x15, x14, x8 + umulh x8, x14, x8 + adds x0, x0, x15 + mul x15, x14, x9 + umulh x9, x14, x9 + adcs x1, x1, x15 + mul x15, x14, x10 + umulh x10, x14, x10 + adcs x2, x2, x15 + mul x15, x14, x11 + umulh x11, x14, x11 + adcs x3, x3, x15 + mul x15, x14, x12 + umulh x12, x14, x12 + adcs x4, x4, x15 + mul x15, x14, x19 + umulh x13, x14, x19 + adcs x5, x5, x15 + adc x6, x6, xzr + adds x1, x1, x8 + adcs x2, x2, x9 + adcs x3, x3, x10 + adcs x4, x4, x11 + adcs x5, x5, x12 + adcs x6, x6, x13 + lsl x7, x6, #32 + subs x8, x6, x7 + sbc x7, x7, xzr + adds x0, x0, x8 + adcs x1, x1, x7 + adcs x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adcs x5, x5, xzr + csetm x6, cc // cc = lo, ul, last + mov x7, #0xffffffff // #4294967295 + and x7, x7, x6 + adds x0, x0, x7 + eor x7, x7, x6 + adcs x1, x1, x7 + mov x7, #0xfffffffffffffffe // #-2 + and x7, x7, x6 + adcs x2, x2, x7 + adcs x3, x3, x6 + adcs x4, x4, x6 + adc x5, x5, x6 + stp x0, x1, [x25, #48] + stp x2, x3, [x25, #64] + stp x4, x5, [x25, #80] + ldp x19, x20, [sp, #336] + ldp x21, x22, [sp, #352] + ldp x23, x24, [sp, #368] + ldp x25, x26, [sp, #384] + ldp x27, xzr, [sp, #400] + add sp, sp, #0x1a0 + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/arm/p384/p384_montjscalarmul_alt.S b/third_party/s2n-bignum/arm/p384/p384_montjscalarmul_alt.S new file mode 100644 index 0000000000..9f47090a8c --- /dev/null +++ b/third_party/s2n-bignum/arm/p384/p384_montjscalarmul_alt.S @@ -0,0 +1,7139 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery-Jacobian form scalar multiplication for P-384 +// Input scalar[6], point[18]; output res[18] +// +// extern void p384_montjscalarmul_alt +// (uint64_t res[static 18], +// uint64_t scalar[static 6], +// uint64_t point[static 18]); +// +// This function is a variant of its affine point version p384_scalarmul_alt. +// Here, input and output points are assumed to be in Jacobian form with +// their coordinates in the Montgomery domain. Thus, if priming indicates +// Montgomery form, x' = (2^384 * x) mod p_384 etc., each point argument +// is a triple (x',y',z') representing the affine point (x/z^2,y/z^3) when +// z' is nonzero or the point at infinity (group identity) if z' = 0. 
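To make the representation described in this header concrete, here is a small self-contained C sketch of the Jacobian-to-affine map, using a toy prime in place of p_384 and ignoring the Montgomery factor 2^384; it is only an illustration of what a triple (X,Y,Z) stands for, not code from the source file.

#include <stdint.h>
#include <stdio.h>

// Toy illustration: over a small prime, the Jacobian triple (X, Y, Z)
// with Z != 0 stands for the affine point (X/Z^2, Y/Z^3).
#define P 1000003ULL                      // small prime, for illustration only

static uint64_t mulmod(uint64_t a, uint64_t b) { return (a * b) % P; }

static uint64_t invmod(uint64_t a)        // Fermat: a^(P-2) mod P
{
    uint64_t r = 1, e = P - 2;
    while (e) { if (e & 1) r = mulmod(r, a); a = mulmod(a, a); e >>= 1; }
    return r;
}

int main(void)
{
    uint64_t X = 123456, Y = 654321, Z = 777;       // arbitrary triple, Z != 0
    uint64_t z2 = mulmod(Z, Z);
    uint64_t ax = mulmod(X, invmod(z2));            // X / Z^2
    uint64_t ay = mulmod(Y, invmod(mulmod(z2, Z))); // Y / Z^3
    printf("affine point: (%llu, %llu)\n",
           (unsigned long long)ax, (unsigned long long)ay);
    return 0;
}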
+// +// Given scalar = n and point = P, assumed to be on the NIST elliptic +// curve P-384, returns a representation of n * P. If the result is the +// point at infinity (either because the input point was or because the +// scalar was a multiple of the group order) then the output is guaranteed to +// represent the point at infinity, i.e. to have its z coordinate zero. +// +// Standard ARM ABI: X0 = res, X1 = scalar, X2 = point +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjscalarmul_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjscalarmul_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 +#define JACSIZE (3*NUMSIZE) + +// Safe copies of input res and additional values in variables. + +#define bf x22 +#define sgn x23 +#define j x24 +#define res x25 + +// Intermediate variables on the stack. +// The table is 16 entries, each of size JACSIZE = 3 * NUMSIZE + +#define scalarb sp, #(0*NUMSIZE) +#define acc sp, #(1*NUMSIZE) +#define tabent sp, #(4*NUMSIZE) + +#define tab sp, #(7*NUMSIZE) + +#define NSPACE #(55*NUMSIZE) + +// Avoid using .rep for the sake of the BoringSSL/AWS-LC delocator, +// which doesn't accept repetitions, assembler macros etc. + +#define selectblock(I) \ + cmp bf, #(1*I); \ + ldp x20, x21, [x19]; \ + csel x0, x20, x0, eq; \ + csel x1, x21, x1, eq; \ + ldp x20, x21, [x19, #16]; \ + csel x2, x20, x2, eq; \ + csel x3, x21, x3, eq; \ + ldp x20, x21, [x19, #32]; \ + csel x4, x20, x4, eq; \ + csel x5, x21, x5, eq; \ + ldp x20, x21, [x19, #48]; \ + csel x6, x20, x6, eq; \ + csel x7, x21, x7, eq; \ + ldp x20, x21, [x19, #64]; \ + csel x8, x20, x8, eq; \ + csel x9, x21, x9, eq; \ + ldp x20, x21, [x19, #80]; \ + csel x10, x20, x10, eq; \ + csel x11, x21, x11, eq; \ + ldp x20, x21, [x19, #96]; \ + csel x12, x20, x12, eq; \ + csel x13, x21, x13, eq; \ + ldp x20, x21, [x19, #112]; \ + csel x14, x20, x14, eq; \ + csel x15, x21, x15, eq; \ + ldp x20, x21, [x19, #128]; \ + csel x16, x20, x16, eq; \ + csel x17, x21, x17, eq; \ + add x19, x19, #JACSIZE + +// Loading large constants + +#define movbig(nn,n3,n2,n1,n0) \ + movz nn, n0; \ + movk nn, n1, lsl #16; \ + movk nn, n2, lsl #32; \ + movk nn, n3, lsl #48 + +S2N_BN_SYMBOL(p384_montjscalarmul_alt): + + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x30, [sp, #-16]! + sub sp, sp, NSPACE + +// Preserve the "res" input argument; others get processed early. + + mov res, x0 + +// Reduce the input scalar mod n_384, i.e. conditionally subtract n_384. +// Store it to "scalarb".
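The reduction announced in the comment above is a single conditional subtraction: compute scalar - n_384 with borrow propagation and keep the original scalar if the subtraction borrowed. In the assembly below the top three words of n_384 are 0xffffffffffffffff, and sbcs against that constant is the same as adcs with xzr, which is why the subtraction chain switches to adcs. A minimal C sketch of the same step, for a generic 6-word order n rather than this file's register allocation:

#include <stdint.h>

// Reduce a 6-word scalar modulo the 6-word group order n by one
// conditional subtraction, keeping the original value when x < n.
// Mirrors the subs/sbcs/adcs + csel pattern in the assembly below.
static void reduce_scalar_once(uint64_t x[6], const uint64_t n[6])
{
    uint64_t d[6], borrow = 0;
    for (int i = 0; i < 6; i++) {
        uint64_t t = x[i] - n[i];
        uint64_t b = x[i] < n[i];      // borrow out of this word
        d[i] = t - borrow;
        borrow = b | (t < borrow);     // propagate the incoming borrow
    }
    // borrow == 1 means x < n, so keep x; otherwise take the difference.
    uint64_t keep = 0 - borrow;        // all-ones mask when x < n
    for (int i = 0; i < 6; i++)
        x[i] = (x[i] & keep) | (d[i] & ~keep);
}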
+ + ldp x3, x4, [x1] + movbig(x15, #0xecec, #0x196a, #0xccc5, #0x2973) + ldp x5, x6, [x1, #16] + movbig(x16, #0x581a, #0x0db2, #0x48b0, #0xa77a) + ldp x7, x8, [x1, #32] + movbig(x17, #0xc763, #0x4d81, #0xf437, #0x2ddf) + + subs x9, x3, x15 + sbcs x10, x4, x16 + sbcs x11, x5, x17 + adcs x12, x6, xzr + adcs x13, x7, xzr + adcs x14, x8, xzr + + csel x3, x3, x9, cc + csel x4, x4, x10, cc + csel x5, x5, x11, cc + csel x6, x6, x12, cc + csel x7, x7, x13, cc + csel x8, x8, x14, cc + + stp x3, x4, [scalarb] + stp x5, x6, [scalarb+16] + stp x7, x8, [scalarb+32] + +// Set the tab[0] table entry to the input point = 1 * P + + ldp x10, x11, [x2] + stp x10, x11, [tab] + ldp x12, x13, [x2, #16] + stp x12, x13, [tab+16] + ldp x14, x15, [x2, #32] + stp x14, x15, [tab+32] + + ldp x10, x11, [x2, #48] + stp x10, x11, [tab+48] + ldp x12, x13, [x2, #64] + stp x12, x13, [tab+64] + ldp x14, x15, [x2, #80] + stp x14, x15, [tab+80] + + ldp x10, x11, [x2, #96] + stp x10, x11, [tab+96] + ldp x12, x13, [x2, #112] + stp x12, x13, [tab+112] + ldp x14, x15, [x2, #128] + stp x14, x15, [tab+128] + +// Compute and record tab[1] = 2 * P, ..., tab[15] = 16 * P + + add x0, tab+JACSIZE*1 + add x1, tab + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, tab+JACSIZE*2 + add x1, tab+JACSIZE*1 + add x2, tab + bl p384_montjscalarmul_alt_p384_montjadd + + add x0, tab+JACSIZE*3 + add x1, tab+JACSIZE*1 + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, tab+JACSIZE*4 + add x1, tab+JACSIZE*3 + add x2, tab + bl p384_montjscalarmul_alt_p384_montjadd + + add x0, tab+JACSIZE*5 + add x1, tab+JACSIZE*2 + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, tab+JACSIZE*6 + add x1, tab+JACSIZE*5 + add x2, tab + bl p384_montjscalarmul_alt_p384_montjadd + + add x0, tab+JACSIZE*7 + add x1, tab+JACSIZE*3 + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, tab+JACSIZE*8 + add x1, tab+JACSIZE*7 + add x2, tab + bl p384_montjscalarmul_alt_p384_montjadd + + add x0, tab+JACSIZE*9 + add x1, tab+JACSIZE*4 + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, tab+JACSIZE*10 + add x1, tab+JACSIZE*9 + add x2, tab + bl p384_montjscalarmul_alt_p384_montjadd + + add x0, tab+JACSIZE*11 + add x1, tab+JACSIZE*5 + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, tab+JACSIZE*12 + add x1, tab+JACSIZE*11 + add x2, tab + bl p384_montjscalarmul_alt_p384_montjadd + + add x0, tab+JACSIZE*13 + add x1, tab+JACSIZE*6 + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, tab+JACSIZE*14 + add x1, tab+JACSIZE*13 + add x2, tab + bl p384_montjscalarmul_alt_p384_montjadd + + add x0, tab+JACSIZE*15 + add x1, tab+JACSIZE*7 + bl p384_montjscalarmul_alt_p384_montjdouble + +// Add the recoding constant sum_i(16 * 32^i) to the scalar to allow signed +// digits. The digits of the constant, in lowest-to-highest order, are as +// follows; they are generated dynamically since none is a simple ARM load. +// +// 0x0842108421084210 +// 0x1084210842108421 +// 0x2108421084210842 +// 0x4210842108421084 +// 0x8421084210842108 +// 0x0842108421084210 + + ldp x0, x1, [scalarb] + ldp x2, x3, [scalarb+16] + ldp x4, x5, [scalarb+32] + movbig(x8, #0x1084, #0x2108, #0x4210, #0x8421) + adds x0, x0, x8, lsr #1 + adcs x1, x1, x8 + lsl x8, x8, #1 + adcs x2, x2, x8 + lsl x8, x8, #1 + adcs x3, x3, x8 + lsl x8, x8, #1 + adcs x4, x4, x8 + lsr x8, x8, #4 + adcs x5, x5, x8 + cset x6, cs + +// Record the top bitfield then shift the whole scalar left 4 bits +// to align the top of the next bitfield with the MSB (bits 379..383).
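The signed-digit recoding set up just above can be sanity-checked in miniature: adding 16 to every 5-bit window (the constant sum_i 16*32^i) and then reading each window w back as the signed digit w - 16 reproduces the original value, which is exactly what the "double 5 times then add digit" loop below relies on. A toy, self-contained C check over 7 windows (35 bits, not the 385-bit layout used here):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t n = 0x1d2c3b4aULL;              // arbitrary small test scalar
    uint64_t c = 0;                          // sum_i 16 * 32^i over 7 windows
    for (int i = 0; i < 7; i++) c += 16ULL << (5 * i);

    uint64_t m = n + c;                      // recoded scalar
    long long recovered = 0;
    for (int i = 6; i >= 0; i--) {           // read windows top-down
        long long digit = (long long)((m >> (5 * i)) & 31) - 16;  // in -16..15
        recovered = 32 * recovered + digit;  // "double 5 times then add digit"
    }
    printf("%d\n", recovered == (long long)n);   // prints 1
    return 0;
}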
+ + extr bf, x6, x5, #60 + extr x5, x5, x4, #60 + extr x4, x4, x3, #60 + extr x3, x3, x2, #60 + extr x2, x2, x1, #60 + extr x1, x1, x0, #60 + lsl x0, x0, #4 + stp x0, x1, [scalarb] + stp x2, x3, [scalarb+16] + stp x4, x5, [scalarb+32] + +// Initialize the accumulator to the corresponding entry using constant-time +// lookup in the table. This top digit, uniquely, is not recoded so there is +// no sign adjustment to make. + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + mov x16, xzr + mov x17, xzr + + add x19, tab + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + selectblock(9) + selectblock(10) + selectblock(11) + selectblock(12) + selectblock(13) + selectblock(14) + selectblock(15) + selectblock(16) + + stp x0, x1, [acc] + stp x2, x3, [acc+16] + stp x4, x5, [acc+32] + stp x6, x7, [acc+48] + stp x8, x9, [acc+64] + stp x10, x11, [acc+80] + stp x12, x13, [acc+96] + stp x14, x15, [acc+112] + stp x16, x17, [acc+128] + + mov j, #380 + +// Main loop over size-5 bitfields: double 5 times then add signed digit +// At each stage we shift the scalar left by 5 bits so we can simply pick +// the top 5 bits as the bitfield, saving some fiddle over indexing. + +p384_montjscalarmul_alt_mainloop: + sub j, j, #5 + + add x0, acc + add x1, acc + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, acc + add x1, acc + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, acc + add x1, acc + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, acc + add x1, acc + bl p384_montjscalarmul_alt_p384_montjdouble + + add x0, acc + add x1, acc + bl p384_montjscalarmul_alt_p384_montjdouble + +// Choose the bitfield and adjust it to sign and magnitude + + ldp x0, x1, [scalarb] + ldp x2, x3, [scalarb+16] + ldp x4, x5, [scalarb+32] + lsr bf, x5, #59 + extr x5, x5, x4, #59 + extr x4, x4, x3, #59 + extr x3, x3, x2, #59 + extr x2, x2, x1, #59 + extr x1, x1, x0, #59 + lsl x0, x0, #5 + stp x0, x1, [scalarb] + stp x2, x3, [scalarb+16] + stp x4, x5, [scalarb+32] + + subs bf, bf, #16 + cset sgn, lo // sgn = sign of digit (1 = negative) + cneg bf, bf, lo // bf = absolute value of digit + +// Conditionally select the table entry tab[i-1] = i * P in constant time + + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + mov x16, xzr + mov x17, xzr + + add x19, tab + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + selectblock(9) + selectblock(10) + selectblock(11) + selectblock(12) + selectblock(13) + selectblock(14) + selectblock(15) + selectblock(16) + +// Store it to "tabent" with the y coordinate optionally negated. +// This is done carefully to give coordinates < p_384 even in +// the degenerate case y = 0 (when z = 0 for points on the curve). 
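The care described above is the y = 0 corner case: the negative of y modulo p_384 is p_384 - y, but when y is zero the stored coordinate must stay zero rather than become p_384, or it would no longer be reduced. A minimal C sketch of that selection, assuming neg_y already holds p_384 - y (as the sbcs chain below computes) and using a mask-based select in place of csel:

#include <stdint.h>

// Conditionally negate the y coordinate for a negative recoded digit.
// neg_y[] is assumed to hold p_384 - y; y = 0 is kept as 0 so the stored
// coordinate remains strictly below p_384.  Sketch only.
static void apply_sign(uint64_t y[6], const uint64_t neg_y[6], int digit_negative)
{
    uint64_t any = 0;
    for (int i = 0; i < 6; i++)
        any |= y[i];                                   // is y nonzero?
    uint64_t use = 0 - (uint64_t)(digit_negative && any != 0);
    for (int i = 0; i < 6; i++)                        // branch-free select
        y[i] = (neg_y[i] & use) | (y[i] & ~use);
}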
+ + stp x0, x1, [tabent] + stp x2, x3, [tabent+16] + stp x4, x5, [tabent+32] + + stp x12, x13, [tabent+96] + stp x14, x15, [tabent+112] + stp x16, x17, [tabent+128] + + mov x0, #0x00000000ffffffff + subs x0, x0, x6 + orr x12, x6, x7 + mov x1, #0xffffffff00000000 + sbcs x1, x1, x7 + orr x13, x8, x9 + mov x2, #0xfffffffffffffffe + sbcs x2, x2, x8 + orr x14, x10, x11 + mov x5, #0xffffffffffffffff + sbcs x3, x5, x9 + orr x12, x12, x13 + sbcs x4, x5, x10 + orr x12, x12, x14 + sbcs x5, x5, x11 + + cmp sgn, xzr + ccmp x12, xzr, #4, ne + + csel x6, x0, x6, ne + csel x7, x1, x7, ne + csel x8, x2, x8, ne + csel x9, x3, x9, ne + csel x10, x4, x10, ne + csel x11, x5, x11, ne + + stp x6, x7, [tabent+48] + stp x8, x9, [tabent+64] + stp x10, x11, [tabent+80] + +// Add to the accumulator + + add x0, acc + add x1, acc + add x2, tabent + bl p384_montjscalarmul_alt_p384_montjadd + + cbnz j, p384_montjscalarmul_alt_mainloop + +// That's the end of the main loop, and we just need to copy the +// result in "acc" to the output. + + ldp x0, x1, [acc] + stp x0, x1, [res] + ldp x0, x1, [acc+16] + stp x0, x1, [res, #16] + ldp x0, x1, [acc+32] + stp x0, x1, [res, #32] + ldp x0, x1, [acc+48] + stp x0, x1, [res, #48] + ldp x0, x1, [acc+64] + stp x0, x1, [res, #64] + ldp x0, x1, [acc+80] + stp x0, x1, [res, #80] + ldp x0, x1, [acc+96] + stp x0, x1, [res, #96] + ldp x0, x1, [acc+112] + stp x0, x1, [res, #112] + ldp x0, x1, [acc+128] + stp x0, x1, [res, #128] + +// Restore stack and registers and return + + add sp, sp, NSPACE + ldp x25, x30, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + ret + +// Local copies of subroutines, complete clones at the moment + +p384_montjscalarmul_alt_p384_montjadd: + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! 
+ sub sp, sp, #336 + mov x24, x0 + mov x25, x1 + mov x26, x2 + ldp x2, x3, [x25, #96] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [x25, #112] + mul x8, x2, x4 + adds x10, x10, x8 + mul x11, x2, x5 + mul x8, x3, x4 + adcs x11, x11, x8 + umulh x12, x2, x5 + mul x8, x3, x5 + adcs x12, x12, x8 + ldp x6, x7, [x25, #128] + mul x13, x2, x7 + mul x8, x3, x6 + adcs x13, x13, x8 + umulh x14, x2, x7 + mul x8, x3, x7 + adcs x14, x14, x8 + mul x15, x5, x6 + adcs x15, x15, xzr + umulh x16, x5, x6 + adc x16, x16, xzr + umulh x8, x2, x4 + adds x11, x11, x8 + umulh x8, x3, x4 + adcs x12, x12, x8 + umulh x8, x3, x5 + adcs x13, x13, x8 + umulh x8, x3, x6 + adcs x14, x14, x8 + umulh x8, x3, x7 + adcs x15, x15, x8 + adc x16, x16, xzr + mul x8, x2, x6 + adds x12, x12, x8 + mul x8, x4, x5 + adcs x13, x13, x8 + mul x8, x4, x6 + adcs x14, x14, x8 + mul x8, x4, x7 + adcs x15, x15, x8 + mul x8, x5, x7 + adcs x16, x16, x8 + mul x17, x6, x7 + adcs x17, x17, xzr + umulh x19, x6, x7 + adc x19, x19, xzr + umulh x8, x2, x6 + adds x13, x13, x8 + umulh x8, x4, x5 + adcs x14, x14, x8 + umulh x8, x4, x6 + adcs x15, x15, x8 + umulh x8, x4, x7 + adcs x16, x16, x8 + umulh x8, x5, x7 + adcs x17, x17, x8 + adc x19, x19, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + cset x20, hs + umulh x8, x2, x2 + mul x2, x2, x2 + adds x9, x9, x8 + mul x8, x3, x3 + adcs x10, x10, x8 + umulh x8, x3, x3 + adcs x11, x11, x8 + mul x8, x4, x4 + adcs x12, x12, x8 + umulh x8, x4, x4 + adcs x13, x13, x8 + mul x8, x5, x5 + adcs x14, x14, x8 + umulh x8, x5, x5 + adcs x15, x15, x8 + mul x8, x6, x6 + adcs x16, x16, x8 + umulh x8, x6, x6 + adcs x17, x17, x8 + mul x8, x7, x7 + adcs x19, x19, x8 + umulh x8, x7, x7 + adc x20, x20, x8 + lsl x5, x2, #32 + add x2, x5, x2 + mov x5, #-4294967295 + umulh x5, x5, x2 + mov x4, #4294967295 + mul x3, x4, x2 + umulh x4, x4, x2 + adds x5, x5, x3 + adcs x4, x4, x2 + adc x3, xzr, xzr + subs x9, x9, x5 + sbcs x10, x10, x4 + sbcs x11, x11, x3 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x2, x2, xzr + lsl x5, x9, #32 + add x9, x5, x9 + mov x5, #-4294967295 + umulh x5, x5, x9 + mov x4, #4294967295 + mul x3, x4, x9 + umulh x4, x4, x9 + adds x5, x5, x3 + adcs x4, x4, x9 + adc x3, xzr, xzr + subs x10, x10, x5 + sbcs x11, x11, x4 + sbcs x12, x12, x3 + sbcs x13, x13, xzr + sbcs x2, x2, xzr + sbc x9, x9, xzr + lsl x5, x10, #32 + add x10, x5, x10 + mov x5, #-4294967295 + umulh x5, x5, x10 + mov x4, #4294967295 + mul x3, x4, x10 + umulh x4, x4, x10 + adds x5, x5, x3 + adcs x4, x4, x10 + adc x3, xzr, xzr + subs x11, x11, x5 + sbcs x12, x12, x4 + sbcs x13, x13, x3 + sbcs x2, x2, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + lsl x5, x11, #32 + add x11, x5, x11 + mov x5, #-4294967295 + umulh x5, x5, x11 + mov x4, #4294967295 + mul x3, x4, x11 + umulh x4, x4, x11 + adds x5, x5, x3 + adcs x4, x4, x11 + adc x3, xzr, xzr + subs x12, x12, x5 + sbcs x13, x13, x4 + sbcs x2, x2, x3 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + lsl x5, x12, #32 + add x12, x5, x12 + mov x5, #-4294967295 + umulh x5, x5, x12 + mov x4, #4294967295 + mul x3, x4, x12 + umulh x4, x4, x12 + adds x5, x5, x3 + adcs x4, x4, x12 + adc x3, xzr, xzr + subs x13, x13, x5 + sbcs x2, x2, x4 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbcs x11, x11, xzr + sbc x12, x12, xzr + lsl x5, x13, #32 + add x13, x5, x13 + mov x5, #-4294967295 + umulh x5, x5, x13 + mov x4, #4294967295 + mul x3, x4, x13 + umulh 
x4, x4, x13 + adds x5, x5, x3 + adcs x4, x4, x13 + adc x3, xzr, xzr + subs x2, x2, x5 + sbcs x9, x9, x4 + sbcs x10, x10, x3 + sbcs x11, x11, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x19 + adcs x13, x13, x20 + mov x14, #-4294967295 + mov x15, #4294967295 + csel x14, x14, xzr, hs + csel x15, x15, xzr, hs + cset x16, hs + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, xzr + adcs x12, x12, xzr + adc x13, x13, xzr + stp x2, x9, [sp] + stp x10, x11, [sp, #16] + stp x12, x13, [sp, #32] + ldp x2, x3, [x26, #96] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [x26, #112] + mul x8, x2, x4 + adds x10, x10, x8 + mul x11, x2, x5 + mul x8, x3, x4 + adcs x11, x11, x8 + umulh x12, x2, x5 + mul x8, x3, x5 + adcs x12, x12, x8 + ldp x6, x7, [x26, #128] + mul x13, x2, x7 + mul x8, x3, x6 + adcs x13, x13, x8 + umulh x14, x2, x7 + mul x8, x3, x7 + adcs x14, x14, x8 + mul x15, x5, x6 + adcs x15, x15, xzr + umulh x16, x5, x6 + adc x16, x16, xzr + umulh x8, x2, x4 + adds x11, x11, x8 + umulh x8, x3, x4 + adcs x12, x12, x8 + umulh x8, x3, x5 + adcs x13, x13, x8 + umulh x8, x3, x6 + adcs x14, x14, x8 + umulh x8, x3, x7 + adcs x15, x15, x8 + adc x16, x16, xzr + mul x8, x2, x6 + adds x12, x12, x8 + mul x8, x4, x5 + adcs x13, x13, x8 + mul x8, x4, x6 + adcs x14, x14, x8 + mul x8, x4, x7 + adcs x15, x15, x8 + mul x8, x5, x7 + adcs x16, x16, x8 + mul x17, x6, x7 + adcs x17, x17, xzr + umulh x19, x6, x7 + adc x19, x19, xzr + umulh x8, x2, x6 + adds x13, x13, x8 + umulh x8, x4, x5 + adcs x14, x14, x8 + umulh x8, x4, x6 + adcs x15, x15, x8 + umulh x8, x4, x7 + adcs x16, x16, x8 + umulh x8, x5, x7 + adcs x17, x17, x8 + adc x19, x19, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + cset x20, hs + umulh x8, x2, x2 + mul x2, x2, x2 + adds x9, x9, x8 + mul x8, x3, x3 + adcs x10, x10, x8 + umulh x8, x3, x3 + adcs x11, x11, x8 + mul x8, x4, x4 + adcs x12, x12, x8 + umulh x8, x4, x4 + adcs x13, x13, x8 + mul x8, x5, x5 + adcs x14, x14, x8 + umulh x8, x5, x5 + adcs x15, x15, x8 + mul x8, x6, x6 + adcs x16, x16, x8 + umulh x8, x6, x6 + adcs x17, x17, x8 + mul x8, x7, x7 + adcs x19, x19, x8 + umulh x8, x7, x7 + adc x20, x20, x8 + lsl x5, x2, #32 + add x2, x5, x2 + mov x5, #-4294967295 + umulh x5, x5, x2 + mov x4, #4294967295 + mul x3, x4, x2 + umulh x4, x4, x2 + adds x5, x5, x3 + adcs x4, x4, x2 + adc x3, xzr, xzr + subs x9, x9, x5 + sbcs x10, x10, x4 + sbcs x11, x11, x3 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x2, x2, xzr + lsl x5, x9, #32 + add x9, x5, x9 + mov x5, #-4294967295 + umulh x5, x5, x9 + mov x4, #4294967295 + mul x3, x4, x9 + umulh x4, x4, x9 + adds x5, x5, x3 + adcs x4, x4, x9 + adc x3, xzr, xzr + subs x10, x10, x5 + sbcs x11, x11, x4 + sbcs x12, x12, x3 + sbcs x13, x13, xzr + sbcs x2, x2, xzr + sbc x9, x9, xzr + lsl x5, x10, #32 + add x10, x5, x10 + mov x5, #-4294967295 + umulh x5, x5, x10 + mov x4, #4294967295 + mul x3, x4, x10 + umulh x4, x4, x10 + adds x5, x5, x3 + adcs x4, x4, x10 + adc x3, xzr, xzr + subs x11, x11, x5 + sbcs x12, x12, x4 + sbcs x13, x13, x3 + sbcs x2, x2, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + lsl x5, x11, #32 + add x11, x5, x11 + mov x5, #-4294967295 + umulh x5, x5, x11 + mov x4, #4294967295 + mul x3, x4, x11 + umulh x4, x4, x11 + adds x5, x5, x3 + adcs x4, x4, x11 + adc x3, xzr, xzr + subs x12, 
x12, x5 + sbcs x13, x13, x4 + sbcs x2, x2, x3 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + lsl x5, x12, #32 + add x12, x5, x12 + mov x5, #-4294967295 + umulh x5, x5, x12 + mov x4, #4294967295 + mul x3, x4, x12 + umulh x4, x4, x12 + adds x5, x5, x3 + adcs x4, x4, x12 + adc x3, xzr, xzr + subs x13, x13, x5 + sbcs x2, x2, x4 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbcs x11, x11, xzr + sbc x12, x12, xzr + lsl x5, x13, #32 + add x13, x5, x13 + mov x5, #-4294967295 + umulh x5, x5, x13 + mov x4, #4294967295 + mul x3, x4, x13 + umulh x4, x4, x13 + adds x5, x5, x3 + adcs x4, x4, x13 + adc x3, xzr, xzr + subs x2, x2, x5 + sbcs x9, x9, x4 + sbcs x10, x10, x3 + sbcs x11, x11, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x19 + adcs x13, x13, x20 + mov x14, #-4294967295 + mov x15, #4294967295 + csel x14, x14, xzr, hs + csel x15, x15, xzr, hs + cset x16, hs + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, xzr + adcs x12, x12, xzr + adc x13, x13, xzr + stp x2, x9, [sp, #240] + stp x10, x11, [sp, #256] + stp x12, x13, [sp, #272] + ldp x3, x4, [x26, #96] + ldp x5, x6, [x25, #48] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [x25, #64] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [x25, #80] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [x26, #112] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [x26, #128] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, 
x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #288] + stp x14, x15, [sp, #304] + stp x16, x17, [sp, #320] + ldp x3, x4, [x25, #96] + ldp x5, x6, [x26, #48] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [x26, #64] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [x26, #80] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, 
x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [x25, #112] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [x25, #128] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov 
x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #48] + stp x14, x15, [sp, #64] + stp x16, x17, [sp, #80] + ldp x3, x4, [sp] + ldp x5, x6, [x26] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [x26, #16] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [x26, #32] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #16] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #32] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, 
x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #96] + stp x14, x15, [sp, #112] + stp x16, x17, [sp, #128] + ldp x3, x4, [sp, #240] + ldp x5, x6, [x25] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [x25, #16] + mul x11, x3, x7 + umulh x15, x3, x7 + 
adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [x25, #32] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #256] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #272] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, 
x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #192] + stp x14, x15, [sp, #208] + stp x16, x17, [sp, #224] + ldp x3, x4, [sp] + ldp x5, x6, [sp, #48] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [sp, #64] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [sp, #80] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #16] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, 
x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #32] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #48] 
+ stp x14, x15, [sp, #64] + stp x16, x17, [sp, #80] + ldp x3, x4, [sp, #240] + ldp x5, x6, [sp, #288] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [sp, #304] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [sp, #320] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #256] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #272] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs 
x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #288] + stp x14, x15, [sp, #304] + stp x16, x17, [sp, #320] + ldp x5, x6, [sp, #96] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #112] + ldp x4, x3, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #128] + ldp x4, x3, [sp, #224] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, lo + mov x4, #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + ldp x5, x6, [sp, #48] + ldp x4, x3, [sp, #288] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #64] + ldp x4, x3, [sp, #304] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #80] + ldp x4, x3, [sp, #320] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, lo + mov x4, #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #48] + stp x7, x8, [sp, #64] + stp x9, x10, [sp, #80] + ldp x2, x3, [sp, #240] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #256] + mul x8, x2, x4 + adds x10, x10, x8 + mul x11, x2, x5 + mul x8, x3, x4 + adcs x11, x11, x8 + umulh x12, x2, x5 + mul x8, x3, x5 + adcs x12, x12, x8 + ldp x6, x7, [sp, #272] + mul x13, x2, x7 + mul x8, x3, x6 + adcs x13, x13, x8 + umulh x14, x2, x7 + mul x8, x3, x7 + adcs x14, x14, x8 + mul x15, x5, x6 + adcs x15, x15, xzr + umulh x16, x5, x6 + adc x16, x16, xzr + umulh x8, x2, x4 + adds x11, x11, x8 + 
umulh x8, x3, x4 + adcs x12, x12, x8 + umulh x8, x3, x5 + adcs x13, x13, x8 + umulh x8, x3, x6 + adcs x14, x14, x8 + umulh x8, x3, x7 + adcs x15, x15, x8 + adc x16, x16, xzr + mul x8, x2, x6 + adds x12, x12, x8 + mul x8, x4, x5 + adcs x13, x13, x8 + mul x8, x4, x6 + adcs x14, x14, x8 + mul x8, x4, x7 + adcs x15, x15, x8 + mul x8, x5, x7 + adcs x16, x16, x8 + mul x17, x6, x7 + adcs x17, x17, xzr + umulh x19, x6, x7 + adc x19, x19, xzr + umulh x8, x2, x6 + adds x13, x13, x8 + umulh x8, x4, x5 + adcs x14, x14, x8 + umulh x8, x4, x6 + adcs x15, x15, x8 + umulh x8, x4, x7 + adcs x16, x16, x8 + umulh x8, x5, x7 + adcs x17, x17, x8 + adc x19, x19, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + cset x20, hs + umulh x8, x2, x2 + mul x2, x2, x2 + adds x9, x9, x8 + mul x8, x3, x3 + adcs x10, x10, x8 + umulh x8, x3, x3 + adcs x11, x11, x8 + mul x8, x4, x4 + adcs x12, x12, x8 + umulh x8, x4, x4 + adcs x13, x13, x8 + mul x8, x5, x5 + adcs x14, x14, x8 + umulh x8, x5, x5 + adcs x15, x15, x8 + mul x8, x6, x6 + adcs x16, x16, x8 + umulh x8, x6, x6 + adcs x17, x17, x8 + mul x8, x7, x7 + adcs x19, x19, x8 + umulh x8, x7, x7 + adc x20, x20, x8 + lsl x5, x2, #32 + add x2, x5, x2 + mov x5, #-4294967295 + umulh x5, x5, x2 + mov x4, #4294967295 + mul x3, x4, x2 + umulh x4, x4, x2 + adds x5, x5, x3 + adcs x4, x4, x2 + adc x3, xzr, xzr + subs x9, x9, x5 + sbcs x10, x10, x4 + sbcs x11, x11, x3 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x2, x2, xzr + lsl x5, x9, #32 + add x9, x5, x9 + mov x5, #-4294967295 + umulh x5, x5, x9 + mov x4, #4294967295 + mul x3, x4, x9 + umulh x4, x4, x9 + adds x5, x5, x3 + adcs x4, x4, x9 + adc x3, xzr, xzr + subs x10, x10, x5 + sbcs x11, x11, x4 + sbcs x12, x12, x3 + sbcs x13, x13, xzr + sbcs x2, x2, xzr + sbc x9, x9, xzr + lsl x5, x10, #32 + add x10, x5, x10 + mov x5, #-4294967295 + umulh x5, x5, x10 + mov x4, #4294967295 + mul x3, x4, x10 + umulh x4, x4, x10 + adds x5, x5, x3 + adcs x4, x4, x10 + adc x3, xzr, xzr + subs x11, x11, x5 + sbcs x12, x12, x4 + sbcs x13, x13, x3 + sbcs x2, x2, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + lsl x5, x11, #32 + add x11, x5, x11 + mov x5, #-4294967295 + umulh x5, x5, x11 + mov x4, #4294967295 + mul x3, x4, x11 + umulh x4, x4, x11 + adds x5, x5, x3 + adcs x4, x4, x11 + adc x3, xzr, xzr + subs x12, x12, x5 + sbcs x13, x13, x4 + sbcs x2, x2, x3 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + lsl x5, x12, #32 + add x12, x5, x12 + mov x5, #-4294967295 + umulh x5, x5, x12 + mov x4, #4294967295 + mul x3, x4, x12 + umulh x4, x4, x12 + adds x5, x5, x3 + adcs x4, x4, x12 + adc x3, xzr, xzr + subs x13, x13, x5 + sbcs x2, x2, x4 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbcs x11, x11, xzr + sbc x12, x12, xzr + lsl x5, x13, #32 + add x13, x5, x13 + mov x5, #-4294967295 + umulh x5, x5, x13 + mov x4, #4294967295 + mul x3, x4, x13 + umulh x4, x4, x13 + adds x5, x5, x3 + adcs x4, x4, x13 + adc x3, xzr, xzr + subs x2, x2, x5 + sbcs x9, x9, x4 + sbcs x10, x10, x3 + sbcs x11, x11, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x19 + adcs x13, x13, x20 + mov x14, #-4294967295 + mov x15, #4294967295 + csel x14, x14, xzr, hs + csel x15, x15, xzr, hs + cset x16, hs + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, xzr + adcs x12, x12, xzr + adc x13, x13, xzr + stp x2, x9, 
[sp, #144] + stp x10, x11, [sp, #160] + stp x12, x13, [sp, #176] + ldp x2, x3, [sp, #48] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #64] + mul x8, x2, x4 + adds x10, x10, x8 + mul x11, x2, x5 + mul x8, x3, x4 + adcs x11, x11, x8 + umulh x12, x2, x5 + mul x8, x3, x5 + adcs x12, x12, x8 + ldp x6, x7, [sp, #80] + mul x13, x2, x7 + mul x8, x3, x6 + adcs x13, x13, x8 + umulh x14, x2, x7 + mul x8, x3, x7 + adcs x14, x14, x8 + mul x15, x5, x6 + adcs x15, x15, xzr + umulh x16, x5, x6 + adc x16, x16, xzr + umulh x8, x2, x4 + adds x11, x11, x8 + umulh x8, x3, x4 + adcs x12, x12, x8 + umulh x8, x3, x5 + adcs x13, x13, x8 + umulh x8, x3, x6 + adcs x14, x14, x8 + umulh x8, x3, x7 + adcs x15, x15, x8 + adc x16, x16, xzr + mul x8, x2, x6 + adds x12, x12, x8 + mul x8, x4, x5 + adcs x13, x13, x8 + mul x8, x4, x6 + adcs x14, x14, x8 + mul x8, x4, x7 + adcs x15, x15, x8 + mul x8, x5, x7 + adcs x16, x16, x8 + mul x17, x6, x7 + adcs x17, x17, xzr + umulh x19, x6, x7 + adc x19, x19, xzr + umulh x8, x2, x6 + adds x13, x13, x8 + umulh x8, x4, x5 + adcs x14, x14, x8 + umulh x8, x4, x6 + adcs x15, x15, x8 + umulh x8, x4, x7 + adcs x16, x16, x8 + umulh x8, x5, x7 + adcs x17, x17, x8 + adc x19, x19, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + cset x20, hs + umulh x8, x2, x2 + mul x2, x2, x2 + adds x9, x9, x8 + mul x8, x3, x3 + adcs x10, x10, x8 + umulh x8, x3, x3 + adcs x11, x11, x8 + mul x8, x4, x4 + adcs x12, x12, x8 + umulh x8, x4, x4 + adcs x13, x13, x8 + mul x8, x5, x5 + adcs x14, x14, x8 + umulh x8, x5, x5 + adcs x15, x15, x8 + mul x8, x6, x6 + adcs x16, x16, x8 + umulh x8, x6, x6 + adcs x17, x17, x8 + mul x8, x7, x7 + adcs x19, x19, x8 + umulh x8, x7, x7 + adc x20, x20, x8 + lsl x5, x2, #32 + add x2, x5, x2 + mov x5, #-4294967295 + umulh x5, x5, x2 + mov x4, #4294967295 + mul x3, x4, x2 + umulh x4, x4, x2 + adds x5, x5, x3 + adcs x4, x4, x2 + adc x3, xzr, xzr + subs x9, x9, x5 + sbcs x10, x10, x4 + sbcs x11, x11, x3 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x2, x2, xzr + lsl x5, x9, #32 + add x9, x5, x9 + mov x5, #-4294967295 + umulh x5, x5, x9 + mov x4, #4294967295 + mul x3, x4, x9 + umulh x4, x4, x9 + adds x5, x5, x3 + adcs x4, x4, x9 + adc x3, xzr, xzr + subs x10, x10, x5 + sbcs x11, x11, x4 + sbcs x12, x12, x3 + sbcs x13, x13, xzr + sbcs x2, x2, xzr + sbc x9, x9, xzr + lsl x5, x10, #32 + add x10, x5, x10 + mov x5, #-4294967295 + umulh x5, x5, x10 + mov x4, #4294967295 + mul x3, x4, x10 + umulh x4, x4, x10 + adds x5, x5, x3 + adcs x4, x4, x10 + adc x3, xzr, xzr + subs x11, x11, x5 + sbcs x12, x12, x4 + sbcs x13, x13, x3 + sbcs x2, x2, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + lsl x5, x11, #32 + add x11, x5, x11 + mov x5, #-4294967295 + umulh x5, x5, x11 + mov x4, #4294967295 + mul x3, x4, x11 + umulh x4, x4, x11 + adds x5, x5, x3 + adcs x4, x4, x11 + adc x3, xzr, xzr + subs x12, x12, x5 + sbcs x13, x13, x4 + sbcs x2, x2, x3 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + lsl x5, x12, #32 + add x12, x5, x12 + mov x5, #-4294967295 + umulh x5, x5, x12 + mov x4, #4294967295 + mul x3, x4, x12 + umulh x4, x4, x12 + adds x5, x5, x3 + adcs x4, x4, x12 + adc x3, xzr, xzr + subs x13, x13, x5 + sbcs x2, x2, x4 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbcs x11, x11, xzr + sbc x12, x12, xzr + lsl x5, x13, #32 + add x13, x5, x13 + mov x5, #-4294967295 + umulh x5, x5, x13 + mov x4, #4294967295 + mul x3, x4, x13 + umulh 
x4, x4, x13 + adds x5, x5, x3 + adcs x4, x4, x13 + adc x3, xzr, xzr + subs x2, x2, x5 + sbcs x9, x9, x4 + sbcs x10, x10, x3 + sbcs x11, x11, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x19 + adcs x13, x13, x20 + adc x6, xzr, xzr + mov x8, #-4294967295 + adds x14, x2, x8 + mov x8, #4294967295 + adcs x15, x9, x8 + mov x8, #1 + adcs x16, x10, x8 + adcs x17, x11, xzr + adcs x19, x12, xzr + adcs x20, x13, xzr + adcs x6, x6, xzr + csel x2, x2, x14, eq + csel x9, x9, x15, eq + csel x10, x10, x16, eq + csel x11, x11, x17, eq + csel x12, x12, x19, eq + csel x13, x13, x20, eq + stp x2, x9, [sp] + stp x10, x11, [sp, #16] + stp x12, x13, [sp, #32] + ldp x3, x4, [sp, #144] + ldp x5, x6, [sp, #192] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [sp, #208] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [sp, #224] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #160] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #176] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, 
x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #192] + stp x14, x15, [sp, #208] + stp x16, x17, [sp, #224] + ldp x3, x4, [sp, #144] + ldp x5, x6, [sp, #96] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [sp, #112] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [sp, #128] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs 
x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #160] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #176] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + 
subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #96] + stp x14, x15, [sp, #112] + stp x16, x17, [sp, #128] + ldp x5, x6, [sp] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #16] + ldp x4, x3, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #32] + ldp x4, x3, [sp, #224] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, lo + mov x4, #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp] + stp x7, x8, [sp, #16] + stp x9, x10, [sp, #32] + ldp x5, x6, [sp, #96] + ldp x4, x3, [sp, #192] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #112] + ldp x4, x3, [sp, #208] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #128] + ldp x4, x3, [sp, #224] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, lo + mov x4, #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #144] + stp x7, x8, [sp, #160] + stp x9, x10, [sp, #176] + ldp x3, x4, [sp, #240] + ldp x5, x6, [x25, #96] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [x25, #112] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [x25, #128] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #256] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + 
adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #272] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, 
x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #240] + stp x14, x15, [sp, #256] + stp x16, x17, [sp, #272] + ldp x5, x6, [sp] + ldp x4, x3, [sp, #96] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #16] + ldp x4, x3, [sp, #112] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #32] + ldp x4, x3, [sp, #128] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, lo + mov x4, #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp] + stp x7, x8, [sp, #16] + stp x9, x10, [sp, #32] + ldp x5, x6, [sp, #192] + ldp x4, x3, [sp] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #208] + ldp x4, x3, [sp, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #224] + ldp x4, x3, [sp, #32] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, lo + mov x4, #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #192] + stp x7, x8, [sp, #208] + stp x9, x10, [sp, #224] + ldp x3, x4, [sp, #144] + ldp x5, x6, [sp, #288] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [sp, #304] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [sp, #320] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #160] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #176] + mul x11, x3, x5 + adds 
x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #144] + stp x14, x15, [sp, #160] + stp x16, x17, [sp, #176] + ldp x3, x4, [sp, #240] + ldp x5, x6, [x26, #96] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, 
x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [x26, #112] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [x26, #128] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #256] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #272] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + 
mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #240] + stp x14, x15, [sp, #256] + stp x16, x17, [sp, #272] + ldp x3, x4, [sp, #48] + ldp x5, x6, [sp, #192] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [sp, #208] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [sp, #224] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #64] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, 
x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #80] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + 
        csel x14, x14, x21, eq
+        csel x15, x15, x22, eq
+        csel x16, x16, x2, eq
+        csel x17, x17, x1, eq
+        stp x12, x13, [sp, #192]
+        stp x14, x15, [sp, #208]
+        stp x16, x17, [sp, #224]
+        ldp x5, x6, [sp, #192]
+        ldp x4, x3, [sp, #144]
+        subs x5, x5, x4
+        sbcs x6, x6, x3
+        ldp x7, x8, [sp, #208]
+        ldp x4, x3, [sp, #160]
+        sbcs x7, x7, x4
+        sbcs x8, x8, x3
+        ldp x9, x10, [sp, #224]
+        ldp x4, x3, [sp, #176]
+        sbcs x9, x9, x4
+        sbcs x10, x10, x3
+        csetm x3, lo
+        mov x4, #4294967295
+        and x4, x4, x3
+        adds x5, x5, x4
+        eor x4, x4, x3
+        adcs x6, x6, x4
+        mov x4, #-2
+        and x4, x4, x3
+        adcs x7, x7, x4
+        adcs x8, x8, x3
+        adcs x9, x9, x3
+        adc x10, x10, x3
+        stp x5, x6, [sp, #192]
+        stp x7, x8, [sp, #208]
+        stp x9, x10, [sp, #224]
+        ldp x0, x1, [x25, #96]
+        ldp x2, x3, [x25, #112]
+        ldp x4, x5, [x25, #128]
+        orr x20, x0, x1
+        orr x21, x2, x3
+        orr x22, x4, x5
+        orr x20, x20, x21
+        orr x20, x20, x22
+        cmp x20, xzr
+        cset x20, ne
+        ldp x6, x7, [x26, #96]
+        ldp x8, x9, [x26, #112]
+        ldp x10, x11, [x26, #128]
+        orr x21, x6, x7
+        orr x22, x8, x9
+        orr x23, x10, x11
+        orr x21, x21, x22
+        orr x21, x21, x23
+        cmp x21, xzr
+        cset x21, ne
+        cmp x21, x20
+        ldp x12, x13, [sp, #240]
+        csel x12, x0, x12, lo
+        csel x13, x1, x13, lo
+        csel x12, x6, x12, hi
+        csel x13, x7, x13, hi
+        ldp x14, x15, [sp, #256]
+        csel x14, x2, x14, lo
+        csel x15, x3, x15, lo
+        csel x14, x8, x14, hi
+        csel x15, x9, x15, hi
+        ldp x16, x17, [sp, #272]
+        csel x16, x4, x16, lo
+        csel x17, x5, x17, lo
+        csel x16, x10, x16, hi
+        csel x17, x11, x17, hi
+        ldp x20, x21, [x25]
+        ldp x0, x1, [sp]
+        csel x0, x20, x0, lo
+        csel x1, x21, x1, lo
+        ldp x20, x21, [x26]
+        csel x0, x20, x0, hi
+        csel x1, x21, x1, hi
+        ldp x20, x21, [x25, #16]
+        ldp x2, x3, [sp, #16]
+        csel x2, x20, x2, lo
+        csel x3, x21, x3, lo
+        ldp x20, x21, [x26, #16]
+        csel x2, x20, x2, hi
+        csel x3, x21, x3, hi
+        ldp x20, x21, [x25, #32]
+        ldp x4, x5, [sp, #32]
+        csel x4, x20, x4, lo
+        csel x5, x21, x5, lo
+        ldp x20, x21, [x26, #32]
+        csel x4, x20, x4, hi
+        csel x5, x21, x5, hi
+        ldp x20, x21, [x25, #48]
+        ldp x6, x7, [sp, #192]
+        csel x6, x20, x6, lo
+        csel x7, x21, x7, lo
+        ldp x20, x21, [x26, #48]
+        csel x6, x20, x6, hi
+        csel x7, x21, x7, hi
+        ldp x20, x21, [x25, #64]
+        ldp x8, x9, [sp, #208]
+        csel x8, x20, x8, lo
+        csel x9, x21, x9, lo
+        ldp x20, x21, [x26, #64]
+        csel x8, x20, x8, hi
+        csel x9, x21, x9, hi
+        ldp x20, x21, [x25, #80]
+        ldp x10, x11, [sp, #224]
+        csel x10, x20, x10, lo
+        csel x11, x21, x11, lo
+        ldp x20, x21, [x26, #80]
+        csel x10, x20, x10, hi
+        csel x11, x21, x11, hi
+        stp x0, x1, [x24]
+        stp x2, x3, [x24, #16]
+        stp x4, x5, [x24, #32]
+        stp x6, x7, [x24, #48]
+        stp x8, x9, [x24, #64]
+        stp x10, x11, [x24, #80]
+        stp x12, x13, [x24, #96]
+        stp x14, x15, [x24, #112]
+        stp x16, x17, [x24, #128]
+        add sp, sp, #336
+        ldp x25, x26, [sp], #16
+        ldp x23, x24, [sp], #16
+        ldp x21, x22, [sp], #16
+        ldp x19, x20, [sp], #16
+        ret
+
+p384_montjscalarmul_alt_p384_montjdouble:
+        stp x19, x20, [sp, #-16]!
+        stp x21, x22, [sp, #-16]!
+        stp x23, x24, [sp, #-16]!
+ sub sp, sp, #336 + mov x23, x0 + mov x24, x1 + ldp x2, x3, [x24, #96] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [x24, #112] + mul x8, x2, x4 + adds x10, x10, x8 + mul x11, x2, x5 + mul x8, x3, x4 + adcs x11, x11, x8 + umulh x12, x2, x5 + mul x8, x3, x5 + adcs x12, x12, x8 + ldp x6, x7, [x24, #128] + mul x13, x2, x7 + mul x8, x3, x6 + adcs x13, x13, x8 + umulh x14, x2, x7 + mul x8, x3, x7 + adcs x14, x14, x8 + mul x15, x5, x6 + adcs x15, x15, xzr + umulh x16, x5, x6 + adc x16, x16, xzr + umulh x8, x2, x4 + adds x11, x11, x8 + umulh x8, x3, x4 + adcs x12, x12, x8 + umulh x8, x3, x5 + adcs x13, x13, x8 + umulh x8, x3, x6 + adcs x14, x14, x8 + umulh x8, x3, x7 + adcs x15, x15, x8 + adc x16, x16, xzr + mul x8, x2, x6 + adds x12, x12, x8 + mul x8, x4, x5 + adcs x13, x13, x8 + mul x8, x4, x6 + adcs x14, x14, x8 + mul x8, x4, x7 + adcs x15, x15, x8 + mul x8, x5, x7 + adcs x16, x16, x8 + mul x17, x6, x7 + adcs x17, x17, xzr + umulh x19, x6, x7 + adc x19, x19, xzr + umulh x8, x2, x6 + adds x13, x13, x8 + umulh x8, x4, x5 + adcs x14, x14, x8 + umulh x8, x4, x6 + adcs x15, x15, x8 + umulh x8, x4, x7 + adcs x16, x16, x8 + umulh x8, x5, x7 + adcs x17, x17, x8 + adc x19, x19, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + cset x20, hs + umulh x8, x2, x2 + mul x2, x2, x2 + adds x9, x9, x8 + mul x8, x3, x3 + adcs x10, x10, x8 + umulh x8, x3, x3 + adcs x11, x11, x8 + mul x8, x4, x4 + adcs x12, x12, x8 + umulh x8, x4, x4 + adcs x13, x13, x8 + mul x8, x5, x5 + adcs x14, x14, x8 + umulh x8, x5, x5 + adcs x15, x15, x8 + mul x8, x6, x6 + adcs x16, x16, x8 + umulh x8, x6, x6 + adcs x17, x17, x8 + mul x8, x7, x7 + adcs x19, x19, x8 + umulh x8, x7, x7 + adc x20, x20, x8 + lsl x5, x2, #32 + add x2, x5, x2 + mov x5, #-4294967295 + umulh x5, x5, x2 + mov x4, #4294967295 + mul x3, x4, x2 + umulh x4, x4, x2 + adds x5, x5, x3 + adcs x4, x4, x2 + adc x3, xzr, xzr + subs x9, x9, x5 + sbcs x10, x10, x4 + sbcs x11, x11, x3 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x2, x2, xzr + lsl x5, x9, #32 + add x9, x5, x9 + mov x5, #-4294967295 + umulh x5, x5, x9 + mov x4, #4294967295 + mul x3, x4, x9 + umulh x4, x4, x9 + adds x5, x5, x3 + adcs x4, x4, x9 + adc x3, xzr, xzr + subs x10, x10, x5 + sbcs x11, x11, x4 + sbcs x12, x12, x3 + sbcs x13, x13, xzr + sbcs x2, x2, xzr + sbc x9, x9, xzr + lsl x5, x10, #32 + add x10, x5, x10 + mov x5, #-4294967295 + umulh x5, x5, x10 + mov x4, #4294967295 + mul x3, x4, x10 + umulh x4, x4, x10 + adds x5, x5, x3 + adcs x4, x4, x10 + adc x3, xzr, xzr + subs x11, x11, x5 + sbcs x12, x12, x4 + sbcs x13, x13, x3 + sbcs x2, x2, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + lsl x5, x11, #32 + add x11, x5, x11 + mov x5, #-4294967295 + umulh x5, x5, x11 + mov x4, #4294967295 + mul x3, x4, x11 + umulh x4, x4, x11 + adds x5, x5, x3 + adcs x4, x4, x11 + adc x3, xzr, xzr + subs x12, x12, x5 + sbcs x13, x13, x4 + sbcs x2, x2, x3 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + lsl x5, x12, #32 + add x12, x5, x12 + mov x5, #-4294967295 + umulh x5, x5, x12 + mov x4, #4294967295 + mul x3, x4, x12 + umulh x4, x4, x12 + adds x5, x5, x3 + adcs x4, x4, x12 + adc x3, xzr, xzr + subs x13, x13, x5 + sbcs x2, x2, x4 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbcs x11, x11, xzr + sbc x12, x12, xzr + lsl x5, x13, #32 + add x13, x5, x13 + mov x5, #-4294967295 + umulh x5, x5, x13 + mov x4, #4294967295 + mul x3, x4, x13 + umulh x4, x4, x13 + 
adds x5, x5, x3 + adcs x4, x4, x13 + adc x3, xzr, xzr + subs x2, x2, x5 + sbcs x9, x9, x4 + sbcs x10, x10, x3 + sbcs x11, x11, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x19 + adcs x13, x13, x20 + adc x6, xzr, xzr + mov x8, #-4294967295 + adds x14, x2, x8 + mov x8, #4294967295 + adcs x15, x9, x8 + mov x8, #1 + adcs x16, x10, x8 + adcs x17, x11, xzr + adcs x19, x12, xzr + adcs x20, x13, xzr + adcs x6, x6, xzr + csel x2, x2, x14, eq + csel x9, x9, x15, eq + csel x10, x10, x16, eq + csel x11, x11, x17, eq + csel x12, x12, x19, eq + csel x13, x13, x20, eq + stp x2, x9, [sp] + stp x10, x11, [sp, #16] + stp x12, x13, [sp, #32] + ldp x2, x3, [x24, #48] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [x24, #64] + mul x8, x2, x4 + adds x10, x10, x8 + mul x11, x2, x5 + mul x8, x3, x4 + adcs x11, x11, x8 + umulh x12, x2, x5 + mul x8, x3, x5 + adcs x12, x12, x8 + ldp x6, x7, [x24, #80] + mul x13, x2, x7 + mul x8, x3, x6 + adcs x13, x13, x8 + umulh x14, x2, x7 + mul x8, x3, x7 + adcs x14, x14, x8 + mul x15, x5, x6 + adcs x15, x15, xzr + umulh x16, x5, x6 + adc x16, x16, xzr + umulh x8, x2, x4 + adds x11, x11, x8 + umulh x8, x3, x4 + adcs x12, x12, x8 + umulh x8, x3, x5 + adcs x13, x13, x8 + umulh x8, x3, x6 + adcs x14, x14, x8 + umulh x8, x3, x7 + adcs x15, x15, x8 + adc x16, x16, xzr + mul x8, x2, x6 + adds x12, x12, x8 + mul x8, x4, x5 + adcs x13, x13, x8 + mul x8, x4, x6 + adcs x14, x14, x8 + mul x8, x4, x7 + adcs x15, x15, x8 + mul x8, x5, x7 + adcs x16, x16, x8 + mul x17, x6, x7 + adcs x17, x17, xzr + umulh x19, x6, x7 + adc x19, x19, xzr + umulh x8, x2, x6 + adds x13, x13, x8 + umulh x8, x4, x5 + adcs x14, x14, x8 + umulh x8, x4, x6 + adcs x15, x15, x8 + umulh x8, x4, x7 + adcs x16, x16, x8 + umulh x8, x5, x7 + adcs x17, x17, x8 + adc x19, x19, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + cset x20, hs + umulh x8, x2, x2 + mul x2, x2, x2 + adds x9, x9, x8 + mul x8, x3, x3 + adcs x10, x10, x8 + umulh x8, x3, x3 + adcs x11, x11, x8 + mul x8, x4, x4 + adcs x12, x12, x8 + umulh x8, x4, x4 + adcs x13, x13, x8 + mul x8, x5, x5 + adcs x14, x14, x8 + umulh x8, x5, x5 + adcs x15, x15, x8 + mul x8, x6, x6 + adcs x16, x16, x8 + umulh x8, x6, x6 + adcs x17, x17, x8 + mul x8, x7, x7 + adcs x19, x19, x8 + umulh x8, x7, x7 + adc x20, x20, x8 + lsl x5, x2, #32 + add x2, x5, x2 + mov x5, #-4294967295 + umulh x5, x5, x2 + mov x4, #4294967295 + mul x3, x4, x2 + umulh x4, x4, x2 + adds x5, x5, x3 + adcs x4, x4, x2 + adc x3, xzr, xzr + subs x9, x9, x5 + sbcs x10, x10, x4 + sbcs x11, x11, x3 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x2, x2, xzr + lsl x5, x9, #32 + add x9, x5, x9 + mov x5, #-4294967295 + umulh x5, x5, x9 + mov x4, #4294967295 + mul x3, x4, x9 + umulh x4, x4, x9 + adds x5, x5, x3 + adcs x4, x4, x9 + adc x3, xzr, xzr + subs x10, x10, x5 + sbcs x11, x11, x4 + sbcs x12, x12, x3 + sbcs x13, x13, xzr + sbcs x2, x2, xzr + sbc x9, x9, xzr + lsl x5, x10, #32 + add x10, x5, x10 + mov x5, #-4294967295 + umulh x5, x5, x10 + mov x4, #4294967295 + mul x3, x4, x10 + umulh x4, x4, x10 + adds x5, x5, x3 + adcs x4, x4, x10 + adc x3, xzr, xzr + subs x11, x11, x5 + sbcs x12, x12, x4 + sbcs x13, x13, x3 + sbcs x2, x2, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + lsl x5, x11, #32 + add x11, x5, x11 + mov x5, #-4294967295 + umulh x5, x5, x11 + mov x4, 
#4294967295 + mul x3, x4, x11 + umulh x4, x4, x11 + adds x5, x5, x3 + adcs x4, x4, x11 + adc x3, xzr, xzr + subs x12, x12, x5 + sbcs x13, x13, x4 + sbcs x2, x2, x3 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + lsl x5, x12, #32 + add x12, x5, x12 + mov x5, #-4294967295 + umulh x5, x5, x12 + mov x4, #4294967295 + mul x3, x4, x12 + umulh x4, x4, x12 + adds x5, x5, x3 + adcs x4, x4, x12 + adc x3, xzr, xzr + subs x13, x13, x5 + sbcs x2, x2, x4 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbcs x11, x11, xzr + sbc x12, x12, xzr + lsl x5, x13, #32 + add x13, x5, x13 + mov x5, #-4294967295 + umulh x5, x5, x13 + mov x4, #4294967295 + mul x3, x4, x13 + umulh x4, x4, x13 + adds x5, x5, x3 + adcs x4, x4, x13 + adc x3, xzr, xzr + subs x2, x2, x5 + sbcs x9, x9, x4 + sbcs x10, x10, x3 + sbcs x11, x11, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x19 + adcs x13, x13, x20 + adc x6, xzr, xzr + mov x8, #-4294967295 + adds x14, x2, x8 + mov x8, #4294967295 + adcs x15, x9, x8 + mov x8, #1 + adcs x16, x10, x8 + adcs x17, x11, xzr + adcs x19, x12, xzr + adcs x20, x13, xzr + adcs x6, x6, xzr + csel x2, x2, x14, eq + csel x9, x9, x15, eq + csel x10, x10, x16, eq + csel x11, x11, x17, eq + csel x12, x12, x19, eq + csel x13, x13, x20, eq + stp x2, x9, [sp, #48] + stp x10, x11, [sp, #64] + stp x12, x13, [sp, #80] + ldp x5, x6, [x24] + ldp x4, x3, [sp] + adds x5, x5, x4 + adcs x6, x6, x3 + ldp x7, x8, [x24, #16] + ldp x4, x3, [sp, #16] + adcs x7, x7, x4 + adcs x8, x8, x3 + ldp x9, x10, [x24, #32] + ldp x4, x3, [sp, #32] + adcs x9, x9, x4 + adcs x10, x10, x3 + csetm x3, hs + mov x4, #4294967295 + and x4, x4, x3 + subs x5, x5, x4 + eor x4, x4, x3 + sbcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + sbcs x7, x7, x4 + sbcs x8, x8, x3 + sbcs x9, x9, x3 + sbc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + ldp x5, x6, [x24] + ldp x4, x3, [sp] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [x24, #16] + ldp x4, x3, [sp, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [x24, #32] + ldp x4, x3, [sp, #32] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, lo + mov x4, #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #192] + stp x7, x8, [sp, #208] + stp x9, x10, [sp, #224] + ldp x3, x4, [sp, #240] + ldp x5, x6, [sp, #192] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [sp, #208] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [sp, #224] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #256] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, 
x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #272] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, 
#-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #96] + stp x14, x15, [sp, #112] + stp x16, x17, [sp, #128] + ldp x5, x6, [x24, #48] + ldp x4, x3, [x24, #96] + adds x5, x5, x4 + adcs x6, x6, x3 + ldp x7, x8, [x24, #64] + ldp x4, x3, [x24, #112] + adcs x7, x7, x4 + adcs x8, x8, x3 + ldp x9, x10, [x24, #80] + ldp x4, x3, [x24, #128] + adcs x9, x9, x4 + adcs x10, x10, x3 + adc x3, xzr, xzr + mov x4, #4294967295 + cmp x5, x4 + mov x4, #-4294967296 + sbcs xzr, x6, x4 + mov x4, #-2 + sbcs xzr, x7, x4 + adcs xzr, x8, xzr + adcs xzr, x9, xzr + adcs xzr, x10, xzr + adcs x3, x3, xzr + csetm x3, ne + mov x4, #4294967295 + and x4, x4, x3 + subs x5, x5, x4 + eor x4, x4, x3 + sbcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + sbcs x7, x7, x4 + sbcs x8, x8, x3 + sbcs x9, x9, x3 + sbc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + ldp x2, x3, [sp, #96] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #112] + mul x8, x2, x4 + adds x10, x10, x8 + mul x11, x2, x5 + mul x8, x3, x4 + adcs x11, x11, x8 + umulh x12, x2, x5 + mul x8, x3, x5 + adcs x12, x12, x8 + ldp x6, x7, [sp, #128] + mul x13, x2, x7 + mul x8, x3, x6 + adcs x13, x13, x8 + umulh x14, x2, x7 + mul x8, x3, x7 + adcs x14, x14, x8 + mul x15, x5, x6 + adcs x15, x15, xzr + umulh x16, x5, x6 + adc x16, x16, xzr + umulh x8, x2, x4 + adds x11, x11, x8 + umulh x8, x3, x4 + adcs x12, x12, x8 + umulh x8, x3, x5 + adcs x13, x13, x8 + umulh x8, x3, x6 + adcs x14, x14, x8 + umulh x8, x3, x7 + adcs x15, x15, x8 + adc x16, x16, xzr + mul x8, x2, x6 + adds x12, x12, x8 + mul x8, x4, x5 + adcs x13, x13, x8 + mul x8, x4, x6 + adcs x14, x14, x8 + mul x8, x4, x7 + adcs x15, x15, x8 + mul x8, x5, x7 + adcs x16, x16, x8 + mul x17, x6, x7 + adcs x17, x17, xzr + umulh x19, x6, x7 + adc x19, x19, xzr + umulh x8, x2, x6 + adds x13, x13, x8 + umulh x8, x4, x5 + adcs x14, x14, x8 + umulh x8, x4, x6 + adcs x15, x15, x8 + umulh x8, x4, x7 + adcs x16, x16, x8 + umulh x8, x5, x7 + adcs x17, x17, x8 + adc x19, x19, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + cset x20, hs + umulh x8, x2, x2 + mul x2, x2, x2 + adds x9, x9, x8 + mul x8, x3, x3 + adcs x10, x10, x8 + umulh x8, x3, x3 + adcs x11, x11, x8 + mul x8, x4, x4 + adcs x12, x12, x8 + umulh x8, x4, x4 + adcs x13, x13, x8 + mul x8, x5, x5 + adcs x14, x14, x8 + umulh x8, x5, x5 + adcs x15, x15, x8 + mul x8, x6, x6 + adcs x16, x16, x8 + umulh x8, x6, x6 + adcs x17, x17, x8 + mul x8, x7, x7 + adcs x19, x19, x8 + umulh x8, x7, x7 + adc x20, x20, x8 + lsl x5, x2, #32 + add x2, x5, x2 + mov x5, #-4294967295 + umulh x5, x5, x2 + mov x4, #4294967295 + mul x3, x4, x2 + umulh x4, x4, x2 + adds x5, 
x5, x3 + adcs x4, x4, x2 + adc x3, xzr, xzr + subs x9, x9, x5 + sbcs x10, x10, x4 + sbcs x11, x11, x3 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x2, x2, xzr + lsl x5, x9, #32 + add x9, x5, x9 + mov x5, #-4294967295 + umulh x5, x5, x9 + mov x4, #4294967295 + mul x3, x4, x9 + umulh x4, x4, x9 + adds x5, x5, x3 + adcs x4, x4, x9 + adc x3, xzr, xzr + subs x10, x10, x5 + sbcs x11, x11, x4 + sbcs x12, x12, x3 + sbcs x13, x13, xzr + sbcs x2, x2, xzr + sbc x9, x9, xzr + lsl x5, x10, #32 + add x10, x5, x10 + mov x5, #-4294967295 + umulh x5, x5, x10 + mov x4, #4294967295 + mul x3, x4, x10 + umulh x4, x4, x10 + adds x5, x5, x3 + adcs x4, x4, x10 + adc x3, xzr, xzr + subs x11, x11, x5 + sbcs x12, x12, x4 + sbcs x13, x13, x3 + sbcs x2, x2, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + lsl x5, x11, #32 + add x11, x5, x11 + mov x5, #-4294967295 + umulh x5, x5, x11 + mov x4, #4294967295 + mul x3, x4, x11 + umulh x4, x4, x11 + adds x5, x5, x3 + adcs x4, x4, x11 + adc x3, xzr, xzr + subs x12, x12, x5 + sbcs x13, x13, x4 + sbcs x2, x2, x3 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + lsl x5, x12, #32 + add x12, x5, x12 + mov x5, #-4294967295 + umulh x5, x5, x12 + mov x4, #4294967295 + mul x3, x4, x12 + umulh x4, x4, x12 + adds x5, x5, x3 + adcs x4, x4, x12 + adc x3, xzr, xzr + subs x13, x13, x5 + sbcs x2, x2, x4 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbcs x11, x11, xzr + sbc x12, x12, xzr + lsl x5, x13, #32 + add x13, x5, x13 + mov x5, #-4294967295 + umulh x5, x5, x13 + mov x4, #4294967295 + mul x3, x4, x13 + umulh x4, x4, x13 + adds x5, x5, x3 + adcs x4, x4, x13 + adc x3, xzr, xzr + subs x2, x2, x5 + sbcs x9, x9, x4 + sbcs x10, x10, x3 + sbcs x11, x11, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x19 + adcs x13, x13, x20 + adc x6, xzr, xzr + mov x8, #-4294967295 + adds x14, x2, x8 + mov x8, #4294967295 + adcs x15, x9, x8 + mov x8, #1 + adcs x16, x10, x8 + adcs x17, x11, xzr + adcs x19, x12, xzr + adcs x20, x13, xzr + adcs x6, x6, xzr + csel x2, x2, x14, eq + csel x9, x9, x15, eq + csel x10, x10, x16, eq + csel x11, x11, x17, eq + csel x12, x12, x19, eq + csel x13, x13, x20, eq + stp x2, x9, [sp, #288] + stp x10, x11, [sp, #304] + stp x12, x13, [sp, #320] + ldp x3, x4, [x24] + ldp x5, x6, [sp, #48] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [sp, #64] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [sp, #80] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [x24, #16] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 
+ umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [x24, #32] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, 
x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #144] + stp x14, x15, [sp, #160] + stp x16, x17, [sp, #176] + ldp x2, x3, [sp, #240] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #256] + mul x8, x2, x4 + adds x10, x10, x8 + mul x11, x2, x5 + mul x8, x3, x4 + adcs x11, x11, x8 + umulh x12, x2, x5 + mul x8, x3, x5 + adcs x12, x12, x8 + ldp x6, x7, [sp, #272] + mul x13, x2, x7 + mul x8, x3, x6 + adcs x13, x13, x8 + umulh x14, x2, x7 + mul x8, x3, x7 + adcs x14, x14, x8 + mul x15, x5, x6 + adcs x15, x15, xzr + umulh x16, x5, x6 + adc x16, x16, xzr + umulh x8, x2, x4 + adds x11, x11, x8 + umulh x8, x3, x4 + adcs x12, x12, x8 + umulh x8, x3, x5 + adcs x13, x13, x8 + umulh x8, x3, x6 + adcs x14, x14, x8 + umulh x8, x3, x7 + adcs x15, x15, x8 + adc x16, x16, xzr + mul x8, x2, x6 + adds x12, x12, x8 + mul x8, x4, x5 + adcs x13, x13, x8 + mul x8, x4, x6 + adcs x14, x14, x8 + mul x8, x4, x7 + adcs x15, x15, x8 + mul x8, x5, x7 + adcs x16, x16, x8 + mul x17, x6, x7 + adcs x17, x17, xzr + umulh x19, x6, x7 + adc x19, x19, xzr + umulh x8, x2, x6 + adds x13, x13, x8 + umulh x8, x4, x5 + adcs x14, x14, x8 + umulh x8, x4, x6 + adcs x15, x15, x8 + umulh x8, x4, x7 + adcs x16, x16, x8 + umulh x8, x5, x7 + adcs x17, x17, x8 + adc x19, x19, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + cset x20, hs + umulh x8, x2, x2 + mul x2, x2, x2 + adds x9, x9, x8 + mul x8, x3, x3 + adcs x10, x10, x8 + umulh x8, x3, x3 + adcs x11, x11, x8 + mul x8, x4, x4 + adcs x12, x12, x8 + umulh x8, x4, x4 + adcs x13, x13, x8 + mul x8, x5, x5 + adcs x14, x14, x8 + umulh x8, x5, x5 + adcs x15, x15, x8 + mul x8, x6, x6 + adcs x16, x16, x8 + umulh x8, x6, x6 + adcs x17, x17, x8 + mul x8, x7, x7 + adcs x19, x19, x8 + umulh x8, x7, x7 + adc x20, x20, x8 + lsl x5, x2, #32 + add x2, x5, x2 + mov x5, #-4294967295 + umulh x5, x5, x2 + mov x4, #4294967295 + mul x3, x4, x2 + umulh x4, x4, x2 + adds x5, x5, x3 + adcs x4, x4, x2 + adc x3, xzr, xzr + subs x9, x9, x5 + sbcs x10, x10, x4 + sbcs x11, x11, x3 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x2, x2, xzr + lsl x5, x9, #32 + add x9, x5, x9 + mov x5, #-4294967295 + umulh x5, x5, x9 + mov x4, #4294967295 + mul x3, x4, x9 + umulh x4, x4, x9 + adds x5, x5, x3 + adcs x4, x4, x9 + adc x3, xzr, xzr + subs x10, x10, x5 + sbcs x11, x11, x4 + sbcs x12, x12, x3 + sbcs x13, x13, xzr + sbcs x2, x2, xzr + sbc x9, x9, xzr + lsl x5, x10, #32 + add x10, x5, x10 + mov x5, #-4294967295 + umulh x5, x5, x10 + mov x4, #4294967295 + mul x3, x4, x10 + umulh x4, x4, x10 + adds x5, x5, x3 + adcs x4, x4, x10 + adc x3, xzr, xzr + subs x11, x11, x5 + sbcs x12, x12, x4 + sbcs x13, x13, x3 + sbcs x2, x2, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + lsl x5, x11, #32 + add x11, x5, x11 + mov x5, #-4294967295 + umulh x5, x5, x11 + mov x4, #4294967295 + mul x3, x4, x11 + umulh x4, x4, x11 + adds x5, x5, x3 + adcs x4, x4, 
x11 + adc x3, xzr, xzr + subs x12, x12, x5 + sbcs x13, x13, x4 + sbcs x2, x2, x3 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + lsl x5, x12, #32 + add x12, x5, x12 + mov x5, #-4294967295 + umulh x5, x5, x12 + mov x4, #4294967295 + mul x3, x4, x12 + umulh x4, x4, x12 + adds x5, x5, x3 + adcs x4, x4, x12 + adc x3, xzr, xzr + subs x13, x13, x5 + sbcs x2, x2, x4 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbcs x11, x11, xzr + sbc x12, x12, xzr + lsl x5, x13, #32 + add x13, x5, x13 + mov x5, #-4294967295 + umulh x5, x5, x13 + mov x4, #4294967295 + mul x3, x4, x13 + umulh x4, x4, x13 + adds x5, x5, x3 + adcs x4, x4, x13 + adc x3, xzr, xzr + subs x2, x2, x5 + sbcs x9, x9, x4 + sbcs x10, x10, x3 + sbcs x11, x11, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x19 + adcs x13, x13, x20 + adc x6, xzr, xzr + mov x8, #-4294967295 + adds x14, x2, x8 + mov x8, #4294967295 + adcs x15, x9, x8 + mov x8, #1 + adcs x16, x10, x8 + adcs x17, x11, xzr + adcs x19, x12, xzr + adcs x20, x13, xzr + adcs x6, x6, xzr + csel x2, x2, x14, eq + csel x9, x9, x15, eq + csel x10, x10, x16, eq + csel x11, x11, x17, eq + csel x12, x12, x19, eq + csel x13, x13, x20, eq + stp x2, x9, [sp, #192] + stp x10, x11, [sp, #208] + stp x12, x13, [sp, #224] + ldp x0, x1, [sp, #288] + mov x6, #4294967295 + subs x6, x6, x0 + mov x7, #-4294967296 + sbcs x7, x7, x1 + ldp x0, x1, [sp, #304] + mov x8, #-2 + sbcs x8, x8, x0 + mov x13, #-1 + sbcs x9, x13, x1 + ldp x0, x1, [sp, #320] + sbcs x10, x13, x0 + sbc x11, x13, x1 + mov x12, #9 + mul x0, x12, x6 + mul x1, x12, x7 + mul x2, x12, x8 + mul x3, x12, x9 + mul x4, x12, x10 + mul x5, x12, x11 + umulh x6, x12, x6 + umulh x7, x12, x7 + umulh x8, x12, x8 + umulh x9, x12, x9 + umulh x10, x12, x10 + umulh x12, x12, x11 + adds x1, x1, x6 + adcs x2, x2, x7 + adcs x3, x3, x8 + adcs x4, x4, x9 + adcs x5, x5, x10 + mov x6, #1 + adc x6, x12, x6 + ldp x8, x9, [sp, #144] + ldp x10, x11, [sp, #160] + ldp x12, x13, [sp, #176] + mov x14, #12 + mul x15, x14, x8 + umulh x8, x14, x8 + adds x0, x0, x15 + mul x15, x14, x9 + umulh x9, x14, x9 + adcs x1, x1, x15 + mul x15, x14, x10 + umulh x10, x14, x10 + adcs x2, x2, x15 + mul x15, x14, x11 + umulh x11, x14, x11 + adcs x3, x3, x15 + mul x15, x14, x12 + umulh x12, x14, x12 + adcs x4, x4, x15 + mul x15, x14, x13 + umulh x13, x14, x13 + adcs x5, x5, x15 + adc x6, x6, xzr + adds x1, x1, x8 + adcs x2, x2, x9 + adcs x3, x3, x10 + adcs x4, x4, x11 + adcs x5, x5, x12 + adcs x6, x6, x13 + lsl x7, x6, #32 + subs x8, x6, x7 + sbc x7, x7, xzr + adds x0, x0, x8 + adcs x1, x1, x7 + adcs x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adcs x5, x5, xzr + csetm x6, lo + mov x7, #4294967295 + and x7, x7, x6 + adds x0, x0, x7 + eor x7, x7, x6 + adcs x1, x1, x7 + mov x7, #-2 + and x7, x7, x6 + adcs x2, x2, x7 + adcs x3, x3, x6 + adcs x4, x4, x6 + adc x5, x5, x6 + stp x0, x1, [sp, #288] + stp x2, x3, [sp, #304] + stp x4, x5, [sp, #320] + ldp x5, x6, [sp, #192] + ldp x4, x3, [sp] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #208] + ldp x4, x3, [sp, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #224] + ldp x4, x3, [sp, #32] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, lo + mov x4, #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [sp, #240] + stp x7, x8, [sp, #256] + stp x9, x10, [sp, #272] + ldp x2, x3, 
[sp, #48] + mul x9, x2, x3 + umulh x10, x2, x3 + ldp x4, x5, [sp, #64] + mul x8, x2, x4 + adds x10, x10, x8 + mul x11, x2, x5 + mul x8, x3, x4 + adcs x11, x11, x8 + umulh x12, x2, x5 + mul x8, x3, x5 + adcs x12, x12, x8 + ldp x6, x7, [sp, #80] + mul x13, x2, x7 + mul x8, x3, x6 + adcs x13, x13, x8 + umulh x14, x2, x7 + mul x8, x3, x7 + adcs x14, x14, x8 + mul x15, x5, x6 + adcs x15, x15, xzr + umulh x16, x5, x6 + adc x16, x16, xzr + umulh x8, x2, x4 + adds x11, x11, x8 + umulh x8, x3, x4 + adcs x12, x12, x8 + umulh x8, x3, x5 + adcs x13, x13, x8 + umulh x8, x3, x6 + adcs x14, x14, x8 + umulh x8, x3, x7 + adcs x15, x15, x8 + adc x16, x16, xzr + mul x8, x2, x6 + adds x12, x12, x8 + mul x8, x4, x5 + adcs x13, x13, x8 + mul x8, x4, x6 + adcs x14, x14, x8 + mul x8, x4, x7 + adcs x15, x15, x8 + mul x8, x5, x7 + adcs x16, x16, x8 + mul x17, x6, x7 + adcs x17, x17, xzr + umulh x19, x6, x7 + adc x19, x19, xzr + umulh x8, x2, x6 + adds x13, x13, x8 + umulh x8, x4, x5 + adcs x14, x14, x8 + umulh x8, x4, x6 + adcs x15, x15, x8 + umulh x8, x4, x7 + adcs x16, x16, x8 + umulh x8, x5, x7 + adcs x17, x17, x8 + adc x19, x19, xzr + adds x9, x9, x9 + adcs x10, x10, x10 + adcs x11, x11, x11 + adcs x12, x12, x12 + adcs x13, x13, x13 + adcs x14, x14, x14 + adcs x15, x15, x15 + adcs x16, x16, x16 + adcs x17, x17, x17 + adcs x19, x19, x19 + cset x20, hs + umulh x8, x2, x2 + mul x2, x2, x2 + adds x9, x9, x8 + mul x8, x3, x3 + adcs x10, x10, x8 + umulh x8, x3, x3 + adcs x11, x11, x8 + mul x8, x4, x4 + adcs x12, x12, x8 + umulh x8, x4, x4 + adcs x13, x13, x8 + mul x8, x5, x5 + adcs x14, x14, x8 + umulh x8, x5, x5 + adcs x15, x15, x8 + mul x8, x6, x6 + adcs x16, x16, x8 + umulh x8, x6, x6 + adcs x17, x17, x8 + mul x8, x7, x7 + adcs x19, x19, x8 + umulh x8, x7, x7 + adc x20, x20, x8 + lsl x5, x2, #32 + add x2, x5, x2 + mov x5, #-4294967295 + umulh x5, x5, x2 + mov x4, #4294967295 + mul x3, x4, x2 + umulh x4, x4, x2 + adds x5, x5, x3 + adcs x4, x4, x2 + adc x3, xzr, xzr + subs x9, x9, x5 + sbcs x10, x10, x4 + sbcs x11, x11, x3 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x2, x2, xzr + lsl x5, x9, #32 + add x9, x5, x9 + mov x5, #-4294967295 + umulh x5, x5, x9 + mov x4, #4294967295 + mul x3, x4, x9 + umulh x4, x4, x9 + adds x5, x5, x3 + adcs x4, x4, x9 + adc x3, xzr, xzr + subs x10, x10, x5 + sbcs x11, x11, x4 + sbcs x12, x12, x3 + sbcs x13, x13, xzr + sbcs x2, x2, xzr + sbc x9, x9, xzr + lsl x5, x10, #32 + add x10, x5, x10 + mov x5, #-4294967295 + umulh x5, x5, x10 + mov x4, #4294967295 + mul x3, x4, x10 + umulh x4, x4, x10 + adds x5, x5, x3 + adcs x4, x4, x10 + adc x3, xzr, xzr + subs x11, x11, x5 + sbcs x12, x12, x4 + sbcs x13, x13, x3 + sbcs x2, x2, xzr + sbcs x9, x9, xzr + sbc x10, x10, xzr + lsl x5, x11, #32 + add x11, x5, x11 + mov x5, #-4294967295 + umulh x5, x5, x11 + mov x4, #4294967295 + mul x3, x4, x11 + umulh x4, x4, x11 + adds x5, x5, x3 + adcs x4, x4, x11 + adc x3, xzr, xzr + subs x12, x12, x5 + sbcs x13, x13, x4 + sbcs x2, x2, x3 + sbcs x9, x9, xzr + sbcs x10, x10, xzr + sbc x11, x11, xzr + lsl x5, x12, #32 + add x12, x5, x12 + mov x5, #-4294967295 + umulh x5, x5, x12 + mov x4, #4294967295 + mul x3, x4, x12 + umulh x4, x4, x12 + adds x5, x5, x3 + adcs x4, x4, x12 + adc x3, xzr, xzr + subs x13, x13, x5 + sbcs x2, x2, x4 + sbcs x9, x9, x3 + sbcs x10, x10, xzr + sbcs x11, x11, xzr + sbc x12, x12, xzr + lsl x5, x13, #32 + add x13, x5, x13 + mov x5, #-4294967295 + umulh x5, x5, x13 + mov x4, #4294967295 + mul x3, x4, x13 + umulh x4, x4, x13 + adds x5, x5, x3 + adcs x4, x4, x13 + adc x3, xzr, xzr + subs x2, 
x2, x5 + sbcs x9, x9, x4 + sbcs x10, x10, x3 + sbcs x11, x11, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + adds x2, x2, x14 + adcs x9, x9, x15 + adcs x10, x10, x16 + adcs x11, x11, x17 + adcs x12, x12, x19 + adcs x13, x13, x20 + adc x6, xzr, xzr + mov x8, #-4294967295 + adds x14, x2, x8 + mov x8, #4294967295 + adcs x15, x9, x8 + mov x8, #1 + adcs x16, x10, x8 + adcs x17, x11, xzr + adcs x19, x12, xzr + adcs x20, x13, xzr + adcs x6, x6, xzr + csel x2, x2, x14, eq + csel x9, x9, x15, eq + csel x10, x10, x16, eq + csel x11, x11, x17, eq + csel x12, x12, x19, eq + csel x13, x13, x20, eq + stp x2, x9, [sp, #192] + stp x10, x11, [sp, #208] + stp x12, x13, [sp, #224] + ldp x5, x6, [sp, #240] + ldp x4, x3, [sp, #48] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [sp, #256] + ldp x4, x3, [sp, #64] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [sp, #272] + ldp x4, x3, [sp, #80] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, lo + mov x4, #4294967295 + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #-2 + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [x23, #96] + stp x7, x8, [x23, #112] + stp x9, x10, [x23, #128] + ldp x3, x4, [sp, #288] + ldp x5, x6, [sp, #96] + mul x12, x3, x5 + umulh x13, x3, x5 + mul x11, x3, x6 + umulh x14, x3, x6 + adds x13, x13, x11 + ldp x7, x8, [sp, #112] + mul x11, x3, x7 + umulh x15, x3, x7 + adcs x14, x14, x11 + mul x11, x3, x8 + umulh x16, x3, x8 + adcs x15, x15, x11 + ldp x9, x10, [sp, #128] + mul x11, x3, x9 + umulh x17, x3, x9 + adcs x16, x16, x11 + mul x11, x3, x10 + umulh x19, x3, x10 + adcs x17, x17, x11 + adc x19, x19, xzr + mul x11, x4, x5 + adds x13, x13, x11 + mul x11, x4, x6 + adcs x14, x14, x11 + mul x11, x4, x7 + adcs x15, x15, x11 + mul x11, x4, x8 + adcs x16, x16, x11 + mul x11, x4, x9 + adcs x17, x17, x11 + mul x11, x4, x10 + adcs x19, x19, x11 + cset x20, hs + umulh x11, x4, x5 + adds x14, x14, x11 + umulh x11, x4, x6 + adcs x15, x15, x11 + umulh x11, x4, x7 + adcs x16, x16, x11 + umulh x11, x4, x8 + adcs x17, x17, x11 + umulh x11, x4, x9 + adcs x19, x19, x11 + umulh x11, x4, x10 + adc x20, x20, x11 + ldp x3, x4, [sp, #304] + mul x11, x3, x5 + adds x14, x14, x11 + mul x11, x3, x6 + adcs x15, x15, x11 + mul x11, x3, x7 + adcs x16, x16, x11 + mul x11, x3, x8 + adcs x17, x17, x11 + mul x11, x3, x9 + adcs x19, x19, x11 + mul x11, x3, x10 + adcs x20, x20, x11 + cset x21, hs + umulh x11, x3, x5 + adds x15, x15, x11 + umulh x11, x3, x6 + adcs x16, x16, x11 + umulh x11, x3, x7 + adcs x17, x17, x11 + umulh x11, x3, x8 + adcs x19, x19, x11 + umulh x11, x3, x9 + adcs x20, x20, x11 + umulh x11, x3, x10 + adc x21, x21, x11 + mul x11, x4, x5 + adds x15, x15, x11 + mul x11, x4, x6 + adcs x16, x16, x11 + mul x11, x4, x7 + adcs x17, x17, x11 + mul x11, x4, x8 + adcs x19, x19, x11 + mul x11, x4, x9 + adcs x20, x20, x11 + mul x11, x4, x10 + adcs x21, x21, x11 + cset x22, hs + umulh x11, x4, x5 + adds x16, x16, x11 + umulh x11, x4, x6 + adcs x17, x17, x11 + umulh x11, x4, x7 + adcs x19, x19, x11 + umulh x11, x4, x8 + adcs x20, x20, x11 + umulh x11, x4, x9 + adcs x21, x21, x11 + umulh x11, x4, x10 + adc x22, x22, x11 + ldp x3, x4, [sp, #320] + mul x11, x3, x5 + adds x16, x16, x11 + mul x11, x3, x6 + adcs x17, x17, x11 + mul x11, x3, x7 + adcs x19, x19, x11 + mul x11, x3, x8 + adcs x20, x20, x11 + mul x11, x3, x9 + adcs x21, x21, x11 + mul x11, x3, x10 + adcs x22, x22, x11 + cset x2, hs + umulh x11, x3, x5 + adds x17, x17, x11 + umulh x11, x3, x6 + adcs x19, x19, x11 + umulh x11, 
x3, x7 + adcs x20, x20, x11 + umulh x11, x3, x8 + adcs x21, x21, x11 + umulh x11, x3, x9 + adcs x22, x22, x11 + umulh x11, x3, x10 + adc x2, x2, x11 + mul x11, x4, x5 + adds x17, x17, x11 + mul x11, x4, x6 + adcs x19, x19, x11 + mul x11, x4, x7 + adcs x20, x20, x11 + mul x11, x4, x8 + adcs x21, x21, x11 + mul x11, x4, x9 + adcs x22, x22, x11 + mul x11, x4, x10 + adcs x2, x2, x11 + cset x1, hs + umulh x11, x4, x5 + adds x19, x19, x11 + umulh x11, x4, x6 + adcs x20, x20, x11 + umulh x11, x4, x7 + adcs x21, x21, x11 + umulh x11, x4, x8 + adcs x22, x22, x11 + umulh x11, x4, x9 + adcs x2, x2, x11 + umulh x11, x4, x10 + adc x1, x1, x11 + lsl x7, x12, #32 + add x12, x7, x12 + mov x7, #-4294967295 + umulh x7, x7, x12 + mov x6, #4294967295 + mul x5, x6, x12 + umulh x6, x6, x12 + adds x7, x7, x5 + adcs x6, x6, x12 + adc x5, xzr, xzr + subs x13, x13, x7 + sbcs x14, x14, x6 + sbcs x15, x15, x5 + sbcs x16, x16, xzr + sbcs x17, x17, xzr + sbc x12, x12, xzr + lsl x7, x13, #32 + add x13, x7, x13 + mov x7, #-4294967295 + umulh x7, x7, x13 + mov x6, #4294967295 + mul x5, x6, x13 + umulh x6, x6, x13 + adds x7, x7, x5 + adcs x6, x6, x13 + adc x5, xzr, xzr + subs x14, x14, x7 + sbcs x15, x15, x6 + sbcs x16, x16, x5 + sbcs x17, x17, xzr + sbcs x12, x12, xzr + sbc x13, x13, xzr + lsl x7, x14, #32 + add x14, x7, x14 + mov x7, #-4294967295 + umulh x7, x7, x14 + mov x6, #4294967295 + mul x5, x6, x14 + umulh x6, x6, x14 + adds x7, x7, x5 + adcs x6, x6, x14 + adc x5, xzr, xzr + subs x15, x15, x7 + sbcs x16, x16, x6 + sbcs x17, x17, x5 + sbcs x12, x12, xzr + sbcs x13, x13, xzr + sbc x14, x14, xzr + lsl x7, x15, #32 + add x15, x7, x15 + mov x7, #-4294967295 + umulh x7, x7, x15 + mov x6, #4294967295 + mul x5, x6, x15 + umulh x6, x6, x15 + adds x7, x7, x5 + adcs x6, x6, x15 + adc x5, xzr, xzr + subs x16, x16, x7 + sbcs x17, x17, x6 + sbcs x12, x12, x5 + sbcs x13, x13, xzr + sbcs x14, x14, xzr + sbc x15, x15, xzr + lsl x7, x16, #32 + add x16, x7, x16 + mov x7, #-4294967295 + umulh x7, x7, x16 + mov x6, #4294967295 + mul x5, x6, x16 + umulh x6, x6, x16 + adds x7, x7, x5 + adcs x6, x6, x16 + adc x5, xzr, xzr + subs x17, x17, x7 + sbcs x12, x12, x6 + sbcs x13, x13, x5 + sbcs x14, x14, xzr + sbcs x15, x15, xzr + sbc x16, x16, xzr + lsl x7, x17, #32 + add x17, x7, x17 + mov x7, #-4294967295 + umulh x7, x7, x17 + mov x6, #4294967295 + mul x5, x6, x17 + umulh x6, x6, x17 + adds x7, x7, x5 + adcs x6, x6, x17 + adc x5, xzr, xzr + subs x12, x12, x7 + sbcs x13, x13, x6 + sbcs x14, x14, x5 + sbcs x15, x15, xzr + sbcs x16, x16, xzr + sbc x17, x17, xzr + adds x12, x12, x19 + adcs x13, x13, x20 + adcs x14, x14, x21 + adcs x15, x15, x22 + adcs x16, x16, x2 + adcs x17, x17, x1 + adc x10, xzr, xzr + mov x11, #-4294967295 + adds x19, x12, x11 + mov x11, #4294967295 + adcs x20, x13, x11 + mov x11, #1 + adcs x21, x14, x11 + adcs x22, x15, xzr + adcs x2, x16, xzr + adcs x1, x17, xzr + adcs x10, x10, xzr + csel x12, x12, x19, eq + csel x13, x13, x20, eq + csel x14, x14, x21, eq + csel x15, x15, x22, eq + csel x16, x16, x2, eq + csel x17, x17, x1, eq + stp x12, x13, [sp, #240] + stp x14, x15, [sp, #256] + stp x16, x17, [sp, #272] + ldp x1, x2, [sp, #144] + ldp x3, x4, [sp, #160] + ldp x5, x6, [sp, #176] + lsl x0, x1, #2 + ldp x7, x8, [sp, #288] + subs x0, x0, x7 + extr x1, x2, x1, #62 + sbcs x1, x1, x8 + ldp x7, x8, [sp, #304] + extr x2, x3, x2, #62 + sbcs x2, x2, x7 + extr x3, x4, x3, #62 + sbcs x3, x3, x8 + extr x4, x5, x4, #62 + ldp x7, x8, [sp, #320] + sbcs x4, x4, x7 + extr x5, x6, x5, #62 + sbcs x5, x5, x8 + lsr x6, x6, #62 + adc x6, x6, 
xzr + lsl x7, x6, #32 + subs x8, x6, x7 + sbc x7, x7, xzr + adds x0, x0, x8 + adcs x1, x1, x7 + adcs x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adcs x5, x5, xzr + csetm x8, lo + mov x9, #4294967295 + and x9, x9, x8 + adds x0, x0, x9 + eor x9, x9, x8 + adcs x1, x1, x9 + mov x9, #-2 + and x9, x9, x8 + adcs x2, x2, x9 + adcs x3, x3, x8 + adcs x4, x4, x8 + adc x5, x5, x8 + stp x0, x1, [x23] + stp x2, x3, [x23, #16] + stp x4, x5, [x23, #32] + ldp x0, x1, [sp, #192] + mov x6, #4294967295 + subs x6, x6, x0 + mov x7, #-4294967296 + sbcs x7, x7, x1 + ldp x0, x1, [sp, #208] + mov x8, #-2 + sbcs x8, x8, x0 + mov x13, #-1 + sbcs x9, x13, x1 + ldp x0, x1, [sp, #224] + sbcs x10, x13, x0 + sbc x11, x13, x1 + lsl x0, x6, #3 + extr x1, x7, x6, #61 + extr x2, x8, x7, #61 + extr x3, x9, x8, #61 + extr x4, x10, x9, #61 + extr x5, x11, x10, #61 + lsr x6, x11, #61 + add x6, x6, #1 + ldp x8, x9, [sp, #240] + ldp x10, x11, [sp, #256] + ldp x12, x13, [sp, #272] + mov x14, #3 + mul x15, x14, x8 + umulh x8, x14, x8 + adds x0, x0, x15 + mul x15, x14, x9 + umulh x9, x14, x9 + adcs x1, x1, x15 + mul x15, x14, x10 + umulh x10, x14, x10 + adcs x2, x2, x15 + mul x15, x14, x11 + umulh x11, x14, x11 + adcs x3, x3, x15 + mul x15, x14, x12 + umulh x12, x14, x12 + adcs x4, x4, x15 + mul x15, x14, x13 + umulh x13, x14, x13 + adcs x5, x5, x15 + adc x6, x6, xzr + adds x1, x1, x8 + adcs x2, x2, x9 + adcs x3, x3, x10 + adcs x4, x4, x11 + adcs x5, x5, x12 + adcs x6, x6, x13 + lsl x7, x6, #32 + subs x8, x6, x7 + sbc x7, x7, xzr + adds x0, x0, x8 + adcs x1, x1, x7 + adcs x2, x2, x6 + adcs x3, x3, xzr + adcs x4, x4, xzr + adcs x5, x5, xzr + csetm x6, lo + mov x7, #4294967295 + and x7, x7, x6 + adds x0, x0, x7 + eor x7, x7, x6 + adcs x1, x1, x7 + mov x7, #-2 + and x7, x7, x6 + adcs x2, x2, x7 + adcs x3, x3, x6 + adcs x4, x4, x6 + adc x5, x5, x6 + stp x0, x1, [x23, #48] + stp x2, x3, [x23, #64] + stp x4, x5, [x23, #80] + add sp, sp, #336 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/arm/p384/unopt/p384_montjadd.S b/third_party/s2n-bignum/arm/p384/unopt/p384_montjadd.S new file mode 100644 index 0000000000..cbd6f3cf00 --- /dev/null +++ b/third_party/s2n-bignum/arm/p384/unopt/p384_montjadd.S @@ -0,0 +1,1036 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Point addition on NIST curve P-384 in Montgomery-Jacobian coordinates +// +// extern void p384_montjadd +// (uint64_t p3[static 18],uint64_t p1[static 18],uint64_t p2[static 18]); +// +// Does p3 := p1 + p2 where all points are regarded as Jacobian triples with +// each coordinate in the Montgomery domain, i.e. x' = (2^384 * x) mod p_384. +// A Jacobian triple (x',y',z') represents affine point (x/z^2,y/z^3). 
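+// For example, an affine point (x,y) on the curve could be supplied as the
+// triple (2^384 * x mod p_384, 2^384 * y mod p_384, 2^384 mod p_384), i.e.
+// with z = 1 in the Montgomery domain.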
+// +// Standard ARM ABI: X0 = p3, X1 = p1, X2 = p2 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjadd) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjadd) + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 + +// Stable homes for input arguments during main code sequence + +#define input_z x24 +#define input_x x25 +#define input_y x26 + +// Pointer-offset pairs for inputs and outputs + +#define x_1 input_x, #0 +#define y_1 input_x, #NUMSIZE +#define z_1 input_x, #(2*NUMSIZE) + +#define x_2 input_y, #0 +#define y_2 input_y, #NUMSIZE +#define z_2 input_y, #(2*NUMSIZE) + +#define x_3 input_z, #0 +#define y_3 input_z, #NUMSIZE +#define z_3 input_z, #(2*NUMSIZE) + +// Pointer-offset pairs for temporaries, with some aliasing +// NSPACE is the total stack needed for these temporaries + +#define z1sq sp, #(NUMSIZE*0) +#define ww sp, #(NUMSIZE*0) +#define resx sp, #(NUMSIZE*0) + +#define yd sp, #(NUMSIZE*1) +#define y2a sp, #(NUMSIZE*1) + +#define x2a sp, #(NUMSIZE*2) +#define zzx2 sp, #(NUMSIZE*2) + +#define zz sp, #(NUMSIZE*3) +#define t1 sp, #(NUMSIZE*3) + +#define t2 sp, #(NUMSIZE*4) +#define x1a sp, #(NUMSIZE*4) +#define zzx1 sp, #(NUMSIZE*4) +#define resy sp, #(NUMSIZE*4) + +#define xd sp, #(NUMSIZE*5) +#define z2sq sp, #(NUMSIZE*5) +#define resz sp, #(NUMSIZE*5) + +#define y1a sp, #(NUMSIZE*6) + +#define NSPACE (NUMSIZE*7) + +// Corresponds to bignum_montmul_p384_neon, with callee-save register spills +// rewritten to update sp in advance + +.montmul_p384: + sub sp, sp, 48 + stp x19, x20, [sp, 32] + stp x21, x22, [sp, 16] + stp x23, x24, [sp] + ldr q3, [x1] + ldr q25, [x2] + ldp x13, x23, [x2] + ldp x3, x21, [x1] + rev64 v23.4S, v25.4S + uzp1 v17.4S, v25.4S, v3.4S + umulh x15, x3, x13 + mul v6.4S, v23.4S, v3.4S + uzp1 v3.4S, v3.4S, v3.4S + ldr q27, [x2, #32] + ldp x8, x24, [x1, #16] + subs x6, x3, x21 + ldr q0, [x1, #32] + movi v23.2D, #0x00000000ffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4S, v27.4S + uzp2 v25.4S, v27.4S, v27.4S + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2S, v0.2D + xtn v24.2S, v27.2D + cneg x20, x7, cc + ldp x6, x14, [x2, #16] + mul v27.4S, v4.4S, v0.4S + uaddlp v20.2D, v6.4S + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4S, v0.4S, v0.4S + umull v21.2D, v22.2S, v25.2S + shl v0.2D, v20.2D, #32 + umlal v0.2D, v3.2S, v17.2S + mul x22, x8, x6 + umull v1.2D, v6.2S, v25.2S + subs x12, x3, x8 + umull v20.2D, v22.2S, v24.2S + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2D, v20.2D, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2D, v21.2D, #32 + adds x22, x15, x7 + and v26.16B, v21.16B, v23.16B + adcs x16, x12, x15 + uaddlp v25.2D, v27.4S + adcs x9, x19, x12 + umlal v26.2D, v6.2S, v24.2S + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2D, v25.2D, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 + mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2D, v22.2S, v24.2S + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2D, v26.2D, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + 
adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x2, #32] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x1, #32] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [x0] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [x0, #16] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [x0, #32] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [x0] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [x0, #16] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [x0, #32] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [x0] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [x0, #16] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, x23, x16 + umulh x2, x16, x6 + stp x3, x12, [x0, #32] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs 
x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [x0] + ldp x21, x12, [x0, #16] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [x0, #32] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [x0] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [x0, #16] + adc x12, x15, x23 + stp x21, x12, [x0, #32] + ldp x23, x24, [sp] + ldp x21, x22, [sp, 16] + ldp x19, x20, [sp, 32] + add sp, sp, 48 + ret + +// Corresponds exactly to bignum_montsqr_p384 + +.montsqr_p384: + ldr q1, [x1] + ldp x9, x2, [x1] + ldr q0, [x1] + ldp x4, x6, [x1, #16] + rev64 v21.4S, v1.4S + uzp2 v28.4S, v1.4S, v1.4S + umulh x7, x9, x2 + xtn v17.2S, v1.2D + mul v27.4S, v21.4S, v0.4S + ldr q20, [x1, #32] + xtn v30.2S, v0.2D + ldr q1, [x1, #32] + uzp2 v31.4S, v0.4S, v0.4S + ldp x5, x10, [x1, #32] + umulh x8, x9, x4 + uaddlp v3.2D, v27.4S + umull v16.2D, v30.2S, v17.2S + mul x16, x9, x4 + umull v27.2D, v30.2S, v28.2S + shrn v0.2S, v20.2D, #32 + xtn v7.2S, v20.2D + shl v20.2D, v3.2D, #32 + umull v3.2D, v31.2S, v28.2S + mul x3, x2, x4 + umlal v20.2D, v30.2S, v17.2S + umull v22.2D, v7.2S, v0.2S + usra v27.2D, v16.2D, #32 + umulh x11, x2, x4 + movi 
v21.2D, #0x00000000ffffffff + uzp2 v28.4S, v1.4S, v1.4S + adds x15, x16, x7 + and v5.16B, v27.16B, v21.16B + adcs x3, x3, x8 + usra v3.2D, v27.2D, #32 + dup v29.2D, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2D, v31.2S, v17.2S + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2D, v22.2D, #33 + xtn v25.2S, v29.2D + rev64 v31.4S, v1.4S + lsl x13, x14, #32 + uzp2 v6.4S, v29.4S, v29.4S + umlal v19.2D, v7.2S, v7.2S + usra v3.2D, v5.2D, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4S, v31.4S, v29.4S + xtn v4.2S, v1.2D + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2D, v25.2S, v28.2S + adcs x11, x16, x16 + umull v21.2D, v25.2S, v4.2S + mov x17, v3.d[0] + umull v18.2D, v6.2S, v28.2S + adc x16, x8, xzr + uaddlp v16.2D, v17.4S + movi v1.2D, #0x00000000ffffffff + subs x13, x13, x12 + usra v31.2D, v21.2D, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2D, v16.2D, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16B, v31.16B, v1.16B + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2D, v6.2S, v4.2S + usra v18.2D, v31.2D, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2D, v25.2S, v4.2S + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2D, v3.2D, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [x0] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [x0, #16] + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [x0, #32] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [x0, #16] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs x1, x7, x1 + ldp x7, x16, [x0] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [x0, #32] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add 
x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [x0] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [x0, #16] + adc x17, x14, xzr + stp x2, x17, [x0, #32] + ret + +// Corresponds exactly to bignum_sub_p384 + +.sub_p384: + ldp x5, x6, [x1] + ldp x4, x3, [x2] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [x1, #16] + ldp x4, x3, [x2, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [x1, #32] + ldp x4, x3, [x2, #32] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [x0] + stp x7, x8, [x0, #16] + stp x9, x10, [x0, #32] + ret + + +#define montmul_p384(P0,P1,P2) \ + add x0, P0;\ + add x1, P1;\ + add x2, P2;\ + bl .montmul_p384 + +#define montsqr_p384(P0,P1) \ + add x0, P0;\ + add x1, P1;\ + bl .montsqr_p384 + +#define sub_p384(P0,P1,P2) \ + add x0, P0;\ + add x1, P1;\ + add x2, P2;\ + bl .sub_p384 + + +S2N_BN_SYMBOL(p384_montjadd): + +// Save regs and make room on stack for temporary variables + + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x30, xzr, [sp, #-16]! 
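+// x30 is saved here (and restored before the final ret) because the montmul_p384/montsqr_p384/sub_p384 wrappers above reach the local field-operation subroutines with bl, which overwrites the link register; xzr is stored alongside it only to keep the stack pointer 16-byte aligned.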
+ sub sp, sp, NSPACE + +// Move the input arguments to stable places + + mov input_z, x0 + mov input_x, x1 + mov input_y, x2 + +// Main code, just a sequence of basic field operations +// 12 * multiply + 4 * square + 7 * subtract + + montsqr_p384(z1sq,z_1) + montsqr_p384(z2sq,z_2) + + montmul_p384(y1a,z_2,y_1) + montmul_p384(y2a,z_1,y_2) + + montmul_p384(x2a,z1sq,x_2) + montmul_p384(x1a,z2sq,x_1) + montmul_p384(y2a,z1sq,y2a) + montmul_p384(y1a,z2sq,y1a) + + sub_p384(xd,x2a,x1a) + sub_p384(yd,y2a,y1a) + + montsqr_p384(zz,xd) + montsqr_p384(ww,yd) + + montmul_p384(zzx1,zz,x1a) + montmul_p384(zzx2,zz,x2a) + + sub_p384(resx,ww,zzx1) + sub_p384(t1,zzx2,zzx1) + + montmul_p384(xd,xd,z_1) + + sub_p384(resx,resx,zzx2) + + sub_p384(t2,zzx1,resx) + + montmul_p384(t1,t1,y1a) + montmul_p384(resz,xd,z_2) + montmul_p384(t2,yd,t2) + + sub_p384(resy,t2,t1) + +// Load in the z coordinates of the inputs to check for P1 = 0 and P2 = 0 +// The condition codes get set by a comparison (P2 != 0) - (P1 != 0) +// So "HI" <=> CF /\ ~ZF <=> P1 = 0 /\ ~(P2 = 0) +// and "LO" <=> ~CF <=> ~(P1 = 0) /\ P2 = 0 + + ldp x0, x1, [z_1] + ldp x2, x3, [z_1+16] + ldp x4, x5, [z_1+32] + + orr x20, x0, x1 + orr x21, x2, x3 + orr x22, x4, x5 + orr x20, x20, x21 + orr x20, x20, x22 + cmp x20, xzr + cset x20, ne + + ldp x6, x7, [z_2] + ldp x8, x9, [z_2+16] + ldp x10, x11, [z_2+32] + + orr x21, x6, x7 + orr x22, x8, x9 + orr x23, x10, x11 + orr x21, x21, x22 + orr x21, x21, x23 + cmp x21, xzr + cset x21, ne + + cmp x21, x20 + +// Multiplex the outputs accordingly, re-using the z's in registers + + ldp x12, x13, [resz] + csel x12, x0, x12, lo + csel x13, x1, x13, lo + csel x12, x6, x12, hi + csel x13, x7, x13, hi + ldp x14, x15, [resz+16] + csel x14, x2, x14, lo + csel x15, x3, x15, lo + csel x14, x8, x14, hi + csel x15, x9, x15, hi + ldp x16, x17, [resz+32] + csel x16, x4, x16, lo + csel x17, x5, x17, lo + csel x16, x10, x16, hi + csel x17, x11, x17, hi + + ldp x20, x21, [x_1] + ldp x0, x1, [resx] + csel x0, x20, x0, lo + csel x1, x21, x1, lo + ldp x20, x21, [x_2] + csel x0, x20, x0, hi + csel x1, x21, x1, hi + + ldp x20, x21, [x_1+16] + ldp x2, x3, [resx+16] + csel x2, x20, x2, lo + csel x3, x21, x3, lo + ldp x20, x21, [x_2+16] + csel x2, x20, x2, hi + csel x3, x21, x3, hi + + ldp x20, x21, [x_1+32] + ldp x4, x5, [resx+32] + csel x4, x20, x4, lo + csel x5, x21, x5, lo + ldp x20, x21, [x_2+32] + csel x4, x20, x4, hi + csel x5, x21, x5, hi + + ldp x20, x21, [y_1] + ldp x6, x7, [resy] + csel x6, x20, x6, lo + csel x7, x21, x7, lo + ldp x20, x21, [y_2] + csel x6, x20, x6, hi + csel x7, x21, x7, hi + + ldp x20, x21, [y_1+16] + ldp x8, x9, [resy+16] + csel x8, x20, x8, lo + csel x9, x21, x9, lo + ldp x20, x21, [y_2+16] + csel x8, x20, x8, hi + csel x9, x21, x9, hi + + ldp x20, x21, [y_1+32] + ldp x10, x11, [resy+32] + csel x10, x20, x10, lo + csel x11, x21, x11, lo + ldp x20, x21, [y_2+32] + csel x10, x20, x10, hi + csel x11, x21, x11, hi + +// Finally store back the multiplexed values + + stp x0, x1, [x_3] + stp x2, x3, [x_3+16] + stp x4, x5, [x_3+32] + stp x6, x7, [y_3] + stp x8, x9, [y_3+16] + stp x10, x11, [y_3+32] + stp x12, x13, [z_3] + stp x14, x15, [z_3+16] + stp x16, x17, [z_3+32] + +// Restore stack and registers + + add sp, sp, NSPACE + + ldp x30, xzr, [sp], 16 + ldp x25, x26, [sp], 16 + ldp x23, x24, [sp], 16 + ldp x21, x22, [sp], 16 + ldp x19, x20, [sp], 16 + + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/arm/p384/unopt/p384_montjdouble.S
b/third_party/s2n-bignum/arm/p384/unopt/p384_montjdouble.S new file mode 100644 index 0000000000..4cdeeb8699 --- /dev/null +++ b/third_party/s2n-bignum/arm/p384/unopt/p384_montjdouble.S @@ -0,0 +1,1216 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Point doubling on NIST curve P-384 in Montgomery-Jacobian coordinates +// +// extern void p384_montjdouble +// (uint64_t p3[static 18],uint64_t p1[static 18]); +// +// Does p3 := 2 * p1 where all points are regarded as Jacobian triples with +// each coordinate in the Montgomery domain, i.e. x' = (2^384 * x) mod p_384. +// A Jacobian triple (x',y',z') represents affine point (x/z^2,y/z^3). +// +// Standard ARM ABI: X0 = p3, X1 = p1 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjdouble) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjdouble) + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 + +// Stable homes for input arguments during main code sequence + +#define input_z x25 +#define input_x x26 + +// Pointer-offset pairs for inputs and outputs + +#define x_1 input_x, #0 +#define y_1 input_x, #NUMSIZE +#define z_1 input_x, #(2*NUMSIZE) + +#define x_3 input_z, #0 +#define y_3 input_z, #NUMSIZE +#define z_3 input_z, #(2*NUMSIZE) + +// Pointer-offset pairs for temporaries, with some aliasing +// NSPACE is the total stack needed for these temporaries + +#define z2 sp, #(NUMSIZE*0) +#define y2 sp, #(NUMSIZE*1) +#define x2p sp, #(NUMSIZE*2) +#define xy2 sp, #(NUMSIZE*3) + +#define y4 sp, #(NUMSIZE*4) +#define t2 sp, #(NUMSIZE*4) + +#define dx2 sp, #(NUMSIZE*5) +#define t1 sp, #(NUMSIZE*5) + +#define d_ sp, #(NUMSIZE*6) +#define x4p sp, #(NUMSIZE*6) + +#define NSPACE #(NUMSIZE*7) + +// Corresponds exactly to bignum_montmul_p384_neon + +.montmul_p384: + sub sp, sp, 48 + stp x19, x20, [sp, 32] + stp x21, x22, [sp, 16] + stp x23, x24, [sp] + ldr q3, [x1] + ldr q25, [x2] + ldp x13, x23, [x2] + ldp x3, x21, [x1] + rev64 v23.4S, v25.4S + uzp1 v17.4S, v25.4S, v3.4S + umulh x15, x3, x13 + mul v6.4S, v23.4S, v3.4S + uzp1 v3.4S, v3.4S, v3.4S + ldr q27, [x2, #32] + ldp x8, x24, [x1, #16] + subs x6, x3, x21 + ldr q0, [x1, #32] + movi v23.2D, #0x00000000ffffffff + csetm x10, cc + umulh x19, x21, x23 + rev64 v4.4S, v27.4S + uzp2 v25.4S, v27.4S, v27.4S + cneg x4, x6, cc + subs x7, x23, x13 + xtn v22.2S, v0.2D + xtn v24.2S, v27.2D + cneg x20, x7, cc + ldp x6, x14, [x2, #16] + mul v27.4S, v4.4S, v0.4S + uaddlp v20.2D, v6.4S + cinv x5, x10, cc + mul x16, x4, x20 + uzp2 v6.4S, v0.4S, v0.4S + umull v21.2D, v22.2S, v25.2S + shl v0.2D, v20.2D, #32 + umlal v0.2D, v3.2S, v17.2S + mul x22, x8, x6 + umull v1.2D, v6.2S, v25.2S + subs x12, x3, x8 + umull v20.2D, v22.2S, v24.2S + cneg x17, x12, cc + umulh x9, x8, x6 + mov x12, v0.d[1] + eor x11, x16, x5 + mov x7, v0.d[0] + csetm x10, cc + usra v21.2D, v20.2D, #32 + adds x15, x15, x12 + adcs x12, x19, x22 + umulh x20, x4, x20 + adc x19, x9, xzr + usra v1.2D, v21.2D, #32 + adds x22, x15, x7 + and v26.16B, v21.16B, v23.16B + adcs x16, x12, x15 + uaddlp v25.2D, v27.4S + adcs x9, x19, x12 + umlal v26.2D, v6.2S, v24.2S + adc x4, x19, xzr + adds x16, x16, x7 + shl v27.2D, v25.2D, #32 + adcs x9, x9, x15 + adcs x4, x4, x12 + eor x12, x20, x5 + adc x15, x19, xzr + subs x20, x6, x13 + cneg x20, x20, cc + cinv x10, x10, cc + cmn x5, #0x1 
+ mul x19, x17, x20 + adcs x11, x22, x11 + adcs x12, x16, x12 + adcs x9, x9, x5 + umulh x17, x17, x20 + adcs x22, x4, x5 + adc x5, x15, x5 + subs x16, x21, x8 + cneg x20, x16, cc + eor x19, x19, x10 + csetm x4, cc + subs x16, x6, x23 + cneg x16, x16, cc + umlal v27.2D, v22.2S, v24.2S + mul x15, x20, x16 + cinv x4, x4, cc + cmn x10, #0x1 + usra v1.2D, v26.2D, #32 + adcs x19, x12, x19 + eor x17, x17, x10 + adcs x9, x9, x17 + adcs x22, x22, x10 + lsl x12, x7, #32 + umulh x20, x20, x16 + eor x16, x15, x4 + ldp x15, x17, [x2, #32] + add x2, x12, x7 + adc x7, x5, x10 + ldp x5, x10, [x1, #32] + lsr x1, x2, #32 + eor x12, x20, x4 + subs x1, x1, x2 + sbc x20, x2, xzr + cmn x4, #0x1 + adcs x9, x9, x16 + extr x1, x20, x1, #32 + lsr x20, x20, #32 + adcs x22, x22, x12 + adc x16, x7, x4 + adds x12, x20, x2 + umulh x7, x24, x14 + adc x4, xzr, xzr + subs x1, x11, x1 + sbcs x20, x19, x12 + sbcs x12, x9, x4 + lsl x9, x1, #32 + add x1, x9, x1 + sbcs x9, x22, xzr + mul x22, x24, x14 + sbcs x16, x16, xzr + lsr x4, x1, #32 + sbc x19, x2, xzr + subs x4, x4, x1 + sbc x11, x1, xzr + extr x2, x11, x4, #32 + lsr x4, x11, #32 + adds x4, x4, x1 + adc x11, xzr, xzr + subs x2, x20, x2 + sbcs x4, x12, x4 + sbcs x20, x9, x11 + lsl x12, x2, #32 + add x2, x12, x2 + sbcs x9, x16, xzr + lsr x11, x2, #32 + sbcs x19, x19, xzr + sbc x1, x1, xzr + subs x16, x11, x2 + sbc x12, x2, xzr + extr x16, x12, x16, #32 + lsr x12, x12, #32 + adds x11, x12, x2 + adc x12, xzr, xzr + subs x16, x4, x16 + mov x4, v27.d[0] + sbcs x11, x20, x11 + sbcs x20, x9, x12 + stp x16, x11, [x0] + sbcs x11, x19, xzr + sbcs x9, x1, xzr + stp x20, x11, [x0, #16] + mov x1, v1.d[0] + sbc x20, x2, xzr + subs x12, x24, x5 + mov x11, v27.d[1] + cneg x16, x12, cc + csetm x2, cc + subs x19, x15, x14 + mov x12, v1.d[1] + cinv x2, x2, cc + cneg x19, x19, cc + stp x9, x20, [x0, #32] + mul x9, x16, x19 + adds x4, x7, x4 + adcs x11, x1, x11 + adc x1, x12, xzr + adds x20, x4, x22 + umulh x19, x16, x19 + adcs x7, x11, x4 + eor x16, x9, x2 + adcs x9, x1, x11 + adc x12, x1, xzr + adds x7, x7, x22 + adcs x4, x9, x4 + adcs x9, x12, x11 + adc x12, x1, xzr + cmn x2, #0x1 + eor x1, x19, x2 + adcs x11, x20, x16 + adcs x19, x7, x1 + adcs x1, x4, x2 + adcs x20, x9, x2 + adc x2, x12, x2 + subs x12, x24, x10 + cneg x16, x12, cc + csetm x12, cc + subs x9, x17, x14 + cinv x12, x12, cc + cneg x9, x9, cc + subs x3, x24, x3 + sbcs x21, x5, x21 + mul x24, x16, x9 + sbcs x4, x10, x8 + ngc x8, xzr + subs x10, x5, x10 + eor x5, x24, x12 + csetm x7, cc + cneg x24, x10, cc + subs x10, x17, x15 + cinv x7, x7, cc + cneg x10, x10, cc + subs x14, x13, x14 + sbcs x15, x23, x15 + eor x13, x21, x8 + mul x23, x24, x10 + sbcs x17, x6, x17 + eor x6, x3, x8 + ngc x21, xzr + umulh x9, x16, x9 + cmn x8, #0x1 + eor x3, x23, x7 + adcs x23, x6, xzr + adcs x13, x13, xzr + eor x16, x4, x8 + adc x16, x16, xzr + eor x4, x17, x21 + umulh x17, x24, x10 + cmn x21, #0x1 + eor x24, x14, x21 + eor x6, x15, x21 + adcs x15, x24, xzr + adcs x14, x6, xzr + adc x6, x4, xzr + cmn x12, #0x1 + eor x4, x9, x12 + adcs x19, x19, x5 + umulh x5, x23, x15 + adcs x1, x1, x4 + adcs x10, x20, x12 + eor x4, x17, x7 + ldp x20, x9, [x0] + adc x2, x2, x12 + cmn x7, #0x1 + adcs x12, x1, x3 + ldp x17, x24, [x0, #16] + mul x1, x16, x6 + adcs x3, x10, x4 + adc x2, x2, x7 + ldp x7, x4, [x0, #32] + adds x20, x22, x20 + mul x10, x13, x14 + adcs x11, x11, x9 + eor x9, x8, x21 + adcs x21, x19, x17 + stp x20, x11, [x0] + adcs x12, x12, x24 + mul x8, x23, x15 + adcs x3, x3, x7 + stp x21, x12, [x0, #16] + adcs x12, x2, x4 + adc x19, xzr, xzr + subs x21, 
x23, x16 + umulh x2, x16, x6 + stp x3, x12, [x0, #32] + cneg x3, x21, cc + csetm x24, cc + umulh x11, x13, x14 + subs x21, x13, x16 + eor x7, x8, x9 + cneg x17, x21, cc + csetm x16, cc + subs x21, x6, x15 + cneg x22, x21, cc + cinv x21, x24, cc + subs x20, x23, x13 + umulh x12, x3, x22 + cneg x23, x20, cc + csetm x24, cc + subs x20, x14, x15 + cinv x24, x24, cc + mul x22, x3, x22 + cneg x3, x20, cc + subs x13, x6, x14 + cneg x20, x13, cc + cinv x15, x16, cc + adds x13, x5, x10 + mul x4, x23, x3 + adcs x11, x11, x1 + adc x14, x2, xzr + adds x5, x13, x8 + adcs x16, x11, x13 + umulh x23, x23, x3 + adcs x3, x14, x11 + adc x1, x14, xzr + adds x10, x16, x8 + adcs x6, x3, x13 + adcs x8, x1, x11 + umulh x13, x17, x20 + eor x1, x4, x24 + adc x4, x14, xzr + cmn x24, #0x1 + adcs x1, x5, x1 + eor x16, x23, x24 + eor x11, x1, x9 + adcs x23, x10, x16 + eor x2, x22, x21 + adcs x3, x6, x24 + mul x14, x17, x20 + eor x17, x13, x15 + adcs x13, x8, x24 + adc x8, x4, x24 + cmn x21, #0x1 + adcs x6, x23, x2 + mov x16, #0xfffffffffffffffe + eor x20, x12, x21 + adcs x20, x3, x20 + eor x23, x14, x15 + adcs x2, x13, x21 + adc x8, x8, x21 + cmn x15, #0x1 + ldp x5, x4, [x0] + ldp x21, x12, [x0, #16] + adcs x22, x20, x23 + eor x23, x22, x9 + adcs x17, x2, x17 + adc x22, x8, x15 + cmn x9, #0x1 + adcs x15, x7, x5 + ldp x10, x14, [x0, #32] + eor x1, x6, x9 + lsl x2, x15, #32 + adcs x8, x11, x4 + adcs x13, x1, x21 + eor x1, x22, x9 + adcs x24, x23, x12 + eor x11, x17, x9 + adcs x23, x11, x10 + adcs x7, x1, x14 + adcs x17, x9, x19 + adcs x20, x9, xzr + add x1, x2, x15 + lsr x3, x1, #32 + adcs x11, x9, xzr + adc x9, x9, xzr + subs x3, x3, x1 + sbc x6, x1, xzr + adds x24, x24, x5 + adcs x4, x23, x4 + extr x3, x6, x3, #32 + lsr x6, x6, #32 + adcs x21, x7, x21 + adcs x15, x17, x12 + adcs x7, x20, x10 + adcs x20, x11, x14 + mov x14, #0xffffffff + adc x22, x9, x19 + adds x12, x6, x1 + adc x10, xzr, xzr + subs x3, x8, x3 + sbcs x12, x13, x12 + lsl x9, x3, #32 + add x3, x9, x3 + sbcs x10, x24, x10 + sbcs x24, x4, xzr + lsr x9, x3, #32 + sbcs x21, x21, xzr + sbc x1, x1, xzr + subs x9, x9, x3 + sbc x13, x3, xzr + extr x9, x13, x9, #32 + lsr x13, x13, #32 + adds x13, x13, x3 + adc x6, xzr, xzr + subs x12, x12, x9 + sbcs x17, x10, x13 + lsl x2, x12, #32 + sbcs x10, x24, x6 + add x9, x2, x12 + sbcs x6, x21, xzr + lsr x5, x9, #32 + sbcs x21, x1, xzr + sbc x13, x3, xzr + subs x8, x5, x9 + sbc x19, x9, xzr + lsr x12, x19, #32 + extr x3, x19, x8, #32 + adds x8, x12, x9 + adc x1, xzr, xzr + subs x2, x17, x3 + sbcs x12, x10, x8 + sbcs x5, x6, x1 + sbcs x3, x21, xzr + sbcs x19, x13, xzr + sbc x24, x9, xzr + adds x23, x15, x3 + adcs x8, x7, x19 + adcs x11, x20, x24 + adc x9, x22, xzr + add x24, x9, #0x1 + lsl x7, x24, #32 + subs x21, x24, x7 + sbc x10, x7, xzr + adds x6, x2, x21 + adcs x7, x12, x10 + adcs x24, x5, x24 + adcs x13, x23, xzr + adcs x8, x8, xzr + adcs x15, x11, xzr + csetm x23, cc + and x11, x16, x23 + and x20, x14, x23 + adds x22, x6, x20 + eor x3, x20, x23 + adcs x5, x7, x3 + adcs x14, x24, x11 + stp x22, x5, [x0] + adcs x5, x13, x23 + adcs x21, x8, x23 + stp x14, x5, [x0, #16] + adc x12, x15, x23 + stp x21, x12, [x0, #32] + ldp x23, x24, [sp] + ldp x21, x22, [sp, 16] + ldp x19, x20, [sp, 32] + add sp, sp, 48 + ret + +// Corresponds exactly to bignum_montsqr_p384 + +.montsqr_p384: + ldr q1, [x1] + ldp x9, x2, [x1] + ldr q0, [x1] + ldp x4, x6, [x1, #16] + rev64 v21.4S, v1.4S + uzp2 v28.4S, v1.4S, v1.4S + umulh x7, x9, x2 + xtn v17.2S, v1.2D + mul v27.4S, v21.4S, v0.4S + ldr q20, [x1, #32] + xtn v30.2S, v0.2D + ldr q1, [x1, #32] + 
uzp2 v31.4S, v0.4S, v0.4S + ldp x5, x10, [x1, #32] + umulh x8, x9, x4 + uaddlp v3.2D, v27.4S + umull v16.2D, v30.2S, v17.2S + mul x16, x9, x4 + umull v27.2D, v30.2S, v28.2S + shrn v0.2S, v20.2D, #32 + xtn v7.2S, v20.2D + shl v20.2D, v3.2D, #32 + umull v3.2D, v31.2S, v28.2S + mul x3, x2, x4 + umlal v20.2D, v30.2S, v17.2S + umull v22.2D, v7.2S, v0.2S + usra v27.2D, v16.2D, #32 + umulh x11, x2, x4 + movi v21.2D, #0x00000000ffffffff + uzp2 v28.4S, v1.4S, v1.4S + adds x15, x16, x7 + and v5.16B, v27.16B, v21.16B + adcs x3, x3, x8 + usra v3.2D, v27.2D, #32 + dup v29.2D, x6 + adcs x16, x11, xzr + mov x14, v20.d[0] + umlal v5.2D, v31.2S, v17.2S + mul x8, x9, x2 + mov x7, v20.d[1] + shl v19.2D, v22.2D, #33 + xtn v25.2S, v29.2D + rev64 v31.4S, v1.4S + lsl x13, x14, #32 + uzp2 v6.4S, v29.4S, v29.4S + umlal v19.2D, v7.2S, v7.2S + usra v3.2D, v5.2D, #32 + adds x1, x8, x8 + umulh x8, x4, x4 + add x12, x13, x14 + mul v17.4S, v31.4S, v29.4S + xtn v4.2S, v1.2D + adcs x14, x15, x15 + lsr x13, x12, #32 + adcs x15, x3, x3 + umull v31.2D, v25.2S, v28.2S + adcs x11, x16, x16 + umull v21.2D, v25.2S, v4.2S + mov x17, v3.d[0] + umull v18.2D, v6.2S, v28.2S + adc x16, x8, xzr + uaddlp v16.2D, v17.4S + movi v1.2D, #0x00000000ffffffff + subs x13, x13, x12 + usra v31.2D, v21.2D, #32 + sbc x8, x12, xzr + adds x17, x17, x1 + mul x1, x4, x4 + shl v28.2D, v16.2D, #32 + mov x3, v3.d[1] + adcs x14, x7, x14 + extr x7, x8, x13, #32 + adcs x13, x3, x15 + and v3.16B, v31.16B, v1.16B + adcs x11, x1, x11 + lsr x1, x8, #32 + umlal v3.2D, v6.2S, v4.2S + usra v18.2D, v31.2D, #32 + adc x3, x16, xzr + adds x1, x1, x12 + umlal v28.2D, v25.2S, v4.2S + adc x16, xzr, xzr + subs x15, x17, x7 + sbcs x7, x14, x1 + lsl x1, x15, #32 + sbcs x16, x13, x16 + add x8, x1, x15 + usra v18.2D, v3.2D, #32 + sbcs x14, x11, xzr + lsr x1, x8, #32 + sbcs x17, x3, xzr + sbc x11, x12, xzr + subs x13, x1, x8 + umulh x12, x4, x10 + sbc x1, x8, xzr + extr x13, x1, x13, #32 + lsr x1, x1, #32 + adds x15, x1, x8 + adc x1, xzr, xzr + subs x7, x7, x13 + sbcs x13, x16, x15 + lsl x3, x7, #32 + umulh x16, x2, x5 + sbcs x15, x14, x1 + add x7, x3, x7 + sbcs x3, x17, xzr + lsr x1, x7, #32 + sbcs x14, x11, xzr + sbc x11, x8, xzr + subs x8, x1, x7 + sbc x1, x7, xzr + extr x8, x1, x8, #32 + lsr x1, x1, #32 + adds x1, x1, x7 + adc x17, xzr, xzr + subs x13, x13, x8 + umulh x8, x9, x6 + sbcs x1, x15, x1 + sbcs x15, x3, x17 + sbcs x3, x14, xzr + mul x17, x2, x5 + sbcs x11, x11, xzr + stp x13, x1, [x0] + sbc x14, x7, xzr + mul x7, x4, x10 + subs x1, x9, x2 + stp x15, x3, [x0, #16] + csetm x15, cc + cneg x1, x1, cc + stp x11, x14, [x0, #32] + mul x14, x9, x6 + adds x17, x8, x17 + adcs x7, x16, x7 + adc x13, x12, xzr + subs x12, x5, x6 + cneg x3, x12, cc + cinv x16, x15, cc + mul x8, x1, x3 + umulh x1, x1, x3 + eor x12, x8, x16 + adds x11, x17, x14 + adcs x3, x7, x17 + adcs x15, x13, x7 + adc x8, x13, xzr + adds x3, x3, x14 + adcs x15, x15, x17 + adcs x17, x8, x7 + eor x1, x1, x16 + adc x13, x13, xzr + subs x9, x9, x4 + csetm x8, cc + cneg x9, x9, cc + subs x4, x2, x4 + cneg x4, x4, cc + csetm x7, cc + subs x2, x10, x6 + cinv x8, x8, cc + cneg x2, x2, cc + cmn x16, #0x1 + adcs x11, x11, x12 + mul x12, x9, x2 + adcs x3, x3, x1 + adcs x15, x15, x16 + umulh x9, x9, x2 + adcs x17, x17, x16 + adc x13, x13, x16 + subs x1, x10, x5 + cinv x2, x7, cc + cneg x1, x1, cc + eor x9, x9, x8 + cmn x8, #0x1 + eor x7, x12, x8 + mul x12, x4, x1 + adcs x3, x3, x7 + adcs x7, x15, x9 + adcs x15, x17, x8 + ldp x9, x17, [x0, #16] + umulh x4, x4, x1 + adc x8, x13, x8 + cmn x2, #0x1 + eor x1, x12, x2 + adcs 
x1, x7, x1 + ldp x7, x16, [x0] + eor x12, x4, x2 + adcs x4, x15, x12 + ldp x15, x12, [x0, #32] + adc x8, x8, x2 + adds x13, x14, x14 + umulh x14, x5, x10 + adcs x2, x11, x11 + adcs x3, x3, x3 + adcs x1, x1, x1 + adcs x4, x4, x4 + adcs x11, x8, x8 + adc x8, xzr, xzr + adds x13, x13, x7 + adcs x2, x2, x16 + mul x16, x5, x10 + adcs x3, x3, x9 + adcs x1, x1, x17 + umulh x5, x5, x5 + lsl x9, x13, #32 + add x9, x9, x13 + adcs x4, x4, x15 + mov x13, v28.d[1] + adcs x15, x11, x12 + lsr x7, x9, #32 + adc x11, x8, xzr + subs x7, x7, x9 + umulh x10, x10, x10 + sbc x17, x9, xzr + extr x7, x17, x7, #32 + lsr x17, x17, #32 + adds x17, x17, x9 + adc x12, xzr, xzr + subs x8, x2, x7 + sbcs x17, x3, x17 + lsl x7, x8, #32 + sbcs x2, x1, x12 + add x3, x7, x8 + sbcs x12, x4, xzr + lsr x1, x3, #32 + sbcs x7, x15, xzr + sbc x15, x9, xzr + subs x1, x1, x3 + sbc x4, x3, xzr + lsr x9, x4, #32 + extr x8, x4, x1, #32 + adds x9, x9, x3 + adc x4, xzr, xzr + subs x1, x17, x8 + lsl x17, x1, #32 + sbcs x8, x2, x9 + sbcs x9, x12, x4 + add x17, x17, x1 + mov x1, v18.d[1] + lsr x2, x17, #32 + sbcs x7, x7, xzr + mov x12, v18.d[0] + sbcs x15, x15, xzr + sbc x3, x3, xzr + subs x4, x2, x17 + sbc x2, x17, xzr + adds x12, x13, x12 + adcs x16, x16, x1 + lsr x13, x2, #32 + extr x1, x2, x4, #32 + adc x2, x14, xzr + adds x4, x13, x17 + mul x13, x6, x6 + adc x14, xzr, xzr + subs x1, x8, x1 + sbcs x4, x9, x4 + mov x9, v28.d[0] + sbcs x7, x7, x14 + sbcs x8, x15, xzr + sbcs x3, x3, xzr + sbc x14, x17, xzr + adds x17, x9, x9 + adcs x12, x12, x12 + mov x15, v19.d[0] + adcs x9, x16, x16 + umulh x6, x6, x6 + adcs x16, x2, x2 + adc x2, xzr, xzr + adds x11, x11, x8 + adcs x3, x3, xzr + adcs x14, x14, xzr + adcs x8, xzr, xzr + adds x13, x1, x13 + mov x1, v19.d[1] + adcs x6, x4, x6 + mov x4, #0xffffffff + adcs x15, x7, x15 + adcs x7, x11, x5 + adcs x1, x3, x1 + adcs x14, x14, x10 + adc x11, x8, xzr + adds x6, x6, x17 + adcs x8, x15, x12 + adcs x3, x7, x9 + adcs x15, x1, x16 + mov x16, #0xffffffff00000001 + adcs x14, x14, x2 + mov x2, #0x1 + adc x17, x11, xzr + cmn x13, x16 + adcs xzr, x6, x4 + adcs xzr, x8, x2 + adcs xzr, x3, xzr + adcs xzr, x15, xzr + adcs xzr, x14, xzr + adc x1, x17, xzr + neg x9, x1 + and x1, x16, x9 + adds x11, x13, x1 + and x13, x4, x9 + adcs x5, x6, x13 + and x1, x2, x9 + adcs x7, x8, x1 + stp x11, x5, [x0] + adcs x11, x3, xzr + adcs x2, x15, xzr + stp x7, x11, [x0, #16] + adc x17, x14, xzr + stp x2, x17, [x0, #32] + ret + +// Corresponds exactly to bignum_sub_p384 + +.sub_p384: + ldp x5, x6, [x1] + ldp x4, x3, [x2] + subs x5, x5, x4 + sbcs x6, x6, x3 + ldp x7, x8, [x1, #16] + ldp x4, x3, [x2, #16] + sbcs x7, x7, x4 + sbcs x8, x8, x3 + ldp x9, x10, [x1, #32] + ldp x4, x3, [x2, #32] + sbcs x9, x9, x4 + sbcs x10, x10, x3 + csetm x3, cc + mov x4, #0xffffffff + and x4, x4, x3 + adds x5, x5, x4 + eor x4, x4, x3 + adcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + adcs x7, x7, x4 + adcs x8, x8, x3 + adcs x9, x9, x3 + adc x10, x10, x3 + stp x5, x6, [x0] + stp x7, x8, [x0, #16] + stp x9, x10, [x0, #32] + ret + +// Corresponds exactly to bignum_add_p384 + +.add_p384: + ldp x5, x6, [x1] + ldp x4, x3, [x2] + adds x5, x5, x4 + adcs x6, x6, x3 + ldp x7, x8, [x1, #16] + ldp x4, x3, [x2, #16] + adcs x7, x7, x4 + adcs x8, x8, x3 + ldp x9, x10, [x1, #32] + ldp x4, x3, [x2, #32] + adcs x9, x9, x4 + adcs x10, x10, x3 + adc x3, xzr, xzr + mov x4, #0xffffffff + cmp x5, x4 + mov x4, #0xffffffff00000000 + sbcs xzr, x6, x4 + mov x4, #0xfffffffffffffffe + sbcs xzr, x7, x4 + adcs xzr, x8, xzr + adcs xzr, x9, xzr + adcs xzr, x10, xzr + 
adcs x3, x3, xzr + csetm x3, ne + mov x4, #0xffffffff + and x4, x4, x3 + subs x5, x5, x4 + eor x4, x4, x3 + sbcs x6, x6, x4 + mov x4, #0xfffffffffffffffe + and x4, x4, x3 + sbcs x7, x7, x4 + sbcs x8, x8, x3 + sbcs x9, x9, x3 + sbc x10, x10, x3 + stp x5, x6, [x0] + stp x7, x8, [x0, #16] + stp x9, x10, [x0, #32] + ret + + +#define montmul_p384(P0,P1,P2) \ + add x0, P0;\ + add x1, P1;\ + add x2, P2;\ + bl .montmul_p384 + +#define montsqr_p384(P0,P1) \ + add x0, P0;\ + add x1, P1;\ + bl .montsqr_p384 + +#define sub_p384(P0,P1,P2) \ + add x0, P0;\ + add x1, P1;\ + add x2, P2;\ + bl .sub_p384 + +#define add_p384(P0,P1,P2) \ + add x0, P0;\ + add x1, P1;\ + add x2, P2;\ + bl .add_p384 + + +// P0 = 4 * P1 - P2 + +#define cmsub41_p384(P0,P1,P2) \ + ldp x1, x2, [P1]; \ + ldp x3, x4, [P1+16]; \ + ldp x5, x6, [P1+32]; \ + lsl x0, x1, #2; \ + ldp x7, x8, [P2]; \ + subs x0, x0, x7; \ + extr x1, x2, x1, #62; \ + sbcs x1, x1, x8; \ + ldp x7, x8, [P2+16]; \ + extr x2, x3, x2, #62; \ + sbcs x2, x2, x7; \ + extr x3, x4, x3, #62; \ + sbcs x3, x3, x8; \ + extr x4, x5, x4, #62; \ + ldp x7, x8, [P2+32]; \ + sbcs x4, x4, x7; \ + extr x5, x6, x5, #62; \ + sbcs x5, x5, x8; \ + lsr x6, x6, #62; \ + adc x6, x6, xzr; \ + lsl x7, x6, #32; \ + subs x8, x6, x7; \ + sbc x7, x7, xzr; \ + adds x0, x0, x8; \ + adcs x1, x1, x7; \ + adcs x2, x2, x6; \ + adcs x3, x3, xzr; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + csetm x8, cc; \ + mov x9, #0xffffffff; \ + and x9, x9, x8; \ + adds x0, x0, x9; \ + eor x9, x9, x8; \ + adcs x1, x1, x9; \ + mov x9, #0xfffffffffffffffe; \ + and x9, x9, x8; \ + adcs x2, x2, x9; \ + adcs x3, x3, x8; \ + adcs x4, x4, x8; \ + adc x5, x5, x8; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16]; \ + stp x4, x5, [P0+32] + +// P0 = C * P1 - D * P2 + +#define cmsub_p384(P0,C,P1,D,P2) \ + ldp x0, x1, [P2]; \ + mov x6, #0x00000000ffffffff; \ + subs x6, x6, x0; \ + mov x7, #0xffffffff00000000; \ + sbcs x7, x7, x1; \ + ldp x0, x1, [P2+16]; \ + mov x8, #0xfffffffffffffffe; \ + sbcs x8, x8, x0; \ + mov x13, #0xffffffffffffffff; \ + sbcs x9, x13, x1; \ + ldp x0, x1, [P2+32]; \ + sbcs x10, x13, x0; \ + sbc x11, x13, x1; \ + mov x12, D; \ + mul x0, x12, x6; \ + mul x1, x12, x7; \ + mul x2, x12, x8; \ + mul x3, x12, x9; \ + mul x4, x12, x10; \ + mul x5, x12, x11; \ + umulh x6, x12, x6; \ + umulh x7, x12, x7; \ + umulh x8, x12, x8; \ + umulh x9, x12, x9; \ + umulh x10, x12, x10; \ + umulh x12, x12, x11; \ + adds x1, x1, x6; \ + adcs x2, x2, x7; \ + adcs x3, x3, x8; \ + adcs x4, x4, x9; \ + adcs x5, x5, x10; \ + mov x6, #1; \ + adc x6, x12, x6; \ + ldp x8, x9, [P1]; \ + ldp x10, x11, [P1+16]; \ + ldp x12, x13, [P1+32]; \ + mov x14, C; \ + mul x15, x14, x8; \ + umulh x8, x14, x8; \ + adds x0, x0, x15; \ + mul x15, x14, x9; \ + umulh x9, x14, x9; \ + adcs x1, x1, x15; \ + mul x15, x14, x10; \ + umulh x10, x14, x10; \ + adcs x2, x2, x15; \ + mul x15, x14, x11; \ + umulh x11, x14, x11; \ + adcs x3, x3, x15; \ + mul x15, x14, x12; \ + umulh x12, x14, x12; \ + adcs x4, x4, x15; \ + mul x15, x14, x13; \ + umulh x13, x14, x13; \ + adcs x5, x5, x15; \ + adc x6, x6, xzr; \ + adds x1, x1, x8; \ + adcs x2, x2, x9; \ + adcs x3, x3, x10; \ + adcs x4, x4, x11; \ + adcs x5, x5, x12; \ + adcs x6, x6, x13; \ + lsl x7, x6, #32; \ + subs x8, x6, x7; \ + sbc x7, x7, xzr; \ + adds x0, x0, x8; \ + adcs x1, x1, x7; \ + adcs x2, x2, x6; \ + adcs x3, x3, xzr; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + csetm x6, cc; \ + mov x7, #0xffffffff; \ + and x7, x7, x6; \ + adds x0, x0, x7; \ + eor x7, x7, x6; \ + adcs x1, x1, x7; \ + mov x7, 
#0xfffffffffffffffe; \ + and x7, x7, x6; \ + adcs x2, x2, x7; \ + adcs x3, x3, x6; \ + adcs x4, x4, x6; \ + adc x5, x5, x6; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16]; \ + stp x4, x5, [P0+32] + +// A weak version of add that only guarantees sum in 6 digits + +#define weakadd_p384(P0,P1,P2) \ + ldp x5, x6, [P1]; \ + ldp x4, x3, [P2]; \ + adds x5, x5, x4; \ + adcs x6, x6, x3; \ + ldp x7, x8, [P1+16]; \ + ldp x4, x3, [P2+16]; \ + adcs x7, x7, x4; \ + adcs x8, x8, x3; \ + ldp x9, x10, [P1+32]; \ + ldp x4, x3, [P2+32]; \ + adcs x9, x9, x4; \ + adcs x10, x10, x3; \ + csetm x3, cs; \ + mov x4, #0xffffffff; \ + and x4, x4, x3; \ + subs x5, x5, x4; \ + eor x4, x4, x3; \ + sbcs x6, x6, x4; \ + mov x4, #0xfffffffffffffffe; \ + and x4, x4, x3; \ + sbcs x7, x7, x4; \ + sbcs x8, x8, x3; \ + sbcs x9, x9, x3; \ + sbc x10, x10, x3; \ + stp x5, x6, [P0]; \ + stp x7, x8, [P0+16]; \ + stp x9, x10, [P0+32] + +// P0 = 3 * P1 - 8 * P2 + +#define cmsub38_p384(P0,P1,P2) \ + ldp x0, x1, [P2]; \ + mov x6, #0x00000000ffffffff; \ + subs x6, x6, x0; \ + mov x7, #0xffffffff00000000; \ + sbcs x7, x7, x1; \ + ldp x0, x1, [P2+16]; \ + mov x8, #0xfffffffffffffffe; \ + sbcs x8, x8, x0; \ + mov x13, #0xffffffffffffffff; \ + sbcs x9, x13, x1; \ + ldp x0, x1, [P2+32]; \ + sbcs x10, x13, x0; \ + sbc x11, x13, x1; \ + lsl x0, x6, #3; \ + extr x1, x7, x6, #61; \ + extr x2, x8, x7, #61; \ + extr x3, x9, x8, #61; \ + extr x4, x10, x9, #61; \ + extr x5, x11, x10, #61; \ + lsr x6, x11, #61; \ + add x6, x6, #1; \ + ldp x8, x9, [P1]; \ + ldp x10, x11, [P1+16]; \ + ldp x12, x13, [P1+32]; \ + mov x14, 3; \ + mul x15, x14, x8; \ + umulh x8, x14, x8; \ + adds x0, x0, x15; \ + mul x15, x14, x9; \ + umulh x9, x14, x9; \ + adcs x1, x1, x15; \ + mul x15, x14, x10; \ + umulh x10, x14, x10; \ + adcs x2, x2, x15; \ + mul x15, x14, x11; \ + umulh x11, x14, x11; \ + adcs x3, x3, x15; \ + mul x15, x14, x12; \ + umulh x12, x14, x12; \ + adcs x4, x4, x15; \ + mul x15, x14, x13; \ + umulh x13, x14, x13; \ + adcs x5, x5, x15; \ + adc x6, x6, xzr; \ + adds x1, x1, x8; \ + adcs x2, x2, x9; \ + adcs x3, x3, x10; \ + adcs x4, x4, x11; \ + adcs x5, x5, x12; \ + adcs x6, x6, x13; \ + lsl x7, x6, #32; \ + subs x8, x6, x7; \ + sbc x7, x7, xzr; \ + adds x0, x0, x8; \ + adcs x1, x1, x7; \ + adcs x2, x2, x6; \ + adcs x3, x3, xzr; \ + adcs x4, x4, xzr; \ + adcs x5, x5, xzr; \ + csetm x6, cc; \ + mov x7, #0xffffffff; \ + and x7, x7, x6; \ + adds x0, x0, x7; \ + eor x7, x7, x6; \ + adcs x1, x1, x7; \ + mov x7, #0xfffffffffffffffe; \ + and x7, x7, x6; \ + adcs x2, x2, x7; \ + adcs x3, x3, x6; \ + adcs x4, x4, x6; \ + adc x5, x5, x6; \ + stp x0, x1, [P0]; \ + stp x2, x3, [P0+16]; \ + stp x4, x5, [P0+32] + +S2N_BN_SYMBOL(p384_montjdouble): + +// Save regs and make room on stack for temporary variables + + sub sp, sp, NSPACE+80 + stp x19, x20, [sp, NSPACE] + stp x21, x22, [sp, NSPACE+16] + stp x23, x24, [sp, NSPACE+32] + stp x25, x26, [sp, NSPACE+48] + stp x30, xzr, [sp, NSPACE+64] + +// Move the input arguments to stable places + + mov input_z, x0 + mov input_x, x1 + +// Main code, just a sequence of basic field operations + +// z2 = z^2 +// y2 = y^2 + + montsqr_p384(z2,z_1) + montsqr_p384(y2,y_1) + +// x2p = x^2 - z^4 = (x + z^2) * (x - z^2) + + weakadd_p384(t1,x_1,z2) + sub_p384(t2,x_1,z2) + montmul_p384(x2p,t1,t2) + +// t1 = y + z +// x4p = x2p^2 +// xy2 = x * y^2 + + add_p384(t1,y_1,z_1) + montsqr_p384(x4p,x2p) + montmul_p384(xy2,x_1,y2) + +// t2 = (y + z)^2 + + montsqr_p384(t2,t1) + +// d = 12 * xy2 - 9 * x4p +// t1 = y^2 + 2 * y * z + + 
cmsub_p384(d_,12,xy2,9,x4p) + sub_p384(t1,t2,z2) + +// y4 = y^4 + + montsqr_p384(y4,y2) + +// z_3' = 2 * y * z +// dx2 = d * x2p + + sub_p384(z_3,t1,y2) + montmul_p384(dx2,d_,x2p) + +// x' = 4 * xy2 - d + + cmsub41_p384(x_3,xy2,d_) + +// y' = 3 * dx2 - 8 * y4 + + cmsub38_p384(y_3,dx2,y4) + +// Restore stack and registers + + ldp x19, x20, [sp, NSPACE] + ldp x21, x22, [sp, NSPACE+16] + ldp x23, x24, [sp, NSPACE+32] + ldp x25, x26, [sp, NSPACE+48] + ldp x30, xzr, [sp, NSPACE+64] + add sp, sp, NSPACE+80 + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/x86_att/p256/bignum_montinv_p256.S b/third_party/s2n-bignum/x86_att/p256/bignum_montinv_p256.S new file mode 100644 index 0000000000..36f5d376e0 --- /dev/null +++ b/third_party/s2n-bignum/x86_att/p256/bignum_montinv_p256.S @@ -0,0 +1,1633 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery inverse modulo p_256 = 2^256 - 2^224 + 2^192 + 2^96 - 1 +// Input x[4]; output z[4] +// +// extern void bignum_montinv_p256(uint64_t z[static 4],uint64_t x[static 4]); +// +// If the 4-digit input x is coprime to p_256, i.e. is not divisible +// by it, returns z < p_256 such that x * z == 2^512 (mod p_256). This +// is effectively "Montgomery inverse" because if we consider x and z as +// Montgomery forms of X and Z, i.e. x == 2^256 * X and z == 2^256 * Z +// (both mod p_256) then X * Z == 1 (mod p_256). That is, this function +// gives the analog of the modular inverse bignum_inv_p256 but with both +// input and output in the Montgomery domain. Note that x does not need +// to be reduced modulo p_256, but the output always is. If the input +// is divisible (i.e. is 0 or p_256), then there can be no solution to +// the congruence x * z == 2^512 (mod p_256), and z = 0 is returned. +// +// Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montinv_p256) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montinv_p256) + .text + +// Size in bytes of a 64-bit word + +#define N 8 + +// Pointer-offset pairs for temporaries on stack + +#define f 0(%rsp) +#define g (5*N)(%rsp) +#define u (10*N)(%rsp) +#define v (15*N)(%rsp) +#define tmp (20*N)(%rsp) +#define tmp2 (21*N)(%rsp) +#define i (22*N)(%rsp) +#define d (23*N)(%rsp) + +#define mat (24*N)(%rsp) + +// Backup for the input pointer + +#define res (28*N)(%rsp) + +// Total size to reserve on the stack + +#define NSPACE (30*N) + +// Syntactic variants to make x86_att version simpler to generate + +#define F 0 +#define G (5*N) +#define U (10*N) +#define V (15*N) +#define MAT (24*N) + +#define ff (%rsp) +#define gg (5*N)(%rsp) + +// --------------------------------------------------------------------------- +// Core signed almost-Montgomery reduction macro from u[4..0] to u[3..0]. +// --------------------------------------------------------------------------- + +#define amontred(P) \ +/* We only know the input is -2^316 < x < 2^316. To do traditional */ \ +/* unsigned Montgomery reduction, start by adding 2^61 * p_256. 
*/ \ + movq $0xe000000000000000, %r8 ; \ + addq P, %r8 ; \ + movq $0xffffffffffffffff, %r9 ; \ + adcq 8+P, %r9 ; \ + movq $0x000000001fffffff, %r10 ; \ + adcq 16+P, %r10 ; \ + movq $0x2000000000000000, %r11 ; \ + adcq 24+P, %r11 ; \ + movq $0x1fffffffe0000000, %r12 ; \ + adcq 32+P, %r12 ; \ +/* Let [%r8;%rbx] = 2^32 * w and [%rdx;%rax] = (2^64 - 2^32 + 1) * w */ \ +/* where w is the lowest word */ \ + movq %r8, %rbx ; \ + shlq $32, %rbx ; \ + movq $0xffffffff00000001, %rax ; \ + mulq %r8; \ + shrq $32, %r8 ; \ +/* Hence basic addition of (2^256 - 2^224 + 2^192 + 2^96) * w */ \ + addq %rbx, %r9 ; \ + adcq %r8, %r10 ; \ + adcq %rax, %r11 ; \ + adcq %rdx, %r12 ; \ +/* Now capture carry and subtract p_256 if set (almost-Montgomery) */ \ + sbbq %rax, %rax ; \ + movl $0x00000000ffffffff, %ebx ; \ + andq %rax, %rbx ; \ + movq $0xffffffff00000001, %rdx ; \ + andq %rax, %rdx ; \ + subq %rax, %r9 ; \ + movq %r9, P ; \ + sbbq %rbx, %r10 ; \ + movq %r10, 8+P ; \ + sbbq $0, %r11 ; \ + movq %r11, 16+P ; \ + sbbq %rdx, %r12 ; \ + movq %r12, 24+P + +// Very similar to a subroutine call to the s2n-bignum word_divstep59. +// But different in register usage and returning the final matrix as +// +// [ %r8 %r10] +// [ %r12 %r14] +// +// and also returning the matrix still negated (which doesn't matter) + +#define divstep59(din,fin,gin) \ + movq din, %rsi ; \ + movq fin, %rdx ; \ + movq gin, %rcx ; \ + movq %rdx, %rbx ; \ + andq $0xfffff, %rbx ; \ + movabsq $0xfffffe0000000000, %rax ; \ + orq %rax, %rbx ; \ + andq $0xfffff, %rcx ; \ + movabsq $0xc000000000000000, %rax ; \ + orq %rax, %rcx ; \ + movq $0xfffffffffffffffe, %rax ; \ + xorl %ebp, %ebp ; \ + movl $0x2, %edx ; \ + movq %rbx, %rdi ; \ + movq %rax, %r8 ; \ + testq %rsi, %rsi ; \ + cmovs %rbp, %r8 ; \ + testq $0x1, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + 
cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx 
; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + sarq $1, %rcx ; \ + movl $0x100000, %eax ; \ + leaq (%rbx,%rax), %rdx ; \ + leaq (%rcx,%rax), %rdi ; \ + shlq $0x16, %rdx ; \ + shlq $0x16, %rdi ; \ + sarq $0x2b, %rdx ; \ + sarq $0x2b, %rdi ; \ + movabsq $0x20000100000, %rax ; \ + leaq (%rbx,%rax), %rbx ; \ + leaq (%rcx,%rax), %rcx ; \ + sarq $0x2a, %rbx ; \ + sarq $0x2a, %rcx ; \ + movq %rdx, MAT(%rsp) ; \ + movq %rbx, MAT+0x8(%rsp) ; \ + movq %rdi, MAT+0x10(%rsp) ; \ + movq %rcx, MAT+0x18(%rsp) ; \ + movq fin, %r12 ; \ + imulq %r12, %rdi ; \ + imulq %rdx, %r12 ; \ + movq gin, %r13 ; \ + imulq %r13, %rbx ; \ + imulq %rcx, %r13 ; \ + addq %rbx, %r12 ; \ + addq %rdi, %r13 ; \ + sarq $0x14, %r12 ; \ + sarq $0x14, %r13 ; \ + movq %r12, %rbx ; \ + andq $0xfffff, %rbx ; \ + movabsq $0xfffffe0000000000, %rax ; \ + orq %rax, %rbx ; \ + movq %r13, %rcx ; \ + andq $0xfffff, %rcx ; \ + movabsq $0xc000000000000000, %rax ; \ + orq %rax, %rcx ; \ + movq $0xfffffffffffffffe, %rax ; \ + movl $0x2, %edx ; \ + movq %rbx, %rdi ; \ + movq %rax, %r8 ; \ + testq %rsi, %rsi ; \ + cmovs %rbp, %r8 ; \ + testq $0x1, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; 
\ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq 
%rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + sarq $1, %rcx ; \ + movl $0x100000, %eax ; \ + leaq (%rbx,%rax), %r8 ; \ + leaq (%rcx,%rax), %r10 ; \ + shlq $0x16, %r8 ; \ + shlq $0x16, %r10 ; \ + sarq $0x2b, %r8 ; \ + sarq $0x2b, %r10 ; \ + movabsq $0x20000100000, %rax ; \ + leaq (%rbx,%rax), %r15 ; \ + leaq (%rcx,%rax), %r11 ; \ + sarq $0x2a, %r15 ; \ + sarq $0x2a, %r11 ; \ + movq %r13, %rbx ; \ + movq %r12, %rcx ; \ + imulq %r8, %r12 ; \ + imulq %r15, %rbx ; \ + addq %rbx, %r12 ; \ + imulq %r11, %r13 ; \ + imulq %r10, %rcx ; \ + addq %rcx, %r13 ; \ + sarq $0x14, %r12 ; \ + sarq $0x14, %r13 ; \ + movq %r12, %rbx ; \ + andq $0xfffff, %rbx ; \ + movabsq $0xfffffe0000000000, %rax ; \ + orq %rax, %rbx ; \ + movq %r13, %rcx ; \ + andq $0xfffff, %rcx ; \ + movabsq $0xc000000000000000, %rax ; \ + orq %rax, %rcx ; \ + movq MAT(%rsp), %rax ; \ + imulq %r8, %rax ; \ + movq MAT+0x10(%rsp), %rdx ; \ + imulq %r15, %rdx ; \ + imulq MAT+0x8(%rsp), %r8 ; \ + imulq MAT+0x18(%rsp), %r15 ; \ + addq %r8, %r15 ; \ + leaq (%rax,%rdx), %r9 ; \ + movq MAT(%rsp), %rax ; \ + imulq %r10, %rax ; \ + movq MAT+0x10(%rsp), %rdx ; \ + imulq %r11, %rdx ; \ + imulq MAT+0x8(%rsp), %r10 ; \ + imulq MAT+0x18(%rsp), %r11 ; \ + addq %r10, %r11 ; \ + leaq (%rax,%rdx), %r13 ; \ + movq $0xfffffffffffffffe, %rax ; \ + movl $0x2, %edx ; \ + movq %rbx, %rdi ; \ + movq %rax, %r8 ; \ + testq %rsi, %rsi ; \ + cmovs %rbp, %r8 ; \ + testq $0x1, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs 
%rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + sarq $1, %rcx ; \ + movl $0x100000, %eax ; \ 
+ leaq (%rbx,%rax), %r8 ; \ + leaq (%rcx,%rax), %r12 ; \ + shlq $0x16, %r8 ; \ + shlq $0x16, %r12 ; \ + sarq $0x2b, %r8 ; \ + sarq $0x2b, %r12 ; \ + movabsq $0x20000100000, %rax ; \ + leaq (%rbx,%rax), %r10 ; \ + leaq (%rcx,%rax), %r14 ; \ + sarq $0x2a, %r10 ; \ + sarq $0x2a, %r14 ; \ + movq %r9, %rax ; \ + imulq %r8, %rax ; \ + movq %r13, %rdx ; \ + imulq %r10, %rdx ; \ + imulq %r15, %r8 ; \ + imulq %r11, %r10 ; \ + addq %r8, %r10 ; \ + leaq (%rax,%rdx), %r8 ; \ + movq %r9, %rax ; \ + imulq %r12, %rax ; \ + movq %r13, %rdx ; \ + imulq %r14, %rdx ; \ + imulq %r15, %r12 ; \ + imulq %r11, %r14 ; \ + addq %r12, %r14 ; \ + leaq (%rax,%rdx), %r12 + +S2N_BN_SYMBOL(bignum_montinv_p256): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + +// Save registers and make room for temporaries + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + +// Save the return pointer for the end so we can overwrite %rdi later + + movq %rdi, res + +// Create constant [%rdx;%rcx;%rbx;%rax] = p_256 and copy it into the variable f +// including the 5th zero digit + + xorl %ecx, %ecx + movl $0x00000000ffffffff, %edx + movq %rdx, %rbx + leaq -1(%rcx), %rax + negq %rdx + movq %rax, F(%rsp) + movq %rbx, F+8(%rsp) + movq %rcx, F+16(%rsp) + movq %rdx, F+24(%rsp) + movq %rcx, F+32(%rsp) + +// Now reduce the input modulo p_256, first negating the constant to get +// [%rdx;%rcx;%rbx;%rax] = 2^256 - p_256, adding it to x and hence getting +// the comparison x < p_256 <=> (2^256 - p_256) + x < 2^256 and choosing +// g accordingly. + + movq (%rsi), %r8 + movq 8(%rsi), %r9 + movq 16(%rsi), %r10 + movq 24(%rsi), %r11 + + leaq 1(%rcx), %rax + addq %r8, %rax + leaq -1(%rdx), %rbx + adcq %r9, %rbx + notq %rcx + adcq %r10, %rcx + notq %rdx + adcq %r11, %rdx + + cmovncq %r8, %rax + cmovncq %r9, %rbx + cmovncq %r10, %rcx + cmovncq %r11, %rdx + + movq %rax, G(%rsp) + movq %rbx, G+8(%rsp) + movq %rcx, G+16(%rsp) + movq %rdx, G+24(%rsp) + xorl %eax, %eax + movq %rax, G+32(%rsp) + +// Also maintain reduced < 2^256 vector [u,v] such that +// [f,g] == x * 2^{5*i-562} * [u,v] (mod p_256) +// starting with [p_256,x] == x * 2^{5*0-562} * [0,2^562] (mod p_256) +// The weird-looking 5*i modifications come in because we are doing +// 64-bit word-sized Montgomery reductions at each stage, which is +// 5 bits more than the 59-bit requirement to keep things stable. +// After the 10th and last iteration and sign adjustment, when +// f == 1 for in-scope cases, we have x * 2^{50-562} * u == 1, i.e. +// x * u == 2^512 as required. + + xorl %eax, %eax + movq %rax, U(%rsp) + movq %rax, U+8(%rsp) + movq %rax, U+16(%rsp) + movq %rax, U+24(%rsp) + + movq $0x000c000000140000, %rax + movq %rax, V(%rsp) + movq $0xffe8000000000000, %rax + movq %rax, V+8(%rsp) + movq $0xfffbffffffefffff, %rax + movq %rax, V+16(%rsp) + movq $0x000bffffffebffff, %rax + movq %rax, V+24(%rsp) + +// Start of main loop. We jump into the middle so that the divstep +// portion is common to the special tenth iteration after a uniform +// first 9.
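As a minimal sanity check of that endpoint, the contract x * z == 2^512 (mod p_256) pins the output down uniquely, so it can be modelled in a few lines of Python; this is a sketch only, montinv_p256_reference is a hypothetical reference model rather than s2n-bignum code, and the three-argument pow needs Python 3.8+:

    # Reference model of the stated contract: for x not divisible by p_256,
    # the result z satisfies x * z == 2^512 (mod p_256).
    p_256 = 2**256 - 2**224 + 2**192 + 2**96 - 1

    def montinv_p256_reference(x):
        # z = 2^512 * x^-1 mod p_256 (hypothetical model of what the routine returns)
        return (2**512 * pow(x, -1, p_256)) % p_256

    x = 0x123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
    z = montinv_p256_reference(x)
    assert (x * z) % p_256 == (2**512) % p_256

    # Equivalently, reading x and z as Montgomery forms X = x/2^256 and Z = z/2^256,
    # X * Z == 1 (mod p_256).
    r_inv = pow(2**256, -1, p_256)
    assert ((x * r_inv) % p_256) * ((z * r_inv) % p_256) % p_256 == 1

In the loop below, u plays the role of z once f has been driven to +/-1, exactly as described in the invariant comment above.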
+ + movq $10, i + movq $1, d + jmp bignum_montinv_p256_midloop + +bignum_montinv_p256_loop: + +// Separate out the matrix into sign-magnitude pairs + + movq %r8, %r9 + sarq $63, %r9 + xorq %r9, %r8 + subq %r9, %r8 + + movq %r10, %r11 + sarq $63, %r11 + xorq %r11, %r10 + subq %r11, %r10 + + movq %r12, %r13 + sarq $63, %r13 + xorq %r13, %r12 + subq %r13, %r12 + + movq %r14, %r15 + sarq $63, %r15 + xorq %r15, %r14 + subq %r15, %r14 + +// Adjust the initial values to allow for complement instead of negation +// This initial offset is the same for [f,g] and [u,v] compositions. +// Save it in temporary storage for the [u,v] part and do [f,g] first. + + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, tmp + + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, tmp2 + +// Now the computation of the updated f and g values. This maintains a +// 2-word carry between stages so we can conveniently insert the shift +// right by 59 before storing back, and not overwrite digits we need +// again of the old f and g values. +// +// Digit 0 of [f,g] + + xorl %ebx, %ebx + movq F(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq G(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + + xorl %ebp, %ebp + movq F(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbp + movq G(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + +// Digit 1 of [f,g] + + xorl %ecx, %ecx + movq F+N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq G+N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $59, %rbx, %rdi + movq %rdi, F(%rsp) + + xorl %edi, %edi + movq F+N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq G+N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $59, %rbp, %rsi + movq %rsi, G(%rsp) + +// Digit 2 of [f,g] + + xorl %esi, %esi + movq F+2*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq G+2*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $59, %rcx, %rbx + movq %rbx, F+N(%rsp) + + xorl %ebx, %ebx + movq F+2*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq G+2*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $59, %rdi, %rbp + movq %rbp, G+N(%rsp) + +// Digits 3 and 4 of [f,g] + + movq F+3*N(%rsp), %rax + xorq %r9, %rax + movq F+4*N(%rsp), %rbp + xorq %r9, %rbp + andq %r8, %rbp + negq %rbp + mulq %r8 + addq %rax, %rsi + adcq %rdx, %rbp + movq G+3*N(%rsp), %rax + xorq %r11, %rax + movq G+4*N(%rsp), %rdx + xorq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbp + mulq %r10 + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $59, %rsi, %rcx + movq %rcx, F+2*N(%rsp) + shrdq $59, %rbp, %rsi + sarq $59, %rbp + + movq F+3*N(%rsp), %rax + movq %rsi, F+3*N(%rsp) + + movq F+4*N(%rsp), %rsi + movq %rbp, F+4*N(%rsp) + + xorq %r13, %rax + xorq %r13, %rsi + andq %r12, %rsi + negq %rsi + mulq %r12 + addq %rax, %rbx + adcq %rdx, %rsi + movq G+3*N(%rsp), %rax + xorq %r15, %rax + movq G+4*N(%rsp), %rdx + xorq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rsi + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rsi + shrdq $59, %rbx, %rdi + movq %rdi, G+2*N(%rsp) + shrdq $59, %rsi, %rbx + movq %rbx, G+3*N(%rsp) + sarq $59, %rsi + movq %rsi, G+4*N(%rsp) + +// Get the 
initial carries back from storage and do the [u,v] accumulation + + movq tmp, %rbx + movq tmp2, %rbp + +// Digit 0 of [u,v] + + xorl %ecx, %ecx + movq U(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq V(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + + xorl %esi, %esi + movq U(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, U(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq V(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, V(%rsp) + +// Digit 1 of [u,v] + + xorl %ebx, %ebx + movq U+N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq V+N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + + xorl %ebp, %ebp + movq U+N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, U+N(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq V+N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, V+N(%rsp) + +// Digit 2 of [u,v] + + xorl %ecx, %ecx + movq U+2*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq V+2*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + + xorl %esi, %esi + movq U+2*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, U+2*N(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq V+2*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, V+2*N(%rsp) + +// Digits 3 and 4 of u (top is unsigned) + + movq U+3*N(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq V+3*N(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + +// Preload for last use of old u digit 3 + + movq U+3*N(%rsp), %rax + movq %rcx, U+3*N(%rsp) + movq %rdx, U+4*N(%rsp) + +// Digits 3 and 4 of v (top is unsigned) + + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx + negq %rcx + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rcx + movq V+3*N(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rsi, V+3*N(%rsp) + movq %rdx, V+4*N(%rsp) + +// Montgomery reduction of u + + amontred(u) + +// Montgomery reduction of v + + amontred(v) + +bignum_montinv_p256_midloop: + + divstep59(d,ff,gg) + movq %rsi, d + +// Next iteration + + decq i + jnz bignum_montinv_p256_loop + +// The 10th and last iteration does not need anything except the +// u value and the sign of f; the latter can be obtained from the +// lowest word of f. So it's done differently from the main loop. +// Find the sign of the new f. For this we just need one digit +// since we know (for in-scope cases) that f is either +1 or -1. +// We don't explicitly shift right by 59 either, but looking at +// bit 63 (or any bit >= 60) of the unshifted result is enough +// to distinguish -1 from +1; this is then made into a mask. + + movq F(%rsp), %rax + movq G(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $63, %rax + +// Now separate out the matrix into sign-magnitude pairs +// and adjust each one based on the sign of f. +// +// Note that at this point we expect |f|=1 and we got its +// sign above, so then since [f,0] == x * 2^{-512} [u,v] (mod p_256) +// we want to flip the sign of u according to that of f. 
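
The sign handling below is the standard branch-free pattern: an all-zero/all-one mask s derived from the top bit turns (m ^ s) - s into |m|, and xor-ing a second mask into s folds in one extra conditional negation (here, the sign of f). A small C sketch for orientation only, not part of the patch (the function names are made up for the sketch):

#include <stdint.h>

/* Split m into magnitude and an all-zero/all-one sign mask, as the
   sarq/xorq/subq sequences below do for each matrix entry. */
static uint64_t abs_with_sign(int64_t m, uint64_t *sign_mask)
{
    uint64_t s = (uint64_t)0 - (uint64_t)(m < 0); /* the asm gets this via sarq $63 */
    *sign_mask = s;
    return ((uint64_t)m ^ s) - s;                 /* |m| */
}

/* Conditionally negate using a 0/all-ones mask, e.g. the sign of f above. */
static uint64_t cond_negate(uint64_t m, uint64_t flip_mask)
{
    return (m ^ flip_mask) - flip_mask;
}
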
+ + movq %r8, %r9 + sarq $63, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + + movq %r10, %r11 + sarq $63, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + + movq %r12, %r13 + sarq $63, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + + movq %r14, %r15 + sarq $63, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + +// Adjust the initial value to allow for complement instead of negation + + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + +// Digit 0 of [u] + + xorl %r13d, %r13d + movq U(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq V(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + adcq %rdx, %r13 + +// Digit 1 of [u] + + xorl %r14d, %r14d + movq U+N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq V+N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + adcq %rdx, %r14 + +// Digit 2 of [u] + + xorl %r15d, %r15d + movq U+2*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq V+2*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + adcq %rdx, %r15 + +// Digits 3 and 4 of u (top is unsigned) + + movq U+3*N(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq V+3*N(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + adcq %rdx, %r9 + +// Store back and Montgomery reduce u + + movq %r12, U(%rsp) + movq %r13, U+N(%rsp) + movq %r14, U+2*N(%rsp) + movq %r15, U+3*N(%rsp) + movq %r9, U+4*N(%rsp) + + amontred(u) + +// Perform final strict reduction mod p_256 and copy to output + + movq U(%rsp), %r8 + movq U+N(%rsp), %r9 + movq U+2*N(%rsp), %r10 + movq U+3*N(%rsp), %r11 + + movl $1, %eax + movl $0xffffffff, %ebx + leaq -2(%rax), %rcx + leaq -1(%rbx), %rdx + notq %rbx + + addq %r8, %rax + adcq %r9, %rbx + adcq %r10, %rcx + adcq %r11, %rdx + + cmovncq %r8, %rax + cmovncq %r9, %rbx + cmovncq %r10, %rcx + cmovncq %r11, %rdx + + movq res, %rdi + movq %rax, (%rdi) + movq %rbx, N(%rdi) + movq %rcx, 2*N(%rdi) + movq %rdx, 3*N(%rdi) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/x86_att/p256/p256_montjscalarmul.S b/third_party/s2n-bignum/x86_att/p256/p256_montjscalarmul.S new file mode 100644 index 0000000000..4569646cd3 --- /dev/null +++ b/third_party/s2n-bignum/x86_att/p256/p256_montjscalarmul.S @@ -0,0 +1,3549 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery-Jacobian form scalar multiplication for P-256 +// Input scalar[4], point[12]; output res[12] +// +// extern void p256_montjscalarmul +// (uint64_t res[static 12], +// uint64_t scalar[static 4], +// uint64_t point[static 12]); +// +// This function is a variant of its affine point version p256_scalarmul. +// Here, input and output points are assumed to be in Jacobian form with +// their coordinates in the Montgomery domain. 
Thus, if priming indicates +// Montgomery form, x' = (2^256 * x) mod p_256 etc., each point argument +// is a triple (x',y',z') representing the affine point (x/z^2,y/z^3) when +// z' is nonzero or the point at infinity (group identity) if z' = 0. +// +// Given scalar = n and point = P, assumed to be on the NIST elliptic +// curve P-256, returns a representation of n * P. If the result is the +// point at infinity (either because the input point was or because the +// scalar was a multiple of p_256) then the output is guaranteed to +// represent the point at infinity, i.e. to have its z coordinate zero. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point +// Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p256_montjscalarmul) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p256_montjscalarmul) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Intermediate variables on the stack. Uppercase syntactic variants +// make x86_att version simpler to generate. + +#define SCALARB (0*NUMSIZE) +#define scalarb (0*NUMSIZE)(%rsp) +#define ACC (1*NUMSIZE) +#define acc (1*NUMSIZE)(%rsp) +#define TABENT (4*NUMSIZE) +#define tabent (4*NUMSIZE)(%rsp) + +#define TAB (7*NUMSIZE) +#define tab (7*NUMSIZE)(%rsp) + +#define res (31*NUMSIZE)(%rsp) + +#define NSPACE (32*NUMSIZE) + +// Avoid using .rep for the sake of the BoringSSL/AWS-LC delocator, +// which doesn't accept repetitions, assembler macros etc. + +#define selectblock(I) \ + cmpq $I, %rdi ; \ + cmovzq TAB+96*(I-1)(%rsp), %rax ; \ + cmovzq TAB+96*(I-1)+8(%rsp), %rbx ; \ + cmovzq TAB+96*(I-1)+16(%rsp), %rcx ; \ + cmovzq TAB+96*(I-1)+24(%rsp), %rdx ; \ + cmovzq TAB+96*(I-1)+32(%rsp), %r8 ; \ + cmovzq TAB+96*(I-1)+40(%rsp), %r9 ; \ + cmovzq TAB+96*(I-1)+48(%rsp), %r10 ; \ + cmovzq TAB+96*(I-1)+56(%rsp), %r11 ; \ + cmovzq TAB+96*(I-1)+64(%rsp), %r12 ; \ + cmovzq TAB+96*(I-1)+72(%rsp), %r13 ; \ + cmovzq TAB+96*(I-1)+80(%rsp), %r14 ; \ + cmovzq TAB+96*(I-1)+88(%rsp), %r15 + +S2N_BN_SYMBOL(p256_montjscalarmul): + +// The Windows version literally calls the standard ABI version. +// This simplifies the proofs since subroutine offsets are fixed. + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx + callq p256_montjscalarmul_standard + popq %rsi + popq %rdi + ret + +p256_montjscalarmul_standard: +#endif + +// Real start of the standard ABI code. + + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbp + pushq %rbx + + subq $NSPACE, %rsp + +// Preserve the "res" and "point" input arguments. We load and process the +// scalar immediately so we don't bother preserving that input argument. +// Also, "point" is only needed early on and so its register gets re-used. + + movq %rdx, %rbx + movq %rdi, res + +// Load the digits of group order n_256 = [%r15;%r14;%r13;%r12] + + movq $0xf3b9cac2fc632551, %r12 + movq $0xbce6faada7179e84, %r13 + movq $0xffffffffffffffff, %r14 + movq $0xffffffff00000000, %r15 + +// First, reduce the input scalar mod n_256, i.e. conditionally subtract n_256 + + movq (%rsi), %r8 + subq %r12, %r8 + movq 8(%rsi), %r9 + sbbq %r13, %r9 + movq 16(%rsi), %r10 + sbbq %r14, %r10 + movq 24(%rsi), %r11 + sbbq %r15, %r11 + + cmovcq (%rsi), %r8 + cmovcq 8(%rsi), %r9 + cmovcq 16(%rsi), %r10 + cmovcq 24(%rsi), %r11 + +// Now if the top bit of the reduced scalar is set, negate it mod n_256, +// i.e. 
do n |-> n_256 - n. Remember the sign in %rbp so we can +// correspondingly negate the point below. + + subq %r8, %r12 + sbbq %r9, %r13 + sbbq %r10, %r14 + sbbq %r11, %r15 + + movq %r11, %rbp + shrq $63, %rbp + cmovnzq %r12, %r8 + cmovnzq %r13, %r9 + cmovnzq %r14, %r10 + cmovnzq %r15, %r11 + +// In either case then add the recoding constant 0x08888...888 to allow +// signed digits. + + movq $0x8888888888888888, %rax + addq %rax, %r8 + adcq %rax, %r9 + adcq %rax, %r10 + adcq %rax, %r11 + btc $63, %r11 + + movq %r8, SCALARB(%rsp) + movq %r9, SCALARB+8(%rsp) + movq %r10, SCALARB+16(%rsp) + movq %r11, SCALARB+24(%rsp) + +// Set the tab[0] table entry to the input point = 1 * P, except +// that we negate it if the top bit of the scalar was set. This +// negation takes care over the y = 0 case to maintain all the +// coordinates < p_256 throughout, even though triples (x,y,z) +// with y = 0 can only represent a point on the curve when z = 0 +// and it represents the point at infinity regardless of x and y. + + movq (%rbx), %rax + movq %rax, TAB(%rsp) + movq 8(%rbx), %rax + movq %rax, TAB+8(%rsp) + movq 16(%rbx), %rax + movq %rax, TAB+16(%rsp) + movq 24(%rbx), %rax + movq %rax, TAB+24(%rsp) + + movq 32(%rbx), %r12 + movq %r12, %rax + movq 40(%rbx), %r13 + orq %r13, %rax + movq 48(%rbx), %r14 + movq %r14, %rcx + movq 56(%rbx), %r15 + orq %r15, %rcx + orq %rcx, %rax + cmovzq %rax, %rbp + + xorl %r10d, %r10d + leaq -1(%r10), %r8 + movq $0x00000000ffffffff, %r11 + movq %r11, %r9 + negq %r11 + subq %r12, %r8 + sbbq %r13, %r9 + sbbq %r14, %r10 + sbbq %r15, %r11 + testq %rbp, %rbp + cmovzq %r12, %r8 + cmovzq %r13, %r9 + cmovzq %r14, %r10 + cmovzq %r15, %r11 + movq %r8, TAB+32(%rsp) + movq %r9, TAB+40(%rsp) + movq %r10, TAB+48(%rsp) + movq %r11, TAB+56(%rsp) + + movq 64(%rbx), %rax + movq %rax, TAB+64(%rsp) + movq 72(%rbx), %rax + movq %rax, TAB+72(%rsp) + movq 80(%rbx), %rax + movq %rax, TAB+80(%rsp) + movq 88(%rbx), %rax + movq %rax, TAB+88(%rsp) + +// Compute and record tab[1] = 2 * p, ..., tab[7] = 8 * P + + leaq TAB+96*1(%rsp), %rdi + leaq TAB(%rsp), %rsi + callq p256_montjscalarmul_p256_montjdouble + + leaq TAB+96*2(%rsp), %rdi + leaq TAB+96*1(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p256_montjscalarmul_p256_montjadd + + leaq TAB+96*3(%rsp), %rdi + leaq TAB+96*1(%rsp), %rsi + callq p256_montjscalarmul_p256_montjdouble + + leaq TAB+96*4(%rsp), %rdi + leaq TAB+96*3(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p256_montjscalarmul_p256_montjadd + + leaq TAB+96*5(%rsp), %rdi + leaq TAB+96*2(%rsp), %rsi + callq p256_montjscalarmul_p256_montjdouble + + leaq TAB+96*6(%rsp), %rdi + leaq TAB+96*5(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p256_montjscalarmul_p256_montjadd + + leaq TAB+96*7(%rsp), %rdi + leaq TAB+96*3(%rsp), %rsi + callq p256_montjscalarmul_p256_montjdouble + +// Set up accumulator as table entry for top 4 bits (constant-time indexing) + + movq SCALARB+24(%rsp), %rdi + shrq $60, %rdi + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + + movq %rax, ACC(%rsp) + movq %rbx, ACC+8(%rsp) + movq %rcx, ACC+16(%rsp) + movq %rdx, ACC+24(%rsp) + movq %r8, ACC+32(%rsp) + movq %r9, ACC+40(%rsp) + movq %r10, ACC+48(%rsp) + movq %r11, ACC+56(%rsp) + movq %r12, ACC+64(%rsp) + movq %r13, 
ACC+72(%rsp) + movq %r14, ACC+80(%rsp) + movq %r15, ACC+88(%rsp) + +// Main loop over size-4 bitfield + + movl $252, %ebp + +p256_montjscalarmul_mainloop: + subq $4, %rbp + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_p256_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_p256_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_p256_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_p256_montjdouble + + movq %rbp, %rax + shrq $6, %rax + movq (%rsp,%rax,8), %rdi + movq %rbp, %rcx + shrq %cl, %rdi + andq $15, %rdi + + subq $8, %rdi + sbbq %rsi, %rsi // %rsi = sign of digit (-1 = negative) + xorq %rsi, %rdi + subq %rsi, %rdi // %rdi = absolute value of digit + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + +// Store it to "tabent" with the y coordinate optionally negated +// Again, do it carefully to give coordinates < p_256 even in +// the degenerate case y = 0 (when z = 0 for points on the curve). + + movq %rax, TABENT(%rsp) + movq %rbx, TABENT+8(%rsp) + movq %rcx, TABENT+16(%rsp) + movq %rdx, TABENT+24(%rsp) + + movq %r12, TABENT+64(%rsp) + movq %r13, TABENT+72(%rsp) + movq %r14, TABENT+80(%rsp) + movq %r15, TABENT+88(%rsp) + + movq %r8, %rax + xorl %r14d, %r14d + orq %r9, %rax + leaq -1(%r14), %r12 + movq %r10, %rcx + movq $0x00000000ffffffff, %r15 + orq %r11, %rcx + movq %r15, %r13 + negq %r15 + orq %rcx, %rax + cmovzq %rax, %rsi + + subq %r8, %r12 + sbbq %r9, %r13 + sbbq %r10, %r14 + sbbq %r11, %r15 + + testq %rsi, %rsi + cmovnzq %r12, %r8 + cmovnzq %r13, %r9 + cmovnzq %r14, %r10 + cmovnzq %r15, %r11 + + movq %r8, TABENT+32(%rsp) + movq %r9, TABENT+40(%rsp) + movq %r10, TABENT+48(%rsp) + movq %r11, TABENT+56(%rsp) + + leaq TABENT(%rsp), %rdx + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_p256_montjadd + + testq %rbp, %rbp + jne p256_montjscalarmul_mainloop + +// That's the end of the main loop, and we just need to copy the +// result in "acc" to the output. 
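
For reference, the digit extraction inside the loop above is the usual signed-window recoding: the loop treats each 4-bit window w of the recoded scalar as the signed digit w - 8, whose sign and magnitude are recovered without branches (the subq $8 / sbbq / xorq / subq sequence). A tiny C sketch of that step, illustrative only and not part of the patch (the helper name is hypothetical):

#include <stdint.h>

/* Turn a 4-bit window w (0..15) into |w - 8| plus a 0/all-ones negation mask. */
static void window_digit(uint64_t w, uint64_t *magnitude, uint64_t *negate_mask)
{
    uint64_t d = w - 8;                           /* signed digit, two's complement */
    uint64_t s = (uint64_t)0 - (uint64_t)(w < 8); /* all-ones iff the digit is negative */
    *negate_mask = s;
    *magnitude = (d ^ s) - s;                     /* magnitude in 0..8 */
}

The negation mask is then used exactly as in the table-entry code above: the looked-up point's y coordinate is negated mod p_256 when the digit is negative, so the constant-time lookup only ever needs the eight positive multiples.
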
+ + movq res, %rdi + movq ACC(%rsp), %rax + movq %rax, (%rdi) + movq ACC+8(%rsp), %rax + movq %rax, 8(%rdi) + movq ACC+16(%rsp), %rax + movq %rax, 16(%rdi) + movq ACC+24(%rsp), %rax + movq %rax, 24(%rdi) + + movq ACC+32(%rsp), %rax + movq %rax, 32(%rdi) + movq ACC+40(%rsp), %rax + movq %rax, 40(%rdi) + movq ACC+48(%rsp), %rax + movq %rax, 48(%rdi) + movq ACC+56(%rsp), %rax + movq %rax, 56(%rdi) + + movq ACC+64(%rsp), %rax + movq %rax, 64(%rdi) + movq ACC+72(%rsp), %rax + movq %rax, 72(%rdi) + movq ACC+80(%rsp), %rax + movq %rax, 80(%rdi) + movq ACC+88(%rsp), %rax + movq %rax, 88(%rdi) + +// Restore stack and registers and return + + addq $NSPACE, %rsp + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + popq %r15 + ret + +// Local copies of subroutines, complete clones at the moment + +p256_montjscalarmul_p256_montjadd: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0xe0, %rsp + movq %rdx, %rbp + movq 0x40(%rsi), %rdx + mulxq %rdx, %r8, %r15 + mulxq 0x48(%rsi), %r9, %r10 + mulxq 0x58(%rsi), %r11, %r12 + movq 0x50(%rsi), %rdx + mulxq 0x58(%rsi), %r13, %r14 + xorl %ecx, %ecx + mulxq 0x40(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x48(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x58(%rsi), %rdx + mulxq 0x48(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rcx, %r13 + adoxq %rcx, %r14 + adcq %rcx, %r14 + xorl %ecx, %ecx + adcxq %r9, %r9 + adoxq %r15, %r9 + movq 0x48(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x50(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x58(%rsi), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rcx, %r15 + adoxq %rcx, %r15 + xorl %ecx, %ecx + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + movq $0xffffffff00000001, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rcx, %r13 + movl %ecx, %r9d + adoxq %rcx, %r9 + adcxq %rcx, %r9 + addq %r9, %r14 + adcq %rcx, %r15 + movl %ecx, %r8d + adcq %rcx, %r8 + xorl %ecx, %ecx + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq $0xffffffff00000001, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rcx, %r15 + adoxq %rcx, %r8 + adcq %rcx, %r8 + movl $0x1, %r8d + leaq -0x1(%rdx), %rdx + leaq -0x1(%rcx), %rax + movl $0xfffffffe, %r11d + cmoveq %rcx, %r8 + cmoveq %rcx, %rdx + cmoveq %rcx, %rax + cmoveq %rcx, %r11 + addq %r8, %r12 + adcq %rdx, %r13 + adcq %rax, %r14 + adcq %r11, %r15 + movq %r12, (%rsp) + movq %r13, 0x8(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + movq 0x40(%rbp), %rdx + mulxq %rdx, %r8, %r15 + mulxq 0x48(%rbp), %r9, %r10 + mulxq 0x58(%rbp), %r11, %r12 + movq 0x50(%rbp), %rdx + mulxq 0x58(%rbp), %r13, %r14 + xorl %ecx, %ecx + mulxq 0x40(%rbp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x48(%rbp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x58(%rbp), %rdx + mulxq 0x48(%rbp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rcx, %r13 + adoxq %rcx, %r14 + adcq %rcx, %r14 + xorl %ecx, %ecx + adcxq %r9, %r9 + adoxq %r15, 
%r9 + movq 0x48(%rbp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x50(%rbp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x58(%rbp), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rcx, %r15 + adoxq %rcx, %r15 + xorl %ecx, %ecx + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + movq $0xffffffff00000001, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rcx, %r13 + movl %ecx, %r9d + adoxq %rcx, %r9 + adcxq %rcx, %r9 + addq %r9, %r14 + adcq %rcx, %r15 + movl %ecx, %r8d + adcq %rcx, %r8 + xorl %ecx, %ecx + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq $0xffffffff00000001, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rcx, %r15 + adoxq %rcx, %r8 + adcq %rcx, %r8 + movl $0x1, %r8d + leaq -0x1(%rdx), %rdx + leaq -0x1(%rcx), %rax + movl $0xfffffffe, %r11d + cmoveq %rcx, %r8 + cmoveq %rcx, %rdx + cmoveq %rcx, %rax + cmoveq %rcx, %r11 + addq %r8, %r12 + adcq %rdx, %r13 + adcq %rax, %r14 + adcq %r11, %r15 + movq %r12, 0xa0(%rsp) + movq %r13, 0xa8(%rsp) + movq %r14, 0xb0(%rsp) + movq %r15, 0xb8(%rsp) + xorl %r13d, %r13d + movq 0x20(%rsi), %rdx + mulxq 0x40(%rbp), %r8, %r9 + mulxq 0x48(%rbp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x50(%rbp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x58(%rbp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x28(%rsi), %rdx + xorl %r14d, %r14d + mulxq 0x40(%rbp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x48(%rbp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x50(%rbp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x58(%rbp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x30(%rsi), %rdx + xorl %r8d, %r8d + mulxq 0x40(%rbp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x48(%rbp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x50(%rbp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x58(%rbp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x38(%rsi), %rdx + xorl %r9d, %r9d + mulxq 0x40(%rbp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x48(%rbp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x50(%rbp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x58(%rbp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, 
%r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0xc0(%rsp) + movq %r13, 0xc8(%rsp) + movq %r14, 0xd0(%rsp) + movq %r15, 0xd8(%rsp) + xorl %r13d, %r13d + movq 0x20(%rbp), %rdx + mulxq 0x40(%rsi), %r8, %r9 + mulxq 0x48(%rsi), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x50(%rsi), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x58(%rsi), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x28(%rbp), %rdx + xorl %r14d, %r14d + mulxq 0x40(%rsi), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x48(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x50(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x58(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x30(%rbp), %rdx + xorl %r8d, %r8d + mulxq 0x40(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x48(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x50(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x58(%rsi), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x38(%rbp), %rdx + xorl %r9d, %r9d + mulxq 0x40(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x48(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x50(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x58(%rsi), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x20(%rsp) + movq %r13, 0x28(%rsp) + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + xorl %r13d, %r13d + movq 0x0(%rbp), %rdx + mulxq (%rsp), %r8, %r9 + mulxq 0x8(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x10(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x18(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x8(%rbp), %rdx + xorl %r14d, %r14d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), 
%rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x10(%rbp), %rdx + xorl %r8d, %r8d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x18(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x18(%rbp), %rdx + xorl %r9d, %r9d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x18(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x40(%rsp) + movq %r13, 0x48(%rsp) + movq %r14, 0x50(%rsp) + movq %r15, 0x58(%rsp) + xorl %r13d, %r13d + movq (%rsi), %rdx + mulxq 0xa0(%rsp), %r8, %r9 + mulxq 0xa8(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0xb0(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0xb8(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x8(%rsi), %rdx + xorl %r14d, %r14d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x10(%rsi), %rdx + xorl %r8d, %r8d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x18(%rsi), %rdx + xorl %r9d, %r9d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + 
adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x80(%rsp) + movq %r13, 0x88(%rsp) + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + xorl %r13d, %r13d + movq 0x20(%rsp), %rdx + mulxq (%rsp), %r8, %r9 + mulxq 0x8(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x10(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x18(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x28(%rsp), %rdx + xorl %r14d, %r14d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x30(%rsp), %rdx + xorl %r8d, %r8d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x18(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x38(%rsp), %rdx + xorl %r9d, %r9d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x18(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x20(%rsp) + movq %r13, 0x28(%rsp) + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + xorl %r13d, %r13d + movq 0xc0(%rsp), %rdx + mulxq 0xa0(%rsp), %r8, %r9 + mulxq 0xa8(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0xb0(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0xb8(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0xc8(%rsp), %rdx + xorl %r14d, %r14d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq 
%rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0xd0(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0xd8(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0xc0(%rsp) + movq %r13, 0xc8(%rsp) + movq %r14, 0xd0(%rsp) + movq %r15, 0xd8(%rsp) + movq 0x40(%rsp), %rax + subq 0x80(%rsp), %rax + movq 0x48(%rsp), %rcx + sbbq 0x88(%rsp), %rcx + movq 0x50(%rsp), %r8 + sbbq 0x90(%rsp), %r8 + movq 0x58(%rsp), %r9 + sbbq 0x98(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0xa0(%rsp) + adcq %r10, %rcx + movq %rcx, 0xa8(%rsp) + adcq $0x0, %r8 + movq %r8, 0xb0(%rsp) + adcq %rdx, %r9 + movq %r9, 0xb8(%rsp) + movq 0x20(%rsp), %rax + subq 0xc0(%rsp), %rax + movq 0x28(%rsp), %rcx + sbbq 0xc8(%rsp), %rcx + movq 0x30(%rsp), %r8 + sbbq 0xd0(%rsp), %r8 + movq 0x38(%rsp), %r9 + sbbq 0xd8(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x20(%rsp) + adcq %r10, %rcx + movq %rcx, 0x28(%rsp) + adcq $0x0, %r8 + movq %r8, 0x30(%rsp) + adcq %rdx, %r9 + movq %r9, 0x38(%rsp) + movq 0xa0(%rsp), %rdx + mulxq %rdx, %r8, %r15 + mulxq 0xa8(%rsp), %r9, %r10 + mulxq 0xb8(%rsp), %r11, %r12 + movq 0xb0(%rsp), %rdx + mulxq 0xb8(%rsp), %r13, %r14 + xorl %ecx, %ecx + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0xb8(%rsp), %rdx + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rcx, %r13 + adoxq %rcx, %r14 + adcq %rcx, %r14 + xorl %ecx, %ecx + adcxq %r9, %r9 + adoxq %r15, %r9 + movq 0xa8(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0xb0(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0xb8(%rsp), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rcx, %r15 + adoxq %rcx, %r15 + xorl %ecx, %ecx + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, 
%rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + movq $0xffffffff00000001, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rcx, %r13 + movl %ecx, %r9d + adoxq %rcx, %r9 + adcxq %rcx, %r9 + addq %r9, %r14 + adcq %rcx, %r15 + movl %ecx, %r8d + adcq %rcx, %r8 + xorl %ecx, %ecx + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq $0xffffffff00000001, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rcx, %r15 + adoxq %rcx, %r8 + adcq %rcx, %r8 + movl $0x1, %r8d + leaq -0x1(%rdx), %rdx + leaq -0x1(%rcx), %rax + movl $0xfffffffe, %r11d + cmoveq %rcx, %r8 + cmoveq %rcx, %rdx + cmoveq %rcx, %rax + cmoveq %rcx, %r11 + addq %r8, %r12 + adcq %rdx, %r13 + adcq %rax, %r14 + adcq %r11, %r15 + movq %r12, 0x60(%rsp) + movq %r13, 0x68(%rsp) + movq %r14, 0x70(%rsp) + movq %r15, 0x78(%rsp) + movq 0x20(%rsp), %rdx + mulxq %rdx, %r8, %r15 + mulxq 0x28(%rsp), %r9, %r10 + mulxq 0x38(%rsp), %r11, %r12 + movq 0x30(%rsp), %rdx + mulxq 0x38(%rsp), %r13, %r14 + xorl %ecx, %ecx + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x28(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x38(%rsp), %rdx + mulxq 0x28(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rcx, %r13 + adoxq %rcx, %r14 + adcq %rcx, %r14 + xorl %ecx, %ecx + adcxq %r9, %r9 + adoxq %r15, %r9 + movq 0x28(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x30(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x38(%rsp), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rcx, %r15 + adoxq %rcx, %r15 + xorl %ecx, %ecx + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + movq $0xffffffff00000001, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rcx, %r13 + movl %ecx, %r9d + adoxq %rcx, %r9 + adcxq %rcx, %r9 + addq %r9, %r14 + adcq %rcx, %r15 + movl %ecx, %r8d + adcq %rcx, %r8 + xorl %ecx, %ecx + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq $0xffffffff00000001, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rcx, %r15 + adoxq %rcx, %r8 + adcq %rcx, %r8 + movl $0x1, %ebx + addq %r12, %rbx + leaq -0x1(%rdx), %rdx + adcq %r13, %rdx + leaq -0x1(%rcx), %rcx + movq %rcx, %rax + adcq %r14, %rcx + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rbx, %r12 + cmovbq %rdx, %r13 + cmovbq %rcx, %r14 + cmovbq %r11, %r15 + movq %r12, (%rsp) + movq %r13, 0x8(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + xorl %r13d, %r13d + movq 0x80(%rsp), %rdx + mulxq 0x60(%rsp), %r8, %r9 + mulxq 0x68(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x70(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x78(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x88(%rsp), %rdx + xorl %r14d, %r14d + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 
0x68(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x70(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x78(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x90(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x70(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x78(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x98(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x70(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x78(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x80(%rsp) + movq %r13, 0x88(%rsp) + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + xorl %r13d, %r13d + movq 0x40(%rsp), %rdx + mulxq 0x60(%rsp), %r8, %r9 + mulxq 0x68(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x70(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x78(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x48(%rsp), %rdx + xorl %r14d, %r14d + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x70(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x78(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x50(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x70(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x78(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x58(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x70(%rsp), %rax, %rbx + 
adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x78(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x40(%rsp) + movq %r13, 0x48(%rsp) + movq %r14, 0x50(%rsp) + movq %r15, 0x58(%rsp) + movq (%rsp), %rax + subq 0x80(%rsp), %rax + movq 0x8(%rsp), %rcx + sbbq 0x88(%rsp), %rcx + movq 0x10(%rsp), %r8 + sbbq 0x90(%rsp), %r8 + movq 0x18(%rsp), %r9 + sbbq 0x98(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, (%rsp) + adcq %r10, %rcx + movq %rcx, 0x8(%rsp) + adcq $0x0, %r8 + movq %r8, 0x10(%rsp) + adcq %rdx, %r9 + movq %r9, 0x18(%rsp) + movq 0x40(%rsp), %rax + subq 0x80(%rsp), %rax + movq 0x48(%rsp), %rcx + sbbq 0x88(%rsp), %rcx + movq 0x50(%rsp), %r8 + sbbq 0x90(%rsp), %r8 + movq 0x58(%rsp), %r9 + sbbq 0x98(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x60(%rsp) + adcq %r10, %rcx + movq %rcx, 0x68(%rsp) + adcq $0x0, %r8 + movq %r8, 0x70(%rsp) + adcq %rdx, %r9 + movq %r9, 0x78(%rsp) + xorl %r13d, %r13d + movq 0x40(%rsi), %rdx + mulxq 0xa0(%rsp), %r8, %r9 + mulxq 0xa8(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0xb0(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0xb8(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x48(%rsi), %rdx + xorl %r14d, %r14d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x50(%rsi), %rdx + xorl %r8d, %r8d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x58(%rsi), %rdx + xorl %r9d, %r9d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq 
%rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0xa0(%rsp) + movq %r13, 0xa8(%rsp) + movq %r14, 0xb0(%rsp) + movq %r15, 0xb8(%rsp) + movq (%rsp), %rax + subq 0x40(%rsp), %rax + movq 0x8(%rsp), %rcx + sbbq 0x48(%rsp), %rcx + movq 0x10(%rsp), %r8 + sbbq 0x50(%rsp), %r8 + movq 0x18(%rsp), %r9 + sbbq 0x58(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, (%rsp) + adcq %r10, %rcx + movq %rcx, 0x8(%rsp) + adcq $0x0, %r8 + movq %r8, 0x10(%rsp) + adcq %rdx, %r9 + movq %r9, 0x18(%rsp) + movq 0x80(%rsp), %rax + subq (%rsp), %rax + movq 0x88(%rsp), %rcx + sbbq 0x8(%rsp), %rcx + movq 0x90(%rsp), %r8 + sbbq 0x10(%rsp), %r8 + movq 0x98(%rsp), %r9 + sbbq 0x18(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x80(%rsp) + adcq %r10, %rcx + movq %rcx, 0x88(%rsp) + adcq $0x0, %r8 + movq %r8, 0x90(%rsp) + adcq %rdx, %r9 + movq %r9, 0x98(%rsp) + xorl %r13d, %r13d + movq 0xc0(%rsp), %rdx + mulxq 0x60(%rsp), %r8, %r9 + mulxq 0x68(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x70(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x78(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0xc8(%rsp), %rdx + xorl %r14d, %r14d + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x70(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x78(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0xd0(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x70(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x78(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0xd8(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x70(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x78(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + 
decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x60(%rsp) + movq %r13, 0x68(%rsp) + movq %r14, 0x70(%rsp) + movq %r15, 0x78(%rsp) + xorl %r13d, %r13d + movq 0x40(%rbp), %rdx + mulxq 0xa0(%rsp), %r8, %r9 + mulxq 0xa8(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0xb0(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0xb8(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x48(%rbp), %rdx + xorl %r14d, %r14d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x50(%rbp), %rdx + xorl %r8d, %r8d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x58(%rbp), %rdx + xorl %r9d, %r9d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0xa0(%rsp) + movq %r13, 0xa8(%rsp) + movq %r14, 0xb0(%rsp) + movq %r15, 0xb8(%rsp) + xorl %r13d, %r13d + movq 0x80(%rsp), %rdx + mulxq 0x20(%rsp), %r8, %r9 + mulxq 0x28(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x30(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x38(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x88(%rsp), %rdx + xorl %r14d, %r14d + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x28(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x38(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, 
%rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x90(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x28(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x38(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x98(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x28(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x38(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x80(%rsp) + movq %r13, 0x88(%rsp) + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq 0x80(%rsp), %rax + subq 0x60(%rsp), %rax + movq 0x88(%rsp), %rcx + sbbq 0x68(%rsp), %rcx + movq 0x90(%rsp), %r8 + sbbq 0x70(%rsp), %r8 + movq 0x98(%rsp), %r9 + sbbq 0x78(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x80(%rsp) + adcq %r10, %rcx + movq %rcx, 0x88(%rsp) + adcq $0x0, %r8 + movq %r8, 0x90(%rsp) + adcq %rdx, %r9 + movq %r9, 0x98(%rsp) + movq 0x40(%rsi), %r8 + movq 0x48(%rsi), %r9 + movq 0x50(%rsi), %r10 + movq 0x58(%rsi), %r11 + movq %r8, %rax + movq %r9, %rdx + orq %r10, %rax + orq %r11, %rdx + orq %rdx, %rax + negq %rax + sbbq %rax, %rax + movq 0x40(%rbp), %r12 + movq 0x48(%rbp), %r13 + movq 0x50(%rbp), %r14 + movq 0x58(%rbp), %r15 + movq %r12, %rbx + movq %r13, %rdx + orq %r14, %rbx + orq %r15, %rdx + orq %rdx, %rbx + negq %rbx + sbbq %rbx, %rbx + cmpq %rax, %rbx + cmovbq %r8, %r12 + cmovbq %r9, %r13 + cmovbq %r10, %r14 + cmovbq %r11, %r15 + cmoveq 0xa0(%rsp), %r12 + cmoveq 0xa8(%rsp), %r13 + cmoveq 0xb0(%rsp), %r14 + cmoveq 0xb8(%rsp), %r15 + movq (%rsp), %rax + cmovbq (%rsi), %rax + cmova 0x0(%rbp), %rax + movq 0x8(%rsp), %rbx + cmovbq 0x8(%rsi), %rbx + cmova 0x8(%rbp), %rbx + movq 0x10(%rsp), %rcx + cmovbq 0x10(%rsi), %rcx + cmova 0x10(%rbp), %rcx + movq 0x18(%rsp), %rdx + cmovbq 0x18(%rsi), %rdx + cmova 0x18(%rbp), %rdx + movq 0x80(%rsp), %r8 + cmovbq 0x20(%rsi), %r8 + cmova 0x20(%rbp), %r8 + movq 0x88(%rsp), %r9 + cmovbq 0x28(%rsi), %r9 + cmova 0x28(%rbp), %r9 + movq 0x90(%rsp), %r10 + cmovbq 0x30(%rsi), %r10 + cmova 0x30(%rbp), %r10 + movq 0x98(%rsp), %r11 + cmovbq 0x38(%rsi), %r11 + cmova 0x38(%rbp), %r11 + movq %rax, (%rdi) + movq %rbx, 0x8(%rdi) + movq %rcx, 0x10(%rdi) + movq %rdx, 0x18(%rdi) + movq %r8, 0x20(%rdi) + movq %r9, 0x28(%rdi) + movq %r10, 0x30(%rdi) + movq %r11, 0x38(%rdi) + movq %r12, 0x40(%rdi) + movq %r13, 0x48(%rdi) + movq %r14, 0x50(%rdi) + movq %r15, 0x58(%rdi) + addq $0xe0, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + 
popq %rbx + ret + +p256_montjscalarmul_p256_montjdouble: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0xc0, %rsp + movq 0x40(%rsi), %rdx + mulxq %rdx, %r8, %r15 + mulxq 0x48(%rsi), %r9, %r10 + mulxq 0x58(%rsi), %r11, %r12 + movq 0x50(%rsi), %rdx + mulxq 0x58(%rsi), %r13, %r14 + xorl %ebp, %ebp + mulxq 0x40(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x48(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x58(%rsi), %rdx + mulxq 0x48(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + adoxq %rbp, %r14 + adcq %rbp, %r14 + xorl %ebp, %ebp + adcxq %r9, %r9 + adoxq %r15, %r9 + movq 0x48(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x50(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x58(%rsi), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rbp, %r15 + adoxq %rbp, %r15 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + movq $0xffffffff00000001, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + movl %ebp, %r9d + adoxq %rbp, %r9 + adcxq %rbp, %r9 + addq %r9, %r14 + adcq %rbp, %r15 + movl %ebp, %r8d + adcq %rbp, %r8 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq $0xffffffff00000001, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %r8 + adcq %rbp, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rdx), %rdx + adcq %r13, %rdx + leaq -0x1(%rbp), %rbp + movq %rbp, %rax + adcq %r14, %rbp + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %rbp, %r14 + cmovbq %r11, %r15 + movq %r12, (%rsp) + movq %r13, 0x8(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + movq 0x20(%rsi), %rdx + mulxq %rdx, %r8, %r15 + mulxq 0x28(%rsi), %r9, %r10 + mulxq 0x38(%rsi), %r11, %r12 + movq 0x30(%rsi), %rdx + mulxq 0x38(%rsi), %r13, %r14 + xorl %ebp, %ebp + mulxq 0x20(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x28(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x38(%rsi), %rdx + mulxq 0x28(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + adoxq %rbp, %r14 + adcq %rbp, %r14 + xorl %ebp, %ebp + adcxq %r9, %r9 + adoxq %r15, %r9 + movq 0x28(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x30(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x38(%rsi), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rbp, %r15 + adoxq %rbp, %r15 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + movq $0xffffffff00000001, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + movl %ebp, %r9d + adoxq %rbp, %r9 + adcxq 
%rbp, %r9 + addq %r9, %r14 + adcq %rbp, %r15 + movl %ebp, %r8d + adcq %rbp, %r8 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq $0xffffffff00000001, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %r8 + adcq %rbp, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rdx), %rdx + adcq %r13, %rdx + leaq -0x1(%rbp), %rbp + movq %rbp, %rax + adcq %r14, %rbp + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %rbp, %r14 + cmovbq %r11, %r15 + movq %r12, 0x20(%rsp) + movq %r13, 0x28(%rsp) + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq (%rsi), %rax + subq (%rsp), %rax + movq 0x8(%rsi), %rcx + sbbq 0x8(%rsp), %rcx + movq 0x10(%rsi), %r8 + sbbq 0x10(%rsp), %r8 + movq 0x18(%rsi), %r9 + sbbq 0x18(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x60(%rsp) + adcq %r10, %rcx + movq %rcx, 0x68(%rsp) + adcq $0x0, %r8 + movq %r8, 0x70(%rsp) + adcq %rdx, %r9 + movq %r9, 0x78(%rsp) + movq (%rsi), %rax + addq (%rsp), %rax + movq 0x8(%rsi), %rcx + adcq 0x8(%rsp), %rcx + movq 0x10(%rsi), %r8 + adcq 0x10(%rsp), %r8 + movq 0x18(%rsi), %r9 + adcq 0x18(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + subq %r11, %rax + movq %rax, 0x40(%rsp) + sbbq %r10, %rcx + movq %rcx, 0x48(%rsp) + sbbq $0x0, %r8 + movq %r8, 0x50(%rsp) + sbbq %rdx, %r9 + movq %r9, 0x58(%rsp) + xorl %r13d, %r13d + movq 0x60(%rsp), %rdx + mulxq 0x40(%rsp), %r8, %r9 + mulxq 0x48(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x50(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x58(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x68(%rsp), %rdx + xorl %r14d, %r14d + mulxq 0x40(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x58(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x70(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x40(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x58(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x78(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x40(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x58(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq 
%rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x60(%rsp) + movq %r13, 0x68(%rsp) + movq %r14, 0x70(%rsp) + movq %r15, 0x78(%rsp) + xorq %r11, %r11 + movq 0x20(%rsi), %rax + addq 0x40(%rsi), %rax + movq 0x28(%rsi), %rcx + adcq 0x48(%rsi), %rcx + movq 0x30(%rsi), %r8 + adcq 0x50(%rsi), %r8 + movq 0x38(%rsi), %r9 + adcq 0x58(%rsi), %r9 + adcq %r11, %r11 + subq $0xffffffffffffffff, %rax + movl $0xffffffff, %r10d + sbbq %r10, %rcx + sbbq $0x0, %r8 + movq $0xffffffff00000001, %rdx + sbbq %rdx, %r9 + sbbq $0x0, %r11 + andq %r11, %r10 + andq %r11, %rdx + addq %r11, %rax + movq %rax, 0x40(%rsp) + adcq %r10, %rcx + movq %rcx, 0x48(%rsp) + adcq $0x0, %r8 + movq %r8, 0x50(%rsp) + adcq %rdx, %r9 + movq %r9, 0x58(%rsp) + xorl %r13d, %r13d + movq 0x20(%rsp), %rdx + mulxq (%rsi), %r8, %r9 + mulxq 0x8(%rsi), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0x10(%rsi), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x18(%rsi), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x28(%rsp), %rdx + xorl %r14d, %r14d + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x18(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x30(%rsp), %rdx + xorl %r8d, %r8d + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0x18(%rsi), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x38(%rsp), %rdx + xorl %r9d, %r9d + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0x18(%rsi), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x80(%rsp) + movq %r13, 0x88(%rsp) + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq 0x60(%rsp), %rdx + mulxq %rdx, %r8, %r15 
+ mulxq 0x68(%rsp), %r9, %r10 + mulxq 0x78(%rsp), %r11, %r12 + movq 0x70(%rsp), %rdx + mulxq 0x78(%rsp), %r13, %r14 + xorl %ebp, %ebp + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x78(%rsp), %rdx + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + adoxq %rbp, %r14 + adcq %rbp, %r14 + xorl %ebp, %ebp + adcxq %r9, %r9 + adoxq %r15, %r9 + movq 0x68(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x70(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x78(%rsp), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rbp, %r15 + adoxq %rbp, %r15 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + movq $0xffffffff00000001, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + movl %ebp, %r9d + adoxq %rbp, %r9 + adcxq %rbp, %r9 + addq %r9, %r14 + adcq %rbp, %r15 + movl %ebp, %r8d + adcq %rbp, %r8 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq $0xffffffff00000001, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %r8 + adcq %rbp, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rdx), %rdx + adcq %r13, %rdx + leaq -0x1(%rbp), %rbp + movq %rbp, %rax + adcq %r14, %rbp + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %rbp, %r14 + cmovbq %r11, %r15 + movq %r12, 0xa0(%rsp) + movq %r13, 0xa8(%rsp) + movq %r14, 0xb0(%rsp) + movq %r15, 0xb8(%rsp) + movq 0x40(%rsp), %rdx + mulxq %rdx, %r8, %r15 + mulxq 0x48(%rsp), %r9, %r10 + mulxq 0x58(%rsp), %r11, %r12 + movq 0x50(%rsp), %rdx + mulxq 0x58(%rsp), %r13, %r14 + xorl %ebp, %ebp + mulxq 0x40(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x58(%rsp), %rdx + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + adoxq %rbp, %r14 + adcq %rbp, %r14 + xorl %ebp, %ebp + adcxq %r9, %r9 + adoxq %r15, %r9 + movq 0x48(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x50(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x58(%rsp), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rbp, %r15 + adoxq %rbp, %r15 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + movq $0xffffffff00000001, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + movl %ebp, %r9d + adoxq %rbp, %r9 + adcxq %rbp, %r9 + addq %r9, %r14 + adcq %rbp, %r15 + movl %ebp, %r8d + adcq %rbp, %r8 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 
%r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq $0xffffffff00000001, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %r8 + adcq %rbp, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rdx), %rdx + adcq %r13, %rdx + leaq -0x1(%rbp), %rbp + movq %rbp, %rax + adcq %r14, %rbp + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %rbp, %r14 + cmovbq %r11, %r15 + movq %r12, 0x40(%rsp) + movq %r13, 0x48(%rsp) + movq %r14, 0x50(%rsp) + movq %r15, 0x58(%rsp) + movq $0xffffffffffffffff, %r8 + xorl %r10d, %r10d + subq 0xa0(%rsp), %r8 + movq $0xffffffff, %r9 + sbbq 0xa8(%rsp), %r9 + sbbq 0xb0(%rsp), %r10 + movq $0xffffffff00000001, %r11 + sbbq 0xb8(%rsp), %r11 + xorl %r12d, %r12d + movq $0x9, %rdx + mulxq %r8, %r8, %rax + mulxq %r9, %r9, %rcx + addq %rax, %r9 + mulxq %r10, %r10, %rax + adcq %rcx, %r10 + mulxq %r11, %r11, %rcx + adcq %rax, %r11 + adcq %rcx, %r12 + movq $0xc, %rdx + xorl %eax, %eax + mulxq 0x80(%rsp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + mulxq 0x88(%rsp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + mulxq 0x90(%rsp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + mulxq 0x98(%rsp), %rax, %rdx + adcxq %rax, %r11 + adoxq %r12, %rdx + adcq $0x1, %rdx + addq %rdx, %r8 + movq $0x100000000, %rax + mulxq %rax, %rax, %rcx + sbbq $0x0, %rax + sbbq $0x0, %rcx + subq %rax, %r9 + sbbq %rcx, %r10 + movq $0xffffffff00000001, %rax + mulxq %rax, %rax, %rcx + sbbq %rax, %r11 + sbbq %rcx, %rdx + decq %rdx + movl $0xffffffff, %eax + andq %rdx, %rax + xorl %ecx, %ecx + subq %rax, %rcx + addq %rdx, %r8 + movq %r8, 0xa0(%rsp) + adcq %rax, %r9 + movq %r9, 0xa8(%rsp) + adcq $0x0, %r10 + movq %r10, 0xb0(%rsp) + adcq %rcx, %r11 + movq %r11, 0xb8(%rsp) + movq 0x40(%rsp), %rax + subq (%rsp), %rax + movq 0x48(%rsp), %rcx + sbbq 0x8(%rsp), %rcx + movq 0x50(%rsp), %r8 + sbbq 0x10(%rsp), %r8 + movq 0x58(%rsp), %r9 + sbbq 0x18(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x40(%rsp) + adcq %r10, %rcx + movq %rcx, 0x48(%rsp) + adcq $0x0, %r8 + movq %r8, 0x50(%rsp) + adcq %rdx, %r9 + movq %r9, 0x58(%rsp) + movq 0x20(%rsp), %rdx + mulxq %rdx, %r8, %r15 + mulxq 0x28(%rsp), %r9, %r10 + mulxq 0x38(%rsp), %r11, %r12 + movq 0x30(%rsp), %rdx + mulxq 0x38(%rsp), %r13, %r14 + xorl %ebp, %ebp + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x28(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x38(%rsp), %rdx + mulxq 0x28(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + adoxq %rbp, %r14 + adcq %rbp, %r14 + xorl %ebp, %ebp + adcxq %r9, %r9 + adoxq %r15, %r9 + movq 0x28(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x30(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x38(%rsp), %rdx + mulxq %rdx, %rax, %r15 + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %rbp, %r15 + adoxq %rbp, %r15 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + movq $0xffffffff00000001, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %rbp, %r13 + 
movl %ebp, %r9d + adoxq %rbp, %r9 + adcxq %rbp, %r9 + addq %r9, %r14 + adcq %rbp, %r15 + movl %ebp, %r8d + adcq %rbp, %r8 + xorl %ebp, %ebp + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq $0xffffffff00000001, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %r8 + adcq %rbp, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rdx), %rdx + adcq %r13, %rdx + leaq -0x1(%rbp), %rbp + movq %rbp, %rax + adcq %r14, %rbp + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %rbp, %r14 + cmovbq %r11, %r15 + movq %r12, (%rsp) + movq %r13, 0x8(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + xorl %r13d, %r13d + movq 0x60(%rsp), %rdx + mulxq 0xa0(%rsp), %r8, %r9 + mulxq 0xa8(%rsp), %rbx, %r10 + adcq %rbx, %r9 + mulxq 0xb0(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0xb8(%rsp), %rbx, %r12 + adcq %rbx, %r11 + adcq %r13, %r12 + movq 0x68(%rsp), %rdx + xorl %r14d, %r14d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcq %r14, %r13 + xorl %r15d, %r15d + movq $0x100000000, %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq %r9, %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r8, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r9, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adcxq %r15, %r13 + adoxq %r15, %r14 + adcq %r15, %r14 + movq 0x70(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + adoxq %r8, %r14 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r13 + adcq %rbx, %r14 + adcq %r8, %r15 + movq 0x78(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r9, %r15 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r9, %r8 + xorl %r9d, %r9d + movq $0x100000000, %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq %r11, %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + notq %rdx + leaq 0x2(%rdx), %rdx + mulxq %r10, %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq %r11, %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %r9, %r15 + adoxq %r9, %r8 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rdx + adcq %r13, %rdx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rdx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x60(%rsp) + movq %r13, 0x68(%rsp) + movq %r14, 0x70(%rsp) + movq %r15, 0x78(%rsp) + movq 0x40(%rsp), %rax + subq 0x20(%rsp), %rax + movq 0x48(%rsp), %rcx + sbbq 0x28(%rsp), %rcx + movq 0x50(%rsp), %r8 + sbbq 0x30(%rsp), %r8 + movq 0x58(%rsp), %r9 + sbbq 0x38(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + 
addq %r11, %rax + movq %rax, 0x40(%rdi) + adcq %r10, %rcx + movq %rcx, 0x48(%rdi) + adcq $0x0, %r8 + movq %r8, 0x50(%rdi) + adcq %rdx, %r9 + movq %r9, 0x58(%rdi) + movq 0x98(%rsp), %r11 + movq %r11, %rdx + movq 0x90(%rsp), %r10 + shldq $0x2, %r10, %r11 + movq 0x88(%rsp), %r9 + shldq $0x2, %r9, %r10 + movq 0x80(%rsp), %r8 + shldq $0x2, %r8, %r9 + shlq $0x2, %r8 + shrq $0x3e, %rdx + addq $0x1, %rdx + subq 0xa0(%rsp), %r8 + sbbq 0xa8(%rsp), %r9 + sbbq 0xb0(%rsp), %r10 + sbbq 0xb8(%rsp), %r11 + sbbq $0x0, %rdx + addq %rdx, %r8 + movq $0x100000000, %rax + mulxq %rax, %rax, %rcx + sbbq $0x0, %rax + sbbq $0x0, %rcx + subq %rax, %r9 + sbbq %rcx, %r10 + movq $0xffffffff00000001, %rax + mulxq %rax, %rax, %rcx + sbbq %rax, %r11 + sbbq %rcx, %rdx + decq %rdx + movl $0xffffffff, %eax + andq %rdx, %rax + xorl %ecx, %ecx + subq %rax, %rcx + addq %rdx, %r8 + movq %r8, (%rdi) + adcq %rax, %r9 + movq %r9, 0x8(%rdi) + adcq $0x0, %r10 + movq %r10, 0x10(%rdi) + adcq %rcx, %r11 + movq %r11, 0x18(%rdi) + movq $0xffffffffffffffff, %r8 + xorl %r10d, %r10d + subq (%rsp), %r8 + movq $0xffffffff, %r9 + sbbq 0x8(%rsp), %r9 + sbbq 0x10(%rsp), %r10 + movq $0xffffffff00000001, %r11 + sbbq 0x18(%rsp), %r11 + movq %r11, %r12 + shldq $0x3, %r10, %r11 + shldq $0x3, %r9, %r10 + shldq $0x3, %r8, %r9 + shlq $0x3, %r8 + shrq $0x3d, %r12 + movq $0x3, %rdx + xorl %eax, %eax + mulxq 0x60(%rsp), %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + mulxq 0x68(%rsp), %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + mulxq 0x70(%rsp), %rax, %rcx + adcxq %rax, %r10 + adoxq %rcx, %r11 + mulxq 0x78(%rsp), %rax, %rdx + adcxq %rax, %r11 + adoxq %r12, %rdx + adcq $0x1, %rdx + addq %rdx, %r8 + movq $0x100000000, %rax + mulxq %rax, %rax, %rcx + sbbq $0x0, %rax + sbbq $0x0, %rcx + subq %rax, %r9 + sbbq %rcx, %r10 + movq $0xffffffff00000001, %rax + mulxq %rax, %rax, %rcx + sbbq %rax, %r11 + sbbq %rcx, %rdx + decq %rdx + movl $0xffffffff, %eax + andq %rdx, %rax + xorl %ecx, %ecx + subq %rax, %rcx + addq %rdx, %r8 + movq %r8, 0x20(%rdi) + adcq %rax, %r9 + movq %r9, 0x28(%rdi) + adcq $0x0, %r10 + movq %r10, 0x30(%rdi) + adcq %rcx, %r11 + movq %r11, 0x38(%rdi) + addq $0xc0, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/x86_att/p256/p256_montjscalarmul_alt.S b/third_party/s2n-bignum/x86_att/p256/p256_montjscalarmul_alt.S new file mode 100644 index 0000000000..b68d857e76 --- /dev/null +++ b/third_party/s2n-bignum/x86_att/p256/p256_montjscalarmul_alt.S @@ -0,0 +1,4706 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery-Jacobian form scalar multiplication for P-256 +// Input scalar[4], point[12]; output res[12] +// +// extern void p256_montjscalarmul_alt +// (uint64_t res[static 12], +// uint64_t scalar[static 4], +// uint64_t point[static 12]); +// +// This function is a variant of its affine point version p256_scalarmul. +// Here, input and output points are assumed to be in Jacobian form with +// their coordinates in the Montgomery domain. Thus, if priming indicates +// Montgomery form, x' = (2^256 * x) mod p_256 etc., each point argument +// is a triple (x',y',z') representing the affine point (x/z^2,y/z^3) when +// z' is nonzero or the point at infinity (group identity) if z' = 0. 
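+// For example, an affine point (x,y) with z = 1 is encoded as the triple
+// ((2^256 * x) mod p_256, (2^256 * y) mod p_256, (2^256 * 1) mod p_256).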
+// +// Given scalar = n and point = P, assumed to be on the NIST elliptic +// curve P-256, returns a representation of n * P. If the result is the +// point at infinity (either because the input point was or because the +// scalar was a multiple of p_256) then the output is guaranteed to +// represent the point at infinity, i.e. to have its z coordinate zero. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point +// Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p256_montjscalarmul_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p256_montjscalarmul_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 32 + +// Intermediate variables on the stack. Uppercase syntactic variants +// make x86_att version simpler to generate. + +#define SCALARB (0*NUMSIZE) +#define scalarb (0*NUMSIZE)(%rsp) +#define ACC (1*NUMSIZE) +#define acc (1*NUMSIZE)(%rsp) +#define TABENT (4*NUMSIZE) +#define tabent (4*NUMSIZE)(%rsp) + +#define TAB (7*NUMSIZE) +#define tab (7*NUMSIZE)(%rsp) + +#define res (31*NUMSIZE)(%rsp) + +#define NSPACE (32*NUMSIZE) + +// Avoid using .rep for the sake of the BoringSSL/AWS-LC delocator, +// which doesn't accept repetitions, assembler macros etc. + +#define selectblock(I) \ + cmpq $I, %rdi ; \ + cmovzq TAB+96*(I-1)(%rsp), %rax ; \ + cmovzq TAB+96*(I-1)+8(%rsp), %rbx ; \ + cmovzq TAB+96*(I-1)+16(%rsp), %rcx ; \ + cmovzq TAB+96*(I-1)+24(%rsp), %rdx ; \ + cmovzq TAB+96*(I-1)+32(%rsp), %r8 ; \ + cmovzq TAB+96*(I-1)+40(%rsp), %r9 ; \ + cmovzq TAB+96*(I-1)+48(%rsp), %r10 ; \ + cmovzq TAB+96*(I-1)+56(%rsp), %r11 ; \ + cmovzq TAB+96*(I-1)+64(%rsp), %r12 ; \ + cmovzq TAB+96*(I-1)+72(%rsp), %r13 ; \ + cmovzq TAB+96*(I-1)+80(%rsp), %r14 ; \ + cmovzq TAB+96*(I-1)+88(%rsp), %r15 + +S2N_BN_SYMBOL(p256_montjscalarmul_alt): + +// The Windows version literally calls the standard ABI version. +// This simplifies the proofs since subroutine offsets are fixed. + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx + callq p256_montjscalarmul_alt_standard + popq %rsi + popq %rdi + ret + +p256_montjscalarmul_alt_standard: +#endif + +// Real start of the standard ABI code. + + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbp + pushq %rbx + + subq $NSPACE, %rsp + +// Preserve the "res" and "point" input arguments. We load and process the +// scalar immediately so we don't bother preserving that input argument. +// Also, "point" is only needed early on and so its register gets re-used. + + movq %rdx, %rbx + movq %rdi, res + +// Load the digits of group order n_256 = [%r15;%r14;%r13;%r12] + + movq $0xf3b9cac2fc632551, %r12 + movq $0xbce6faada7179e84, %r13 + movq $0xffffffffffffffff, %r14 + movq $0xffffffff00000000, %r15 + +// First, reduce the input scalar mod n_256, i.e. conditionally subtract n_256 + + movq (%rsi), %r8 + subq %r12, %r8 + movq 8(%rsi), %r9 + sbbq %r13, %r9 + movq 16(%rsi), %r10 + sbbq %r14, %r10 + movq 24(%rsi), %r11 + sbbq %r15, %r11 + + cmovcq (%rsi), %r8 + cmovcq 8(%rsi), %r9 + cmovcq 16(%rsi), %r10 + cmovcq 24(%rsi), %r11 + +// Now if the top bit of the reduced scalar is set, negate it mod n_256, +// i.e. do n |-> n_256 - n. Remember the sign in %rbp so we can +// correspondingly negate the point below. 
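+// At this point [%r15;%r14;%r13;%r12] still holds n_256, so the subtraction
+// below forms n_256 - n, %rbp is set to bit 63 of the reduced scalar, and the
+// cmovnz chain keeps the negated value exactly when that top bit was set,
+// leaving the working scalar with its top bit clear in either case.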
+ + subq %r8, %r12 + sbbq %r9, %r13 + sbbq %r10, %r14 + sbbq %r11, %r15 + + movq %r11, %rbp + shrq $63, %rbp + cmovnzq %r12, %r8 + cmovnzq %r13, %r9 + cmovnzq %r14, %r10 + cmovnzq %r15, %r11 + +// In either case then add the recoding constant 0x08888...888 to allow +// signed digits. + + movq $0x8888888888888888, %rax + addq %rax, %r8 + adcq %rax, %r9 + adcq %rax, %r10 + adcq %rax, %r11 + btc $63, %r11 + + movq %r8, SCALARB(%rsp) + movq %r9, SCALARB+8(%rsp) + movq %r10, SCALARB+16(%rsp) + movq %r11, SCALARB+24(%rsp) + +// Set the tab[0] table entry to the input point = 1 * P, except +// that we negate it if the top bit of the scalar was set. This +// negation takes care over the y = 0 case to maintain all the +// coordinates < p_256 throughout, even though triples (x,y,z) +// with y = 0 can only represent a point on the curve when z = 0 +// and it represents the point at infinity regardless of x and y. + + movq (%rbx), %rax + movq %rax, TAB(%rsp) + movq 8(%rbx), %rax + movq %rax, TAB+8(%rsp) + movq 16(%rbx), %rax + movq %rax, TAB+16(%rsp) + movq 24(%rbx), %rax + movq %rax, TAB+24(%rsp) + + movq 32(%rbx), %r12 + movq %r12, %rax + movq 40(%rbx), %r13 + orq %r13, %rax + movq 48(%rbx), %r14 + movq %r14, %rcx + movq 56(%rbx), %r15 + orq %r15, %rcx + orq %rcx, %rax + cmovzq %rax, %rbp + + xorl %r10d, %r10d + leaq -1(%r10), %r8 + movq $0x00000000ffffffff, %r11 + movq %r11, %r9 + negq %r11 + subq %r12, %r8 + sbbq %r13, %r9 + sbbq %r14, %r10 + sbbq %r15, %r11 + testq %rbp, %rbp + cmovzq %r12, %r8 + cmovzq %r13, %r9 + cmovzq %r14, %r10 + cmovzq %r15, %r11 + movq %r8, TAB+32(%rsp) + movq %r9, TAB+40(%rsp) + movq %r10, TAB+48(%rsp) + movq %r11, TAB+56(%rsp) + + movq 64(%rbx), %rax + movq %rax, TAB+64(%rsp) + movq 72(%rbx), %rax + movq %rax, TAB+72(%rsp) + movq 80(%rbx), %rax + movq %rax, TAB+80(%rsp) + movq 88(%rbx), %rax + movq %rax, TAB+88(%rsp) + +// Compute and record tab[1] = 2 * p, ..., tab[7] = 8 * P + + leaq TAB+96*1(%rsp), %rdi + leaq TAB(%rsp), %rsi + callq p256_montjscalarmul_alt_p256_montjdouble + + leaq TAB+96*2(%rsp), %rdi + leaq TAB+96*1(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p256_montjscalarmul_alt_p256_montjadd + + leaq TAB+96*3(%rsp), %rdi + leaq TAB+96*1(%rsp), %rsi + callq p256_montjscalarmul_alt_p256_montjdouble + + leaq TAB+96*4(%rsp), %rdi + leaq TAB+96*3(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p256_montjscalarmul_alt_p256_montjadd + + leaq TAB+96*5(%rsp), %rdi + leaq TAB+96*2(%rsp), %rsi + callq p256_montjscalarmul_alt_p256_montjdouble + + leaq TAB+96*6(%rsp), %rdi + leaq TAB+96*5(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p256_montjscalarmul_alt_p256_montjadd + + leaq TAB+96*7(%rsp), %rdi + leaq TAB+96*3(%rsp), %rsi + callq p256_montjscalarmul_alt_p256_montjdouble + +// Set up accumulator as table entry for top 4 bits (constant-time indexing) + + movq SCALARB+24(%rsp), %rdi + shrq $60, %rdi + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + + movq %rax, ACC(%rsp) + movq %rbx, ACC+8(%rsp) + movq %rcx, ACC+16(%rsp) + movq %rdx, ACC+24(%rsp) + movq %r8, ACC+32(%rsp) + movq %r9, ACC+40(%rsp) + movq %r10, ACC+48(%rsp) + movq %r11, ACC+56(%rsp) + movq %r12, ACC+64(%rsp) + movq %r13, ACC+72(%rsp) + movq %r14, ACC+80(%rsp) + movq %r15, ACC+88(%rsp) + +// Main loop 
over size-4 bitfield + + movl $252, %ebp + +p256_montjscalarmul_alt_mainloop: + subq $4, %rbp + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_alt_p256_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_alt_p256_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_alt_p256_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_alt_p256_montjdouble + + movq %rbp, %rax + shrq $6, %rax + movq (%rsp,%rax,8), %rdi + movq %rbp, %rcx + shrq %cl, %rdi + andq $15, %rdi + + subq $8, %rdi + sbbq %rsi, %rsi // %rsi = sign of digit (-1 = negative) + xorq %rsi, %rdi + subq %rsi, %rdi // %rdi = absolute value of digit + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + selectblock(1) + selectblock(2) + selectblock(3) + selectblock(4) + selectblock(5) + selectblock(6) + selectblock(7) + selectblock(8) + +// Store it to "tabent" with the y coordinate optionally negated +// Again, do it carefully to give coordinates < p_256 even in +// the degenerate case y = 0 (when z = 0 for points on the curve). + + movq %rax, TABENT(%rsp) + movq %rbx, TABENT+8(%rsp) + movq %rcx, TABENT+16(%rsp) + movq %rdx, TABENT+24(%rsp) + + movq %r12, TABENT+64(%rsp) + movq %r13, TABENT+72(%rsp) + movq %r14, TABENT+80(%rsp) + movq %r15, TABENT+88(%rsp) + + movq %r8, %rax + xorl %r14d, %r14d + orq %r9, %rax + leaq -1(%r14), %r12 + movq %r10, %rcx + movq $0x00000000ffffffff, %r15 + orq %r11, %rcx + movq %r15, %r13 + negq %r15 + orq %rcx, %rax + cmovzq %rax, %rsi + + subq %r8, %r12 + sbbq %r9, %r13 + sbbq %r10, %r14 + sbbq %r11, %r15 + + testq %rsi, %rsi + cmovnzq %r12, %r8 + cmovnzq %r13, %r9 + cmovnzq %r14, %r10 + cmovnzq %r15, %r11 + + movq %r8, TABENT+32(%rsp) + movq %r9, TABENT+40(%rsp) + movq %r10, TABENT+48(%rsp) + movq %r11, TABENT+56(%rsp) + + leaq TABENT(%rsp), %rdx + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p256_montjscalarmul_alt_p256_montjadd + + testq %rbp, %rbp + jne p256_montjscalarmul_alt_mainloop + +// That's the end of the main loop, and we just need to copy the +// result in "acc" to the output. 
+ + movq res, %rdi + movq ACC(%rsp), %rax + movq %rax, (%rdi) + movq ACC+8(%rsp), %rax + movq %rax, 8(%rdi) + movq ACC+16(%rsp), %rax + movq %rax, 16(%rdi) + movq ACC+24(%rsp), %rax + movq %rax, 24(%rdi) + + movq ACC+32(%rsp), %rax + movq %rax, 32(%rdi) + movq ACC+40(%rsp), %rax + movq %rax, 40(%rdi) + movq ACC+48(%rsp), %rax + movq %rax, 48(%rdi) + movq ACC+56(%rsp), %rax + movq %rax, 56(%rdi) + + movq ACC+64(%rsp), %rax + movq %rax, 64(%rdi) + movq ACC+72(%rsp), %rax + movq %rax, 72(%rdi) + movq ACC+80(%rsp), %rax + movq %rax, 80(%rdi) + movq ACC+88(%rsp), %rax + movq %rax, 88(%rdi) + +// Restore stack and registers and return + + addq $NSPACE, %rsp + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + popq %r15 + ret + +// Local copies of subroutines, complete clones at the moment + +p256_montjscalarmul_alt_p256_montjadd: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0xe0, %rsp + movq %rdx, %rbp + movq 0x40(%rsi), %rax + movq %rax, %rbx + mulq %rax + movq %rax, %r8 + movq %rdx, %r15 + movq 0x48(%rsi), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x58(%rsi), %rax + movq %rax, %r13 + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x50(%rsi), %rax + movq %rax, %rbx + mulq %r13 + movq %rax, %r13 + movq %rdx, %r14 + movq 0x40(%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq 0x48(%rsi), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq 0x58(%rsi), %rbx + movq 0x48(%rsi), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorl %ecx, %ecx + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %rcx, %rcx + movq 0x48(%rsi), %rax + mulq %rax + addq %r15, %r9 + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + movq 0x50(%rsi), %rax + mulq %rax + negq %r15 + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %r15, %r15 + movq 0x58(%rsi), %rax + mulq %rax + negq %r15 + adcq %rax, %r14 + adcq %rcx, %rdx + movq %rdx, %r15 + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + xorl %r8d, %r8d + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r8, %r14 + adcq %r8, %r15 + adcq %r8, %r8 + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + xorl %r9d, %r9d + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rbx), %rbx + adcq %r13, %rbx + leaq -0x1(%r9), %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, (%rsp) + movq %r13, 0x8(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + movq 0x40(%rbp), %rax + movq %rax, %rbx + mulq %rax + movq %rax, %r8 + movq %rdx, %r15 + movq 0x48(%rbp), %rax + mulq %rbx + movq %rax, 
%r9 + movq %rdx, %r10 + movq 0x58(%rbp), %rax + movq %rax, %r13 + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x50(%rbp), %rax + movq %rax, %rbx + mulq %r13 + movq %rax, %r13 + movq %rdx, %r14 + movq 0x40(%rbp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq 0x48(%rbp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq 0x58(%rbp), %rbx + movq 0x48(%rbp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorl %ecx, %ecx + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %rcx, %rcx + movq 0x48(%rbp), %rax + mulq %rax + addq %r15, %r9 + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + movq 0x50(%rbp), %rax + mulq %rax + negq %r15 + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %r15, %r15 + movq 0x58(%rbp), %rax + mulq %rax + negq %r15 + adcq %rax, %r14 + adcq %rcx, %rdx + movq %rdx, %r15 + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + xorl %r8d, %r8d + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r8, %r14 + adcq %r8, %r15 + adcq %r8, %r8 + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + xorl %r9d, %r9d + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rbx), %rbx + adcq %r13, %rbx + leaq -0x1(%r9), %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0xa0(%rsp) + movq %r13, 0xa8(%rsp) + movq %r14, 0xb0(%rsp) + movq %r15, 0xb8(%rsp) + movq 0x20(%rsi), %rbx + movq 0x40(%rbp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x48(%rbp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x50(%rbp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x58(%rbp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x28(%rsi), %rbx + xorl %r13d, %r13d + movq 0x40(%rbp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x48(%rbp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x50(%rbp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x58(%rbp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq 
%rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x30(%rsi), %rbx + xorl %r15d, %r15d + movq 0x40(%rbp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x48(%rbp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x50(%rbp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x58(%rbp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x38(%rsi), %rbx + xorl %r8d, %r8d + movq 0x40(%rbp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x48(%rbp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x50(%rbp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x58(%rbp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0xc0(%rsp) + movq %r13, 0xc8(%rsp) + movq %r14, 0xd0(%rsp) + movq %r15, 0xd8(%rsp) + movq 0x20(%rbp), %rbx + movq 0x40(%rsi), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x48(%rsi), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x50(%rsi), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x58(%rsi), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x28(%rbp), %rbx + xorl %r13d, %r13d + movq 0x40(%rsi), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x48(%rsi), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x50(%rsi), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x58(%rsi), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x30(%rbp), %rbx + xorl %r15d, %r15d + movq 0x40(%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x48(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x50(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x58(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x38(%rbp), %rbx + xorl %r8d, %r8d + movq 0x40(%rsi), %rax + 
mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x48(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x50(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x58(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x20(%rsp) + movq %r13, 0x28(%rsp) + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq 0x0(%rbp), %rbx + movq (%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x18(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x8(%rbp), %rbx + xorl %r13d, %r13d + movq (%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x10(%rbp), %rbx + xorl %r15d, %r15d + movq (%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x18(%rbp), %rbx + xorl %r8d, %r8d + movq (%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, 
%r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x40(%rsp) + movq %r13, 0x48(%rsp) + movq %r14, 0x50(%rsp) + movq %r15, 0x58(%rsp) + movq (%rsi), %rbx + movq 0xa0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0xb0(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0xb8(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x8(%rsi), %rbx + xorl %r13d, %r13d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x10(%rsi), %rbx + xorl %r15d, %r15d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x18(%rsi), %rbx + xorl %r8d, %r8d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x80(%rsp) + movq %r13, 0x88(%rsp) + movq %r14, 
0x90(%rsp) + movq %r15, 0x98(%rsp) + movq 0x20(%rsp), %rbx + movq (%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x18(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x28(%rsp), %rbx + xorl %r13d, %r13d + movq (%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x30(%rsp), %rbx + xorl %r15d, %r15d + movq (%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x38(%rsp), %rbx + xorl %r8d, %r8d + movq (%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x20(%rsp) + movq %r13, 0x28(%rsp) + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq 0xc0(%rsp), %rbx + movq 0xa0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0xb0(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0xb8(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0xc8(%rsp), %rbx + xorl %r13d, %r13d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r14, 
%rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0xd0(%rsp), %rbx + xorl %r15d, %r15d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0xd8(%rsp), %rbx + xorl %r8d, %r8d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0xc0(%rsp) + movq %r13, 0xc8(%rsp) + movq %r14, 0xd0(%rsp) + movq %r15, 0xd8(%rsp) + movq 0x40(%rsp), %rax + subq 0x80(%rsp), %rax + movq 0x48(%rsp), %rcx + sbbq 0x88(%rsp), %rcx + movq 0x50(%rsp), %r8 + sbbq 0x90(%rsp), %r8 + movq 0x58(%rsp), %r9 + sbbq 0x98(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0xa0(%rsp) + adcq %r10, %rcx + movq %rcx, 0xa8(%rsp) + adcq $0x0, %r8 + movq %r8, 0xb0(%rsp) + adcq %rdx, %r9 + movq %r9, 0xb8(%rsp) + movq 0x20(%rsp), %rax + subq 0xc0(%rsp), %rax + movq 0x28(%rsp), %rcx + sbbq 0xc8(%rsp), %rcx + movq 0x30(%rsp), %r8 + sbbq 0xd0(%rsp), %r8 + movq 0x38(%rsp), %r9 + sbbq 0xd8(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x20(%rsp) + adcq %r10, %rcx + movq %rcx, 0x28(%rsp) + adcq $0x0, %r8 + movq %r8, 0x30(%rsp) + adcq %rdx, %r9 + movq %r9, 0x38(%rsp) + movq 0xa0(%rsp), %rax + movq %rax, %rbx + mulq %rax + movq %rax, %r8 + movq %rdx, %r15 + movq 0xa8(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0xb8(%rsp), %rax + movq %rax, %r13 + mulq %rbx + 
movq %rax, %r11 + movq %rdx, %r12 + movq 0xb0(%rsp), %rax + movq %rax, %rbx + mulq %r13 + movq %rax, %r13 + movq %rdx, %r14 + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq 0xa8(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq 0xb8(%rsp), %rbx + movq 0xa8(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorl %ecx, %ecx + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %rcx, %rcx + movq 0xa8(%rsp), %rax + mulq %rax + addq %r15, %r9 + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + movq 0xb0(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %r15, %r15 + movq 0xb8(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r14 + adcq %rcx, %rdx + movq %rdx, %r15 + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + xorl %r8d, %r8d + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r8, %r14 + adcq %r8, %r15 + adcq %r8, %r8 + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + xorl %r9d, %r9d + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rbx), %rbx + adcq %r13, %rbx + leaq -0x1(%r9), %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x60(%rsp) + movq %r13, 0x68(%rsp) + movq %r14, 0x70(%rsp) + movq %r15, 0x78(%rsp) + movq 0x20(%rsp), %rax + movq %rax, %rbx + mulq %rax + movq %rax, %r8 + movq %rdx, %r15 + movq 0x28(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x38(%rsp), %rax + movq %rax, %r13 + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x30(%rsp), %rax + movq %rax, %rbx + mulq %r13 + movq %rax, %r13 + movq %rdx, %r14 + movq 0x20(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq 0x28(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq 0x38(%rsp), %rbx + movq 0x28(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorl %ecx, %ecx + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %rcx, %rcx + movq 0x28(%rsp), %rax + mulq %rax + addq %r15, %r9 + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + movq 0x30(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %r15, %r15 + movq 0x38(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r14 + adcq %rcx, %rdx + movq %rdx, %r15 + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r10 + adcq 
%rdx, %r11 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + xorl %r8d, %r8d + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r8, %r14 + adcq %r8, %r15 + adcq %r8, %r8 + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + xorl %r9d, %r9d + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rbx), %rbx + adcq %r13, %rbx + leaq -0x1(%r9), %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, (%rsp) + movq %r13, 0x8(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + movq 0x80(%rsp), %rbx + movq 0x60(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x68(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x70(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x78(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x88(%rsp), %rbx + xorl %r13d, %r13d + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x68(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x70(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x78(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x90(%rsp), %rbx + xorl %r15d, %r15d + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x68(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x70(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x78(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x98(%rsp), %rbx + xorl %r8d, %r8d + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x68(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x70(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x78(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx 
+ notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x80(%rsp) + movq %r13, 0x88(%rsp) + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq 0x40(%rsp), %rbx + movq 0x60(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x68(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x70(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x78(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x48(%rsp), %rbx + xorl %r13d, %r13d + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x68(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x70(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x78(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x50(%rsp), %rbx + xorl %r15d, %r15d + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x68(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x70(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x78(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x58(%rsp), %rbx + xorl %r8d, %r8d + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x68(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x70(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x78(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x40(%rsp) + movq %r13, 0x48(%rsp) + movq %r14, 0x50(%rsp) + movq 
%r15, 0x58(%rsp) + movq (%rsp), %rax + subq 0x80(%rsp), %rax + movq 0x8(%rsp), %rcx + sbbq 0x88(%rsp), %rcx + movq 0x10(%rsp), %r8 + sbbq 0x90(%rsp), %r8 + movq 0x18(%rsp), %r9 + sbbq 0x98(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, (%rsp) + adcq %r10, %rcx + movq %rcx, 0x8(%rsp) + adcq $0x0, %r8 + movq %r8, 0x10(%rsp) + adcq %rdx, %r9 + movq %r9, 0x18(%rsp) + movq 0x40(%rsp), %rax + subq 0x80(%rsp), %rax + movq 0x48(%rsp), %rcx + sbbq 0x88(%rsp), %rcx + movq 0x50(%rsp), %r8 + sbbq 0x90(%rsp), %r8 + movq 0x58(%rsp), %r9 + sbbq 0x98(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x60(%rsp) + adcq %r10, %rcx + movq %rcx, 0x68(%rsp) + adcq $0x0, %r8 + movq %r8, 0x70(%rsp) + adcq %rdx, %r9 + movq %r9, 0x78(%rsp) + movq 0x40(%rsi), %rbx + movq 0xa0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0xb0(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0xb8(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x48(%rsi), %rbx + xorl %r13d, %r13d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x50(%rsi), %rbx + xorl %r15d, %r15d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x58(%rsi), %rbx + xorl %r8d, %r8d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl 
$0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0xa0(%rsp) + movq %r13, 0xa8(%rsp) + movq %r14, 0xb0(%rsp) + movq %r15, 0xb8(%rsp) + movq (%rsp), %rax + subq 0x40(%rsp), %rax + movq 0x8(%rsp), %rcx + sbbq 0x48(%rsp), %rcx + movq 0x10(%rsp), %r8 + sbbq 0x50(%rsp), %r8 + movq 0x18(%rsp), %r9 + sbbq 0x58(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, (%rsp) + adcq %r10, %rcx + movq %rcx, 0x8(%rsp) + adcq $0x0, %r8 + movq %r8, 0x10(%rsp) + adcq %rdx, %r9 + movq %r9, 0x18(%rsp) + movq 0x80(%rsp), %rax + subq (%rsp), %rax + movq 0x88(%rsp), %rcx + sbbq 0x8(%rsp), %rcx + movq 0x90(%rsp), %r8 + sbbq 0x10(%rsp), %r8 + movq 0x98(%rsp), %r9 + sbbq 0x18(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x80(%rsp) + adcq %r10, %rcx + movq %rcx, 0x88(%rsp) + adcq $0x0, %r8 + movq %r8, 0x90(%rsp) + adcq %rdx, %r9 + movq %r9, 0x98(%rsp) + movq 0xc0(%rsp), %rbx + movq 0x60(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x68(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x70(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x78(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0xc8(%rsp), %rbx + xorl %r13d, %r13d + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x68(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x70(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x78(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0xd0(%rsp), %rbx + xorl %r15d, %r15d + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x68(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x70(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x78(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0xd8(%rsp), %rbx + xorl %r8d, %r8d + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x68(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x70(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x78(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq 
%rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x60(%rsp) + movq %r13, 0x68(%rsp) + movq %r14, 0x70(%rsp) + movq %r15, 0x78(%rsp) + movq 0x40(%rbp), %rbx + movq 0xa0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0xb0(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0xb8(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x48(%rbp), %rbx + xorl %r13d, %r13d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x50(%rbp), %rbx + xorl %r15d, %r15d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x58(%rbp), %rbx + xorl %r8d, %r8d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + 
movq %r12, 0xa0(%rsp) + movq %r13, 0xa8(%rsp) + movq %r14, 0xb0(%rsp) + movq %r15, 0xb8(%rsp) + movq 0x80(%rsp), %rbx + movq 0x20(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x28(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x30(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x38(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x88(%rsp), %rbx + xorl %r13d, %r13d + movq 0x20(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x30(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x38(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x90(%rsp), %rbx + xorl %r15d, %r15d + movq 0x20(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x30(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x38(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x98(%rsp), %rbx + xorl %r8d, %r8d + movq 0x20(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x30(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x38(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x80(%rsp) + movq %r13, 0x88(%rsp) + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq 0x80(%rsp), %rax + subq 0x60(%rsp), %rax + movq 0x88(%rsp), %rcx + sbbq 0x68(%rsp), %rcx + movq 0x90(%rsp), %r8 + sbbq 0x70(%rsp), %r8 + movq 0x98(%rsp), %r9 + sbbq 0x78(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x80(%rsp) + adcq %r10, %rcx + movq %rcx, 0x88(%rsp) + adcq $0x0, %r8 + movq %r8, 0x90(%rsp) + adcq %rdx, %r9 + movq %r9, 0x98(%rsp) + movq 0x40(%rsi), %r8 
+ movq 0x48(%rsi), %r9 + movq 0x50(%rsi), %r10 + movq 0x58(%rsi), %r11 + movq %r8, %rax + movq %r9, %rdx + orq %r10, %rax + orq %r11, %rdx + orq %rdx, %rax + negq %rax + sbbq %rax, %rax + movq 0x40(%rbp), %r12 + movq 0x48(%rbp), %r13 + movq 0x50(%rbp), %r14 + movq 0x58(%rbp), %r15 + movq %r12, %rbx + movq %r13, %rdx + orq %r14, %rbx + orq %r15, %rdx + orq %rdx, %rbx + negq %rbx + sbbq %rbx, %rbx + cmpq %rax, %rbx + cmovbq %r8, %r12 + cmovbq %r9, %r13 + cmovbq %r10, %r14 + cmovbq %r11, %r15 + cmoveq 0xa0(%rsp), %r12 + cmoveq 0xa8(%rsp), %r13 + cmoveq 0xb0(%rsp), %r14 + cmoveq 0xb8(%rsp), %r15 + movq (%rsp), %rax + cmovbq (%rsi), %rax + cmova 0x0(%rbp), %rax + movq 0x8(%rsp), %rbx + cmovbq 0x8(%rsi), %rbx + cmova 0x8(%rbp), %rbx + movq 0x10(%rsp), %rcx + cmovbq 0x10(%rsi), %rcx + cmova 0x10(%rbp), %rcx + movq 0x18(%rsp), %rdx + cmovbq 0x18(%rsi), %rdx + cmova 0x18(%rbp), %rdx + movq 0x80(%rsp), %r8 + cmovbq 0x20(%rsi), %r8 + cmova 0x20(%rbp), %r8 + movq 0x88(%rsp), %r9 + cmovbq 0x28(%rsi), %r9 + cmova 0x28(%rbp), %r9 + movq 0x90(%rsp), %r10 + cmovbq 0x30(%rsi), %r10 + cmova 0x30(%rbp), %r10 + movq 0x98(%rsp), %r11 + cmovbq 0x38(%rsi), %r11 + cmova 0x38(%rbp), %r11 + movq %rax, (%rdi) + movq %rbx, 0x8(%rdi) + movq %rcx, 0x10(%rdi) + movq %rdx, 0x18(%rdi) + movq %r8, 0x20(%rdi) + movq %r9, 0x28(%rdi) + movq %r10, 0x30(%rdi) + movq %r11, 0x38(%rdi) + movq %r12, 0x40(%rdi) + movq %r13, 0x48(%rdi) + movq %r14, 0x50(%rdi) + movq %r15, 0x58(%rdi) + addq $0xe0, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +p256_montjscalarmul_alt_p256_montjdouble: + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0xc0, %rsp + movq 0x40(%rsi), %rax + movq %rax, %rbx + mulq %rax + movq %rax, %r8 + movq %rdx, %r15 + movq 0x48(%rsi), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x58(%rsi), %rax + movq %rax, %r13 + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x50(%rsi), %rax + movq %rax, %rbx + mulq %r13 + movq %rax, %r13 + movq %rdx, %r14 + movq 0x40(%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq 0x48(%rsi), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq 0x58(%rsi), %rbx + movq 0x48(%rsi), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorl %ecx, %ecx + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %rcx, %rcx + movq 0x48(%rsi), %rax + mulq %rax + addq %r15, %r9 + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + movq 0x50(%rsi), %rax + mulq %rax + negq %r15 + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %r15, %r15 + movq 0x58(%rsi), %rax + mulq %rax + negq %r15 + adcq %rax, %r14 + adcq %rcx, %rdx + movq %rdx, %r15 + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + xorl %r8d, %r8d + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r8, %r14 + adcq %r8, %r15 + adcq %r8, %r8 + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), 
%rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + xorl %r9d, %r9d + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rbx), %rbx + adcq %r13, %rbx + leaq -0x1(%r9), %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, (%rsp) + movq %r13, 0x8(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + movq 0x20(%rsi), %rax + movq %rax, %rbx + mulq %rax + movq %rax, %r8 + movq %rdx, %r15 + movq 0x28(%rsi), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x38(%rsi), %rax + movq %rax, %r13 + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x30(%rsi), %rax + movq %rax, %rbx + mulq %r13 + movq %rax, %r13 + movq %rdx, %r14 + movq 0x20(%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq 0x28(%rsi), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq 0x38(%rsi), %rbx + movq 0x28(%rsi), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorl %ecx, %ecx + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %rcx, %rcx + movq 0x28(%rsi), %rax + mulq %rax + addq %r15, %r9 + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + movq 0x30(%rsi), %rax + mulq %rax + negq %r15 + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %r15, %r15 + movq 0x38(%rsi), %rax + mulq %rax + negq %r15 + adcq %rax, %r14 + adcq %rcx, %rdx + movq %rdx, %r15 + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + xorl %r8d, %r8d + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r8, %r14 + adcq %r8, %r15 + adcq %r8, %r8 + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + xorl %r9d, %r9d + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rbx), %rbx + adcq %r13, %rbx + leaq -0x1(%r9), %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x20(%rsp) + movq %r13, 0x28(%rsp) + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq (%rsi), %rax + subq (%rsp), %rax + movq 0x8(%rsi), %rcx + sbbq 0x8(%rsp), %rcx + movq 0x10(%rsi), %r8 + sbbq 0x10(%rsp), %r8 + movq 0x18(%rsi), %r9 + sbbq 0x18(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x60(%rsp) + adcq %r10, %rcx + movq %rcx, 0x68(%rsp) + adcq $0x0, %r8 + movq %r8, 0x70(%rsp) + adcq %rdx, %r9 + movq %r9, 0x78(%rsp) + movq (%rsi), %rax + addq (%rsp), %rax + movq 0x8(%rsi), %rcx 
+ adcq 0x8(%rsp), %rcx + movq 0x10(%rsi), %r8 + adcq 0x10(%rsp), %r8 + movq 0x18(%rsi), %r9 + adcq 0x18(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + subq %r11, %rax + movq %rax, 0x40(%rsp) + sbbq %r10, %rcx + movq %rcx, 0x48(%rsp) + sbbq $0x0, %r8 + movq %r8, 0x50(%rsp) + sbbq %rdx, %r9 + movq %r9, 0x58(%rsp) + movq 0x60(%rsp), %rbx + movq 0x40(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x48(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x50(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x58(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x68(%rsp), %rbx + xorl %r13d, %r13d + movq 0x40(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x48(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x50(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x58(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x70(%rsp), %rbx + xorl %r15d, %r15d + movq 0x40(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x48(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x50(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x58(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x78(%rsp), %rbx + xorl %r8d, %r8d + movq 0x40(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x48(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x50(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x58(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x60(%rsp) + movq %r13, 0x68(%rsp) + movq %r14, 0x70(%rsp) + movq %r15, 0x78(%rsp) + xorq %r11, %r11 + movq 0x20(%rsi), %rax + addq 0x40(%rsi), %rax + movq 0x28(%rsi), %rcx + adcq 0x48(%rsi), %rcx + movq 0x30(%rsi), %r8 + adcq 0x50(%rsi), %r8 + movq 0x38(%rsi), %r9 + adcq 
0x58(%rsi), %r9 + adcq %r11, %r11 + subq $0xffffffffffffffff, %rax + movl $0xffffffff, %r10d + sbbq %r10, %rcx + sbbq $0x0, %r8 + movabsq $0xffffffff00000001, %rdx + sbbq %rdx, %r9 + sbbq $0x0, %r11 + andq %r11, %r10 + andq %r11, %rdx + addq %r11, %rax + movq %rax, 0x40(%rsp) + adcq %r10, %rcx + movq %rcx, 0x48(%rsp) + adcq $0x0, %r8 + movq %r8, 0x50(%rsp) + adcq %rdx, %r9 + movq %r9, 0x58(%rsp) + movq 0x20(%rsp), %rbx + movq (%rsi), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x8(%rsi), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x10(%rsi), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x18(%rsi), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x28(%rsp), %rbx + xorl %r13d, %r13d + movq (%rsi), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0x8(%rsi), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0x10(%rsi), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0x18(%rsi), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x30(%rsp), %rbx + xorl %r15d, %r15d + movq (%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x8(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x10(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x18(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x38(%rsp), %rbx + xorl %r8d, %r8d + movq (%rsi), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x8(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x10(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x18(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x80(%rsp) + movq %r13, 0x88(%rsp) + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq 0x60(%rsp), %rax + movq %rax, %rbx + mulq %rax + movq %rax, %r8 + movq %rdx, %r15 + movq 0x68(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x78(%rsp), %rax + 
movq %rax, %r13 + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x70(%rsp), %rax + movq %rax, %rbx + mulq %r13 + movq %rax, %r13 + movq %rdx, %r14 + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq 0x68(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq 0x78(%rsp), %rbx + movq 0x68(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorl %ecx, %ecx + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %rcx, %rcx + movq 0x68(%rsp), %rax + mulq %rax + addq %r15, %r9 + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + movq 0x70(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %r15, %r15 + movq 0x78(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r14 + adcq %rcx, %rdx + movq %rdx, %r15 + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + xorl %r8d, %r8d + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r8, %r14 + adcq %r8, %r15 + adcq %r8, %r8 + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + xorl %r9d, %r9d + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rbx), %rbx + adcq %r13, %rbx + leaq -0x1(%r9), %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0xa0(%rsp) + movq %r13, 0xa8(%rsp) + movq %r14, 0xb0(%rsp) + movq %r15, 0xb8(%rsp) + movq 0x40(%rsp), %rax + movq %rax, %rbx + mulq %rax + movq %rax, %r8 + movq %rdx, %r15 + movq 0x48(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x58(%rsp), %rax + movq %rax, %r13 + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x50(%rsp), %rax + movq %rax, %rbx + mulq %r13 + movq %rax, %r13 + movq %rdx, %r14 + movq 0x40(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq 0x48(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq 0x58(%rsp), %rbx + movq 0x48(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorl %ecx, %ecx + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %rcx, %rcx + movq 0x48(%rsp), %rax + mulq %rax + addq %r15, %r9 + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + movq 0x50(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %r15, %r15 + movq 0x58(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r14 + adcq %rcx, %rdx + movq %rdx, %r15 + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r9, %rax + mulq %rbx + subq %rcx, 
%rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + xorl %r8d, %r8d + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r8, %r14 + adcq %r8, %r15 + adcq %r8, %r8 + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + xorl %r9d, %r9d + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rbx), %rbx + adcq %r13, %rbx + leaq -0x1(%r9), %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x40(%rsp) + movq %r13, 0x48(%rsp) + movq %r14, 0x50(%rsp) + movq %r15, 0x58(%rsp) + movq $0xffffffffffffffff, %r9 + xorl %r11d, %r11d + subq 0xa0(%rsp), %r9 + movabsq $0xffffffff, %r10 + sbbq 0xa8(%rsp), %r10 + sbbq 0xb0(%rsp), %r11 + movabsq $0xffffffff00000001, %r12 + sbbq 0xb8(%rsp), %r12 + movq $0x9, %rcx + movq %r9, %rax + mulq %rcx + movq %rax, %r8 + movq %rdx, %r9 + movq %r10, %rax + xorl %r10d, %r10d + mulq %rcx + addq %rax, %r9 + adcq %rdx, %r10 + movq %r11, %rax + xorl %r11d, %r11d + mulq %rcx + addq %rax, %r10 + adcq %rdx, %r11 + movq %r12, %rax + xorl %r12d, %r12d + mulq %rcx + addq %rax, %r11 + adcq %rdx, %r12 + movl $0xc, %ecx + movq 0x80(%rsp), %rax + mulq %rcx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %rbx, %rbx + movq 0x88(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rbx, %rbx + movq 0x90(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbx, %rbx + movq 0x98(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + leaq 0x1(%r12), %rcx + movabsq $0xffffffff00000001, %rax + mulq %rcx + movq %rcx, %rbx + shlq $0x20, %rbx + addq %rcx, %r8 + sbbq $0x0, %rbx + subq %rbx, %r9 + sbbq $0x0, %r10 + sbbq %rax, %r11 + sbbq %rdx, %rcx + decq %rcx + movl $0xffffffff, %eax + andq %rcx, %rax + xorl %edx, %edx + subq %rax, %rdx + addq %rcx, %r8 + movq %r8, 0xa0(%rsp) + adcq %rax, %r9 + movq %r9, 0xa8(%rsp) + adcq $0x0, %r10 + movq %r10, 0xb0(%rsp) + adcq %rdx, %r11 + movq %r11, 0xb8(%rsp) + movq 0x40(%rsp), %rax + subq (%rsp), %rax + movq 0x48(%rsp), %rcx + sbbq 0x8(%rsp), %rcx + movq 0x50(%rsp), %r8 + sbbq 0x10(%rsp), %r8 + movq 0x58(%rsp), %r9 + sbbq 0x18(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x40(%rsp) + adcq %r10, %rcx + movq %rcx, 0x48(%rsp) + adcq $0x0, %r8 + movq %r8, 0x50(%rsp) + adcq %rdx, %r9 + movq %r9, 0x58(%rsp) + movq 0x20(%rsp), %rax + movq %rax, %rbx + mulq %rax + movq %rax, %r8 + movq %rdx, %r15 + movq 0x28(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x38(%rsp), %rax + movq %rax, %r13 + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x30(%rsp), %rax + movq %rax, %rbx + mulq %r13 + movq %rax, %r13 + movq %rdx, %r14 + movq 0x20(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + movq 0x28(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq 
%rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq 0x38(%rsp), %rbx + movq 0x28(%rsp), %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq $0x0, %r14 + xorl %ecx, %ecx + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %rcx, %rcx + movq 0x28(%rsp), %rax + mulq %rax + addq %r15, %r9 + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + movq 0x30(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %r15, %r15 + movq 0x38(%rsp), %rax + mulq %rax + negq %r15 + adcq %rax, %r14 + adcq %rcx, %rdx + movq %rdx, %r15 + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rcx, %rcx + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + xorl %r8d, %r8d + movq %r9, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r8, %r14 + adcq %r8, %r15 + adcq %r8, %r8 + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + xorl %r9d, %r9d + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + leaq -0x1(%rbx), %rbx + adcq %r13, %rbx + leaq -0x1(%r9), %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, (%rsp) + movq %r13, 0x8(%rsp) + movq %r14, 0x10(%rsp) + movq %r15, 0x18(%rsp) + movq 0x60(%rsp), %rbx + movq 0xa0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0xb0(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0xb8(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x68(%rsp), %rbx + xorl %r13d, %r13d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r14, %r14 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r14, %r14 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r14, %r14 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r14, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + xorl %r14d, %r14d + movabsq $0x100000000, %rbx + movq %r8, %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r15, %r15 + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r8, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r15, %r15 + movq %r9, %rax + mulq %rbx + subq %r15, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + adcq %r14, %r14 + movq 0x70(%rsp), %rbx + xorl %r15d, %r15d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + 
adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + adcq %r15, %r15 + movq 0x78(%rsp), %rbx + xorl %r8d, %r8d + movq 0xa0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r8, %r8 + xorl %r9d, %r9d + movabsq $0x100000000, %rbx + movq %r10, %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rcx, %rcx + notq %rbx + leaq 0x2(%rbx), %rbx + movq %r10, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rcx, %rcx + movq %r11, %rax + mulq %rbx + subq %rcx, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq %r9, %r8 + movl $0x1, %ecx + addq %r12, %rcx + decq %rbx + adcq %r13, %rbx + decq %r9 + movq %r9, %rax + adcq %r14, %r9 + movl $0xfffffffe, %r11d + adcq %r15, %r11 + adcq %r8, %rax + cmovbq %rcx, %r12 + cmovbq %rbx, %r13 + cmovbq %r9, %r14 + cmovbq %r11, %r15 + movq %r12, 0x60(%rsp) + movq %r13, 0x68(%rsp) + movq %r14, 0x70(%rsp) + movq %r15, 0x78(%rsp) + movq 0x40(%rsp), %rax + subq 0x20(%rsp), %rax + movq 0x48(%rsp), %rcx + sbbq 0x28(%rsp), %rcx + movq 0x50(%rsp), %r8 + sbbq 0x30(%rsp), %r8 + movq 0x58(%rsp), %r9 + sbbq 0x38(%rsp), %r9 + movl $0xffffffff, %r10d + sbbq %r11, %r11 + xorq %rdx, %rdx + andq %r11, %r10 + subq %r10, %rdx + addq %r11, %rax + movq %rax, 0x40(%rdi) + adcq %r10, %rcx + movq %rcx, 0x48(%rdi) + adcq $0x0, %r8 + movq %r8, 0x50(%rdi) + adcq %rdx, %r9 + movq %r9, 0x58(%rdi) + movq 0x98(%rsp), %r11 + movq %r11, %rcx + movq 0x90(%rsp), %r10 + shldq $0x2, %r10, %r11 + movq 0x88(%rsp), %r9 + shldq $0x2, %r9, %r10 + movq 0x80(%rsp), %r8 + shldq $0x2, %r8, %r9 + shlq $0x2, %r8 + shrq $0x3e, %rcx + addq $0x1, %rcx + subq 0xa0(%rsp), %r8 + sbbq 0xa8(%rsp), %r9 + sbbq 0xb0(%rsp), %r10 + sbbq 0xb8(%rsp), %r11 + sbbq $0x0, %rcx + movabsq $0xffffffff00000001, %rax + mulq %rcx + movq %rcx, %rbx + shlq $0x20, %rbx + addq %rcx, %r8 + sbbq $0x0, %rbx + subq %rbx, %r9 + sbbq $0x0, %r10 + sbbq %rax, %r11 + sbbq %rdx, %rcx + decq %rcx + movl $0xffffffff, %eax + andq %rcx, %rax + xorl %edx, %edx + subq %rax, %rdx + addq %rcx, %r8 + movq %r8, (%rdi) + adcq %rax, %r9 + movq %r9, 0x8(%rdi) + adcq $0x0, %r10 + movq %r10, 0x10(%rdi) + adcq %rdx, %r11 + movq %r11, 0x18(%rdi) + movq $0xffffffffffffffff, %r8 + xorl %r10d, %r10d + subq (%rsp), %r8 + movabsq $0xffffffff, %r9 + sbbq 0x8(%rsp), %r9 + sbbq 0x10(%rsp), %r10 + movabsq $0xffffffff00000001, %r11 + sbbq 0x18(%rsp), %r11 + movq %r11, %r12 + shldq $0x3, %r10, %r11 + shldq $0x3, %r9, %r10 + shldq $0x3, %r8, %r9 + shlq $0x3, %r8 + shrq $0x3d, %r12 + movl $0x3, %ecx + movq 0x60(%rsp), %rax + mulq %rcx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %rbx, %rbx + movq 0x68(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rbx, %rbx + movq 0x70(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbx, %rbx + movq 0x78(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + leaq 0x1(%r12), %rcx + movabsq $0xffffffff00000001, %rax + mulq %rcx + movq %rcx, %rbx + shlq $0x20, %rbx + addq %rcx, %r8 + sbbq $0x0, %rbx + subq %rbx, %r9 + sbbq 
$0x0, %r10 + sbbq %rax, %r11 + sbbq %rdx, %rcx + decq %rcx + movl $0xffffffff, %eax + andq %rcx, %rax + xorl %edx, %edx + subq %rax, %rdx + addq %rcx, %r8 + movq %r8, 0x20(%rdi) + adcq %rax, %r9 + movq %r9, 0x28(%rdi) + adcq $0x0, %r10 + movq %r10, 0x30(%rdi) + adcq %rdx, %r11 + movq %r11, 0x38(%rdi) + addq $0xc0, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/x86_att/p384/bignum_inv_p384.S b/third_party/s2n-bignum/x86_att/p384/bignum_inv_p384.S new file mode 100644 index 0000000000..e1dfecfa2e --- /dev/null +++ b/third_party/s2n-bignum/x86_att/p384/bignum_inv_p384.S @@ -0,0 +1,1810 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Modular inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1 +// Input x[6]; output z[6] +// +// extern void bignum_inv_p384(uint64_t z[static 6],uint64_t x[static 6]); +// +// If the 6-digit input x is coprime to p_384, i.e. is not divisible +// by it, returns z < p_384 such that x * z == 1 (mod p_384). Note that +// x does not need to be reduced modulo p_384, but the output always is. +// If the input is divisible (i.e. is 0 or p_384), then there can be no +// modular inverse and z = 0 is returned. +// +// Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_inv_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_inv_p384) + .text + +// Size in bytes of a 64-bit word + +#define N 8 + +// Pointer-offset pairs for temporaries on stack +// The u and v variables are 6 words each as expected, but the f and g +// variables are 8 words each -- they need to have at least one extra +// word for a sign word, and to preserve alignment we "round up" to 8. +// In fact, we currently keep an extra word in u and v as well. + +#define f 0(%rsp) +#define g (8*N)(%rsp) +#define u (16*N)(%rsp) +#define v (24*N)(%rsp) +#define tmp (32*N)(%rsp) +#define tmp2 (33*N)(%rsp) +#define i (34*N)(%rsp) +#define d (35*N)(%rsp) + +#define mat (36*N)(%rsp) + +// Backup for the input pointer + +#define res (40*N)(%rsp) + +// Total size to reserve on the stack + +#define NSPACE (42*N) + +// Syntactic variants to make x86_att version simpler to generate + +#define F 0 +#define G (8*N) +#define U (16*N) +#define V (24*N) +#define MAT (36*N) + +#define ff (%rsp) +#define gg (8*N)(%rsp) + +// --------------------------------------------------------------------------- +// Core signed almost-Montgomery reduction macro from P[6..0] to P[5..0]. +// --------------------------------------------------------------------------- + +#define amontred(P) \ +/* We only know the input is -2^444 < x < 2^444. To do traditional */ \ +/* unsigned Montgomery reduction, start by adding 2^61 * p_384. 
*/ \ + movq $0xe000000000000000, %r8 ; \ + xorl %eax, %eax ; \ + addq P, %r8 ; \ + movq $0x000000001fffffff, %r9 ; \ + leaq -1(%rax), %rax ; \ + adcq N+P, %r9 ; \ + movq $0xdfffffffe0000000, %r10 ; \ + adcq 2*N+P, %r10 ; \ + movq 3*N+P, %r11 ; \ + adcq %rax, %r11 ; \ + movq 4*N+P, %r12 ; \ + adcq %rax, %r12 ; \ + movq 5*N+P, %r13 ; \ + adcq %rax, %r13 ; \ + movq $0x1fffffffffffffff, %r14 ; \ + adcq 6*N+P, %r14 ; \ +/* Correction multiplier is %rbx = w = [d0 + (d0<<32)] mod 2^64 */ \ + movq %r8, %rbx ; \ + shlq $32, %rbx ; \ + addq %r8, %rbx ; \ +/* Construct [%rbp;%rdx;%rax;-] = (2^384 - p_384) * w */ \ +/* We know lowest word will cancel so can re-use %r8 as a temp */ \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulq %rbx; \ + movq %rdx, %r8 ; \ + movq $0x00000000ffffffff, %rax ; \ + mulq %rbx; \ + addq %r8, %rax ; \ + adcq %rbx, %rdx ; \ + adcl %ebp, %ebp ; \ +/* Now subtract that and add 2^384 * w, catching carry in %rax */ \ + subq %rax, %r9 ; \ + sbbq %rdx, %r10 ; \ + sbbq %rbp, %r11 ; \ + sbbq $0, %r12 ; \ + sbbq $0, %r13 ; \ + sbbq $0, %r14 ; \ + sbbq %rax, %rax ; \ + addq %rbx, %r14 ; \ + adcq $0, %rax ; \ +/* Now if top is nonzero we subtract p_384 (almost-Montgomery) */ \ + negq %rax; \ + movq $0x00000000ffffffff, %rbx ; \ + andq %rax, %rbx ; \ + movq $0xffffffff00000000, %rcx ; \ + andq %rax, %rcx ; \ + movq $0xfffffffffffffffe, %rdx ; \ + andq %rax, %rdx ; \ + subq %rbx, %r9 ; \ + movq %r9, P ; \ + sbbq %rcx, %r10 ; \ + movq %r10, N+P ; \ + sbbq %rdx, %r11 ; \ + movq %r11, 2*N+P ; \ + sbbq %rax, %r12 ; \ + movq %r12, 3*N+P ; \ + sbbq %rax, %r13 ; \ + movq %r13, 4*N+P ; \ + sbbq %rax, %r14 ; \ + movq %r14, 5*N+P + +// Very similar to a subroutine call to the s2n-bignum word_divstep59. +// But different in register usage and returning the final matrix as +// +// [ %r8 %r10] +// [ %r12 %r14] +// +// and also returning the matrix still negated (which doesn't matter) + +#define divstep59(din,fin,gin) \ + movq din, %rsi ; \ + movq fin, %rdx ; \ + movq gin, %rcx ; \ + movq %rdx, %rbx ; \ + andq $0xfffff, %rbx ; \ + movabsq $0xfffffe0000000000, %rax ; \ + orq %rax, %rbx ; \ + andq $0xfffff, %rcx ; \ + movabsq $0xc000000000000000, %rax ; \ + orq %rax, %rcx ; \ + movq $0xfffffffffffffffe, %rax ; \ + xorl %ebp, %ebp ; \ + movl $0x2, %edx ; \ + movq %rbx, %rdi ; \ + movq %rax, %r8 ; \ + testq %rsi, %rsi ; \ + cmovs %rbp, %r8 ; \ + testq $0x1, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, 
%rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + 
cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + sarq $1, %rcx ; \ + movl $0x100000, %eax ; \ + leaq (%rbx,%rax), %rdx ; \ + leaq (%rcx,%rax), %rdi ; \ + shlq $0x16, %rdx ; \ + shlq $0x16, %rdi ; \ + sarq $0x2b, %rdx ; \ + sarq $0x2b, %rdi ; \ + movabsq $0x20000100000, %rax ; \ + leaq (%rbx,%rax), %rbx ; \ + leaq (%rcx,%rax), %rcx ; \ + sarq $0x2a, %rbx ; \ + sarq $0x2a, %rcx ; \ + movq %rdx, MAT(%rsp) ; \ + movq %rbx, MAT+0x8(%rsp) ; \ + movq %rdi, MAT+0x10(%rsp) ; \ + movq %rcx, MAT+0x18(%rsp) ; \ + movq fin, %r12 ; \ + imulq %r12, %rdi ; \ + imulq %rdx, %r12 ; \ + movq gin, %r13 ; \ + imulq %r13, %rbx ; \ + imulq %rcx, %r13 ; \ + addq %rbx, %r12 ; \ + addq %rdi, %r13 ; \ + sarq $0x14, %r12 ; \ + sarq $0x14, %r13 ; \ + movq %r12, %rbx ; \ + andq $0xfffff, %rbx ; \ + movabsq $0xfffffe0000000000, %rax ; \ + orq %rax, %rbx ; \ + movq %r13, %rcx ; \ + andq $0xfffff, %rcx ; \ + movabsq $0xc000000000000000, %rax ; \ + orq %rax, %rcx ; \ + movq $0xfffffffffffffffe, %rax ; \ + movl $0x2, %edx ; \ + movq %rbx, %rdi ; \ + movq %rax, %r8 ; \ + testq %rsi, %rsi ; \ + cmovs %rbp, %r8 ; \ + testq $0x1, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + 
movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx 
; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + sarq $1, %rcx ; \ + movl $0x100000, %eax ; \ + leaq (%rbx,%rax), %r8 ; \ + leaq (%rcx,%rax), %r10 ; \ + shlq $0x16, %r8 ; \ + shlq $0x16, %r10 ; \ + sarq $0x2b, %r8 ; \ + sarq $0x2b, %r10 ; \ + movabsq $0x20000100000, %rax ; \ + leaq (%rbx,%rax), %r15 ; \ + leaq (%rcx,%rax), %r11 ; \ + sarq $0x2a, %r15 ; \ + sarq $0x2a, %r11 ; \ + movq %r13, %rbx ; \ + movq %r12, %rcx ; \ + imulq %r8, %r12 ; \ + imulq %r15, %rbx ; \ + addq %rbx, %r12 ; \ + imulq %r11, %r13 ; \ + imulq %r10, %rcx ; \ + addq %rcx, %r13 ; \ + sarq $0x14, %r12 ; \ + sarq $0x14, %r13 ; \ + movq %r12, %rbx ; \ + andq $0xfffff, %rbx ; \ + movabsq $0xfffffe0000000000, %rax ; \ + orq %rax, %rbx ; \ + movq %r13, %rcx ; \ + andq $0xfffff, %rcx ; \ + movabsq $0xc000000000000000, %rax ; \ + orq %rax, %rcx ; \ + movq MAT(%rsp), %rax ; \ + imulq %r8, %rax ; \ + movq MAT+0x10(%rsp), %rdx ; \ + imulq %r15, %rdx ; \ + imulq MAT+0x8(%rsp), %r8 ; \ + imulq MAT+0x18(%rsp), %r15 ; \ + addq %r8, %r15 ; \ + leaq (%rax,%rdx), %r9 ; \ + movq MAT(%rsp), %rax ; \ + imulq %r10, %rax ; \ + movq MAT+0x10(%rsp), %rdx ; \ + imulq %r11, %rdx ; \ + imulq MAT+0x8(%rsp), %r10 ; \ + imulq MAT+0x18(%rsp), %r11 ; \ + addq %r10, %r11 ; \ + leaq (%rax,%rdx), %r13 ; \ + movq $0xfffffffffffffffe, %rax ; \ + movl $0x2, %edx ; \ + movq %rbx, %rdi ; \ + movq %rax, %r8 ; \ + testq %rsi, %rsi ; \ + cmovs %rbp, %r8 ; \ + testq $0x1, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + 
leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs 
%rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + sarq $1, %rcx ; \ + movl $0x100000, %eax ; \ + leaq (%rbx,%rax), %r8 ; \ + leaq (%rcx,%rax), %r12 ; \ + shlq $0x15, %r8 ; \ + shlq $0x15, %r12 ; \ + sarq $0x2b, %r8 ; \ + sarq $0x2b, %r12 ; \ + movabsq $0x20000100000, %rax ; \ + leaq (%rbx,%rax), %r10 ; \ + leaq (%rcx,%rax), %r14 ; \ + sarq $0x2b, %r10 ; \ + sarq $0x2b, %r14 ; \ + movq %r9, %rax ; \ + imulq %r8, %rax ; \ + movq %r13, %rdx ; \ + imulq %r10, %rdx ; \ + imulq %r15, %r8 ; \ + imulq %r11, %r10 ; \ + addq %r8, %r10 ; \ + leaq (%rax,%rdx), %r8 ; \ + movq %r9, %rax ; \ + imulq %r12, %rax ; \ + movq %r13, %rdx ; \ + imulq %r14, %rdx ; \ + imulq %r15, %r12 ; \ + imulq %r11, %r14 ; \ + addq %r12, %r14 ; \ + leaq (%rax,%rdx), %r12 + +S2N_BN_SYMBOL(bignum_inv_p384): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + +// Save registers and make room for temporaries + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + +// Save the return pointer for the end so we can overwrite %rdi later + + movq %rdi, res + +// Copy the constant p_384 into f including the 7th zero digit + + movl $0xffffffff, %eax + movq %rax, F(%rsp) + movq %rax, %rbx + notq %rbx + movq %rbx, F+N(%rsp) + xorl %ebp, %ebp + leaq -2(%rbp), %rcx + movq %rcx, F+2*N(%rsp) + leaq -1(%rbp), %rdx + movq %rdx, F+3*N(%rsp) + movq %rdx, F+4*N(%rsp) + movq %rdx, F+5*N(%rsp) + movq %rbp, F+6*N(%rsp) + +// Copy input but to g, reduced mod p_384 so that g <= f as assumed +// in the divstep bound proof. + + movq (%rsi), %r8 + subq %rax, %r8 + movq N(%rsi), %r9 + sbbq %rbx, %r9 + movq 2*N(%rsi), %r10 + sbbq %rcx, %r10 + movq 3*N(%rsi), %r11 + sbbq %rdx, %r11 + movq 4*N(%rsi), %r12 + sbbq %rdx, %r12 + movq 5*N(%rsi), %r13 + sbbq %rdx, %r13 + + cmovcq (%rsi), %r8 + cmovcq N(%rsi), %r9 + cmovcq 2*N(%rsi), %r10 + cmovcq 3*N(%rsi), %r11 + cmovcq 4*N(%rsi), %r12 + cmovcq 5*N(%rsi), %r13 + + movq %r8, G(%rsp) + movq %r9, G+N(%rsp) + movq %r10, G+2*N(%rsp) + movq %r11, G+3*N(%rsp) + movq %r12, G+4*N(%rsp) + movq %r13, G+5*N(%rsp) + movq %rbp, G+6*N(%rsp) + +// Also maintain reduced < 2^384 vector [u,v] such that +// [f,g] == x * 2^{5*i-75} * [u,v] (mod p_384) +// starting with [p_384,x] == x * 2^{5*0-75} * [0,2^75] (mod p_384) +// The weird-looking 5*i modifications come in because we are doing +// 64-bit word-sized Montgomery reductions at each stage, which is +// 5 bits more than the 59-bit requirement to keep things stable. + + xorl %eax, %eax + movq %rax, U(%rsp) + movq %rax, U+N(%rsp) + movq %rax, U+2*N(%rsp) + movq %rax, U+3*N(%rsp) + movq %rax, U+4*N(%rsp) + movq %rax, U+5*N(%rsp) + + movl $2048, %ecx + movq %rax, V(%rsp) + movq %rcx, V+N(%rsp) + movq %rax, V+2*N(%rsp) + movq %rax, V+3*N(%rsp) + movq %rax, V+4*N(%rsp) + movq %rax, V+5*N(%rsp) + +// Start of main loop. We jump into the middle so that the divstep +// portion is common to the special fifteenth iteration after a uniform +// first 14. 
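The counter set up just below starts at 15 and counts down, with only the divstep portion executing on the final pass as noted above, and the starting v = 2^75 is encoded by the single digit 2048 = 2^11 at word index 1. A minimal Python sketch, assuming nothing beyond the 59-versus-64 bit counts stated in the comment above, of why the 2^{5*i-75} factor closes to 2^0 after the 15 rounds:

    # Each round divides [f,g] by 2^59 (one divstep59 batch) but divides
    # [u,v] by 2^64 (one word-level Montgomery reduction), so the exponent e
    # in [f,g] == x * 2^e * [u,v] (mod p_384) gains 5 per round.
    assert 2048 << 64 == 1 << 75          # digit 1 of v encodes v = 2^75
    e = 5*0 - 75                          # exponent before the first round
    for _ in range(15):
        e += 64 - 59
    assert e == 0                         # after 15 rounds, f == x * u (mod p_384)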
+ + movq $15, i + movq $1, d + jmp midloop + +loop: + +// Separate out the matrix into sign-magnitude pairs + + movq %r8, %r9 + sarq $63, %r9 + xorq %r9, %r8 + subq %r9, %r8 + + movq %r10, %r11 + sarq $63, %r11 + xorq %r11, %r10 + subq %r11, %r10 + + movq %r12, %r13 + sarq $63, %r13 + xorq %r13, %r12 + subq %r13, %r12 + + movq %r14, %r15 + sarq $63, %r15 + xorq %r15, %r14 + subq %r15, %r14 + +// Adjust the initial values to allow for complement instead of negation +// This initial offset is the same for [f,g] and [u,v] compositions. +// Save it in temporary storage for the [u,v] part and do [f,g] first. + + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, tmp + + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, tmp2 + +// Now the computation of the updated f and g values. This maintains a +// 2-word carry between stages so we can conveniently insert the shift +// right by 59 before storing back, and not overwrite digits we need +// again of the old f and g values. +// +// Digit 0 of [f,g] + + xorl %ebx, %ebx + movq F(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq G(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + + xorl %ebp, %ebp + movq F(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbp + movq G(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + +// Digit 1 of [f,g] + + xorl %ecx, %ecx + movq F+N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq G+N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $59, %rbx, %rdi + movq %rdi, F(%rsp) + + xorl %edi, %edi + movq F+N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq G+N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $59, %rbp, %rsi + movq %rsi, G(%rsp) + +// Digit 2 of [f,g] + + xorl %esi, %esi + movq F+2*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq G+2*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $59, %rcx, %rbx + movq %rbx, F+N(%rsp) + + xorl %ebx, %ebx + movq F+2*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq G+2*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $59, %rdi, %rbp + movq %rbp, G+N(%rsp) + +// Digit 3 of [f,g] + + xorl %ebp, %ebp + movq F+3*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rsi + adcq %rdx, %rbp + movq G+3*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $59, %rsi, %rcx + movq %rcx, F+2*N(%rsp) + + xorl %ecx, %ecx + movq F+3*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbx + adcq %rdx, %rcx + movq G+3*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $59, %rbx, %rdi + movq %rdi, G+2*N(%rsp) + +// Digit 4 of [f,g] + + xorl %edi, %edi + movq F+4*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbp + adcq %rdx, %rdi + movq G+4*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $59, %rbp, %rsi + movq %rsi, F+3*N(%rsp) + + xorl %esi, %esi + movq F+4*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rcx + adcq %rdx, %rsi + movq G+4*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $59, %rcx, %rbx + movq %rbx, 
G+3*N(%rsp) + +// Digits 5 and 6 of [f,g] + + movq F+5*N(%rsp), %rax + xorq %r9, %rax + movq F+6*N(%rsp), %rbx + xorq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq G+5*N(%rsp), %rax + xorq %r11, %rax + movq G+6*N(%rsp), %rdx + xorq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $59, %rdi, %rbp + movq %rbp, F+4*N(%rsp) + shrdq $59, %rbx, %rdi + sarq $59, %rbx + + movq F+5*N(%rsp), %rax + movq %rdi, F+5*N(%rsp) + + movq F+6*N(%rsp), %rdi + movq %rbx, F+6*N(%rsp) + + xorq %r13, %rax + xorq %r13, %rdi + andq %r12, %rdi + negq %rdi + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rdi + movq G+5*N(%rsp), %rax + xorq %r15, %rax + movq G+6*N(%rsp), %rdx + xorq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rdi + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rdi + shrdq $59, %rsi, %rcx + movq %rcx, G+4*N(%rsp) + shrdq $59, %rdi, %rsi + movq %rsi, G+5*N(%rsp) + sarq $59, %rdi + movq %rdi, G+6*N(%rsp) + +// Get the initial carries back from storage and do the [u,v] accumulation + + movq tmp, %rbx + movq tmp2, %rbp + +// Digit 0 of [u,v] + + xorl %ecx, %ecx + movq U(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq V(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + + xorl %esi, %esi + movq U(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, U(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq V(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, V(%rsp) + +// Digit 1 of [u,v] + + xorl %ebx, %ebx + movq U+N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq V+N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + + xorl %ebp, %ebp + movq U+N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, U+N(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq V+N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, V+N(%rsp) + +// Digit 2 of [u,v] + + xorl %ecx, %ecx + movq U+2*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq V+2*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + + xorl %esi, %esi + movq U+2*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, U+2*N(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq V+2*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, V+2*N(%rsp) + +// Digit 3 of [u,v] + + xorl %ebx, %ebx + movq U+3*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq V+3*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + + xorl %ebp, %ebp + movq U+3*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, U+3*N(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq V+3*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, V+3*N(%rsp) + +// Digit 4 of [u,v] + + xorl %ecx, %ecx + movq U+4*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq V+4*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + + xorl %esi, %esi + movq U+4*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, U+4*N(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq V+4*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, V+4*N(%rsp) + +// Digits 5 and 6 of u (top is unsigned) + + movq U+5*N(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + 
andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq V+5*N(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + +// Preload for last use of old u digit 3 + + movq U+5*N(%rsp), %rax + movq %rcx, U+5*N(%rsp) + movq %rdx, U+6*N(%rsp) + +// Digits 5 and 6 of v (top is unsigned) + + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx + negq %rcx + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rcx + movq V+5*N(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rsi, V+5*N(%rsp) + movq %rdx, V+6*N(%rsp) + +// Montgomery reduction of u + + amontred(u) + +// Montgomery reduction of v + + amontred(v) + +midloop: + + divstep59(d,ff,gg) + movq %rsi, d + +// Next iteration + + decq i + jnz loop + +// The 15th and last iteration does not need anything except the +// u value and the sign of f; the latter can be obtained from the +// lowest word of f. So it's done differently from the main loop. +// Find the sign of the new f. For this we just need one digit +// since we know (for in-scope cases) that f is either +1 or -1. +// We don't explicitly shift right by 59 either, but looking at +// bit 63 (or any bit >= 60) of the unshifted result is enough +// to distinguish -1 from +1; this is then made into a mask. + + movq F(%rsp), %rax + movq G(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $63, %rax + +// Now separate out the matrix into sign-magnitude pairs +// and adjust each one based on the sign of f. +// +// Note that at this point we expect |f|=1 and we got its +// sign above, so then since [f,0] == x * [u,v] (mod p_384) +// we want to flip the sign of u according to that of f. 
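A minimal Python sketch of the two's-complement sign-magnitude split performed below, with the 64-bit mask m >> 63 modelled as -1 or 0; folding the sign of f into that mask is a plain xor, so u ends up negated exactly when f was negative:

    # Model of the sar/xor/sub pattern below: s is the all-ones mask for a
    # negative entry, and (m ^ s) - s recovers the magnitude |m|.
    def sign_magnitude(m):
        s = -1 if m < 0 else 0
        return (m ^ s) - s, s

    assert sign_magnitude(-7) == (7, -1)
    assert sign_magnitude(6) == (6, 0)
    # xoring in the sign mask of f (also -1 or 0) flips each entry's sign
    # mask exactly when f is negative, matching the xorq of %rax below.
    sign_f = -1
    assert (0 ^ sign_f, -1 ^ sign_f) == (-1, 0)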
+ + movq %r8, %r9 + sarq $63, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + + movq %r10, %r11 + sarq $63, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + + movq %r12, %r13 + sarq $63, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + + movq %r14, %r15 + sarq $63, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + +// Adjust the initial value to allow for complement instead of negation + + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + +// Digit 0 of [u] + + xorl %r13d, %r13d + movq U(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq V(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + movq %r12, U(%rsp) + adcq %rdx, %r13 + +// Digit 1 of [u] + + xorl %r14d, %r14d + movq U+N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq V+N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + movq %r13, U+N(%rsp) + adcq %rdx, %r14 + +// Digit 2 of [u] + + xorl %r15d, %r15d + movq U+2*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq V+2*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + movq %r14, U+2*N(%rsp) + adcq %rdx, %r15 + +// Digit 3 of [u] + + xorl %r14d, %r14d + movq U+3*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r14 + movq V+3*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r15 + movq %r15, U+3*N(%rsp) + adcq %rdx, %r14 + +// Digit 4 of [u] + + xorl %r15d, %r15d + movq U+4*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq V+4*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + movq %r14, U+4*N(%rsp) + adcq %rdx, %r15 + +// Digits 5 and 6 of u (top is unsigned) + + movq U+5*N(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq V+5*N(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + movq %r15, U+5*N(%rsp) + adcq %rdx, %r9 + movq %r9, U+6*N(%rsp) + +// Montgomery reduce u + + amontred(u) + +// Perform final strict reduction mod p_384 and copy to output + + movl $0xffffffff, %eax + movq %rax, %rbx + notq %rbx + xorl %ebp, %ebp + leaq -2(%rbp), %rcx + leaq -1(%rbp), %rdx + + movq U(%rsp), %r8 + subq %rax, %r8 + movq U+N(%rsp), %r9 + sbbq %rbx, %r9 + movq U+2*N(%rsp), %r10 + sbbq %rcx, %r10 + movq U+3*N(%rsp), %r11 + sbbq %rdx, %r11 + movq U+4*N(%rsp), %r12 + sbbq %rdx, %r12 + movq U+5*N(%rsp), %r13 + sbbq %rdx, %r13 + + cmovcq U(%rsp), %r8 + cmovcq U+N(%rsp), %r9 + cmovcq U+2*N(%rsp), %r10 + cmovcq U+3*N(%rsp), %r11 + cmovcq U+4*N(%rsp), %r12 + cmovcq U+5*N(%rsp), %r13 + + movq res, %rdi + movq %r8, (%rdi) + movq %r9, N(%rdi) + movq %r10, 2*N(%rdi) + movq %r11, 3*N(%rdi) + movq %r12, 4*N(%rdi) + movq %r13, 5*N(%rdi) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/x86_att/p384/bignum_montinv_p384.S b/third_party/s2n-bignum/x86_att/p384/bignum_montinv_p384.S new file mode 100644 index 0000000000..81928ed59d --- /dev/null +++ b/third_party/s2n-bignum/x86_att/p384/bignum_montinv_p384.S @@ -0,0 +1,1827 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery inverse modulo p_384 = 2^384 - 2^128 - 2^96 + 2^32 - 1 +// Input x[6]; output z[6] +// +// extern void bignum_montinv_p384(uint64_t z[static 6],uint64_t x[static 6]); +// +// If the 6-digit input x is coprime to p_384, i.e. is not divisible +// by it, returns z < p_384 such that x * z == 2^768 (mod p_384). This +// is effectively "Montgomery inverse" because if we consider x and z as +// Montgomery forms of X and Z, i.e. x == 2^384 * X and z == 2^384 * Z +// (both mod p_384) then X * Z == 1 (mod p_384). That is, this function +// gives the analog of the modular inverse bignum_inv_p384 but with both +// input and output in the Montgomery domain. Note that x does not need +// to be reduced modulo p_384, but the output always is. If the input +// is divisible (i.e. is 0 or p_384), then there can be no solution to +// the congruence x * z == 2^768 (mod p_384), and z = 0 is returned. +// +// Standard x86-64 ABI: RDI = z, RSI = x +// Microsoft x64 ABI: RCX = z, RDX = x +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_montinv_p384) + S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_montinv_p384) + .text + +// Size in bytes of a 64-bit word + +#define N 8 + +// Pointer-offset pairs for temporaries on stack +// The u and v variables are 6 words each as expected, but the f and g +// variables are 8 words each -- they need to have at least one extra +// word for a sign word, and to preserve alignment we "round up" to 8. +// In fact, we currently keep an extra word in u and v as well. + +#define f 0(%rsp) +#define g (8*N)(%rsp) +#define u (16*N)(%rsp) +#define v (24*N)(%rsp) +#define tmp (32*N)(%rsp) +#define tmp2 (33*N)(%rsp) +#define i (34*N)(%rsp) +#define d (35*N)(%rsp) + +#define mat (36*N)(%rsp) + +// Backup for the input pointer + +#define res (40*N)(%rsp) + +// Total size to reserve on the stack + +#define NSPACE (42*N) + +// Syntactic variants to make x86_att version simpler to generate + +#define F 0 +#define G (8*N) +#define U (16*N) +#define V (24*N) +#define MAT (36*N) + +#define ff (%rsp) +#define gg (8*N)(%rsp) + +// --------------------------------------------------------------------------- +// Core signed almost-Montgomery reduction macro from P[6..0] to P[5..0]. +// --------------------------------------------------------------------------- + +#define amontred(P) \ +/* We only know the input is -2^444 < x < 2^444. To do traditional */ \ +/* unsigned Montgomery reduction, start by adding 2^61 * p_384. 
*/ \ + movq $0xe000000000000000, %r8 ; \ + xorl %eax, %eax ; \ + addq P, %r8 ; \ + movq $0x000000001fffffff, %r9 ; \ + leaq -1(%rax), %rax ; \ + adcq N+P, %r9 ; \ + movq $0xdfffffffe0000000, %r10 ; \ + adcq 2*N+P, %r10 ; \ + movq 3*N+P, %r11 ; \ + adcq %rax, %r11 ; \ + movq 4*N+P, %r12 ; \ + adcq %rax, %r12 ; \ + movq 5*N+P, %r13 ; \ + adcq %rax, %r13 ; \ + movq $0x1fffffffffffffff, %r14 ; \ + adcq 6*N+P, %r14 ; \ +/* Correction multiplier is %rbx = w = [d0 + (d0<<32)] mod 2^64 */ \ + movq %r8, %rbx ; \ + shlq $32, %rbx ; \ + addq %r8, %rbx ; \ +/* Construct [%rbp;%rdx;%rax;-] = (2^384 - p_384) * w */ \ +/* We know lowest word will cancel so can re-use %r8 as a temp */ \ + xorl %ebp, %ebp ; \ + movq $0xffffffff00000001, %rax ; \ + mulq %rbx; \ + movq %rdx, %r8 ; \ + movq $0x00000000ffffffff, %rax ; \ + mulq %rbx; \ + addq %r8, %rax ; \ + adcq %rbx, %rdx ; \ + adcl %ebp, %ebp ; \ +/* Now subtract that and add 2^384 * w, catching carry in %rax */ \ + subq %rax, %r9 ; \ + sbbq %rdx, %r10 ; \ + sbbq %rbp, %r11 ; \ + sbbq $0, %r12 ; \ + sbbq $0, %r13 ; \ + sbbq $0, %r14 ; \ + sbbq %rax, %rax ; \ + addq %rbx, %r14 ; \ + adcq $0, %rax ; \ +/* Now if top is nonzero we subtract p_384 (almost-Montgomery) */ \ + negq %rax; \ + movq $0x00000000ffffffff, %rbx ; \ + andq %rax, %rbx ; \ + movq $0xffffffff00000000, %rcx ; \ + andq %rax, %rcx ; \ + movq $0xfffffffffffffffe, %rdx ; \ + andq %rax, %rdx ; \ + subq %rbx, %r9 ; \ + movq %r9, P ; \ + sbbq %rcx, %r10 ; \ + movq %r10, N+P ; \ + sbbq %rdx, %r11 ; \ + movq %r11, 2*N+P ; \ + sbbq %rax, %r12 ; \ + movq %r12, 3*N+P ; \ + sbbq %rax, %r13 ; \ + movq %r13, 4*N+P ; \ + sbbq %rax, %r14 ; \ + movq %r14, 5*N+P + +// Very similar to a subroutine call to the s2n-bignum word_divstep59. +// But different in register usage and returning the final matrix as +// +// [ %r8 %r10] +// [ %r12 %r14] +// +// and also returning the matrix still negated (which doesn't matter) + +#define divstep59(din,fin,gin) \ + movq din, %rsi ; \ + movq fin, %rdx ; \ + movq gin, %rcx ; \ + movq %rdx, %rbx ; \ + andq $0xfffff, %rbx ; \ + movabsq $0xfffffe0000000000, %rax ; \ + orq %rax, %rbx ; \ + andq $0xfffff, %rcx ; \ + movabsq $0xc000000000000000, %rax ; \ + orq %rax, %rcx ; \ + movq $0xfffffffffffffffe, %rax ; \ + xorl %ebp, %ebp ; \ + movl $0x2, %edx ; \ + movq %rbx, %rdi ; \ + movq %rax, %r8 ; \ + testq %rsi, %rsi ; \ + cmovs %rbp, %r8 ; \ + testq $0x1, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, 
%rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + 
cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + sarq $1, %rcx ; \ + movl $0x100000, %eax ; \ + leaq (%rbx,%rax), %rdx ; \ + leaq (%rcx,%rax), %rdi ; \ + shlq $0x16, %rdx ; \ + shlq $0x16, %rdi ; \ + sarq $0x2b, %rdx ; \ + sarq $0x2b, %rdi ; \ + movabsq $0x20000100000, %rax ; \ + leaq (%rbx,%rax), %rbx ; \ + leaq (%rcx,%rax), %rcx ; \ + sarq $0x2a, %rbx ; \ + sarq $0x2a, %rcx ; \ + movq %rdx, MAT(%rsp) ; \ + movq %rbx, MAT+0x8(%rsp) ; \ + movq %rdi, MAT+0x10(%rsp) ; \ + movq %rcx, MAT+0x18(%rsp) ; \ + movq fin, %r12 ; \ + imulq %r12, %rdi ; \ + imulq %rdx, %r12 ; \ + movq gin, %r13 ; \ + imulq %r13, %rbx ; \ + imulq %rcx, %r13 ; \ + addq %rbx, %r12 ; \ + addq %rdi, %r13 ; \ + sarq $0x14, %r12 ; \ + sarq $0x14, %r13 ; \ + movq %r12, %rbx ; \ + andq $0xfffff, %rbx ; \ + movabsq $0xfffffe0000000000, %rax ; \ + orq %rax, %rbx ; \ + movq %r13, %rcx ; \ + andq $0xfffff, %rcx ; \ + movabsq $0xc000000000000000, %rax ; \ + orq %rax, %rcx ; \ + movq $0xfffffffffffffffe, %rax ; \ + movl $0x2, %edx ; \ + movq %rbx, %rdi ; \ + movq %rax, %r8 ; \ + testq %rsi, %rsi ; \ + cmovs %rbp, %r8 ; \ + testq $0x1, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + 
movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx 
; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + sarq $1, %rcx ; \ + movl $0x100000, %eax ; \ + leaq (%rbx,%rax), %r8 ; \ + leaq (%rcx,%rax), %r10 ; \ + shlq $0x16, %r8 ; \ + shlq $0x16, %r10 ; \ + sarq $0x2b, %r8 ; \ + sarq $0x2b, %r10 ; \ + movabsq $0x20000100000, %rax ; \ + leaq (%rbx,%rax), %r15 ; \ + leaq (%rcx,%rax), %r11 ; \ + sarq $0x2a, %r15 ; \ + sarq $0x2a, %r11 ; \ + movq %r13, %rbx ; \ + movq %r12, %rcx ; \ + imulq %r8, %r12 ; \ + imulq %r15, %rbx ; \ + addq %rbx, %r12 ; \ + imulq %r11, %r13 ; \ + imulq %r10, %rcx ; \ + addq %rcx, %r13 ; \ + sarq $0x14, %r12 ; \ + sarq $0x14, %r13 ; \ + movq %r12, %rbx ; \ + andq $0xfffff, %rbx ; \ + movabsq $0xfffffe0000000000, %rax ; \ + orq %rax, %rbx ; \ + movq %r13, %rcx ; \ + andq $0xfffff, %rcx ; \ + movabsq $0xc000000000000000, %rax ; \ + orq %rax, %rcx ; \ + movq MAT(%rsp), %rax ; \ + imulq %r8, %rax ; \ + movq MAT+0x10(%rsp), %rdx ; \ + imulq %r15, %rdx ; \ + imulq MAT+0x8(%rsp), %r8 ; \ + imulq MAT+0x18(%rsp), %r15 ; \ + addq %r8, %r15 ; \ + leaq (%rax,%rdx), %r9 ; \ + movq MAT(%rsp), %rax ; \ + imulq %r10, %rax ; \ + movq MAT+0x10(%rsp), %rdx ; \ + imulq %r11, %rdx ; \ + imulq MAT+0x8(%rsp), %r10 ; \ + imulq MAT+0x18(%rsp), %r11 ; \ + addq %r10, %r11 ; \ + leaq (%rax,%rdx), %r13 ; \ + movq $0xfffffffffffffffe, %rax ; \ + movl $0x2, %edx ; \ + movq %rbx, %rdi ; \ + movq %rax, %r8 ; \ + testq %rsi, %rsi ; \ + cmovs %rbp, %r8 ; \ + testq $0x1, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + 
leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs 
%rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + cmovs %rbp, %r8 ; \ + movq %rbx, %rdi ; \ + testq %rdx, %rcx ; \ + cmoveq %rbp, %r8 ; \ + cmoveq %rbp, %rdi ; \ + sarq $1, %rcx ; \ + xorq %r8, %rdi ; \ + xorq %r8, %rsi ; \ + btq $0x3f, %r8 ; \ + cmovbq %rcx, %rbx ; \ + movq %rax, %r8 ; \ + subq %rax, %rsi ; \ + leaq (%rcx,%rdi), %rcx ; \ + sarq $1, %rcx ; \ + movl $0x100000, %eax ; \ + leaq (%rbx,%rax), %r8 ; \ + leaq (%rcx,%rax), %r12 ; \ + shlq $0x15, %r8 ; \ + shlq $0x15, %r12 ; \ + sarq $0x2b, %r8 ; \ + sarq $0x2b, %r12 ; \ + movabsq $0x20000100000, %rax ; \ + leaq (%rbx,%rax), %r10 ; \ + leaq (%rcx,%rax), %r14 ; \ + sarq $0x2b, %r10 ; \ + sarq $0x2b, %r14 ; \ + movq %r9, %rax ; \ + imulq %r8, %rax ; \ + movq %r13, %rdx ; \ + imulq %r10, %rdx ; \ + imulq %r15, %r8 ; \ + imulq %r11, %r10 ; \ + addq %r8, %r10 ; \ + leaq (%rax,%rdx), %r8 ; \ + movq %r9, %rax ; \ + imulq %r12, %rax ; \ + movq %r13, %rdx ; \ + imulq %r14, %rdx ; \ + imulq %r15, %r12 ; \ + imulq %r11, %r14 ; \ + addq %r12, %r14 ; \ + leaq (%rax,%rdx), %r12 + +S2N_BN_SYMBOL(bignum_montinv_p384): + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi +#endif + +// Save registers and make room for temporaries + + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + subq $NSPACE, %rsp + +// Save the return pointer for the end so we can overwrite %rdi later + + movq %rdi, res + +// Copy the constant p_384 into f including the 7th zero digit + + movl $0xffffffff, %eax + movq %rax, F(%rsp) + movq %rax, %rbx + notq %rbx + movq %rbx, F+N(%rsp) + xorl %ebp, %ebp + leaq -2(%rbp), %rcx + movq %rcx, F+2*N(%rsp) + leaq -1(%rbp), %rdx + movq %rdx, F+3*N(%rsp) + movq %rdx, F+4*N(%rsp) + movq %rdx, F+5*N(%rsp) + movq %rbp, F+6*N(%rsp) + +// Copy input but to g, reduced mod p_384 so that g <= f as assumed +// in the divstep bound proof. + + movq (%rsi), %r8 + subq %rax, %r8 + movq N(%rsi), %r9 + sbbq %rbx, %r9 + movq 2*N(%rsi), %r10 + sbbq %rcx, %r10 + movq 3*N(%rsi), %r11 + sbbq %rdx, %r11 + movq 4*N(%rsi), %r12 + sbbq %rdx, %r12 + movq 5*N(%rsi), %r13 + sbbq %rdx, %r13 + + cmovcq (%rsi), %r8 + cmovcq N(%rsi), %r9 + cmovcq 2*N(%rsi), %r10 + cmovcq 3*N(%rsi), %r11 + cmovcq 4*N(%rsi), %r12 + cmovcq 5*N(%rsi), %r13 + + movq %r8, G(%rsp) + movq %r9, G+N(%rsp) + movq %r10, G+2*N(%rsp) + movq %r11, G+3*N(%rsp) + movq %r12, G+4*N(%rsp) + movq %r13, G+5*N(%rsp) + movq %rbp, G+6*N(%rsp) + +// Also maintain reduced < 2^384 vector [u,v] such that +// [f,g] == x * 2^{5*i-843} * [u,v] (mod p_384) +// starting with [p_384,x] == x * 2^{5*0-843} * [0,2^843] (mod p_384) +// The weird-looking 5*i modifications come in because we are doing +// 64-bit word-sized Montgomery reductions at each stage, which is +// 5 bits more than the 59-bit requirement to keep things stable. +// After the 15th and last iteration and sign adjustment, when +// f == 1 for in-scope cases, we have x * 2^{75-843} * u == 1, i.e. +// x * u == 2^768 as required. 
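
As an illustrative aside (Python, not part of the assembly source), the endpoint of the invariant quoted above can be checked directly: if the final u satisfies x * u == 2^768 (mod p_384), then reading x and u back out of the Montgomery domain gives X * Z == 1 (mod p_384), i.e. u is the Montgomery-domain inverse of x. The sample value of x below is arbitrary.

    # Sanity check of the invariant endpoint (Python 3.8+ for pow(a, -1, m)).
    p384 = 2**384 - 2**128 - 2**96 + 2**32 - 1
    R = 2**384                                   # Montgomery radix for 6 x 64-bit words
    assert 5 * 15 - 843 == -768                  # exponent left after the 15 iterations

    x = 0x123456789abcdef                        # arbitrary nonzero sample input
    u = pow(x, -1, p384) * R * R % p384          # the value with x * u == 2^768 (mod p)
    assert x * u % p384 == R * R % p384
    X = x * pow(R, -1, p384) % p384              # what x denotes in the Montgomery domain
    Z = u * pow(R, -1, p384) % p384              # what u denotes in the Montgomery domain
    assert X * Z % p384 == 1                     # so u is the Montgomery inverse of x
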
+ + xorl %eax, %eax + movq %rax, U(%rsp) + movq %rax, U+N(%rsp) + movq %rax, U+2*N(%rsp) + movq %rax, U+3*N(%rsp) + movq %rax, U+4*N(%rsp) + movq %rax, U+5*N(%rsp) + +// The starting constant 2^843 mod p_384 is +// 0x0000000000000800:00001000000007ff:fffff00000000000 +// :00001000000007ff:fffff00000000800:0000000000000000 +// where colons separate 64-bit subwords, least significant at the right. +// These are constructed dynamically to reduce large constant loads. + + movq %rax, V(%rsp) + movq $0xfffff00000000800, %rcx + movq %rcx, V+N(%rsp) + movq $0x00001000000007ff, %rdx + movq %rdx, V+2*N(%rsp) + btr $11, %rcx + movq %rcx, V+3*N(%rsp) + movq %rdx, V+4*N(%rsp) + bts $11, %rax + movq %rax, V+5*N(%rsp) + +// Start of main loop. We jump into the middle so that the divstep +// portion is common to the special fifteenth iteration after a uniform +// first 14. + + movq $15, i + movq $1, d + jmp bignum_montinv_p384_midloop + +bignum_montinv_p384_loop: + +// Separate out the matrix into sign-magnitude pairs + + movq %r8, %r9 + sarq $63, %r9 + xorq %r9, %r8 + subq %r9, %r8 + + movq %r10, %r11 + sarq $63, %r11 + xorq %r11, %r10 + subq %r11, %r10 + + movq %r12, %r13 + sarq $63, %r13 + xorq %r13, %r12 + subq %r13, %r12 + + movq %r14, %r15 + sarq $63, %r15 + xorq %r15, %r14 + subq %r15, %r14 + +// Adjust the initial values to allow for complement instead of negation +// This initial offset is the same for [f,g] and [u,v] compositions. +// Save it in temporary storage for the [u,v] part and do [f,g] first. + + movq %r8, %rax + andq %r9, %rax + movq %r10, %rdi + andq %r11, %rdi + addq %rax, %rdi + movq %rdi, tmp + + movq %r12, %rax + andq %r13, %rax + movq %r14, %rsi + andq %r15, %rsi + addq %rax, %rsi + movq %rsi, tmp2 + +// Now the computation of the updated f and g values. This maintains a +// 2-word carry between stages so we can conveniently insert the shift +// right by 59 before storing back, and not overwrite digits we need +// again of the old f and g values. 
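
An illustrative Python check (not part of the source) that the starting constant described before the main loop, built dynamically into v, really is 2^843 mod p_384:

    p384 = 2**384 - 2**128 - 2**96 + 2**32 - 1
    words = [0x0000000000000000, 0xfffff00000000800, 0x00001000000007ff,
             0xfffff00000000000, 0x00001000000007ff, 0x0000000000000800]
    value = sum(w << (64 * i) for i, w in enumerate(words))   # least significant word first
    assert value == pow(2, 843, p384)
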
+// +// Digit 0 of [f,g] + + xorl %ebx, %ebx + movq F(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq G(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + + xorl %ebp, %ebp + movq F(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rbp + movq G(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + +// Digit 1 of [f,g] + + xorl %ecx, %ecx + movq F+N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq G+N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $59, %rbx, %rdi + movq %rdi, F(%rsp) + + xorl %edi, %edi + movq F+N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbp + adcq %rdx, %rdi + movq G+N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $59, %rbp, %rsi + movq %rsi, G(%rsp) + +// Digit 2 of [f,g] + + xorl %esi, %esi + movq F+2*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rsi + movq G+2*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $59, %rcx, %rbx + movq %rbx, F+N(%rsp) + + xorl %ebx, %ebx + movq F+2*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rdi + adcq %rdx, %rbx + movq G+2*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $59, %rdi, %rbp + movq %rbp, G+N(%rsp) + +// Digit 3 of [f,g] + + xorl %ebp, %ebp + movq F+3*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rsi + adcq %rdx, %rbp + movq G+3*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rsi + adcq %rdx, %rbp + shrdq $59, %rsi, %rcx + movq %rcx, F+2*N(%rsp) + + xorl %ecx, %ecx + movq F+3*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rbx + adcq %rdx, %rcx + movq G+3*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbx + adcq %rdx, %rcx + shrdq $59, %rbx, %rdi + movq %rdi, G+2*N(%rsp) + +// Digit 4 of [f,g] + + xorl %edi, %edi + movq F+4*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbp + adcq %rdx, %rdi + movq G+4*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbp + adcq %rdx, %rdi + shrdq $59, %rbp, %rsi + movq %rsi, F+3*N(%rsp) + + xorl %esi, %esi + movq F+4*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + addq %rax, %rcx + adcq %rdx, %rsi + movq G+4*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rcx + adcq %rdx, %rsi + shrdq $59, %rcx, %rbx + movq %rbx, G+3*N(%rsp) + +// Digits 5 and 6 of [f,g] + + movq F+5*N(%rsp), %rax + xorq %r9, %rax + movq F+6*N(%rsp), %rbx + xorq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rdi + adcq %rdx, %rbx + movq G+5*N(%rsp), %rax + xorq %r11, %rax + movq G+6*N(%rsp), %rdx + xorq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rdi + adcq %rdx, %rbx + shrdq $59, %rdi, %rbp + movq %rbp, F+4*N(%rsp) + shrdq $59, %rbx, %rdi + sarq $59, %rbx + + movq F+5*N(%rsp), %rax + movq %rdi, F+5*N(%rsp) + + movq F+6*N(%rsp), %rdi + movq %rbx, F+6*N(%rsp) + + xorq %r13, %rax + xorq %r13, %rdi + andq %r12, %rdi + negq %rdi + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rdi + movq G+5*N(%rsp), %rax + xorq %r15, %rax + movq G+6*N(%rsp), %rdx + xorq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rdi + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rdi + shrdq $59, %rsi, %rcx + movq %rcx, G+4*N(%rsp) + shrdq $59, %rdi, %rsi + movq %rsi, G+5*N(%rsp) + sarq $59, %rdi + movq %rdi, G+6*N(%rsp) + +// Get the initial carries back from storage and do the [u,v] accumulation + + movq 
tmp, %rbx + movq tmp2, %rbp + +// Digit 0 of [u,v] + + xorl %ecx, %ecx + movq U(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq V(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + + xorl %esi, %esi + movq U(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, U(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq V(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, V(%rsp) + +// Digit 1 of [u,v] + + xorl %ebx, %ebx + movq U+N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq V+N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + + xorl %ebp, %ebp + movq U+N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, U+N(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq V+N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, V+N(%rsp) + +// Digit 2 of [u,v] + + xorl %ecx, %ecx + movq U+2*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq V+2*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + + xorl %esi, %esi + movq U+2*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, U+2*N(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq V+2*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, V+2*N(%rsp) + +// Digit 3 of [u,v] + + xorl %ebx, %ebx + movq U+3*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq V+3*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rcx + adcq %rdx, %rbx + + xorl %ebp, %ebp + movq U+3*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rcx, U+3*N(%rsp) + addq %rax, %rsi + adcq %rdx, %rbp + movq V+3*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rsi + adcq %rdx, %rbp + movq %rsi, V+3*N(%rsp) + +// Digit 4 of [u,v] + + xorl %ecx, %ecx + movq U+4*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %rbx + adcq %rdx, %rcx + movq V+4*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %rbx + adcq %rdx, %rcx + + xorl %esi, %esi + movq U+4*N(%rsp), %rax + xorq %r13, %rax + mulq %r12 + movq %rbx, U+4*N(%rsp) + addq %rax, %rbp + adcq %rdx, %rsi + movq V+4*N(%rsp), %rax + xorq %r15, %rax + mulq %r14 + addq %rax, %rbp + adcq %rdx, %rsi + movq %rbp, V+4*N(%rsp) + +// Digits 5 and 6 of u (top is unsigned) + + movq U+5*N(%rsp), %rax + xorq %r9, %rax + movq %r9, %rbx + andq %r8, %rbx + negq %rbx + mulq %r8 + addq %rax, %rcx + adcq %rdx, %rbx + movq V+5*N(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %rbx + mulq %r10 + addq %rax, %rcx + adcq %rbx, %rdx + +// Preload for last use of old u digit 3 + + movq U+5*N(%rsp), %rax + movq %rcx, U+5*N(%rsp) + movq %rdx, U+6*N(%rsp) + +// Digits 5 and 6 of v (top is unsigned) + + xorq %r13, %rax + movq %r13, %rcx + andq %r12, %rcx + negq %rcx + mulq %r12 + addq %rax, %rsi + adcq %rdx, %rcx + movq V+5*N(%rsp), %rax + xorq %r15, %rax + movq %r15, %rdx + andq %r14, %rdx + subq %rdx, %rcx + mulq %r14 + addq %rax, %rsi + adcq %rcx, %rdx + movq %rsi, V+5*N(%rsp) + movq %rdx, V+6*N(%rsp) + +// Montgomery reduction of u + + amontred(u) + +// Montgomery reduction of v + + amontred(v) + +bignum_montinv_p384_midloop: + + divstep59(d,ff,gg) + movq %rsi, d + +// Next iteration + + decq i + jnz bignum_montinv_p384_loop + +// The 15th and last iteration does not need anything except the +// u value and the sign of f; the latter can be obtained from the +// lowest word of f. 
So it's done differently from the main loop. +// Find the sign of the new f. For this we just need one digit +// since we know (for in-scope cases) that f is either +1 or -1. +// We don't explicitly shift right by 59 either, but looking at +// bit 63 (or any bit >= 60) of the unshifted result is enough +// to distinguish -1 from +1; this is then made into a mask. + + movq F(%rsp), %rax + movq G(%rsp), %rcx + imulq %r8, %rax + imulq %r10, %rcx + addq %rcx, %rax + sarq $63, %rax + +// Now separate out the matrix into sign-magnitude pairs +// and adjust each one based on the sign of f. +// +// Note that at this point we expect |f|=1 and we got its +// sign above, so then since [f,0] == x * 2^{-768} [u,v] (mod p_384) +// we want to flip the sign of u according to that of f. + + movq %r8, %r9 + sarq $63, %r9 + xorq %r9, %r8 + subq %r9, %r8 + xorq %rax, %r9 + + movq %r10, %r11 + sarq $63, %r11 + xorq %r11, %r10 + subq %r11, %r10 + xorq %rax, %r11 + + movq %r12, %r13 + sarq $63, %r13 + xorq %r13, %r12 + subq %r13, %r12 + xorq %rax, %r13 + + movq %r14, %r15 + sarq $63, %r15 + xorq %r15, %r14 + subq %r15, %r14 + xorq %rax, %r15 + +// Adjust the initial value to allow for complement instead of negation + + movq %r8, %rax + andq %r9, %rax + movq %r10, %r12 + andq %r11, %r12 + addq %rax, %r12 + +// Digit 0 of [u] + + xorl %r13d, %r13d + movq U(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r12 + adcq %rdx, %r13 + movq V(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r12 + movq %r12, U(%rsp) + adcq %rdx, %r13 + +// Digit 1 of [u] + + xorl %r14d, %r14d + movq U+N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r13 + adcq %rdx, %r14 + movq V+N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r13 + movq %r13, U+N(%rsp) + adcq %rdx, %r14 + +// Digit 2 of [u] + + xorl %r15d, %r15d + movq U+2*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq V+2*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + movq %r14, U+2*N(%rsp) + adcq %rdx, %r15 + +// Digit 3 of [u] + + xorl %r14d, %r14d + movq U+3*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r14 + movq V+3*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r15 + movq %r15, U+3*N(%rsp) + adcq %rdx, %r14 + +// Digit 4 of [u] + + xorl %r15d, %r15d + movq U+4*N(%rsp), %rax + xorq %r9, %rax + mulq %r8 + addq %rax, %r14 + adcq %rdx, %r15 + movq V+4*N(%rsp), %rax + xorq %r11, %rax + mulq %r10 + addq %rax, %r14 + movq %r14, U+4*N(%rsp) + adcq %rdx, %r15 + +// Digits 5 and 6 of u (top is unsigned) + + movq U+5*N(%rsp), %rax + xorq %r9, %rax + andq %r8, %r9 + negq %r9 + mulq %r8 + addq %rax, %r15 + adcq %rdx, %r9 + movq V+5*N(%rsp), %rax + xorq %r11, %rax + movq %r11, %rdx + andq %r10, %rdx + subq %rdx, %r9 + mulq %r10 + addq %rax, %r15 + movq %r15, U+5*N(%rsp) + adcq %rdx, %r9 + movq %r9, U+6*N(%rsp) + +// Montgomery reduce u + + amontred(u) + +// Perform final strict reduction mod p_384 and copy to output + + movl $0xffffffff, %eax + movq %rax, %rbx + notq %rbx + xorl %ebp, %ebp + leaq -2(%rbp), %rcx + leaq -1(%rbp), %rdx + + movq U(%rsp), %r8 + subq %rax, %r8 + movq U+N(%rsp), %r9 + sbbq %rbx, %r9 + movq U+2*N(%rsp), %r10 + sbbq %rcx, %r10 + movq U+3*N(%rsp), %r11 + sbbq %rdx, %r11 + movq U+4*N(%rsp), %r12 + sbbq %rdx, %r12 + movq U+5*N(%rsp), %r13 + sbbq %rdx, %r13 + + cmovcq U(%rsp), %r8 + cmovcq U+N(%rsp), %r9 + cmovcq U+2*N(%rsp), %r10 + cmovcq U+3*N(%rsp), %r11 + cmovcq U+4*N(%rsp), %r12 + cmovcq U+5*N(%rsp), %r13 + + movq res, %rdi + movq %r8, (%rdi) + movq 
%r9, N(%rdi) + movq %r10, 2*N(%rdi) + movq %r11, 3*N(%rdi) + movq %r12, 4*N(%rdi) + movq %r13, 5*N(%rdi) + +// Restore stack and registers + + addq $NSPACE, %rsp + + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + +#if WINDOWS_ABI + popq %rsi + popq %rdi +#endif + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/x86_att/p384/p384_montjscalarmul.S b/third_party/s2n-bignum/x86_att/p384/p384_montjscalarmul.S new file mode 100644 index 0000000000..38bea41d87 --- /dev/null +++ b/third_party/s2n-bignum/x86_att/p384/p384_montjscalarmul.S @@ -0,0 +1,7393 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery-Jacobian form scalar multiplication for P-384 +// Input scalar[6], point[18]; output res[18] +// +// extern void p384_montjscalarmul +// (uint64_t res[static 18], +// uint64_t scalar[static 6], +// uint64_t point[static 18]); +// +// This function is a variant of its affine point version p384_scalarmul. +// Here, input and output points are assumed to be in Jacobian form with +// their coordinates in the Montgomery domain. Thus, if priming indicates +// Montgomery form, x' = (2^384 * x) mod p_384 etc., each point argument +// is a triple (x',y',z') representing the affine point (x/z^2,y/z^3) when +// z' is nonzero or the point at infinity (group identity) if z' = 0. +// +// Given scalar = n and point = P, assumed to be on the NIST elliptic +// curve P-384, returns a representation of n * P. If the result is the +// point at infinity (either because the input point was or because the +// scalar was a multiple of p_384) then the output is guaranteed to +// represent the point at infinity, i.e. to have its z coordinate zero. +// +// Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point +// Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjscalarmul) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjscalarmul) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 +#define JACSIZE (3*NUMSIZE) + +// Intermediate variables on the stack. +// The table is 16 entries, each of size JACSIZE = 3 * NUMSIZE +// Uppercase syntactic variants make x86_att version simpler to generate. + +#define SCALARB (0*NUMSIZE) +#define scalarb (0*NUMSIZE)(%rsp) +#define ACC (1*NUMSIZE) +#define acc (1*NUMSIZE)(%rsp) +#define TABENT (4*NUMSIZE) +#define tabent (4*NUMSIZE)(%rsp) + +#define TAB (7*NUMSIZE) +#define tab (7*NUMSIZE)(%rsp) + +#define res (55*NUMSIZE)(%rsp) + +#define NSPACE (56*NUMSIZE) + +// Avoid using .rep for the sake of the BoringSSL/AWS-LC delocator, +// which doesn't accept repetitions, assembler macros etc. 
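
As a point of reference for the representation described in this file's header comment, here is an illustrative Python model of converting between affine coordinates and the Montgomery-Jacobian triples this function consumes and produces; the helper names are hypothetical and not part of the source.

    p384 = 2**384 - 2**128 - 2**96 + 2**32 - 1
    R = 2**384

    def to_mont(a):                              # hypothetical helper: a' = 2^384 * a mod p_384
        return a * R % p384

    def affine_to_montjacobian(x, y):            # hypothetical helper: use z = 1 for affine input
        return (to_mont(x), to_mont(y), to_mont(1))

    def montjacobian_to_affine(X, Y, Z):         # hypothetical helper; Z must be nonzero
        Rinv = pow(R, -1, p384)
        x, y, z = X * Rinv % p384, Y * Rinv % p384, Z * Rinv % p384
        zi = pow(z, -1, p384)
        return (x * zi * zi % p384, y * zi * zi * zi % p384)
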
+ +#define selectblock_xz(I) \ + cmpq $I, %rdi ; \ + cmovzq TAB+JACSIZE*(I-1)(%rsp), %rax ; \ + cmovzq TAB+JACSIZE*(I-1)+8(%rsp), %rbx ; \ + cmovzq TAB+JACSIZE*(I-1)+16(%rsp), %rcx ; \ + cmovzq TAB+JACSIZE*(I-1)+24(%rsp), %rdx ; \ + cmovzq TAB+JACSIZE*(I-1)+32(%rsp), %r8 ; \ + cmovzq TAB+JACSIZE*(I-1)+40(%rsp), %r9 ; \ + cmovzq TAB+JACSIZE*(I-1)+96(%rsp), %r10 ; \ + cmovzq TAB+JACSIZE*(I-1)+104(%rsp), %r11 ; \ + cmovzq TAB+JACSIZE*(I-1)+112(%rsp), %r12 ; \ + cmovzq TAB+JACSIZE*(I-1)+120(%rsp), %r13 ; \ + cmovzq TAB+JACSIZE*(I-1)+128(%rsp), %r14 ; \ + cmovzq TAB+JACSIZE*(I-1)+136(%rsp), %r15 + +#define selectblock_y(I) \ + cmpq $I, %rdi ; \ + cmovzq TAB+JACSIZE*(I-1)+48(%rsp), %rax ; \ + cmovzq TAB+JACSIZE*(I-1)+56(%rsp), %rbx ; \ + cmovzq TAB+JACSIZE*(I-1)+64(%rsp), %rcx ; \ + cmovzq TAB+JACSIZE*(I-1)+72(%rsp), %rdx ; \ + cmovzq TAB+JACSIZE*(I-1)+80(%rsp), %r8 ; \ + cmovzq TAB+JACSIZE*(I-1)+88(%rsp), %r9 + +S2N_BN_SYMBOL(p384_montjscalarmul): + +// The Windows version literally calls the standard ABI version. +// This simplifies the proofs since subroutine offsets are fixed. + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx + callq p384_montjscalarmul_standard + popq %rsi + popq %rdi + ret + +p384_montjscalarmul_standard: +#endif + +// Real start of the standard ABI code. + + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbp + pushq %rbx + + subq $NSPACE, %rsp + +// Preserve the "res" input argument; others get processed early. + + movq %rdi, res + +// Reduce the input scalar mod n_384, i.e. conditionally subtract n_384. +// Store it to "scalarb". + + movq (%rsi), %r8 + movq $0xecec196accc52973, %rax + subq %rax, %r8 + movq 8(%rsi), %r9 + movq $0x581a0db248b0a77a, %rax + sbbq %rax, %r9 + movq 16(%rsi), %r10 + movq $0xc7634d81f4372ddf, %rax + sbbq %rax, %r10 + movq 24(%rsi), %r11 + movq $0xffffffffffffffff, %rax + sbbq %rax, %r11 + movq 32(%rsi), %r12 + sbbq %rax, %r12 + movq 40(%rsi), %r13 + sbbq %rax, %r13 + + cmovcq (%rsi), %r8 + cmovcq 8(%rsi), %r9 + cmovcq 16(%rsi), %r10 + cmovcq 24(%rsi), %r11 + cmovcq 32(%rsi), %r12 + cmovcq 40(%rsi), %r13 + + movq %r8, SCALARB(%rsp) + movq %r9, SCALARB+8(%rsp) + movq %r10, SCALARB+16(%rsp) + movq %r11, SCALARB+24(%rsp) + movq %r12, SCALARB+32(%rsp) + movq %r13, SCALARB+40(%rsp) + +// Set the tab[0] table entry to the input point = 1 * P + + movq (%rdx), %rax + movq %rax, TAB(%rsp) + movq 8(%rdx), %rax + movq %rax, TAB+8(%rsp) + movq 16(%rdx), %rax + movq %rax, TAB+16(%rsp) + movq 24(%rdx), %rax + movq %rax, TAB+24(%rsp) + movq 32(%rdx), %rax + movq %rax, TAB+32(%rsp) + movq 40(%rdx), %rax + movq %rax, TAB+40(%rsp) + + movq 48(%rdx), %rax + movq %rax, TAB+48(%rsp) + movq 56(%rdx), %rax + movq %rax, TAB+56(%rsp) + movq 64(%rdx), %rax + movq %rax, TAB+64(%rsp) + movq 72(%rdx), %rax + movq %rax, TAB+72(%rsp) + movq 80(%rdx), %rax + movq %rax, TAB+80(%rsp) + movq 88(%rdx), %rax + movq %rax, TAB+88(%rsp) + + movq 96(%rdx), %rax + movq %rax, TAB+96(%rsp) + movq 104(%rdx), %rax + movq %rax, TAB+104(%rsp) + movq 112(%rdx), %rax + movq %rax, TAB+112(%rsp) + movq 120(%rdx), %rax + movq %rax, TAB+120(%rsp) + movq 128(%rdx), %rax + movq %rax, TAB+128(%rsp) + movq 136(%rdx), %rax + movq %rax, TAB+136(%rsp) + +// Compute and record tab[1] = 2 * p, ..., tab[15] = 16 * P + + leaq TAB+JACSIZE*1(%rsp), %rdi + leaq TAB(%rsp), %rsi + callq p384_montjscalarmul_p384_montjdouble + + leaq TAB+JACSIZE*2(%rsp), %rdi + leaq TAB+JACSIZE*1(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_p384_montjadd + + 
leaq TAB+JACSIZE*3(%rsp), %rdi + leaq TAB+JACSIZE*1(%rsp), %rsi + callq p384_montjscalarmul_p384_montjdouble + + leaq TAB+JACSIZE*4(%rsp), %rdi + leaq TAB+JACSIZE*3(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_p384_montjadd + + leaq TAB+JACSIZE*5(%rsp), %rdi + leaq TAB+JACSIZE*2(%rsp), %rsi + callq p384_montjscalarmul_p384_montjdouble + + leaq TAB+JACSIZE*6(%rsp), %rdi + leaq TAB+JACSIZE*5(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_p384_montjadd + + leaq TAB+JACSIZE*7(%rsp), %rdi + leaq TAB+JACSIZE*3(%rsp), %rsi + callq p384_montjscalarmul_p384_montjdouble + + leaq TAB+JACSIZE*8(%rsp), %rdi + leaq TAB+JACSIZE*7(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_p384_montjadd + + leaq TAB+JACSIZE*9(%rsp), %rdi + leaq TAB+JACSIZE*4(%rsp), %rsi + callq p384_montjscalarmul_p384_montjdouble + + leaq TAB+JACSIZE*10(%rsp), %rdi + leaq TAB+JACSIZE*9(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_p384_montjadd + + leaq TAB+JACSIZE*11(%rsp), %rdi + leaq TAB+JACSIZE*5(%rsp), %rsi + callq p384_montjscalarmul_p384_montjdouble + + leaq TAB+JACSIZE*12(%rsp), %rdi + leaq TAB+JACSIZE*11(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_p384_montjadd + + leaq TAB+JACSIZE*13(%rsp), %rdi + leaq TAB+JACSIZE*6(%rsp), %rsi + callq p384_montjscalarmul_p384_montjdouble + + leaq TAB+JACSIZE*14(%rsp), %rdi + leaq TAB+JACSIZE*13(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_p384_montjadd + + leaq TAB+JACSIZE*15(%rsp), %rdi + leaq TAB+JACSIZE*7(%rsp), %rsi + callq p384_montjscalarmul_p384_montjdouble + +// Add the recoding constant sum_i(16 * 32^i) to the scalar to allow signed +// digits. The digits of the constant, in lowest-to-highest order, are as +// follows; they are generated dynamically to use fewer large constant loads. +// +// 0x0842108421084210 +// 0x1084210842108421 +// 0x2108421084210842 +// 0x4210842108421084 +// 0x8421084210842108 +// 0x0842108421084210 + + movq $0x1084210842108421, %rax + movq %rax, %rcx + shrq $1, %rax + movq SCALARB(%rsp), %r8 + addq %rax, %r8 + movq SCALARB+8(%rsp), %r9 + adcq %rcx, %r9 + leaq (%rcx,%rcx), %rcx + movq SCALARB+16(%rsp), %r10 + adcq %rcx, %r10 + leaq (%rcx,%rcx), %rcx + movq SCALARB+24(%rsp), %r11 + adcq %rcx, %r11 + leaq (%rcx,%rcx), %rcx + movq SCALARB+32(%rsp), %r12 + adcq %rcx, %r12 + movq SCALARB+40(%rsp), %r13 + adcq %rax, %r13 + sbbq %rdi, %rdi + negq %rdi + +// Record the top bitfield in %rdi then shift the whole scalar left 4 bits +// to align the top of the next bitfield with the MSB (bits 379..383). + + shldq $4, %r13, %rdi + shldq $4, %r12, %r13 + shldq $4, %r11, %r12 + shldq $4, %r10, %r11 + shldq $4, %r9, %r10 + shldq $4, %r8, %r9 + shlq $4, %r8 + + movq %r8, SCALARB(%rsp) + movq %r9, SCALARB+8(%rsp) + movq %r10, SCALARB+16(%rsp) + movq %r11, SCALARB+24(%rsp) + movq %r12, SCALARB+32(%rsp) + movq %r13, SCALARB+40(%rsp) + +// Initialize the accumulator to the corresponding entry using constant-time +// lookup in the table. This top digit, uniquely, is not recoded so there is +// no sign adjustment to make. On the x86 integer side we don't have enough +// registers to hold all the fields; this could be better done with SIMD +// registers anyway. So we do x and z coordinates in one sweep, y in another +// (this is a rehearsal for below where we might need to negate the y). 
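
An illustrative Python check (not part of the source) of the recoding constant whose digits are listed above: the six words reproduce sum_i(16 * 32^i) taken over the 76 five-bit windows below bit 380.

    words = [0x0842108421084210, 0x1084210842108421, 0x2108421084210842,
             0x4210842108421084, 0x8421084210842108, 0x0842108421084210]
    value = sum(w << (64 * i) for i, w in enumerate(words))   # lowest word first
    assert value == sum(16 * 32**i for i in range(76))
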
+ + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + selectblock_xz(1) + selectblock_xz(2) + selectblock_xz(3) + selectblock_xz(4) + selectblock_xz(5) + selectblock_xz(6) + selectblock_xz(7) + selectblock_xz(8) + selectblock_xz(9) + selectblock_xz(10) + selectblock_xz(11) + selectblock_xz(12) + selectblock_xz(13) + selectblock_xz(14) + selectblock_xz(15) + selectblock_xz(16) + + movq %rax, ACC(%rsp) + movq %rbx, ACC+8(%rsp) + movq %rcx, ACC+16(%rsp) + movq %rdx, ACC+24(%rsp) + movq %r8, ACC+32(%rsp) + movq %r9, ACC+40(%rsp) + movq %r10, ACC+96(%rsp) + movq %r11, ACC+104(%rsp) + movq %r12, ACC+112(%rsp) + movq %r13, ACC+120(%rsp) + movq %r14, ACC+128(%rsp) + movq %r15, ACC+136(%rsp) + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + + selectblock_y(1) + selectblock_y(2) + selectblock_y(3) + selectblock_y(4) + selectblock_y(5) + selectblock_y(6) + selectblock_y(7) + selectblock_y(8) + selectblock_y(9) + selectblock_y(10) + selectblock_y(11) + selectblock_y(12) + selectblock_y(13) + selectblock_y(14) + selectblock_y(15) + selectblock_y(16) + + movq %rax, ACC+48(%rsp) + movq %rbx, ACC+56(%rsp) + movq %rcx, ACC+64(%rsp) + movq %rdx, ACC+72(%rsp) + movq %r8, ACC+80(%rsp) + movq %r9, ACC+88(%rsp) + +// Main loop over size-5 bitfields: double 5 times then add signed digit +// At each stage we shift the scalar left by 5 bits so we can simply pick +// the top 5 bits as the bitfield, saving some fiddle over indexing. + + movl $380, %ebp + +p384_montjscalarmul_mainloop: + subq $5, %rbp + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_p384_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_p384_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_p384_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_p384_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_p384_montjdouble + +// Choose the bitfield and adjust it to sign and magnitude + + movq SCALARB(%rsp), %r8 + movq SCALARB+8(%rsp), %r9 + movq SCALARB+16(%rsp), %r10 + movq SCALARB+24(%rsp), %r11 + movq SCALARB+32(%rsp), %r12 + movq SCALARB+40(%rsp), %r13 + + movq %r13, %rdi + shrq $59, %rdi + shldq $5, %r12, %r13 + shldq $5, %r11, %r12 + shldq $5, %r10, %r11 + shldq $5, %r9, %r10 + shldq $5, %r8, %r9 + shlq $5, %r8 + + movq %r8, SCALARB(%rsp) + movq %r9, SCALARB+8(%rsp) + movq %r10, SCALARB+16(%rsp) + movq %r11, SCALARB+24(%rsp) + movq %r12, SCALARB+32(%rsp) + movq %r13, SCALARB+40(%rsp) + + subq $16, %rdi + sbbq %rsi, %rsi // %rsi = sign of digit (-1 = negative) + xorq %rsi, %rdi + subq %rsi, %rdi // %rdi = absolute value of digit + +// Conditionally select the table entry tab[i-1] = i * P in constant time +// Again, this is done in two sweeps, first doing x and z then y. 
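
Before the constant-time selection below, the sign/magnitude adjustment just above can be modelled in a few lines of Python (illustrative only, not part of the source):

    # Turn a raw 5-bit window d (0..31) into the sign mask and magnitude of the
    # signed digit d - 16, mirroring the branch-free subq/sbbq/xorq/subq above.
    MASK64 = (1 << 64) - 1

    def split_digit(d):
        t = (d - 16) & MASK64                    # subq $16, %rdi
        sign = MASK64 if d < 16 else 0           # sbbq %rsi, %rsi (borrow mask)
        mag = ((t ^ sign) - sign) & MASK64       # xorq %rsi, %rdi ; subq %rsi, %rdi
        return sign, mag

    for d in range(32):
        sign, mag = split_digit(d)
        assert mag == abs(d - 16) and (sign == MASK64) == (d < 16)
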
+ + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + selectblock_xz(1) + selectblock_xz(2) + selectblock_xz(3) + selectblock_xz(4) + selectblock_xz(5) + selectblock_xz(6) + selectblock_xz(7) + selectblock_xz(8) + selectblock_xz(9) + selectblock_xz(10) + selectblock_xz(11) + selectblock_xz(12) + selectblock_xz(13) + selectblock_xz(14) + selectblock_xz(15) + selectblock_xz(16) + + movq %rax, TABENT(%rsp) + movq %rbx, TABENT+8(%rsp) + movq %rcx, TABENT+16(%rsp) + movq %rdx, TABENT+24(%rsp) + movq %r8, TABENT+32(%rsp) + movq %r9, TABENT+40(%rsp) + movq %r10, TABENT+96(%rsp) + movq %r11, TABENT+104(%rsp) + movq %r12, TABENT+112(%rsp) + movq %r13, TABENT+120(%rsp) + movq %r14, TABENT+128(%rsp) + movq %r15, TABENT+136(%rsp) + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + + selectblock_y(1) + selectblock_y(2) + selectblock_y(3) + selectblock_y(4) + selectblock_y(5) + selectblock_y(6) + selectblock_y(7) + selectblock_y(8) + selectblock_y(9) + selectblock_y(10) + selectblock_y(11) + selectblock_y(12) + selectblock_y(13) + selectblock_y(14) + selectblock_y(15) + selectblock_y(16) + +// Store it to "tabent" with the y coordinate optionally negated. +// This is done carefully to give coordinates < p_384 even in +// the degenerate case y = 0 (when z = 0 for points on the curve). +// The digits of the prime p_384 are generated dynamically from +// the zeroth via not/lea to reduce the number of constant loads. + + movq %rax, %r10 + orq %rbx, %r10 + movq %rcx, %r11 + orq %rdx, %r11 + movq %r8, %r12 + orq %r9, %r12 + orq %r11, %r10 + orq %r12, %r10 + cmovzq %r10, %rsi + + movl $0xffffffff, %r10d + movq %r10, %r11 + notq %r11 + leaq (%r10,%r11), %r13 + subq %rax, %r10 + leaq -1(%r13), %r12 + sbbq %rbx, %r11 + movq %r13, %r14 + sbbq %rcx, %r12 + sbbq %rdx, %r13 + movq %r14, %r15 + sbbq %r8, %r14 + sbbq %r9, %r15 + + testq %rsi, %rsi + cmovnzq %r10, %rax + cmovnzq %r11, %rbx + cmovnzq %r12, %rcx + cmovnzq %r13, %rdx + cmovnzq %r14, %r8 + cmovnzq %r15, %r9 + + movq %rax, TABENT+48(%rsp) + movq %rbx, TABENT+56(%rsp) + movq %rcx, TABENT+64(%rsp) + movq %rdx, TABENT+72(%rsp) + movq %r8, TABENT+80(%rsp) + movq %r9, TABENT+88(%rsp) + +// Add to the accumulator + + leaq TABENT(%rsp), %rdx + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_p384_montjadd + + testq %rbp, %rbp + jne p384_montjscalarmul_mainloop + +// That's the end of the main loop, and we just need to copy the +// result in "acc" to the output. 
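
To summarize the loop that has just ended, here is an illustrative high-level Python model (not part of the source) in which integers stand in for curve points, so five doublings become a multiplication by 32 and a table addition becomes an integer addition. The identity holds for any nonnegative scalar; the real code additionally reduces the scalar mod the group order first.

    def scalarmul_model(k):
        c = sum(16 * 32**i for i in range(76))   # the recoding constant
        s = k + c
        acc = s >> 380                           # top window: initial table lookup
        for i in reversed(range(76)):            # remaining windows, high to low
            acc = 32 * acc                       # five "doublings"
            acc += ((s >> (5 * i)) & 31) - 16    # one signed-digit "addition"
        return acc

    k = 12345678901234567890123456789            # arbitrary sample scalar
    assert scalarmul_model(k) == k               # the recoding reproduces the scalar
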
+ + movq res, %rdi + movq ACC(%rsp), %rax + movq %rax, (%rdi) + movq ACC+8(%rsp), %rax + movq %rax, 8(%rdi) + movq ACC+16(%rsp), %rax + movq %rax, 16(%rdi) + movq ACC+24(%rsp), %rax + movq %rax, 24(%rdi) + movq ACC+32(%rsp), %rax + movq %rax, 32(%rdi) + movq ACC+40(%rsp), %rax + movq %rax, 40(%rdi) + movq ACC+48(%rsp), %rax + movq %rax, 48(%rdi) + movq ACC+56(%rsp), %rax + movq %rax, 56(%rdi) + movq ACC+64(%rsp), %rax + movq %rax, 64(%rdi) + movq ACC+72(%rsp), %rax + movq %rax, 72(%rdi) + movq ACC+80(%rsp), %rax + movq %rax, 80(%rdi) + movq ACC+88(%rsp), %rax + movq %rax, 88(%rdi) + movq ACC+96(%rsp), %rax + movq %rax, 96(%rdi) + movq ACC+104(%rsp), %rax + movq %rax, 104(%rdi) + movq ACC+112(%rsp), %rax + movq %rax, 112(%rdi) + movq ACC+120(%rsp), %rax + movq %rax, 120(%rdi) + movq ACC+128(%rsp), %rax + movq %rax, 128(%rdi) + movq ACC+136(%rsp), %rax + movq %rax, 136(%rdi) + +// Restore stack and registers and return + + addq $NSPACE, %rsp + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + popq %r15 + ret + +// Local copies of subroutines, complete clones at the moment + +p384_montjscalarmul_p384_montjadd: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x160, %rsp + movq %rsi, 0x150(%rsp) + movq %rdx, 0x158(%rsp) + movq 0x60(%rsi), %rdx + mulxq 0x68(%rsi), %r9, %r10 + mulxq 0x78(%rsi), %r11, %r12 + mulxq 0x88(%rsi), %r13, %r14 + movq 0x78(%rsi), %rdx + mulxq 0x80(%rsi), %r15, %rcx + xorl %ebp, %ebp + movq 0x70(%rsi), %rdx + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x68(%rsi), %rdx + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x88(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %rcx + adcq %rbp, %rcx + xorl %ebp, %ebp + movq 0x80(%rsi), %rdx + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq 0x70(%rsi), %rdx + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x88(%rsi), %rax, %rdx + adcxq %rax, %r15 + adoxq %rdx, %rcx + movq 0x88(%rsi), %rdx + mulxq 0x80(%rsi), %rbx, %rbp + mulxq 0x78(%rsi), %rax, %rdx + adcxq %rax, %rcx + adoxq %rdx, %rbx + movl $0x0, %eax + adcxq %rax, %rbx + adoxq %rax, %rbp + adcq %rax, %rbp + xorq %rax, %rax + movq 0x60(%rsi), %rdx + mulxq 0x60(%rsi), %r8, %rax + adcxq %r9, %r9 + adoxq %rax, %r9 + movq 0x68(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x70(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x78(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %r15, %r15 + adoxq %rdx, %r15 + movq 0x80(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %rcx, %rcx + adoxq %rax, %rcx + adcxq %rbx, %rbx + adoxq %rdx, %rbx + movq 0x88(%rsi), %rdx + mulxq %rdx, %rax, %rsi + adcxq %rbp, %rbp + adoxq %rax, %rbp + movl $0x0, %eax + adcxq %rax, %rsi + adoxq %rax, %rsi + movq %rbx, (%rsp) + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r8, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r8 + addq %rbx, %rax + adcq %rdx, %r8 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r9 + sbbq %r8, %r10 + sbbq %rbx, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + 
movq %rdx, %r8 + sbbq $0x0, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r9, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r9 + addq %rbx, %rax + adcq %rdx, %r9 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r10 + sbbq %r9, %r11 + sbbq %rbx, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rdx, %r9 + sbbq $0x0, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r10, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r10 + addq %rbx, %rax + adcq %rdx, %r10 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r11 + sbbq %r10, %r12 + sbbq %rbx, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rdx, %r10 + sbbq $0x0, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r11, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r11 + addq %rbx, %rax + adcq %rdx, %r11 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r12 + sbbq %r11, %r13 + sbbq %rbx, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rdx, %r11 + sbbq $0x0, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r12, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r12 + addq %rbx, %rax + adcq %rdx, %r12 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r13 + sbbq %r12, %r8 + sbbq %rbx, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rdx, %r12 + sbbq $0x0, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r13, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r13 + addq %rbx, %rax + adcq %rdx, %r13 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r8 + sbbq %r13, %r9 + sbbq %rbx, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rdx, %r13 + sbbq $0x0, %r13 + movq (%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rsi + movl $0x0, %r8d + movabsq $0xffffffff00000001, %rax + movl $0xffffffff, %r9d + movl $0x1, %r10d + cmovaeq %r8, %rax + cmovaeq %r8, %r9 + cmovaeq %r8, %r10 + addq %rax, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r8, %rbx + adcq %r8, %rbp + adcq %r8, %rsi + movq %r14, (%rsp) + movq %r15, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rbx, 0x18(%rsp) + movq %rbp, 0x20(%rsp) + movq %rsi, 0x28(%rsp) + movq 0x158(%rsp), %rsi + movq 0x60(%rsi), %rdx + mulxq 0x68(%rsi), %r9, %r10 + mulxq 0x78(%rsi), %r11, %r12 + mulxq 0x88(%rsi), %r13, %r14 + movq 0x78(%rsi), %rdx + mulxq 0x80(%rsi), %r15, %rcx + xorl %ebp, %ebp + movq 0x70(%rsi), %rdx + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x68(%rsi), %rdx + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x88(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %rcx + adcq %rbp, %rcx + xorl %ebp, %ebp + movq 0x80(%rsi), %rdx + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq 0x70(%rsi), %rdx + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x88(%rsi), %rax, %rdx + adcxq %rax, %r15 + adoxq %rdx, %rcx + movq 0x88(%rsi), %rdx + mulxq 0x80(%rsi), %rbx, %rbp + mulxq 0x78(%rsi), %rax, %rdx + adcxq %rax, %rcx + adoxq %rdx, %rbx + movl $0x0, %eax + adcxq %rax, %rbx + adoxq %rax, %rbp + adcq 
%rax, %rbp + xorq %rax, %rax + movq 0x60(%rsi), %rdx + mulxq 0x60(%rsi), %r8, %rax + adcxq %r9, %r9 + adoxq %rax, %r9 + movq 0x68(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x70(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x78(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %r15, %r15 + adoxq %rdx, %r15 + movq 0x80(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %rcx, %rcx + adoxq %rax, %rcx + adcxq %rbx, %rbx + adoxq %rdx, %rbx + movq 0x88(%rsi), %rdx + mulxq %rdx, %rax, %rsi + adcxq %rbp, %rbp + adoxq %rax, %rbp + movl $0x0, %eax + adcxq %rax, %rsi + adoxq %rax, %rsi + movq %rbx, 0xf0(%rsp) + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r8, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r8 + addq %rbx, %rax + adcq %rdx, %r8 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r9 + sbbq %r8, %r10 + sbbq %rbx, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rdx, %r8 + sbbq $0x0, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r9, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r9 + addq %rbx, %rax + adcq %rdx, %r9 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r10 + sbbq %r9, %r11 + sbbq %rbx, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rdx, %r9 + sbbq $0x0, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r10, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r10 + addq %rbx, %rax + adcq %rdx, %r10 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r11 + sbbq %r10, %r12 + sbbq %rbx, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rdx, %r10 + sbbq $0x0, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r11, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r11 + addq %rbx, %rax + adcq %rdx, %r11 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r12 + sbbq %r11, %r13 + sbbq %rbx, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rdx, %r11 + sbbq $0x0, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r12, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r12 + addq %rbx, %rax + adcq %rdx, %r12 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r13 + sbbq %r12, %r8 + sbbq %rbx, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rdx, %r12 + sbbq $0x0, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r13, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r13 + addq %rbx, %rax + adcq %rdx, %r13 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r8 + sbbq %r13, %r9 + sbbq %rbx, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rdx, %r13 + sbbq $0x0, %r13 + movq 0xf0(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rsi + movl $0x0, %r8d + movabsq $0xffffffff00000001, %rax + movl $0xffffffff, %r9d + movl $0x1, %r10d + cmovaeq %r8, %rax + cmovaeq %r8, %r9 + cmovaeq %r8, %r10 + addq %rax, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r8, %rbx + adcq %r8, %rbp + adcq %r8, %rsi + movq %r14, 0xf0(%rsp) + movq %r15, 0xf8(%rsp) + movq %rcx, 0x100(%rsp) + movq %rbx, 0x108(%rsp) + movq %rbp, 0x110(%rsp) + movq %rsi, 0x118(%rsp) + movq 0x150(%rsp), %rsi + movq 0x158(%rsp), %rcx + movq 0x30(%rsi), 
%rdx + xorl %r15d, %r15d + mulxq 0x60(%rcx), %r8, %r9 + mulxq 0x68(%rcx), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x70(%rcx), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x78(%rcx), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x80(%rcx), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x88(%rcx), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x38(%rsi), %rdx + xorl %r8d, %r8d + mulxq 0x60(%rcx), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x68(%rcx), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x70(%rcx), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x78(%rcx), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x80(%rcx), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x88(%rcx), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x40(%rsi), %rdx + xorl %r9d, %r9d + mulxq 0x60(%rcx), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rcx), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x70(%rcx), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x78(%rcx), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x80(%rcx), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x88(%rcx), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x48(%rsi), %rdx + xorl %r10d, %r10d + mulxq 0x60(%rcx), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x68(%rcx), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x70(%rcx), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x78(%rcx), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x80(%rcx), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x88(%rcx), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x50(%rsi), %rdx + xorl %r11d, %r11d + mulxq 0x60(%rcx), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x68(%rcx), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x70(%rcx), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 
0x78(%rcx), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x80(%rcx), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x88(%rcx), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x58(%rsi), %rdx + xorl %r12d, %r12d + mulxq 0x60(%rcx), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x68(%rcx), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x70(%rcx), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x78(%rcx), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x80(%rcx), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x88(%rcx), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x120(%rsp) + movq %r15, 0x128(%rsp) + movq %r8, 0x130(%rsp) + movq %r9, 0x138(%rsp) + movq %r10, 0x140(%rsp) + movq %r11, 0x148(%rsp) + movq 0x150(%rsp), %rsi + movq 0x158(%rsp), %rcx + movq 0x30(%rcx), %rdx + xorl %r15d, %r15d + mulxq 0x60(%rsi), %r8, %r9 + mulxq 0x68(%rsi), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x70(%rsi), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x78(%rsi), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x80(%rsi), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x88(%rsi), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x38(%rcx), %rdx + xorl %r8d, %r8d + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x68(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x70(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x88(%rsi), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq 
$0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x40(%rcx), %rdx + xorl %r9d, %r9d + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x70(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x88(%rsi), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x48(%rcx), %rdx + xorl %r10d, %r10d + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x68(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x70(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x88(%rsi), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x50(%rcx), %rdx + xorl %r11d, %r11d + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x68(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x70(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x88(%rsi), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x58(%rcx), %rdx + xorl %r12d, %r12d + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x68(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x70(%rsi), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x88(%rsi), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 
+ xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq %r8, 0x40(%rsp) + movq %r9, 0x48(%rsp) + movq %r10, 0x50(%rsp) + movq %r11, 0x58(%rsp) + movq 0x158(%rsp), %rcx + movq (%rcx), %rdx + xorl %r15d, %r15d + mulxq (%rsp), %r8, %r9 + mulxq 0x8(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x10(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x18(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x20(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x28(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x8(%rcx), %rdx + xorl %r8d, %r8d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x10(%rcx), %rdx + xorl %r9d, %r9d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x18(%rcx), %rdx + xorl %r10d, %r10d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq 
$0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x20(%rcx), %rdx + xorl %r11d, %r11d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x28(%rcx), %rdx + xorl %r12d, %r12d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x60(%rsp) + movq %r15, 0x68(%rsp) + movq %r8, 0x70(%rsp) + movq %r9, 0x78(%rsp) + movq %r10, 0x80(%rsp) + movq %r11, 0x88(%rsp) + movq 0x150(%rsp), %rsi + movq (%rsi), %rdx + xorl %r15d, %r15d + mulxq 0xf0(%rsp), %r8, %r9 + mulxq 0xf8(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x100(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x108(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x110(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x118(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x8(%rsi), %rdx + xorl %r8d, %r8d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 
0x108(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x10(%rsi), %rdx + xorl %r9d, %r9d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x18(%rsi), %rdx + xorl %r10d, %r10d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x20(%rsi), %rdx + xorl %r11d, %r11d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x28(%rsi), %rdx + xorl %r12d, %r12d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 
0x110(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %r8, 0xd0(%rsp) + movq %r9, 0xd8(%rsp) + movq %r10, 0xe0(%rsp) + movq %r11, 0xe8(%rsp) + movq 0x30(%rsp), %rdx + xorl %r15d, %r15d + mulxq (%rsp), %r8, %r9 + mulxq 0x8(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x10(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x18(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x20(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x28(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x38(%rsp), %rdx + xorl %r8d, %r8d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x40(%rsp), %rdx + xorl %r9d, %r9d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x48(%rsp), %rdx + xorl %r10d, %r10d + mulxq 
(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x50(%rsp), %rdx + xorl %r11d, %r11d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x58(%rsp), %rdx + xorl %r12d, %r12d + mulxq (%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x10(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x18(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x20(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x28(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq %r8, 0x40(%rsp) + movq %r9, 0x48(%rsp) + movq %r10, 0x50(%rsp) + movq %r11, 0x58(%rsp) + movq 0x120(%rsp), %rdx + xorl %r15d, %r15d + mulxq 0xf0(%rsp), %r8, %r9 + mulxq 0xf8(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x100(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x108(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x110(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x118(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl 
$0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x128(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x130(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x138(%rsp), %rdx + xorl %r10d, %r10d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x140(%rsp), %rdx + xorl %r11d, %r11d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq 
%rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x148(%rsp), %rdx + xorl %r12d, %r12d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x120(%rsp) + movq %r15, 0x128(%rsp) + movq %r8, 0x130(%rsp) + movq %r9, 0x138(%rsp) + movq %r10, 0x140(%rsp) + movq %r11, 0x148(%rsp) + movq 0x60(%rsp), %rax + subq 0xc0(%rsp), %rax + movq 0x68(%rsp), %rdx + sbbq 0xc8(%rsp), %rdx + movq 0x70(%rsp), %r8 + sbbq 0xd0(%rsp), %r8 + movq 0x78(%rsp), %r9 + sbbq 0xd8(%rsp), %r9 + movq 0x80(%rsp), %r10 + sbbq 0xe0(%rsp), %r10 + movq 0x88(%rsp), %r11 + sbbq 0xe8(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0xf0(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0xf8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x100(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x108(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x110(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x118(%rsp) + movq 0x30(%rsp), %rax + subq 0x120(%rsp), %rax + movq 0x38(%rsp), %rdx + sbbq 0x128(%rsp), %rdx + movq 0x40(%rsp), %r8 + sbbq 0x130(%rsp), %r8 + movq 0x48(%rsp), %r9 + sbbq 0x138(%rsp), %r9 + movq 0x50(%rsp), %r10 + sbbq 0x140(%rsp), %r10 + movq 0x58(%rsp), %r11 + sbbq 0x148(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0x30(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0x38(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x40(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x48(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x50(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x58(%rsp) + movq 0xf0(%rsp), %rdx + mulxq 0xf8(%rsp), %r9, %r10 + mulxq 0x108(%rsp), %r11, %r12 + mulxq 0x118(%rsp), %r13, %r14 + movq 0x108(%rsp), %rdx + mulxq 0x110(%rsp), %r15, %rcx + xorl %ebp, %ebp + movq 0x100(%rsp), %rdx + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0xf8(%rsp), %rdx + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x118(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + 
adoxq %rbp, %rcx + adcq %rbp, %rcx + xorl %ebp, %ebp + movq 0x110(%rsp), %rdx + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq 0x100(%rsp), %rdx + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x118(%rsp), %rax, %rdx + adcxq %rax, %r15 + adoxq %rdx, %rcx + movq 0x118(%rsp), %rdx + mulxq 0x110(%rsp), %rbx, %rbp + mulxq 0x108(%rsp), %rax, %rdx + adcxq %rax, %rcx + adoxq %rdx, %rbx + movl $0x0, %eax + adcxq %rax, %rbx + adoxq %rax, %rbp + adcq %rax, %rbp + xorq %rax, %rax + movq 0xf0(%rsp), %rdx + mulxq 0xf0(%rsp), %r8, %rax + adcxq %r9, %r9 + adoxq %rax, %r9 + movq 0xf8(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x100(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x108(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %r15, %r15 + adoxq %rdx, %r15 + movq 0x110(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %rcx, %rcx + adoxq %rax, %rcx + adcxq %rbx, %rbx + adoxq %rdx, %rbx + movq 0x118(%rsp), %rdx + mulxq %rdx, %rax, %rsi + adcxq %rbp, %rbp + adoxq %rax, %rbp + movl $0x0, %eax + adcxq %rax, %rsi + adoxq %rax, %rsi + movq %rbx, 0x90(%rsp) + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r8, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r8 + addq %rbx, %rax + adcq %rdx, %r8 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r9 + sbbq %r8, %r10 + sbbq %rbx, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rdx, %r8 + sbbq $0x0, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r9, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r9 + addq %rbx, %rax + adcq %rdx, %r9 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r10 + sbbq %r9, %r11 + sbbq %rbx, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rdx, %r9 + sbbq $0x0, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r10, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r10 + addq %rbx, %rax + adcq %rdx, %r10 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r11 + sbbq %r10, %r12 + sbbq %rbx, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rdx, %r10 + sbbq $0x0, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r11, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r11 + addq %rbx, %rax + adcq %rdx, %r11 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r12 + sbbq %r11, %r13 + sbbq %rbx, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rdx, %r11 + sbbq $0x0, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r12, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r12 + addq %rbx, %rax + adcq %rdx, %r12 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r13 + sbbq %r12, %r8 + sbbq %rbx, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rdx, %r12 + sbbq $0x0, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r13, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r13 + addq %rbx, %rax + adcq %rdx, %r13 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r8 + sbbq %r13, %r9 + sbbq %rbx, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rdx, %r13 + sbbq $0x0, %r13 + movq 0x90(%rsp), %rbx + addq %r8, 
%r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rsi + movl $0x0, %r8d + movabsq $0xffffffff00000001, %rax + movl $0xffffffff, %r9d + movl $0x1, %r10d + cmovaeq %r8, %rax + cmovaeq %r8, %r9 + cmovaeq %r8, %r10 + addq %rax, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r8, %rbx + adcq %r8, %rbp + adcq %r8, %rsi + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq %rcx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rbp, 0xb0(%rsp) + movq %rsi, 0xb8(%rsp) + movq 0x30(%rsp), %rdx + mulxq 0x38(%rsp), %r9, %r10 + mulxq 0x48(%rsp), %r11, %r12 + mulxq 0x58(%rsp), %r13, %r14 + movq 0x48(%rsp), %rdx + mulxq 0x50(%rsp), %r15, %rcx + xorl %ebp, %ebp + movq 0x40(%rsp), %rdx + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x38(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x38(%rsp), %rdx + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x58(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %rcx + adcq %rbp, %rcx + xorl %ebp, %ebp + movq 0x50(%rsp), %rdx + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq 0x40(%rsp), %rdx + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x58(%rsp), %rax, %rdx + adcxq %rax, %r15 + adoxq %rdx, %rcx + movq 0x58(%rsp), %rdx + mulxq 0x50(%rsp), %rbx, %rbp + mulxq 0x48(%rsp), %rax, %rdx + adcxq %rax, %rcx + adoxq %rdx, %rbx + movl $0x0, %eax + adcxq %rax, %rbx + adoxq %rax, %rbp + adcq %rax, %rbp + xorq %rax, %rax + movq 0x30(%rsp), %rdx + mulxq 0x30(%rsp), %r8, %rax + adcxq %r9, %r9 + adoxq %rax, %r9 + movq 0x38(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x40(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x48(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %r15, %r15 + adoxq %rdx, %r15 + movq 0x50(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %rcx, %rcx + adoxq %rax, %rcx + adcxq %rbx, %rbx + adoxq %rdx, %rbx + movq 0x58(%rsp), %rdx + mulxq %rdx, %rax, %rsi + adcxq %rbp, %rbp + adoxq %rax, %rbp + movl $0x0, %eax + adcxq %rax, %rsi + adoxq %rax, %rsi + movq %rbx, (%rsp) + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r8, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r8 + addq %rbx, %rax + adcq %rdx, %r8 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r9 + sbbq %r8, %r10 + sbbq %rbx, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rdx, %r8 + sbbq $0x0, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r9, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r9 + addq %rbx, %rax + adcq %rdx, %r9 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r10 + sbbq %r9, %r11 + sbbq %rbx, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rdx, %r9 + sbbq $0x0, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r10, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r10 + addq %rbx, %rax + adcq %rdx, %r10 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r11 + sbbq %r10, %r12 + sbbq %rbx, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rdx, %r10 + sbbq $0x0, %r10 + movq %r11, %rdx + shlq $0x20, 
%rdx + addq %r11, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r11, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r11 + addq %rbx, %rax + adcq %rdx, %r11 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r12 + sbbq %r11, %r13 + sbbq %rbx, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rdx, %r11 + sbbq $0x0, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r12, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r12 + addq %rbx, %rax + adcq %rdx, %r12 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r13 + sbbq %r12, %r8 + sbbq %rbx, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rdx, %r12 + sbbq $0x0, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r13, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r13 + addq %rbx, %rax + adcq %rdx, %r13 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r8 + sbbq %r13, %r9 + sbbq %rbx, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rdx, %r13 + sbbq $0x0, %r13 + movq (%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rsi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rsi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rsi + movq %r14, (%rsp) + movq %r15, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rbx, 0x18(%rsp) + movq %rbp, 0x20(%rsp) + movq %rsi, 0x28(%rsp) + movq 0xc0(%rsp), %rdx + xorl %r15d, %r15d + mulxq 0x90(%rsp), %r8, %r9 + mulxq 0x98(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0xa0(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0xa8(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0xb0(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0xb8(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0xc8(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0xd0(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq 
%rbx, %r13 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0xd8(%rsp), %rdx + xorl %r10d, %r10d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0xe0(%rsp), %rdx + xorl %r11d, %r11d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0xe8(%rsp), %rdx + xorl %r12d, %r12d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, 
%r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %r8, 0xd0(%rsp) + movq %r9, 0xd8(%rsp) + movq %r10, 0xe0(%rsp) + movq %r11, 0xe8(%rsp) + movq 0x60(%rsp), %rdx + xorl %r15d, %r15d + mulxq 0x90(%rsp), %r8, %r9 + mulxq 0x98(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0xa0(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0xa8(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0xb0(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0xb8(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x68(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x70(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x78(%rsp), %rdx + xorl %r10d, %r10d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x80(%rsp), %rdx + xorl 
%r11d, %r11d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x88(%rsp), %rdx + xorl %r12d, %r12d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x60(%rsp) + movq %r15, 0x68(%rsp) + movq %r8, 0x70(%rsp) + movq %r9, 0x78(%rsp) + movq %r10, 0x80(%rsp) + movq %r11, 0x88(%rsp) + movq (%rsp), %rax + subq 0xc0(%rsp), %rax + movq 0x8(%rsp), %rdx + sbbq 0xc8(%rsp), %rdx + movq 0x10(%rsp), %r8 + sbbq 0xd0(%rsp), %r8 + movq 0x18(%rsp), %r9 + sbbq 0xd8(%rsp), %r9 + movq 0x20(%rsp), %r10 + sbbq 0xe0(%rsp), %r10 + movq 0x28(%rsp), %r11 + sbbq 0xe8(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, (%rsp) + sbbq %rcx, %rdx + movq %rdx, 0x8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x10(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x18(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x20(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x28(%rsp) + movq 0x60(%rsp), %rax + subq 0xc0(%rsp), %rax + movq 0x68(%rsp), %rdx + sbbq 0xc8(%rsp), %rdx + movq 0x70(%rsp), %r8 + sbbq 0xd0(%rsp), %r8 + movq 0x78(%rsp), %r9 + sbbq 0xd8(%rsp), %r9 + movq 0x80(%rsp), %r10 + sbbq 0xe0(%rsp), %r10 + movq 0x88(%rsp), %r11 + sbbq 0xe8(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0x90(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0x98(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0xa0(%rsp) + sbbq $0x0, %r9 + movq %r9, 0xa8(%rsp) + sbbq $0x0, %r10 + movq %r10, 0xb0(%rsp) + sbbq $0x0, %r11 + 
movq %r11, 0xb8(%rsp) + movq 0x150(%rsp), %rsi + movq 0x60(%rsi), %rdx + xorl %r15d, %r15d + mulxq 0xf0(%rsp), %r8, %r9 + mulxq 0xf8(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x100(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x108(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x110(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x118(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x68(%rsi), %rdx + xorl %r8d, %r8d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x70(%rsi), %rdx + xorl %r9d, %r9d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x78(%rsi), %rdx + xorl %r10d, %r10d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x80(%rsi), %rdx + xorl %r11d, %r11d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, 
%r14 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x88(%rsi), %rdx + xorl %r12d, %r12d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xf0(%rsp) + movq %r15, 0xf8(%rsp) + movq %r8, 0x100(%rsp) + movq %r9, 0x108(%rsp) + movq %r10, 0x110(%rsp) + movq %r11, 0x118(%rsp) + movq (%rsp), %rax + subq 0x60(%rsp), %rax + movq 0x8(%rsp), %rdx + sbbq 0x68(%rsp), %rdx + movq 0x10(%rsp), %r8 + sbbq 0x70(%rsp), %r8 + movq 0x18(%rsp), %r9 + sbbq 0x78(%rsp), %r9 + movq 0x20(%rsp), %r10 + sbbq 0x80(%rsp), %r10 + movq 0x28(%rsp), %r11 + sbbq 0x88(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, (%rsp) + sbbq %rcx, %rdx + movq %rdx, 0x8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x10(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x18(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x20(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x28(%rsp) + movq 0xc0(%rsp), %rax + subq (%rsp), %rax + movq 0xc8(%rsp), %rdx + sbbq 0x8(%rsp), %rdx + movq 0xd0(%rsp), %r8 + sbbq 0x10(%rsp), %r8 + movq 0xd8(%rsp), %r9 + sbbq 0x18(%rsp), %r9 + movq 0xe0(%rsp), %r10 + sbbq 0x20(%rsp), %r10 + movq 0xe8(%rsp), %r11 + sbbq 0x28(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0xc0(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0xc8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0xd0(%rsp) + sbbq $0x0, %r9 + movq %r9, 0xd8(%rsp) + sbbq $0x0, %r10 + movq %r10, 0xe0(%rsp) + sbbq $0x0, %r11 + movq %r11, 0xe8(%rsp) + movq 0x120(%rsp), %rdx + xorl %r15d, %r15d + mulxq 0x90(%rsp), %r8, %r9 + mulxq 0x98(%rsp), %rbx, %r10 + addq %rbx, 
%r9 + mulxq 0xa0(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0xa8(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0xb0(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0xb8(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x128(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x130(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x138(%rsp), %rdx + xorl %r10d, %r10d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x140(%rsp), %rdx + xorl %r11d, %r11d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0xb0(%rsp), %rax, %rbx + 
adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x148(%rsp), %rdx + xorl %r12d, %r12d + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0xb8(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq %r8, 0xa0(%rsp) + movq %r9, 0xa8(%rsp) + movq %r10, 0xb0(%rsp) + movq %r11, 0xb8(%rsp) + movq 0x158(%rsp), %rcx + movq 0x60(%rcx), %rdx + xorl %r15d, %r15d + mulxq 0xf0(%rsp), %r8, %r9 + mulxq 0xf8(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x100(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x108(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x110(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x118(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x68(%rcx), %rdx + xorl %r8d, %r8d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x70(%rcx), %rdx + xorl 
%r9d, %r9d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x78(%rcx), %rdx + xorl %r10d, %r10d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x80(%rcx), %rdx + xorl %r11d, %r11d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x88(%rcx), %rdx + xorl %r12d, %r12d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + 
addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xf0(%rsp) + movq %r15, 0xf8(%rsp) + movq %r8, 0x100(%rsp) + movq %r9, 0x108(%rsp) + movq %r10, 0x110(%rsp) + movq %r11, 0x118(%rsp) + movq 0xc0(%rsp), %rdx + xorl %r15d, %r15d + mulxq 0x30(%rsp), %r8, %r9 + mulxq 0x38(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x40(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x48(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x50(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x58(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0xc8(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x38(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x40(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x58(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0xd0(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x38(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x40(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x58(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0xd8(%rsp), %rdx + xorl %r10d, %r10d + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x38(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x40(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x58(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + 
adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0xe0(%rsp), %rdx + xorl %r11d, %r11d + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x38(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x40(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x58(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0xe8(%rsp), %rdx + xorl %r12d, %r12d + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x38(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x40(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x58(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %r8, 0xd0(%rsp) + movq %r9, 0xd8(%rsp) + movq %r10, 0xe0(%rsp) + movq %r11, 0xe8(%rsp) + movq 0xc0(%rsp), %rax + subq 0x90(%rsp), %rax + movq 0xc8(%rsp), %rdx + sbbq 0x98(%rsp), %rdx + movq 0xd0(%rsp), %r8 + sbbq 0xa0(%rsp), %r8 + movq 0xd8(%rsp), %r9 + sbbq 0xa8(%rsp), %r9 + movq 0xe0(%rsp), %r10 + sbbq 0xb0(%rsp), %r10 + movq 0xe8(%rsp), %r11 + sbbq 0xb8(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0xc0(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0xc8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0xd0(%rsp) + sbbq $0x0, %r9 + movq %r9, 0xd8(%rsp) + sbbq $0x0, %r10 + movq %r10, 0xe0(%rsp) + sbbq $0x0, %r11 + movq %r11, 0xe8(%rsp) + movq 0x158(%rsp), %rcx + movq 0x60(%rcx), %r8 + movq 0x68(%rcx), %r9 + movq 0x70(%rcx), %r10 + movq 0x78(%rcx), %r11 + movq 0x80(%rcx), %rbx + movq 0x88(%rcx), %rbp + movq %r8, %rax + movq %r9, %rdx + orq %r10, %rax + orq %r11, %rdx + orq %rbx, %rax + orq %rbp, %rdx + orq %rdx, %rax + negq %rax + sbbq %rax, %rax + movq 0x150(%rsp), %rsi + movq 0x60(%rsi), %r12 + movq 0x68(%rsi), %r13 + movq 0x70(%rsi), %r14 + movq 
0x78(%rsi), %r15 + movq 0x80(%rsi), %rdx + movq 0x88(%rsi), %rcx + cmoveq %r12, %r8 + cmoveq %r13, %r9 + cmoveq %r14, %r10 + cmoveq %r15, %r11 + cmoveq %rdx, %rbx + cmoveq %rcx, %rbp + orq %r13, %r12 + orq %r15, %r14 + orq %rcx, %rdx + orq %r14, %r12 + orq %r12, %rdx + negq %rdx + sbbq %rdx, %rdx + cmpq %rdx, %rax + cmoveq 0xf0(%rsp), %r8 + cmoveq 0xf8(%rsp), %r9 + cmoveq 0x100(%rsp), %r10 + cmoveq 0x108(%rsp), %r11 + cmoveq 0x110(%rsp), %rbx + cmoveq 0x118(%rsp), %rbp + movq %r8, 0xf0(%rsp) + movq %r9, 0xf8(%rsp) + movq %r10, 0x100(%rsp) + movq %r11, 0x108(%rsp) + movq %rbx, 0x110(%rsp) + movq %rbp, 0x118(%rsp) + movq 0x158(%rsp), %rcx + movq 0x150(%rsp), %rsi + movq (%rsp), %r8 + cmovbq (%rsi), %r8 + cmova (%rcx), %r8 + movq 0x8(%rsp), %r9 + cmovbq 0x8(%rsi), %r9 + cmova 0x8(%rcx), %r9 + movq 0x10(%rsp), %r10 + cmovbq 0x10(%rsi), %r10 + cmova 0x10(%rcx), %r10 + movq 0x18(%rsp), %r11 + cmovbq 0x18(%rsi), %r11 + cmova 0x18(%rcx), %r11 + movq 0x20(%rsp), %rbx + cmovbq 0x20(%rsi), %rbx + cmova 0x20(%rcx), %rbx + movq 0x28(%rsp), %rbp + cmovbq 0x28(%rsi), %rbp + cmova 0x28(%rcx), %rbp + movq 0xc0(%rsp), %r12 + cmovbq 0x30(%rsi), %r12 + cmova 0x30(%rcx), %r12 + movq 0xc8(%rsp), %r13 + cmovbq 0x38(%rsi), %r13 + cmova 0x38(%rcx), %r13 + movq 0xd0(%rsp), %r14 + cmovbq 0x40(%rsi), %r14 + cmova 0x40(%rcx), %r14 + movq 0xd8(%rsp), %r15 + cmovbq 0x48(%rsi), %r15 + cmova 0x48(%rcx), %r15 + movq 0xe0(%rsp), %rdx + cmovbq 0x50(%rsi), %rdx + cmova 0x50(%rcx), %rdx + movq 0xe8(%rsp), %rax + cmovbq 0x58(%rsi), %rax + cmova 0x58(%rcx), %rax + movq %r8, (%rdi) + movq %r9, 0x8(%rdi) + movq %r10, 0x10(%rdi) + movq %r11, 0x18(%rdi) + movq %rbx, 0x20(%rdi) + movq %rbp, 0x28(%rdi) + movq 0xf0(%rsp), %r8 + movq 0xf8(%rsp), %r9 + movq 0x100(%rsp), %r10 + movq 0x108(%rsp), %r11 + movq 0x110(%rsp), %rbx + movq 0x118(%rsp), %rbp + movq %r12, 0x30(%rdi) + movq %r13, 0x38(%rdi) + movq %r14, 0x40(%rdi) + movq %r15, 0x48(%rdi) + movq %rdx, 0x50(%rdi) + movq %rax, 0x58(%rdi) + movq %r8, 0x60(%rdi) + movq %r9, 0x68(%rdi) + movq %r10, 0x70(%rdi) + movq %r11, 0x78(%rdi) + movq %rbx, 0x80(%rdi) + movq %rbp, 0x88(%rdi) + addq $0x160, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +p384_montjscalarmul_p384_montjdouble: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x158, %rsp + movq %rdi, 0x150(%rsp) + movq 0x60(%rsi), %rdx + mulxq 0x68(%rsi), %r9, %r10 + mulxq 0x78(%rsi), %r11, %r12 + mulxq 0x88(%rsi), %r13, %r14 + movq 0x78(%rsi), %rdx + mulxq 0x80(%rsi), %r15, %rcx + xorl %ebp, %ebp + movq 0x70(%rsi), %rdx + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x68(%rsi), %rdx + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x88(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %rcx + adcq %rbp, %rcx + xorl %ebp, %ebp + movq 0x80(%rsi), %rdx + mulxq 0x60(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq 0x70(%rsi), %rdx + mulxq 0x78(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x80(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x88(%rsi), %rax, %rdx + adcxq %rax, %r15 + adoxq %rdx, %rcx + movq 0x88(%rsi), %rdx + mulxq 0x80(%rsi), %rbx, %rbp + mulxq 0x78(%rsi), %rax, %rdx + adcxq %rax, %rcx + adoxq %rdx, %rbx + movl $0x0, %eax + adcxq %rax, %rbx + adoxq %rax, %rbp + adcq %rax, 
%rbp + xorq %rax, %rax + movq 0x60(%rsi), %rdx + mulxq 0x60(%rsi), %r8, %rax + adcxq %r9, %r9 + adoxq %rax, %r9 + movq 0x68(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x70(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x78(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %r15, %r15 + adoxq %rdx, %r15 + movq 0x80(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %rcx, %rcx + adoxq %rax, %rcx + adcxq %rbx, %rbx + adoxq %rdx, %rbx + movq 0x88(%rsi), %rdx + mulxq %rdx, %rax, %rdi + adcxq %rbp, %rbp + adoxq %rax, %rbp + movl $0x0, %eax + adcxq %rax, %rdi + adoxq %rax, %rdi + movq %rbx, (%rsp) + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r8, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r8 + addq %rbx, %rax + adcq %rdx, %r8 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r9 + sbbq %r8, %r10 + sbbq %rbx, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rdx, %r8 + sbbq $0x0, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r9, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r9 + addq %rbx, %rax + adcq %rdx, %r9 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r10 + sbbq %r9, %r11 + sbbq %rbx, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rdx, %r9 + sbbq $0x0, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r10, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r10 + addq %rbx, %rax + adcq %rdx, %r10 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r11 + sbbq %r10, %r12 + sbbq %rbx, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rdx, %r10 + sbbq $0x0, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r11, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r11 + addq %rbx, %rax + adcq %rdx, %r11 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r12 + sbbq %r11, %r13 + sbbq %rbx, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rdx, %r11 + sbbq $0x0, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r12, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r12 + addq %rbx, %rax + adcq %rdx, %r12 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r13 + sbbq %r12, %r8 + sbbq %rbx, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rdx, %r12 + sbbq $0x0, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r13, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r13 + addq %rbx, %rax + adcq %rdx, %r13 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r8 + sbbq %r13, %r9 + sbbq %rbx, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rdx, %r13 + sbbq $0x0, %r13 + movq (%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, (%rsp) + movq %r15, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq 
%rbx, 0x18(%rsp) + movq %rbp, 0x20(%rsp) + movq %rdi, 0x28(%rsp) + movq 0x30(%rsi), %rdx + mulxq 0x38(%rsi), %r9, %r10 + mulxq 0x48(%rsi), %r11, %r12 + mulxq 0x58(%rsi), %r13, %r14 + movq 0x48(%rsi), %rdx + mulxq 0x50(%rsi), %r15, %rcx + xorl %ebp, %ebp + movq 0x40(%rsi), %rdx + mulxq 0x30(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x38(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x38(%rsi), %rdx + mulxq 0x48(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x50(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x58(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %rcx + adcq %rbp, %rcx + xorl %ebp, %ebp + movq 0x50(%rsi), %rdx + mulxq 0x30(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq 0x40(%rsi), %rdx + mulxq 0x48(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x50(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x58(%rsi), %rax, %rdx + adcxq %rax, %r15 + adoxq %rdx, %rcx + movq 0x58(%rsi), %rdx + mulxq 0x50(%rsi), %rbx, %rbp + mulxq 0x48(%rsi), %rax, %rdx + adcxq %rax, %rcx + adoxq %rdx, %rbx + movl $0x0, %eax + adcxq %rax, %rbx + adoxq %rax, %rbp + adcq %rax, %rbp + xorq %rax, %rax + movq 0x30(%rsi), %rdx + mulxq 0x30(%rsi), %r8, %rax + adcxq %r9, %r9 + adoxq %rax, %r9 + movq 0x38(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x40(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x48(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %r15, %r15 + adoxq %rdx, %r15 + movq 0x50(%rsi), %rdx + mulxq %rdx, %rax, %rdx + adcxq %rcx, %rcx + adoxq %rax, %rcx + adcxq %rbx, %rbx + adoxq %rdx, %rbx + movq 0x58(%rsi), %rdx + mulxq %rdx, %rax, %rdi + adcxq %rbp, %rbp + adoxq %rax, %rbp + movl $0x0, %eax + adcxq %rax, %rdi + adoxq %rax, %rdi + movq %rbx, 0x30(%rsp) + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r8, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r8 + addq %rbx, %rax + adcq %rdx, %r8 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r9 + sbbq %r8, %r10 + sbbq %rbx, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rdx, %r8 + sbbq $0x0, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r9, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r9 + addq %rbx, %rax + adcq %rdx, %r9 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r10 + sbbq %r9, %r11 + sbbq %rbx, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rdx, %r9 + sbbq $0x0, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r10, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r10 + addq %rbx, %rax + adcq %rdx, %r10 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r11 + sbbq %r10, %r12 + sbbq %rbx, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rdx, %r10 + sbbq $0x0, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r11, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r11 + addq %rbx, %rax + adcq %rdx, %r11 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r12 + sbbq %r11, %r13 + sbbq %rbx, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rdx, %r11 + sbbq $0x0, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + movabsq $0xffffffff00000001, %rax + mulxq 
%rax, %r12, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r12 + addq %rbx, %rax + adcq %rdx, %r12 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r13 + sbbq %r12, %r8 + sbbq %rbx, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rdx, %r12 + sbbq $0x0, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r13, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r13 + addq %rbx, %rax + adcq %rdx, %r13 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r8 + sbbq %r13, %r9 + sbbq %rbx, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rdx, %r13 + sbbq $0x0, %r13 + movq 0x30(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq %rcx, 0x40(%rsp) + movq %rbx, 0x48(%rsp) + movq %rbp, 0x50(%rsp) + movq %rdi, 0x58(%rsp) + movq (%rsi), %rax + addq (%rsp), %rax + movq 0x8(%rsi), %rcx + adcq 0x8(%rsp), %rcx + movq 0x10(%rsi), %r8 + adcq 0x10(%rsp), %r8 + movq 0x18(%rsi), %r9 + adcq 0x18(%rsp), %r9 + movq 0x20(%rsi), %r10 + adcq 0x20(%rsp), %r10 + movq 0x28(%rsi), %r11 + adcq 0x28(%rsp), %r11 + sbbq %rdx, %rdx + movl $0x1, %ebx + andq %rdx, %rbx + movl $0xffffffff, %ebp + andq %rbp, %rdx + xorq %rbp, %rbp + subq %rdx, %rbp + addq %rbp, %rax + movq %rax, 0xf0(%rsp) + adcq %rdx, %rcx + movq %rcx, 0xf8(%rsp) + adcq %rbx, %r8 + movq %r8, 0x100(%rsp) + adcq $0x0, %r9 + movq %r9, 0x108(%rsp) + adcq $0x0, %r10 + movq %r10, 0x110(%rsp) + adcq $0x0, %r11 + movq %r11, 0x118(%rsp) + movq (%rsi), %rax + subq (%rsp), %rax + movq 0x8(%rsi), %rdx + sbbq 0x8(%rsp), %rdx + movq 0x10(%rsi), %r8 + sbbq 0x10(%rsp), %r8 + movq 0x18(%rsi), %r9 + sbbq 0x18(%rsp), %r9 + movq 0x20(%rsi), %r10 + sbbq 0x20(%rsp), %r10 + movq 0x28(%rsi), %r11 + sbbq 0x28(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %ebx + andq %rbx, %rcx + xorq %rbx, %rbx + subq %rcx, %rbx + subq %rbx, %rax + movq %rax, 0xc0(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0xc8(%rsp) + sbbq %rax, %rax + andq %rbx, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0xd0(%rsp) + sbbq $0x0, %r9 + movq %r9, 0xd8(%rsp) + sbbq $0x0, %r10 + movq %r10, 0xe0(%rsp) + sbbq $0x0, %r11 + movq %r11, 0xe8(%rsp) + movq 0xc0(%rsp), %rdx + xorl %r15d, %r15d + mulxq 0xf0(%rsp), %r8, %r9 + mulxq 0xf8(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x100(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x108(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x110(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x118(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0xc8(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 
0x100(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0xd0(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0xd8(%rsp), %rdx + xorl %r10d, %r10d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0xe0(%rsp), %rdx + xorl %r11d, %r11d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0xe8(%rsp), %rdx + xorl %r12d, %r12d + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + 
mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x118(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x60(%rsp) + movq %r15, 0x68(%rsp) + movq %r8, 0x70(%rsp) + movq %r9, 0x78(%rsp) + movq %r10, 0x80(%rsp) + movq %r11, 0x88(%rsp) + movq 0x30(%rsi), %rax + addq 0x60(%rsi), %rax + movq 0x38(%rsi), %rcx + adcq 0x68(%rsi), %rcx + movq 0x40(%rsi), %r8 + adcq 0x70(%rsi), %r8 + movq 0x48(%rsi), %r9 + adcq 0x78(%rsi), %r9 + movq 0x50(%rsi), %r10 + adcq 0x80(%rsi), %r10 + movq 0x58(%rsi), %r11 + adcq 0x88(%rsi), %r11 + movl $0x0, %edx + adcq %rdx, %rdx + movabsq $0xffffffff00000001, %rbp + addq %rbp, %rax + movl $0xffffffff, %ebp + adcq %rbp, %rcx + adcq $0x1, %r8 + adcq $0x0, %r9 + adcq $0x0, %r10 + adcq $0x0, %r11 + adcq $0xffffffffffffffff, %rdx + movl $0x1, %ebx + andq %rdx, %rbx + andq %rbp, %rdx + xorq %rbp, %rbp + subq %rdx, %rbp + subq %rbp, %rax + movq %rax, 0xf0(%rsp) + sbbq %rdx, %rcx + movq %rcx, 0xf8(%rsp) + sbbq %rbx, %r8 + movq %r8, 0x100(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x108(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x110(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x118(%rsp) + movq 0x60(%rsp), %rdx + mulxq 0x68(%rsp), %r9, %r10 + mulxq 0x78(%rsp), %r11, %r12 + mulxq 0x88(%rsp), %r13, %r14 + movq 0x78(%rsp), %rdx + mulxq 0x80(%rsp), %r15, %rcx + xorl %ebp, %ebp + movq 0x70(%rsp), %rdx + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x68(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x68(%rsp), %rdx + mulxq 0x78(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x80(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x88(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %rcx + adcq %rbp, %rcx + xorl %ebp, %ebp + movq 0x80(%rsp), %rdx + mulxq 0x60(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq 0x70(%rsp), %rdx + mulxq 0x78(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x80(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x88(%rsp), %rax, %rdx + adcxq %rax, %r15 + adoxq %rdx, %rcx + movq 0x88(%rsp), %rdx + mulxq 0x80(%rsp), %rbx, %rbp + mulxq 0x78(%rsp), %rax, %rdx + adcxq %rax, %rcx + adoxq %rdx, %rbx + movl $0x0, %eax + adcxq %rax, %rbx + adoxq %rax, %rbp + adcq %rax, %rbp + xorq %rax, %rax + movq 0x60(%rsp), %rdx + mulxq 0x60(%rsp), %r8, %rax + adcxq %r9, %r9 + adoxq %rax, %r9 + movq 0x68(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x70(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 
0x78(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %r15, %r15 + adoxq %rdx, %r15 + movq 0x80(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %rcx, %rcx + adoxq %rax, %rcx + adcxq %rbx, %rbx + adoxq %rdx, %rbx + movq 0x88(%rsp), %rdx + mulxq %rdx, %rax, %rdi + adcxq %rbp, %rbp + adoxq %rax, %rbp + movl $0x0, %eax + adcxq %rax, %rdi + adoxq %rax, %rdi + movq %rbx, 0x120(%rsp) + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r8, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r8 + addq %rbx, %rax + adcq %rdx, %r8 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r9 + sbbq %r8, %r10 + sbbq %rbx, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rdx, %r8 + sbbq $0x0, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r9, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r9 + addq %rbx, %rax + adcq %rdx, %r9 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r10 + sbbq %r9, %r11 + sbbq %rbx, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rdx, %r9 + sbbq $0x0, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r10, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r10 + addq %rbx, %rax + adcq %rdx, %r10 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r11 + sbbq %r10, %r12 + sbbq %rbx, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rdx, %r10 + sbbq $0x0, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r11, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r11 + addq %rbx, %rax + adcq %rdx, %r11 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r12 + sbbq %r11, %r13 + sbbq %rbx, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rdx, %r11 + sbbq $0x0, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r12, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r12 + addq %rbx, %rax + adcq %rdx, %r12 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r13 + sbbq %r12, %r8 + sbbq %rbx, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rdx, %r12 + sbbq $0x0, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r13, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r13 + addq %rbx, %rax + adcq %rdx, %r13 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r8 + sbbq %r13, %r9 + sbbq %rbx, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rdx, %r13 + sbbq $0x0, %r13 + movq 0x120(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, 0x120(%rsp) + movq %r15, 0x128(%rsp) + movq %rcx, 0x130(%rsp) + movq %rbx, 0x138(%rsp) + movq %rbp, 0x140(%rsp) + movq %rdi, 0x148(%rsp) + movq 0x30(%rsp), %rdx + xorl %r15d, %r15d + mulxq (%rsi), %r8, %r9 + mulxq 0x8(%rsi), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x10(%rsi), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x18(%rsi), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x20(%rsi), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x28(%rsi), 
%rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x38(%rsp), %rdx + xorl %r8d, %r8d + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x18(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x20(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x28(%rsi), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x40(%rsp), %rdx + xorl %r9d, %r9d + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x18(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x20(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x28(%rsi), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x48(%rsp), %rdx + xorl %r10d, %r10d + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x18(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x20(%rsi), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x28(%rsi), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x50(%rsp), %rdx + xorl %r11d, %r11d + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x18(%rsi), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x20(%rsi), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x28(%rsi), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl 
%ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x58(%rsp), %rdx + xorl %r12d, %r12d + mulxq (%rsi), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x8(%rsi), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x10(%rsi), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x18(%rsi), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x20(%rsi), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x28(%rsi), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq %r8, 0xa0(%rsp) + movq %r9, 0xa8(%rsp) + movq %r10, 0xb0(%rsp) + movq %r11, 0xb8(%rsp) + movq 0xf0(%rsp), %rdx + mulxq 0xf8(%rsp), %r9, %r10 + mulxq 0x108(%rsp), %r11, %r12 + mulxq 0x118(%rsp), %r13, %r14 + movq 0x108(%rsp), %rdx + mulxq 0x110(%rsp), %r15, %rcx + xorl %ebp, %ebp + movq 0x100(%rsp), %rdx + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0xf8(%rsp), %rdx + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x118(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %rcx + adcq %rbp, %rcx + xorl %ebp, %ebp + movq 0x110(%rsp), %rdx + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq 0x100(%rsp), %rdx + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x118(%rsp), %rax, %rdx + adcxq %rax, %r15 + adoxq %rdx, %rcx + movq 0x118(%rsp), %rdx + mulxq 0x110(%rsp), %rbx, %rbp + mulxq 0x108(%rsp), %rax, %rdx + adcxq %rax, %rcx + adoxq %rdx, %rbx + movl $0x0, %eax + adcxq %rax, %rbx + adoxq %rax, %rbp + adcq %rax, %rbp + xorq %rax, %rax + movq 0xf0(%rsp), %rdx + mulxq 0xf0(%rsp), %r8, %rax + adcxq %r9, %r9 + adoxq %rax, %r9 + movq 0xf8(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x100(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x108(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %r15, %r15 + adoxq %rdx, %r15 + movq 0x110(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %rcx, %rcx + adoxq %rax, %rcx + adcxq %rbx, %rbx + adoxq %rdx, %rbx + movq 0x118(%rsp), %rdx + mulxq %rdx, 
%rax, %rdi + adcxq %rbp, %rbp + adoxq %rax, %rbp + movl $0x0, %eax + adcxq %rax, %rdi + adoxq %rax, %rdi + movq %rbx, 0xc0(%rsp) + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r8, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r8 + addq %rbx, %rax + adcq %rdx, %r8 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r9 + sbbq %r8, %r10 + sbbq %rbx, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rdx, %r8 + sbbq $0x0, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r9, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r9 + addq %rbx, %rax + adcq %rdx, %r9 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r10 + sbbq %r9, %r11 + sbbq %rbx, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rdx, %r9 + sbbq $0x0, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r10, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r10 + addq %rbx, %rax + adcq %rdx, %r10 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r11 + sbbq %r10, %r12 + sbbq %rbx, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rdx, %r10 + sbbq $0x0, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r11, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r11 + addq %rbx, %rax + adcq %rdx, %r11 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r12 + sbbq %r11, %r13 + sbbq %rbx, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rdx, %r11 + sbbq $0x0, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r12, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r12 + addq %rbx, %rax + adcq %rdx, %r12 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r13 + sbbq %r12, %r8 + sbbq %rbx, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rdx, %r12 + sbbq $0x0, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r13, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r13 + addq %rbx, %rax + adcq %rdx, %r13 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r8 + sbbq %r13, %r9 + sbbq %rbx, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rdx, %r13 + sbbq $0x0, %r13 + movq 0xc0(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %rcx, 0xd0(%rsp) + movq %rbx, 0xd8(%rsp) + movq %rbp, 0xe0(%rsp) + movq %rdi, 0xe8(%rsp) + movabsq $0xffffffff, %r8 + subq 0x120(%rsp), %r8 + movabsq $0xffffffff00000000, %r9 + sbbq 0x128(%rsp), %r9 + movq $0xfffffffffffffffe, %r10 + sbbq 0x130(%rsp), %r10 + movq $0xffffffffffffffff, %r11 + sbbq 0x138(%rsp), %r11 + movq $0xffffffffffffffff, %r12 + sbbq 0x140(%rsp), %r12 + movq $0xffffffffffffffff, %r13 + sbbq 0x148(%rsp), %r13 + movq $0x9, %rdx + mulxq %r8, %r8, %rax + mulxq %r9, %r9, %rcx + addq %rax, %r9 + mulxq %r10, %r10, %rax + adcq %rcx, %r10 + mulxq %r11, %r11, %rcx + adcq %rax, %r11 + mulxq %r12, %r12, %rax + adcq %rcx, %r12 + mulxq %r13, %r13, %r14 
+ adcq %rax, %r13 + adcq $0x1, %r14 + xorl %ecx, %ecx + movq $0xc, %rdx + mulxq 0x90(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x98(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0xa0(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0xa8(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0xb0(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0xb8(%rsp), %rax, %rdx + adcxq %rax, %r13 + adoxq %r14, %rdx + adcxq %rcx, %rdx + xorq %rcx, %rcx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movl $0xffffffff, %eax + mulxq %rax, %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %rdx, %r10 + movl $0x0, %eax + movl $0x0, %ecx + adoxq %rax, %rax + adcq %rax, %r11 + adcq %rcx, %r12 + adcq %rcx, %r13 + adcq %rcx, %rcx + subq $0x1, %rcx + movl $0xffffffff, %edx + xorq %rax, %rax + andq %rcx, %rdx + subq %rdx, %rax + andq $0x1, %rcx + subq %rax, %r8 + movq %r8, 0x120(%rsp) + sbbq %rdx, %r9 + movq %r9, 0x128(%rsp) + sbbq %rcx, %r10 + movq %r10, 0x130(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x138(%rsp) + sbbq $0x0, %r12 + movq %r12, 0x140(%rsp) + sbbq $0x0, %r13 + movq %r13, 0x148(%rsp) + movq 0xc0(%rsp), %rax + subq (%rsp), %rax + movq 0xc8(%rsp), %rdx + sbbq 0x8(%rsp), %rdx + movq 0xd0(%rsp), %r8 + sbbq 0x10(%rsp), %r8 + movq 0xd8(%rsp), %r9 + sbbq 0x18(%rsp), %r9 + movq 0xe0(%rsp), %r10 + sbbq 0x20(%rsp), %r10 + movq 0xe8(%rsp), %r11 + sbbq 0x28(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %ebx + andq %rbx, %rcx + xorq %rbx, %rbx + subq %rcx, %rbx + subq %rbx, %rax + movq %rax, 0xf0(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0xf8(%rsp) + sbbq %rax, %rax + andq %rbx, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x100(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x108(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x110(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x118(%rsp) + movq 0x30(%rsp), %rdx + mulxq 0x38(%rsp), %r9, %r10 + mulxq 0x48(%rsp), %r11, %r12 + mulxq 0x58(%rsp), %r13, %r14 + movq 0x48(%rsp), %rdx + mulxq 0x50(%rsp), %r15, %rcx + xorl %ebp, %ebp + movq 0x40(%rsp), %rdx + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x38(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + movq 0x38(%rsp), %rdx + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x58(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adcxq %rbp, %r15 + adoxq %rbp, %rcx + adcq %rbp, %rcx + xorl %ebp, %ebp + movq 0x50(%rsp), %rdx + mulxq 0x30(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + movq 0x40(%rsp), %rdx + mulxq 0x48(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x50(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x58(%rsp), %rax, %rdx + adcxq %rax, %r15 + adoxq %rdx, %rcx + movq 0x58(%rsp), %rdx + mulxq 0x50(%rsp), %rbx, %rbp + mulxq 0x48(%rsp), %rax, %rdx + adcxq %rax, %rcx + adoxq %rdx, %rbx + movl $0x0, %eax + adcxq %rax, %rbx + adoxq %rax, %rbp + adcq %rax, %rbp + xorq %rax, %rax + movq 0x30(%rsp), %rdx + mulxq 0x30(%rsp), %r8, %rax + adcxq %r9, %r9 + adoxq %rax, %r9 + movq 0x38(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r10, %r10 + adoxq %rax, %r10 + adcxq %r11, %r11 + adoxq %rdx, %r11 + movq 0x40(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r12, %r12 + adoxq %rax, %r12 + adcxq %r13, %r13 + adoxq %rdx, %r13 + movq 0x48(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %r14, %r14 + adoxq %rax, %r14 + adcxq %r15, %r15 + adoxq %rdx, %r15 
+ movq 0x50(%rsp), %rdx + mulxq %rdx, %rax, %rdx + adcxq %rcx, %rcx + adoxq %rax, %rcx + adcxq %rbx, %rbx + adoxq %rdx, %rbx + movq 0x58(%rsp), %rdx + mulxq %rdx, %rax, %rdi + adcxq %rbp, %rbp + adoxq %rax, %rbp + movl $0x0, %eax + adcxq %rax, %rdi + adoxq %rax, %rdi + movq %rbx, 0xc0(%rsp) + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r8, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r8 + addq %rbx, %rax + adcq %rdx, %r8 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r9 + sbbq %r8, %r10 + sbbq %rbx, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rdx, %r8 + sbbq $0x0, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r9, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r9 + addq %rbx, %rax + adcq %rdx, %r9 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r10 + sbbq %r9, %r11 + sbbq %rbx, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rdx, %r9 + sbbq $0x0, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r10, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r10 + addq %rbx, %rax + adcq %rdx, %r10 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r11 + sbbq %r10, %r12 + sbbq %rbx, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rdx, %r10 + sbbq $0x0, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r11, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r11 + addq %rbx, %rax + adcq %rdx, %r11 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r12 + sbbq %r11, %r13 + sbbq %rbx, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rdx, %r11 + sbbq $0x0, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r12, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r12 + addq %rbx, %rax + adcq %rdx, %r12 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r13 + sbbq %r12, %r8 + sbbq %rbx, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rdx, %r12 + sbbq $0x0, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %r13, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %rbx, %r13 + addq %rbx, %rax + adcq %rdx, %r13 + movl $0x0, %ebx + adcq %rbx, %rbx + subq %rax, %r8 + sbbq %r13, %r9 + sbbq %rbx, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rdx, %r13 + sbbq $0x0, %r13 + movq 0xc0(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %rcx, 0xd0(%rsp) + movq %rbx, 0xd8(%rsp) + movq %rbp, 0xe0(%rsp) + movq %rdi, 0xe8(%rsp) + movq 0x150(%rsp), %rdi + movq 0xf0(%rsp), %rax + subq 0x30(%rsp), %rax + movq 0xf8(%rsp), %rdx + sbbq 0x38(%rsp), %rdx + movq 0x100(%rsp), %r8 + sbbq 0x40(%rsp), %r8 + movq 0x108(%rsp), %r9 + sbbq 0x48(%rsp), %r9 + movq 0x110(%rsp), %r10 + sbbq 0x50(%rsp), %r10 + movq 0x118(%rsp), %r11 + sbbq 0x58(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %ebx + andq %rbx, %rcx + xorq %rbx, %rbx + subq %rcx, %rbx + 
subq %rbx, %rax + movq %rax, 0x60(%rdi) + sbbq %rcx, %rdx + movq %rdx, 0x68(%rdi) + sbbq %rax, %rax + andq %rbx, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x70(%rdi) + sbbq $0x0, %r9 + movq %r9, 0x78(%rdi) + sbbq $0x0, %r10 + movq %r10, 0x80(%rdi) + sbbq $0x0, %r11 + movq %r11, 0x88(%rdi) + movq 0x60(%rsp), %rdx + xorl %r15d, %r15d + mulxq 0x120(%rsp), %r8, %r9 + mulxq 0x128(%rsp), %rbx, %r10 + addq %rbx, %r9 + mulxq 0x130(%rsp), %rbx, %r11 + adcq %rbx, %r10 + mulxq 0x138(%rsp), %rbx, %r12 + adcq %rbx, %r11 + mulxq 0x140(%rsp), %rbx, %r13 + adcq %rbx, %r12 + mulxq 0x148(%rsp), %rbx, %r14 + adcq %rbx, %r13 + adcq %r15, %r14 + movq %r8, %rdx + shlq $0x20, %rdx + addq %r8, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r8, %rbx + adcq %r8, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rbx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rdx + addq %rdx, %r14 + adcq $0x0, %r15 + movq 0x68(%rsp), %rdx + xorl %r8d, %r8d + mulxq 0x120(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x128(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x130(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x138(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x140(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + adoxq %r8, %r15 + mulxq 0x148(%rsp), %rax, %rbx + adcq %rax, %r14 + adcq %rbx, %r15 + adcq %r8, %r8 + movq %r9, %rdx + shlq $0x20, %rdx + addq %r9, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r9, %rbx + adcq %r9, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rbx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rdx + addq %rdx, %r15 + adcq $0x0, %r8 + movq 0x70(%rsp), %rdx + xorl %r9d, %r9d + mulxq 0x120(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x128(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x130(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x138(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x140(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + adoxq %r9, %r8 + mulxq 0x148(%rsp), %rax, %rbx + adcq %rax, %r15 + adcq %rbx, %r8 + adcq %r9, %r9 + movq %r10, %rdx + shlq $0x20, %rdx + addq %r10, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r10, %rbx + adcq %r10, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rbx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rdx + addq %rdx, %r8 + adcq $0x0, %r9 + movq 0x78(%rsp), %rdx + xorl %r10d, %r10d + mulxq 0x120(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x128(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x130(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x138(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x140(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + adoxq %r10, %r9 + mulxq 0x148(%rsp), %rax, %rbx + adcq %rax, %r8 + adcq %rbx, %r9 + adcq %r10, %r10 + movq %r11, %rdx + shlq $0x20, %rdx + addq %r11, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r11, %rbx + adcq %r11, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rbx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, 
%r15 + sbbq $0x0, %r8 + sbbq $0x0, %rdx + addq %rdx, %r9 + adcq $0x0, %r10 + movq 0x80(%rsp), %rdx + xorl %r11d, %r11d + mulxq 0x120(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x128(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x130(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x138(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x140(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + adoxq %r11, %r10 + mulxq 0x148(%rsp), %rax, %rbx + adcq %rax, %r9 + adcq %rbx, %r10 + adcq %r11, %r11 + movq %r12, %rdx + shlq $0x20, %rdx + addq %r12, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r12, %rbx + adcq %r12, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rbx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rdx + addq %rdx, %r10 + adcq $0x0, %r11 + movq 0x88(%rsp), %rdx + xorl %r12d, %r12d + mulxq 0x120(%rsp), %rax, %rbx + adcxq %rax, %r13 + adoxq %rbx, %r14 + mulxq 0x128(%rsp), %rax, %rbx + adcxq %rax, %r14 + adoxq %rbx, %r15 + mulxq 0x130(%rsp), %rax, %rbx + adcxq %rax, %r15 + adoxq %rbx, %r8 + mulxq 0x138(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0x140(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + adoxq %r12, %r11 + mulxq 0x148(%rsp), %rax, %rbx + adcq %rax, %r10 + adcq %rbx, %r11 + adcq %r12, %r12 + movq %r13, %rdx + shlq $0x20, %rdx + addq %r13, %rdx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rbx, %rax + movl $0xffffffff, %ebx + mulxq %rbx, %r13, %rbx + adcq %r13, %rax + adcq %rdx, %rbx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rbx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rdx + addq %rdx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xf0(%rsp) + movq %r15, 0xf8(%rsp) + movq %r8, 0x100(%rsp) + movq %r9, 0x108(%rsp) + movq %r10, 0x110(%rsp) + movq %r11, 0x118(%rsp) + movq 0xb8(%rsp), %rdx + movq %rdx, %r13 + shrq $0x3e, %rdx + movq 0xb0(%rsp), %r12 + shldq $0x2, %r12, %r13 + movq 0xa8(%rsp), %r11 + shldq $0x2, %r11, %r12 + movq 0xa0(%rsp), %r10 + shldq $0x2, %r10, %r11 + movq 0x98(%rsp), %r9 + shldq $0x2, %r9, %r10 + movq 0x90(%rsp), %r8 + shldq $0x2, %r8, %r9 + shlq $0x2, %r8 + addq $0x1, %rdx + subq 0x120(%rsp), %r8 + sbbq 0x128(%rsp), %r9 + sbbq 0x130(%rsp), %r10 + sbbq 0x138(%rsp), %r11 + sbbq 0x140(%rsp), %r12 + sbbq 0x148(%rsp), %r13 + sbbq $0x0, %rdx + xorq %rcx, %rcx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movl $0xffffffff, %eax + mulxq %rax, %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %rdx, %r10 + movl $0x0, %eax + movl $0x0, %ecx + adoxq %rax, %rax + adcq %rax, %r11 + adcq %rcx, %r12 + adcq %rcx, %r13 + adcq %rcx, %rcx + subq $0x1, %rcx + movl $0xffffffff, %edx + xorq %rax, %rax + andq %rcx, %rdx + subq %rdx, %rax + andq $0x1, %rcx + subq %rax, %r8 + movq %r8, (%rdi) + sbbq %rdx, %r9 + movq %r9, 0x8(%rdi) + sbbq %rcx, %r10 + movq %r10, 0x10(%rdi) + sbbq $0x0, %r11 + movq %r11, 0x18(%rdi) + sbbq $0x0, %r12 + movq %r12, 0x20(%rdi) + sbbq $0x0, %r13 + movq %r13, 
0x28(%rdi) + movabsq $0xffffffff, %r8 + subq 0xc0(%rsp), %r8 + movabsq $0xffffffff00000000, %r9 + sbbq 0xc8(%rsp), %r9 + movq $0xfffffffffffffffe, %r10 + sbbq 0xd0(%rsp), %r10 + movq $0xffffffffffffffff, %r11 + sbbq 0xd8(%rsp), %r11 + movq $0xffffffffffffffff, %r12 + sbbq 0xe0(%rsp), %r12 + movq $0xffffffffffffffff, %r13 + sbbq 0xe8(%rsp), %r13 + movq %r13, %r14 + shrq $0x3d, %r14 + shldq $0x3, %r12, %r13 + shldq $0x3, %r11, %r12 + shldq $0x3, %r10, %r11 + shldq $0x3, %r9, %r10 + shldq $0x3, %r8, %r9 + shlq $0x3, %r8 + addq $0x1, %r14 + xorl %ecx, %ecx + movq $0x3, %rdx + mulxq 0xf0(%rsp), %rax, %rbx + adcxq %rax, %r8 + adoxq %rbx, %r9 + mulxq 0xf8(%rsp), %rax, %rbx + adcxq %rax, %r9 + adoxq %rbx, %r10 + mulxq 0x100(%rsp), %rax, %rbx + adcxq %rax, %r10 + adoxq %rbx, %r11 + mulxq 0x108(%rsp), %rax, %rbx + adcxq %rax, %r11 + adoxq %rbx, %r12 + mulxq 0x110(%rsp), %rax, %rbx + adcxq %rax, %r12 + adoxq %rbx, %r13 + mulxq 0x118(%rsp), %rax, %rdx + adcxq %rax, %r13 + adoxq %r14, %rdx + adcxq %rcx, %rdx + xorq %rcx, %rcx + movabsq $0xffffffff00000001, %rax + mulxq %rax, %rax, %rcx + adcxq %rax, %r8 + adoxq %rcx, %r9 + movl $0xffffffff, %eax + mulxq %rax, %rax, %rcx + adcxq %rax, %r9 + adoxq %rcx, %r10 + adcxq %rdx, %r10 + movl $0x0, %eax + movl $0x0, %ecx + adoxq %rax, %rax + adcq %rax, %r11 + adcq %rcx, %r12 + adcq %rcx, %r13 + adcq %rcx, %rcx + subq $0x1, %rcx + movl $0xffffffff, %edx + xorq %rax, %rax + andq %rcx, %rdx + subq %rdx, %rax + andq $0x1, %rcx + subq %rax, %r8 + movq %r8, 0x30(%rdi) + sbbq %rdx, %r9 + movq %r9, 0x38(%rdi) + sbbq %rcx, %r10 + movq %r10, 0x40(%rdi) + sbbq $0x0, %r11 + movq %r11, 0x48(%rdi) + sbbq $0x0, %r12 + movq %r12, 0x50(%rdi) + sbbq $0x0, %r13 + movq %r13, 0x58(%rdi) + addq $0x158, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif diff --git a/third_party/s2n-bignum/x86_att/p384/p384_montjscalarmul_alt.S b/third_party/s2n-bignum/x86_att/p384/p384_montjscalarmul_alt.S new file mode 100644 index 0000000000..c666db6dbe --- /dev/null +++ b/third_party/s2n-bignum/x86_att/p384/p384_montjscalarmul_alt.S @@ -0,0 +1,9415 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + +// ---------------------------------------------------------------------------- +// Montgomery-Jacobian form scalar multiplication for P-384 +// Input scalar[6], point[18]; output res[18] +// +// extern void p384_montjscalarmul_alt +// (uint64_t res[static 18], +// uint64_t scalar[static 6], +// uint64_t point[static 18]); +// +// This function is a variant of its affine point version p384_scalarmul. +// Here, input and output points are assumed to be in Jacobian form with +// their coordinates in the Montgomery domain. Thus, if priming indicates +// Montgomery form, x' = (2^384 * x) mod p_384 etc., each point argument +// is a triple (x',y',z') representing the affine point (x/z^2,y/z^3) when +// z' is nonzero or the point at infinity (group identity) if z' = 0. +// +// Given scalar = n and point = P, assumed to be on the NIST elliptic +// curve P-384, returns a representation of n * P. If the result is the +// point at infinity (either because the input point was or because the +// scalar was a multiple of p_384) then the output is guaranteed to +// represent the point at infinity, i.e. to have its z coordinate zero. 
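The coordinate convention described above can be summarized with a small C sketch; it is illustrative only, and the field helpers named in the commented-out conversion (fe_inv, fe_sqr, fe_mul, fe_from_mont) are assumed placeholders, not functions provided by this file or exported by s2n-bignum under those names.

#include <stdint.h>

/* A Montgomery-Jacobian P-384 point: three 6-limb (384-bit) coordinates,
   each already in the Montgomery domain, i.e. x' = (2^384 * x) mod p_384. */
typedef struct {
    uint64_t x[6];
    uint64_t y[6];
    uint64_t z[6];
} p384_jac;

/* z' == 0 encodes the point at infinity (the group identity). */
static int p384_jac_is_infinity(const p384_jac *p)
{
    uint64_t acc = 0;
    for (int i = 0; i < 6; i++)
        acc |= p->z[i];
    return acc == 0;
}

/* Conceptual recovery of the affine point (x/z^2, y/z^3), kept as a comment
   because the field helpers are hypothetical:
       fe_inv(zi, p->z);  fe_sqr(zi2, zi);  fe_mul(zi3, zi2, zi);
       fe_mul(ax, p->x, zi2);  fe_mul(ay, p->y, zi3);
       fe_from_mont(ax, ax);  fe_from_mont(ay, ay);
*/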
+// +// Standard x86-64 ABI: RDI = res, RSI = scalar, RDX = point +// Microsoft x64 ABI: RCX = res, RDX = scalar, R8 = point +// ---------------------------------------------------------------------------- + +#include "_internal_s2n_bignum.h" + + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(p384_montjscalarmul_alt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(p384_montjscalarmul_alt) + + .text + .balign 4 + +// Size of individual field elements + +#define NUMSIZE 48 +#define JACSIZE (3*NUMSIZE) + +// Intermediate variables on the stack. +// The table is 16 entries, each of size JACSIZE = 3 * NUMSIZE +// Uppercase syntactic variants make x86_att version simpler to generate. + +#define SCALARB (0*NUMSIZE) +#define scalarb (0*NUMSIZE)(%rsp) +#define ACC (1*NUMSIZE) +#define acc (1*NUMSIZE)(%rsp) +#define TABENT (4*NUMSIZE) +#define tabent (4*NUMSIZE)(%rsp) + +#define TAB (7*NUMSIZE) +#define tab (7*NUMSIZE)(%rsp) + +#define res (55*NUMSIZE)(%rsp) + +#define NSPACE (56*NUMSIZE) + +// Avoid using .rep for the sake of the BoringSSL/AWS-LC delocator, +// which doesn't accept repetitions, assembler macros etc. + +#define selectblock_xz(I) \ + cmpq $I, %rdi ; \ + cmovzq TAB+JACSIZE*(I-1)(%rsp), %rax ; \ + cmovzq TAB+JACSIZE*(I-1)+8(%rsp), %rbx ; \ + cmovzq TAB+JACSIZE*(I-1)+16(%rsp), %rcx ; \ + cmovzq TAB+JACSIZE*(I-1)+24(%rsp), %rdx ; \ + cmovzq TAB+JACSIZE*(I-1)+32(%rsp), %r8 ; \ + cmovzq TAB+JACSIZE*(I-1)+40(%rsp), %r9 ; \ + cmovzq TAB+JACSIZE*(I-1)+96(%rsp), %r10 ; \ + cmovzq TAB+JACSIZE*(I-1)+104(%rsp), %r11 ; \ + cmovzq TAB+JACSIZE*(I-1)+112(%rsp), %r12 ; \ + cmovzq TAB+JACSIZE*(I-1)+120(%rsp), %r13 ; \ + cmovzq TAB+JACSIZE*(I-1)+128(%rsp), %r14 ; \ + cmovzq TAB+JACSIZE*(I-1)+136(%rsp), %r15 + +#define selectblock_y(I) \ + cmpq $I, %rdi ; \ + cmovzq TAB+JACSIZE*(I-1)+48(%rsp), %rax ; \ + cmovzq TAB+JACSIZE*(I-1)+56(%rsp), %rbx ; \ + cmovzq TAB+JACSIZE*(I-1)+64(%rsp), %rcx ; \ + cmovzq TAB+JACSIZE*(I-1)+72(%rsp), %rdx ; \ + cmovzq TAB+JACSIZE*(I-1)+80(%rsp), %r8 ; \ + cmovzq TAB+JACSIZE*(I-1)+88(%rsp), %r9 + +S2N_BN_SYMBOL(p384_montjscalarmul_alt): + +// The Windows version literally calls the standard ABI version. +// This simplifies the proofs since subroutine offsets are fixed. + +#if WINDOWS_ABI + pushq %rdi + pushq %rsi + movq %rcx, %rdi + movq %rdx, %rsi + movq %r8, %rdx + callq p384_montjscalarmul_alt_standard + popq %rsi + popq %rdi + ret + +p384_montjscalarmul_alt_standard: +#endif + +// Real start of the standard ABI code. + + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbp + pushq %rbx + + subq $NSPACE, %rsp + +// Preserve the "res" input argument; others get processed early. + + movq %rdi, res + +// Reduce the input scalar mod n_384, i.e. conditionally subtract n_384. +// Store it to "scalarb". 
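+// In outline, the reduction just described behaves like the following
+// constant-time C-style sketch (illustrative only; the array N_384 simply
+// names the same little-endian words of the group order n_384 that are
+// loaded inline in the code below):
+//
+//   static const uint64_t N_384[6] =
+//     { 0xecec196accc52973, 0x581a0db248b0a77a, 0xc7634d81f4372ddf,
+//       0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff };
+//
+//   uint64_t diff[6], borrow = 0;
+//   for (int i = 0; i < 6; i++) {
+//       unsigned __int128 t =
+//           (unsigned __int128) scalar[i] - N_384[i] - borrow;
+//       diff[i] = (uint64_t) t;
+//       borrow = (uint64_t) (t >> 64) & 1;          // 1 if this word borrowed
+//   }
+//   for (int i = 0; i < 6; i++)                     // borrow set <=> scalar < n_384,
+//       scalarb[i] = borrow ? scalar[i] : diff[i];  // selected with cmovc, not a branch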
+ + movq (%rsi), %r8 + movq $0xecec196accc52973, %rax + subq %rax, %r8 + movq 8(%rsi), %r9 + movq $0x581a0db248b0a77a, %rax + sbbq %rax, %r9 + movq 16(%rsi), %r10 + movq $0xc7634d81f4372ddf, %rax + sbbq %rax, %r10 + movq 24(%rsi), %r11 + movq $0xffffffffffffffff, %rax + sbbq %rax, %r11 + movq 32(%rsi), %r12 + sbbq %rax, %r12 + movq 40(%rsi), %r13 + sbbq %rax, %r13 + + cmovcq (%rsi), %r8 + cmovcq 8(%rsi), %r9 + cmovcq 16(%rsi), %r10 + cmovcq 24(%rsi), %r11 + cmovcq 32(%rsi), %r12 + cmovcq 40(%rsi), %r13 + + movq %r8, SCALARB(%rsp) + movq %r9, SCALARB+8(%rsp) + movq %r10, SCALARB+16(%rsp) + movq %r11, SCALARB+24(%rsp) + movq %r12, SCALARB+32(%rsp) + movq %r13, SCALARB+40(%rsp) + +// Set the tab[0] table entry to the input point = 1 * P + + movq (%rdx), %rax + movq %rax, TAB(%rsp) + movq 8(%rdx), %rax + movq %rax, TAB+8(%rsp) + movq 16(%rdx), %rax + movq %rax, TAB+16(%rsp) + movq 24(%rdx), %rax + movq %rax, TAB+24(%rsp) + movq 32(%rdx), %rax + movq %rax, TAB+32(%rsp) + movq 40(%rdx), %rax + movq %rax, TAB+40(%rsp) + + movq 48(%rdx), %rax + movq %rax, TAB+48(%rsp) + movq 56(%rdx), %rax + movq %rax, TAB+56(%rsp) + movq 64(%rdx), %rax + movq %rax, TAB+64(%rsp) + movq 72(%rdx), %rax + movq %rax, TAB+72(%rsp) + movq 80(%rdx), %rax + movq %rax, TAB+80(%rsp) + movq 88(%rdx), %rax + movq %rax, TAB+88(%rsp) + + movq 96(%rdx), %rax + movq %rax, TAB+96(%rsp) + movq 104(%rdx), %rax + movq %rax, TAB+104(%rsp) + movq 112(%rdx), %rax + movq %rax, TAB+112(%rsp) + movq 120(%rdx), %rax + movq %rax, TAB+120(%rsp) + movq 128(%rdx), %rax + movq %rax, TAB+128(%rsp) + movq 136(%rdx), %rax + movq %rax, TAB+136(%rsp) + +// Compute and record tab[1] = 2 * p, ..., tab[15] = 16 * P + + leaq TAB+JACSIZE*1(%rsp), %rdi + leaq TAB(%rsp), %rsi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq TAB+JACSIZE*2(%rsp), %rdi + leaq TAB+JACSIZE*1(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_alt_p384_montjadd + + leaq TAB+JACSIZE*3(%rsp), %rdi + leaq TAB+JACSIZE*1(%rsp), %rsi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq TAB+JACSIZE*4(%rsp), %rdi + leaq TAB+JACSIZE*3(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_alt_p384_montjadd + + leaq TAB+JACSIZE*5(%rsp), %rdi + leaq TAB+JACSIZE*2(%rsp), %rsi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq TAB+JACSIZE*6(%rsp), %rdi + leaq TAB+JACSIZE*5(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_alt_p384_montjadd + + leaq TAB+JACSIZE*7(%rsp), %rdi + leaq TAB+JACSIZE*3(%rsp), %rsi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq TAB+JACSIZE*8(%rsp), %rdi + leaq TAB+JACSIZE*7(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_alt_p384_montjadd + + leaq TAB+JACSIZE*9(%rsp), %rdi + leaq TAB+JACSIZE*4(%rsp), %rsi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq TAB+JACSIZE*10(%rsp), %rdi + leaq TAB+JACSIZE*9(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_alt_p384_montjadd + + leaq TAB+JACSIZE*11(%rsp), %rdi + leaq TAB+JACSIZE*5(%rsp), %rsi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq TAB+JACSIZE*12(%rsp), %rdi + leaq TAB+JACSIZE*11(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_alt_p384_montjadd + + leaq TAB+JACSIZE*13(%rsp), %rdi + leaq TAB+JACSIZE*6(%rsp), %rsi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq TAB+JACSIZE*14(%rsp), %rdi + leaq TAB+JACSIZE*13(%rsp), %rsi + leaq TAB(%rsp), %rdx + callq p384_montjscalarmul_alt_p384_montjadd + + leaq TAB+JACSIZE*15(%rsp), %rdi + leaq TAB+JACSIZE*7(%rsp), %rsi + callq 
p384_montjscalarmul_alt_p384_montjdouble + +// Add the recoding constant sum_i(16 * 32^i) to the scalar to allow signed +// digits. The digits of the constant, in lowest-to-highest order, are as +// follows; they are generated dynamically to use fewer large constant loads. +// +// 0x0842108421084210 +// 0x1084210842108421 +// 0x2108421084210842 +// 0x4210842108421084 +// 0x8421084210842108 +// 0x0842108421084210 + + movq $0x1084210842108421, %rax + movq %rax, %rcx + shrq $1, %rax + movq SCALARB(%rsp), %r8 + addq %rax, %r8 + movq SCALARB+8(%rsp), %r9 + adcq %rcx, %r9 + leaq (%rcx,%rcx), %rcx + movq SCALARB+16(%rsp), %r10 + adcq %rcx, %r10 + leaq (%rcx,%rcx), %rcx + movq SCALARB+24(%rsp), %r11 + adcq %rcx, %r11 + leaq (%rcx,%rcx), %rcx + movq SCALARB+32(%rsp), %r12 + adcq %rcx, %r12 + movq SCALARB+40(%rsp), %r13 + adcq %rax, %r13 + sbbq %rdi, %rdi + negq %rdi + +// Record the top bitfield in %rdi then shift the whole scalar left 4 bits +// to align the top of the next bitfield with the MSB (bits 379..383). + + shldq $4, %r13, %rdi + shldq $4, %r12, %r13 + shldq $4, %r11, %r12 + shldq $4, %r10, %r11 + shldq $4, %r9, %r10 + shldq $4, %r8, %r9 + shlq $4, %r8 + + movq %r8, SCALARB(%rsp) + movq %r9, SCALARB+8(%rsp) + movq %r10, SCALARB+16(%rsp) + movq %r11, SCALARB+24(%rsp) + movq %r12, SCALARB+32(%rsp) + movq %r13, SCALARB+40(%rsp) + +// Initialize the accumulator to the corresponding entry using constant-time +// lookup in the table. This top digit, uniquely, is not recoded so there is +// no sign adjustment to make. On the x86 integer side we don't have enough +// registers to hold all the fields; this could be better done with SIMD +// registers anyway. So we do x and z coordinates in one sweep, y in another +// (this is a rehearsal for below where we might need to negate the y). + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + + selectblock_xz(1) + selectblock_xz(2) + selectblock_xz(3) + selectblock_xz(4) + selectblock_xz(5) + selectblock_xz(6) + selectblock_xz(7) + selectblock_xz(8) + selectblock_xz(9) + selectblock_xz(10) + selectblock_xz(11) + selectblock_xz(12) + selectblock_xz(13) + selectblock_xz(14) + selectblock_xz(15) + selectblock_xz(16) + + movq %rax, ACC(%rsp) + movq %rbx, ACC+8(%rsp) + movq %rcx, ACC+16(%rsp) + movq %rdx, ACC+24(%rsp) + movq %r8, ACC+32(%rsp) + movq %r9, ACC+40(%rsp) + movq %r10, ACC+96(%rsp) + movq %r11, ACC+104(%rsp) + movq %r12, ACC+112(%rsp) + movq %r13, ACC+120(%rsp) + movq %r14, ACC+128(%rsp) + movq %r15, ACC+136(%rsp) + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + + selectblock_y(1) + selectblock_y(2) + selectblock_y(3) + selectblock_y(4) + selectblock_y(5) + selectblock_y(6) + selectblock_y(7) + selectblock_y(8) + selectblock_y(9) + selectblock_y(10) + selectblock_y(11) + selectblock_y(12) + selectblock_y(13) + selectblock_y(14) + selectblock_y(15) + selectblock_y(16) + + movq %rax, ACC+48(%rsp) + movq %rbx, ACC+56(%rsp) + movq %rcx, ACC+64(%rsp) + movq %rdx, ACC+72(%rsp) + movq %r8, ACC+80(%rsp) + movq %r9, ACC+88(%rsp) + +// Main loop over size-5 bitfields: double 5 times then add signed digit +// At each stage we shift the scalar left by 5 bits so we can simply pick +// the top 5 bits as the bitfield, saving some fiddle over indexing. 
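+// In outline, the loop below behaves like this C-style sketch (illustrative
+// only: top_5_bits, shift_left_5 and table_lookup are placeholders, not
+// symbols defined in this file, while montjdouble/montjadd stand for the
+// local p384_montjscalarmul_alt_p384_montjdouble/_montjadd subroutines):
+//
+//   for (int i = 380; i != 0; ) {
+//       i -= 5;
+//       for (int j = 0; j < 5; j++)
+//           acc = montjdouble(acc);            // 5 doublings per window
+//       int d = top_5_bits(scalarb) - 16;      // signed digit in -16..+15
+//       shift_left_5(scalarb);
+//       pt = table_lookup(tab, abs(d));        // tab[k-1] = k * P; abs(d) = 0
+//                                              // selects an all-zero (infinity) entry
+//       if (d < 0) pt.y = p_384 - pt.y;        // negation done with masks/cmovs
+//       acc = montjadd(acc, pt);               // the addition happens unconditionally
+//   }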
+ + movl $380, %ebp + +p384_montjscalarmul_alt_mainloop: + subq $5, %rbp + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_alt_p384_montjdouble + + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_alt_p384_montjdouble + +// Choose the bitfield and adjust it to sign and magnitude + + movq SCALARB(%rsp), %r8 + movq SCALARB+8(%rsp), %r9 + movq SCALARB+16(%rsp), %r10 + movq SCALARB+24(%rsp), %r11 + movq SCALARB+32(%rsp), %r12 + movq SCALARB+40(%rsp), %r13 + + movq %r13, %rdi + shrq $59, %rdi + shldq $5, %r12, %r13 + shldq $5, %r11, %r12 + shldq $5, %r10, %r11 + shldq $5, %r9, %r10 + shldq $5, %r8, %r9 + shlq $5, %r8 + + movq %r8, SCALARB(%rsp) + movq %r9, SCALARB+8(%rsp) + movq %r10, SCALARB+16(%rsp) + movq %r11, SCALARB+24(%rsp) + movq %r12, SCALARB+32(%rsp) + movq %r13, SCALARB+40(%rsp) + + subq $16, %rdi + sbbq %rsi, %rsi // %rsi = sign of digit (-1 = negative) + xorq %rsi, %rdi + subq %rsi, %rdi // %rdi = absolute value of digit + +// Conditionally select the table entry tab[i-1] = i * P in constant time +// Again, this is done in two sweeps, first doing x and z then y. + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + +selectblock_xz(1) + selectblock_xz(2) + selectblock_xz(3) + selectblock_xz(4) + selectblock_xz(5) + selectblock_xz(6) + selectblock_xz(7) + selectblock_xz(8) + selectblock_xz(9) + selectblock_xz(10) + selectblock_xz(11) + selectblock_xz(12) + selectblock_xz(13) + selectblock_xz(14) + selectblock_xz(15) + selectblock_xz(16) + + movq %rax, TABENT(%rsp) + movq %rbx, TABENT+8(%rsp) + movq %rcx, TABENT+16(%rsp) + movq %rdx, TABENT+24(%rsp) + movq %r8, TABENT+32(%rsp) + movq %r9, TABENT+40(%rsp) + movq %r10, TABENT+96(%rsp) + movq %r11, TABENT+104(%rsp) + movq %r12, TABENT+112(%rsp) + movq %r13, TABENT+120(%rsp) + movq %r14, TABENT+128(%rsp) + movq %r15, TABENT+136(%rsp) + + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + + selectblock_y(1) + selectblock_y(2) + selectblock_y(3) + selectblock_y(4) + selectblock_y(5) + selectblock_y(6) + selectblock_y(7) + selectblock_y(8) + selectblock_y(9) + selectblock_y(10) + selectblock_y(11) + selectblock_y(12) + selectblock_y(13) + selectblock_y(14) + selectblock_y(15) + selectblock_y(16) + +// Store it to "tabent" with the y coordinate optionally negated. +// This is done carefully to give coordinates < p_384 even in +// the degenerate case y = 0 (when z = 0 for points on the curve). +// The digits of the prime p_384 are generated dynamically from +// the zeroth via not/lea to reduce the number of constant loads. 
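+// In outline, the conditional negation below behaves like this C-style sketch
+// (illustrative only; the p_384 words are spelled out here, whereas the code
+// regenerates them from the low word with not/lea):
+//
+//   static const uint64_t P_384[6] =
+//     { 0x00000000ffffffff, 0xffffffff00000000, 0xfffffffffffffffe,
+//       0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff };
+//
+//   int y_is_zero = (y[0]|y[1]|y[2]|y[3]|y[4]|y[5]) == 0;
+//   int negate = digit_was_negative && !y_is_zero;  // keep 0, not p_384, when y = 0
+//   uint64_t negy[6], borrow = 0;
+//   for (int i = 0; i < 6; i++) {                   // negy = p_384 - y
+//       unsigned __int128 t =
+//           (unsigned __int128) P_384[i] - y[i] - borrow;
+//       negy[i] = (uint64_t) t;
+//       borrow = (uint64_t) (t >> 64) & 1;
+//   }
+//   for (int i = 0; i < 6; i++)
+//       tabent_y[i] = negate ? negy[i] : y[i];      // selected with cmovnz, not a branch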
+ + movq %rax, %r10 + orq %rbx, %r10 + movq %rcx, %r11 + orq %rdx, %r11 + movq %r8, %r12 + orq %r9, %r12 + orq %r11, %r10 + orq %r12, %r10 + cmovzq %r10, %rsi + + movl $0xffffffff, %r10d + movq %r10, %r11 + notq %r11 + leaq (%r10,%r11), %r13 + subq %rax, %r10 + leaq -1(%r13), %r12 + sbbq %rbx, %r11 + movq %r13, %r14 + sbbq %rcx, %r12 + sbbq %rdx, %r13 + movq %r14, %r15 + sbbq %r8, %r14 + sbbq %r9, %r15 + + testq %rsi, %rsi + cmovnzq %r10, %rax + cmovnzq %r11, %rbx + cmovnzq %r12, %rcx + cmovnzq %r13, %rdx + cmovnzq %r14, %r8 + cmovnzq %r15, %r9 + + movq %rax, TABENT+48(%rsp) + movq %rbx, TABENT+56(%rsp) + movq %rcx, TABENT+64(%rsp) + movq %rdx, TABENT+72(%rsp) + movq %r8, TABENT+80(%rsp) + movq %r9, TABENT+88(%rsp) + +// Add to the accumulator + + leaq TABENT(%rsp), %rdx + leaq ACC(%rsp), %rsi + leaq ACC(%rsp), %rdi + callq p384_montjscalarmul_alt_p384_montjadd + + testq %rbp, %rbp + jne p384_montjscalarmul_alt_mainloop + +// That's the end of the main loop, and we just need to copy the +// result in "acc" to the output. + + movq res, %rdi + movq ACC(%rsp), %rax + movq %rax, (%rdi) + movq ACC+8(%rsp), %rax + movq %rax, 8(%rdi) + movq ACC+16(%rsp), %rax + movq %rax, 16(%rdi) + movq ACC+24(%rsp), %rax + movq %rax, 24(%rdi) + movq ACC+32(%rsp), %rax + movq %rax, 32(%rdi) + movq ACC+40(%rsp), %rax + movq %rax, 40(%rdi) + movq ACC+48(%rsp), %rax + movq %rax, 48(%rdi) + movq ACC+56(%rsp), %rax + movq %rax, 56(%rdi) + movq ACC+64(%rsp), %rax + movq %rax, 64(%rdi) + movq ACC+72(%rsp), %rax + movq %rax, 72(%rdi) + movq ACC+80(%rsp), %rax + movq %rax, 80(%rdi) + movq ACC+88(%rsp), %rax + movq %rax, 88(%rdi) + movq ACC+96(%rsp), %rax + movq %rax, 96(%rdi) + movq ACC+104(%rsp), %rax + movq %rax, 104(%rdi) + movq ACC+112(%rsp), %rax + movq %rax, 112(%rdi) + movq ACC+120(%rsp), %rax + movq %rax, 120(%rdi) + movq ACC+128(%rsp), %rax + movq %rax, 128(%rdi) + movq ACC+136(%rsp), %rax + movq %rax, 136(%rdi) + +// Restore stack and registers and return + + addq $NSPACE, %rsp + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + popq %r15 + ret + +// Local copies of subroutines, complete clones at the moment + +p384_montjscalarmul_alt_p384_montjadd: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x160, %rsp + movq %rsi, 0x150(%rsp) + movq %rdx, 0x158(%rsp) + movq 0x60(%rsi), %rbx + movq 0x68(%rsi), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x78(%rsi), %rax + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x88(%rsi), %rax + mulq %rbx + movq %rax, %r13 + movq %rdx, %r14 + movq 0x78(%rsi), %rax + mulq 0x80(%rsi) + movq %rax, %r15 + movq %rdx, %rcx + movq 0x70(%rsi), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x68(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbp, %rbp + movq 0x68(%rsi), %rbx + movq 0x78(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x80(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x88(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0x0, %rcx + movq 0x80(%rsi), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x70(%rsi), %rbx + movq 0x78(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x80(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq 
%rbp, %rbp + movq 0x88(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r15 + adcq %rdx, %rcx + sbbq %rbp, %rbp + xorl %ebx, %ebx + movq 0x78(%rsi), %rax + mulq 0x88(%rsi) + subq %rbp, %rdx + xorl %ebp, %ebp + addq %rax, %rcx + adcq %rdx, %rbx + adcl %ebp, %ebp + movq 0x80(%rsi), %rax + mulq 0x88(%rsi) + addq %rax, %rbx + adcq %rdx, %rbp + xorl %r8d, %r8d + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq %rcx, %rcx + adcq %rbx, %rbx + adcq %rbp, %rbp + adcl %r8d, %r8d + movq 0x60(%rsi), %rax + mulq %rax + movq %r8, (%rsp) + movq %rax, %r8 + movq 0x68(%rsi), %rax + movq %rbp, 0x8(%rsp) + addq %rdx, %r9 + sbbq %rbp, %rbp + mulq %rax + negq %rbp + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x70(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x78(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x80(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %rcx + adcq %rdx, %rbx + sbbq %rbp, %rbp + movq 0x88(%rsi), %rax + mulq %rax + negq %rbp + adcq 0x8(%rsp), %rax + adcq (%rsp), %rdx + movq %rax, %rbp + movq %rdx, %rsi + movq %rbx, (%rsp) + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r8 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r8, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rbx, %r8 + sbbq $0x0, %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r9 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r9, %r10 + sbbq %rdx, %r11 + sbbq %rax, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rbx, %r9 + sbbq $0x0, %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r10 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r10, %r11 + sbbq %rdx, %r12 + sbbq %rax, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rbx, %r10 + sbbq $0x0, %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r11 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r11, %r12 + sbbq %rdx, %r13 + sbbq %rax, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rbx, %r11 + sbbq $0x0, %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r12 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r12, %r13 + sbbq %rdx, %r8 + sbbq %rax, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rbx, %r12 + sbbq $0x0, %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r13 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r13, %r8 + sbbq %rdx, %r9 + sbbq %rax, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rbx, %r13 + sbbq $0x0, %r13 + movq (%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rsi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 
+ xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rsi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rsi + movq %r14, (%rsp) + movq %r15, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rbx, 0x18(%rsp) + movq %rbp, 0x20(%rsp) + movq %rsi, 0x28(%rsp) + movq 0x158(%rsp), %rsi + movq 0x60(%rsi), %rbx + movq 0x68(%rsi), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x78(%rsi), %rax + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x88(%rsi), %rax + mulq %rbx + movq %rax, %r13 + movq %rdx, %r14 + movq 0x78(%rsi), %rax + mulq 0x80(%rsi) + movq %rax, %r15 + movq %rdx, %rcx + movq 0x70(%rsi), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x68(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbp, %rbp + movq 0x68(%rsi), %rbx + movq 0x78(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x80(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x88(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0x0, %rcx + movq 0x80(%rsi), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x70(%rsi), %rbx + movq 0x78(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x80(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x88(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r15 + adcq %rdx, %rcx + sbbq %rbp, %rbp + xorl %ebx, %ebx + movq 0x78(%rsi), %rax + mulq 0x88(%rsi) + subq %rbp, %rdx + xorl %ebp, %ebp + addq %rax, %rcx + adcq %rdx, %rbx + adcl %ebp, %ebp + movq 0x80(%rsi), %rax + mulq 0x88(%rsi) + addq %rax, %rbx + adcq %rdx, %rbp + xorl %r8d, %r8d + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq %rcx, %rcx + adcq %rbx, %rbx + adcq %rbp, %rbp + adcl %r8d, %r8d + movq 0x60(%rsi), %rax + mulq %rax + movq %r8, 0xf0(%rsp) + movq %rax, %r8 + movq 0x68(%rsi), %rax + movq %rbp, 0xf8(%rsp) + addq %rdx, %r9 + sbbq %rbp, %rbp + mulq %rax + negq %rbp + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x70(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x78(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x80(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %rcx + adcq %rdx, %rbx + sbbq %rbp, %rbp + movq 0x88(%rsi), %rax + mulq %rax + negq %rbp + adcq 0xf8(%rsp), %rax + adcq 0xf0(%rsp), %rdx + movq %rax, %rbp + movq %rdx, %rsi + movq %rbx, 0xf0(%rsp) + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r8 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r8, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rbx, %r8 + sbbq $0x0, %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r9 + movl 
$0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r9, %r10 + sbbq %rdx, %r11 + sbbq %rax, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rbx, %r9 + sbbq $0x0, %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r10 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r10, %r11 + sbbq %rdx, %r12 + sbbq %rax, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rbx, %r10 + sbbq $0x0, %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r11 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r11, %r12 + sbbq %rdx, %r13 + sbbq %rax, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rbx, %r11 + sbbq $0x0, %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r12 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r12, %r13 + sbbq %rdx, %r8 + sbbq %rax, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rbx, %r12 + sbbq $0x0, %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r13 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r13, %r8 + sbbq %rdx, %r9 + sbbq %rax, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rbx, %r13 + sbbq $0x0, %r13 + movq 0xf0(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rsi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rsi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rsi + movq %r14, 0xf0(%rsp) + movq %r15, 0xf8(%rsp) + movq %rcx, 0x100(%rsp) + movq %rbx, 0x108(%rsp) + movq %rbp, 0x110(%rsp) + movq %rsi, 0x118(%rsp) + movq 0x150(%rsp), %rsi + movq 0x158(%rsp), %rcx + movq 0x30(%rsi), %rbx + movq 0x60(%rcx), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x68(%rcx), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x70(%rcx), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x78(%rcx), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x80(%rcx), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x88(%rcx), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x38(%rsi), %rbx + movq 0x60(%rcx), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x68(%rcx), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x70(%rcx), %rax + mulq %rbx + subq %r8, 
%rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x78(%rcx), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x80(%rcx), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x88(%rcx), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x40(%rsi), %rbx + movq 0x60(%rcx), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x68(%rcx), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x70(%rcx), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x78(%rcx), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x80(%rcx), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x88(%rcx), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x48(%rsi), %rbx + movq 0x60(%rcx), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x68(%rcx), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x70(%rcx), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x78(%rcx), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x80(%rcx), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x88(%rcx), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x50(%rsi), %rbx + movq 0x60(%rcx), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x68(%rcx), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x70(%rcx), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x78(%rcx), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x80(%rcx), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x88(%rcx), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + 
movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x58(%rsi), %rbx + movq 0x60(%rcx), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0x68(%rcx), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x70(%rcx), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x78(%rcx), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x80(%rcx), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x88(%rcx), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x120(%rsp) + movq %r15, 0x128(%rsp) + movq %r8, 0x130(%rsp) + movq %r9, 0x138(%rsp) + movq %r10, 0x140(%rsp) + movq %r11, 0x148(%rsp) + movq 0x150(%rsp), %rsi + movq 0x158(%rsp), %rcx + movq 0x30(%rcx), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x68(%rsi), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x70(%rsi), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x78(%rsi), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x80(%rsi), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x88(%rsi), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x38(%rcx), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x68(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x70(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x78(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x80(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x88(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq 
%r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x40(%rcx), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x68(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x70(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x78(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x80(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x88(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x48(%rcx), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x68(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x70(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x78(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x80(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x88(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x50(%rcx), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x68(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x70(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x78(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x80(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x88(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x58(%rcx), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 
0x68(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x70(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x78(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x80(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x88(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq %r8, 0x40(%rsp) + movq %r9, 0x48(%rsp) + movq %r10, 0x50(%rsp) + movq %r11, 0x58(%rsp) + movq 0x158(%rsp), %rcx + movq (%rcx), %rbx + movq (%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x18(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x20(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x28(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x8(%rcx), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x10(%rcx), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x8(%rsp), %rax + mulq 
%rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x18(%rcx), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x20(%rcx), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x28(%rcx), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + 
adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x60(%rsp) + movq %r15, 0x68(%rsp) + movq %r8, 0x70(%rsp) + movq %r9, 0x78(%rsp) + movq %r10, 0x80(%rsp) + movq %r11, 0x88(%rsp) + movq 0x150(%rsp), %rsi + movq (%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x118(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x8(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x10(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq 
%rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x18(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x20(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x28(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx 
+ xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %r8, 0xd0(%rsp) + movq %r9, 0xd8(%rsp) + movq %r10, 0xe0(%rsp) + movq %r11, 0xe8(%rsp) + movq 0x30(%rsp), %rbx + movq (%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x10(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x18(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x20(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x28(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x38(%rsp), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x40(%rsp), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x48(%rsp), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r11 + 
adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x50(%rsp), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x58(%rsp), %rbx + movq (%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0x8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x10(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x18(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x20(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x28(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq %r8, 0x40(%rsp) + movq %r9, 0x48(%rsp) + movq %r10, 0x50(%rsp) + movq %r11, 0x58(%rsp) + movq 
0x120(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x118(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x128(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x130(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x138(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 
0x118(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x140(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x148(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x120(%rsp) + movq %r15, 0x128(%rsp) + movq %r8, 0x130(%rsp) + movq %r9, 0x138(%rsp) + movq %r10, 0x140(%rsp) + movq %r11, 0x148(%rsp) + movq 0x60(%rsp), %rax + subq 0xc0(%rsp), %rax + movq 0x68(%rsp), %rdx + sbbq 0xc8(%rsp), %rdx + movq 0x70(%rsp), %r8 + sbbq 0xd0(%rsp), %r8 + movq 0x78(%rsp), %r9 + sbbq 0xd8(%rsp), %r9 + movq 0x80(%rsp), %r10 + sbbq 0xe0(%rsp), %r10 + movq 0x88(%rsp), %r11 + sbbq 0xe8(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0xf0(%rsp) + sbbq %rcx, %rdx + movq 
%rdx, 0xf8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x100(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x108(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x110(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x118(%rsp) + movq 0x30(%rsp), %rax + subq 0x120(%rsp), %rax + movq 0x38(%rsp), %rdx + sbbq 0x128(%rsp), %rdx + movq 0x40(%rsp), %r8 + sbbq 0x130(%rsp), %r8 + movq 0x48(%rsp), %r9 + sbbq 0x138(%rsp), %r9 + movq 0x50(%rsp), %r10 + sbbq 0x140(%rsp), %r10 + movq 0x58(%rsp), %r11 + sbbq 0x148(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0x30(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0x38(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x40(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x48(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x50(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x58(%rsp) + movq 0xf0(%rsp), %rbx + movq 0xf8(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x108(%rsp), %rax + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x118(%rsp), %rax + mulq %rbx + movq %rax, %r13 + movq %rdx, %r14 + movq 0x108(%rsp), %rax + mulq 0x110(%rsp) + movq %rax, %r15 + movq %rdx, %rcx + movq 0x100(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0xf8(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbp, %rbp + movq 0xf8(%rsp), %rbx + movq 0x108(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x110(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x118(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0x0, %rcx + movq 0x110(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x100(%rsp), %rbx + movq 0x108(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x110(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x118(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r15 + adcq %rdx, %rcx + sbbq %rbp, %rbp + xorl %ebx, %ebx + movq 0x108(%rsp), %rax + mulq 0x118(%rsp) + subq %rbp, %rdx + xorl %ebp, %ebp + addq %rax, %rcx + adcq %rdx, %rbx + adcl %ebp, %ebp + movq 0x110(%rsp), %rax + mulq 0x118(%rsp) + addq %rax, %rbx + adcq %rdx, %rbp + xorl %r8d, %r8d + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq %rcx, %rcx + adcq %rbx, %rbx + adcq %rbp, %rbp + adcl %r8d, %r8d + movq 0xf0(%rsp), %rax + mulq %rax + movq %r8, 0x90(%rsp) + movq %rax, %r8 + movq 0xf8(%rsp), %rax + movq %rbp, 0x98(%rsp) + addq %rdx, %r9 + sbbq %rbp, %rbp + mulq %rax + negq %rbp + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x100(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x108(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x110(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %rcx + adcq %rdx, %rbx + sbbq %rbp, %rbp + movq 0x118(%rsp), %rax + mulq %rax + negq %rbp + adcq 0x98(%rsp), %rax + adcq 0x90(%rsp), %rdx + movq %rax, %rbp + movq %rdx, %rsi + movq %rbx, 0x90(%rsp) + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq 
$0xffffffff, %rax + mulq %rbx + addq %rax, %r8 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r8, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rbx, %r8 + sbbq $0x0, %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r9 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r9, %r10 + sbbq %rdx, %r11 + sbbq %rax, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rbx, %r9 + sbbq $0x0, %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r10 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r10, %r11 + sbbq %rdx, %r12 + sbbq %rax, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rbx, %r10 + sbbq $0x0, %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r11 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r11, %r12 + sbbq %rdx, %r13 + sbbq %rax, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rbx, %r11 + sbbq $0x0, %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r12 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r12, %r13 + sbbq %rdx, %r8 + sbbq %rax, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rbx, %r12 + sbbq $0x0, %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r13 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r13, %r8 + sbbq %rdx, %r9 + sbbq %rax, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rbx, %r13 + sbbq $0x0, %r13 + movq 0x90(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rsi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rsi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rsi + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq %rcx, 0xa0(%rsp) + movq %rbx, 0xa8(%rsp) + movq %rbp, 0xb0(%rsp) + movq %rsi, 0xb8(%rsp) + movq 0x30(%rsp), %rbx + movq 0x38(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x48(%rsp), %rax + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x58(%rsp), %rax + mulq %rbx + movq %rax, %r13 + movq %rdx, %r14 + movq 0x48(%rsp), %rax + mulq 0x50(%rsp) + movq %rax, %r15 + movq %rdx, %rcx + movq 0x40(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x38(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbp, %rbp + movq 0x38(%rsp), %rbx + movq 0x48(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x50(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x58(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq 
%rdx, %r15 + adcq $0x0, %rcx + movq 0x50(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x40(%rsp), %rbx + movq 0x48(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x50(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x58(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r15 + adcq %rdx, %rcx + sbbq %rbp, %rbp + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + mulq 0x58(%rsp) + subq %rbp, %rdx + xorl %ebp, %ebp + addq %rax, %rcx + adcq %rdx, %rbx + adcl %ebp, %ebp + movq 0x50(%rsp), %rax + mulq 0x58(%rsp) + addq %rax, %rbx + adcq %rdx, %rbp + xorl %r8d, %r8d + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq %rcx, %rcx + adcq %rbx, %rbx + adcq %rbp, %rbp + adcl %r8d, %r8d + movq 0x30(%rsp), %rax + mulq %rax + movq %r8, (%rsp) + movq %rax, %r8 + movq 0x38(%rsp), %rax + movq %rbp, 0x8(%rsp) + addq %rdx, %r9 + sbbq %rbp, %rbp + mulq %rax + negq %rbp + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x40(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x48(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x50(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %rcx + adcq %rdx, %rbx + sbbq %rbp, %rbp + movq 0x58(%rsp), %rax + mulq %rax + negq %rbp + adcq 0x8(%rsp), %rax + adcq (%rsp), %rdx + movq %rax, %rbp + movq %rdx, %rsi + movq %rbx, (%rsp) + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r8 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r8, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rbx, %r8 + sbbq $0x0, %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r9 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r9, %r10 + sbbq %rdx, %r11 + sbbq %rax, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rbx, %r9 + sbbq $0x0, %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r10 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r10, %r11 + sbbq %rdx, %r12 + sbbq %rax, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rbx, %r10 + sbbq $0x0, %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r11 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r11, %r12 + sbbq %rdx, %r13 + sbbq %rax, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rbx, %r11 + sbbq $0x0, %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r12 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r12, %r13 + sbbq %rdx, %r8 + sbbq %rax, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rbx, %r12 + sbbq $0x0, %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + 
addq %rax, %r13 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r13, %r8 + sbbq %rdx, %r9 + sbbq %rax, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rbx, %r13 + sbbq $0x0, %r13 + movq (%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rsi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rsi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rsi + movq %r14, (%rsp) + movq %r15, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rbx, 0x18(%rsp) + movq %rbp, 0x20(%rsp) + movq %rsi, 0x28(%rsp) + movq 0xc0(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x98(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0xa0(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0xa8(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0xb0(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0xb8(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0xc8(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0xd0(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, 
%ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0xd8(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0xe0(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0xe8(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx 
+ adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %r8, 0xd0(%rsp) + movq %r9, 0xd8(%rsp) + movq %r10, 0xe0(%rsp) + movq %r11, 0xe8(%rsp) + movq 0x60(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x98(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0xa0(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0xa8(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0xb0(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0xb8(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x68(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x70(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x78(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq 
%rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x80(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x88(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x60(%rsp) + movq %r15, 0x68(%rsp) + movq %r8, 0x70(%rsp) + movq %r9, 0x78(%rsp) + movq %r10, 0x80(%rsp) + movq %r11, 0x88(%rsp) + movq (%rsp), %rax + subq 0xc0(%rsp), %rax + movq 0x8(%rsp), %rdx + sbbq 0xc8(%rsp), %rdx + 
movq 0x10(%rsp), %r8 + sbbq 0xd0(%rsp), %r8 + movq 0x18(%rsp), %r9 + sbbq 0xd8(%rsp), %r9 + movq 0x20(%rsp), %r10 + sbbq 0xe0(%rsp), %r10 + movq 0x28(%rsp), %r11 + sbbq 0xe8(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, (%rsp) + sbbq %rcx, %rdx + movq %rdx, 0x8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x10(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x18(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x20(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x28(%rsp) + movq 0x60(%rsp), %rax + subq 0xc0(%rsp), %rax + movq 0x68(%rsp), %rdx + sbbq 0xc8(%rsp), %rdx + movq 0x70(%rsp), %r8 + sbbq 0xd0(%rsp), %r8 + movq 0x78(%rsp), %r9 + sbbq 0xd8(%rsp), %r9 + movq 0x80(%rsp), %r10 + sbbq 0xe0(%rsp), %r10 + movq 0x88(%rsp), %r11 + sbbq 0xe8(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0x90(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0x98(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0xa0(%rsp) + sbbq $0x0, %r9 + movq %r9, 0xa8(%rsp) + sbbq $0x0, %r10 + movq %r10, 0xb0(%rsp) + sbbq $0x0, %r11 + movq %r11, 0xb8(%rsp) + movq 0x150(%rsp), %rsi + movq 0x60(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x118(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x68(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x70(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 
+ sbbq %r9, %r9 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x78(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x80(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x88(%rsi), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq 
$0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xf0(%rsp) + movq %r15, 0xf8(%rsp) + movq %r8, 0x100(%rsp) + movq %r9, 0x108(%rsp) + movq %r10, 0x110(%rsp) + movq %r11, 0x118(%rsp) + movq (%rsp), %rax + subq 0x60(%rsp), %rax + movq 0x8(%rsp), %rdx + sbbq 0x68(%rsp), %rdx + movq 0x10(%rsp), %r8 + sbbq 0x70(%rsp), %r8 + movq 0x18(%rsp), %r9 + sbbq 0x78(%rsp), %r9 + movq 0x20(%rsp), %r10 + sbbq 0x80(%rsp), %r10 + movq 0x28(%rsp), %r11 + sbbq 0x88(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, (%rsp) + sbbq %rcx, %rdx + movq %rdx, 0x8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x10(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x18(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x20(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x28(%rsp) + movq 0xc0(%rsp), %rax + subq (%rsp), %rax + movq 0xc8(%rsp), %rdx + sbbq 0x8(%rsp), %rdx + movq 0xd0(%rsp), %r8 + sbbq 0x10(%rsp), %r8 + movq 0xd8(%rsp), %r9 + sbbq 0x18(%rsp), %r9 + movq 0xe0(%rsp), %r10 + sbbq 0x20(%rsp), %r10 + movq 0xe8(%rsp), %r11 + sbbq 0x28(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0xc0(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0xc8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0xd0(%rsp) + sbbq $0x0, %r9 + movq %r9, 0xd8(%rsp) + sbbq $0x0, %r10 + movq %r10, 0xe0(%rsp) + sbbq $0x0, %r11 + movq %r11, 0xe8(%rsp) + movq 0x120(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x98(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0xa0(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0xa8(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0xb0(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0xb8(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x128(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0xb0(%rsp), 
%rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x130(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x138(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x140(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + 
sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x148(%rsp), %rbx + movq 0x90(%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0x98(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0xa0(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0xa8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0xb0(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0xb8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq %r8, 0xa0(%rsp) + movq %r9, 0xa8(%rsp) + movq %r10, 0xb0(%rsp) + movq %r11, 0xb8(%rsp) + movq 0x158(%rsp), %rcx + movq 0x60(%rcx), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x118(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x68(%rcx), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, 
%r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x70(%rcx), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x78(%rcx), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x80(%rcx), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x88(%rcx), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + 
sbbq %r12, %r12 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xf0(%rsp) + movq %r15, 0xf8(%rsp) + movq %r8, 0x100(%rsp) + movq %r9, 0x108(%rsp) + movq %r10, 0x110(%rsp) + movq %r11, 0x118(%rsp) + movq 0xc0(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x38(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x40(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x48(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x50(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x58(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0xc8(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x38(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x40(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x48(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x50(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x58(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0xd0(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x38(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x40(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 
0x48(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x50(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x58(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0xd8(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x38(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x40(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x48(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x50(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x58(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0xe0(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x38(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x40(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x48(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x50(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x58(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0xe8(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0x38(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x40(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x48(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x50(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x58(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + 
movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %r8, 0xd0(%rsp) + movq %r9, 0xd8(%rsp) + movq %r10, 0xe0(%rsp) + movq %r11, 0xe8(%rsp) + movq 0xc0(%rsp), %rax + subq 0x90(%rsp), %rax + movq 0xc8(%rsp), %rdx + sbbq 0x98(%rsp), %rdx + movq 0xd0(%rsp), %r8 + sbbq 0xa0(%rsp), %r8 + movq 0xd8(%rsp), %r9 + sbbq 0xa8(%rsp), %r9 + movq 0xe0(%rsp), %r10 + sbbq 0xb0(%rsp), %r10 + movq 0xe8(%rsp), %r11 + sbbq 0xb8(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %esi + andq %rsi, %rcx + xorq %rsi, %rsi + subq %rcx, %rsi + subq %rsi, %rax + movq %rax, 0xc0(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0xc8(%rsp) + sbbq %rax, %rax + andq %rsi, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0xd0(%rsp) + sbbq $0x0, %r9 + movq %r9, 0xd8(%rsp) + sbbq $0x0, %r10 + movq %r10, 0xe0(%rsp) + sbbq $0x0, %r11 + movq %r11, 0xe8(%rsp) + movq 0x158(%rsp), %rcx + movq 0x60(%rcx), %r8 + movq 0x68(%rcx), %r9 + movq 0x70(%rcx), %r10 + movq 0x78(%rcx), %r11 + movq 0x80(%rcx), %rbx + movq 0x88(%rcx), %rbp + movq %r8, %rax + movq %r9, %rdx + orq %r10, %rax + orq %r11, %rdx + orq %rbx, %rax + orq %rbp, %rdx + orq %rdx, %rax + negq %rax + sbbq %rax, %rax + movq 0x150(%rsp), %rsi + movq 0x60(%rsi), %r12 + movq 0x68(%rsi), %r13 + movq 0x70(%rsi), %r14 + movq 0x78(%rsi), %r15 + movq 0x80(%rsi), %rdx + movq 0x88(%rsi), %rcx + cmoveq %r12, %r8 + cmoveq %r13, %r9 + cmoveq %r14, %r10 + cmoveq %r15, %r11 + cmoveq %rdx, %rbx + cmoveq %rcx, %rbp + orq %r13, %r12 + orq %r15, %r14 + orq %rcx, %rdx + orq %r14, %r12 + orq %r12, %rdx + negq %rdx + sbbq %rdx, %rdx + cmpq %rdx, %rax + cmoveq 0xf0(%rsp), %r8 + cmoveq 0xf8(%rsp), %r9 + cmoveq 0x100(%rsp), %r10 + cmoveq 0x108(%rsp), %r11 + cmoveq 0x110(%rsp), %rbx + cmoveq 0x118(%rsp), %rbp + movq %r8, 0xf0(%rsp) + movq %r9, 0xf8(%rsp) + movq %r10, 0x100(%rsp) + movq %r11, 0x108(%rsp) + movq %rbx, 0x110(%rsp) + movq %rbp, 0x118(%rsp) + movq 0x158(%rsp), %rcx + movq 0x150(%rsp), %rsi + movq (%rsp), %r8 + cmovbq (%rsi), %r8 + cmova (%rcx), %r8 + movq 0x8(%rsp), %r9 + cmovbq 0x8(%rsi), %r9 + cmova 0x8(%rcx), %r9 + movq 0x10(%rsp), %r10 + cmovbq 0x10(%rsi), %r10 + cmova 0x10(%rcx), %r10 + movq 0x18(%rsp), %r11 + cmovbq 0x18(%rsi), %r11 + cmova 0x18(%rcx), %r11 + movq 0x20(%rsp), %rbx + cmovbq 0x20(%rsi), %rbx + cmova 0x20(%rcx), %rbx + movq 0x28(%rsp), %rbp + cmovbq 0x28(%rsi), %rbp + cmova 0x28(%rcx), %rbp + movq 0xc0(%rsp), %r12 + cmovbq 0x30(%rsi), %r12 + cmova 0x30(%rcx), %r12 + movq 0xc8(%rsp), %r13 + cmovbq 0x38(%rsi), %r13 + cmova 0x38(%rcx), %r13 + movq 0xd0(%rsp), %r14 + cmovbq 0x40(%rsi), %r14 + cmova 0x40(%rcx), %r14 + movq 0xd8(%rsp), %r15 + cmovbq 0x48(%rsi), %r15 + cmova 0x48(%rcx), %r15 + movq 0xe0(%rsp), %rdx + cmovbq 0x50(%rsi), %rdx + cmova 0x50(%rcx), %rdx + movq 0xe8(%rsp), %rax + cmovbq 0x58(%rsi), %rax + cmova 0x58(%rcx), %rax + movq %r8, (%rdi) + movq %r9, 0x8(%rdi) + movq %r10, 0x10(%rdi) + movq %r11, 0x18(%rdi) + movq %rbx, 0x20(%rdi) + movq %rbp, 
0x28(%rdi) + movq 0xf0(%rsp), %r8 + movq 0xf8(%rsp), %r9 + movq 0x100(%rsp), %r10 + movq 0x108(%rsp), %r11 + movq 0x110(%rsp), %rbx + movq 0x118(%rsp), %rbp + movq %r12, 0x30(%rdi) + movq %r13, 0x38(%rdi) + movq %r14, 0x40(%rdi) + movq %r15, 0x48(%rdi) + movq %rdx, 0x50(%rdi) + movq %rax, 0x58(%rdi) + movq %r8, 0x60(%rdi) + movq %r9, 0x68(%rdi) + movq %r10, 0x70(%rdi) + movq %r11, 0x78(%rdi) + movq %rbx, 0x80(%rdi) + movq %rbp, 0x88(%rdi) + addq $0x160, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +p384_montjscalarmul_alt_p384_montjdouble: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $0x158, %rsp + movq %rdi, 0x150(%rsp) + movq 0x60(%rsi), %rbx + movq 0x68(%rsi), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x78(%rsi), %rax + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x88(%rsi), %rax + mulq %rbx + movq %rax, %r13 + movq %rdx, %r14 + movq 0x78(%rsi), %rax + mulq 0x80(%rsi) + movq %rax, %r15 + movq %rdx, %rcx + movq 0x70(%rsi), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x68(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbp, %rbp + movq 0x68(%rsi), %rbx + movq 0x78(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x80(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x88(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0x0, %rcx + movq 0x80(%rsi), %rbx + movq 0x60(%rsi), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x70(%rsi), %rbx + movq 0x78(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x80(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x88(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r15 + adcq %rdx, %rcx + sbbq %rbp, %rbp + xorl %ebx, %ebx + movq 0x78(%rsi), %rax + mulq 0x88(%rsi) + subq %rbp, %rdx + xorl %ebp, %ebp + addq %rax, %rcx + adcq %rdx, %rbx + adcl %ebp, %ebp + movq 0x80(%rsi), %rax + mulq 0x88(%rsi) + addq %rax, %rbx + adcq %rdx, %rbp + xorl %r8d, %r8d + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq %rcx, %rcx + adcq %rbx, %rbx + adcq %rbp, %rbp + adcl %r8d, %r8d + movq 0x60(%rsi), %rax + mulq %rax + movq %r8, (%rsp) + movq %rax, %r8 + movq 0x68(%rsi), %rax + movq %rbp, 0x8(%rsp) + addq %rdx, %r9 + sbbq %rbp, %rbp + mulq %rax + negq %rbp + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x70(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x78(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x80(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %rcx + adcq %rdx, %rbx + sbbq %rbp, %rbp + movq 0x88(%rsi), %rax + mulq %rax + negq %rbp + adcq 0x8(%rsp), %rax + adcq (%rsp), %rdx + movq %rax, %rbp + movq %rdx, %rdi + movq %rbx, (%rsp) + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r8 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r8, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rbx, %r8 + sbbq $0x0, %r8 + movq %r9, %rbx + 
shlq $0x20, %rbx + addq %r9, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r9 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r9, %r10 + sbbq %rdx, %r11 + sbbq %rax, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rbx, %r9 + sbbq $0x0, %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r10 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r10, %r11 + sbbq %rdx, %r12 + sbbq %rax, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rbx, %r10 + sbbq $0x0, %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r11 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r11, %r12 + sbbq %rdx, %r13 + sbbq %rax, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rbx, %r11 + sbbq $0x0, %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r12 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r12, %r13 + sbbq %rdx, %r8 + sbbq %rax, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rbx, %r12 + sbbq $0x0, %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r13 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r13, %r8 + sbbq %rdx, %r9 + sbbq %rax, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rbx, %r13 + sbbq $0x0, %r13 + movq (%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, (%rsp) + movq %r15, 0x8(%rsp) + movq %rcx, 0x10(%rsp) + movq %rbx, 0x18(%rsp) + movq %rbp, 0x20(%rsp) + movq %rdi, 0x28(%rsp) + movq 0x30(%rsi), %rbx + movq 0x38(%rsi), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x48(%rsi), %rax + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x58(%rsi), %rax + mulq %rbx + movq %rax, %r13 + movq %rdx, %r14 + movq 0x48(%rsi), %rax + mulq 0x50(%rsi) + movq %rax, %r15 + movq %rdx, %rcx + movq 0x40(%rsi), %rbx + movq 0x30(%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x38(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbp, %rbp + movq 0x38(%rsi), %rbx + movq 0x48(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x50(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x58(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0x0, %rcx + movq 0x50(%rsi), %rbx + movq 0x30(%rsi), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x40(%rsi), %rbx + movq 0x48(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, 
%r14 + sbbq %rbp, %rbp + movq 0x50(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x58(%rsi), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r15 + adcq %rdx, %rcx + sbbq %rbp, %rbp + xorl %ebx, %ebx + movq 0x48(%rsi), %rax + mulq 0x58(%rsi) + subq %rbp, %rdx + xorl %ebp, %ebp + addq %rax, %rcx + adcq %rdx, %rbx + adcl %ebp, %ebp + movq 0x50(%rsi), %rax + mulq 0x58(%rsi) + addq %rax, %rbx + adcq %rdx, %rbp + xorl %r8d, %r8d + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq %rcx, %rcx + adcq %rbx, %rbx + adcq %rbp, %rbp + adcl %r8d, %r8d + movq 0x30(%rsi), %rax + mulq %rax + movq %r8, 0x30(%rsp) + movq %rax, %r8 + movq 0x38(%rsi), %rax + movq %rbp, 0x38(%rsp) + addq %rdx, %r9 + sbbq %rbp, %rbp + mulq %rax + negq %rbp + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x40(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x48(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x50(%rsi), %rax + mulq %rax + negq %rbp + adcq %rax, %rcx + adcq %rdx, %rbx + sbbq %rbp, %rbp + movq 0x58(%rsi), %rax + mulq %rax + negq %rbp + adcq 0x38(%rsp), %rax + adcq 0x30(%rsp), %rdx + movq %rax, %rbp + movq %rdx, %rdi + movq %rbx, 0x30(%rsp) + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r8 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r8, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rbx, %r8 + sbbq $0x0, %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r9 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r9, %r10 + sbbq %rdx, %r11 + sbbq %rax, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rbx, %r9 + sbbq $0x0, %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r10 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r10, %r11 + sbbq %rdx, %r12 + sbbq %rax, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rbx, %r10 + sbbq $0x0, %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r11 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r11, %r12 + sbbq %rdx, %r13 + sbbq %rax, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rbx, %r11 + sbbq $0x0, %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r12 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r12, %r13 + sbbq %rdx, %r8 + sbbq %rax, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rbx, %r12 + sbbq $0x0, %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r13 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r13, %r8 + sbbq %rdx, %r9 + sbbq %rax, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rbx, %r13 + sbbq $0x0, %r13 + movq 0x30(%rsp), %rbx + addq %r8, %r14 + 
adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, 0x30(%rsp) + movq %r15, 0x38(%rsp) + movq %rcx, 0x40(%rsp) + movq %rbx, 0x48(%rsp) + movq %rbp, 0x50(%rsp) + movq %rdi, 0x58(%rsp) + movq (%rsi), %rax + addq (%rsp), %rax + movq 0x8(%rsi), %rcx + adcq 0x8(%rsp), %rcx + movq 0x10(%rsi), %r8 + adcq 0x10(%rsp), %r8 + movq 0x18(%rsi), %r9 + adcq 0x18(%rsp), %r9 + movq 0x20(%rsi), %r10 + adcq 0x20(%rsp), %r10 + movq 0x28(%rsi), %r11 + adcq 0x28(%rsp), %r11 + sbbq %rdx, %rdx + movl $0x1, %ebx + andq %rdx, %rbx + movl $0xffffffff, %ebp + andq %rbp, %rdx + xorq %rbp, %rbp + subq %rdx, %rbp + addq %rbp, %rax + movq %rax, 0xf0(%rsp) + adcq %rdx, %rcx + movq %rcx, 0xf8(%rsp) + adcq %rbx, %r8 + movq %r8, 0x100(%rsp) + adcq $0x0, %r9 + movq %r9, 0x108(%rsp) + adcq $0x0, %r10 + movq %r10, 0x110(%rsp) + adcq $0x0, %r11 + movq %r11, 0x118(%rsp) + movq (%rsi), %rax + subq (%rsp), %rax + movq 0x8(%rsi), %rdx + sbbq 0x8(%rsp), %rdx + movq 0x10(%rsi), %r8 + sbbq 0x10(%rsp), %r8 + movq 0x18(%rsi), %r9 + sbbq 0x18(%rsp), %r9 + movq 0x20(%rsi), %r10 + sbbq 0x20(%rsp), %r10 + movq 0x28(%rsi), %r11 + sbbq 0x28(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %ebx + andq %rbx, %rcx + xorq %rbx, %rbx + subq %rcx, %rbx + subq %rbx, %rax + movq %rax, 0xc0(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0xc8(%rsp) + sbbq %rax, %rax + andq %rbx, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0xd0(%rsp) + sbbq $0x0, %r9 + movq %r9, 0xd8(%rsp) + sbbq $0x0, %r10 + movq %r10, 0xe0(%rsp) + sbbq $0x0, %r11 + movq %r11, 0xe8(%rsp) + movq 0xc0(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x118(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0xc8(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq 
%r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0xd0(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0xd8(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0xe0(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0xe8(%rsp), %rbx + movq 0xf0(%rsp), %rax + 
mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0xf8(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x100(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x108(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x110(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x118(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x60(%rsp) + movq %r15, 0x68(%rsp) + movq %r8, 0x70(%rsp) + movq %r9, 0x78(%rsp) + movq %r10, 0x80(%rsp) + movq %r11, 0x88(%rsp) + movq 0x30(%rsi), %rax + addq 0x60(%rsi), %rax + movq 0x38(%rsi), %rcx + adcq 0x68(%rsi), %rcx + movq 0x40(%rsi), %r8 + adcq 0x70(%rsi), %r8 + movq 0x48(%rsi), %r9 + adcq 0x78(%rsi), %r9 + movq 0x50(%rsi), %r10 + adcq 0x80(%rsi), %r10 + movq 0x58(%rsi), %r11 + adcq 0x88(%rsi), %r11 + movl $0x0, %edx + adcq %rdx, %rdx + movabsq $0xffffffff00000001, %rbp + addq %rbp, %rax + movl $0xffffffff, %ebp + adcq %rbp, %rcx + adcq $0x1, %r8 + adcq $0x0, %r9 + adcq $0x0, %r10 + adcq $0x0, %r11 + adcq $0xffffffffffffffff, %rdx + movl $0x1, %ebx + andq %rdx, %rbx + andq %rbp, %rdx + xorq %rbp, %rbp + subq %rdx, %rbp + subq %rbp, %rax + movq %rax, 0xf0(%rsp) + sbbq %rdx, %rcx + movq %rcx, 0xf8(%rsp) + sbbq %rbx, %r8 + movq %r8, 0x100(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x108(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x110(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x118(%rsp) + movq 0x60(%rsp), %rbx + movq 0x68(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x78(%rsp), %rax + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x88(%rsp), %rax + mulq %rbx + movq %rax, %r13 + movq %rdx, %r14 + movq 0x78(%rsp), %rax + mulq 0x80(%rsp) + movq %rax, %r15 + movq %rdx, %rcx + movq 0x70(%rsp), %rbx + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x68(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbp, %rbp + movq 0x68(%rsp), %rbx + movq 0x78(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x80(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x88(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0x0, %rcx + movq 0x80(%rsp), %rbx + movq 0x60(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x70(%rsp), %rbx + movq 0x78(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x80(%rsp), %rax + mulq %rbx + subq 
%rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x88(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r15 + adcq %rdx, %rcx + sbbq %rbp, %rbp + xorl %ebx, %ebx + movq 0x78(%rsp), %rax + mulq 0x88(%rsp) + subq %rbp, %rdx + xorl %ebp, %ebp + addq %rax, %rcx + adcq %rdx, %rbx + adcl %ebp, %ebp + movq 0x80(%rsp), %rax + mulq 0x88(%rsp) + addq %rax, %rbx + adcq %rdx, %rbp + xorl %r8d, %r8d + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq %rcx, %rcx + adcq %rbx, %rbx + adcq %rbp, %rbp + adcl %r8d, %r8d + movq 0x60(%rsp), %rax + mulq %rax + movq %r8, 0x120(%rsp) + movq %rax, %r8 + movq 0x68(%rsp), %rax + movq %rbp, 0x128(%rsp) + addq %rdx, %r9 + sbbq %rbp, %rbp + mulq %rax + negq %rbp + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x70(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x78(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x80(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %rcx + adcq %rdx, %rbx + sbbq %rbp, %rbp + movq 0x88(%rsp), %rax + mulq %rax + negq %rbp + adcq 0x128(%rsp), %rax + adcq 0x120(%rsp), %rdx + movq %rax, %rbp + movq %rdx, %rdi + movq %rbx, 0x120(%rsp) + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r8 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r8, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rbx, %r8 + sbbq $0x0, %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r9 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r9, %r10 + sbbq %rdx, %r11 + sbbq %rax, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rbx, %r9 + sbbq $0x0, %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r10 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r10, %r11 + sbbq %rdx, %r12 + sbbq %rax, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rbx, %r10 + sbbq $0x0, %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r11 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r11, %r12 + sbbq %rdx, %r13 + sbbq %rax, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rbx, %r11 + sbbq $0x0, %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r12 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r12, %r13 + sbbq %rdx, %r8 + sbbq %rax, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rbx, %r12 + sbbq $0x0, %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r13 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r13, %r8 + sbbq %rdx, %r9 + sbbq %rax, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rbx, %r13 + sbbq $0x0, %r13 + movq 0x120(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq 
%r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, 0x120(%rsp) + movq %r15, 0x128(%rsp) + movq %rcx, 0x130(%rsp) + movq %rbx, 0x138(%rsp) + movq %rbp, 0x140(%rsp) + movq %rdi, 0x148(%rsp) + movq 0x30(%rsp), %rbx + movq (%rsi), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x8(%rsi), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x10(%rsi), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x18(%rsi), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x20(%rsi), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x28(%rsi), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x38(%rsp), %rbx + movq (%rsi), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x8(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x10(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x18(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x20(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x28(%rsi), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x40(%rsp), %rbx + movq (%rsi), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x8(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x10(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x18(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x20(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x28(%rsi), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + 
adcq $0x0, %r9 + movq 0x48(%rsp), %rbx + movq (%rsi), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x8(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x10(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x18(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x20(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x28(%rsi), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x50(%rsp), %rbx + movq (%rsi), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x8(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x10(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x18(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x20(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x28(%rsi), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x58(%rsp), %rbx + movq (%rsi), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0x8(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x10(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x18(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x20(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x28(%rsi), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0x90(%rsp) + movq %r15, 0x98(%rsp) + movq %r8, 
0xa0(%rsp) + movq %r9, 0xa8(%rsp) + movq %r10, 0xb0(%rsp) + movq %r11, 0xb8(%rsp) + movq 0xf0(%rsp), %rbx + movq 0xf8(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x108(%rsp), %rax + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x118(%rsp), %rax + mulq %rbx + movq %rax, %r13 + movq %rdx, %r14 + movq 0x108(%rsp), %rax + mulq 0x110(%rsp) + movq %rax, %r15 + movq %rdx, %rcx + movq 0x100(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0xf8(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbp, %rbp + movq 0xf8(%rsp), %rbx + movq 0x108(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x110(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x118(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0x0, %rcx + movq 0x110(%rsp), %rbx + movq 0xf0(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x100(%rsp), %rbx + movq 0x108(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x110(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x118(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r15 + adcq %rdx, %rcx + sbbq %rbp, %rbp + xorl %ebx, %ebx + movq 0x108(%rsp), %rax + mulq 0x118(%rsp) + subq %rbp, %rdx + xorl %ebp, %ebp + addq %rax, %rcx + adcq %rdx, %rbx + adcl %ebp, %ebp + movq 0x110(%rsp), %rax + mulq 0x118(%rsp) + addq %rax, %rbx + adcq %rdx, %rbp + xorl %r8d, %r8d + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq %rcx, %rcx + adcq %rbx, %rbx + adcq %rbp, %rbp + adcl %r8d, %r8d + movq 0xf0(%rsp), %rax + mulq %rax + movq %r8, 0xc0(%rsp) + movq %rax, %r8 + movq 0xf8(%rsp), %rax + movq %rbp, 0xc8(%rsp) + addq %rdx, %r9 + sbbq %rbp, %rbp + mulq %rax + negq %rbp + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x100(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x108(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x110(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %rcx + adcq %rdx, %rbx + sbbq %rbp, %rbp + movq 0x118(%rsp), %rax + mulq %rax + negq %rbp + adcq 0xc8(%rsp), %rax + adcq 0xc0(%rsp), %rdx + movq %rax, %rbp + movq %rdx, %rdi + movq %rbx, 0xc0(%rsp) + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r8 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r8, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rbx, %r8 + sbbq $0x0, %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r9 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r9, %r10 + sbbq %rdx, %r11 + sbbq %rax, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rbx, %r9 + sbbq $0x0, %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r10 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r10, %r11 + sbbq 
%rdx, %r12 + sbbq %rax, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rbx, %r10 + sbbq $0x0, %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r11 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r11, %r12 + sbbq %rdx, %r13 + sbbq %rax, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rbx, %r11 + sbbq $0x0, %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r12 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r12, %r13 + sbbq %rdx, %r8 + sbbq %rax, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rbx, %r12 + sbbq $0x0, %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r13 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r13, %r8 + sbbq %rdx, %r9 + sbbq %rax, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rbx, %r13 + sbbq $0x0, %r13 + movq 0xc0(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %rcx, 0xd0(%rsp) + movq %rbx, 0xd8(%rsp) + movq %rbp, 0xe0(%rsp) + movq %rdi, 0xe8(%rsp) + movabsq $0xffffffff, %r9 + subq 0x120(%rsp), %r9 + movabsq $0xffffffff00000000, %r10 + sbbq 0x128(%rsp), %r10 + movq $0xfffffffffffffffe, %r11 + sbbq 0x130(%rsp), %r11 + movq $0xffffffffffffffff, %r12 + sbbq 0x138(%rsp), %r12 + movq $0xffffffffffffffff, %r13 + sbbq 0x140(%rsp), %r13 + movq $0xffffffffffffffff, %r14 + sbbq 0x148(%rsp), %r14 + movq $0x9, %rcx + movq %r9, %rax + mulq %rcx + movq %rax, %r8 + movq %rdx, %r9 + movq %r10, %rax + xorl %r10d, %r10d + mulq %rcx + addq %rax, %r9 + adcq %rdx, %r10 + movq %r11, %rax + xorl %r11d, %r11d + mulq %rcx + addq %rax, %r10 + adcq %rdx, %r11 + movq %r12, %rax + xorl %r12d, %r12d + mulq %rcx + addq %rax, %r11 + adcq %rdx, %r12 + movq %r13, %rax + xorl %r13d, %r13d + mulq %rcx + addq %rax, %r12 + adcq %rdx, %r13 + movq %r14, %rax + movl $0x1, %r14d + mulq %rcx + addq %rax, %r13 + adcq %rdx, %r14 + movl $0xc, %ecx + movq 0x90(%rsp), %rax + mulq %rcx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %rbx, %rbx + movq 0x98(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rbx, %rbx + movq 0xa0(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbx, %rbx + movq 0xa8(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbx, %rbx + movq 0xb0(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbx, %rbx + movq 0xb8(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + movabsq $0xffffffff00000001, %rax + mulq %r14 + addq %rax, %r8 + adcq %rdx, %r9 + adcq %r14, %r10 + movq %r14, %rax + sbbq %rcx, %rcx + movl $0xffffffff, %edx + negq %rcx + mulq %rdx + addq %rax, %r9 + adcq 
%rdx, %r10 + adcq %rcx, %r11 + adcq $0x0, %r12 + adcq $0x0, %r13 + sbbq %rcx, %rcx + notq %rcx + movl $0xffffffff, %edx + xorq %rax, %rax + andq %rcx, %rdx + subq %rdx, %rax + andq $0x1, %rcx + subq %rax, %r8 + movq %r8, 0x120(%rsp) + sbbq %rdx, %r9 + movq %r9, 0x128(%rsp) + sbbq %rcx, %r10 + movq %r10, 0x130(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x138(%rsp) + sbbq $0x0, %r12 + movq %r12, 0x140(%rsp) + sbbq $0x0, %r13 + movq %r13, 0x148(%rsp) + movq 0xc0(%rsp), %rax + subq (%rsp), %rax + movq 0xc8(%rsp), %rdx + sbbq 0x8(%rsp), %rdx + movq 0xd0(%rsp), %r8 + sbbq 0x10(%rsp), %r8 + movq 0xd8(%rsp), %r9 + sbbq 0x18(%rsp), %r9 + movq 0xe0(%rsp), %r10 + sbbq 0x20(%rsp), %r10 + movq 0xe8(%rsp), %r11 + sbbq 0x28(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %ebx + andq %rbx, %rcx + xorq %rbx, %rbx + subq %rcx, %rbx + subq %rbx, %rax + movq %rax, 0xf0(%rsp) + sbbq %rcx, %rdx + movq %rdx, 0xf8(%rsp) + sbbq %rax, %rax + andq %rbx, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x100(%rsp) + sbbq $0x0, %r9 + movq %r9, 0x108(%rsp) + sbbq $0x0, %r10 + movq %r10, 0x110(%rsp) + sbbq $0x0, %r11 + movq %r11, 0x118(%rsp) + movq 0x30(%rsp), %rbx + movq 0x38(%rsp), %rax + mulq %rbx + movq %rax, %r9 + movq %rdx, %r10 + movq 0x48(%rsp), %rax + mulq %rbx + movq %rax, %r11 + movq %rdx, %r12 + movq 0x58(%rsp), %rax + mulq %rbx + movq %rax, %r13 + movq %rdx, %r14 + movq 0x48(%rsp), %rax + mulq 0x50(%rsp) + movq %rax, %r15 + movq %rdx, %rcx + movq 0x40(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x38(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbp, %rbp + movq 0x38(%rsp), %rbx + movq 0x48(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x50(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x58(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + adcq $0x0, %rcx + movq 0x50(%rsp), %rbx + movq 0x30(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x40(%rsp), %rbx + movq 0x48(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %rbp, %rbp + movq 0x50(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x58(%rsp), %rax + mulq %rbx + subq %rbp, %rdx + addq %rax, %r15 + adcq %rdx, %rcx + sbbq %rbp, %rbp + xorl %ebx, %ebx + movq 0x48(%rsp), %rax + mulq 0x58(%rsp) + subq %rbp, %rdx + xorl %ebp, %ebp + addq %rax, %rcx + adcq %rdx, %rbx + adcl %ebp, %ebp + movq 0x50(%rsp), %rax + mulq 0x58(%rsp) + addq %rax, %rbx + adcq %rdx, %rbp + xorl %r8d, %r8d + addq %r9, %r9 + adcq %r10, %r10 + adcq %r11, %r11 + adcq %r12, %r12 + adcq %r13, %r13 + adcq %r14, %r14 + adcq %r15, %r15 + adcq %rcx, %rcx + adcq %rbx, %rbx + adcq %rbp, %rbp + adcl %r8d, %r8d + movq 0x30(%rsp), %rax + mulq %rax + movq %r8, 0xc0(%rsp) + movq %rax, %r8 + movq 0x38(%rsp), %rax + movq %rbp, 0xc8(%rsp) + addq %rdx, %r9 + sbbq %rbp, %rbp + mulq %rax + negq %rbp + adcq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbp, %rbp + movq 0x40(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbp, %rbp + movq 0x48(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %r14 + adcq %rdx, %r15 + sbbq %rbp, %rbp + movq 0x50(%rsp), %rax + mulq %rax + negq %rbp + adcq %rax, %rcx + adcq %rdx, %rbx + sbbq %rbp, %rbp + movq 0x58(%rsp), %rax + mulq %rax + negq %rbp + adcq 0xc8(%rsp), %rax + adcq 0xc0(%rsp), %rdx + 
movq %rax, %rbp + movq %rdx, %rdi + movq %rbx, 0xc0(%rsp) + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r8 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r8, %r9 + sbbq %rdx, %r10 + sbbq %rax, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + movq %rbx, %r8 + sbbq $0x0, %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r9 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r9, %r10 + sbbq %rdx, %r11 + sbbq %rax, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r8 + movq %rbx, %r9 + sbbq $0x0, %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r10 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r10, %r11 + sbbq %rdx, %r12 + sbbq %rax, %r13 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + movq %rbx, %r10 + sbbq $0x0, %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r11 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r11, %r12 + sbbq %rdx, %r13 + sbbq %rax, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + movq %rbx, %r11 + sbbq $0x0, %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r12 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r12, %r13 + sbbq %rdx, %r8 + sbbq %rax, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %r11 + movq %rbx, %r12 + sbbq $0x0, %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %rax, %r13 + movl $0x0, %eax + adcq %rbx, %rdx + adcl %eax, %eax + subq %r13, %r8 + sbbq %rdx, %r9 + sbbq %rax, %r10 + sbbq $0x0, %r11 + sbbq $0x0, %r12 + movq %rbx, %r13 + sbbq $0x0, %r13 + movq 0xc0(%rsp), %rbx + addq %r8, %r14 + adcq %r9, %r15 + adcq %r10, %rcx + adcq %r11, %rbx + adcq %r12, %rbp + adcq %r13, %rdi + movl $0x0, %r8d + adcq %r8, %r8 + xorq %r11, %r11 + xorq %r12, %r12 + xorq %r13, %r13 + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %r9d + adcq %r15, %r9 + movl $0x1, %r10d + adcq %rcx, %r10 + adcq %rbx, %r11 + adcq %rbp, %r12 + adcq %rdi, %r13 + adcq $0x0, %r8 + cmovneq %rax, %r14 + cmovneq %r9, %r15 + cmovneq %r10, %rcx + cmovneq %r11, %rbx + cmovneq %r12, %rbp + cmovneq %r13, %rdi + movq %r14, 0xc0(%rsp) + movq %r15, 0xc8(%rsp) + movq %rcx, 0xd0(%rsp) + movq %rbx, 0xd8(%rsp) + movq %rbp, 0xe0(%rsp) + movq %rdi, 0xe8(%rsp) + movq 0x150(%rsp), %rdi + movq 0xf0(%rsp), %rax + subq 0x30(%rsp), %rax + movq 0xf8(%rsp), %rdx + sbbq 0x38(%rsp), %rdx + movq 0x100(%rsp), %r8 + sbbq 0x40(%rsp), %r8 + movq 0x108(%rsp), %r9 + sbbq 0x48(%rsp), %r9 + movq 0x110(%rsp), %r10 + sbbq 0x50(%rsp), %r10 + movq 0x118(%rsp), %r11 + sbbq 0x58(%rsp), %r11 + sbbq %rcx, %rcx + movl $0xffffffff, %ebx + andq %rbx, %rcx + xorq %rbx, %rbx + subq %rcx, %rbx + subq %rbx, %rax + movq %rax, 0x60(%rdi) + sbbq %rcx, %rdx + movq %rdx, 0x68(%rdi) + sbbq %rax, %rax + andq %rbx, %rcx + negq %rax + sbbq %rcx, %r8 + movq %r8, 0x70(%rdi) + sbbq $0x0, %r9 + movq %r9, 0x78(%rdi) + sbbq $0x0, %r10 + movq %r10, 0x80(%rdi) + sbbq 
$0x0, %r11 + movq %r11, 0x88(%rdi) + movq 0x60(%rsp), %rbx + movq 0x120(%rsp), %rax + mulq %rbx + movq %rax, %r8 + movq %rdx, %r9 + movq 0x128(%rsp), %rax + mulq %rbx + xorl %r10d, %r10d + addq %rax, %r9 + adcq %rdx, %r10 + movq 0x130(%rsp), %rax + mulq %rbx + xorl %r11d, %r11d + addq %rax, %r10 + adcq %rdx, %r11 + movq 0x138(%rsp), %rax + mulq %rbx + xorl %r12d, %r12d + addq %rax, %r11 + adcq %rdx, %r12 + movq 0x140(%rsp), %rax + mulq %rbx + xorl %r13d, %r13d + addq %rax, %r12 + adcq %rdx, %r13 + movq 0x148(%rsp), %rax + mulq %rbx + xorl %r14d, %r14d + addq %rax, %r13 + adcq %rdx, %r14 + xorl %r15d, %r15d + movq %r8, %rbx + shlq $0x20, %rbx + addq %r8, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r8 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r8, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r9 + sbbq %rdx, %r10 + sbbq %rbp, %r11 + sbbq $0x0, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %rbx + addq %rbx, %r14 + adcq $0x0, %r15 + movq 0x68(%rsp), %rbx + movq 0x120(%rsp), %rax + mulq %rbx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r8, %r8 + movq 0x128(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r8, %r8 + movq 0x130(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r8, %r8 + movq 0x138(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r8, %r8 + movq 0x140(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r8, %r8 + movq 0x148(%rsp), %rax + mulq %rbx + subq %r8, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r8, %r8 + negq %r8 + movq %r9, %rbx + shlq $0x20, %rbx + addq %r9, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r9 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r9, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r10 + sbbq %rdx, %r11 + sbbq %rbp, %r12 + sbbq $0x0, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %rbx + addq %rbx, %r15 + adcq $0x0, %r8 + movq 0x70(%rsp), %rbx + movq 0x120(%rsp), %rax + mulq %rbx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r9, %r9 + movq 0x128(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r9, %r9 + movq 0x130(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r9, %r9 + movq 0x138(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r9, %r9 + movq 0x140(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r9, %r9 + movq 0x148(%rsp), %rax + mulq %rbx + subq %r9, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r9, %r9 + negq %r9 + movq %r10, %rbx + shlq $0x20, %rbx + addq %r10, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r10 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r10, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r11 + sbbq %rdx, %r12 + sbbq %rbp, %r13 + sbbq $0x0, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %rbx + addq %rbx, %r8 + adcq $0x0, %r9 + movq 0x78(%rsp), %rbx + movq 0x120(%rsp), %rax + mulq %rbx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %r10, %r10 + movq 0x128(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r10, %r10 + movq 0x130(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r10, %r10 + movq 0x138(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r10, %r10 + movq 0x140(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r15 
+ adcq %rdx, %r8 + sbbq %r10, %r10 + movq 0x148(%rsp), %rax + mulq %rbx + subq %r10, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r10, %r10 + negq %r10 + movq %r11, %rbx + shlq $0x20, %rbx + addq %r11, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r11 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r11, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r12 + sbbq %rdx, %r13 + sbbq %rbp, %r14 + sbbq $0x0, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %rbx + addq %rbx, %r9 + adcq $0x0, %r10 + movq 0x80(%rsp), %rbx + movq 0x120(%rsp), %rax + mulq %rbx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %r11, %r11 + movq 0x128(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r11, %r11 + movq 0x130(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r11, %r11 + movq 0x138(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r11, %r11 + movq 0x140(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r11, %r11 + movq 0x148(%rsp), %rax + mulq %rbx + subq %r11, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r11, %r11 + negq %r11 + movq %r12, %rbx + shlq $0x20, %rbx + addq %r12, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r12 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r12, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r13 + sbbq %rdx, %r14 + sbbq %rbp, %r15 + sbbq $0x0, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %rbx + addq %rbx, %r10 + adcq $0x0, %r11 + movq 0x88(%rsp), %rbx + movq 0x120(%rsp), %rax + mulq %rbx + addq %rax, %r13 + adcq %rdx, %r14 + sbbq %r12, %r12 + movq 0x128(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r14 + adcq %rdx, %r15 + sbbq %r12, %r12 + movq 0x130(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r15 + adcq %rdx, %r8 + sbbq %r12, %r12 + movq 0x138(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %r12, %r12 + movq 0x140(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %r12, %r12 + movq 0x148(%rsp), %rax + mulq %rbx + subq %r12, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %r12, %r12 + negq %r12 + movq %r13, %rbx + shlq $0x20, %rbx + addq %r13, %rbx + xorl %ebp, %ebp + movabsq $0xffffffff00000001, %rax + mulq %rbx + movq %rdx, %r13 + movabsq $0xffffffff, %rax + mulq %rbx + addq %r13, %rax + adcq %rbx, %rdx + adcl %ebp, %ebp + subq %rax, %r14 + sbbq %rdx, %r15 + sbbq %rbp, %r8 + sbbq $0x0, %r9 + sbbq $0x0, %r10 + sbbq $0x0, %rbx + addq %rbx, %r11 + adcq $0x0, %r12 + xorl %edx, %edx + xorl %ebp, %ebp + xorl %r13d, %r13d + movabsq $0xffffffff00000001, %rax + addq %r14, %rax + movl $0xffffffff, %ebx + adcq %r15, %rbx + movl $0x1, %ecx + adcq %r8, %rcx + adcq %r9, %rdx + adcq %r10, %rbp + adcq %r11, %r13 + adcq $0x0, %r12 + cmovneq %rax, %r14 + cmovneq %rbx, %r15 + cmovneq %rcx, %r8 + cmovneq %rdx, %r9 + cmovneq %rbp, %r10 + cmovneq %r13, %r11 + movq %r14, 0xf0(%rsp) + movq %r15, 0xf8(%rsp) + movq %r8, 0x100(%rsp) + movq %r9, 0x108(%rsp) + movq %r10, 0x110(%rsp) + movq %r11, 0x118(%rsp) + movq 0xb8(%rsp), %rcx + movq %rcx, %r13 + shrq $0x3e, %rcx + movq 0xb0(%rsp), %r12 + shldq $0x2, %r12, %r13 + movq 0xa8(%rsp), %r11 + shldq $0x2, %r11, %r12 + movq 0xa0(%rsp), %r10 + shldq $0x2, %r10, %r11 + movq 0x98(%rsp), %r9 + shldq $0x2, %r9, %r10 + movq 0x90(%rsp), %r8 + shldq $0x2, %r8, %r9 + shlq $0x2, %r8 + addq $0x1, %rcx + subq 0x120(%rsp), %r8 + sbbq 0x128(%rsp), %r9 + sbbq 0x130(%rsp), %r10 + 
sbbq 0x138(%rsp), %r11 + sbbq 0x140(%rsp), %r12 + sbbq 0x148(%rsp), %r13 + sbbq $0x0, %rcx + movabsq $0xffffffff00000001, %rax + mulq %rcx + addq %rax, %r8 + adcq %rdx, %r9 + adcq %rcx, %r10 + movq %rcx, %rax + sbbq %rcx, %rcx + movl $0xffffffff, %edx + negq %rcx + mulq %rdx + addq %rax, %r9 + adcq %rdx, %r10 + adcq %rcx, %r11 + adcq $0x0, %r12 + adcq $0x0, %r13 + sbbq %rcx, %rcx + notq %rcx + movl $0xffffffff, %edx + xorq %rax, %rax + andq %rcx, %rdx + subq %rdx, %rax + andq $0x1, %rcx + subq %rax, %r8 + movq %r8, (%rdi) + sbbq %rdx, %r9 + movq %r9, 0x8(%rdi) + sbbq %rcx, %r10 + movq %r10, 0x10(%rdi) + sbbq $0x0, %r11 + movq %r11, 0x18(%rdi) + sbbq $0x0, %r12 + movq %r12, 0x20(%rdi) + sbbq $0x0, %r13 + movq %r13, 0x28(%rdi) + movabsq $0xffffffff, %r8 + subq 0xc0(%rsp), %r8 + movabsq $0xffffffff00000000, %r9 + sbbq 0xc8(%rsp), %r9 + movq $0xfffffffffffffffe, %r10 + sbbq 0xd0(%rsp), %r10 + movq $0xffffffffffffffff, %r11 + sbbq 0xd8(%rsp), %r11 + movq $0xffffffffffffffff, %r12 + sbbq 0xe0(%rsp), %r12 + movq $0xffffffffffffffff, %r13 + sbbq 0xe8(%rsp), %r13 + movq %r13, %r14 + shrq $0x3d, %r14 + shldq $0x3, %r12, %r13 + shldq $0x3, %r11, %r12 + shldq $0x3, %r10, %r11 + shldq $0x3, %r9, %r10 + shldq $0x3, %r8, %r9 + shlq $0x3, %r8 + addq $0x1, %r14 + movl $0x3, %ecx + movq 0xf0(%rsp), %rax + mulq %rcx + addq %rax, %r8 + adcq %rdx, %r9 + sbbq %rbx, %rbx + movq 0xf8(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r9 + adcq %rdx, %r10 + sbbq %rbx, %rbx + movq 0x100(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r10 + adcq %rdx, %r11 + sbbq %rbx, %rbx + movq 0x108(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r11 + adcq %rdx, %r12 + sbbq %rbx, %rbx + movq 0x110(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r12 + adcq %rdx, %r13 + sbbq %rbx, %rbx + movq 0x118(%rsp), %rax + mulq %rcx + subq %rbx, %rdx + addq %rax, %r13 + adcq %rdx, %r14 + movabsq $0xffffffff00000001, %rax + mulq %r14 + addq %rax, %r8 + adcq %rdx, %r9 + adcq %r14, %r10 + movq %r14, %rax + sbbq %rcx, %rcx + movl $0xffffffff, %edx + negq %rcx + mulq %rdx + addq %rax, %r9 + adcq %rdx, %r10 + adcq %rcx, %r11 + adcq $0x0, %r12 + adcq $0x0, %r13 + sbbq %rcx, %rcx + notq %rcx + movl $0xffffffff, %edx + xorq %rax, %rax + andq %rcx, %rdx + subq %rdx, %rax + andq $0x1, %rcx + subq %rax, %r8 + movq %r8, 0x30(%rdi) + sbbq %rdx, %r9 + movq %r9, 0x38(%rdi) + sbbq %rcx, %r10 + movq %r10, 0x40(%rdi) + sbbq $0x0, %r11 + movq %r11, 0x48(%rdi) + sbbq $0x0, %r12 + movq %r12, 0x50(%rdi) + sbbq $0x0, %r13 + movq %r13, 0x58(%rdi) + addq $0x158, %rsp + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + ret + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack, "", %progbits +#endif