diff --git a/Cargo.toml b/Cargo.toml index d2dd40127b..670b59c426 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,12 +65,14 @@ include = [ "crypto/fipsmodule/bn/internal.h", "crypto/fipsmodule/bn/montgomery.c", "crypto/fipsmodule/bn/montgomery_inv.c", + "crypto/fipsmodule/bn/shift.c", "crypto/fipsmodule/ec/asm/p256-armv8-asm.pl", "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl", "crypto/fipsmodule/ec/ecp_nistz.c", "crypto/fipsmodule/ec/ecp_nistz.h", "crypto/fipsmodule/ec/ecp_nistz384.h", "crypto/fipsmodule/ec/ecp_nistz384.inl", + "crypto/fipsmodule/ec/internal.h", "crypto/fipsmodule/ec/gfp_p256.c", "crypto/fipsmodule/ec/gfp_p384.c", "crypto/fipsmodule/ec/p256.c", @@ -80,6 +82,7 @@ include = [ "crypto/fipsmodule/ec/p256_shared.h", "crypto/fipsmodule/ec/p256_table.h", "crypto/fipsmodule/ec/util.h", + "crypto/fipsmodule/ec/wnaf.c", "crypto/fipsmodule/ecdsa/ecdsa_verify_tests.txt", "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl", "crypto/fipsmodule/modes/asm/ghash-armv4.pl", diff --git a/build.rs b/build.rs index 3cbae9d926..0446e3ec10 100644 --- a/build.rs +++ b/build.rs @@ -38,10 +38,12 @@ const RING_SRCS: &[(&[&str], &str)] = &[ (&[], "crypto/fipsmodule/aes/aes_nohw.c"), (&[], "crypto/fipsmodule/bn/montgomery.c"), (&[], "crypto/fipsmodule/bn/montgomery_inv.c"), + (&[], "crypto/fipsmodule/bn/shift.c"), (&[], "crypto/fipsmodule/ec/ecp_nistz.c"), (&[], "crypto/fipsmodule/ec/gfp_p256.c"), (&[], "crypto/fipsmodule/ec/gfp_p384.c"), (&[], "crypto/fipsmodule/ec/p256.c"), + (&[], "crypto/fipsmodule/ec/wnaf.c"), (&[], "crypto/limbs/limbs.c"), (&[], "crypto/mem.c"), (&[], "crypto/poly1305/poly1305.c"), @@ -959,6 +961,7 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "p256_point_mul", "p256_point_mul_base", "p256_point_mul_base_vartime", + "p256_point_mul_public", "p256_scalar_mul_mont", "p256_scalar_sqr_rep_mont", "p256_sqr_mont", diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h index cf7c807256..99c47bae39 100644 --- a/crypto/fipsmodule/ec/internal.h +++ b/crypto/fipsmodule/ec/internal.h @@ -68,7 +68,7 @@ #ifndef OPENSSL_HEADER_EC_INTERNAL_H #define OPENSSL_HEADER_EC_INTERNAL_H -#include +#include // ec_compute_wNAF writes the modified width-(w+1) Non-Adjacent Form (wNAF) of // |scalar| to |out|. |out| must have room for |bits| + 1 elements, each of @@ -78,7 +78,6 @@ // where at most one of any w+1 consecutive digits is non-zero // with the exception that the most significant digit may be only // w-1 zeros away from that next non-zero digit. -void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, - const EC_SCALAR *scalar, size_t bits, int w); +void ec_compute_wNAF(int8_t *out, const BN_ULONG *scalar, size_t scalar_limbs, size_t bits, int w); #endif // OPENSSL_HEADER_EC_INTERNAL_H diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c index dc67a71f07..f40fc9afd5 100644 --- a/crypto/fipsmodule/ec/p256.c +++ b/crypto/fipsmodule/ec/p256.c @@ -23,6 +23,7 @@ #include "p256_shared.h" +#include "internal.h" #include "../../internal.h" #include "./util.h" @@ -473,19 +474,17 @@ void p256_point_mul_base(Limb r[3][P256_LIMBS], const Limb scalar[P256_LIMBS]) { fiat_p256_to_words(r[2], nq[2]); } -#if 0 - -static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, - EC_JACOBIAN *r, - const EC_SCALAR *g_scalar, - const EC_JACOBIAN *p, - const EC_SCALAR *p_scalar) { +void p256_point_mul_public(Limb r[3][P256_LIMBS], + const Limb g_scalar[P256_LIMBS], + const Limb p_scalar[P256_LIMBS], + const Limb p_x[P256_LIMBS], + const Limb p_y[P256_LIMBS]) { #define P256_WSIZE_PUBLIC 4 // Precompute multiples of |p|. p_pre_comp[i] is (2*i+1) * |p|. fiat_p256_felem p_pre_comp[1 << (P256_WSIZE_PUBLIC - 1)][3]; - fiat_p256_from_generic(p_pre_comp[0][0], &p->X); - fiat_p256_from_generic(p_pre_comp[0][1], &p->Y); - fiat_p256_from_generic(p_pre_comp[0][2], &p->Z); + fiat_p256_from_words(p_pre_comp[0][0], p_x); + fiat_p256_from_words(p_pre_comp[0][1], p_y); + fiat_p256_copy(p_pre_comp[0][2], fiat_p256_one); fiat_p256_felem p2[3]; fiat_p256_point_double(p2[0], p2[1], p2[2], p_pre_comp[0][0], p_pre_comp[0][1], p_pre_comp[0][2]); @@ -498,7 +497,7 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, // Set up the coefficients for |p_scalar|. int8_t p_wNAF[257]; - ec_compute_wNAF(group, p_wNAF, p_scalar, 256, P256_WSIZE_PUBLIC); + ec_compute_wNAF(p_wNAF, p_scalar, P256_LIMBS, 256, P256_WSIZE_PUBLIC); // Set |ret| to the point at infinity. int skip = 1; // Save some point operations. @@ -562,13 +561,11 @@ static void ec_GFp_nistp256_point_mul_public(const EC_GROUP *group, } } - fiat_p256_to_generic(&r->X, ret[0]); - fiat_p256_to_generic(&r->Y, ret[1]); - fiat_p256_to_generic(&r->Z, ret[2]); + fiat_p256_to_words(r[0], ret[0]); + fiat_p256_to_words(r[1], ret[1]); + fiat_p256_to_words(r[2], ret[2]); } -#endif - void p256_mul_mont(Limb r[P256_LIMBS], const Limb a[P256_LIMBS], const Limb b[P256_LIMBS]) { fiat_p256_felem a_, b_; diff --git a/crypto/fipsmodule/ec/wnaf.c b/crypto/fipsmodule/ec/wnaf.c index 56de6cfec5..0c815e783e 100644 --- a/crypto/fipsmodule/ec/wnaf.c +++ b/crypto/fipsmodule/ec/wnaf.c @@ -65,17 +65,6 @@ * Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems * Laboratories. */ -#include - -#include -#include - -#include -#include -#include -#include - -#include "internal.h" #include "../bn/internal.h" #include "../../internal.h" @@ -85,18 +74,17 @@ // http://link.springer.com/chapter/10.1007%2F3-540-45537-X_13 // http://www.bmoeller.de/pdf/TI-01-08.multiexp.pdf -void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, - const EC_SCALAR *scalar, size_t bits, int w) { +void ec_compute_wNAF(int8_t *out, const BN_ULONG scalar[], size_t scalar_limbs, size_t bits, int w) { // 'int8_t' can represent integers with absolute values less than 2^7. - assert(0 < w && w <= 7); - assert(bits != 0); + debug_assert_nonsecret(0 < w && w <= 7); + debug_assert_nonsecret(bits != 0); int bit = 1 << w; // 2^w, at most 128 int next_bit = bit << 1; // 2^(w+1), at most 256 int mask = next_bit - 1; // at most 255 - int window_val = scalar->words[0] & mask; + int window_val = ((int)scalar[0]) & mask; for (size_t j = 0; j < bits + 1; j++) { - assert(0 <= window_val && window_val <= next_bit); + debug_assert_nonsecret(0 <= window_val && window_val <= next_bit); int digit = 0; if (window_val & 1) { assert(0 < window_val && window_val < next_bit); @@ -105,7 +93,7 @@ void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, // We know -next_bit < digit < 0 and window_val - digit = next_bit. // modified wNAF - if (j + w + 1 >= bits) { + if (j + ((size_t)w) + 1 >= bits) { // special case for generating modified wNAFs: // no new bits will be added into window_val, // so using a positive digit here will decrease @@ -132,17 +120,16 @@ void ec_compute_wNAF(const EC_GROUP *group, int8_t *out, assert(digit & 1); } - out[j] = digit; + out[j] = (int8_t)digit; // Incorporate the next bit. Previously, |window_val| <= |next_bit|, so if // we shift and add at most one copy of |bit|, this will continue to hold // afterwards. window_val >>= 1; - window_val += bit * bn_is_bit_set_words(scalar->words, group->order.N.width, - j + w + 1); - assert(window_val <= next_bit); + window_val += bit * bn_is_bit_set_words(scalar, scalar_limbs, j + (size_t)w + 1); + debug_assert_nonsecret(window_val <= next_bit); } // bits + 1 entries should be sufficient to consume all bits. - assert(window_val == 0); + debug_assert_nonsecret(window_val == 0); } diff --git a/crypto/internal.h b/crypto/internal.h index 7beb1d44fd..062ca564c6 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -150,6 +150,8 @@ typedef __int128_t int128_t; typedef __uint128_t uint128_t; #endif +#define OPENSSL_ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) + // Pointer utility functions. // buffers_alias returns one if |a| and |b| alias and zero otherwise. diff --git a/src/ec/suite_b/ops/p256.rs b/src/ec/suite_b/ops/p256.rs index adbed60936..104c2e10fc 100644 --- a/src/ec/suite_b/ops/p256.rs +++ b/src/ec/suite_b/ops/p256.rs @@ -119,9 +119,7 @@ pub static PUBLIC_SCALAR_OPS: PublicScalarOps = PublicScalarOps { twin_mul: twin_mul_nistz256, #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))] - twin_mul: |g_scalar, p_scalar, p_xy| { - twin_mul_inefficient(&PRIVATE_KEY_OPS, g_scalar, p_scalar, p_xy) - }, + twin_mul: twin_mul_fiat, q_minus_n: Elem::from_hex("4319055358e8617b0c46353d039cdaae"), }; @@ -147,6 +145,28 @@ fn point_mul_base_vartime(g_scalar: &Scalar) -> Point { scaled_g } +#[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))] +fn twin_mul_fiat(g_scalar: &Scalar, p_scalar: &Scalar, &(p_x, p_y): &(Elem, Elem)) -> Point { + prefixed_extern! { + fn p256_point_mul_public(r: *mut Limb, + g_scalar: *const Limb, + p_scalar: *const Limb, + p_x: *const Limb, + p_y: *const Limb); + } + let mut r = Point::new_at_infinity(); + unsafe { + p256_point_mul_public( + r.xyz.as_mut_ptr(), + g_scalar.limbs.as_ptr(), + p_scalar.limbs.as_ptr(), + p_x.limbs.as_ptr(), + p_y.limbs.as_ptr(), + ); + } + r +} + pub static PRIVATE_SCALAR_OPS: PrivateScalarOps = PrivateScalarOps { scalar_ops: &SCALAR_OPS,