From 804c090f214c4088bab15b2ecfe6ab8480ce683e Mon Sep 17 00:00:00 2001
From: Tim Ruffing <crypto@timruffing.de>
Date: Fri, 5 Apr 2024 19:13:58 +0200
Subject: [PATCH] fixup! Reduce side channels from single-bit reads

---
 src/ecmult_gen_impl.h | 36 ++++++++++++++++++++++++++++--------
 src/util.h            | 12 ++++++++++++
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index d4f34e5ac0..4ff8944627 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -7,6 +7,10 @@
 #ifndef SECP256K1_ECMULT_GEN_IMPL_H
 #define SECP256K1_ECMULT_GEN_IMPL_H
 
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+
 #include "util.h"
 #include "scalar.h"
 #include "group.h"
@@ -200,15 +204,31 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
              * together: bit (tooth) of bits = bit
              * ((block*COMB_TEETH + tooth)*COMB_SPACING + comb_off) of d. */
             uint32_t bits = 0, sign, abs, index, tooth;
+            /* Instead of reading individual bits here to construct the bits variable,
+             * build up the result by xoring rotated reads together. In every iteration,
+             * one additional bit is made correct, starting at the bottom. The bits
+             * above that contain junk. This reduces leakage by avoiding computations
+             * on variables that can have only a low number of possible values (e.g.,
+             * just two values when reading a single bit into a variable). See:
+             * https://www.usenix.org/system/files/conference/usenixsecurity18/sec18-alam.pdf
+             */
             for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
-                /* Instead of reading individual bits here to construct bits, build up
-                 * the result by xoring shifted reads together. In every iteration, one
-                 * additional bit is made correct, starting at the bottom. The bits
-                 * above that contain junk. This reduces leakage from single bits. See
-                 * https://www.usenix.org/system/files/conference/usenixsecurity18/sec18-alam.pdf
-                 */
-                uint32_t bitdata = recoded[bit_pos >> 5] >> (bit_pos & 0x1f);
-                bits &= ~(1 << tooth);
+                /* Construct bitdata s.t. the bottom bit is the bit we'd like to read.
+                 *
+                 * We could just set bitdata = recoded[bit_pos >> 5] >> (bit_pos & 0x1f)
+                 * but this would simply discard the bits that fall off at the bottom,
+                 * and thus, for example, bitdata could still have only two values if we
+                 * happen to shift by exactly 31 positions. We use a rotation instead,
+                 * which ensures that bitdata doesn't lose entropy. This relies on the
+                 * rotation being atomic, i.e., the compiler emitting an actual rot
+                 * instruction. */
+                uint32_t bitdata = secp256k1_rotr32(recoded[bit_pos >> 5], bit_pos & 0x1f);
+
+                /* Clear the bit at position tooth, but sssh, don't tell clang. */
+                uint32_t volatile vmask = ~(1 << tooth);
+                bits &= vmask;
+
+                /* Write the bit into position tooth (and junk into higher bits). */
                 bits ^= bitdata << tooth;
                 bit_pos += COMB_SPACING;
             }
diff --git a/src/util.h b/src/util.h
index 154d9ebcf1..a0a33d9c52 100644
--- a/src/util.h
+++ b/src/util.h
@@ -391,4 +391,16 @@ SECP256K1_INLINE static void secp256k1_write_be64(unsigned char* p, uint64_t x)
     p[0] = x >> 56;
 }
 
+/* Rotate a uint32_t to the right. */
+SECP256K1_INLINE static uint32_t secp256k1_rotr32(const uint32_t x, const unsigned int by) {
+#if defined(_MSC_VER)
+    return _rotr(x, by); /* needs <stdlib.h> */
+#else
+    /* Reduce rotation amount to avoid UB when shifting. */
+    const unsigned int mask = CHAR_BIT * sizeof(x) - 1;
+    /* Turned into a rot instruction by GCC and clang. */
+    return (x >> (by & mask)) | (x << ((-by) & mask));
+#endif
+}
+
 #endif /* SECP256K1_UTIL_H */
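
Illustration (not part of the patch): the standalone program below mirrors the rotation-based gather loop from the ecmult_gen hunk above. The names and parameters are stand-ins invented for this sketch; COMB_TEETH = 4, COMB_SPACING = 16, the 64-bit recoded value, and the helper gather_bits are not the library's actual comb configuration.

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

#define COMB_TEETH   4
#define COMB_SPACING 16

/* Same construction as the #else branch of secp256k1_rotr32. */
static uint32_t rotr32(uint32_t x, unsigned int by) {
    const unsigned int mask = CHAR_BIT * sizeof(x) - 1;
    return (x >> (by & mask)) | (x << ((-by) & mask));
}

/* Gather COMB_TEETH bits from a 64-bit recoded value, one every
 * COMB_SPACING positions starting at comb_off, as in the patched loop. */
static uint32_t gather_bits(const uint32_t recoded[2], unsigned int comb_off) {
    uint32_t bits = 0;
    unsigned int bit_pos = comb_off, tooth;
    for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
        /* Rotate so that the desired bit lands at the bottom; the 31 bits
         * above it keep rotated junk instead of being discarded. */
        uint32_t bitdata = rotr32(recoded[bit_pos >> 5], bit_pos & 0x1f);
        /* volatile mask, as in the patch: discourage the compiler from
         * rewriting the clear-then-xor into a plain single-bit insert. */
        uint32_t volatile vmask = ~((uint32_t)1 << tooth);
        bits &= vmask;
        /* Write the bit into position tooth (and junk into higher bits). */
        bits ^= bitdata << tooth;
        bit_pos += COMB_SPACING;
    }
    /* Only the low COMB_TEETH bits are meaningful. */
    return bits & ((1u << COMB_TEETH) - 1);
}

int main(void) {
    const uint32_t recoded[2] = { 0x0f0f0f0fu, 0x00ff00ffu };
    unsigned int comb_off;
    for (comb_off = 0; comb_off < COMB_SPACING; ++comb_off) {
        printf("comb_off=%2u -> bits=0x%x\n", comb_off,
               (unsigned)gather_bits(recoded, comb_off));
    }
    return 0;
}

The point of the rotation is that bitdata is a permutation of the bits of the secret table word, so every intermediate value ranges over as many distinct values as the word itself rather than collapsing to {0, 1}, which is the class of leakage the cited Alam et al. paper exploits.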
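
Likewise, a quick hypothetical sanity check (also not part of the patch) for the portable branch of secp256k1_rotr32, exercising by == 0 and by >= 32, exactly the inputs where a naive (x >> by) | (x << (32 - by)) would shift by 32 and hit undefined behavior:

#include <assert.h>
#include <limits.h>
#include <stdint.h>

/* Same body as the #else branch of secp256k1_rotr32 in the patch. */
static uint32_t rotr32(uint32_t x, unsigned int by) {
    const unsigned int mask = CHAR_BIT * sizeof(x) - 1; /* 31 */
    return (x >> (by & mask)) | (x << ((-by) & mask));
}

int main(void) {
    unsigned int by;
    for (by = 0; by <= 64; by++) {
        uint32_t x = 0x80000001u;
        uint32_t want = x;
        unsigned int i;
        /* Reference: rotate right one position at a time. */
        for (i = 0; i < by % 32; i++) {
            want = (want >> 1) | (want << 31);
        }
        assert(rotr32(x, by) == want);
    }
    return 0;
}

Both shift amounts, by & mask and (-by) & mask, are guaranteed to be in [0, 31]; the unary minus on an unsigned operand is well-defined modular arithmetic, which is what lets GCC and clang recognize the pair of shifts as a single rotate.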