From 4eb3182921b8785cb7cd1d3da4b8922b554a31db Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Sun, 10 Dec 2023 11:42:03 -0700 Subject: [PATCH] BoxedUint: use `Limb`s in `montgomery_mul` This was previously written using raw `Word`s to ensure maximum performance; however, it precludes access to the constant-time helper functions implemented on `Limb`. This carefully rewrites the implementation to use `Limb` instead of `Word` while avoiding any performance impacts. --- src/modular/boxed_residue/mul.rs | 53 ++++++++++++++++---------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/src/modular/boxed_residue/mul.rs b/src/modular/boxed_residue/mul.rs index 293f57dbb..c403de859 100644 --- a/src/modular/boxed_residue/mul.rs +++ b/src/modular/boxed_residue/mul.rs @@ -6,12 +6,12 @@ //! Originally (c) 2014 The Rust Project Developers, dual licensed Apache 2.0+MIT. use super::{BoxedResidue, BoxedResidueParams}; -use crate::{traits::Square, BoxedUint, Limb, WideWord, Word}; +use crate::{traits::Square, BoxedUint, Limb, WideWord, Word, Zero}; use core::{ borrow::Borrow, ops::{Mul, MulAssign}, }; -use subtle::{ConditionallySelectable, ConstantTimeEq}; +use subtle::ConditionallySelectable; #[cfg(feature = "zeroize")] use zeroize::Zeroize; @@ -129,11 +129,11 @@ impl<'a> MontgomeryMultiplier<'a> { self.clear_product(); montgomery_mul( - self.product.as_words_mut(), - a.as_words(), - b.as_words(), - self.modulus.as_words(), - self.mod_neg_inv.into(), + self.product.as_limbs_mut(), + a.as_limbs(), + b.as_limbs(), + self.modulus.as_limbs(), + self.mod_neg_inv, ); a.limbs .copy_from_slice(&self.product.limbs[..a.limbs.len()]); @@ -152,11 +152,11 @@ impl<'a> MontgomeryMultiplier<'a> { self.clear_product(); montgomery_mul( - self.product.as_words_mut(), - a.as_words(), - a.as_words(), - self.modulus.as_words(), - self.mod_neg_inv.into(), + self.product.as_limbs_mut(), + a.as_limbs(), + a.as_limbs(), + self.modulus.as_limbs(), + self.mod_neg_inv, ); a.limbs 
.copy_from_slice(&self.product.limbs[..a.limbs.len()]); @@ -192,7 +192,7 @@ impl Drop for MontgomeryMultiplier<'_> { /// Note: this was adapted from an implementation in `num-bigint`'s `monty.rs`. // TODO(tarcieri): refactor into `reduction.rs`, share impl with `(Dyn)Residue`? #[cfg(feature = "alloc")] -fn montgomery_mul(z: &mut [Word], x: &[Word], y: &[Word], m: &[Word], k: Word) { +fn montgomery_mul(z: &mut [Limb], x: &[Limb], y: &[Limb], m: &[Limb], k: Limb) { // This code assumes x, y, m are all the same length (required by addMulVVW and the for loop). // It also assumes that x, y are already reduced mod m, or else the result will not be properly // reduced. @@ -202,7 +202,7 @@ fn montgomery_mul(z: &mut [Word], x: &[Word], y: &[Word], m: &[Word], k: Word) { debug_assert_eq!(y.len(), n); debug_assert_eq!(m.len(), n); - let mut c: Word = 0; + let mut c = Limb::ZERO; for i in 0..n { let c2 = add_mul_vvw(&mut z[i..n + i], x, y[i]); @@ -213,24 +213,24 @@ fn montgomery_mul(z: &mut [Word], x: &[Word], y: &[Word], m: &[Word], k: Word) { z[n + i] = cy; // TODO(tarcieri): eliminate data-dependent branches - c = (cx < c2 || cy < c3) as Word; + c = Limb((cx.0 < c2.0 || cy.0 < c3.0) as Word); } let (lower, upper) = z.split_at_mut(n); sub_vv(lower, upper, m); - let is_zero = c.ct_eq(&0); + let is_zero = c.is_zero(); for (a, b) in lower.iter_mut().zip(upper.iter()) { a.conditional_assign(b, is_zero); } } #[inline] -fn add_mul_vvw(z: &mut [Word], x: &[Word], y: Word) -> Word { - let mut c = 0; +fn add_mul_vvw(z: &mut [Limb], x: &[Limb], y: Limb) -> Limb { + let mut c = Limb::ZERO; for (zi, xi) in z.iter_mut().zip(x.iter()) { let (z1, z0) = mul_add_www(*xi, y, *zi); - let (c_, zi_) = add_ww(z0, c, 0); + let (c_, zi_) = add_ww(Limb(z0.0), c, Limb::ZERO); *zi = zi_; c = c_.wrapping_add(z1); } @@ -240,8 +240,8 @@ fn add_mul_vvw(z: &mut [Word], x: &[Word], y: Word) -> Word { /// The resulting carry c is either 0 or 1. 
#[inline(always)] -fn sub_vv(z: &mut [Word], x: &[Word], y: &[Word]) -> Word { - let mut c = 0; +fn sub_vv(z: &mut [Limb], x: &[Limb], y: &[Limb]) -> Limb { + let mut c = Limb::ZERO; for (i, (&xi, &yi)) in x.iter().zip(y.iter()).enumerate().take(z.len()) { let zi = xi.wrapping_sub(yi).wrapping_sub(c); z[i] = zi; @@ -254,16 +254,17 @@ fn sub_vv(z: &mut [Word], x: &[Word], y: &[Word]) -> Word { /// z1<<_W + z0 = x+y+c, with c == 0 or 1 #[inline(always)] -fn add_ww(x: Word, y: Word, c: Word) -> (Word, Word) { +fn add_ww(x: Limb, y: Limb, c: Limb) -> (Limb, Limb) { let yc = y.wrapping_add(c); let z0 = x.wrapping_add(yc); - let z1 = (z0 < x || yc < y) as Word; + // TODO(tarcieri): eliminate data-dependent branches + let z1 = Limb((z0.0 < x.0 || yc.0 < y.0) as Word); (z1, z0) } /// z1 << _W + z0 = x * y + c #[inline] -fn mul_add_www(x: Word, y: Word, c: Word) -> (Word, Word) { - let z = x as WideWord * y as WideWord + c as WideWord; - ((z >> Word::BITS) as Word, z as Word) +fn mul_add_www(x: Limb, y: Limb, c: Limb) -> (Limb, Limb) { + let z = x.0 as WideWord * y.0 as WideWord + c.0 as WideWord; + (Limb((z >> Word::BITS) as Word), Limb(z as Word)) }