diff --git a/benches/boxed_monty.rs b/benches/boxed_monty.rs index b621a1fae..a8a97f5dc 100644 --- a/benches/boxed_monty.rs +++ b/benches/boxed_monty.rs @@ -94,6 +94,27 @@ fn bench_montgomery_ops(group: &mut BenchmarkGroup<'_, M>) { BatchSize::SmallInput, ) }); + + group.bench_function( + "lincomb_vartime, BoxedUint*BoxedUint+BoxedUint*BoxedUint", + |b| { + b.iter_batched( + || { + BoxedMontyForm::new( + BoxedUint::random_mod(&mut OsRng, params.modulus().as_nz_ref()), + params.clone(), + ) + }, + |a| { + BoxedMontyForm::lincomb_vartime(&[ + (black_box(&a), black_box(&a)), + (black_box(&a), black_box(&a)), + ]) + }, + BatchSize::SmallInput, + ) + }, + ); } fn bench_montgomery_conversion(group: &mut BenchmarkGroup<'_, M>) { diff --git a/benches/const_monty.rs b/benches/const_monty.rs index 380fd273c..2cc1447d0 100644 --- a/benches/const_monty.rs +++ b/benches/const_monty.rs @@ -11,7 +11,7 @@ use crypto_bigint::MultiExponentiate; impl_modulus!( Modulus, U256, - "ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551" + "7fffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551" ); type ConstMontyForm = crypto_bigint::modular::ConstMontyForm; @@ -80,6 +80,19 @@ fn bench_montgomery_ops(group: &mut BenchmarkGroup<'_, M>) { ) }); + group.bench_function("lincomb_vartime, U256*U256+U256*U256", |b| { + b.iter_batched( + || ConstMontyForm::random(&mut OsRng), + |a| { + ConstMontyForm::lincomb_vartime(&[ + (black_box(a), black_box(a)), + (black_box(a), black_box(a)), + ]) + }, + BatchSize::SmallInput, + ) + }); + #[cfg(feature = "alloc")] for i in [1, 2, 3, 4, 10, 100] { group.bench_function( diff --git a/src/limb/cmp.rs b/src/limb/cmp.rs index e1295898c..194bc866d 100644 --- a/src/limb/cmp.rs +++ b/src/limb/cmp.rs @@ -72,7 +72,7 @@ impl Ord for Limb { let mut ret = Ordering::Less; ret.conditional_assign(&Ordering::Equal, self.ct_eq(other)); ret.conditional_assign(&Ordering::Greater, self.ct_gt(other)); - debug_assert_eq!(ret == Ordering::Less, self.ct_lt(other).into()); + debug_assert_eq!(ret == Ordering::Less, bool::from(self.ct_lt(other))); ret } } diff --git a/src/modular.rs b/src/modular.rs index dbd5c804b..1159d6a42 100644 --- a/src/modular.rs +++ b/src/modular.rs @@ -17,6 +17,7 @@ //! the modulus can vary at runtime. mod const_monty_form; +mod lincomb; mod monty_form; mod reduction; diff --git a/src/modular/boxed_monty_form.rs b/src/modular/boxed_monty_form.rs index 5d989156d..e6d5ca30f 100644 --- a/src/modular/boxed_monty_form.rs +++ b/src/modular/boxed_monty_form.rs @@ -2,6 +2,7 @@ mod add; mod inv; +mod lincomb; mod mul; mod neg; mod pow; @@ -33,6 +34,8 @@ pub struct BoxedMontyParams { /// The lowest limbs of -(MODULUS^-1) mod R /// We only need the LSB because during reduction this value is multiplied modulo 2**Limb::BITS. mod_neg_inv: Limb, + /// Leading zeros in the modulus, used to choose optimized algorithms + mod_leading_zeros: u32, } impl BoxedMontyParams { @@ -92,6 +95,9 @@ impl BoxedMontyParams { debug_assert!(bool::from(modulus_is_odd)); let mod_neg_inv = Limb(Word::MIN.wrapping_sub(inv_mod_limb.limbs[0].0)); + + let mod_leading_zeros = modulus.as_ref().leading_zeros().max(Word::BITS - 1); + let r3 = montgomery_reduction_boxed(&mut r2.square(), &modulus, mod_neg_inv); Self { @@ -100,6 +106,7 @@ impl BoxedMontyParams { r2, r3, mod_neg_inv, + mod_leading_zeros, } } diff --git a/src/modular/boxed_monty_form/lincomb.rs b/src/modular/boxed_monty_form/lincomb.rs new file mode 100644 index 000000000..aa261a783 --- /dev/null +++ b/src/modular/boxed_monty_form/lincomb.rs @@ -0,0 +1,75 @@ +//! Linear combinations of integers in Montgomery form with a modulus set at runtime. + +use super::BoxedMontyForm; +use crate::modular::lincomb::lincomb_boxed_monty_form; + +impl BoxedMontyForm { + /// Calculate the sum of products of pairs `(a, b)` in `products`. + /// + /// This method is variable time only with the value of the modulus. + /// For a modulus with leading zeros, this method is more efficient than a naive sum of products. + /// + /// This method will panic if `products` is empty. All terms must be associated + /// with equivalent `MontyParams`. + pub fn lincomb_vartime(products: &[(&Self, &Self)]) -> Self { + assert!(!products.is_empty(), "empty products"); + let params = &products[0].0.params; + Self { + montgomery_form: lincomb_boxed_monty_form( + products, + ¶ms.modulus, + params.mod_neg_inv, + params.mod_leading_zeros, + ), + params: products[0].0.params.clone(), + } + } +} + +#[cfg(test)] +mod tests { + + #[cfg(feature = "rand")] + #[test] + fn lincomb_expected() { + use crate::modular::{BoxedMontyForm, BoxedMontyParams}; + use crate::{BoxedUint, Odd, RandomMod}; + use rand_core::SeedableRng; + + const SIZE: u32 = 511; + + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(1); + for n in 0..100 { + let modulus = Odd::::random(&mut rng, SIZE); + let params = BoxedMontyParams::new(modulus.clone()); + let a = BoxedUint::random_mod(&mut rng, modulus.as_nz_ref()); + let b = BoxedUint::random_mod(&mut rng, modulus.as_nz_ref()); + let c = BoxedUint::random_mod(&mut rng, modulus.as_nz_ref()); + let d = BoxedUint::random_mod(&mut rng, modulus.as_nz_ref()); + let e = BoxedUint::random_mod(&mut rng, modulus.as_nz_ref()); + let f = BoxedUint::random_mod(&mut rng, modulus.as_nz_ref()); + + let std = a + .mul_mod(&b, &modulus) + .add_mod(&c.mul_mod(&d, &modulus), &modulus) + .add_mod(&e.mul_mod(&f, &modulus), &modulus); + + let lincomb = BoxedMontyForm::lincomb_vartime(&[ + ( + &BoxedMontyForm::new(a, params.clone()), + &BoxedMontyForm::new(b, params.clone()), + ), + ( + &BoxedMontyForm::new(c, params.clone()), + &BoxedMontyForm::new(d, params.clone()), + ), + ( + &BoxedMontyForm::new(e, params.clone()), + &BoxedMontyForm::new(f, params.clone()), + ), + ]); + + assert_eq!(std, lincomb.retrieve(), "n={n}"); + } + } +} diff --git a/src/modular/const_monty_form.rs b/src/modular/const_monty_form.rs index ae4d2a7a1..cc42ef71d 100644 --- a/src/modular/const_monty_form.rs +++ b/src/modular/const_monty_form.rs @@ -2,6 +2,7 @@ mod add; pub(super) mod inv; +mod lincomb; mod mul; mod neg; mod pow; @@ -49,6 +50,8 @@ pub trait ConstMontyParams: /// The lowest limbs of -(MODULUS^-1) mod R // We only need the LSB because during reduction this value is multiplied modulo 2**Limb::BITS. const MOD_NEG_INV: Limb; + /// Leading zeros in the modulus, used to choose optimized algorithms + const MOD_LEADING_ZEROS: u32; /// Precompute a Bernstein-Yang inverter for this modulus. /// diff --git a/src/modular/const_monty_form/lincomb.rs b/src/modular/const_monty_form/lincomb.rs new file mode 100644 index 000000000..416efdf9f --- /dev/null +++ b/src/modular/const_monty_form/lincomb.rs @@ -0,0 +1,60 @@ +//! Linear combinations of integers n Montgomery form with a constant modulus. + +use core::marker::PhantomData; + +use super::{ConstMontyForm, ConstMontyParams}; +use crate::modular::lincomb::lincomb_const_monty_form; + +impl, const LIMBS: usize> ConstMontyForm { + /// Calculate the sum of products of pairs `(a, b)` in `products`. + /// + /// This method is variable time only with the value of the modulus. + /// For a modulus with leading zeros, this method is more efficient than a naive sum of products. + pub const fn lincomb_vartime(products: &[(Self, Self)]) -> Self { + Self { + montgomery_form: lincomb_const_monty_form(products, &MOD::MODULUS, MOD::MOD_NEG_INV), + phantom: PhantomData, + } + } +} + +#[cfg(test)] +mod tests { + + #[cfg(feature = "rand")] + #[test] + fn lincomb_expected() { + use super::{ConstMontyForm, ConstMontyParams}; + use crate::{impl_modulus, RandomMod, U256}; + use rand_core::SeedableRng; + impl_modulus!( + P, + U256, + "7fffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551" + ); + let modulus = P::MODULUS.as_nz_ref(); + + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(1); + for n in 0..1000 { + let a = U256::random_mod(&mut rng, modulus); + let b = U256::random_mod(&mut rng, modulus); + let c = U256::random_mod(&mut rng, modulus); + let d = U256::random_mod(&mut rng, modulus); + let e = U256::random_mod(&mut rng, modulus); + let f = U256::random_mod(&mut rng, modulus); + + assert_eq!( + a.mul_mod(&b, modulus) + .add_mod(&c.mul_mod(&d, modulus), modulus) + .add_mod(&e.mul_mod(&f, modulus), modulus), + ConstMontyForm::::lincomb_vartime(&[ + (ConstMontyForm::new(&a), ConstMontyForm::new(&b)), + (ConstMontyForm::new(&c), ConstMontyForm::new(&d)), + (ConstMontyForm::new(&e), ConstMontyForm::new(&f)), + ]) + .retrieve(), + "n={n}" + ) + } + } +} diff --git a/src/modular/const_monty_form/macros.rs b/src/modular/const_monty_form/macros.rs index 93c331bc8..b54872668 100644 --- a/src/modular/const_monty_form/macros.rs +++ b/src/modular/const_monty_form/macros.rs @@ -44,6 +44,16 @@ macro_rules! impl_modulus { ), ); + // Leading zeros in the modulus, used to choose optimized algorithms. + const MOD_LEADING_ZEROS: u32 = { + let z = Self::MODULUS.as_ref().leading_zeros(); + if z >= $crate::Word::BITS { + $crate::Word::BITS - 1 + } else { + z + } + }; + const R3: $uint_type = $crate::modular::montgomery_reduction( &Self::R2.square_wide(), &Self::MODULUS, diff --git a/src/modular/lincomb.rs b/src/modular/lincomb.rs new file mode 100644 index 000000000..fbf7a5186 --- /dev/null +++ b/src/modular/lincomb.rs @@ -0,0 +1,171 @@ +use crate::{modular::MontyForm, Limb, Odd, Uint}; + +use super::{ConstMontyForm, ConstMontyParams}; + +#[cfg(feature = "alloc")] +use super::BoxedMontyForm; +#[cfg(feature = "alloc")] +use crate::BoxedUint; + +/// Implement the coarse interleaved sum of products (Algorithm 2 for B=1) from +/// Efficient Algorithms for Large Prime Characteristic Fields and Their Application +/// to Bilinear Pairings by Patrick Longa. https://eprint.iacr.org/2022/367 +/// +/// For correct results, the un-reduced sum of products must not exceed `p•R` where `p` +/// is the modulus. Given a list of pairs `(a_1, b_1)..(a_k, b_k)` in Montgomery form, +/// where each `a_i < p` and `b_i < p`, we have `sum(a_i•b_i) < k•p^2` and so up to +/// `k = floor(R/p)` pairs may be safely accumulated per call. +/// +/// This is implemented as a macro to abstract over `const fn` and boxed use cases, since the latter +/// needs mutable references and thus the unstable `const_mut_refs` feature (rust-lang/rust#57349). +/// +// TODO: change this into a `const fn` when `const_mut_refs` is stable +macro_rules! impl_longa_monty_lincomb { + ($a_b:expr, $u:expr, $modulus:expr, $mod_neg_inv:expr, $nlimbs:expr) => {{ + let len = $a_b.len(); + let mut hi_carry = Limb::ZERO; + let mut hi; + let mut carry; + + let mut j = 0; + while j < $nlimbs { + hi = hi_carry; + hi_carry = Limb::ZERO; + + let mut i = 0; + while i < len { + let (ai, bi) = &$a_b[i]; + carry = Limb::ZERO; + + let mut k = 0; + while k < $nlimbs { + ($u[k], carry) = $u[k].mac( + ai.as_montgomery().limbs[j], + bi.as_montgomery().limbs[k], + carry, + ); + k += 1; + } + (hi, carry) = hi.adc(carry, Limb::ZERO); + hi_carry = hi_carry.wrapping_add(carry); + + i += 1; + } + + let q = $u[0].wrapping_mul($mod_neg_inv); + + (_, carry) = $u[0].mac(q, $modulus[0], Limb::ZERO); + + i = 1; + while i < $nlimbs { + ($u[i - 1], carry) = $u[i].mac(q, $modulus[i], carry); + i += 1; + } + ($u[$nlimbs - 1], carry) = hi.adc(carry, Limb::ZERO); + hi_carry = hi_carry.wrapping_add(carry); + + j += 1; + } + + hi_carry + }}; +} + +pub const fn lincomb_const_monty_form, const LIMBS: usize>( + mut products: &[(ConstMontyForm, ConstMontyForm)], + modulus: &Odd>, + mod_neg_inv: Limb, +) -> Uint { + let max_accum = 1 << (MOD::MOD_LEADING_ZEROS as usize); + let mut ret = Uint::ZERO; + let mut remain = products.len(); + if remain <= max_accum { + let carry = + impl_longa_monty_lincomb!(products, ret.limbs, modulus.0.limbs, mod_neg_inv, LIMBS); + ret.sub_mod_with_carry(carry, &modulus.0, &modulus.0) + } else { + let mut window; + while remain > 0 { + let mut buf = Uint::ZERO; + let mut count = remain; + if count > max_accum { + count = max_accum; + } + (window, products) = products.split_at(count); + let carry = + impl_longa_monty_lincomb!(window, buf.limbs, modulus.0.limbs, mod_neg_inv, LIMBS); + buf = buf.sub_mod_with_carry(carry, &modulus.0, &modulus.0); + ret = ret.add_mod(&buf, &modulus.0); + remain -= count; + } + ret + } +} + +pub const fn lincomb_monty_form( + mut products: &[(MontyForm, MontyForm)], + modulus: &Odd>, + mod_neg_inv: Limb, + mod_leading_zeros: u32, +) -> Uint { + let max_accum = 1 << (mod_leading_zeros as usize); + let mut ret = Uint::ZERO; + let mut remain = products.len(); + if remain <= max_accum { + let carry = + impl_longa_monty_lincomb!(products, ret.limbs, modulus.0.limbs, mod_neg_inv, LIMBS); + ret.sub_mod_with_carry(carry, &modulus.0, &modulus.0) + } else { + let mut window; + while remain > 0 { + let mut count = remain; + if count > max_accum { + count = max_accum; + } + (window, products) = products.split_at(count); + let mut buf = Uint::ZERO; + let carry = + impl_longa_monty_lincomb!(window, buf.limbs, modulus.0.limbs, mod_neg_inv, LIMBS); + buf = buf.sub_mod_with_carry(carry, &modulus.0, &modulus.0); + ret = ret.add_mod(&buf, &modulus.0); + remain -= count; + } + ret + } +} + +#[cfg(feature = "alloc")] +pub fn lincomb_boxed_monty_form( + mut products: &[(&BoxedMontyForm, &BoxedMontyForm)], + modulus: &Odd, + mod_neg_inv: Limb, + mod_leading_zeros: u32, +) -> BoxedUint { + let max_accum = 1 << (mod_leading_zeros as usize); + let nlimbs = modulus.0.nlimbs(); + let mut ret = BoxedUint::zero_with_precision(modulus.0.bits_precision()); + let mut remain = products.len(); + if remain <= max_accum { + let carry = + impl_longa_monty_lincomb!(products, ret.limbs, modulus.0.limbs, mod_neg_inv, nlimbs); + ret.sub_assign_mod_with_carry(carry, &modulus.0, &modulus.0); + } else { + let mut window; + let mut buf = BoxedUint::zero_with_precision(modulus.0.bits_precision()); + while remain > 0 { + buf.limbs.fill(Limb::ZERO); + let mut count = remain; + if count > max_accum { + count = max_accum; + } + (window, products) = products.split_at(count); + let carry = + impl_longa_monty_lincomb!(window, buf.limbs, modulus.0.limbs, mod_neg_inv, nlimbs); + buf.sub_assign_mod_with_carry(carry, &modulus.0, &modulus.0); + let carry = ret.adc_assign(&buf, Limb::ZERO); + ret.sub_assign_mod_with_carry(carry, &modulus.0, &modulus.0); + remain -= count; + } + } + ret +} diff --git a/src/modular/monty_form.rs b/src/modular/monty_form.rs index 95c4aadbf..9eae5cb71 100644 --- a/src/modular/monty_form.rs +++ b/src/modular/monty_form.rs @@ -2,6 +2,7 @@ mod add; pub(super) mod inv; +mod lincomb; mod mul; mod neg; mod pow; @@ -30,6 +31,8 @@ pub struct MontyParams { /// The lowest limbs of -(MODULUS^-1) mod R /// We only need the LSB because during reduction this value is multiplied modulo 2**Limb::BITS. mod_neg_inv: Limb, + /// Leading zeros in the modulus, used to choose optimized algorithms + mod_leading_zeros: u32, } impl MontyParams @@ -57,6 +60,8 @@ where let mod_neg_inv = Limb(Word::MIN.wrapping_sub(inv_mod.limbs[0].0)); + let mod_leading_zeros = modulus.as_ref().leading_zeros().max(Word::BITS - 1); + // `R^3 mod modulus`, used for inversion in Montgomery form. let r3 = montgomery_reduction(&r2.square_wide(), &modulus, mod_neg_inv); @@ -66,6 +71,7 @@ where r2, r3, mod_neg_inv, + mod_leading_zeros, } } } @@ -89,6 +95,8 @@ impl MontyParams { let mod_neg_inv = Limb(Word::MIN.wrapping_sub(inv_mod.limbs[0].0)); + let mod_leading_zeros = modulus.as_ref().leading_zeros().max(Word::BITS - 1); + // `R^3 mod modulus`, used for inversion in Montgomery form. let r3 = montgomery_reduction(&r2.square_wide(), &modulus, mod_neg_inv); @@ -98,6 +106,7 @@ impl MontyParams { r2, r3, mod_neg_inv, + mod_leading_zeros, } } @@ -117,6 +126,7 @@ impl MontyParams { r2: P::R2, r3: P::R3, mod_neg_inv: P::MOD_NEG_INV, + mod_leading_zeros: P::MOD_LEADING_ZEROS, } } } @@ -129,6 +139,11 @@ impl ConditionallySelectable for MontyParams { r2: Uint::conditional_select(&a.r2, &b.r2, choice), r3: Uint::conditional_select(&a.r3, &b.r3, choice), mod_neg_inv: Limb::conditional_select(&a.mod_neg_inv, &b.mod_neg_inv, choice), + mod_leading_zeros: u32::conditional_select( + &a.mod_leading_zeros, + &b.mod_leading_zeros, + choice, + ), } } } diff --git a/src/modular/monty_form/lincomb.rs b/src/modular/monty_form/lincomb.rs new file mode 100644 index 000000000..98c8a3af8 --- /dev/null +++ b/src/modular/monty_form/lincomb.rs @@ -0,0 +1,62 @@ +//! Linear combinations of integers in Montgomery form with a modulus set at runtime. + +use super::MontyForm; +use crate::modular::lincomb::lincomb_monty_form; + +impl MontyForm { + /// Calculate the sum of products of pairs `(a, b)` in `products`. + /// + /// This method is variable time only with the value of the modulus. + /// For a modulus with leading zeros, this method is more efficient than a naive sum of products. + /// + /// This method will panic if `products` is empty. All terms must be associated + /// with equivalent `MontyParams`. + pub const fn lincomb_vartime(products: &[(Self, Self)]) -> Self { + assert!(!products.is_empty(), "empty products"); + let params = &products[0].0.params; + Self { + montgomery_form: lincomb_monty_form( + products, + ¶ms.modulus, + params.mod_neg_inv, + params.mod_leading_zeros, + ), + params: products[0].0.params, + } + } +} + +#[cfg(test)] +mod tests { + #[cfg(feature = "rand")] + #[test] + fn lincomb_expected() { + use crate::U256; + use crate::{ + modular::{MontyForm, MontyParams}, + Odd, Random, RandomMod, + }; + use rand_core::SeedableRng; + + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(1); + for n in 0..1000 { + let modulus = Odd::::random(&mut rng); + let params = MontyParams::new_vartime(modulus); + let m = modulus.as_nz_ref(); + let a = U256::random_mod(&mut rng, m); + let b = U256::random_mod(&mut rng, m); + let c = U256::random_mod(&mut rng, m); + let d = U256::random_mod(&mut rng, m); + + assert_eq!( + a.mul_mod(&b, m).add_mod(&c.mul_mod(&d, m), m), + MontyForm::lincomb_vartime(&[ + (MontyForm::new(&a, params), MontyForm::new(&b, params)), + (MontyForm::new(&c, params), MontyForm::new(&d, params)), + ]) + .retrieve(), + "n={n}, a={a}, b={b}, c={c}, d={d}" + ) + } + } +} diff --git a/src/uint/boxed/sub_mod.rs b/src/uint/boxed/sub_mod.rs index 37651dbb4..d0428fa47 100644 --- a/src/uint/boxed/sub_mod.rs +++ b/src/uint/boxed/sub_mod.rs @@ -1,6 +1,6 @@ //! [`BoxedUint`] modular subtraction operations. -use crate::{BoxedUint, Limb, SubMod}; +use crate::{BoxedUint, Limb, SubMod, Zero}; impl BoxedUint { /// Computes `self - rhs mod p`. @@ -19,6 +19,22 @@ impl BoxedUint { out.wrapping_add(&p.bitand_limb(mask)) } + /// Returns `(self..., carry) - (rhs...) mod (p...)`, where `carry <= 1`. + /// Assumes `-(p...) <= (self..., carry) - (rhs...) < (p...)`. + #[inline(always)] + pub(crate) fn sub_assign_mod_with_carry(&mut self, carry: Limb, rhs: &Self, p: &Self) { + debug_assert!(carry.0 <= 1); + + let borrow = self.sbb_assign(rhs, Limb::ZERO); + + // The new `borrow = Word::MAX` iff `carry == 0` and `borrow == Word::MAX`. + let mask = carry.wrapping_neg().not().bitand(borrow); + + // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise + // borrow = 0x000...000. Thus, we use it as a mask to conditionally add the modulus. + self.conditional_adc_assign(p, !mask.is_zero()); + } + /// Computes `self - rhs mod p` for the special modulus /// `p = MAX+1-c` where `c` is small enough to fit in a single [`Limb`]. ///