From 276789aef38edb6b0aa03ecd99362136c5f9250b Mon Sep 17 00:00:00 2001
From: Tony Arcieri <bascule@gmail.com>
Date: Sun, 26 Nov 2023 10:55:41 -0700
Subject: [PATCH] Impl `MulMod` for `Uint`

Uses Montgomery multiplication (via `DynResidue`), so `p` must be odd;
even moduli currently panic. This may not be the most efficient approach
(e.g. a Barrett reduction might be faster).

This also changes the `MulMod` trait to remove the Montgomery-specific
`p_inv` parameter, simplifying the signature to `mul_mod(&self, rhs, p)`.
Optimized Montgomery multiplication is still available via `DynResidue`.

Closes #70
---
 src/traits.rs           |  6 +-----
 src/uint/mul_mod.rs     | 35 +++++++++++++++++++++++++++++++++--
 tests/uint_proptests.rs | 18 ++++++++++++++++++
 3 files changed, 52 insertions(+), 7 deletions(-)
diff --git a/src/traits.rs b/src/traits.rs
index 55d068984..3a9cf360f 100644
--- a/src/traits.rs
+++ b/src/traits.rs
@@ -145,16 +145,12 @@ pub trait NegMod {
 }
 
 /// Compute `self * rhs mod p`.
-///
-/// Requires `p_inv = -(p^{-1} mod 2^{BITS}) mod 2^{BITS}` to be provided for efficiency.
 pub trait MulMod<Rhs = Self> {
     /// Output type.
     type Output;
 
     /// Compute `self * rhs mod p`.
-    ///
-    /// Requires `p_inv = -(p^{-1} mod 2^{BITS}) mod 2^{BITS}` to be provided for efficiency.
-    fn mul_mod(&self, rhs: &Rhs, p: &Self, p_inv: Limb) -> Self::Output;
+    fn mul_mod(&self, rhs: &Rhs, p: &Self) -> Self::Output;
 }
 
 /// Checked addition.
diff --git a/src/uint/mul_mod.rs b/src/uint/mul_mod.rs
index c46274a4d..ff174d2e8 100644
--- a/src/uint/mul_mod.rs
+++ b/src/uint/mul_mod.rs
@@ -1,8 +1,31 @@
-//! [`Uint`] multiplication modulus operations.
+//! [`Uint`] modular multiplication operations.
 
-use crate::{Limb, Uint, WideWord, Word};
+use super::modular::{DynResidue, DynResidueParams};
+use crate::{Limb, MulMod, Uint, WideWord, Word};
 
 impl<const LIMBS: usize> Uint<LIMBS> {
+    /// Computes `self * rhs mod p` for odd `p`.
+    ///
+    /// Panics if `p` is even.
+    // TODO(tarcieri): support for even `p`?
+    pub fn mul_mod(&self, rhs: &Uint<LIMBS>, p: &Uint<LIMBS>) -> Uint<LIMBS> {
+        // NOTE: the overhead of converting to Montgomery form to perform this operation and then
+        // immediately converting out of Montgomery form after just a single operation is likely to
+        // be higher than other possible implementations of this function, such as using a
+        // Barrett reduction instead.
+        //
+        // It's worth potentially exploring other approaches to improve efficiency.
+        match DynResidueParams::new(p).into() {
+            Some(params) => {
+                let lhs = DynResidue::new(self, params);
+                let rhs = DynResidue::new(rhs, params);
+                let ret = lhs * rhs;
+                ret.retrieve()
+            }
+            None => todo!("even moduli are currently unsupported"),
+        }
+    }
+
     /// Computes `self * rhs mod p` for the special modulus
     /// `p = MAX+1-c` where `c` is small enough to fit in a single [`Limb`].
     /// For the modulus reduction, this function implements Algorithm 14.47 from
@@ -36,6 +59,14 @@ impl<const LIMBS: usize> Uint<LIMBS> {
     }
 }
 
+impl<const LIMBS: usize> MulMod for Uint<LIMBS> {
+    type Output = Self;
+
+    fn mul_mod(&self, rhs: &Self, p: &Self) -> Self {
+        self.mul_mod(rhs, p)
+    }
+}
+
 /// Computes `a + (b * c) + carry`, returning the result along with the new carry.
 const fn mac_by_limb<const LIMBS: usize>(
     a: &Uint<LIMBS>,
diff --git a/tests/uint_proptests.rs b/tests/uint_proptests.rs
index 8725448a4..cb4760b58 100644
--- a/tests/uint_proptests.rs
+++ b/tests/uint_proptests.rs
@@ -136,6 +136,24 @@ proptest! {
         assert_eq!(expected, actual);
     }
 
+    #[test]
+    fn mul_mod_nist_p256(a in uint_mod_p(P), b in uint_mod_p(P)) {
+        assert!(a < P);
+        assert!(b < P);
+
+        let a_bi = to_biguint(&a);
+        let b_bi = to_biguint(&b);
+        let p_bi = to_biguint(&P);
+
+        let expected = to_uint((a_bi * b_bi) % p_bi);
+        let actual = a.mul_mod(&b, &P);
+
+        assert!(expected < P);
+        assert!(actual < P);
+
+        assert_eq!(expected, actual);
+    }
+
     #[test]
     fn wrapping_sub(mut a in uint(), mut b in uint()) {
         if b > a {