diff --git a/benches/uint.rs b/benches/uint.rs
index 9c5df6c38..1d03fce02 100644
--- a/benches/uint.rs
+++ b/benches/uint.rs
@@ -111,6 +111,36 @@ fn bench_division(c: &mut Criterion) {
     group.finish();
 }
 
+fn bench_gcd(c: &mut Criterion) {
+    let mut group = c.benchmark_group("greatest common divisor");
+
+    group.bench_function("gcd, U256", |b| {
+        b.iter_batched(
+            || {
+                let f = U256::random(&mut OsRng);
+                let g = U256::random(&mut OsRng);
+                (f, g)
+            },
+            |(f, g)| black_box(f.gcd(&g)),
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.bench_function("gcd_vartime, U256", |b| {
+        b.iter_batched(
+            || {
+                let f = Odd::<U256>::random(&mut OsRng);
+                let g = U256::random(&mut OsRng);
+                (f, g)
+            },
+            |(f, g)| black_box(f.gcd_vartime(&g)),
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
 fn bench_shl(c: &mut Criterion) {
     let mut group = c.benchmark_group("left shift");
 
@@ -258,9 +288,10 @@ fn bench_sqrt(c: &mut Criterion) {
 
 criterion_group!(
     benches,
+    bench_division,
+    bench_gcd,
     bench_shl,
     bench_shr,
-    bench_division,
     bench_inv_mod,
     bench_sqrt
 );
diff --git a/src/modular/bernstein_yang.rs b/src/modular/bernstein_yang.rs
index 25789a02b..44217bfbb 100644
--- a/src/modular/bernstein_yang.rs
+++ b/src/modular/bernstein_yang.rs
@@ -106,6 +106,19 @@ impl<const SAT_LIMBS: usize, const UNSAT_LIMBS: usize>
         f.to_uint()
     }
 
+    /// Returns the greatest common divisor (GCD) of the two given numbers.
+    ///
+    /// This version is variable-time with respect to `g`.
+    pub(crate) const fn gcd_vartime(f: &Uint<SAT_LIMBS>, g: &Uint<SAT_LIMBS>) -> Uint<SAT_LIMBS> {
+        let inverse = inv_mod2_62(f.as_words());
+        let e = Int62L::<UNSAT_LIMBS>::ONE;
+        let f = Int62L::from_uint(f);
+        let g = Int62L::from_uint(g);
+        let (_, mut f) = divsteps_vartime(e, f, g, inverse);
+        f = Int62L::select(&f, &f.neg(), f.is_negative());
+        f.to_uint()
+    }
+
     /// Returns either "value (mod M)" or "-value (mod M)", where M is the modulus the inverter
     /// was created for, depending on "negate", which determines the presence of "-" in the used
     /// formula. The input integer lies in the interval (-2 * M, M).
@@ -136,7 +149,10 @@ impl<const SAT_LIMBS: usize, const UNSAT_LIMBS: usize> Inverter
 ///
 /// For better understanding the implementation, the following paper is recommended:
 /// J. Hurchalla, "An Improved Integer Multiplicative Inverse (modulo 2^w)",
-/// https://arxiv.org/pdf/2204.04342.pdf
+/// <https://arxiv.org/pdf/2204.04342.pdf>
+///
+/// Variable time with respect to the number of words in `value`, however that number will be
+/// fixed for a given integer size.
 const fn inv_mod2_62(value: &[Word]) -> i64 {
     let value = {
         #[cfg(target_pointer_width = "32")]
@@ -167,6 +183,9 @@ const fn inv_mod2_62(value: &[Word]) -> i64 {
 
 /// Algorithm `divsteps2` to compute (δₙ, fₙ, gₙ) = divstepⁿ(δ, f, g) as described in Figure 10.1
 /// of <https://eprint.iacr.org/2019/266.pdf>.
+///
+/// This version runs in a fixed number of iterations relative to the highest bit of `f` or `g`
+/// as described in Figure 11.1.
 const fn divsteps<const LIMBS: usize>(
     mut e: Int62L<LIMBS>,
     f_0: Int62L<LIMBS>,
@@ -177,8 +196,35 @@ const fn divsteps<const LIMBS: usize>(
     let mut f = f_0;
     let mut delta = 1;
     let mut matrix;
+    let mut i = 0;
+    let m = iterations(&f_0, &g);
+
+    while i < m {
+        (delta, matrix) = jump(&f.0, &g.0, delta);
+        (f, g) = fg(f, g, matrix);
+        (d, e) = de(&f_0, inverse, matrix, d, e);
+        i += 1;
+    }
+
+    debug_assert!(g.eq(&Int62L::ZERO).to_bool_vartime());
+    (d, f)
+}
+
+/// Algorithm `divsteps2` to compute (δₙ, fₙ, gₙ) = divstepⁿ(δ, f, g) as described in Figure 10.1
+/// of <https://eprint.iacr.org/2019/266.pdf>.
+///
+/// This version is variable-time with respect to `g`.
+const fn divsteps_vartime<const LIMBS: usize>(
+    mut e: Int62L<LIMBS>,
+    f_0: Int62L<LIMBS>,
+    mut g: Int62L<LIMBS>,
+    inverse: i64,
+) -> (Int62L<LIMBS>, Int62L<LIMBS>) {
+    let mut d = Int62L::ZERO;
+    let mut f = f_0;
+    let mut delta = 1;
+    let mut matrix;
 
-    // TODO(tarcieri): run in a fixed number of iterations
     while !g.eq(&Int62L::ZERO).to_bool_vartime() {
         (delta, matrix) = jump(&f.0, &g.0, delta);
         (f, g) = fg(f, g, matrix);
@@ -284,6 +330,29 @@ const fn de<const LIMBS: usize>(
     (cd.shr(), ce.shr())
 }
 
+/// Compute the number of iterations required to compute Bernstein-Yang on the two values.
+///
+/// Adapted from Fig 11.1 of <https://eprint.iacr.org/2019/266.pdf>
+///
+/// The paper proves that the algorithm will converge (i.e. `g` will be `0`) in all cases when
+/// the algorithm runs this particular number of iterations.
+///
+/// Once `g` reaches `0`, continuing to run the algorithm will have no effect.
+// TODO(tarcieri): improved bounds using https://github.com/sipa/safegcd-bounds
+const fn iterations<const LIMBS: usize>(f: &Int62L<LIMBS>, g: &Int62L<LIMBS>) -> usize {
+    let f_bits = f.bits();
+    let g_bits = g.bits();
+
+    // Select max of `f_bits`, `g_bits`
+    let d = ConstChoice::from_u32_lt(f_bits, g_bits).select_u32(f_bits, g_bits) as usize;
+
+    if d < 46 {
+        (49 * d + 80) / 17
+    } else {
+        (49 * d + 57) / 17
+    }
+}
+
 /// "Bigint"-like (62 * LIMBS)-bit signed integer type, whose variables store numbers in the two's
 /// complement code as arrays of 62-bit limbs in little endian order.
 ///
@@ -467,6 +536,29 @@ impl<const LIMBS: usize> Int62L<LIMBS> {
 
         ret
     }
+
+    /// Calculate the number of leading zeros in the binary representation of this number.
+    pub const fn leading_zeros(&self) -> u32 {
+        let mut count = 0;
+        let mut i = LIMBS;
+        let mut nonzero_limb_not_encountered = ConstChoice::TRUE;
+
+        while i > 0 {
+            i -= 1;
+            let l = self.0[i];
+            let z = l.leading_zeros() - 2;
+            count += nonzero_limb_not_encountered.if_true_u32(z);
+            nonzero_limb_not_encountered =
+                nonzero_limb_not_encountered.and(ConstChoice::from_u64_nonzero(l).not());
+        }
+
+        count
+    }
+
+    /// Calculate the number of bits in this value (i.e. index of the highest bit) in constant time.
+    pub const fn bits(&self) -> u32 {
+        (LIMBS as u32 * 62) - self.leading_zeros()
+    }
 }
 
 #[cfg(test)]
diff --git a/src/uint/add.rs b/src/uint/add.rs
index bcb120dc1..76d89d3fb 100644
--- a/src/uint/add.rs
+++ b/src/uint/add.rs
@@ -50,6 +50,18 @@ impl<const LIMBS: usize> Add<&Uint<LIMBS>> for Uint<LIMBS> {
     }
 }
 
+impl<const LIMBS: usize> AddAssign for Uint<LIMBS> {
+    fn add_assign(&mut self, other: Self) {
+        *self += &other;
+    }
+}
+
+impl<const LIMBS: usize> AddAssign<&Uint<LIMBS>> for Uint<LIMBS> {
+    fn add_assign(&mut self, other: &Self) {
+        *self = *self + other;
+    }
+}
+
 impl<const LIMBS: usize> AddAssign for Wrapping<Uint<LIMBS>> {
     fn add_assign(&mut self, other: Self) {
         *self = *self + other;
diff --git a/src/uint/gcd.rs b/src/uint/gcd.rs
index e736d23a3..1889b8cd9 100644
--- a/src/uint/gcd.rs
+++ b/src/uint/gcd.rs
@@ -7,6 +7,9 @@ where
     Odd<Self>: PrecomputeInverter<Inverter = BernsteinYangInverter<SAT_LIMBS, UNSAT_LIMBS>>,
 {
     /// Compute the greatest common divisor (GCD) of this number and another.
+    ///
+    /// Runs in a constant number of iterations depending on the maximum highest bit of either
+    /// `self` or `rhs`.
     pub const fn gcd(&self, rhs: &Self) -> Self {
         let k1 = self.trailing_zeros();
         let k2 = rhs.trailing_zeros();
@@ -27,6 +30,18 @@ where
     }
 }
 
+impl<const SAT_LIMBS: usize, const UNSAT_LIMBS: usize> Odd<Uint<SAT_LIMBS>>
+where
+    Self: PrecomputeInverter<Inverter = BernsteinYangInverter<SAT_LIMBS, UNSAT_LIMBS>>,
+{
+    /// Compute the greatest common divisor (GCD) of this number and another.
+    ///
+    /// Runs in variable time with respect to `rhs`.
+    pub const fn gcd_vartime(&self, rhs: &Uint<SAT_LIMBS>) -> Uint<SAT_LIMBS> {
+        <Self as PrecomputeInverter>::Inverter::gcd_vartime(self.as_ref(), rhs)
+    }
+}
+
 impl<const SAT_LIMBS: usize, const UNSAT_LIMBS: usize> Gcd for Uint<SAT_LIMBS>
 where
     Odd<Self>: PrecomputeInverter<Inverter = BernsteinYangInverter<SAT_LIMBS, UNSAT_LIMBS>>,
diff --git a/tests/uint.rs b/tests/uint.rs
index 341e7fdef..d3b0f0bad 100644
--- a/tests/uint.rs
+++ b/tests/uint.rs
@@ -5,7 +5,7 @@ mod common;
 use common::to_biguint;
 use crypto_bigint::{
     modular::{MontyForm, MontyParams},
-    Encoding, Limb, NonZero, Odd, Word, U256,
+    Encoding, Integer, Limb, NonZero, Odd, Word, U256,
 };
 use num_bigint::BigUint;
 use num_integer::Integer as _;
@@ -284,6 +284,21 @@ proptest! {
         assert_eq!(expected, actual);
     }
 
+    #[test]
+    fn gcd_vartime(mut f in uint(), g in uint()) {
+        if bool::from(f.is_even()) {
+            f += U256::ONE;
+        }
+
+        let f_bi = to_biguint(&f);
+        let g_bi = to_biguint(&g);
+        let expected = to_uint(f_bi.gcd(&g_bi));
+
+        let f = Odd::new(f).unwrap();
+        let actual = f.gcd_vartime(&g);
+        assert_eq!(expected, actual);
+    }
+
     #[test]
     fn inv_mod2k(a in uint(), k in any::<u32>()) {
         let a = a | U256::ONE; // make odd