diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32ad2c4e1..515afe9a7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -480,6 +480,13 @@ foreach (test IN ITEMS path str)
    target_link_libraries (mlib.${test}.test PRIVATE mongo::mlib)
 endforeach ()
 
+if ("cxx_relaxed_constexpr" IN_LIST CMAKE_CXX_COMPILE_FEATURES)
+   add_executable (mlib.int128.test src/mlib/int128.test.cpp src/mlib/int128.test.c)
+   add_test (mlib.int128 mlib.int128.test) # Test name, then the program to run
+   target_link_libraries (mlib.int128.test PRIVATE mongo::mlib Threads::Threads)
+   target_compile_features (mlib.int128.test PRIVATE cxx_relaxed_constexpr)
+endif ()
+
 if ("cxx_std_20" IN_LIST CMAKE_CXX_COMPILE_FEATURES)
    add_executable (csfle-markup src/csfle-markup.cpp)
    target_link_libraries (csfle-markup PRIVATE mongocrypt_static _mongocrypt::libbson_for_static mongo::mlib)
diff --git a/src/mlib/int128.h b/src/mlib/int128.h
new file mode 100644
index 000000000..dfb02a026
--- /dev/null
+++ b/src/mlib/int128.h
@@ -0,0 +1,684 @@
+#ifndef MLIB_INT128_H_INCLUDED
+#define MLIB_INT128_H_INCLUDED
+
+#include "./macros.h"
+#include "./str.h"
+
+#include <stdbool.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdlib.h>
+
+MLIB_C_LINKAGE_BEGIN
+
+/**
+ * @brief A 128-bit binary integer, stored as two 64-bit words in `r`
+ */
+typedef union {
+   struct {
+      uint64_t lo; // The low-value 64 bits of the number
+      uint64_t hi; // The high-value 64 bits (its top bit is the sign bit)
+   } r;
+#if defined(__SIZEOF_INT128__)
+   // These union members are only for the purpose of debugging visualization
+   // and testing, and will only appear correctly on little-endian platforms.
+   __int128_t signed_;
+   __uint128_t unsigned_;
+#endif
+} mlib_int128;
+
+/// An mlib_int128 value from a literal within [INT64_MIN, INT64_MAX]
+#define MLIB_INT128(N) MLIB_INIT (mlib_int128) MLIB_INT128_C (N)
+/// Braced initializer for an int128 from a literal within [INT64_MIN,
+/// INT64_MAX]. Negative values are sign-extended. Usable as a constant init.
+#define MLIB_INT128_C(N)                           \
+   MLIB_INT128_FROM_PARTS ((uint64_t) INT64_C (N), \
+                           (INT64_C (N) < 0 ? UINT64_MAX : 0))
+/**
+ * @brief Cast an integral value to an mlib_int128
+ * @note N is expanded twice, so it must not be an expression with side effects
+ *
+ * If the argument is signed and less-than zero, it will be sign-extended
+ */
+#define MLIB_INT128_CAST(N) \
+   MLIB_INIT (mlib_int128)  \
+   MLIB_INT128_FROM_PARTS ((uint64_t) (N), ((N) < 0 ? UINT64_MAX : 0))
+
+/**
+ * @brief Create an mlib_int128 from the low and high parts of the integer
+ * (Expands to a braced initializer, not to an expression.)
+ * @param LowWord_u64 The low-value 64 bits of the number
+ * @param HighWord_u64 The high-value 64 bits of the number
+ */
+#define MLIB_INT128_FROM_PARTS(LowWord_u64, HighWord_u64) \
+   {                                                      \
+      {LowWord_u64, HighWord_u64},                        \
+   }
+
+/// Largest value when read as signed: Every bit set except the sign bit
+#define MLIB_INT128_SMAX \
+   MLIB_INT128_FROM_PARTS (UINT64_MAX, UINT64_MAX >> 1)
+
+/// Smallest value when read as signed: Only the sign bit is set
+#define MLIB_INT128_SMIN MLIB_INT128_FROM_PARTS (0, ~(UINT64_MAX >> 1))
+
+/// Largest value when read as unsigned: Every bit is set
+#define MLIB_INT128_UMAX MLIB_INT128_FROM_PARTS (~UINT64_C (0), ~UINT64_C (0))
+
+/**
+ * @brief Three-way comparison of two 128-bit integers, both read as unsigned
+ *
+ * @return -1 if (left < right)
+ * @return 1 if (left > right)
+ * @return 0 if (left == right)
+ */
+static mlib_constexpr_fn int
+mlib_int128_ucmp (mlib_int128 left, mlib_int128 right)
+{
+   // The high words dominate the ordering
+   if (left.r.hi != right.r.hi) {
+      return left.r.hi > right.r.hi ? 1 : -1;
+   }
+
+   // With equal high words, the low words decide
+   if (left.r.lo != right.r.lo) {
+      return left.r.lo > right.r.lo ? 1 : -1;
+   }
+   // Both words match
+   return 0;
+}
+
+/**
+ * @brief Three-way comparison of two 128-bit integers, both read as signed
+ *
+ * @return A negative value if (left < right)
+ * @return A positive value if (left > right)
+ * @return Zero if (left == right)
+ */
+static mlib_constexpr_fn int
+mlib_int128_scmp (mlib_int128 left, mlib_int128 right)
+{
+   // Isolate the sign bit of each operand
+   const uint64_t sign_bit = UINT64_C (1) << 63;
+   const bool lneg = (left.r.hi & sign_bit) != 0;
+   const bool rneg = (right.r.hi & sign_bit) != 0;
+   if (lneg != rneg) {
+      // Differing signs: Whichever operand is negative is the lesser
+      return lneg ? -1 : 1;
+   }
+   // Matching signs order the same way that unsigned values do
+   return mlib_int128_ucmp (left, right);
+}
+
+/**
+ * @brief Test two 128-bit integers for bitwise equality
+ *
+ * @retval true If both words of left match both words of right
+ * @retval false Otherwise
+ */
+static mlib_constexpr_fn bool
+mlib_int128_eq (mlib_int128 left, mlib_int128 right)
+{
+   return left.r.hi == right.r.hi && left.r.lo == right.r.lo;
+}
+
+/**
+ * @brief Compute the sum of two 128-bit integers
+ *
+ * @return mlib_int128 left + right, wrapping around modulo 2^128 on overflow
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_add (mlib_int128 left, mlib_int128 right)
+{
+   const uint64_t lo = left.r.lo + right.r.lo;
+   // An unsigned sum that wrapped around is smaller than each of its addends
+   const uint64_t carry = lo < left.r.lo ? 1 : 0;
+   const uint64_t hi = left.r.hi + right.r.hi + carry;
+   return MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (lo, hi);
+}
+
+/**
+ * @brief Compute the two's-complement negation of a 128-bit integer, i.e.
+ * the value read as signed, multiplied by -1
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_negate (mlib_int128 v)
+{
+   // Two's complement: Invert every bit, then add one
+   const mlib_int128 inverted = MLIB_INT128_FROM_PARTS (~v.r.lo, ~v.r.hi);
+   return mlib_int128_add (inverted, MLIB_INT128 (1));
+}
+
+/**
+ * @brief Compute the difference of two 128-bit integers
+ *
+ * @return mlib_int128 The value of `from` minus `less` (wraps on underflow)
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_sub (mlib_int128 from, mlib_int128 less)
+{
+   // The high word lends one when the low-word subtraction would underflow
+   const uint64_t borrow = from.r.lo < less.r.lo ? 1 : 0;
+   const uint64_t lo = from.r.lo - less.r.lo;
+   const uint64_t hi = from.r.hi - less.r.hi - borrow;
+   return MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (lo, hi);
+}
+
+/**
+ * @brief Bitwise left-shift a 128-bit integer
+ * (Shifting by 128 or more bits, in either direction, yields zero.)
+ * @param val The value to modify
+ * @param off The offset to shift left. If negative, shifts right
+ * @return The result of the shift operation
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_lshift (mlib_int128 val, int off)
+{
+   if (off >= 128 || off <= -128) {
+      // Every bit is shifted out. Also keeps the word shifts below in range.
+      return MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (0, 0);
+   }
+   if (off >= 64) {
+      // Whole-word shift left: The low word moves into the high word
+      uint64_t high = val.r.lo << (off - 64);
+      return MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (0, high);
+   } else if (off > 0) {
+      uint64_t low = val.r.lo << off;
+      uint64_t high = val.r.hi << off;
+      high |= val.r.lo >> (64 - off);
+      return MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (low, high);
+   } else if (off <= -64) {
+      // Whole-word shift right: The high word moves into the low word
+      uint64_t low = val.r.hi >> (-off - 64);
+      return MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (low, 0);
+   } else if (off < 0) {
+      off = -off;
+      uint64_t high = val.r.hi >> off;
+      uint64_t low = val.r.lo >> off;
+      low |= val.r.hi << (64 - off);
+      return MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (low, high);
+   }
+   // A zero offset leaves the value unchanged
+   return val;
+}
+
+/**
+ * @brief Bitwise logical right-shift a 128-bit integer
+ * (Implemented as mlib_int128_lshift() with the offset negated.)
+ * @param val The value to modify. No "sign bit" is respected.
+ * @param off The offset to shift right. If negative, shifts left
+ * @return The result of the shift operation
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_rshift (mlib_int128 val, int off)
+{
+   return mlib_int128_lshift (val, -off);
+}
+
+/**
+ * @brief Compute the bitwise-or of two 128-bit integers, word by word
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_bitor (mlib_int128 l, mlib_int128 r)
+{
+   const uint64_t lo = l.r.lo | r.r.lo, hi = l.r.hi | r.r.hi;
+   return MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (lo, hi);
+}
+
+// Multiply two 64-bit integers into a full 128-bit product (cannot overflow)
+static mlib_constexpr_fn mlib_int128
+_mlibUnsignedMult128 (uint64_t left, uint64_t right)
+{
+   // Knuth 4.3.1M multiplication, with each operand split into 32-bit digits
+   uint32_t u[2] = {(uint32_t) left, (uint32_t) (left >> 32)};
+   uint32_t v[2] = {(uint32_t) right, (uint32_t) (right >> 32)};
+   uint32_t w[4] = {0}; // Digits of the product, least-significant first
+
+   for (int j = 0; j < 2; ++j) {
+      uint64_t t = 0; // Partial product plus the running carry
+      for (int i = 0; i < 2; ++i) {
+         t += (uint64_t) (u[i]) * v[j] + w[i + j];
+         w[i + j] = (uint32_t) t; // Keep the low 32 bits as this digit
+         t >>= 32;                // The high bits carry into the next digit
+      }
+      w[j + 2] = (uint32_t) t; // Final carry of this row
+   }
+
+   return MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (
+      ((uint64_t) w[1] << 32) | w[0], ((uint64_t) w[3] << 32) | w[2]);
+}
+
+/**
+ * @brief Compute the product of two mlib_int128s, wrapping modulo 2^128.
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_mul (mlib_int128 l, mlib_int128 r)
+{
+   // Full 128-bit product of the two low words
+   mlib_int128 product = _mlibUnsignedMult128 (l.r.lo, r.r.lo);
+   // The cross terms only reach the high word. The hi*hi term wraps away.
+   const uint64_t cross = (l.r.lo * r.r.hi) + (l.r.hi * r.r.lo);
+   product.r.hi += cross;
+   return product;
+}
+
+/// Count the zero bits above the highest set bit of a 64bit number.
+static mlib_constexpr_fn int
+_mlibCountLeadingZeros_u64 (uint64_t bits)
+{
+   if (!bits) {
+      // No bit is set: All 64 bits are leading zeros
+      return 64;
+   }
+   int count = 0;
+   // Walk a probe down from the top bit until it lands on a set bit
+   for (uint64_t probe = UINT64_C (1) << 63; !(probe & bits); probe >>= 1) {
+      ++count;
+   }
+   return count;
+}
+
+/// Implementation of Knuth's algorithm 4.3.1 D for unsigned integer division
+static mlib_constexpr_fn void
+_mlibKnuth431D (uint32_t *const u,
+                const int ulen,
+                const uint32_t *const v,
+                const int vlen,
+                uint32_t *quotient)
+{
+   // Part D1 (normalization) is done by the caller. u and v hold base-2^32
+   // digits, least-significant first. u is overwritten during the division.
+   typedef uint64_t u64;
+   typedef int64_t i64;
+   typedef uint32_t u32;
+   const int m = ulen - vlen - 1; // Index of the first quotient digit produced
+   const int n = vlen;            // Number of digits in the divisor
+
+   // The radix 'b' is 2^32. Shifting left and right by 32 bits is equivalent to
+   // multiplying and dividing by b, respectively.
+
+   // D2
+   int j = m;
+   for (;;) {
+      // D3: Select two u32 as a u64:
+      u64 two = ((u64) (u[j + n]) << 32) | u[j + n - 1];
+      // D3: Partial quotient: q̂
+      u64 q = two / v[n - 1];
+      // D3: Partial remainder: r̂
+      u64 r = two % v[n - 1];
+
+      // D3: Correct q̂ and r̂ while the estimate is too large
+      while (q >> 32 || q * (u64) v[n - 2] > (r << 32 | u[j + n - 2])) {
+         q--;
+         r += v[n - 1];
+         if (r >> 32) {
+            break;
+         }
+      }
+
+      // D4: Multiply and subtract
+      i64 k = 0;
+      i64 t = 0;
+      for (int i = 0; i < n; ++i) {
+         u64 prod = (u32) q * (u64) (v[i]);
+         t = u[i + j] - k - (u32) prod;
+         u[i + j] = (u32) t;
+         k = (i64) (prod >> 32) - (t >> 32);
+      }
+      t = u[j + n] - k;
+      u[j + n] = (u32) t;
+
+      quotient[j] = (u32) q;
+
+      // D5: Test remainder
+      if (t < 0) {
+         // D6: Add back
+         --quotient[j];
+         k = 0;
+         for (int i = 0; i < n; ++i) {
+            t = u[i + j] + k + v[i];
+            u[i + j] = (u32) (t);
+            k = t >> 32;
+         }
+         u[j + n] += (int32_t) k;
+      }
+
+      // D7:
+      --j;
+      if (j < 0) {
+         break;
+      }
+   }
+
+   // Denormalization (D8) is done by caller.
+}
+
+/// The quotient and remainder produced by a 128-bit division
+typedef struct mlib_int128_divmod_result {
+   /// The quotient of the division operation (rounds to zero)
+   mlib_int128 quotient;
+   /// The remainder of the division operation
+   mlib_int128 remainder;
+} mlib_int128_divmod_result;
+
+/// Divide a 128-bit number by a 64-bit number. Both are treated as unsigned.
+static mlib_constexpr_fn struct mlib_int128_divmod_result
+_mlibDivide_u128_by_u64 (const mlib_int128 numer, const uint64_t denom)
+{
+   mlib_int128 adjusted = numer;
+   adjusted.r.hi %= denom; // Keeps the Knuth quotient below within 64 bits
+   int d = _mlibCountLeadingZeros_u64 (denom);
+
+   typedef uint32_t u32;
+   typedef uint64_t u64;
+
+   if (d >= 32) {
+      // The denominator fits in 32 bits: We can do a simple short division
+      // with base-2^32 digits.
+      // Treat the denominator as a single base-2^32 digit:
+      const u32 d0 = (u32) denom;
+
+      // And the numerator as four base-2^32 digits:
+      const u64 n0 = (u32) (numer.r.lo);
+      const u64 n1 = (u32) (numer.r.lo >> 32);
+
+      // We don't need to split n2 and n3. (n3,n2) will be the first partial
+      // dividend
+      const u64 n3_n2 = numer.r.hi;
+
+      // First partial remainder: (n3,n2) % d0
+      const u64 r1 = n3_n2 % d0;
+      // Second partial dividend: (r1,n1)
+      const u64 r1_n1 = (r1 << 32) + n1;
+      // Second partial remainder: (r1,n1) % d0
+      const u64 r0 = r1_n1 % d0;
+      // Final partial dividend: (r0,n0)
+      const u64 r0_n0 = (r0 << 32) + n0;
+      // Final remainder: (r0,n0) % d0
+      const u64 rem = r0_n0 % d0;
+
+      // Form the quotient as four base-2^32 digits:
+      // Least quotient digit: (r0,n0) / d0
+      const u64 q0 = r0_n0 / d0;
+      // Second quotient digit: (r1,n1) / d0
+      const u64 q1 = r1_n1 / d0;
+      // Third and fourth quotient digit: (n3,n2) / d0
+      const u64 q3_q2 = n3_n2 / d0;
+
+      // Low word of the quotient: (q1,q0)
+      const u64 q1_q0 = (q1 << 32) + q0;
+
+      return MLIB_INIT (mlib_int128_divmod_result){
+         MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (q1_q0, q3_q2),
+         MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (rem, 0),
+      };
+   }
+
+   // Normalize for a Knuth 4.3.1D division. Convert the integers into two
+   // base-2^32 numbers, with u and v being arrays of digits:
+   u32 u[5] = {
+      (u32) (adjusted.r.lo << d),
+      (u32) (adjusted.r.lo >> (32 - d)),
+      (u32) (adjusted.r.hi << d),
+      (u32) (adjusted.r.hi >> (32 - d)),
+      0,
+   };
+
+   if (d != 0) {
+      // Extra bits from overlap:
+      u[2] |= (u32) (adjusted.r.lo >> (64 - d));
+      u[4] |= (u32) (adjusted.r.hi >> (64 - d));
+   }
+
+   u32 v[2] = {
+      (u32) (denom << d),
+      (u32) (denom >> (32 - d)),
+   };
+
+   u32 qparts[3] = {0};
+
+   _mlibKnuth431D (u, 5, v, 2, qparts);
+
+   u64 rem = ((u64) u[1] << (32 - d)) | (u[0] >> d); // D8: Denormalize
+   u64 quo = ((u64) qparts[1] << 32) | qparts[0];
+   return MLIB_INIT (mlib_int128_divmod_result){
+      MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (quo, numer.r.hi / denom),
+      MLIB_INIT (mlib_int128) MLIB_INT128_FROM_PARTS (rem, 0),
+   };
+}
+
+/**
+ * @brief Perform a combined division+remainder of two 128-bit numbers
+ * (Both are treated as unsigned. No check is made for a zero divisor.)
+ * @param numer The dividend
+ * @param denom The divisor
+ * @return A struct with .quotient and .remainder results
+ */
+static mlib_constexpr_fn mlib_int128_divmod_result
+mlib_int128_divmod (mlib_int128 numer, mlib_int128 denom)
+{
+   const uint64_t nhi = numer.r.hi;
+   const uint64_t nlo = numer.r.lo;
+   const uint64_t dhi = denom.r.hi;
+   const uint64_t dlo = denom.r.lo;
+   if (dhi > nhi) {
+      // Denominator is definitely larger than numerator. Quotient is zero,
+      // remainder is full numerator.
+      return MLIB_INIT (mlib_int128_divmod_result){MLIB_INT128 (0), numer};
+   } else if (dhi == nhi) {
+      // High words are equal
+      if (nhi == 0) {
+         // Both high words are zero, so this is just a division of two 64bit
+         // numbers
+         return MLIB_INIT (mlib_int128_divmod_result){
+            MLIB_INT128_CAST (nlo / dlo),
+            MLIB_INT128_CAST (nlo % dlo),
+         };
+      } else if (nlo > dlo) {
+         // The numerator is larger than the denom and the high word on the
+         // denom is non-zero, so this cannot divide to anything greater than 1.
+         return MLIB_INIT (mlib_int128_divmod_result){
+            MLIB_INT128 (1),
+            mlib_int128_sub (numer, denom),
+         };
+      } else if (nlo < dlo) {
+         // The high words are equal and numer.r.lo < denom.r.lo, so the
+         // numerator is less than the denominator: The quotient is zero
+         return MLIB_INIT (mlib_int128_divmod_result){
+            MLIB_INT128 (0),
+            numer,
+         };
+      } else {
+         // N / N is one
+         return MLIB_INIT (mlib_int128_divmod_result){MLIB_INT128 (1),
+                                                      MLIB_INT128 (0)};
+      }
+   } else if (dhi == 0) {
+      // No high in denominator. We can use a u128/u64
+      return _mlibDivide_u128_by_u64 (numer, denom.r.lo);
+   } else {
+      // We'll need to do a full u128/u128 division
+      // Normalize for Knuth 4.3.1D
+      int d = _mlibCountLeadingZeros_u64 (denom.r.hi);
+      // Does the denom have only three base-2^32 digits?
+      const bool has_three = d >= 32;
+      d &= 31; // Shift within one 32-bit digit only
+
+      uint32_t u[5] = {
+         (uint32_t) (numer.r.lo << d),
+         (uint32_t) (numer.r.lo >> (32 - d)),
+         (uint32_t) (numer.r.hi << d),
+         (uint32_t) (numer.r.hi >> (32 - d)),
+         0,
+      };
+      uint32_t v[4] = {
+         (uint32_t) (denom.r.lo << d),
+         (uint32_t) (denom.r.lo >> (32 - d)),
+         (uint32_t) (denom.r.hi << d),
+         (uint32_t) (denom.r.hi >> (32 - d)),
+      };
+      if (d != 0) {
+         u[2] |= numer.r.lo >> (64 - d);
+         u[4] |= numer.r.hi >> (64 - d);
+         v[2] |= denom.r.lo >> (64 - d);
+      };
+
+      uint32_t q[2] = {0};
+      if (has_three) {
+         _mlibKnuth431D (u, 5, v, 3, q);
+      } else {
+         _mlibKnuth431D (u, 5, v, 4, q);
+      }
+
+      mlib_int128 remainder = MLIB_INT128_FROM_PARTS (
+         ((uint64_t) u[1] << 32) | u[0], ((uint64_t) u[3] << 32) | u[2]);
+      remainder = mlib_int128_rshift (remainder, d); // Undo the normalization
+
+      return MLIB_INIT (mlib_int128_divmod_result){
+         MLIB_INT128_CAST (q[0] | (uint64_t) q[1] << 32),
+         remainder,
+      };
+   }
+}
+
+/**
+ * @brief Divide two 128-bit numbers, keeping only the quotient of the divmod
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_div (mlib_int128 numer, mlib_int128 denom)
+{
+   return mlib_int128_divmod (numer, denom).quotient;
+}
+
+/**
+ * @brief Divide two 128-bit numbers, keeping only the remainder of the divmod
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_mod (mlib_int128 numer, mlib_int128 denom)
+{
+   return mlib_int128_divmod (numer, denom).remainder;
+}
+
+/**
+ * @brief Compute ten raised to `nth` as a 128-bit number (one if nth <= 0)
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_pow10 (long nth)
+{
+   mlib_int128 power = MLIB_INT128 (1);
+   for (long k = 0; k < nth; ++k) {
+      power = mlib_int128_mul (power, MLIB_INT128 (10));
+   }
+   return power;
+}
+
+/// Get the Nth power of two as a 128-bit number. Zero if N is not in [0, 127].
+static mlib_constexpr_fn mlib_int128
+mlib_int128_pow2 (long nth)
+{
+   return (nth < 0 || nth > 127)
+             ? MLIB_INT128 (0)
+             : mlib_int128_lshift (MLIB_INT128 (1), (int) nth);
+}
+
+/**
+ * @brief Read a 128-bit unsigned integer from a string (base 10 by default)
+ *
+ * @param end If not NULL, receives a pointer to where parsing stopped
+ */
+static mlib_constexpr_fn mlib_int128
+mlib_int128_from_string (const char *s, const char **end)
+{
+   int radix = 10;
+   if (mlib_strlen (s) > 2 && s[0] == '0') {
+      // Radix prefix: 0b (binary), 0c (octal), 0x (hex), or a bare 0 (octal)
+      char b = s[1];
+      if (b == 'b' || b == 'B') {
+         radix = 2;
+         s += 2;
+      } else if (b == 'c' || b == 'C') {
+         radix = 8;
+         s += 2;
+      } else if (b == 'x' || b == 'X') {
+         radix = 16;
+         s += 2;
+      } else {
+         radix = 8;
+         s += 1;
+      }
+   }
+
+   mlib_int128 ret = MLIB_INT128 (0);
+   for (; *s; ++s) {
+      char c = *s;
+      if (c == '\'') { // Digit separator: Skip it
+         continue;
+      }
+      if (c >= 'a') {
+         c -= 'a' - 'A'; // Uppercase (if a letter, otherwise some other punct)
+      }
+      int digit = c - '0';
+      if (c >= 'A') { // A letter (or garbage, which is caught below)
+         digit = (c - 'A') + 10;
+      }
+      if (digit >= radix || digit < 0 || (c > '9' && c < 'A')) {
+         // Not a digit of this radix (a value equal to the radix is too big)
+         break;
+      }
+      ret = mlib_int128_mul (ret, MLIB_INT128_CAST (radix));
+      ret = mlib_int128_add (ret, MLIB_INT128_CAST (digit));
+   }
+   if (end) {
+      *end = s;
+   }
+   return ret;
+}
+
+/**
+ * @brief Truncate a 128-bit number to its low 64 bits
+ */
+static mlib_constexpr_fn uint64_t
+mlib_int128_to_u64 (mlib_int128 v)
+{
+   return v.r.lo;
+}
+
+/// The result type of formatting a 128-bit number (see mlib_int128_format)
+typedef struct {
+   /// Base10 digits of the number, null-terminated. (At most 39 digits.)
+   char str[40];
+} mlib_int128_charbuf;
+
+/**
+ * @brief Render a 128-bit integer (read as unsigned) as base10 text.
+ *
+ * @return mlib_int128_charbuf whose .str member holds the digits
+ */
+static mlib_constexpr_fn mlib_int128_charbuf
+mlib_int128_format (mlib_int128 i)
+{
+   mlib_int128_charbuf into = {0};
+   // Digits come out least-significant first, so fill from the back
+   char *cursor = into.str + (sizeof into) - 1;
+   int ndigits = 0;
+   const char digits[] = "0123456789";
+   const mlib_int128 zero = MLIB_INT128 (0);
+   const mlib_int128 ten = MLIB_INT128 (10);
+
+   do {
+      // Peel off the lowest base10 digit (zero itself yields a single '0')
+      const mlib_int128_divmod_result dm = mlib_int128_divmod (i, ten);
+      *cursor-- = digits[mlib_int128_to_u64 (dm.remainder)];
+      i = dm.quotient;
+      ++ndigits;
+   } while (!mlib_int128_eq (i, zero));
+
+   // Slide the digits to the front of the buffer, then terminate
+   for (int k = 0; k < ndigits; ++k) {
+      into.str[k] = cursor[k + 1];
+   }
+   into.str[ndigits] = 0;
+   return into;
+}
+
+MLIB_C_LINKAGE_END
+
+#endif // MLIB_INT128_H_INCLUDED
diff --git a/src/mlib/int128.test.c b/src/mlib/int128.test.c
new file mode 100644
index 000000000..4d1372126
--- /dev/null
+++ b/src/mlib/int128.test.c
@@ -0,0 +1,3 @@
+#include "./int128.h"
+
+// This file checks for C compilability. Other tests are defined in .test.cpp
diff --git a/src/mlib/int128.test.cpp b/src/mlib/int128.test.cpp
new file mode 100644
index 000000000..e9e247888
--- /dev/null
+++ b/src/mlib/int128.test.cpp
@@ -0,0 +1,422 @@
+#include "./int128.h"
+
+#include <iostream>
+#include <random>
+#include <thread>
+#include <string>
+#include <vector>
+
+#if (defined(__GNUC__) && __GNUC__ < 7 && !defined(__clang__)) || \
+   (defined(_MSC_VER) && _MSC_VER < 1920)
+// Old GCC and old MSVC have partially-broken constexpr that prevents us from
+// properly using static_assert with from_string()
+#define BROKEN_CONSTEXPR
+#endif
+
+#ifndef BROKEN_CONSTEXPR
+// Basic checks with static_asserts, check constexpr correctness and fail fast
+static_assert (mlib_int128_eq (MLIB_INT128 (0), MLIB_INT128_FROM_PARTS (0, 0)),
+               "fail");
+static_assert (mlib_int128_eq (MLIB_INT128 (4), MLIB_INT128_FROM_PARTS (4, 0)),
+               "fail");
+static_assert (mlib_int128_eq (MLIB_INT128 (34),
+                               MLIB_INT128_FROM_PARTS (34, 0)),
+               "fail");
+static_assert (mlib_int128_eq (MLIB_INT128 (34 + 8),
+                               MLIB_INT128_FROM_PARTS (42, 0)),
+               "fail");
+static_assert (mlib_int128_eq (MLIB_INT128_CAST (94),
+                               MLIB_INT128_FROM_PARTS (94, 0)),
+               "fail");
+static_assert (mlib_int128_eq (mlib_int128_lshift (MLIB_INT128_CAST (1), 64),
+                               MLIB_INT128_FROM_PARTS (0, 1)),
+               "fail");
+static_assert (mlib_int128_eq (mlib_int128_lshift (MLIB_INT128_CAST (1), 127),
+                               MLIB_INT128_FROM_PARTS (0, 1ull << 63)),
+               "fail");
+
+static_assert (mlib_int128_scmp (MLIB_INT128_CAST (2), MLIB_INT128 (0)) > 0,
+               "fail");
+static_assert (mlib_int128_scmp (MLIB_INT128_CAST (-2), MLIB_INT128 (0)) < 0,
+               "fail");
+static_assert (mlib_int128_scmp (MLIB_INT128_CAST (0), MLIB_INT128 (0)) == 0,
+               "fail");
+// Unsigned compare doesn't believe in negative numbers:
+static_assert (mlib_int128_ucmp (MLIB_INT128_CAST (-2), MLIB_INT128 (0)) > 0,
+               "fail");
+#endif // BROKEN_CONSTEXPR
+
+// User-defined literals that parse with from_string, for test convenience:
+#ifndef BROKEN_CONSTEXPR
+constexpr
+#endif
+   mlib_int128
+   operator""_i128 (const char *s) // Numeric form, e.g. 42_i128
+{
+   return mlib_int128_from_string (s, NULL);
+}
+
+#ifndef BROKEN_CONSTEXPR
+constexpr
+#endif
+   mlib_int128
+   operator""_i128 (const char *s, size_t) // String form; length is unused
+{
+   return mlib_int128_from_string (s, NULL);
+}
+
+// Operators, for test convenience (operator< compares the values as signed)
+constexpr bool
+operator== (mlib_int128 l, mlib_int128 r)
+{
+   return mlib_int128_eq (l, r);
+}
+
+constexpr bool
+operator<(mlib_int128 l, mlib_int128 r)
+{
+   return mlib_int128_scmp (l, r) < 0;
+}
+
+#ifndef BROKEN_CONSTEXPR
+static_assert (mlib_int128_eq (MLIB_INT128 (0), 0_i128), "fail");
+static_assert (mlib_int128_eq (MLIB_INT128 (65025), 65025_i128), "fail");
+static_assert (mlib_int128_eq (MLIB_INT128_FROM_PARTS (0, 1),
+                               18446744073709551616_i128),
+               "fail");
+static_assert (mlib_int128_eq (MLIB_INT128_UMAX,
+                               340282366920938463463374607431768211455_i128),
+               "fail");
+
+static_assert (mlib_int128_scmp (MLIB_INT128_SMIN, MLIB_INT128_SMAX) < 0,
+               "fail");
+static_assert (mlib_int128_scmp (MLIB_INT128_SMAX, MLIB_INT128_SMIN) > 0,
+               "fail");
+static_assert (mlib_int128_scmp (MLIB_INT128_CAST (-12), MLIB_INT128_CAST (0)) <
+                  0,
+               "fail");
+static_assert (mlib_int128_scmp (MLIB_INT128_CAST (12), MLIB_INT128_CAST (0)) >
+                  0,
+               "fail");
+
+// Simple arithmetic:
+static_assert (mlib_int128_scmp (mlib_int128_add (MLIB_INT128_SMAX, 1_i128),
+                                 MLIB_INT128_SMIN) == 0,
+               "fail");
+static_assert (mlib_int128_scmp (mlib_int128_negate (MLIB_INT128_CAST (-42)),
+                                 MLIB_INT128 (42)) == 0,
+               "fail");
+static_assert (mlib_int128_scmp (mlib_int128_sub (5_i128, 3_i128), 2_i128) == 0,
+               "fail");
+static_assert (mlib_int128_scmp (mlib_int128_sub (3_i128, 5_i128),
+                                 mlib_int128_negate (2_i128)) == 0,
+               "fail");
+static_assert (mlib_int128_ucmp (mlib_int128_sub (3_i128, 5_i128),
+                                 mlib_int128_sub (MLIB_INT128_UMAX, 1_i128)) ==
+                  0,
+               "fail");
+
+static_assert (mlib_int128_scmp (mlib_int128_lshift (1_i128, 127),
+                                 MLIB_INT128_SMIN) == 0,
+               "fail");
+
+static_assert (
+   mlib_int128_scmp (mlib_int128_rshift (mlib_int128_lshift (1_i128, 127), 127),
+                     1_i128) == 0,
+   "fail");
+
+// With no high-32 bits in the denominator
+static_assert (mlib_int128_div (316356263640858117670580590964547584140_i128,
+                                13463362962560749016052695684_i128) ==
+                  23497566285_i128,
+               "fail");
+
+// Remainder correctness with high bit set:
+static_assert (mlib_int128_mod (292590981272581782572061492191999425232_i128,
+                                221673222198185508195462959065350495048_i128) ==
+                  70917759074396274376598533126648930184_i128,
+               "fail");
+
+// Remainder with 64bit denom:
+static_assert (mlib_int128_mod (2795722437127403543495742528_i128,
+                                708945413_i128) == 619266642_i128,
+               "fail");
+
+// 10-div:
+static_assert (mlib_int128_div (MLIB_INT128_SMAX, 10_i128) ==
+                  17014118346046923173168730371588410572_i128,
+               "fail");
+#endif // BROKEN_CONSTEXPR
+
+inline std::ostream &
+operator<< (std::ostream &out, const mlib_int128 &v)
+{
+   const mlib_int128_charbuf text = mlib_int128_format (v);
+   return out << text.str;
+}
+
+struct check_info {       // Where a CHECK() was written, for failure messages
+   const char *filename;  // Filled from __FILE__ by the CHECK macro
+   int line;              // Filled from __LINE__ by the CHECK macro
+   const char *expr;      // The stringified CHECK condition
+};
+
+struct nil { // Empty result returned by a bound_lhs comparison that passed
+};
+
+template <typename Left> struct bound_lhs { // Left operand of a CHECK
+   check_info info; // Location and text of the originating CHECK
+   Left value;      // The captured left-hand value
+   // Each operator returns nil on success, else prints both sides and exits
+#define DEFOP(Oper)                                                   \
+   template <typename Rhs> nil operator Oper (Rhs rhs) const noexcept \
+   {                                                                  \
+      if (value Oper rhs) {                                           \
+         return {};                                                   \
+      }                                                               \
+      fprintf (stderr,                                                \
+               "%s:%d: CHECK( %s ) failed!\n",                        \
+               info.filename,                                         \
+               info.line,                                             \
+               info.expr);                                            \
+      fprintf (stderr, "Expanded expression: ");                      \
+      std::cerr << value << " " #Oper " " << rhs << '\n';             \
+      std::exit (1);                                                  \
+      return {};                                                      \
+   }
+   DEFOP (==)
+   DEFOP (!=)
+   DEFOP (<)
+   DEFOP (<=)
+   DEFOP (>)
+   DEFOP (>=)
+#undef DEFOP
+};
+
+struct check_magic { // Captures the left operand of a CHECK via operator->*
+   check_info info;
+
+   template <typename Oper>
+   bound_lhs<Oper>
+   operator->*(Oper op) // Wraps the operand together with the check location
+   {
+      return bound_lhs<Oper>{info, op};
+   }
+};
+
+struct check_consume { // Assignment target that finishes a CHECK expression
+   void
+   operator= (nil) // A comparison already ran and passed; nothing left to do
+   {
+   }
+
+   void
+   operator= (bound_lhs<bool> const &l) // CHECK of a plain boolean
+   {
+      // Invoke the test for truthiness:
+      (void) (l == true);
+   }
+};
+
+#undef CHECK // CHECK(a == b) parses as ((magic->*a) == b), keeping both sides
+#define CHECK(Cond) \
+   check_consume{} = check_magic{check_info{__FILE__, __LINE__, #Cond}}->*Cond
+
+#ifndef BROKEN_CONSTEXPR
+static_assert (mlib_int128 (MLIB_INT128_UMAX) ==
+                  340282366920938463463374607431768211455_i128,
+               "fail");
+
+// Check sign extension works correctly:
+static_assert (mlib_int128 (MLIB_INT128_CAST (INT64_MIN)) ==
+                  mlib_int128_negate (9223372036854775808_i128),
+               "fail");
+static_assert (mlib_int128 (MLIB_INT128_CAST (INT64_MIN)) <
+                  mlib_int128_negate (9223372036854775807_i128),
+               "fail");
+static_assert (mlib_int128_negate (9223372036854775809_i128) <
+                  mlib_int128 (MLIB_INT128_CAST (INT64_MIN)),
+               "fail");
+#endif
+
+static mlib_int128_divmod_result
+div_check (mlib_int128 num, mlib_int128 den)
+{
+   // Divide num by den, CHECK the result for consistency, and return it
+   mlib_int128_divmod_result res = mlib_int128_divmod (num, den);
+#ifdef __SIZEOF_INT128__
+   // When the compiler has a native 128-bit integer, test against that:
+   __uint128_t num1;
+   __uint128_t den1;
+   memcpy (&num1, &num.r, sizeof num);
+   memcpy (&den1, &den.r, sizeof den);
+   __uint128_t q = num1 / den1;
+   __uint128_t r = num1 % den1;
+   mlib_int128_divmod_result expect;
+   memcpy (&expect.quotient.r, &q, sizeof q);
+   memcpy (&expect.remainder.r, &r, sizeof r);
+   CHECK (expect.quotient == res.quotient);
+   CHECK (expect.remainder == res.remainder);
+#endif
+   // Check that (quotient * den) + remainder reproduces num
+   auto invert = mlib_int128_mul (res.quotient, den);
+   invert = mlib_int128_add (invert, res.remainder);
+   CHECK (invert == num);
+   return res;
+}
+
+// Runtime checks, easier to debug than static_asserts
+int
+main ()
+{
+   mlib_int128 zero = MLIB_INT128 (0);
+   CHECK (mlib_int128_eq (zero, MLIB_INT128 (0)));
+   CHECK (mlib_int128_eq (zero, 0_i128));
+   CHECK (zero == 0_i128);
+
+   auto two = MLIB_INT128 (2);
+   auto four = mlib_int128_add (two, two);
+   CHECK (four == MLIB_INT128 (4));
+   CHECK (four == 4_i128);
+   CHECK (two == mlib_int128_add (two, zero));
+
+   // Addition wraps:
+   mlib_int128 max = MLIB_INT128_SMAX;
+   auto more = mlib_int128_add (max, four);
+   CHECK (more == mlib_int128_add (MLIB_INT128_SMIN, MLIB_INT128 (3)));
+
+   // "Wrap" around zero:
+   auto ntwo = MLIB_INT128_CAST (-2);
+   auto sum = mlib_int128_add (ntwo, four);
+   CHECK (sum == two);
+
+   auto eight = mlib_int128_lshift (two, 2);
+   CHECK (eight == MLIB_INT128 (8));
+
+   auto big = mlib_int128_lshift (two, 72);
+   CHECK (mlib_int128_scmp (big, MLIB_INT128 (0)) > 0);
+
+   auto four_v2 = mlib_int128_lshift (eight, -1);
+   CHECK (four == four_v2);
+
+   // Negative literals:
+   CHECK (MLIB_INT128 (-64) == mlib_int128_negate (64_i128));
+
+   CHECK (mlib_int128_mul (1_i128, 2_i128) == 2_i128);
+   CHECK (mlib_int128_mul (1_i128, 0_i128) == 0_i128);
+   CHECK (mlib_int128_mul (0_i128, 0_i128) == 0_i128);
+   CHECK (mlib_int128_mul (2_i128, 73_i128) == 146_i128);
+   CHECK (mlib_int128_mul (28468554863115876158655557_i128, 73_i128) ==
+          2078204505007458959581855661_i128);
+   CHECK (mlib_int128_mul (MLIB_INT128_CAST (-7), 4_i128) ==
+          MLIB_INT128_CAST (-28));
+   CHECK (mlib_int128_mul (MLIB_INT128_CAST (-7), MLIB_INT128_CAST (-7)) ==
+          49_i128);
+
+   // It's useful to specify bit patterns directly
+   auto in_binary =
+      0b110101010110100100001101111001111010100010111100100101101011010110101001010110110011000100000100011110010101101001111110001000_i128;
+   CHECK (in_binary == 70917759074396274376598533126648930184_i128);
+   CHECK (
+      in_binary ==
+      "0b110101010110100100001101111001111010100010111100100101101011010110101001010110110011000100000100011110010101101001111110001000"_i128);
+
+   // Or hexadecimal
+   auto in_hex = 0x355a4379ea2f25ad6a56cc411e569f88_i128;
+   CHECK (in_hex == 70917759074396274376598533126648930184_i128);
+
+   int8_t n = -12;
+   CHECK (mlib_int128_scmp (zero, MLIB_INT128_CAST (n)) > 0);
+   CHECK (mlib_int128_ucmp (zero, MLIB_INT128_CAST (n)) < 0);
+
+   auto _2pow127 = mlib_int128_pow2 (127);
+   CHECK (std::string (mlib_int128_format (_2pow127).str) ==
+          "170141183460469231731687303715884105728");
+
+   auto r = div_check (27828649044156246570177174673037165454_i128,
+                       499242349997913298655486252455941907_i128);
+
+   CHECK (r.quotient == 55_i128);
+   CHECK (r.remainder == 370319794271015144125430787960360569_i128);
+
+   r = div_check (64208687961221311123721027584_i128, 3322092839076102144_i128);
+   CHECK (r.remainder == 3155565729965670400_i128);
+
+   // This division will trigger the rare Knuth 4.3.1D/D6 condition:
+   r = div_check (31322872034807296605612234499929458960_i128,
+                  34573864092216774938021667884_i128);
+   CHECK (r.quotient == 905969663_i128);
+   CHECK (r.remainder == 34573864092065898160364055868_i128);
+
+   // Self-divide:
+   r = div_check (628698094597401606590302208_i128,
+                  628698094597401606590302208_i128);
+   CHECK (r.quotient == 1_i128);
+   CHECK (r.remainder == 0_i128);
+
+   // With no high-32 bits in the denominator
+   r = div_check (316356263640858117670580590964547584140_i128,
+                  13463362962560749016052695684_i128);
+   CHECK (r.quotient == 23497566285_i128);
+
+   // Remainder correctness with high bit set:
+   r = div_check (292590981272581782572061492191999425232_i128,
+                  221673222198185508195462959065350495048_i128);
+   CHECK (r.remainder == 70917759074396274376598533126648930184_i128);
+
+   // Remainder with 64bit denom:
+   r = div_check (2795722437127403543495742528_i128, 708945413_i128);
+   CHECK (r.remainder == 619266642_i128);
+
+   // 10-div:
+   CHECK (mlib_int128_div (MLIB_INT128_SMAX, 10_i128) ==
+          17014118346046923173168730371588410572_i128);
+
+   std::random_device rd;
+   const unsigned ent[] = {rd (), rd (), rd (), rd ()};
+   // Pick every numerator bit pattern from 0b00'00 to 0b11'11
+   for (auto nbits = 0u; nbits < 16u; ++nbits) {
+      // This is an extremely rudimentary thread pool to parallelize the
+      // division checks. It doesn't need to be rigorous or optimal, it only
+      // needs to "just work."
+      std::vector<std::thread> threads;
+      // Pick every denominator bit pattern from 0b00'01 to 0b11'11:
+      for (auto dbits = 1u; dbits < 16u; ++dbits) {
+         // Distinct stream per worker: entropy + this (nbits, dbits) pair:
+         std::seed_seq seed ({ent[0], ent[1], ent[2], ent[3], nbits, dbits});
+         std::mt19937 random (seed);
+         // Spawn a thread for this denominator bit pattern:
+         threads.emplace_back ([nbits, dbits, random] () mutable {
+            std::uniform_int_distribution<std::uint32_t> dist;
+            // 100k random divisions:
+            for (auto i = 0; i < 100000; ++i) {
+               // Generate a denominator
+               auto den = 0_i128;
+               while (den == 0_i128) {
+                  // Regenerate until we don't have zero (very
+                  // unlikely, but be safe!)
+                  uint64_t dlo = 0, dhi = 0;
+                  (dbits & 1) && (dlo |= dist (random));
+                  (dbits & 2) && (dlo |= (uint64_t) dist (random) << 32);
+                  (dbits & 4) && (dhi |= dist (random));
+                  (dbits & 8) && (dhi |= (uint64_t) dist (random) << 32);
+                  den = MLIB_INT128_FROM_PARTS (dlo, dhi);
+               }
+               // Generate a numerator
+               uint64_t nlo = 0, nhi = 0;
+               (nbits & 1) && (nlo |= dist (random));
+               (nbits & 2) && (nlo |= (uint64_t) dist (random) << 32);
+               (nbits & 4) && (nhi |= dist (random));
+               (nbits & 8) && (nhi |= (uint64_t) dist (random) << 32);
+               mlib_int128 num = MLIB_INT128_FROM_PARTS (nlo, nhi);
+               // Divide them:
+               div_check (num, den);
+            }
+         });
+      }
+      // Join the threads that are dividing:
+      for (auto &t : threads) {
+         t.join ();
+      }
+   }
+}
diff --git a/src/mlib/macros.h b/src/mlib/macros.h
index 32a02e734..f586597b4 100644
--- a/src/mlib/macros.h
+++ b/src/mlib/macros.h
@@ -14,4 +14,33 @@
 #define MLIB_INIT(T) (T)
 #endif
 
+#ifdef __cplusplus
+#define _mlibCLinkageBegin extern "C" {
+#define _mlibCLinkageEnd }
+#else
+#define _mlibCLinkageBegin
+#define _mlibCLinkageEnd
+#endif
+
+/// Begin a C-language-linkage section: `extern "C" {` in C++, empty in C
+#define MLIB_C_LINKAGE_BEGIN _mlibCLinkageBegin
+/// End a C-language-linkage section: `}` in C++, empty in C
+#define MLIB_C_LINKAGE_END _mlibCLinkageEnd
+
+#if (defined(__cpp_constexpr) && __cpp_constexpr >= 201304L) || \
+   (defined(__cplusplus) && __cplusplus >= 201402L) ||          \
+   (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
+#define _mlibConstexprFn constexpr inline
+#else
+#define _mlibConstexprFn inline
+#endif
+
+/**
+ * @brief Mark a function as constexpr
+ *
+ * Expands to `constexpr inline` when __cpp_constexpr >= 201304L, or when
+ * __cplusplus or _MSVC_LANG is >= 201402L (C++14). "inline" otherwise.
+ */
+#define mlib_constexpr_fn _mlibConstexprFn
+
 #endif // MLIB_MACROS_H_INCLUDED
diff --git a/src/mlib/str.h b/src/mlib/str.h
index 42dff3c4d..901c49a83 100644
--- a/src/mlib/str.h
+++ b/src/mlib/str.h
@@ -16,6 +16,8 @@
 #include <strings.h> /* For strncasecmp. */
 #endif
 
+MLIB_C_LINKAGE_BEGIN
+
 /**
  * @brief A simple non-owning string-view type.
  *
@@ -947,4 +949,18 @@ _mstr_split_iter_done_ (struct _mstr_split_iter_ *iter)
            --_iter_var_.once)
 // clang-format on
 
+/// Equivalent to strlen(): count the chars in `s` before its null
+/// terminator. Annotated with mlib_constexpr_fn.
+static mlib_constexpr_fn size_t
+mlib_strlen (const char *s)
+{
+   size_t len = 0;
+   while (s[len] != '\0') {
+      ++len;
+   }
+   return len;
+}
+
+MLIB_C_LINKAGE_END
+
 #endif // MONGOCRYPT_STR_PRIVATE_H