From 7b4c86b03eceb1fdb6e0bb8e85160dac8ba6a24a Mon Sep 17 00:00:00 2001 From: Jacob Pratt Date: Thu, 19 Mar 2026 00:45:06 -0400 Subject: [PATCH] Optimize 128-bit integer formatting The compiler is unaware of the restricted range of the input, so it is unable to optimize out the final division and modulus. By doing this manually, we get a nontrivial performance gain. --- src/lib.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 884193e..fbe3918 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -410,7 +410,7 @@ fn enc_16lsd<const OFFSET: usize>(buf: &mut [MaybeUninit<u8>], n: u64) { let mut remain = n; // Format per four digits from the lookup table. - for quad_index in (0..4).rev() { + for quad_index in (1..4).rev() { // pull two pairs let quad = remain % 1_00_00; remain /= 1_00_00; @@ -426,6 +426,15 @@ fn enc_16lsd<const OFFSET: usize>(buf: &mut [MaybeUninit<u8>], n: u64) { .write(*DECIMAL_PAIRS.0.get_unchecked(pair2 as usize * 2 + 1)); } } + + // final two pairs + let (pair1, pair2) = divmod100(remain as u32); + unsafe { + buf[OFFSET + 0].write(*DECIMAL_PAIRS.0.get_unchecked(pair1 as usize * 2 + 0)); + buf[OFFSET + 1].write(*DECIMAL_PAIRS.0.get_unchecked(pair1 as usize * 2 + 1)); + buf[OFFSET + 2].write(*DECIMAL_PAIRS.0.get_unchecked(pair2 as usize * 2 + 0)); + buf[OFFSET + 3].write(*DECIMAL_PAIRS.0.get_unchecked(pair2 as usize * 2 + 1)); + } } // Euclidean division plus remainder with constant 1E16 basically consumes 16