oxc-project · graphite-app · Aug 28, 2025 · Aug 28, 2025
diff --git a/crates/oxc_ast_visit/src/utf8_to_utf16/converter.rs b/crates/oxc_ast_visit/src/utf8_to_utf16/converter.rs
@@ -13,7 +13,7 @@ use super::Translation;
 /// This range starts at byte `range_start`, and is `range_len` bytes long.
 /// The range describes a stretch of source text which contains only ASCII characters.
 /// A UTF-8 offset within this range can be converted to UTF-16 offset with the formula
-/// `utf16_offset = (utf8_offset - range_start_utf8).wrapping_add(range_start_utf16)`.
+/// `utf16_offset = utf8_offset - range_start_utf8 + range_start_utf16`.
 ///
 /// [`convert_offset`] has a very fast path for converting offsets in the current range.
 ///
@@ -36,12 +36,10 @@ pub struct Utf8ToUtf16Converter<'t> {
     range_len_utf8: u32,
     /// UTF-16 offset of start of range.
     /// To convert offset within this range:
-    /// `utf16_offset = (utf8_offset - range_start_utf8).wrapping_add(range_start_utf16)`.
-    /// Note: `range_start_utf16` is calculated and used with wrapping addition/subtraction,
-    /// because it can wrap around when there's a Unicode character very close to start of source.
+    /// `utf16_offset = utf8_offset - range_start_utf8 + range_start_utf16`.
     /// We store UTF-16 range start, rather than `utf16_difference`, because it makes
     /// [`Self::convert_offset`] more efficient - 1 less instruction, and 1 less register.
-    /// <https://godbolt.org/z/1xnx1v17T>
+    /// <https://godbolt.org/z/hz5xWGfYn>
     range_start_utf16: u32,
     /// Index of current `Translation`
     index: u32,
@@ -111,7 +109,7 @@ impl<'t> Utf8ToUtf16Converter<'t> {
     //
     // This method is written to reduce this common path to as few instructions as possible.
     // It's only 8 instructions on x86_64, with 2 branches, and using only 1 register.
-    // https://godbolt.org/z/1xnx1v17T
+    // https://godbolt.org/z/hz5xWGfYn
     //
     // `#[inline(always)]` because this function is small and on a very hot path.
     #[expect(clippy::inline_always)]
@@ -134,10 +132,9 @@ impl<'t> Utf8ToUtf16Converter<'t> {
         }
 
         let bytes_from_start_of_range = utf8_offset.wrapping_sub(self.range_start_utf8);
-        if bytes_from_start_of_range <= self.range_len_utf8 {
-            // Offset is within current range.
-            // `wrapping_add` because `range_start_utf16` can be `u32::MAX`.
-            *offset = self.range_start_utf16.wrapping_add(bytes_from_start_of_range);
+        if bytes_from_start_of_range < self.range_len_utf8 {
+            // Offset is within current range
+            *offset = self.range_start_utf16 + bytes_from_start_of_range;
         } else {
             // Offset is outside current range - slow path
             self.convert_offset_slow(offset);
@@ -179,18 +176,7 @@ impl<'t> Utf8ToUtf16Converter<'t> {
         self.index = index as u32;
         self.range_start_utf8 = range_start_utf8;
         self.range_len_utf8 = range_end_utf8 - range_start_utf8;
-
-        // `wrapping_sub` because `utf16_difference` can be `> range_start_utf8` where one of
-        // first few characters of source is Unicode. e.g.:
-        //
-        // * 1st char is Unicode:
-        //   * `range_start_utf8 = 1` (offsets in `Translation`s are the offset of the character + 1).
-        //   * `utf16_difference` is the length of the Unicode char, which is `> 1`.
-        //
-        // * If 1st 2 chars are ASCII, but 3rd char is a 4-byte Unicode char:
-        //   * `range_start_utf8 = 3`.
-        //   * `utf16_difference = 4`.
-        self.range_start_utf16 = range_start_utf8.wrapping_sub(utf16_difference);
+        self.range_start_utf16 = range_start_utf8 - utf16_difference;
 
         *offset = utf8_offset - utf16_difference;
     }
@@ -252,7 +238,7 @@ impl<'t> Utf8ToUtf16Converter<'t> {
         const LINEAR_SEARCH_ITERATIONS: usize = 8;
 
         // `utf8_offset` is after current range, so there must be another range after this one.
-        // We don't need to include next range in search because we know it starts before `utf8_offset`,
+        // We don't need to include next range in search because we know it starts on or before `utf8_offset`,
         // and we're looking for a range which starts *after* `utf8_offset`.
         //
         // Note: `translations` is a slice, which has max length of `isize::MAX` on all platforms.

diff --git a/crates/oxc_ast_visit/src/utf8_to_utf16/translation.rs b/crates/oxc_ast_visit/src/utf8_to_utf16/translation.rs
@@ -82,17 +82,17 @@ pub fn build_translations(source_text: &str, translations: &mut Vec<Translation>
     // Closure that processes a slice of bytes
     let mut process_slice = |slice: &[u8], start_offset: usize| {
         for (index, &byte) in slice.iter().enumerate() {
-            #[expect(clippy::cast_possible_truncation)]
             if byte >= 0xC0 {
                 let difference_for_this_byte = u32::from(byte >= 0xE0) + 1;
                 utf16_difference += difference_for_this_byte;
-                // Record `offset + 1` not `offset`, because it's only offsets *after* this
-                // Unicode character that need to be shifted.
-                // `offset + 1` cannot overflow, because source is limited to `u32::MAX` bytes,
-                // so a multi-byte Unicode character can't start at offset `u32::MAX`, because there
-                // isn't space to complete the multi-byte sequence, which would not be a valid `&str`.
-                let offset = start_offset + index;
-                let utf8_offset = (offset + 1) as u32;
+
+                // Record the UTF-8 offset just past this Unicode character, because it's only
+                // offsets *after* this Unicode character that need to be shifted.
+                // Sum always fits in `u32` because source text is at most `u32::MAX` bytes long.
+                let bytes_in_char =
+                    difference_for_this_byte as usize + usize::from(byte >= 0xF0) + 1;
+                #[expect(clippy::cast_possible_truncation)]
+                let utf8_offset = (start_offset + index + bytes_in_char) as u32;
                 translations.push(Translation { utf8_offset, utf16_difference });
             }
         }