diff --git a/crates/oxc_ast_visit/src/utf8_to_utf16/converter.rs b/crates/oxc_ast_visit/src/utf8_to_utf16/converter.rs
index 988225999952f..16709af065d97 100644
--- a/crates/oxc_ast_visit/src/utf8_to_utf16/converter.rs
+++ b/crates/oxc_ast_visit/src/utf8_to_utf16/converter.rs
@@ -285,6 +285,39 @@ impl<'t> Utf8ToUtf16Converter<'t> {
         self.convert_offset(&mut span.start);
         self.convert_offset(&mut span.end);
     }
+
+    /// Convert a single UTF-16 offset back to UTF-8.
+    ///
+    /// `offset` holds the UTF-16 offset on entry and is overwritten in place with the UTF-8 offset.
+    ///
+    /// Note: This method is not optimized. It always performs a binary search.
+    /// It's only intended for use in linter, where it will be called infrequently.
+    pub fn convert_offset_back(&self, offset: &mut u32) {
+        // Find first translation whose UTF-16 offset is after `utf16_offset`
+        let utf16_offset = *offset;
+        let next_index = self.translations.partition_point(|translation| {
+            utf16_offset >= translation.utf8_offset - translation.utf16_difference
+        });
+
+        // First entry in table is `0, 0`. `partition_point` finds the first entry where
+        // `utf16_offset < translation.utf8_offset - translation.utf16_difference`
+        // (or `translations.len()` if none exists).
+        // So guaranteed `next_index > 0`, and `next_index <= translations.len()`.
+        //
+        // Use checked indexing rather than `get_unchecked`: this method is not a hot path,
+        // and a panic is preferable to undefined behavior if the table invariant is ever broken.
+        let translation = &self.translations[next_index - 1];
+
+        *offset += translation.utf16_difference;
+    }
+
+    /// Convert [`Span`] from UTF-16 offsets to UTF-8 offsets.
+    ///
+    /// `span.start` and `span.end` are each converted in place via `convert_offset_back`.
+    pub fn convert_span_back(&self, span: &mut Span) {
+        self.convert_offset_back(&mut span.start);
+        self.convert_offset_back(&mut span.end);
+    }
 }
 
 impl VisitMutModuleRecord for Utf8ToUtf16Converter<'_> {
diff --git a/crates/oxc_ast_visit/src/utf8_to_utf16/mod.rs b/crates/oxc_ast_visit/src/utf8_to_utf16/mod.rs
index e0f21d6a142b4..71aa8e78a72a2 100644
--- a/crates/oxc_ast_visit/src/utf8_to_utf16/mod.rs
+++ b/crates/oxc_ast_visit/src/utf8_to_utf16/mod.rs
@@ -1,6 +1,7 @@
 //!
Convert UTF-8 span offsets to UTF-16.
 
 use oxc_ast::ast::{Comment, Program};
+use oxc_span::Span;
 use oxc_syntax::module_record::{ModuleRecord, VisitMutModuleRecord};
 
 use crate::VisitMut;
@@ -103,6 +104,24 @@ impl Utf8ToUtf16 {
             converter.visit_module_record(module_record);
         }
     }
+
+    /// Convert a single UTF-16 offset back to a UTF-8 offset.
+    ///
+    /// `utf16_offset` is rewritten in place. It is left untouched when `self.converter()`
+    /// returns `None`.
+    pub fn convert_offset_back(&self, utf16_offset: &mut u32) {
+        let Some(converter) = self.converter() else { return };
+        converter.convert_offset_back(utf16_offset);
+    }
+
+    /// Convert a [`Span`] from UTF-16 offsets back to UTF-8 offsets.
+    ///
+    /// `span` is rewritten in place. It is left untouched when `self.converter()`
+    /// returns `None`.
+    pub fn convert_span_back(&self, span: &mut Span) {
+        let Some(converter) = self.converter() else { return };
+        converter.convert_span_back(span);
+    }
 }
 
 #[cfg(test)]
@@ -147,6 +166,19 @@ mod test {
         let Expression::StringLiteral(s) = &expr_stmt.expression else { unreachable!() };
         assert_eq!(s.span, Span::new(1, 5));
         assert_eq!(program.comments[0].span, Span::new(6, 11));
+
+        // Check converting back from UTF-16 to UTF-8
+        let convert_back = |utf16_offset: u32| {
+            let mut utf8_offset = utf16_offset;
+            span_converter.convert_offset_back(&mut utf8_offset);
+            utf8_offset
+        };
+
+        assert_eq!(convert_back(0), 0);
+        assert_eq!(convert_back(2), 2);
+        assert_eq!(convert_back(4), 6);
+        assert_eq!(convert_back(9), 11);
+        assert_eq!(convert_back(11), 15);
     }
 
     #[test]
@@ -246,6 +278,13 @@ mod test {
                 converter.convert_offset(&mut utf16_offset);
                 assert_eq!(utf16_offset, expected_utf16_offset);
             }
+
+            // Convert back from UTF-16 to UTF-8
+            for &(expected_utf8_offset, utf16_offset) in &translations {
+                let mut utf8_offset = utf16_offset;
+                converter.convert_offset_back(&mut utf8_offset);
+                assert_eq!(utf8_offset, expected_utf8_offset);
+            }
         } else {
             // No Unicode chars. All offsets should be the same.
             for &(utf8_offset, expected_utf16_offset) in &translations {