Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions crates/oxc_ast_visit/src/utf8_to_utf16/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,35 @@ impl<'t> Utf8ToUtf16Converter<'t> {
self.convert_offset(&mut span.start);
self.convert_offset(&mut span.end);
}

/// Convert a single UTF-16 offset back to UTF-8, updating `offset` in place.
///
/// Note: This method is not optimized. Every call performs a binary search
/// over the translations table. It's only intended for use in linter,
/// where it will be called infrequently.
pub fn convert_offset_back(&self, offset: &mut u32) {
    let utf16_offset = *offset;

    // A translation's UTF-16 position is `utf8_offset - utf16_difference`.
    // Count the translations positioned at or before `utf16_offset`;
    // the last of those is the one that applies to this offset.
    let preceding_count = self.translations.partition_point(|entry| {
        entry.utf8_offset - entry.utf16_difference <= utf16_offset
    });

    // First entry in table is `0, 0`, which always satisfies the predicate above,
    // so `preceding_count >= 1`. `partition_point` returns at most `translations.len()`.
    //
    // SAFETY: `1 <= preceding_count <= translations.len()`,
    // so `preceding_count - 1` is in bounds
    let entry = unsafe { self.translations.get_unchecked(preceding_count - 1) };

    // Adding the difference turns the UTF-16 offset into the UTF-8 offset
    *offset = utf16_offset + entry.utf16_difference;
}

/// Convert both ends of a [`Span`] from UTF-16 offsets to UTF-8 offsets, in place.
///
/// `span.start` is converted first, then `span.end`. Each conversion goes through
/// `convert_offset_back`.
pub fn convert_span_back(&self, span: &mut Span) {
    for offset in [&mut span.start, &mut span.end] {
        self.convert_offset_back(offset);
    }
}
}

impl VisitMutModuleRecord for Utf8ToUtf16Converter<'_> {
Expand Down
35 changes: 35 additions & 0 deletions crates/oxc_ast_visit/src/utf8_to_utf16/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Convert UTF-8 span offsets to UTF-16.

use oxc_ast::ast::{Comment, Program};
use oxc_span::Span;
use oxc_syntax::module_record::{ModuleRecord, VisitMutModuleRecord};

use crate::VisitMut;
Expand Down Expand Up @@ -103,6 +104,20 @@ impl Utf8ToUtf16 {
converter.visit_module_record(module_record);
}
}

/// Convert a single UTF-16 offset back to UTF-8, updating `utf16_offset` in place.
///
/// When `self.converter()` returns `None`, the offset is left untouched.
pub fn convert_offset_back(&self, utf16_offset: &mut u32) {
    // No converter available: nothing to translate
    let Some(converter) = self.converter() else {
        return;
    };
    converter.convert_offset_back(utf16_offset);
}

/// Convert a [`Span`] from UTF-16 offsets to UTF-8 offsets, in place.
///
/// When `self.converter()` returns `None`, the span is left untouched.
pub fn convert_span_back(&self, span: &mut Span) {
    // No converter available: nothing to translate
    let Some(converter) = self.converter() else {
        return;
    };
    converter.convert_span_back(span);
}
}

#[cfg(test)]
Expand Down Expand Up @@ -147,6 +162,19 @@ mod test {
let Expression::StringLiteral(s) = &expr_stmt.expression else { unreachable!() };
assert_eq!(s.span, Span::new(1, 5));
assert_eq!(program.comments[0].span, Span::new(6, 11));

// Check converting back from UTF-16 to UTF-8
let convert_back = |utf16_offset: u32| {
let mut utf8_offset = utf16_offset;
span_converter.convert_offset_back(&mut utf8_offset);
utf8_offset
};

assert_eq!(convert_back(0), 0);
assert_eq!(convert_back(2), 2);
assert_eq!(convert_back(4), 6);
assert_eq!(convert_back(9), 11);
assert_eq!(convert_back(11), 15);
}

#[test]
Expand Down Expand Up @@ -246,6 +274,13 @@ mod test {
converter.convert_offset(&mut utf16_offset);
assert_eq!(utf16_offset, expected_utf16_offset);
}

// Convert back from UTF-16 to UTF-8
for &(expected_utf8_offset, utf16_offset) in &translations {
let mut utf8_offset = utf16_offset;
converter.convert_offset_back(&mut utf8_offset);
assert_eq!(utf8_offset, expected_utf8_offset);
}
} else {
// No Unicode chars. All offsets should be the same.
for &(utf8_offset, expected_utf16_offset) in &translations {
Expand Down
Loading