oxc-project · Boshen · Sep 16, 2025 · Sep 16, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -43,6 +43,7 @@ owo-colors = { version = "4", optional = true }
 cfg-if = "1"
 
 unicode-width = "0.2.0"
+unicode-segmentation = "1.12.0"
 
 textwrap = { version = "0.16.2", optional = true }
 supports-hyperlinks = { version = "3.1.0", optional = true }

diff --git a/src/handlers/graphical.rs b/src/handlers/graphical.rs
@@ -4,7 +4,8 @@ use std::{
 };
 
 use owo_colors::{OwoColorize, Style};
-use unicode_width::UnicodeWidthChar;
+use unicode_segmentation::UnicodeSegmentation;
+use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
 
 use crate::{
     Diagnostic, GraphicalTheme, LabeledSpan, ReportHandler, Severity, SourceCode, SourceSpan,
@@ -930,31 +931,77 @@ impl GraphicalReportHandler {
         &self,
         text: &'a str,
     ) -> impl Iterator<Item = usize> + 'a + use<'a> {
-        let mut column = 0;
-        let mut escaped = false;
-        let tab_width = self.tab_width;
-        text.chars().map(move |c| {
-            let width = match (escaped, c) {
-                // Round up to the next multiple of tab_width
-                (false, '\t') => tab_width - column % tab_width,
-                // start of ANSI escape
-                (false, '\x1b') => {
-                    escaped = true;
-                    0
-                }
-                // use Unicode width for all other characters
-                (false, c) => c.width().unwrap_or(0),
-                // end of ANSI escape
-                (true, 'm') => {
-                    escaped = false;
-                    0
-                }
-                // characters are zero width within escape sequence
-                (true, _) => 0,
-            };
-            column += width;
-            width
-        })
+        // One width per char: a grapheme cluster's full width is reported on its first char
+        struct CharWidthIterator<'a> {
+            chars: std::str::CharIndices<'a>,
+            // (byte_pos, width) of each grapheme cluster; `None` for ASCII-only text
+            grapheme_boundaries: Option<Vec<(usize, usize)>>,
+            // Cursor into `grapheme_boundaries`; only ever moves forward
+            current_grapheme_idx: usize,
+            column: usize,
+            escaped: bool,
+            tab_width: usize,
+        }
+
+        impl<'a> Iterator for CharWidthIterator<'a> {
+            type Item = usize;
+
+            fn next(&mut self) -> Option<Self::Item> {
+                let (byte_pos, c) = self.chars.next()?;
+
+                let width = match (self.escaped, c) {
+                    (false, '\t') => self.tab_width - self.column % self.tab_width,
+                    (false, '\x1b') => {
+                        self.escaped = true;
+                        0
+                    }
+                    (false, _) => match self.grapheme_boundaries.as_deref() {
+                        None => 1, // ASCII path: every other char counts as one column
+                        Some(boundaries) => {
+                            // Tabs and escapes are handled by other arms, so step past their
+                            // entries first; otherwise the cursor stalls and later chars get 0.
+                            let idx = &mut self.current_grapheme_idx;
+                            while boundaries.get(*idx).is_some_and(|&(pos, _)| pos < byte_pos) {
+                                *idx += 1;
+                            }
+                            match boundaries.get(*idx) {
+                                Some(&(pos, w)) if pos == byte_pos => w,
+                                _ => 0, // Not at a grapheme boundary
+                            }
+                        }
+                    },
+                    (true, 'm') => {
+                        self.escaped = false;
+                        0
+                    }
+                    (true, _) => 0,
+                };
+
+                self.column += width;
+                Some(width)
+            }
+        }
+
+        // Only compute grapheme boundaries for non-ASCII text
+        let grapheme_boundaries = if text.is_ascii() {
+            None
+        } else {
+            // Collect grapheme boundaries with their widths
+            Some(
+                text.grapheme_indices(true)
+                    .map(|(pos, grapheme)| (pos, grapheme.width()))
+                    .collect(),
+            )
+        };
+
+        CharWidthIterator {
+            chars: text.char_indices(),
+            grapheme_boundaries,
+            current_grapheme_idx: 0,
+            column: 0,
+            escaped: false,
+            tab_width: self.tab_width,
+        }
     }
 
     /// Returns the visual column position of a byte offset on a specific line.

diff --git a/tests/test_emoji_underline.rs b/tests/test_emoji_underline.rs
@@ -0,0 +1,75 @@
+#![cfg(feature = "fancy-no-backtrace")]
+
+use miette::{Diagnostic, GraphicalReportHandler, NamedSource, SourceSpan};
+use thiserror::Error;
+
+#[test]
+fn test_emoji_sequence_underline() {
+    // Smoke test: render a labelled span over several multi-codepoint emoji and
+    // over plain ASCII. The reports are printed for visual inspection of the
+    // underline (run with `--nocapture`); the assertions only check that each
+    // snippet survives rendering intact, not the underline's width.
+
+    #[derive(Error, Debug, Diagnostic)]
+    #[error("emoji test")]
+    struct TestError {
+        #[source_code]
+        src: NamedSource<String>,
+        #[label("here")]
+        span: SourceSpan,
+    }
+
+    /// Renders `before {snippet} after` with the label covering exactly `snippet`.
+    ///
+    /// The span starts at byte 7 (the length of "before ") and is `snippet.len()`
+    /// bytes long, so multi-byte emoji sequences are covered in full.
+    fn render(file_name: &str, snippet: &str) -> String {
+        let err = TestError {
+            src: NamedSource::new(file_name, format!("before {snippet} after")),
+            span: (7, snippet.len()).into(),
+        };
+
+        let mut output = String::new();
+        GraphicalReportHandler::new().render_report(&mut output, &err).unwrap();
+        output
+    }
+
+    // Test with a ZWJ emoji sequence (family emoji)
+    let family_emoji = "👨‍👩‍👧‍👦";
+    let output = render("test.txt", family_emoji);
+
+    println!("Output for family emoji:");
+    println!("{}", output);
+
+    // The rendered snippet must still contain the whole sequence
+    assert!(output.contains(family_emoji));
+
+    // Test with flag emoji (also uses ZWJ)
+    let flag_emoji = "🏳️‍🌈";
+    let output2 = render("test2.txt", flag_emoji);
+
+    println!("\nOutput for rainbow flag:");
+    println!("{}", output2);
+
+    assert!(output2.contains(flag_emoji));
+
+    // Test with skin tone modifier
+    let skin_tone_emoji = "👋🏽";
+    let output3 = render("test3.txt", skin_tone_emoji);
+
+    println!("\nOutput for waving hand with skin tone:");
+    println!("{}", output3);
+
+    assert!(output3.contains(skin_tone_emoji));
+
+    // Test ASCII fast path
+    let ascii_text = "hello world";
+    let output4 = render("test4.txt", ascii_text);
+
+    println!("\nOutput for ASCII text:");
+    println!("{}", output4);
+
+    // NOTE: this does not verify the underline width; it only checks that the
+    // ASCII snippet appears in the rendered report.
+    assert!(output4.contains("hello world"));
+}