Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ owo-colors = { version = "4", optional = true }
cfg-if = "1"

unicode-width = "0.2.0"
unicode-segmentation = "1.12.0"

textwrap = { version = "0.16.2", optional = true }
supports-hyperlinks = { version = "3.1.0", optional = true }
Expand Down
99 changes: 73 additions & 26 deletions src/handlers/graphical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ use std::{
};

use owo_colors::{OwoColorize, Style};
use unicode_width::UnicodeWidthChar;
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

use crate::{
Diagnostic, GraphicalTheme, LabeledSpan, ReportHandler, Severity, SourceCode, SourceSpan,
Expand Down Expand Up @@ -930,31 +931,77 @@ impl GraphicalReportHandler {
&self,
text: &'a str,
) -> impl Iterator<Item = usize> + 'a + use<'a> {
let mut column = 0;
let mut escaped = false;
let tab_width = self.tab_width;
text.chars().map(move |c| {
let width = match (escaped, c) {
// Round up to the next multiple of tab_width
(false, '\t') => tab_width - column % tab_width,
// start of ANSI escape
(false, '\x1b') => {
escaped = true;
0
}
// use Unicode width for all other characters
(false, c) => c.width().unwrap_or(0),
// end of ANSI escape
(true, 'm') => {
escaped = false;
0
}
// characters are zero width within escape sequence
(true, _) => 0,
};
column += width;
width
})
// Per-`char` width iterator: yields exactly one display width for every
// `char` of the text, so the i-th item corresponds to the i-th `char`.
//
// * A tab advances to the next multiple of `tab_width`.
// * An ANSI escape sequence (ESC up to and including the next 'm')
//   contributes zero columns.
// * ASCII-only text (`grapheme_boundaries == None`) needs no lookup table:
//   every remaining `char` counts as one column.
// * Otherwise the full width of a grapheme cluster is reported on the
//   cluster's first `char`, and the other `char`s of that cluster report 0.
struct CharWidthIterator<'a> {
    chars: std::str::CharIndices<'a>,
    // (byte offset of cluster start, cluster width), ascending by offset.
    // `None` selects the ASCII fast path.
    grapheme_boundaries: Option<Vec<(usize, usize)>>,
    // Index of the first entry of `grapheme_boundaries` not yet consumed.
    current_grapheme_idx: usize,
    // Running sum of the widths yielded so far; used for tab stops.
    column: usize,
    // True while inside an ANSI escape sequence.
    escaped: bool,
    tab_width: usize,
}

impl CharWidthIterator<'_> {
    // Width to report for the ordinary (non-tab, non-escape) `char` that
    // starts at `byte_pos`.
    fn grapheme_width_at(&mut self, byte_pos: usize) -> usize {
        let Some(boundaries) = self.grapheme_boundaries.as_deref() else {
            // ASCII fast path: one column per char.
            return 1;
        };

        // Tabs and escape-sequence characters are clusters of their own in
        // the table, but they are handled by other match arms and never
        // consume their entry. Skip every entry that starts before this
        // char; otherwise the cursor stays stuck on the stale entry and all
        // following text is reported as zero-width.
        let stale = boundaries
            .iter()
            .skip(self.current_grapheme_idx)
            .take_while(|&&(start, _)| start < byte_pos)
            .count();
        self.current_grapheme_idx += stale;

        match boundaries.get(self.current_grapheme_idx) {
            // First char of a cluster: consume the entry, report its width.
            Some(&(start, width)) if start == byte_pos => {
                self.current_grapheme_idx += 1;
                width
            }
            // Continuation char of a cluster already accounted for.
            _ => 0,
        }
    }
}

impl<'a> Iterator for CharWidthIterator<'a> {
    type Item = usize;

    fn next(&mut self) -> Option<Self::Item> {
        let (byte_pos, c) = self.chars.next()?;

        let width = match (self.escaped, c) {
            // Round up to the next multiple of tab_width
            (false, '\t') => self.tab_width - self.column % self.tab_width,
            // Start of ANSI escape
            (false, '\x1b') => {
                self.escaped = true;
                0
            }
            (false, _) => self.grapheme_width_at(byte_pos),
            // End of ANSI escape
            (true, 'm') => {
                self.escaped = false;
                0
            }
            // Characters are zero width within an escape sequence
            (true, _) => 0,
        };

        self.column += width;
        Some(width)
    }
}

// Only compute grapheme boundaries for non-ASCII text
let grapheme_boundaries = if text.is_ascii() {
None
} else {
// Collect grapheme boundaries with their widths
Some(
text.grapheme_indices(true)
.map(|(pos, grapheme)| (pos, grapheme.width()))
.collect(),
)
};

CharWidthIterator {
chars: text.char_indices(),
grapheme_boundaries,
current_grapheme_idx: 0,
column: 0,
escaped: false,
tab_width: self.tab_width,
}
}

/// Returns the visual column position of a byte offset on a specific line.
Expand Down
75 changes: 75 additions & 0 deletions tests/test_emoji_underline.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#![cfg(feature = "fancy-no-backtrace")]

use miette::{Diagnostic, GraphicalReportHandler, NamedSource, SourceSpan};
use thiserror::Error;

/// Renders diagnostics whose label covers multi-codepoint emoji (and a plain
/// ASCII control case) and prints them for visual inspection of the underline.
#[test]
fn test_emoji_sequence_underline() {
    #[derive(Error, Debug, Diagnostic)]
    #[error("emoji test")]
    struct TestError {
        #[source_code]
        src: NamedSource<String>,
        #[label("here")]
        span: SourceSpan,
    }

    // Builds the source "before {labeled} after", labels exactly `labeled`,
    // and returns the rendered report.
    fn render_labeled(file_name: &str, labeled: &str) -> String {
        let src = format!("before {} after", labeled);
        let err = TestError {
            src: NamedSource::new(file_name, src),
            // "before " occupies the first 7 bytes; the span length is in bytes.
            span: (7, labeled.len()).into(),
        };

        let mut output = String::new();
        GraphicalReportHandler::new()
            .render_report(&mut output, &err)
            .unwrap();
        output
    }

    // The emoji are spelled as escapes so the literals survive any re-encoding
    // of this file.

    // Test with a ZWJ emoji sequence (family emoji: man, woman, girl, boy)
    let family_emoji = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}";
    let output = render_labeled("test.txt", family_emoji);

    println!("Output for family emoji:");
    println!("{}", output);

    // Test with flag emoji (also uses ZWJ): white flag + VS16 + ZWJ + rainbow
    let flag_emoji = "\u{1F3F3}\u{FE0F}\u{200D}\u{1F308}";
    let output2 = render_labeled("test2.txt", flag_emoji);

    println!("\nOutput for rainbow flag:");
    println!("{}", output2);

    // Test with skin tone modifier: waving hand + medium skin tone
    let skin_tone_emoji = "\u{1F44B}\u{1F3FD}";
    let output3 = render_labeled("test3.txt", skin_tone_emoji);

    println!("\nOutput for waving hand with skin tone:");
    println!("{}", output3);

    // Test ASCII fast path
    let ascii_text = "hello world";
    let output4 = render_labeled("test4.txt", ascii_text);

    println!("\nOutput for ASCII text:");
    println!("{}", output4);

    // Sanity check only: the rendered snippet still contains the labelled
    // text. The underline width itself is not asserted here; it is printed
    // above for manual inspection.
    assert!(output4.contains("hello world"));
}
Loading