Skip to content

Commit 682c079

Browse files
bors[bot] and lnicola
authored
Merge #4276
4276: Don't count start of non-ASCII characters as being inside of them r=matklad a=lnicola I'm still not sure that `utf16_to_utf8_col` is correct for code points from Supplementary Planes. These have two UTF-16 code units, and I feel we're not going to count them correctly. Fixes the crash in #4263 (comment). Co-authored-by: Laurențiu Nicola <[email protected]>
2 parents 2474f42 + 16d3bb9 commit 682c079

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

crates/ra_ide_db/src/line_index.rs

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,9 @@ use superslice::Ext;
88

99
#[derive(Clone, Debug, PartialEq, Eq)]
1010
pub struct LineIndex {
11+
/// Offset to the beginning of each line, zero-based
1112
pub(crate) newlines: Vec<TextSize>,
13+
/// List of non-ASCII characters on each line
1214
pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
1315
}
1416

@@ -22,7 +24,9 @@ pub struct LineCol {
2224

2325
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
2426
pub(crate) struct Utf16Char {
27+
/// Start offset of a character inside a line, zero-based
2528
pub(crate) start: TextSize,
29+
/// End offset of a character inside a line, zero-based
2630
pub(crate) end: TextSize,
2731
}
2832

@@ -120,7 +124,7 @@ impl LineIndex {
120124
fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
121125
if let Some(utf16_chars) = self.utf16_lines.get(&line) {
122126
for c in utf16_chars {
123-
if col >= u32::from(c.start) {
127+
if col > u32::from(c.start) {
124128
col += u32::from(c.len()) - 1;
125129
} else {
126130
// From here on, all utf16 characters come *after* the character we are mapping,
@@ -226,8 +230,10 @@ const C: char = \"メ メ\";
226230
// UTF-16 to UTF-8
227231
assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));
228232

229-
assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20));
230-
assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(23));
233+
// メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1
234+
assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20
235+
assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space
236+
assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24
231237

232238
assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15));
233239
}

0 commit comments

Comments
 (0)