Skip to content

Commit 3e5267c

Browse files
swolchok authored and facebook-github-bot committed
Handle Unicode when we step forward/backward
Summary: Fixes #26. When stepping forward or backward past a match we intended to move by one character, but we moved by one byte instead. The two are not the same for non-ASCII UTF-8 text, so the resulting offset could fall inside a multi-byte character.
Reviewed By: adamjernst
Differential Revision: D27932461
fbshipit-source-id: a12c320d9d5727b3fca71ec5045c579470832af1
1 parent 0dc949a commit 3e5267c

File tree

1 file changed

+47
-4
lines changed

1 file changed

+47
-4
lines changed

src/main.rs

+47-4
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,26 @@ impl Sink for FastmodSink {
256256
}
257257
}
258258

259+
/// Rounds `index` up to the nearest UTF-8 character boundary of `s`.
///
/// * If `index` already lies on a character boundary (including `s.len()`),
///   it is returned unchanged.
/// * If `index` falls inside a multi-byte character, the byte offset of the
///   start of the next character (at most `s.len()`) is returned.
/// * If `index` is greater than `s.len()`, it is returned unchanged; callers
///   must be prepared to receive an out-of-range offset back.
fn to_char_boundary(s: &str, mut index: usize) -> usize {
    // `is_char_boundary` reports `false` for every index past the end of the
    // string, so the length check is what stops the scan for such indices.
    while index < s.len() && !s.is_char_boundary(index) {
        index += 1;
    }
    debug_assert!(
        index > s.len() || s.is_char_boundary(index),
        "index: {}, len: {}",
        index,
        s.len()
    );
    index
}
271+
272+
/// Rounds `index` down to the nearest UTF-8 character boundary of `s`.
///
/// * If `index` already lies on a character boundary, it is returned
///   unchanged.
/// * If `index` falls inside a multi-byte character, the byte offset of the
///   start of that character is returned.
/// * If `index` is greater than `s.len()`, `s.len()` is returned.
fn backward_to_char_boundary(s: &str, mut index: usize) -> usize {
    // Any index past the end rounds down to `s.len()`; clamp up front instead
    // of walking back one byte at a time.
    index = index.min(s.len());
    // Offset 0 is always a character boundary, so this cannot underflow.
    while !s.is_char_boundary(index) {
        index -= 1;
    }
    index
}
278+
259279
impl Fastmod {
260280
fn new(accept_all: bool, hidden: bool, print_changed_files: bool) -> Fastmod {
261281
Fastmod {
@@ -342,7 +362,10 @@ impl Fastmod {
342362
// Avoid generating index of -1 when start
343363
// == end == offset = 0 for a zero-length
344364
// match.
345-
mat.end() + offset - if is_zero_length_match { 0 } else { 1 },
365+
backward_to_char_boundary(
366+
&contents,
367+
mat.end() + offset - if is_zero_length_match { 0 } else { 1 },
368+
),
346369
);
347370
let accepted = self.ask_about_patch(
348371
path,
@@ -352,15 +375,18 @@ impl Fastmod {
352375
&new_contents,
353376
)?;
354377
if accepted {
355-
offset = offset
378+
offset = to_char_boundary(
379+
&contents,
380+
offset
356381
+ mat.start()
357382
+ subst.len()
358383
// Ensure forward progress when there
359384
// is a zero-length match.
360-
+ if is_zero_length_match { 1 } else { 0 };
385+
+ if is_zero_length_match { 1 } else { 0 },
386+
);
361387
} else {
362388
// Advance to the next character after the match.
363-
offset = offset + mat.end() + 1;
389+
offset = to_char_boundary(&contents, offset + mat.end() + 1);
364390
}
365391
}
366392
}
@@ -1109,4 +1135,21 @@ mod tests {
11091135
}
11101136
}
11111137
}
1138+
1139+
/// Regression test for issue #26: the pattern `quotes` sits directly before a
/// multi-byte character (the closing curly quote), so moving one *byte* past
/// the match would produce an offset inside that character.
///
/// The test only checks that the binary exits successfully.
#[test]
fn test_replace_next_to_unicode_character() {
    let contents = "I have “unicodequotes”";
    let dir = create_test_files(&[("foo.txt", contents)]);
    Command::cargo_bin("fastmod")
        .unwrap()
        .args(&[
            "quotes",
            "characters",
            "--dir",
            dir.path().to_str().unwrap(),
        ])
        // Presumably answers "no" to the interactive prompt, exercising the
        // branch that advances to the character after the match.
        .write_stdin("n\n")
        .assert()
        .success();
}
11121155
}

0 commit comments

Comments
 (0)