astral-sh · ntBre · Aug 8, 2025 · Aug 7, 2025 · Aug 7, 2025 · Aug 7, 2025
@@ -1273,13 +1273,17 @@ fn format_header<'a>(
                 ..
             } = item
             {
-                if main_range >= range.0 && main_range < range.1 + max(*end_line as usize, 1) {
+                let end_of_range = range.1 + max(*end_line as usize, 1);
+                if main_range >= range.0 && main_range < end_of_range {
                     let char_column = text[0..(main_range - range.0).min(text.len())]
                         .chars()
                         .count();
                     col = char_column + 1;
                     line_offset = lineno.unwrap_or(1);
                     break;
+                } else if main_range == end_of_range {
+                    line_offset = lineno.map_or(1, |line| line + 1);
 ISC_syntax_error.py:30:1: invalid-syntax: unexpected EOF while parsing 
    | 
 28 |     "i" "j" 
 29 | ) 
    |  ^ 
    | 
 ISC_syntax_error.py:30:1: invalid-syntax: unexpected EOF while parsing 
    | 
 28 |     "i" "j" 
 29 | ) 
    |  ^ 
    | 
+                    break;
                 }
             }
         }

@@ -655,6 +655,22 @@ impl<'r> RenderableSnippet<'r> {
             .as_source_code()
             .slice(TextRange::new(snippet_start, snippet_end));
 
+        // Strip the BOM from the beginning of the snippet, if present. Doing this here saves us the
+        // trouble of updating the annotation ranges in `replace_unprintable`, and also allows us to
+        // check that the BOM is at the very beginning of the file, not just the beginning of the
+        // snippet.
+        const BOM: char = '\u{feff}';
+        let bom_len = BOM.text_len();
+        let (snippet, snippet_start) =
+            if snippet_start == TextSize::default() && snippet.starts_with(BOM) {
+                (
+                    &snippet[bom_len.to_usize()..],
+                    snippet_start + TextSize::new(bom_len.to_u32()),
+                )
+            } else {
+                (snippet, snippet_start)
+            };
+
         let annotations = anns
             .iter()
             .map(|ann| RenderableAnnotation::new(snippet_start, ann))
@@ -1000,7 +1016,12 @@ fn replace_unprintable<'r>(
     let mut last_end = 0;
     let mut result = String::new();
     for (index, c) in source.char_indices() {
-        if let Some(printable) = unprintable_replacement(c) {
+        // normalize `\r` line endings but don't double `\r\n`
+        if c == '\r' && !matches!(source.get(index + 1..index + 2), Some("\n")) {
+            result.push_str(&source[last_end..index]);
+            result.push('\n');
+            last_end = index + 1;
+        } else if let Some(printable) = unprintable_replacement(c) {
             result.push_str(&source[last_end..index]);
 
             let len = printable.text_len().to_u32();

@@ -1,7 +1,7 @@
 #[cfg(test)]
 mod tests {
     use ruff_diagnostics::Applicability;
-    use ruff_text_size::TextRange;
+    use ruff_text_size::{TextLen, TextRange, TextSize};
 
     use crate::diagnostic::{
         Annotation, DiagnosticFormat, Severity,
@@ -400,4 +400,86 @@ print()
         help: Remove `print` statement
         ");
     }
+
+    /// Carriage return (`\r`) is a valid line-ending in Python, so we should normalize this to a
+    /// line feed (`\n`) for rendering. Otherwise we report a single long line for this case.
+    #[test]
+    fn normalize_carriage_return() {
+        let mut env = TestEnvironment::new();
+        env.add(
+            "example.py",
+            "# Keep parenthesis around preserved CR\rint(-\r    1)\rint(+\r    1)",
+        );
+        env.format(DiagnosticFormat::Full);
+
+        let mut diagnostic = env.err().build();
+        let span = env
+            .path("example.py")
+            .with_range(TextRange::at(TextSize::new(39), TextSize::new(0)));
+        let annotation = Annotation::primary(span);
+        diagnostic.annotate(annotation);
+
+        insta::assert_snapshot!(env.render(&diagnostic), @r"
+        error[test-diagnostic]: main diagnostic message
+         --> example.py:2:1
+          |
+        1 | # Keep parenthesis around preserved CR
+        2 | int(-
+          | ^
+        3 |     1)
+        4 | int(+
+          |
+        ");
+    }
+
+    /// Without stripping the BOM, we report an error in column 2, unlike Ruff.
+    #[test]
+    fn strip_bom() {
+        let mut env = TestEnvironment::new();
+        env.add("example.py", "\u{feff}import foo");
+        env.format(DiagnosticFormat::Full);
+
+        let mut diagnostic = env.err().build();
+        let span = env
+            .path("example.py")
+            .with_range(TextRange::at(TextSize::new(3), TextSize::new(0)));
+        let annotation = Annotation::primary(span);
+        diagnostic.annotate(annotation);
+
+        insta::assert_snapshot!(env.render(&diagnostic), @r"
+        error[test-diagnostic]: main diagnostic message
+         --> example.py:1:1
+          |
+        1 | import foo
+          | ^
+          |
+        ");
+    }
+
+    /// We previously rendered this correctly, but the header was falling back to 1:1 for ranges
+    /// pointing to the final newline in a file. Like Ruff, we now use the offset of the first
+    /// character in the nonexistent final line in the header.
+    #[test]
+    fn end_of_file() {
+        let mut env = TestEnvironment::new();
+        let contents = "unexpected eof\n";
+        env.add("example.py", contents);
+        env.format(DiagnosticFormat::Full);
+
+        let mut diagnostic = env.err().build();
+        let span = env
+            .path("example.py")
+            .with_range(TextRange::at(contents.text_len(), TextSize::new(0)));
+        let annotation = Annotation::primary(span);
+        diagnostic.annotate(annotation);
+
+        insta::assert_snapshot!(env.render(&diagnostic), @r"
+        error[test-diagnostic]: main diagnostic message
+         --> example.py:2:1
+          |
+        1 | unexpected eof
+          |               ^
+          |
+        ");
+    }
 }