astral-sh · MichaReiser · Apr 27, 2025 · Apr 23, 2025 · Apr 24, 2025 · Apr 27, 2025
diff --git a/crates/red_knot_server/src/document.rs b/crates/red_knot_server/src/document.rs
@@ -27,6 +27,16 @@ pub enum PositionEncoding {
     UTF8,
 }
 
+impl From<PositionEncoding> for ruff_source_file::PositionEncoding { // enables `encoding.into()` at call sites
+    fn from(value: PositionEncoding) -> Self {
+        match value { // one-to-one mapping onto the same-named variant of the target enum
+            PositionEncoding::UTF8 => Self::Utf8,
+            PositionEncoding::UTF16 => Self::Utf16,
+            PositionEncoding::UTF32 => Self::Utf32,
+        }
+    }
+}
+
 /// A unique document ID, derived from a URL passed as part of an LSP request.
 /// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook.
 #[derive(Clone, Debug)]

diff --git a/crates/red_knot_server/src/document/range.rs b/crates/red_knot_server/src/document/range.rs
@@ -9,8 +9,8 @@ use red_knot_python_semantic::Db;
 use ruff_db::files::FileRange;
 use ruff_db::source::{line_index, source_text};
 use ruff_notebook::NotebookIndex;
-use ruff_source_file::OneIndexed;
-use ruff_source_file::{LineIndex, SourceLocation};
+use ruff_source_file::LineIndex;
+use ruff_source_file::{OneIndexed, SourceLocation};
 use ruff_text_size::{Ranged, TextRange, TextSize};
 
 #[expect(dead_code)]
@@ -46,7 +46,7 @@ impl TextSizeExt for TextSize {
         index: &LineIndex,
         encoding: PositionEncoding,
     ) -> types::Position {
-        let source_location = offset_to_source_location(self, text, index, encoding);
+        let source_location = index.source_location(self, text, encoding.into());
         source_location_to_position(&source_location)
     }
 }
@@ -75,36 +75,14 @@ fn u32_index_to_usize(index: u32) -> usize {
 
 impl PositionExt for lsp_types::Position {
     fn to_text_size(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> TextSize {
-        let start_line = index.line_range(
-            OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
+        index.offset( // one call that receives the encoding; replaces the removed per-encoding match below
+            SourceLocation {
+                line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)), // `self.line` is treated as zero-indexed
+                character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)), // likewise zero-indexed
+            },
             text,
-        );
-
-        let start_column_offset = match encoding {
-            PositionEncoding::UTF8 => TextSize::new(self.character),
-
-            PositionEncoding::UTF16 => {
-                // Fast path for ASCII only documents
-                if index.is_ascii() {
-                    TextSize::new(self.character)
-                } else {
-                    // UTF16 encodes characters either as one or two 16 bit words.
-                    // The position in `range` is the 16-bit word offset from the start of the line (and not the character offset)
-                    // UTF-16 with a text that may use variable-length characters.
-                    utf8_column_offset(self.character, &text[start_line])
-                }
-            }
-            PositionEncoding::UTF32 => {
-                // UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset.
-                return index.offset(
-                    OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
-                    OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)),
-                    text,
-                );
-            }
-        };
-
-        start_line.start() + start_column_offset.clamp(TextSize::new(0), start_line.end())
+            encoding.into(), // NOTE(review): presumably converts to `ruff_source_file::PositionEncoding` - confirm against `LineIndex::offset`
+        )
     }
 }
 
@@ -142,26 +120,23 @@ impl ToRangeExt for TextRange {
         notebook_index: &NotebookIndex,
         encoding: PositionEncoding,
     ) -> NotebookRange {
-        let start = offset_to_source_location(self.start(), text, source_index, encoding);
-        let mut end = offset_to_source_location(self.end(), text, source_index, encoding);
-        let starting_cell = notebook_index.cell(start.row);
+        let start = source_index.source_location(self.start(), text, encoding.into());
+        let mut end = source_index.source_location(self.end(), text, encoding.into());
+        let starting_cell = notebook_index.cell(start.line);
 
         // weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds')
         // we need to move it one character back (which should place it at the end of the last line).
         // we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset.
-        if notebook_index.cell(end.row) != starting_cell {
-            end.row = end.row.saturating_sub(1);
-            end.column = offset_to_source_location(
-                self.end().checked_sub(1.into()).unwrap_or_default(),
-                text,
-                source_index,
-                encoding,
-            )
-            .column;
+        if notebook_index.cell(end.line) != starting_cell {
+            end.line = end.line.saturating_sub(1);
+            let offset = self.end().checked_sub(1.into()).unwrap_or_default();
+            end.character_offset = source_index
+                .source_location(offset, text, encoding.into())
+                .character_offset;
         }
 
-        let start = source_location_to_position(&notebook_index.translate_location(&start));
-        let end = source_location_to_position(&notebook_index.translate_location(&end));
+        let start = source_location_to_position(&notebook_index.translate_source_location(&start));
+        let end = source_location_to_position(&notebook_index.translate_source_location(&end));
 
         NotebookRange {
             cell: starting_cell
@@ -172,67 +147,10 @@ impl ToRangeExt for TextRange {
     }
 }
 
-/// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number.
-fn utf8_column_offset(utf16_code_unit_offset: u32, line: &str) -> TextSize {
-    let mut utf8_code_unit_offset = TextSize::new(0);
-
-    let mut i = 0u32;
-
-    for c in line.chars() {
-        if i >= utf16_code_unit_offset {
-            break;
-        }
-
-        // Count characters encoded as two 16 bit words as 2 characters.
-        {
-            utf8_code_unit_offset +=
-                TextSize::new(u32::try_from(c.len_utf8()).expect("utf8 len always <=4"));
-            i += u32::try_from(c.len_utf16()).expect("utf16 len always <=2");
-        }
-    }
-
-    utf8_code_unit_offset
-}
-
-fn offset_to_source_location(
-    offset: TextSize,
-    text: &str,
-    index: &LineIndex,
-    encoding: PositionEncoding,
-) -> SourceLocation {
-    match encoding {
-        PositionEncoding::UTF8 => {
-            let row = index.line_index(offset);
-            let column = offset - index.line_start(row, text);
-
-            SourceLocation {
-                column: OneIndexed::from_zero_indexed(column.to_usize()),
-                row,
-            }
-        }
-        PositionEncoding::UTF16 => {
-            let row = index.line_index(offset);
-
-            let column = if index.is_ascii() {
-                (offset - index.line_start(row, text)).to_usize()
-            } else {
-                let up_to_line = &text[TextRange::new(index.line_start(row, text), offset)];
-                up_to_line.encode_utf16().count()
-            };
-
-            SourceLocation {
-                column: OneIndexed::from_zero_indexed(column),
-                row,
-            }
-        }
-        PositionEncoding::UTF32 => index.source_location(offset, text),
-    }
-}
-
 fn source_location_to_position(location: &SourceLocation) -> types::Position {
     types::Position {
-        line: u32::try_from(location.row.to_zero_indexed()).expect("row usize fits in u32"),
-        character: u32::try_from(location.column.to_zero_indexed())
+        line: u32::try_from(location.line.to_zero_indexed()).expect("line usize fits in u32"), // one-indexed -> zero-indexed; panics if the value does not fit in u32
+        character: u32::try_from(location.character_offset.to_zero_indexed()) // same conversion for the offset within the line
             .expect("character usize fits in u32"),
     }
 }

diff --git a/crates/red_knot_test/src/matcher.rs b/crates/red_knot_test/src/matcher.rs
@@ -263,7 +263,7 @@ impl Matcher {
             .and_then(|span| span.range())
             .map(|range| {
                 self.line_index
-                    .source_location(range.start(), &self.source)
+                    .line_column(range.start(), &self.source)
                     .column
             })
             .unwrap_or(OneIndexed::from_zero_indexed(0))

diff --git a/crates/red_knot_wasm/src/lib.rs b/crates/red_knot_wasm/src/lib.rs
@@ -19,7 +19,7 @@ use ruff_db::system::{
 use ruff_db::Upcast;
 use ruff_notebook::Notebook;
 use ruff_python_formatter::formatted_file;
-use ruff_source_file::{LineIndex, OneIndexed, SourceLocation};
+use ruff_source_file::{LineColumn, LineIndex, OneIndexed, PositionEncoding, SourceLocation};
 use ruff_text_size::{Ranged, TextSize};
 use wasm_bindgen::prelude::*;
 
@@ -408,8 +408,8 @@ impl Range {
     }
 }
 
-impl From<(SourceLocation, SourceLocation)> for Range {
-    fn from((start, end): (SourceLocation, SourceLocation)) -> Self {
+impl From<(LineColumn, LineColumn)> for Range {
+    fn from((start, end): (LineColumn, LineColumn)) -> Self {
         Self {
             start: start.into(),
             end: end.into(),
@@ -438,29 +438,34 @@ impl Position {
 impl Position {
     fn to_text_size(self, text: &str, index: &LineIndex) -> Result<TextSize, Error> {
         let text_size = index.offset(
-            OneIndexed::new(self.line).ok_or_else(|| {
-                Error::new("Invalid value `0` for `position.line`. The line index is 1-indexed.")
-            })?,
-            OneIndexed::new(self.column).ok_or_else(|| {
-                Error::new(
-                    "Invalid value `0` for `position.column`. The column index is 1-indexed.",
-                )
-            })?,
+            SourceLocation { // both fields are validated: a `None` from `OneIndexed::new` becomes an `Error` via `?`
+                line: OneIndexed::new(self.line).ok_or_else(|| {
+                    Error::new(
+                        "Invalid value `0` for `position.line`. The line index is 1-indexed.",
+                    )
+                })?,
+                character_offset: OneIndexed::new(self.column).ok_or_else(|| { // `column` is passed as the character offset
+                    Error::new(
+                        "Invalid value `0` for `position.column`. The column index is 1-indexed.",
+                    )
+                })?,
+            },
             text,
+            PositionEncoding::Utf32, // NOTE(review): presumably makes `column` count characters rather than bytes - confirm
         );
 
         Ok(text_size)
     }
 
     fn from_text_size(offset: TextSize, line_index: &LineIndex, source: &str) -> Self {
-        line_index.source_location(offset, source).into()
+        line_index.line_column(offset, source).into() // `.into()` yields `Self`, i.e. a `Position`
     }
 }
 
-impl From<SourceLocation> for Position {
-    fn from(location: SourceLocation) -> Self {
+impl From<LineColumn> for Position {
+    fn from(location: LineColumn) -> Self {
         Self {
-            line: location.row.get(),
+            line: location.line.get(),
             column: location.column.get(),
         }
     }

diff --git a/crates/ruff/src/args.rs b/crates/ruff/src/args.rs
@@ -5,6 +5,7 @@ use std::path::{Path, PathBuf};
 use std::str::FromStr;
 use std::sync::Arc;
 
+use crate::commands::completions::config::{OptionString, OptionStringParser};
 use anyhow::bail;
 use clap::builder::{TypedValueParser, ValueParserFactory};
 use clap::{command, Parser, Subcommand};
@@ -22,7 +23,7 @@ use ruff_linter::settings::types::{
 };
 use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser};
 use ruff_python_ast as ast;
-use ruff_source_file::{LineIndex, OneIndexed};
+use ruff_source_file::{LineIndex, OneIndexed, PositionEncoding};
 use ruff_text_size::TextRange;
 use ruff_workspace::configuration::{Configuration, RuleSelection};
 use ruff_workspace::options::{Options, PycodestyleOptions};
@@ -31,8 +32,6 @@ use ruff_workspace::resolver::ConfigurationTransformer;
 use rustc_hash::FxHashMap;
 use toml;
 
-use crate::commands::completions::config::{OptionString, OptionStringParser};
-
 /// All configuration options that can be passed "globally",
 /// i.e., can be passed to all subcommands
 #[derive(Debug, Default, Clone, clap::Args)]
@@ -1070,8 +1069,9 @@ impl FormatRange {
     ///
     /// Returns an empty range if the start range is past the end of `source`.
     pub(super) fn to_text_range(self, source: &str, line_index: &LineIndex) -> TextRange {
-        let start_byte_offset = line_index.offset(self.start.line, self.start.column, source);
-        let end_byte_offset = line_index.offset(self.end.line, self.end.column, source);
+        let start_byte_offset =
+            line_index.offset(self.start.into(), source, PositionEncoding::Utf32); // NOTE(review): Utf32 presumably means columns count characters - confirm
+        let end_byte_offset = line_index.offset(self.end.into(), source, PositionEncoding::Utf32); // same encoding as the start offset
 
         TextRange::new(start_byte_offset, end_byte_offset)
     }
@@ -1142,6 +1142,15 @@ pub struct LineColumn {
     pub column: OneIndexed,
 }
 
+impl From<LineColumn> for ruff_source_file::SourceLocation { // lets `FormatRange::to_text_range` pass `self.start.into()` / `self.end.into()`
+    fn from(value: LineColumn) -> Self {
+        Self {
+            line: value.line, // carried over unchanged
+            character_offset: value.column, // the CLI `column` is used as the character offset
+        }
+    }
+}
+
 impl std::fmt::Display for LineColumn {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         write!(f, "{line}:{column}", line = self.line, column = self.column)

@@ -71,8 +71,8 @@ impl std::fmt::Display for DisplayDiagnostic<'_> {
                 write!(f, " {path}", path = self.resolver.path(span.file()))?;
                 if let Some(range) = span.range() {
                     let input = self.resolver.input(span.file());
-                    let start = input.as_source_code().source_location(range.start());
-                    write!(f, ":{line}:{col}", line = start.row, col = start.column)?;
+                    let start = input.as_source_code().line_column(range.start());
+                    write!(f, ":{line}:{col}", line = start.line, col = start.column)?;
                 }
                 write!(f, ":")?;
             }

diff --git a/crates/ruff_linter/src/checkers/ast/analyze/definitions.rs b/crates/ruff_linter/src/checkers/ast/analyze/definitions.rs
@@ -191,7 +191,7 @@ pub(crate) fn definitions(checker: &mut Checker) {
                 warn_user!(
                     "Docstring at {}:{}:{} contains implicit string concatenation; ignoring...",
                     relativize_path(checker.path),
-                    location.row,
+                    location.line,
                     location.column
                 );
                 continue;

diff --git a/crates/ruff_linter/src/locator.rs b/crates/ruff_linter/src/locator.rs
@@ -2,7 +2,7 @@
 
 use std::cell::OnceCell;
 
-use ruff_source_file::{LineIndex, LineRanges, OneIndexed, SourceCode, SourceLocation};
+use ruff_source_file::{LineColumn, LineIndex, LineRanges, OneIndexed, SourceCode};
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
 
 #[derive(Debug)]
@@ -36,8 +36,8 @@ impl<'a> Locator<'a> {
     #[deprecated(
         note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
     )]
-    pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
-        self.to_source_code().source_location(offset)
+    pub fn compute_source_location(&self, offset: TextSize) -> LineColumn { // return type is now `LineColumn`; the method name is unchanged
+        self.to_source_code().line_column(offset) // delegates to `line_column` on the value returned by `to_source_code()`
     }
 
     pub fn to_index(&self) -> &LineIndex {