Skip to content

Commit

Permalink
red-knot: source_text, line_index, and parsed_module queries (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser authored Jun 13, 2024
1 parent efbf7b1 commit d4dd96d
Show file tree
Hide file tree
Showing 9 changed files with 193 additions and 15 deletions.
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions crates/ruff_db/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ repository = { workspace = true }
license = { workspace = true }

[dependencies]
ruff_python_ast = { workspace = true }
ruff_python_parser = { workspace = true }
ruff_source_file = { workspace = true }
ruff_text_size = { workspace = true }

camino = { workspace = true }
countme = { workspace = true }
dashmap = { workspace = true }
Expand Down
28 changes: 27 additions & 1 deletion crates/ruff_db/src/file_system.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,31 @@ impl FileSystemPath {
unsafe { &*(path as *const Utf8Path as *const FileSystemPath) }
}

/// Extracts the file extension, if possible.
///
/// The extension is:
///
/// * [`None`], if there is no file name;
/// * [`None`], if there is no embedded `.`;
/// * [`None`], if the file name begins with `.` and has no other `.`s within;
/// * Otherwise, the portion of the file name after the final `.`
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::FileSystemPath;
///
/// assert_eq!("rs", FileSystemPath::new("foo.rs").extension().unwrap());
/// assert_eq!("gz", FileSystemPath::new("foo.tar.gz").extension().unwrap());
/// ```
///
/// See [`Path::extension`] for more details.
#[inline]
#[must_use]
pub fn extension(&self) -> Option<&str> {
self.0.extension()
}

/// Converts the path to an owned [`FileSystemPathBuf`].
pub fn to_path_buf(&self) -> FileSystemPathBuf {
FileSystemPathBuf(self.0.to_path_buf())
Expand Down Expand Up @@ -251,9 +276,10 @@ impl FileType {

#[cfg(test)]
mod tests {
use crate::file_system::FileRevision;
use filetime::FileTime;

use crate::file_system::FileRevision;

#[test]
fn revision_from_file_time() {
let file_time = FileTime::now();
Expand Down
6 changes: 5 additions & 1 deletion crates/ruff_db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,19 @@ use rustc_hash::FxHasher;
use salsa::DbWithJar;

use crate::file_system::{FileSystem, FileSystemPath};
use crate::parsed::parsed_module;
use crate::source::{line_index, source_text};
use crate::vfs::{VendoredPath, Vfs, VfsFile};

pub mod file_system;
pub mod parsed;
pub mod source;
pub mod vfs;

pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;

#[salsa::jar(db=Db)]
pub struct Jar(VfsFile);
pub struct Jar(VfsFile, source_text, line_index, parsed_module);

/// Database that gives access to the virtual filesystem, source code, and parsed AST.
pub trait Db: DbWithJar<Jar> {
Expand Down
126 changes: 126 additions & 0 deletions crates/ruff_db/src/parsed.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
use std::fmt::Formatter;
use std::ops::Deref;
use std::sync::Arc;

use ruff_python_ast::{ModModule, PySourceType};
use ruff_python_parser::{parse_unchecked_source, Parsed};

use crate::source::source_text;
use crate::vfs::{VfsFile, VfsPath};
use crate::Db;

/// Returns the parsed AST of `file`, including its token stream.
///
/// The query uses Ruff's error-resilient parser. That means that the parser always succeeds to produce a
/// AST even if the file contains syntax errors. The parse errors
/// are then accessible through [`Parsed::errors`].
///
/// The query is only cached when the [`source_text()`] hasn't changed. This is because
/// comparing two ASTs is a non-trivial operation and every offset change is directly
/// reflected in the changed AST offsets.
/// The other reason is that Ruff's AST doesn't implement `Eq` which Sala requires
/// for determining if a query result is unchanged.
#[salsa::tracked(return_ref, no_eq)]
pub fn parsed_module(db: &dyn Db, file: VfsFile) -> ParsedModule {
let source = source_text(db, file);
let path = file.path(db);

let ty = match path {
VfsPath::FileSystem(path) => path
.extension()
.map_or(PySourceType::Python, PySourceType::from_extension),
VfsPath::Vendored(_) => PySourceType::Stub,
};

ParsedModule {
inner: Arc::new(parse_unchecked_source(&source, ty)),
}
}

/// Cheap cloneable wrapper around the parsed module.
#[derive(Clone, PartialEq)]
pub struct ParsedModule {
inner: Arc<Parsed<ModModule>>,
}

impl ParsedModule {
/// Consumes `self` and returns the Arc storing the parsed module.
pub fn into_arc(self) -> Arc<Parsed<ModModule>> {
self.inner
}
}

impl Deref for ParsedModule {
type Target = Parsed<ModModule>;

fn deref(&self) -> &Self::Target {
&self.inner
}
}

impl std::fmt::Debug for ParsedModule {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("ParsedModule").field(&self.inner).finish()
}
}

#[cfg(test)]
mod tests {
use crate::file_system::FileSystemPath;
use crate::parsed::parsed_module;
use crate::tests::TestDb;
use crate::vfs::VendoredPath;
use crate::Db;

#[test]
fn python_file() {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.py");

db.file_system_mut().write_file(path, "x = 10".to_string());

let file = db.file(path);

let parsed = parsed_module(&db, file);

assert!(parsed.is_valid());
}

#[test]
fn python_ipynb_file() {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.ipynb");

db.file_system_mut()
.write_file(path, "%timeit a = b".to_string());

let file = db.file(path);

let parsed = parsed_module(&db, file);

assert!(parsed.is_valid());
}

#[test]
fn vendored_file() {
let mut db = TestDb::new();
db.vfs_mut().stub_vendored([(
"path.pyi",
r#"
import sys
if sys.platform == "win32":
from ntpath import *
from ntpath import __all__ as __all__
else:
from posixpath import *
from posixpath import __all__ as __all__"#,
)]);

let file = db.vendored_file(VendoredPath::new("path.pyi")).unwrap();

let parsed = parsed_module(&db, file);

assert!(parsed.is_valid());
}
}
5 changes: 2 additions & 3 deletions crates/ruff_db/src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,10 @@ mod tests {
// Change the file permission only
file.set_permissions(&mut db).to(Some(0o777));

db.events().lock().unwrap().clear();
db.clear_salsa_events();
assert_eq!(&*source_text(&db, file), "x = 10");

let events = db.events();
let events = events.lock().unwrap();
let events = db.take_salsa_events();

assert!(!events
.iter()
Expand Down
25 changes: 19 additions & 6 deletions crates/ruff_python_ast/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::ffi::OsStr;
use std::path::Path;

pub use expression::*;
Expand Down Expand Up @@ -80,13 +81,25 @@ pub enum PySourceType {
Ipynb,
}

impl PySourceType {
/// Infers the source type from the file extension.
///
/// Falls back to `Python` if the extension is not recognized.
pub fn from_extension(extension: &str) -> Self {
match extension {
"py" => Self::Python,
"pyi" => Self::Stub,
"ipynb" => Self::Ipynb,
_ => Self::Python,
}
}
}

impl<P: AsRef<Path>> From<P> for PySourceType {
fn from(path: P) -> Self {
match path.as_ref().extension() {
Some(ext) if ext == "py" => PySourceType::Python,
Some(ext) if ext == "pyi" => PySourceType::Stub,
Some(ext) if ext == "ipynb" => PySourceType::Ipynb,
_ => PySourceType::Python,
}
path.as_ref()
.extension()
.and_then(OsStr::to_str)
.map_or(Self::Python, Self::from_extension)
}
}
4 changes: 2 additions & 2 deletions crates/ruff_python_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed
}

/// Represents the parsed source code.
#[derive(Debug, Clone)]
#[derive(Debug, PartialEq, Clone)]
pub struct Parsed<T> {
syntax: T,
tokens: Tokens,
Expand Down Expand Up @@ -361,7 +361,7 @@ impl Parsed<ModExpression> {
}

/// Tokens represents a vector of lexed [`Token`].
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tokens {
raw: Vec<Token>,

Expand Down
5 changes: 3 additions & 2 deletions crates/ruff_source_file/src/line_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ use crate::SourceLocation;
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq)]
pub struct LineIndex {
inner: Arc<LineIndexInner>,
}

#[derive(Eq, PartialEq)]
struct LineIndexInner {
line_starts: Vec<TextSize>,
kind: IndexKind,
Expand Down Expand Up @@ -268,7 +269,7 @@ impl Debug for LineIndex {
}
}

#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum IndexKind {
/// Optimized index for an ASCII only document
Ascii,
Expand Down

0 comments on commit d4dd96d

Please sign in to comment.