Skip to content

Commit d8cf8ac

Browse files
AlexWaygood and carljm authored
[red-knot] Resolve symbols from builtins.pyi in the stdlib if they cannot be found in other scopes (#12390)
Co-authored-by: Carl Meyer <[email protected]>
1 parent 1c7b840 commit d8cf8ac

File tree

8 files changed

+231
-11
lines changed

8 files changed

+231
-11
lines changed

Diff for: crates/red_knot_module_resolver/src/path.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -233,10 +233,16 @@ impl ModuleResolutionPathBuf {
233233
ModuleResolutionPathRef::from(self).is_directory(search_path, resolver)
234234
}
235235

236-
pub(crate) fn is_site_packages(&self) -> bool {
236+
#[must_use]
237+
pub(crate) const fn is_site_packages(&self) -> bool {
237238
matches!(self.0, ModuleResolutionPathBufInner::SitePackages(_))
238239
}
239240

241+
#[must_use]
242+
pub(crate) const fn is_standard_library(&self) -> bool {
243+
matches!(self.0, ModuleResolutionPathBufInner::StandardLibrary(_))
244+
}
245+
240246
#[must_use]
241247
pub(crate) fn with_pyi_extension(&self) -> Self {
242248
ModuleResolutionPathRef::from(self).with_pyi_extension()

Diff for: crates/red_knot_module_resolver/src/resolver.rs

+85
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::borrow::Cow;
22
use std::iter::FusedIterator;
33
use std::sync::Arc;
44

5+
use once_cell::sync::Lazy;
56
use rustc_hash::{FxBuildHasher, FxHashSet};
67

78
use ruff_db::files::{File, FilePath};
@@ -442,6 +443,52 @@ pub(crate) mod internal {
442443
}
443444
}
444445

446+
/// Modules that are builtin to the Python interpreter itself.
447+
///
448+
/// When these module names are imported, standard module resolution is bypassed:
449+
/// the module name always resolves to the stdlib module,
450+
/// even if there's a module of the same name in the workspace root
451+
/// (which would normally result in the stdlib module being overridden).
452+
///
453+
/// TODO(Alex): write a script to generate this list,
454+
/// similar to what we do in `crates/ruff_python_stdlib/src/sys.rs`
455+
static BUILTIN_MODULES: Lazy<FxHashSet<&str>> = Lazy::new(|| {
456+
const BUILTIN_MODULE_NAMES: &[&str] = &[
457+
"_abc",
458+
"_ast",
459+
"_codecs",
460+
"_collections",
461+
"_functools",
462+
"_imp",
463+
"_io",
464+
"_locale",
465+
"_operator",
466+
"_signal",
467+
"_sre",
468+
"_stat",
469+
"_string",
470+
"_symtable",
471+
"_thread",
472+
"_tokenize",
473+
"_tracemalloc",
474+
"_typing",
475+
"_warnings",
476+
"_weakref",
477+
"atexit",
478+
"builtins",
479+
"errno",
480+
"faulthandler",
481+
"gc",
482+
"itertools",
483+
"marshal",
484+
"posix",
485+
"pwd",
486+
"sys",
487+
"time",
488+
];
489+
BUILTIN_MODULE_NAMES.iter().copied().collect()
490+
});
491+
445492
/// Given a module name and a list of search paths in which to lookup modules,
446493
/// attempt to resolve the module name
447494
fn resolve_name(
@@ -450,8 +497,12 @@ fn resolve_name(
450497
) -> Option<(Arc<ModuleResolutionPathBuf>, File, ModuleKind)> {
451498
let resolver_settings = module_resolution_settings(db);
452499
let resolver_state = ResolverState::new(db, resolver_settings.target_version());
500+
let is_builtin_module = BUILTIN_MODULES.contains(&name.as_str());
453501

454502
for search_path in resolver_settings.search_paths(db) {
503+
if is_builtin_module && !search_path.is_standard_library() {
504+
continue;
505+
}
455506
let mut components = name.components();
456507
let module_name = components.next_back()?;
457508

@@ -629,6 +680,40 @@ mod tests {
629680
);
630681
}
631682

683+
#[test]
684+
fn builtins_vendored() {
685+
let TestCase { db, stdlib, .. } = TestCaseBuilder::new()
686+
.with_vendored_typeshed()
687+
.with_src_files(&[("builtins.py", "FOOOO = 42")])
688+
.build();
689+
690+
let builtins_module_name = ModuleName::new_static("builtins").unwrap();
691+
let builtins = resolve_module(&db, builtins_module_name).expect("builtins to resolve");
692+
693+
assert_eq!(builtins.file().path(&db), &stdlib.join("builtins.pyi"));
694+
}
695+
696+
#[test]
697+
fn builtins_custom() {
698+
const TYPESHED: MockedTypeshed = MockedTypeshed {
699+
stdlib_files: &[("builtins.pyi", "def min(a, b): ...")],
700+
versions: "builtins: 3.8-",
701+
};
702+
703+
const SRC: &[FileSpec] = &[("builtins.py", "FOOOO = 42")];
704+
705+
let TestCase { db, stdlib, .. } = TestCaseBuilder::new()
706+
.with_src_files(SRC)
707+
.with_custom_typeshed(TYPESHED)
708+
.with_target_version(TargetVersion::Py38)
709+
.build();
710+
711+
let builtins_module_name = ModuleName::new_static("builtins").unwrap();
712+
let builtins = resolve_module(&db, builtins_module_name).expect("builtins to resolve");
713+
714+
assert_eq!(builtins.file().path(&db), &stdlib.join("builtins.pyi"));
715+
}
716+
632717
#[test]
633718
fn stdlib() {
634719
const TYPESHED: MockedTypeshed = MockedTypeshed {

Diff for: crates/red_knot_python_semantic/src/builtins.rs

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
use red_knot_module_resolver::{resolve_module, ModuleName};
2+
3+
use crate::semantic_index::global_scope;
4+
use crate::semantic_index::symbol::ScopeId;
5+
use crate::Db;
6+
7+
/// Salsa query to get the builtins scope.
8+
///
9+
/// Can return None if a custom typeshed is used that is missing `builtins.pyi`.
10+
#[salsa::tracked]
11+
pub(crate) fn builtins_scope(db: &dyn Db) -> Option<ScopeId<'_>> {
12+
let builtins_name =
13+
ModuleName::new_static("builtins").expect("Expected 'builtins' to be a valid module name");
14+
let builtins_file = resolve_module(db.upcast(), builtins_name)?.file();
15+
Some(global_scope(db, builtins_file))
16+
}

Diff for: crates/red_knot_python_semantic/src/db.rs

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use salsa::DbWithJar;
33
use red_knot_module_resolver::Db as ResolverDb;
44
use ruff_db::{Db as SourceDb, Upcast};
55

6+
use crate::builtins::builtins_scope;
67
use crate::semantic_index::definition::Definition;
78
use crate::semantic_index::expression::Expression;
89
use crate::semantic_index::symbol::ScopeId;
@@ -28,6 +29,7 @@ pub struct Jar(
2829
infer_definition_types,
2930
infer_expression_types,
3031
infer_scope_types,
32+
builtins_scope,
3133
);
3234

3335
/// Database giving access to semantic information about a Python program.

Diff for: crates/red_knot_python_semantic/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ pub use db::{Db, Jar};
66
pub use semantic_model::{HasTy, SemanticModel};
77

88
pub mod ast_node_ref;
9+
mod builtins;
910
mod db;
1011
mod node_key;
1112
pub mod semantic_index;

Diff for: crates/red_knot_python_semantic/src/types.rs

+10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use ruff_db::files::File;
22
use ruff_python_ast::name::Name;
33

4+
use crate::builtins::builtins_scope;
45
use crate::semantic_index::definition::Definition;
56
use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId};
67
use crate::semantic_index::{global_scope, symbol_table, use_def_map};
@@ -47,6 +48,15 @@ pub(crate) fn global_symbol_ty_by_name<'db>(db: &'db dyn Db, file: File, name: &
4748
symbol_ty_by_name(db, global_scope(db, file), name)
4849
}
4950

51+
/// Shorthand for `symbol_ty` that looks up a symbol in the builtins.
52+
///
53+
/// Returns `Type::Unbound` if the builtins module isn't available for some reason.
54+
pub(crate) fn builtins_symbol_ty_by_name<'db>(db: &'db dyn Db, name: &str) -> Type<'db> {
55+
builtins_scope(db)
56+
.map(|builtins| symbol_ty_by_name(db, builtins, name))
57+
.unwrap_or(Type::Unbound)
58+
}
59+
5060
/// Infer the type of a [`Definition`].
5161
pub(crate) fn definition_ty<'db>(db: &'db dyn Db, definition: Definition<'db>) -> Type<'db> {
5262
let inference = infer_definition_types(db, definition);

Diff for: crates/red_knot_python_semantic/src/types/display.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,9 @@ impl Display for DisplayType<'_> {
2929
write!(f, "<module '{:?}'>", file.path(self.db.upcast()))
3030
}
3131
// TODO functions and classes should display using a fully qualified name
32-
Type::Class(class) => {
33-
f.write_str("Literal[")?;
34-
f.write_str(&class.name(self.db))?;
35-
f.write_str("]")
36-
}
32+
Type::Class(class) => write!(f, "Literal[{}]", class.name(self.db)),
3733
Type::Instance(class) => f.write_str(&class.name(self.db)),
38-
Type::Function(function) => f.write_str(&function.name(self.db)),
34+
Type::Function(function) => write!(f, "Literal[{}]", function.name(self.db)),
3935
Type::Union(union) => union.display(self.db).fmt(f),
4036
Type::Intersection(intersection) => intersection.display(self.db).fmt(f),
4137
Type::IntLiteral(n) => write!(f, "Literal[{n}]"),

Diff for: crates/red_knot_python_semantic/src/types/infer.rs

+108-4
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,16 @@ use ruff_db::parsed::parsed_module;
2929
use ruff_python_ast as ast;
3030
use ruff_python_ast::{ExprContext, TypeParams};
3131

32+
use crate::builtins::builtins_scope;
3233
use crate::semantic_index::ast_ids::{HasScopedAstId, HasScopedUseId, ScopedExpressionId};
3334
use crate::semantic_index::definition::{Definition, DefinitionKind, DefinitionNodeKey};
3435
use crate::semantic_index::expression::Expression;
3536
use crate::semantic_index::semantic_index;
36-
use crate::semantic_index::symbol::NodeWithScopeKind;
37-
use crate::semantic_index::symbol::{NodeWithScopeRef, ScopeId};
37+
use crate::semantic_index::symbol::{FileScopeId, NodeWithScopeKind, NodeWithScopeRef, ScopeId};
3838
use crate::semantic_index::SemanticIndex;
3939
use crate::types::{
40-
definitions_ty, global_symbol_ty_by_name, ClassType, FunctionType, Name, Type, UnionTypeBuilder,
40+
builtins_symbol_ty_by_name, definitions_ty, global_symbol_ty_by_name, ClassType, FunctionType,
41+
Name, Type, UnionTypeBuilder,
4142
};
4243
use crate::Db;
4344

@@ -686,7 +687,18 @@ impl<'db> TypeInferenceBuilder<'db> {
686687
let symbol = symbols.symbol_by_name(id).unwrap();
687688
if !symbol.is_defined() || !self.scope.is_function_like(self.db) {
688689
// implicit global
689-
Some(global_symbol_ty_by_name(self.db, self.file, id))
690+
let mut unbound_ty = if file_scope_id == FileScopeId::global() {
691+
Type::Unbound
692+
} else {
693+
global_symbol_ty_by_name(self.db, self.file, id)
694+
};
695+
// fall back to builtins
696+
if matches!(unbound_ty, Type::Unbound)
697+
&& Some(self.scope) != builtins_scope(self.db)
698+
{
699+
unbound_ty = builtins_symbol_ty_by_name(self.db, id);
700+
}
701+
Some(unbound_ty)
690702
} else {
691703
Some(Type::Unbound)
692704
}
@@ -792,6 +804,7 @@ mod tests {
792804
use ruff_db::testing::assert_function_query_was_not_run;
793805
use ruff_python_ast::name::Name;
794806

807+
use crate::builtins::builtins_scope;
795808
use crate::db::tests::TestDb;
796809
use crate::semantic_index::definition::Definition;
797810
use crate::semantic_index::semantic_index;
@@ -819,6 +832,23 @@ mod tests {
819832
db
820833
}
821834

835+
fn setup_db_with_custom_typeshed(typeshed: &str) -> TestDb {
836+
let db = TestDb::new();
837+
838+
Program::new(
839+
&db,
840+
TargetVersion::Py38,
841+
SearchPathSettings {
842+
extra_paths: Vec::new(),
843+
workspace_root: SystemPathBuf::from("/src"),
844+
site_packages: None,
845+
custom_typeshed: Some(SystemPathBuf::from(typeshed)),
846+
},
847+
);
848+
849+
db
850+
}
851+
822852
fn assert_public_ty(db: &TestDb, file_name: &str, symbol_name: &str, expected: &str) {
823853
let file = system_path_to_file(db, file_name).expect("Expected file to exist.");
824854

@@ -1370,6 +1400,80 @@ mod tests {
13701400
Ok(())
13711401
}
13721402

1403+
#[test]
1404+
fn builtin_symbol_vendored_stdlib() -> anyhow::Result<()> {
1405+
let mut db = setup_db();
1406+
1407+
db.write_file("/src/a.py", "c = copyright")?;
1408+
1409+
assert_public_ty(&db, "/src/a.py", "c", "Literal[copyright]");
1410+
1411+
Ok(())
1412+
}
1413+
1414+
#[test]
1415+
fn builtin_symbol_custom_stdlib() -> anyhow::Result<()> {
1416+
let mut db = setup_db_with_custom_typeshed("/typeshed");
1417+
1418+
db.write_files([
1419+
("/src/a.py", "c = copyright"),
1420+
(
1421+
"/typeshed/stdlib/builtins.pyi",
1422+
"def copyright() -> None: ...",
1423+
),
1424+
("/typeshed/stdlib/VERSIONS", "builtins: 3.8-"),
1425+
])?;
1426+
1427+
assert_public_ty(&db, "/src/a.py", "c", "Literal[copyright]");
1428+
1429+
Ok(())
1430+
}
1431+
1432+
#[test]
1433+
fn unknown_global_later_defined() -> anyhow::Result<()> {
1434+
let mut db = setup_db();
1435+
1436+
db.write_file("/src/a.py", "x = foo; foo = 1")?;
1437+
1438+
assert_public_ty(&db, "/src/a.py", "x", "Unbound");
1439+
1440+
Ok(())
1441+
}
1442+
1443+
#[test]
1444+
fn unknown_builtin_later_defined() -> anyhow::Result<()> {
1445+
let mut db = setup_db_with_custom_typeshed("/typeshed");
1446+
1447+
db.write_files([
1448+
("/src/a.py", "x = foo"),
1449+
("/typeshed/stdlib/builtins.pyi", "foo = bar; bar = 1"),
1450+
("/typeshed/stdlib/VERSIONS", "builtins: 3.8-"),
1451+
])?;
1452+
1453+
assert_public_ty(&db, "/src/a.py", "x", "Unbound");
1454+
1455+
Ok(())
1456+
}
1457+
1458+
#[test]
1459+
fn import_builtins() -> anyhow::Result<()> {
1460+
let mut db = setup_db();
1461+
1462+
db.write_file("/src/a.py", "import builtins; x = builtins.copyright")?;
1463+
1464+
assert_public_ty(&db, "/src/a.py", "x", "Literal[copyright]");
1465+
// imported builtins module is the same file as the implicit builtins
1466+
let file = system_path_to_file(&db, "/src/a.py").expect("Expected file to exist.");
1467+
let builtins_ty = global_symbol_ty_by_name(&db, file, "builtins");
1468+
let Type::Module(builtins_file) = builtins_ty else {
1469+
panic!("Builtins are not a module?");
1470+
};
1471+
let implicit_builtins_file = builtins_scope(&db).expect("builtins to exist").file(&db);
1472+
assert_eq!(builtins_file, implicit_builtins_file);
1473+
1474+
Ok(())
1475+
}
1476+
13731477
fn first_public_def<'db>(db: &'db TestDb, file: File, name: &str) -> Definition<'db> {
13741478
let scope = global_scope(db, file);
13751479
*use_def_map(db, scope)

0 commit comments

Comments
 (0)