Skip to content
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
02c4798
Add ALWAYS_FALSE
dcreager Jan 29, 2025
2c0d577
Mark return states
dcreager Jan 29, 2025
e198806
Mark live bindings as non-visible when flow is unreachable
dcreager Jan 29, 2025
23b366d
Revert "Mark return states"
dcreager Jan 30, 2025
38f3d99
Handle final return statement specially
dcreager Jan 30, 2025
5c1918a
Customizable binding visibility
dcreager Jan 30, 2025
7504fb7
Reachability is now a vis constraint, not a bool
dcreager Jan 30, 2025
ee6f253
Fix ternary AND logic
dcreager Jan 30, 2025
6b53554
These are now unresolved I guess?
dcreager Jan 30, 2025
bb11cf8
clippy
dcreager Jan 30, 2025
a2ef702
Normalize negations of ALWAYS_{TRUE,FALSE}
dcreager Jan 30, 2025
1c42a2b
scope_start_visibility _is_ reachability
dcreager Jan 30, 2025
289c0c6
No, bindings are always visible
dcreager Jan 30, 2025
8b0899f
TODO for `raise`/`else` unreachability
dcreager Jan 30, 2025
9cd8e68
Fix test failure
dcreager Jan 30, 2025
1a80f81
Expected test case change
dcreager Jan 30, 2025
f71325c
Try to skip simplification by checking scope_start_visibility
dcreager Jan 30, 2025
0e06012
And try via a separate `always_reachable` boolean
dcreager Jan 30, 2025
b3b4577
Add AlwaysFalse as its own constraint variant
dcreager Jan 30, 2025
46b1ec2
Update always_reachable on merge correctly
dcreager Jan 30, 2025
999188a
Try checking if reachability contains AlwaysFalse in syntax tree
dcreager Jan 30, 2025
0f278da
But that doesn't work either
dcreager Jan 30, 2025
0fb6a74
Merge branch 'main' into dcreager/static-terminal
dcreager Feb 4, 2025
c882a95
This is working again
dcreager Feb 4, 2025
33db6f1
Remove static bool
dcreager Feb 4, 2025
b49916d
Remove moot comment
dcreager Feb 4, 2025
c80f27e
And another
dcreager Feb 4, 2025
0d50385
New bindings are visible only if control flow is reachable
dcreager Feb 4, 2025
c42490c
Add xfail for RET503
dcreager Feb 4, 2025
26f842b
Update terminal statement comment
dcreager Feb 5, 2025
e5b6c4a
Add shorter example for bindings after terminal statement
dcreager Feb 5, 2025
00e236f
Add back debug derive
dcreager Feb 5, 2025
1d93650
Add TODO to function symbol comment
dcreager Feb 5, 2025
ae83741
Wrap try examples in functions to bound reachability
dcreager Feb 5, 2025
f13a6a6
Spelling typo
dcreager Feb 5, 2025
7423de2
Add TODO for unreachable code example
dcreager Feb 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/red_knot_project/tests/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ impl SourceOrderVisitor<'_> for PullTypesVisitor<'_> {
/// Whether or not the .py/.pyi version of this file is expected to fail
#[rustfmt::skip]
const KNOWN_FAILURES: &[(&str, bool, bool)] = &[
// related to circular references in nested functions
("crates/ruff_linter/resources/test/fixtures/flake8_return/RET503.py", false, true),
// related to circular references in class definitions
("crates/ruff_linter/resources/test/fixtures/pyflakes/F821_26.py", true, true),
("crates/ruff_linter/resources/test/fixtures/pyflakes/F821_27.py", true, true),
Expand Down
67 changes: 37 additions & 30 deletions crates/red_knot_python_semantic/resources/mdtest/exception/basic.md
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes in this file are easier to see with "Hide whitespace"

Original file line number Diff line number Diff line change
Expand Up @@ -124,42 +124,49 @@ def _(e: Exception | type[Exception] | None):
## Exception cause is not an exception

```py
try:
raise EOFError() from GeneratorExit # fine
except:
...
def _():
try:
raise EOFError() from GeneratorExit # fine
except:
...

try:
raise StopIteration from MemoryError() # fine
except:
...
def _():
try:
raise StopIteration from MemoryError() # fine
except:
...

try:
raise BufferError() from None # fine
except:
...
def _():
try:
raise BufferError() from None # fine
except:
...

try:
raise ZeroDivisionError from False # error: [invalid-raise]
except:
...
def _():
try:
raise ZeroDivisionError from False # error: [invalid-raise]
except:
...

try:
raise SystemExit from bool() # error: [invalid-raise]
except:
...
def _():
try:
raise SystemExit from bool() # error: [invalid-raise]
except:
...

try:
raise
except KeyboardInterrupt as e: # fine
reveal_type(e) # revealed: KeyboardInterrupt
raise LookupError from e # fine
def _():
try:
raise
except KeyboardInterrupt as e: # fine
reveal_type(e) # revealed: KeyboardInterrupt
raise LookupError from e # fine

try:
raise
except int as e: # error: [invalid-exception-caught]
reveal_type(e) # revealed: Unknown
raise KeyError from e
def _():
try:
raise
except int as e: # error: [invalid-exception-caught]
reveal_type(e) # revealed: Unknown
raise KeyError from e

def _(e: Exception | type[Exception]):
raise ModuleNotFoundError from e # fine
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,9 @@ def raise_in_both_branches(cond: bool):
# Exceptions can occur anywhere, so "before" and "raise" are valid possibilities
reveal_type(x) # revealed: Literal["before", "raise1", "raise2"]
else:
# This branch is unreachable, since all control flows in the `try` clause raise exceptions.
# As a result, this binding should never be reachable, since new bindings are visibile only
# when they are reachable.
x = "unreachable"
finally:
# Exceptions can occur anywhere, so "before" and "raise" are valid possibilities
Expand Down Expand Up @@ -623,9 +626,9 @@ def return_from_nested_if(cond1: bool, cond2: bool):

## Statically known terminal statements

Terminal statements do not yet interact correctly with statically known bounds. In this example, we
should see that the `return` statement is always executed, and therefore that the `"b"` assignment
is not visible to the `reveal_type`.
We model reachability using the same visibility constraints that we use to model statically known
bounds. In this example, we see that the `return` statement is always executed, and therefore that
the `"b"` assignment is not visible to the `reveal_type`.

```py
def _(cond: bool):
Expand All @@ -635,6 +638,23 @@ def _(cond: bool):
if True:
return

# TODO: Literal["a"]
reveal_type(x) # revealed: Literal["a", "b"]
reveal_type(x) # revealed: Literal["a"]
```

## Bindings after a terminal statement are unreachable

Any bindings introduced after a terminal statement are unreachable, and are considered not visible.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the new test case showing that bindings after a terminal statement are considered not visible. https://github.com/astral-sh/ruff/pull/15817/files#r1941996689

Note that this means we're currently implementing the "least helpful" option in #15797. (I think that's still okay for this PR, just pointing out that this will change depending on how we decide to handle unreachable code)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I would put a TODO on that unresolved-reference diagnostic below.

I'm a little worried about the difficulty of implementing "more useful" options for checking unreachable code, but we can leave that as a separate problem.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. I would put a TODO on that unresolved-reference diagnostic below.

Done

I'm a little worried about the difficulty of implementing "more useful" options for checking unreachable code, but we can leave that as a separate problem.

Are you worried that this PR makes it more difficult? Or just that it's on the list of things to tackle sooner rather than later in case it requires large changes to the design?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not really so much that this PR makes it more difficult, just that I'm not sure how much this approach will end up having to change. I don't think it's a reason not to merge this. I am curious if you have a rough sense of how we might go about implementing "check unreachable code as if it were reachable" while still preserving (as I think we must) "unreachable branches never merge back to outer control flow". That is, fixing the TODO you just added, without having that unreachable assignment become visible in the outer flow.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am curious if you have a rough sense of how we might go about implementing "check unreachable code as if it were reachable" while still preserving (as I think we must) "unreachable branches never merge back to outer control flow".

I'd say we'd either need to track multiple visibility constraints for each binding, or multiple "current flow states" — both being ways to represent the visibility that each binding has now, and what it would reset to at the next merge point.

But I'm also not sure that's what we'd want to implement — if we want to "check unreachable code as if it were reachable", I'm not sure that should reset at merge points. e.g. if someone inserted a return statement for debugging, I don't see a difference in UX between:

def _(cond: bool):
    if cond:
        x = 1
        return
        reveal_type(x)  # revealed: Literal[1]

and

def _(cond: bool):
    if cond:
        x = 1
        return
    reveal_type(x)  # revealed: Literal[1]

And so if we want to treat these both the same, I'd say we'd go for an option that controls the visibility of new bindings: "always true" if we want to check unreachable code as if it were reachable, and "current reachability" if not.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd say we'd either need to track multiple visibility constraints for each binding, or multiple "current flow states" — both being ways to represent the visibility that each binding has now, and what it would reset to at the next merge point.

Yeah makes sense.

But I'm also not sure that's what we'd want to implement — if we want to "check unreachable code as if it were reachable", I'm not sure that should reset at merge points.

Not sure either. I feel like checking unreachable code as if it were reachable is kind of an unprincipled approach that may not have a sensible and consistent semantics.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In any case, I think the semantics implemented in this PR are a good step forward, and we should go ahead with them for now.


```py
def f(cond: bool) -> str:
x = "before"
if cond:
reveal_type(x) # revealed: Literal["before"]
return
x = "after-return"
# error: [unresolved-reference]
reveal_type(x) # revealed: Unknown
else:
x = "else"
reveal_type(x) # revealed: Literal["else"]
```
25 changes: 23 additions & 2 deletions crates/red_knot_python_semantic/src/semantic_index/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -793,9 +793,30 @@ where
&mut builder.current_first_parameter_name,
&mut first_parameter_name,
);
builder.visit_body(body);
builder.current_first_parameter_name = first_parameter_name;

// TODO: Fix how we determine the public types of symbols in a
// function-like scope: https://github.com/astral-sh/ruff/issues/15777
//
// In the meantime, visit the function body, but treat the last statement
// specially if it is a return. If it is, this would cause all definitions
// in the function to be marked as non-visible with our current treatment
// of terminal statements. Since we currently model the externally visible
// definitions in a function scope as the set of bindings that are visible
// at the end of the body, we then consider this function to have no
// externally visible definitions. To get around this, we take a flow
// snapshot just before processing the return statement, and use _that_ as
// the "end-of-body" state that we resolve external references against.
if let Some((last_stmt, first_stmts)) = body.split_last() {
builder.visit_body(first_stmts);
let pre_return_state = matches!(last_stmt, ast::Stmt::Return(_))
.then(|| builder.flow_snapshot());
builder.visit_stmt(last_stmt);
if let Some(pre_return_state) = pre_return_state {
builder.flow_restore(pre_return_state);
}
}

builder.current_first_parameter_name = first_parameter_name;
builder.pop_scope()
},
);
Expand Down
36 changes: 17 additions & 19 deletions crates/red_knot_python_semantic/src/semantic_index/use_def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,6 @@ impl std::iter::FusedIterator for DeclarationsIterator<'_, '_> {}
pub(super) struct FlowSnapshot {
symbol_states: IndexVec<ScopedSymbolId, SymbolState>,
scope_start_visibility: ScopedVisibilityConstraintId,
reachable: bool,
}

#[derive(Debug)]
Expand Down Expand Up @@ -506,8 +505,6 @@ pub(super) struct UseDefMapBuilder<'db> {

/// Currently live bindings and declarations for each symbol.
symbol_states: IndexVec<ScopedSymbolId, SymbolState>,

reachable: bool,
}

impl Default for UseDefMapBuilder<'_> {
Expand All @@ -520,14 +517,13 @@ impl Default for UseDefMapBuilder<'_> {
bindings_by_use: IndexVec::new(),
definitions_by_definition: FxHashMap::default(),
symbol_states: IndexVec::new(),
reachable: true,
}
}
}

impl<'db> UseDefMapBuilder<'db> {
pub(super) fn mark_unreachable(&mut self) {
self.reachable = false;
self.record_visibility_constraint(ScopedVisibilityConstraintId::ALWAYS_FALSE);
}

pub(super) fn add_symbol(&mut self, symbol: ScopedSymbolId) {
Expand All @@ -544,7 +540,7 @@ impl<'db> UseDefMapBuilder<'db> {
binding,
SymbolDefinitions::Declarations(symbol_state.declarations().clone()),
);
symbol_state.record_binding(def_id);
symbol_state.record_binding(def_id, self.scope_start_visibility);
}

pub(super) fn add_constraint(&mut self, constraint: Constraint<'db>) -> ScopedConstraintId {
Expand Down Expand Up @@ -596,7 +592,11 @@ impl<'db> UseDefMapBuilder<'db> {
pub(super) fn simplify_visibility_constraints(&mut self, snapshot: FlowSnapshot) {
debug_assert!(self.symbol_states.len() >= snapshot.symbol_states.len());

self.scope_start_visibility = snapshot.scope_start_visibility;
// If there are any control flow paths that have become unreachable between `snapshot` and
// now, then it's not valid to simplify any visibility constraints to `snapshot`.
if self.scope_start_visibility != snapshot.scope_start_visibility {
return;
}

// Note that this loop terminates when we reach a symbol not present in the snapshot.
// This means we keep visibility constraints for all new symbols, which is intended,
Expand Down Expand Up @@ -632,7 +632,7 @@ impl<'db> UseDefMapBuilder<'db> {
let def_id = self.all_definitions.push(Some(definition));
let symbol_state = &mut self.symbol_states[symbol];
symbol_state.record_declaration(def_id);
symbol_state.record_binding(def_id);
symbol_state.record_binding(def_id, self.scope_start_visibility);
}

pub(super) fn record_use(&mut self, symbol: ScopedSymbolId, use_id: ScopedUseId) {
Expand All @@ -649,7 +649,6 @@ impl<'db> UseDefMapBuilder<'db> {
FlowSnapshot {
symbol_states: self.symbol_states.clone(),
scope_start_visibility: self.scope_start_visibility,
reachable: self.reachable,
}
}

Expand All @@ -672,21 +671,23 @@ impl<'db> UseDefMapBuilder<'db> {
num_symbols,
SymbolState::undefined(self.scope_start_visibility),
);

self.reachable = snapshot.reachable;
}

/// Merge the given snapshot into the current state, reflecting that we might have taken either
/// path to get here. The new state for each symbol should include definitions from both the
/// prior state and the snapshot.
pub(super) fn merge(&mut self, snapshot: FlowSnapshot) {
// Unreachable snapshots should not be merged: If the current snapshot is unreachable, it
// should be completely overwritten by the snapshot we're merging in. If the other snapshot
// is unreachable, we should return without merging.
if !snapshot.reachable {
// As an optimization, if we know statically that either of the snapshots is always
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commenting out these two if clauses is how we verify that this is truly an optimization — we should get the same results for the tests with and without it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I verified that we can indeed remove these checks and all tests pass! I'm not seeing a detectable performance improvement in the benchmark from including these lines; perhaps that just suggests conditional terminals aren't common enough for it to show up? It definitely seems like this should be faster in cases where it does apply.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not seeing a detectable performance improvement in the benchmark from including these lines

I think it might be that the merge step is now faster: if the other snapshot's reachability is ALWAYS_FALSE, then the visibility of all of its bindings should also be ALWAYS_FALSE. (I think there were cases before TDD normalization where we wouldn't be able to see that in the structure of the visibility constraint.) Merge will iterate through all of the bindings and AND their visibility constraints, but ANDing with ALWAYS_FALSE is one of the fast-path returns.

To be clear, it's a hunch — I haven't backed any of ☝️ with data!

// unreachable, we can leave it out of the merged result entirely. Note that we cannot
// perform any type inference at this point, so this is largely limited to unreachability
// via terminal statements. If a flow's reachability depends on an expression in the code,
// we will include the flow in the merged result; the visibility constraints of its
// bindings will include this reachability condition, so that later during type inference,
// we can determine whether any particular binding is non-visible due to unreachability.
if snapshot.scope_start_visibility == ScopedVisibilityConstraintId::ALWAYS_FALSE {
return;
}
if !self.reachable {
if self.scope_start_visibility == ScopedVisibilityConstraintId::ALWAYS_FALSE {
self.restore(snapshot);
return;
}
Expand All @@ -712,9 +713,6 @@ impl<'db> UseDefMapBuilder<'db> {
self.scope_start_visibility = self
.visibility_constraints
.add_or_constraint(self.scope_start_visibility, snapshot.scope_start_visibility);

// Both of the snapshots are reachable, so the merged result is too.
self.reachable = true;
}

pub(super) fn finish(mut self) -> UseDefMap<'db> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,16 +237,19 @@ impl SymbolBindings {
}

/// Record a newly-encountered binding for this symbol.
pub(super) fn record_binding(&mut self, binding_id: ScopedDefinitionId) {
pub(super) fn record_binding(
&mut self,
binding_id: ScopedDefinitionId,
visibility_constraint: ScopedVisibilityConstraintId,
) {
// The new binding replaces all previous live bindings in this path, and has no
// constraints.
self.live_bindings = Bindings::with(binding_id.into());
self.constraints = ConstraintsPerBinding::with_capacity(1);
self.constraints.push(Constraints::default());

self.visibility_constraints = VisibilityConstraintPerBinding::with_capacity(1);
self.visibility_constraints
.push(ScopedVisibilityConstraintId::ALWAYS_TRUE);
self.visibility_constraints.push(visibility_constraint);
}

/// Add given constraint to all live bindings.
Expand Down Expand Up @@ -349,9 +352,14 @@ impl SymbolState {
}

/// Record a newly-encountered binding for this symbol.
pub(super) fn record_binding(&mut self, binding_id: ScopedDefinitionId) {
pub(super) fn record_binding(
&mut self,
binding_id: ScopedDefinitionId,
visibility_constraint: ScopedVisibilityConstraintId,
) {
debug_assert_ne!(binding_id, ScopedDefinitionId::UNBOUND);
self.bindings.record_binding(binding_id);
self.bindings
.record_binding(binding_id, visibility_constraint);
}

/// Add given constraint to all live bindings.
Expand Down Expand Up @@ -557,15 +565,21 @@ mod tests {
#[test]
fn with() {
let mut sym = SymbolState::undefined(ScopedVisibilityConstraintId::ALWAYS_TRUE);
sym.record_binding(ScopedDefinitionId::from_u32(1));
sym.record_binding(
ScopedDefinitionId::from_u32(1),
ScopedVisibilityConstraintId::ALWAYS_TRUE,
);

assert_bindings(&sym, &["1<>"]);
}

#[test]
fn record_constraint() {
let mut sym = SymbolState::undefined(ScopedVisibilityConstraintId::ALWAYS_TRUE);
sym.record_binding(ScopedDefinitionId::from_u32(1));
sym.record_binding(
ScopedDefinitionId::from_u32(1),
ScopedVisibilityConstraintId::ALWAYS_TRUE,
);
sym.record_constraint(ScopedConstraintId::from_u32(0));

assert_bindings(&sym, &["1<0>"]);
Expand All @@ -577,11 +591,17 @@ mod tests {

// merging the same definition with the same constraint keeps the constraint
let mut sym1a = SymbolState::undefined(ScopedVisibilityConstraintId::ALWAYS_TRUE);
sym1a.record_binding(ScopedDefinitionId::from_u32(1));
sym1a.record_binding(
ScopedDefinitionId::from_u32(1),
ScopedVisibilityConstraintId::ALWAYS_TRUE,
);
sym1a.record_constraint(ScopedConstraintId::from_u32(0));

let mut sym1b = SymbolState::undefined(ScopedVisibilityConstraintId::ALWAYS_TRUE);
sym1b.record_binding(ScopedDefinitionId::from_u32(1));
sym1b.record_binding(
ScopedDefinitionId::from_u32(1),
ScopedVisibilityConstraintId::ALWAYS_TRUE,
);
sym1b.record_constraint(ScopedConstraintId::from_u32(0));

sym1a.merge(sym1b, &mut visibility_constraints);
Expand All @@ -590,11 +610,17 @@ mod tests {

// merging the same definition with differing constraints drops all constraints
let mut sym2a = SymbolState::undefined(ScopedVisibilityConstraintId::ALWAYS_TRUE);
sym2a.record_binding(ScopedDefinitionId::from_u32(2));
sym2a.record_binding(
ScopedDefinitionId::from_u32(2),
ScopedVisibilityConstraintId::ALWAYS_TRUE,
);
sym2a.record_constraint(ScopedConstraintId::from_u32(1));

let mut sym1b = SymbolState::undefined(ScopedVisibilityConstraintId::ALWAYS_TRUE);
sym1b.record_binding(ScopedDefinitionId::from_u32(2));
sym1b.record_binding(
ScopedDefinitionId::from_u32(2),
ScopedVisibilityConstraintId::ALWAYS_TRUE,
);
sym1b.record_constraint(ScopedConstraintId::from_u32(2));

sym2a.merge(sym1b, &mut visibility_constraints);
Expand All @@ -603,7 +629,10 @@ mod tests {

// merging a constrained definition with unbound keeps both
let mut sym3a = SymbolState::undefined(ScopedVisibilityConstraintId::ALWAYS_TRUE);
sym3a.record_binding(ScopedDefinitionId::from_u32(3));
sym3a.record_binding(
ScopedDefinitionId::from_u32(3),
ScopedVisibilityConstraintId::ALWAYS_TRUE,
);
sym3a.record_constraint(ScopedConstraintId::from_u32(3));

let sym2b = SymbolState::undefined(ScopedVisibilityConstraintId::ALWAYS_TRUE);
Expand Down
Loading