115 changes: 98 additions & 17 deletions crates/ruff_benchmark/benches/red_knot.rs
@@ -21,8 +21,8 @@ use ruff_python_ast::PythonVersion;
struct Case {
db: ProjectDatabase,
fs: MemoryFileSystem,
re: File,
re_path: SystemPathBuf,
file: File,
file_path: SystemPathBuf,
}

// "https://raw.githubusercontent.com/python/cpython/8e8a4baf652f6e1cee7acde9d78c4b6154539748/Lib/tomllib";
@@ -59,7 +59,7 @@ type KeyDiagnosticFields = (
Severity,
);

static EXPECTED_DIAGNOSTICS: &[KeyDiagnosticFields] = &[(
static EXPECTED_TOMLLIB_DIAGNOSTICS: &[KeyDiagnosticFields] = &[(
DiagnosticId::lint("unused-ignore-comment"),
Some("/src/tomllib/_parser.py"),
Some(22299..22333),
@@ -71,7 +71,7 @@ fn tomllib_path(file: &TestFile) -> SystemPathBuf {
SystemPathBuf::from("src").join(file.name())
}

fn setup_case() -> Case {
fn setup_tomllib_case() -> Case {
let system = TestSystem::default();
let fs = system.memory_file_system().clone();

@@ -112,8 +112,8 @@ fn setup_case() -> Case {
Case {
db,
fs,
re,
re_path,
file: re,
file_path: re_path,
}
}

@@ -135,16 +135,19 @@ fn setup_rayon() {

fn benchmark_incremental(criterion: &mut Criterion) {
fn setup() -> Case {
let case = setup_case();
let case = setup_tomllib_case();

let result: Vec<_> = case.db.check().unwrap();

assert_diagnostics(&case.db, &result);
assert_diagnostics(&case.db, &result, EXPECTED_TOMLLIB_DIAGNOSTICS);

case.fs
.write_file_all(
&case.re_path,
format!("{}\n# A comment\n", source_text(&case.db, case.re).as_str()),
&case.file_path,
format!(
"{}\n# A comment\n",
source_text(&case.db, case.file).as_str()
),
)
.unwrap();

@@ -156,15 +159,15 @@ fn benchmark_incremental(criterion: &mut Criterion) {

db.apply_changes(
vec![ChangeEvent::Changed {
path: case.re_path.clone(),
path: case.file_path.clone(),
kind: ChangedKind::FileContent,
}],
None,
);

let result = db.check().unwrap();

assert_eq!(result.len(), EXPECTED_DIAGNOSTICS.len());
assert_eq!(result.len(), EXPECTED_TOMLLIB_DIAGNOSTICS.len());
}

setup_rayon();
@@ -179,20 +182,20 @@ fn benchmark_cold(criterion: &mut Criterion) {

criterion.bench_function("red_knot_check_file[cold]", |b| {
b.iter_batched_ref(
setup_case,
setup_tomllib_case,
|case| {
let Case { db, .. } = case;
let result: Vec<_> = db.check().unwrap();

assert_diagnostics(db, &result);
assert_diagnostics(db, &result, EXPECTED_TOMLLIB_DIAGNOSTICS);
},
BatchSize::SmallInput,
);
});
}

#[track_caller]
fn assert_diagnostics(db: &dyn Db, diagnostics: &[Diagnostic]) {
fn assert_diagnostics(db: &dyn Db, diagnostics: &[Diagnostic], expected: &[KeyDiagnosticFields]) {
let normalized: Vec<_> = diagnostics
.iter()
.map(|diagnostic| {
@@ -211,8 +214,86 @@ fn assert_diagnostics(db: &dyn Db, diagnostics: &[Diagnostic]) {
)
})
.collect();
assert_eq!(&normalized, EXPECTED_DIAGNOSTICS);
assert_eq!(&normalized, expected);
}

fn setup_micro_case(code: &str) -> Case {
let system = TestSystem::default();
let fs = system.memory_file_system().clone();

let file_path = "src/test.py";
fs.write_file_all(
SystemPathBuf::from(file_path),
ruff_python_trivia::textwrap::dedent(code),
)
.unwrap();

let src_root = SystemPath::new("/src");
let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap();
metadata.apply_cli_options(Options {
environment: Some(EnvironmentOptions {
python_version: Some(RangedValue::cli(PythonVersion::PY312)),
..EnvironmentOptions::default()
}),
..Options::default()
});

let mut db = ProjectDatabase::new(metadata, system).unwrap();
let file = system_path_to_file(&db, SystemPathBuf::from(file_path)).unwrap();

db.project()
.set_open_files(&mut db, FxHashSet::from_iter([file]));

let file_path = file.path(&db).as_system_path().unwrap().to_owned();

Case {
db,
fs,
file,
file_path,
}
}

fn benchmark_many_string_assignments(criterion: &mut Criterion) {
setup_rayon();

criterion.bench_function("red_knot_micro[many_string_assignments]", |b| {
b.iter_batched_ref(
|| {
setup_micro_case(
r#"
def f(x) -> str:
s = ""
if x.attr1:
s += "attr1"
if x.attr2:
s += "attr2"
if x.attr3:
s += "attr3"
if x.attr4:
s += "attr4"
if x.attr5:
s += "attr5"
if x.attr6:
s += "attr6"
if x.attr7:
s += "attr7"
if x.attr8:
s += "attr8"
return s
"#,
)
},
|case| {
let Case { db, .. } = case;
let result = db.check().unwrap();
Contributor:
I'm not familiar with the existing benchmark setup here, and I see that we do the same thing for benchmark_cold, so this is more of a question: how can we be sure that we are actually re-inferring types in this call here (instead of relying on salsa-cached values)?

Contributor Author (@carljm), Apr 14, 2025:
I'm not an expert on this benchmark setup either, but I think the answer is our use of iter_batched_ref, which (per the documentation) treats the input data from the first closure as non-reusable. So it generates a batch of input data by running the first closure (setup) multiple times, then runs the second closure (timed) for each input in the batch. This means that each timed execution of the "check" is running on a brand-new separate Salsa db, with a new memory file system, etc -- they are fully isolated.

Contributor:
> So it generates a batch of input data by running the first closure (setup) multiple times, then runs the second closure (timed) for each input in the batch.

Oh — thanks for the explanation. I was wrongly assuming the "setup" to only run once for some reason (same terminology but different functionality in other benchmarking libraries).

Contributor Author (@carljm):
I think Criterion offers both: if the artifacts from the setup are reusable and only need to be created once, you use iter (this is the more common/basic case); if the setup artifacts are non-reusable and need to be generated once per execution, then you use iter_batched/iter_batched_ref. (I don't find those method names very inherently clear, but the docs are useful.)
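
As a rough illustration of the difference (a minimal sketch with made-up `build_fresh_input` / `expensive_check` helpers, not code from this PR), the two Bencher methods look like this:

```rust
use criterion::{criterion_group, criterion_main, BatchSize, Criterion};

// Hypothetical stand-ins for the real setup/check; they only exist to make
// the sketch self-contained.
fn build_fresh_input() -> Vec<u64> {
    (0..10_000).collect()
}

fn expensive_check(data: &[u64]) -> u64 {
    data.iter().sum()
}

fn bench(criterion: &mut Criterion) {
    // `iter`: the setup artifact is built once and reused by every timed
    // iteration, so any cache built during the first run stays warm.
    criterion.bench_function("shared_input", |b| {
        let shared = build_fresh_input();
        b.iter(|| expensive_check(&shared));
    });

    // `iter_batched_ref`: the setup closure runs once per timed execution of
    // the routine, so each run gets a brand-new, fully isolated input.
    criterion.bench_function("fresh_input_per_run", |b| {
        b.iter_batched_ref(
            build_fresh_input,              // setup (not timed)
            |input| expensive_check(input), // routine (timed)
            BatchSize::SmallInput,
        );
    });
}

criterion_group!(sketch, bench);
criterion_main!(sketch);
```

The second pattern is the one these benchmarks rely on: because the setup runs once per execution, every timed check starts from a cold Salsa db.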

assert_eq!(result.len(), 0);
},
BatchSize::SmallInput,
);
});
}

criterion_group!(check_file, benchmark_cold, benchmark_incremental);
criterion_main!(check_file);
criterion_group!(micro, benchmark_many_string_assignments);
criterion_main!(check_file, micro);