diff --git a/.github/labeler.yml b/.github/labeler.yml index 1195ee4a14d2..9ef0a5a3c742 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -5,9 +5,20 @@ # # Note that we enable dot, so "**" matches all files in a folder -Z-BenchCI: +Z-EndToEndBenchCI: - any: - changed-files: - - any-glob-to-any-file: ['kani-compiler/**', 'kani-driver/src/call-*', 'cprover_bindings/**', 'library/**'] + - any-glob-to-any-file: ['kani-compiler/**', 'kani-driver/src/call_*', 'cprover_bindings/**', 'library/**'] - any-glob-to-any-file: ['rust-toolchain.toml', 'Cargo.lock'] - any-glob-to-any-file: ['kani-dependencies'] + +Z-CompilerBenchCI: +- any: + # we want to run compiler benchmarks if: + - changed-files: + # any parts of the compiler change + - any-glob-to-any-file: ['kani-compiler/**', 'cprover_bindings/**', 'library/**'] + # the way we call the compiler changes + - any-glob-to-any-file: ['kani-driver/src/call_cargo.rs', 'kani-driver/src/call_single_file.rs'] + # or if our dependencies change + - any-glob-to-any-file: ['rust-toolchain.toml', 'Cargo.lock'] \ No newline at end of file diff --git a/.github/workflows/bench-compiler.yml b/.github/workflows/bench-compiler.yml new file mode 100644 index 000000000000..c7dbe7d88747 --- /dev/null +++ b/.github/workflows/bench-compiler.yml @@ -0,0 +1,140 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT +# +# Run performance benchmarks comparing the compiler performance of two different Kani versions. +name: Kani Compiler Performance Benchmarks +on: + push: + branches: + - 'main' + workflow_call: + +jobs: + compile-timer-short: + runs-on: ubuntu-24.04 + steps: + - name: Save push event HEAD and HEAD~ to environment variables + if: ${{ github.event_name == 'push' }} + run: | + echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV" + echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV" + - name: Save pull request HEAD and base to environment variables + if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }} + run: | + echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV" + echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV" + - name: Check out Kani (old variant) + uses: actions/checkout@v4 + with: + path: ./old + ref: ${{ env.OLD_REF }} + fetch-depth: 2 + + - name: Check out Kani (new variant) + uses: actions/checkout@v4 + with: + path: ./new + ref: ${{ env.NEW_REF }} + fetch-depth: 1 + + - name: Set up Kani Dependencies (old variant) + uses: ./old/.github/actions/setup + with: + os: ubuntu-24.04 + kani_dir: old + + - name: Set up Kani Dependencies (new variant) + uses: ./new/.github/actions/setup + with: + os: ubuntu-24.04 + kani_dir: new + + - name: Copy benchmarks from new to old + run: rm -rf ./old/tests/perf ; cp -r ./new/tests/perf ./old/tests/ + + - name: Build `compile-timer` in old + run: cd old/tools/compile-timer && cargo build --release + - name: Build `kani` in old + run: cd old && cargo build-dev --release + + - name: Build `compile-timer` in new + run: cd new/tools/compile-timer && cargo build --release + - name: Build `kani` in new + run: cd new && cargo build-dev --release + + - name: Run `compile-timer` on old + run: | + export PATH="${{ github.workspace }}/old/scripts:$PATH" + cd old/tests/perf && ../../target/release/compile-timer --out-path compile-times-old.json --ignore kani-lib --ignore display_trait --ignore s2n-quic + - name: Run `compile-timer` on new + run: | + export PATH="${{ 
github.workspace }}/new/scripts:$PATH" + cd new/tests/perf && ../../target/release/compile-timer --out-path compile-times-new.json --ignore kani-lib --ignore display_trait --ignore s2n-quic + - name: Run analysis between the two + run: ./new/target/release/compile-analyzer --path-pre old/tests/perf/compile-times-old.json --path-post new/tests/perf/compile-times-new.json --only-markdown --suite-name short >> "$GITHUB_STEP_SUMMARY" + + compile-timer-long: + runs-on: ubuntu-24.04 + steps: + - name: Save push event HEAD and HEAD~ to environment variables + if: ${{ github.event_name == 'push' }} + run: | + echo "NEW_REF=${{ github.event.after}}" | tee -a "$GITHUB_ENV" + echo "OLD_REF=${{ github.event.before }}" | tee -a "$GITHUB_ENV" + - name: Save pull request HEAD and base to environment variables + if: ${{ contains(fromJSON('["pull_request", "pull_request_target"]'), github.event_name) }} + run: | + echo "OLD_REF=${{ github.event.pull_request.base.sha }}" | tee -a "$GITHUB_ENV" + echo "NEW_REF=${{ github.event.pull_request.head.sha }}" | tee -a "$GITHUB_ENV" + + - name: Check out Kani (old variant) + uses: actions/checkout@v4 + with: + path: ./old + ref: ${{ env.OLD_REF }} + fetch-depth: 2 + + - name: Check out Kani (new variant) + uses: actions/checkout@v4 + with: + path: ./new + ref: ${{ env.NEW_REF }} + fetch-depth: 1 + + - name: Set up Kani Dependencies (old variant) + uses: ./old/.github/actions/setup + with: + os: ubuntu-24.04 + kani_dir: old + + - name: Set up Kani Dependencies (new variant) + uses: ./new/.github/actions/setup + with: + os: ubuntu-24.04 + kani_dir: new + + # Ensures that a PR changing the benchmarks will have the new benchmarks run + # for both commits. + - name: Copy benchmarks from new to old + run: rm -rf ./old/tests/perf ; cp -r ./new/tests/perf ./old/tests/ + + - name: Build `compile-timer` in old + run: cd old/tools/compile-timer && cargo build --release + - name: Build `kani` in old + run: cd old && cargo build-dev --release + + - name: Build `compile-timer` in new + run: cd new/tools/compile-timer && cargo build --release + - name: Build `kani` in new + run: cd new && cargo build-dev --release + + - name: Run `compile-timer` on old + run: | + export PATH="${{ github.workspace }}/old/scripts:$PATH" + cd old/tests/perf/s2n-quic && ../../../target/release/compile-timer --out-path compile-times-old.json --also-visit quic/s2n-quic-core --also-visit quic/s2n-quic-platform --also-visit common/s2n-codec --skip-current + - name: Run `compile-timer` on new + run: | + export PATH="${{ github.workspace }}/new/scripts:$PATH" + cd new/tests/perf/s2n-quic && ../../../target/release/compile-timer --out-path compile-times-new.json --also-visit quic/s2n-quic-core --also-visit quic/s2n-quic-platform --also-visit common/s2n-codec --skip-current + - name: Run analysis between the two + run: ./new/target/release/compile-analyzer --path-pre old/tests/perf/s2n-quic/compile-times-old.json --path-post new/tests/perf/s2n-quic/compile-times-new.json --only-markdown --suite-name long >> "$GITHUB_STEP_SUMMARY" \ No newline at end of file diff --git a/.github/workflows/bench.yml b/.github/workflows/bench-e2e.yml similarity index 98% rename from .github/workflows/bench.yml rename to .github/workflows/bench-e2e.yml index dcd5b5004f2f..954b84c727c4 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench-e2e.yml @@ -6,7 +6,7 @@ # This workflow will run when: # - Changes are pushed to 'main'. 
# - Triggered by another workflow -name: Kani Performance Benchmarks +name: Kani End-To-End Performance Benchmarks on: push: branches: diff --git a/.github/workflows/extra_jobs.yml b/.github/workflows/extra_jobs.yml index ddf242ba3956..416fe94072db 100644 --- a/.github/workflows/extra_jobs.yml +++ b/.github/workflows/extra_jobs.yml @@ -43,9 +43,16 @@ jobs: with: dot: true - verification-bench: - name: Verification Benchmarks + end-to-end-bench: + name: End-to-End Benchmarks needs: auto-label permissions: {} - if: ${{ contains(needs.auto-label.outputs.all-labels, 'Z-BenchCI') && github.event_name != 'merge_group' }} - uses: ./.github/workflows/bench.yml + if: ${{ contains(needs.auto-label.outputs.all-labels, 'Z-EndToEndBenchCI') && github.event_name != 'merge_group' }} + uses: ./.github/workflows/bench-e2e.yml + + compiler-bench: + name: Compiler Benchmarks + needs: auto-label + permissions: {} + if: ${{ contains(needs.auto-label.outputs.all-labels, 'Z-CompilerBenchCI') && github.event_name != 'merge_group' }} + uses: ./.github/workflows/bench-compiler.yml diff --git a/Cargo.lock b/Cargo.lock index 095a517289a3..39d0be596007 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -338,9 +338,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" +checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" dependencies = [ "clap_builder", "clap_derive", @@ -348,9 +348,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" dependencies = [ "anstream", "anstyle", @@ -360,9 +360,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.32" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce" dependencies = [ "heck", "proc-macro2", @@ -403,6 +403,15 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "compile-timer" +version = "0.1.0" +dependencies = [ + "clap", + "serde", + "serde_json", +] + [[package]] name = "compiletest" version = "0.0.0" diff --git a/Cargo.toml b/Cargo.toml index 62d572f35a3a..5f5564e993cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ members = [ "tools/build-kani", "tools/kani-cov", "tools/scanner", + "tools/compile-timer", "kani-driver", "kani-compiler", "kani_metadata", diff --git a/kani-driver/src/call_cargo.rs b/kani-driver/src/call_cargo.rs index 9cba9c940979..f669612c5960 100644 --- a/kani-driver/src/call_cargo.rs +++ b/kani-driver/src/call_cargo.rs @@ -357,7 +357,13 @@ crate-type = ["lib"] && t1.doc == t2.doc) } + let compile_start = std::time::Instant::now(); let artifacts = self.run_build(cargo_cmd)?; + if std::env::var("TIME_COMPILER").is_ok() { + // conditionally print the compilation time for debugging & use by `compile-timer` + // doesn't just use the existing `--debug` flag because the number of prints significantly affects performance + println!("BUILT {} IN {:?}μs", target.name, compile_start.elapsed().as_micros()); + } debug!(?artifacts, "run_build_target"); // We generate kani specific 
artifacts only for the build target. The build target is
diff --git a/tools/benchcomp/benchcomp/visualizers/__init__.py b/tools/benchcomp/benchcomp/visualizers/__init__.py
index 865386900639..8963e444e7a4 100644
--- a/tools/benchcomp/benchcomp/visualizers/__init__.py
+++ b/tools/benchcomp/benchcomp/visualizers/__init__.py
@@ -265,12 +265,17 @@ def _get_template():
 Scatterplot axis ranges are {{ d["scaled_metrics"][metric]["min_value"] }} (bottom/left) to {{ d["scaled_metrics"][metric]["max_value"] }} (top/right).
 {% endif -%}
+
+<details><summary>Breakdown by harness</summary>
+
 | Benchmark | {% for variant in d["variants"][metric] %} {{ variant }} |{% endfor %}
 | --- |{% for variant in d["variants"][metric] %} --- |{% endfor -%}
 {% for bench_name, bench_variants in benchmarks.items () %}
 | {{ bench_name }} {% for variant in d["variants"][metric] -%}
 | {{ bench_variants[variant] }} {% endfor %}|
 {%- endfor %}
+
+</details>
+
 {% endfor -%}
 """)
diff --git a/tools/benchcomp/test/test_regression.py b/tools/benchcomp/test/test_regression.py
index ccf2259f7f0b..00256aa891ab 100644
--- a/tools/benchcomp/test/test_regression.py
+++ b/tools/benchcomp/test/test_regression.py
@@ -477,18 +477,28 @@ def test_markdown_results_table(self):
 ```
 Scatterplot axis ranges are 5 (bottom/left) to 10 (top/right).
+
+<details><summary>Breakdown by harness</summary>
+
 | Benchmark | variant_1 | variant_2 | ratio |
 | --- | --- | --- | --- |
 | bench_1 | 5 | 10 | **2.0** |
 | bench_2 | 10 | 5 | 0.5 |
+
+</details>
+
 ## success
+
+<details><summary>Breakdown by harness</summary>
+
 | Benchmark | variant_1 | variant_2 | notes |
 | --- | --- | --- | --- |
 | bench_1 | True | True | |
 | bench_2 | True | False | regressed |
 | bench_3 | False | True | newly passing |
+
+</details>
+
 """))
diff --git a/tools/compile-timer/Cargo.toml b/tools/compile-timer/Cargo.toml
new file mode 100644
index 000000000000..f826d25f842c
--- /dev/null
+++ b/tools/compile-timer/Cargo.toml
@@ -0,0 +1,24 @@
+# Copyright Kani Contributors
+# SPDX-License-Identifier: Apache-2.0 OR MIT
+
+[package]
+name = "compile-timer"
+version = "0.1.0"
+edition = "2024"
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+clap = { version = "4.5.40", features = ["derive"] }
+serde = { version = "1.0.219", features = ["derive"] }
+serde_json = "1.0.140"
+
+[[bin]]
+name = "compile-timer"
+path = "src/compile-timer.rs"
+
+[[bin]]
+name = "compile-analyzer"
+path = "src/compile-analyzer.rs"
+
+[lints]
+workspace = true
\ No newline at end of file
diff --git a/tools/compile-timer/README.md b/tools/compile-timer/README.md
new file mode 100644
index 000000000000..de9821f91e88
--- /dev/null
+++ b/tools/compile-timer/README.md
@@ -0,0 +1,19 @@
+# Compile-Timer
+This is a simple tool for timing the Kani compiler's end-to-end performance on crates.
+
+## Setup
+First build Kani (with `cargo build-dev --release` in the project root), then build this tool (with `cargo build --release` in this `compile-timer` directory). This places new `compile-timer` & `compile-analyzer` binaries in `kani/target/release`.
+
+## Recording Compiler Times with `compile-timer`
+After that, make sure you have Kani on your $PATH (see instructions [here](https://model-checking.github.io/kani/build-from-source.html#adding-kani-to-your-path)), after which you can run `compile-timer --out-path [OUT_JSON_FILE]` in any directory to profile the compiler's performance on it.
+
+By default, the tool recursively walks directories and uses `cargo kani` to profile any Rust project it encounters (detected by the presence of a `Cargo.toml`). You can tell it to ignore specific subtrees by passing the `--ignore [DIR_NAME]` flag.
+
+## Visualizing Compiler Times with `compile-analyzer`
+`compile-timer` itself prints some debug output, including each individual run's time and per-crate aggregates.
+
+`compile-analyzer` is specifically for comparing performance across two commits.
+
+Once you've run `compile-timer` on both commits, run `compile-analyzer --path-pre [FIRST_JSON_FILE] --path-post [SECOND_JSON_FILE]` to see the change in performance going from the first commit to the second.
+
+By default, `compile-analyzer` prints plain text to the console, but if you pass the `--only-markdown` option, its output is formatted as GitHub-flavored Markdown (useful in CI).
\ No newline at end of file
diff --git a/tools/compile-timer/src/common.rs b/tools/compile-timer/src/common.rs
new file mode 100644
index 000000000000..b65ed0de79e8
--- /dev/null
+++ b/tools/compile-timer/src/common.rs
@@ -0,0 +1,61 @@
+// Copyright Kani Contributors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+#![allow(dead_code)]
+use serde::{Deserialize, Serialize};
+use std::path::{Path, PathBuf};
+use std::time::Duration;
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct AggrResult {
+    pub krate: PathBuf,
+    pub krate_trimmed_path: String,
+    /// the stats for only the 25th-75th percentile of runs on this crate, i.e., the interquartile range
+    pub iqr_stats: Stats,
+    /// the stats for all runs on this crate
+    full_stats: Stats,
+}
+
+pub fn krate_trimmed_path(krate: &Path) -> String {
+    format!(
+        "{:?}",
+        krate
+            .canonicalize()
+            .unwrap()
+            .strip_prefix(std::env::current_dir().unwrap().parent().unwrap())
+            .unwrap()
+    )
+}
+
+impl AggrResult {
+    pub fn new(krate: PathBuf, iqr_stats: Stats, full_stats: Stats) -> Self {
+        AggrResult { krate_trimmed_path: krate_trimmed_path(&krate), krate, iqr_stats, full_stats }
+    }
+
+    pub fn full_std_dev(&self) -> Duration {
+        self.full_stats.std_dev
+    }
+
+    pub fn iqr(&self) -> Duration {
+        self.iqr_stats.range.1 - self.iqr_stats.range.0
+    }
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Stats {
+    pub avg: Duration,
+    pub std_dev: Duration,
+    pub range: (Duration, Duration),
+}
+
+/// Sum the IQR averages and IQR standard deviations respectively for all crates timed.
+pub fn aggregate_aggregates(info: &[AggrResult]) -> (Duration, Duration) {
+    for i in info {
+        println!("krate {:?} -- {:?}", i.krate, i.iqr_stats.avg);
+    }
+
+    (info.iter().map(|i| i.iqr_stats.avg).sum(), info.iter().map(|i| i.iqr_stats.std_dev).sum())
+}
+
+pub fn fraction_of_duration(dur: Duration, frac: f64) -> Duration {
+    Duration::from_nanos(((dur.as_nanos() as f64) * frac) as u64)
+}
diff --git a/tools/compile-timer/src/compile-analyzer.rs b/tools/compile-timer/src/compile-analyzer.rs
new file mode 100644
index 000000000000..416065131734
--- /dev/null
+++ b/tools/compile-timer/src/compile-analyzer.rs
@@ -0,0 +1,240 @@
+// Copyright Kani Contributors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+#![allow(dead_code)]
+use crate::common::{AggrResult, fraction_of_duration};
+use clap::Parser;
+use serde_json::Deserializer;
+use std::{cmp::max, fs::File, io, path::PathBuf, time::Duration};
+mod common;
+
+// Constants for detecting 'significant' regressions
+
+/// The fraction of a sample's standard deviation that a run can regress by
+/// without being considered a significant regression.
+const FRAC_STD_DEV_THRESHOLD: f64 = 2.0; // In this case, 2x the average std deviation.
+
+/// The fractional amount a run can regress by without it being considered a significant regression.
+const FRAC_ABSOLUTE_THRESHOLD: f64 = 0.05; // In this case, 5% of the initial time.
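+
+// A worked example of how the two thresholds combine (illustrative numbers,
+// not taken from a real run): with a 10.00s baseline and an average std dev
+// of 0.10s, a slowdown only counts as a potential regression if it exceeds
+// max(2.0 * 0.10s, 0.05 * 10.00s) = 0.50s, so a 10.40s result is classified
+// as noise while a 10.60s result is flagged.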
+
+#[derive(Debug, Parser)]
+#[command(version, about, long_about = None)]
+struct AnalyzerArgs {
+    #[arg(short, long, value_name = "FILE")]
+    path_pre: PathBuf,
+
+    #[arg(short, long, value_name = "FILE")]
+    path_post: PathBuf,
+
+    #[arg(short, long)]
+    /// The test suite name to display as part of the output's title
+    suite_name: Option<String>,
+
+    /// Output results in markdown format
+    #[arg(short, long)]
+    only_markdown: bool,
+}
+
+fn main() {
+    let args = AnalyzerArgs::parse();
+
+    let (pre_file, post_file) = try_read_files(&args).unwrap();
+
+    let (pre_ser, post_ser) =
+        (Deserializer::from_reader(pre_file), Deserializer::from_reader(post_file));
+
+    let pre_results = pre_ser.into_iter::<AggrResult>().collect::<Vec<_>>();
+    let post_results = post_ser.into_iter::<AggrResult>().collect::<Vec<_>>();
+
+    let mut results = pre_results
+        .into_iter()
+        .filter_map(Result::ok)
+        .zip(post_results.into_iter().filter_map(Result::ok))
+        .collect::<Vec<_>>();
+
+    sort_results(&mut results);
+
+    if args.only_markdown {
+        print_markdown(results.as_slice(), args.suite_name);
+    } else {
+        print_to_terminal(results.as_slice());
+    }
+}
+
+/// Sort results based on percentage change, with high magnitude regressions first, then low
+/// magnitude regressions, low magnitude improvements and finally high magnitude improvements.
+fn sort_results(results: &mut [(AggrResult, AggrResult)]) {
+    results.sort_by_key(|a| {
+        -(signed_percent_diff(&a.0.iqr_stats.avg, &a.1.iqr_stats.avg).abs() * 1000_f64) as i64
+    });
+}
+
+/// Print results in a markdown format (for GitHub actions).
+fn print_markdown(results: &[(AggrResult, AggrResult)], suite_name: Option<String>) {
+    let suite_text = if let Some(suite_name) = suite_name {
+        format!(" (`{suite_name}` suite)")
+    } else {
+        "".to_string()
+    };
+    println!("# Compiletime Results{suite_text}");
+    let total_pre = results.iter().map(|i| i.0.iqr_stats.avg).sum();
+    let total_post = results.iter().map(|i| i.1.iqr_stats.avg).sum();
+    println!(
+        "### *on the whole: {:.2?} → {:.2?} —* {}",
+        total_pre,
+        total_post,
+        diff_string(total_pre, total_post)
+    );
+    // Note that we have to call the fourth column "heterogeneousness" because the color-formatted
+    // diff will cut off if the column isn't wide enough for it, so verbosity is required.
+    println!(
+        "| test crate | old compile time | new compile time | heterogeneousness (percentage difference) | verdict |"
+    );
+    println!("| - | - | - | - | - |");
+    let regressions = results
+        .iter()
+        .map(|(pre_res, post_res)| {
+            assert!(pre_res.krate_trimmed_path == post_res.krate_trimmed_path);
+            let pre_time = pre_res.iqr_stats.avg;
+            let post_time = post_res.iqr_stats.avg;
+
+            let verdict = verdict_on_change(pre_res, post_res);
+            // emphasize output of crate name if it had a suspected regression
+            let emph = if verdict.is_regression() { "**" } else { "" };
+            println!(
+                "| {emph}{}{emph} | {:.2?} | {:.2?} | {} | {:?} |",
+                pre_res.krate_trimmed_path,
+                pre_time,
+                post_time,
+                diff_string(pre_time, post_time),
+                verdict
+            );
+            (&pre_res.krate_trimmed_path, verdict)
+        })
+        .filter_map(
+            |(krate, verdict)| if verdict.is_regression() { Some(krate.clone()) } else { None },
+        )
+        .collect::<Vec<_>>();
+
+    let footnote_number = 1;
+    println!(
+        "\n[^{footnote_number}]: threshold: max({FRAC_STD_DEV_THRESHOLD} x std_dev, {FRAC_ABSOLUTE_THRESHOLD} x initial_time)."
+    );
+
+    if regressions.is_empty() {
+        println!("## No suspected regressions[^{footnote_number}]!");
+    } else {
+        println!(
+            "## Failing because of {} suspected regressions[^{footnote_number}]:",
+            regressions.len()
+        );
+        println!("{}", regressions.join(", "));
+        std::process::exit(1);
+    }
+}
+
+/// Print results for a terminal output.
+fn print_to_terminal(results: &[(AggrResult, AggrResult)]) {
+    let krate_column_len = results
+        .iter()
+        .map(|(a, b)| max(a.krate_trimmed_path.len(), b.krate_trimmed_path.len()))
+        .max()
+        .unwrap();
+
+    for (pre_res, post_res) in results {
+        assert!(pre_res.krate == post_res.krate);
+        let pre_time = pre_res.iqr_stats.avg;
+        let post_time = post_res.iqr_stats.avg;
+
+        let change_dir = if post_time > pre_time {
+            "↑"
+        } else if post_time == pre_time {
+            "-"
+        } else {
+            "↓"
+        };
+        let change_amount = (pre_time.abs_diff(post_time).as_micros() as f64
+            / post_time.as_micros() as f64)
+            * 100_f64;
+
+        println!(
+            "krate {:krate_column_len$} -- [{:.2?} => {:.2?} ({change_dir}{change_amount:5.2}%)] {:?}",
+            pre_res.krate_trimmed_path,
+            pre_time,
+            post_time,
+            verdict_on_change(pre_res, post_res)
+        );
+    }
+}
+
+/// Classify a change into a [Verdict], determining whether it was an improvement, regression,
+/// or likely just noise based on provided thresholds.
+fn verdict_on_change(pre: &AggrResult, post: &AggrResult) -> Verdict {
+    let (pre_time, post_time) = (pre.iqr_stats.avg, post.iqr_stats.avg);
+
+    if post_time.abs_diff(pre_time) < fraction_of_duration(pre_time, FRAC_ABSOLUTE_THRESHOLD) {
+        return Verdict::ProbablyNoise(NoiseExplanation::SmallPercentageChange);
+    }
+
+    let avg_std_dev = (pre.full_std_dev() + post.full_std_dev()) / 2;
+    if post_time.abs_diff(pre_time) < fraction_of_duration(avg_std_dev, FRAC_STD_DEV_THRESHOLD) {
+        return Verdict::ProbablyNoise(NoiseExplanation::SmallComparedToStdDevOf(avg_std_dev));
+    }
+
+    if pre.iqr_stats.avg > post.iqr_stats.avg {
+        return Verdict::Improved;
+    }
+
+    Verdict::PotentialRegression { sample_std_dev: avg_std_dev }
+}
+
+fn signed_percent_diff(pre: &Duration, post: &Duration) -> f64 {
+    let change_amount = (pre.abs_diff(*post).as_micros() as f64 / pre.as_micros() as f64) * 100_f64;
+    if post < pre { -change_amount } else { change_amount }
+}
+
+fn diff_string(pre: Duration, post: Duration) -> String {
+    let change_dir = if post > pre {
+        "$\\color{red}\\textsf{↑ "
+    } else if post == pre {
+        "$\\color{black}\\textsf{- "
+    } else {
+        "$\\color{green}\\textsf{↓ "
+    };
+    let change_amount = signed_percent_diff(&pre, &post).abs();
+    format!("{change_dir}{:.2?} ({change_amount:.2}\\\\%)}}$", pre.abs_diff(post))
+}
+
+#[derive(Debug)]
+enum Verdict {
+    /// This crate now compiles faster!
+    Improved,
+    /// This crate compiled slower, but likely because of OS noise.
+    ProbablyNoise(NoiseExplanation),
+    /// This crate compiled slower, potentially indicating a true performance regression.
+    PotentialRegression { sample_std_dev: std::time::Duration },
+}
+
+#[derive(Debug)]
+/// The reason a regression was flagged as likely noise rather than a true performance regression.
+enum NoiseExplanation {
+    /// The increase in compile time is so small compared to the
+    /// sample's standard deviation (< [FRAC_STD_DEV_THRESHOLD] * std_dev)
+    /// that it is probably just sampling noise.
+    SmallComparedToStdDevOf(std::time::Duration),
+    /// The percentage increase in compile time is so small (< [FRAC_ABSOLUTE_THRESHOLD]),
+    /// the difference is likely insignificant.
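+    /// For example (illustrative numbers): a 10.00s → 10.30s change is only 3%,
+    /// under the 5% threshold, so it lands here regardless of the std-dev check.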
+    SmallPercentageChange,
+}
+
+impl Verdict {
+    fn is_regression(&self) -> bool {
+        matches!(self, Verdict::PotentialRegression { sample_std_dev: _ })
+    }
+}
+
+fn try_read_files(c: &AnalyzerArgs) -> io::Result<(File, File)> {
+    io::Result::Ok((
+        File::open(c.path_pre.canonicalize()?)?,
+        File::open(c.path_post.canonicalize()?)?,
+    ))
+}
diff --git a/tools/compile-timer/src/compile-timer.rs b/tools/compile-timer/src/compile-timer.rs
new file mode 100644
index 000000000000..4c7799d40ff4
--- /dev/null
+++ b/tools/compile-timer/src/compile-timer.rs
@@ -0,0 +1,184 @@
+// Copyright Kani Contributors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+#![feature(exit_status_error)]
+
+use crate::common::{AggrResult, Stats, aggregate_aggregates, krate_trimmed_path};
+use clap::Parser;
+use serde::Serialize;
+use std::fs::File;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+use std::time::Duration;
+mod common;
+
+#[derive(Debug, Parser)]
+#[command(version, about, long_about = None)]
+struct TimerArgs {
+    /// Where to write the JSON timing results
+    #[arg(short, long, value_name = "FILE")]
+    out_path: PathBuf,
+
+    /// Ignore compiling the current directory
+    #[arg(short, long)]
+    skip_current: bool,
+
+    /// Additional paths to visit beyond the current subtree
+    #[arg(short, long, value_name = "DIR")]
+    also_visit: Vec<PathBuf>,
+
+    /// The names or paths of files to ignore
+    #[arg(short, long)]
+    ignore: Vec<String>,
+}
+
+/// The number of untimed runs to do before starting timed runs.
+/// We need at least one warm-up run to make sure crates are fetched & cached in
+/// the local `.cargo/registry` folder. Otherwise the first run will be significantly slower.
+const WARMUP_RUNS: usize = 1;
+const TIMED_RUNS: usize = 10;
+
+fn main() {
+    let args = TimerArgs::parse();
+
+    let (mut to_visit, mut res) = (vec![], vec![]);
+    to_visit.extend(args.also_visit.into_iter().rev());
+    if !args.skip_current {
+        let current_directory = std::env::current_dir().expect("should be run in a directory");
+        to_visit.push(current_directory);
+    }
+
+    let mut out_ser = serde_json::Serializer::pretty(File::create(&args.out_path).unwrap());
+    let run_start = std::time::Instant::now();
+
+    // recursively visit subdirectories to time the compiler on all rust projects
+    while let Some(next) = to_visit.pop() {
+        let next = next.canonicalize().unwrap();
+        let path_to_toml = next.join("Cargo.toml");
+
+        if path_to_toml.exists() && path_to_toml.is_file() {
+            // in a rust crate, so we want to profile it
+            println!("[!] profiling in {}", krate_trimmed_path(&next));
+            let new_res = profile_on_crate(&next);
+            new_res.serialize(&mut out_ser).unwrap();
+            res.push(new_res);
+        } else {
+            // we want to recurse and visit all directories that aren't explicitly ignored
+            to_visit.extend(std::fs::read_dir(next).unwrap().filter_map(|entry| {
+                if let Ok(entry) = entry {
+                    let path = entry.path();
+                    if path.is_dir() && !args.ignore.iter().any(|ignored| path.ends_with(ignored)) {
+                        return Some(path);
+                    }
+                }
+                None
+            }));
+        }
+    }
+
+    println!("[!] total info is {:?}", aggregate_aggregates(&res));
+
+    print!("\t [*] run took {:?}", run_start.elapsed());
+}
+
+/// Profile a crate by running a certain number of untimed warmup runs and then
+/// a certain number of timed runs, returning aggregates of the timing results.
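+/// With the constants above, that currently means 1 warm-up run followed by 10 timed runs per crate.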
+fn profile_on_crate(absolute_path: &std::path::PathBuf) -> AggrResult {
+    let _warmup_results = (0..WARMUP_RUNS)
+        .map(|i| {
+            print!("\t[*] running warmup {}/{WARMUP_RUNS}", i + 1);
+            let _ = std::io::stdout().flush();
+            let res = run_command_in(absolute_path);
+            println!(" -- {res:?}");
+            res
+        })
+        .collect::<Vec<_>>();
+
+    let timed_results = (0..TIMED_RUNS)
+        .map(|i| {
+            print!("\t[*] running timed run {}/{TIMED_RUNS}", i + 1);
+            let _ = std::io::stdout().flush();
+            let res = run_command_in(absolute_path);
+            println!(" -- {res:?}");
+            res
+        })
+        .collect::<Vec<_>>();
+
+    let aggr = aggregate_results(absolute_path, &timed_results);
+    println!("\t[!] results for {absolute_path:?} are in! {aggr:?}");
+
+    aggr
+}
+
+type RunResult = Duration;
+/// Run `cargo kani` in a crate and parse out the compiler timing info outputted
+/// by the `TIME_COMPILER` environment variable.
+fn run_command_in(absolute_path: &Path) -> RunResult {
+    // `cargo clean` first to ensure the compiler is fully run again
+    let _ = Command::new("cargo")
+        .current_dir(absolute_path)
+        .arg("clean")
+        .stdout(Stdio::null())
+        .output()
+        .expect("cargo clean should succeed");
+
+    // do the actual compiler run (using `--only-codegen` to save time)
+    let kani_output = Command::new("cargo")
+        .current_dir(absolute_path)
+        .arg("kani")
+        .arg("--only-codegen")
+        .env("TIME_COMPILER", "true")
+        .output()
+        .expect("cargo kani should succeed");
+
+    // parse the output bytes into a string
+    let out_str = String::from_utf8(kani_output.stdout).expect("utf8 conversion should succeed");
+
+    if !kani_output.status.success() {
+        println!(
+            "the `TIME_COMPILER=true cargo kani --only-codegen` command failed in {absolute_path:?} with output -- {out_str:?}"
+        );
+        panic!("cargo kani command failed");
+    }
+
+    // parse that string for the compiler build information; if the compiler was run multiple
+    // times (which can happen in a workspace with multiple crates), we sum up the total time
+    out_str.split("\n").filter(|line| line.starts_with("BUILT")).map(extract_duration).sum()
+}
+
+fn extract_duration(s: &str) -> Duration {
+    let s = &s[s.find("IN").unwrap_or(0)..];
+    let micros = s.chars().filter(|c| c.is_ascii_digit()).collect::<String>().parse().ok().unwrap();
+
+    Duration::from_micros(micros)
+}
+
+fn aggregate_results(path: &Path, results: &[Duration]) -> AggrResult {
+    assert!(results.len() == TIMED_RUNS);
+
+    // sort and calculate the subset of times in the interquartile range
+    let mut sorted = results.to_vec();
+    sorted.sort();
+    let iqr_bounds = (results.len() / 4, results.len() * 3 / 4);
+    let iqr_durations = sorted
+        .into_iter()
+        .enumerate()
+        .filter_map(|(i, v)| if i >= iqr_bounds.0 && i <= iqr_bounds.1 { Some(v) } else { None })
+        .collect::<Vec<_>>();
+
+    AggrResult::new(path.to_path_buf(), result_stats(&iqr_durations), result_stats(results))
+}
+
+/// Record the stats from a subset slice of timing runs.
+fn result_stats(results: &[Duration]) -> Stats {
+    let avg = results.iter().sum::<Duration>() / results.len().try_into().unwrap();
+    let range = (*results.iter().min().unwrap(), *results.iter().max().unwrap());
+
+    let deviations = results.iter().map(|x| x.abs_diff(avg).as_micros().pow(2)).sum::<u128>();
+    let std_dev =
+        Duration::from_micros((deviations / results.len() as u128).isqrt().try_into().unwrap());
+
+    Stats { avg, std_dev, range }
+}
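+
+// A minimal sanity-check sketch for `extract_duration`, assuming the
+// `BUILT {name} IN {micros}μs` line format that kani-driver emits when
+// TIME_COMPILER is set (see the call_cargo.rs change above).
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn extracts_micros_from_built_line() {
+        // digits in the crate name are skipped because we slice from "IN" first
+        assert_eq!(extract_duration("BUILT s2n-quic IN 1234μs"), Duration::from_micros(1234));
+    }
+}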