From c6283ac0ec53f70bf6ab78f367b6443c25b5290c Mon Sep 17 00:00:00 2001 From: AlexanderPortland Date: Wed, 9 Jul 2025 15:22:44 -0700 Subject: [PATCH 1/3] skip codegen based on --harness filters --- kani-compiler/src/args.rs | 10 ++ .../src/kani_middle/codegen_units.rs | 39 ++++++- kani-driver/src/call_single_file.rs | 8 ++ kani-driver/src/main.rs | 2 +- kani-driver/src/metadata.rs | 101 ++++++------------ kani_metadata/src/harness.rs | 46 +++++++- 6 files changed, 134 insertions(+), 72 deletions(-) diff --git a/kani-compiler/src/args.rs b/kani-compiler/src/args.rs index 287e478d7d3c..af9d025e22b0 100644 --- a/kani-compiler/src/args.rs +++ b/kani-compiler/src/args.rs @@ -52,6 +52,16 @@ pub struct Arguments { /// Option name used to use json pretty-print for output files. #[clap(long = "pretty-json-files")] pub output_pretty_json: bool, + /// When specified, the harness filter will only match the exact fully qualified name of a harness. + // (Passed here directly from [CargoKaniArgs] in `args_toml.rs`) + #[arg(long, requires("harnesses"))] + pub exact: bool, + /// If specified, only run harnesses that match this filter. This option can be provided + /// multiple times, which will run all tests matching any of the filters. + /// If used with --exact, the harness filter will only match the exact fully qualified name of a harness. + // (Passed here directly from [CargoKaniArgs] in `args_toml.rs`) + #[arg(long = "harness", num_args(1), value_name = "HARNESS_FILTER")] + pub harnesses: Vec, /// Option used for suppressing global ASM error. #[clap(long)] pub ignore_global_asm: bool, diff --git a/kani-compiler/src/kani_middle/codegen_units.rs b/kani-compiler/src/kani_middle/codegen_units.rs index 9f860bf80156..487993e67261 100644 --- a/kani-compiler/src/kani_middle/codegen_units.rs +++ b/kani-compiler/src/kani_middle/codegen_units.rs @@ -21,7 +21,7 @@ use crate::kani_queries::QueryDb; use fxhash::{FxHashMap, FxHashSet}; use kani_metadata::{ ArtifactType, AssignsContract, AutoHarnessMetadata, AutoHarnessSkipReason, HarnessKind, - HarnessMetadata, KaniMetadata, + HarnessMetadata, KaniMetadata, find_proof_harnesses, }; use regex::RegexSet; use rustc_hir::def_id::DefId; @@ -74,7 +74,11 @@ impl CodegenUnits { let args = queries.args(); match args.reachability_analysis { ReachabilityType::Harnesses => { - let all_harnesses = get_all_manual_harnesses(tcx, base_filename); + let all_harnesses = determine_targets( + get_all_manual_harnesses(tcx, base_filename), + &args.harnesses, + args.exact, + ); // Even if no_stubs is empty we still need to store rustc metadata. let units = group_by_stubs(tcx, &all_harnesses); validate_units(tcx, &units); @@ -82,7 +86,11 @@ impl CodegenUnits { CodegenUnits { units, harness_info: all_harnesses, crate_info } } ReachabilityType::AllFns => { - let mut all_harnesses = get_all_manual_harnesses(tcx, base_filename); + let mut all_harnesses = determine_targets( + get_all_manual_harnesses(tcx, base_filename), + &args.harnesses, + args.exact, + ); let mut units = group_by_stubs(tcx, &all_harnesses); validate_units(tcx, &units); @@ -327,6 +335,31 @@ fn get_all_manual_harnesses( .collect::>() } +/// Filter which harnesses to codegen based on user filters. Shares use of `find_proof_harnesses` with the `determine_targets` function +/// in `kani-driver/src/metadata.rs` to ensure the filter is consistent and thus codegen is always done for the subset of harnesses we want +/// to analyze. +fn determine_targets( + harnesses: HashMap, + harness_filters: &[String], + exact_filter: bool, +) -> HashMap { + // If there are no filters, nothing to do! + if harness_filters.is_empty() { + return harnesses; + } + + // If there are filters, only keep around harnesses that satisfy them. + let mut new_harnesses = harnesses.clone(); + let valid_harnesses = find_proof_harnesses( + &BTreeSet::from_iter(harness_filters.iter()), + harnesses.values(), + exact_filter, + ); + + new_harnesses.retain(|_, metadata| valid_harnesses.contains(&&*metadata)); + new_harnesses +} + /// For each function eligible for automatic verification, /// generate a harness Instance for it, then generate its metadata. /// Note that the body of each harness instance is still the dummy body of `kani_harness_intrinsic`; diff --git a/kani-driver/src/call_single_file.rs b/kani-driver/src/call_single_file.rs index f3e488791ddc..4bb9aa090a6e 100644 --- a/kani-driver/src/call_single_file.rs +++ b/kani-driver/src/call_single_file.rs @@ -151,6 +151,14 @@ impl KaniSession { flags.push("--no-assert-contracts".into()); } + for harness in &self.args.harnesses { + flags.push(format!("--harness {harness}")); + } + + if self.args.exact { + flags.push("--exact".into()); + } + if let Some(args) = self.autoharness_compiler_flags.clone() { flags.extend(args); } diff --git a/kani-driver/src/main.rs b/kani-driver/src/main.rs index 972cbd5dfbd3..82ac9ee1abb5 100644 --- a/kani-driver/src/main.rs +++ b/kani-driver/src/main.rs @@ -129,7 +129,7 @@ fn standalone_main() -> Result<()> { /// Run verification on the given project. fn verify_project(project: Project, session: KaniSession) -> Result<()> { debug!(?project, "verify_project"); - let harnesses = session.determine_targets(&project.get_all_harnesses())?; + let harnesses = session.determine_targets(project.get_all_harnesses())?; debug!(n = harnesses.len(), ?harnesses, "verify_project"); // Verification diff --git a/kani-driver/src/metadata.rs b/kani-driver/src/metadata.rs index 367afbd5331f..9ccf882f50b2 100644 --- a/kani-driver/src/metadata.rs +++ b/kani-driver/src/metadata.rs @@ -3,9 +3,11 @@ use anyhow::{Result, bail}; use std::path::Path; -use tracing::{debug, trace}; +// use tracing::{debug, trace}; -use kani_metadata::{HarnessMetadata, InternedString, TraitDefinedMethod, VtableCtxResults}; +use kani_metadata::{ + HarnessMetadata, InternedString, TraitDefinedMethod, VtableCtxResults, find_proof_harnesses, +}; use std::collections::{BTreeSet, HashMap}; use std::fs::File; use std::io::{BufReader, BufWriter}; @@ -91,39 +93,42 @@ impl KaniSession { /// Determine which function to use as entry point, based on command-line arguments and kani-metadata. pub fn determine_targets<'a>( &self, - all_harnesses: &[&'a HarnessMetadata], + compiler_filtered_harnesses: Vec<&'a HarnessMetadata>, ) -> Result> { - let harnesses = BTreeSet::from_iter(self.args.harnesses.iter()); - let total_harnesses = harnesses.len(); - let all_targets = &harnesses; + let harness_filters = BTreeSet::from_iter(self.args.harnesses.iter()); - if harnesses.is_empty() { - Ok(Vec::from(all_harnesses)) - } else { - let harnesses_found: Vec<&HarnessMetadata> = - find_proof_harnesses(&harnesses, all_harnesses, self.args.exact); - - // If even one harness was not found with --exact, return an error to user - if self.args.exact && harnesses_found.len() < total_harnesses { - let harness_found_names: BTreeSet<&String> = - harnesses_found.iter().map(|&h| &h.pretty_name).collect(); + // For dev builds, re-filter the harnesses to double check filtering in the compiler + // and ensure we're doing the minimal harness codegen possible. That filtering happens in + // the `kani-compiler/src/kani_middle/codegen_units.rs` file's `determine_targets` function. + if cfg!(debug_assertions) && !harness_filters.is_empty() { + let filtered_harnesses: Vec<&HarnessMetadata> = find_proof_harnesses( + &harness_filters, + compiler_filtered_harnesses.clone(), + self.args.exact, + ); + assert_eq!(compiler_filtered_harnesses, filtered_harnesses); + } - // Check which harnesses are missing from the difference of targets and harnesses_found - let harnesses_missing: Vec<&String> = - all_targets.difference(&harness_found_names).cloned().collect(); - let joined_string = harnesses_missing - .iter() - .map(|&s| (*s).clone()) - .collect::>() - .join("`, `"); + // If any of the `--harness` filters failed to find a harness (and thus the # of harnesses is less than the # of filters), report that to the user. + if self.args.exact && (compiler_filtered_harnesses.len() < self.args.harnesses.len()) { + let harness_found_names: BTreeSet<&String> = + compiler_filtered_harnesses.iter().map(|&h| &h.pretty_name).collect(); - bail!( - "Failed to match the following harness(es):\n{joined_string}\nPlease specify the fully-qualified name of a harness.", - ); - } + // Check which harnesses are missing from the difference of targets and all_harnesses + let harnesses_missing: Vec<&String> = + harness_filters.difference(&harness_found_names).cloned().collect(); + let joined_string = harnesses_missing + .iter() + .map(|&s| (*s).clone()) + .collect::>() + .join("`, `"); - Ok(harnesses_found) + bail!( + "Failed to match the following harness(es):\n{joined_string}\nPlease specify the fully-qualified name of a harness.", + ); } + + Ok(compiler_filtered_harnesses) } } @@ -142,44 +147,6 @@ pub fn sort_harnesses_by_loc<'a>(harnesses: &[&'a HarnessMetadata]) -> Vec<&'a H harnesses_clone } -/// Search for a proof harness with a particular name. -/// At the present time, we use `no_mangle` so collisions shouldn't happen, -/// but this function is written to be robust against that changing in the future. -fn find_proof_harnesses<'a>( - targets: &BTreeSet<&String>, - all_harnesses: &[&'a HarnessMetadata], - exact_filter: bool, -) -> Vec<&'a HarnessMetadata> { - debug!(?targets, "find_proof_harness"); - let mut result = vec![]; - for md in all_harnesses.iter() { - // --harnesses should not select automatic harnesses - if md.is_automatically_generated { - continue; - } - if exact_filter { - // Check for exact match only - if targets.contains(&md.pretty_name) { - // if exact match found, stop searching - result.push(*md); - } else { - trace!(skip = md.pretty_name, "find_proof_harnesses"); - } - } else { - // Either an exact match, or a substring match. We check the exact first since it's cheaper. - if targets.contains(&md.pretty_name) - || targets.contains(&md.get_harness_name_unqualified().to_string()) - || targets.iter().any(|target| md.pretty_name.contains(*target)) - { - result.push(*md); - } else { - trace!(skip = md.pretty_name, "find_proof_harnesses"); - } - } - } - result -} - #[cfg(test)] pub mod tests { use super::*; diff --git a/kani_metadata/src/harness.rs b/kani_metadata/src/harness.rs index 7e945b51d67c..ed5d1d9a0cd2 100644 --- a/kani_metadata/src/harness.rs +++ b/kani_metadata/src/harness.rs @@ -3,7 +3,7 @@ use crate::CbmcSolver; use serde::{Deserialize, Serialize}; -use std::path::PathBuf; +use std::{borrow::Borrow, collections::BTreeSet, path::PathBuf}; use strum_macros::Display; /// A CBMC-level `assigns` contract that needs to be enforced on a function. @@ -112,3 +112,47 @@ impl HarnessMetadata { } } } + +/// Search for a proof harness with a particular name. +/// At the present time, we use `no_mangle` so collisions shouldn't happen, +/// but this function is written to be robust against that changing in the future. +pub fn find_proof_harnesses<'a, I>( + targets: &BTreeSet<&String>, + all_harnesses: I, + exact_filter: bool, +) -> Vec<&'a HarnessMetadata> +where + I: IntoIterator, + I::Item: Borrow<&'a HarnessMetadata>, +{ + // debug!(?targets, "find_proof_harness"); + let mut result = vec![]; + for md in all_harnesses.into_iter() { + let md: &'a HarnessMetadata = md.borrow(); + + // --harnesses should not select automatic harnesses + if md.is_automatically_generated { + continue; + } + if exact_filter { + // Check for exact match only + if targets.contains(&md.pretty_name) { + // if exact match found, stop searching + result.push(md); + } else { + // trace!(skip = md.pretty_name, "find_proof_harnesses"); + } + } else { + // Either an exact match, or a substring match. We check the exact first since it's cheaper. + if targets.contains(&md.pretty_name) + || targets.contains(&md.get_harness_name_unqualified().to_string()) + || targets.iter().any(|target| md.pretty_name.contains(*target)) + { + result.push(md); + } else { + // trace!(skip = md.pretty_name, "find_proof_harnesses"); + } + } + } + result +} From 698d196f5ac5d113ba2a69ea37e50dccaae55850 Mon Sep 17 00:00:00 2001 From: Alex Portland <115113415+AlexanderPortland@users.noreply.github.com> Date: Thu, 10 Jul 2025 10:28:07 -0700 Subject: [PATCH 2/3] remove commented-out use stmt --- kani-driver/src/metadata.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/kani-driver/src/metadata.rs b/kani-driver/src/metadata.rs index 9ccf882f50b2..1a71e41d71f4 100644 --- a/kani-driver/src/metadata.rs +++ b/kani-driver/src/metadata.rs @@ -3,7 +3,6 @@ use anyhow::{Result, bail}; use std::path::Path; -// use tracing::{debug, trace}; use kani_metadata::{ HarnessMetadata, InternedString, TraitDefinedMethod, VtableCtxResults, find_proof_harnesses, From c25fb21f4694dc484a5c47feb873953c9dd55967 Mon Sep 17 00:00:00 2001 From: AlexanderPortland Date: Fri, 18 Jul 2025 15:55:23 -0700 Subject: [PATCH 3/3] fix first round of feedback * remove unneeded comment * rename overloaded `harnesses` variable * add `tracing` as a dependency --- Cargo.lock | 1 + kani-compiler/src/kani_middle/codegen_units.rs | 9 ++++----- kani_metadata/Cargo.toml | 1 + kani_metadata/src/harness.rs | 7 ++++--- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 21f6989f9d3c..2b2f64c990ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1199,6 +1199,7 @@ dependencies = [ "serde", "strum", "strum_macros", + "tracing", ] [[package]] diff --git a/kani-compiler/src/kani_middle/codegen_units.rs b/kani-compiler/src/kani_middle/codegen_units.rs index 487993e67261..d6ee6cb03b2d 100644 --- a/kani-compiler/src/kani_middle/codegen_units.rs +++ b/kani-compiler/src/kani_middle/codegen_units.rs @@ -339,20 +339,19 @@ fn get_all_manual_harnesses( /// in `kani-driver/src/metadata.rs` to ensure the filter is consistent and thus codegen is always done for the subset of harnesses we want /// to analyze. fn determine_targets( - harnesses: HashMap, + all_harnesses: HashMap, harness_filters: &[String], exact_filter: bool, ) -> HashMap { - // If there are no filters, nothing to do! if harness_filters.is_empty() { - return harnesses; + return all_harnesses; } // If there are filters, only keep around harnesses that satisfy them. - let mut new_harnesses = harnesses.clone(); + let mut new_harnesses = all_harnesses.clone(); let valid_harnesses = find_proof_harnesses( &BTreeSet::from_iter(harness_filters.iter()), - harnesses.values(), + all_harnesses.values(), exact_filter, ); diff --git a/kani_metadata/Cargo.toml b/kani_metadata/Cargo.toml index bb3b99c4a8c9..1d3ced87bf9a 100644 --- a/kani_metadata/Cargo.toml +++ b/kani_metadata/Cargo.toml @@ -16,6 +16,7 @@ cbmc = { path = "../cprover_bindings", package = "cprover_bindings" } strum = "0.27.1" strum_macros = "0.27.1" clap = { version = "4.4.11", features = ["derive"] } +tracing = "0.1.41" [lints] workspace = true diff --git a/kani_metadata/src/harness.rs b/kani_metadata/src/harness.rs index ed5d1d9a0cd2..6c90dc269c92 100644 --- a/kani_metadata/src/harness.rs +++ b/kani_metadata/src/harness.rs @@ -5,6 +5,7 @@ use crate::CbmcSolver; use serde::{Deserialize, Serialize}; use std::{borrow::Borrow, collections::BTreeSet, path::PathBuf}; use strum_macros::Display; +use tracing::{debug, trace}; /// A CBMC-level `assigns` contract that needs to be enforced on a function. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] @@ -125,7 +126,7 @@ where I: IntoIterator, I::Item: Borrow<&'a HarnessMetadata>, { - // debug!(?targets, "find_proof_harness"); + debug!(?targets, "find_proof_harness"); let mut result = vec![]; for md in all_harnesses.into_iter() { let md: &'a HarnessMetadata = md.borrow(); @@ -140,7 +141,7 @@ where // if exact match found, stop searching result.push(md); } else { - // trace!(skip = md.pretty_name, "find_proof_harnesses"); + trace!(skip = md.pretty_name, "find_proof_harnesses"); } } else { // Either an exact match, or a substring match. We check the exact first since it's cheaper. @@ -150,7 +151,7 @@ where { result.push(md); } else { - // trace!(skip = md.pretty_name, "find_proof_harnesses"); + trace!(skip = md.pretty_name, "find_proof_harnesses"); } } }