Skip to content

Commit

Permalink
Generate KaniMetadata about which functions we skipped
Browse files Browse the repository at this point in the history
  • Loading branch information
carolynzech committed Mar 5, 2025
1 parent 6a8dc61 commit 6f946bb
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ impl GotoCodegenResults {
// removes any contracts logic for ReachabilityType::Test or ReachabilityType::PubFns,
// which are the two ReachabilityTypes under which the compiler calls this function.
contracted_functions: vec![],
autoharness_skipped_fns: None,
}
}

Expand Down
11 changes: 11 additions & 0 deletions kani-compiler/src/kani_middle/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,17 @@ impl<'tcx> KaniAttributes<'tcx> {
})
}

// Is this a function inserted by Kani instrumentation?
pub fn is_kani_instrumentation(&self) -> bool {
self.fn_marker().is_some() || self.is_contract_generated()
}

// Is this a contract-generated function?
// Note that this function currently always returns false because of https://github.com/model-checking/kani/issues/3921
fn is_contract_generated(&self) -> bool {
self.map.contains_key(&KaniAttributeKind::IsContractGenerated)
}

/// Return a function marker if any.
pub fn fn_marker(&self) -> Option<Symbol> {
self.attribute_value(KaniAttributeKind::FnMarker)
Expand Down
205 changes: 142 additions & 63 deletions kani-compiler/src/kani_middle/codegen_units.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//! Today, only stub / contracts can affect the harness codegen. Thus, we group the harnesses
//! according to their stub configuration.
use crate::args::ReachabilityType;
use crate::args::{Arguments, ReachabilityType};
use crate::kani_middle::attributes::{KaniAttributes, is_proof_harness};
use crate::kani_middle::kani_functions::{KaniIntrinsic, KaniModel};
use crate::kani_middle::metadata::{
Expand All @@ -17,14 +17,17 @@ use crate::kani_middle::reachability::filter_crate_items;
use crate::kani_middle::resolve::expect_resolve_fn;
use crate::kani_middle::stubbing::{check_compatibility, harness_stub_map};
use crate::kani_queries::QueryDb;
use kani_metadata::{ArtifactType, AssignsContract, HarnessKind, HarnessMetadata, KaniMetadata};
use kani_metadata::{
ArtifactType, AssignsContract, AutoHarnessSkipReason, AutoHarnessSkippedFns, HarnessKind,
HarnessMetadata, KaniMetadata,
};
use rustc_hir::def_id::DefId;
use rustc_middle::ty::TyCtxt;
use rustc_session::config::OutputType;
use rustc_smir::rustc_internal;
use stable_mir::CrateDef;
use stable_mir::mir::{TerminatorKind, mono::Instance};
use stable_mir::ty::{FnDef, GenericArgKind, GenericArgs, IndexedVal, RigidTy, TyKind};
use stable_mir::{CrateDef, CrateItem};
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::io::BufWriter;
Expand All @@ -46,9 +49,10 @@ struct CrateInfo {

/// We group the harnesses that have the same stubs.
pub struct CodegenUnits {
units: Vec<CodegenUnit>,
harness_info: HashMap<Harness, HarnessMetadata>,
autoharness_skipped_fns: Option<AutoHarnessSkippedFns>,
crate_info: CrateInfo,
harness_info: HashMap<Harness, HarnessMetadata>,
units: Vec<CodegenUnit>,
}

#[derive(Clone, Default, Debug)]
Expand All @@ -70,7 +74,12 @@ impl CodegenUnits {
let units = group_by_stubs(tcx, &all_harnesses);
validate_units(tcx, &units);
debug!(?units, "CodegenUnits::new");
CodegenUnits { units, harness_info: all_harnesses, crate_info }
CodegenUnits {
units,
harness_info: all_harnesses,
crate_info,
autoharness_skipped_fns: None,
}
}
ReachabilityType::AllFns => {
let mut all_harnesses = get_all_manual_harnesses(tcx, base_filename);
Expand All @@ -80,23 +89,16 @@ impl CodegenUnits {
let kani_fns = queries.kani_functions();
let kani_harness_intrinsic =
kani_fns.get(&KaniIntrinsic::AutomaticHarness.into()).unwrap();
let kani_any_inst = kani_fns.get(&KaniModel::Any.into()).unwrap();

let verifiable_fns = filter_crate_items(tcx, |_, instance: Instance| -> bool {
// If the user specified functions to include or exclude, only allow instances matching those filters.
let user_included = if !args.autoharness_included_functions.is_empty() {
fn_list_contains_instance(&instance, &args.autoharness_included_functions)
} else if !args.autoharness_excluded_functions.is_empty() {
!fn_list_contains_instance(&instance, &args.autoharness_excluded_functions)
} else {
true
};
user_included
&& is_eligible_for_automatic_harness(tcx, instance, *kani_any_inst)
});

let (chosen, skipped) = automatic_harness_partition(
tcx,
args,
*kani_fns.get(&KaniModel::Any.into()).unwrap(),
);

let automatic_harnesses = get_all_automatic_harnesses(
tcx,
verifiable_fns,
chosen,
*kani_harness_intrinsic,
base_filename,
);
Expand All @@ -111,14 +113,25 @@ impl CodegenUnits {
})
.collect::<Vec<_>>(),
);
all_harnesses.extend(automatic_harnesses);
all_harnesses.extend(automatic_harnesses.clone());

// No need to validate the units again because validation only checks stubs, and we haven't added any stubs.
debug!(?units, "CodegenUnits::new");
CodegenUnits { units, harness_info: all_harnesses, crate_info }
CodegenUnits {
units,
harness_info: all_harnesses,
crate_info,
autoharness_skipped_fns: Some(skipped),
}
}
_ => {
// Leave other reachability type handling as is for now.
CodegenUnits { units: vec![], harness_info: HashMap::default(), crate_info }
CodegenUnits {
units: vec![],
harness_info: HashMap::default(),
crate_info,
autoharness_skipped_fns: None,
}
}
}
}
Expand Down Expand Up @@ -163,6 +176,7 @@ impl CodegenUnits {
unsupported_features: vec![],
test_harnesses,
contracted_functions: gen_contracts_metadata(tcx),
autoharness_skipped_fns: self.autoharness_skipped_fns.clone(),
}
}
}
Expand Down Expand Up @@ -339,47 +353,112 @@ fn get_all_automatic_harnesses(
.collect::<HashMap<_, _>>()
}

/// Determine whether `instance` is eligible for automatic verification.
fn is_eligible_for_automatic_harness(tcx: TyCtxt, instance: Instance, any_inst: FnDef) -> bool {
// `instance` is ineligble if it is a harness or has an nonexistent/empty body
if is_proof_harness(tcx, instance) || !instance.has_body() {
return false;
}
let body = instance.body().unwrap();

// `instance` is ineligble if it is an associated item of a Kani trait implementation,
// or part of Kani contract instrumentation.
// FIXME -- find a less hardcoded way of checking the former condition (perhaps upstream PR to StableMIR).
if instance.name().contains("kani::Arbitrary")
|| instance.name().contains("kani::Invariant")
|| KaniAttributes::for_instance(tcx, instance)
.fn_marker()
.is_some_and(|m| m.as_str() == "kani_contract_mode")
{
return false;
}
/// Partition every function in the crate into (chosen, skipped), where `chosen` is a vector of the Instances for which we'll generate automatic harnesses,
/// and `skipped` is a map of function names to the reason why we skipped them.
fn automatic_harness_partition(
tcx: TyCtxt,
args: &Arguments,
kani_any_def: FnDef,
) -> (Vec<Instance>, BTreeMap<String, AutoHarnessSkipReason>) {
// If `filter_list` contains `name`, either as an exact match or a substring.
let filter_contains = |name: &str, filter_list: &[String]| -> bool {
filter_list.iter().any(|filter_name| name.contains(filter_name))
};

// If `func` is not eligible for an automatic harness, return the reason why; if it is eligible, return None.
let skip_reason = |fn_item: CrateItem| -> Option<AutoHarnessSkipReason> {
if KaniAttributes::for_def_id(tcx, fn_item.def_id()).is_kani_instrumentation() {
return Some(AutoHarnessSkipReason::KaniImpl);
}

// Each argument of `instance` must implement Arbitrary.
// Note that this function operates on StableMIR `Instance`s, which are eagerly monomorphized,
// so none of these arguments are generic.
body.arg_locals().iter().all(|arg| {
let kani_any_body =
Instance::resolve(any_inst, &GenericArgs(vec![GenericArgKind::Type(arg.ty)]))
.unwrap()
.body()
.unwrap();
if let TerminatorKind::Call { func, .. } = &kani_any_body.blocks[0].terminator.kind {
if let Some((def, args)) = func.ty(body.arg_locals()).unwrap().kind().fn_def() {
return Instance::resolve(def, &args).is_ok();
let instance = match Instance::try_from(fn_item) {
Ok(inst) => inst,
Err(_) => {
return Some(AutoHarnessSkipReason::GenericFn);
}
};

if !instance.has_body() {
return Some(AutoHarnessSkipReason::NoBody);
}
false
})
}

/// Return whether the name of `instance` is included in `fn_list`,
/// either as a substring or an exact match.
fn fn_list_contains_instance(instance: &Instance, fn_list: &[String]) -> bool {
let pretty_name = instance.name();
fn_list.contains(&pretty_name) || fn_list.iter().any(|fn_name| pretty_name.contains(fn_name))
let name = instance.name();
let body = instance.body().unwrap();

if is_proof_harness(tcx, instance)
|| name.contains("kani::Arbitrary")
|| name.contains("kani::Invariant")
{
return Some(AutoHarnessSkipReason::KaniImpl);
}

if (!args.autoharness_included_functions.is_empty()
&& !filter_contains(&name, &args.autoharness_included_functions))
|| (!args.autoharness_excluded_functions.is_empty()
&& filter_contains(&name, &args.autoharness_excluded_functions))
{
return Some(AutoHarnessSkipReason::UserFilter);
}

// Each argument of `instance` must implement Arbitrary.
// Note that we've already filtered out generic functions, so we know that each of these arguments has a concrete type.
let mut problematic_args = vec![];
for (idx, arg) in body.arg_locals().iter().enumerate() {
let kani_any_body =
Instance::resolve(kani_any_def, &GenericArgs(vec![GenericArgKind::Type(arg.ty)]))
.unwrap()
.body()
.unwrap();

let implements_arbitrary = if let TerminatorKind::Call { func, .. } =
&kani_any_body.blocks[0].terminator.kind
{
if let Some((def, args)) = func.ty(body.arg_locals()).unwrap().kind().fn_def() {
Instance::resolve(def, &args).is_ok()
} else {
false
}
} else {
false
};

if !implements_arbitrary {
// Find the name of the argument by referencing var_debug_info.
// Note that enumerate() starts at 0, while StableMIR argument_index starts at 1, hence the idx+1.
let arg_debug_info = body
.var_debug_info
.iter()
.find(|var| {
var.argument_index.is_some_and(|arg_idx| idx + 1 == usize::from(arg_idx))
})
.expect("Arguments should have corresponding var debug info");
problematic_args.push(arg_debug_info.name.to_string())
}
}
if !problematic_args.is_empty() {
return Some(AutoHarnessSkipReason::MissingArbitraryImpl(problematic_args));
}
None
};

let mut chosen = vec![];
let mut skipped = BTreeMap::new();

// FIXME: ideally, this filter would be matches!(item.kind(), ItemKind::Fn), since that would allow us to generate harnesses for top-level closures,
// c.f. https://github.com/model-checking/kani/issues/3832#issuecomment-2701671798.
// Note that filtering closures out here is a UX choice: we could instead call skip_reason() on closures,
// but the limitations in the linked issue would cause the user to be flooded with reports of "skipping" Kani instrumentation functions.
// Instead, we just pretend closures don't exist in our reporting of what we did and did not verify, which has the downside of also ignoring the user's top-level closures, but those are rare.
let crate_fns =
stable_mir::all_local_items().into_iter().filter(|item| item.ty().kind().is_fn());

for func in crate_fns {
if let Some(reason) = skip_reason(func) {
skipped.insert(func.name(), reason);
} else {
chosen.push(Instance::try_from(func).unwrap());
}
}

(chosen, skipped)
}
2 changes: 2 additions & 0 deletions kani-driver/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ pub fn merge_kani_metadata(files: Vec<KaniMetadata>) -> KaniMetadata {
unsupported_features: vec![],
test_harnesses: vec![],
contracted_functions: vec![],
autoharness_skipped_fns: None,
};
for md in files {
// Note that we're taking ownership of the original vec, and so we can move the data into the new data structure.
Expand All @@ -106,6 +107,7 @@ pub fn merge_kani_metadata(files: Vec<KaniMetadata>) -> KaniMetadata {
result.unsupported_features.extend(md.unsupported_features);
result.test_harnesses.extend(md.test_harnesses);
result.contracted_functions.extend(md.contracted_functions);
// We do not handle autoharness metadata here, since this function is not reachable from the autoharness subcommand.
}
result
}
Expand Down
33 changes: 32 additions & 1 deletion kani_metadata/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
extern crate clap;

use serde::{Deserialize, Serialize};
use std::{collections::HashSet, path::PathBuf};
use std::{
collections::{BTreeMap, HashSet},
path::PathBuf,
};
use strum_macros::{Display, EnumString};

pub use artifact::ArtifactType;
pub use cbmc_solver::CbmcSolver;
Expand Down Expand Up @@ -33,8 +37,35 @@ pub struct KaniMetadata {
pub test_harnesses: Vec<HarnessMetadata>,
/// The functions with contracts in this crate
pub contracted_functions: Vec<ContractedFunction>,
/// Metadata for the `autoharness` subcommand
pub autoharness_skipped_fns: Option<AutoHarnessSkippedFns>,
}

/// Reasons that Kani does not generate an automatic harness for a function.
#[derive(Debug, Clone, Serialize, Deserialize, Display, EnumString)]
pub enum AutoHarnessSkipReason {
/// The function is generic.
#[strum(serialize = "Generic Function")]
GenericFn,
/// A Kani-internal function: already a harness, implementation of a Kani associated item or Kani contract instrumentation functions).
#[strum(serialize = "Kani implementation")]
KaniImpl,
/// At least one of the function's arguments does not implement kani::Arbitrary
/// (The Vec<String> contains the list of argument names that do not implement it)
#[strum(serialize = "Missing Arbitrary implementation for argument(s)")]
MissingArbitraryImpl(Vec<String>),
/// The function does not have a body.
#[strum(serialize = "The function does not have a body")]
NoBody,
/// The function doesn't match the user's provided filters.
#[strum(serialize = "Did not match provided filters")]
UserFilter,
}

/// For the autoharness subcommand: map function names to the reason why we did not generate an automatic harness for that function.
/// We use an ordered map so that when we print the data, it is ordered alphabetically by function name.
pub type AutoHarnessSkippedFns = BTreeMap<String, AutoHarnessSkipReason>;

#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, PartialOrd, Ord)]
pub struct ContractedFunction {
/// The fully qualified name the user gave to the function (i.e. includes the module path).
Expand Down

0 comments on commit 6f946bb

Please sign in to comment.