Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions crates/goose-bench/src/bench_work_dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub static BUILTIN_EVAL_ASSETS: Dir = include_dir!("$CARGO_MANIFEST_DIR/src/asse

pub struct BenchmarkWorkDir {
pub base_path: PathBuf,
run_dir: PathBuf,
cwd: PathBuf,
run_name: String,
suite: Option<String>,
Expand All @@ -24,6 +25,7 @@ impl Default for BenchmarkWorkDir {
}
impl BenchmarkWorkDir {
pub fn new(work_dir_name: String, include_dirs: Vec<PathBuf>) -> Self {
let run_dir = std::env::current_dir().unwrap().canonicalize().unwrap();
let base_path = PathBuf::from(format!("./benchmark-{}", work_dir_name));
fs::create_dir_all(&base_path).unwrap();

Expand Down Expand Up @@ -54,6 +56,7 @@ impl BenchmarkWorkDir {

BenchmarkWorkDir {
base_path: base_path.clone(),
run_dir,
cwd: base_path.clone(),
run_name,
suite: None,
Expand Down Expand Up @@ -178,3 +181,9 @@ impl BenchmarkWorkDir {
}
}
}

impl Drop for BenchmarkWorkDir {
    /// Restores the process working directory to `run_dir` when the
    /// benchmark work dir goes out of scope.
    ///
    /// `run_dir` is the canonicalized current directory captured in
    /// [`BenchmarkWorkDir::new`]. Rust invokes this automatically; it is
    /// never called directly.
    ///
    /// The restore is best-effort: a failure is reported on stderr rather
    /// than panicking, because a panic inside `drop` while the thread is
    /// already unwinding from another panic aborts the whole process.
    fn drop(&mut self) {
        if let Err(err) = std::env::set_current_dir(&self.run_dir) {
            eprintln!(
                "failed to restore working directory to {}: {}",
                self.run_dir.display(),
                err
            );
        }
    }
}
Comment on lines +185 to +189
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is this drop fn used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's called automatically by Rust here

6 changes: 4 additions & 2 deletions crates/goose-bench/src/eval_suites/core/example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,17 @@ impl Evaluation for ExampleEval {
_work_dir: &mut BenchmarkWorkDir,
) -> anyhow::Result<Vec<(String, EvaluationMetric)>> {
println!("ExampleEval - run");
// let f = work_dir.fs_get(String::from("./arbitrary_dir/arbitrary_file.txt"))?;
// let _contents = fs::read_to_string(f)?;
let mut metrics = Vec::new();

let _ = agent.prompt("What can you do?".to_string()).await;

metrics.push((
"example_metric".to_string(),
EvaluationMetric::Boolean(true),
));

metrics.push(("example_count".to_string(), EvaluationMetric::Integer(42)));

Ok(metrics)
}

Expand Down
16 changes: 8 additions & 8 deletions crates/goose-cli/src/commands/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,13 @@ async fn run_eval(

async fn run_suite(suite: &str, work_dir: &mut BenchmarkWorkDir) -> anyhow::Result<SuiteResult> {
let mut suite_result = SuiteResult::new(suite.to_string());
let eval_lock = Mutex::new(());
let eval_work_dir_guard = Mutex::new(work_dir);

if let Some(evals) = EvaluationSuiteFactory::create(suite) {
for eval in evals {
let _unused = eval_lock.lock().await;
work_dir.set_eval(eval.name());
let eval_result = run_eval(eval, work_dir).await?;
let mut eval_work_dir = eval_work_dir_guard.lock().await;
eval_work_dir.set_eval(eval.name());
let eval_result = run_eval(eval, &mut eval_work_dir).await?;
suite_result.add_evaluation(eval_result);
}
}
Expand All @@ -167,13 +167,13 @@ pub async fn run_benchmark(

let mut results = BenchmarkResults::new(provider_name.clone());

let mut work_dir = BenchmarkWorkDir::new(
let suite_work_dir = Mutex::new(BenchmarkWorkDir::new(
format!("{}-{}", provider_name, goose_model),
include_dirs.clone(),
);
let suite_lock = Mutex::new(());
));

for suite in suites {
let _unused = suite_lock.lock().await;
let mut work_dir = suite_work_dir.lock().await;
work_dir.set_suite(suite);
let suite_result = run_suite(suite, &mut work_dir).await?;
results.add_suite(suite_result);
Expand Down
Loading