Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions crates/goose-bench/src/bench_session.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use goose::message::Message;
use goose::conversation::Conversation;

use serde::{Deserialize, Serialize};
use std::path::PathBuf;
Expand All @@ -19,7 +19,7 @@ pub struct BenchAgentError {
pub trait BenchBaseSession: Send + Sync {
async fn headless(&mut self, message: String) -> anyhow::Result<()>;
fn session_file(&self) -> Option<PathBuf>;
fn message_history(&self) -> Vec<Message>;
fn message_history(&self) -> Conversation;
fn get_total_token_usage(&self) -> anyhow::Result<Option<i32>>;
}
// struct for managing agent-session-access. to be passed to evals for benchmarking
Expand All @@ -34,7 +34,7 @@ impl BenchAgent {
Self { session, errors }
}

pub(crate) async fn prompt(&mut self, p: String) -> anyhow::Result<Vec<Message>> {
pub(crate) async fn prompt(&mut self, p: String) -> anyhow::Result<Conversation> {
// Clear previous errors
{
let mut errors = self.errors.lock().await;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};

Expand Down
9 changes: 5 additions & 4 deletions crates/goose-bench/src/eval_suites/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
use crate::bench_session::BenchAgent;
use crate::eval_suites::EvalMetricValue;
use goose::message::{Message, MessageContent};
use goose::conversation::message::{Message, MessageContent};
use goose::conversation::Conversation;
use std::collections::HashMap;
use std::time::Instant;

/// Collect baseline metrics including execution time, tool usage, and token count
pub async fn collect_baseline_metrics(
agent: &mut BenchAgent,
prompt: String,
) -> (Vec<Message>, HashMap<String, EvalMetricValue>) {
) -> (Conversation, HashMap<String, EvalMetricValue>) {
// Initialize metrics map
let mut metrics = HashMap::new();

Expand All @@ -23,7 +24,7 @@ pub async fn collect_baseline_metrics(
"prompt_error".to_string(),
EvalMetricValue::String(format!("Error: {}", e)),
);
Vec::new()
Conversation::new_unvalidated(Vec::new())
}
};

Expand All @@ -35,7 +36,7 @@ pub async fn collect_baseline_metrics(
);

// Count tool calls
let (total_tool_calls, tool_calls_by_name) = count_tool_calls(&messages);
let (total_tool_calls, tool_calls_by_name) = count_tool_calls(messages.messages());
metrics.insert(
"total_tool_calls".to_string(),
EvalMetricValue::Integer(total_tool_calls),
Expand Down
2 changes: 1 addition & 1 deletion crates/goose-bench/src/eval_suites/utils.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::bench_work_dir::BenchmarkWorkDir;
use anyhow::{Context, Result};
use goose::message::Message;
use goose::conversation::message::Message;
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
Expand Down
4 changes: 2 additions & 2 deletions crates/goose-bench/src/eval_suites/vibes/blog_summary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl Evaluation for BlogSummary {

// Write response to file and get the text content
let response_text =
match write_response_to_file(&response, run_loc, "blog_summary_output.txt") {
match write_response_to_file(response.messages(), run_loc, "blog_summary_output.txt") {
Ok(text) => text,
Err(e) => {
println!("Warning: Failed to write blog summary output: {}", e);
Expand All @@ -59,7 +59,7 @@ impl Evaluation for BlogSummary {
));

// Check if the fetch tool was used
let used_fetch_tool = crate::eval_suites::used_tool(&response, "fetch");
let used_fetch_tool = crate::eval_suites::used_tool(response.messages(), "fetch");
metrics.push((
"used_fetch_tool".to_string(),
EvalMetricValue::Boolean(used_fetch_tool),
Expand Down
2 changes: 1 addition & 1 deletion crates/goose-bench/src/eval_suites/vibes/flappy_bird.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};
use std::fs;
Expand Down
2 changes: 1 addition & 1 deletion crates/goose-bench/src/eval_suites/vibes/goose_wiki.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};
use std::fs;
Expand Down
27 changes: 15 additions & 12 deletions crates/goose-bench/src/eval_suites/vibes/restaurant_research.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,20 @@ Present the information in order of significance or quality. Focus specifically
).await;

// Write response to file and get the text content
let response_text =
match write_response_to_file(&response, run_loc, "restaurant_research_output.txt") {
Ok(text) => text,
Err(e) => {
println!("Warning: Failed to write restaurant research output: {}", e);
// If file write fails, still continue with the evaluation
response
.last()
.map_or_else(String::new, |msg| msg.as_concat_text())
}
};
let response_text = match write_response_to_file(
response.messages(),
run_loc,
"restaurant_research_output.txt",
) {
Ok(text) => text,
Err(e) => {
println!("Warning: Failed to write restaurant research output: {}", e);
// If file write fails, still continue with the evaluation
response
.last()
.map_or_else(String::new, |msg| msg.as_concat_text())
}
};

// Convert HashMap to Vec for our metrics
let mut metrics = metrics_hashmap_to_vec(perf_metrics);
Expand All @@ -79,7 +82,7 @@ Present the information in order of significance or quality. Focus specifically
));

// Check if the fetch tool was used
let used_fetch_tool = crate::eval_suites::used_tool(&response, "fetch");
let used_fetch_tool = crate::eval_suites::used_tool(response.messages(), "fetch");
metrics.push((
"used_fetch_tool".to_string(),
EvalMetricValue::Boolean(used_fetch_tool),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::eval_suites::{
};
use crate::register_evaluation;
use async_trait::async_trait;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use rmcp::model::Role;
use serde_json::{self, Value};

Expand Down
4 changes: 2 additions & 2 deletions crates/goose-cli/src/commands/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::session::build_session;
use crate::session::SessionBuilderConfig;
use crate::{logging, session, Session};
use async_trait::async_trait;
use goose::message::Message;
use goose::conversation::Conversation;
use goose_bench::bench_session::{BenchAgent, BenchBaseSession};
use goose_bench::eval_suites::ExtensionRequirements;
use std::path::PathBuf;
Expand All @@ -18,7 +18,7 @@ impl BenchBaseSession for Session {
fn session_file(&self) -> Option<PathBuf> {
self.session_file()
}
fn message_history(&self) -> Vec<Message> {
fn message_history(&self) -> Conversation {
self.message_history()
}
fn get_total_token_usage(&self) -> anyhow::Result<Option<i32>> {
Expand Down
4 changes: 2 additions & 2 deletions crates/goose-cli/src/commands/configure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use goose::config::{
Config, ConfigError, ExperimentManager, ExtensionConfigManager, ExtensionEntry,
PermissionManager,
};
use goose::message::Message;
use goose::conversation::message::Message;
use goose::providers::{create, providers};
use rmcp::model::{Tool, ToolAnnotations};
use rmcp::object;
Expand Down Expand Up @@ -1551,7 +1551,7 @@ pub fn configure_max_turns_dialog() -> Result<(), Box<dyn Error>> {
/// Handle OpenRouter authentication
pub async fn handle_openrouter_auth() -> Result<(), Box<dyn Error>> {
use goose::config::{configure_openrouter, signup_openrouter::OpenRouterAuth};
use goose::message::Message;
use goose::conversation::message::Message;
use goose::providers::create;

// Use the OpenRouter authentication flow
Expand Down
26 changes: 15 additions & 11 deletions crates/goose-cli/src/commands/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ pub fn handle_session_export(identifier: Identifier, output_path: Option<PathBuf
};

// Generate the markdown content using the export functionality
let markdown = export_session_to_markdown(messages, &session_file_path, None);
let markdown =
export_session_to_markdown(messages.messages().clone(), &session_file_path, None);

// Output the markdown
if let Some(output) = output_path {
Expand All @@ -214,7 +215,7 @@ pub fn handle_session_export(identifier: Identifier, output_path: Option<PathBuf
/// This function handles the formatting of a complete session including headers,
/// message organization, and proper tool request/response pairing.
fn export_session_to_markdown(
messages: Vec<goose::message::Message>,
messages: Vec<goose::conversation::message::Message>,
session_file: &Path,
session_name_override: Option<&str>,
) -> String {
Expand Down Expand Up @@ -242,10 +243,12 @@ fn export_session_to_markdown(
for message in &messages {
// Check if this is a User message containing only ToolResponses
let is_only_tool_response = message.role == rmcp::model::Role::User
&& message
.content
.iter()
.all(|content| matches!(content, goose::message::MessageContent::ToolResponse(_)));
&& message.content.iter().all(|content| {
matches!(
content,
goose::conversation::message::MessageContent::ToolResponse(_)
)
});

// If the previous message had tool requests and this one is just tool responses,
// don't create a new User section - we'll attach the responses to the tool calls
Expand Down Expand Up @@ -274,11 +277,12 @@ fn export_session_to_markdown(
markdown_output.push_str("\n\n---\n\n");

// Check if this message has any tool requests, to handle the next message differently
if message
.content
.iter()
.any(|content| matches!(content, goose::message::MessageContent::ToolRequest(_)))
{
if message.content.iter().any(|content| {
matches!(
content,
goose::conversation::message::MessageContent::ToolRequest(_)
)
}) {
skip_next_if_tool_response = true;
}
}
Expand Down
24 changes: 14 additions & 10 deletions crates/goose-cli/src/commands/web.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,16 @@ use axum::{
};
use futures::{sink::SinkExt, stream::StreamExt};
use goose::agents::{Agent, AgentEvent};
use goose::message::Message as GooseMessage;
use goose::conversation::message::Message as GooseMessage;
use goose::conversation::Conversation;
use goose::session;
use serde::{Deserialize, Serialize};
use std::{net::SocketAddr, sync::Arc};
use tokio::sync::{Mutex, RwLock};
use tower_http::cors::{Any, CorsLayer};
use tracing::error;

type SessionStore = Arc<RwLock<std::collections::HashMap<String, Arc<Mutex<Vec<GooseMessage>>>>>>;
type SessionStore = Arc<RwLock<std::collections::HashMap<String, Arc<Mutex<Conversation>>>>>;
type CancellationStore = Arc<RwLock<std::collections::HashMap<String, tokio::task::AbortHandle>>>;

#[derive(Clone)]
Expand Down Expand Up @@ -319,8 +320,8 @@ async fn handle_socket(socket: WebSocket, state: AppState) {
let mut sessions = state.sessions.write().await;

// Load existing messages from JSONL file if it exists
let existing_messages = session::read_messages(&session_file)
.unwrap_or_else(|_| Vec::new());
let existing_messages =
session::read_messages(&session_file).unwrap_or_default();

let new_session = Arc::new(Mutex::new(existing_messages));
sessions.insert(session_id.clone(), new_session.clone());
Expand Down Expand Up @@ -435,21 +436,21 @@ async fn handle_socket(socket: WebSocket, state: AppState) {

async fn process_message_streaming(
agent: &Agent,
session_messages: Arc<Mutex<Vec<GooseMessage>>>,
session_messages: Arc<Mutex<Conversation>>,
session_file: std::path::PathBuf,
content: String,
sender: Arc<Mutex<futures::stream::SplitSink<WebSocket, Message>>>,
) -> Result<()> {
use futures::StreamExt;
use goose::agents::SessionConfig;
use goose::message::MessageContent;
use goose::conversation::message::MessageContent;
use goose::session;

// Create a user message
let user_message = GooseMessage::user().with_text(content.clone());

// Messages will be auto-compacted in agent.reply() if needed
let messages = {
let messages: Conversation = {
let mut session_msgs = session_messages.lock().await;
session_msgs.push(user_message.clone());
session_msgs.clone()
Expand Down Expand Up @@ -493,7 +494,10 @@ async fn process_message_streaming(
retry_config: None,
};

match agent.reply(&messages, Some(session_config), None).await {
match agent
.reply(messages.clone(), Some(session_config), None)
.await
{
Ok(mut stream) => {
while let Some(result) = stream.next().await {
match result {
Expand Down Expand Up @@ -617,7 +621,7 @@ async fn process_message_streaming(
// For now, auto-summarize in web mode
// TODO: Implement proper UI for context handling
let (summarized_messages, _) =
agent.summarize_context(&messages).await?;
agent.summarize_context(messages.messages()).await?;
{
let mut session_msgs = session_messages.lock().await;
*session_msgs = summarized_messages;
Expand All @@ -633,7 +637,7 @@ async fn process_message_streaming(
// Replace the session's message history with the compacted messages
{
let mut session_msgs = session_messages.lock().await;
*session_msgs = new_messages;
*session_msgs = Conversation::new_unvalidated(new_messages);
}

// Persist the updated messages to the JSONL file
Expand Down
Loading
Loading