-
Notifications
You must be signed in to change notification settings - Fork 2.4k
fix: clean up result recording for code mode #6343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,6 @@ use anyhow::Result; | |
| use async_trait::async_trait; | ||
| use boa_engine::builtins::promise::PromiseState; | ||
| use boa_engine::module::{MapModuleLoader, Module, SyntheticModuleInitializer}; | ||
| use boa_engine::property::Attribute; | ||
| use boa_engine::{js_string, Context, JsNativeError, JsString, JsValue, NativeFunction, Source}; | ||
| use indoc::indoc; | ||
| use regex::Regex; | ||
|
|
@@ -266,6 +265,8 @@ impl ToolInfo { | |
| thread_local! { | ||
| static CALL_TX: std::cell::RefCell<Option<mpsc::UnboundedSender<ToolCallRequest>>> = | ||
| const { std::cell::RefCell::new(None) }; | ||
| static RESULT_CELL: std::cell::RefCell<Option<String>> = | ||
| const { std::cell::RefCell::new(None) }; | ||
| } | ||
|
|
||
| fn create_server_module( | ||
|
|
@@ -360,19 +361,22 @@ fn run_js_module( | |
| call_tx: mpsc::UnboundedSender<ToolCallRequest>, | ||
| ) -> Result<String, String> { | ||
| CALL_TX.with(|tx| *tx.borrow_mut() = Some(call_tx)); | ||
| RESULT_CELL.with(|cell| *cell.borrow_mut() = None); | ||
|
|
||
| let loader = Rc::new(MapModuleLoader::new()); | ||
| let mut ctx = Context::builder() | ||
| .module_loader(loader.clone()) | ||
| .build() | ||
| .map_err(|e| format!("Failed to create JS context: {e}"))?; | ||
|
|
||
| ctx.register_global_property( | ||
| js_string!("__result__"), | ||
| JsValue::undefined(), | ||
| Attribute::WRITABLE, | ||
| ) | ||
| .map_err(|e| format!("Failed to register __result__: {e}"))?; | ||
| let record_result = NativeFunction::from_copy_closure(|_this, args, _ctx| { | ||
| let value = args.first().cloned().unwrap_or(JsValue::undefined()); | ||
| RESULT_CELL.with(|cell| *cell.borrow_mut() = Some(value.display().to_string())); | ||
| Ok(value) | ||
| }); | ||
|
|
||
| ctx.register_global_callable(js_string!("record_result"), 1, record_result) | ||
| .map_err(|e| format!("Failed to register record_result: {e}"))?; | ||
|
|
||
| let mut by_server: BTreeMap<&str, Vec<&ToolInfo>> = BTreeMap::new(); | ||
| for tool in tools { | ||
|
|
@@ -384,35 +388,7 @@ fn run_js_module( | |
| loader.insert(*server_name, module); | ||
| } | ||
|
|
||
| let wrapped = { | ||
| let lines: Vec<&str> = code.trim().lines().collect(); | ||
| let last_idx = lines | ||
| .iter() | ||
| .rposition(|l| !l.trim().is_empty() && !l.trim().starts_with("//")) | ||
| .unwrap_or(0); | ||
| let last = lines.get(last_idx).map(|s| s.trim()).unwrap_or(""); | ||
|
|
||
| const NO_WRAP: &[&str] = &["import ", "export ", "function ", "class "]; | ||
| if last.contains("__result__") || NO_WRAP.iter().any(|p| last.starts_with(p)) { | ||
| code.to_string() | ||
| } else { | ||
| let before = lines[..last_idx].join("\n"); | ||
| let mut result = None; | ||
| for decl in ["const ", "let ", "var "] { | ||
| if let Some(rest) = last.strip_prefix(decl) { | ||
| if let Some(name) = rest.split('=').next().map(str::trim) { | ||
| result = Some(format!("{before}\n{last}\n__result__ = {name};")); | ||
| } | ||
| break; | ||
| } | ||
| } | ||
| result.unwrap_or_else(|| { | ||
| format!("{before}\n__result__ = {};", last.trim_end_matches(';')) | ||
| }) | ||
| } | ||
| }; | ||
|
|
||
| let user_module = Module::parse(Source::from_bytes(&wrapped), None, &mut ctx) | ||
| let user_module = Module::parse(Source::from_bytes(code), None, &mut ctx) | ||
| .map_err(|e| format!("Parse error: {e}"))?; | ||
| loader.insert("__main__", user_module.clone()); | ||
|
|
||
|
|
@@ -422,11 +398,8 @@ fn run_js_module( | |
|
|
||
| match promise.state() { | ||
| PromiseState::Fulfilled(_) => { | ||
| let result = ctx | ||
| .global_object() | ||
| .get(js_string!("__result__"), &mut ctx) | ||
| .map_err(|e| format!("Failed to get result: {e}"))?; | ||
| Ok(result.display().to_string()) | ||
| let result = RESULT_CELL.with(|cell| cell.borrow().clone()); | ||
| Ok(result.unwrap_or_else(|| "undefined".to_string())) | ||
| } | ||
| PromiseState::Rejected(err) => Err(format!("Module error: {}", err.display())), | ||
| PromiseState::Pending => Err("Module evaluation did not complete".to_string()), | ||
|
|
@@ -758,6 +731,7 @@ impl McpClientTrait for CodeExecutionClient { | |
| import { text_editor } from "developer"; | ||
| const content = text_editor({ path: "/path/to/source.md", command: "view" }); | ||
| text_editor({ path: "/path/to/dest.md", command: "write", file_text: content }); | ||
| record_result({ copied: true }); | ||
| ``` | ||
|
|
||
| EXAMPLE - Multiple operations chained: | ||
|
|
@@ -766,15 +740,14 @@ impl McpClientTrait for CodeExecutionClient { | |
| const files = shell({ command: "ls -la" }); | ||
| const readme = text_editor({ path: "./README.md", command: "view" }); | ||
| const status = shell({ command: "git status" }); | ||
| { files, readme, status } | ||
| record_result({ files, readme, status }); | ||
| ``` | ||
|
|
||
| SYNTAX: | ||
| - Import: import { tool1, tool2 } from "serverName"; | ||
| - Call: toolName({ param1: value, param2: value }) | ||
| - Result: record_result(value) - call this to return a value from the script | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah , looks simpler and better. However, I've seen at times smaller models ignore these type of instructions.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we do something similar for the main agent for the final result (in the context of recipes). there we check whether the call was actually made and if not, just remind the model to do it if it doesn't. we could add that here too - have the model generate the code, run the code, see that it doesn't call record, stick the js back into the model saying, you said this, but you didn't call record.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. obviously for a follow up |
||
| - All calls are synchronous, return strings | ||
| - Last expression is the result | ||
| - No comments in code | ||
|
|
||
| TOOL_GRAPH: Always provide tool_graph to describe the execution flow for the UI. | ||
| Each node has: tool (server/name), description (what it does), depends_on (indices of dependencies). | ||
|
|
@@ -942,7 +915,10 @@ mod tests { | |
| let client = CodeExecutionClient::new(context).unwrap(); | ||
|
|
||
| let mut args = JsonObject::new(); | ||
| args.insert("code".to_string(), Value::String("2 + 2".to_string())); | ||
| args.insert( | ||
| "code".to_string(), | ||
| Value::String("record_result(2 + 2)".to_string()), | ||
| ); | ||
|
|
||
| let result = client | ||
| .call_tool("execute_code", Some(args), CancellationToken::new()) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
value.display() doesn't properly serialize arrays/objects as JSON. Maybe I'll propose a followup later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
proposed #6495