diff --git a/.gitignore b/.gitignore index a829301..1d2aacb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +visual-tree-search-backend/log/* log/* shopping.json Log.md diff --git a/visual-tree-search-backend/README.md b/visual-tree-search-backend/README.md index fb44efb..fcc0f16 100644 --- a/visual-tree-search-backend/README.md +++ b/visual-tree-search-backend/README.md @@ -40,7 +40,8 @@ python run_demo_treesearch_async.py \ ``` uvicorn app.main:app --host 0.0.0.0 --port 3000 -python test/test-tree-search-ws.py +python test/test-tree-search-ws-simple.py --algorithm dfs +python test/test-tree-search-ws-simple.py --algorithm bfs ``` ## 4. end-to-end test with frontend diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/base_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/base_agent.py new file mode 100644 index 0000000..defad52 --- /dev/null +++ b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/base_agent.py @@ -0,0 +1,801 @@ +import time +from typing import Any, Optional, Tuple, List +import os +from openai import OpenAI +from datetime import datetime +import aiohttp +from dotenv import load_dotenv +load_dotenv() + +from .lats_node import LATSNode, Observation +from ...core_async.config import AgentConfig + +from ...webagent_utils_async.action.highlevel import HighLevelActionSet +from ...webagent_utils_async.utils.playwright_manager import AsyncPlaywrightManager, setup_playwright +from .tree_vis import RED, better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree +from .trajectory_score import create_llm_prompt, score_trajectory_with_openai +from ...replay_async import generate_feedback, playwright_step_execution, locate_element_from_action +from ...webagent_utils_async.browser_env.observation import extract_page_info, observe_features +from ...webagent_utils_async.action.prompt_functions import generate_actions_with_observation +from 
class BaseAgent:
    """Shared state and building blocks for the tree-search web agents.

    Subclasses (for example ``LATSAgent``) supply their own ``node_selection``
    and search loop; this class provides browser reset/replay, child
    generation, scoring, rollout and the websocket notifications.

    Every ``websocket`` parameter is optional. When one is given, progress is
    reported through ``await websocket.send_json(...)``; several notifiers
    fall back to printing when it is omitted.
    """

    # no need to pass an initial playwright_manager to the agent class
    def __init__(
        self,
        starting_url: str,
        messages: list[dict[str, Any]],
        goal: str,
        images: list,
        playwright_manager: "AsyncPlaywrightManager",
        config: "AgentConfig",
    ):
        """Store the task description and create the root of the search tree.

        Args:
            starting_url: Page every replay starts from.
            messages: Conversation history; the goal message is appended to
                this list in place.
            goal: Natural-language goal of the task.
            images: Image URLs, converted with ``urls_to_images``.
            playwright_manager: Browser manager; replaced on every
                ``_reset_browser`` call.
            config: Agent configuration.

        Raises:
            KeyError: If ``ACCOUNT_RESET_URL`` is missing from the environment.
        """
        # no action grounding model, just one step to generate both the
        # natural language description and the action at the same time
        self.starting_url = starting_url
        self.goal = goal
        self.image_urls = images
        self.images = urls_to_images(self.image_urls)

        # TODO: check whether self.messages are needed
        self.messages = messages
        # The goal message is the same with or without reference images.
        self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"})

        self.playwright_manager = playwright_manager
        self.config = config

        # set bid, only click, fill, hover, drag and draw
        self.agent_type = ["bid"]
        self.action_set = HighLevelActionSet(
            subsets=self.agent_type, strict=False, multiaction=False, demo_mode="default"
        )
        self.root_node = LATSNode(
            natural_language_description=None,
            action=None,
            prob=None,
            element=None,
            goal=self.goal,
            parent=None,
        )
        self.goal_finished = False
        self.result_node = None
        self.reset_url = os.environ["ACCOUNT_RESET_URL"]

    # ------------------------------------------------------------------
    # small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _timestamp() -> str:
        """Return the current naive-UTC time in ISO format for payloads."""
        return datetime.utcnow().isoformat()

    @staticmethod
    def _parent_id(node) -> Optional[int]:
        """Return ``id(node.parent)``, or ``None`` when the node has no parent."""
        parent = getattr(node, "parent", None)
        return id(parent) if parent is not None else None

    @staticmethod
    def _node_summary(node, level: int, terminal_node) -> dict:
        """Build the per-node dict used in trajectory websocket payloads."""
        return {
            "id": id(node),
            "level": level,
            "action": node.action if node.action else "ROOT",
            "description": node.natural_language_description,
            "visits": node.visits,
            "value": float(f"{node.value:.3f}") if hasattr(node, 'value') else None,
            "reward": float(f"{node.reward:.3f}") if hasattr(node, 'reward') else None,
            "is_terminal": node.is_terminal,
            "feedback": node.feedback if hasattr(node, 'feedback') else None,
            "is_root": not hasattr(node, 'parent') or node.parent is None,
            "is_terminal_node": node == terminal_node,
        }

    # ------------------------------------------------------------------
    # tree inspection
    # ------------------------------------------------------------------
    def get_path_to_root(self, node: "LATSNode") -> "List[LATSNode]":
        """Return the nodes from the root down to ``node`` (both inclusive)."""
        path = []
        current = node
        while current is not None:
            path.append(current)
            current = current.parent
        path.reverse()
        return path

    def _get_tree_data(self):
        """Return one dict per node of the whole tree, for tree updates."""
        return [
            {
                "id": id(node),
                "parent_id": self._parent_id(node),
                "action": node.action if node.action else "ROOT",
                "description": node.natural_language_description,
                "depth": node.depth,
                "is_terminal": node.is_terminal,
                "value": node.value,
                "visits": node.visits,
                "feedback": node.feedback,
                "reward": node.reward,
            }
            for node in collect_all_nodes(self.root_node)
        ]

    ## TODO: newly added, debug needed
    async def remove_simulated_trajectory(self, starting_node, terminal_node: "LATSNode", websocket=None):
        """Announce that the simulated nodes below ``starting_node`` are gone.

        The reported trajectory runs from the first node after
        ``starting_node`` down to ``terminal_node``. If ``starting_node`` is
        not an ancestor of ``terminal_node``, the walk stops at the root
        instead of dereferencing ``None``.
        """
        path = []
        current = terminal_node
        while current is not None and current is not starting_node:
            path.append(current)
            current = current.parent

        trajectory_data = [
            self._node_summary(node, level, terminal_node)
            for level, node in enumerate(reversed(path))
        ]
        # Forward the caller's websocket; passing None here would mean the
        # removal event never reaches the client.
        await self.websocket_simulation_removed(trajectory_data, websocket=websocket)

    def _get_trajectory_data(self, terminal_node: "LATSNode"):
        """Return summaries for the root-to-``terminal_node`` path."""
        path = self.get_path_to_root(terminal_node)
        return [
            self._node_summary(node, level, terminal_node)
            for level, node in enumerate(path)
        ]

    # ------------------------------------------------------------------
    # browser / account reset
    # ------------------------------------------------------------------
    async def _reset_account(self, websocket=None) -> None:
        """Call the account-reset endpoint and report the outcome (best effort)."""
        try:
            # Use aiohttp instead of curl
            async with aiohttp.ClientSession() as session:
                headers = {'Connection': 'close'}  # Similar to curl -N
                async with session.get(self.reset_url, headers=headers) as response:
                    if response.status == 200:
                        data = await response.json()
                        print(f"Account reset successful: {data}")
                        if websocket:
                            await websocket.send_json({
                                "type": "account_reset",
                                "status": "success",
                                "data": data,
                                "timestamp": self._timestamp(),
                            })
                    else:
                        error_msg = f"Account reset failed with status {response.status}"
                        print(error_msg)
                        if websocket:
                            await websocket.send_json({
                                "type": "account_reset",
                                "status": "failed",
                                "reason": error_msg,
                                "timestamp": self._timestamp(),
                            })
        except Exception as e:
            # A failed reset must not abort the search; report and continue.
            print(f"Error during account reset: {e}")
            if websocket:
                await websocket.send_json({
                    "type": "account_reset",
                    "status": "failed",
                    "reason": str(e),
                    "timestamp": self._timestamp(),
                })

    async def _reset_browser(self, websocket=None) -> Tuple[Optional[str], Optional[str]]:
        """Close the browser, optionally reset the account, and start afresh.

        Returns:
            ``(live_browser_url, session_id)``. Both are ``None`` unless
            ``config.browser_mode`` is ``"browserbase"``, and also when the
            browser setup fails.
        """
        # The manager may legitimately be absent before the first reset.
        if self.playwright_manager is not None:
            await self.playwright_manager.close()

        ## reset account using api-based account reset
        if self.config.account_reset:
            await self._reset_account(websocket)

        try:
            # Create new playwright manager
            self.playwright_manager = await setup_playwright(
                storage_state=self.config.storage_state,
                headless=self.config.headless,
                mode=self.config.browser_mode,
            )
            page = await self.playwright_manager.get_page()
            live_browser_url = None
            session_id = None
            if self.config.browser_mode == "browserbase":
                live_browser_url = await self.playwright_manager.get_live_browser_url()
                session_id = await self.playwright_manager.get_session_id()
            await page.goto(self.starting_url, wait_until="networkidle")

            if websocket:
                if self.config.storage_state:
                    message = f"Browser successfully initialized with storage state file: {self.config.storage_state}"
                else:
                    message = "Browser successfully initialized"
                await websocket.send_json({
                    "type": "browser_setup",
                    "status": "success",
                    "message": message,
                    "live_browser_url": live_browser_url,
                    "session_id": session_id,
                    "timestamp": self._timestamp(),
                })
            return live_browser_url, session_id
        except Exception as e:
            print(f"Error setting up browser: {e}")
            if websocket:
                await websocket.send_json({
                    "type": "browser_setup",
                    "status": "failed",
                    "reason": str(e),
                    "timestamp": self._timestamp(),
                })
            return None, None

    # ------------------------------------------------------------------
    # websocket notifications
    # ------------------------------------------------------------------
    # TODO: if no websocket, print the json data
    # TODO: do we need node expansion data?
    # TODO: four types of websocket messages, do we need more type of websocket messages?
    async def websocket_iteration_start(self, iteration, websocket=None):
        """Send an ``iteration_start`` message (no-op without a websocket)."""
        if websocket:
            await websocket.send_json({
                "type": "iteration_start",
                "iteration": iteration,
                "timestamp": self._timestamp(),
            })

    async def websocket_step_start(self, step, step_name, websocket=None):
        """Send a ``step_start`` message (no-op without a websocket)."""
        if websocket:
            await websocket.send_json({
                "type": "step_start",
                "step": step,
                "step_name": step_name,
                "timestamp": self._timestamp(),
            })

    # node selected is used to highlight node
    async def websocket_node_selection(self, node, websocket=None, type="node_selected"):
        """Report the selected node; does nothing when ``node`` is ``None``.

        Selection can return ``None`` when nothing is left to explore, and
        callers report the selection before checking for that.
        """
        if node is None:
            return
        parent_id = self._parent_id(node)
        if websocket:
            await websocket.send_json({
                "type": type,
                "node_id": id(node),
                "parent_id": parent_id,
                "action": node.action,
                "description": node.natural_language_description,
                "timestamp": self._timestamp(),
            })
        else:
            print(f"{type}: {GREEN}{id(node)}{RESET}")
            print(f"Node parent: {GREEN}{parent_id}{RESET}")
            print(f"Node action: {GREEN}{node.action}{RESET}")
            print(f"Node description: {GREEN}{node.natural_language_description}{RESET}")

    async def websocket_tree_update(self, type, tree_data, websocket=None):
        """Send (or print) a full-tree snapshot under the message type ``type``."""
        if websocket:
            await websocket.send_json({
                "type": type,
                "tree": tree_data,
                "timestamp": self._timestamp(),
            })
        else:
            print(f"{type} updated: {tree_data}")

    async def websocket_node_created(self, child, node, websocket=None):
        """Report that ``child`` was added under ``node``."""
        if websocket:
            await websocket.send_json({
                "type": "node_created",
                "node_id": id(child),
                "parent_id": id(node),
                "action": child.action,
                "description": child.natural_language_description,
                "timestamp": self._timestamp(),
            })
        else:
            print(f"Node created: {GREEN}{id(child)}{RESET}")
            print(f"Node parent: {GREEN}{id(node)}{RESET}")
            print(f"Node action: {GREEN}{child.action}{RESET}")
            print(f"Node description: {GREEN}{child.natural_language_description}{RESET}")

    ## node simulated
    ## message log and d3 visualization add different information
    async def websocket_node_simulated(self, child, node, websocket=None):
        """Report a node produced during simulation (not attached to the tree)."""
        if websocket:
            await websocket.send_json({
                "type": "node_simulated",
                "node_id": id(child),
                "parent_id": id(node),
                "action": child.action,
                "description": child.natural_language_description,
                "timestamp": self._timestamp(),
            })
        else:
            print(f"Node simulated: {GREEN}{id(child)}{RESET}")
            print(f"Node parent: {GREEN}{id(node)}{RESET}")
            print(f"Node action: {GREEN}{child.action}{RESET}")
            print(f"Node description: {GREEN}{child.natural_language_description}{RESET}")

    async def websocket_simulation_removed(self, trajectory, websocket=None):
        """Report that a simulated trajectory has been discarded."""
        if websocket:
            await websocket.send_json({
                "type": "removed_simulation",
                "trajectory": trajectory,
                "timestamp": self._timestamp(),
            })
        else:
            print(f"Simulation removed: {GREEN}{trajectory}{RESET}")

    async def websocket_simulation_result(self, reward, terminal_node, websocket=None):
        """Report the reward of a simulation and the node it ended on."""
        if websocket:
            await websocket.send_json({
                "type": "simulation_result",
                "reward": reward,
                "terminal_node_id": id(terminal_node),
                "terminal_node_parent_id": self._parent_id(terminal_node),
                "terminal_node_action": terminal_node.action,
                "terminal_node_description": terminal_node.natural_language_description,
                "timestamp": self._timestamp(),
            })
        else:
            print(f"Simulation reward: {GREEN}{reward}{RESET}")
            print(f"Simulation terminal node: {GREEN}{terminal_node}{RESET}")

    async def websocket_search_complete(self, status, score, path, websocket=None):
        """Send the final ``search_complete`` message (no-op without a websocket)."""
        if websocket:
            await websocket.send_json({
                "type": "search_complete",
                "status": status,
                "score": score,
                "path": path,
                "timestamp": self._timestamp(),
            })

    # ------------------------------------------------------------------
    # search primitives
    # ------------------------------------------------------------------
    # shared, not implemented: BFS, DFS and LATS each have their own node selection logic
    async def node_selection(self, node, websocket=None):
        """Pick the next node to expand; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement node_selection()"
        )

    async def node_expansion(self, node: "LATSNode", websocket=None) -> None:
        """Generate candidate actions for ``node`` and attach them as children."""
        children_state = await self.generate_children(node, websocket)
        for child_state in children_state:
            child = LATSNode(
                natural_language_description=child_state["natural_language_description"],
                action=child_state["action"],
                prob=child_state["prob"],
                # "element" is only set when exactly one call was parsed.
                element=child_state.get("element"),
                goal=node.goal,
                parent=node,
            )
            node.children.append(child)
            await self.websocket_node_created(child, node, websocket=websocket)

    # node evaluation
    # change the node evaluation to use the new prompt
    async def node_children_evaluation(self, node: "LATSNode") -> None:
        """Score every child of ``node``; terminal children score 0.

        Scores are written back only after all children were scored, so an
        exception while scoring leaves every child untouched.
        """
        scores = []
        print(f"{GREEN}-- total {len(node.children)} children to evaluate:{RESET}")
        for i, child in enumerate(node.children):
            print(f"{GREEN}--- evaluating child {i+1}...{RESET}")
            if child.is_terminal:
                score = 0
            else:
                trajectory = child.get_trajectory()
                prompt = create_llm_prompt(trajectory, self.goal)
                result = score_trajectory_with_openai(prompt, openai_client, self.config.evaluation_model)
                score = result["overall_score"]
            scores.append(score)

        for child, score in zip(node.children, scores):
            child.value = score
            child.reward = score

    async def node_evaluation(self, node: "LATSNode") -> None:
        """Score the root-to-``node`` trajectory and store it on the node.

        Terminal nodes get 0. A scoring failure is logged and recorded as
        ``-inf``; any other failure is logged and leaves the node unchanged.
        """
        try:
            # Trajectory for scoring, skipping the root node
            trajectory = [
                {
                    "natural_language_description": n.natural_language_description,
                    "action": n.action,
                    "feedback": n.feedback,
                }
                for n in self.get_path_to_root(node)[1:]
            ]

            try:
                if node.is_terminal:
                    score = 0
                else:
                    prompt = create_llm_prompt(trajectory, self.goal)
                    result = score_trajectory_with_openai(
                        prompt,
                        openai_client,
                        model=self.config.evaluation_model,
                    )
                    score = result["overall_score"]
            except Exception as e:
                print(f"Error scoring node {id(node)}: {str(e)}")
                score = float('-inf')

            node.value = score
            node.reward = score
        except Exception as e:
            print(f"Error in node evaluation: {str(e)}")

    # shared
    ## TODO: check the logic of updating value/ reward, is the input value?
    def backpropagate(self, node: "LATSNode", value: float) -> None:
        """Fold ``value`` into the running mean of ``node`` and all ancestors."""
        current = node
        while current is not None:
            current.visits += 1
            current.value = (current.value * (current.visits - 1) + value) / current.visits
            current = current.parent

    # shared
    async def simulation(self, node: "LATSNode", max_depth: int = 2, num_simulations=1, websocket=None) -> "tuple[float, LATSNode]":
        """Print the current tree and run one rollout from ``node``.

        ``num_simulations`` is accepted for interface compatibility but is
        currently unused.
        """
        print("print the trajectory")
        print_trajectory(node)
        print("print the entire tree")
        print_entire_tree(self.root_node)
        return await self.rollout(node, max_depth=max_depth, websocket=websocket)

    # refactor simulation, rollout, send_completion_request methods
    # TODO: check, score as reward and then update value of the starting node?
    async def rollout(self, node: "LATSNode", max_depth: int = 2, websocket=None) -> "tuple[float, LATSNode]":
        """Replay the path to ``node``, simulate onwards, and score the result.

        Returns:
            ``(score, node)``. If replaying a step fails, returns
            ``(0, failed_step_node)`` instead.
        """
        # Reset browser state
        await self._reset_browser()
        path = self.get_path_to_root(node)

        print("execute path")
        trajectory = []
        for n in path[1:]:  # Skip root node
            success = await playwright_step_execution(
                n,
                self.goal,
                self.playwright_manager,
                is_replay=False,
                log_folder=self.config.log_folder,
            )
            if not success:
                return 0, n
            if not n.feedback:
                n.feedback = await generate_feedback(
                    self.goal,
                    n.natural_language_description,
                    self.playwright_manager,
                )
            trajectory.append({
                "action": n.action,
                "feedback": n.feedback,
            })

        ## call the prompt agent
        print("current depth: ", len(path) - 1)
        print("max depth: ", self.config.max_depth)

        # Continue from `node` itself: the replay-loop variable is unbound
        # when `node` is the root, and equals `node` otherwise.
        trajectory, terminal_node = await self.send_completion_request(
            self.goal, len(path) - 1, node=node, trajectory=trajectory, websocket=websocket
        )
        print("print the trajectory")
        print_trajectory(terminal_node)
        print("print the entire tree")
        print_entire_tree(self.root_node)

        page = await self.playwright_manager.get_page()
        page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder)

        messages = [{"role": "user", "content": f"Action is: {n.action}"} for n in path[1:]]
        goal_finished, confidence_score = goal_finished_evaluator(
            messages,
            openai_client,
            self.goal,
            page_info['screenshot'],
        )
        print("evaluating")

        score = confidence_score if goal_finished else 0
        await self.remove_simulated_trajectory(starting_node=node, terminal_node=terminal_node, websocket=websocket)

        return score, node

    # TODO: decide whether to keep the tree update
    async def send_completion_request(self, plan, depth, node, trajectory=None, websocket=None, request_retries: int = 1):
        """Greedily simulate one action at a time until the goal or max depth.

        Args:
            plan: Passed through unchanged to the recursive calls.
            depth: Current depth; simulation stops at ``config.max_depth``.
            node: Node the simulation continues from.
            trajectory: ``{"action", "feedback"}`` dicts so far; extended in
                place. Defaults to a fresh list.
            websocket: Optional websocket for progress messages.
            request_retries: How many times the whole request may be retried
                at this depth after every per-action attempt failed.

        Returns:
            ``(trajectory, last_node)``.
        """
        import asyncio  # local import: only the coroutine-friendly sleep is needed

        if trajectory is None:
            trajectory = []

        print("print the trajectory")
        print_trajectory(node)
        print("print the entire tree")
        print_entire_tree(self.root_node)
        if websocket:
            await websocket.send_json({
                "type": "trajectory_update",
                "trajectory": self._get_trajectory_data(node),
                "timestamp": self._timestamp(),
            })

        if depth >= self.config.max_depth:
            return trajectory, node

        context = await self.playwright_manager.get_context()
        page = await self.playwright_manager.get_page()
        # Give the page time to settle without blocking the event loop.
        await asyncio.sleep(3)
        page_info = await extract_page_info(page, fullpage=True, log_folder=self.config.log_folder)
        updated_actions = await extract_top_actions(
            trajectory, self.goal, self.images, page_info, self.action_set, openai_client,
            features=["axtree"], elements_filter="som", branching_factor=self.config.branching_factor,
            log_folder=self.config.log_folder, fullpage=True,
            action_generation_model=self.config.action_generation_model,
            action_grounding_model=self.config.action_grounding_model,
        )
        if not updated_actions:
            # Nothing proposed: the simulation ends here.
            return trajectory, node
        next_action = updated_actions[0]
        retry_count = getattr(self.config, 'retry_count', 1)  # Default retries if not set

        for attempt in range(retry_count):
            try:
                # Convert action to Python code
                _, function_calls = self.action_set.to_python_code(next_action["action"])

                # Locate element
                if len(function_calls) == 1:
                    for _function_name, function_args in function_calls:
                        extracted_number = parse_function_args(function_args)
                        next_action["element"] = await locate_element(page, extracted_number)

                # Execute action
                await execute_action(next_action, self.action_set, page, context, self.goal, page_info['interactive_elements'],
                                     self.config.log_folder)
                feedback = await capture_post_action_feedback(page, next_action, self.goal, self.config.log_folder)
                trajectory.append({'action': next_action['action'], 'feedback': feedback})
                action_str = next_action["action"]

                print(f"The action is: {action_str} - The action result is: {feedback}")

                # Check if goal is finished
                messages = [{"role": "system", "content": "The goal is {}, Is the overall goal finished?".format(self.goal)}]
                for item in trajectory:
                    messages.append({"role": "user", "content": 'action is: {}'.format(item['action'])})
                    messages.append({"role": "user", "content": 'action feedback is: {}'.format(item['feedback'])})

                goal_finished = await is_goal_finished(messages, openai_client)

                # Simulated node: linked to its parent but deliberately not
                # appended to parent.children.
                new_node = LATSNode(
                    natural_language_description=next_action["natural_language_description"],
                    action=next_action["action"],
                    prob=next_action["prob"],
                    element=next_action.get("element"),
                    goal=node.goal,
                    parent=node,
                )
                await self.websocket_node_simulated(new_node, node, websocket=websocket)

                if goal_finished:
                    return trajectory, new_node

                return await self.send_completion_request(plan, depth + 1, new_node, trajectory, websocket)

            except Exception as e:
                print(f"Attempt {attempt + 1} failed with error: {e}")
                if attempt + 1 == retry_count and request_retries > 0:
                    print("Max retries reached. Skipping this step and retrying the whole request.")
                    # Retry from the same state, with a bounded budget so a
                    # persistent failure cannot recurse forever.
                    return await self.send_completion_request(
                        plan, depth, node, trajectory, websocket, request_retries - 1
                    )

        # If all retries and retries of retries fail, return the current trajectory and node
        return trajectory, node

    # simple search agent generate children method
    # TODO: clean up generate children, no need to put so much information in the websocket
    async def generate_children(self, node: "LATSNode", websocket=None) -> list[dict]:
        """Replay the path to ``node`` and return candidate next actions.

        Returns:
            Action dicts as produced by ``extract_top_actions`` (with
            ``"element"`` filled in when it could be located). The list is
            empty when a replay step fails, when a ``FINISH`` action has
            probability above 0.2, or when no action remains; the affected
            node is marked terminal in each case.
        """
        import asyncio  # local import: only the coroutine-friendly sleep is needed

        await self._reset_browser(websocket)
        path = self.get_path_to_root(node)

        # Execute path
        for n in path[1:]:  # Skip root node
            success = await playwright_step_execution(
                n,
                self.goal,
                self.playwright_manager,
                is_replay=False,
                log_folder=self.config.log_folder,
            )
            if not success:
                n.is_terminal = True
                return []

            if not n.feedback:
                n.feedback = await generate_feedback(
                    self.goal,
                    n.natural_language_description,
                    self.playwright_manager,
                )

        # Give the page time to settle without blocking the event loop.
        await asyncio.sleep(3)
        page = await self.playwright_manager.get_page()
        page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder)

        history = [
            {"natural_language_description": n.natural_language_description, "action": n.action, "feedback": n.feedback}
            for n in path[1:]
        ]
        next_actions = await extract_top_actions(
            history,
            self.goal,
            self.images,
            page_info,
            self.action_set,
            openai_client,
            features=self.config.features,
            elements_filter=self.config.elements_filter,
            branching_factor=self.config.branching_factor,
            log_folder=self.config.log_folder,
            fullpage=self.config.fullpage,
            action_generation_model=self.config.action_generation_model,
            action_grounding_model=self.config.action_grounding_model,
        )

        children = []
        for action in next_actions:
            if action["action"] == "FINISH":
                if action["prob"] > 0.2:
                    node.is_terminal = True
                    if websocket:
                        await websocket.send_json({
                            "type": "node_terminal",
                            "node_id": id(node),
                            "reason": "finish_action",
                            "timestamp": self._timestamp(),
                        })
                    return []
                # Low-confidence FINISH: drop it and keep the other actions.
                continue

            page = await self.playwright_manager.get_page()
            _, function_calls = self.action_set.to_python_code(action["action"])

            if len(function_calls) == 1:
                try:
                    for _function_name, function_args in function_calls:
                        extracted_number = parse_function_args(function_args)
                        action["element"] = await locate_element(page, extracted_number)
                except Exception:
                    # An unlocatable element does not invalidate the action.
                    action["element"] = None
            children.append(action)

        if not children:
            node.is_terminal = True

        return children
b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/lats_agent.py index 5e70f28..24027db 100644 --- a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/lats_agent.py +++ b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/lats_agent.py @@ -1,112 +1,14 @@ -"""Language-based Action Tree Search (LATS) Agent implementation.""" - -import time from typing import Any, Optional, Tuple, List -import os -from openai import OpenAI from datetime import datetime -import aiohttp from dotenv import load_dotenv load_dotenv() -from .lats_node import LATSNode, Observation -from ...core_async.config import AgentConfig - -from ...webagent_utils_async.action.highlevel import HighLevelActionSet -from ...webagent_utils_async.utils.playwright_manager import AsyncPlaywrightManager, setup_playwright from .tree_vis import RED, better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree -from .trajectory_score import create_llm_prompt, score_trajectory_with_openai -from ...replay_async import generate_feedback, playwright_step_execution, locate_element_from_action -from ...webagent_utils_async.browser_env.observation import extract_page_info, observe_features -from ...webagent_utils_async.action.prompt_functions import generate_actions_with_observation -from ...webagent_utils_async.evaluation.feedback import generate_feedback_with_screenshot -from ...webagent_utils_async.utils.utils import urls_to_images - - -from ...webagent_utils_async.utils.utils import parse_function_args, locate_element -from ...evaluation_async.evaluators import goal_finished_evaluator -from ...webagent_utils_async.action.prompt_functions import extract_top_actions -from ...webagent_utils_async.browser_env.observation import extract_page_info from .lats_node import LATSNode -from .tree_vis import better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree -from .trajectory_score import create_llm_prompt, 
score_trajectory_with_openai -from ...webagent_utils_async.action.utils import execute_action -from ...webagent_utils_async.action.prompt_functions import extract_top_actions, is_goal_finished -from ...webagent_utils_async.browser_env.observation import extract_page_info -from ...webagent_utils_async.evaluation.feedback import capture_post_action_feedback - -openai_client = OpenAI() - -## TODO: add best_path_update - -class LATSAgent: - """ - Language-based Action Tree Search Agent implementation. - - This agent uses MCTS-like tree search to find optimal action sequences for web navigation tasks. - - Attributes: - starting_url (str): The initial URL to start from - model_name (str): Name of the language model to use - goal (str): The goal state to achieve - playwright_manager (PlaywrightManager): Manager for browser automation - num_simulations (int): Number of simulations to run - exploration_weight (float): Exploration vs exploitation trade-off parameter - """ - - def __init__( - self, - starting_url: str, - messages: list[dict[str, Any]], - goal: str, - images: list, - playwright_manager: AsyncPlaywrightManager, - config: AgentConfig, - ): - """Initialize the LATS Agent.""" - # no action grounding model, just one step to geneate both action natural language description and action at the same time - self.starting_url = starting_url - self.goal = goal - self.image_urls = images - self.images = urls_to_images(self.image_urls) - - self.messages = messages - if len(images) == 0: - self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"}) - else: - self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"}) - - self.playwright_manager = playwright_manager - - self.config = config - - # set bid, only click, fill, hoover, drag and draw - self.agent_type = ["bid"] - self.action_set = HighLevelActionSet( - subsets=self.agent_type, strict=False, multiaction=False, demo_mode="default" - ) - self.root_node = LATSNode( - 
natural_language_description=None, - action=None, - prob=None, - element=None, - goal=self.goal, - parent=None - ) - self.goal_finished = False - self.result_node = None - self.reset_url = os.environ["ACCOUNT_RESET_URL"] +from .base_agent import BaseAgent +class LATSAgent(BaseAgent): async def run(self, websocket=None) -> list[LATSNode]: - """ - Run the LATS search and return the best path found. - - Args: - websocket: Optional WebSocket connection for sending updates - - Returns: - list[LATSNode]: Best path from root to terminal node - """ if websocket: await websocket.send_json({ "type": "search_status", @@ -117,792 +19,95 @@ async def run(self, websocket=None) -> list[LATSNode]: best_node = await self.lats_search(websocket) print_trajectory(best_node) - - if websocket: - # trajectory_data = self._get_trajectory_data(best_node) - # await websocket.send_json({ - # "type": "trajectory_update", - # "trajectory": trajectory_data, - # "timestamp": datetime.utcnow().isoformat() - # }) - # TODO: use score instead of reward to determine success - await websocket.send_json({ - "type": "search_complete", - "status": "success" if best_node.reward == 1 else "partial_success", - "score": best_node.reward, - "path": best_node.get_trajectory(), - "timestamp": datetime.utcnow().isoformat() - }) - - return best_node.get_trajectory() - - async def lats_search(self, websocket=None) -> LATSNode: - """ - Perform the main LATS search algorithm. 
- - Args: - websocket: Optional WebSocket connection for sending updates - - Returns: - LATSNode: Best terminal node found - """ - print(f"") - print(f"{GREEN}START SEARCH{RESET}") - - terminal_nodes = [] - - for i in range(self.config.iterations): - if websocket: - await websocket.send_json({ - "type": "iteration_start", - "iteration": i + 1, - "timestamp": datetime.utcnow().isoformat() - }) - - print(f"") - print(f"") - print(f"Iteration {i + 1}...") - - # Step 1: Selection with websocket update - if websocket: - await websocket.send_json({ - "type": "step_start", - "step": 1, - "step_name": "selection", - "iteration": i + 1, - "timestamp": datetime.utcnow().isoformat() - }) - - node = self.select_node(self.root_node) - if websocket: - await websocket.send_json({ - "type": "node_selected", - "node_id": id(node), - "timestamp": datetime.utcnow().isoformat() - }) - - if node is None: - print("All paths lead to terminal nodes with reward 0. Ending search.") - break - - print(f"{GREEN}Tree:{RESET}") - better_print(node=self.root_node, selected_node=node) - print(f"") - - # Step 2: Expansion with websocket update - if websocket: - await websocket.send_json({ - "type": "step_start", - "step": 2, - "step_name": "expansion", - "iteration": i + 1, - "timestamp": datetime.utcnow().isoformat() - }) - - await self.expand_node(node, websocket) - - while node is not None and node.is_terminal and not self.goal_finished: - print(f"Depth limit node found at iteration {i + 1}, reselecting...") - node = self.select_node(self.root_node) - if node is not None: - await self.expand_node(node, websocket) - - if node is None: - # all the nodes are terminal, stop the search - print(f"{RED}All nodes are terminal, stopping search{RESET}") - break - - if self.goal_finished: - print(f"{RED}Goal finished, stopping search{RESET}") - break - - print(f"{GREEN}Tree:{RESET}") - better_print(self.root_node) - print(f"") - tree_data = self._get_tree_data() - await websocket.send_json({ - "type": 
"tree_update", - "tree": tree_data, - "timestamp": datetime.utcnow().isoformat() - }) - # Step 3: Evaluation - print(f"") - print(f"{GREEN}Step 3: evaluation{RESET}") - if websocket: - await websocket.send_json({ - "type": "step_start", - "step": 3, - "step_name": "evaluation", - "iteration": i + 1, - "timestamp": datetime.utcnow().isoformat() - }) - await self.evaluate_node(node) + async def lats_search(self, websocket=None): + terminal_nodes = [] - print(f"{GREEN}Tree:{RESET}") - better_print(self.root_node) - print(f"") - ## send tree update, since evaluation is added to the tree - if websocket: + for i in range(self.config.iterations): + await self.websocket_iteration_start(i, websocket=websocket) + + print(f"Iteration {i}...") + + # Step 1: Node Selection + ## TODO: move websocket node selection into node_selection method + print(f"{GREEN}Step 1: node selection{RESET}") + await self.websocket_step_start(step=1, step_name="node_selection", websocket=websocket) + node = await self.node_selection(self.root_node) + await self.websocket_node_selection(node, websocket=websocket) + + if node is None: + print("All paths lead to terminal nodes with reward 0. 
Ending search.") + break + + # Step 2: Node Expansion + print(f"{GREEN}Step 2: node expansion{RESET}") + await self.websocket_step_start(step=2, step_name="node_expansion", websocket=websocket) + await self.node_expansion(node, websocket) + if node is None: + # all the nodes are terminal, stop the search + print(f"{RED}All nodes are terminal, stopping search{RESET}") + break tree_data = self._get_tree_data() - await websocket.send_json({ - "type": "tree_update", - "tree": tree_data, - "timestamp": datetime.utcnow().isoformat() - }) - - - # Step 4: Simulation - print(f"{GREEN}Step 4: simulation{RESET}") - if websocket: - await websocket.send_json({ - "type": "step_start", - "step": 4, - "step_name": "simulation", - "iteration": i + 1, - "timestamp": datetime.utcnow().isoformat() - }) - ## always = 1 - reward, terminal_node = await self.simulate(max(node.children, key=lambda child: child.value), max_depth=self.config.max_depth, num_simulations=1, websocket=websocket) - terminal_nodes.append(terminal_node) - - if reward == 1: - return terminal_node + if websocket: + await self.websocket_tree_update(type="tree_update_node_expansion", websocket=websocket, tree_data=tree_data) + else: + print_entire_tree(self.root_node) - # Step 5: Backpropagation - print(f"{GREEN}Step 5: backpropagation{RESET}") - if websocket: - await websocket.send_json({ - "type": "step_start", - "step": 5, - "step_name": "backpropagation", - "timestamp": datetime.utcnow().isoformat() - }) - self.backpropagate(terminal_node, reward) - print(f"{GREEN}Tree:{RESET}") - better_print(self.root_node) - print(f"") - # Send tree update after each iteration - if websocket: + # Step 3: Evaluation + print(f"{GREEN}Step 3: node chilren evaluation{RESET}") + await self.websocket_step_start(step=3, step_name="node_children_evaluation", websocket=websocket) + await self.node_children_evaluation(node) tree_data = self._get_tree_data() - await websocket.send_json({ - "type": "tree_update", - "tree": tree_data, - 
"timestamp": datetime.utcnow().isoformat() - }) - - # Find best node - all_nodes_list = collect_all_nodes(self.root_node) - all_nodes_list.extend(terminal_nodes) - - ## temp change: if reward is the same, choose the deeper node - best_child = max(all_nodes_list, key=lambda x: (x.reward, x.depth)) - - if best_child.reward == 1: - print("Successful trajectory found") - else: - print("Unsuccessful trajectory found") - await self.playwright_manager.close() - - return best_child if best_child is not None else self.root_node - - def select_node(self, node: LATSNode) -> Optional[LATSNode]: - """ - Select a node for expansion using UCT. - - Args: - node: Root node to start selection from - - Returns: - Optional[LATSNode]: Selected node or None if all paths exhausted - """ - if node.is_terminal: - return None - return node.get_best_leaf() - - async def expand_node(self, node: LATSNode, websocket=None) -> None: - """ - Expand a node by generating its children. - - Args: - node: Node to expand - websocket: Optional WebSocket connection for sending updates - """ - if websocket: - await websocket.send_json({ - "type": "node_expanding", - "node_id": id(node), - "timestamp": datetime.utcnow().isoformat() - }) - - children = await self.generate_children(node, websocket) - - for child in children: - node.add_child(child) - if websocket: - await websocket.send_json({ - "type": "node_created", - "node_id": id(child), - "parent_id": id(node), - "action": child.action, - "description": child.natural_language_description, - "timestamp": datetime.utcnow().isoformat() - }) - - if children and children[0].goal_finish_feedback.is_done: - self.set_goal_finished(children[0]) - if websocket: - await websocket.send_json({ - "type": "goal_finished", - "node_id": id(children[0]), - "timestamp": datetime.utcnow().isoformat() - }) - return - - node.check_terminal() - - async def evaluate_node(self, node: LATSNode) -> None: - """ - Evaluate a node using LLM scoring. 
- - Args: - node: Node to evaluate - - Returns: - float: Evaluation score - """ - scores = [] - print(f"{GREEN}-- total {len(node.children)} children to evaluate:{RESET}") - for i, child in enumerate(node.children): - print(f"{GREEN}--- evaluating child {i+1}...{RESET}") - if child.is_terminal: - score = 0 - else: - trajectory = child.get_trajectory() - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, self.config.evaluation_model, child.observation.image) - score = result["overall_score"] - scores.append(score) - - for child, score in zip(node.children, scores): - child.value = score - child.reward = score - - ## TODO: make number of simulations configurable - async def simulate(self, node: LATSNode, max_depth: int = 2, num_simulations=1, websocket=None) -> tuple[float, LATSNode]: - """ - Perform a rollout simulation from a node. - - Args: - node: Starting node for rollout - max_depth: Maximum depth to simulate to - - Returns: - tuple[float, LATSNode]: (Score of the rollout, Terminal node reached) - """ - depth = node.depth - print("print the trajectory") - print_trajectory(node) - print("print the entire tree") - print_entire_tree(self.root_node) - if websocket: - tree_data = self._get_tree_data() - await websocket.send_json({ - "type": "tree_update", - "tree": tree_data, - "timestamp": datetime.utcnow().isoformat() - }) - trajectory_data = self._get_trajectory_data(node) - await websocket.send_json({ - "type": "trajectory_update", - "trajectory": trajectory_data, - "timestamp": datetime.utcnow().isoformat() - }) - return await self.rollout(node, max_depth=max_depth, websocket=websocket) - - async def send_completion_request(self, plan, depth, node, trajectory=[], websocket=None): - print("print the trajectory") - print_trajectory(node) - print("print the entire tree") - print_entire_tree(self.root_node) - if websocket: - # tree_data = self._get_tree_data() - # await websocket.send_json({ - # "type": 
"tree_update", - # "tree": tree_data, - # "timestamp": datetime.utcnow().isoformat() - # }) - trajectory_data = self._get_trajectory_data(node) - await websocket.send_json({ - "type": "trajectory_update", - "trajectory": trajectory_data, - "timestamp": datetime.utcnow().isoformat() - }) - - if depth >= self.config.max_depth: - return trajectory, node - - context = await self.playwright_manager.get_context() - page = await self.playwright_manager.get_page() - # Extract page information - time.sleep(3) - page_info = await extract_page_info(page, fullpage=True, log_folder=self.config.log_folder) - updated_actions = await extract_top_actions( - trajectory, self.goal, self.images, page_info, self.action_set, openai_client, - features=["axtree"], elements_filter="som", branching_factor=self.config.branching_factor, - log_folder=self.config.log_folder, fullpage=True, - action_generation_model=self.config.action_generation_model, - action_grounding_model=self.config.action_grounding_model - ) - next_action = updated_actions[0] - retry_count = self.config.retry_count if hasattr(self.config, 'retry_count') else 1 # Default retries if not set - - for attempt in range(retry_count): - try: - # Convert action to Python code - code, function_calls = self.action_set.to_python_code(next_action["action"]) - - # Locate element - if len(function_calls) == 1: - for function_name, function_args in function_calls: - extracted_number = parse_function_args(function_args) - element = await locate_element(page, extracted_number) - next_action["element"] = element - - # Execute action - await execute_action(next_action, self.action_set, page, context, self.goal, page_info['interactive_elements'], - self.config.log_folder) - feedback = await capture_post_action_feedback(page, next_action, self.goal, self.config.log_folder) - trajectory.append({'action': next_action['action'], 'feedback': feedback}) - action_str = next_action["action"] - - print(f"The action is: {action_str} - The action result 
is: {feedback}") - - # Check if goal is finished - messages = [{"role": "system", "content": "The goal is {}, Is the overall goal finished?".format(self.goal)}] - for item in trajectory: - action = item['action'] - feedback = item['feedback'] - messages.append({"role": "user", "content": 'action is: {}'.format(action)}) - messages.append({"role": "user", "content": 'action feedback is: {}'.format(feedback)}) - - goal_finished = await is_goal_finished(messages, openai_client) - - new_node = LATSNode( - natural_language_description=next_action["natural_language_description"], - action=next_action["action"], - prob=next_action["prob"], - element=next_action["element"], - goal=node.goal, - parent=node - ) - - if goal_finished: - return trajectory, new_node - - return await self.send_completion_request(plan, depth + 1, new_node, trajectory, websocket) - - except Exception as e: - print(f"Attempt {attempt + 1} failed with error: {e}") - if attempt + 1 == retry_count: - print("Max retries reached. 
Skipping this step and retrying the whole request.") - # Retry the entire request from the same state - return await self.send_completion_request(plan, depth, node, trajectory, websocket) - - # If all retries and retries of retries fail, return the current trajectory and node - return trajectory, node - - - async def rollout(self, node: LATSNode, max_depth: int = 2, websocket=None)-> tuple[float, LATSNode]: - # Reset browser state - await self._reset_browser() - path = self.get_path_to_root(node) - - print("execute path") - # Execute path - - messages = [] - trajectory = [] - - for n in path[1:]: # Skip root node - success = await playwright_step_execution( - n, - self.goal, - self.playwright_manager, - is_replay=False, - log_folder=self.config.log_folder - ) - if not success: - return 0, n - if not n.feedback: - n.feedback = await generate_feedback( - self.goal, - n.natural_language_description, - self.playwright_manager, - ) - trajectory.append({ - "action": n.action, - "feedback": n.feedback - }) - ## call the prompt agent - print("current depth: ", len(path) - 1) - print("max depth: ", self.config.max_depth) - trajectory, node = await self.send_completion_request(self.goal, len(path) - 1, node=n, trajectory=trajectory, websocket=websocket) - print("print the trajectory") - print_trajectory(node) - print("print the entire tree") - print_entire_tree(self.root_node) - if websocket: - # tree_data = self._get_tree_data() - # await websocket.send_json({ - # "type": "tree_update", - # "tree": tree_data, - # "timestamp": datetime.utcnow().isoformat() - # }) - trajectory_data = self._get_trajectory_data(node) - await websocket.send_json({ - "type": "trajectory_update", - "trajectory": trajectory_data, - "timestamp": datetime.utcnow().isoformat() - }) - - page = await self.playwright_manager.get_page() - page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder) - - messages = [{"role": "user", "content": f"Action is: {n.action}"} for n in 
path[1:]] - goal_finished, confidence_score = goal_finished_evaluator( - messages, - openai_client, - self.goal, - page_info['screenshot'] - ) - print("evaluating") - - score = confidence_score if goal_finished else 0 - - return score, node - - def backpropagate(self, node: LATSNode, value: float) -> None: - """ - Backpropagate values through the tree. - - Args: - node: Current node to start backpropagation from - value: Value to propagate upwards - """ - while node: - node.visits += 1 - node.value = (node.value * (node.visits - 1) + value) / node.visits - node = node.parent - - async def _reset_browser(self, websocket=None) -> Optional[str]: - """Reset the browser to initial state and return the live browser URL if available.""" - await self.playwright_manager.close() - - ## reset account using api-based account reset - if self.config.account_reset: - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "started", - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Use aiohttp instead of curl - async with aiohttp.ClientSession() as session: - headers = {'Connection': 'close'} # Similar to curl -N - async with session.get(self.reset_url, headers=headers) as response: - if response.status == 200: - data = await response.json() - print(f"Account reset successful: {data}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "success", - "data": data, - "timestamp": datetime.utcnow().isoformat() - }) - else: - error_msg = f"Account reset failed with status {response.status}" - print(error_msg) - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - except Exception as e: - print(f"Error during account reset: {e}") if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - try: 
- # Create new playwright manager - self.playwright_manager = await setup_playwright( - storage_state=self.config.storage_state, - headless=self.config.headless, - mode=self.config.browser_mode - ) - page = await self.playwright_manager.get_page() - live_browser_url = None - if self.config.browser_mode == "browserbase": - live_browser_url = await self.playwright_manager.get_live_browser_url() - session_id = await self.playwright_manager.get_session_id() - else: - session_id = None - live_browser_url = None - await page.goto(self.starting_url, wait_until="networkidle") - - # Send success message if websocket is provided - if websocket: - if self.config.storage_state: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": f"Browser successfully initialized with storage state file: {self.config.storage_state}", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) + await self.websocket_tree_update(type="tree_update_node_children_evaluation", websocket=websocket, tree_data=tree_data) else: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": "Browser successfully initialized", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - - return live_browser_url, session_id - except Exception as e: - print(f"Error setting up browser: {e}") - if websocket: - await websocket.send_json({ - "type": "browser_setup", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - return None, None - - async def observe(self) -> None: - page = await self.playwright_manager.get_page() - page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder) - feature_text = await observe_features( - page_info, - features=self.config.features, - elements_filter=self.config.elements_filter, - log_folder=self.config.log_folder, - 
fullpage=self.config.fullpage - ) - screenshot = page_info['screenshot_som'] - observation = Observation( - text=feature_text, - image=screenshot, - ) - return observation - - async def execute_action_trajectory(self, action_trajectory: list[dict]) -> None: - if not action_trajectory: - return True + print("after evaluation") + print_entire_tree(self.root_node) + + + # Step 4: Simulation + print(f"{GREEN}Step 4: simulation{RESET}") + await self.websocket_step_start(step=4, step_name="simulation", websocket=websocket) + selected_node = max(node.children, key=lambda child: child.value) + await self.websocket_node_selection(selected_node, websocket=websocket, type="node_selected_for_simulation") + reward, terminal_node = await self.simulation(selected_node, max_depth=self.config.max_depth, num_simulations=1, websocket=websocket) + terminal_nodes.append(terminal_node) + await self.websocket_simulation_result(reward, terminal_node, websocket=websocket) + + if reward == 1: + return terminal_node + + # Step 5: Backpropagation + print(f"{GREEN}Step 5: backpropagation{RESET}") + await self.websocket_step_start(step=5, step_name="backpropagation", websocket=websocket) + self.backpropagate(terminal_node, reward) + tree_data = self._get_tree_data() + if websocket: + await self.websocket_tree_update(type="tree_update_node_backpropagation", websocket=websocket, tree_data=tree_data) + else: + print("after backpropagation") + print_entire_tree(self.root_node) - await self._reset_browser() - print("taking action trajectory") - for action_data in action_trajectory: - print("action_data") - print(action_data) - - # Convert action_data dict to LATSNode - temp_node = LATSNode( - natural_language_description=action_data["natural_language_description"], - action=action_data["action"], - prob=0, - element=action_data["element"], - goal=self.goal, - parent=None # No parent needed for temporary node - ) + # Find best node + all_nodes_list = collect_all_nodes(self.root_node) + 
all_nodes_list.extend(terminal_nodes) - success = await playwright_step_execution( - temp_node, # Pass the node instead of raw action_data - self.goal, - self.playwright_manager, - is_replay=False, - log_folder=self.config.log_folder - ) + ## temp change: if reward is the same, choose the deeper node + best_child = max(all_nodes_list, key=lambda x: (x.reward, x.depth)) - if not success: - return False - return True - - async def generate_candidate_actions(self, node: LATSNode) -> list[dict]: - trajectory = node.get_trajectory() - action_trajectory = node.get_action_trajectory() - await self.execute_action_trajectory(action_trajectory) - observation = await self.observe() - # only root node has no observation at this point - if node.observation is None: - node.observation = observation - actions = await generate_actions_with_observation( - trajectory, - self.goal, - self.images, - openai_client=openai_client, - action_set=self.action_set, - feature_text=observation.text, - screenshot=observation.image, - branching_factor=self.config.branching_factor, - log_folder=self.config.log_folder, - action_generation_model=self.config.action_generation_model, - ) - - page = await self.playwright_manager.get_page() - valid_actions = [] - for action_data in actions: - if action_data["action"] == "FINISH": - continue - - is_bid_action, element_data = await locate_element_from_action(page, action_data["action"]) - if is_bid_action and not element_data: - continue - - action_data['element'] = element_data - valid_actions.append(action_data) - return valid_actions - - async def generate_children(self, node: LATSNode, websocket=None) -> list[LATSNode]: - print(f"{GREEN}-- generating candidate actions...{RESET}") - - children = [] - - action_trajectory = node.get_action_trajectory() - candidate_actions = await self.generate_candidate_actions(node) - print(f"{GREEN}-- generated {len(candidate_actions)} actions{RESET}") - for action_data in candidate_actions: - print(f"{GREEN}--- 
{action_data['action']}{RESET}") - print(f"{GREEN}--- {action_data['natural_language_description']}{RESET}") - - print(f"") - print(f"{GREEN}-- executing candidate trajectories{RESET}") - for i, action_data in enumerate(candidate_actions): - - candidate_action_trajectory = action_trajectory + [action_data] - print(f"{GREEN}--- trajectory {i+1}:{RESET}") - for action in candidate_action_trajectory: - print(f"{GREEN}---- {action['action']}{RESET}") - print(f"{GREEN}---- {action['natural_language_description']}{RESET}") - executed_successfully = await self.execute_action_trajectory(candidate_action_trajectory) - if not executed_successfully: - # not executed successfully, give up this candidate - print(f"{RED}--- failed to execute action trajectory{RESET}") - continue - - observation = await self.observe() - print(f"{GREEN}--- generate feedback...{RESET}") - feedback = await generate_feedback_with_screenshot( - self.goal, - action_data["natural_language_description"], - observation.image, - model=self.config.feedback_model, - ) - print(f"feedback: is_done: {feedback.is_done}, explanation: {feedback.explanation}") - - child = LATSNode( - natural_language_description=action_data["natural_language_description"], - action=action_data["action"], - prob=action_data["prob"], - element=action_data["element"], - goal=node.goal, - ) - child.observation = observation - child.goal_finish_feedback = feedback - if feedback.is_done: - # the goal is finished, stop the search - return [child] - - children.append(child) - - if node.depth + 1 >= self.config.max_depth: - child.is_terminal = True - - return children - - def set_goal_finished(self, node: LATSNode) -> None: - self.goal_finished = True - self.result_node = node - - def get_path_to_root(self, node: LATSNode) -> List[LATSNode]: - path = [] - current = node - while current: - path.append(current) - current = current.parent - return list(reversed(path)) + if best_child.reward == 1: + print("Successful trajectory found") + else: 
+ print("Unsuccessful trajectory found") + await self.playwright_manager.close() + + return best_child if best_child is not None else self.root_node - def _get_tree_data(self): - """Get tree data in a format suitable for visualization""" - nodes = collect_all_nodes(self.root_node) - tree_data = [] - - for node in nodes: - node_data = { - "id": id(node), - "parent_id": id(node.parent) if node.parent else None, - "action": node.action if node.action else "ROOT", - "description": node.natural_language_description, - "depth": node.depth, - "is_terminal": node.is_terminal, - "value": node.value, - "visits": node.visits, - "feedback": node.feedback, - "reward": node.reward - } - tree_data.append(node_data) - - return tree_data - - def _get_trajectory_data(self, terminal_node: LATSNode): - """Get trajectory data in a format suitable for visualization - - Args: - terminal_node: The leaf node to start the trajectory from - - Returns: - list: List of node data dictionaries representing the trajectory - """ - trajectory_data = [] - path = [] - - # Collect path from terminal to root - current = terminal_node - while current is not None: - path.append(current) - current = current.parent - - # Process nodes in order from root to terminal - for level, node in enumerate(reversed(path)): - node_data = { - "id": id(node), - "level": level, - "action": node.action if node.action else "ROOT", - "description": node.natural_language_description, - "visits": node.visits, - "value": float(f"{node.value:.3f}") if hasattr(node, 'value') else None, - "reward": float(f"{node.reward:.3f}") if hasattr(node, 'reward') else None, - "is_terminal": node.is_terminal, - "feedback": node.feedback if hasattr(node, 'feedback') else None, - "is_root": not hasattr(node, 'parent') or node.parent is None, - "is_terminal_node": node == terminal_node - } - trajectory_data.append(node_data) - - return trajectory_data + async def node_selection(self, node: LATSNode, websocket=None) -> Optional[LATSNode]: + if 
node.is_terminal: + return None + ## TODO; move this node selection logic from LATSNode to LATSAgent + selected_node = node.get_best_leaf() + await self.websocket_node_selection(selected_node, websocket=websocket) + return selected_node \ No newline at end of file diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/mcts_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/mcts_agent.py index f1cb9ea..87ce43d 100644 --- a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/mcts_agent.py +++ b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/mcts_agent.py @@ -1,1008 +1,20 @@ -import logging -import time -from typing import Any, Dict, List, Optional -from collections import deque +from typing import Any, Optional, Tuple, List from datetime import datetime -import os -import json -import subprocess - -from openai import OpenAI from dotenv import load_dotenv load_dotenv() -import aiohttp - -from ...core_async.config import AgentConfig -from ...webagent_utils_async.action.highlevel import HighLevelActionSet -from ...webagent_utils_async.utils.playwright_manager import AsyncPlaywrightManager, setup_playwright -from ...webagent_utils_async.utils.utils import parse_function_args, locate_element -from ...evaluation_async.evaluators import goal_finished_evaluator -from ...replay_async import generate_feedback, playwright_step_execution -from ...webagent_utils_async.action.prompt_functions import extract_top_actions -from ...webagent_utils_async.browser_env.observation import extract_page_info +from .tree_vis import RED, better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree from .lats_node import LATSNode -from .tree_vis import better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree -from .trajectory_score import create_llm_prompt, score_trajectory_with_openai -from ...webagent_utils_async.utils.utils import urls_to_images - -logger = 
logging.getLogger(__name__) -logger.setLevel(logging.INFO) -openai_client = OpenAI() - -class MCTSAgent: - def __init__( - self, - starting_url: str, - messages: list[dict[str, Any]], - goal: str, - images: list, - playwright_manager: AsyncPlaywrightManager, - config: AgentConfig, - ): - self.starting_url = starting_url - self.goal = goal - self.image_urls = images - self.images = urls_to_images(self.image_urls) - self.messages = messages - self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"}) - - self.playwright_manager = playwright_manager - - self.config = config - - self.agent_type = ["bid", "nav", "file", "select_option"] - self.action_set = HighLevelActionSet( - subsets=self.agent_type, strict=False, multiaction=True, demo_mode="default" - ) - self.root_node = LATSNode( - natural_language_description=None, - action=None, - prob=None, - element=None, - goal=self.goal, - parent=None - ) - self.reset_url = os.environ["ACCOUNT_RESET_URL"] - - async def run(self, websocket=None) -> List[Dict[str, Any]]: - """ - Run the MCTS algorithm based on configuration. - - Args: - websocket: Optional WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - logger.info("Starting Reflective MCTS algorithm") - if websocket: - return await self.rmcts_with_websocket(websocket) - else: - return await self.rmcts() - - async def rmcts(self) -> List[Dict[str, Any]]: - """ - Performs Monte Carlo Tree Search starting from the root node. - Uses GPT-4 for node selection and reflection-based backpropagation. 
- - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - best_score = float('-inf') - best_path = None - visited = set() # Track visited nodes to avoid cycles - max_iterations = self.config.iterations # Use configured number of iterations - - try: - # Initial browser setup - live_browser_url, session_id = await self._reset_browser() - - for iteration in range(max_iterations): - logger.info(f"\n{'='*50}") - logger.info(f"RMCTS Iteration {iteration + 1}/{max_iterations}") - logger.info(f"{'='*50}\n") - - # Selection: Use GPT-4 to select a promising path - current_node = self.root_node - path = [current_node] - selection_depth = 0 - - while current_node.children and not current_node.is_terminal: - logger.info(f"\nSelection Step {selection_depth + 1}:") - logger.info(f"Current node action: {current_node.action}") - logger.info(f"Number of children: {len(current_node.children)}") - - # Get trajectory for GPT-4 to evaluate - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - # Create prompt for GPT-4 to select next node - prompt = f"""Given the current trajectory and goal, select the most promising child node to explore next. - Consider the overall progress, efficiency, and likelihood of success. 
- - Goal: {self.goal} - - Current Trajectory: - {json.dumps(trajectory, indent=2)} - - Available Children: - {json.dumps([{ - 'action': child.action, - 'description': child.natural_language_description, - 'visits': child.visits, - 'value': child.value - } for child in current_node.children], indent=2)} - - Return a JSON response with: - {{ - "selected_child_index": int, # Index of the selected child - "explanation": str # Brief explanation of the selection - }}""" - - try: - response = openai_client.chat.completions.create( - model=self.config.evaluation_model, - messages=[ - {"role": "system", "content": "You are an expert at selecting promising paths in a search tree."}, - {"role": "user", "content": prompt} - ], - response_format={"type": "json_object"} - ) - - selection = json.loads(response.choices[0].message.content) - selected_index = selection["selected_child_index"] - - if 0 <= selected_index < len(current_node.children): - current_node = current_node.children[selected_index] - path.append(current_node) - logger.info(f"Selected child {selected_index + 1}: {current_node.action}") - logger.info(f"Selection explanation: {selection['explanation']}") - else: - logger.warning(f"Invalid child index {selected_index}, breaking selection") - break - - except Exception as e: - logger.error(f"Error in node selection: {str(e)}") - break - - selection_depth += 1 - - # Expansion: Expand the selected node if possible - if not current_node.is_terminal and current_node.depth < self.config.max_depth: - logger.info(f"\nExpansion Step:") - logger.info(f"Expanding node: {current_node.action}") - - try: - await self.expand(current_node) - logger.info(f"Successfully expanded node with {len(current_node.children)} children") - except Exception as e: - logger.error(f"Error expanding node: {str(e)}") - current_node.is_terminal = True - # Expansion Step: Expand the selected node if possible - if not current_node.is_terminal and current_node.depth < self.config.max_depth: - 
logger.info(f"\nExpansion Step:") - logger.info(f"Expanding node: {current_node.action}") - - expansion_success = await self.expand(current_node, None) - if not expansion_success: - # No children were generated; backtrack if possible. - if len(path) > 1: - logger.info("Backtracking due to expansion failure (no children generated).") - path.pop() # Remove the current dead-end node. - current_node = path[-1] # Set current_node to its parent. - else: - logger.warning("Expansion failed at root; no further backtracking possible.") - break - else: - logger.info(f"Successfully expanded node with {len(current_node.children)} children") - - # Simulation: Evaluate the current path - logger.info(f"\nSimulation Step:") - logger.info(f"Evaluating path of length {len(path) - 1}") - - try: - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - - logger.info(f"Simulation Results:") - logger.info(f"Overall Score: {score:.3f}") - logger.info(f"Efficiency Score: {result['efficiency_score']:.3f}") - logger.info(f"Accuracy Score: {result['accuracy_score']:.3f}") - logger.info(f"Robustness Score: {result['robustness_score']:.3f}") - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - logger.info(f"\nNew best path found!") - logger.info(f"Previous best score: {best_score:.3f}") - logger.info(f"New best score: {score:.3f}") - - # Reflection-based backpropagation - if score < 0.75: # If the path is not satisfactory - logger.info(f"\nReflection Step (Score {score:.3f} < 0.75):") - - # Generate reflection prompt - reflection_prompt = f"""Analyze the current trajectory and 
suggest improvements. - - Goal: {self.goal} - - Current Trajectory: - {json.dumps(trajectory, indent=2)} - - Score: {score} - - Return a JSON response with: - {{ - "backtrack_to_step": int, # Which step to backtrack to (0-based index) - "reason": str, # Why backtrack to this step - "suggested_improvements": [str] # List of suggested improvements - }}""" - - try: - reflection = openai_client.chat.completions.create( - model=self.config.evaluation_model, - messages=[ - {"role": "system", "content": "You are an expert at analyzing and improving search trajectories."}, - {"role": "user", "content": reflection_prompt} - ], - response_format={"type": "json_object"} - ) - - reflection_result = json.loads(reflection.choices[0].message.content) - backtrack_step = reflection_result["backtrack_to_step"] - - # Backtrack to the suggested step - if 0 <= backtrack_step < len(path): - current_node = path[backtrack_step] - # Remove nodes after the backtrack point - while len(path) > backtrack_step + 1: - path.pop() - logger.info(f"Backtracking to step {backtrack_step}") - logger.info(f"Reason: {reflection_result['reason']}") - logger.info("Suggested improvements:") - for improvement in reflection_result["suggested_improvements"]: - logger.info(f"- {improvement}") - - except Exception as e: - logger.error(f"Error in reflection: {str(e)}") - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"\nFound satisfactory solution with score {score:.3f}") - return [{"action": node.action} for node in path[1:]] - - except Exception as e: - logger.error(f"Error in simulation: {str(e)}") - continue - - # Update node statistics - logger.info(f"\nBackpropagation Step:") - for node in path: - old_value = node.value - node.visits += 1 - node.value = (node.value * (node.visits - 1) + score) / node.visits - logger.info(f"Node {node.action}:") - logger.info(f" Visits: {node.visits}") - logger.info(f" Value: {old_value:.3f} -> {node.value:.3f}") - - # If we've 
exhausted all iterations and haven't found a perfect solution, - # return the best path we found - if best_path and len(best_path) > 1: - logger.info(f"\nSearch complete. Returning best path found with score {best_score:.3f}") - return [{"action": node.action} for node in best_path[1:]] - - # If no valid path was found or path was just the root, return a default action - logger.warning("\nNo valid path found, returning fallback action") - return [{"action": "refresh()", "description": "Fallback action - no valid path found"}] - - except Exception as e: - error_msg = f"Error in RMCTS search: {str(e)}" - logger.error(error_msg) - - if best_path: - logger.info(f"\nReturning best path found before error with score {best_score:.3f}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def rmcts_with_websocket(self, websocket) -> List[Dict[str, Any]]: - """ - Performs Monte Carlo Tree Search starting from the root node with WebSocket updates. - Uses GPT-4 for node selection and reflection-based backpropagation. 
- - Args: - websocket: WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - best_score = float('-inf') - best_path = None - visited = set() # Track visited nodes to avoid cycles - max_iterations = self.config.iterations # Use configured number of iterations - - try: - # Initial browser setup - live_browser_url, session_id = await self._reset_browser(websocket) - - for iteration in range(max_iterations): - logger.info(f"\n{'='*50}") - logger.info(f"RMCTS Iteration {iteration + 1}/{max_iterations}") - logger.info(f"{'='*50}\n") - - # Send iteration update if websocket is provided - await websocket.send_json({ - "type": "rmcts_iteration", - "iteration": iteration + 1, - "max_iterations": max_iterations, - "timestamp": datetime.utcnow().isoformat() - }) - - # Selection: Use GPT-4 to select a promising path - current_node = self.root_node - path = [current_node] - selection_depth = 0 - - while current_node.children and not current_node.is_terminal: - logger.info(f"\nSelection Step {selection_depth + 1}:") - logger.info(f"Current node action: {current_node.action}") - logger.info(f"Number of children: {len(current_node.children)}") - - # Get trajectory for GPT-4 to evaluate - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - # Create prompt for GPT-4 to select next node - prompt = f"""Given the current trajectory and goal, select the most promising child node to explore next. - Consider the overall progress, efficiency, and likelihood of success. 
- - Goal: {self.goal} - - Current Trajectory: - {json.dumps(trajectory, indent=2)} - - Available Children: - {json.dumps([{ - 'action': child.action, - 'description': child.natural_language_description, - 'visits': child.visits, - 'value': child.value - } for child in current_node.children], indent=2)} - - Return a JSON response with: - {{ - "selected_child_index": int, # Index of the selected child - "explanation": str # Brief explanation of the selection - }}""" - - try: - response = openai_client.chat.completions.create( - model=self.config.evaluation_model, - messages=[ - {"role": "system", "content": "You are an expert at selecting promising paths in a search tree."}, - {"role": "user", "content": prompt} - ], - response_format={"type": "json_object"} - ) - - selection = json.loads(response.choices[0].message.content) - selected_index = selection["selected_child_index"] - - if 0 <= selected_index < len(current_node.children): - current_node = current_node.children[selected_index] - path.append(current_node) - logger.info(f"Selected child {selected_index + 1}: {current_node.action}") - logger.info(f"Selection explanation: {selection['explanation']}") - - # Send selection update if websocket is provided - await websocket.send_json({ - "type": "node_selected", - "node_id": id(current_node), - "explanation": selection["explanation"], - "timestamp": datetime.utcnow().isoformat() - }) - else: - logger.warning(f"Invalid child index {selected_index}, breaking selection") - break - - except Exception as e: - logger.error(f"Error in node selection: {str(e)}") - await websocket.send_json({ - "type": "selection_error", - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - break - - selection_depth += 1 - - # Expansion: Expand the selected node if possible - if not current_node.is_terminal and current_node.depth < self.config.max_depth: - logger.info(f"\nExpansion Step:") - logger.info(f"Expanding node: {current_node.action}") - - await 
websocket.send_json({ - "type": "node_expanding", - "node_id": id(current_node), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - await self.expand(current_node, websocket) - logger.info(f"Successfully expanded node with {len(current_node.children)} children") - except Exception as e: - logger.error(f"Error expanding node: {str(e)}") - current_node.is_terminal = True - await websocket.send_json({ - "type": "expansion_error", - "node_id": id(current_node), - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - # Simulation: Evaluate the current path - logger.info(f"\nSimulation Step:") - logger.info(f"Evaluating path of length {len(path) - 1}") - - await websocket.send_json({ - "type": "simulation_start", - "path_length": len(path) - 1, - "timestamp": datetime.utcnow().isoformat() - }) - - try: - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - - logger.info(f"Simulation Results:") - logger.info(f"Overall Score: {score:.3f}") - logger.info(f"Efficiency Score: {result['efficiency_score']:.3f}") - logger.info(f"Accuracy Score: {result['accuracy_score']:.3f}") - logger.info(f"Robustness Score: {result['robustness_score']:.3f}") - - # Send simulation results if websocket is provided - await websocket.send_json({ - "type": "simulation_results", - "score": score, - "efficiency_score": result["efficiency_score"], - "accuracy_score": result["accuracy_score"], - "robustness_score": result["robustness_score"], - "timestamp": datetime.utcnow().isoformat() - }) - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - 
logger.info(f"\nNew best path found!") - logger.info(f"Previous best score: {best_score:.3f}") - logger.info(f"New best score: {score:.3f}") - - # Send best path update if websocket is provided - await websocket.send_json({ - "type": "best_path_update", - "score": best_score, - "path": [{"id": id(node), "action": node.action} for node in best_path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - # Reflection-based backpropagation - if score < 0.75: # If the path is not satisfactory - logger.info(f"\nReflection Step (Score {score:.3f} < 0.75):") - - await websocket.send_json({ - "type": "reflection_start", - "score": score, - "timestamp": datetime.utcnow().isoformat() - }) - - # Generate reflection prompt - reflection_prompt = f"""Analyze the current trajectory and suggest improvements. - - Goal: {self.goal} - - Current Trajectory: - {json.dumps(trajectory, indent=2)} - - Score: {score} - - Return a JSON response with: - {{ - "backtrack_to_step": int, # Which step to backtrack to (0-based index) - "reason": str, # Why backtrack to this step - "suggested_improvements": [str] # List of suggested improvements - }}""" - - try: - reflection = openai_client.chat.completions.create( - model=self.config.evaluation_model, - messages=[ - {"role": "system", "content": "You are an expert at analyzing and improving search trajectories."}, - {"role": "user", "content": reflection_prompt} - ], - response_format={"type": "json_object"} - ) - - reflection_result = json.loads(reflection.choices[0].message.content) - backtrack_step = reflection_result["backtrack_to_step"] - - # Backtrack to the suggested step - if 0 <= backtrack_step < len(path): - current_node = path[backtrack_step] - # Remove nodes after the backtrack point - while len(path) > backtrack_step + 1: - path.pop() - logger.info(f"Backtracking to step {backtrack_step}") - logger.info(f"Reason: {reflection_result['reason']}") - logger.info("Suggested improvements:") - for improvement in 
reflection_result["suggested_improvements"]: - logger.info(f"- {improvement}") - - # Send backtracking update if websocket is provided - await websocket.send_json({ - "type": "backtracking", - "step": backtrack_step, - "reason": reflection_result["reason"], - "suggested_improvements": reflection_result["suggested_improvements"], - "timestamp": datetime.utcnow().isoformat() - }) - - except Exception as e: - logger.error(f"Error in reflection: {str(e)}") - await websocket.send_json({ - "type": "reflection_error", - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"\nFound satisfactory solution with score {score:.3f}") - - # Send completion update if websocket is provided - await websocket.send_json({ - "type": "search_complete", - "status": "success", - "score": score, - "path": [{"id": id(node), "action": node.action} for node in path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in path[1:]] - - except Exception as e: - logger.error(f"Error in simulation: {str(e)}") - await websocket.send_json({ - "type": "simulation_error", - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Update node statistics - logger.info(f"\nBackpropagation Step:") - for node in path: - old_value = node.value - node.visits += 1 - node.value = (node.value * (node.visits - 1) + score) / node.visits - logger.info(f"Node {node.action}:") - logger.info(f" Visits: {node.visits}") - logger.info(f" Value: {old_value:.3f} -> {node.value:.3f}") - - # Send backpropagation update if websocket is provided - await websocket.send_json({ - "type": "backpropagation_complete", - "updated_nodes": [{"id": id(node), "visits": node.visits, "value": node.value} for node in path], - "timestamp": datetime.utcnow().isoformat() - }) - - # If we've exhausted all iterations and haven't found a perfect solution, - # return 
the best path we found - if best_path and len(best_path) > 1: - logger.info(f"\nSearch complete. Returning best path found with score {best_score:.3f}") - - # Send completion update if websocket is provided - await websocket.send_json({ - "type": "search_complete", - "status": "partial_success", - "score": best_score, - "path": [{"id": id(node), "action": node.action} for node in best_path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in best_path[1:]] - - # If no path was found at all - logger.warning("\nNo valid path found") - - # Send failure update if websocket is provided - await websocket.send_json({ - "type": "search_complete", - "status": "failure", - "message": "No valid path found", - "timestamp": datetime.utcnow().isoformat() - }) - - # If no valid path was found or path was just the root, return a default action - logger.warning("\nNo valid path found, returning fallback action") - return [{"action": "refresh()", "description": "Fallback action - no valid path found"}] - - except Exception as e: - error_msg = f"Error in RMCTS search: {str(e)}" - logger.error(error_msg) - - # Send error update if websocket is provided - await websocket.send_json({ - "type": "search_error", - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - if best_path: - logger.info(f"\nReturning best path found before error with score {best_score:.3f}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def _reset_browser(self, websocket=None) -> Optional[tuple]: - """Reset the browser to initial state and return the live browser URL if available.""" - await self.playwright_manager.close() - - ## reset account using api-based account reset - if self.config.account_reset: - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "started", - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Use aiohttp instead of curl - async with 
aiohttp.ClientSession() as session: - headers = {'Connection': 'close'} # Similar to curl -N - async with session.get(self.reset_url, headers=headers) as response: - if response.status == 200: - data = await response.json() - print(f"Account reset successful: {data}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "success", - "data": data, - "timestamp": datetime.utcnow().isoformat() - }) - else: - error_msg = f"Account reset failed with status {response.status}" - print(error_msg) - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - except Exception as e: - print(f"Error during account reset: {e}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Create new playwright manager - self.playwright_manager = await setup_playwright( - storage_state=self.config.storage_state, - headless=self.config.headless, - mode=self.config.browser_mode - ) - page = await self.playwright_manager.get_page() - live_browser_url = None - if self.config.browser_mode == "browserbase": - live_browser_url = await self.playwright_manager.get_live_browser_url() - session_id = await self.playwright_manager.get_session_id() - else: - session_id = None - live_browser_url = None - await page.goto(self.starting_url, wait_until="networkidle") - - # Send success message if websocket is provided - if websocket: - if self.config.storage_state: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": f"Browser successfully initialized with storage state file: {self.config.storage_state}", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - else: - await websocket.send_json({ - "type": "browser_setup", - "status": 
"success", - "message": "Browser successfully initialized", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - - return live_browser_url, session_id - except Exception as e: - print(f"Error setting up browser: {e}") - if websocket: - await websocket.send_json({ - "type": "browser_setup", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - return None, None - - async def expand(self, node: LATSNode, websocket=None) -> bool: - """ - Expand a node by generating its children. If no children are generated, - mark the node as terminal and return False to trigger backtracking. - - Args: - node: Node to expand. - websocket: Optional WebSocket connection to send updates. - - Returns: - bool: True if expansion succeeded (children generated), False otherwise. - """ - try: - children_state = await self.generate_children(node, websocket) - except Exception as e: - logger.error(f"Exception during generation of children for node {node.action}: {e}") - children_state = [] - - if not children_state: - logger.warning("No children generated. Marking node as terminal and triggering backtracking.") - node.is_terminal = True - return False # Indicate that expansion did not generate children. 
- - for child_state in children_state: - try: - child = LATSNode( - natural_language_description=child_state.get("natural_language_description", ""), - action=child_state.get("action", ""), - prob=child_state.get("prob", 0.0), - element=child_state.get("element", None), - goal=node.goal, - parent=node - ) - node.children.append(child) - - if websocket: - await websocket.send_json({ - "type": "node_created", - "node_id": id(child), - "parent_id": id(node), - "action": child.action, - "description": child.natural_language_description, - "timestamp": datetime.utcnow().isoformat() - }) - except Exception as e: - logger.error(f"Error creating child node from state {child_state}: {e}") - return True # Expansion succeeded (children were generated). - - async def generate_children(self, node: LATSNode, websocket=None) -> list[dict]: - """ - Generate child nodes for a given node. - - Args: - node: Parent node to generate children for - websocket: Optional WebSocket connection to send updates to - - Returns: - list[dict]: List of child state dictionaries - """ - # Reset browser and get live URL - live_browser_url, session_id = await self._reset_browser(websocket) - path = self.get_path_to_root(node) - logger.info(f"######### Generating children for path with {len(path)} nodes") - # Execute path - for n in path[1:]: # Skip root node - if websocket: - await websocket.send_json({ - "type": "replaying_action", - "node_id": id(n), - "action": n.action, - "timestamp": datetime.utcnow().isoformat() - }) - try: - success = await playwright_step_execution( - n, - self.goal, - self.playwright_manager, - is_replay=False, - log_folder=self.config.log_folder - ) - logger.info(f"#########Success: {success}") - - if not success: - logger.warning(f"Action execution failed: {n.action}") - n.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "replay_failed", - "node_id": id(n), - "timestamp": datetime.utcnow().isoformat() - }) - return [{ - 
"natural_language_description": "Recover from failed action", - "action": "refresh()", - "prob": 0.1, - "element": None - }] - except Exception as e: - logger.error(f"Error executing action {n.action}: {str(e)}") - # Provide fallback actions instead of bubbling up the exception - return [{ - "natural_language_description": "Recover from action error", - "action": "refresh()", - "prob": 0.1, - "element": None - }] - - - if not n.feedback: - n.feedback = await generate_feedback( - self.goal, - n.natural_language_description, - self.playwright_manager, - ) - if websocket: - await websocket.send_json({ - "type": "feedback_generated", - "node_id": id(n), - "feedback": n.feedback, - "timestamp": datetime.utcnow().isoformat() - }) +from .base_agent import BaseAgent - time.sleep(3) - page = await self.playwright_manager.get_page() - page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder) - - messages = [{"role": "user", "content": f"Action is: {n.action}"} for n in path[1:]] - +class MCTSAgent(BaseAgent): + async def run(self, websocket=None) -> list[LATSNode]: if websocket: await websocket.send_json({ - "type": "generating_actions", - "node_id": id(node), + "type": "search_status", + "status": "started", + "message": "Starting MCTS search", "timestamp": datetime.utcnow().isoformat() }) - next_actions = await extract_top_actions( - [{"natural_language_description": n.natural_language_description, "action": n.action, "feedback": n.feedback} for n in path[1:]], - self.goal, - self.images, - page_info, - self.action_set, - openai_client, - features=self.config.features, - elements_filter=self.config.elements_filter, - branching_factor=self.config.branching_factor, - log_folder=self.config.log_folder, - fullpage=self.config.fullpage, - action_generation_model=self.config.action_generation_model, - action_grounding_model=self.config.action_grounding_model - ) - - children = [] - for action in next_actions: - if action["action"] == "FINISH": - 
logger.info(f"Found FINISH action with probability: {action['prob']}") - if action["prob"] > 0.99: - node.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "node_terminal", - "node_id": id(node), - "reason": "finish_action", - "timestamp": datetime.utcnow().isoformat() - }) - continue - # return [] - continue - - page = await self.playwright_manager.get_page() - code, function_calls = self.action_set.to_python_code(action["action"]) - - if len(function_calls) == 1: - try: - for function_name, function_args in function_calls: - extracted_number = parse_function_args(function_args) - element = await locate_element(page, extracted_number) - action["element"] = element - except Exception as e: - logger.warning(f"Element location failed for action: {action['action']}, error: {str(e)}") - action["element"] = None - children.append(action) - if websocket: - await websocket.send_json({ - "type": "element_location_failed", - "action": action["action"], - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - children.append(action) - - if not children: - # node.is_terminal = True - # if websocket: - # await websocket.send_json({ - # "type": "node_terminal", - # "node_id": id(node), - # "reason": "no_valid_actions", - # "timestamp": datetime.utcnow().isoformat() - # }) - # logger.warning("No children generated") - logger.warning("No viable children, creating fallback exploration actions") - - # # If empty list would terminate search, create a "fallback" child - children.extend([ - { - "natural_language_description": "Navigate back to try a different approach", - "action": "navigate_backward()", - "prob": 0.15, - "element": None - }, - { - "natural_language_description": "Try refreshing the page", - "action": "refresh()", - "prob": 0.1, - "element": None - }, - { - "natural_language_description": "Try clicking on a random element", - "action": "click('random')", - "prob": 0.05, - "element": None - } - ]) - print(f"****** Generated 
children: {children}") - return children - - def get_path_to_root(self, node: LATSNode) -> List[LATSNode]: - path = [] - current = node - while current: - path.append(current) - current = current.parent - return list(reversed(path)) \ No newline at end of file + pass \ No newline at end of file diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/simple_search_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/simple_search_agent.py index 111e99c..e35501d 100644 --- a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/simple_search_agent.py +++ b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/simple_search_agent.py @@ -1,100 +1,24 @@ -import logging -import time from typing import Any, Dict, List, Optional from collections import deque from datetime import datetime -import os -import json -import subprocess - -from openai import OpenAI from dotenv import load_dotenv load_dotenv() -import aiohttp - -from ...core_async.config import AgentConfig - -from ...webagent_utils_async.action.highlevel import HighLevelActionSet -from ...webagent_utils_async.utils.playwright_manager import AsyncPlaywrightManager, setup_playwright -from ...webagent_utils_async.utils.utils import parse_function_args, locate_element -from ...evaluation_async.evaluators import goal_finished_evaluator -from ...replay_async import generate_feedback, playwright_step_execution -from ...webagent_utils_async.action.prompt_functions import extract_top_actions -from ...webagent_utils_async.browser_env.observation import extract_page_info -from .lats_node import LATSNode from .tree_vis import better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree -from .trajectory_score import create_llm_prompt, score_trajectory_with_openai -from ...webagent_utils_async.utils.utils import urls_to_images - -logger = logging.getLogger(__name__) -openai_client = OpenAI() - -class SimpleSearchAgent: - def 
__init__( - self, - starting_url: str, - messages: list[dict[str, Any]], - goal: str, - images: list, - playwright_manager: AsyncPlaywrightManager, - config: AgentConfig, - ): - self.starting_url = starting_url - self.goal = goal - self.image_urls = images - self.images = urls_to_images(self.image_urls) - self.messages = messages - self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"}) - - self.playwright_manager = playwright_manager - - self.config = config - - self.agent_type = ["bid", "nav", "file", "select_option"] - self.action_set = HighLevelActionSet( - subsets=self.agent_type, strict=False, multiaction=True, demo_mode="default" - ) - self.root_node = LATSNode( - natural_language_description=None, - action=None, - prob=None, - element=None, - goal=self.goal, - parent=None - ) - self.reset_url = os.environ["ACCOUNT_RESET_URL"] - +from .base_agent import BaseAgent +class SimpleSearchAgent(BaseAgent): async def run(self, websocket=None) -> List[Dict[str, Any]]: - """ - Run the search algorithm based on configuration. 
- - Args: - websocket: Optional WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - - Raises: - ValueError: If the search algorithm is not supported - """ algorithm = self.config.search_algorithm.lower() if algorithm == "bfs": - logger.info("Starting BFS algorithm") - if websocket: - return await self.bfs_with_websocket(websocket) - else: - return await self.bfs() + print("Starting BFS algorithm") + return await self.bfs(websocket=websocket) elif algorithm == "dfs": - logger.info("Starting DFS algorithm") - if websocket: - return await self.dfs_with_websocket(websocket) - else: - return await self.dfs() + print("Starting DFS algorithm") + return await self.dfs(websocket) else: error_msg = f"Unsupported algorithm: {algorithm}" - logger.error(error_msg) + print(error_msg) if websocket: await websocket.send_json({ "type": "error", @@ -103,793 +27,112 @@ async def run(self, websocket=None) -> List[Dict[str, Any]]: }) raise ValueError(error_msg) - async def _reset_browser(self, websocket=None) -> Optional[str]: - """Reset the browser to initial state and return the live browser URL if available.""" - await self.playwright_manager.close() - - ## reset account using api-based account reset - if self.config.account_reset: - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "started", - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Use aiohttp instead of curl - async with aiohttp.ClientSession() as session: - headers = {'Connection': 'close'} # Similar to curl -N - async with session.get(self.reset_url, headers=headers) as response: - if response.status == 200: - data = await response.json() - print(f"Account reset successful: {data}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "success", - "data": data, - "timestamp": datetime.utcnow().isoformat() - }) - else: - error_msg = f"Account reset failed with status 
{response.status}" - print(error_msg) - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - except Exception as e: - print(f"Error during account reset: {e}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Create new playwright manager - self.playwright_manager = await setup_playwright( - storage_state=self.config.storage_state, - headless=self.config.headless, - mode=self.config.browser_mode - ) - page = await self.playwright_manager.get_page() - live_browser_url = None - if self.config.browser_mode == "browserbase": - live_browser_url = await self.playwright_manager.get_live_browser_url() - session_id = await self.playwright_manager.get_session_id() - else: - session_id = None - live_browser_url = None - await page.goto(self.starting_url, wait_until="networkidle") - - # Send success message if websocket is provided - if websocket: - if self.config.storage_state: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": f"Browser successfully initialized with storage state file: {self.config.storage_state}", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - else: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": "Browser successfully initialized", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - - return live_browser_url, session_id - except Exception as e: - print(f"Error setting up browser: {e}") - if websocket: - await websocket.send_json({ - "type": "browser_setup", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - return None, None - - async def 
expand(self, node: LATSNode, websocket=None) -> None: - """ - Expand a node by generating its children. - - Args: - node: Node to expand - websocket: Optional WebSocket connection to send updates to - """ - children_state = await self.generate_children(node, websocket) - for child_state in children_state: - child = LATSNode( - natural_language_description=child_state["natural_language_description"], - action=child_state["action"], - prob=child_state["prob"], - element=child_state["element"], - goal=node.goal, - parent=node - ) - node.children.append(child) - - # Send child creation update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_created", - "node_id": id(child), - "parent_id": id(node), - "action": child.action, - "description": child.natural_language_description, - "timestamp": datetime.utcnow().isoformat() - }) - - async def generate_children(self, node: LATSNode, websocket=None) -> list[dict]: - """ - Generate child nodes for a given node. 
- - Args: - node: Parent node to generate children for - websocket: Optional WebSocket connection to send updates to - - Returns: - list[dict]: List of child state dictionaries - """ - # Reset browser and get live URL - live_browser_url, session_id = await self._reset_browser(websocket) - path = self.get_path_to_root(node) - - # Execute path - for n in path[1:]: # Skip root node - if websocket: - await websocket.send_json({ - "type": "replaying_action", - "node_id": id(n), - "action": n.action, - "timestamp": datetime.utcnow().isoformat() - }) - - success = await playwright_step_execution( - n, - self.goal, - self.playwright_manager, - is_replay=False, - log_folder=self.config.log_folder - ) - if not success: - n.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "replay_failed", - "node_id": id(n), - "timestamp": datetime.utcnow().isoformat() - }) - return [] - - if not n.feedback: - n.feedback = await generate_feedback( - self.goal, - n.natural_language_description, - self.playwright_manager, - ) - if websocket: - await websocket.send_json({ - "type": "feedback_generated", - "node_id": id(n), - "feedback": n.feedback, - "timestamp": datetime.utcnow().isoformat() - }) - - time.sleep(3) - page = await self.playwright_manager.get_page() - page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder) - - messages = [{"role": "user", "content": f"Action is: {n.action}"} for n in path[1:]] - - if websocket: - await websocket.send_json({ - "type": "generating_actions", - "node_id": id(node), - "timestamp": datetime.utcnow().isoformat() - }) - - next_actions = await extract_top_actions( - [{"natural_language_description": n.natural_language_description, "action": n.action, "feedback": n.feedback} for n in path[1:]], - self.goal, - self.images, - page_info, - self.action_set, - openai_client, - features=self.config.features, - elements_filter=self.config.elements_filter, - branching_factor=self.config.branching_factor, - 
log_folder=self.config.log_folder, - fullpage=self.config.fullpage, - action_generation_model=self.config.action_generation_model, - action_grounding_model=self.config.action_grounding_model - ) - - children = [] - for action in next_actions: - if action["action"] == "FINISH": - if action["prob"] > 0.2: - node.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "node_terminal", - "node_id": id(node), - "reason": "finish_action", - "timestamp": datetime.utcnow().isoformat() - }) - return [] - continue - - page = await self.playwright_manager.get_page() - code, function_calls = self.action_set.to_python_code(action["action"]) - - if len(function_calls) == 1: - try: - for function_name, function_args in function_calls: - extracted_number = parse_function_args(function_args) - element = await locate_element(page, extracted_number) - action["element"] = element - except Exception as e: - action["element"] = None - if websocket: - await websocket.send_json({ - "type": "element_location_failed", - "action": action["action"], - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - children.append(action) - - if not children: - node.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "node_terminal", - "node_id": id(node), - "reason": "no_valid_actions", - "timestamp": datetime.utcnow().isoformat() - }) - - return children - - def get_path_to_root(self, node: LATSNode) -> List[LATSNode]: - path = [] - current = node - while current: - path.append(current) - current = current.parent - return list(reversed(path)) - - async def bfs(self) -> List[Dict[str, Any]]: - """ - Performs breadth-first search starting from the root node. - Skips nodes that are marked as terminal. 
- - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ + # TODO: first evaluate, then expansion, right now, it is first expansion, then evaluation + async def bfs(self, websocket=None): queue = deque([self.root_node]) queue_set = {self.root_node} # Track nodes in queue best_score = float('-inf') best_path = None + best_node = None visited = set() # Track visited nodes to avoid cycles current_level = 0 # Track current level for BFS - try: - while queue: - # Process all nodes at current level - level_size = len(queue) - current_level += 1 - level_nodes = [] # Store nodes at current level for later processing - - # First, expand all nodes at current level - for _ in range(level_size): - current_node = queue.popleft() - queue_set.remove(current_node) # Remove from queue tracking - - # Skip if we've already visited this node - if current_node in visited: - continue - - visited.add(current_node) - - # Skip terminal nodes - if current_node.is_terminal: - logger.info(f"Node {id(current_node)} is terminal") - continue - - # Expand current node if it hasn't been expanded yet and hasn't reached max_depth - if not current_node.children and current_node.depth < self.config.max_depth: - try: - await self.expand(current_node) - except Exception as e: - error_msg = f"Error expanding node {id(current_node)}: {str(e)}" - logger.error(error_msg) - current_node.is_terminal = True - continue - - # Store node for later processing - level_nodes.append(current_node) - - # Add non-terminal children to queue for next level if they haven't reached max_depth - for child in current_node.children: - if not child.is_terminal and child not in visited and child not in queue_set and child.depth < self.config.max_depth: - queue.append(child) - queue_set.add(child) # Add to queue tracking - - # Now process all nodes at current level - for current_node in level_nodes: - print("print the trajectory") - print_trajectory(current_node) - print("print the entire tree") - 
print_entire_tree(self.root_node) - - # Get the path from root to this node - path = self.get_path_to_root(current_node) - - # Create trajectory for scoring - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - try: - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - except Exception as e: - error_msg = f"Error scoring node {id(current_node)}: {str(e)}" - logger.error(error_msg) - score = float('-inf') - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - - logger.info(f"Node {id(current_node)} score: {score}") - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"Found satisfactory solution with score {score}") - return [{"action": node.action} for node in path[1:]] - - # If we've exhausted all nodes and haven't found a perfect solution, - # return the best path we found - if best_path: - logger.info(f"Returning best path found with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] + while queue: + # Process all nodes at current level + level_size = len(queue) + current_level += 1 + level_nodes = [] # Store nodes at current level for later processing - # If no path was found at all - logger.warning("No valid path found") - return [] - - except Exception as e: - error_msg = f"Error in BFS search: {str(e)}" - logger.error(error_msg) - if best_path: - logger.info(f"Returning best path found before error with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def dfs(self) -> List[Dict[str, Any]]: - """ - Performs depth-first search starting from the root node. 
- Skips nodes that are marked as terminal. - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - stack = [self.root_node] - stack_set = {self.root_node} # Track nodes in stack - best_score = float('-inf') - best_path = None - visited = set() # Track visited nodes to avoid cycles - current_path = [] # Track current path for DFS - - try: - while stack: - current_node = stack[-1] # Peek at the top node without removing it + # First, expand all nodes at current level + for _ in range(level_size): + current_node = queue.popleft() + queue_set.remove(current_node) # Remove from queue tracking # Skip if we've already visited this node if current_node in visited: - stack.pop() - stack_set.remove(current_node) - if current_path: - current_path.pop() # Remove from current path continue visited.add(current_node) - current_path.append(current_node) # Add to current path # Skip terminal nodes if current_node.is_terminal: - logger.info(f"Node {id(current_node)} is terminal") - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path continue - + # Expand current node if it hasn't been expanded yet and hasn't reached max_depth + # node expansion for the next level if not current_node.children and current_node.depth < self.config.max_depth: - try: - await self.expand(current_node) - except Exception as e: - error_msg = f"Error expanding node {id(current_node)}: {str(e)}" - logger.error(error_msg) - current_node.is_terminal = True - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - continue + ## during the node expansion process, reset browser for each node + live_browser_url, session_id = await self._reset_browser(websocket) + # await self.websocket_step_start(step=1, step_name="node_expansion", websocket=websocket) + await self.websocket_node_selection(current_node, websocket=websocket) + await self.node_expansion(current_node, websocket) + tree_data = 
self._get_tree_data() - print("print the trajectory") - print_trajectory(current_node) - print("print the entire tree") - print_entire_tree(self.root_node) - - # Get the path from root to this node + if websocket: + await self.websocket_tree_update(type="tree_update_node_expansion", websocket=websocket, tree_data=tree_data) + else: + print_entire_tree(self.root_node) + + # Store node for later processing + level_nodes.append(current_node) + + # Add non-terminal children to queue for next level if they haven't reached max_depth + for child in current_node.children: + if not child.is_terminal and child not in visited and child not in queue_set and child.depth < self.config.max_depth: + queue.append(child) + queue_set.add(child) # Add to queue tracking + + # stage 2: node evaluation + for current_node in level_nodes: + # await self.websocket_step_start(step=2, step_name="node_evaluation", websocket=websocket) + await self.node_evaluation(current_node) + tree_data = self._get_tree_data() + if websocket: + await self.websocket_tree_update(type="tree_update_node_evaluation", websocket=websocket, tree_data=tree_data) + else: + print("after evaluation") + print_entire_tree(self.root_node) path = self.get_path_to_root(current_node) - - # Create trajectory for scoring - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - try: - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - except Exception as e: - error_msg = f"Error scoring node {id(current_node)}: {str(e)}" - logger.error(error_msg) - score = float('-inf') + score = current_node.value # Update best path if this score is better if score > best_score: best_score = score best_path = path - - logger.info(f"Node 
{id(current_node)} score: {score}") + best_node = current_node + + + print(f"Node {id(current_node)} score: {score}") # If we've found a satisfactory solution, return it if score >= 0.75: - logger.info(f"Found satisfactory solution with score {score}") + print(f"Found satisfactory solution with score {score}") + + # Send completion update if websocket is provided + await self.websocket_search_complete("success", score, current_node.get_trajectory(), websocket=None) + return [{"action": node.action} for node in path[1:]] - - # Add non-terminal children to stack in reverse order if they haven't reached max_depth - has_unvisited_children = False - for child in reversed(current_node.children): - if not child.is_terminal and child not in visited and child not in stack_set and child.depth < self.config.max_depth: - stack.append(child) - stack_set.add(child) # Add to stack tracking - has_unvisited_children = True - break # Only add one child at a time for DFS - - # If no unvisited children, remove current node from stack - if not has_unvisited_children: - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - # If we've exhausted all nodes and haven't found a perfect solution, - # return the best path we found - if best_path: - logger.info(f"Returning best path found with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] + # If we've exhausted all nodes and haven't found a perfect solution, + # return the best path we found + if best_path: + print(f"Returning best path found with score {best_score}") - # If no path was found at all - logger.warning("No valid path found") - return [] + # Send completion update if websocket is provided + await self.websocket_search_complete("partial_success", best_score, best_node.get_trajectory(), websocket=None) - except Exception as e: - error_msg = f"Error in DFS search: {str(e)}" - logger.error(error_msg) - if best_path: - logger.info(f"Returning best path found 
before error with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def bfs_with_websocket(self, websocket=None) -> List[Dict[str, Any]]: - """ - Performs breadth-first search starting from the root node with WebSocket updates. - Skips nodes that are marked as terminal. + return [{"action": node.action} for node in best_path[1:]] - Args: - websocket: Optional WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - queue = deque([self.root_node]) - queue_set = {self.root_node} # Track nodes in queue - best_score = float('-inf') - best_path = None - best_node = None - visited = set() # Track visited nodes to avoid cycles - current_level = 0 # Track current level for BFS + # If no path was found at all + print("No valid path found") - try: - # Get the live browser URL during initial setup - live_browser_url, session_id = await self._reset_browser(websocket) - - # Send initial status if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_status", - "status": "started", - "message": "BFS search started", - "timestamp": datetime.utcnow().isoformat(), - "live_browser_url": live_browser_url, - "session_id": session_id - }) - - while queue: - # Process all nodes at current level - level_size = len(queue) - current_level += 1 - level_nodes = [] # Store nodes at current level for later processing - - if websocket: - await websocket.send_json({ - "type": "level_start", - "level": current_level, - "nodes_in_level": level_size, - "timestamp": datetime.utcnow().isoformat() - }) - - # First, expand all nodes at current level - for _ in range(level_size): - current_node = queue.popleft() - queue_set.remove(current_node) # Remove from queue tracking - - # Skip if we've already visited this node - if current_node in visited: - if websocket: - await websocket.send_json({ - "type": "node_skipped", - "node_id": id(current_node), - 
"reason": "already_visited", - "timestamp": datetime.utcnow().isoformat() - }) - continue - - visited.add(current_node) - - # Skip terminal nodes - if current_node.is_terminal: - if websocket: - await websocket.send_json({ - "type": "node_terminal", - "node_id": id(current_node), - "reason": "terminal_node", - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Expand current node if it hasn't been expanded yet and hasn't reached max_depth - if not current_node.children and current_node.depth < self.config.max_depth: - if websocket: - await websocket.send_json({ - "type": "node_expanding", - "node_id": id(current_node), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - await self.expand(current_node, websocket) - except Exception as e: - error_msg = f"Error expanding node {id(current_node)}: {str(e)}" - logger.error(error_msg) - current_node.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "node_error", - "node_id": id(current_node), - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Send tree update after expansion - if websocket: - tree_data = self._get_tree_data() - await websocket.send_json({ - "type": "tree_update", - "tree": tree_data, - "timestamp": datetime.utcnow().isoformat() - }) - - # Store node for later processing - level_nodes.append(current_node) - - # Add non-terminal children to queue for next level if they haven't reached max_depth - for child in current_node.children: - if not child.is_terminal and child not in visited and child not in queue_set and child.depth < self.config.max_depth: - queue.append(child) - queue_set.add(child) # Add to queue tracking - - # Send queue update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_queued", - "node_id": id(child), - "parent_id": id(current_node), - "timestamp": datetime.utcnow().isoformat() - }) - - # Now process all nodes at current level - for current_node in level_nodes: - # 
Send node processing update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_processing", - "node_id": id(current_node), - "depth": current_node.depth, - "timestamp": datetime.utcnow().isoformat() - }) - - print("print the trajectory") - print_trajectory(current_node) - print("print the entire tree") - print_entire_tree(self.root_node) - - # Get the path from root to this node - path = self.get_path_to_root(current_node) - - # Create trajectory for scoring - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - try: - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - except Exception as e: - error_msg = f"Error scoring node {id(current_node)}: {str(e)}" - logger.error(error_msg) - score = float('-inf') - if websocket: - await websocket.send_json({ - "type": "node_error", - "node_id": id(current_node), - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - # Send score update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_scored", - "node_id": id(current_node), - "score": score, - "timestamp": datetime.utcnow().isoformat() - }) - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - best_node = current_node - - # Send best path update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "best_path_update", - "score": best_score, - "path": best_node.get_trajectory(), - "timestamp": datetime.utcnow().isoformat() - }) - - logger.info(f"Node {id(current_node)} score: {score}") - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"Found 
satisfactory solution with score {score}") - - # Send completion update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "success", - "score": score, - "path":best_node.get_trajectory(), - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in path[1:]] - - if websocket: - await websocket.send_json({ - "type": "level_complete", - "level": current_level, - "timestamp": datetime.utcnow().isoformat() - }) - - # If we've exhausted all nodes and haven't found a perfect solution, - # return the best path we found - if best_path: - logger.info(f"Returning best path found with score {best_score}") - - # Send completion update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "partial_success", - "score": best_score, - "path": best_node.get_trajectory(), - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in best_path[1:]] - - # If no path was found at all - logger.warning("No valid path found") - - # Send failure update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "failure", - "message": "No valid path found", - "timestamp": datetime.utcnow().isoformat() - }) - - return [] - - except Exception as e: - error_msg = f"Error in BFS search: {str(e)}" - logger.error(error_msg) - if websocket: - await websocket.send_json({ - "type": "search_error", - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - if best_path: - logger.info(f"Returning best path found before error with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def dfs_with_websocket(self, websocket=None) -> List[Dict[str, Any]]: - """ - Performs depth-first search starting from the root node with WebSocket updates. - Skips nodes that are marked as terminal. 
+ # Send failure update if websocket is provided + await self.websocket_search_complete("failure", 0, None, websocket=None) - Args: - websocket: Optional WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ + return [] + + # TODO: first evaluate, then expansion + async def dfs(self, websocket=None) -> List[Dict[str, Any]]: stack = [self.root_node] stack_set = {self.root_node} # Track nodes in stack best_score = float('-inf') @@ -898,299 +141,107 @@ async def dfs_with_websocket(self, websocket=None) -> List[Dict[str, Any]]: visited = set() # Track visited nodes to avoid cycles current_path = [] # Track current path for DFS - try: - # Get the live browser URL during initial setup - live_browser_url, session_id = await self._reset_browser(websocket) - - # Send initial status if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_status", - "status": "started", - "message": "DFS search started", - "timestamp": datetime.utcnow().isoformat(), - "live_browser_url": live_browser_url, - "session_id": session_id - }) + # # Get the live browser URL during initial setup + # live_browser_url, session_id = await self._reset_browser(websocket) + + + while stack: + current_node = stack[-1] # Peek at the top node without removing it - while stack: - current_node = stack[-1] # Peek at the top node without removing it - - # Skip if we've already visited this node - if current_node in visited: - stack.pop() - stack_set.remove(current_node) - if current_path: - current_path.pop() # Remove from current path - if websocket: - await websocket.send_json({ - "type": "node_backtrack", - "node_id": id(current_node), - "reason": "already_visited", - "timestamp": datetime.utcnow().isoformat() - }) - continue - - visited.add(current_node) - current_path.append(current_node) # Add to current path - - # Skip terminal nodes - if current_node.is_terminal: - logger.info(f"Node {id(current_node)} is 
terminal") - stack.pop() - stack_set.remove(current_node) + # Skip if we've already visited this node + if current_node in visited: + stack.pop() + stack_set.remove(current_node) + if current_path: current_path.pop() # Remove from current path - if websocket: - await websocket.send_json({ - "type": "node_backtrack", - "node_id": id(current_node), - "reason": "terminal_node", - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Expand current node if it hasn't been expanded yet and hasn't reached max_depth - if not current_node.children and current_node.depth < self.config.max_depth: - if websocket: - await websocket.send_json({ - "type": "node_expanding", - "node_id": id(current_node), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - await self.expand(current_node, websocket) - except Exception as e: - error_msg = f"Error expanding node {id(current_node)}: {str(e)}" - logger.error(error_msg) - current_node.is_terminal = True - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - if websocket: - await websocket.send_json({ - "type": "node_backtrack", - "node_id": id(current_node), - "reason": "expansion_error", - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Send tree update after expansion - if websocket: - tree_data = self._get_tree_data() - await websocket.send_json({ - "type": "tree_update", - "tree": tree_data, - "timestamp": datetime.utcnow().isoformat() - }) - - # Get the path from root to this node - path = self.get_path_to_root(current_node) - - # Create trajectory for scoring - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - try: - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, 
model=self.config.evaluation_model) - score = result["overall_score"] - except Exception as e: - error_msg = f"Error scoring node {id(current_node)}: {str(e)}" - logger.error(error_msg) - score = float('-inf') - if websocket: - await websocket.send_json({ - "type": "node_error", - "node_id": id(current_node), - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - # Send score update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_scored", - "node_id": id(current_node), - "score": score, - "timestamp": datetime.utcnow().isoformat() - }) - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - best_node = current_node - - # Send best path update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "best_path_update", - "score": best_score, - "path": best_node.get_trajectory(), - "timestamp": datetime.utcnow().isoformat() - }) - - logger.info(f"Node {id(current_node)} score: {score}") - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"Found satisfactory solution with score {score}") - - # Send completion update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "success", - "score": score, - "path": best_node.get_trajectory(), - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in path[1:]] - - # Add non-terminal children to stack in reverse order - has_unvisited_children = False - for child in reversed(current_node.children): - if not child.is_terminal and child not in visited and child not in stack_set: - stack.append(child) - stack_set.add(child) # Add to stack tracking - has_unvisited_children = True - - # Send stack update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_stacked", - "node_id": id(child), - "parent_id": id(current_node), - 
"timestamp": datetime.utcnow().isoformat() - }) - break # Only add one child at a time for DFS + continue - # If no unvisited children, remove current node from stack - if not has_unvisited_children: - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - if websocket: - await websocket.send_json({ - "type": "node_backtrack", - "node_id": id(current_node), - "reason": "no_unvisited_children", - "timestamp": datetime.utcnow().isoformat() - }) + visited.add(current_node) + current_path.append(current_node) # Add to current path - # If we've exhausted all nodes and haven't found a perfect solution, - # return the best path we found - if best_path: - logger.info(f"Returning best path found with score {best_score}") + # Skip terminal nodes + if current_node.is_terminal: + print(f"Node {id(current_node)} is terminal") + stack.pop() + stack_set.remove(current_node) + current_path.pop() # Remove from current path + continue - # Send completion update if websocket is provided + # Expand current node if it hasn't been expanded yet and hasn't reached max_depth + # stage 1: node expansion + if not current_node.children and current_node.depth < self.config.max_depth: + ## during the node expansion process, reset browser for each node + live_browser_url, session_id = await self._reset_browser(websocket) + # await self.websocket_step_start(step=1, step_name="node_expansion", websocket=websocket) + await self.websocket_node_selection(current_node, websocket=websocket) + await self.node_expansion(current_node, websocket) + tree_data = self._get_tree_data() if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "partial_success", - "score": best_score, - "path": best_node.get_trajectory(), - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in best_path[1:]] - - # If no path was found at all - logger.warning("No valid path found") + await 
self.websocket_tree_update(type="tree_update_node_expansion", websocket=websocket, tree_data=tree_data) + else: + print_entire_tree(self.root_node) - # Send failure update if websocket is provided + # Get the path from root to this node + path = self.get_path_to_root(current_node) + await self.node_evaluation(current_node) + tree_data = self._get_tree_data() if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "failure", - "message": "No valid path found", - "timestamp": datetime.utcnow().isoformat() - }) - - return [] + await self.websocket_tree_update(type="tree_update_node_evaluation", websocket=websocket, tree_data=tree_data) + else: + print("after evaluation") + print_entire_tree(self.root_node) + path = self.get_path_to_root(current_node) - except Exception as e: - error_msg = f"Error in DFS search: {str(e)}" - logger.error(error_msg) - if websocket: - await websocket.send_json({ - "type": "search_error", - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - if best_path: - logger.info(f"Returning best path found before error with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - return [] - def _get_tree_data(self): - """Get tree data in a format suitable for visualization""" - nodes = collect_all_nodes(self.root_node) - tree_data = [] - - for node in nodes: - node_data = { - "id": id(node), - "parent_id": id(node.parent) if node.parent else None, - "action": node.action if node.action else "ROOT", - "description": node.natural_language_description, - "depth": node.depth, - "is_terminal": node.is_terminal, - "value": node.value, - "visits": node.visits, - "feedback": node.feedback, - "reward": node.reward - } - tree_data.append(node_data) - - return tree_data - - def _get_trajectory_data(self, terminal_node: LATSNode): - """Get trajectory data in a format suitable for visualization - - Args: - terminal_node: The leaf node to start the trajectory from - - Returns: - list: 
List of node data dictionaries representing the trajectory - """ - trajectory_data = [] - path = [] - - # Collect path from terminal to root - current = terminal_node - while current is not None: - path.append(current) - current = current.parent + score = current_node.value - # Process nodes in order from root to terminal - for level, node in enumerate(reversed(path)): - node_data = { - "id": id(node), - "level": level, - "action": node.action if node.action else "ROOT", - "description": node.natural_language_description, - "visits": node.visits, - "value": float(f"{node.value:.3f}") if hasattr(node, 'value') else None, - "reward": float(f"{node.reward:.3f}") if hasattr(node, 'reward') else None, - "is_terminal": node.is_terminal, - "feedback": node.feedback if hasattr(node, 'feedback') else None, - "is_root": not hasattr(node, 'parent') or node.parent is None, - "is_terminal_node": node == terminal_node - } - trajectory_data.append(node_data) - - return trajectory_data - - + # Update best path if this score is better + if score > best_score: + best_score = score + best_path = path + best_node = current_node + + print(f"Node {id(current_node)} score: {score}") + + # If we've found a satisfactory solution, return it + if score >= 0.75: + print(f"Found satisfactory solution with score {score}") + + # Send completion update if websocket is provided + await self.websocket_search_complete("success", score, current_node.get_trajectory(), websocket=None) + return [{"action": node.action} for node in path[1:]] + + # Add non-terminal children to stack in reverse order + has_unvisited_children = False + for child in reversed(current_node.children): + if not child.is_terminal and child not in visited and child not in stack_set: + stack.append(child) + stack_set.add(child) # Add to stack tracking + has_unvisited_children = True + break # Only add one child at a time for DFS + + # If no unvisited children, remove current node from stack + if not has_unvisited_children: + 
stack.pop() + stack_set.remove(current_node) + current_path.pop() # Remove from current path + + # If we've exhausted all nodes and haven't found a perfect solution, + # return the best path we found + if best_path: + print(f"Returning best path found with score {best_score}") + + # Send completion update if websocket is provided + await self.websocket_search_complete("partial_success", best_score, best_node.get_trajectory(), websocket=None) + + return [{"action": node.action} for node in best_path[1:]] + + # If no path was found at all + print("No valid path found") + + # Send failure update if websocket is provided + await self.websocket_search_complete("failure", 0, None, websocket=None) + + return [] + \ No newline at end of file diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/tree_vis.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/tree_vis.py index 48f667d..369a0b3 100644 --- a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/tree_vis.py +++ b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/tree_vis.py @@ -96,6 +96,7 @@ def _print_subtree(node: LATSNode, level: int, prefix: str, is_last: bool) -> No # Prepare node statistics action = node.action + node_id = f"id: {id(node)}" visits = f"visits: {node.visits}" value = f"value: {node.value:.3f}" if hasattr(node, 'value') else "value: N/A" reward = f"reward: {node.reward:.3f}" if hasattr(node, 'reward') else "reward: N/A" @@ -111,7 +112,7 @@ def _print_subtree(node: LATSNode, level: int, prefix: str, is_last: bool) -> No indicator = "(Root)" # Print the current node - print(f"{current_prefix}Level {level}: {GREEN}{action}{RESET} {stats} {indicator}") + print(f"{current_prefix}{node_id} Level {level}: {GREEN}{action}{RESET} {stats} {indicator}") # Prepare the prefix for children child_prefix = prefix + (" " if is_last else "│ ") diff --git 
a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/__init__.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/lats_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/lats_agent.py deleted file mode 100644 index 8fe68db..0000000 --- a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/lats_agent.py +++ /dev/null @@ -1,776 +0,0 @@ -"""Language-based Action Tree Search (LATS) Agent implementation.""" - -import time -from typing import Any, Optional, Tuple, List -import os -from openai import OpenAI -from datetime import datetime -import aiohttp -from dotenv import load_dotenv -load_dotenv() - -from .lats_node import LATSNode, Observation -from ...core_async.config import AgentConfig - -from ...webagent_utils_async.action.highlevel import HighLevelActionSet -from ...webagent_utils_async.utils.playwright_manager import AsyncPlaywrightManager, setup_playwright -from .tree_vis import RED, better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree -from .trajectory_score import create_llm_prompt, score_trajectory_with_openai -from ...replay_async import generate_feedback, playwright_step_execution, locate_element_from_action -from ...webagent_utils_async.browser_env.observation import extract_page_info, observe_features -from ...webagent_utils_async.action.prompt_functions import generate_actions_with_observation -from ...webagent_utils_async.evaluation.feedback import generate_feedback_with_screenshot -from ...webagent_utils_async.utils.utils import urls_to_images - - -from ...webagent_utils_async.utils.utils import parse_function_args, locate_element -from ...evaluation_async.evaluators import goal_finished_evaluator -from ...webagent_utils_async.action.prompt_functions import extract_top_actions 
-from ...webagent_utils_async.browser_env.observation import extract_page_info -from .lats_node import LATSNode -from .tree_vis import better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree -from .trajectory_score import create_llm_prompt, score_trajectory_with_openai -from ...webagent_utils_async.action.utils import execute_action -from ...webagent_utils_async.action.prompt_functions import extract_top_actions, is_goal_finished -from ...webagent_utils_async.browser_env.observation import extract_page_info -from ...webagent_utils_async.evaluation.feedback import capture_post_action_feedback - -openai_client = OpenAI() - -class LATSAgent: - """ - Language-based Action Tree Search Agent implementation. - - This agent uses MCTS-like tree search to find optimal action sequences for web navigation tasks. - - Attributes: - starting_url (str): The initial URL to start from - model_name (str): Name of the language model to use - goal (str): The goal state to achieve - playwright_manager (PlaywrightManager): Manager for browser automation - num_simulations (int): Number of simulations to run - exploration_weight (float): Exploration vs exploitation trade-off parameter - """ - - def __init__( - self, - starting_url: str, - messages: list[dict[str, Any]], - goal: str, - images: list, - playwright_manager: AsyncPlaywrightManager, - config: AgentConfig, - ): - """Initialize the LATS Agent.""" - # no action grounding model, just one step to geneate both action natural language description and action at the same time - self.starting_url = starting_url - self.goal = goal - self.image_urls = images - self.images = urls_to_images(self.image_urls) - - self.messages = messages - if len(images) == 0: - self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"}) - else: - self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"}) - - self.playwright_manager = playwright_manager - - self.config = config - - # set bid, 
only click, fill, hoover, drag and draw - self.agent_type = ["bid"] - self.action_set = HighLevelActionSet( - subsets=self.agent_type, strict=False, multiaction=False, demo_mode="default" - ) - self.root_node = LATSNode( - natural_language_description=None, - action=None, - prob=None, - element=None, - goal=self.goal, - parent=None - ) - self.goal_finished = False - self.result_node = None - self.reset_url = os.environ["ACCOUNT_RESET_URL"] - - async def run(self, websocket=None) -> list[LATSNode]: - """ - Run the LATS search and return the best path found. - - Args: - websocket: Optional WebSocket connection for sending updates - - Returns: - list[LATSNode]: Best path from root to terminal node - """ - if websocket: - await websocket.send_json({ - "type": "search_status", - "status": "started", - "message": "Starting LATS search", - "timestamp": datetime.utcnow().isoformat() - }) - - best_node = await self.lats_search(websocket) - print_trajectory(best_node) - - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "success" if best_node.reward == 1 else "partial_success", - "score": best_node.reward, - "path": best_node.get_trajectory(), - "timestamp": datetime.utcnow().isoformat() - }) - - return best_node.get_trajectory() - - async def lats_search(self, websocket=None) -> LATSNode: - """ - Perform the main LATS search algorithm. 
- - Args: - websocket: Optional WebSocket connection for sending updates - - Returns: - LATSNode: Best terminal node found - """ - print(f"") - print(f"{GREEN}START SEARCH{RESET}") - - terminal_nodes = [] - - for i in range(self.config.iterations): - if websocket: - await websocket.send_json({ - "type": "iteration_start", - "iteration": i + 1, - "timestamp": datetime.utcnow().isoformat() - }) - - print(f"") - print(f"") - print(f"Iteration {i + 1}...") - - # Step 1: Selection with websocket update - if websocket: - await websocket.send_json({ - "type": "step_start", - "step": "selection", - "iteration": i + 1, - "timestamp": datetime.utcnow().isoformat() - }) - - node = self.select_node(self.root_node) - - if node is None: - print("All paths lead to terminal nodes with reward 0. Ending search.") - break - - print(f"{GREEN}Tree:{RESET}") - better_print(node=self.root_node, selected_node=node) - print(f"") - - # Step 2: Expansion with websocket update - if websocket: - await websocket.send_json({ - "type": "step_start", - "step": "expansion", - "iteration": i + 1, - "timestamp": datetime.utcnow().isoformat() - }) - - await self.expand_node(node, websocket) - - while node is not None and node.is_terminal and not self.goal_finished: - print(f"Depth limit node found at iteration {i + 1}, reselecting...") - node = self.select_node(self.root_node) - if node is not None: - await self.expand_node(node, websocket) - - if node is None: - # all the nodes are terminal, stop the search - print(f"{RED}All nodes are terminal, stopping search{RESET}") - break - - if self.goal_finished: - print(f"{RED}Goal finished, stopping search{RESET}") - break - - print(f"{GREEN}Tree:{RESET}") - better_print(self.root_node) - print(f"") - - # Step 3: Evaluation - print(f"") - print(f"{GREEN}Step 3: evaluation{RESET}") - await self.evaluate_node(node) - - print(f"{GREEN}Tree:{RESET}") - better_print(self.root_node) - print(f"") - - # Step 4: Simulation - print(f"{GREEN}Step 4: 
simulation{RESET}") - # # Find the child with the highest value - ## always = 1 - reward, terminal_node = await self.simulate(max(node.children, key=lambda child: child.value), max_depth=self.config.max_depth, num_simulations=1) - terminal_nodes.append(terminal_node) - - if reward == 1: - return terminal_node - - # Step 5: Backpropagation - print(f"{GREEN}Step 5: backpropagation{RESET}") - self.backpropagate(terminal_node, reward) - print(f"{GREEN}Tree:{RESET}") - better_print(self.root_node) - print(f"") - - # Send tree update after each iteration - if websocket: - tree_data = self._get_tree_data() - await websocket.send_json({ - "type": "tree_update", - "tree": tree_data, - "timestamp": datetime.utcnow().isoformat() - }) - - # Find best node - all_nodes_list = collect_all_nodes(self.root_node) - all_nodes_list.extend(terminal_nodes) - - ## temp change: if reward is the same, choose the deeper node - best_child = max(all_nodes_list, key=lambda x: (x.reward, x.depth)) - - if best_child.reward == 1: - print("Successful trajectory found") - else: - print("Unsuccessful trajectory found") - await self.playwright_manager.close() - - return best_child if best_child is not None else self.root_node - - def select_node(self, node: LATSNode) -> Optional[LATSNode]: - """ - Select a node for expansion using UCT. - - Args: - node: Root node to start selection from - - Returns: - Optional[LATSNode]: Selected node or None if all paths exhausted - """ - if node.is_terminal: - return None - return node.get_best_leaf() - - async def expand_node(self, node: LATSNode, websocket=None) -> None: - """ - Expand a node by generating its children. 
- - Args: - node: Node to expand - websocket: Optional WebSocket connection for sending updates - """ - if websocket: - await websocket.send_json({ - "type": "node_expanding", - "node_id": id(node), - "timestamp": datetime.utcnow().isoformat() - }) - - children = await self.generate_children(node, websocket) - - for child in children: - node.add_child(child) - if websocket: - await websocket.send_json({ - "type": "node_created", - "node_id": id(child), - "parent_id": id(node), - "action": child.action, - "description": child.natural_language_description, - "timestamp": datetime.utcnow().isoformat() - }) - - if children and children[0].goal_finish_feedback.is_done: - self.set_goal_finished(children[0]) - if websocket: - await websocket.send_json({ - "type": "goal_finished", - "node_id": id(children[0]), - "timestamp": datetime.utcnow().isoformat() - }) - return - - node.check_terminal() - - async def evaluate_node(self, node: LATSNode) -> None: - """ - Evaluate a node using LLM scoring. - - Args: - node: Node to evaluate - - Returns: - float: Evaluation score - """ - scores = [] - print(f"{GREEN}-- total {len(node.children)} children to evaluate:{RESET}") - for i, child in enumerate(node.children): - print(f"{GREEN}--- evaluating child {i+1}...{RESET}") - if child.is_terminal: - score = 0 - else: - trajectory = child.get_trajectory() - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, self.config.evaluation_model, child.observation.image) - score = result["overall_score"] - scores.append(score) - - for child, score in zip(node.children, scores): - child.value = score - child.reward = score - - async def simulate(self, node: LATSNode, max_depth: int = 2, num_simulations=1) -> tuple[float, LATSNode]: - """ - Perform a rollout simulation from a node. 
- - Args: - node: Starting node for rollout - max_depth: Maximum depth to simulate to - - Returns: - tuple[float, LATSNode]: (Score of the rollout, Terminal node reached) - """ - depth = node.depth - print("print the trajectory") - print_trajectory(node) - print("print the entire tree") - print_entire_tree(self.root_node) - return await self.rollout(node, max_depth=max_depth) - - async def send_completion_request(self, plan, depth, node, trajectory=[]): - print("print the trajectory") - print_trajectory(node) - print("print the entire tree") - print_entire_tree(self.root_node) - - if depth >= self.config.max_depth: - return trajectory, node - - context = await self.playwright_manager.get_context() - page = await self.playwright_manager.get_page() - # Extract page information - time.sleep(3) - page_info = await extract_page_info(page, fullpage=True, log_folder=self.config.log_folder) - updated_actions = await extract_top_actions( - trajectory, self.goal, self.images, page_info, self.action_set, openai_client, - features=["axtree"], elements_filter="som", branching_factor=self.config.branching_factor, - log_folder=self.config.log_folder, fullpage=True, - action_generation_model=self.config.action_generation_model, - action_grounding_model=self.config.action_grounding_model - ) - next_action = updated_actions[0] - retry_count = self.config.retry_count if hasattr(self.config, 'retry_count') else 1 # Default retries if not set - - for attempt in range(retry_count): - try: - # Convert action to Python code - code, function_calls = self.action_set.to_python_code(next_action["action"]) - - # Locate element - if len(function_calls) == 1: - for function_name, function_args in function_calls: - extracted_number = parse_function_args(function_args) - element = await locate_element(page, extracted_number) - next_action["element"] = element - - # Execute action - await execute_action(next_action, self.action_set, page, context, self.goal, page_info['interactive_elements'], - 
self.config.log_folder) - feedback = await capture_post_action_feedback(page, next_action, self.goal, self.config.log_folder) - trajectory.append({'action': next_action['action'], 'feedback': feedback}) - action_str = next_action["action"] - - print(f"The action is: {action_str} - The action result is: {feedback}") - - # Check if goal is finished - messages = [{"role": "system", "content": "The goal is {}, Is the overall goal finished?".format(self.goal)}] - for item in trajectory: - action = item['action'] - feedback = item['feedback'] - messages.append({"role": "user", "content": 'action is: {}'.format(action)}) - messages.append({"role": "user", "content": 'action feedback is: {}'.format(feedback)}) - - goal_finished = await is_goal_finished(messages, openai_client) - - new_node = LATSNode( - natural_language_description=next_action["natural_language_description"], - action=next_action["action"], - prob=next_action["prob"], - element=next_action["element"], - goal=node.goal, - parent=node - ) - - if goal_finished: - return trajectory, new_node - - return await self.send_completion_request(plan, depth + 1, new_node, trajectory) - - except Exception as e: - print(f"Attempt {attempt + 1} failed with error: {e}") - if attempt + 1 == retry_count: - print("Max retries reached. 
Skipping this step and retrying the whole request.") - # Retry the entire request from the same state - return await self.send_completion_request(plan, depth, node, trajectory) - - # If all retries and retries of retries fail, return the current trajectory and node - return trajectory, node - - - async def rollout(self, node: LATSNode, max_depth: int = 2)-> tuple[float, LATSNode]: - # Reset browser state - await self._reset_browser() - path = self.get_path_to_root(node) - - print("execute path") - # Execute path - - messages = [] - trajectory = [] - - for n in path[1:]: # Skip root node - success = await playwright_step_execution( - n, - self.goal, - self.playwright_manager, - is_replay=False, - log_folder=self.config.log_folder - ) - if not success: - return 0, n - if not n.feedback: - n.feedback = await generate_feedback( - self.goal, - n.natural_language_description, - self.playwright_manager, - ) - trajectory.append({ - "action": n.action, - "feedback": n.feedback - }) - ## call the prompt agent - print("current depth: ", len(path) - 1) - print("max depth: ", self.config.max_depth) - trajectory, node = await self.send_completion_request(self.goal, len(path) - 1, node=n, trajectory=trajectory) - print("print the trajectory") - print_trajectory(node) - print("print the entire tree") - print_entire_tree(self.root_node) - - page = await self.playwright_manager.get_page() - page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder) - - messages = [{"role": "user", "content": f"Action is: {n.action}"} for n in path[1:]] - goal_finished, confidence_score = goal_finished_evaluator( - messages, - openai_client, - self.goal, - page_info['screenshot'] - ) - print("evaluating") - - score = confidence_score if goal_finished else 0 - - return score, node - - def backpropagate(self, node: LATSNode, value: float) -> None: - """ - Backpropagate values through the tree. 
- - Args: - node: Current node to start backpropagation from - value: Value to propagate upwards - """ - while node: - node.visits += 1 - node.value = (node.value * (node.visits - 1) + value) / node.visits - node = node.parent - - async def _reset_browser(self, websocket=None) -> Optional[str]: - """Reset the browser to initial state and return the live browser URL if available.""" - await self.playwright_manager.close() - - ## reset account using api-based account reset - if self.config.account_reset: - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "started", - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Use aiohttp instead of curl - async with aiohttp.ClientSession() as session: - headers = {'Connection': 'close'} # Similar to curl -N - async with session.get(self.reset_url, headers=headers) as response: - if response.status == 200: - data = await response.json() - print(f"Account reset successful: {data}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "success", - "data": data, - "timestamp": datetime.utcnow().isoformat() - }) - else: - error_msg = f"Account reset failed with status {response.status}" - print(error_msg) - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - except Exception as e: - print(f"Error during account reset: {e}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Create new playwright manager - self.playwright_manager = await setup_playwright( - storage_state=self.config.storage_state, - headless=self.config.headless, - mode=self.config.browser_mode - ) - page = await self.playwright_manager.get_page() - live_browser_url = None - if self.config.browser_mode == "browserbase": - live_browser_url = await 
self.playwright_manager.get_live_browser_url() - session_id = await self.playwright_manager.get_session_id() - else: - session_id = None - live_browser_url = None - await page.goto(self.starting_url, wait_until="networkidle") - - # Send success message if websocket is provided - if websocket: - if self.config.storage_state: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": f"Browser successfully initialized with storage state file: {self.config.storage_state}", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - else: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": "Browser successfully initialized", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - - return live_browser_url, session_id - except Exception as e: - print(f"Error setting up browser: {e}") - if websocket: - await websocket.send_json({ - "type": "browser_setup", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - return None, None - - async def observe(self) -> None: - page = await self.playwright_manager.get_page() - page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder) - feature_text = await observe_features( - page_info, - features=self.config.features, - elements_filter=self.config.elements_filter, - log_folder=self.config.log_folder, - fullpage=self.config.fullpage - ) - screenshot = page_info['screenshot_som'] - observation = Observation( - text=feature_text, - image=screenshot, - ) - return observation - - async def execute_action_trajectory(self, action_trajectory: list[dict]) -> None: - if not action_trajectory: - return True - - await self._reset_browser() - print("taking action trajectory") - for action_data in action_trajectory: - print("action_data") - print(action_data) - - # Convert 
action_data dict to LATSNode - temp_node = LATSNode( - natural_language_description=action_data["natural_language_description"], - action=action_data["action"], - prob=0, - element=action_data["element"], - goal=self.goal, - parent=None # No parent needed for temporary node - ) - - success = await playwright_step_execution( - temp_node, # Pass the node instead of raw action_data - self.goal, - self.playwright_manager, - is_replay=False, - log_folder=self.config.log_folder - ) - - if not success: - return False - return True - - async def generate_candidate_actions(self, node: LATSNode) -> list[dict]: - trajectory = node.get_trajectory() - action_trajectory = node.get_action_trajectory() - await self.execute_action_trajectory(action_trajectory) - observation = await self.observe() - # only root node has no observation at this point - if node.observation is None: - node.observation = observation - actions = await generate_actions_with_observation( - trajectory, - self.goal, - self.images, - openai_client=openai_client, - action_set=self.action_set, - feature_text=observation.text, - screenshot=observation.image, - branching_factor=self.config.branching_factor, - log_folder=self.config.log_folder, - action_generation_model=self.config.action_generation_model, - ) - - page = await self.playwright_manager.get_page() - valid_actions = [] - for action_data in actions: - if action_data["action"] == "FINISH": - continue - - is_bid_action, element_data = await locate_element_from_action(page, action_data["action"]) - if is_bid_action and not element_data: - continue - - action_data['element'] = element_data - valid_actions.append(action_data) - return valid_actions - - async def generate_children(self, node: LATSNode, websocket=None) -> list[LATSNode]: - print(f"{GREEN}-- generating candidate actions...{RESET}") - - children = [] - - action_trajectory = node.get_action_trajectory() - candidate_actions = await self.generate_candidate_actions(node) - print(f"{GREEN}-- 
generated {len(candidate_actions)} actions{RESET}") - for action_data in candidate_actions: - print(f"{GREEN}--- {action_data['action']}{RESET}") - print(f"{GREEN}--- {action_data['natural_language_description']}{RESET}") - - print(f"") - print(f"{GREEN}-- executing candidate trajectories{RESET}") - for i, action_data in enumerate(candidate_actions): - - candidate_action_trajectory = action_trajectory + [action_data] - print(f"{GREEN}--- trajectory {i+1}:{RESET}") - for action in candidate_action_trajectory: - print(f"{GREEN}---- {action['action']}{RESET}") - print(f"{GREEN}---- {action['natural_language_description']}{RESET}") - executed_successfully = await self.execute_action_trajectory(candidate_action_trajectory) - if not executed_successfully: - # not executed successfully, give up this candidate - print(f"{RED}--- failed to execute action trajectory{RESET}") - continue - - observation = await self.observe() - print(f"{GREEN}--- generate feedback...{RESET}") - feedback = await generate_feedback_with_screenshot( - self.goal, - action_data["natural_language_description"], - observation.image, - model=self.config.feedback_model, - ) - print(f"feedback: is_done: {feedback.is_done}, explanation: {feedback.explanation}") - - child = LATSNode( - natural_language_description=action_data["natural_language_description"], - action=action_data["action"], - prob=action_data["prob"], - element=action_data["element"], - goal=node.goal, - ) - child.observation = observation - child.goal_finish_feedback = feedback - if feedback.is_done: - # the goal is finished, stop the search - return [child] - - children.append(child) - - if node.depth + 1 >= self.config.max_depth: - child.is_terminal = True - - return children - - def set_goal_finished(self, node: LATSNode) -> None: - self.goal_finished = True - self.result_node = node - - def get_path_to_root(self, node: LATSNode) -> List[LATSNode]: - path = [] - current = node - while current: - path.append(current) - current = 
current.parent - return list(reversed(path)) - - def _get_tree_data(self): - """Get tree data in a format suitable for visualization""" - nodes = collect_all_nodes(self.root_node) - tree_data = [] - - for node in nodes: - node_data = { - "id": id(node), - "parent_id": id(node.parent) if node.parent else None, - "action": node.action if node.action else "ROOT", - "description": node.natural_language_description, - "depth": node.depth, - "is_terminal": node.is_terminal, - "value": node.value, - "visits": node.visits, - "reward": node.reward - } - tree_data.append(node_data) - - return tree_data diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/lats_node.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/lats_node.py deleted file mode 100644 index 911255f..0000000 --- a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/lats_node.py +++ /dev/null @@ -1,207 +0,0 @@ -import numpy as np -from dataclasses import dataclass -from typing import Optional -from pydantic import BaseModel -import base64 -from ...webagent_utils_async.evaluation.feedback import Feedback - -@dataclass -class Element: - """Represents a DOM element with its properties.""" - text: str - tag: str - id: str - title: str - ariaLabel: str - name: str - value: str - placeholder: str - class_name: str # Changed from 'class' as it's a reserved keyword - role: str - unique_selector: str - selector_uniqueness_validated: bool - -class Observation(BaseModel): - text: str - image: Optional[bytes] = None - image_base64: Optional[str] = None - - def get_base64_image(self): - if self.image_base64 is None: - self.image_base64 = base64.b64encode(self.image).decode('utf-8') - return self.image_base64 - -class LATSNode: - """ - A node class for Language-based Action Tree Search (LATS). 
- - This class implements a tree structure for MCTS-like search algorithms, - specifically designed for language-based action planning in UI interactions. - - Attributes: - natural_language_description (str): Human-readable description of the action - action (str): The actual action to be executed - prob (float): Probability or confidence score for this action - element (Element): DOM element associated with this action - goal (str): The target goal state - parent (Optional[LATSNode]): Parent node in the tree - children (list[LATSNode]): Child nodes in the tree - visits (int): Number of times this node has been visited - value (float): Accumulated value/score of this node - depth (int): Depth of this node in the tree - is_terminal (bool): Whether this node is a terminal state - reward (float): Reward received at this node - exhausted (bool): Whether all children have been explored - em (float): Exact match score for evaluation - """ - - def __init__( - self, - natural_language_description: str, - action: str, - prob: float, - element: dict, # Using dict instead of Element for backward compatibility - goal: str, - parent: Optional['LATSNode'] = None - ) -> None: - """ - Initialize a new LATSNode. 
- - Args: - natural_language_description: Human-readable description of the action - action: The actual action to be executed - prob: Probability or confidence score for this action - element: DOM element associated with this action - goal: The target goal state - parent: Parent node in the tree, if any - """ - self.natural_language_description = natural_language_description - self.action = action - self.prob = prob - self.element = element - self.feedback = '' - self.goal_finish_feedback: Optional[Feedback] = None - self.parent = parent - self.goal = goal - self.children: list[LATSNode] = [] - self.visits = 0 - self.value = 0.0 - self.depth = 0 if parent is None else parent.depth + 1 - self.is_terminal = False - self.reward = 0.0 - self.exhausted = False # If all children are terminal - self.em = 0.0 # Exact match, evaluation metric - self.observation: Optional[Observation] = None - - def uct(self) -> float: - """ - Calculate the UCT (Upper Confidence Bound for Trees) value for this node. - - Returns: - float: The UCT value for this node. If the node has never been visited, - returns the node's current value. 
- """ - if self.visits == 0: - return self.value - return self.value / self.visits + np.sqrt(2 * np.log(self.parent.visits) / self.visits) - - def get_best_leaf(self) -> 'LATSNode': - unfinished_children = [c for c in self.children if not c.is_terminal] - if not unfinished_children: - return self - - best_child = max(unfinished_children, key=lambda x: x.uct()) - return best_child.get_best_leaf() - - def get_action_trajectory(self) -> list[dict]: - trajectory = [] - node = self - # exclude the root node - while node.parent is not None: - trajectory.append({ - "action": node.action, - "natural_language_description": node.natural_language_description, - "element": node.element - }) - node = node.parent - return trajectory[::-1] - - def get_trajectory(self) -> list[dict]: - trajectory = [] - node = self - # exclude the root node - while node.parent is not None: - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action - }) - node = node.parent - return trajectory[::-1] - - def add_child(self, child: 'LATSNode') -> None: - self.children.append(child) - child.parent = self - child.depth = self.depth + 1 - - def check_terminal(self) -> bool: - if not self.children or all(child.is_terminal for child in self.children): - self.is_terminal = True - if self.parent: - self.parent.check_terminal() - - def __str__(self) -> str: - """ - Get a string representation of the node. - - Returns: - str: A string describing the node's key attributes - """ - return (f"Node(depth={self.depth}, value={self.value:.2f}, " - f"visits={self.visits}, action={self.action}, " - f"feedback={self.feedback})") - - def to_dict(self) -> dict: - """ - Convert the node and its subtree to a dictionary representation. 
- - Returns: - dict: A dictionary containing all node attributes and recursive - representations of parent and children nodes - """ - return { - 'state': self.state, - 'question': self.question, - 'parent': self.parent.to_dict() if self.parent else None, - 'children': [child.to_dict() for child in self.children], - 'visits': self.visits, - 'value': self.value, - 'depth': self.depth, - 'is_terminal': self.is_terminal, - 'reward': self.reward, - 'em': self.em, - } - - @property - def state(self) -> dict: - """ - Get the current state representation of the node. - - Returns: - dict: A dictionary containing the node's state information - """ - return { - 'natural_language_description': self.natural_language_description, - 'action': self.action, - 'prob': self.prob, - 'element': self.element - } - - @property - def question(self) -> str: - """ - Get the goal/question associated with this node. - - Returns: - str: The goal or question string - """ - return self.goal \ No newline at end of file diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/mcts_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/mcts_agent.py deleted file mode 100644 index f1cb9ea..0000000 --- a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/mcts_agent.py +++ /dev/null @@ -1,1008 +0,0 @@ -import logging -import time -from typing import Any, Dict, List, Optional -from collections import deque -from datetime import datetime -import os -import json -import subprocess - -from openai import OpenAI -from dotenv import load_dotenv -load_dotenv() -import aiohttp - -from ...core_async.config import AgentConfig - -from ...webagent_utils_async.action.highlevel import HighLevelActionSet -from ...webagent_utils_async.utils.playwright_manager import AsyncPlaywrightManager, setup_playwright -from ...webagent_utils_async.utils.utils import parse_function_args, locate_element -from ...evaluation_async.evaluators import 
goal_finished_evaluator -from ...replay_async import generate_feedback, playwright_step_execution -from ...webagent_utils_async.action.prompt_functions import extract_top_actions -from ...webagent_utils_async.browser_env.observation import extract_page_info -from .lats_node import LATSNode -from .tree_vis import better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree -from .trajectory_score import create_llm_prompt, score_trajectory_with_openai -from ...webagent_utils_async.utils.utils import urls_to_images - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -openai_client = OpenAI() - -class MCTSAgent: - def __init__( - self, - starting_url: str, - messages: list[dict[str, Any]], - goal: str, - images: list, - playwright_manager: AsyncPlaywrightManager, - config: AgentConfig, - ): - self.starting_url = starting_url - self.goal = goal - self.image_urls = images - self.images = urls_to_images(self.image_urls) - self.messages = messages - self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"}) - - self.playwright_manager = playwright_manager - - self.config = config - - self.agent_type = ["bid", "nav", "file", "select_option"] - self.action_set = HighLevelActionSet( - subsets=self.agent_type, strict=False, multiaction=True, demo_mode="default" - ) - self.root_node = LATSNode( - natural_language_description=None, - action=None, - prob=None, - element=None, - goal=self.goal, - parent=None - ) - self.reset_url = os.environ["ACCOUNT_RESET_URL"] - - async def run(self, websocket=None) -> List[Dict[str, Any]]: - """ - Run the MCTS algorithm based on configuration. 
- - Args: - websocket: Optional WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - logger.info("Starting Reflective MCTS algorithm") - if websocket: - return await self.rmcts_with_websocket(websocket) - else: - return await self.rmcts() - - async def rmcts(self) -> List[Dict[str, Any]]: - """ - Performs Monte Carlo Tree Search starting from the root node. - Uses GPT-4 for node selection and reflection-based backpropagation. - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - best_score = float('-inf') - best_path = None - visited = set() # Track visited nodes to avoid cycles - max_iterations = self.config.iterations # Use configured number of iterations - - try: - # Initial browser setup - live_browser_url, session_id = await self._reset_browser() - - for iteration in range(max_iterations): - logger.info(f"\n{'='*50}") - logger.info(f"RMCTS Iteration {iteration + 1}/{max_iterations}") - logger.info(f"{'='*50}\n") - - # Selection: Use GPT-4 to select a promising path - current_node = self.root_node - path = [current_node] - selection_depth = 0 - - while current_node.children and not current_node.is_terminal: - logger.info(f"\nSelection Step {selection_depth + 1}:") - logger.info(f"Current node action: {current_node.action}") - logger.info(f"Number of children: {len(current_node.children)}") - - # Get trajectory for GPT-4 to evaluate - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - # Create prompt for GPT-4 to select next node - prompt = f"""Given the current trajectory and goal, select the most promising child node to explore next. - Consider the overall progress, efficiency, and likelihood of success. 
- - Goal: {self.goal} - - Current Trajectory: - {json.dumps(trajectory, indent=2)} - - Available Children: - {json.dumps([{ - 'action': child.action, - 'description': child.natural_language_description, - 'visits': child.visits, - 'value': child.value - } for child in current_node.children], indent=2)} - - Return a JSON response with: - {{ - "selected_child_index": int, # Index of the selected child - "explanation": str # Brief explanation of the selection - }}""" - - try: - response = openai_client.chat.completions.create( - model=self.config.evaluation_model, - messages=[ - {"role": "system", "content": "You are an expert at selecting promising paths in a search tree."}, - {"role": "user", "content": prompt} - ], - response_format={"type": "json_object"} - ) - - selection = json.loads(response.choices[0].message.content) - selected_index = selection["selected_child_index"] - - if 0 <= selected_index < len(current_node.children): - current_node = current_node.children[selected_index] - path.append(current_node) - logger.info(f"Selected child {selected_index + 1}: {current_node.action}") - logger.info(f"Selection explanation: {selection['explanation']}") - else: - logger.warning(f"Invalid child index {selected_index}, breaking selection") - break - - except Exception as e: - logger.error(f"Error in node selection: {str(e)}") - break - - selection_depth += 1 - - # Expansion: Expand the selected node if possible - if not current_node.is_terminal and current_node.depth < self.config.max_depth: - logger.info(f"\nExpansion Step:") - logger.info(f"Expanding node: {current_node.action}") - - try: - await self.expand(current_node) - logger.info(f"Successfully expanded node with {len(current_node.children)} children") - except Exception as e: - logger.error(f"Error expanding node: {str(e)}") - current_node.is_terminal = True - # Expansion Step: Expand the selected node if possible - if not current_node.is_terminal and current_node.depth < self.config.max_depth: - 
logger.info(f"\nExpansion Step:") - logger.info(f"Expanding node: {current_node.action}") - - expansion_success = await self.expand(current_node, None) - if not expansion_success: - # No children were generated; backtrack if possible. - if len(path) > 1: - logger.info("Backtracking due to expansion failure (no children generated).") - path.pop() # Remove the current dead-end node. - current_node = path[-1] # Set current_node to its parent. - else: - logger.warning("Expansion failed at root; no further backtracking possible.") - break - else: - logger.info(f"Successfully expanded node with {len(current_node.children)} children") - - # Simulation: Evaluate the current path - logger.info(f"\nSimulation Step:") - logger.info(f"Evaluating path of length {len(path) - 1}") - - try: - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - - logger.info(f"Simulation Results:") - logger.info(f"Overall Score: {score:.3f}") - logger.info(f"Efficiency Score: {result['efficiency_score']:.3f}") - logger.info(f"Accuracy Score: {result['accuracy_score']:.3f}") - logger.info(f"Robustness Score: {result['robustness_score']:.3f}") - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - logger.info(f"\nNew best path found!") - logger.info(f"Previous best score: {best_score:.3f}") - logger.info(f"New best score: {score:.3f}") - - # Reflection-based backpropagation - if score < 0.75: # If the path is not satisfactory - logger.info(f"\nReflection Step (Score {score:.3f} < 0.75):") - - # Generate reflection prompt - reflection_prompt = f"""Analyze the current trajectory and 
suggest improvements. - - Goal: {self.goal} - - Current Trajectory: - {json.dumps(trajectory, indent=2)} - - Score: {score} - - Return a JSON response with: - {{ - "backtrack_to_step": int, # Which step to backtrack to (0-based index) - "reason": str, # Why backtrack to this step - "suggested_improvements": [str] # List of suggested improvements - }}""" - - try: - reflection = openai_client.chat.completions.create( - model=self.config.evaluation_model, - messages=[ - {"role": "system", "content": "You are an expert at analyzing and improving search trajectories."}, - {"role": "user", "content": reflection_prompt} - ], - response_format={"type": "json_object"} - ) - - reflection_result = json.loads(reflection.choices[0].message.content) - backtrack_step = reflection_result["backtrack_to_step"] - - # Backtrack to the suggested step - if 0 <= backtrack_step < len(path): - current_node = path[backtrack_step] - # Remove nodes after the backtrack point - while len(path) > backtrack_step + 1: - path.pop() - logger.info(f"Backtracking to step {backtrack_step}") - logger.info(f"Reason: {reflection_result['reason']}") - logger.info("Suggested improvements:") - for improvement in reflection_result["suggested_improvements"]: - logger.info(f"- {improvement}") - - except Exception as e: - logger.error(f"Error in reflection: {str(e)}") - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"\nFound satisfactory solution with score {score:.3f}") - return [{"action": node.action} for node in path[1:]] - - except Exception as e: - logger.error(f"Error in simulation: {str(e)}") - continue - - # Update node statistics - logger.info(f"\nBackpropagation Step:") - for node in path: - old_value = node.value - node.visits += 1 - node.value = (node.value * (node.visits - 1) + score) / node.visits - logger.info(f"Node {node.action}:") - logger.info(f" Visits: {node.visits}") - logger.info(f" Value: {old_value:.3f} -> {node.value:.3f}") - - # If we've 
exhausted all iterations and haven't found a perfect solution, - # return the best path we found - if best_path and len(best_path) > 1: - logger.info(f"\nSearch complete. Returning best path found with score {best_score:.3f}") - return [{"action": node.action} for node in best_path[1:]] - - # If no valid path was found or path was just the root, return a default action - logger.warning("\nNo valid path found, returning fallback action") - return [{"action": "refresh()", "description": "Fallback action - no valid path found"}] - - except Exception as e: - error_msg = f"Error in RMCTS search: {str(e)}" - logger.error(error_msg) - - if best_path: - logger.info(f"\nReturning best path found before error with score {best_score:.3f}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def rmcts_with_websocket(self, websocket) -> List[Dict[str, Any]]: - """ - Performs Monte Carlo Tree Search starting from the root node with WebSocket updates. - Uses GPT-4 for node selection and reflection-based backpropagation. 
- - Args: - websocket: WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - best_score = float('-inf') - best_path = None - visited = set() # Track visited nodes to avoid cycles - max_iterations = self.config.iterations # Use configured number of iterations - - try: - # Initial browser setup - live_browser_url, session_id = await self._reset_browser(websocket) - - for iteration in range(max_iterations): - logger.info(f"\n{'='*50}") - logger.info(f"RMCTS Iteration {iteration + 1}/{max_iterations}") - logger.info(f"{'='*50}\n") - - # Send iteration update if websocket is provided - await websocket.send_json({ - "type": "rmcts_iteration", - "iteration": iteration + 1, - "max_iterations": max_iterations, - "timestamp": datetime.utcnow().isoformat() - }) - - # Selection: Use GPT-4 to select a promising path - current_node = self.root_node - path = [current_node] - selection_depth = 0 - - while current_node.children and not current_node.is_terminal: - logger.info(f"\nSelection Step {selection_depth + 1}:") - logger.info(f"Current node action: {current_node.action}") - logger.info(f"Number of children: {len(current_node.children)}") - - # Get trajectory for GPT-4 to evaluate - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - # Create prompt for GPT-4 to select next node - prompt = f"""Given the current trajectory and goal, select the most promising child node to explore next. - Consider the overall progress, efficiency, and likelihood of success. 
- - Goal: {self.goal} - - Current Trajectory: - {json.dumps(trajectory, indent=2)} - - Available Children: - {json.dumps([{ - 'action': child.action, - 'description': child.natural_language_description, - 'visits': child.visits, - 'value': child.value - } for child in current_node.children], indent=2)} - - Return a JSON response with: - {{ - "selected_child_index": int, # Index of the selected child - "explanation": str # Brief explanation of the selection - }}""" - - try: - response = openai_client.chat.completions.create( - model=self.config.evaluation_model, - messages=[ - {"role": "system", "content": "You are an expert at selecting promising paths in a search tree."}, - {"role": "user", "content": prompt} - ], - response_format={"type": "json_object"} - ) - - selection = json.loads(response.choices[0].message.content) - selected_index = selection["selected_child_index"] - - if 0 <= selected_index < len(current_node.children): - current_node = current_node.children[selected_index] - path.append(current_node) - logger.info(f"Selected child {selected_index + 1}: {current_node.action}") - logger.info(f"Selection explanation: {selection['explanation']}") - - # Send selection update if websocket is provided - await websocket.send_json({ - "type": "node_selected", - "node_id": id(current_node), - "explanation": selection["explanation"], - "timestamp": datetime.utcnow().isoformat() - }) - else: - logger.warning(f"Invalid child index {selected_index}, breaking selection") - break - - except Exception as e: - logger.error(f"Error in node selection: {str(e)}") - await websocket.send_json({ - "type": "selection_error", - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - break - - selection_depth += 1 - - # Expansion: Expand the selected node if possible - if not current_node.is_terminal and current_node.depth < self.config.max_depth: - logger.info(f"\nExpansion Step:") - logger.info(f"Expanding node: {current_node.action}") - - await 
websocket.send_json({ - "type": "node_expanding", - "node_id": id(current_node), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - await self.expand(current_node, websocket) - logger.info(f"Successfully expanded node with {len(current_node.children)} children") - except Exception as e: - logger.error(f"Error expanding node: {str(e)}") - current_node.is_terminal = True - await websocket.send_json({ - "type": "expansion_error", - "node_id": id(current_node), - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - # Simulation: Evaluate the current path - logger.info(f"\nSimulation Step:") - logger.info(f"Evaluating path of length {len(path) - 1}") - - await websocket.send_json({ - "type": "simulation_start", - "path_length": len(path) - 1, - "timestamp": datetime.utcnow().isoformat() - }) - - try: - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - - logger.info(f"Simulation Results:") - logger.info(f"Overall Score: {score:.3f}") - logger.info(f"Efficiency Score: {result['efficiency_score']:.3f}") - logger.info(f"Accuracy Score: {result['accuracy_score']:.3f}") - logger.info(f"Robustness Score: {result['robustness_score']:.3f}") - - # Send simulation results if websocket is provided - await websocket.send_json({ - "type": "simulation_results", - "score": score, - "efficiency_score": result["efficiency_score"], - "accuracy_score": result["accuracy_score"], - "robustness_score": result["robustness_score"], - "timestamp": datetime.utcnow().isoformat() - }) - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - 
logger.info(f"\nNew best path found!") - logger.info(f"Previous best score: {best_score:.3f}") - logger.info(f"New best score: {score:.3f}") - - # Send best path update if websocket is provided - await websocket.send_json({ - "type": "best_path_update", - "score": best_score, - "path": [{"id": id(node), "action": node.action} for node in best_path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - # Reflection-based backpropagation - if score < 0.75: # If the path is not satisfactory - logger.info(f"\nReflection Step (Score {score:.3f} < 0.75):") - - await websocket.send_json({ - "type": "reflection_start", - "score": score, - "timestamp": datetime.utcnow().isoformat() - }) - - # Generate reflection prompt - reflection_prompt = f"""Analyze the current trajectory and suggest improvements. - - Goal: {self.goal} - - Current Trajectory: - {json.dumps(trajectory, indent=2)} - - Score: {score} - - Return a JSON response with: - {{ - "backtrack_to_step": int, # Which step to backtrack to (0-based index) - "reason": str, # Why backtrack to this step - "suggested_improvements": [str] # List of suggested improvements - }}""" - - try: - reflection = openai_client.chat.completions.create( - model=self.config.evaluation_model, - messages=[ - {"role": "system", "content": "You are an expert at analyzing and improving search trajectories."}, - {"role": "user", "content": reflection_prompt} - ], - response_format={"type": "json_object"} - ) - - reflection_result = json.loads(reflection.choices[0].message.content) - backtrack_step = reflection_result["backtrack_to_step"] - - # Backtrack to the suggested step - if 0 <= backtrack_step < len(path): - current_node = path[backtrack_step] - # Remove nodes after the backtrack point - while len(path) > backtrack_step + 1: - path.pop() - logger.info(f"Backtracking to step {backtrack_step}") - logger.info(f"Reason: {reflection_result['reason']}") - logger.info("Suggested improvements:") - for improvement in 
reflection_result["suggested_improvements"]: - logger.info(f"- {improvement}") - - # Send backtracking update if websocket is provided - await websocket.send_json({ - "type": "backtracking", - "step": backtrack_step, - "reason": reflection_result["reason"], - "suggested_improvements": reflection_result["suggested_improvements"], - "timestamp": datetime.utcnow().isoformat() - }) - - except Exception as e: - logger.error(f"Error in reflection: {str(e)}") - await websocket.send_json({ - "type": "reflection_error", - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"\nFound satisfactory solution with score {score:.3f}") - - # Send completion update if websocket is provided - await websocket.send_json({ - "type": "search_complete", - "status": "success", - "score": score, - "path": [{"id": id(node), "action": node.action} for node in path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in path[1:]] - - except Exception as e: - logger.error(f"Error in simulation: {str(e)}") - await websocket.send_json({ - "type": "simulation_error", - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Update node statistics - logger.info(f"\nBackpropagation Step:") - for node in path: - old_value = node.value - node.visits += 1 - node.value = (node.value * (node.visits - 1) + score) / node.visits - logger.info(f"Node {node.action}:") - logger.info(f" Visits: {node.visits}") - logger.info(f" Value: {old_value:.3f} -> {node.value:.3f}") - - # Send backpropagation update if websocket is provided - await websocket.send_json({ - "type": "backpropagation_complete", - "updated_nodes": [{"id": id(node), "visits": node.visits, "value": node.value} for node in path], - "timestamp": datetime.utcnow().isoformat() - }) - - # If we've exhausted all iterations and haven't found a perfect solution, - # return 
the best path we found - if best_path and len(best_path) > 1: - logger.info(f"\nSearch complete. Returning best path found with score {best_score:.3f}") - - # Send completion update if websocket is provided - await websocket.send_json({ - "type": "search_complete", - "status": "partial_success", - "score": best_score, - "path": [{"id": id(node), "action": node.action} for node in best_path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in best_path[1:]] - - # If no path was found at all - logger.warning("\nNo valid path found") - - # Send failure update if websocket is provided - await websocket.send_json({ - "type": "search_complete", - "status": "failure", - "message": "No valid path found", - "timestamp": datetime.utcnow().isoformat() - }) - - # If no valid path was found or path was just the root, return a default action - logger.warning("\nNo valid path found, returning fallback action") - return [{"action": "refresh()", "description": "Fallback action - no valid path found"}] - - except Exception as e: - error_msg = f"Error in RMCTS search: {str(e)}" - logger.error(error_msg) - - # Send error update if websocket is provided - await websocket.send_json({ - "type": "search_error", - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - if best_path: - logger.info(f"\nReturning best path found before error with score {best_score:.3f}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def _reset_browser(self, websocket=None) -> Optional[tuple]: - """Reset the browser to initial state and return the live browser URL if available.""" - await self.playwright_manager.close() - - ## reset account using api-based account reset - if self.config.account_reset: - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "started", - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Use aiohttp instead of curl - async with 
aiohttp.ClientSession() as session: - headers = {'Connection': 'close'} # Similar to curl -N - async with session.get(self.reset_url, headers=headers) as response: - if response.status == 200: - data = await response.json() - print(f"Account reset successful: {data}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "success", - "data": data, - "timestamp": datetime.utcnow().isoformat() - }) - else: - error_msg = f"Account reset failed with status {response.status}" - print(error_msg) - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - except Exception as e: - print(f"Error during account reset: {e}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Create new playwright manager - self.playwright_manager = await setup_playwright( - storage_state=self.config.storage_state, - headless=self.config.headless, - mode=self.config.browser_mode - ) - page = await self.playwright_manager.get_page() - live_browser_url = None - if self.config.browser_mode == "browserbase": - live_browser_url = await self.playwright_manager.get_live_browser_url() - session_id = await self.playwright_manager.get_session_id() - else: - session_id = None - live_browser_url = None - await page.goto(self.starting_url, wait_until="networkidle") - - # Send success message if websocket is provided - if websocket: - if self.config.storage_state: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": f"Browser successfully initialized with storage state file: {self.config.storage_state}", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - else: - await websocket.send_json({ - "type": "browser_setup", - "status": 
"success", - "message": "Browser successfully initialized", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - - return live_browser_url, session_id - except Exception as e: - print(f"Error setting up browser: {e}") - if websocket: - await websocket.send_json({ - "type": "browser_setup", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - return None, None - - async def expand(self, node: LATSNode, websocket=None) -> bool: - """ - Expand a node by generating its children. If no children are generated, - mark the node as terminal and return False to trigger backtracking. - - Args: - node: Node to expand. - websocket: Optional WebSocket connection to send updates. - - Returns: - bool: True if expansion succeeded (children generated), False otherwise. - """ - try: - children_state = await self.generate_children(node, websocket) - except Exception as e: - logger.error(f"Exception during generation of children for node {node.action}: {e}") - children_state = [] - - if not children_state: - logger.warning("No children generated. Marking node as terminal and triggering backtracking.") - node.is_terminal = True - return False # Indicate that expansion did not generate children. 
- - for child_state in children_state: - try: - child = LATSNode( - natural_language_description=child_state.get("natural_language_description", ""), - action=child_state.get("action", ""), - prob=child_state.get("prob", 0.0), - element=child_state.get("element", None), - goal=node.goal, - parent=node - ) - node.children.append(child) - - if websocket: - await websocket.send_json({ - "type": "node_created", - "node_id": id(child), - "parent_id": id(node), - "action": child.action, - "description": child.natural_language_description, - "timestamp": datetime.utcnow().isoformat() - }) - except Exception as e: - logger.error(f"Error creating child node from state {child_state}: {e}") - return True # Expansion succeeded (children were generated). - - async def generate_children(self, node: LATSNode, websocket=None) -> list[dict]: - """ - Generate child nodes for a given node. - - Args: - node: Parent node to generate children for - websocket: Optional WebSocket connection to send updates to - - Returns: - list[dict]: List of child state dictionaries - """ - # Reset browser and get live URL - live_browser_url, session_id = await self._reset_browser(websocket) - path = self.get_path_to_root(node) - logger.info(f"######### Generating children for path with {len(path)} nodes") - # Execute path - for n in path[1:]: # Skip root node - if websocket: - await websocket.send_json({ - "type": "replaying_action", - "node_id": id(n), - "action": n.action, - "timestamp": datetime.utcnow().isoformat() - }) - try: - success = await playwright_step_execution( - n, - self.goal, - self.playwright_manager, - is_replay=False, - log_folder=self.config.log_folder - ) - logger.info(f"#########Success: {success}") - - if not success: - logger.warning(f"Action execution failed: {n.action}") - n.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "replay_failed", - "node_id": id(n), - "timestamp": datetime.utcnow().isoformat() - }) - return [{ - 
"natural_language_description": "Recover from failed action", - "action": "refresh()", - "prob": 0.1, - "element": None - }] - except Exception as e: - logger.error(f"Error executing action {n.action}: {str(e)}") - # Provide fallback actions instead of bubbling up the exception - return [{ - "natural_language_description": "Recover from action error", - "action": "refresh()", - "prob": 0.1, - "element": None - }] - - - if not n.feedback: - n.feedback = await generate_feedback( - self.goal, - n.natural_language_description, - self.playwright_manager, - ) - if websocket: - await websocket.send_json({ - "type": "feedback_generated", - "node_id": id(n), - "feedback": n.feedback, - "timestamp": datetime.utcnow().isoformat() - }) - - time.sleep(3) - page = await self.playwright_manager.get_page() - page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder) - - messages = [{"role": "user", "content": f"Action is: {n.action}"} for n in path[1:]] - - if websocket: - await websocket.send_json({ - "type": "generating_actions", - "node_id": id(node), - "timestamp": datetime.utcnow().isoformat() - }) - - next_actions = await extract_top_actions( - [{"natural_language_description": n.natural_language_description, "action": n.action, "feedback": n.feedback} for n in path[1:]], - self.goal, - self.images, - page_info, - self.action_set, - openai_client, - features=self.config.features, - elements_filter=self.config.elements_filter, - branching_factor=self.config.branching_factor, - log_folder=self.config.log_folder, - fullpage=self.config.fullpage, - action_generation_model=self.config.action_generation_model, - action_grounding_model=self.config.action_grounding_model - ) - - children = [] - for action in next_actions: - if action["action"] == "FINISH": - logger.info(f"Found FINISH action with probability: {action['prob']}") - if action["prob"] > 0.99: - node.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "node_terminal", - 
"node_id": id(node), - "reason": "finish_action", - "timestamp": datetime.utcnow().isoformat() - }) - continue - # return [] - continue - - page = await self.playwright_manager.get_page() - code, function_calls = self.action_set.to_python_code(action["action"]) - - if len(function_calls) == 1: - try: - for function_name, function_args in function_calls: - extracted_number = parse_function_args(function_args) - element = await locate_element(page, extracted_number) - action["element"] = element - except Exception as e: - logger.warning(f"Element location failed for action: {action['action']}, error: {str(e)}") - action["element"] = None - children.append(action) - if websocket: - await websocket.send_json({ - "type": "element_location_failed", - "action": action["action"], - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - children.append(action) - - if not children: - # node.is_terminal = True - # if websocket: - # await websocket.send_json({ - # "type": "node_terminal", - # "node_id": id(node), - # "reason": "no_valid_actions", - # "timestamp": datetime.utcnow().isoformat() - # }) - # logger.warning("No children generated") - logger.warning("No viable children, creating fallback exploration actions") - - # # If empty list would terminate search, create a "fallback" child - children.extend([ - { - "natural_language_description": "Navigate back to try a different approach", - "action": "navigate_backward()", - "prob": 0.15, - "element": None - }, - { - "natural_language_description": "Try refreshing the page", - "action": "refresh()", - "prob": 0.1, - "element": None - }, - { - "natural_language_description": "Try clicking on a random element", - "action": "click('random')", - "prob": 0.05, - "element": None - } - ]) - print(f"****** Generated children: {children}") - return children - - def get_path_to_root(self, node: LATSNode) -> List[LATSNode]: - path = [] - current = node - while current: - path.append(current) - current = current.parent - 
return list(reversed(path)) \ No newline at end of file diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/simple_search_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/simple_search_agent.py deleted file mode 100644 index b38302f..0000000 --- a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/simple_search_agent.py +++ /dev/null @@ -1,1148 +0,0 @@ -import logging -import time -from typing import Any, Dict, List, Optional -from collections import deque -from datetime import datetime -import os -import json -import subprocess - -from openai import OpenAI -from dotenv import load_dotenv -load_dotenv() -import aiohttp - -from ...core_async.config import AgentConfig - -from ...webagent_utils_async.action.highlevel import HighLevelActionSet -from ...webagent_utils_async.utils.playwright_manager import AsyncPlaywrightManager, setup_playwright -from ...webagent_utils_async.utils.utils import parse_function_args, locate_element -from ...evaluation_async.evaluators import goal_finished_evaluator -from ...replay_async import generate_feedback, playwright_step_execution -from ...webagent_utils_async.action.prompt_functions import extract_top_actions -from ...webagent_utils_async.browser_env.observation import extract_page_info -from .lats_node import LATSNode -from .tree_vis import better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree -from .trajectory_score import create_llm_prompt, score_trajectory_with_openai -from ...webagent_utils_async.utils.utils import urls_to_images - -logger = logging.getLogger(__name__) -openai_client = OpenAI() - -class SimpleSearchAgent: - def __init__( - self, - starting_url: str, - messages: list[dict[str, Any]], - goal: str, - images: list, - playwright_manager: AsyncPlaywrightManager, - config: AgentConfig, - ): - self.starting_url = starting_url - self.goal = goal - self.image_urls = images - self.images = 
urls_to_images(self.image_urls) - self.messages = messages - self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"}) - - self.playwright_manager = playwright_manager - - self.config = config - - self.agent_type = ["bid", "nav", "file", "select_option"] - self.action_set = HighLevelActionSet( - subsets=self.agent_type, strict=False, multiaction=True, demo_mode="default" - ) - self.root_node = LATSNode( - natural_language_description=None, - action=None, - prob=None, - element=None, - goal=self.goal, - parent=None - ) - self.reset_url = os.environ["ACCOUNT_RESET_URL"] - - - async def run(self, websocket=None) -> List[Dict[str, Any]]: - """ - Run the search algorithm based on configuration. - - Args: - websocket: Optional WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - - Raises: - ValueError: If the search algorithm is not supported - """ - algorithm = self.config.search_algorithm.lower() - - if algorithm == "bfs": - logger.info("Starting BFS algorithm") - if websocket: - return await self.bfs_with_websocket(websocket) - else: - return await self.bfs() - elif algorithm == "dfs": - logger.info("Starting DFS algorithm") - if websocket: - return await self.dfs_with_websocket(websocket) - else: - return await self.dfs() - else: - error_msg = f"Unsupported algorithm: {algorithm}" - logger.error(error_msg) - if websocket: - await websocket.send_json({ - "type": "error", - "message": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - raise ValueError(error_msg) - - async def _reset_browser(self, websocket=None) -> Optional[str]: - """Reset the browser to initial state and return the live browser URL if available.""" - await self.playwright_manager.close() - - ## reset account using api-based account reset - if self.config.account_reset: - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "started", - "timestamp": 
datetime.utcnow().isoformat() - }) - - try: - # Use aiohttp instead of curl - async with aiohttp.ClientSession() as session: - headers = {'Connection': 'close'} # Similar to curl -N - async with session.get(self.reset_url, headers=headers) as response: - if response.status == 200: - data = await response.json() - print(f"Account reset successful: {data}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "success", - "data": data, - "timestamp": datetime.utcnow().isoformat() - }) - else: - error_msg = f"Account reset failed with status {response.status}" - print(error_msg) - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - except Exception as e: - print(f"Error during account reset: {e}") - if websocket: - await websocket.send_json({ - "type": "account_reset", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - # Create new playwright manager - self.playwright_manager = await setup_playwright( - storage_state=self.config.storage_state, - headless=self.config.headless, - mode=self.config.browser_mode - ) - page = await self.playwright_manager.get_page() - live_browser_url = None - if self.config.browser_mode == "browserbase": - live_browser_url = await self.playwright_manager.get_live_browser_url() - session_id = await self.playwright_manager.get_session_id() - else: - session_id = None - live_browser_url = None - await page.goto(self.starting_url, wait_until="networkidle") - - # Send success message if websocket is provided - if websocket: - if self.config.storage_state: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": f"Browser successfully initialized with storage state file: {self.config.storage_state}", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() 
- }) - else: - await websocket.send_json({ - "type": "browser_setup", - "status": "success", - "message": "Browser successfully initialized", - "live_browser_url": live_browser_url, - "session_id": session_id, - "timestamp": datetime.utcnow().isoformat() - }) - - return live_browser_url, session_id - except Exception as e: - print(f"Error setting up browser: {e}") - if websocket: - await websocket.send_json({ - "type": "browser_setup", - "status": "failed", - "reason": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - return None, None - - async def expand(self, node: LATSNode, websocket=None) -> None: - """ - Expand a node by generating its children. - - Args: - node: Node to expand - websocket: Optional WebSocket connection to send updates to - """ - children_state = await self.generate_children(node, websocket) - for child_state in children_state: - child = LATSNode( - natural_language_description=child_state["natural_language_description"], - action=child_state["action"], - prob=child_state["prob"], - element=child_state["element"], - goal=node.goal, - parent=node - ) - node.children.append(child) - - # Send child creation update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_created", - "node_id": id(child), - "parent_id": id(node), - "action": child.action, - "description": child.natural_language_description, - "timestamp": datetime.utcnow().isoformat() - }) - - async def generate_children(self, node: LATSNode, websocket=None) -> list[dict]: - """ - Generate child nodes for a given node. 
- - Args: - node: Parent node to generate children for - websocket: Optional WebSocket connection to send updates to - - Returns: - list[dict]: List of child state dictionaries - """ - # Reset browser and get live URL - live_browser_url, session_id = await self._reset_browser(websocket) - path = self.get_path_to_root(node) - - # Execute path - for n in path[1:]: # Skip root node - if websocket: - await websocket.send_json({ - "type": "replaying_action", - "node_id": id(n), - "action": n.action, - "timestamp": datetime.utcnow().isoformat() - }) - - success = await playwright_step_execution( - n, - self.goal, - self.playwright_manager, - is_replay=False, - log_folder=self.config.log_folder - ) - if not success: - n.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "replay_failed", - "node_id": id(n), - "timestamp": datetime.utcnow().isoformat() - }) - return [] - - if not n.feedback: - n.feedback = await generate_feedback( - self.goal, - n.natural_language_description, - self.playwright_manager, - ) - if websocket: - await websocket.send_json({ - "type": "feedback_generated", - "node_id": id(n), - "feedback": n.feedback, - "timestamp": datetime.utcnow().isoformat() - }) - - time.sleep(3) - page = await self.playwright_manager.get_page() - page_info = await extract_page_info(page, self.config.fullpage, self.config.log_folder) - - messages = [{"role": "user", "content": f"Action is: {n.action}"} for n in path[1:]] - - if websocket: - await websocket.send_json({ - "type": "generating_actions", - "node_id": id(node), - "timestamp": datetime.utcnow().isoformat() - }) - - next_actions = await extract_top_actions( - [{"natural_language_description": n.natural_language_description, "action": n.action, "feedback": n.feedback} for n in path[1:]], - self.goal, - self.images, - page_info, - self.action_set, - openai_client, - features=self.config.features, - elements_filter=self.config.elements_filter, - branching_factor=self.config.branching_factor, - 
log_folder=self.config.log_folder, - fullpage=self.config.fullpage, - action_generation_model=self.config.action_generation_model, - action_grounding_model=self.config.action_grounding_model - ) - - children = [] - for action in next_actions: - if action["action"] == "FINISH": - if action["prob"] > 0.2: - node.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "node_terminal", - "node_id": id(node), - "reason": "finish_action", - "timestamp": datetime.utcnow().isoformat() - }) - return [] - continue - - page = await self.playwright_manager.get_page() - code, function_calls = self.action_set.to_python_code(action["action"]) - - if len(function_calls) == 1: - try: - for function_name, function_args in function_calls: - extracted_number = parse_function_args(function_args) - element = await locate_element(page, extracted_number) - action["element"] = element - except Exception as e: - action["element"] = None - if websocket: - await websocket.send_json({ - "type": "element_location_failed", - "action": action["action"], - "error": str(e), - "timestamp": datetime.utcnow().isoformat() - }) - children.append(action) - - if not children: - node.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "node_terminal", - "node_id": id(node), - "reason": "no_valid_actions", - "timestamp": datetime.utcnow().isoformat() - }) - - return children - - def get_path_to_root(self, node: LATSNode) -> List[LATSNode]: - path = [] - current = node - while current: - path.append(current) - current = current.parent - return list(reversed(path)) - - async def bfs(self) -> List[Dict[str, Any]]: - """ - Performs breadth-first search starting from the root node. - Skips nodes that are marked as terminal. 
- - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - queue = deque([self.root_node]) - queue_set = {self.root_node} # Track nodes in queue - best_score = float('-inf') - best_path = None - visited = set() # Track visited nodes to avoid cycles - current_level = 0 # Track current level for BFS - - try: - while queue: - # Process all nodes at current level - level_size = len(queue) - current_level += 1 - level_nodes = [] # Store nodes at current level for later processing - - # First, expand all nodes at current level - for _ in range(level_size): - current_node = queue.popleft() - queue_set.remove(current_node) # Remove from queue tracking - - # Skip if we've already visited this node - if current_node in visited: - continue - - visited.add(current_node) - - # Skip terminal nodes - if current_node.is_terminal: - logger.info(f"Node {id(current_node)} is terminal") - continue - - # Expand current node if it hasn't been expanded yet and hasn't reached max_depth - if not current_node.children and current_node.depth < self.config.max_depth: - try: - await self.expand(current_node) - except Exception as e: - error_msg = f"Error expanding node {id(current_node)}: {str(e)}" - logger.error(error_msg) - current_node.is_terminal = True - continue - - # Store node for later processing - level_nodes.append(current_node) - - # Add non-terminal children to queue for next level if they haven't reached max_depth - for child in current_node.children: - if not child.is_terminal and child not in visited and child not in queue_set and child.depth < self.config.max_depth: - queue.append(child) - queue_set.add(child) # Add to queue tracking - - # Now process all nodes at current level - for current_node in level_nodes: - print("print the trajectory") - print_trajectory(current_node) - print("print the entire tree") - print_entire_tree(self.root_node) - - # Get the path from root to this node - path = self.get_path_to_root(current_node) - - # Create trajectory 
for scoring - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - try: - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - except Exception as e: - error_msg = f"Error scoring node {id(current_node)}: {str(e)}" - logger.error(error_msg) - score = float('-inf') - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - - logger.info(f"Node {id(current_node)} score: {score}") - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"Found satisfactory solution with score {score}") - return [{"action": node.action} for node in path[1:]] - - # If we've exhausted all nodes and haven't found a perfect solution, - # return the best path we found - if best_path: - logger.info(f"Returning best path found with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - - # If no path was found at all - logger.warning("No valid path found") - return [] - - except Exception as e: - error_msg = f"Error in BFS search: {str(e)}" - logger.error(error_msg) - if best_path: - logger.info(f"Returning best path found before error with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def dfs(self) -> List[Dict[str, Any]]: - """ - Performs depth-first search starting from the root node. - Skips nodes that are marked as terminal. 
- - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - stack = [self.root_node] - stack_set = {self.root_node} # Track nodes in stack - best_score = float('-inf') - best_path = None - visited = set() # Track visited nodes to avoid cycles - current_path = [] # Track current path for DFS - - try: - while stack: - current_node = stack[-1] # Peek at the top node without removing it - - # Skip if we've already visited this node - if current_node in visited: - stack.pop() - stack_set.remove(current_node) - if current_path: - current_path.pop() # Remove from current path - continue - - visited.add(current_node) - current_path.append(current_node) # Add to current path - - # Skip terminal nodes - if current_node.is_terminal: - logger.info(f"Node {id(current_node)} is terminal") - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - continue - - # Expand current node if it hasn't been expanded yet and hasn't reached max_depth - if not current_node.children and current_node.depth < self.config.max_depth: - try: - await self.expand(current_node) - except Exception as e: - error_msg = f"Error expanding node {id(current_node)}: {str(e)}" - logger.error(error_msg) - current_node.is_terminal = True - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - continue - - print("print the trajectory") - print_trajectory(current_node) - print("print the entire tree") - print_entire_tree(self.root_node) - - # Get the path from root to this node - path = self.get_path_to_root(current_node) - - # Create trajectory for scoring - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - try: - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, 
model=self.config.evaluation_model) - score = result["overall_score"] - except Exception as e: - error_msg = f"Error scoring node {id(current_node)}: {str(e)}" - logger.error(error_msg) - score = float('-inf') - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - - logger.info(f"Node {id(current_node)} score: {score}") - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"Found satisfactory solution with score {score}") - return [{"action": node.action} for node in path[1:]] - - # Add non-terminal children to stack in reverse order if they haven't reached max_depth - has_unvisited_children = False - for child in reversed(current_node.children): - if not child.is_terminal and child not in visited and child not in stack_set and child.depth < self.config.max_depth: - stack.append(child) - stack_set.add(child) # Add to stack tracking - has_unvisited_children = True - break # Only add one child at a time for DFS - - # If no unvisited children, remove current node from stack - if not has_unvisited_children: - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - - # If we've exhausted all nodes and haven't found a perfect solution, - # return the best path we found - if best_path: - logger.info(f"Returning best path found with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - - # If no path was found at all - logger.warning("No valid path found") - return [] - - except Exception as e: - error_msg = f"Error in DFS search: {str(e)}" - logger.error(error_msg) - if best_path: - logger.info(f"Returning best path found before error with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def bfs_with_websocket(self, websocket=None) -> List[Dict[str, Any]]: - """ - Performs breadth-first search starting from the root node with WebSocket updates. 
- Skips nodes that are marked as terminal. - - Args: - websocket: Optional WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - queue = deque([self.root_node]) - queue_set = {self.root_node} # Track nodes in queue - best_score = float('-inf') - best_path = None - visited = set() # Track visited nodes to avoid cycles - current_level = 0 # Track current level for BFS - - try: - # Get the live browser URL during initial setup - live_browser_url, session_id = await self._reset_browser(websocket) - - # Send initial status if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_status", - "status": "started", - "message": "BFS search started", - "timestamp": datetime.utcnow().isoformat(), - "live_browser_url": live_browser_url, - "session_id": session_id - }) - - while queue: - # Process all nodes at current level - level_size = len(queue) - current_level += 1 - level_nodes = [] # Store nodes at current level for later processing - - if websocket: - await websocket.send_json({ - "type": "level_start", - "level": current_level, - "nodes_in_level": level_size, - "timestamp": datetime.utcnow().isoformat() - }) - - # First, expand all nodes at current level - for _ in range(level_size): - current_node = queue.popleft() - queue_set.remove(current_node) # Remove from queue tracking - - # Skip if we've already visited this node - if current_node in visited: - if websocket: - await websocket.send_json({ - "type": "node_skipped", - "node_id": id(current_node), - "reason": "already_visited", - "timestamp": datetime.utcnow().isoformat() - }) - continue - - visited.add(current_node) - - # Skip terminal nodes - if current_node.is_terminal: - if websocket: - await websocket.send_json({ - "type": "node_terminal", - "node_id": id(current_node), - "reason": "terminal_node", - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Expand current node if it hasn't been expanded 
yet and hasn't reached max_depth - if not current_node.children and current_node.depth < self.config.max_depth: - if websocket: - await websocket.send_json({ - "type": "node_expanding", - "node_id": id(current_node), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - await self.expand(current_node, websocket) - except Exception as e: - error_msg = f"Error expanding node {id(current_node)}: {str(e)}" - logger.error(error_msg) - current_node.is_terminal = True - if websocket: - await websocket.send_json({ - "type": "node_error", - "node_id": id(current_node), - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Send tree update after expansion - if websocket: - tree_data = self._get_tree_data() - await websocket.send_json({ - "type": "tree_update", - "tree": tree_data, - "timestamp": datetime.utcnow().isoformat() - }) - - # Store node for later processing - level_nodes.append(current_node) - - # Add non-terminal children to queue for next level if they haven't reached max_depth - for child in current_node.children: - if not child.is_terminal and child not in visited and child not in queue_set and child.depth < self.config.max_depth: - queue.append(child) - queue_set.add(child) # Add to queue tracking - - # Send queue update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_queued", - "node_id": id(child), - "parent_id": id(current_node), - "timestamp": datetime.utcnow().isoformat() - }) - - # Now process all nodes at current level - for current_node in level_nodes: - # Send node processing update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_processing", - "node_id": id(current_node), - "depth": current_node.depth, - "timestamp": datetime.utcnow().isoformat() - }) - - print("print the trajectory") - print_trajectory(current_node) - print("print the entire tree") - print_entire_tree(self.root_node) - - # Get the path from root to this node - path 
= self.get_path_to_root(current_node) - - # Create trajectory for scoring - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - try: - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - except Exception as e: - error_msg = f"Error scoring node {id(current_node)}: {str(e)}" - logger.error(error_msg) - score = float('-inf') - if websocket: - await websocket.send_json({ - "type": "node_error", - "node_id": id(current_node), - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - # Send score update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_scored", - "node_id": id(current_node), - "score": score, - "timestamp": datetime.utcnow().isoformat() - }) - - # Update best path if this score is better - if score > best_score: - best_score = score - best_path = path - - # Send best path update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "best_path_update", - "score": best_score, - "path": [{"id": id(node), "action": node.action} for node in best_path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - logger.info(f"Node {id(current_node)} score: {score}") - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"Found satisfactory solution with score {score}") - - # Send completion update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "success", - "score": score, - "path": [{"id": id(node), "action": node.action} for node in path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in path[1:]] - - if websocket: - 
await websocket.send_json({ - "type": "level_complete", - "level": current_level, - "timestamp": datetime.utcnow().isoformat() - }) - - # If we've exhausted all nodes and haven't found a perfect solution, - # return the best path we found - if best_path: - logger.info(f"Returning best path found with score {best_score}") - - # Send completion update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "partial_success", - "score": best_score, - "path": [{"id": id(node), "action": node.action} for node in best_path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in best_path[1:]] - - # If no path was found at all - logger.warning("No valid path found") - - # Send failure update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "failure", - "message": "No valid path found", - "timestamp": datetime.utcnow().isoformat() - }) - - return [] - - except Exception as e: - error_msg = f"Error in BFS search: {str(e)}" - logger.error(error_msg) - if websocket: - await websocket.send_json({ - "type": "search_error", - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - if best_path: - logger.info(f"Returning best path found before error with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - async def dfs_with_websocket(self, websocket=None) -> List[Dict[str, Any]]: - """ - Performs depth-first search starting from the root node with WebSocket updates. - Skips nodes that are marked as terminal. 
- - Args: - websocket: Optional WebSocket connection to send updates to - - Returns: - List[Dict[str, Any]]: List of actions in the best path found - """ - stack = [self.root_node] - stack_set = {self.root_node} # Track nodes in stack - best_score = float('-inf') - best_path = None - visited = set() # Track visited nodes to avoid cycles - current_path = [] # Track current path for DFS - - try: - # Get the live browser URL during initial setup - live_browser_url, session_id = await self._reset_browser(websocket) - - # Send initial status if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_status", - "status": "started", - "message": "DFS search started", - "timestamp": datetime.utcnow().isoformat(), - "live_browser_url": live_browser_url, - "session_id": session_id - }) - - while stack: - current_node = stack[-1] # Peek at the top node without removing it - - # Skip if we've already visited this node - if current_node in visited: - stack.pop() - stack_set.remove(current_node) - if current_path: - current_path.pop() # Remove from current path - if websocket: - await websocket.send_json({ - "type": "node_backtrack", - "node_id": id(current_node), - "reason": "already_visited", - "timestamp": datetime.utcnow().isoformat() - }) - continue - - visited.add(current_node) - current_path.append(current_node) # Add to current path - - # Skip terminal nodes - if current_node.is_terminal: - logger.info(f"Node {id(current_node)} is terminal") - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - if websocket: - await websocket.send_json({ - "type": "node_backtrack", - "node_id": id(current_node), - "reason": "terminal_node", - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Expand current node if it hasn't been expanded yet and hasn't reached max_depth - if not current_node.children and current_node.depth < self.config.max_depth: - if websocket: - await websocket.send_json({ - 
"type": "node_expanding", - "node_id": id(current_node), - "timestamp": datetime.utcnow().isoformat() - }) - - try: - await self.expand(current_node, websocket) - except Exception as e: - error_msg = f"Error expanding node {id(current_node)}: {str(e)}" - logger.error(error_msg) - current_node.is_terminal = True - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - if websocket: - await websocket.send_json({ - "type": "node_backtrack", - "node_id": id(current_node), - "reason": "expansion_error", - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - continue - - # Send tree update after expansion - if websocket: - tree_data = self._get_tree_data() - await websocket.send_json({ - "type": "tree_update", - "tree": tree_data, - "timestamp": datetime.utcnow().isoformat() - }) - - # Get the path from root to this node - path = self.get_path_to_root(current_node) - - # Create trajectory for scoring - trajectory = [] - for node in path[1:]: # Skip root node - trajectory.append({ - "natural_language_description": node.natural_language_description, - "action": node.action, - "feedback": node.feedback - }) - - try: - # Score the trajectory - prompt = create_llm_prompt(trajectory, self.goal) - result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model) - score = result["overall_score"] - except Exception as e: - error_msg = f"Error scoring node {id(current_node)}: {str(e)}" - logger.error(error_msg) - score = float('-inf') - if websocket: - await websocket.send_json({ - "type": "node_error", - "node_id": id(current_node), - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - - # Send score update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_scored", - "node_id": id(current_node), - "score": score, - "timestamp": datetime.utcnow().isoformat() - }) - - # Update best path if this score is better - if score > best_score: - 
best_score = score - best_path = path - - # Send best path update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "best_path_update", - "score": best_score, - "path": [{"id": id(node), "action": node.action} for node in best_path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - logger.info(f"Node {id(current_node)} score: {score}") - - # If we've found a satisfactory solution, return it - if score >= 0.75: - logger.info(f"Found satisfactory solution with score {score}") - - # Send completion update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "success", - "score": score, - "path": [{"id": id(node), "action": node.action} for node in path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in path[1:]] - - # Add non-terminal children to stack in reverse order - has_unvisited_children = False - for child in reversed(current_node.children): - if not child.is_terminal and child not in visited and child not in stack_set: - stack.append(child) - stack_set.add(child) # Add to stack tracking - has_unvisited_children = True - - # Send stack update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "node_stacked", - "node_id": id(child), - "parent_id": id(current_node), - "timestamp": datetime.utcnow().isoformat() - }) - break # Only add one child at a time for DFS - - # If no unvisited children, remove current node from stack - if not has_unvisited_children: - stack.pop() - stack_set.remove(current_node) - current_path.pop() # Remove from current path - if websocket: - await websocket.send_json({ - "type": "node_backtrack", - "node_id": id(current_node), - "reason": "no_unvisited_children", - "timestamp": datetime.utcnow().isoformat() - }) - - # If we've exhausted all nodes and haven't found a perfect solution, - # return the best path we found - if best_path: - logger.info(f"Returning 
best path found with score {best_score}") - - # Send completion update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "partial_success", - "score": best_score, - "path": [{"id": id(node), "action": node.action} for node in best_path[1:]], - "timestamp": datetime.utcnow().isoformat() - }) - - return [{"action": node.action} for node in best_path[1:]] - - # If no path was found at all - logger.warning("No valid path found") - - # Send failure update if websocket is provided - if websocket: - await websocket.send_json({ - "type": "search_complete", - "status": "failure", - "message": "No valid path found", - "timestamp": datetime.utcnow().isoformat() - }) - - return [] - - except Exception as e: - error_msg = f"Error in DFS search: {str(e)}" - logger.error(error_msg) - if websocket: - await websocket.send_json({ - "type": "search_error", - "error": error_msg, - "timestamp": datetime.utcnow().isoformat() - }) - if best_path: - logger.info(f"Returning best path found before error with score {best_score}") - return [{"action": node.action} for node in best_path[1:]] - return [] - - def _get_tree_data(self): - """Get tree data in a format suitable for visualization""" - nodes = collect_all_nodes(self.root_node) - tree_data = [] - - for node in nodes: - node_data = { - "id": id(node), - "parent_id": id(node.parent) if node.parent else None, - "action": node.action if node.action else "ROOT", - "description": node.natural_language_description, - "depth": node.depth, - "is_terminal": node.is_terminal - } - tree_data.append(node_data) - - return tree_data diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/trajectory_score.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/trajectory_score.py deleted file mode 100644 index 1bbe9af..0000000 --- a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/trajectory_score.py +++ /dev/null 
@@ -1,209 +0,0 @@ -"""Module for scoring and evaluating action trajectories using LLMs.""" - -import base64 -import json -import datetime -from typing import Any, Optional, List, Dict, TypedDict -from openai import OpenAI - -class TrajectoryMetrics(TypedDict): - """Structured metrics for trajectory evaluation.""" - overall_score: float - efficiency_score: float - accuracy_score: float - robustness_score: float - detailed_explanation: str - improvement_suggestions: List[str] - key_achievements: List[str] - potential_issues: List[str] - metadata: Dict[str, Any] - -SYSTEM_PROMPT = \ -"""You are an expert web task completion evaluator. Your task is to provide a comprehensive evaluation of web task completion -by analyzing the trajectory against the desired goal. Consider multiple aspects of the task execution and provide detailed feedback. - -Analyze the provided trajectory and screenshot of the web page, return a JSON response with: -1. overall_score (float 0-10): Overall task completion score -2. efficiency_score (float 0-10): How well the task was completed (minimal steps, optimal path) -3. accuracy_score (float 0-10): How precisely the actions were executed -4. robustness_score (float 0-10): How well the solution handles edge cases -5. detailed_explanation (string): Comprehensive analysis of the execution -6. improvement_suggestions (list of strings): Specific ways to improve the solution -7. key_achievements (list of strings): Important milestones reached -8. 
potential_issues (list of strings): Areas that could be problematic - -Example format: -{ - "overall_score": 8.5, - "efficiency_score": 9.0, - "accuracy_score": 8.0, - "robustness_score": 7.5, - "detailed_explanation": "The trajectory effectively achieves the goal with minimal steps...", - "improvement_suggestions": ["Could have used more efficient selectors", "Consider adding error handling"], - "key_achievements": ["Successfully logged in", "Found target element"], - "potential_issues": ["No timeout handling", "Assumes specific page layout"] -} -""" - -USER_PROMPT_TEMPLATE = \ -"""Goal: {goal} - -Trajectory: -{trajectory_str} - -Current Page State: -{page_state} - -Please provide a comprehensive evaluation of the task completion.""" - -def format_trajectory_step(step: Dict[str, Any], index: int) -> str: - """Format a single trajectory step with detailed information.""" - return f"""Step {index}: - Action: {step['action']} - Description: {step['natural_language_description']} - Target: {step.get('target', 'N/A')} - Status: {step.get('status', 'completed')} - Output: {step.get('output', 'N/A')}""" - -def create_llm_prompt( - trajectory: List[Dict[str, Any]], - goal: str, - page_state: Optional[Dict[str, Any]] = None -) -> str: - """ - Creates a prompt for LLM scoring and processes trajectory information. 
- - Args: - trajectory: List of dictionaries containing action and description - goal: The goal of the trajectory - page_state: Optional dictionary containing current page state information - - Returns: - str: Formatted prompt string - """ - # Format trajectory steps with more detail - trajectory_str = "\n\n".join( - format_trajectory_step(step, i+1) - for i, step in enumerate(trajectory) - ) - - # Format page state if available - page_state_str = "No page state information available" - if page_state: - page_state_str = json.dumps(page_state, indent=2) - - prompt = USER_PROMPT_TEMPLATE.format( - goal=goal, - trajectory_str=trajectory_str, - page_state=page_state_str - ) - return prompt - -def validate_evaluation(evaluation: Dict[str, Any]) -> bool: - """Validate the evaluation output has all required fields and correct types.""" - required_fields = { - 'overall_score': (int, float), - 'efficiency_score': (int, float), - 'accuracy_score': (int, float), - 'robustness_score': (int, float), - 'detailed_explanation': str, - 'improvement_suggestions': list, - 'key_achievements': list, - 'potential_issues': list - } - - for field, expected_type in required_fields.items(): - if field not in evaluation: - return False - if not isinstance(evaluation[field], expected_type): - return False - if isinstance(evaluation[field], (int, float)): - if not 0 <= evaluation[field] <= 10: - return False - - return True - -def normalize_scores(evaluation: Dict[str, Any]) -> Dict[str, Any]: - """Normalize all scores to be between 0 and 1.""" - score_fields = ['overall_score', 'efficiency_score', 'accuracy_score', 'robustness_score'] - for field in score_fields: - if field in evaluation: - evaluation[field] = evaluation[field] / 10.0 - return evaluation - -def score_trajectory_with_openai( - prompt: str, - openai_client: OpenAI, - model: str = "gpt-4o", - screenshot: Optional[bytes] = None -) -> Dict[str, Any]: - """ - Uses OpenAI to score the trajectory based on the provided prompt. 
- - Args: - prompt: The prompt to send to OpenAI - openai_client: OpenAI client instance - model: OpenAI model to use - screenshot: Screenshot of the current page - - Returns: - dict: Parsed response containing comprehensive evaluation - """ - system_message = SYSTEM_PROMPT - - try: - content = [ - {"type": "text", "text": prompt}, - ] - if screenshot is not None: - base64_image = base64.b64encode(screenshot).decode('utf-8') - content.append({ - "type": "image_url", - "image_url": { - "url": f"data:image/jpeg;base64,{base64_image}", - "detail": "high" - } - }) - - response = openai_client.chat.completions.create( - model=model, - messages=[ - {"role": "system", "content": system_message}, - {"role": "user", "content": content} - ], - response_format={"type": "json_object"} - ) - - evaluation = json.loads(response.choices[0].message.content) - - # Validate evaluation - if not validate_evaluation(evaluation): - raise ValueError("Invalid evaluation format") - - # Normalize scores - evaluation = normalize_scores(evaluation) - - # Add metadata - evaluation["metadata"] = { - "model_used": model, - "timestamp": datetime.datetime.now().isoformat(), - "has_screenshot": screenshot is not None - } - - return evaluation - - except Exception as e: - return { - "overall_score": 0.0, - "efficiency_score": 0.0, - "accuracy_score": 0.0, - "robustness_score": 0.0, - "detailed_explanation": f"Error occurred during evaluation: {str(e)}", - "improvement_suggestions": ["Check API connection and try again"], - "key_achievements": [], - "potential_issues": ["Evaluation failed"], - "metadata": { - "error": str(e), - "timestamp": datetime.datetime.now().isoformat() - } - } \ No newline at end of file diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/tree_vis.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/tree_vis.py deleted file mode 100644 index 48f667d..0000000 --- 
a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/tree_vis.py +++ /dev/null @@ -1,128 +0,0 @@ -"""Utilities for visualizing LATS tree structures.""" - -from typing import Optional -from .lats_node import LATSNode - -# ANSI color codes -GREEN = '\033[92m' -RED = '\033[91m' -RESET = '\033[0m' - -def collect_all_nodes(node: LATSNode) -> list[LATSNode]: - """ - Recursively collect all nodes starting from the given node. - - Args: - node: The root node to start collection from - - Returns: - list[LATSNode]: List of all nodes in the tree - """ - nodes = [node] - for child in node.children: - nodes.extend(collect_all_nodes(child)) - return nodes - -def better_print(node: LATSNode, level: int = 0, selected_node: Optional[LATSNode] = None) -> None: - """ - Print tree structure recursively with indentation, showing node statistics. - - Args: - node: The node to print - level: Current indentation level (default=0) - selected_node: The currently selected node to highlight - """ - indent = " " * level - - action = node.action if node.action is not None else 'None' - if isinstance(action, str): - action = action.replace('\n', '') - - visits = f"visits: {node.visits}" - value = f"value: {node.value:.3f}" if hasattr(node, 'value') else "value: N/A" - reward = f"reward: {node.reward:.3f}" if hasattr(node, 'reward') else "reward: N/A" - stats = f"[{visits}, {value}, {reward}]" - - if node == selected_node: - print(f"{indent}├── Level {level}: {GREEN}{action}{RESET} {stats} ← Selected") - else: - print(f"{indent}├── Level {level}: {action} {stats}") - - for child in node.children: - better_print(child, level + 1, selected_node) - -def print_trajectory(terminal_node: LATSNode) -> None: - """ - Print the single path from a terminal node to the root. 
- - Args: - terminal_node: The leaf node to start the trajectory from - """ - path = [] - current = terminal_node - while current is not None: - path.append(current) - current = current.parent - - for level, node in enumerate(reversed(path)): - indent = " " * level - action = node.action - - visits = f"visits: {node.visits}" - value = f"value: {node.value:.3f}" if hasattr(node, 'value') else "value: N/A" - reward = f"reward: {node.reward:.3f}" if hasattr(node, 'reward') else "reward: N/A" - is_terminal = f"terminal: {node.is_terminal}" - feedback = f"feedback: {node.feedback if node.feedback else 'N/A'}" - stats = f"[{visits}, {value}, {reward}, {is_terminal}, {feedback}]" - - indicator = "" - if node == terminal_node: - indicator = "← Terminal" - elif not hasattr(node, 'parent') or node.parent is None: - indicator = "(Root)" - - print(f"{indent}├── Level {level}: {GREEN}{action}{RESET} {stats} {indicator}") - -def print_entire_tree(root: LATSNode) -> None: - """ - Print the entire tree structure starting from the root node. 
- - Args: - root: The root node of the tree to print - """ - def _print_subtree(node: LATSNode, level: int, prefix: str, is_last: bool) -> None: - # Prepare the current line's prefix - current_prefix = prefix + ("└── " if is_last else "├── ") - - # Prepare node statistics - action = node.action - visits = f"visits: {node.visits}" - value = f"value: {node.value:.3f}" if hasattr(node, 'value') else "value: N/A" - reward = f"reward: {node.reward:.3f}" if hasattr(node, 'reward') else "reward: N/A" - is_terminal = f"terminal: {node.is_terminal}" - feedback = f"feedback: {node.feedback if node.feedback else 'N/A'}" - stats = f"[{visits}, {value}, {reward}, {is_terminal}, {feedback}]" - - # Add indicator for root or terminal nodes - indicator = "" - if not node.children: - indicator = "← Terminal" - elif level == 0: - indicator = "(Root)" - - # Print the current node - print(f"{current_prefix}Level {level}: {GREEN}{action}{RESET} {stats} {indicator}") - - # Prepare the prefix for children - child_prefix = prefix + (" " if is_last else "│ ") - - # Sort children by some criteria (e.g., visits) if desired - children = sorted(node.children, key=lambda x: x.visits, reverse=True) if node.children else [] - - # Recursively print all children - for i, child in enumerate(children): - is_last_child = (i == len(children) - 1) - _print_subtree(child, level + 1, child_prefix, is_last_child) - - # Start the recursive printing from the root - _print_subtree(root, 0, "", True) \ No newline at end of file diff --git a/visual-tree-search-backend/app/api/lwats/core_async/agent_factory.py b/visual-tree-search-backend/app/api/lwats/core_async/agent_factory.py index 2c3fa12..310e362 100644 --- a/visual-tree-search-backend/app/api/lwats/core_async/agent_factory.py +++ b/visual-tree-search-backend/app/api/lwats/core_async/agent_factory.py @@ -5,12 +5,9 @@ from openai import OpenAI from .config import AgentConfig -from ..agents_async.SimpleSearchAgents.simple_search_agent import SimpleSearchAgent 
-from ..agents_async.SimpleSearchAgents.lats_agent import LATSAgent -from ..agents_async.SimpleSearchAgents.mcts_agent import MCTSAgent -from ..agents_async.SearchAgents.simple_search_agent import SimpleSearchAgent as NewSimpleSearchAgent -from ..agents_async.SearchAgents.lats_agent import LATSAgent as NewLATSAgent -from ..agents_async.SearchAgents.mcts_agent import MCTSAgent as NewMCTSAgent +from ..agents_async.SearchAgents.simple_search_agent import SimpleSearchAgent +from ..agents_async.SearchAgents.lats_agent import LATSAgent +from ..agents_async.SearchAgents.mcts_agent import MCTSAgent from ..webagent_utils_async.utils.utils import setup_logger from ..webagent_utils_async.utils.playwright_manager import setup_playwright @@ -109,73 +106,4 @@ async def setup_search_agent( error_message = f"Unsupported agent type: {agent_type}. Please use 'FunctionCallingAgent', 'HighLevelPlanningAgent', 'ContextAwarePlanningAgent', 'PromptAgent' or 'PromptSearchAgent' ." logger.error(error_message) return {"error": error_message} - return agent, playwright_manager - - -async def new_setup_search_agent( - agent_type, - starting_url, - goal, - images, - agent_config: AgentConfig -): - logger = setup_logger() - - file_path = os.path.join(agent_config.log_folder, 'flow', 'steps.json') - os.makedirs(os.path.dirname(file_path), exist_ok=True) - with open(file_path, 'w') as file: - file.write(goal + '\n') - file.write(starting_url + '\n') - - playwright_manager = await setup_playwright( - headless=agent_config.headless, - mode=agent_config.browser_mode, - storage_state=agent_config.storage_state - ) - # storage_state='state.json', headless=False, mode="chromium" - - page = await playwright_manager.get_page() - await page.goto(starting_url) - # Maximize the window on macOS - # await page.set_viewport_size({"width": 1440, "height": 900}) - - messages = [{ - "role": "system", - "content": SEARCH_AGENT_SYSTEM_PROMPT, - }] - - if agent_type == "SimpleSearchAgent": - 
print("SimpleSearchAgent") - agent = NewSimpleSearchAgent( - starting_url=starting_url, - messages=messages, - goal=goal, - images = images, - playwright_manager=playwright_manager, - config=agent_config, - ) - elif agent_type == "LATSAgent": - print("LATSAgent") - agent = NewLATSAgent( - starting_url=starting_url, - messages=messages, - goal=goal, - images = images, - playwright_manager=playwright_manager, - config=agent_config, - ) - elif agent_type == "MCTSAgent": - print("MCTSAgent") - agent = NewMCTSAgent( - starting_url=starting_url, - messages=messages, - goal=goal, - images = images, - playwright_manager=playwright_manager, - config=agent_config, - ) - else: - error_message = f"Unsupported agent type: {agent_type}. Please use 'FunctionCallingAgent', 'HighLevelPlanningAgent', 'ContextAwarePlanningAgent', 'PromptAgent' or 'PromptSearchAgent' ." - logger.error(error_message) - return {"error": error_message} return agent, playwright_manager \ No newline at end of file diff --git a/visual-tree-search-backend/app/api/routes/new_tree_search.py b/visual-tree-search-backend/app/api/routes/new_tree_search.py deleted file mode 100644 index 247391d..0000000 --- a/visual-tree-search-backend/app/api/routes/new_tree_search.py +++ /dev/null @@ -1,234 +0,0 @@ -import asyncio -from typing import List, Optional -from fastapi import APIRouter, BackgroundTasks, HTTPException -import json -import os -import threading -import multiprocessing -from datetime import datetime -import logging - -import argparse -from dotenv import load_dotenv -import json -import logging - -from ..lwats.core_async.config import AgentConfig, add_agent_config_arguments, filter_valid_config_args -load_dotenv() -from ..lwats.core_async.agent_factory import new_setup_search_agent - -def run_tree_search(args): - # Log the arguments to help debug - logging.info(f"Running tree search with args: {args.__dict__}") - - # Ensure starting_url is set correctly - if not hasattr(args, 'starting_url') or not 
args.starting_url: - logging.error("starting_url is not set or is empty") - return {"error": "starting_url is required"} - - logging.info(f"Using starting URL: {args.starting_url}") - - agent_config = AgentConfig(**filter_valid_config_args(args.__dict__)) - agent, playwright_manager = new_setup_search_agent( - agent_type=args.agent_type, - starting_url=args.starting_url, - goal=args.goal, - images=args.images, - agent_config=agent_config - ) - print(agent_config) - - # Run the search - results = agent.run() - - # Close the playwright_manager when done - playwright_manager.close() - - return results -from ..lwats.core_async.config import AgentConfig, filter_valid_config_args - -router = APIRouter() - -# Store results of tree search runs -search_results = {} -# Store process objects -search_processes = {} - -def run_search_in_process(search_id: str, args_dict): - """Run the tree search in a separate process""" - try: - # Create an args object similar to what argparse would create - class Args: - pass - - args = Args() - for key, value in args_dict.items(): - setattr(args, key, value) - - # Update status to running - search_results[search_id]["status"] = "running" - - # Debug: Print current working directory and storage_state path - logging.info(f"Current working directory: {os.getcwd()}") - logging.info(f"Storage state path: {args.storage_state}") - logging.info(f"Storage state exists: {os.path.exists(args.storage_state)}") - logging.info(f"Starting URL: {args.starting_url}") # Log the starting URL - - # Run the search - results = run_tree_search(args) - - # Update results - search_results[search_id]["results"] = results - search_results[search_id]["status"] = "completed" - search_results[search_id]["completed_at"] = datetime.utcnow().isoformat() - - except Exception as e: - logging.error(f"Error in search process: {str(e)}") - search_results[search_id]["status"] = "failed" - search_results[search_id]["error"] = str(e) - -@router.post("/run") -async def 
start_tree_search( - background_tasks: BackgroundTasks, - agent_type: str = "SimpleSearchAgent", - starting_url: str = "http://xwebarena.pathonai.org:7770/", - goal: str = "search running shoes, click on the first result", - images: Optional[str] = None, - search_algorithm: str = "bfs", - headless: bool = True, - browser_mode: str = "chromium", - storage_state: str = "shopping.json", - action_generation_model: str = "gpt-4o-mini", - evaluation_model: str = "gpt-4o", - branching_factor: int = 5, - max_depth: int = 3, - iterations: int = 3 -): - """Start a tree search with the given parameters""" - # Create a unique ID for this search - search_id = f"search_{datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')}" - - # Parse images - image_list = [img.strip() for img in images.split(',')] if images else [] - - # Debug: Print all possible locations for the file - logging.info(f"Current working directory: {os.getcwd()}") - possible_locations = [ - os.path.join(os.getcwd(), storage_state), - os.path.join(os.path.dirname(os.getcwd()), storage_state), - os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), storage_state), - os.path.abspath(storage_state) - ] - - for loc in possible_locations: - logging.info(f"Checking location: {loc}, exists: {os.path.exists(loc)}") - - # Try to find the file in various locations - storage_state_path = None - for loc in possible_locations: - if os.path.exists(loc): - storage_state_path = loc - break - - if storage_state_path: - logging.info(f"Found storage_state at: {storage_state_path}") - else: - logging.warning(f"Could not find storage_state file '{storage_state}' in any expected location") - # Create an empty storage state file as a fallback - storage_state_path = os.path.join(os.getcwd(), "empty_storage.json") - with open(storage_state_path, 'w') as f: - f.write("{}") - logging.info(f"Created empty storage state file at {storage_state_path}") - - # Log the starting URL to verify it's being set correctly - logging.info(f"Setting 
starting URL to: {starting_url}") - - # Create args dictionary - args_dict = { - "agent_type": agent_type, - "starting_url": starting_url, - "goal": goal, - "images": image_list, - "search_algorithm": search_algorithm, - "headless": headless, - "browser_mode": browser_mode, - "storage_state": storage_state_path, # Use the found path - "action_generation_model": action_generation_model, - "evaluation_model": evaluation_model, - "branching_factor": branching_factor, - "max_depth": max_depth, - "iterations": iterations - } - - # Initialize the results entry - search_results[search_id] = { - "id": search_id, - "status": "pending", - "created_at": datetime.utcnow().isoformat(), - "config": args_dict - } - - # Start the search in a separate process - process = threading.Thread( - target=run_search_in_process, - args=(search_id, args_dict), - daemon=True - ) - search_processes[search_id] = process - process.start() - - return { - "search_id": search_id, - "status": "pending", - "message": "Tree search started in the background" - } - -@router.get("/status/{search_id}") -async def get_search_status(search_id: str): - """Get the status of a tree search""" - if search_id not in search_results: - raise HTTPException(status_code=404, detail="Search ID not found") - - # Check if process is still alive - if search_id in search_processes: - process = search_processes[search_id] - if process.is_alive(): - search_results[search_id]["status"] = "running" - elif search_results[search_id]["status"] == "pending": - # Process ended but status wasn't updated - search_results[search_id]["status"] = "failed" - search_results[search_id]["error"] = "Process terminated unexpectedly" - - return search_results[search_id] - -@router.get("/list") -async def list_searches(): - """List all tree searches""" - return { - "searches": [ - { - "id": search_id, - "status": search_results[search_id]["status"], - "created_at": search_results[search_id]["created_at"], - "completed_at": 
search_results[search_id].get("completed_at") - } - for search_id in search_results - ] - } - -@router.post("/cancel/{search_id}") -async def cancel_search(search_id: str): - """Cancel a running search""" - if search_id not in search_results: - raise HTTPException(status_code=404, detail="Search ID not found") - - if search_id in search_processes: - process = search_processes[search_id] - if process.is_alive(): - # We can't directly terminate a thread, but we can mark it as cancelled - search_results[search_id]["status"] = "cancelled" - return {"message": f"Search {search_id} has been marked for cancellation"} - else: - return {"message": f"Search {search_id} is not running"} - - return {"message": f"Search {search_id} process not found"} \ No newline at end of file diff --git a/visual-tree-search-backend/app/api/routes/new_tree_search_websocket.py b/visual-tree-search-backend/app/api/routes/new_tree_search_websocket.py deleted file mode 100644 index 9076dad..0000000 --- a/visual-tree-search-backend/app/api/routes/new_tree_search_websocket.py +++ /dev/null @@ -1,220 +0,0 @@ -import asyncio -import json -from datetime import datetime -from typing import Dict, Any, List, Set -import logging -from collections import deque - -# Configure basic logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) - -from fastapi import APIRouter, WebSocket, WebSocketDisconnect - -# Import necessary components for the search agent -from ..lwats.webagent_utils_async.utils.playwright_manager import setup_playwright -from ..lwats.core_async.config import AgentConfig -from ..lwats.core_async.agent_factory import new_setup_search_agent -from ..lwats.agents_async.SimpleSearchAgents.tree_vis import collect_all_nodes -from ..lwats.agents_async.SimpleSearchAgents.trajectory_score import create_llm_prompt, score_trajectory_with_openai - -router = APIRouter() - -# Track active WebSocket connections -active_connections: Dict[str, 
WebSocket] = {} - -# This is the function that will be called from main.py -async def new_tree_search_websocket_endpoint(websocket: WebSocket): - """WebSocket endpoint for tree search visualization and control""" - await websocket.accept() - connection_id = str(id(websocket)) - active_connections[connection_id] = websocket - - logging.info(f"WebSocket connection established with ID: {connection_id}") - - try: - # Send initial connection confirmation - await websocket.send_json({ - "type": "connection_established", - "connection_id": connection_id, - "timestamp": datetime.utcnow().isoformat() - }) - - # Listen for messages from the client - while True: - data = await websocket.receive_text() - message = json.loads(data) - - # Handle different message types - if message["type"] == "ping": - await websocket.send_json({ - "type": "pong", - "timestamp": datetime.utcnow().isoformat() - }) - - elif message["type"] == "start_search": - # Start the search process - await handle_search_request(websocket, message) - - except WebSocketDisconnect: - logging.info(f"WebSocket disconnected with ID: {connection_id}") - except Exception as e: - logging.error(f"Error in WebSocket connection: {e}") - finally: - # Clean up connection - if connection_id in active_connections: - del active_connections[connection_id] - -async def handle_search_request(websocket: WebSocket, message: Dict[str, Any]): - """Handle a search request from the client""" - try: - # Extract parameters from the message - agent_type = message.get("agent_type", "SimpleSearchAgent") - starting_url = message.get("starting_url", "http://xwebarena.pathonai.org:7770/") - goal = message.get("goal", "search running shoes, click on the first result") - search_algorithm = message.get("search_algorithm", "bfs") - max_depth = message.get("max_depth", 3) - storage_state = message.get("storage_state", "app/api/shopping.json") - - # Send status update - await websocket.send_json({ - "type": "status_update", - "status": 
"initializing", - "message": "Initializing search agent", - "timestamp": datetime.utcnow().isoformat() - }) - - # Create agent configuration - config = AgentConfig( - search_algorithm=search_algorithm, - max_depth=max_depth, - storage_state=storage_state, - headless=False - ) - - # Send status update - await websocket.send_json({ - "type": "status_update", - "status": "setting_up", - "message": "Setting up playwright browser", - "timestamp": datetime.utcnow().isoformat() - }) - - # Setup playwright and agent - agent, playwright_manager = await new_setup_search_agent( - agent_type=agent_type, - starting_url=starting_url, - goal=goal, - images=[], # No initial images - agent_config=config - ) - - # Send status update - await websocket.send_json({ - "type": "status_update", - "status": "running", - "message": "Search agent initialized, starting search", - "timestamp": datetime.utcnow().isoformat() - }) - - # Run search with WebSocket updates - if search_algorithm.lower() == "bfs": - # Use the agent's built-in WebSocket-enabled BFS method - await agent.bfs_with_websocket(websocket) - elif search_algorithm.lower() == "dfs": - # Use the agent's built-in WebSocket-enabled DFS method - await agent.dfs_with_websocket(websocket) - elif search_algorithm.lower() == "lats": - await agent.run(websocket) - elif search_algorithm.lower() == "mcts": - await agent.run(websocket) - else: - await websocket.send_json({ - "type": "error", - "message": f"Unsupported algorithm: {search_algorithm}", - "timestamp": datetime.utcnow().isoformat() - }) - - # Clean up - await playwright_manager.close() - - except Exception as e: - logging.error(f"Error handling search request: {e}") - await websocket.send_json({ - "type": "error", - "message": f"Error during search: {str(e)}", - "timestamp": datetime.utcnow().isoformat() - }) - -async def send_tree_update(websocket: WebSocket, root_node): - """Send a tree update to the client""" - try: - # Collect all nodes in the tree - nodes = 
collect_all_nodes(root_node) - - # Convert nodes to a format suitable for visualization - tree_data = [] - for node in nodes: - node_data = { - "id": id(node), - "parent_id": id(node.parent) if node.parent else None, - "action": node.action if node.action else "ROOT", - "description": node.natural_language_description, - "depth": node.depth, - "is_terminal": node.is_terminal, - "score": getattr(node, "value", 0) / getattr(node, "visits", 1) if hasattr(node, "visits") and node.visits > 0 else 0 - } - tree_data.append(node_data) - - await websocket.send_json({ - "type": "tree_update", - "nodes": tree_data, - "timestamp": datetime.utcnow().isoformat() - }) - except Exception as e: - logging.error(f"Error sending tree update: {e}") - -async def send_trajectory_update(websocket: WebSocket, node, status: str): - """Send a trajectory update to the client""" - try: - # Get path from root to this node - path = [] - current = node - while current: - path.append(current) - current = current.parent - path = list(reversed(path)) - - # Convert path to a format suitable for visualization - trajectory_data = [] - for i, node in enumerate(path): - if i == 0: # Skip root node in display - continue - - node_data = { - "id": id(node), - "action": node.action, - "description": node.natural_language_description, - "feedback": node.feedback if hasattr(node, "feedback") else None, - "depth": node.depth - } - trajectory_data.append(node_data) - - await websocket.send_json({ - "type": f"trajectory_{status}", # trajectory_start or trajectory_complete - "trajectory": trajectory_data, - "timestamp": datetime.utcnow().isoformat() - }) - except Exception as e: - logging.error(f"Error sending trajectory update: {e}") - -# Add a route for testing WebSocket status via HTTP -@router.get("/status") -async def tree_search_websocket_status(): - """Get Tree Search WebSocket connection status""" - return { - "active_connections": len(active_connections), - "status": "running" - } \ No newline at end of 
file diff --git a/visual-tree-search-backend/app/api/routes/tree_search_websocket.py b/visual-tree-search-backend/app/api/routes/tree_search_websocket.py index 6a6b819..8b9a6be 100644 --- a/visual-tree-search-backend/app/api/routes/tree_search_websocket.py +++ b/visual-tree-search-backend/app/api/routes/tree_search_websocket.py @@ -17,8 +17,8 @@ from ..lwats.webagent_utils_async.utils.playwright_manager import setup_playwright from ..lwats.core_async.config import AgentConfig from ..lwats.core_async.agent_factory import setup_search_agent -from ..lwats.agents_async.SimpleSearchAgents.tree_vis import collect_all_nodes -from ..lwats.agents_async.SimpleSearchAgents.trajectory_score import create_llm_prompt, score_trajectory_with_openai +from ..lwats.agents_async.SearchAgents.tree_vis import collect_all_nodes +from ..lwats.agents_async.SearchAgents.trajectory_score import create_llm_prompt, score_trajectory_with_openai router = APIRouter() @@ -122,10 +122,10 @@ async def handle_search_request(websocket: WebSocket, message: Dict[str, Any]): # Run search with WebSocket updates if search_algorithm.lower() == "bfs": # Use the agent's built-in WebSocket-enabled BFS method - await agent.bfs_with_websocket(websocket) + await agent.bfs(websocket) elif search_algorithm.lower() == "dfs": # Use the agent's built-in WebSocket-enabled DFS method - await agent.dfs_with_websocket(websocket) + await agent.dfs(websocket) elif search_algorithm.lower() == "lats": await agent.run(websocket) elif search_algorithm.lower() == "mcts": diff --git a/visual-tree-search-backend/app/main.py b/visual-tree-search-backend/app/main.py index 4917d33..40eaad5 100644 --- a/visual-tree-search-backend/app/main.py +++ b/visual-tree-search-backend/app/main.py @@ -54,7 +54,6 @@ async def root(): from app.api.routes.websocket import websocket_endpoint from app.api.routes.tree_websocket import tree_websocket_endpoint from app.api.routes.tree_search_websocket import tree_search_websocket_endpoint -from 
app.api.routes.new_tree_search_websocket import new_tree_search_websocket_endpoint # Register the WebSocket endpoints @app.websocket("/ws") async def websocket_route(websocket: WebSocket): @@ -68,10 +67,6 @@ async def tree_websocket_route(websocket: WebSocket): async def tree_search_websocket_route(websocket: WebSocket): await tree_search_websocket_endpoint(websocket) -@app.websocket("/new-tree-search-ws") -async def new_tree_search_websocket_route(websocket: WebSocket): - await new_tree_search_websocket_endpoint(websocket) - if __name__ == "__main__": port = int(os.getenv("PORT", 3000)) uvicorn.run("app.main:app", host="0.0.0.0", port=port, reload=True) \ No newline at end of file diff --git a/visual-tree-search-backend/log/flow/steps.json b/visual-tree-search-backend/log/flow/steps.json deleted file mode 100644 index cd384fa..0000000 --- a/visual-tree-search-backend/log/flow/steps.json +++ /dev/null @@ -1,2 +0,0 @@ -search running shoes, click on the first result -http://128.105.145.205:7770/ diff --git a/visual-tree-search-backend/log/prompt/action_gen_res_20250319_124347_446670.json b/visual-tree-search-backend/log/prompt/action_gen_res_20250319_124347_446670.json deleted file mode 100644 index f25255f..0000000 --- a/visual-tree-search-backend/log/prompt/action_gen_res_20250319_124347_446670.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"chatcmpl-BCtc04X9fQ9lMQb4DYffXTRJhsyuJ","choices":[{"finish_reason":"stop","index":0,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006432518421206623,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00018006093159783632,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0019286326132714748,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-0.4741942286491394,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":0.0,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-1.0280383548888494e-6,"top_logprobs":[]},{"token":"',","bytes":[39,44],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":0.0,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":-9.610702363715973e-6,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-2.696889623621246e-6,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00001700132997939363,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"enter","bytes":[101,110,116,101,114],"logprob":-0.4907068610191345,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.005948355887085199,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" into","bytes":[32,105,110,116,111],"logprob":-0.47407761216163635,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":0.0,"top_logprobs":[]},{"token":" 
search","bytes":[32,115,101,97,114,99,104],"logprob":-2.1008713702030946e-6,"top_logprobs":[]},{"token":" box","bytes":[32,98,111,120],"logprob":-0.5474576950073242,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.05262782424688339,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000039888433093437925,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"fill('274', 'running shoes')\",\n \"natural_language_description\": \"enter 'running shoes' into the search box\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":1,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006420598219847307,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00015908108616713434,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0019286326132714748,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.8260595202445984,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-6.704273118884885e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-0.0032061156816780567,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00008506661833962426,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0011930868495255709,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.16022665798664093,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.00038348292582668364,"top_logprobs":[]},{"token":" bar","bytes":[32,98,97,114],"logprob":-1.0832500457763672,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.0003821716527454555,"top_logprobs":[]},{"token":" start","bytes":[32,115,116,97,114,116],"logprob":-1.6323108673095703,"top_logprobs":[]},{"token":" searching","bytes":[32,115,101,97,114,99,104,105,110,103],"logprob":-0.013997425325214863,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0005679258611053228,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.00003869639476761222,"top_logprobs":[]},{"token":" 
shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.03804665803909302,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000024153549020411447,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":-1.9361264946837764e-7,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('274')\",\n \"natural_language_description\": \"click the search bar to start searching for running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":2,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006420598219847307,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00015908108616713434,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0019286326132714748,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-0.5760595202445984,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":0.0,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-1.8624639324116288e-6,"top_logprobs":[]},{"token":"',","bytes":[39,44],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" 
'","bytes":[32,39],"logprob":0.0,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":-6.749814019713085e-6,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-3.054500666621607e-6,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000015094070477061905,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"search","bytes":[115,101,97,114,99,104],"logprob":-4.517331123352051,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.000013663626305060461,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-1.1369043588638306,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":-0.0003815756645053625,"top_logprobs":[]},{"token":" in","bytes":[32,105,110],"logprob":-0.003885580925270915,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":0.0,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.000014617256056226324,"top_logprobs":[]},{"token":" box","bytes":[32,98,111,120],"logprob":-0.7709638476371765,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.04310218244791031,"top_logprobs":[]},{"token":" 
","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00005133198283147067,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"fill('274', 'running shoes')\",\n \"natural_language_description\": \"search for 'running shoes' in the search box\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":3,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006420598219847307,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00015908108616713434,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0019286326132714748,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.8260747790336609,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-0.0022094969172030687,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00006611323624383658,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0015467642806470394,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.20141762495040894,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.0003384422743692994,"top_logprobs":[]},{"token":" bar","bytes":[32,98,97,114],"logprob":-1.1568434238433838,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.0005303950165398419,"top_logprobs":[]},{"token":" start","bytes":[32,115,116,97,114,116],"logprob":-1.289430856704712,"top_logprobs":[]},{"token":" searching","bytes":[32,115,101,97,114,99,104,105,110,103],"logprob":-0.010586611926555634,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0004611743788700551,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.000032020991056924686,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.02324955351650715,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" 
false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000031186566047836095,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('274')\",\n \"natural_language_description\": \"click the search bar to start searching for running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":4,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006420598219847307,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00015908108616713434,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0019286326132714748,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-0.5760747790336609,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":0.0,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-1.5048530030981055e-6,"top_logprobs":[]},{"token":"',","bytes":[39,44],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":0.0,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":-8.2994620242971e-6,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-2.45848218582978e-6,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000019266199160483666,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"enter","bytes":[101,110,116,101,114],"logprob":-0.6039004921913147,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.006733845453709364,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" into","bytes":[32,105,110,116,111],"logprob":-0.47407814860343933,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":0.0,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-2.2200749754119897e-6,"top_logprobs":[]},{"token":" bar","bytes":[32,98,97,114],"logprob":-0.849614679813385,"top_logprobs":[]},{"token":".\",\n","bytes":[46,34,44,10],"logprob":-3.669437885284424,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00005097437315271236,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":-1.9361264946837764e-7,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"fill('274', 
'running shoes')\",\n \"natural_language_description\": \"enter 'running shoes' into the search bar.\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":5,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006420598219847307,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00015908108616713434,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"context","bytes":[99,111,110,116,101,120,116],"logprob":-6.251928806304932,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-0.06197698414325714,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-1.3856492842023727e-6,"top_logprobs":[]},{"token":"',","bytes":[39,44],"logprob":-1.1472419600977446e-6,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":0.0,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":-0.00003214019307051785,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-9.253090865968261e-6,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000019385402993066236,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-1.9387826919555664,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.06197687238454819,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-1.0280383548888494e-6,"top_logprobs":[]},{"token":" box","bytes":[32,98,111,120],"logprob":-0.26174396276474,"top_logprobs":[]},{"token":" with","bytes":[32,119,105,116,104],"logprob":-0.00003702754474943504,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.5759584307670593,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"'\",\n","bytes":[39,34,44,10],"logprob":-0.005286527331918478,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00008661626634420827,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"context\": \"fill('274', 'running shoes')\",\n \"natural_language_description\": \"fill the search box with 'running shoes'\",\n 
\"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":6,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006420598219847307,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00015908108616713434,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0019286326132714748,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.8261353373527527,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-0.0017214729450643063,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00012356207298580557,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0009510025265626609,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.20141646265983582,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.0004919105558656156,"top_logprobs":[]},{"token":" 
box","bytes":[32,98,111,120],"logprob":-0.3702867031097412,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.0004724987084046006,"top_logprobs":[]},{"token":" enter","bytes":[32,101,110,116,101,114],"logprob":-0.6298752427101135,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-1.1224493980407715,"top_logprobs":[]},{"token":" terms","bytes":[32,116,101,114,109,115],"logprob":-0.10605919361114502,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.5262662172317505,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.00009281485836254433,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.061974067240953445,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000024034345187828876,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":-1.9361264946837764e-7,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('274')\",\n \"natural_language_description\": \"click the search box to enter search terms for running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":7,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006420598219847307,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00015908108616713434,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0019286326132714748,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.8261353373527527,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-0.0017214729450643063,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00012356207298580557,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0009510025265626609,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.20141646265983582,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.0004919105558656156,"top_logprobs":[]},{"token":" box","bytes":[32,98,111,120],"logprob":-0.3702867031097412,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.0004724987084046006,"top_logprobs":[]},{"token":" enter","bytes":[32,101,110,116,101,114],"logprob":-0.6298752427101135,"top_logprobs":[]},{"token":" a","bytes":[32,97],"logprob":-0.6224493980407715,"top_logprobs":[]},{"token":" 
keyword","bytes":[32,107,101,121,119,111,114,100],"logprob":-6.827351093292236,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.15240812301635742,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.20374777913093567,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.25554099678993225,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000031424973713001236,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('274')\",\n \"natural_language_description\": \"click the search box to enter a keyword for running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":8,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006420598219847307,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00015908108616713434,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0019286326132714748,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-0.5760920643806458,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":0.0,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-9.088346359931165e-7,"top_logprobs":[]},{"token":"',","bytes":[39,44],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":0.0,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":-9.610702363715973e-6,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.008129963040119e-6,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000019266199160483666,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"enter","bytes":[101,110,116,101,114],"logprob":-0.5163914561271667,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.006735265254974365,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" into","bytes":[32,105,110,116,111],"logprob":-0.38687169551849365,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":0.0,"top_logprobs":[]},{"token":" 
search","bytes":[32,115,101,97,114,99,104],"logprob":-1.5048530030981055e-6,"top_logprobs":[]},{"token":" bar","bytes":[32,98,97,114],"logprob":-0.9225870370864868,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.013664701953530312,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000050020742492051795,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"fill('274', 'running shoes')\",\n \"natural_language_description\": \"enter 'running shoes' into the search bar\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":9,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00006420598219847307,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00015908108616713434,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0019286326132714748,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-0.5760920643806458,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":0.0,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-9.088346359931165e-7,"top_logprobs":[]},{"token":"',","bytes":[39,44],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":" 
'","bytes":[32,39],"logprob":0.0,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":-9.610702363715973e-6,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.008129963040119e-6,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000019266199160483666,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"enter","bytes":[101,110,116,101,114],"logprob":-0.5163914561271667,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.006735265254974365,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" into","bytes":[32,105,110,116,111],"logprob":-0.38687169551849365,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":0.0,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-1.5048530030981055e-6,"top_logprobs":[]},{"token":" bar","bytes":[32,98,97,114],"logprob":-0.9225870370864868,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.013664701953530312,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000050020742492051795,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"fill('274', 'running shoes')\",\n \"natural_language_description\": \"enter 'running shoes' into the search bar\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}}],"created":1742413408,"model":"gpt-4o-mini-2024-07-18","object":"chat.completion","service_tier":"default","system_fingerprint":"fp_3267753c5d","usage":{"completion_tokens":390,"prompt_tokens":38125,"total_tokens":38515,"prompt_tokens_details":{"cached_tokens":35712,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/action_gen_res_20250319_125433_892126.json b/visual-tree-search-backend/log/prompt/action_gen_res_20250319_125433_892126.json deleted file mode 100644 index f1e99f5..0000000 --- a/visual-tree-search-backend/log/prompt/action_gen_res_20250319_125433_892126.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"chatcmpl-BCtmejNMvarzqZa73eyF5B9D9x5Hk","choices":[{"finish_reason":"stop","index":0,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00010330478107789531,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00020425561524461955,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0015023599844425917,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.12786708772182465,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-9.088346359931165e-7,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.017468981444835663,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000024391956685576588,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.00421492476016283,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.693148672580719,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.09569751471281052,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" &","bytes":[32,38],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" 
Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" category","bytes":[32,99,97,116,101,103,111,114,121],"logprob":-1.087802767753601,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.08486717939376831,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.5175091028213501,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0005550591740757227,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0011752246646210551,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.9741040468215942,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.0000303521392197581,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click the 'Clothing, Shoes & Jewelry' category to search for running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":1,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009198043699143454,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00018029935017693788,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0015023599844425917,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.16153663396835327,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-1.1472419600977446e-6,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.014734657481312752,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000024391956685576588,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0042123133316636086,"top_logprobs":[]},{"token":" on","bytes":[32,111,110],"logprob":-0.693148672580719,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.00023858259373810142,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.18120793998241425,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" 
&","bytes":[32,38],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" category","bytes":[32,99,97,116,101,103,111,114,121],"logprob":-0.7226436138153076,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.05210935324430466,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.523879885673523,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.00042030587792396545,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0007158888038247824,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":".\",\n","bytes":[46,34,44,10],"logprob":-0.47410210967063904,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003047134305234067,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click on the 'Clothing, Shoes & Jewelry' category to search for running shoes.\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":2,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009198043699143454,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00018029935017693788,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0015023599844425917,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.16153663396835327,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-1.1472419600977446e-6,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.014734657481312752,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000024391956685576588,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0042123133316636086,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.693148672580719,"top_logprobs":[]},{"token":" link","bytes":[32,108,105,110,107],"logprob":-4.995298385620117,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.001520695281215012,"top_logprobs":[]},{"token":" Clothing","bytes":[32,67,108,111,116,104,105,110,103],"logprob":-1.1421501636505127,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" 
&","bytes":[32,38],"logprob":-3.4121114822482923e-6,"top_logprobs":[]},{"token":" Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" category","bytes":[32,99,97,116,101,103,111,114,121],"logprob":-1.0939464569091797,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.581777811050415,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.4578203558921814,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0022103239316493273,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0009146820520982146,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":".\",\n","bytes":[46,34,44,10],"logprob":-0.575953483581543,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.0000303521392197581,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click the link for Clothing, Shoes & Jewelry category to search for running shoes.\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":3,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.0000724310302757658,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.0001801801408873871,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.00033546582562848926,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.20268617570400238,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-1.6240566083070007e-6,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.022879814729094505,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000024391956685576588,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.00875768531113863,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.6931484341621399,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.09486053138971329,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" &","bytes":[32,38],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" 
Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" menu","bytes":[32,109,101,110,117],"logprob":-0.5032888054847717,"top_logprobs":[]},{"token":" item","bytes":[32,105,116,101,109],"logprob":-0.01599600724875927,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.030175885185599327,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.6833873391151428,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0013603554107248783,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.004637791775166988,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":".\",\n","bytes":[46,34,44,10],"logprob":-0.5760310888290405,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003976922744186595,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click the 'Clothing, Shoes & Jewelry' menu item to search for running shoes.\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":4,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.0000724310302757658,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.0001801801408873871,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.00033546582562848926,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.20268617570400238,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-1.6240566083070007e-6,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.022879814729094505,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000024391956685576588,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.00875768531113863,"top_logprobs":[]},{"token":" on","bytes":[32,111,110],"logprob":-0.6931484341621399,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.00023858259373810142,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.16158753633499146,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" 
&","bytes":[32,38],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" category","bytes":[32,99,97,116,101,103,111,114,121],"logprob":-0.7196370959281921,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.05655243620276451,"top_logprobs":[]},{"token":" find","bytes":[32,102,105,110,100],"logprob":-1.260739803314209,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0017852524761110544,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.3869117796421051,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003106736039626412,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click on the 'Clothing, Shoes & Jewelry' category to find running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":5,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.0000724310302757658,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.0001801801408873871,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.00033546582562848926,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.20268617570400238,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-1.6240566083070007e-6,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.022879814729094505,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000024391956685576588,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.00875768531113863,"top_logprobs":[]},{"token":" on","bytes":[32,111,110],"logprob":-0.6931484341621399,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.00023858259373810142,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.16158753633499146,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" 
&","bytes":[32,38],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" category","bytes":[32,99,97,116,101,103,111,114,121],"logprob":-0.7196370959281921,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.05655243620276451,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.510739803314209,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.00033188972156494856,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0003831252979580313,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":".\",\n","bytes":[46,34,44,10],"logprob":-0.6931704878807068,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003082895273109898,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click on the 'Clothing, Shoes & Jewelry' category to search for running shoes.\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":6,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.0000724310302757658,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.0001801801408873871,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0015023599844425917,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.16113032400608063,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-1.0280383548888494e-6,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.021243887022137642,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000024391956685576588,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0042123133316636086,"top_logprobs":[]},{"token":" on","bytes":[32,111,110],"logprob":-0.693148672580719,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.00023762896307744086,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.2028583586215973,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" 
&","bytes":[32,38],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" category","bytes":[32,99,97,116,101,103,111,114,121],"logprob":-0.7226508855819702,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.04783512279391289,"top_logprobs":[]},{"token":" find","bytes":[32,102,105,110,100],"logprob":-1.2698383331298828,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0024158458691090345,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.8259842395782471,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003070975071750581,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click on the 'Clothing, Shoes & Jewelry' category to find running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":7,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.0000724310302757658,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.0001801801408873871,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0015023599844425917,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.16113032400608063,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-1.0280383548888494e-6,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.021243887022137642,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000024391956685576588,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0042123133316636086,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.693148672580719,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.15199296176433563,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" &","bytes":[32,38],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" 
Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" link","bytes":[32,108,105,110,107],"logprob":-3.221681833267212,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.019002284854650497,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.7020753026008606,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.001442972687073052,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0028199704829603434,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":".\",\n","bytes":[46,34,44,10],"logprob":-0.3869765102863312,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003070975071750581,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click the 'Clothing, Shoes & Jewelry' link to search for running shoes.\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":8,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.0000724310302757658,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00014048899174667895,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.001170225441455841,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.1613837629556656,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-9.088346359931165e-7,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.014547757804393768,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000027610454708337784,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.004212551284581423,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.693148672580719,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.10770571231842041,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" &","bytes":[32,38],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" 
Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" menu","bytes":[32,109,101,110,117],"logprob":-0.4769691824913025,"top_logprobs":[]},{"token":" item","bytes":[32,105,116,101,109],"logprob":-0.009865636005997658,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.04404747486114502,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.6273479461669922,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0010113727767020464,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0036145627964287996,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":".\",\n","bytes":[46,34,44,10],"logprob":-0.31329867243766785,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000030948158382670954,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click the 'Clothing, Shoes & Jewelry' menu item to search for running shoes.\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":9,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.0000724310302757658,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00014048899174667895,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.001170225441455841,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.1613837629556656,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-9.088346359931165e-7,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-0.014547757804393768,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000027610454708337784,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.004212551284581423,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.693148672580719,"top_logprobs":[]},{"token":" link","bytes":[32,108,105,110,107],"logprob":-4.982705593109131,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0014903423143550754,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.31770873069763184,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" 
Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" &","bytes":[32,38],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":" Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":-0.008717396296560764,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.11772339791059494,"top_logprobs":[]},{"token":" navigate","bytes":[32,110,97,118,105,103,97,116,101],"logprob":-3.080153226852417,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.006458970718085766,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.2044365555047989,"top_logprobs":[]},{"token":" shoe","bytes":[32,115,104,111,101],"logprob":-3.041691303253174,"top_logprobs":[]},{"token":" section","bytes":[32,115,101,99,116,105,111,110],"logprob":-0.11135590076446533,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-2.2723281383514404,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003953082341467962,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click the link for 'Clothing, Shoes & Jewelry' to navigate to the shoe section\",\n \"finished\": 
false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}}],"created":1742414068,"model":"gpt-4o-mini-2024-07-18","object":"chat.completion","service_tier":"default","system_fingerprint":"fp_3267753c5d","usage":{"completion_tokens":435,"prompt_tokens":38125,"total_tokens":38560,"prompt_tokens_details":{"cached_tokens":35712,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/action_gen_res_20250319_125721_060558.json b/visual-tree-search-backend/log/prompt/action_gen_res_20250319_125721_060558.json deleted file mode 100644 index 3837269..0000000 --- a/visual-tree-search-backend/log/prompt/action_gen_res_20250319_125721_060558.json +++ /dev/null @@ -1 +0,0 @@ -{"id":"chatcmpl-BCtp48bnYTf4p6tnBLfl4oUFIjD8E","choices":[{"finish_reason":"stop","index":0,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-1.5041381120681763,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-0.1002107709646225,"top_logprobs":[]},{"token":"',","bytes":[39,44],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":" 
'","bytes":[32,39],"logprob":0.0,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":-0.00003619311974034645,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-1.7432603272027336e-6,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00001700132997939363,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"type","bytes":[116,121,112,101],"logprob":-1.0970673561096191,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.00003762356209335849,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" into","bytes":[32,105,110,116,111],"logprob":-0.07888978719711304,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":0.0,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-5.676981345459353e-6,"top_logprobs":[]},{"token":" box","bytes":[32,98,111,120],"logprob":-0.4360577166080475,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.021904457360506058,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000041318875446449965,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":-1.9361264946837764e-7,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"fill('274', 'running shoes')\",\n \"natural_language_description\": \"type 'running shoes' into the search box\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":1,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.25413811206817627,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-7.896309739408025e-7,"top_logprobs":[]},{"token":"506","bytes":[53,48,54],"logprob":-0.7649924755096436,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000027610454708337784,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.005515960976481438,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.25193095207214355,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.694790244102478,"top_logprobs":[]},{"token":"Sports","bytes":[83,112,111,114,116,115],"logprob":-7.896309739408025e-7,"top_logprobs":[]},{"token":" &","bytes":[32,38],"logprob":0.0,"top_logprobs":[]},{"token":" Outdoors","bytes":[32,79,117,116,100,111,111,114,115],"logprob":-1.2664456789934775e-6,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" menu","bytes":[32,109,101,110,117],"logprob":-0.4342218339443207,"top_logprobs":[]},{"token":" item","bytes":[32,105,116,101,109],"logprob":-0.016003044322133064,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.012659975327551365,"top_logprobs":[]},{"token":" explore","bytes":[32,101,120,112,108,111,114,101],"logprob":-1.0674705505371094,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.5747970342636108,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":-0.000553151941858232,"top_logprobs":[]},{"token":".\",\n","bytes":[46,34,44,10],"logprob":-0.5685865879058838,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000030590548703912646,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('506')\",\n \"natural_language_description\": \"click the 'Sports & Outdoors' menu item to explore running shoes.\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":2,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-1.5041381120681763,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-0.1002107709646225,"top_logprobs":[]},{"token":"',","bytes":[39,44],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":0.0,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":-0.00003619311974034645,"top_logprobs":[]},{"token":" 
shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-1.7432603272027336e-6,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00001700132997939363,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"type","bytes":[116,121,112,101],"logprob":-1.0970673561096191,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.00003762356209335849,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" into","bytes":[32,105,110,116,111],"logprob":-0.07888978719711304,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":0.0,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-5.676981345459353e-6,"top_logprobs":[]},{"token":" bar","bytes":[32,98,97,114],"logprob":-1.061057686805725,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.02036902867257595,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00004084206375409849,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":-1.9361264946837764e-7,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"fill('274', 'running shoes')\",\n \"natural_language_description\": \"type 'running shoes' into the search bar\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":3,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.25413811206817627,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-7.896309739408025e-7,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-1.8899924755096436,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000040126840758603066,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.003346920944750309,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.8259409666061401,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.12811662256717682,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" &","bytes":[32,38],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" menu","bytes":[32,109,101,110,117],"logprob":-0.7924502491950989,"top_logprobs":[]},{"token":" item","bytes":[32,105,116,101,109],"logprob":-0.008814050815999508,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.023569682613015175,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.6388896107673645,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0017298025777563453,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.00319375516846776,"top_logprobs":[]},{"token":" 
shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.9741472005844116,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000030948158382670954,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click the 'Clothing, Shoes & Jewelry' menu item to search for running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":4,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.25413811206817627,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-7.896309739408025e-7,"top_logprobs":[]},{"token":"277","bytes":[50,55,55],"logprob":-1.2649924755096436,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-6.704273118884885e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00009627176768844947,"top_logprobs":[]},{"token":" 
","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0025554499588906765,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.10020732134580612,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.004779230337589979,"top_logprobs":[]},{"token":" bar","bytes":[32,98,97,114],"logprob":-1.0985082387924194,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.0022525531239807606,"top_logprobs":[]},{"token":" start","bytes":[32,115,116,97,114,116],"logprob":-1.481454610824585,"top_logprobs":[]},{"token":" searching","bytes":[32,115,101,97,114,99,104,105,110,103],"logprob":-0.015023237094283104,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.001058295019902289,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.00009162282367469743,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.03804837539792061,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" 
false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.000024034345187828876,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":-1.9361264946837764e-7,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('277')\",\n \"natural_language_description\": \"click the search bar to start searching for running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":5,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.25413811206817627,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-7.896309739408025e-7,"top_logprobs":[]},{"token":"544","bytes":[53,52,52],"logprob":-1.8899924755096436,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000040126840758603066,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.003346920944750309,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.8259409666061401,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.12811662256717682,"top_logprobs":[]},{"token":"Cl","bytes":[67,108],"logprob":0.0,"top_logprobs":[]},{"token":"othing","bytes":[111,116,104,105,110,103],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":",","bytes":[44],"logprob":0.0,"top_logprobs":[]},{"token":" Shoes","bytes":[32,83,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":" &","bytes":[32,38],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":" Jewelry","bytes":[32,74,101,119,101,108,114,121],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" category","bytes":[32,99,97,116,101,103,111,114,121],"logprob":-0.7924502491950989,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.04336122050881386,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.3466672897338867,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.00040004486800171435,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0004907185211777687,"top_logprobs":[]},{"token":" 
shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.6931739449501038,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003082895273109898,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('544')\",\n \"natural_language_description\": \"click the 'Clothing, Shoes & Jewelry' category to search for running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":6,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.25413811206817627,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-7.896309739408025e-7,"top_logprobs":[]},{"token":"506","bytes":[53,48,54],"logprob":-0.7649924755096436,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-4.320199877838604e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.000027610454708337784,"top_logprobs":[]},{"token":" 
","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.005515960976481438,"top_logprobs":[]},{"token":" on","bytes":[32,111,110],"logprob":-1.5019309520721436,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.00020389800192788243,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.5762163996696472,"top_logprobs":[]},{"token":"Sports","bytes":[83,112,111,114,116,115],"logprob":-7.896309739408025e-7,"top_logprobs":[]},{"token":" &","bytes":[32,38],"logprob":0.0,"top_logprobs":[]},{"token":" Outdoors","bytes":[32,79,117,116,100,111,111,114,115],"logprob":-1.3856492842023727e-6,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" category","bytes":[32,99,97,116,101,103,111,114,121],"logprob":-0.7194874882698059,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.031898390501737595,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.5173394680023193,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0007181499968282878,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.000024272752852994017,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":".\",\n","bytes":[46,34,44,10],"logprob":-0.632688581943512,"top_logprobs":[]},{"token":" 
","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003082895273109898,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":0.0,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('506')\",\n \"natural_language_description\": \"click on the 'Sports & Outdoors' category to search for running shoes.\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":7,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.25413811206817627,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-7.896309739408025e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-2.5149924755096436,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00010902655776590109,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0009274221956729889,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.1602267622947693,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.0009195583988912404,"top_logprobs":[]},{"token":" bar","bytes":[32,98,97,114],"logprob":-1.0046192407608032,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.000992795336060226,"top_logprobs":[]},{"token":" enter","bytes":[32,101,110,116,101,114],"logprob":-0.9002101421356201,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-0.9459182620048523,"top_logprobs":[]},{"token":" terms","bytes":[32,116,101,114,109,115],"logprob":-0.11793326586484909,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.6689522862434387,"top_logprobs":[]},{"token":" running","bytes":[32,114,117,110,110,105,110,103],"logprob":-0.0001882823125924915,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.07889870554208755,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" 
false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003953082341467962,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":-1.9361264946837764e-7,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('274')\",\n \"natural_language_description\": \"click the search bar to enter search terms for running shoes\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":8,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"fill","bytes":[102,105,108,108],"logprob":-1.5041381120681763,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-0.1002107709646225,"top_logprobs":[]},{"token":"',","bytes":[39,44],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":0.0,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":-0.00003619311974034645,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-1.7432603272027336e-6,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00001700132997939363,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" 
\"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"enter","bytes":[101,110,116,101,114],"logprob":-0.5970674157142639,"top_logprobs":[]},{"token":" '","bytes":[32,39],"logprob":-0.004104046616703272,"top_logprobs":[]},{"token":"running","bytes":[114,117,110,110,105,110,103],"logprob":0.0,"top_logprobs":[]},{"token":" shoes","bytes":[32,115,104,111,101,115],"logprob":0.0,"top_logprobs":[]},{"token":"'","bytes":[39],"logprob":0.0,"top_logprobs":[]},{"token":" in","bytes":[32,105,110],"logprob":-1.1368718147277832,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":0.0,"top_logprobs":[]},{"token":" search","bytes":[32,115,101,97,114,99,104],"logprob":-2.696889623621246e-6,"top_logprobs":[]},{"token":" box","bytes":[32,98,111,120],"logprob":-0.27816617488861084,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.009525622241199017,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00004060365608893335,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":-1.9361264946837764e-7,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"fill('274', 'running 
shoes')\",\n \"natural_language_description\": \"enter 'running shoes' in the search box\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}},{"finish_reason":"stop","index":9,"logprobs":{"content":[{"token":"{\n","bytes":[123,10],"logprob":-0.00009245724504580721,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":-0.00026169343618676066,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"content","bytes":[99,111,110,116,101,110,116],"logprob":-0.0031777136027812958,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.25413811206817627,"top_logprobs":[]},{"token":"('","bytes":[40,39],"logprob":-7.896309739408025e-7,"top_logprobs":[]},{"token":"274","bytes":[50,55,52],"logprob":-2.5149924755096436,"top_logprobs":[]},{"token":"')","bytes":[39,41],"logprob":-5.512236498361744e-7,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.00010902655776590109,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"natural","bytes":[110,97,116,117,114,97,108],"logprob":0.0,"top_logprobs":[]},{"token":"_language","bytes":[95,108,97,110,103,117,97,103,101],"logprob":-3.128163257315464e-7,"top_logprobs":[]},{"token":"_description","bytes":[95,100,101,115,99,114,105,112,116,105,111,110],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":-1.9361264946837764e-7,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"click","bytes":[99,108,105,99,107],"logprob":-0.0009274221956729889,"top_logprobs":[]},{"token":" the","bytes":[32,116,104,101],"logprob":-0.1602267622947693,"top_logprobs":[]},{"token":" 
search","bytes":[32,115,101,97,114,99,104],"logprob":-0.0009195583988912404,"top_logprobs":[]},{"token":" box","bytes":[32,98,111,120],"logprob":-0.5046192407608032,"top_logprobs":[]},{"token":" to","bytes":[32,116,111],"logprob":-0.0007727012853138149,"top_logprobs":[]},{"token":" enable","bytes":[32,101,110,97,98,108,101],"logprob":-7.360296249389648,"top_logprobs":[]},{"token":" it","bytes":[32,105,116],"logprob":-4.416245937347412,"top_logprobs":[]},{"token":" for","bytes":[32,102,111,114],"logprob":-0.0008320206543430686,"top_logprobs":[]},{"token":" input","bytes":[32,105,110,112,117,116],"logprob":-1.7915931940078735,"top_logprobs":[]},{"token":"\",\n","bytes":[34,44,10],"logprob":-0.39733272790908813,"top_logprobs":[]},{"token":" ","bytes":[32,32,32],"logprob":0.0,"top_logprobs":[]},{"token":" \"","bytes":[32,34],"logprob":0.0,"top_logprobs":[]},{"token":"finished","bytes":[102,105,110,105,115,104,101,100],"logprob":0.0,"top_logprobs":[]},{"token":"\":","bytes":[34,58],"logprob":0.0,"top_logprobs":[]},{"token":" false","bytes":[32,102,97,108,115,101],"logprob":0.0,"top_logprobs":[]},{"token":"\n","bytes":[10],"logprob":-0.00003047134305234067,"top_logprobs":[]},{"token":"}","bytes":[125],"logprob":-1.9361264946837764e-7,"top_logprobs":[]}],"refusal":null},"message":{"content":"{\n \"content\": \"click('274')\",\n \"natural_language_description\": \"click the search box to enable it for input\",\n \"finished\": false\n}","refusal":null,"role":"assistant","function_call":null,"tool_calls":null,"annotations":[]}}],"created":1742414218,"model":"gpt-4o-mini-2024-07-18","object":"chat.completion","service_tier":"default","system_fingerprint":"fp_3267753c5d","usage":{"completion_tokens":399,"prompt_tokens":38125,"total_tokens":38524,"prompt_tokens_details":{"cached_tokens":35712,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} \ No newline at end of file diff --git 
a/visual-tree-search-backend/log/prompt/action_gen_sys_prompt_20250319_124347_446032.txt b/visual-tree-search-backend/log/prompt/action_gen_sys_prompt_20250319_124347_446032.txt deleted file mode 100644 index 2493c93..0000000 --- a/visual-tree-search-backend/log/prompt/action_gen_sys_prompt_20250319_124347_446032.txt +++ /dev/null @@ -1,34 +0,0 @@ - - # Instructions - Review the current state of the page and all other information to find the best - possible next action and a natural language description of the action (example of natural language description is like) - "click the navbar button" or "select the portrait option" etc) to accomplish your goal. - Your answer will be interpreted and executed by a program, make sure to follow the formatting instructions. - - Respond using valid JSON format, which can be parsed by python json.loads(), with keys: - - context (containing the action) - - natural_language_description - - finished (boolean: IMPORTANT - must be False if content field contains an action. - Only set to True when NO more actions are needed and content field is empty) - - Example response format: - { - "content": "action here", - "natural_language_description": "description here", - "finished": false # Must be false because content contains an action - } - - # Rules for finished field: - - If content field contains any action: finished MUST be False - - Only set finished to True when: - 1. The goal is completely achieved - 2. No more actions are needed - 3. Content field is empty - - Previous actions and action results are: [] - - Provide ONLY ONE action. Do not suggest multiple actions or a sequence of actions. 
- - # Goal: - search running shoes, click on the first result - \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/action_gen_sys_prompt_20250319_125433_891725.txt b/visual-tree-search-backend/log/prompt/action_gen_sys_prompt_20250319_125433_891725.txt deleted file mode 100644 index 2493c93..0000000 --- a/visual-tree-search-backend/log/prompt/action_gen_sys_prompt_20250319_125433_891725.txt +++ /dev/null @@ -1,34 +0,0 @@ - - # Instructions - Review the current state of the page and all other information to find the best - possible next action and a natural language description of the action (example of natural language description is like) - "click the navbar button" or "select the portrait option" etc) to accomplish your goal. - Your answer will be interpreted and executed by a program, make sure to follow the formatting instructions. - - Respond using valid JSON format, which can be parsed by python json.loads(), with keys: - - context (containing the action) - - natural_language_description - - finished (boolean: IMPORTANT - must be False if content field contains an action. - Only set to True when NO more actions are needed and content field is empty) - - Example response format: - { - "content": "action here", - "natural_language_description": "description here", - "finished": false # Must be false because content contains an action - } - - # Rules for finished field: - - If content field contains any action: finished MUST be False - - Only set finished to True when: - 1. The goal is completely achieved - 2. No more actions are needed - 3. Content field is empty - - Previous actions and action results are: [] - - Provide ONLY ONE action. Do not suggest multiple actions or a sequence of actions. 
- - # Goal: - search running shoes, click on the first result - \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/action_gen_sys_prompt_20250319_125721_059668.txt b/visual-tree-search-backend/log/prompt/action_gen_sys_prompt_20250319_125721_059668.txt deleted file mode 100644 index 2493c93..0000000 --- a/visual-tree-search-backend/log/prompt/action_gen_sys_prompt_20250319_125721_059668.txt +++ /dev/null @@ -1,34 +0,0 @@ - - # Instructions - Review the current state of the page and all other information to find the best - possible next action and a natural language description of the action (example of natural language description is like) - "click the navbar button" or "select the portrait option" etc) to accomplish your goal. - Your answer will be interpreted and executed by a program, make sure to follow the formatting instructions. - - Respond using valid JSON format, which can be parsed by python json.loads(), with keys: - - context (containing the action) - - natural_language_description - - finished (boolean: IMPORTANT - must be False if content field contains an action. - Only set to True when NO more actions are needed and content field is empty) - - Example response format: - { - "content": "action here", - "natural_language_description": "description here", - "finished": false # Must be false because content contains an action - } - - # Rules for finished field: - - If content field contains any action: finished MUST be False - - Only set finished to True when: - 1. The goal is completely achieved - 2. No more actions are needed - 3. Content field is empty - - Previous actions and action results are: [] - - Provide ONLY ONE action. Do not suggest multiple actions or a sequence of actions. 
- - # Goal: - search running shoes, click on the first result - \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/axtree_20250319_124327_583604.txt b/visual-tree-search-backend/log/prompt/axtree_20250319_124327_583604.txt deleted file mode 100644 index db13f94..0000000 --- a/visual-tree-search-backend/log/prompt/axtree_20250319_124327_583604.txt +++ /dev/null @@ -1,29 +0,0 @@ -[227] link 'My Account' -[229] link 'My Wish List' -[231] link 'Sign In' -[235] listitem '' -[238] link 'Create an Account' -[244] link 'store logo' -[247] link '\ue611 My Cart' -[274] combobox '\ue615 Search', autocomplete='both', hasPopup='listbox', expanded=False -[277] link 'Advanced Search' -[289] menuitem '\ue622 Beauty & Personal Care', hasPopup='menu' -[506] menuitem '\ue622 Sports & Outdoors', hasPopup='menu' -[544] menuitem '\ue622 Clothing, Shoes & Jewelry', hasPopup='menu' -[596] menuitem '\ue622 Home & Kitchen', hasPopup='menu' -[723] menuitem '\ue622 Office Products', hasPopup='menu' -[755] menuitem '\ue622 Tools & Home Improvement', hasPopup='menu' -[774] menuitem '\ue622 Health & Household', hasPopup='menu' -[798] menuitem '\ue622 Patio, Lawn & Garden', hasPopup='menu' -[817] menuitem '\ue622 Electronics', hasPopup='menu' -[1026] menuitem '\ue622 Cell Phones & Accessories', hasPopup='menu' -[1077] menuitem '\ue622 Video Games', hasPopup='menu' -[1126] menuitem '\ue622 Grocery & Gourmet Food', hasPopup='menu' -[1378] link 'Image' -[1386] link 'Pre-baked Gingerbread House Kit Value Pack, 17 oz., Pack of 2, Total 34 oz.' -[1418] link 'Image' -[1458] link 'Image' -[1466] link 'Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch' -[1498] link 'Image' -[1538] link 'Image' -[1546] link 'So Delicious Dairy Free CocoWhip Light, Vegan, Non-GMO Project Verified, 9 oz. 
Tub' \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/axtree_20250319_125427_613572.txt b/visual-tree-search-backend/log/prompt/axtree_20250319_125427_613572.txt deleted file mode 100644 index db13f94..0000000 --- a/visual-tree-search-backend/log/prompt/axtree_20250319_125427_613572.txt +++ /dev/null @@ -1,29 +0,0 @@ -[227] link 'My Account' -[229] link 'My Wish List' -[231] link 'Sign In' -[235] listitem '' -[238] link 'Create an Account' -[244] link 'store logo' -[247] link '\ue611 My Cart' -[274] combobox '\ue615 Search', autocomplete='both', hasPopup='listbox', expanded=False -[277] link 'Advanced Search' -[289] menuitem '\ue622 Beauty & Personal Care', hasPopup='menu' -[506] menuitem '\ue622 Sports & Outdoors', hasPopup='menu' -[544] menuitem '\ue622 Clothing, Shoes & Jewelry', hasPopup='menu' -[596] menuitem '\ue622 Home & Kitchen', hasPopup='menu' -[723] menuitem '\ue622 Office Products', hasPopup='menu' -[755] menuitem '\ue622 Tools & Home Improvement', hasPopup='menu' -[774] menuitem '\ue622 Health & Household', hasPopup='menu' -[798] menuitem '\ue622 Patio, Lawn & Garden', hasPopup='menu' -[817] menuitem '\ue622 Electronics', hasPopup='menu' -[1026] menuitem '\ue622 Cell Phones & Accessories', hasPopup='menu' -[1077] menuitem '\ue622 Video Games', hasPopup='menu' -[1126] menuitem '\ue622 Grocery & Gourmet Food', hasPopup='menu' -[1378] link 'Image' -[1386] link 'Pre-baked Gingerbread House Kit Value Pack, 17 oz., Pack of 2, Total 34 oz.' -[1418] link 'Image' -[1458] link 'Image' -[1466] link 'Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch' -[1498] link 'Image' -[1538] link 'Image' -[1546] link 'So Delicious Dairy Free CocoWhip Light, Vegan, Non-GMO Project Verified, 9 oz. 
Tub' \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/axtree_20250319_125657_748444.txt b/visual-tree-search-backend/log/prompt/axtree_20250319_125657_748444.txt deleted file mode 100644 index db13f94..0000000 --- a/visual-tree-search-backend/log/prompt/axtree_20250319_125657_748444.txt +++ /dev/null @@ -1,29 +0,0 @@ -[227] link 'My Account' -[229] link 'My Wish List' -[231] link 'Sign In' -[235] listitem '' -[238] link 'Create an Account' -[244] link 'store logo' -[247] link '\ue611 My Cart' -[274] combobox '\ue615 Search', autocomplete='both', hasPopup='listbox', expanded=False -[277] link 'Advanced Search' -[289] menuitem '\ue622 Beauty & Personal Care', hasPopup='menu' -[506] menuitem '\ue622 Sports & Outdoors', hasPopup='menu' -[544] menuitem '\ue622 Clothing, Shoes & Jewelry', hasPopup='menu' -[596] menuitem '\ue622 Home & Kitchen', hasPopup='menu' -[723] menuitem '\ue622 Office Products', hasPopup='menu' -[755] menuitem '\ue622 Tools & Home Improvement', hasPopup='menu' -[774] menuitem '\ue622 Health & Household', hasPopup='menu' -[798] menuitem '\ue622 Patio, Lawn & Garden', hasPopup='menu' -[817] menuitem '\ue622 Electronics', hasPopup='menu' -[1026] menuitem '\ue622 Cell Phones & Accessories', hasPopup='menu' -[1077] menuitem '\ue622 Video Games', hasPopup='menu' -[1126] menuitem '\ue622 Grocery & Gourmet Food', hasPopup='menu' -[1378] link 'Image' -[1386] link 'Pre-baked Gingerbread House Kit Value Pack, 17 oz., Pack of 2, Total 34 oz.' -[1418] link 'Image' -[1458] link 'Image' -[1466] link 'Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch' -[1498] link 'Image' -[1538] link 'Image' -[1546] link 'So Delicious Dairy Free CocoWhip Light, Vegan, Non-GMO Project Verified, 9 oz. 
Tub' \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/prompt_20250319_124327_583960.txt b/visual-tree-search-backend/log/prompt/prompt_20250319_124327_583960.txt deleted file mode 100644 index bb375f6..0000000 --- a/visual-tree-search-backend/log/prompt/prompt_20250319_124327_583960.txt +++ /dev/null @@ -1,110 +0,0 @@ - - - # Current Accessibility Tree: - [227] link 'My Account' -[229] link 'My Wish List' -[231] link 'Sign In' -[235] listitem '' -[238] link 'Create an Account' -[244] link 'store logo' -[247] link '\ue611 My Cart' -[274] combobox '\ue615 Search', autocomplete='both', hasPopup='listbox', expanded=False -[277] link 'Advanced Search' -[289] menuitem '\ue622 Beauty & Personal Care', hasPopup='menu' -[506] menuitem '\ue622 Sports & Outdoors', hasPopup='menu' -[544] menuitem '\ue622 Clothing, Shoes & Jewelry', hasPopup='menu' -[596] menuitem '\ue622 Home & Kitchen', hasPopup='menu' -[723] menuitem '\ue622 Office Products', hasPopup='menu' -[755] menuitem '\ue622 Tools & Home Improvement', hasPopup='menu' -[774] menuitem '\ue622 Health & Household', hasPopup='menu' -[798] menuitem '\ue622 Patio, Lawn & Garden', hasPopup='menu' -[817] menuitem '\ue622 Electronics', hasPopup='menu' -[1026] menuitem '\ue622 Cell Phones & Accessories', hasPopup='menu' -[1077] menuitem '\ue622 Video Games', hasPopup='menu' -[1126] menuitem '\ue622 Grocery & Gourmet Food', hasPopup='menu' -[1378] link 'Image' -[1386] link 'Pre-baked Gingerbread House Kit Value Pack, 17 oz., Pack of 2, Total 34 oz.' -[1418] link 'Image' -[1458] link 'Image' -[1466] link 'Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch' -[1498] link 'Image' -[1538] link 'Image' -[1546] link 'So Delicious Dairy Free CocoWhip Light, Vegan, Non-GMO Project Verified, 9 oz. Tub' - - # Action Space - -10 different types of actions are available. 
- -noop(wait_ms: float = 1000) - Examples: - noop() - - noop(500) - -fill(bid: str, value: str, timeout: int = 10000, retry_attempts: int = 3) - Examples: - fill('237', 'example value') - - fill('45', 'multi-line\nexample') - - fill('a12', 'example with "quotes"') - -click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [], timeout: int = 3000) - Examples: - click('a51') - - click('b22', button='right') - - click('48', button='middle', modifiers=['Shift']) - -hover(bid: str) - Examples: - hover('b8') - -drag_and_drop(from_bid: str, to_bid: str) - Examples: - drag_and_drop('56', '498') - -go_back() - Examples: - go_back() - -go_forward() - Examples: - go_forward() - -goto(url: str) - Examples: - goto('http://www.example.com') - -upload_file(bid: str, file: str | list[str]) - Examples: - upload_file('572', 'my_receipt.pdf') - - upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip']) - -select_option(bid: str, options: str | list[str]) - Examples: - select_option('a48', 'blue') - - select_option('c48', ['red', 'green', 'blue']) - -Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page. -More than 2-3 actions usually leads to failure or unexpected behavior. Example: -fill('a12', 'example with "quotes"') -click('a51') -click('48', button='middle', modifiers=['Shift']) - - - # Screenshot - The image provided is a screenshot of the current application state, corresponding to the Accessibility Tree above. - - Here is an example with chain of thought of a valid action when clicking on a button: - " - In order to accomplish my goal I need to click on the button with bid 12 - ```click('12')``` - " - - Please analyze the screenshot and the Accessibility Tree to determine the next appropriate action. Refer to visual elements from the screenshot if relevant to your decision. - Provide ONLY ONE action. 
Do not suggest multiple actions or a sequence of actions. - \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/prompt_20250319_125427_613892.txt b/visual-tree-search-backend/log/prompt/prompt_20250319_125427_613892.txt deleted file mode 100644 index bb375f6..0000000 --- a/visual-tree-search-backend/log/prompt/prompt_20250319_125427_613892.txt +++ /dev/null @@ -1,110 +0,0 @@ - - - # Current Accessibility Tree: - [227] link 'My Account' -[229] link 'My Wish List' -[231] link 'Sign In' -[235] listitem '' -[238] link 'Create an Account' -[244] link 'store logo' -[247] link '\ue611 My Cart' -[274] combobox '\ue615 Search', autocomplete='both', hasPopup='listbox', expanded=False -[277] link 'Advanced Search' -[289] menuitem '\ue622 Beauty & Personal Care', hasPopup='menu' -[506] menuitem '\ue622 Sports & Outdoors', hasPopup='menu' -[544] menuitem '\ue622 Clothing, Shoes & Jewelry', hasPopup='menu' -[596] menuitem '\ue622 Home & Kitchen', hasPopup='menu' -[723] menuitem '\ue622 Office Products', hasPopup='menu' -[755] menuitem '\ue622 Tools & Home Improvement', hasPopup='menu' -[774] menuitem '\ue622 Health & Household', hasPopup='menu' -[798] menuitem '\ue622 Patio, Lawn & Garden', hasPopup='menu' -[817] menuitem '\ue622 Electronics', hasPopup='menu' -[1026] menuitem '\ue622 Cell Phones & Accessories', hasPopup='menu' -[1077] menuitem '\ue622 Video Games', hasPopup='menu' -[1126] menuitem '\ue622 Grocery & Gourmet Food', hasPopup='menu' -[1378] link 'Image' -[1386] link 'Pre-baked Gingerbread House Kit Value Pack, 17 oz., Pack of 2, Total 34 oz.' -[1418] link 'Image' -[1458] link 'Image' -[1466] link 'Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch' -[1498] link 'Image' -[1538] link 'Image' -[1546] link 'So Delicious Dairy Free CocoWhip Light, Vegan, Non-GMO Project Verified, 9 oz. Tub' - - # Action Space - -10 different types of actions are available. 
- -noop(wait_ms: float = 1000) - Examples: - noop() - - noop(500) - -fill(bid: str, value: str, timeout: int = 10000, retry_attempts: int = 3) - Examples: - fill('237', 'example value') - - fill('45', 'multi-line\nexample') - - fill('a12', 'example with "quotes"') - -click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [], timeout: int = 3000) - Examples: - click('a51') - - click('b22', button='right') - - click('48', button='middle', modifiers=['Shift']) - -hover(bid: str) - Examples: - hover('b8') - -drag_and_drop(from_bid: str, to_bid: str) - Examples: - drag_and_drop('56', '498') - -go_back() - Examples: - go_back() - -go_forward() - Examples: - go_forward() - -goto(url: str) - Examples: - goto('http://www.example.com') - -upload_file(bid: str, file: str | list[str]) - Examples: - upload_file('572', 'my_receipt.pdf') - - upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip']) - -select_option(bid: str, options: str | list[str]) - Examples: - select_option('a48', 'blue') - - select_option('c48', ['red', 'green', 'blue']) - -Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page. -More than 2-3 actions usually leads to failure or unexpected behavior. Example: -fill('a12', 'example with "quotes"') -click('a51') -click('48', button='middle', modifiers=['Shift']) - - - # Screenshot - The image provided is a screenshot of the current application state, corresponding to the Accessibility Tree above. - - Here is an example with chain of thought of a valid action when clicking on a button: - " - In order to accomplish my goal I need to click on the button with bid 12 - ```click('12')``` - " - - Please analyze the screenshot and the Accessibility Tree to determine the next appropriate action. Refer to visual elements from the screenshot if relevant to your decision. - Provide ONLY ONE action. 
Do not suggest multiple actions or a sequence of actions. - \ No newline at end of file diff --git a/visual-tree-search-backend/log/prompt/prompt_20250319_125657_748790.txt b/visual-tree-search-backend/log/prompt/prompt_20250319_125657_748790.txt deleted file mode 100644 index bb375f6..0000000 --- a/visual-tree-search-backend/log/prompt/prompt_20250319_125657_748790.txt +++ /dev/null @@ -1,110 +0,0 @@ - - - # Current Accessibility Tree: - [227] link 'My Account' -[229] link 'My Wish List' -[231] link 'Sign In' -[235] listitem '' -[238] link 'Create an Account' -[244] link 'store logo' -[247] link '\ue611 My Cart' -[274] combobox '\ue615 Search', autocomplete='both', hasPopup='listbox', expanded=False -[277] link 'Advanced Search' -[289] menuitem '\ue622 Beauty & Personal Care', hasPopup='menu' -[506] menuitem '\ue622 Sports & Outdoors', hasPopup='menu' -[544] menuitem '\ue622 Clothing, Shoes & Jewelry', hasPopup='menu' -[596] menuitem '\ue622 Home & Kitchen', hasPopup='menu' -[723] menuitem '\ue622 Office Products', hasPopup='menu' -[755] menuitem '\ue622 Tools & Home Improvement', hasPopup='menu' -[774] menuitem '\ue622 Health & Household', hasPopup='menu' -[798] menuitem '\ue622 Patio, Lawn & Garden', hasPopup='menu' -[817] menuitem '\ue622 Electronics', hasPopup='menu' -[1026] menuitem '\ue622 Cell Phones & Accessories', hasPopup='menu' -[1077] menuitem '\ue622 Video Games', hasPopup='menu' -[1126] menuitem '\ue622 Grocery & Gourmet Food', hasPopup='menu' -[1378] link 'Image' -[1386] link 'Pre-baked Gingerbread House Kit Value Pack, 17 oz., Pack of 2, Total 34 oz.' -[1418] link 'Image' -[1458] link 'Image' -[1466] link 'Elmwood Inn Fine Teas, Orange Vanilla Caffeine-free Fruit Infusion, 16-Ounce Pouch' -[1498] link 'Image' -[1538] link 'Image' -[1546] link 'So Delicious Dairy Free CocoWhip Light, Vegan, Non-GMO Project Verified, 9 oz. Tub' - - # Action Space - -10 different types of actions are available. 
- -noop(wait_ms: float = 1000) - Examples: - noop() - - noop(500) - -fill(bid: str, value: str, timeout: int = 10000, retry_attempts: int = 3) - Examples: - fill('237', 'example value') - - fill('45', 'multi-line\nexample') - - fill('a12', 'example with "quotes"') - -click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'Meta', 'Shift']] = [], timeout: int = 3000) - Examples: - click('a51') - - click('b22', button='right') - - click('48', button='middle', modifiers=['Shift']) - -hover(bid: str) - Examples: - hover('b8') - -drag_and_drop(from_bid: str, to_bid: str) - Examples: - drag_and_drop('56', '498') - -go_back() - Examples: - go_back() - -go_forward() - Examples: - go_forward() - -goto(url: str) - Examples: - goto('http://www.example.com') - -upload_file(bid: str, file: str | list[str]) - Examples: - upload_file('572', 'my_receipt.pdf') - - upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip']) - -select_option(bid: str, options: str | list[str]) - Examples: - select_option('a48', 'blue') - - select_option('c48', ['red', 'green', 'blue']) - -Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page. -More than 2-3 actions usually leads to failure or unexpected behavior. Example: -fill('a12', 'example with "quotes"') -click('a51') -click('48', button='middle', modifiers=['Shift']) - - - # Screenshot - The image provided is a screenshot of the current application state, corresponding to the Accessibility Tree above. - - Here is an example with chain of thought of a valid action when clicking on a button: - " - In order to accomplish my goal I need to click on the button with bid 12 - ```click('12')``` - " - - Please analyze the screenshot and the Accessibility Tree to determine the next appropriate action. Refer to visual elements from the screenshot if relevant to your decision. - Provide ONLY ONE action. 
Do not suggest multiple actions or a sequence of actions. - \ No newline at end of file diff --git a/visual-tree-search-backend/log/screenshots/screenshot_20250319_124317_880329.png b/visual-tree-search-backend/log/screenshots/screenshot_20250319_124317_880329.png deleted file mode 100644 index 223565e..0000000 Binary files a/visual-tree-search-backend/log/screenshots/screenshot_20250319_124317_880329.png and /dev/null differ diff --git a/visual-tree-search-backend/log/screenshots/screenshot_20250319_125417_824425.png b/visual-tree-search-backend/log/screenshots/screenshot_20250319_125417_824425.png deleted file mode 100644 index 4d31fb5..0000000 Binary files a/visual-tree-search-backend/log/screenshots/screenshot_20250319_125417_824425.png and /dev/null differ diff --git a/visual-tree-search-backend/log/screenshots/screenshot_20250319_125612_577743.png b/visual-tree-search-backend/log/screenshots/screenshot_20250319_125612_577743.png deleted file mode 100644 index c8f91c0..0000000 Binary files a/visual-tree-search-backend/log/screenshots/screenshot_20250319_125612_577743.png and /dev/null differ diff --git a/visual-tree-search-backend/log/screenshots/screenshot_20250319_125647_949664.png b/visual-tree-search-backend/log/screenshots/screenshot_20250319_125647_949664.png deleted file mode 100644 index c8f91c0..0000000 Binary files a/visual-tree-search-backend/log/screenshots/screenshot_20250319_125647_949664.png and /dev/null differ diff --git a/visual-tree-search-backend/log/screenshots/screenshot_20250319_125736_945435.png b/visual-tree-search-backend/log/screenshots/screenshot_20250319_125736_945435.png deleted file mode 100644 index d3afcf0..0000000 Binary files a/visual-tree-search-backend/log/screenshots/screenshot_20250319_125736_945435.png and /dev/null differ diff --git a/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_124324_371840.png b/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_124324_371840.png deleted file 
mode 100644 index e6f9152..0000000 Binary files a/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_124324_371840.png and /dev/null differ diff --git a/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_125424_336360.png b/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_125424_336360.png deleted file mode 100644 index 00902ba..0000000 Binary files a/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_125424_336360.png and /dev/null differ diff --git a/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_125654_490871.png b/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_125654_490871.png deleted file mode 100644 index f09f2be..0000000 Binary files a/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_125654_490871.png and /dev/null differ diff --git a/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_125743_465582.png b/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_125743_465582.png deleted file mode 100644 index 3b3784a..0000000 Binary files a/visual-tree-search-backend/log/screenshots/screenshot_som_20250319_125743_465582.png and /dev/null differ diff --git a/visual-tree-search-backend/test/test-tree-search-ws-lats.py b/visual-tree-search-backend/test/test-tree-search-ws-lats.py index 0894eed..a091c95 100644 --- a/visual-tree-search-backend/test/test-tree-search-ws-lats.py +++ b/visual-tree-search-backend/test/test-tree-search-ws-lats.py @@ -3,6 +3,8 @@ import websockets import argparse import logging +import sys +import os from datetime import datetime # Configure logging @@ -12,8 +14,61 @@ ) logger = logging.getLogger(__name__) + +# account_reset +# browser_setup + +## for LATS +# step_start +# node_created +# node_selected +# node_selected_for_simulation +# tree_update_node_expansion +# tree_update_node_children_evaluation +# tree_update_node_backpropagation +# removed_simulation + +COLORS = { + # Core updates 
+ 'iteration_start': '\033[94m', # Blue + 'step_start': '\033[94m', # Blue + + # Node operations + 'node_selected': '\033[92m', # Green + 'node_selected_for_simulation': '\033[92m', # Green + 'node_created': '\033[92m', # Green + 'node_simulated': '\033[92m', # Green + 'node_terminal': '\033[92m', # Green + + # Tree/Path updates + 'tree_update': '\033[96m', # Cyan + 'tree_update_node_expansion': '\033[96m', # Cyan + 'tree_update_node_evaluation': '\033[96m', # Cyan + 'tree_update_node_children_evaluation': '\033[96m', # Cyan + 'tree_update_node_backpropagation': '\033[96m', # Cyan + 'tree_update_simulation': '\033[96m', # Cyan + 'trajectory_update': '\033[96m', # Cyan + 'removed_simulation': '\033[96m', # Cyan + + # Results/Completion + 'simulation_result': '\033[93m', # Yellow + 'search_complete': '\033[95m', # Magenta + 'success': '\033[95m', # Magenta + 'partial_success': '\033[93m', # Yellow + 'failure': '\033[91m', # Red + + # System messages + 'account_reset': '\033[91m', # Red + 'browser_setup': '\033[91m', # Red + 'error': '\033[91m', # Red + + # Status updates + 'status_update': '\033[94m', # Blue + 'reset': '\033[0m' # Reset +} + # Default values -DEFAULT_WS_URL = "ws://localhost:3000/new-tree-search-ws" +DEFAULT_WS_URL = "ws://localhost:3000/tree-search-ws" DEFAULT_STARTING_URL = "http://xwebarena.pathonai.org:7770/" DEFAULT_GOAL = "search running shoes, click on the first result" @@ -64,57 +119,11 @@ async def connect_and_test_search( response = await websocket.recv() data = json.loads(response) - # Log the message type and some key information + # Print the raw websocket message with colored type msg_type = data.get("type", "unknown") - - if msg_type == "status_update": - logger.info(f"Status update: {data.get('status')} - {data.get('message')}") - - elif msg_type == "iteration_start": - logger.info(f"Iteration start: {data.get('iteration')}") - - elif msg_type == "step_start": - logger.info(f"Step start: {data.get('step')} - {data.get('step_name')}") 
- - elif msg_type == "node_update": - node_id = data.get("node_id") - status = data.get("status") - logger.info(f"Node update: {node_id} - {status}") - - # If node was scored, log the score - if status == "scored": - logger.info(f"Node score: {data.get('score')}") - - elif msg_type == "trajectory_update": - logger.info(f"Trajectory update received with {data.get('trajectory')}") - - elif msg_type == "tree_update": - logger.info(f"Tree update received with {data.get('tree')}") - - elif msg_type == "best_path_update": - logger.info(f"Best path update: score={data.get('score')}, path length={len(data.get('path', []))}") - - elif msg_type == "search_complete": - status = data.get("status") - score = data.get("score", "N/A") - path_length = len(data.get("path", [])) - - logger.info(f"Search complete: {status}, score={score}, path length={path_length}") - logger.info("Path actions:") - - for i, node in enumerate(data.get("path", [])): - logger.info(f" {i+1}. {node.get('action')}") - - # Exit the loop when search is complete - break - - elif msg_type == "error": - logger.error(f"Error: {data.get('message')}") - break - - else: - logger.info(f"Received message of type {msg_type}") - logger.info(f"Message: {data}") + color = COLORS.get(msg_type, COLORS['reset']) + print(f"\nWebSocket message - Type: {color}{msg_type}{COLORS['reset']}") + print(f"Raw message: {json.dumps(data, indent=2)}") except websockets.exceptions.ConnectionClosed: logger.warning("WebSocket connection closed") @@ -144,12 +153,40 @@ def parse_arguments(): parser.add_argument("--max-depth", type=int, default=3, help="Maximum depth for the search tree (default: 3)") + # Add the new argument for log file + parser.add_argument("--log-file", type=str, + help="File to save the colored output to") + return parser.parse_args() async def main(): """Main entry point""" args = parse_arguments() + # Setup logging to file if requested + original_stdout = sys.stdout + original_stderr = sys.stderr + log_file = None + + 
if args.log_file: + class TeeOutput: + def __init__(self, terminal, log_file): + self.terminal = terminal + self.log_file = log_file + + def write(self, message): + self.terminal.write(message) + self.log_file.write(message) + + def flush(self): + self.terminal.flush() + self.log_file.flush() + + log_file = open(args.log_file, 'w', encoding='utf-8') + sys.stdout = TeeOutput(sys.stdout, log_file) + sys.stderr = TeeOutput(sys.stderr, log_file) + logger.info(f"Logging colored output to {args.log_file}") + logger.info("Starting tree search WebSocket test") logger.info(f"WebSocket URL: {args.ws_url}") logger.info(f"Starting URL: {args.starting_url}") @@ -157,13 +194,21 @@ async def main(): logger.info(f"Algorithm: {args.algorithm}") logger.info(f"Max depth: {args.max_depth}") - await connect_and_test_search( - ws_url=args.ws_url, - starting_url=args.starting_url, - goal=args.goal, - search_algorithm=args.algorithm, - max_depth=args.max_depth - ) + try: + await connect_and_test_search( + ws_url=args.ws_url, + starting_url=args.starting_url, + goal=args.goal, + search_algorithm=args.algorithm, + max_depth=args.max_depth + ) + finally: + # Clean up if logging to file + if log_file: + sys.stdout = original_stdout + sys.stderr = original_stderr + log_file.close() + logger.info(f"Closed log file: {args.log_file}") if __name__ == "__main__": - asyncio.run(main()) + asyncio.run(main()) \ No newline at end of file diff --git a/visual-tree-search-backend/test/test-tree-search-ws-mcts.py b/visual-tree-search-backend/test/test-tree-search-ws-mcts.py index 3969839..058a4db 100644 --- a/visual-tree-search-backend/test/test-tree-search-ws-mcts.py +++ b/visual-tree-search-backend/test/test-tree-search-ws-mcts.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) # Default values -DEFAULT_WS_URL = "ws://localhost:3000/new-tree-search-ws" +DEFAULT_WS_URL = "ws://localhost:3000/tree-search-ws" DEFAULT_STARTING_URL = "http://xwebarena.pathonai.org:7770/" DEFAULT_GOAL = "search running 
shoes, click on the first result" diff --git a/visual-tree-search-backend/test/test-tree-search-ws-simple.py b/visual-tree-search-backend/test/test-tree-search-ws-simple.py index 8ef92c6..64cfc74 100644 --- a/visual-tree-search-backend/test/test-tree-search-ws-simple.py +++ b/visual-tree-search-backend/test/test-tree-search-ws-simple.py @@ -3,6 +3,8 @@ import websockets import argparse import logging +import sys +import os from datetime import datetime # Configure logging @@ -12,8 +14,53 @@ ) logger = logging.getLogger(__name__) +## for BFS and DFS +# account_reset +# browser_setup +# node_created +# node_selected +# tree_update_node_expansion +# tree_update_node_evaluation + + +# ANSI color codes for different message types +COLORS = { + # Core updates + 'iteration_start': '\033[94m', # Blue + 'step_start': '\033[94m', # Blue + + # Node operations + 'node_selected': '\033[92m', # Green + 'node_created': '\033[92m', # Green + 'node_simulated': '\033[92m', # Green + 'node_terminal': '\033[92m', # Green + + # Tree/Path updates + 'tree_update': '\033[96m', # Cyan + 'tree_update_node_expansion': '\033[96m', # Cyan + 'tree_update_node_evaluation': '\033[96m', # Cyan + 'trajectory_update': '\033[96m', # Cyan + 'removed_simulation': '\033[96m', # Cyan + + # Results/Completion + 'simulation_result': '\033[93m', # Yellow + 'search_complete': '\033[95m', # Magenta + 'success': '\033[95m', # Magenta + 'partial_success': '\033[93m', # Yellow + 'failure': '\033[91m', # Red + + # System messages + 'account_reset': '\033[91m', # Red + 'browser_setup': '\033[91m', # Red + 'error': '\033[91m', # Red + + # Status updates + 'status_update': '\033[94m', # Blue + 'reset': '\033[0m' # Reset +} + # Default values -DEFAULT_WS_URL = "ws://localhost:3000/new-tree-search-ws" +DEFAULT_WS_URL = "ws://localhost:3000/tree-search-ws" DEFAULT_STARTING_URL = "http://xwebarena.pathonai.org:7770/" DEFAULT_GOAL = "search running shoes, click on the first result" @@ -64,46 +111,11 @@ async def 
connect_and_test_search( response = await websocket.recv() data = json.loads(response) - # Log the message type and some key information + # Print the raw websocket message with colored type msg_type = data.get("type", "unknown") - - if msg_type == "status_update": - logger.info(f"Status update: {data.get('status')} - {data.get('message')}") - - elif msg_type == "node_update": - node_id = data.get("node_id") - status = data.get("status") - logger.info(f"Node update: {node_id} - {status}") - - # If node was scored, log the score - if status == "scored": - logger.info(f"Node score: {data.get('score')}") - - elif msg_type == "tree_update": - logger.info(f"Tree update received with {data.get('tree')}") - - elif msg_type == "best_path_update": - logger.info(f"Best path update: score={data.get('score')}, path={data.get('path')}") - - elif msg_type == "search_complete": - status = data.get("status") - score = data.get("score", "N/A") - path = data.get("path") - - logger.info(f"Search complete: {status}, score={score}, path={path}") - - for i, node in enumerate(data.get("path", [])): - logger.info(f" {i+1}. 
{node.get('action')}") - - # Exit the loop when search is complete - break - - elif msg_type == "error": - logger.error(f"Error: {data.get('message')}") - break - - else: - logger.info(f"Received message of type {msg_type}") + color = COLORS.get(msg_type, COLORS['reset']) + print(f"\nWebSocket message - Type: {color}{msg_type}{COLORS['reset']}") + print(f"Raw message: {json.dumps(data, indent=2)}") except websockets.exceptions.ConnectionClosed: logger.warning("WebSocket connection closed") @@ -133,12 +145,40 @@ def parse_arguments(): parser.add_argument("--max-depth", type=int, default=3, help="Maximum depth for the search tree (default: 3)") + # Add the new argument for log file + parser.add_argument("--log-file", type=str, + help="File to save the colored output to") + return parser.parse_args() async def main(): """Main entry point""" args = parse_arguments() + # Setup logging to file if requested + original_stdout = sys.stdout + original_stderr = sys.stderr + log_file = None + + if args.log_file: + class TeeOutput: + def __init__(self, terminal, log_file): + self.terminal = terminal + self.log_file = log_file + + def write(self, message): + self.terminal.write(message) + self.log_file.write(message) + + def flush(self): + self.terminal.flush() + self.log_file.flush() + + log_file = open(args.log_file, 'w', encoding='utf-8') + sys.stdout = TeeOutput(sys.stdout, log_file) + sys.stderr = TeeOutput(sys.stderr, log_file) + logger.info(f"Logging colored output to {args.log_file}") + logger.info("Starting tree search WebSocket test") logger.info(f"WebSocket URL: {args.ws_url}") logger.info(f"Starting URL: {args.starting_url}") @@ -146,13 +186,21 @@ async def main(): logger.info(f"Algorithm: {args.algorithm}") logger.info(f"Max depth: {args.max_depth}") - await connect_and_test_search( - ws_url=args.ws_url, - starting_url=args.starting_url, - goal=args.goal, - search_algorithm=args.algorithm, - max_depth=args.max_depth - ) + try: + await connect_and_test_search( + 
ws_url=args.ws_url, + starting_url=args.starting_url, + goal=args.goal, + search_algorithm=args.algorithm, + max_depth=args.max_depth + ) + finally: + # Clean up if logging to file + if log_file: + sys.stdout = original_stdout + sys.stderr = original_stderr + log_file.close() + logger.info(f"Closed log file: {args.log_file}") if __name__ == "__main__": asyncio.run(main()) diff --git a/visual-tree-search-backend/test/test-tree-search-ws.py b/visual-tree-search-backend/test/test-tree-search-ws.py deleted file mode 100644 index 3226af7..0000000 --- a/visual-tree-search-backend/test/test-tree-search-ws.py +++ /dev/null @@ -1,159 +0,0 @@ -import asyncio -import json -import websockets -import argparse -import logging -from datetime import datetime - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) -logger = logging.getLogger(__name__) - -# Default values -DEFAULT_WS_URL = "ws://localhost:3000/tree-search-ws" -DEFAULT_STARTING_URL = "http://xwebarena.pathonai.org:7770/" -DEFAULT_GOAL = "search running shoes, click on the first result" - -async def connect_and_test_search( - ws_url: str, - starting_url: str, - goal: str, - search_algorithm: str = "bfs", - max_depth: int = 3 -): - """ - Connect to the WebSocket endpoint and test the tree search functionality. 
- - Args: - ws_url: WebSocket URL to connect to - starting_url: URL to start the search from - goal: Goal to achieve - search_algorithm: Search algorithm to use (bfs or dfs) - max_depth: Maximum depth for the search tree - """ - logger.info(f"Connecting to WebSocket at {ws_url}") - - async with websockets.connect(ws_url) as websocket: - logger.info("Connected to WebSocket") - - # Wait for connection established message - response = await websocket.recv() - data = json.loads(response) - if data.get("type") == "connection_established": - logger.info(f"Connection established with ID: {data.get('connection_id')}") - - # Send search request - request = { - "type": "start_search", - "agent_type": "SimpleSearchAgent", - "starting_url": starting_url, - "goal": goal, - "search_algorithm": search_algorithm, - "max_depth": max_depth - } - - logger.info(f"Sending search request: {request}") - await websocket.send(json.dumps(request)) - - # Process responses - while True: - try: - response = await websocket.recv() - data = json.loads(response) - - # Log the message type and some key information - msg_type = data.get("type", "unknown") - - if msg_type == "status_update": - logger.info(f"Status update: {data.get('status')} - {data.get('message')}") - - elif msg_type == "node_update": - node_id = data.get("node_id") - status = data.get("status") - logger.info(f"Node update: {node_id} - {status}") - - # If node was scored, log the score - if status == "scored": - logger.info(f"Node score: {data.get('score')}") - - elif msg_type == "tree_update": - logger.info(f"Tree update received with {len(data.get('nodes', []))} nodes") - - elif msg_type == "best_path_update": - logger.info(f"Best path update: score={data.get('score')}, path length={len(data.get('path', []))}") - - elif msg_type == "search_complete": - status = data.get("status") - score = data.get("score", "N/A") - path_length = len(data.get("path", [])) - - logger.info(f"Search complete: {status}, score={score}, path 
length={path_length}") - logger.info("Path actions:") - - for i, node in enumerate(data.get("path", [])): - logger.info(f" {i+1}. {node.get('action')}") - - # Exit the loop when search is complete - break - - elif msg_type == "error": - logger.error(f"Error: {data.get('message')}") - break - - else: - logger.info(f"Received message of type {msg_type}") - - except websockets.exceptions.ConnectionClosed: - logger.warning("WebSocket connection closed") - break - except Exception as e: - logger.error(f"Error processing message: {e}") - break - - logger.info("Test completed") - -def parse_arguments(): - """Parse command line arguments""" - parser = argparse.ArgumentParser(description="Test the tree search WebSocket functionality") - - parser.add_argument("--ws-url", type=str, default=DEFAULT_WS_URL, - help=f"WebSocket URL (default: {DEFAULT_WS_URL})") - - parser.add_argument("--starting-url", type=str, default=DEFAULT_STARTING_URL, - help=f"Starting URL for the search (default: {DEFAULT_STARTING_URL})") - - parser.add_argument("--goal", type=str, default=DEFAULT_GOAL, - help=f"Goal to achieve (default: {DEFAULT_GOAL})") - - parser.add_argument("--algorithm", type=str, choices=["bfs", "dfs"], default="bfs", - help="Search algorithm to use (default: bfs)") - - parser.add_argument("--max-depth", type=int, default=3, - help="Maximum depth for the search tree (default: 3)") - - return parser.parse_args() - -async def main(): - """Main entry point""" - args = parse_arguments() - - logger.info("Starting tree search WebSocket test") - logger.info(f"WebSocket URL: {args.ws_url}") - logger.info(f"Starting URL: {args.starting_url}") - logger.info(f"Goal: {args.goal}") - logger.info(f"Algorithm: {args.algorithm}") - logger.info(f"Max depth: {args.max_depth}") - - await connect_and_test_search( - ws_url=args.ws_url, - starting_url=args.starting_url, - goal=args.goal, - search_algorithm=args.algorithm, - max_depth=args.max_depth - ) - -if __name__ == "__main__": - asyncio.run(main())