Merge pull request #86 from PathOnAI/search-algorithm-cleanup

TataKKKL · web-flow · commit 6e0b25cfd945 · 2025-04-30T09:18:45.000+08:00
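Search algorithm cleanup

- Retire the per-node `reward` field in favour of `value`: the assignments, serialized keys, and frontend tooltip/`TreeNode` entries are commented out, and LATS best-node selection now keys on `(value, depth)`.
- LATS ends the search successfully when the simulation reward reaches the new `AgentConfig.simulation_score` threshold (default 0.75) instead of requiring exactly 1.
- Node evaluation assigns a score of 0 to an empty trajectory without calling the scoring model.
- Close the Playwright manager before returning from the LATS early-success path, the MCTS success and final-return paths, and the BFS/DFS success, partial-success, and failure paths.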
diff --git a/visual-tree-search-app/components/LATSVisual.tsx b/visual-tree-search-app/components/LATSVisual.tsx
@@ -12,7 +12,7 @@ interface TreeNode {
   value?: number;
   visits?: number;
   feedback?: string;
-  reward?: number;
+  // reward?: number;
   isSimulated?: boolean; // Flag to track newly simulated nodes
 }
 
@@ -412,9 +412,9 @@ const LATSVisual: React.FC<SimpleSearchVisualProps> = ({ messages }) => {
           }
           
           // Add reward info if available
-          if (typeof d.data.reward === 'number') {
-            tooltipContent += `<div class="mt-1">Reward: <span class="font-bold">${d.data.reward.toFixed(2)}</span></div>`;
-          }
+          // if (typeof d.data.reward === 'number') {
+          //   tooltipContent += `<div class="mt-1">Reward: <span class="font-bold">${d.data.reward.toFixed(2)}</span></div>`;
+          // }
           
           // Add value info if available
           if (typeof d.data.value === 'number') {
diff --git a/visual-tree-search-app/components/MessageLogPanelLATS.tsx b/visual-tree-search-app/components/MessageLogPanelLATS.tsx
@@ -71,8 +71,8 @@ interface ParsedMessage {
   node_id?: string;
   value?: number;
   visits?: number;
-  reward?: number;
   terminal_node_description?: string;
+  reward?: number;
   step?: number;
   step_name?: string;
   iteration?: number;
diff --git a/visual-tree-search-app/components/SimpleSearchVisual.tsx b/visual-tree-search-app/components/SimpleSearchVisual.tsx
@@ -12,7 +12,7 @@ interface TreeNode {
   value?: number;
   visits?: number;
   feedback?: string;
-  reward?: number;
+  // reward?: number;
 }
 
 interface Message {
@@ -277,10 +277,10 @@ const SimpleSearchVisual: React.FC<SimpleSearchVisualProps> = ({ messages }) =>
           tooltipContent += `<div class="mt-2">${nodeInfo.join(' | ')}</div>`;
         }
         
-        // Add reward info if available
-        if (typeof d.data.reward === 'number') {
-          tooltipContent += `<div class="mt-1">Reward: <span class="font-bold">${d.data.reward.toFixed(2)}</span></div>`;
-        }
+        // // Add reward info if available
+        // if (typeof d.data.reward === 'number') {
+        //   tooltipContent += `<div class="mt-1">Reward: <span class="font-bold">${d.data.reward.toFixed(2)}</span></div>`;
+        // }
         
         // Add value info if available
         if (typeof d.data.value === 'number') {
diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/base_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/base_agent.py
@@ -102,7 +102,7 @@ def _get_tree_data(self):
                 "value": node.value,
                 "visits": node.visits,
                 "feedback": node.feedback,
-                "reward": node.reward
+                # "reward": node.reward
             }
             tree_data.append(node_data)
         
@@ -129,7 +129,7 @@ async def remove_simulated_trajectory(self, starting_node, terminal_node: LATSNo
                 "description": node.natural_language_description,
                 "visits": node.visits,
                 "value": float(f"{node.value:.3f}") if hasattr(node, 'value') else None,
-                "reward": float(f"{node.reward:.3f}") if hasattr(node, 'reward') else None,
+                # "reward": float(f"{node.reward:.3f}") if hasattr(node, 'reward') else None,
                 "is_terminal": node.is_terminal,
                 "feedback": node.feedback if hasattr(node, 'feedback') else None,
                 "is_root": not hasattr(node, 'parent') or node.parent is None,
@@ -159,7 +159,7 @@ def _get_trajectory_data(self, terminal_node: LATSNode):
                 "description": node.natural_language_description,
                 "visits": node.visits,
                 "value": float(f"{node.value:.3f}") if hasattr(node, 'value') else None,
-                "reward": float(f"{node.reward:.3f}") if hasattr(node, 'reward') else None,
+                # "reward": float(f"{node.reward:.3f}") if hasattr(node, 'reward') else None,
                 "is_terminal": node.is_terminal,
                 "feedback": node.feedback if hasattr(node, 'feedback') else None,
                 "is_root": not hasattr(node, 'parent') or node.parent is None,
@@ -424,15 +424,18 @@ async def node_children_evaluation(self, node: LATSNode) -> None:
                 score = 0
             else:
                 trajectory = child.get_trajectory()
-                prompt = create_llm_prompt(trajectory, self.goal)
-                # , child.observation.image
-                result = score_trajectory_with_openai(prompt, openai_client, self.config.evaluation_model)
-                score = result["overall_score"]
+                if len(trajectory) == 0:
+                    score = 0
+                else:
+                    prompt = create_llm_prompt(trajectory, self.goal)
+                    # , child.observation.image
+                    result = score_trajectory_with_openai(prompt, openai_client, self.config.evaluation_model)
+                    score = result["overall_score"]
             scores.append(score)
 
         for child, score in zip(node.children, scores):
             child.value = score
-            child.reward = score
+            # child.reward = score
 
     async def node_evaluation(self, node: LATSNode) -> None:
         """Evaluate the current node and assign its score."""
@@ -454,13 +457,16 @@ async def node_evaluation(self, node: LATSNode) -> None:
                 if node.is_terminal:
                     score = 0
                 else:
-                    prompt = create_llm_prompt(trajectory, self.goal)
-                    result = score_trajectory_with_openai(
-                        prompt, 
-                        openai_client, 
-                        model=self.config.evaluation_model
-                    )
-                    score = result["overall_score"]
+                    if len(trajectory) == 0:
+                        score = 0
+                    else:
+                        prompt = create_llm_prompt(trajectory, self.goal)
+                        result = score_trajectory_with_openai(
+                            prompt, 
+                            openai_client, 
+                            model=self.config.evaluation_model
+                        )
+                        score = result["overall_score"]
             
             except Exception as e:
                 error_msg = f"Error scoring node {id(node)}: {str(e)}"
@@ -469,7 +475,7 @@ async def node_evaluation(self, node: LATSNode) -> None:
             
             # Assign the score to the node
             node.value = score
-            node.reward = score
+            # node.reward = score
             
 
         except Exception as e:
diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/lats_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/lats_agent.py
@@ -37,7 +37,7 @@ async def lats_search(self, websocket=None):
             await self.websocket_node_selection(node, websocket=websocket)
 
             if node is None:
-                print("All paths lead to terminal nodes with reward 0. Ending search.")
+                print("All paths lead to terminal nodes with value 0. Ending search.")
                 break
 
             # Step 2: Node Expansion
@@ -76,8 +76,10 @@ async def lats_search(self, websocket=None):
             terminal_nodes.append(terminal_node)
             await self.websocket_simulation_result(reward, terminal_node, websocket=websocket)
 
-            if reward == 1:
+            # simulation score threshold
+            if reward >= self.config.simulation_score:
                 await self.websocket_search_complete("success", reward, terminal_node.get_trajectory(), websocket=websocket)
+                await self.playwright_manager.close()
                 return terminal_node
 
             # Step 5: Backpropagation
@@ -95,8 +97,8 @@ async def lats_search(self, websocket=None):
         all_nodes_list = collect_all_nodes(self.root_node)
         all_nodes_list.extend(terminal_nodes)
         
-        ## temp change: if reward is the same, choose the deeper node
-        best_child = max(all_nodes_list, key=lambda x: (x.reward, x.depth))
+        ## temp change: if value is the same, choose the deeper node
+        best_child = max(all_nodes_list, key=lambda x: (x.value, x.depth))
         
         if best_child.value >= 0.75:
             print("Successful trajectory found")
diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/lats_node.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/lats_node.py
@@ -88,7 +88,7 @@ def __init__(
         self.value = 0.0
         self.depth = 0 if parent is None else parent.depth + 1
         self.is_terminal = False
-        self.reward = 0.0
+        # self.reward = 0.0
         self.exhausted = False  # If all children are terminal
         self.em = 0.0  # Exact match, evaluation metric
         self.observation: Optional[Observation] = None
@@ -177,7 +177,7 @@ def to_dict(self) -> dict:
             'value': self.value,
             'depth': self.depth,
             'is_terminal': self.is_terminal,
-            'reward': self.reward,
+            # 'reward': self.reward,
             'em': self.em,
         }
 
diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/mcts_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/mcts_agent.py
@@ -302,6 +302,7 @@ async def mcts_search(self, websocket=None) -> Optional[LATSNode]:
                 # Convert path to serializable trajectory
                 # trajectory = [node.action for node in path if node.action is not None]
                 await self.websocket_search_complete("success", score, selected_node.get_trajectory(), websocket=websocket)
+                await self.playwright_manager.close()
                 return selected_node
 
             print(f"path: {path}")
@@ -328,4 +329,5 @@ async def mcts_search(self, websocket=None) -> Optional[LATSNode]:
              # Convert node to serializable trajectory
             # trajectory = [n.action for n in self.get_path_to_root(best_node) if n.action is not None]
             await self.websocket_search_complete("partial_success", best_node.value, best_node.get_trajectory(), websocket=websocket)
+        await self.playwright_manager.close()
         return best_node
diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/simple_search_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SearchAgents/simple_search_agent.py
@@ -110,6 +110,7 @@ async def bfs(self, websocket=None):
                     
                     # Send completion update if websocket is provided
                     await self.websocket_search_complete("success", score, current_node.get_trajectory(), websocket=websocket) 
+                    await self.playwright_manager.close()
                     
                     return current_node
             
@@ -120,6 +121,7 @@ async def bfs(self, websocket=None):
             
             # Send completion update if websocket is provided
             await self.websocket_search_complete("partial_success", best_score, best_node.get_trajectory(), websocket=websocket)
+            await self.playwright_manager.close()
             
             return best_node
         
@@ -128,6 +130,7 @@ async def bfs(self, websocket=None):
         
         # Send failure update if websocket is provided
         await self.websocket_search_complete("failure", 0, None, websocket=websocket)
+        await self.playwright_manager.close()
         
         return None
         
@@ -209,7 +212,8 @@ async def dfs(self, websocket=None) -> List[Dict[str, Any]]:
                 print(f"Found satisfactory solution with score {score}")
                 
                 # Send completion update if websocket is provided
-                await self.websocket_search_complete("success", score, current_node.get_trajectory(), websocket=websocket)                
+                await self.websocket_search_complete("success", score, current_node.get_trajectory(), websocket=websocket) 
+                await self.playwright_manager.close()               
                 return current_node
                         
             # Add non-terminal children to stack in reverse order
@@ -234,6 +238,7 @@ async def dfs(self, websocket=None) -> List[Dict[str, Any]]:
             
             # Send completion update if websocket is provided
             await self.websocket_search_complete("partial_success", best_score, best_node.get_trajectory(), websocket=websocket)
+            await self.playwright_manager.close()
             
             return best_node
         
@@ -242,6 +247,7 @@ async def dfs(self, websocket=None) -> List[Dict[str, Any]]:
         
         # Send failure update if websocket is provided
         await self.websocket_search_complete("failure", 0, None, websocket=websocket)
+        await self.playwright_manager.close()
         
         return None
             
diff --git a/visual-tree-search-backend/app/api/lwats/core_async/config.py b/visual-tree-search-backend/app/api/lwats/core_async/config.py
@@ -25,6 +25,7 @@ class AgentConfig:
     num_simulations: int = 1
     account_reset: bool = True
 
+    simulation_score: float = 0.75
     reflection_score: float = 0.75
     
     # Features