Refactoring a few prompts and tools to make them more precise

test-zeus-ai · Feb 3, 2025 · 0514037 · 0514037
1 parent 02fda99
commit 0514037
Show file tree

Hide file tree

Showing 20 changed files with 705 additions and 319 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -35,6 +35,7 @@ junit2html = "^31.0.2"
 aiohttp = "^3.11.7"
 inflection = "^0.5.1"
 autogen = {extras = ["ollama", "long-context", "graph", "anthropic", "groq", "gemini", "lmm", "mistral", "bedrock", "gemini"], version = "^0.7.3"}
+aiofiles = "^24.1.0"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.3.3"

diff --git a/testzeus_hercules/__main__.py b/testzeus_hercules/__main__.py
@@ -1,6 +1,7 @@
 import asyncio
 import json
 import os
+import aiofiles
 
 from junit2htmlreport.runner import run as prepare_html
 from testzeus_hercules.config import get_global_conf, set_global_conf
@@ -14,7 +15,7 @@
 from testzeus_hercules.utils.logger import logger
 
 
-def sequential_process() -> None:
+async def sequential_process() -> None:
     """
     sequential_process function to process feature files, run test cases, and generate JUnit XML results.
 
@@ -36,7 +37,7 @@ def sequential_process() -> None:
     7. Logs the location of the final result file.
     """
     dont_close_browser = get_global_conf().get_dont_close_browser()
-    list_of_feats = process_feature_file(dont_append_header=dont_close_browser)
+    list_of_feats = await process_feature_file(dont_append_header=dont_close_browser)
     input_gherkin_file_path = get_global_conf().get_input_gherkin_file_path()
     # get name of the feature file using os package
     feature_file_name = os.path.basename(input_gherkin_file_path)
@@ -54,7 +55,7 @@ def sequential_process() -> None:
         # TODO: remove the following set default hack later.
         get_global_conf().set_default_test_id(stake_id)
 
-        cmd = serialize_feature_file(file_path)
+        cmd = await serialize_feature_file(file_path)
 
         logger.info(f"Running testcase: {stake_id}")
         logger.info(f"testcase details: {cmd}")
@@ -63,7 +64,7 @@ def sequential_process() -> None:
             command=cmd,
             dont_terminate_browser_after_run=dont_close_browser,
         )
-        asyncio.run(runner.start())
+        await runner.start()
 
         runner_result = {}
         cost_metrics = {}
@@ -109,7 +110,7 @@ def sequential_process() -> None:
         if cost_metrics:
             logger.info(f"Test run cost is : {cost_metrics}")
         result_of_tests.append(
-            build_junit_xml(
+            await build_junit_xml(
                 runner_result,
                 execution_time,
                 cost_metrics,
@@ -127,7 +128,7 @@ def sequential_process() -> None:
         )
 
     final_result_file_name = f"{get_global_conf().get_junit_xml_base_path()}/{feature_file_name}_result.xml"
-    JUnitXMLGenerator.merge_junit_xml(result_of_tests, final_result_file_name)
+    await JUnitXMLGenerator.merge_junit_xml(result_of_tests, final_result_file_name)
     logger.info(f"Results published in junitxml file: {final_result_file_name}")
 
     # building html from junitxml
@@ -136,7 +137,7 @@ def sequential_process() -> None:
     logger.info(f"Results published in html file: {final_result_html_file_name}")
 
 
-def process_test_directory(test_dir: str) -> None:
+async def process_test_directory(test_dir: str) -> None:
     """
     Process a single test directory by updating config paths and running sequential_process
 
@@ -155,10 +156,10 @@ def process_test_directory(test_dir: str) -> None:
     set_global_conf(test_config, override=True)
 
     logger.info(f"Processing test directory: {test_dir}")
-    sequential_process()
+    await sequential_process()
 
 
-def main() -> None:
+async def main() -> None:
     """
     Main function that checks for bulk execution flag and runs tests accordingly
     """
@@ -194,7 +195,7 @@ def is_width_gt_120() -> bool:
            +=====================+=+==***##%     |__|_______/             |__/ |__/ |__/\____  $$
            %#++++++++***+===++==+==+==***#%%                                           /$$  | $$
             %**%%*+***************#+==+***%%                                          |  $$$$$$/
-            %#*%#% %**********###*++++=#**%%@                                          \______/
+            %#*%#% %**********###*++++=#**%%                                          \______/
             ###%%%#%**********######++*%**#%@
            %++=+###@**********####%   #+++*##
             #***##% #*********####    #+++#%#     /$$$$$$ /$$   /$$ /$$$$$$  /$$$$$$  /$$$$$$  /$$$$$$$ /$$$$$$
@@ -241,15 +242,15 @@ def is_width_gt_120() -> bool:
                 test_dir = os.path.join(tests_dir, test_folder)
                 if os.path.isdir(test_dir):
                     logger.info(f"Processing test folder: {test_folder}")
-                    process_test_directory(test_dir)
+                    await process_test_directory(test_dir)
         else:
             logger.error("Bulk execution requested but no tests directory found at: %s", tests_dir)
             exit(1)
     else:
         # Single test case execution
         logger.info("Single test execution mode")
-        sequential_process()
+        await sequential_process()
 
 
 if __name__ == "__main__":  # pragma: no cover
-    main()
+    asyncio.run(main())
diff --git a/testzeus_hercules/core/agents/browser_nav_agent.py b/testzeus_hercules/core/agents/browser_nav_agent.py
@@ -1,7 +1,7 @@
-from testzeus_hercules.core.agents.base_nav_agent import BaseNavAgent
+from testzeus_hercules.core.agents.multimodal_base_nav_agent import MultimodalBaseNavAgent
 
 
-class BrowserNavAgent(BaseNavAgent):
+class BrowserNavAgent(MultimodalBaseNavAgent):
     agent_name: str = "browser_nav_agent"
     prompt = """# Web Navigation Agent
 You are a web navigation agent that executes webpage interactions and retrieves information.
@@ -10,8 +10,9 @@ class BrowserNavAgent(BaseNavAgent):
 - Navigate webpages 
 - Authenticate to websites
 - Interact with web content
-- Locate DOM elements
+- Locate DOM elements based on md id.
 - Summarize text content
+- use the tool as per the element type.
 
 ## Core Rules
 1. Execute web navigation tasks only
@@ -27,6 +28,9 @@ class BrowserNavAgent(BaseNavAgent):
 11. "md" attribute is a number identifier.
 12. FUNCTION/TOOL CALLING PARAMETERS SHOULD BE FOLLOWED STRICTLY, IT SHOULD NOT BE NO PARAMETER PASS DURING FUNCTION CALL.
 13. IF FUNCTION CALL FAILS FOR PYDANTIC VALIDATION, SOLVE IT AND RETRIGGER.
+14. IF THERE IS AN AN ERROR ON PAGE, THEN TRY TO OVERCOME THAT ERROR WITHIN INSTRUCTION BOUNDARIES.
+15. Handle popups/cookies by accepting or closing them
+
 
 ## Response Format
 Success with Data:

diff --git a/testzeus_hercules/core/agents/high_level_planner_agent.py b/testzeus_hercules/core/agents/high_level_planner_agent.py
@@ -13,12 +13,14 @@
 
 
 class PlannerAgent:
-    prompt = """# Test Automation Task Planner
+    prompt = """# Test EXECUTION Task Planner, YOU ARE TESTING THE APPLICATION
 
-You are a test automation task planner that processes Gherkin BDD feature tasks and executes them through a helper.
+You are a test EXECUTION task planner that processes Gherkin BDD feature tasks and executes them through a helper.
 
 ## Core Responsibilities
-- Parse Gherkin BDD features and create step-by-step execution plans
+- Parse Gherkin BDD features and create VERY DETAILED EXPANDED step-by-step execution plans
+- THE PLAN SHOULD BE AS DETAILED AS POSSIBLE, INCLUDING ALL STEPS
+- ASSUMPTION AGAINST INPUTS SHOULD BE AVOIDED.
 - Include assertion validation in subtasks
 - Delegate atomic operations to helper
 - Analyze helper responses before proceeding
@@ -31,7 +33,7 @@ class PlannerAgent:
 ## Response Format
 Must return well-formatted JSON with:
 {
-"plan": "Detailed plan (step-by-step with step numbers) stick to user task input, ALL IN STRING FORMAT",
+"plan": "VERY DETAILED EXPANDED plan (step-by-step with step numbers) stick to user task input AS CORE BUT HAVE LIBERTY TO EXPAND, ALL IN STRING FORMAT",
 "next_step": "Atomic operation for helper, ALL IN STRING FORMAT",
 "terminate": "'yes' when complete/failed, 'no' during iterations",
 "final_response": "Task outcome (only when terminate='yes')",
@@ -118,6 +120,8 @@ class PlannerAgent:
 10. No duplicate JSON keys
 11. Termination scenario should always be an assert.
 12. Never provide explination or notes only JSON response.
+13. Don't take unnecessary waits. Validate efficiently.
+14. MUST BE EFFICIENT IN EXECUTION AND PLANNING.
 
 Available Test Data: $basic_test_information
 """

diff --git a/testzeus_hercules/core/agents/multimodal_base_nav_agent.py b/testzeus_hercules/core/agents/multimodal_base_nav_agent.py
@@ -0,0 +1,49 @@
+from datetime import datetime
+from string import Template
+from typing import Any
+
+from testzeus_hercules.utils.llm_helper import MultimodalConversableAgent
+
+from testzeus_hercules.core.agents.base_nav_agent import BaseNavAgent
+from testzeus_hercules.utils.logger import logger
+
+
+class MultimodalBaseNavAgent(BaseNavAgent):
+    """Navigation-agent base whose underlying agent is a ``MultimodalConversableAgent``.
+
+    ``__init__`` is overridden without calling ``BaseNavAgent.__init__``: it
+    builds ``self.agent`` itself and then calls ``self.register_tools()``.
+    """
+
+    # Default agent name, used when the constructor receives ``agent_name=None``.
+    agent_name: str = "multimodal_base_nav_agent"
+    # Default system prompt, used when neither ``agent_prompt`` nor ``system_prompt`` is given.
+    prompt = "Base Multimodal Agent"
+
+    def __init__(
+        self,
+        model_config_list: list[dict[str, Any]],
+        llm_config_params: dict[str, Any],
+        system_prompt: str | list[str] | None,
+        nav_executor: Any,
+        agent_name: str | None = None,
+        agent_prompt: str | None = None,
+    ) -> None:
+        """
+        Initialize the MultimodalBaseNavAgent using MultimodalConversableAgent instead of ConversableAgent.
+
+        Args:
+            model_config_list: Model configurations passed to the agent as ``config_list``;
+                the first entry's ``"model"`` key is logged.
+            llm_config_params: Extra key/value pairs merged into ``llm_config`` as is.
+            system_prompt: Optional custom system prompt. A list is joined with newlines.
+                When non-empty it replaces ``agent_prompt`` / ``self.prompt``.
+            nav_executor: Stored on ``self.nav_executor``.
+            agent_name: Name for the agent; falls back to the class-level ``agent_name``.
+            agent_prompt: Prompt used instead of ``self.prompt`` when no custom
+                ``system_prompt`` is supplied.
+        """
+        self.nav_executor = nav_executor
+        # ``__get_ltm`` is name-mangled in BaseNavAgent, hence the explicit mangled access.
+        # NOTE(review): presumably returns the long-term-memory / test-data text - confirm in BaseNavAgent.
+        user_ltm = self._BaseNavAgent__get_ltm()
+        agent_name = self.agent_name if agent_name is None else agent_name
+
+        system_message = agent_prompt or self.prompt
+        if system_prompt:
+            if isinstance(system_prompt, list):
+                system_message = "\n".join(system_prompt)
+            else:
+                system_message = system_prompt
+            logger.info(f"Using custom system prompt for MultimodalBaseNavAgent: {system_message}")
+
+        system_message = system_message + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}"
+        if user_ltm:
+            user_ltm = "\n" + user_ltm
+            # safe_substitute: a custom prompt may contain a stray "$" or other "$name"
+            # tokens; substitute() would raise on those, safe_substitute() leaves them intact.
+            system_message = Template(system_message).safe_substitute(basic_test_information=user_ltm)
+
+        logger.info(f"Nav agent {agent_name} using model: {model_config_list[0]['model']}")
+
+        # Use MultimodalConversableAgent instead of ConversableAgent
+        self.agent = MultimodalConversableAgent(
+            name=agent_name,
+            system_message=system_message,
+            llm_config={
+                "config_list": model_config_list,
+                **llm_config_params,  # unpack all the name value pairs in llm_config_params as is
+            },
+            human_input_mode="NEVER",
+        )
+
+        self.register_tools()
diff --git a/testzeus_hercules/core/extra_tools/browser_assist_tools.py b/testzeus_hercules/core/extra_tools/browser_assist_tools.py
@@ -59,3 +59,47 @@ async def take_browser_screenshot(
     except Exception as e:
         logger.exception(f"Error taking screenshot: {e}")
         return {"error": str(e)}
+
+
+@tool(
+    agent_names=["browser_nav_agent"],
+    name="see_the_page",
+    description="give you the current screenshot of the browser view",
+)
+async def see_the_page() -> Annotated[str, "Path to of screenshot"]:
+    """
+    Take and save a snapshot of the current browser view, overwriting previous snapshot.
+
+    The latest screenshot stream is requested from ``PlaywrightManager``; if none
+    is available a new screenshot named "browser_snapshot" is taken and the stream
+    is requested again. The image is written to ``log_files/current_page.png``
+    (relative to the current working directory).
+
+    Returns:
+        str: Path to saved screenshot
+        dict: ``{"error": <message>}`` if no screenshot could be captured or any
+            exception was raised. The return annotation is intentionally left as
+            ``str`` - NOTE(review): the ``tool`` decorator may read it; confirm
+            before widening.
+    """
+    # Function-scope import so the block does not rely on a module-level one.
+    import asyncio
+
+    def _save_png(stream, destination: str) -> None:
+        """Decode ``stream`` with Pillow and write it to ``destination``."""
+        # The context manager releases Pillow's handle once the file is written.
+        with Image.open(stream) as snapshot:
+            snapshot.save(destination)
+
+    try:
+        # Get current screenshot
+        browser_manager = PlaywrightManager()
+        screenshot_stream = await browser_manager.get_latest_screenshot_stream()
+        if not screenshot_stream:
+            page = await browser_manager.get_current_page()
+            await browser_manager.take_screenshots("browser_snapshot", page)
+            screenshot_stream = await browser_manager.get_latest_screenshot_stream()
+
+        if not screenshot_stream:
+            return {"error": "Failed to capture current browser view"}
+
+        # Use log_files directory
+        screenshots_dir = "log_files"
+        os.makedirs(screenshots_dir, exist_ok=True)
+
+        # Fixed filename that will be overwritten each time
+        screenshot_file = os.path.join(screenshots_dir, "current_page.png")
+
+        # Image decode/encode is blocking work; run it in a worker thread so the
+        # event loop stays responsive while the PNG is written.
+        await asyncio.to_thread(_save_png, screenshot_stream, screenshot_file)
+
+        logger.info(f"Page snapshot saved to: {screenshot_file}")
+        return screenshot_file
+
+    except Exception as e:
+        logger.exception(f"Error taking snapshot: {e}")
+        return {"error": str(e)}
diff --git a/testzeus_hercules/core/extra_tools/visual_skill.py b/testzeus_hercules/core/extra_tools/visual_skill.py
@@ -112,8 +112,9 @@ async def compare_visual_screenshot(
         message = comparison_prompt.format(reference=reference_image_path, screenshot=screenshot_file)
 
         logger.debug(f"Comparison prompt: {message}")
+        chat_response = await image_ex_user_proxy.a_initiate_chat(image_agent, message=message)
 
-        chat_response = await asyncio.to_thread(image_ex_user_proxy.initiate_chat, image_agent, message=message)
+        # chat_response = await asyncio.to_thread(image_ex_user_proxy.initiate_chat, image_agent, message=message)
 
         last_message = None
         for msg in reversed(chat_response.chat_history):

diff --git a/testzeus_hercules/core/playwright_manager.py b/testzeus_hercules/core/playwright_manager.py
@@ -28,8 +28,8 @@
 # Reference: https://github.com/microsoft/playwright/issues/28995
 os.environ["PW_TEST_SCREENSHOT_NO_FONTS_READY"] = "1"
 
-MAX_WAIT_PAGE_LOAD_TIME = 1
-WAIT_FOR_NETWORK_IDLE = 5
+MAX_WAIT_PAGE_LOAD_TIME = 0.6
+WAIT_FOR_NETWORK_IDLE = 2
 MIN_WAIT_PAGE_LOAD_TIME = 0.05
 
 ALL_POSSIBLE_PERMISSIONS = [

diff --git a/testzeus_hercules/core/runner.py b/testzeus_hercules/core/runner.py
@@ -4,6 +4,7 @@
 import time
 from typing import Any
 
+import aiofiles
 from testzeus_hercules.config import get_global_conf
 from testzeus_hercules.core.agents_llm_config import AgentsLLMConfig
 from testzeus_hercules.core.playwright_manager import PlaywrightManager
@@ -140,15 +141,15 @@ async def save_planner_chat_messages(self) -> None:
                 res_output_thoughts_logs_di[key][idx]["content"] = res_content
 
         if self.save_chat_logs_to_files:
-            with open(
+            async with aiofiles.open(
                 os.path.join(
                     get_global_conf().get_source_log_folder_path(self.stake_id),
                     "agent_inner_thoughts.json",
                 ),
                 "w",
                 encoding="utf-8",
             ) as f:
-                json.dump(res_output_thoughts_logs_di, f, ensure_ascii=False, indent=4)
+                await f.write(json.dumps(res_output_thoughts_logs_di, ensure_ascii=False, indent=4))
             logger.debug("Chat messages saved")
         else:
             logger.info(

diff --git a/testzeus_hercules/core/simple_hercules.py b/testzeus_hercules/core/simple_hercules.py
@@ -367,14 +367,6 @@ def __save_chat_log(self, sender: autogen.ConversableAgent, receiver: autogen.Co
             with open(chat_logs_file, "w") as file:
                 json.dump(res_output_thoughts_logs_di, file, indent=4)
 
-    # def __save_chat_log(self, chat_log: list[dict[str, Any]]) -> None:
-    #     if not self.save_chat_logs_to_files:
-    #         logger.info("Nested chat logs", extra={"nested_chat_log": chat_log})
-    #     else:
-    #         chat_logs_file = os.path.join(self.get_chat_logs_dir() or "", f"nested_chat_log_{str(time_ns())}.json")
-    #         # Save the chat log to a file
-    #         with open(chat_logs_file, "w") as file:
-    #             json.dump(chat_log, file, indent=4)
 
     async def __initialize_agents(self) -> dict[str, autogen.ConversableAgent]:
         """

diff --git a/testzeus_hercules/core/tools/accessibility_calls.py b/testzeus_hercules/core/tools/accessibility_calls.py
@@ -13,7 +13,7 @@
 
 @tool(
     agent_names=["browser_nav_agent"],
-    description="Test the current page accessibility using Axe-core. This tool is used to check the accessibility of the page.",
+    description="Test the current page a11y accessibility using Axe-core. This tool is used to check only the a11y accessibility of the page.",
     name="test_page_accessibility",
 )
 async def test_page_accessibility(

diff --git a/testzeus_hercules/core/tools/enter_text_using_selector.py b/testzeus_hercules/core/tools/enter_text_using_selector.py
@@ -155,11 +155,11 @@ async def do_entertext(page: Page, selector: str, text_to_enter: str, use_keyboa
 
         if use_keyboard_fill:
             await elem.focus()
-            await asyncio.sleep(0.05)
+            await asyncio.sleep(0.01)
             await press_key_combination("Control+A")
-            await asyncio.sleep(0.05)
+            await asyncio.sleep(0.01)
             await press_key_combination("Delete")
-            await asyncio.sleep(0.05)
+            await asyncio.sleep(0.01)
             logger.debug(f"Focused element with selector {selector} to enter text")
             await page.keyboard.type(text_to_enter, delay=1)
         else: