Skip to content

Commit

Permalink
Refactoring few prompts and tools to make it more precise
Browse files Browse the repository at this point in the history
  • Loading branch information
Shriyansh Agnihotri committed Feb 3, 2025
1 parent 02fda99 commit 0514037
Show file tree
Hide file tree
Showing 20 changed files with 705 additions and 319 deletions.
153 changes: 82 additions & 71 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ junit2html = "^31.0.2"
aiohttp = "^3.11.7"
inflection = "^0.5.1"
autogen = {extras = ["ollama", "long-context", "graph", "anthropic", "groq", "gemini", "lmm", "mistral", "bedrock", "gemini"], version = "^0.7.3"}
aiofiles = "^24.1.0"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.3"
Expand Down
27 changes: 14 additions & 13 deletions testzeus_hercules/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import json
import os
import aiofiles

from junit2htmlreport.runner import run as prepare_html
from testzeus_hercules.config import get_global_conf, set_global_conf
Expand All @@ -14,7 +15,7 @@
from testzeus_hercules.utils.logger import logger


def sequential_process() -> None:
async def sequential_process() -> None:
"""
sequential_process function to process feature files, run test cases, and generate JUnit XML results.
Expand All @@ -36,7 +37,7 @@ def sequential_process() -> None:
7. Logs the location of the final result file.
"""
dont_close_browser = get_global_conf().get_dont_close_browser()
list_of_feats = process_feature_file(dont_append_header=dont_close_browser)
list_of_feats = await process_feature_file(dont_append_header=dont_close_browser)
input_gherkin_file_path = get_global_conf().get_input_gherkin_file_path()
# get name of the feature file using os package
feature_file_name = os.path.basename(input_gherkin_file_path)
Expand All @@ -54,7 +55,7 @@ def sequential_process() -> None:
# TODO: remove the following set default hack later.
get_global_conf().set_default_test_id(stake_id)

cmd = serialize_feature_file(file_path)
cmd = await serialize_feature_file(file_path)

logger.info(f"Running testcase: {stake_id}")
logger.info(f"testcase details: {cmd}")
Expand All @@ -63,7 +64,7 @@ def sequential_process() -> None:
command=cmd,
dont_terminate_browser_after_run=dont_close_browser,
)
asyncio.run(runner.start())
await runner.start()

runner_result = {}
cost_metrics = {}
Expand Down Expand Up @@ -109,7 +110,7 @@ def sequential_process() -> None:
if cost_metrics:
logger.info(f"Test run cost is : {cost_metrics}")
result_of_tests.append(
build_junit_xml(
await build_junit_xml(
runner_result,
execution_time,
cost_metrics,
Expand All @@ -127,7 +128,7 @@ def sequential_process() -> None:
)

final_result_file_name = f"{get_global_conf().get_junit_xml_base_path()}/{feature_file_name}_result.xml"
JUnitXMLGenerator.merge_junit_xml(result_of_tests, final_result_file_name)
await JUnitXMLGenerator.merge_junit_xml(result_of_tests, final_result_file_name)
logger.info(f"Results published in junitxml file: {final_result_file_name}")

# building html from junitxml
Expand All @@ -136,7 +137,7 @@ def sequential_process() -> None:
logger.info(f"Results published in html file: {final_result_html_file_name}")


def process_test_directory(test_dir: str) -> None:
async def process_test_directory(test_dir: str) -> None:
"""
Process a single test directory by updating config paths and running sequential_process
Expand All @@ -155,10 +156,10 @@ def process_test_directory(test_dir: str) -> None:
set_global_conf(test_config, override=True)

logger.info(f"Processing test directory: {test_dir}")
sequential_process()
await sequential_process()


def main() -> None:
async def main() -> None:
"""
Main function that checks for bulk execution flag and runs tests accordingly
"""
Expand Down Expand Up @@ -194,7 +195,7 @@ def is_width_gt_120() -> bool:
+=====================+=+==***##% |__|_______/ |__/ |__/ |__/\____ $$
%#++++++++***+===++==+==+==***#%% /$$ | $$
%**%%*+***************#+==+***%% | $$$$$$/
%#*%#% %**********###*++++=#**%%@ \______/
%#*%#% %**********###*++++=#**%% \______/
###%%%#%**********######++*%**#%@
%++=+###@**********####% #+++*##
#***##% #*********#### #+++#%# /$$$$$$ /$$ /$$ /$$$$$$ /$$$$$$ /$$$$$$ /$$$$$$$ /$$$$$$
Expand Down Expand Up @@ -241,15 +242,15 @@ def is_width_gt_120() -> bool:
test_dir = os.path.join(tests_dir, test_folder)
if os.path.isdir(test_dir):
logger.info(f"Processing test folder: {test_folder}")
process_test_directory(test_dir)
await process_test_directory(test_dir)
else:
logger.error("Bulk execution requested but no tests directory found at: %s", tests_dir)
exit(1)
else:
# Single test case execution
logger.info("Single test execution mode")
sequential_process()
await sequential_process()


if __name__ == "__main__": # pragma: no cover
main()
asyncio.run(main())
10 changes: 7 additions & 3 deletions testzeus_hercules/core/agents/browser_nav_agent.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from testzeus_hercules.core.agents.base_nav_agent import BaseNavAgent
from testzeus_hercules.core.agents.multimodal_base_nav_agent import MultimodalBaseNavAgent


class BrowserNavAgent(BaseNavAgent):
class BrowserNavAgent(MultimodalBaseNavAgent):
agent_name: str = "browser_nav_agent"
prompt = """# Web Navigation Agent
You are a web navigation agent that executes webpage interactions and retrieves information.
Expand All @@ -10,8 +10,9 @@ class BrowserNavAgent(BaseNavAgent):
- Navigate webpages
- Authenticate to websites
- Interact with web content
- Locate DOM elements
- Locate DOM elements based on md id.
- Summarize text content
- use the tool as per the element type.
## Core Rules
1. Execute web navigation tasks only
Expand All @@ -27,6 +28,9 @@ class BrowserNavAgent(BaseNavAgent):
11. "md" attribute is a number identifier.
12. FUNCTION/TOOL CALLING PARAMETERS SHOULD BE FOLLOWED STRICTLY, IT SHOULD NOT BE NO PARAMETER PASS DURING FUNCTION CALL.
13. IF FUNCTION CALL FAILS FOR PYDANTIC VALIDATION, SOLVE IT AND RETRIGGER.
14. IF THERE IS AN AN ERROR ON PAGE, THEN TRY TO OVERCOME THAT ERROR WITHIN INSTRUCTION BOUNDARIES.
15. Handle popups/cookies by accepting or closing them
## Response Format
Success with Data:
Expand Down
12 changes: 8 additions & 4 deletions testzeus_hercules/core/agents/high_level_planner_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@


class PlannerAgent:
prompt = """# Test Automation Task Planner
prompt = """# Test EXECUTION Task Planner, YOU ARE TESTING THE APPLICATION
You are a test automation task planner that processes Gherkin BDD feature tasks and executes them through a helper.
You are a test EXECUTION task planner that processes Gherkin BDD feature tasks and executes them through a helper.
## Core Responsibilities
- Parse Gherkin BDD features and create step-by-step execution plans
- Parse Gherkin BDD features and create VERY DETAILED EXPANDED step-by-step execution plans
- THE PLAN SHOULD BE AS DETAILED AS POSSIBLE, INCLUDING ALL STEPS
- ASSUMPTION AGAINST INPUTS SHOULD BE AVOIDED.
- Include assertion validation in subtasks
- Delegate atomic operations to helper
- Analyze helper responses before proceeding
Expand All @@ -31,7 +33,7 @@ class PlannerAgent:
## Response Format
Must return well-formatted JSON with:
{
"plan": "Detailed plan (step-by-step with step numbers) stick to user task input, ALL IN STRING FORMAT",
"plan": "VERY DETAILED EXPANDED plan (step-by-step with step numbers) stick to user task input AS CORE BUT HAVE LIBERTY TO EXPAND, ALL IN STRING FORMAT",
"next_step": "Atomic operation for helper, ALL IN STRING FORMAT",
"terminate": "'yes' when complete/failed, 'no' during iterations",
"final_response": "Task outcome (only when terminate='yes')",
Expand Down Expand Up @@ -118,6 +120,8 @@ class PlannerAgent:
10. No duplicate JSON keys
11. Termination scenario should always be an assert.
12. Never provide explination or notes only JSON response.
13. Don't take unnecessary waits. Validate efficiently.
14. MUST BE EFFICIENT IN EXECUTION AND PLANNING.
Available Test Data: $basic_test_information
"""
Expand Down
49 changes: 49 additions & 0 deletions testzeus_hercules/core/agents/multimodal_base_nav_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from datetime import datetime
from string import Template
from typing import Any

from testzeus_hercules.utils.llm_helper import MultimodalConversableAgent

from testzeus_hercules.core.agents.base_nav_agent import BaseNavAgent
from testzeus_hercules.utils.logger import logger


class MultimodalBaseNavAgent(BaseNavAgent):
    """
    Navigation agent base class whose underlying agent is a MultimodalConversableAgent.

    The constructor does not call BaseNavAgent.__init__. It builds the system
    message itself, creates the MultimodalConversableAgent directly, and then
    registers tools through self.register_tools().
    """

    # Default agent name, used when the constructor receives agent_name=None.
    agent_name: str = "multimodal_base_nav_agent"
    # Default system prompt, used when neither agent_prompt nor system_prompt is given.
    prompt = "Base Multimodal Agent"

    def __init__(
        self,
        model_config_list: list[dict[str, Any]],
        llm_config_params: dict[str, Any],
        system_prompt: str | list[str] | None,
        nav_executor: Any,
        agent_name: str | None = None,
        agent_prompt: str | None = None,
    ) -> None:
        """
        Initialize the MultimodalBaseNavAgent using MultimodalConversableAgent instead of ConversableAgent.

        Args:
            model_config_list: Model configurations passed to the agent as
                "config_list"; the first entry's "model" value is logged.
            llm_config_params: Extra key/value pairs merged as-is into llm_config.
            system_prompt: Optional custom system prompt. A list is joined with
                newlines. When non-empty it replaces both agent_prompt and the
                class-level prompt.
            nav_executor: Stored on the instance as self.nav_executor.
            agent_name: Name for the created agent; falls back to the class-level
                agent_name when None.
            agent_prompt: Prompt used in place of the class-level prompt when no
                custom system_prompt is supplied.

        Raises:
            KeyError, ValueError: Raised by string.Template.substitute when the
                final system message contains a placeholder other than
                $basic_test_information or a malformed "$" sequence.
        """
        self.nav_executor = nav_executor
        # __get_ltm is private to BaseNavAgent, hence the name-mangled access.
        # NOTE(review): presumably returns the user's long-term-memory text — confirm.
        user_ltm = self._BaseNavAgent__get_ltm()
        agent_name = self.agent_name if agent_name is None else agent_name

        system_message = agent_prompt or self.prompt
        if system_prompt:
            if isinstance(system_prompt, list):
                system_message = "\n".join(system_prompt)
            else:
                system_message = system_prompt
            logger.info(f"Using custom system prompt for MultimodalBaseNavAgent: {system_message}")

        system_message = system_message + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}"
        if user_ltm:
            user_ltm = "\n" + user_ltm
        # Fill the $basic_test_information placeholder; the value is used as-is
        # (without a leading newline) when user_ltm is falsy.
        system_message = Template(system_message).substitute(basic_test_information=user_ltm)

        logger.info(f"Nav agent {agent_name} using model: {model_config_list[0]['model']}")

        # Use MultimodalConversableAgent instead of ConversableAgent
        self.agent = MultimodalConversableAgent(
            name=agent_name,
            system_message=system_message,
            llm_config={
                "config_list": model_config_list,
                **llm_config_params,  # unpack all the name value pairs in llm_config_params as is
            },
            human_input_mode="NEVER",
        )

        self.register_tools()
44 changes: 44 additions & 0 deletions testzeus_hercules/core/extra_tools/browser_assist_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,47 @@ async def take_browser_screenshot(
except Exception as e:
logger.exception(f"Error taking screenshot: {e}")
return {"error": str(e)}


@tool(
    agent_names=["browser_nav_agent"],
    name="see_the_page",
    description="give you the current screenshot of the browser view",
)
async def see_the_page() -> Annotated[str, "Path to of screenshot"]:
    """
    Save the current browser view as an image file and return its path.

    The latest screenshot stream is requested from PlaywrightManager. When none
    is available, a screenshot of the current page is taken and the stream is
    requested once more. The image is written to log_files/current_page.png,
    replacing the file stored by any previous call.

    Returns:
        str: Path of the saved screenshot file.
        dict: {"error": <message>} when no screenshot could be obtained or any
            step raised an exception.
    """
    try:
        manager = PlaywrightManager()

        stream = await manager.get_latest_screenshot_stream()
        if not stream:
            # Nothing captured so far: take a screenshot now and ask again.
            current_page = await manager.get_current_page()
            await manager.take_screenshots("browser_snapshot", current_page)
            stream = await manager.get_latest_screenshot_stream()
            if not stream:
                return {"error": "Failed to capture current browser view"}

        # Snapshots go into the log_files directory under one fixed file name,
        # so each call overwrites the previous snapshot.
        target_dir = os.path.join("log_files")
        os.makedirs(target_dir, exist_ok=True)
        screenshot_file = os.path.join(target_dir, "current_page.png")

        Image.open(stream).save(screenshot_file)
        logger.info(f"Page snapshot saved to: {screenshot_file}")
    except Exception as e:
        logger.exception(f"Error taking snapshot: {e}")
        return {"error": str(e)}
    return screenshot_file
3 changes: 2 additions & 1 deletion testzeus_hercules/core/extra_tools/visual_skill.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,9 @@ async def compare_visual_screenshot(
message = comparison_prompt.format(reference=reference_image_path, screenshot=screenshot_file)

logger.debug(f"Comparison prompt: {message}")
chat_response = await image_ex_user_proxy.a_initiate_chat(image_agent, message=message)

chat_response = await asyncio.to_thread(image_ex_user_proxy.initiate_chat, image_agent, message=message)
# chat_response = await asyncio.to_thread(image_ex_user_proxy.initiate_chat, image_agent, message=message)

last_message = None
for msg in reversed(chat_response.chat_history):
Expand Down
4 changes: 2 additions & 2 deletions testzeus_hercules/core/playwright_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
# Reference: https://github.com/microsoft/playwright/issues/28995
os.environ["PW_TEST_SCREENSHOT_NO_FONTS_READY"] = "1"

MAX_WAIT_PAGE_LOAD_TIME = 1
WAIT_FOR_NETWORK_IDLE = 5
MAX_WAIT_PAGE_LOAD_TIME = 0.6
WAIT_FOR_NETWORK_IDLE = 2
MIN_WAIT_PAGE_LOAD_TIME = 0.05

ALL_POSSIBLE_PERMISSIONS = [
Expand Down
5 changes: 3 additions & 2 deletions testzeus_hercules/core/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import time
from typing import Any

import aiofiles
from testzeus_hercules.config import get_global_conf
from testzeus_hercules.core.agents_llm_config import AgentsLLMConfig
from testzeus_hercules.core.playwright_manager import PlaywrightManager
Expand Down Expand Up @@ -140,15 +141,15 @@ async def save_planner_chat_messages(self) -> None:
res_output_thoughts_logs_di[key][idx]["content"] = res_content

if self.save_chat_logs_to_files:
with open(
async with aiofiles.open(
os.path.join(
get_global_conf().get_source_log_folder_path(self.stake_id),
"agent_inner_thoughts.json",
),
"w",
encoding="utf-8",
) as f:
json.dump(res_output_thoughts_logs_di, f, ensure_ascii=False, indent=4)
await f.write(json.dumps(res_output_thoughts_logs_di, ensure_ascii=False, indent=4))
logger.debug("Chat messages saved")
else:
logger.info(
Expand Down
8 changes: 0 additions & 8 deletions testzeus_hercules/core/simple_hercules.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,14 +367,6 @@ def __save_chat_log(self, sender: autogen.ConversableAgent, receiver: autogen.Co
with open(chat_logs_file, "w") as file:
json.dump(res_output_thoughts_logs_di, file, indent=4)

# def __save_chat_log(self, chat_log: list[dict[str, Any]]) -> None:
# if not self.save_chat_logs_to_files:
# logger.info("Nested chat logs", extra={"nested_chat_log": chat_log})
# else:
# chat_logs_file = os.path.join(self.get_chat_logs_dir() or "", f"nested_chat_log_{str(time_ns())}.json")
# # Save the chat log to a file
# with open(chat_logs_file, "w") as file:
# json.dump(chat_log, file, indent=4)

async def __initialize_agents(self) -> dict[str, autogen.ConversableAgent]:
"""
Expand Down
2 changes: 1 addition & 1 deletion testzeus_hercules/core/tools/accessibility_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

@tool(
agent_names=["browser_nav_agent"],
description="Test the current page accessibility using Axe-core. This tool is used to check the accessibility of the page.",
description="Test the current page a11y accessibility using Axe-core. This tool is used to check only the a11y accessibility of the page.",
name="test_page_accessibility",
)
async def test_page_accessibility(
Expand Down
6 changes: 3 additions & 3 deletions testzeus_hercules/core/tools/enter_text_using_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,11 @@ async def do_entertext(page: Page, selector: str, text_to_enter: str, use_keyboa

if use_keyboard_fill:
await elem.focus()
await asyncio.sleep(0.05)
await asyncio.sleep(0.01)
await press_key_combination("Control+A")
await asyncio.sleep(0.05)
await asyncio.sleep(0.01)
await press_key_combination("Delete")
await asyncio.sleep(0.05)
await asyncio.sleep(0.01)
logger.debug(f"Focused element with selector {selector} to enter text")
await page.keyboard.type(text_to_enter, delay=1)
else:
Expand Down
Loading

0 comments on commit 0514037

Please sign in to comment.