diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 421b0384b..c2cb4428f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,4 +6,4 @@ repos:
         args: ["--fix", "--show-fixes", "--output-format=full"]
         exclude: ^.*\.(ipynb)$|^verl/.*$
       - id: ruff-format
-        exclude: ^verl/.*$
\ No newline at end of file
+        exclude: ^.*\.(ipynb)$|^verl/.*$
\ No newline at end of file
diff --git a/examples/archive/sft/run_sft_model.py b/examples/archive/sft/run_sft_model.py
index 961782d21..ca010ca16 100644
--- a/examples/archive/sft/run_sft_model.py
+++ b/examples/archive/sft/run_sft_model.py
@@ -30,7 +30,9 @@
 agent_args = {
     "tools": ["python"],
     "parser_name": "qwen",
-    "system_prompt": ('You are an expert mathematician and programmer. Your goal is to solve challenging math problems, like those from the AIME competition, by breaking them down into logical steps and using Python code for calculations. Strive for clarity and efficiency.\n\nFollow this process for every problem:\n1. **Analyze the Problem**: Read the question carefully. Identify the key information, constraints, and what is being asked.\n2. **Think Step-by-Step**: In the `` block, outline your plan. Decompose the problem into the smallest, most logical steps. **You must not write code or perform calculations in this block.** Your goal is to create a plan that will be executed by the Python tool.\n3. **Write Python Code**: In the `` block, write an efficient Python script to execute your plan. The tool expects a JSON object with `name` and `arguments` keys. The `arguments` should be a dictionary with a single `code` key. Ensure the code is self-contained, runs quickly, and prints the final result.\n4. **State the Final Answer**: After receiving the ``, verify it. Then, state the final answer clearly and concisely in the format \\boxed{answer}.\n\nHere is an example:\nQuestion: What is the largest prime factor of 25! ?\nThe problem asks for the largest prime factor of 25 factorial. The largest prime factor of n! is the largest prime number less than or equal to n. In this case, n=25. I will write a Python script to find the largest prime number less than or equal to 25.\n\n{"name": "python", "arguments": {"code": "import math\\ndef is_prime(n):\\n    if n <= 1:\\n        return False\\n    for i in range(2, int(math.sqrt(n)) + 1):\\n        if n % i == 0:\\n            return False\\n    return True\\n\\ndef largest_prime_up_to(n):\\n    for i in range(n, 1, -1):\\n        if is_prime(i):\\n            return i\\n    return None\\n\\nprint(largest_prime_up_to(25))"}}\n\n\n23\n\nThe largest prime factor of 25! is the largest prime number less than or equal to 25. The answer is \\boxed{23}.'),
+    "system_prompt": (
+        'You are an expert mathematician and programmer. Your goal is to solve challenging math problems, like those from the AIME competition, by breaking them down into logical steps and using Python code for calculations. Strive for clarity and efficiency.\n\nFollow this process for every problem:\n1. **Analyze the Problem**: Read the question carefully. Identify the key information, constraints, and what is being asked.\n2. **Think Step-by-Step**: In the `` block, outline your plan. Decompose the problem into the smallest, most logical steps. **You must not write code or perform calculations in this block.** Your goal is to create a plan that will be executed by the Python tool.\n3. **Write Python Code**: In the `` block, write an efficient Python script to execute your plan. The tool expects a JSON object with `name` and `arguments` keys. The `arguments` should be a dictionary with a single `code` key. Ensure the code is self-contained, runs quickly, and prints the final result.\n4. **State the Final Answer**: After receiving the ``, verify it. Then, state the final answer clearly and concisely in the format \\boxed{answer}.\n\nHere is an example:\nQuestion: What is the largest prime factor of 25! ?\nThe problem asks for the largest prime factor of 25 factorial. The largest prime factor of n! is the largest prime number less than or equal to n. In this case, n=25. I will write a Python script to find the largest prime number less than or equal to 25.\n\n{"name": "python", "arguments": {"code": "import math\\ndef is_prime(n):\\n    if n <= 1:\\n        return False\\n    for i in range(2, int(math.sqrt(n)) + 1):\\n        if n % i == 0:\\n            return False\\n    return True\\n\\ndef largest_prime_up_to(n):\\n    for i in range(n, 1, -1):\\n        if is_prime(i):\\n            return i\\n    return None\\n\\nprint(largest_prime_up_to(25))"}}\n\n\n23\n\nThe largest prime factor of 25! is the largest prime number less than or equal to 25. The answer is \\boxed{23}.'
+    ),
 }
 env_args = {
     "tools": ["python"],
diff --git a/examples/fully_async/deepresearch/refine_agent.py b/examples/fully_async/deepresearch/refine_agent.py
index 1ffab4ece..a44bd43d4 100644
--- a/examples/fully_async/deepresearch/refine_agent.py
+++ b/examples/fully_async/deepresearch/refine_agent.py
@@ -70,7 +70,9 @@ async def end_request(self, success: bool, latency: float):
             # Log periodically
             if (self.total_completed + self.total_failed) % 50 == 0:
                 avg_latency = self.total_latency / max(1, self.total_completed)
-                logger.info(f"[STATS] In-flight: {self.in_flight}, Completed: {self.total_completed}, Failed: {self.total_failed}, Latency(avg/min/max): {avg_latency:.2f}s/{self.min_latency:.2f}s/{self.max_latency:.2f}s")
+                logger.info(
+                    f"[STATS] In-flight: {self.in_flight}, Completed: {self.total_completed}, Failed: {self.total_failed}, Latency(avg/min/max): {avg_latency:.2f}s/{self.min_latency:.2f}s/{self.max_latency:.2f}s"
+                )

     async def get_stats(self) -> dict:
         async with self._lock:
diff --git a/examples/fully_async/deepresearch/search_agent.py b/examples/fully_async/deepresearch/search_agent.py
index 4ffed4a7f..139d9ff74 100644
--- a/examples/fully_async/deepresearch/search_agent.py
+++ b/examples/fully_async/deepresearch/search_agent.py
@@ -242,7 +242,28 @@ async def run(self, question):
             final_answer = extract_boxed_answer(content)

         # Aggregate metrics across all tool calls
-        aggregated_metrics = {"num_turns": num_turns, "total_parse_tool_args_error": sum(m.get("parse_tool_args_error", 0) for m in metrics), "total_tool_return_error": sum(m.get("tool_return_error", 0) for m in metrics), "total_tool_calls": sum(m.get("tool_calls", 0) for m in metrics), "total_tool_wait_time": sum(m.get("tool_wait_time", 0) for m in metrics), "total_refine_time": sum(m.get("refine_time", 0) for m in metrics), "avg_refine_time": sum(m.get("refine_time", 0) for m in metrics) / max(sum(m.get("tool_calls", 0) for m in metrics), 1), "total_query_length": sum(m.get("query_length", 0) for m in metrics), "avg_query_length": sum(m.get("query_length", 0) for m in metrics) / max(sum(m.get("tool_calls", 0) for m in metrics), 1), "total_generation_time": total_generation_time, "total_completion_tokens": total_completion_tokens, "total_tool_tokens": sum(m.get("tool_tokens", 0) for m in metrics), "avg_completion_tokens_per_turn": total_completion_tokens / max(num_turns, 1), "avg_tool_tokens_per_call": sum(m.get("tool_tokens", 0) for m in metrics) / max(sum(m.get("tool_calls", 0) for m in metrics), 1), "duplicate_search_detected": duplicate_search_detected, "excessive_parallel_calls": excessive_parallel_calls, "tool_error_detected": tool_error_detected, "refine_error_detected": refine_error_detected, "overlong": overlong, "merged_step": len(trajectory.merge())}
+        aggregated_metrics = {
+            "num_turns": num_turns,
+            "total_parse_tool_args_error": sum(m.get("parse_tool_args_error", 0) for m in metrics),
+            "total_tool_return_error": sum(m.get("tool_return_error", 0) for m in metrics),
+            "total_tool_calls": sum(m.get("tool_calls", 0) for m in metrics),
+            "total_tool_wait_time": sum(m.get("tool_wait_time", 0) for m in metrics),
+            "total_refine_time": sum(m.get("refine_time", 0) for m in metrics),
+            "avg_refine_time": sum(m.get("refine_time", 0) for m in metrics) / max(sum(m.get("tool_calls", 0) for m in metrics), 1),
+            "total_query_length": sum(m.get("query_length", 0) for m in metrics),
+            "avg_query_length": sum(m.get("query_length", 0) for m in metrics) / max(sum(m.get("tool_calls", 0) for m in metrics), 1),
+            "total_generation_time": total_generation_time,
+            "total_completion_tokens": total_completion_tokens,
+            "total_tool_tokens": sum(m.get("tool_tokens", 0) for m in metrics),
+            "avg_completion_tokens_per_turn": total_completion_tokens / max(num_turns, 1),
+            "avg_tool_tokens_per_call": sum(m.get("tool_tokens", 0) for m in metrics) / max(sum(m.get("tool_calls", 0) for m in metrics), 1),
+            "duplicate_search_detected": duplicate_search_detected,
+            "excessive_parallel_calls": excessive_parallel_calls,
+            "tool_error_detected": tool_error_detected,
+            "refine_error_detected": refine_error_detected,
+            "overlong": overlong,
+            "merged_step": len(trajectory.merge()),
+        }

         if OVERLONG_FILTER and overlong:
             for seq in trajectory.sequences:
diff --git a/rllm-model-gateway/src/rllm_model_gateway/client.py b/rllm-model-gateway/src/rllm_model_gateway/client.py
index 48292fe8d..b245c3c64 100644
--- a/rllm-model-gateway/src/rllm_model_gateway/client.py
+++ b/rllm-model-gateway/src/rllm_model_gateway/client.py
@@ -60,9 +60,7 @@ def get_session_info(self, session_id: str) -> dict[str, Any]:
         resp.raise_for_status()
         return resp.json()

-    def list_sessions(
-        self, since: float | None = None, limit: int | None = None
-    ) -> list[dict[str, Any]]:
+    def list_sessions(self, since: float | None = None, limit: int | None = None) -> list[dict[str, Any]]:
         params: dict[str, Any] = {}
         if since is not None:
             params["since"] = since
@@ -90,9 +88,7 @@ def get_session_traces(
             params["since"] = since
         if limit is not None:
             params["limit"] = limit
-        resp = self._http.get(
-            f"{self.gateway_url}/sessions/{session_id}/traces", params=params
-        )
+        resp = self._http.get(f"{self.gateway_url}/sessions/{session_id}/traces", params=params)
         resp.raise_for_status()
         data = resp.json()
         return [TraceRecord(**t) for t in data]
@@ -188,9 +184,7 @@ async def get_session_info(self, session_id: str) -> dict[str, Any]:
         resp.raise_for_status()
         return resp.json()

-    async def list_sessions(
-        self, since: float | None = None, limit: int | None = None
-    ) -> list[dict[str, Any]]:
+    async def list_sessions(self, since: float | None = None, limit: int | None = None) -> list[dict[str, Any]]:
         params: dict[str, Any] = {}
         if since is not None:
             params["since"] = since
@@ -218,9 +212,7 @@ async def get_session_traces(
             params["since"] = since
         if limit is not None:
             params["limit"] = limit
-        resp = await self._http.get(
-            f"{self.gateway_url}/sessions/{session_id}/traces", params=params
-        )
+        resp = await self._http.get(f"{self.gateway_url}/sessions/{session_id}/traces", params=params)
         resp.raise_for_status()
         data = resp.json()
         return [TraceRecord(**t) for t in data]
diff --git a/rllm-model-gateway/src/rllm_model_gateway/middleware.py b/rllm-model-gateway/src/rllm_model_gateway/middleware.py
index 13cd2453e..6e428f52e 100644
--- a/rllm-model-gateway/src/rllm_model_gateway/middleware.py
+++ b/rllm-model-gateway/src/rllm_model_gateway/middleware.py
@@ -67,17 +67,13 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:

         # Inject sampling parameters into POST request bodies (chat completions, etc.)
         method = scope.get("method", "").upper()
-        needs_injection = (
-            self.add_logprobs or self.add_return_token_ids or self.sessions is not None
-        )
+        needs_injection = self.add_logprobs or self.add_return_token_ids or self.sessions is not None
         if method == "POST" and needs_injection:
             await self._inject_params(scope, receive, send, session_id)
         else:
             await self.app(scope, receive, send)

-    async def _inject_params(
-        self, scope: Scope, receive: Receive, send: Send, session_id: str | None = None
-    ) -> None:
+    async def _inject_params(self, scope: Scope, receive: Receive, send: Send, session_id: str | None = None) -> None:
         """Read body, inject sampling params, then forward with mutated body."""
         body_parts: list[bytes] = []
         more = True
@@ -94,9 +90,7 @@ async def _inject_params(
             # Record whether the client originally requested logprobs
             # so the proxy can strip them from the response if not.
             state = scope["state"]
-            state["originally_requested_logprobs"] = (
-                "logprobs" in payload and payload["logprobs"]
-            )
+            state["originally_requested_logprobs"] = "logprobs" in payload and payload["logprobs"]
             self._mutate(payload, session_id)
             raw = json.dumps(payload).encode("utf-8")
         except (json.JSONDecodeError, UnicodeDecodeError):
diff --git a/rllm-model-gateway/src/rllm_model_gateway/server.py b/rllm-model-gateway/src/rllm_model_gateway/server.py
index 74b3cb18b..0990ec3cd 100644
--- a/rllm-model-gateway/src/rllm_model_gateway/server.py
+++ b/rllm-model-gateway/src/rllm_model_gateway/server.py
@@ -359,10 +359,7 @@ def _load_config(args: argparse.Namespace) -> GatewayConfig:
     # Workers from CLI --worker flags (WorkerConfig validator auto-splits URLs)
     worker_urls = getattr(args, "worker", None) or []
     if worker_urls:
-        data["workers"] = [
-            {"url": raw_url, "worker_id": str(i)}
-            for i, raw_url in enumerate(worker_urls)
-        ]
+        data["workers"] = [{"url": raw_url, "worker_id": str(i)} for i, raw_url in enumerate(worker_urls)]

     return GatewayConfig(**data)

@@ -373,9 +370,7 @@


 def main() -> None:
-    parser = argparse.ArgumentParser(
-        description="rllm-model-gateway: lightweight LLM call proxy for RL training"
-    )
+    parser = argparse.ArgumentParser(description="rllm-model-gateway: lightweight LLM call proxy for RL training")
     parser.add_argument("--host", type=str, default=None)
     parser.add_argument("--port", type=int, default=None)
     parser.add_argument("--config", type=str, default=None, help="Path to YAML config")
@@ -398,9 +393,7 @@ def main() -> None:

     import uvicorn

-    uvicorn.run(
-        app, host=config.host, port=config.port, log_level=config.log_level.lower()
-    )
+    uvicorn.run(app, host=config.host, port=config.port, log_level=config.log_level.lower())


 if __name__ == "__main__":
diff --git a/rllm-model-gateway/src/rllm_model_gateway/session_manager.py b/rllm-model-gateway/src/rllm_model_gateway/session_manager.py
index 72ee79a0f..7c9df28f0 100644
--- a/rllm-model-gateway/src/rllm_model_gateway/session_manager.py
+++ b/rllm-model-gateway/src/rllm_model_gateway/session_manager.py
@@ -23,9 +23,7 @@ def __init__(self, store: TraceStore) -> None:
         self._created_at: dict[str, float] = {}
         self._sampling_params: dict[str, dict[str, Any]] = {}

-    def ensure_session(
-        self, session_id: str, metadata: dict[str, Any] | None = None
-    ) -> str:
+    def ensure_session(self, session_id: str, metadata: dict[str, Any] | None = None) -> str:
         """Ensure a session exists (create if needed). Returns session_id."""
         if session_id not in self._created_at:
             self._created_at[session_id] = time.time()
diff --git a/rllm/agents/miniwob_agent.py b/rllm/agents/miniwob_agent.py
index f77b7d31b..d944f6246 100644
--- a/rllm/agents/miniwob_agent.py
+++ b/rllm/agents/miniwob_agent.py
@@ -38,7 +38,17 @@ def image_to_jpg_base64_url(image: np.ndarray | Image.Image) -> str:


 class MiniWobAgent(BaseAgent):
-    def __init__(self, chat_mode: bool = False, use_html: bool = True, use_axtree: bool = True, use_screenshot: bool = False, use_accumulate_thinking: bool = True, cot_prompt: bool = False, use_full_conversation: bool = True, use_reward_shaping: bool = False):
+    def __init__(
+        self,
+        chat_mode: bool = False,
+        use_html: bool = True,
+        use_axtree: bool = True,
+        use_screenshot: bool = False,
+        use_accumulate_thinking: bool = True,
+        cot_prompt: bool = False,
+        use_full_conversation: bool = True,
+        use_reward_shaping: bool = False,
+    ):
         self.chat_mode: bool = chat_mode
         self.use_html: bool = use_html
         self.use_axtree: bool = use_axtree
@@ -217,7 +227,12 @@ def get_user_msgs(self, user_obs) -> list[dict[str, str]]:
         user_msgs.append({"type": "text", "text": self._get_action_space_description()})

         # Add next action prompt
-        user_msgs.append({"type": "text", "text": "# Next action\nThe task has not been completed yet. You will now think step by step and produce your next best action. Reflect on your past actions, any resulting error message, and the current state of the page before deciding on your next action. The content must be in the same format as shown before in the Action Space. You can plan ahead but only 1 immediate action is needed."})
+        user_msgs.append(
+            {
+                "type": "text",
+                "text": "# Next action\nThe task has not been completed yet. You will now think step by step and produce your next best action. Reflect on your past actions, any resulting error message, and the current state of the page before deciding on your next action. The content must be in the same format as shown before in the Action Space. You can plan ahead but only 1 immediate action is needed.",
+            }
+        )

         return user_msgs

diff --git a/rllm/environments/tools/mcp_env.py b/rllm/environments/tools/mcp_env.py
index bb6b18cd7..cd5b6c494 100644
--- a/rllm/environments/tools/mcp_env.py
+++ b/rllm/environments/tools/mcp_env.py
@@ -403,9 +403,7 @@ def _ensure_connection_managers(self) -> list[str]:
             existing_spec = MCPEnvironment._server_specs.get(server_name)
             if existing_spec is not None:
                 if existing_spec != server_spec:
-                    raise ValueError(
-                        f"MCP server '{server_name}' is already initialized with a different configuration"
-                    )
+                    raise ValueError(f"MCP server '{server_name}' is already initialized with a different configuration")
                 continue

             manager = MCPConnectionManager(
@@ -460,27 +458,18 @@ def _build_tool_routing(self) -> dict[str, str]:
             explicit_server_name = self.tool_name_to_server_name.get(public_tool_name)
             if explicit_server_name is not None:
                 if explicit_server_name not in candidate_servers:
-                    raise ValueError(
-                        f"Tool '{public_tool_name}' is not provided by mapped MCP server '{explicit_server_name}'"
-                    )
+                    raise ValueError(f"Tool '{public_tool_name}' is not provided by mapped MCP server '{explicit_server_name}'")
                 resolved[public_tool_name] = explicit_server_name
             elif len(candidate_servers) == 1:
                 resolved[public_tool_name] = next(iter(candidate_servers))
             else:
-                raise ValueError(
-                    f"Tool '{public_tool_name}' is provided by multiple MCP servers {sorted(candidate_servers)}. "
-                    "Supply 'tool_name_to_server_name' to disambiguate."
-                )
+                raise ValueError(f"Tool '{public_tool_name}' is provided by multiple MCP servers {sorted(candidate_servers)}. Supply 'tool_name_to_server_name' to disambiguate.")

         for public_tool_name, mapped_server_name in self.tool_name_to_server_name.items():
             if mapped_server_name not in self.mcp_servers:
-                raise ValueError(
-                    f"Tool mapping for '{public_tool_name}' references unknown MCP server '{mapped_server_name}'"
-                )
+                raise ValueError(f"Tool mapping for '{public_tool_name}' references unknown MCP server '{mapped_server_name}'")
             if public_tool_name not in discovered_tool_servers:
-                raise ValueError(
-                    f"Tool mapping for '{public_tool_name}' does not match any discovered tool on the configured MCP servers"
-                )
+                raise ValueError(f"Tool mapping for '{public_tool_name}' does not match any discovered tool on the configured MCP servers")

         return resolved

diff --git a/rllm/experimental/engine/remote_runtime/agentcore_runtime.py b/rllm/experimental/engine/remote_runtime/agentcore_runtime.py
index 744114018..136f2a463 100644
--- a/rllm/experimental/engine/remote_runtime/agentcore_runtime.py
+++ b/rllm/experimental/engine/remote_runtime/agentcore_runtime.py
@@ -112,9 +112,7 @@ async def _run_one(self, sub: TaskSubmission, timeout: float) -> RemoteTaskResul
             raw_result=result,
         )

-    async def execute_tasks(
-        self, submissions: list[TaskSubmission], timeout: float | None = None
-    ) -> list[RemoteTaskResult]:
+    async def execute_tasks(self, submissions: list[TaskSubmission], timeout: float | None = None) -> list[RemoteTaskResult]:
         """Submit all tasks concurrently via asyncio.gather.

         Each task invokes then polls in sequence; all tasks run in parallel.
diff --git a/rllm/experimental/engine/remote_runtime/protocol.py b/rllm/experimental/engine/remote_runtime/protocol.py
index b90399cd2..e00e3b8d3 100644
--- a/rllm/experimental/engine/remote_runtime/protocol.py
+++ b/rllm/experimental/engine/remote_runtime/protocol.py
@@ -48,9 +48,7 @@ def initialize(self) -> None:
         """Client setup from config."""
         ...

-    async def execute_tasks(
-        self, submissions: list[TaskSubmission], timeout: float | None = None
-    ) -> list[RemoteTaskResult]:
+    async def execute_tasks(self, submissions: list[TaskSubmission], timeout: float | None = None) -> list[RemoteTaskResult]:
         """Submit tasks concurrently and gather results. Returns one result per submission."""
         ...

diff --git a/rllm/experimental/test_examples/opsd/math_opsd_workflow.py b/rllm/experimental/test_examples/opsd/math_opsd_workflow.py
index 8f9488591..25852b76e 100644
--- a/rllm/experimental/test_examples/opsd/math_opsd_workflow.py
+++ b/rllm/experimental/test_examples/opsd/math_opsd_workflow.py
@@ -26,7 +26,13 @@ async def run(self, task: dict, uid: str, **kwargs) -> Episode:
         self.reset(task, uid)

         student_prompt = f"Problem: {task['question']}"
-        teacher_prompt = student_prompt + "\n\n" + f"Here is a reference solution:\n\n{task['ground_truth']}" + "\n\n" + "After understanding the reference solution, please try to solve this problem using your own approach below:"
+        teacher_prompt = (
+            student_prompt
+            + "\n\n"
+            + f"Here is a reference solution:\n\n{task['ground_truth']}"
+            + "\n\n"
+            + "After understanding the reference solution, please try to solve this problem using your own approach below:"
+        )

         student_messages = [{"role": "user", "content": student_prompt}]
         teacher_messages = [{"role": "user", "content": teacher_prompt}]
diff --git a/rllm/rewards/code_utils/pyext2.py b/rllm/rewards/code_utils/pyext2.py
index 5e33119f7..f950f9a58 100644
--- a/rllm/rewards/code_utils/pyext2.py
+++ b/rllm/rewards/code_utils/pyext2.py
@@ -2,7 +2,23 @@

 __version__ = "0.7"

-__all__ = ["overload", "RuntimeModule", "switch", "tail_recurse", "copyfunc", "set_docstring", "annotate", "safe_unpack", "modify_function", "assign", "fannotate", "compare_and_swap", "is_main", "call_if_main", "run_main"]
+__all__ = [
+    "overload",
+    "RuntimeModule",
+    "switch",
+    "tail_recurse",
+    "copyfunc",
+    "set_docstring",
+    "annotate",
+    "safe_unpack",
+    "modify_function",
+    "assign",
+    "fannotate",
+    "compare_and_swap",
+    "is_main",
+    "call_if_main",
+    "run_main",
+]

 import inspect
 import sys
diff --git a/rllm/rewards/code_utils/swebench.py b/rllm/rewards/code_utils/swebench.py
index c0716e162..160d732a3 100644
--- a/rllm/rewards/code_utils/swebench.py
+++ b/rllm/rewards/code_utils/swebench.py
@@ -54,7 +54,21 @@
     run_threadpool,
 )

-from rllm.globals import CACHE_LEVEL, CLEAN, FORCE_REBUILD, INSTANCE_IMAGE_TAG, MAX_WORKERS, MODEL_NAME_OR_PATH, NAMESPACE, OPEN_FILE_LIMIT, REPORT_DIR, REWRITE_REPORTS, SPLIT, SWEBENCH_DATASET_NAME, TIMEOUT
+from rllm.globals import (
+    CACHE_LEVEL,
+    CLEAN,
+    FORCE_REBUILD,
+    INSTANCE_IMAGE_TAG,
+    MAX_WORKERS,
+    MODEL_NAME_OR_PATH,
+    NAMESPACE,
+    OPEN_FILE_LIMIT,
+    REPORT_DIR,
+    REWRITE_REPORTS,
+    SPLIT,
+    SWEBENCH_DATASET_NAME,
+    TIMEOUT,
+)

 GIT_APPLY_CMDS = [
     "git apply --verbose",
@@ -627,7 +641,23 @@ def swebench_check_correctness(
     run_id = uuid.uuid4().hex
     instance_ids = [instance_id]

-    eval_report_path = run_evaluation(SWEBENCH_DATASET_NAME, instance_ids, actions, MAX_WORKERS, FORCE_REBUILD, CACHE_LEVEL, CLEAN, OPEN_FILE_LIMIT, run_id, TIMEOUT, NAMESPACE, REWRITE_REPORTS, SPLIT, INSTANCE_IMAGE_TAG, REPORT_DIR)
+    eval_report_path = run_evaluation(
+        SWEBENCH_DATASET_NAME,
+        instance_ids,
+        actions,
+        MAX_WORKERS,
+        FORCE_REBUILD,
+        CACHE_LEVEL,
+        CLEAN,
+        OPEN_FILE_LIMIT,
+        run_id,
+        TIMEOUT,
+        NAMESPACE,
+        REWRITE_REPORTS,
+        SPLIT,
+        INSTANCE_IMAGE_TAG,
+        REPORT_DIR,
+    )

     # read from eval report and get the correct/incorrect stats for reward calculation
     with open(eval_report_path) as f:
diff --git a/rllm/rewards/code_utils/taco.py b/rllm/rewards/code_utils/taco.py
index 1a62df8a6..a1cdb0ffb 100644
--- a/rllm/rewards/code_utils/taco.py
+++ b/rllm/rewards/code_utils/taco.py
@@ -382,7 +382,15 @@ def execute_std_code(method, synthesized_code, inputs_list, outputs_list, timeou
         temp_file_name = temp_input.name
         stdout, stderr = "", ""
         try:
-            result = subprocess.run(["bash", "-c", "ulimit -v 10485760; python3 " + temp_program_path], stdin=temp_input, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid, timeout=timeout, text=True)
+            result = subprocess.run(
+                ["bash", "-c", "ulimit -v 10485760; python3 " + temp_program_path],
+                stdin=temp_input,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                preexec_fn=os.setsid,
+                timeout=timeout,
+                text=True,
+            )
             stdout, stderr = result.stdout, result.stderr
             return_code = result.returncode
diff --git a/rllm/rewards/math_reward.py b/rllm/rewards/math_reward.py
index 4a4c17bbb..ee4ad0b75 100644
--- a/rllm/rewards/math_reward.py
+++ b/rllm/rewards/math_reward.py
@@ -130,7 +130,9 @@ def rllm_reward_fn_math(data_source: str, llm_solution: str, ground_truth: str |
     reward = RewardMathFn(RewardConfig())
     task_info = {
         "data_source": "",
-        "problem": ("Let $P(x)=x^{4}+2 x^{3}-13 x^{2}-14 x+24$ be a polynomial with roots $r_{1}, r_{2}, r_{3}, r_{4}$. Let $Q$ be the quartic polynomial with roots $r_{1}^{2}, r_{2}^{2}, r_{3}^{2}, r_{4}^{2}$, such that the coefficient of the $x^{4}$ term of $Q$ is 1. Simplify the quotient $Q\\left(x^{2}\\right) / P(x)$, leaving your answer in terms of $x$. (You may assume that $x$ is not equal to any of $\\left.r_{1}, r_{2}, r_{3}, r_{4}\\right)$."),
+        "problem": (
+            "Let $P(x)=x^{4}+2 x^{3}-13 x^{2}-14 x+24$ be a polynomial with roots $r_{1}, r_{2}, r_{3}, r_{4}$. Let $Q$ be the quartic polynomial with roots $r_{1}^{2}, r_{2}^{2}, r_{3}^{2}, r_{4}^{2}$, such that the coefficient of the $x^{4}$ term of $Q$ is 1. Simplify the quotient $Q\\left(x^{2}\\right) / P(x)$, leaving your answer in terms of $x$. (You may assume that $x$ is not equal to any of $\\left.r_{1}, r_{2}, r_{3}, r_{4}\\right)$."
+        ),
         "problem_type": RewardType.MATH,
         "ground_truth": ["10", "$x^{4}-2 x^{3}-13 x^{2}+14 x+24$"],
         "has_toolcall": True,
diff --git a/rllm/system_prompts.py b/rllm/system_prompts.py
index f5d108675..aa4fe9eb3 100644
--- a/rllm/system_prompts.py
+++ b/rllm/system_prompts.py
@@ -269,7 +269,9 @@
 - IF THE PROBLEM DOESNT HAVE MULTIPLE CHOICE, OUTPUT 'NO MULTIPLE CHOICE'."""


-LCB_SYSTEM_MESSAGE_GENERIC = "You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests."
+LCB_SYSTEM_MESSAGE_GENERIC = (
+    "You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests."
+)


 LCB_FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters."
diff --git a/tests/agents/test_appworld_agent.py b/tests/agents/test_appworld_agent.py
index 0c1faa44b..e54fbb25b 100644
--- a/tests/agents/test_appworld_agent.py
+++ b/tests/agents/test_appworld_agent.py
@@ -53,7 +53,11 @@ def test_update_from_env_initial_task(self):
         """Test updating environment with initial task observation"""
         agent = AppWorldReactAgent()

-        observation = {"instruction": "How many playlists do I have in Spotify?", "user_info": {"first_name": "Test", "last_name": "User", "email": "test@example.com", "phone_number": "+1234567890"}, "app_descriptions": "spotify: Music streaming app\nsupervisor: User management"}
+        observation = {
+            "instruction": "How many playlists do I have in Spotify?",
+            "user_info": {"first_name": "Test", "last_name": "User", "email": "test@example.com", "phone_number": "+1234567890"},
+            "app_descriptions": "spotify: Music streaming app\nsupervisor: User management",
+        }

         agent.update_from_env(observation, 0.0, False, {})

@@ -247,7 +251,11 @@ def test_basic_interaction_flow(self):
         agent = AppWorldReactAgent()

         # Step 1: Receive initial task
-        task_observation = {"instruction": "How many playlists do I have in Spotify?", "user_info": {"first_name": "Test", "last_name": "User", "email": "test@example.com", "phone_number": "+1234567890"}, "app_descriptions": "spotify: Music streaming app"}
+        task_observation = {
+            "instruction": "How many playlists do I have in Spotify?",
+            "user_info": {"first_name": "Test", "last_name": "User", "email": "test@example.com", "phone_number": "+1234567890"},
+            "app_descriptions": "spotify: Music streaming app",
+        }
         agent.update_from_env(task_observation, 0.0, False, {})

         assert agent.initialized is True
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index c4879a5a6..7b10c65a3 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -18,4 +18,4 @@
 requires_tinker = pytest.mark.skipif(
     not TINKER_API_KEY,
     reason="TINKER_API_KEY env var required",
-)
\ No newline at end of file
+)
diff --git a/tests/integration/test_agentcore_runtime.py b/tests/integration/test_agentcore_runtime.py
index 2bcab0ae7..fa34a122f 100644
--- a/tests/integration/test_agentcore_runtime.py
+++ b/tests/integration/test_agentcore_runtime.py
@@ -47,7 +47,9 @@ async def test_single_task(self):
         """Submit a GSM8K problem, verify success=True, reward is not None."""
         runtime = _make_runtime()
         sub = _make_submission(
-            prompt=("Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. How much was the total cost?"),
+            prompt=(
+                "Toula went to the bakery and bought various types of pastries. She bought 3 dozen donuts which cost $68 per dozen, 2 dozen mini cupcakes which cost $80 per dozen, and 6 dozen mini cheesecakes for $55 per dozen. How much was the total cost?"
+            ),
             answer="694",
         )

diff --git a/tests/rewards/test_code_reward.py b/tests/rewards/test_code_reward.py
index 47ccbf91f..00386b1c2 100644
--- a/tests/rewards/test_code_reward.py
+++ b/tests/rewards/test_code_reward.py
@@ -358,7 +358,9 @@ def test_reward_leetcode(self):
         class Solution:\n    def minOperations(self, nums: List[int], k: int) -> int:\n        is_added = [False] * k\n        count = 0\n        n = len(nums)\n        for i in range(n - 1, -1, -1):\n            if nums[i] > k or is_added[nums[i] - 1]:\n                continue\n            is_added[nums[i] - 1] = True\n            count += 1\n            if count == k:\n                return n - i\n
         ```
         """
-        tests = {"functional": "def check(candidate):\n    assert candidate(nums = [3,1,5,4,2], k = 2) == 4\n    assert candidate(nums = [3,1,5,4,2], k = 5) == 5\n    assert candidate(nums = [3,2,5,3,1], k = 3) == 4\n\n\ncheck(Solution().minOperations)"}
+        tests = {
+            "functional": "def check(candidate):\n    assert candidate(nums = [3,1,5,4,2], k = 2) == 4\n    assert candidate(nums = [3,1,5,4,2], k = 5) == 5\n    assert candidate(nums = [3,2,5,3,1], k = 3) == 4\n\n\ncheck(Solution().minOperations)"
+        }
         reward = RewardCodeFn(RewardConfig())
         task_info = {"problem": "", "problem_type": RewardType.CODE, "data_source": "leetcode", "ground_truth": tests}
         output = reward(task_info, model_response)
@@ -373,7 +375,9 @@ def test_reward_leetcode_format_error(self):
         Here is my bad response, it is not in markdown oops
         class Solution:\n    def minOperations(self, nums: List[int], k: int) -> int:\n        is_added = [False] * k\n        count = 0\n        n = len(nums)\n        for i in range(n - 1, -1, -1):\n            if nums[i] > k or is_added[nums[i] - 1]:\n                continue\n            is_added[nums[i] - 1] = True\n            count += 1\n            if count == k:\n                return n - i\n
         """
-        tests = {"functional": "def check(candidate):\n    assert candidate(nums = [3,1,5,4,2], k = 2) == 4\n    assert candidate(nums = [3,1,5,4,2], k = 5) == 5\n    assert candidate(nums = [3,2,5,3,1], k = 3) == 4\n\n\ncheck(Solution().minOperations)"}
+        tests = {
+            "functional": "def check(candidate):\n    assert candidate(nums = [3,1,5,4,2], k = 2) == 4\n    assert candidate(nums = [3,1,5,4,2], k = 5) == 5\n    assert candidate(nums = [3,2,5,3,1], k = 3) == 4\n\n\ncheck(Solution().minOperations)"
+        }
         reward = RewardCodeFn(RewardConfig())
         task_info = {"problem": "", "problem_type": RewardType.CODE, "data_source": "leetcode", "ground_truth": tests}
         output = reward(task_info, model_response)