diff --git a/nemo_skills/code_execution/sandbox.py b/nemo_skills/code_execution/sandbox.py
index 178776e2a5..cc83b4e7bf 100644
--- a/nemo_skills/code_execution/sandbox.py
+++ b/nemo_skills/code_execution/sandbox.py
@@ -271,8 +271,25 @@ def _check_ready(self, timeout: float = 5.0) -> bool:
         except httpx.HTTPError:
             return False
 
-    def wait_for_sandbox(self, timeout: int = 5):
-        while not self._check_ready(timeout=timeout):
+    def wait_for_sandbox(self, wait_timeout: int = 240, http_timeout: int = 5):
+        """Block until the sandbox reports ready or the wait budget runs out.
+
+        Calls ``_check_ready`` repeatedly, sleeping one second after each failed probe.
+
+        Args:
+            wait_timeout: Overall budget in seconds before giving up.
+            http_timeout: Timeout in seconds passed to each ``_check_ready`` call.
+
+        Raises:
+            RuntimeError: If the sandbox is still not ready after ``wait_timeout``
+                seconds.
+        """
+        # Measure elapsed time on the monotonic clock so that wall-clock
+        # adjustments cannot cut the wait short or stretch it.
+        start_time = time.monotonic()
+        while not self._check_ready(timeout=http_timeout):
+            if time.monotonic() - start_time >= wait_timeout:
+                raise RuntimeError(f"Sandbox at {self.host}:{self.port} did not start within {wait_timeout} seconds")
             time.sleep(1)
 
 
diff --git a/nemo_skills/evaluation/evaluator/code.py b/nemo_skills/evaluation/evaluator/code.py
index a49bea5514..49b1fb3261 100644
--- a/nemo_skills/evaluation/evaluator/code.py
+++ b/nemo_skills/evaluation/evaluator/code.py
@@ -53,7 +53,7 @@ def __init__(self, config: dict, num_parallel_requests: int = 12):
             f"max_output_characters={self.eval_config.max_output_characters}"
         )
         self.sandbox = get_sandbox(self.eval_config.sandbox)
-        self.sandbox.wait_for_sandbox(50)
+        self.sandbox.wait_for_sandbox(wait_timeout=240)
 
     async def eval_single(self, data: dict):
         """Evaluate single code during generation."""