Merged
7 changes: 5 additions & 2 deletions nemo_skills/code_execution/sandbox.py
@@ -271,8 +271,11 @@ def _check_ready(self, timeout: float = 5.0) -> bool:
         except httpx.HTTPError:
             return False
 
-    def wait_for_sandbox(self, timeout: int = 5):
-        while not self._check_ready(timeout=timeout):
+    def wait_for_sandbox(self, wait_timeout: int = 240, http_timeout: int = 5):
+        start_time = time.time()
+        while not self._check_ready(timeout=http_timeout):
+            if time.time() - start_time >= wait_timeout:
+                raise RuntimeError(f"Sandbox at {self.host}:{self.port} did not start within {wait_timeout} seconds")
Collaborator: should we do the same for the llm wait as well? @i-vainn

Collaborator: @Kipok do you mean waiting for the llm server?

Collaborator: yes, I think we should have the same timeout logic for the llm server wait as we do for the sandbox server. Currently the llm wait will also block forever if something is wrong with the address.

Collaborator: how long do you think we should poll for? Some models can take over 30 minutes to be ready. We could make the wait time configurable, but there is often a decent amount of variance in load times. Would we rather preemptively kill jobs that are fine, or fail misconfigured jobs with a job reaper (which adds a delay but gives a more accurate signal)?

Collaborator: we can make it configurable. I guess by default we should set a fairly high timeout, like 2 hours. I'm just thinking we should have consistency in the logic: if we support a timeout for the sandbox, we should also support it for the llm. This isn't very high priority, though.

             time.sleep(1)


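For readers who want the patched logic in one piece, here is a minimal, self-contained sketch of the new wait loop. The `SandboxWaiter` class name and the injectable `probe` callable are illustrative stand-ins: the real `Sandbox._check_ready` probes the sandbox over HTTP with `httpx`, which this sketch stubs out so the timeout behavior is easy to exercise.

```python
import time


class SandboxWaiter:
    """Sketch of the patched wait logic. The real class performs an
    httpx health check inside _check_ready; here the probe is injected
    so the loop can be tested without a running sandbox."""

    def __init__(self, host: str, port: int, probe):
        self.host = host
        self.port = port
        self._probe = probe  # stand-in for the httpx readiness check

    def _check_ready(self, timeout: float = 5.0) -> bool:
        return self._probe(timeout)

    def wait_for_sandbox(self, wait_timeout: int = 240, http_timeout: int = 5):
        # Track elapsed time so the loop cannot spin forever on a bad address.
        start_time = time.time()
        while not self._check_ready(timeout=http_timeout):
            if time.time() - start_time >= wait_timeout:
                raise RuntimeError(
                    f"Sandbox at {self.host}:{self.port} did not start "
                    f"within {wait_timeout} seconds"
                )
            time.sleep(1)
```

The key design point of the diff: the overall deadline (`wait_timeout`) is decoupled from the per-request HTTP timeout (`http_timeout`), so a hung or misconfigured sandbox address fails fast instead of blocking startup indefinitely.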
2 changes: 1 addition & 1 deletion nemo_skills/evaluation/evaluator/code.py
@@ -53,7 +53,7 @@ def __init__(self, config: dict, num_parallel_requests: int = 12):
             f"max_output_characters={self.eval_config.max_output_characters}"
         )
         self.sandbox = get_sandbox(self.eval_config.sandbox)
-        self.sandbox.wait_for_sandbox(50)
+        self.sandbox.wait_for_sandbox(wait_timeout=240)
 
     async def eval_single(self, data: dict):
         """Evaluate single code during generation."""
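The review thread above suggests mirroring this timeout logic for the LLM server wait. Nothing like that is in this PR; the following is a hypothetical sketch of what it might look like, with the function name `wait_for_llm_server`, the `check_ready` callable, and the 2-hour default all assumptions drawn from the discussion (a high default, since model load times can exceed 30 minutes).

```python
import time


def wait_for_llm_server(check_ready, host: str, port: int,
                        wait_timeout: int = 7200, poll_interval: float = 1.0):
    """Block until check_ready() returns True, or raise RuntimeError after
    wait_timeout seconds. The 7200 s (2 h) default follows the suggestion
    in the thread; check_ready is any zero-argument readiness probe."""
    start_time = time.time()
    while not check_ready():
        if time.time() - start_time >= wait_timeout:
            raise RuntimeError(
                f"LLM server at {host}:{port} did not start within "
                f"{wait_timeout} seconds"
            )
        time.sleep(poll_interval)
```

This keeps the structure identical to `wait_for_sandbox`, which is the consistency argument made in the thread: either both servers bound their startup wait, or neither does.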