From 1bd802151012d4aef7d4b0d18c1196d3bfc21f3b Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Mon, 5 May 2025 14:21:09 -0400 Subject: [PATCH] fix(robot-server): fail proto load gracefully "Loading" a protocol, which is what we generically call the process to do everything required to get it to run, can now fail. We didn't think about this failure mode, and if it happened then we would have created some but not all of our run tracking resources, and our API would return inconsistent results (which is the bug). The orchestrator store would save the orchestrator object in a member, which then means that it would have a "current run", but since we didn't link resource allocation (or at least binding) and initialization, the exception raised after that happens would mean it never went into the database. The immediate fix is to not bind the orchestrator object to the _orchestrator data member until right before we return, after we do all the possibly-failing orchestration. The thing that would make this fail is to have a python protocol that was syntactically correct (passes ast.parse()) but did something at the file scope that would cause an error (like an import failure - a division by 0 would work too). This will fail when we exec the protocol to check for the presence of runtime parameters, which happens during the execution of this function, triggering the bug. Closes EXEC-1450 --- .../runs/run_orchestrator_store.py | 12 ++-- .../tests/runs/test_run_orchestrator_store.py | 59 +++++++++++++++---- 2 files changed, 55 insertions(+), 16 deletions(-) diff --git a/robot-server/robot_server/runs/run_orchestrator_store.py b/robot-server/robot_server/runs/run_orchestrator_store.py index 38a6c2e23c61..1033be8b6e75 100644 --- a/robot-server/robot_server/runs/run_orchestrator_store.py +++ b/robot-server/robot_server/runs/run_orchestrator_store.py @@ -245,7 +245,7 @@ async def create( notify_publishers=notify_publishers, ) - self._run_orchestrator = RunOrchestrator.build_orchestrator( + orchestrator = RunOrchestrator.build_orchestrator( run_id=run_id, protocol_engine=engine, hardware_api=self._hardware_api, @@ -257,19 +257,21 @@ async def create( # they will both "succeed" (with undefined results) instead of one # raising RunConflictError. if protocol: - await self.run_orchestrator.load( + await orchestrator.load( protocol.source, run_time_param_values=run_time_param_values, run_time_param_paths=run_time_param_paths, parse_mode=ParseMode.ALLOW_LEGACY_METADATA_AND_REQUIREMENTS, ) else: - self.run_orchestrator.prepare() + orchestrator.prepare() for offset in labware_offsets: - self.run_orchestrator.add_labware_offset(offset) + orchestrator.add_labware_offset(offset) - return self.run_orchestrator.get_state_summary() + summary = orchestrator.get_state_summary() + self._run_orchestrator = orchestrator + return summary async def clear(self) -> RunResult: """Remove the current run orchestrator. diff --git a/robot-server/tests/runs/test_run_orchestrator_store.py b/robot-server/tests/runs/test_run_orchestrator_store.py index a8564cece993..a1412922d492 100644 --- a/robot-server/tests/runs/test_run_orchestrator_store.py +++ b/robot-server/tests/runs/test_run_orchestrator_store.py @@ -1,10 +1,11 @@ """Tests for the EngineStore interface.""" from datetime import datetime +from textwrap import dedent +from pathlib import Path import pytest from decoy import Decoy, matchers -from opentrons_shared_data import get_shared_data_root from opentrons_shared_data.robot.types import RobotType from opentrons.protocol_engine.error_recovery_policy import never_recover @@ -17,7 +18,8 @@ types as pe_types, ) from opentrons.protocol_runner import RunResult, RunOrchestrator -from opentrons.protocol_reader import ProtocolReader, ProtocolSource +from opentrons.protocol_reader import ProtocolReader +from opentrons.protocol_engine.resources import FileProvider from robot_server.runs.run_orchestrator_store import ( RunOrchestratorStore, @@ -25,7 +27,8 @@ NoRunOrchestrator, handle_estop_event, ) -from opentrons.protocol_engine.resources import FileProvider +from robot_server.protocols.protocol_store import ProtocolResource +from robot_server.protocols.protocol_models import ProtocolKind def mock_notify_publishers() -> None: @@ -48,12 +51,31 @@ async def subject( @pytest.fixture -async def json_protocol_source() -> ProtocolSource: - """Get a protocol source fixture.""" - simple_protocol = ( - get_shared_data_root() / "protocol" / "fixtures" / "6" / "simpleV6.json" +async def bad_python_protocol_source(tmp_path: Path) -> ProtocolResource: + """Get a protocol source for a bad python protocol.""" + with open(tmp_path / "bad_protocol.py", "w") as proto: + proto.write( + dedent( + """ + requirements = {'apiLevel': '2.20', 'robotType': 'Flex'} + a = 1/0 + + def run(ctx): + pass + """ + ) + ) + return ProtocolResource( + protocol_id="protocol-id", + created_at=datetime.now(), + source=( + await ProtocolReader().read_saved( + files=[tmp_path / "bad_protocol.py"], directory=None + ) + ), + protocol_kind=ProtocolKind.STANDARD, + protocol_key="some-name", ) - return await ProtocolReader().read_saved(files=[simple_protocol], directory=None) async def test_create_engine(decoy: Decoy, subject: RunOrchestratorStore) -> None: @@ -164,6 +186,23 @@ async def test_archives_state_if_engine_already_exists( assert subject.current_run_id == "run-id-1" +async def test_create_does_not_store_orchestrator_on_load_failure( + subject: RunOrchestratorStore, bad_python_protocol_source: ProtocolResource +) -> None: + """It should not store an orchestrator unless it could be loaded.""" + with pytest.raises(ZeroDivisionError): + await subject.create( + run_id="run-id", + labware_offsets=[], + initial_error_recovery_policy=never_recover, + deck_configuration=[], + protocol=bad_python_protocol_source, + file_provider=FileProvider(), + notify_publishers=mock_notify_publishers, + ) + assert subject.current_run_id is None + + async def test_clear_engine(subject: RunOrchestratorStore) -> None: """It should clear a stored engine entry.""" await subject.create( @@ -185,9 +224,7 @@ async def test_clear_engine(subject: RunOrchestratorStore) -> None: subject.run_orchestrator -async def test_clear_engine_not_stopped_or_idle( - subject: RunOrchestratorStore, json_protocol_source: ProtocolSource -) -> None: +async def test_clear_engine_not_stopped_or_idle(subject: RunOrchestratorStore) -> None: """It should raise a conflict if the engine is not stopped.""" await subject.create( run_id="run-id",