From 22694a89abe0ed19fc3da64ce23861a0dcec9415 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Fri, 22 May 2026 14:22:38 +0200 Subject: [PATCH 01/17] feat: flight-recorder frame store, cockpit + steering services Persistence: append-only FlightRecorderFrame repo (sqlite+postgres, new yoyo revision), wired into both backends + fake. Engine: pluggable recorder sink records per-turn frames after each agent run (off the hot path, fail-soft). Services: FlightRecorderService (frame-authoritative get/seek), CockpitService (live activity + stuck/runaway heuristics), SafeDefaultSteeringDirective (hint/redirect via INFO_REQUEST interrupt). Adds InterventionKind enum, cockpit settings namespace + events module. --- src/synthorg/core/enums.py | 16 ++ src/synthorg/engine/agent_engine.py | 37 +++ src/synthorg/engine/cockpit/__init__.py | 13 ++ src/synthorg/engine/cockpit/service.py | 173 ++++++++++++++ .../engine/flight_recording/__init__.py | 23 ++ .../engine/flight_recording/service.py | 104 +++++++++ src/synthorg/engine/flight_recording/sink.py | 181 +++++++++++++++ src/synthorg/engine/intervention/__init__.py | 15 ++ src/synthorg/engine/intervention/steering.py | 142 ++++++++++++ src/synthorg/observability/events/cockpit.py | 21 ++ .../observability/events/persistence.py | 15 ++ .../persistence/flight_recorder_protocol.py | 141 ++++++++++++ src/synthorg/persistence/postgres/backend.py | 17 ++ .../postgres/flight_recorder_repo.py | 196 ++++++++++++++++ .../20260522000002_flight_recorder.sql | 24 ++ src/synthorg/persistence/protocol.py | 8 + src/synthorg/persistence/sqlite/__init__.py | 4 + .../persistence/sqlite/_backend_accessors.py | 12 + src/synthorg/persistence/sqlite/backend.py | 9 + .../sqlite/flight_recorder_repo.py | 206 +++++++++++++++++ .../20260522000002_flight_recorder.sql | 24 ++ src/synthorg/settings/definitions/__init__.py | 2 + src/synthorg/settings/definitions/cockpit.py | 154 +++++++++++++ 
src/synthorg/settings/enums.py | 1 + src/synthorg/workers/runtime_builder.py | 35 +++ .../test_flight_recorder_repository.py | 162 ++++++++++++++ tests/unit/api/fakes.py | 51 +++++ tests/unit/api/fakes_backend.py | 6 + tests/unit/engine/cockpit/test_service.py | 130 +++++++++++ .../engine/flight_recording/test_service.py | 65 ++++++ .../unit/engine/flight_recording/test_sink.py | 211 ++++++++++++++++++ .../unit/engine/intervention/test_steering.py | 82 +++++++ web/src/api/types/enum-values.gen.ts | 2 +- web/src/api/types/openapi.gen.ts | 4 +- 34 files changed, 2283 insertions(+), 3 deletions(-) create mode 100644 src/synthorg/engine/cockpit/__init__.py create mode 100644 src/synthorg/engine/cockpit/service.py create mode 100644 src/synthorg/engine/flight_recording/__init__.py create mode 100644 src/synthorg/engine/flight_recording/service.py create mode 100644 src/synthorg/engine/flight_recording/sink.py create mode 100644 src/synthorg/engine/intervention/__init__.py create mode 100644 src/synthorg/engine/intervention/steering.py create mode 100644 src/synthorg/observability/events/cockpit.py create mode 100644 src/synthorg/persistence/flight_recorder_protocol.py create mode 100644 src/synthorg/persistence/postgres/flight_recorder_repo.py create mode 100644 src/synthorg/persistence/postgres/revisions/20260522000002_flight_recorder.sql create mode 100644 src/synthorg/persistence/sqlite/flight_recorder_repo.py create mode 100644 src/synthorg/persistence/sqlite/revisions/20260522000002_flight_recorder.sql create mode 100644 src/synthorg/settings/definitions/cockpit.py create mode 100644 tests/conformance/persistence/test_flight_recorder_repository.py create mode 100644 tests/unit/engine/cockpit/test_service.py create mode 100644 tests/unit/engine/flight_recording/test_service.py create mode 100644 tests/unit/engine/flight_recording/test_sink.py create mode 100644 tests/unit/engine/intervention/test_steering.py diff --git a/src/synthorg/core/enums.py 
b/src/synthorg/core/enums.py index f7f15fba3d..ec2f1d301f 100644 --- a/src/synthorg/core/enums.py +++ b/src/synthorg/core/enums.py @@ -995,3 +995,19 @@ class TaskSource(StrEnum): INTERNAL = "internal" CLIENT = "client" SIMULATION = "simulation" + + +class InterventionKind(StrEnum): + """Operator intervention applied from the mission-control cockpit. + + PAUSE and KILL reuse the task lifecycle seams (transition to + ``INTERRUPTED`` / cancel to ``CANCELLED``). HINT and REDIRECT route + through the steering directive: both post an ``INFO_REQUEST`` + interrupt the engine consumes at the next safe turn boundary, so the + operator's text reaches the running agent without corrupting state. + """ + + PAUSE = "pause" + KILL = "kill" + HINT = "hint" + REDIRECT = "redirect" diff --git a/src/synthorg/engine/agent_engine.py b/src/synthorg/engine/agent_engine.py index 2961cf971a..23e55a97f1 100644 --- a/src/synthorg/engine/agent_engine.py +++ b/src/synthorg/engine/agent_engine.py @@ -72,10 +72,12 @@ ) from synthorg.engine.coordination.models import CoordinationContext from synthorg.engine.coordination.service import MultiAgentCoordinator + from synthorg.engine.flight_recording import FlightRecorderSink from synthorg.engine.hybrid_models import HybridLoopConfig from synthorg.engine.loop_protocol import ( BudgetChecker, ExecutionLoop, + ExecutionResult, ShutdownChecker, ) from synthorg.engine.mcp_self_consumer import MCPSelfConsumerProvider @@ -202,10 +204,12 @@ def __init__( # noqa: PLR0913, PLR0915 approval_interrupt_timeout_seconds: float | None = None, external_api_runtime: ExternalApiRuntime | None = None, stakes_router: StakesRouter | None = None, + flight_recorder_sink: FlightRecorderSink | None = None, clock: Clock | None = None, ) -> None: self._agent_middleware_chain = agent_middleware_chain self._event_reader = event_reader + self._flight_recorder_sink = flight_recorder_sink self._clock: Clock = clock if clock is not None else SystemClock() self._event_stream_hub = 
event_stream_hub self._interrupt_store = interrupt_store @@ -635,6 +639,12 @@ async def _execute( # noqa: PLR0913 project_id=task.project, ) + await self._record_flight_frames( + execution_result, + agent_id=agent_id, + task_id=task_id, + ) + # Read from the post-execution context: ``ctx`` is the # pre-loop snapshot and copy-on-write contexts inside the # loop don't mutate it, so logging ``ctx.turn_count`` here @@ -650,3 +660,30 @@ async def _execute( # noqa: PLR0913 agent_id, task_id, ) + + async def _record_flight_frames( + self, + execution_result: ExecutionResult, + *, + agent_id: str, + task_id: str, + ) -> None: + """Record flight-recorder frames for a finished run (best-effort). + + Runs after the loop has completed, so it is off the per-turn hot + path; the sink itself swallows storage faults so a recording + failure never turns a successful run into a failed one. + """ + if self._flight_recorder_sink is None: + return + from synthorg.engine.flight_recording import build_frames # noqa: PLC0415 + + frames = build_frames( + execution_result, + execution_id=execution_result.context.execution_id, + agent_id=agent_id, + task_id=task_id, + clock=self._clock, + ) + if frames: + await self._flight_recorder_sink.record_frames(frames) diff --git a/src/synthorg/engine/cockpit/__init__.py b/src/synthorg/engine/cockpit/__init__.py new file mode 100644 index 0000000000..c40ea0e012 --- /dev/null +++ b/src/synthorg/engine/cockpit/__init__.py @@ -0,0 +1,13 @@ +"""Mission-control cockpit: live org-activity aggregation.""" + +from synthorg.engine.cockpit.service import ( + AgentActivity, + CockpitService, + LiveActivitySnapshot, +) + +__all__ = [ + "AgentActivity", + "CockpitService", + "LiveActivitySnapshot", +] diff --git a/src/synthorg/engine/cockpit/service.py b/src/synthorg/engine/cockpit/service.py new file mode 100644 index 0000000000..93093e37fe --- /dev/null +++ b/src/synthorg/engine/cockpit/service.py @@ -0,0 +1,173 @@ +"""Live org-activity aggregation for the 
mission-control cockpit. + +Builds a :class:`LiveActivitySnapshot` of in-flight work: who is working +on what, accumulated cost per agent, and stuck / runaway flags derived +from operator-tuned thresholds. Activity and idle time come from the +flight-recorder frames; cost comes from the cost tracker when wired. +""" + +from datetime import timedelta +from typing import TYPE_CHECKING, Final + +from pydantic import AwareDatetime, BaseModel, ConfigDict, Field + +from synthorg.core.clock import Clock, SystemClock +from synthorg.core.enums import TaskStatus +from synthorg.core.types import NotBlankStr +from synthorg.observability import get_logger +from synthorg.observability.events.cockpit import ( + COCKPIT_RUNAWAY_DETECTED, + COCKPIT_SNAPSHOT_PUBLISHED, + COCKPIT_STUCK_DETECTED, +) +from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrameFilterSpec, +) + +if TYPE_CHECKING: + from synthorg.core.task import Task + from synthorg.engine.task_engine import TaskEngine + from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrameRepository, + ) + from synthorg.settings.resolver import ConfigResolver + +logger = get_logger(__name__) + +_COCKPIT_NS: Final[str] = "cockpit" +_STUCK_KEY: Final[str] = "stuck_idle_threshold_minutes" +_RUNAWAY_KEY: Final[str] = "runaway_cost_threshold_percent" +_PERCENT_DIVISOR: Final[float] = 100.0 +_ACTIVE_STATUSES: Final[tuple[TaskStatus, ...]] = ( + TaskStatus.IN_PROGRESS, + TaskStatus.BLOCKED, +) +#: Bounded page when summing cost from frames without a cost tracker. 
+_FRAME_COST_PAGE: Final[int] = 1000 + + +class AgentActivity(BaseModel): + """One agent's in-flight activity in the live snapshot.""" + + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") + + agent_id: NotBlankStr = Field(description="Agent working the task") + task_id: NotBlankStr = Field(description="Task being worked") + status: TaskStatus = Field(description="Current task status") + turn_count: int = Field(ge=0, description="Turns recorded so far") + cost: float = Field(ge=0.0, description="Accumulated cost for the task") + last_active: AwareDatetime | None = Field( + default=None, + description="Timestamp of the latest recorded turn, when any", + ) + is_stuck: bool = Field(description="Idle beyond the stuck threshold") + is_runaway: bool = Field(description="Cost beyond the runaway threshold") + + +class LiveActivitySnapshot(BaseModel): + """Aggregate snapshot of in-flight org activity.""" + + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") + + timestamp: AwareDatetime = Field(description="When the snapshot was built") + agents: tuple[AgentActivity, ...] = Field( + default=(), + description="Per-task activity for active work", + ) + total_cost: float = Field(default=0.0, ge=0.0, description="Summed active cost") + active_count: int = Field(default=0, ge=0, description="Active task count") + stuck_agents: tuple[NotBlankStr, ...] = Field( + default=(), + description="Agent ids flagged stuck", + ) + runaway_agents: tuple[NotBlankStr, ...] 
= Field( + default=(), + description="Agent ids flagged runaway", + ) + + +class CockpitService: + """Aggregates in-flight work into a live activity snapshot.""" + + def __init__( + self, + task_engine: TaskEngine, + flight_recorder_frames: FlightRecorderFrameRepository, + *, + config_resolver: ConfigResolver, + clock: Clock | None = None, + ) -> None: + self._task_engine = task_engine + self._frames = flight_recorder_frames + self._config_resolver = config_resolver + self._clock = clock or SystemClock() + + async def get_live_snapshot(self) -> LiveActivitySnapshot: + """Build a snapshot of active work with stuck / runaway flags.""" + stuck_minutes = await self._config_resolver.get_float(_COCKPIT_NS, _STUCK_KEY) + runaway_pct = await self._config_resolver.get_float(_COCKPIT_NS, _RUNAWAY_KEY) + now = self._clock.now() + stuck_cutoff = now - timedelta(minutes=stuck_minutes) + + activities: list[AgentActivity] = [] + for status in _ACTIVE_STATUSES: + tasks, _ = await self._task_engine.list_tasks(status=status) + activities.extend( + [ + await self._build_activity(task, stuck_cutoff, runaway_pct) + for task in tasks + ] + ) + + stuck = tuple(NotBlankStr(a.agent_id) for a in activities if a.is_stuck) + runaway = tuple(NotBlankStr(a.agent_id) for a in activities if a.is_runaway) + snapshot = LiveActivitySnapshot( + timestamp=now, + agents=tuple(activities), + total_cost=sum(a.cost for a in activities), + active_count=len(activities), + stuck_agents=stuck, + runaway_agents=runaway, + ) + logger.info( + COCKPIT_SNAPSHOT_PUBLISHED, + active_count=snapshot.active_count, + stuck_count=len(stuck), + runaway_count=len(runaway), + ) + for agent_id in stuck: + logger.warning(COCKPIT_STUCK_DETECTED, agent_id=agent_id) + for agent_id in runaway: + logger.warning(COCKPIT_RUNAWAY_DETECTED, agent_id=agent_id) + return snapshot + + async def _build_activity( + self, + task: Task, + stuck_cutoff: AwareDatetime, + runaway_pct: float, + ) -> AgentActivity: + """Derive one task's activity 
row from frames + cost tracker.""" + agent_id = task.assigned_to or "unassigned" + frames = await self._frames.query( + FlightRecorderFrameFilterSpec(task_id=NotBlankStr(task.id)), + limit=_FRAME_COST_PAGE, + ) + latest = frames[0] if frames else None + turn_count = latest.turn_index if latest is not None else 0 + last_active = latest.timestamp if latest is not None else None + cost = sum(frame.cost for frame in frames) + is_stuck = last_active is not None and last_active < stuck_cutoff + is_runaway = task.budget_limit > 0 and cost > task.budget_limit * ( + runaway_pct / _PERCENT_DIVISOR + ) + return AgentActivity( + agent_id=NotBlankStr(agent_id), + task_id=NotBlankStr(task.id), + status=task.status, + turn_count=turn_count, + cost=cost, + last_active=last_active, + is_stuck=is_stuck, + is_runaway=is_runaway, + ) diff --git a/src/synthorg/engine/flight_recording/__init__.py b/src/synthorg/engine/flight_recording/__init__.py new file mode 100644 index 0000000000..15ea624100 --- /dev/null +++ b/src/synthorg/engine/flight_recording/__init__.py @@ -0,0 +1,23 @@ +"""Flight recording: pluggable per-turn frame capture for cockpit replay.""" + +from synthorg.engine.flight_recording.service import ( + FlightRecorderService, + ReplaySeekView, +) +from synthorg.engine.flight_recording.sink import ( + FlightRecorderSink, + NoOpFlightRecorderSink, + PersistenceFlightRecorderSink, + build_flight_recorder_sink, + build_frames, +) + +__all__ = [ + "FlightRecorderService", + "FlightRecorderSink", + "NoOpFlightRecorderSink", + "PersistenceFlightRecorderSink", + "ReplaySeekView", + "build_flight_recorder_sink", + "build_frames", +] diff --git a/src/synthorg/engine/flight_recording/service.py b/src/synthorg/engine/flight_recording/service.py new file mode 100644 index 0000000000..fb7326d76e --- /dev/null +++ b/src/synthorg/engine/flight_recording/service.py @@ -0,0 +1,104 @@ +"""Flight-recorder query + seek service for the cockpit replay scrubber. 
+ +The persisted frame store is the authoritative replay source: this +service serves the scrubber timeline (newest-first frames) and a +"seek to turn N" reconstruction (frames 1..N ascending plus cumulative +cost) entirely from frames, with no dependency on the observability +event log. +""" + +from pydantic import BaseModel, ConfigDict, Field + +from synthorg.core.types import NotBlankStr +from synthorg.observability import get_logger +from synthorg.observability.events.cockpit import FLIGHT_RECORDER_SEEK +from synthorg.persistence._generics import DEFAULT_PAGE_SIZE +from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrame, + FlightRecorderFrameFilterSpec, + FlightRecorderFrameRepository, +) + +logger = get_logger(__name__) + +#: Upper bound on frames a single seek reconstructs, so a pathological +#: turn index cannot pull an unbounded page from the store. +_MAX_SEEK_FRAMES: int = 1000 + + +class ReplaySeekView(BaseModel): + """Reconstructed scrubber state at a target turn. + + ``frames`` are ascending by turn index from turn 1 up to and + including ``turn_index``; ``current_frame`` is the frame at + ``turn_index`` (``None`` when that turn was never recorded). + """ + + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") + + execution_id: NotBlankStr = Field(description="Execution being replayed") + turn_index: int = Field(ge=1, description="Target turn index") + frames: tuple[FlightRecorderFrame, ...] 
= Field( + default=(), + description="Frames 1..turn_index, ascending", + ) + current_frame: FlightRecorderFrame | None = Field( + default=None, + description="Frame at turn_index, when recorded", + ) + cumulative_cost: float = Field( + default=0.0, + ge=0.0, + description="Summed cost of frames up to and including turn_index", + ) + + +class FlightRecorderService: + """Query and seek over persisted flight-recorder frames.""" + + def __init__(self, repository: FlightRecorderFrameRepository) -> None: + self._repository = repository + + async def get_frames( + self, + execution_id: str, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[FlightRecorderFrame, ...]: + """Return the scrubber timeline (newest-first) for an execution.""" + return await self._repository.query( + FlightRecorderFrameFilterSpec(execution_id=NotBlankStr(execution_id)), + limit=limit, + offset=offset, + ) + + async def seek(self, execution_id: str, turn_index: int) -> ReplaySeekView: + """Reconstruct scrubber state at ``turn_index`` from frames 1..N.""" + frames = await self._repository.query( + FlightRecorderFrameFilterSpec( + execution_id=NotBlankStr(execution_id), + turn_index_min=1, + turn_index_max=turn_index, + ), + limit=_MAX_SEEK_FRAMES, + ) + ascending = tuple(sorted(frames, key=lambda f: f.turn_index)) + current = next( + (f for f in ascending if f.turn_index == turn_index), + None, + ) + cumulative = sum(f.cost for f in ascending) + logger.debug( + FLIGHT_RECORDER_SEEK, + execution_id=execution_id, + turn_index=turn_index, + frames_loaded=len(ascending), + ) + return ReplaySeekView( + execution_id=NotBlankStr(execution_id), + turn_index=turn_index, + frames=ascending, + current_frame=current, + cumulative_cost=cumulative, + ) diff --git a/src/synthorg/engine/flight_recording/sink.py b/src/synthorg/engine/flight_recording/sink.py new file mode 100644 index 0000000000..275c499471 --- /dev/null +++ b/src/synthorg/engine/flight_recording/sink.py @@ -0,0 +1,181 @@ 
+"""Pluggable flight-recorder sink and frame-building helpers. + +A sink receives :class:`FlightRecorderFrame` records produced from an +agent run's :class:`ExecutionResult`. The default sink appends to the +persistence backend; the no-op sink discards frames. Recording is +best-effort: a failing sink logs and never propagates into the engine. +""" + +from typing import Final, Protocol, runtime_checkable + +from synthorg.core.clock import Clock, SystemClock +from synthorg.core.enums import TaskStatus +from synthorg.engine.loop_protocol import ( + ExecutionResult, + TerminationReason, + TurnRecord, +) +from synthorg.observability import get_logger, safe_error_description +from synthorg.observability.events.cockpit import ( + FLIGHT_RECORDER_FRAME_RECORDED, + FLIGHT_RECORDER_RECORD_FAILED, +) +from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrame, + FlightRecorderFrameRepository, +) +from synthorg.providers.enums import FinishReason, MessageRole + +logger = get_logger(__name__) + +#: Default cap on stored prompt/response summaries when no setting is +#: supplied at the call site (mirrors cockpit.flight_recorder_summary_max_chars). +DEFAULT_SUMMARY_MAX_CHARS: Final[int] = 2000 + +_TERMINATION_TO_STATUS: Final[dict[TerminationReason, TaskStatus]] = { + TerminationReason.COMPLETED: TaskStatus.COMPLETED, + TerminationReason.MAX_TURNS: TaskStatus.FAILED, + TerminationReason.BUDGET_EXHAUSTED: TaskStatus.FAILED, + TerminationReason.SHUTDOWN: TaskStatus.INTERRUPTED, + TerminationReason.PARKED: TaskStatus.SUSPENDED, + TerminationReason.STAGNATION: TaskStatus.FAILED, + TerminationReason.ERROR: TaskStatus.FAILED, +} + + +@runtime_checkable +class FlightRecorderSink(Protocol): + """Receives flight-recorder frames produced from an agent run.""" + + async def record_frames(self, frames: tuple[FlightRecorderFrame, ...]) -> None: + """Persist a run's frames. Best-effort; never raises into the engine.""" + ... 
+ + +class PersistenceFlightRecorderSink: + """Default sink: append frames to the persistence backend.""" + + def __init__(self, repository: FlightRecorderFrameRepository) -> None: + self._repository = repository + + async def record_frames(self, frames: tuple[FlightRecorderFrame, ...]) -> None: + """Append each frame; a failure on one frame is logged, not raised. + + Recording runs after the agent loop has finished, so it is off + the per-turn hot path; guarding here keeps a transient storage + fault from turning a successful run into a failed one. + """ + recorded = 0 + for frame in frames: + try: + await self._repository.append(frame) + recorded += 1 + except Exception as exc: + logger.warning( + FLIGHT_RECORDER_RECORD_FAILED, + execution_id=frame.execution_id, + turn_index=frame.turn_index, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + if recorded: + logger.debug( + FLIGHT_RECORDER_FRAME_RECORDED, + execution_id=frames[0].execution_id, + count=recorded, + ) + + +class NoOpFlightRecorderSink: + """Backstop sink that discards frames (recording disabled).""" + + async def record_frames(self, frames: tuple[FlightRecorderFrame, ...]) -> None: + """Discard the frames.""" + del frames + + +def build_flight_recorder_sink( + repository: FlightRecorderFrameRepository | None, + *, + enabled: bool = True, + strategy: str = "persistence", +) -> FlightRecorderSink: + """Select the configured recorder sink. + + Returns a :class:`NoOpFlightRecorderSink` when recording is disabled, + the strategy is ``"noop"``, or no repository is available; otherwise + the persistence-backed sink. 
+ """ + if not enabled or strategy == "noop" or repository is None: + return NoOpFlightRecorderSink() + return PersistenceFlightRecorderSink(repository) + + +def _truncate(text: str | None, max_chars: int) -> str | None: + """Trim *text* to *max_chars*, returning ``None`` when empty.""" + if not text: + return None + return text[:max_chars] + + +def _classify_decision(turn: TurnRecord) -> str: + """Classify a turn's outcome for the replay decision label.""" + if turn.tool_calls_made: + return "tool_call" + if turn.finish_reason is FinishReason.STOP: + return "completed" + return turn.finish_reason.value + + +def build_frames( # noqa: PLR0913 -- keyword-only frame builder, all required + execution_result: ExecutionResult, + *, + execution_id: str, + agent_id: str, + task_id: str | None, + summary_max_chars: int = DEFAULT_SUMMARY_MAX_CHARS, + clock: Clock | None = None, +) -> tuple[FlightRecorderFrame, ...]: + """Build one frame per turn from a finished run's execution result. + + Response content is taken from the assistant messages in the final + conversation, paired with turns in order. The terminal turn carries + the run's outcome status; earlier turns are ``IN_PROGRESS``. 
+ """ + the_clock = clock or SystemClock() + timestamp = the_clock.now() + assistant_messages = [ + msg + for msg in execution_result.context.conversation + if msg.role is MessageRole.ASSISTANT + ] + terminal_status = _TERMINATION_TO_STATUS.get( + execution_result.termination_reason, + TaskStatus.IN_PROGRESS, + ) + last_index = len(execution_result.turns) - 1 + frames: list[FlightRecorderFrame] = [] + for index, turn in enumerate(execution_result.turns): + response = ( + assistant_messages[index].content + if index < len(assistant_messages) + else None + ) + status = terminal_status if index == last_index else TaskStatus.IN_PROGRESS + frames.append( + FlightRecorderFrame( + execution_id=execution_id, + task_id=task_id, + agent_id=agent_id, + turn_index=turn.turn_number, + timestamp=timestamp, + response_summary=_truncate(response, summary_max_chars), + decision=_classify_decision(turn), + tool_calls=tuple(turn.tool_calls_made), + input_tokens=turn.input_tokens, + output_tokens=turn.output_tokens, + cost=turn.cost, + status=status, + ) + ) + return tuple(frames) diff --git a/src/synthorg/engine/intervention/__init__.py b/src/synthorg/engine/intervention/__init__.py new file mode 100644 index 0000000000..1addbe26f2 --- /dev/null +++ b/src/synthorg/engine/intervention/__init__.py @@ -0,0 +1,15 @@ +"""Operator intervention: pluggable steering directives for the cockpit.""" + +from synthorg.engine.intervention.steering import ( + SafeDefaultSteeringDirective, + SteeringDirective, + SteeringOutcome, + build_steering_directive, +) + +__all__ = [ + "SafeDefaultSteeringDirective", + "SteeringDirective", + "SteeringOutcome", + "build_steering_directive", +] diff --git a/src/synthorg/engine/intervention/steering.py b/src/synthorg/engine/intervention/steering.py new file mode 100644 index 0000000000..b553e06f94 --- /dev/null +++ b/src/synthorg/engine/intervention/steering.py @@ -0,0 +1,142 @@ +"""Pluggable steering directive for cockpit hint/redirect interventions. 
+ +PAUSE and KILL reuse the task lifecycle seams at the controller; this +module covers HINT and REDIRECT. The safe default delivers them as an +``INFO_REQUEST`` interrupt the running agent consumes at its next safe +turn boundary, producing a visible queued artefact rather than a silent +no-op. EPIC E mid-flight steering (#1997) replaces the default with +deeper in-loop propagation behind this same protocol. +""" + +from typing import TYPE_CHECKING, Final, Protocol, runtime_checkable +from uuid import uuid4 + +from pydantic import BaseModel, ConfigDict, Field + +from synthorg.communication.event_stream.interrupt import ( + Interrupt, + InterruptStore, + InterruptType, +) +from synthorg.core.clock import Clock, SystemClock +from synthorg.core.enums import InterventionKind +from synthorg.core.types import NotBlankStr +from synthorg.observability import get_logger + +if TYPE_CHECKING: + from collections.abc import Mapping + +logger = get_logger(__name__) + +#: Advisory expiry stamped on a steering interrupt; the engine supplies +#: the real wait timeout. A named constant keeps the magic-number gate +#: satisfied while documenting the default operator-hint lifetime. +DEFAULT_STEERING_TIMEOUT_SECONDS: Final[float] = 600.0 + +#: Intervention kinds the steering directive is responsible for; PAUSE +#: and KILL are routed to the task lifecycle seams at the controller. 
+_STEERABLE_KINDS: Final[frozenset[InterventionKind]] = frozenset( + {InterventionKind.HINT, InterventionKind.REDIRECT}, +) + + +class SteeringOutcome(BaseModel): + """Result of applying a steering directive.""" + + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") + + kind: InterventionKind = Field(description="Intervention kind applied") + applied: bool = Field(description="Whether the directive was delivered") + artifact_id: NotBlankStr | None = Field( + default=None, + description="Interrupt id the directive produced, when applied", + ) + detail: str = Field(description="Human-readable outcome description") + + +@runtime_checkable +class SteeringDirective(Protocol): + """Delivers a mid-flight hint/redirect to a running agent.""" + + async def steer( + self, + *, + kind: InterventionKind, + execution_id: str, + agent_id: str, + details: Mapping[str, object], + ) -> SteeringOutcome: + """Apply a steering intervention; return its outcome.""" + ... + + +class SafeDefaultSteeringDirective: + """Default directive: queue an ``INFO_REQUEST`` interrupt. + + Both HINT and REDIRECT post an interrupt carrying the operator's + text, which the running agent consumes at its next safe turn + boundary. This is best-effort: the agent adopts the directive when + it next checks for interrupts, with no in-flight state mutation. 
+ """ + + def __init__( + self, + interrupt_store: InterruptStore, + *, + clock: Clock | None = None, + ) -> None: + self._interrupt_store = interrupt_store + self._clock = clock or SystemClock() + + async def steer( + self, + *, + kind: InterventionKind, + execution_id: str, + agent_id: str, + details: Mapping[str, object], + ) -> SteeringOutcome: + """Queue a hint/redirect interrupt for the running agent.""" + if kind not in _STEERABLE_KINDS: + return SteeringOutcome( + kind=kind, + applied=False, + detail=f"{kind.value} is not handled by the steering directive", + ) + text = str(details.get("text", "")).strip() + if not text: + return SteeringOutcome( + kind=kind, + applied=False, + detail="no directive text supplied", + ) + interrupt = Interrupt( + id=NotBlankStr(str(uuid4())), + type=InterruptType.INFO_REQUEST, + session_id=NotBlankStr(execution_id), + agent_id=NotBlankStr(agent_id), + created_at=self._clock.now(), + timeout_seconds=DEFAULT_STEERING_TIMEOUT_SECONDS, + question=NotBlankStr(text), + context_snippet=NotBlankStr(f"Operator {kind.value} via mission control"), + ) + await self._interrupt_store.create(interrupt) + return SteeringOutcome( + kind=kind, + applied=True, + artifact_id=interrupt.id, + detail="queued, awaiting the agent's next safe turn boundary", + ) + + +def build_steering_directive( + interrupt_store: InterruptStore, + *, + strategy: str = "safe_default", + clock: Clock | None = None, +) -> SteeringDirective: + """Select the configured steering directive implementation.""" + if strategy != "safe_default": + msg = f"Unknown steering directive strategy: {strategy!r}" + raise ValueError(msg) + return SafeDefaultSteeringDirective(interrupt_store, clock=clock) diff --git a/src/synthorg/observability/events/cockpit.py b/src/synthorg/observability/events/cockpit.py new file mode 100644 index 0000000000..cb2075da65 --- /dev/null +++ b/src/synthorg/observability/events/cockpit.py @@ -0,0 +1,21 @@ +"""Cockpit event name constants for 
observability. + +Covers the mission-control live activity feed, operator interventions, +and flight-recorder lifecycle. Format: ``cockpit..`` / +``flight_recorder..``. +""" + +from typing import Final + +COCKPIT_SNAPSHOT_PUBLISHED: Final[str] = "cockpit.snapshot.published" +COCKPIT_STUCK_DETECTED: Final[str] = "cockpit.stuck.detected" +COCKPIT_RUNAWAY_DETECTED: Final[str] = "cockpit.runaway.detected" +COCKPIT_INTERVENTION_INITIATED: Final[str] = "cockpit.intervention.initiated" +COCKPIT_INTERVENTION_APPLIED: Final[str] = "cockpit.intervention.applied" +COCKPIT_INTERVENTION_FAILED: Final[str] = "cockpit.intervention.failed" + +FLIGHT_RECORDER_FRAME_RECORDED: Final[str] = "flight_recorder.frame.recorded" +FLIGHT_RECORDER_RECORD_FAILED: Final[str] = "flight_recorder.record.failed" +FLIGHT_RECORDER_QUEUE_OVERFLOW: Final[str] = "flight_recorder.queue.overflow" +FLIGHT_RECORDER_SEEK: Final[str] = "flight_recorder.seek" +FLIGHT_RECORDER_PURGE: Final[str] = "flight_recorder.purge" diff --git a/src/synthorg/observability/events/persistence.py b/src/synthorg/observability/events/persistence.py index 3e85b66f9c..09fce12391 100644 --- a/src/synthorg/observability/events/persistence.py +++ b/src/synthorg/observability/events/persistence.py @@ -59,6 +59,21 @@ PERSISTENCE_TASK_DESERIALIZE_FAILED: Final[str] = "persistence.task.deserialize_failed" +PERSISTENCE_FLIGHT_RECORDER_SAVED: Final[str] = "persistence.flight_recorder.saved" +PERSISTENCE_FLIGHT_RECORDER_SAVE_FAILED: Final[str] = ( + "persistence.flight_recorder.save_failed" +) +PERSISTENCE_FLIGHT_RECORDER_QUERIED: Final[str] = "persistence.flight_recorder.queried" +PERSISTENCE_FLIGHT_RECORDER_QUERY_FAILED: Final[str] = ( + "persistence.flight_recorder.query_failed" +) +PERSISTENCE_FLIGHT_RECORDER_DELETE_FAILED: Final[str] = ( + "persistence.flight_recorder.delete_failed" +) +PERSISTENCE_FLIGHT_RECORDER_DESERIALIZE_FAILED: Final[str] = ( + "persistence.flight_recorder.deserialize_failed" +) + PERSISTENCE_MESSAGE_SAVED: 
Final[str] = "persistence.message.saved" PERSISTENCE_MESSAGE_SAVE_FAILED: Final[str] = "persistence.message.save_failed" PERSISTENCE_MESSAGE_DUPLICATE: Final[str] = "persistence.message.duplicate" diff --git a/src/synthorg/persistence/flight_recorder_protocol.py b/src/synthorg/persistence/flight_recorder_protocol.py new file mode 100644 index 0000000000..79f30f3b8d --- /dev/null +++ b/src/synthorg/persistence/flight_recorder_protocol.py @@ -0,0 +1,141 @@ +"""Flight-recorder frame model and repository protocol. + +A ``FlightRecorderFrame`` captures one completed agent turn with enough +redacted content for the mission-control cockpit to replay a run +step-by-step. The frame store is the authoritative replay source: the +scrubber timeline and per-turn detail come entirely from persisted +frames, independent of the observability event log. +""" + +from datetime import UTC, datetime +from typing import Protocol, runtime_checkable +from uuid import uuid4 + +from pydantic import AwareDatetime, BaseModel, ConfigDict, Field + +from synthorg.core.enums import ( # noqa: TC001 -- Pydantic field types + InterventionKind, + TaskStatus, +) +from synthorg.core.types import NotBlankStr # noqa: TC001 -- Pydantic field type +from synthorg.persistence._generics import DEFAULT_PAGE_SIZE, AppendOnlyRepository + +__all__ = [ + "FlightRecorderFrame", + "FlightRecorderFrameFilterSpec", + "FlightRecorderFrameRepository", +] + + +class FlightRecorderFrame(BaseModel): + """One recorded agent turn for cockpit replay. + + Content fields (``prompt_summary`` / ``response_summary``) are + redacted and length-bounded at the recording boundary; this model + stores them verbatim. ``execution_id`` keys the run timeline; + ``task_id`` + ``agent_id`` let interventions target the right work + without a separate mapping lookup. 
+ """ + + model_config = ConfigDict(frozen=True, allow_inf_nan=False, extra="forbid") + + id: NotBlankStr = Field( + default_factory=lambda: str(uuid4()), + description="Unique frame identifier", + ) + execution_id: NotBlankStr = Field(description="Execution run identifier") + task_id: NotBlankStr | None = Field( + default=None, + description="Task the agent was working on, when known", + ) + agent_id: NotBlankStr = Field(description="Agent that produced the turn") + turn_index: int = Field(ge=1, description="1-based turn index within the run") + timestamp: AwareDatetime = Field( + default_factory=lambda: datetime.now(UTC), + description="When the turn completed", + ) + prompt_summary: str | None = Field( + default=None, + description="Redacted, length-bounded prompt summary", + ) + response_summary: str | None = Field( + default=None, + description="Redacted, length-bounded model response summary", + ) + decision: str | None = Field( + default=None, + description="Classified turn outcome (e.g. tool_call, completed)", + ) + tool_calls: tuple[str, ...] 
= Field( + default=(), + description="Tool names invoked during the turn", + ) + input_tokens: int = Field(default=0, ge=0, description="Prompt tokens") + output_tokens: int = Field(default=0, ge=0, description="Completion tokens") + cost: float = Field(default=0.0, ge=0, description="Turn cost") + status: TaskStatus = Field(description="Task status at turn completion") + intervention_kind: InterventionKind | None = Field( + default=None, + description="Operator intervention recorded on this turn, if any", + ) + + +class FlightRecorderFrameFilterSpec(BaseModel): + """Filter spec for ``FlightRecorderFrameRepository.query``.""" + + model_config = ConfigDict(frozen=True, extra="forbid", allow_inf_nan=False) + + execution_id: NotBlankStr | None = Field( + default=None, + description="Filter to a single execution", + ) + task_id: NotBlankStr | None = Field( + default=None, + description="Filter to a single task", + ) + agent_id: NotBlankStr | None = Field( + default=None, + description="Filter to a single agent", + ) + turn_index_min: int | None = Field( + default=None, + ge=1, + description="Inclusive lower bound on turn index", + ) + turn_index_max: int | None = Field( + default=None, + ge=1, + description="Inclusive upper bound on turn index", + ) + + +@runtime_checkable +class FlightRecorderFrameRepository( + AppendOnlyRepository["FlightRecorderFrame", FlightRecorderFrameFilterSpec], + Protocol, +): + """Append-only persistence for flight-recorder frames. + + Composes :class:`AppendOnlyRepository`: ``append`` writes one + immutable frame, ``query`` returns frames newest-first under a + filter, and ``purge_before`` enforces retention. No bespoke methods; + the cockpit reconstructs ascending turn order in the service layer. + """ + + async def append(self, frame: FlightRecorderFrame) -> None: + """Persist one frame (append-only; a duplicate id is a violation).""" + ... 
+ + async def query( + self, + filter_spec: FlightRecorderFrameFilterSpec, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[FlightRecorderFrame, ...]: + """Return frames matching the filter, newest-first (by turn index).""" + ... + + async def purge_before(self, threshold: datetime) -> int: + """Delete frames with ``timestamp < threshold``. Returns rows removed.""" + ... diff --git a/src/synthorg/persistence/postgres/backend.py b/src/synthorg/persistence/postgres/backend.py index 11eadadb27..00748006dd 100644 --- a/src/synthorg/persistence/postgres/backend.py +++ b/src/synthorg/persistence/postgres/backend.py @@ -71,6 +71,9 @@ PostgresFineTuneCheckpointRepository, PostgresFineTuneRunRepository, ) +from synthorg.persistence.postgres.flight_recorder_repo import ( + PostgresFlightRecorderFrameRepository, +) from synthorg.persistence.postgres.heartbeat_repo import ( PostgresHeartbeatRepository, ) @@ -225,6 +228,9 @@ from synthorg.persistence.decision_protocol import DecisionRepository from synthorg.persistence.docs_protocol import DocsRepository from synthorg.persistence.escalation_protocol import EscalationQueueRepository + from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrameRepository, + ) from synthorg.persistence.idempotency_protocol import IdempotencyRepository from synthorg.persistence.knowledge_protocol import ( ChunkProvenanceRepository, @@ -329,6 +335,7 @@ def __init__(self, config: PostgresConfig) -> None: # noqa: PLR0915 -- repo reg self._users: UserRepository | None = None self._api_keys: ApiKeyRepository | None = None self._checkpoints: CheckpointRepository | None = None + self._flight_recorder_frames: FlightRecorderFrameRepository | None = None self._heartbeats: HeartbeatRepository | None = None self._agent_states: AgentStateRepository | None = None self._settings: SettingsRepository | None = None @@ -397,6 +404,7 @@ def _clear_state(self) -> None: # noqa: PLR0915 -- repo registry reset intentio 
self._users = None self._api_keys = None self._checkpoints = None + self._flight_recorder_frames = None self._heartbeats = None self._agent_states = None self._settings = None @@ -468,6 +476,7 @@ def _create_repositories(self) -> None: # noqa: PLR0915 self._users = PostgresUserRepository(pool) self._api_keys = PostgresApiKeyRepository(pool) self._checkpoints = PostgresCheckpointRepository(pool) + self._flight_recorder_frames = PostgresFlightRecorderFrameRepository(pool) self._heartbeats = PostgresHeartbeatRepository(pool) self._agent_states = PostgresAgentStateRepository(pool) self._settings = PostgresSettingsRepository(pool) @@ -683,6 +692,14 @@ def checkpoints(self) -> CheckpointRepository: """Repository for Checkpoint persistence.""" return self._require_connected(self._checkpoints, "checkpoints") + @property + def flight_recorder_frames(self) -> FlightRecorderFrameRepository: + """Repository for flight-recorder frame persistence.""" + return self._require_connected( + self._flight_recorder_frames, + "flight_recorder_frames", + ) + @property def heartbeats(self) -> HeartbeatRepository: """Repository for Heartbeat persistence.""" diff --git a/src/synthorg/persistence/postgres/flight_recorder_repo.py b/src/synthorg/persistence/postgres/flight_recorder_repo.py new file mode 100644 index 0000000000..136e0f3211 --- /dev/null +++ b/src/synthorg/persistence/postgres/flight_recorder_repo.py @@ -0,0 +1,196 @@ +"""Postgres implementation of the ``FlightRecorderFrameRepository`` protocol. + +Postgres sibling of ``persistence/sqlite/flight_recorder_repo.py``. +``tool_calls`` is stored as native JSONB and ``timestamp`` as TIMESTAMPTZ. 
+""" +# ruff: noqa: S608 -- dynamic WHERE built from hardcoded column names only + +from typing import TYPE_CHECKING + +import psycopg +from psycopg.rows import dict_row +from psycopg.types.json import Jsonb +from pydantic import ValidationError + +from synthorg.core.persistence_errors import DuplicateRecordError, QueryError +from synthorg.observability import get_logger, safe_error_description +from synthorg.observability.events.persistence import ( + PERSISTENCE_FLIGHT_RECORDER_DELETE_FAILED, + PERSISTENCE_FLIGHT_RECORDER_DESERIALIZE_FAILED, + PERSISTENCE_FLIGHT_RECORDER_QUERY_FAILED, + PERSISTENCE_FLIGHT_RECORDER_SAVE_FAILED, +) +from synthorg.persistence._generics import DEFAULT_PAGE_SIZE +from synthorg.persistence._shared import normalize_utc +from synthorg.persistence._shared.pagination import validate_pagination_args +from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrame, + FlightRecorderFrameFilterSpec, +) + +if TYPE_CHECKING: + from datetime import datetime + + from psycopg_pool import AsyncConnectionPool + +logger = get_logger(__name__) + +_COLUMNS = ( + "id, execution_id, task_id, agent_id, turn_index, timestamp, " + "prompt_summary, response_summary, decision, tool_calls, " + "input_tokens, output_tokens, cost, status, intervention_kind" +) + + +class PostgresFlightRecorderFrameRepository: + """Postgres implementation of the ``FlightRecorderFrameRepository`` protocol. + + Args: + pool: An open psycopg_pool.AsyncConnectionPool. 
+ """ + + def __init__(self, pool: AsyncConnectionPool) -> None: + self._pool = pool + + async def append(self, frame: FlightRecorderFrame) -> None: + """Persist one frame (append-only; a duplicate id is a violation).""" + try: + data = frame.model_dump(mode="json") + data["tool_calls"] = Jsonb(list(frame.tool_calls)) + async with self._pool.connection() as conn, conn.cursor() as cur: + await cur.execute( + f"""\ +INSERT INTO flight_recorder_frames ({_COLUMNS}) VALUES ( + %(id)s, %(execution_id)s, %(task_id)s, %(agent_id)s, %(turn_index)s, + %(timestamp)s, %(prompt_summary)s, %(response_summary)s, %(decision)s, + %(tool_calls)s, %(input_tokens)s, %(output_tokens)s, %(cost)s, + %(status)s, %(intervention_kind)s +)""", + data, + ) + await conn.commit() + except psycopg.errors.UniqueViolation as exc: + msg = f"Flight recorder frame {frame.id!r} already exists" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_SAVE_FAILED, + frame_id=frame.id, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise DuplicateRecordError(msg) from exc + except psycopg.Error as exc: + msg = f"Failed to save flight recorder frame {frame.id!r}" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_SAVE_FAILED, + frame_id=frame.id, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc + + async def query( + self, + filter_spec: FlightRecorderFrameFilterSpec, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[FlightRecorderFrame, ...]: + """Return frames matching the filter, newest-first by turn index.""" + limit = validate_pagination_args( + limit, offset, event=PERSISTENCE_FLIGHT_RECORDER_QUERY_FAILED + ) + conditions: list[str] = [] + params: list[object] = [] + if filter_spec.execution_id is not None: + conditions.append("execution_id = %s") + params.append(filter_spec.execution_id) + if filter_spec.task_id is not None: + conditions.append("task_id = %s") + params.append(filter_spec.task_id) + if 
filter_spec.agent_id is not None: + conditions.append("agent_id = %s") + params.append(filter_spec.agent_id) + if filter_spec.turn_index_min is not None: + conditions.append("turn_index >= %s") + params.append(filter_spec.turn_index_min) + if filter_spec.turn_index_max is not None: + conditions.append("turn_index <= %s") + params.append(filter_spec.turn_index_max) + where = " AND ".join(conditions) if conditions else "TRUE" + sql = ( + f"SELECT {_COLUMNS} FROM flight_recorder_frames WHERE {where} " + "ORDER BY turn_index DESC, timestamp DESC LIMIT %s OFFSET %s" + ) + params.extend([limit, offset]) + try: + async with ( + self._pool.connection() as conn, + conn.cursor(row_factory=dict_row) as cur, + ): + await cur.execute(sql, params) + rows = await cur.fetchall() + except psycopg.Error as exc: + msg = "Failed to query flight recorder frames" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_QUERY_FAILED, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc + return tuple(self._row_to_model(dict(r)) for r in rows) + + async def purge_before(self, threshold: datetime) -> int: + """Delete frames with ``timestamp < threshold``. + + ``threshold`` must be timezone-aware; a naive value would make + the cut-off depend on the backend's session timezone. 
+ """ + if threshold.tzinfo is None: + msg = f"threshold must be timezone-aware, got naive {threshold!r}" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_DELETE_FAILED, + error="naive_threshold", + error_type="ValueError", + ) + raise QueryError(msg) + try: + async with self._pool.connection() as conn, conn.cursor() as cur: + await cur.execute( + "DELETE FROM flight_recorder_frames WHERE timestamp < %s", + (normalize_utc(threshold),), + ) + count = cur.rowcount + await conn.commit() + except psycopg.Error as exc: + msg = "Failed to purge flight recorder frames by threshold" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_DELETE_FAILED, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc + return count + + def _row_to_model(self, row: dict[str, object]) -> FlightRecorderFrame: + """Convert a database row to a ``FlightRecorderFrame`` model. + + ``tool_calls`` comes back from Postgres JSONB as a Python list; + the model expects a tuple, so coerce before validation. + + Raises: + QueryError: If the row cannot be deserialized. + """ + try: + raw_tool_calls = row.get("tool_calls") + if isinstance(raw_tool_calls, list): + row["tool_calls"] = tuple(raw_tool_calls) + return FlightRecorderFrame.model_validate(row) + except ValidationError as exc: + msg = f"Failed to deserialize flight recorder frame {row.get('id')!r}" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_DESERIALIZE_FAILED, + frame_id=row.get("id"), + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc diff --git a/src/synthorg/persistence/postgres/revisions/20260522000002_flight_recorder.sql b/src/synthorg/persistence/postgres/revisions/20260522000002_flight_recorder.sql new file mode 100644 index 0000000000..f018e0e44a --- /dev/null +++ b/src/synthorg/persistence/postgres/revisions/20260522000002_flight_recorder.sql @@ -0,0 +1,24 @@ +-- Flight-recorder frames: per-turn cockpit replay records (append-only). 
+CREATE TABLE flight_recorder_frames ( + id TEXT NOT NULL PRIMARY KEY, + execution_id TEXT NOT NULL, + task_id TEXT, + agent_id TEXT NOT NULL, + turn_index INTEGER NOT NULL CHECK (turn_index >= 1), + timestamp TIMESTAMPTZ NOT NULL, + prompt_summary TEXT, + response_summary TEXT, + decision TEXT, + tool_calls JSONB NOT NULL DEFAULT '[]'::jsonb, + input_tokens INTEGER NOT NULL DEFAULT 0 CHECK (input_tokens >= 0), + output_tokens INTEGER NOT NULL DEFAULT 0 CHECK (output_tokens >= 0), + cost NUMERIC(12, 6) NOT NULL DEFAULT 0.0 CHECK (cost >= 0), + status TEXT NOT NULL, + intervention_kind TEXT +); + +CREATE INDEX idx_frf_execution_turn + ON flight_recorder_frames(execution_id, turn_index); +CREATE INDEX idx_frf_task_id ON flight_recorder_frames(task_id); +CREATE INDEX idx_frf_agent_id ON flight_recorder_frames(agent_id); +CREATE INDEX idx_frf_timestamp ON flight_recorder_frames(timestamp); diff --git a/src/synthorg/persistence/protocol.py b/src/synthorg/persistence/protocol.py index 27fcfd26bd..2ddfe67ac7 100644 --- a/src/synthorg/persistence/protocol.py +++ b/src/synthorg/persistence/protocol.py @@ -72,6 +72,9 @@ FineTuneCheckpointRepository, # noqa: TC001 FineTuneRunRepository, # noqa: TC001 ) +from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrameRepository, # noqa: TC001 +) from synthorg.persistence.idempotency_protocol import ( IdempotencyRepository, # noqa: TC001 ) @@ -398,6 +401,11 @@ def checkpoints(self) -> CheckpointRepository: """Repository for Checkpoint persistence.""" ... + @property + def flight_recorder_frames(self) -> FlightRecorderFrameRepository: + """Repository for flight-recorder frame persistence.""" + ... 
+ @property def heartbeats(self) -> HeartbeatRepository: """Repository for Heartbeat persistence.""" diff --git a/src/synthorg/persistence/sqlite/__init__.py b/src/synthorg/persistence/sqlite/__init__.py index 9e45bb6e74..698e43605c 100644 --- a/src/synthorg/persistence/sqlite/__init__.py +++ b/src/synthorg/persistence/sqlite/__init__.py @@ -10,6 +10,9 @@ from synthorg.persistence.sqlite.checkpoint_repo import ( SQLiteCheckpointRepository, ) +from synthorg.persistence.sqlite.flight_recorder_repo import ( + SQLiteFlightRecorderFrameRepository, +) from synthorg.persistence.sqlite.heartbeat_repo import ( SQLiteHeartbeatRepository, ) @@ -24,6 +27,7 @@ "SQLiteAuditRepository", "SQLiteCheckpointRepository", "SQLiteCostRecordRepository", + "SQLiteFlightRecorderFrameRepository", "SQLiteHeartbeatRepository", "SQLiteMessageRepository", "SQLitePersistenceBackend", diff --git a/src/synthorg/persistence/sqlite/_backend_accessors.py b/src/synthorg/persistence/sqlite/_backend_accessors.py index bde661bdc3..cb135af29d 100644 --- a/src/synthorg/persistence/sqlite/_backend_accessors.py +++ b/src/synthorg/persistence/sqlite/_backend_accessors.py @@ -60,6 +60,9 @@ FineTuneCheckpointRepository, FineTuneRunRepository, ) + from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrameRepository, + ) from synthorg.persistence.idempotency_protocol import IdempotencyRepository from synthorg.persistence.knowledge_protocol import ( ChunkProvenanceRepository, @@ -147,6 +150,7 @@ class _BackendRepositoryAccessors: _users: UserRepository | None _api_keys: ApiKeyRepository | None _checkpoints: CheckpointRepository | None + _flight_recorder_frames: FlightRecorderFrameRepository | None _heartbeats: HeartbeatRepository | None _agent_states: AgentStateRepository | None _settings: SettingsRepository | None @@ -293,6 +297,14 @@ def checkpoints(self) -> CheckpointRepository: """Repository for Checkpoint persistence.""" return self._require_connected(self._checkpoints, "checkpoints") + 
@property + def flight_recorder_frames(self) -> FlightRecorderFrameRepository: + """Repository for flight-recorder frame persistence.""" + return self._require_connected( + self._flight_recorder_frames, + "flight_recorder_frames", + ) + @property def heartbeats(self) -> HeartbeatRepository: """Repository for Heartbeat persistence.""" diff --git a/src/synthorg/persistence/sqlite/backend.py b/src/synthorg/persistence/sqlite/backend.py index 454cd8a464..8a0b2d85f0 100644 --- a/src/synthorg/persistence/sqlite/backend.py +++ b/src/synthorg/persistence/sqlite/backend.py @@ -70,6 +70,9 @@ SQLiteFineTuneCheckpointRepository, SQLiteFineTuneRunRepository, ) +from synthorg.persistence.sqlite.flight_recorder_repo import ( + SQLiteFlightRecorderFrameRepository, +) from synthorg.persistence.sqlite.heartbeat_repo import ( SQLiteHeartbeatRepository, ) @@ -237,6 +240,7 @@ def __init__(self, config: SQLiteConfig) -> None: # noqa: PLR0915 -- repo regis self._users: SQLiteUserRepository | None = None self._api_keys: SQLiteApiKeyRepository | None = None self._checkpoints: SQLiteCheckpointRepository | None = None + self._flight_recorder_frames: SQLiteFlightRecorderFrameRepository | None = None self._heartbeats: SQLiteHeartbeatRepository | None = None self._agent_states: SQLiteAgentStateRepository | None = None self._settings: SQLiteSettingsRepository | None = None @@ -311,6 +315,7 @@ def _clear_state(self) -> None: # noqa: PLR0915 -- repo registry reset intentio self._users = None self._api_keys = None self._checkpoints = None + self._flight_recorder_frames = None self._heartbeats = None self._agent_states = None self._settings = None @@ -529,6 +534,10 @@ def _create_repositories(self) -> None: # noqa: PLR0915 self._db, write_context=self.write_context, ) + self._flight_recorder_frames = SQLiteFlightRecorderFrameRepository( + self._db, + write_context=self.write_context, + ) self._heartbeats = SQLiteHeartbeatRepository( self._db, write_context=self.write_context, diff --git 
a/src/synthorg/persistence/sqlite/flight_recorder_repo.py b/src/synthorg/persistence/sqlite/flight_recorder_repo.py new file mode 100644 index 0000000000..06c29967ca --- /dev/null +++ b/src/synthorg/persistence/sqlite/flight_recorder_repo.py @@ -0,0 +1,206 @@ +"""SQLite repository implementation for flight-recorder frames.""" +# ruff: noqa: S608 -- dynamic WHERE built from hardcoded column names only + +import contextlib +import json +import sqlite3 +from typing import TYPE_CHECKING + +import aiosqlite +from pydantic import ValidationError + +from synthorg.core.persistence_errors import DuplicateRecordError, QueryError +from synthorg.observability import get_logger, safe_error_description +from synthorg.observability.events.persistence import ( + PERSISTENCE_FLIGHT_RECORDER_DELETE_FAILED, + PERSISTENCE_FLIGHT_RECORDER_DESERIALIZE_FAILED, + PERSISTENCE_FLIGHT_RECORDER_QUERY_FAILED, + PERSISTENCE_FLIGHT_RECORDER_SAVE_FAILED, +) +from synthorg.persistence._generics import DEFAULT_PAGE_SIZE +from synthorg.persistence._shared import normalize_utc +from synthorg.persistence._shared.datetime_marshaller import format_iso_utc +from synthorg.persistence._shared.pagination import validate_pagination_args +from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrame, + FlightRecorderFrameFilterSpec, +) +from synthorg.persistence.sqlite._shared import ( + WriteContext, + is_unique_constraint_error, +) + +if TYPE_CHECKING: + from datetime import datetime + +logger = get_logger(__name__) + +_COLUMNS = ( + "id, execution_id, task_id, agent_id, turn_index, timestamp, " + "prompt_summary, response_summary, decision, tool_calls, " + "input_tokens, output_tokens, cost, status, intervention_kind" +) + + +class SQLiteFlightRecorderFrameRepository: + """SQLite implementation of the ``FlightRecorderFrameRepository`` protocol. + + Args: + db: An open aiosqlite connection. + write_context: Async context manager that serializes writes on + the shared connection. 
+ """ + + def __init__( + self, + db: aiosqlite.Connection, + *, + write_context: WriteContext, + ) -> None: + self._db = db + self._write_context = write_context + + async def append(self, frame: FlightRecorderFrame) -> None: + """Persist one frame (append-only; a duplicate id is a violation).""" + async with self._write_context(): + try: + data = self._to_row(frame) + await self._db.execute( + f"""\ +INSERT INTO flight_recorder_frames ({_COLUMNS}) VALUES ( + :id, :execution_id, :task_id, :agent_id, :turn_index, :timestamp, + :prompt_summary, :response_summary, :decision, :tool_calls, + :input_tokens, :output_tokens, :cost, :status, :intervention_kind +)""", + data, + ) + await self._db.commit() + except (sqlite3.Error, aiosqlite.Error) as exc: + with contextlib.suppress(sqlite3.Error, aiosqlite.Error): + await self._db.rollback() + if is_unique_constraint_error(exc): + msg = f"Flight recorder frame {frame.id!r} already exists" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_SAVE_FAILED, + frame_id=frame.id, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise DuplicateRecordError(msg) from exc + msg = f"Failed to save flight recorder frame {frame.id!r}" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_SAVE_FAILED, + frame_id=frame.id, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc + + async def query( + self, + filter_spec: FlightRecorderFrameFilterSpec, + *, + limit: int = DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> tuple[FlightRecorderFrame, ...]: + """Return frames matching the filter, newest-first by turn index.""" + limit = validate_pagination_args( + limit, offset, event=PERSISTENCE_FLIGHT_RECORDER_QUERY_FAILED + ) + conditions: list[str] = [] + params: list[object] = [] + if filter_spec.execution_id is not None: + conditions.append("execution_id = ?") + params.append(filter_spec.execution_id) + if filter_spec.task_id is not None: + conditions.append("task_id = ?") + 
params.append(filter_spec.task_id) + if filter_spec.agent_id is not None: + conditions.append("agent_id = ?") + params.append(filter_spec.agent_id) + if filter_spec.turn_index_min is not None: + conditions.append("turn_index >= ?") + params.append(filter_spec.turn_index_min) + if filter_spec.turn_index_max is not None: + conditions.append("turn_index <= ?") + params.append(filter_spec.turn_index_max) + where = " AND ".join(conditions) if conditions else "1=1" + sql = ( + f"SELECT {_COLUMNS} FROM flight_recorder_frames WHERE {where} " + "ORDER BY turn_index DESC, timestamp DESC LIMIT ? OFFSET ?" + ) + params.extend([limit, offset]) + try: + cursor = await self._db.execute(sql, params) + rows = await cursor.fetchall() + except (sqlite3.Error, aiosqlite.Error) as exc: + msg = "Failed to query flight recorder frames" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_QUERY_FAILED, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc + return tuple(self._row_to_model(dict(r)) for r in rows) + + async def purge_before(self, threshold: datetime) -> int: + """Delete frames with ``timestamp < threshold``. + + ``threshold`` must be timezone-aware; a naive value would make + the cut-off ambiguous against UTC-formatted stored timestamps. 
+ """ + if threshold.tzinfo is None: + msg = f"threshold must be timezone-aware, got naive {threshold!r}" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_DELETE_FAILED, + error="naive_threshold", + error_type="ValueError", + ) + raise QueryError(msg) + async with self._write_context(): + try: + cursor = await self._db.execute( + "DELETE FROM flight_recorder_frames WHERE timestamp < ?", + (format_iso_utc(threshold),), + ) + count = cursor.rowcount + await self._db.commit() + except (sqlite3.Error, aiosqlite.Error) as exc: + with contextlib.suppress(sqlite3.Error, aiosqlite.Error): + await self._db.rollback() + msg = "Failed to purge flight recorder frames by threshold" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_DELETE_FAILED, + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc + return count + + def _to_row(self, frame: FlightRecorderFrame) -> dict[str, object]: + """Flatten a frame into a row dict (tool_calls JSON-encoded).""" + data = frame.model_dump(mode="json") + data["tool_calls"] = json.dumps(list(frame.tool_calls)) + data["timestamp"] = format_iso_utc(normalize_utc(frame.timestamp)) + return data + + def _row_to_model(self, row: dict[str, object]) -> FlightRecorderFrame: + """Convert a database row to a ``FlightRecorderFrame`` model. + + Raises: + QueryError: If the row cannot be deserialized. 
+ """ + try: + raw_tool_calls = row.get("tool_calls") + if isinstance(raw_tool_calls, str): + row["tool_calls"] = tuple(json.loads(raw_tool_calls)) + return FlightRecorderFrame.model_validate(row) + except (ValidationError, json.JSONDecodeError) as exc: + msg = f"Failed to deserialize flight recorder frame {row.get('id')!r}" + logger.warning( + PERSISTENCE_FLIGHT_RECORDER_DESERIALIZE_FAILED, + frame_id=row.get("id"), + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + raise QueryError(msg) from exc diff --git a/src/synthorg/persistence/sqlite/revisions/20260522000002_flight_recorder.sql b/src/synthorg/persistence/sqlite/revisions/20260522000002_flight_recorder.sql new file mode 100644 index 0000000000..fbef52a617 --- /dev/null +++ b/src/synthorg/persistence/sqlite/revisions/20260522000002_flight_recorder.sql @@ -0,0 +1,24 @@ +-- Flight-recorder frames: per-turn cockpit replay records (append-only). +CREATE TABLE flight_recorder_frames ( + id TEXT NOT NULL PRIMARY KEY, + execution_id TEXT NOT NULL, + task_id TEXT, + agent_id TEXT NOT NULL, + turn_index INTEGER NOT NULL CHECK (turn_index >= 1), + timestamp TEXT NOT NULL, + prompt_summary TEXT, + response_summary TEXT, + decision TEXT, + tool_calls TEXT NOT NULL DEFAULT '[]', + input_tokens INTEGER NOT NULL DEFAULT 0 CHECK (input_tokens >= 0), + output_tokens INTEGER NOT NULL DEFAULT 0 CHECK (output_tokens >= 0), + cost REAL NOT NULL DEFAULT 0.0 CHECK (cost >= 0), + status TEXT NOT NULL, + intervention_kind TEXT +); + +CREATE INDEX idx_frf_execution_turn + ON flight_recorder_frames(execution_id, turn_index); +CREATE INDEX idx_frf_task_id ON flight_recorder_frames(task_id); +CREATE INDEX idx_frf_agent_id ON flight_recorder_frames(agent_id); +CREATE INDEX idx_frf_timestamp ON flight_recorder_frames(timestamp); diff --git a/src/synthorg/settings/definitions/__init__.py b/src/synthorg/settings/definitions/__init__.py index f01527fdbe..caf0d184a4 100644 --- 
a/src/synthorg/settings/definitions/__init__.py +++ b/src/synthorg/settings/definitions/__init__.py @@ -10,6 +10,7 @@ backup, budget, client, + cockpit, communication, company, coordination, @@ -38,6 +39,7 @@ "backup", "budget", "client", + "cockpit", "communication", "company", "coordination", diff --git a/src/synthorg/settings/definitions/cockpit.py b/src/synthorg/settings/definitions/cockpit.py new file mode 100644 index 0000000000..ea1ab6373a --- /dev/null +++ b/src/synthorg/settings/definitions/cockpit.py @@ -0,0 +1,154 @@ +"""Cockpit namespace setting definitions. + +Knobs for the mission-control cockpit: flight-recorder capture and +retention, live-activity stuck/runaway heuristics, and the pluggable +recorder-sink and steering-directive strategies. Consumers live in +``src/synthorg/engine/flight_recording/``, ``src/synthorg/engine/cockpit/``, +``src/synthorg/engine/intervention/`` and ``api/controllers/cockpit.py``. +""" + +from synthorg.settings.enums import SettingLevel, SettingNamespace, SettingType +from synthorg.settings.models import SettingDefinition +from synthorg.settings.registry import get_registry + +_r = get_registry() + +_r.register( + SettingDefinition( + namespace=SettingNamespace.COCKPIT, + key="flight_recorder_enabled", + type=SettingType.BOOLEAN, + default="true", + description=( + "Capture a flight-recorder frame after each agent turn for" + " step-by-step cockpit replay. Disabling stops new frames" + " being recorded; existing frames remain queryable." + ), + group="Flight Recorder", + level=SettingLevel.BASIC, + ) +) + +_r.register( + SettingDefinition( + namespace=SettingNamespace.COCKPIT, + key="flight_recorder_retention_days", + type=SettingType.INTEGER, + default="90", + description=( + "Retain flight-recorder frames for this many days; the daily" + " purge loop removes frames older than the cut-off." 
+ ), + group="Flight Recorder", + level=SettingLevel.ADVANCED, + min_value=1, + max_value=3650, + ) +) + +_r.register( + SettingDefinition( + namespace=SettingNamespace.COCKPIT, + key="flight_recorder_summary_max_chars", + type=SettingType.INTEGER, + default="2000", + description=( + "Maximum length of the redacted prompt/response summaries" + " stored on each frame; longer content is truncated." + ), + group="Flight Recorder", + level=SettingLevel.ADVANCED, + min_value=100, + max_value=20000, + ) +) + +_r.register( + SettingDefinition( + namespace=SettingNamespace.COCKPIT, + key="flight_recorder_sink_strategy", + type=SettingType.ENUM, + default="persistence", + enum_values=("persistence", "noop"), + description=( + "Recorder sink implementation selected at boot. 'persistence'" + " appends frames to the connected backend; 'noop' discards" + " them (frames are not recorded)." + ), + group="Flight Recorder", + level=SettingLevel.ADVANCED, + read_only_post_init=True, + restart_required=True, + ) +) + +_r.register( + SettingDefinition( + namespace=SettingNamespace.COCKPIT, + key="stuck_idle_threshold_minutes", + type=SettingType.FLOAT, + default="10.0", + description=( + "An in-progress or blocked agent idle for longer than this is" + " flagged as stuck in the live activity snapshot." + ), + group="Live Activity", + level=SettingLevel.BASIC, + min_value=1.0, + max_value=1440.0, + ) +) + +_r.register( + SettingDefinition( + namespace=SettingNamespace.COCKPIT, + key="runaway_cost_threshold_percent", + type=SettingType.FLOAT, + default="150.0", + description=( + "An agent whose accumulated cost exceeds this percentage of" + " the approved forecast ceiling is flagged as runaway." 
+ ), + group="Live Activity", + level=SettingLevel.BASIC, + min_value=100.0, + max_value=1000.0, + ) +) + +_r.register( + SettingDefinition( + namespace=SettingNamespace.COCKPIT, + key="snapshot_interval_seconds", + type=SettingType.FLOAT, + default="5.0", + description=( + "Cadence at which the cockpit publishes a live activity" + " snapshot delta on the WebSocket cockpit channel." + ), + group="Live Activity", + level=SettingLevel.ADVANCED, + min_value=1.0, + max_value=300.0, + ) +) + +_r.register( + SettingDefinition( + namespace=SettingNamespace.COCKPIT, + key="steering_directive_strategy", + type=SettingType.ENUM, + default="safe_default", + enum_values=("safe_default",), + description=( + "Steering directive implementation selected at boot. The" + " 'safe_default' strategy applies hint and redirect" + " interventions as an INFO_REQUEST interrupt at the next safe" + " turn boundary." + ), + group="Intervention", + level=SettingLevel.ADVANCED, + read_only_post_init=True, + restart_required=True, + ) +) diff --git a/src/synthorg/settings/enums.py b/src/synthorg/settings/enums.py index 2bcdd91f2d..7061c38e7b 100644 --- a/src/synthorg/settings/enums.py +++ b/src/synthorg/settings/enums.py @@ -35,6 +35,7 @@ class SettingNamespace(StrEnum): TELEMETRY = "telemetry" EXTERNAL_API = "external_api" RESEARCH = "research" + COCKPIT = "cockpit" class SettingType(StrEnum): diff --git a/src/synthorg/workers/runtime_builder.py b/src/synthorg/workers/runtime_builder.py index 0c04d7ef44..03c537bc9e 100644 --- a/src/synthorg/workers/runtime_builder.py +++ b/src/synthorg/workers/runtime_builder.py @@ -31,6 +31,10 @@ from synthorg.core.enums import ToolCategory from synthorg.engine.agent_engine import AgentEngine from synthorg.engine.coordination.factory import build_coordinator +from synthorg.engine.flight_recording import ( + FlightRecorderSink, + build_flight_recorder_sink, +) from synthorg.engine.mcp_self_consumer import build_mcp_self_consumer from 
synthorg.engine.pipeline.factory import build_work_pipeline from synthorg.engine.routing.scorer import AgentTaskScorer, RoutingScorerConfig @@ -87,6 +91,9 @@ _GIT_TIMEOUT_KEY: str = "git_command_timeout_seconds" _DECOMPOSITION_NS: str = "coordination" _DECOMPOSITION_KEY: str = "decomposition_model" +_COCKPIT_NS: str = SettingNamespace.COCKPIT.value +_FR_ENABLED_KEY: str = "flight_recorder_enabled" +_FR_STRATEGY_KEY: str = "flight_recorder_sink_strategy" _ROUTING_POLICY_KEY: str = "routing_policy" _LEAF_THRESHOLD_KEY: str = "leaf_subtask_threshold" _BASELINE_WINDOW_KEY: str = "baseline_window_size" @@ -386,6 +393,30 @@ def _build_stakes_router_or_none( ) +async def _build_flight_recorder_sink(app_state: AppState) -> FlightRecorderSink: + """Resolve the cockpit flight-recorder sink for the boot engine. + + Reads the cockpit ``flight_recorder_enabled`` flag and the + ``flight_recorder_sink_strategy`` discriminator via the async + resolver (DB > env > default), and supplies the persistence-backed + frame repository only when persistence is connected. Without + persistence the factory degrades to the no-op sink, so a + persistence-less dev boot records nothing instead of crashing. 
+ """ + repository = ( + app_state.persistence.flight_recorder_frames + if app_state.has_persistence + else None + ) + enabled = await app_state.config_resolver.get_bool(_COCKPIT_NS, _FR_ENABLED_KEY) + strategy = await app_state.config_resolver.get_str(_COCKPIT_NS, _FR_STRATEGY_KEY) + return build_flight_recorder_sink( + repository, + enabled=enabled, + strategy=strategy, + ) + + def _construct_agent_engine( # noqa: PLR0913 -- boot collaborators threaded in app_state: AppState, provider: CompletionProvider, @@ -395,6 +426,7 @@ def _construct_agent_engine( # noqa: PLR0913 -- boot collaborators threaded in external_api_runtime: ExternalApiRuntime | None = None, *, active_provider_name: str, + flight_recorder_sink: FlightRecorderSink | None = None, ) -> AgentEngine: """Assemble the boot ``AgentEngine`` from live application state. @@ -434,6 +466,7 @@ def _construct_agent_engine( # noqa: PLR0913 -- boot collaborators threaded in event_stream_hub=app_state.event_stream_hub, interrupt_store=app_state.interrupt_store, external_api_runtime=external_api_runtime, + flight_recorder_sink=flight_recorder_sink, clock=app_state.clock, ) @@ -694,6 +727,7 @@ async def build_runtime_services( ) coordination_metrics_collector = _construct_coordination_collector(app_state) external_api_runtime = await _build_external_api_runtime(app_state) + flight_recorder_sink = await _build_flight_recorder_sink(app_state) engine = _construct_agent_engine( app_state, provider, @@ -702,6 +736,7 @@ async def build_runtime_services( coordination_metrics_collector, external_api_runtime, active_provider_name=names[0], + flight_recorder_sink=flight_recorder_sink, ) autonomy_resolver = AutonomyResolver( registry=ActionTypeRegistry(), diff --git a/tests/conformance/persistence/test_flight_recorder_repository.py b/tests/conformance/persistence/test_flight_recorder_repository.py new file mode 100644 index 0000000000..68250537ef --- /dev/null +++ 
b/tests/conformance/persistence/test_flight_recorder_repository.py @@ -0,0 +1,162 @@ +"""Conformance tests for ``FlightRecorderFrameRepository`` (SQLite + Postgres).""" + +from datetime import UTC, datetime, timedelta + +import pytest + +from synthorg.core.enums import InterventionKind, TaskStatus +from synthorg.core.persistence_errors import DuplicateRecordError +from synthorg.core.types import NotBlankStr +from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrame, + FlightRecorderFrameFilterSpec, +) +from synthorg.persistence.protocol import PersistenceBackend + +pytestmark = pytest.mark.integration + + +def _frame( # noqa: PLR0913 -- test fixture builder with keyword-only overrides + *, + frame_id: str = "frm-001", + execution_id: str = "exec-001", + task_id: str | None = "task-001", + agent_id: str = "agent-001", + turn_index: int = 1, + status: TaskStatus = TaskStatus.IN_PROGRESS, + intervention_kind: InterventionKind | None = None, + timestamp: datetime | None = None, +) -> FlightRecorderFrame: + return FlightRecorderFrame( + id=NotBlankStr(frame_id), + execution_id=NotBlankStr(execution_id), + task_id=NotBlankStr(task_id) if task_id is not None else None, + agent_id=NotBlankStr(agent_id), + turn_index=turn_index, + timestamp=timestamp or datetime.now(UTC), + prompt_summary="redacted prompt", + response_summary="redacted response", + decision="tool_call", + tool_calls=("search", "write_file"), + input_tokens=120, + output_tokens=45, + cost=0.0021, + status=status, + intervention_kind=intervention_kind, + ) + + +class TestFlightRecorderFrameRepository: + async def test_append_and_query(self, backend: PersistenceBackend) -> None: + await backend.flight_recorder_frames.append(_frame()) + + page = await backend.flight_recorder_frames.query( + FlightRecorderFrameFilterSpec(execution_id=NotBlankStr("exec-001")), + ) + assert len(page) == 1 + frame = page[0] + assert frame.id == "frm-001" + assert frame.tool_calls == ("search", "write_file") 
+ assert frame.cost == pytest.approx(0.0021) + assert frame.status is TaskStatus.IN_PROGRESS + + async def test_append_duplicate_id_raises( + self, backend: PersistenceBackend + ) -> None: + await backend.flight_recorder_frames.append(_frame(frame_id="dup")) + with pytest.raises(DuplicateRecordError): + await backend.flight_recorder_frames.append( + _frame(frame_id="dup", turn_index=2), + ) + + async def test_query_newest_first_by_turn( + self, backend: PersistenceBackend + ) -> None: + await backend.flight_recorder_frames.append(_frame(frame_id="a", turn_index=1)) + await backend.flight_recorder_frames.append(_frame(frame_id="c", turn_index=3)) + await backend.flight_recorder_frames.append(_frame(frame_id="b", turn_index=2)) + + page = await backend.flight_recorder_frames.query( + FlightRecorderFrameFilterSpec(execution_id=NotBlankStr("exec-001")), + ) + assert [f.turn_index for f in page] == [3, 2, 1] + + async def test_query_paginates(self, backend: PersistenceBackend) -> None: + for turn in range(1, 6): + await backend.flight_recorder_frames.append( + _frame(frame_id=f"f-{turn}", turn_index=turn), + ) + page = await backend.flight_recorder_frames.query( + FlightRecorderFrameFilterSpec(execution_id=NotBlankStr("exec-001")), + limit=2, + offset=0, + ) + assert [f.turn_index for f in page] == [5, 4] + + async def test_query_filters_by_turn_range( + self, backend: PersistenceBackend + ) -> None: + for turn in range(1, 6): + await backend.flight_recorder_frames.append( + _frame(frame_id=f"f-{turn}", turn_index=turn), + ) + page = await backend.flight_recorder_frames.query( + FlightRecorderFrameFilterSpec( + execution_id=NotBlankStr("exec-001"), + turn_index_min=2, + turn_index_max=4, + ), + ) + assert [f.turn_index for f in page] == [4, 3, 2] + + async def test_query_filters_by_agent(self, backend: PersistenceBackend) -> None: + await backend.flight_recorder_frames.append( + _frame(frame_id="a", agent_id="alice", turn_index=1), + ) + await 
backend.flight_recorder_frames.append( + _frame(frame_id="b", agent_id="bob", turn_index=2), + ) + page = await backend.flight_recorder_frames.query( + FlightRecorderFrameFilterSpec(agent_id=NotBlankStr("bob")), + ) + assert len(page) == 1 + assert page[0].agent_id == "bob" + + async def test_intervention_kind_round_trips( + self, backend: PersistenceBackend + ) -> None: + await backend.flight_recorder_frames.append( + _frame(frame_id="hint", intervention_kind=InterventionKind.HINT), + ) + page = await backend.flight_recorder_frames.query( + FlightRecorderFrameFilterSpec(execution_id=NotBlankStr("exec-001")), + ) + assert page[0].intervention_kind is InterventionKind.HINT + + async def test_null_task_id_round_trips(self, backend: PersistenceBackend) -> None: + await backend.flight_recorder_frames.append( + _frame(frame_id="no-task", task_id=None), + ) + page = await backend.flight_recorder_frames.query( + FlightRecorderFrameFilterSpec(execution_id=NotBlankStr("exec-001")), + ) + assert page[0].task_id is None + + async def test_purge_before_removes_old_frames( + self, backend: PersistenceBackend + ) -> None: + now = datetime.now(UTC) + await backend.flight_recorder_frames.append( + _frame(frame_id="old", turn_index=1, timestamp=now - timedelta(days=30)), + ) + await backend.flight_recorder_frames.append( + _frame(frame_id="new", turn_index=2, timestamp=now), + ) + removed = await backend.flight_recorder_frames.purge_before( + now - timedelta(days=1), + ) + assert removed == 1 + page = await backend.flight_recorder_frames.query( + FlightRecorderFrameFilterSpec(execution_id=NotBlankStr("exec-001")), + ) + assert [f.id for f in page] == ["new"] diff --git a/tests/unit/api/fakes.py b/tests/unit/api/fakes.py index 35061438ae..f43fe0539e 100644 --- a/tests/unit/api/fakes.py +++ b/tests/unit/api/fakes.py @@ -32,6 +32,10 @@ CollaborationMetricRecord, TaskMetricRecord, ) +from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrame, + 
FlightRecorderFrameFilterSpec, +) from synthorg.persistence.preset_protocol import Preset from synthorg.security.models import AuditEntry, AuditVerdictStr from synthorg.security.timeout.parked_context import ParkedContext @@ -510,6 +514,53 @@ async def delete_by_execution(self, execution_id: str) -> int: return len(to_delete) +class FakeFlightRecorderFrameRepository: + """In-memory flight-recorder frame repository for tests.""" + + def __init__(self) -> None: + self._frames: dict[str, FlightRecorderFrame] = {} + + async def append(self, frame: FlightRecorderFrame) -> None: + if frame.id in self._frames: + msg = f"Flight recorder frame {frame.id!r} already exists" + raise DuplicateRecordError(msg) + self._frames[frame.id] = frame + + async def query( + self, + filter_spec: FlightRecorderFrameFilterSpec, + *, + limit: int = 100, # lint-allow: magic-numbers -- ADR-0001 + offset: int = 0, + ) -> tuple[FlightRecorderFrame, ...]: + candidates = list(self._frames.values()) + if filter_spec.execution_id is not None: + candidates = [ + f for f in candidates if f.execution_id == filter_spec.execution_id + ] + if filter_spec.task_id is not None: + candidates = [f for f in candidates if f.task_id == filter_spec.task_id] + if filter_spec.agent_id is not None: + candidates = [f for f in candidates if f.agent_id == filter_spec.agent_id] + if filter_spec.turn_index_min is not None: + candidates = [ + f for f in candidates if f.turn_index >= filter_spec.turn_index_min + ] + if filter_spec.turn_index_max is not None: + candidates = [ + f for f in candidates if f.turn_index <= filter_spec.turn_index_max + ] + candidates.sort(key=lambda f: (f.turn_index, f.timestamp), reverse=True) + return tuple(candidates[offset : offset + limit]) + + async def purge_before(self, threshold: datetime) -> int: + before = len(self._frames) + self._frames = { + k: v for k, v in self._frames.items() if v.timestamp >= threshold + } + return before - len(self._frames) + + class FakeHeartbeatRepository: 
"""In-memory heartbeat repository for tests.""" diff --git a/tests/unit/api/fakes_backend.py b/tests/unit/api/fakes_backend.py index bf02d3b466..8975195c7d 100644 --- a/tests/unit/api/fakes_backend.py +++ b/tests/unit/api/fakes_backend.py @@ -34,6 +34,7 @@ FakeCostRecordRepository, FakeDecisionRepository, FakeDocsRepository, + FakeFlightRecorderFrameRepository, FakeHeartbeatRepository, FakeLifecycleEventRepository, FakeMessageRepository, @@ -637,6 +638,7 @@ def __init__(self) -> None: # noqa: PLR0915 -- one assignment per repo; splitti self._users = FakeUserRepository() self._api_keys = FakeApiKeyRepository() self._checkpoints = FakeCheckpointRepository() + self._flight_recorder_frames = FakeFlightRecorderFrameRepository() self._heartbeats = FakeHeartbeatRepository() self._agent_states = FakeAgentStateRepository() self._settings_repo = FakeSettingsRepository() @@ -802,6 +804,10 @@ def api_keys(self) -> FakeApiKeyRepository: def checkpoints(self) -> FakeCheckpointRepository: return self._checkpoints + @property + def flight_recorder_frames(self) -> FakeFlightRecorderFrameRepository: + return self._flight_recorder_frames + @property def heartbeats(self) -> FakeHeartbeatRepository: return self._heartbeats diff --git a/tests/unit/engine/cockpit/test_service.py b/tests/unit/engine/cockpit/test_service.py new file mode 100644 index 0000000000..c9f0311951 --- /dev/null +++ b/tests/unit/engine/cockpit/test_service.py @@ -0,0 +1,130 @@ +"""Unit tests for the cockpit live-activity service.""" + +from datetime import UTC, datetime, timedelta +from unittest.mock import AsyncMock + +import pytest +from tests._shared import FakeClock, mock_of +from tests.unit.api.fakes import FakeFlightRecorderFrameRepository + +from synthorg.core.enums import TaskStatus +from synthorg.core.task import Task +from synthorg.core.types import NotBlankStr +from synthorg.engine.cockpit import CockpitService +from synthorg.engine.task_engine import TaskEngine +from 
synthorg.persistence.flight_recorder_protocol import FlightRecorderFrame +from synthorg.settings.resolver import ConfigResolver + +pytestmark = pytest.mark.unit + +_NOW = datetime(2026, 5, 22, 12, 0, tzinfo=UTC) + + +def _task(base: Task, *, task_id: str, agent: str, budget: float) -> Task: + return base.model_copy( + update={ + "id": NotBlankStr(task_id), + "status": TaskStatus.IN_PROGRESS, + "assigned_to": NotBlankStr(agent), + "budget_limit": budget, + }, + ) + + +def _frame( + *, + task_id: str, + turn: int, + cost: float, + ts: datetime, +) -> FlightRecorderFrame: + return FlightRecorderFrame( + id=NotBlankStr(f"{task_id}-{turn}"), + execution_id=NotBlankStr(f"exec-{task_id}"), + task_id=NotBlankStr(task_id), + agent_id=NotBlankStr("agent"), + turn_index=turn, + timestamp=ts, + cost=cost, + status=TaskStatus.IN_PROGRESS, + ) + + +def _service( + tasks: tuple[Task, ...], + repo: FakeFlightRecorderFrameRepository, + *, + stuck_minutes: float = 10.0, + runaway_pct: float = 150.0, +) -> CockpitService: + task_engine = mock_of[TaskEngine]( + list_tasks=AsyncMock(side_effect=[(tasks, len(tasks)), ((), 0)]), + ) + resolver = mock_of[ConfigResolver]( + get_float=AsyncMock(side_effect=[stuck_minutes, runaway_pct]), + ) + return CockpitService( + task_engine, + repo, + config_resolver=resolver, + clock=FakeClock(start=_NOW), + ) + + +class TestCockpitService: + async def test_snapshot_lists_active_work( + self, sample_task_with_criteria: Task + ) -> None: + repo = FakeFlightRecorderFrameRepository() + await repo.append( + _frame(task_id="t1", turn=2, cost=0.4, ts=_NOW - timedelta(minutes=1)), + ) + task = _task(sample_task_with_criteria, task_id="t1", agent="alice", budget=0.0) + service = _service((task,), repo) + + snapshot = await service.get_live_snapshot() + assert snapshot.active_count == 1 + activity = snapshot.agents[0] + assert activity.agent_id == "alice" + assert activity.turn_count == 2 + assert activity.cost == pytest.approx(0.4) + assert activity.is_stuck 
is False + assert snapshot.stuck_agents == () + + async def test_idle_task_flagged_stuck( + self, sample_task_with_criteria: Task + ) -> None: + repo = FakeFlightRecorderFrameRepository() + await repo.append( + _frame(task_id="t1", turn=1, cost=0.1, ts=_NOW - timedelta(minutes=30)), + ) + task = _task(sample_task_with_criteria, task_id="t1", agent="bob", budget=0.0) + service = _service((task,), repo, stuck_minutes=10.0) + + snapshot = await service.get_live_snapshot() + assert snapshot.agents[0].is_stuck is True + assert snapshot.stuck_agents == ("bob",) + + async def test_overspend_flagged_runaway( + self, sample_task_with_criteria: Task + ) -> None: + repo = FakeFlightRecorderFrameRepository() + # budget 1.0, runaway at 150% => cost > 1.5 triggers. + await repo.append( + _frame(task_id="t1", turn=1, cost=2.0, ts=_NOW - timedelta(minutes=1)), + ) + task = _task(sample_task_with_criteria, task_id="t1", agent="carol", budget=1.0) + service = _service((task,), repo, runaway_pct=150.0) + + snapshot = await service.get_live_snapshot() + assert snapshot.agents[0].is_runaway is True + assert snapshot.runaway_agents == ("carol",) + + async def test_no_active_work_empty_snapshot(self) -> None: + repo = FakeFlightRecorderFrameRepository() + service = _service((), repo) + + snapshot = await service.get_live_snapshot() + assert snapshot.active_count == 0 + assert snapshot.agents == () + assert snapshot.total_cost == pytest.approx(0.0) diff --git a/tests/unit/engine/flight_recording/test_service.py b/tests/unit/engine/flight_recording/test_service.py new file mode 100644 index 0000000000..685281156c --- /dev/null +++ b/tests/unit/engine/flight_recording/test_service.py @@ -0,0 +1,65 @@ +"""Unit tests for the flight-recorder query + seek service.""" + +from datetime import UTC, datetime + +import pytest +from tests.unit.api.fakes import FakeFlightRecorderFrameRepository + +from synthorg.core.enums import TaskStatus +from synthorg.core.types import NotBlankStr +from 
synthorg.engine.flight_recording import FlightRecorderService +from synthorg.persistence.flight_recorder_protocol import FlightRecorderFrame + +pytestmark = pytest.mark.unit + + +def _frame( + turn: int, *, execution_id: str = "exec-1", cost: float = 0.5 +) -> FlightRecorderFrame: + return FlightRecorderFrame( + id=NotBlankStr(f"{execution_id}-{turn}"), + execution_id=NotBlankStr(execution_id), + task_id=NotBlankStr("task-1"), + agent_id=NotBlankStr("agent-1"), + turn_index=turn, + timestamp=datetime.now(UTC), + response_summary=f"reply {turn}", + decision="completed", + cost=cost, + status=TaskStatus.IN_PROGRESS, + ) + + +async def _seed(repo: FakeFlightRecorderFrameRepository, turns: int) -> None: + for turn in range(1, turns + 1): + await repo.append(_frame(turn)) + + +class TestFlightRecorderService: + async def test_get_frames_newest_first(self) -> None: + repo = FakeFlightRecorderFrameRepository() + await _seed(repo, 3) + service = FlightRecorderService(repo) + + frames = await service.get_frames("exec-1") + assert [f.turn_index for f in frames] == [3, 2, 1] + + async def test_seek_returns_ascending_prefix(self) -> None: + repo = FakeFlightRecorderFrameRepository() + await _seed(repo, 5) + service = FlightRecorderService(repo) + + view = await service.seek("exec-1", 3) + assert [f.turn_index for f in view.frames] == [1, 2, 3] + assert view.current_frame is not None + assert view.current_frame.turn_index == 3 + assert view.cumulative_cost == pytest.approx(1.5) + + async def test_seek_missing_turn_has_no_current(self) -> None: + repo = FakeFlightRecorderFrameRepository() + await _seed(repo, 2) + service = FlightRecorderService(repo) + + view = await service.seek("exec-1", 5) + assert view.current_frame is None + assert [f.turn_index for f in view.frames] == [1, 2] diff --git a/tests/unit/engine/flight_recording/test_sink.py b/tests/unit/engine/flight_recording/test_sink.py new file mode 100644 index 0000000000..cae4f13dca --- /dev/null +++ 
b/tests/unit/engine/flight_recording/test_sink.py @@ -0,0 +1,211 @@ +"""Unit tests for the flight-recorder sink and frame builder.""" + +import pytest +from tests.unit.api.fakes import FakeFlightRecorderFrameRepository + +from synthorg.core.agent import AgentIdentity +from synthorg.core.enums import TaskStatus +from synthorg.core.task import Task +from synthorg.engine.context import AgentContext +from synthorg.engine.flight_recording import ( + NoOpFlightRecorderSink, + PersistenceFlightRecorderSink, + build_flight_recorder_sink, + build_frames, +) +from synthorg.engine.loop_protocol import ( + ExecutionResult, + TerminationReason, + TurnRecord, +) +from synthorg.providers.enums import FinishReason, MessageRole +from synthorg.providers.models import ChatMessage + +pytestmark = pytest.mark.unit + + +@pytest.fixture +def agent_context( + sample_agent_with_personality: AgentIdentity, + sample_task_with_criteria: Task, +) -> AgentContext: + return AgentContext.from_identity( + sample_agent_with_personality, + task=sample_task_with_criteria, + ) + + +def _turn(turn_number: int, *, tools: tuple[str, ...] 
= ()) -> TurnRecord: + return TurnRecord( + turn_number=turn_number, + input_tokens=10, + output_tokens=5, + cost=0.001, + tool_calls_made=tools, + finish_reason=FinishReason.TOOL_USE if tools else FinishReason.STOP, + ) + + +def _result( + identity_context: AgentContext, + *, + turns: tuple[TurnRecord, ...], + reason: TerminationReason = TerminationReason.COMPLETED, +) -> ExecutionResult: + return ExecutionResult( + context=identity_context, + termination_reason=reason, + turns=turns, + ) + + +def _context_with_replies(base: AgentContext, replies: list[str]) -> AgentContext: + messages = tuple( + ChatMessage(role=MessageRole.ASSISTANT, content=text) for text in replies + ) + return base.model_copy(update={"conversation": (*base.conversation, *messages)}) + + +class TestBuildFrames: + def test_one_frame_per_turn_with_content(self, agent_context: AgentContext) -> None: + ctx = _context_with_replies(agent_context, ["first", "done"]) + result = _result( + ctx, + turns=(_turn(1, tools=("search",)), _turn(2)), + ) + + frames = build_frames( + result, + execution_id="exec-1", + agent_id="agent-1", + task_id="task-1", + ) + + assert [f.turn_index for f in frames] == [1, 2] + assert frames[0].decision == "tool_call" + assert frames[0].tool_calls == ("search",) + assert frames[0].response_summary == "first" + assert frames[0].status is TaskStatus.IN_PROGRESS + # Terminal turn carries the run outcome. 
+ assert frames[1].decision == "completed" + assert frames[1].status is TaskStatus.COMPLETED + + def test_failed_run_terminal_status(self, agent_context: AgentContext) -> None: + ctx = _context_with_replies(agent_context, ["boom"]) + result = ExecutionResult( + context=ctx, + termination_reason=TerminationReason.ERROR, + turns=(_turn(1),), + error_message="boom", + ) + + frames = build_frames( + result, + execution_id="exec-1", + agent_id="agent-1", + task_id=None, + ) + assert frames[0].status is TaskStatus.FAILED + assert frames[0].task_id is None + + def test_summary_truncation(self, agent_context: AgentContext) -> None: + ctx = _context_with_replies(agent_context, ["x" * 50]) + result = _result(ctx, turns=(_turn(1),)) + + frames = build_frames( + result, + execution_id="exec-1", + agent_id="agent-1", + task_id="task-1", + summary_max_chars=10, + ) + assert frames[0].response_summary == "x" * 10 + + def test_no_turns_yields_no_frames(self, agent_context: AgentContext) -> None: + result = _result(agent_context, turns=()) + frames = build_frames( + result, + execution_id="exec-1", + agent_id="agent-1", + task_id="task-1", + ) + assert frames == () + + +class TestSinks: + async def test_persistence_sink_appends(self, agent_context: AgentContext) -> None: + repo = FakeFlightRecorderFrameRepository() + sink = PersistenceFlightRecorderSink(repo) + ctx = _context_with_replies(agent_context, ["a", "b"]) + frames = build_frames( + _result(ctx, turns=(_turn(1), _turn(2))), + execution_id="exec-1", + agent_id="agent-1", + task_id="task-1", + ) + + await sink.record_frames(frames) + + from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrameFilterSpec, + ) + + stored = await repo.query( + FlightRecorderFrameFilterSpec(execution_id="exec-1"), + ) + assert len(stored) == 2 + + async def test_persistence_sink_swallows_failure( + self, agent_context: AgentContext + ) -> None: + repo = FakeFlightRecorderFrameRepository() + sink = 
PersistenceFlightRecorderSink(repo) + ctx = _context_with_replies(agent_context, ["a"]) + frames = build_frames( + _result(ctx, turns=(_turn(1),)), + execution_id="exec-1", + agent_id="agent-1", + task_id="task-1", + ) + # Duplicate ids: the second record_frames call hits DuplicateRecordError + # for every frame, which the sink must swallow rather than raise. + await sink.record_frames(frames) + await sink.record_frames(frames) + + async def test_noop_sink_records_nothing(self, agent_context: AgentContext) -> None: + sink = NoOpFlightRecorderSink() + ctx = _context_with_replies(agent_context, ["a"]) + frames = build_frames( + _result(ctx, turns=(_turn(1),)), + execution_id="exec-1", + agent_id="agent-1", + task_id="task-1", + ) + await sink.record_frames(frames) # no-op, must not raise + + def test_factory_selects_noop_when_disabled(self) -> None: + repo = FakeFlightRecorderFrameRepository() + assert isinstance( + build_flight_recorder_sink(repo, enabled=False), + NoOpFlightRecorderSink, + ) + + def test_factory_selects_noop_for_strategy(self) -> None: + repo = FakeFlightRecorderFrameRepository() + assert isinstance( + build_flight_recorder_sink(repo, strategy="noop"), + NoOpFlightRecorderSink, + ) + + def test_factory_selects_persistence(self) -> None: + repo = FakeFlightRecorderFrameRepository() + assert isinstance( + build_flight_recorder_sink(repo), + PersistenceFlightRecorderSink, + ) + + def test_factory_noop_without_repository(self) -> None: + assert isinstance( + build_flight_recorder_sink(None), + NoOpFlightRecorderSink, + ) diff --git a/tests/unit/engine/intervention/test_steering.py b/tests/unit/engine/intervention/test_steering.py new file mode 100644 index 0000000000..0b1f9c4155 --- /dev/null +++ b/tests/unit/engine/intervention/test_steering.py @@ -0,0 +1,82 @@ +"""Unit tests for the cockpit steering directive.""" + +import pytest +from tests._shared import FakeClock + +from synthorg.communication.event_stream.interrupt import InterruptStore, 
InterruptType +from synthorg.core.enums import InterventionKind +from synthorg.engine.intervention import ( + SafeDefaultSteeringDirective, + build_steering_directive, +) + +pytestmark = pytest.mark.unit + + +class TestSafeDefaultSteeringDirective: + async def test_hint_queues_info_request_interrupt(self) -> None: + store = InterruptStore() + directive = SafeDefaultSteeringDirective(store, clock=FakeClock()) + + outcome = await directive.steer( + kind=InterventionKind.HINT, + execution_id="exec-1", + agent_id="agent-1", + details={"text": "use Postgres not Mongo"}, + ) + + assert outcome.applied is True + assert outcome.kind is InterventionKind.HINT + assert outcome.artifact_id is not None + pending = await store.get(outcome.artifact_id) + assert pending is not None + assert pending.type is InterruptType.INFO_REQUEST + assert pending.question == "use Postgres not Mongo" + + async def test_redirect_also_queues_interrupt(self) -> None: + store = InterruptStore() + directive = SafeDefaultSteeringDirective(store, clock=FakeClock()) + + outcome = await directive.steer( + kind=InterventionKind.REDIRECT, + execution_id="exec-1", + agent_id="agent-1", + details={"text": "pivot off the frontend"}, + ) + assert outcome.applied is True + assert outcome.artifact_id is not None + + async def test_pause_not_handled(self) -> None: + store = InterruptStore() + directive = SafeDefaultSteeringDirective(store, clock=FakeClock()) + + outcome = await directive.steer( + kind=InterventionKind.PAUSE, + execution_id="exec-1", + agent_id="agent-1", + details={}, + ) + assert outcome.applied is False + assert outcome.artifact_id is None + + async def test_empty_text_not_applied(self) -> None: + store = InterruptStore() + directive = SafeDefaultSteeringDirective(store, clock=FakeClock()) + + outcome = await directive.steer( + kind=InterventionKind.HINT, + execution_id="exec-1", + agent_id="agent-1", + details={"text": " "}, + ) + assert outcome.applied is False + + def 
test_factory_unknown_strategy_raises(self) -> None: + store = InterruptStore() + with pytest.raises(ValueError, match="Unknown steering directive"): + build_steering_directive(store, strategy="bogus") + + def test_factory_builds_default(self) -> None: + store = InterruptStore() + directive = build_steering_directive(store, clock=FakeClock()) + assert isinstance(directive, SafeDefaultSteeringDirective) diff --git a/web/src/api/types/enum-values.gen.ts b/web/src/api/types/enum-values.gen.ts index f65f53d126..9872672a13 100644 --- a/web/src/api/types/enum-values.gen.ts +++ b/web/src/api/types/enum-values.gen.ts @@ -647,7 +647,7 @@ export const SETTING_NAMESPACE_VALUES = [ 'workers', 'telemetry', 'external_api', - 'research', + 'cockpit', ] as const export type SettingNamespace = (typeof SETTING_NAMESPACE_VALUES)[number] diff --git a/web/src/api/types/openapi.gen.ts b/web/src/api/types/openapi.gen.ts index 0fe96bce64..6184a52704 100644 --- a/web/src/api/types/openapi.gen.ts +++ b/web/src/api/types/openapi.gen.ts @@ -8045,7 +8045,7 @@ export type components = { * 8xxx = internal. 
* @enum {integer} */ - readonly ErrorCode: 1000 | 1001 | 1002 | 1003 | 1004 | 1005 | 1006 | 1007 | 1008 | 1009 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 2011 | 3000 | 3001 | 3002 | 3003 | 3004 | 3005 | 3006 | 3007 | 3008 | 3009 | 3010 | 3011 | 3012 | 3013 | 3014 | 3015 | 3016 | 3017 | 3018 | 3019 | 3020 | 4000 | 4001 | 4002 | 4003 | 4004 | 4005 | 4006 | 4007 | 4008 | 4009 | 4010 | 4011 | 4012 | 4013 | 4014 | 4015 | 4016 | 4017 | 4018 | 5000 | 5001 | 5002 | 6000 | 6001 | 6002 | 6003 | 6004 | 6005 | 6006 | 6007 | 6008 | 7000 | 7001 | 7002 | 7003 | 7004 | 7005 | 7006 | 7007 | 7008 | 7009 | 7010 | 8000 | 8001 | 8002 | 8003 | 8004 | 8005 | 8006 | 8007 | 8008 | 8009 | 8010 | 8011 | 8012 | 8013 | 8014 | 8015 | 8016 | 8017 | 8018 | 8019 | 8020 | 8021 | 8022 | 8023 | 8024 | 8025 | 8026 | 8027 | 8028 | 8029; + readonly ErrorCode: 1000 | 1001 | 1002 | 1003 | 1004 | 1005 | 1006 | 1007 | 1008 | 1009 | 2000 | 2001 | 2002 | 2003 | 2004 | 2005 | 2006 | 2007 | 2008 | 2009 | 2010 | 3000 | 3001 | 3002 | 3003 | 3004 | 3005 | 3006 | 3007 | 3008 | 3009 | 3010 | 3011 | 3012 | 3013 | 3014 | 3015 | 3016 | 3017 | 3018 | 3019 | 4000 | 4001 | 4002 | 4003 | 4004 | 4005 | 4006 | 4007 | 4008 | 4009 | 4010 | 4011 | 4012 | 4013 | 4014 | 4015 | 4016 | 4017 | 4018 | 5000 | 5001 | 5002 | 6000 | 6001 | 6002 | 6003 | 6004 | 6005 | 6006 | 6007 | 7000 | 7001 | 7002 | 7003 | 7004 | 7005 | 7006 | 7007 | 7008 | 7009 | 7010 | 8000 | 8001 | 8002 | 8003 | 8004 | 8005 | 8006 | 8007 | 8008 | 8009 | 8010 | 8011 | 8012 | 8013 | 8014 | 8015 | 8016 | 8017 | 8018 | 8019 | 8020 | 8021 | 8022 | 8023 | 8024 | 8025; /** ErrorDetail */ readonly ErrorDetail: { readonly detail: string; @@ -11677,7 +11677,7 @@ export type components = { * can be edited at runtime via the settings API. 
* @enum {string} */ - readonly SettingNamespace: "api" | "client" | "company" | "providers" | "memory" | "budget" | "security" | "coordination" | "observability" | "backup" | "engine" | "communication" | "a2a" | "integrations" | "meta" | "notifications" | "objectives" | "simulations" | "tools" | "settings" | "hr" | "workers" | "telemetry" | "external_api" | "research"; + readonly SettingNamespace: "api" | "client" | "company" | "providers" | "memory" | "budget" | "security" | "coordination" | "observability" | "backup" | "engine" | "communication" | "a2a" | "integrations" | "meta" | "notifications" | "objectives" | "simulations" | "tools" | "settings" | "hr" | "workers" | "telemetry" | "external_api" | "cockpit"; /** * SettingSource * @description Origin of a resolved setting value. From 7fd903f2b4613017ba55f02c46251538129e9462 Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Fri, 22 May 2026 14:43:22 +0200 Subject: [PATCH 02/17] feat: cockpit REST controller, boot wiring, WS channel, manifest Adds /cockpit controller (live snapshot, flight-recorder frames + seek, pause/kill/hint/redirect interventions) registered in BASE_CONTROLLERS and 503-gated via AppState. Wires CockpitService/FlightRecorderService/SteeringDirective at boot via _try_wire_cockpit. Appends CHANNEL_COCKPIT, AppState cockpit seams, ENFORCED ghost-manifest lines, and regenerated TS DTOs. CockpitService takes thresholds at call time (controller resolves settings) to stay free of a wire-time resolver dependency. Controller + service unit tests. 
--- scripts/_ghost_wiring_manifest.txt | 6 + src/synthorg/api/app.py | 56 +++ src/synthorg/api/channels.py | 2 + src/synthorg/api/controllers/__init__.py | 2 + src/synthorg/api/controllers/cockpit.py | 235 +++++++++ src/synthorg/api/state.py | 68 +++ src/synthorg/engine/cockpit/service.py | 25 +- tests/unit/api/controllers/test_cockpit.py | 160 ++++++ tests/unit/engine/cockpit/test_service.py | 32 +- web/src/api/types/dtos.gen.ts | 13 + web/src/api/types/enum-values.gen.ts | 8 + web/src/api/types/openapi.gen.ts | 539 +++++++++++++++++++++ 12 files changed, 1121 insertions(+), 25 deletions(-) create mode 100644 src/synthorg/api/controllers/cockpit.py create mode 100644 tests/unit/api/controllers/test_cockpit.py diff --git a/scripts/_ghost_wiring_manifest.txt b/scripts/_ghost_wiring_manifest.txt index 89975f9d57..0ed8ae0821 100644 --- a/scripts/_ghost_wiring_manifest.txt +++ b/scripts/_ghost_wiring_manifest.txt @@ -103,3 +103,9 @@ ENFORCED ToolCreationApplier #1995 -- constructed by meta/toolsmith/factory.py:: ENFORCED DynamicToolRegistry #1995 -- constructed by meta/toolsmith/factory.py::build_toolsmith; mutable live authored-tool registry read behind the static surface ENFORCED install_dynamic_tool_layer #1995 -- called by api/app.py::_wire_toolsmith; layers the dynamic registry into the live MCP invoker so authored tools dispatch ENFORCED build_stakes_router #1998 -- called by workers/runtime_builder._build_stakes_router_or_none when a benchmark provider is wired; injected into AgentEngine for stakes-aware tier selection before budget downgrade +ENFORCED CockpitService #1981 -- constructed in api/app.py::_wire_cockpit_services and installed on AppState; serves the mission-control live-activity snapshot (who/what + stuck/runaway) +ENFORCED FlightRecorderService #1981 -- constructed in api/app.py::_wire_cockpit_services; frame-authoritative get/seek behind the flight-recorder cockpit endpoints +ENFORCED build_steering_directive #1981 -- called in 
api/app.py::_wire_cockpit_services; builds the SafeDefaultSteeringDirective that delivers cockpit hint/redirect as INFO_REQUEST interrupts +ENFORCED build_flight_recorder_sink #1981 -- called in workers/runtime_builder._build_flight_recorder_sink; selects the recorder sink the boot AgentEngine records per-turn frames through +ENFORCED SQLiteFlightRecorderFrameRepository #1981 -- constructed in persistence/sqlite/backend.py::_create_repositories; append-only flight-recorder frame store +ENFORCED PostgresFlightRecorderFrameRepository #1981 -- constructed in persistence/postgres/backend.py on connect; append-only flight-recorder frame store diff --git a/src/synthorg/api/app.py b/src/synthorg/api/app.py index 63a4918d77..0519dbb513 100644 --- a/src/synthorg/api/app.py +++ b/src/synthorg/api/app.py @@ -297,6 +297,61 @@ def _try_wire_cost_dial(app_state: AppState) -> None: ) +def _wire_cockpit_services(app_state: AppState) -> None: + """Construct the mission-control cockpit services from live state. + + Builds the live-activity ``CockpitService``, the flight-recorder + query/seek service, and the steering directive, then installs them + on ``AppState`` for the cockpit controllers and MCP tools. Requires + a connected persistence backend (for the frame store) plus a task + engine and interrupt store. 
+ """ + interrupt_store = app_state.interrupt_store + if ( + not app_state.has_persistence + or not app_state.has_task_engine + or interrupt_store is None + ): + return + from synthorg.engine.cockpit import CockpitService # noqa: PLC0415 + from synthorg.engine.flight_recording import ( # noqa: PLC0415 + FlightRecorderService, + ) + from synthorg.engine.intervention import build_steering_directive # noqa: PLC0415 + + frames = app_state.persistence.flight_recorder_frames + app_state.set_cockpit_services( + cockpit_service=CockpitService( + app_state.task_engine, + frames, + clock=app_state.clock, + ), + flight_recorder_service=FlightRecorderService(frames), + steering_directive=build_steering_directive( + interrupt_store, + clock=app_state.clock, + ), + ) + + +def _try_wire_cockpit(app_state: AppState) -> None: + """Wire the cockpit services best-effort; never poison startup.""" + if not app_state.has_persistence or app_state.has_cockpit_service: + return + try: + _wire_cockpit_services(app_state) + except MemoryError, RecursionError: + raise + except Exception as exc: + logger.warning( + API_APP_STARTUP, + service="cockpit", + note="cockpit wiring failed; controllers will 503", + error_type=type(exc).__name__, + error=safe_error_description(exc), + ) + + def _wire_environment_service(app_state: AppState) -> None: """Wire the per-project reproducible-environment substrate. @@ -1245,6 +1300,7 @@ async def _install_runtime_services() -> None: # tree per project under the workspace base. Persistence-less # boots (test fixtures, dev apps with no DB) skip wiring -- the _try_wire_cost_dial(app_state) + _try_wire_cockpit(app_state) # service is optional and gates on ``has_project_workspace_service``. 
if app_state.has_persistence and app_state.project_workspace_service is None: diff --git a/src/synthorg/api/channels.py b/src/synthorg/api/channels.py index 802aa8fc55..a63ef27fff 100644 --- a/src/synthorg/api/channels.py +++ b/src/synthorg/api/channels.py @@ -36,6 +36,7 @@ CHANNEL_REVIEWS: Final[str] = "reviews" CHANNEL_EVENTS: Final[str] = "events" CHANNEL_INTERRUPTS: Final[str] = "interrupts" +CHANNEL_COCKPIT: Final[str] = "cockpit" CHANNEL_DISSENT: Final[str] = "#dissent" CHANNEL_WEBHOOKS: Final[str] = "#webhooks" CHANNEL_RATELIMIT: Final[str] = "#ratelimit" @@ -60,6 +61,7 @@ CHANNEL_REVIEWS, CHANNEL_EVENTS, CHANNEL_INTERRUPTS, + CHANNEL_COCKPIT, CHANNEL_DISSENT, CHANNEL_WEBHOOKS, CHANNEL_RATELIMIT, diff --git a/src/synthorg/api/controllers/__init__.py b/src/synthorg/api/controllers/__init__.py index 12d1fdf1bb..d59e54315a 100644 --- a/src/synthorg/api/controllers/__init__.py +++ b/src/synthorg/api/controllers/__init__.py @@ -24,6 +24,7 @@ CeremonyPolicyController, ) from synthorg.api.controllers.clients import ClientController +from synthorg.api.controllers.cockpit import CockpitController from synthorg.api.controllers.collaboration import CollaborationController from synthorg.api.controllers.company import CompanyController from synthorg.api.controllers.company_versions import ( @@ -168,6 +169,7 @@ MetaController, MetaAnalyticsController, CustomRuleController, + CockpitController, ) # Controllers gated by their collaborator service. These do NOT live diff --git a/src/synthorg/api/controllers/cockpit.py b/src/synthorg/api/controllers/cockpit.py new file mode 100644 index 0000000000..dea96cfa38 --- /dev/null +++ b/src/synthorg/api/controllers/cockpit.py @@ -0,0 +1,235 @@ +"""Mission-control cockpit controller: live activity, flight recorder, intervention. + +Live activity and flight-recorder reads are read-access; interventions +require write access. 
All endpoints 503 (via the ``AppState`` service +properties) until the cockpit services are wired after persistence +connects. Interventions are audit-logged via ``cockpit.intervention.*``. +""" + +from typing import Final + +from litestar import Controller, get, post +from litestar.datastructures import State # noqa: TC002 +from pydantic import BaseModel, ConfigDict, Field + +from synthorg.api.dto import ApiResponse +from synthorg.api.guards import require_read_access, require_write_access +from synthorg.api.path_params import PathId # noqa: TC001 +from synthorg.api.state import AppState # noqa: TC001 +from synthorg.core.enums import InterventionKind, TaskStatus +from synthorg.core.task import Task # noqa: TC001 -- response field type +from synthorg.core.types import NotBlankStr +from synthorg.engine.cockpit import LiveActivitySnapshot # noqa: TC001 +from synthorg.engine.flight_recording import ReplaySeekView # noqa: TC001 +from synthorg.engine.intervention import SteeringOutcome # noqa: TC001 +from synthorg.observability import get_logger +from synthorg.observability.events.cockpit import ( + COCKPIT_INTERVENTION_APPLIED, + COCKPIT_INTERVENTION_INITIATED, +) +from synthorg.persistence._generics import DEFAULT_PAGE_SIZE +from synthorg.persistence.flight_recorder_protocol import ( + FlightRecorderFrame, # noqa: TC001 -- response field type +) + +logger = get_logger(__name__) + +_OPERATOR: Final[str] = "mission-control" +_COCKPIT_NS: Final[str] = "cockpit" + + +class FlightRecorderFramesResponse(BaseModel): + """A page of flight-recorder frames for an execution.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + execution_id: NotBlankStr = Field(description="Execution the frames belong to") + frames: tuple[FlightRecorderFrame, ...] 
= Field( + default=(), + description="Frames newest-first", + ) + + +class PauseInterventionRequest(BaseModel): + """Pause a running task (transition to INTERRUPTED).""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + task_id: NotBlankStr = Field(description="Task to pause") + reason: NotBlankStr = Field(description="Operator reason for the pause") + + +class KillInterventionRequest(BaseModel): + """Kill a running task (cancel it).""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + task_id: NotBlankStr = Field(description="Task to kill") + reason: NotBlankStr = Field(description="Operator reason for the kill") + + +class SteerInterventionRequest(BaseModel): + """Send a hint or redirect to a running agent.""" + + model_config = ConfigDict(frozen=True, extra="forbid") + + execution_id: NotBlankStr = Field(description="Execution to steer") + agent_id: NotBlankStr = Field(description="Agent to steer") + text: NotBlankStr = Field(description="Operator hint / redirect text") + + +class CockpitController(Controller): + """Live activity, flight-recorder replay, and operator interventions.""" + + path = "/cockpit" + tags = ("cockpit",) + guards = [require_read_access] # noqa: RUF012 + + @get("/snapshot") + async def get_snapshot(self, state: State) -> ApiResponse[LiveActivitySnapshot]: + """Return the live org-activity snapshot.""" + app_state: AppState = state.app_state + resolver = app_state.config_resolver + stuck_idle_minutes = await resolver.get_float( + _COCKPIT_NS, "stuck_idle_threshold_minutes" + ) + runaway_cost_percent = await resolver.get_float( + _COCKPIT_NS, "runaway_cost_threshold_percent" + ) + snapshot = await app_state.cockpit_service.get_live_snapshot( + stuck_idle_minutes=stuck_idle_minutes, + runaway_cost_percent=runaway_cost_percent, + ) + return ApiResponse(data=snapshot) + + @get("/flight-recorder/{execution_id:str}/frames") + async def get_frames( + self, + state: State, + execution_id: PathId, + limit: int = 
DEFAULT_PAGE_SIZE, + offset: int = 0, + ) -> ApiResponse[FlightRecorderFramesResponse]: + """Return the flight-recorder scrubber timeline (newest-first).""" + app_state: AppState = state.app_state + frames = await app_state.flight_recorder_service.get_frames( + execution_id, + limit=limit, + offset=offset, + ) + return ApiResponse( + data=FlightRecorderFramesResponse( + execution_id=NotBlankStr(execution_id), + frames=frames, + ), + ) + + @get("/flight-recorder/{execution_id:str}/seek/{turn_index:int}") + async def seek_frame( + self, + state: State, + execution_id: PathId, + turn_index: int, + ) -> ApiResponse[ReplaySeekView]: + """Reconstruct scrubber state at a target turn.""" + app_state: AppState = state.app_state + view = await app_state.flight_recorder_service.seek(execution_id, turn_index) + return ApiResponse(data=view) + + @post("/interventions/pause", guards=[require_write_access]) + async def pause( + self, + state: State, + data: PauseInterventionRequest, + ) -> ApiResponse[Task]: + """Pause a running task (transition to INTERRUPTED).""" + app_state: AppState = state.app_state + logger.info( + COCKPIT_INTERVENTION_INITIATED, + intervention_kind=InterventionKind.PAUSE.value, + task_id=data.task_id, + ) + task, _from = await app_state.task_engine.transition_task( + data.task_id, + TaskStatus.INTERRUPTED, + requested_by=_OPERATOR, + reason=data.reason, + ) + logger.info( + COCKPIT_INTERVENTION_APPLIED, + intervention_kind=InterventionKind.PAUSE.value, + task_id=data.task_id, + ) + return ApiResponse(data=task) + + @post("/interventions/kill", guards=[require_write_access]) + async def kill( + self, + state: State, + data: KillInterventionRequest, + ) -> ApiResponse[Task]: + """Kill a running task (cancel it).""" + app_state: AppState = state.app_state + logger.info( + COCKPIT_INTERVENTION_INITIATED, + intervention_kind=InterventionKind.KILL.value, + task_id=data.task_id, + ) + task, _prior = await app_state.task_engine.cancel_task( + data.task_id, + 
requested_by=_OPERATOR, + reason=data.reason, + ) + logger.info( + COCKPIT_INTERVENTION_APPLIED, + intervention_kind=InterventionKind.KILL.value, + task_id=data.task_id, + ) + return ApiResponse(data=task) + + @post("/interventions/hint", guards=[require_write_access]) + async def hint( + self, + state: State, + data: SteerInterventionRequest, + ) -> ApiResponse[SteeringOutcome]: + """Queue a hint for a running agent.""" + return await self._steer(state, InterventionKind.HINT, data) + + @post("/interventions/redirect", guards=[require_write_access]) + async def redirect( + self, + state: State, + data: SteerInterventionRequest, + ) -> ApiResponse[SteeringOutcome]: + """Queue a redirect for a running agent.""" + return await self._steer(state, InterventionKind.REDIRECT, data) + + async def _steer( + self, + state: State, + kind: InterventionKind, + data: SteerInterventionRequest, + ) -> ApiResponse[SteeringOutcome]: + """Route a hint/redirect through the steering directive.""" + app_state: AppState = state.app_state + logger.info( + COCKPIT_INTERVENTION_INITIATED, + intervention_kind=kind.value, + execution_id=data.execution_id, + agent_id=data.agent_id, + ) + outcome = await app_state.steering_directive.steer( + kind=kind, + execution_id=data.execution_id, + agent_id=data.agent_id, + details={"text": data.text}, + ) + logger.info( + COCKPIT_INTERVENTION_APPLIED, + intervention_kind=kind.value, + execution_id=data.execution_id, + agent_id=data.agent_id, + applied=outcome.applied, + ) + return ApiResponse(data=outcome) diff --git a/src/synthorg/api/state.py b/src/synthorg/api/state.py index 365c2e36c3..984c63e8e4 100644 --- a/src/synthorg/api/state.py +++ b/src/synthorg/api/state.py @@ -68,7 +68,10 @@ from synthorg.core.clock import Clock, SystemClock from synthorg.core.domain_errors import ServiceUnavailableError from synthorg.engine.approval_gate import ApprovalGate # noqa: TC001 +from synthorg.engine.cockpit import CockpitService # noqa: TC001 from 
synthorg.engine.coordination.service import MultiAgentCoordinator # noqa: TC001 +from synthorg.engine.flight_recording import FlightRecorderService # noqa: TC001 +from synthorg.engine.intervention import SteeringDirective # noqa: TC001 from synthorg.engine.pipeline.entry.protocol import WorkEntryAdapter # noqa: TC001 from synthorg.engine.pipeline.entry.task_board_adapter import ( # noqa: TC001 TaskBoardEntryAdapter, @@ -232,6 +235,7 @@ class AppState(AppStateServicesMixin): "_chief_of_staff_proposer", "_client_facade_service", "_client_simulation_state", + "_cockpit_service", "_company_read_service", "_config_resolver", "_connection_catalog", @@ -263,6 +267,7 @@ class AppState(AppStateServicesMixin): "_events_read_service", "_experiment_service", "_fine_tune_orchestrator", + "_flight_recorder_service", "_health_prober_service", "_idempotency_service", "_intake_entry_adapter", @@ -334,6 +339,7 @@ class AppState(AppStateServicesMixin): "_shutdown_requested", "_signals_service", "_simulation_facade_service", + "_steering_directive", "_subworkflow_service", "_task_board_entry_adapter", "_task_engine", @@ -441,6 +447,9 @@ def __init__( # noqa: PLR0913, PLR0915 self._task_engine = task_engine self._distributed_task_queue: JetStreamTaskQueue | None = None self._distributed_backend_services: DistributedBackendServices | None = None + self._cockpit_service: CockpitService | None = None + self._flight_recorder_service: FlightRecorderService | None = None + self._steering_directive: SteeringDirective | None = None self._coordinator = coordinator self._work_pipeline = work_pipeline self._intake_entry_adapter = intake_entry_adapter @@ -1531,6 +1540,65 @@ def swap_coordinator(self, coordinator: MultiAgentCoordinator) -> None: transition=transition, ) + # ── Mission-control cockpit services ──────────────────────────── + + @property + def cockpit_service(self) -> CockpitService: + """Live-activity cockpit service, or raise 503.""" + return 
self._require_service(self._cockpit_service, "cockpit_service") + + @property + def has_cockpit_service(self) -> bool: + """Whether the cockpit service is wired.""" + return self._cockpit_service is not None + + @property + def flight_recorder_service(self) -> FlightRecorderService: + """Flight-recorder query/seek service, or raise 503.""" + return self._require_service( + self._flight_recorder_service, + "flight_recorder_service", + ) + + @property + def has_flight_recorder_service(self) -> bool: + """Whether the flight-recorder service is wired.""" + return self._flight_recorder_service is not None + + @property + def steering_directive(self) -> SteeringDirective: + """Cockpit steering directive, or raise 503.""" + return self._require_service(self._steering_directive, "steering_directive") + + @property + def has_steering_directive(self) -> bool: + """Whether the steering directive is wired.""" + return self._steering_directive is not None + + def set_cockpit_services( + self, + *, + cockpit_service: CockpitService, + flight_recorder_service: FlightRecorderService, + steering_directive: SteeringDirective, + ) -> None: + """Attach (or hot-swap) the cockpit services at boot / reinit. + + Synchronised under ``_lazy_service_lock`` so the boot install is + consistent against concurrent property reads. Idempotent and + last-wins, so a transient shared-app boot or a setup-reinit can + re-wire without poisoning startup. 
+ """ + with self._lazy_service_lock: + self._cockpit_service = cockpit_service + self._flight_recorder_service = flight_recorder_service + self._steering_directive = steering_directive + logger.info( + API_APP_STARTUP, + service="cockpit_services", + transition="attached", + ) + # ── Cost-dial services ────────────────────────────────────────── @property diff --git a/src/synthorg/engine/cockpit/service.py b/src/synthorg/engine/cockpit/service.py index 93093e37fe..f60943abd7 100644 --- a/src/synthorg/engine/cockpit/service.py +++ b/src/synthorg/engine/cockpit/service.py @@ -30,13 +30,9 @@ from synthorg.persistence.flight_recorder_protocol import ( FlightRecorderFrameRepository, ) - from synthorg.settings.resolver import ConfigResolver logger = get_logger(__name__) -_COCKPIT_NS: Final[str] = "cockpit" -_STUCK_KEY: Final[str] = "stuck_idle_threshold_minutes" -_RUNAWAY_KEY: Final[str] = "runaway_cost_threshold_percent" _PERCENT_DIVISOR: Final[float] = 100.0 _ACTIVE_STATUSES: Final[tuple[TaskStatus, ...]] = ( TaskStatus.IN_PROGRESS, @@ -94,20 +90,27 @@ def __init__( task_engine: TaskEngine, flight_recorder_frames: FlightRecorderFrameRepository, *, - config_resolver: ConfigResolver, clock: Clock | None = None, ) -> None: self._task_engine = task_engine self._frames = flight_recorder_frames - self._config_resolver = config_resolver self._clock = clock or SystemClock() - async def get_live_snapshot(self) -> LiveActivitySnapshot: - """Build a snapshot of active work with stuck / runaway flags.""" - stuck_minutes = await self._config_resolver.get_float(_COCKPIT_NS, _STUCK_KEY) - runaway_pct = await self._config_resolver.get_float(_COCKPIT_NS, _RUNAWAY_KEY) + async def get_live_snapshot( + self, + *, + stuck_idle_minutes: float, + runaway_cost_percent: float, + ) -> LiveActivitySnapshot: + """Build a snapshot of active work with stuck / runaway flags. 
+ + Thresholds are passed in (resolved by the controller at request + time) so the service stays pure and free of a settings-resolver + dependency at wire time. + """ + runaway_pct = runaway_cost_percent now = self._clock.now() - stuck_cutoff = now - timedelta(minutes=stuck_minutes) + stuck_cutoff = now - timedelta(minutes=stuck_idle_minutes) activities: list[AgentActivity] = [] for status in _ACTIVE_STATUSES: diff --git a/tests/unit/api/controllers/test_cockpit.py b/tests/unit/api/controllers/test_cockpit.py new file mode 100644 index 0000000000..35dc966ecc --- /dev/null +++ b/tests/unit/api/controllers/test_cockpit.py @@ -0,0 +1,160 @@ +"""Tests for the mission-control cockpit controller.""" + +from datetime import UTC, datetime +from typing import Any + +import pytest +from litestar.testing import TestClient + +from synthorg.core.enums import TaskStatus +from synthorg.core.types import NotBlankStr +from synthorg.persistence.flight_recorder_protocol import FlightRecorderFrame +from tests.unit.api.conftest import make_auth_headers +from tests.unit.api.fakes_backend import FakePersistenceBackend + +_HEADERS = make_auth_headers("ceo") + + +@pytest.fixture(autouse=True) +def _ensure_cockpit_wired(test_client: TestClient[Any]) -> None: + """Wire the cockpit services on the shared app if startup skipped them. + + The session-scoped app's once-only ``_install_runtime_services`` hook + can leave the cockpit services unwired across re-entries (the known + shared-app install leak); re-wiring here keeps the controller tests + deterministic without depending on hook ordering. 
+ """ + app_state = test_client.app.state.app_state + if not app_state.has_cockpit_service: + from synthorg.api.app import _wire_cockpit_services + + _wire_cockpit_services(app_state) + + +def _seed_frame( + backend: FakePersistenceBackend, + *, + execution_id: str, + turn: int, + cost: float = 0.5, +) -> None: + frame = FlightRecorderFrame( + id=NotBlankStr(f"{execution_id}-{turn}"), + execution_id=NotBlankStr(execution_id), + task_id=NotBlankStr("task-1"), + agent_id=NotBlankStr("agent-1"), + turn_index=turn, + timestamp=datetime.now(UTC), + response_summary=f"reply {turn}", + decision="completed", + cost=cost, + status=TaskStatus.IN_PROGRESS, + ) + # Direct dict seed: the controller reads through the same fake repo. + backend.flight_recorder_frames._frames[frame.id] = frame + + +@pytest.mark.unit +class TestCockpitController: + def test_snapshot_returns_live_activity( + self, + test_client: TestClient[Any], + ) -> None: + resp = test_client.get("/api/v1/cockpit/snapshot", headers=_HEADERS) + assert resp.status_code == 200 + body = resp.json() + assert body["success"] is True + assert "agents" in body["data"] + assert "active_count" in body["data"] + + def test_frames_returns_seeded_timeline( + self, + test_client: TestClient[Any], + fake_persistence: FakePersistenceBackend, + ) -> None: + _seed_frame(fake_persistence, execution_id="exec-frames", turn=1) + _seed_frame(fake_persistence, execution_id="exec-frames", turn=2) + + resp = test_client.get( + "/api/v1/cockpit/flight-recorder/exec-frames/frames", + headers=_HEADERS, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["data"]["execution_id"] == "exec-frames" + turns = [f["turn_index"] for f in body["data"]["frames"]] + assert turns == [2, 1] + + def test_seek_reconstructs_prefix( + self, + test_client: TestClient[Any], + fake_persistence: FakePersistenceBackend, + ) -> None: + for turn in (1, 2, 3): + _seed_frame(fake_persistence, execution_id="exec-seek", turn=turn) + + resp = 
test_client.get( + "/api/v1/cockpit/flight-recorder/exec-seek/seek/2", + headers=_HEADERS, + ) + assert resp.status_code == 200 + data = resp.json()["data"] + assert data["turn_index"] == 2 + assert [f["turn_index"] for f in data["frames"]] == [1, 2] + assert data["current_frame"]["turn_index"] == 2 + + def test_hint_queues_steering( + self, + test_client: TestClient[Any], + ) -> None: + resp = test_client.post( + "/api/v1/cockpit/interventions/hint", + headers=_HEADERS, + json={ + "execution_id": "exec-1", + "agent_id": "agent-1", + "text": "use Postgres not Mongo", + }, + ) + assert resp.status_code == 201 + data = resp.json()["data"] + assert data["applied"] is True + assert data["kind"] == "hint" + + def test_redirect_queues_steering( + self, + test_client: TestClient[Any], + ) -> None: + resp = test_client.post( + "/api/v1/cockpit/interventions/redirect", + headers=_HEADERS, + json={ + "execution_id": "exec-1", + "agent_id": "agent-1", + "text": "pivot off the frontend", + }, + ) + assert resp.status_code == 201 + assert resp.json()["data"]["applied"] is True + + def test_hint_rejects_blank_text( + self, + test_client: TestClient[Any], + ) -> None: + resp = test_client.post( + "/api/v1/cockpit/interventions/hint", + headers=_HEADERS, + json={"execution_id": "exec-1", "agent_id": "agent-1", "text": ""}, + ) + assert resp.status_code == 400 + + def test_pause_rejects_unknown_field( + self, + test_client: TestClient[Any], + ) -> None: + resp = test_client.post( + "/api/v1/cockpit/interventions/pause", + headers=_HEADERS, + json={"task_id": "t1", "reason": "stuck", "bogus": 1}, + ) + assert resp.status_code == 400 diff --git a/tests/unit/engine/cockpit/test_service.py b/tests/unit/engine/cockpit/test_service.py index c9f0311951..37d2bde6cf 100644 --- a/tests/unit/engine/cockpit/test_service.py +++ b/tests/unit/engine/cockpit/test_service.py @@ -13,7 +13,6 @@ from synthorg.engine.cockpit import CockpitService from synthorg.engine.task_engine import TaskEngine from 
synthorg.persistence.flight_recorder_protocol import FlightRecorderFrame -from synthorg.settings.resolver import ConfigResolver pytestmark = pytest.mark.unit @@ -53,20 +52,13 @@ def _frame( def _service( tasks: tuple[Task, ...], repo: FakeFlightRecorderFrameRepository, - *, - stuck_minutes: float = 10.0, - runaway_pct: float = 150.0, ) -> CockpitService: task_engine = mock_of[TaskEngine]( list_tasks=AsyncMock(side_effect=[(tasks, len(tasks)), ((), 0)]), ) - resolver = mock_of[ConfigResolver]( - get_float=AsyncMock(side_effect=[stuck_minutes, runaway_pct]), - ) return CockpitService( task_engine, repo, - config_resolver=resolver, clock=FakeClock(start=_NOW), ) @@ -82,7 +74,10 @@ async def test_snapshot_lists_active_work( task = _task(sample_task_with_criteria, task_id="t1", agent="alice", budget=0.0) service = _service((task,), repo) - snapshot = await service.get_live_snapshot() + snapshot = await service.get_live_snapshot( + stuck_idle_minutes=10.0, + runaway_cost_percent=150.0, + ) assert snapshot.active_count == 1 activity = snapshot.agents[0] assert activity.agent_id == "alice" @@ -99,9 +94,12 @@ async def test_idle_task_flagged_stuck( _frame(task_id="t1", turn=1, cost=0.1, ts=_NOW - timedelta(minutes=30)), ) task = _task(sample_task_with_criteria, task_id="t1", agent="bob", budget=0.0) - service = _service((task,), repo, stuck_minutes=10.0) + service = _service((task,), repo) - snapshot = await service.get_live_snapshot() + snapshot = await service.get_live_snapshot( + stuck_idle_minutes=10.0, + runaway_cost_percent=150.0, + ) assert snapshot.agents[0].is_stuck is True assert snapshot.stuck_agents == ("bob",) @@ -114,9 +112,12 @@ async def test_overspend_flagged_runaway( _frame(task_id="t1", turn=1, cost=2.0, ts=_NOW - timedelta(minutes=1)), ) task = _task(sample_task_with_criteria, task_id="t1", agent="carol", budget=1.0) - service = _service((task,), repo, runaway_pct=150.0) + service = _service((task,), repo) - snapshot = await service.get_live_snapshot() + 
snapshot = await service.get_live_snapshot( + stuck_idle_minutes=10.0, + runaway_cost_percent=150.0, + ) assert snapshot.agents[0].is_runaway is True assert snapshot.runaway_agents == ("carol",) @@ -124,7 +125,10 @@ async def test_no_active_work_empty_snapshot(self) -> None: repo = FakeFlightRecorderFrameRepository() service = _service((), repo) - snapshot = await service.get_live_snapshot() + snapshot = await service.get_live_snapshot( + stuck_idle_minutes=10.0, + runaway_cost_percent=150.0, + ) assert snapshot.active_count == 0 assert snapshot.agents == () assert snapshot.total_cost == pytest.approx(0.0) diff --git a/web/src/api/types/dtos.gen.ts b/web/src/api/types/dtos.gen.ts index ccf46f6819..be27701dfa 100644 --- a/web/src/api/types/dtos.gen.ts +++ b/web/src/api/types/dtos.gen.ts @@ -15,6 +15,7 @@ export type ActivityEvent = components['schemas']['ActivityEvent'] export type ActivityWindowHours = components['schemas']['ActivityWindowHours'] export type AddAllowlistEntryRequest = components['schemas']['AddAllowlistEntryRequest'] export type AddModelRequest = components['schemas']['AddModelRequest'] +export type AgentActivity = components['schemas']['AgentActivity'] export type AgentConfig = components['schemas']['AgentConfig'] export type AgentHealthResponse = components['schemas']['AgentHealthResponse'] export type AgentIdentity = components['schemas']['AgentIdentity'] @@ -61,10 +62,12 @@ export type EscalationResponseEnvelope = components['schemas']['ApiResponse_Esca export type ExperimentAssignmentEnvelope = components['schemas']['ApiResponse_ExperimentAssignment_'] export type ExperimentVariantEnvelope = components['schemas']['ApiResponse_ExperimentVariant_'] export type FineTuneStatusEnvelope = components['schemas']['ApiResponse_FineTuneStatus_'] +export type FlightRecorderFramesResponseEnvelope = components['schemas']['ApiResponse_FlightRecorderFramesResponse_'] export type ForecastResponseEnvelope = components['schemas']['ApiResponse_ForecastResponse_'] 
export type HealthReportEnvelope = components['schemas']['ApiResponse_HealthReport_'] export type InstallEntryResponseEnvelope = components['schemas']['ApiResponse_InstallEntryResponse_'] export type KnowledgeSourceEnvelope = components['schemas']['ApiResponse_KnowledgeSource_'] +export type LiveActivitySnapshotEnvelope = components['schemas']['ApiResponse_LiveActivitySnapshot_'] export type LivenessStatusEnvelope = components['schemas']['ApiResponse_LivenessStatus_'] export type LivingDocumentEnvelope = components['schemas']['ApiResponse_LivingDocument_'] export type MeetingResponseEnvelope = components['schemas']['ApiResponse_MeetingResponse_'] @@ -84,6 +87,7 @@ export type ProviderResponseEnvelope = components['schemas']['ApiResponse_Provid export type QualityOverrideResponseEnvelope = components['schemas']['ApiResponse_QualityOverrideResponse_'] export type RateLimitsResponseEnvelope = components['schemas']['ApiResponse_RateLimitsResponse_'] export type ReadinessStatusEnvelope = components['schemas']['ApiResponse_ReadinessStatus_'] +export type ReplaySeekViewEnvelope = components['schemas']['ApiResponse_ReplaySeekView_'] export type ReportResponseEnvelope = components['schemas']['ApiResponse_ReportResponse_'] export type ResolvedCeremonyPolicyResponseEnvelope = components['schemas']['ApiResponse_ResolvedCeremonyPolicyResponse_'] export type RestoreResponseEnvelope = components['schemas']['ApiResponse_RestoreResponse_'] @@ -99,6 +103,7 @@ export type SetupNameLocalesResponseEnvelope = components['schemas']['ApiRespons export type SetupStatusResponseEnvelope = components['schemas']['ApiResponse_SetupStatusResponse_'] export type SimulationStatusResponseEnvelope = components['schemas']['ApiResponse_SimulationStatusResponse_'] export type StageDecisionResultEnvelope = components['schemas']['ApiResponse_StageDecisionResult_'] +export type SteeringOutcomeEnvelope = components['schemas']['ApiResponse_SteeringOutcome_'] export type SyncModelsResponseEnvelope = 
components['schemas']['ApiResponse_SyncModelsResponse_'] export type TaskBoardSubmissionResponseEnvelope = components['schemas']['ApiResponse_TaskBoardSubmissionResponse_'] export type TaskEnvelope = components['schemas']['ApiResponse_Task_'] @@ -254,6 +259,8 @@ export type FineTuneRequest = components['schemas']['FineTuneRequest'] export type FineTuneRun = components['schemas']['FineTuneRun'] export type FineTuneRunConfig = components['schemas']['FineTuneRunConfig'] export type FineTuneStatus = components['schemas']['FineTuneStatus'] +export type FlightRecorderFrame = components['schemas']['FlightRecorderFrame'] +export type FlightRecorderFramesResponse = components['schemas']['FlightRecorderFramesResponse'] export type Forecast = components['schemas']['Forecast'] export type ForecastApproveRequest = components['schemas']['ForecastApproveRequest'] export type ForecastPoint = components['schemas']['ForecastPoint'] @@ -274,9 +281,11 @@ export type InstallEntryResponse = components['schemas']['InstallEntryResponse'] export type InstalledEntry = components['schemas']['InstalledEntry'] export type IntelligenceConfig = components['schemas']['IntelligenceConfig'] export type InterruptResponse = components['schemas']['InterruptResponse'] +export type KillInterventionRequest = components['schemas']['KillInterventionRequest'] export type KnowledgeHit = components['schemas']['KnowledgeHit'] export type KnowledgeSource = components['schemas']['KnowledgeSource'] export type LinkBlock = components['schemas']['LinkBlock'] +export type LiveActivitySnapshot = components['schemas']['LiveActivitySnapshot'] export type LivenessStatus = components['schemas']['LivenessStatus'] export type LivingDocument = components['schemas']['LivingDocument'] export type LlmCalibrationRecord = components['schemas']['LlmCalibrationRecord'] @@ -361,6 +370,7 @@ export type PaginationMeta = components['schemas']['PaginationMeta'] export type ParentReference = components['schemas']['ParentReference'] 
export type ParetoFrontier = components['schemas']['ParetoFrontier'] export type ParetoPoint = components['schemas']['ParetoPoint'] +export type PauseInterventionRequest = components['schemas']['PauseInterventionRequest'] export type PdfLocator = components['schemas']['PdfLocator'] export type PerformanceSummary = components['schemas']['PerformanceSummary'] export type PeriodSummary = components['schemas']['PeriodSummary'] @@ -405,6 +415,7 @@ export type RemoveAllowlistEntryRequest = components['schemas']['RemoveAllowlist export type ReorderAgentsRequest = components['schemas']['ReorderAgentsRequest'] export type ReorderDepartmentsRequest = components['schemas']['ReorderDepartmentsRequest'] export type ReorderTeamsRequest = components['schemas']['ReorderTeamsRequest'] +export type ReplaySeekView = components['schemas']['ReplaySeekView'] export type ReportResponse = components['schemas']['ReportResponse'] export type ReportingLine = components['schemas']['ReportingLine'] export type ResilienceConfig = components['schemas']['ResilienceConfig'] @@ -455,6 +466,8 @@ export type SkillSet = components['schemas']['SkillSet'] export type StageDecisionPayload = components['schemas']['StageDecisionPayload'] export type StageDecisionResult = components['schemas']['StageDecisionResult'] export type StartSimulationPayload = components['schemas']['StartSimulationPayload'] +export type SteerInterventionRequest = components['schemas']['SteerInterventionRequest'] +export type SteeringOutcome = components['schemas']['SteeringOutcome'] export type StragglerGap = components['schemas']['StragglerGap'] export type StrategyUpdateRequest = components['schemas']['StrategyUpdateRequest'] export type SubmitDecisionRequest = components['schemas']['SubmitDecisionRequest'] diff --git a/web/src/api/types/enum-values.gen.ts b/web/src/api/types/enum-values.gen.ts index 9872672a13..ef527e7bad 100644 --- a/web/src/api/types/enum-values.gen.ts +++ b/web/src/api/types/enum-values.gen.ts @@ -384,6 
+384,14 @@ export const INTERRUPT_TYPE_VALUES = [ ] as const export type InterruptType = (typeof INTERRUPT_TYPE_VALUES)[number] +export const INTERVENTION_KIND_VALUES = [ + 'pause', + 'kill', + 'hint', + 'redirect', +] as const +export type InterventionKind = (typeof INTERVENTION_KIND_VALUES)[number] + export const LLM_CALL_CATEGORY_VALUES = [ 'productive', 'coordination', diff --git a/web/src/api/types/openapi.gen.ts b/web/src/api/types/openapi.gen.ts index 6184a52704..290f5cac63 100644 --- a/web/src/api/types/openapi.gen.ts +++ b/web/src/api/types/openapi.gen.ts @@ -1272,6 +1272,125 @@ export type paths = { readonly patch?: never; readonly trace?: never; }; + readonly "/api/v1/cockpit/flight-recorder/{execution_id}/frames": { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + /** GetFrames */ + readonly get: operations["ApiV1CockpitFlightRecorderExecutionIdFramesGetFrames"]; + readonly put?: never; + readonly post?: never; + readonly delete?: never; + readonly options?: never; + readonly head?: never; + readonly patch?: never; + readonly trace?: never; + }; + readonly "/api/v1/cockpit/flight-recorder/{execution_id}/seek/{turn_index}": { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + /** SeekFrame */ + readonly get: operations["ApiV1CockpitFlightRecorderExecutionIdSeekTurnIndexSeekFrame"]; + readonly put?: never; + readonly post?: never; + readonly delete?: never; + readonly options?: never; + readonly head?: never; + readonly patch?: never; + readonly trace?: never; + }; + readonly "/api/v1/cockpit/interventions/hint": { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + readonly get?: never; + readonly put?: never; + /** Hint */ + readonly post: operations["ApiV1CockpitInterventionsHintHint"]; + readonly 
delete?: never; + readonly options?: never; + readonly head?: never; + readonly patch?: never; + readonly trace?: never; + }; + readonly "/api/v1/cockpit/interventions/kill": { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + readonly get?: never; + readonly put?: never; + /** Kill */ + readonly post: operations["ApiV1CockpitInterventionsKillKill"]; + readonly delete?: never; + readonly options?: never; + readonly head?: never; + readonly patch?: never; + readonly trace?: never; + }; + readonly "/api/v1/cockpit/interventions/pause": { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + readonly get?: never; + readonly put?: never; + /** Pause */ + readonly post: operations["ApiV1CockpitInterventionsPausePause"]; + readonly delete?: never; + readonly options?: never; + readonly head?: never; + readonly patch?: never; + readonly trace?: never; + }; + readonly "/api/v1/cockpit/interventions/redirect": { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + readonly get?: never; + readonly put?: never; + /** Redirect */ + readonly post: operations["ApiV1CockpitInterventionsRedirectRedirect"]; + readonly delete?: never; + readonly options?: never; + readonly head?: never; + readonly patch?: never; + readonly trace?: never; + }; + readonly "/api/v1/cockpit/snapshot": { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + /** GetSnapshot */ + readonly get: operations["ApiV1CockpitSnapshotGetSnapshot"]; + readonly put?: never; + readonly post?: never; + readonly delete?: never; + readonly options?: never; + readonly head?: never; + readonly patch?: never; + readonly trace?: never; + }; readonly "/api/v1/company": { readonly parameters: { 
readonly query?: never; @@ -4760,6 +4879,27 @@ export type components = { readonly AddModelRequest: { readonly model: components["schemas"]["ProviderModelConfig"]; }; + /** AgentActivity */ + readonly AgentActivity: { + /** @description Agent working the task */ + readonly agent_id: string; + /** @description Accumulated cost for the task */ + readonly cost: number; + /** @description Cost beyond the runaway threshold */ + readonly is_runaway: boolean; + /** @description Idle beyond the stuck threshold */ + readonly is_stuck: boolean; + /** + * Format: date-time + * @description datetime with the constraint that the value must have timezone info + */ + readonly last_active: string | null; + readonly status: components["schemas"]["TaskStatus"]; + /** @description Task being worked */ + readonly task_id: string; + /** @description Turns recorded so far */ + readonly turn_count: number; + }; /** AgentConfig */ readonly AgentConfig: { /** @description Raw authority config */ @@ -5344,6 +5484,14 @@ export type components = { /** @description Whether the request succeeded (derived from ``error``). */ readonly success: boolean; }; + /** ApiResponse[FlightRecorderFramesResponse] */ + readonly ApiResponse_FlightRecorderFramesResponse_: { + readonly data: components["schemas"]["FlightRecorderFramesResponse"] | null; + readonly error: string | null; + readonly error_detail: components["schemas"]["ErrorDetail"] | null; + /** @description Whether the request succeeded (derived from ``error``). */ + readonly success: boolean; + }; /** ApiResponse[ForecastResponse] */ readonly ApiResponse_ForecastResponse_: { readonly data: components["schemas"]["ForecastResponse"] | null; @@ -5384,6 +5532,14 @@ export type components = { /** @description Whether the request succeeded (derived from ``error``). 
*/ readonly success: boolean; }; + /** ApiResponse[LiveActivitySnapshot] */ + readonly ApiResponse_LiveActivitySnapshot_: { + readonly data: components["schemas"]["LiveActivitySnapshot"] | null; + readonly error: string | null; + readonly error_detail: components["schemas"]["ErrorDetail"] | null; + /** @description Whether the request succeeded (derived from ``error``). */ + readonly success: boolean; + }; /** ApiResponse[LivenessStatus] */ readonly ApiResponse_LivenessStatus_: { readonly data: components["schemas"]["LivenessStatus"] | null; @@ -5536,6 +5692,14 @@ export type components = { /** @description Whether the request succeeded (derived from ``error``). */ readonly success: boolean; }; + /** ApiResponse[ReplaySeekView] */ + readonly ApiResponse_ReplaySeekView_: { + readonly data: components["schemas"]["ReplaySeekView"] | null; + readonly error: string | null; + readonly error_detail: components["schemas"]["ErrorDetail"] | null; + /** @description Whether the request succeeded (derived from ``error``). */ + readonly success: boolean; + }; /** ApiResponse[ReportResponse] */ readonly ApiResponse_ReportResponse_: { readonly data: components["schemas"]["ReportResponse"] | null; @@ -5656,6 +5820,14 @@ export type components = { /** @description Whether the request succeeded (derived from ``error``). */ readonly success: boolean; }; + /** ApiResponse[SteeringOutcome] */ + readonly ApiResponse_SteeringOutcome_: { + readonly data: components["schemas"]["SteeringOutcome"] | null; + readonly error: string | null; + readonly error_detail: components["schemas"]["ErrorDetail"] | null; + /** @description Whether the request succeeded (derived from ``error``). 
*/ + readonly success: boolean; + }; /** ApiResponse[SyncModelsResponse] */ readonly ApiResponse_SyncModelsResponse_: { readonly data: components["schemas"]["SyncModelsResponse"] | null; @@ -8517,6 +8689,66 @@ export type components = { * @enum {string} */ readonly FinishReason: "stop" | "max_tokens" | "tool_use" | "content_filter" | "error"; + /** FlightRecorderFrame */ + readonly FlightRecorderFrame: { + /** @description Agent that produced the turn */ + readonly agent_id: string; + /** + * @description Turn cost + * @default 0 + */ + readonly cost: number; + /** @description Classified turn outcome (e.g. tool_call, completed) */ + readonly decision: string | null; + /** @description Execution run identifier */ + readonly execution_id: string; + /** @description Unique frame identifier */ + readonly id: string; + /** + * @description Prompt tokens + * @default 0 + */ + readonly input_tokens: number; + /** + * @description Operator intervention recorded on this turn, if any + * @enum {string|null} + */ + readonly intervention_kind: "pause" | "kill" | "hint" | "redirect" | null; + /** + * @description Completion tokens + * @default 0 + */ + readonly output_tokens: number; + /** @description Redacted, length-bounded prompt summary */ + readonly prompt_summary: string | null; + /** @description Redacted, length-bounded model response summary */ + readonly response_summary: string | null; + readonly status: components["schemas"]["TaskStatus"]; + /** @description Task the agent was working on, when known */ + readonly task_id: string | null; + /** + * Format: date-time + * @description datetime with the constraint that the value must have timezone info + */ + readonly timestamp: string; + /** + * @description Tool names invoked during the turn + * @default [] + */ + readonly tool_calls: readonly string[]; + /** @description 1-based turn index within the run */ + readonly turn_index: number; + }; + /** FlightRecorderFramesResponse */ + readonly 
FlightRecorderFramesResponse: { + /** @description Execution the frames belong to */ + readonly execution_id: string; + /** + * @description Frames newest-first + * @default [] + */ + readonly frames: readonly components["schemas"]["FlightRecorderFrame"][]; + }; /** Forecast */ readonly Forecast: { /** @description SHA-256 hex digest of canonical brief JSON */ @@ -8815,6 +9047,25 @@ export type components = { * @enum {string} */ readonly InterruptType: "tool_approval" | "info_request"; + /** + * InterventionKind + * @description Operator intervention applied from the mission-control cockpit. + * + * PAUSE and KILL reuse the task lifecycle seams (transition to + * ``INTERRUPTED`` / cancel to ``CANCELLED``). HINT and REDIRECT route + * through the steering directive: both post an ``INFO_REQUEST`` + * interrupt the engine consumes at the next safe turn boundary, so the + * operator's text reaches the running agent without corrupting state. + * @enum {string} + */ + readonly InterventionKind: "pause" | "kill" | "hint" | "redirect"; + /** KillInterventionRequest */ + readonly KillInterventionRequest: { + /** @description Operator reason for the kill */ + readonly reason: string; + /** @description Task to kill */ + readonly task_id: string; + }; /** KnowledgeHit */ readonly KnowledgeHit: { /** @description Matching chunk content */ @@ -8882,6 +9133,39 @@ export type components = { /** @description Link target URL */ readonly url: string; }; + /** LiveActivitySnapshot */ + readonly LiveActivitySnapshot: { + /** + * @description Active task count + * @default 0 + */ + readonly active_count: number; + /** + * @description Per-task activity for active work + * @default [] + */ + readonly agents: readonly components["schemas"]["AgentActivity"][]; + /** + * @description Agent ids flagged runaway + * @default [] + */ + readonly runaway_agents: readonly string[]; + /** + * @description Agent ids flagged stuck + * @default [] + */ + readonly stuck_agents: readonly string[]; + 
/** + * Format: date-time + * @description datetime with the constraint that the value must have timezone info + */ + readonly timestamp: string; + /** + * @description Summed active cost + * @default 0 + */ + readonly total_cost: number; + }; /** LivenessStatus */ readonly LivenessStatus: { /** @@ -10395,6 +10679,13 @@ export type components = { /** @description Benchmark-score provenance identifier */ readonly source: string; }; + /** PauseInterventionRequest */ + readonly PauseInterventionRequest: { + /** @description Operator reason for the pause */ + readonly reason: string; + /** @description Task to pause */ + readonly task_id: string; + }; /** PdfLocator */ readonly PdfLocator: { /** @description Optional (x0, top, x1, bottom) region in PDF points */ @@ -11112,6 +11403,25 @@ export type components = { /** @description Ordered team names */ readonly team_names: readonly string[]; }; + /** ReplaySeekView */ + readonly ReplaySeekView: { + /** + * @description Summed cost of frames up to and including turn_index + * @default 0 + */ + readonly cumulative_cost: number; + /** @description Frame at turn_index, when recorded */ + readonly current_frame: components["schemas"]["FlightRecorderFrame"] | null; + /** @description Execution being replayed */ + readonly execution_id: string; + /** + * @description Frames 1..turn_index, ascending + * @default [] + */ + readonly frames: readonly components["schemas"]["FlightRecorderFrame"][]; + /** @description Target turn index */ + readonly turn_index: number; + }; /** ReportingLine */ readonly ReportingLine: { /** @description Subordinate role name or identifier */ @@ -12025,6 +12335,25 @@ export type components = { readonly StartSimulationPayload: { readonly config: components["schemas"]["SimulationConfig"]; }; + /** SteeringOutcome */ + readonly SteeringOutcome: { + /** @description Whether the directive was delivered */ + readonly applied: boolean; + /** @description Interrupt id the directive produced, when applied */ 
+ readonly artifact_id: string | null; + /** @description Human-readable outcome description */ + readonly detail: string; + readonly kind: components["schemas"]["InterventionKind"]; + }; + /** SteerInterventionRequest */ + readonly SteerInterventionRequest: { + /** @description Agent to steer */ + readonly agent_id: string; + /** @description Execution to steer */ + readonly execution_id: string; + /** @description Operator hint / redirect text */ + readonly text: string; + }; /** StragglerGap */ readonly StragglerGap: { /** @description Relative gap (gap / mean) */ @@ -16487,6 +16816,216 @@ export interface operations { readonly 503: components["responses"]["ServiceUnavailable"]; }; }; + readonly ApiV1CockpitFlightRecorderExecutionIdFramesGetFrames: { + readonly parameters: { + readonly query?: { + readonly limit?: number; + readonly offset?: number; + }; + readonly header?: never; + readonly path: { + /** @description Resource identifier */ + readonly execution_id: string; + }; + readonly cookie?: never; + }; + readonly requestBody?: never; + readonly responses: { + /** @description Request fulfilled, document follows */ + readonly 200: { + headers: { + readonly [name: string]: unknown; + }; + content: { + readonly "application/json": components["schemas"]["ApiResponse_FlightRecorderFramesResponse_"]; + }; + }; + readonly 400: components["responses"]["BadRequest"]; + readonly 401: components["responses"]["Unauthorized"]; + readonly 404: components["responses"]["NotFound"]; + readonly 429: components["responses"]["TooManyRequests"]; + readonly 500: components["responses"]["InternalError"]; + readonly 503: components["responses"]["ServiceUnavailable"]; + }; + }; + readonly ApiV1CockpitFlightRecorderExecutionIdSeekTurnIndexSeekFrame: { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path: { + /** @description Resource identifier */ + readonly execution_id: string; + readonly turn_index: number; + }; + readonly cookie?: 
never; + }; + readonly requestBody?: never; + readonly responses: { + /** @description Request fulfilled, document follows */ + readonly 200: { + headers: { + readonly [name: string]: unknown; + }; + content: { + readonly "application/json": components["schemas"]["ApiResponse_ReplaySeekView_"]; + }; + }; + readonly 400: components["responses"]["BadRequest"]; + readonly 401: components["responses"]["Unauthorized"]; + readonly 404: components["responses"]["NotFound"]; + readonly 429: components["responses"]["TooManyRequests"]; + readonly 500: components["responses"]["InternalError"]; + readonly 503: components["responses"]["ServiceUnavailable"]; + }; + }; + readonly ApiV1CockpitInterventionsHintHint: { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + readonly requestBody: { + readonly content: { + readonly "application/json": components["schemas"]["SteerInterventionRequest"]; + }; + }; + readonly responses: { + /** @description Document created, URL follows */ + readonly 201: { + headers: { + readonly [name: string]: unknown; + }; + content: { + readonly "application/json": components["schemas"]["ApiResponse_SteeringOutcome_"]; + }; + }; + readonly 400: components["responses"]["BadRequest"]; + readonly 401: components["responses"]["Unauthorized"]; + readonly 403: components["responses"]["Forbidden"]; + readonly 409: components["responses"]["Conflict"]; + readonly 429: components["responses"]["TooManyRequests"]; + readonly 500: components["responses"]["InternalError"]; + readonly 503: components["responses"]["ServiceUnavailable"]; + }; + }; + readonly ApiV1CockpitInterventionsKillKill: { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + readonly requestBody: { + readonly content: { + readonly "application/json": components["schemas"]["KillInterventionRequest"]; + }; + }; + readonly responses: { + 
/** @description Document created, URL follows */ + readonly 201: { + headers: { + readonly [name: string]: unknown; + }; + content: { + readonly "application/json": components["schemas"]["ApiResponse_Task_"]; + }; + }; + readonly 400: components["responses"]["BadRequest"]; + readonly 401: components["responses"]["Unauthorized"]; + readonly 403: components["responses"]["Forbidden"]; + readonly 409: components["responses"]["Conflict"]; + readonly 429: components["responses"]["TooManyRequests"]; + readonly 500: components["responses"]["InternalError"]; + readonly 503: components["responses"]["ServiceUnavailable"]; + }; + }; + readonly ApiV1CockpitInterventionsPausePause: { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + readonly requestBody: { + readonly content: { + readonly "application/json": components["schemas"]["PauseInterventionRequest"]; + }; + }; + readonly responses: { + /** @description Document created, URL follows */ + readonly 201: { + headers: { + readonly [name: string]: unknown; + }; + content: { + readonly "application/json": components["schemas"]["ApiResponse_Task_"]; + }; + }; + readonly 400: components["responses"]["BadRequest"]; + readonly 401: components["responses"]["Unauthorized"]; + readonly 403: components["responses"]["Forbidden"]; + readonly 409: components["responses"]["Conflict"]; + readonly 429: components["responses"]["TooManyRequests"]; + readonly 500: components["responses"]["InternalError"]; + readonly 503: components["responses"]["ServiceUnavailable"]; + }; + }; + readonly ApiV1CockpitInterventionsRedirectRedirect: { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + readonly requestBody: { + readonly content: { + readonly "application/json": components["schemas"]["SteerInterventionRequest"]; + }; + }; + readonly responses: { + /** @description Document 
created, URL follows */ + readonly 201: { + headers: { + readonly [name: string]: unknown; + }; + content: { + readonly "application/json": components["schemas"]["ApiResponse_SteeringOutcome_"]; + }; + }; + readonly 400: components["responses"]["BadRequest"]; + readonly 401: components["responses"]["Unauthorized"]; + readonly 403: components["responses"]["Forbidden"]; + readonly 409: components["responses"]["Conflict"]; + readonly 429: components["responses"]["TooManyRequests"]; + readonly 500: components["responses"]["InternalError"]; + readonly 503: components["responses"]["ServiceUnavailable"]; + }; + }; + readonly ApiV1CockpitSnapshotGetSnapshot: { + readonly parameters: { + readonly query?: never; + readonly header?: never; + readonly path?: never; + readonly cookie?: never; + }; + readonly requestBody?: never; + readonly responses: { + /** @description Request fulfilled, document follows */ + readonly 200: { + headers: { + readonly [name: string]: unknown; + }; + content: { + readonly "application/json": components["schemas"]["ApiResponse_LiveActivitySnapshot_"]; + }; + }; + readonly 401: components["responses"]["Unauthorized"]; + readonly 429: components["responses"]["TooManyRequests"]; + readonly 500: components["responses"]["InternalError"]; + readonly 503: components["responses"]["ServiceUnavailable"]; + }; + }; readonly ApiV1CompanyGetCompany: { readonly parameters: { readonly query?: never; From 9fb5b3b649daa09736804a82d9390ffb3398fabf Mon Sep 17 00:00:00 2001 From: Aurelio <19254254+Aureliolo@users.noreply.github.com> Date: Fri, 22 May 2026 15:01:26 +0200 Subject: [PATCH 03/17] feat: mission control web cockpit (Live + Flight Recorder) Adds /mission-control route + sidebar nav. Live tab: KPI row, agent activity rows with pause/kill/hint/redirect interventions, stuck/runaway flags, REST snapshot polling + WS liveness on the cockpit/tasks/agents/budget channels. 
Flight Recorder tab: new Timeline scrubber primitive (+ stories), transport controls + speed, per-turn frame detail. Adds cockpit endpoints, Zustand store (mutation pattern), useMissionControlData hook, MSW handlers, WS cockpit channel, and AgentActivity.execution_id (regenerated DTOs). Timeline + page + store tests. --- src/synthorg/engine/cockpit/service.py | 6 + .../__tests__/components/ui/timeline.test.tsx | 45 ++++ .../pages/MissionControlPage.test.tsx | 72 +++++++ .../__tests__/stores/mission-control.test.ts | 45 ++++ web/src/api/endpoints/cockpit.ts | 84 ++++++++ web/src/api/types/openapi.gen.ts | 2 + web/src/api/types/websocket.ts | 2 +- web/src/components/layout/Sidebar.tsx | 2 + web/src/components/ui/timeline.stories.tsx | 43 ++++ web/src/components/ui/timeline.tsx | 119 +++++++++++ web/src/hooks/useMissionControlData.ts | 82 +++++++ web/src/mocks/handlers/cockpit.ts | 76 +++++++ web/src/mocks/handlers/index.ts | 3 + web/src/pages/MissionControlPage.tsx | 45 ++++ .../pages/mission-control/FlightRecorder.tsx | 202 ++++++++++++++++++ web/src/pages/mission-control/LiveCockpit.tsx | 148 +++++++++++++ web/src/pages/settings/utils.ts | 1 + web/src/router/index.tsx | 2 + web/src/router/route-titles.ts | 1 + web/src/router/routes.ts | 1 + web/src/stores/mission-control.ts | 166 ++++++++++++++ web/src/utils/constants.ts | 1 + 22 files changed, 1147 insertions(+), 1 deletion(-) create mode 100644 web/src/__tests__/components/ui/timeline.test.tsx create mode 100644 web/src/__tests__/pages/MissionControlPage.test.tsx create mode 100644 web/src/__tests__/stores/mission-control.test.ts create mode 100644 web/src/api/endpoints/cockpit.ts create mode 100644 web/src/components/ui/timeline.stories.tsx create mode 100644 web/src/components/ui/timeline.tsx create mode 100644 web/src/hooks/useMissionControlData.ts create mode 100644 web/src/mocks/handlers/cockpit.ts create mode 100644 web/src/pages/MissionControlPage.tsx create mode 100644 
web/src/pages/mission-control/FlightRecorder.tsx create mode 100644 web/src/pages/mission-control/LiveCockpit.tsx create mode 100644 web/src/stores/mission-control.ts diff --git a/src/synthorg/engine/cockpit/service.py b/src/synthorg/engine/cockpit/service.py index f60943abd7..26cf578ceb 100644 --- a/src/synthorg/engine/cockpit/service.py +++ b/src/synthorg/engine/cockpit/service.py @@ -49,6 +49,10 @@ class AgentActivity(BaseModel): agent_id: NotBlankStr = Field(description="Agent working the task") task_id: NotBlankStr = Field(description="Task being worked") + execution_id: NotBlankStr | None = Field( + default=None, + description="Execution id of the latest recorded turn, when any", + ) status: TaskStatus = Field(description="Current task status") turn_count: int = Field(ge=0, description="Turns recorded so far") cost: float = Field(ge=0.0, description="Accumulated cost for the task") @@ -159,6 +163,7 @@ async def _build_activity( latest = frames[0] if frames else None turn_count = latest.turn_index if latest is not None else 0 last_active = latest.timestamp if latest is not None else None + execution_id = latest.execution_id if latest is not None else None cost = sum(frame.cost for frame in frames) is_stuck = last_active is not None and last_active < stuck_cutoff is_runaway = task.budget_limit > 0 and cost > task.budget_limit * ( @@ -167,6 +172,7 @@ async def _build_activity( return AgentActivity( agent_id=NotBlankStr(agent_id), task_id=NotBlankStr(task.id), + execution_id=execution_id, status=task.status, turn_count=turn_count, cost=cost, diff --git a/web/src/__tests__/components/ui/timeline.test.tsx b/web/src/__tests__/components/ui/timeline.test.tsx new file mode 100644 index 0000000000..611967f2bc --- /dev/null +++ b/web/src/__tests__/components/ui/timeline.test.tsx @@ -0,0 +1,45 @@ +import { fireEvent, render, screen } from '@testing-library/react' +import { describe, expect, it, vi } from 'vitest' + +import { Timeline, type TimelineFrame } from 
'@/components/ui/timeline' + +const FRAMES: readonly TimelineFrame[] = [ + { turnIndex: 1, status: 'in_progress' }, + { turnIndex: 2, status: 'blocked' }, + { turnIndex: 3, status: 'completed' }, +] + +describe('Timeline', () => { + it('renders a dot per frame labelled by turn index', () => { + render( {}} />) + expect(screen.getByRole('button', { name: /Turn 1 \(in_progress\)/ })).toBeInTheDocument() + expect(screen.getByRole('button', { name: /Turn 3 \(completed\)/ })).toBeInTheDocument() + }) + + it('marks the current frame via aria-current', () => { + render( {}} />) + const current = screen.getByRole('button', { name: /Turn 2/ }) + expect(current).toHaveAttribute('aria-current', 'true') + }) + + it('seeks when a dot is clicked', () => { + const onSeek = vi.fn() + render() + fireEvent.click(screen.getByRole('button', { name: /Turn 3/ })) + expect(onSeek).toHaveBeenCalledWith(2) + }) + + it('steps with arrow keys', () => { + const onSeek = vi.fn() + render() + fireEvent.keyDown(screen.getByRole('slider'), { key: 'ArrowRight' }) + expect(onSeek).toHaveBeenCalledWith(1) + }) + + it('exposes slider value bounds', () => { + render( {}} />) + const slider = screen.getByRole('slider') + expect(slider).toHaveAttribute('aria-valuemax', '3') + expect(slider).toHaveAttribute('aria-valuenow', '3') + }) +}) diff --git a/web/src/__tests__/pages/MissionControlPage.test.tsx b/web/src/__tests__/pages/MissionControlPage.test.tsx new file mode 100644 index 0000000000..d1c703c732 --- /dev/null +++ b/web/src/__tests__/pages/MissionControlPage.test.tsx @@ -0,0 +1,72 @@ +import { fireEvent, render, screen } from '@testing-library/react' +import { http, HttpResponse } from 'msw' +import { MemoryRouter } from 'react-router' +import { describe, expect, it } from 'vitest' + +import type { getCockpitSnapshot } from '@/api/endpoints/cockpit' +import MissionControlPage from '@/pages/MissionControlPage' +import { successFor } from '@/mocks/handlers' +import { server } from '@/test-setup' + 
+function renderPage() { + return render( + + + , + ) +} + +describe('MissionControlPage', () => { + it('renders the cockpit heading and live KPIs', async () => { + renderPage() + expect( + screen.getByRole('heading', { name: 'Mission Control' }), + ).toBeInTheDocument() + expect(await screen.findByText('Active agents')).toBeInTheDocument() + expect(screen.getByText('Stuck')).toBeInTheDocument() + expect(screen.getByText('Runaway')).toBeInTheDocument() + }) + + it('surfaces a stuck agent with an intervention control', async () => { + server.use( + http.get('/api/v1/cockpit/snapshot', () => + HttpResponse.json( + successFor({ + timestamp: '2026-05-22T12:00:00Z', + agents: [ + { + agent_id: 'agent-1', + task_id: 'task-1', + execution_id: 'exec-1', + status: 'in_progress', + turn_count: 4, + cost: 1.25, + last_active: '2026-05-22T11:30:00Z', + is_stuck: true, + is_runaway: false, + }, + ], + total_cost: 1.25, + active_count: 1, + stuck_agents: ['agent-1'], + runaway_agents: [], + }), + ), + ), + ) + + renderPage() + expect(await screen.findByText('agent-1')).toBeInTheDocument() + expect(screen.getByText('stuck')).toBeInTheDocument() + expect(screen.getByRole('button', { name: 'Pause' })).toBeInTheDocument() + expect(screen.getByRole('button', { name: 'Kill' })).toBeInTheDocument() + }) + + it('switches to the flight recorder tab', async () => { + renderPage() + await screen.findByText('Active agents') + fireEvent.click(screen.getByRole('radio', { name: 'Flight Recorder' })) + expect(screen.getByText('No frames loaded')).toBeInTheDocument() + expect(screen.getByRole('button', { name: 'Load run' })).toBeInTheDocument() + }) +}) diff --git a/web/src/__tests__/stores/mission-control.test.ts b/web/src/__tests__/stores/mission-control.test.ts new file mode 100644 index 0000000000..fc9380a5c9 --- /dev/null +++ b/web/src/__tests__/stores/mission-control.test.ts @@ -0,0 +1,45 @@ +import { http, HttpResponse } from 'msw' +import { afterEach, describe, expect, it } from 'vitest' 
+ +import { apiError } from '@/mocks/handlers' +import { useMissionControlStore } from '@/stores/mission-control' +import { useToastStore } from '@/stores/toast' +import { server } from '@/test-setup' + +afterEach(() => { + useToastStore.getState().dismissAll() +}) + +describe('useMissionControlStore', () => { + it('fetchSnapshot stores the live snapshot', async () => { + await useMissionControlStore.getState().fetchSnapshot() + expect(useMissionControlStore.getState().snapshot).not.toBeNull() + expect(useMissionControlStore.getState().snapshotError).toBeNull() + }) + + it('pauseTaskAction returns the task and emits a success toast', async () => { + const task = await useMissionControlStore.getState().pauseTaskAction('t1', 'why') + expect(task).not.toBeNull() + expect(useToastStore.getState().toasts.some((t) => t.variant === 'success')).toBe( + true, + ) + }) + + it('pauseTaskAction returns null and toasts on error', async () => { + server.use( + http.post('/api/v1/cockpit/interventions/pause', () => + HttpResponse.json(apiError('boom'), { status: 500 }), + ), + ) + const result = await useMissionControlStore.getState().pauseTaskAction('t1', 'why') + expect(result).toBeNull() + expect(useToastStore.getState().toasts.some((t) => t.variant === 'error')).toBe(true) + }) + + it('sendHintAction returns the steering outcome on success', async () => { + const outcome = await useMissionControlStore + .getState() + .sendHintAction('exec-1', 'agent-1', 'use Postgres') + expect(outcome?.applied).toBe(true) + }) +}) diff --git a/web/src/api/endpoints/cockpit.ts b/web/src/api/endpoints/cockpit.ts new file mode 100644 index 0000000000..b264d8d1a1 --- /dev/null +++ b/web/src/api/endpoints/cockpit.ts @@ -0,0 +1,84 @@ +import type { + FlightRecorderFramesResponse, + LiveActivitySnapshot, + ReplaySeekView, + SteeringOutcome, + Task, +} from '@/api/types' + +import { apiClient, unwrap } from '../client' +import type { ApiResponse } from '../types/http' + +/** Fetch the live 
org-activity snapshot (who/what + stuck/runaway). */ +export async function getCockpitSnapshot(): Promise { /* GET /cockpit/snapshot, then unwrap() the response. NOTE(review): generic type arguments on Promise and apiClient.get/post appear to be missing throughout this copy of the patch - confirm against the original file. */ + const response = + await apiClient.get>('/cockpit/snapshot') + return unwrap(response) +} + +/** Fetch a page of flight-recorder frames (newest-first) for an execution. The execution id is URI-encoded into the path; the optional limit/offset object is passed through as the request params. */ +export async function getFlightRecorderFrames( + executionId: string, + params?: { limit?: number; offset?: number }, +): Promise { + const response = await apiClient.get>( + `/cockpit/flight-recorder/${encodeURIComponent(executionId)}/frames`, + { params }, + ) + return unwrap(response) +} + +/** Reconstruct scrubber state at a target turn. Issues a GET to the seek path for turnIndex, with the execution id URI-encoded, and returns the unwrapped response. */ +export async function seekFlightRecorder( + executionId: string, + turnIndex: number, +): Promise { + const response = await apiClient.get>( + `/cockpit/flight-recorder/${encodeURIComponent(executionId)}/seek/${turnIndex}`, + ) + return unwrap(response) +} + +/** Pause a running task (transition to INTERRUPTED). POSTs { task_id, reason } to /cockpit/interventions/pause and returns the unwrapped response. */ +export async function pauseTask(taskId: string, reason: string): Promise { + const response = await apiClient.post>( + '/cockpit/interventions/pause', + { task_id: taskId, reason }, + ) + return unwrap(response) +} + +/** Kill a running task (cancel it). POSTs { task_id, reason } to /cockpit/interventions/kill and returns the unwrapped response. */ +export async function killTask(taskId: string, reason: string): Promise { + const response = await apiClient.post>( + '/cockpit/interventions/kill', + { task_id: taskId, reason }, + ) + return unwrap(response) +} + +/** Queue a hint for a running agent (applied at the next safe turn boundary). POSTs { execution_id, agent_id, text } to /cockpit/interventions/hint and returns the unwrapped response. */ +export async function sendHint( + executionId: string, + agentId: string, + text: string, +): Promise { + const response = await apiClient.post>( + '/cockpit/interventions/hint', + { execution_id: executionId, agent_id: agentId, text }, + ) + return unwrap(response) +} + +/** Queue a redirect for a running agent. 
*/ +export async function redirectAgent( + executionId: string, + agentId: string, + text: string, +): Promise { /* POSTs the same { execution_id, agent_id, text } body shape as sendHint, but to /cockpit/interventions/redirect, and returns the unwrapped response */ + const response = await apiClient.post>( + '/cockpit/interventions/redirect', + { execution_id: executionId, agent_id: agentId, text }, + ) + return unwrap(response) +} diff --git a/web/src/api/types/openapi.gen.ts b/web/src/api/types/openapi.gen.ts index 290f5cac63..eb554a0ccf 100644 --- a/web/src/api/types/openapi.gen.ts +++ b/web/src/api/types/openapi.gen.ts @@ -4885,6 +4885,8 @@ export type components = { readonly agent_id: string; /** @description Accumulated cost for the task */ readonly cost: number; + /** @description Execution id of the latest recorded turn, when any */ + readonly execution_id: string | null; /** @description Cost beyond the runaway threshold */ readonly is_runaway: boolean; /** @description Idle beyond the stuck threshold */ diff --git a/web/src/api/types/websocket.ts b/web/src/api/types/websocket.ts index ab130b38e4..44648e3cf0 100644 --- a/web/src/api/types/websocket.ts +++ b/web/src/api/types/websocket.ts @@ -16,7 +16,7 @@ export const WS_CHANNELS = [ 'approvals', 'meetings', 'artifacts', 'projects', 'company', 'departments', 'clients', 'requests', 'simulations', 'reviews', 'events', 'interrupts', - 'scaling', + 'scaling', 'cockpit', ] as const export type WsChannel = typeof WS_CHANNELS[number] diff --git a/web/src/components/layout/Sidebar.tsx b/web/src/components/layout/Sidebar.tsx index c0885f62ae..8aa1e29bd6 100644 --- a/web/src/components/layout/Sidebar.tsx +++ b/web/src/components/layout/Sidebar.tsx @@ -16,6 +16,7 @@ import { KeyRound, LayoutDashboard, Layers, + Radio, LibraryBig, LogOut, MessageSquare, @@ -225,6 +226,7 @@ function SidebarNav({ collapsed }: { collapsed: boolean }) {