Skip to content
6 changes: 3 additions & 3 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,15 +113,15 @@ curl http://localhost:3000/api/v1/health # backend (via web proxy)

```text
src/synthorg/
api/ # Litestar REST + WebSocket API (controllers, guards, channels, JWT + API key auth, approval gate integration, coordination endpoint, RFC 9457 structured errors (ErrorCategory, ErrorCode, ErrorDetail))
api/ # Litestar REST + WebSocket API (controllers, guards, channels, JWT + API key auth, approval gate integration, coordination endpoint, collaboration endpoint, RFC 9457 structured errors (ErrorCategory, ErrorCode, ErrorDetail))
budget/ # Cost tracking, budget enforcement (pre-flight/in-flight checks, auto-downgrade), billing periods, cost tiers, quota/subscription tracking, CFO cost optimization (anomaly detection, efficiency analysis, downgrade recommendations, approval decisions), spending reports, budget errors (BudgetExhaustedError, DailyLimitExceededError, QuotaExhaustedError)
cli/ # Python CLI module (superseded by top-level cli/ Go binary)
communication/ # Message bus, dispatcher, messenger, channels, delegation, loop prevention, conflict resolution
meeting/ # Meeting protocol (round-robin, position papers, structured phases), scheduler (frequency, participant resolver), orchestrator
config/ # YAML company config loading and validation
core/ # Shared domain models, base classes, and resilience config (RetryConfig, RateLimiterConfig)
engine/ # Agent orchestration, execution loops, parallel execution, task decomposition, routing, task assignment, centralized single-writer task state engine (TaskEngine), task lifecycle, recovery, shutdown, workspace isolation, coordination (multi-agent pipeline: TopologyDispatcher protocol, 4 dispatchers — SAS/centralized/decentralized/context-dependent, wave execution, workspace lifecycle integration, CoordinationSectionConfig company config bridge, build_coordinator factory), coordination error classification, prompt policy validation, checkpoint recovery (checkpoint/, per-turn persistence, heartbeat detection, CheckpointRecoveryStrategy), approval gate (escalation detection, context parking/resume, EscalationInfo/ResumePayload models), stagnation detection (stagnation/, StagnationDetector protocol, ToolRepetitionDetector, dual-signal analysis, corrective prompt injection), agent runtime state (AgentRuntimeState, lightweight per-agent execution status for dashboard queries and recovery)
hr/ # HR engine: hiring, firing, onboarding, offboarding, agent registry, performance tracking (task metrics, collaboration scoring, trend detection), promotion/demotion (criteria evaluation, approval strategies, model mapping)
hr/ # HR engine: hiring, firing, onboarding, offboarding, agent registry, performance tracking (task metrics, collaboration scoring, LLM calibration sampling, collaboration overrides, trend detection), promotion/demotion (criteria evaluation, approval strategies, model mapping)
memory/ # Persistent agent memory (pluggable MemoryBackend protocol), backends/ (Mem0 adapter: backends/mem0/), retrieval pipeline (ranking, RRF fusion, injection, context formatting, non-inferable filtering), shared org memory (org/), consolidation/archival (consolidation/)
persistence/ # Operational data persistence — pluggable PersistenceBackend protocol, SQLite as the initial backend (see Memory & Persistence design page)
observability/ # Structured logging, correlation tracking, log sinks
Expand Down Expand Up @@ -191,7 +191,7 @@ site/ # Astro landing page (synthorg.io)
- **Every module** with business logic MUST have: `from synthorg.observability import get_logger` then `logger = get_logger(__name__)`
- **Never** use `import logging` / `logging.getLogger()` / `print()` in application code
- **Variable name**: always `logger` (not `_logger`, not `log`)
- **Event names**: always use constants from the domain-specific module under `synthorg.observability.events` (e.g., `PROVIDER_CALL_START` from `events.provider`, `BUDGET_RECORD_ADDED` from `events.budget`, `CFO_ANOMALY_DETECTED` from `events.cfo`, `CONFLICT_DETECTED` from `events.conflict`, `MEETING_STARTED` from `events.meeting`, `MEETING_SCHEDULER_STARTED` from `events.meeting`, `MEETING_SCHEDULER_ERROR` from `events.meeting`, `MEETING_SCHEDULER_STOPPED` from `events.meeting`, `MEETING_PERIODIC_TRIGGERED` from `events.meeting`, `MEETING_EVENT_TRIGGERED` from `events.meeting`, `MEETING_PARTICIPANTS_RESOLVED` from `events.meeting`, `MEETING_NO_PARTICIPANTS` from `events.meeting`, `MEETING_NOT_FOUND` from `events.meeting`, `CLASSIFICATION_START` from `events.classification`, `CONSOLIDATION_START` from `events.consolidation`, `ORG_MEMORY_QUERY_START` from `events.org_memory`, `API_REQUEST_STARTED` from `events.api`, `API_REQUEST_COMPLETED` from `events.api`, `API_REQUEST_ERROR` from `events.api`, `API_ROUTE_NOT_FOUND` from `events.api`, `API_HEALTH_CHECK` from `events.api`, `API_COORDINATION_STARTED` from `events.api`, `API_COORDINATION_COMPLETED` from `events.api`, `API_COORDINATION_FAILED` from `events.api`, `API_COORDINATION_AGENT_RESOLVE_FAILED` from `events.api`, `CODE_RUNNER_EXECUTE_START` from `events.code_runner`, `DOCKER_EXECUTE_START` from `events.docker`, `MCP_INVOKE_START` from `events.mcp`, `SECURITY_EVALUATE_START` from `events.security`, `HR_HIRING_REQUEST_CREATED` from `events.hr`, `PERF_METRIC_RECORDED` from `events.performance`, `TRUST_EVALUATE_START` from `events.trust`, `PROMOTION_EVALUATE_START` from `events.promotion`, `PROMPT_BUILD_START` from `events.prompt`, `MEMORY_RETRIEVAL_START` from `events.memory`, `MEMORY_BACKEND_CONNECTED` from `events.memory`, `MEMORY_ENTRY_STORED` from `events.memory`, `MEMORY_BACKEND_SYSTEM_ERROR` from `events.memory`, `MEMORY_RRF_FUSION_COMPLETE` from `events.memory`, `MEMORY_RRF_VALIDATION_FAILED` from 
`events.memory`, `AUTONOMY_ACTION_AUTO_APPROVED` from `events.autonomy`, `TIMEOUT_POLICY_EVALUATED` from `events.timeout`, `PERSISTENCE_AUDIT_ENTRY_SAVED` from `events.persistence`, `TASK_ENGINE_STARTED` from `events.task_engine`, `COORDINATION_STARTED` from `events.coordination`, `COORDINATION_FACTORY_BUILT` from `events.coordination`, `COMMUNICATION_DISPATCH_START` from `events.communication`, `COMPANY_STARTED` from `events.company`, `CONFIG_LOADED` from `events.config`, `CORRELATION_ID_CREATED` from `events.correlation`, `DECOMPOSITION_STARTED` from `events.decomposition`, `DELEGATION_STARTED` from `events.delegation`, `EXECUTION_LOOP_START` from `events.execution`, `CHECKPOINT_SAVED` from `events.checkpoint`, `PERSISTENCE_CHECKPOINT_SAVED` from `events.persistence`, `GIT_OPERATION_START` from `events.git`, `PARALLEL_GROUP_START` from `events.parallel`, `PERSONALITY_LOADED` from `events.personality`, `QUOTA_CHECKED` from `events.quota`, `ROLE_ASSIGNED` from `events.role`, `ROUTING_STARTED` from `events.routing`, `SANDBOX_EXECUTE_START` from `events.sandbox`, `TASK_CREATED` from `events.task`, `TASK_ASSIGNMENT_STARTED` from `events.task_assignment`, `TASK_ROUTING_STARTED` from `events.task_routing`, `TEMPLATE_LOADED` from `events.template`, `TOOL_INVOKE_START` from `events.tool`, `TOOL_OUTPUT_WITHHELD` from `events.tool`, `WORKSPACE_CREATED` from `events.workspace`, `APPROVAL_GATE_ESCALATION_DETECTED` from `events.approval_gate`, `APPROVAL_GATE_ESCALATION_FAILED` from `events.approval_gate`, `APPROVAL_GATE_INITIALIZED` from `events.approval_gate`, `APPROVAL_GATE_RISK_CLASSIFIED` from `events.approval_gate`, `APPROVAL_GATE_RISK_CLASSIFY_FAILED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_PARKED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_PARK_FAILED` from `events.approval_gate`, `APPROVAL_GATE_PARK_TASKLESS` from `events.approval_gate`, `APPROVAL_GATE_RESUME_STARTED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_RESUMED` from 
`events.approval_gate`, `APPROVAL_GATE_RESUME_FAILED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_DELETE_FAILED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_TRIGGERED` from `events.approval_gate`, `APPROVAL_GATE_NO_PARKED_CONTEXT` from `events.approval_gate`, `APPROVAL_GATE_LOOP_WIRING_WARNING` from `events.approval_gate`, `STAGNATION_CHECK_PERFORMED` from `events.stagnation`, `STAGNATION_DETECTED` from `events.stagnation`, `STAGNATION_CORRECTION_INJECTED` from `events.stagnation`, `STAGNATION_TERMINATED` from `events.stagnation`, `PERSISTENCE_AGENT_STATE_SAVED` from `events.persistence`, `PERSISTENCE_AGENT_STATE_FETCHED` from `events.persistence`, `PERSISTENCE_AGENT_STATE_ACTIVE_QUERIED` from `events.persistence`, `PERSISTENCE_AGENT_STATE_DELETED` from `events.persistence`). Import directly: `from synthorg.observability.events.<domain> import EVENT_CONSTANT`
- **Event names**: always use constants from the domain-specific module under `synthorg.observability.events` (e.g., `PROVIDER_CALL_START` from `events.provider`, `BUDGET_RECORD_ADDED` from `events.budget`, `CFO_ANOMALY_DETECTED` from `events.cfo`, `CONFLICT_DETECTED` from `events.conflict`, `MEETING_STARTED` from `events.meeting`, `MEETING_SCHEDULER_STARTED` from `events.meeting`, `MEETING_SCHEDULER_ERROR` from `events.meeting`, `MEETING_SCHEDULER_STOPPED` from `events.meeting`, `MEETING_PERIODIC_TRIGGERED` from `events.meeting`, `MEETING_EVENT_TRIGGERED` from `events.meeting`, `MEETING_PARTICIPANTS_RESOLVED` from `events.meeting`, `MEETING_NO_PARTICIPANTS` from `events.meeting`, `MEETING_NOT_FOUND` from `events.meeting`, `CLASSIFICATION_START` from `events.classification`, `CONSOLIDATION_START` from `events.consolidation`, `ORG_MEMORY_QUERY_START` from `events.org_memory`, `API_REQUEST_STARTED` from `events.api`, `API_REQUEST_COMPLETED` from `events.api`, `API_REQUEST_ERROR` from `events.api`, `API_ROUTE_NOT_FOUND` from `events.api`, `API_HEALTH_CHECK` from `events.api`, `API_COORDINATION_STARTED` from `events.api`, `API_COORDINATION_COMPLETED` from `events.api`, `API_COORDINATION_FAILED` from `events.api`, `API_COORDINATION_AGENT_RESOLVE_FAILED` from `events.api`, `CODE_RUNNER_EXECUTE_START` from `events.code_runner`, `DOCKER_EXECUTE_START` from `events.docker`, `MCP_INVOKE_START` from `events.mcp`, `SECURITY_EVALUATE_START` from `events.security`, `HR_HIRING_REQUEST_CREATED` from `events.hr`, `PERF_METRIC_RECORDED` from `events.performance`, `PERF_LLM_SAMPLE_STARTED` from `events.performance`, `PERF_LLM_SAMPLE_COMPLETED` from `events.performance`, `PERF_LLM_SAMPLE_FAILED` from `events.performance`, `PERF_OVERRIDE_SET` from `events.performance`, `PERF_OVERRIDE_CLEARED` from `events.performance`, `PERF_OVERRIDE_APPLIED` from `events.performance`, `PERF_OVERRIDE_EXPIRED` from `events.performance`, `TRUST_EVALUATE_START` from `events.trust`, 
`PROMOTION_EVALUATE_START` from `events.promotion`, `PROMPT_BUILD_START` from `events.prompt`, `MEMORY_RETRIEVAL_START` from `events.memory`, `MEMORY_BACKEND_CONNECTED` from `events.memory`, `MEMORY_ENTRY_STORED` from `events.memory`, `MEMORY_BACKEND_SYSTEM_ERROR` from `events.memory`, `MEMORY_RRF_FUSION_COMPLETE` from `events.memory`, `MEMORY_RRF_VALIDATION_FAILED` from `events.memory`, `AUTONOMY_ACTION_AUTO_APPROVED` from `events.autonomy`, `TIMEOUT_POLICY_EVALUATED` from `events.timeout`, `PERSISTENCE_AUDIT_ENTRY_SAVED` from `events.persistence`, `TASK_ENGINE_STARTED` from `events.task_engine`, `COORDINATION_STARTED` from `events.coordination`, `COORDINATION_FACTORY_BUILT` from `events.coordination`, `COMMUNICATION_DISPATCH_START` from `events.communication`, `COMPANY_STARTED` from `events.company`, `CONFIG_LOADED` from `events.config`, `CORRELATION_ID_CREATED` from `events.correlation`, `DECOMPOSITION_STARTED` from `events.decomposition`, `DELEGATION_STARTED` from `events.delegation`, `EXECUTION_LOOP_START` from `events.execution`, `CHECKPOINT_SAVED` from `events.checkpoint`, `PERSISTENCE_CHECKPOINT_SAVED` from `events.persistence`, `GIT_OPERATION_START` from `events.git`, `PARALLEL_GROUP_START` from `events.parallel`, `PERSONALITY_LOADED` from `events.personality`, `QUOTA_CHECKED` from `events.quota`, `ROLE_ASSIGNED` from `events.role`, `ROUTING_STARTED` from `events.routing`, `SANDBOX_EXECUTE_START` from `events.sandbox`, `TASK_CREATED` from `events.task`, `TASK_ASSIGNMENT_STARTED` from `events.task_assignment`, `TASK_ROUTING_STARTED` from `events.task_routing`, `TEMPLATE_LOADED` from `events.template`, `TOOL_INVOKE_START` from `events.tool`, `TOOL_OUTPUT_WITHHELD` from `events.tool`, `WORKSPACE_CREATED` from `events.workspace`, `APPROVAL_GATE_ESCALATION_DETECTED` from `events.approval_gate`, `APPROVAL_GATE_ESCALATION_FAILED` from `events.approval_gate`, `APPROVAL_GATE_INITIALIZED` from `events.approval_gate`, `APPROVAL_GATE_RISK_CLASSIFIED` from 
`events.approval_gate`, `APPROVAL_GATE_RISK_CLASSIFY_FAILED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_PARKED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_PARK_FAILED` from `events.approval_gate`, `APPROVAL_GATE_PARK_TASKLESS` from `events.approval_gate`, `APPROVAL_GATE_RESUME_STARTED` from `events.approval_gate`, `APPROVAL_GATE_CONTEXT_RESUMED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_FAILED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_DELETE_FAILED` from `events.approval_gate`, `APPROVAL_GATE_RESUME_TRIGGERED` from `events.approval_gate`, `APPROVAL_GATE_NO_PARKED_CONTEXT` from `events.approval_gate`, `APPROVAL_GATE_LOOP_WIRING_WARNING` from `events.approval_gate`, `STAGNATION_CHECK_PERFORMED` from `events.stagnation`, `STAGNATION_DETECTED` from `events.stagnation`, `STAGNATION_CORRECTION_INJECTED` from `events.stagnation`, `STAGNATION_TERMINATED` from `events.stagnation`, `PERSISTENCE_AGENT_STATE_SAVED` from `events.persistence`, `PERSISTENCE_AGENT_STATE_FETCHED` from `events.persistence`, `PERSISTENCE_AGENT_STATE_ACTIVE_QUERIED` from `events.persistence`, `PERSISTENCE_AGENT_STATE_DELETED` from `events.persistence`). Import directly: `from synthorg.observability.events.<domain> import EVENT_CONSTANT`
- **Structured kwargs**: always `logger.info(EVENT, key=value)` — never `logger.info("msg %s", val)`
- **All error paths** must log at WARNING or ERROR with context before raising
- **All state transitions** must log at INFO
Expand Down
2 changes: 1 addition & 1 deletion docs/architecture/decisions.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ All significant design and architecture decisions, organized by domain. Each ent
| ID | Decision | Rationale | Alternatives considered |
|----|----------|-----------|------------------------|
| D2 | Pluggable `QualityScoringStrategy`; initial: layered (CI signals + LLM judge + human override) | Multiple independent signals, hardest to game. Start with Layer 1 (free CI signals), add layers incrementally | Human only (doesn't scale), LLM-as-judge only (12+ known biases), CI signals only (narrow view), peer ratings (reciprocity bias). Research: LLM judges >80% human alignment but biased (CALM framework) |
| D3 | Pluggable `CollaborationScoringStrategy`; initial: automated behavioral telemetry | Objective, zero token cost. Weighted average of delegation success, response latency, conflict constructiveness, meeting contribution, loop prevention, handoff completeness | LLM evaluation (expensive, circular — LLM judging LLM), peer ratings (reciprocity/collusion), human-provided (doesn't scale) |
| D3 | Pluggable `CollaborationScoringStrategy`; initial: automated behavioral telemetry + LLM calibration sampling (1%, opt-in) + human override via API | Objective, zero token cost for primary strategy. LLM sampling (1%) for drift calibration only — not full LLM evaluation. Human override via API for targeted corrections. Weighted average of delegation success, response latency, conflict constructiveness, meeting contribution, loop prevention, handoff completeness | Full LLM evaluation as primary strategy (expensive, circular — LLM judging LLM), peer ratings (reciprocity/collusion), human-provided as sole source (doesn't scale) |
| D11 | Pluggable `MetricsWindowStrategy`; initial: multiple windows (7d, 30d, 90d) | Industry standard (Google SRE Workbook prescribes multi-window alerting). Handles heterogeneous metric cadences. Min 5 data points per window | Fixed 30d (too rigid), configurable per-metric (added complexity without multi-resolution benefit) |
| D12 | Pluggable `TrendDetectionStrategy`; initial: Theil-Sen regression + thresholds | 29.3% outlier breakdown (tolerates ~1 in 3 bad data points). Classifies trends as improving/stable/declining. Min 5 data points | Period-over-period (statistically weak), OLS regression (0% outlier breakdown), threshold-only (not a trend detection method). EPA recommends Theil-Sen for noisy data |

Expand Down
9 changes: 6 additions & 3 deletions docs/design/agents.md
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,12 @@ agent_metrics:
)
```

Weights are configurable per-role. Optional: periodic LLM sampling (1%) for
calibration + human override via API. Future strategies: LLM evaluation, peer
ratings, human-provided.
Weights are configurable per-role. Periodic LLM sampling (1%, configurable)
for calibration is implemented via `LlmCalibrationSampler` (opt-in,
requires `llm_sampling_model` config). Human override via API is
implemented via `CollaborationOverrideStore` + `CollaborationController`
at `/agents/{agent_id}/collaboration`. Future strategies: full LLM evaluation
as a primary scorer, peer ratings, human-provided scores as the sole source.

---

Expand Down
4 changes: 4 additions & 0 deletions src/synthorg/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from synthorg.core.approval import ApprovalItem # noqa: TC001
from synthorg.engine.coordination.service import MultiAgentCoordinator # noqa: TC001
from synthorg.engine.task_engine import TaskEngine # noqa: TC001
from synthorg.hr.performance.tracker import PerformanceTracker # noqa: TC001
from synthorg.hr.registry import AgentRegistryService # noqa: TC001
from synthorg.observability import get_logger
from synthorg.observability.events.api import (
Expand Down Expand Up @@ -436,6 +437,7 @@ def create_app( # noqa: PLR0913
agent_registry: AgentRegistryService | None = None,
meeting_orchestrator: MeetingOrchestrator | None = None,
meeting_scheduler: MeetingScheduler | None = None,
performance_tracker: PerformanceTracker | None = None,
) -> Litestar:
"""Create and configure the Litestar application.

Expand All @@ -454,6 +456,7 @@ def create_app( # noqa: PLR0913
agent_registry: Agent registry service.
meeting_orchestrator: Meeting orchestrator.
meeting_scheduler: Meeting scheduler.
performance_tracker: Performance tracking service.

Returns:
Configured Litestar application.
Expand Down Expand Up @@ -498,6 +501,7 @@ def create_app( # noqa: PLR0913
agent_registry=agent_registry,
meeting_orchestrator=meeting_orchestrator,
meeting_scheduler=meeting_scheduler,
performance_tracker=performance_tracker,
startup_time=time.monotonic(),
)

Expand Down
3 changes: 3 additions & 0 deletions src/synthorg/api/controllers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from synthorg.api.controllers.artifacts import ArtifactController
from synthorg.api.controllers.autonomy import AutonomyController
from synthorg.api.controllers.budget import BudgetController
from synthorg.api.controllers.collaboration import CollaborationController
from synthorg.api.controllers.company import CompanyController
from synthorg.api.controllers.coordination import CoordinationController
from synthorg.api.controllers.departments import DepartmentController
Expand Down Expand Up @@ -36,6 +37,7 @@
ApprovalsController,
AutonomyController,
AuthController,
CollaborationController,
CoordinationController,
)

Expand All @@ -48,6 +50,7 @@
"AuthController",
"AutonomyController",
"BudgetController",
"CollaborationController",
"CompanyController",
"Controller",
"CoordinationController",
Expand Down
Loading
Loading