Aureliolo · Aureliolo · Apr 8, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
@@ -90,7 +90,7 @@ See `web/CLAUDE.md` for the full component inventory, design token rules, and po
 - **Every module** with business logic MUST have: `from synthorg.observability import get_logger` then `logger = get_logger(__name__)`
 - **Never** use `import logging` / `logging.getLogger()` / `print()` in application code (exception: `observability/setup.py`, `observability/sinks.py`, `observability/syslog_handler.py`, and `observability/http_handler.py` may use stdlib `logging` and `print(..., file=sys.stderr)` for handler construction, bootstrap, and error reporting code that runs before or during logging system configuration)
 - **Variable name**: always `logger` (not `_logger`, not `log`)
-- **Event names**: always use constants from the domain-specific module under `synthorg.observability.events` (e.g., `API_REQUEST_STARTED` from `events.api`, `TOOL_INVOKE_START` from `events.tool`, `GIT_COMMAND_START` from `events.git`, `CONTEXT_BUDGET_FILL_UPDATED` from `events.context_budget`, `BACKUP_STARTED` from `events.backup`, `SETUP_COMPLETED` from `events.setup`, `ROUTING_CANDIDATE_SELECTED` from `events.routing`, `SHIPPING_HTTP_BATCH_SENT` from `events.shipping`, `EVAL_REPORT_COMPUTED` from `events.evaluation`, `PROMPT_PROFILE_SELECTED` from `events.prompt`, `PROCEDURAL_MEMORY_START` from `events.procedural_memory`, `PERF_LLM_JUDGE_STARTED` from `events.performance`, `TASK_ENGINE_OBSERVER_FAILED` from `events.task_engine`, `WORKFLOW_EXEC_COMPLETED` from `events.workflow_execution`, `BLUEPRINT_INSTANTIATE_START` from `events.blueprint`, `WORKFLOW_DEF_ROLLED_BACK` from `events.workflow_definition`, `WORKFLOW_VERSION_SAVED` from `events.workflow_version`, `MEMORY_FINE_TUNE_STARTED`, `MEMORY_SELF_EDIT_TOOL_EXECUTE`, `MEMORY_SELF_EDIT_CORE_READ`, `MEMORY_SELF_EDIT_CORE_WRITE`, `MEMORY_SELF_EDIT_CORE_WRITE_REJECTED`, `MEMORY_SELF_EDIT_ARCHIVAL_SEARCH`, `MEMORY_SELF_EDIT_ARCHIVAL_WRITE`, `MEMORY_SELF_EDIT_RECALL_READ`, `MEMORY_SELF_EDIT_RECALL_WRITE`, `MEMORY_SELF_EDIT_WRITE_FAILED` from `events.memory`, `REPORTING_GENERATION_STARTED` from `events.reporting`, `RISK_BUDGET_SCORE_COMPUTED` from `events.risk_budget`, `LLM_STRATEGY_SYNTHESIZED` and `DISTILLATION_CAPTURED` from `events.consolidation`, `MEMORY_DIVERSITY_RERANKED`, `MEMORY_DIVERSITY_RERANK_FAILED`, and `MEMORY_REFORMULATION_ROUND` from `events.memory`, `NOTIFICATION_DISPATCHED` and `NOTIFICATION_DISPATCH_FAILED` from `events.notification`, `QUALITY_STEP_CLASSIFIED` from `events.quality`, `HEALTH_TICKET_EMITTED` from `events.health`, `TRAJECTORY_SCORING_START` from `events.trajectory`, `COORD_METRICS_AMDAHL_COMPUTED` from `events.coordination_metrics`, `WEB_REQUEST_START` and `WEB_SSRF_BLOCKED` from `events.web`, `DB_QUERY_START` and `DB_WRITE_BLOCKED` from `events.database`, `TERMINAL_COMMAND_START` and `TERMINAL_COMMAND_BLOCKED` from `events.terminal`, `SUB_CONSTRAINT_RESOLVED` and `SUB_CONSTRAINT_DENIED` from `events.sub_constraint`, `VERSION_SAVED` and `VERSION_SNAPSHOT_FAILED` from `events.versioning`, `ANALYTICS_AGGREGATION_COMPUTED` and `ANALYTICS_RETRY_RATE_ALERT` from `events.analytics`, `CALL_CLASSIFICATION_COMPUTED` from `events.call_classification`, `QUOTA_THRESHOLD_ALERT` and `QUOTA_POLL_FAILED` from `events.quota`). Each domain has its own module -- see `src/synthorg/observability/events/` for the full inventory of constants. Import directly: `from synthorg.observability.events.<domain> import EVENT_CONSTANT`
+- **Event names**: always use constants from the domain-specific module under `synthorg.observability.events` (e.g., `API_REQUEST_STARTED` from `events.api`, `TOOL_INVOKE_START` from `events.tool`, `GIT_COMMAND_START` from `events.git`, `CONTEXT_BUDGET_FILL_UPDATED` from `events.context_budget`, `BACKUP_STARTED` from `events.backup`, `SETUP_COMPLETED` from `events.setup`, `ROUTING_CANDIDATE_SELECTED` from `events.routing`, `SHIPPING_HTTP_BATCH_SENT` from `events.shipping`, `EVAL_REPORT_COMPUTED` from `events.evaluation`, `PROMPT_PROFILE_SELECTED` from `events.prompt`, `PROCEDURAL_MEMORY_START` from `events.procedural_memory`, `PERF_LLM_JUDGE_STARTED` from `events.performance`, `TASK_ENGINE_OBSERVER_FAILED` from `events.task_engine`, `WORKFLOW_EXEC_COMPLETED` from `events.workflow_execution`, `BLUEPRINT_INSTANTIATE_START` from `events.blueprint`, `WORKFLOW_DEF_ROLLED_BACK` from `events.workflow_definition`, `WORKFLOW_VERSION_SAVED` from `events.workflow_version`, `MEMORY_FINE_TUNE_STARTED`, `MEMORY_SELF_EDIT_TOOL_EXECUTE`, `MEMORY_SELF_EDIT_CORE_READ`, `MEMORY_SELF_EDIT_CORE_WRITE`, `MEMORY_SELF_EDIT_CORE_WRITE_REJECTED`, `MEMORY_SELF_EDIT_ARCHIVAL_SEARCH`, `MEMORY_SELF_EDIT_ARCHIVAL_WRITE`, `MEMORY_SELF_EDIT_RECALL_READ`, `MEMORY_SELF_EDIT_RECALL_WRITE`, `MEMORY_SELF_EDIT_WRITE_FAILED` from `events.memory`, `REPORTING_GENERATION_STARTED` from `events.reporting`, `RISK_BUDGET_SCORE_COMPUTED` from `events.risk_budget`, `LLM_STRATEGY_SYNTHESIZED` and `DISTILLATION_CAPTURED` from `events.consolidation`, `MEMORY_DIVERSITY_RERANKED`, `MEMORY_DIVERSITY_RERANK_FAILED`, and `MEMORY_REFORMULATION_ROUND` from `events.memory`, `NOTIFICATION_DISPATCHED` and `NOTIFICATION_DISPATCH_FAILED` from `events.notification`, `QUALITY_STEP_CLASSIFIED` from `events.quality`, `HEALTH_TICKET_EMITTED` from `events.health`, `TRAJECTORY_SCORING_START` from `events.trajectory`, `COORD_METRICS_AMDAHL_COMPUTED` from `events.coordination_metrics`, `WEB_REQUEST_START` and `WEB_SSRF_BLOCKED` from `events.web`, `DB_QUERY_START` and `DB_WRITE_BLOCKED` from `events.database`, `TERMINAL_COMMAND_START` and `TERMINAL_COMMAND_BLOCKED` from `events.terminal`, `SUB_CONSTRAINT_RESOLVED` and `SUB_CONSTRAINT_DENIED` from `events.sub_constraint`, `VERSION_SAVED` and `VERSION_SNAPSHOT_FAILED` from `events.versioning`, `ANALYTICS_AGGREGATION_COMPUTED` and `ANALYTICS_RETRY_RATE_ALERT` from `events.analytics`, `CALL_CLASSIFICATION_COMPUTED` from `events.call_classification`, `QUOTA_THRESHOLD_ALERT` and `QUOTA_POLL_FAILED` from `events.quota`, `CONFLICT_DEBATE_EVALUATOR_FAILED` from `events.conflict`, `DELEGATION_LOOP_CIRCUIT_BACKOFF` and `DELEGATION_LOOP_CIRCUIT_PERSIST_FAILED` from `events.delegation`, `MEETING_EVENT_COOLDOWN_SKIPPED` and `MEETING_TASKS_CAPPED` from `events.meeting`, `PERSISTENCE_CIRCUIT_BREAKER_SAVED`, `PERSISTENCE_CIRCUIT_BREAKER_SAVE_FAILED`, `PERSISTENCE_CIRCUIT_BREAKER_LOADED`, `PERSISTENCE_CIRCUIT_BREAKER_LOAD_FAILED`, `PERSISTENCE_CIRCUIT_BREAKER_DELETED`, and `PERSISTENCE_CIRCUIT_BREAKER_DELETE_FAILED` from `events.persistence`). Each domain has its own module -- see `src/synthorg/observability/events/` for the full inventory of constants. Import directly: `from synthorg.observability.events.<domain> import EVENT_CONSTANT`
 - **Structured kwargs**: always `logger.info(EVENT, key=value)` -- never `logger.info("msg %s", val)`
 - **All error paths** must log at WARNING or ERROR with context before raising
 - **All state transitions** must log at INFO

@@ -463,19 +463,19 @@ All three meeting protocols (StructuredPhases, RoundRobin, PositionPapers) guara
 bounded execution via `TokenTracker` phase-boundary checks, hard token budgets with 20%
 synthesis reserve, and turn/round limits. No protocol has unbounded execution paths.
 
-**Known risk -- meeting-task feedback loop**: `MeetingProtocolConfig.auto_create_tasks`
-defaults to `True`. The meeting orchestrator can spawn tasks from action items, and
-`MeetingScheduler.trigger_event()` has no deduplication or cooldown. Event-triggered
-meetings should use a `min_interval_seconds` guard to prevent runaway task/meeting cycles.
-This is a production risk for deployments using event-triggered meetings at scale.
+**Meeting-task feedback loop mitigation**: `MeetingProtocolConfig.auto_create_tasks`
+defaults to `True`. Two guardrails prevent runaway task/meeting cycles:
+`MeetingTypeConfig.min_interval_seconds` enforces per-type cooldown on event-triggered
+meetings, and `MeetingProtocolConfig.max_tasks_per_meeting` caps task creation from
+action items. See #1115.
 
 ### Conflict Resolution Termination
 
 All four conflict resolution strategies terminate with bounded resource use:
 
 - **AuthorityResolver**: Deterministic seniority comparison. Always terminates; no LLM calls.
 - **DebateResolver**: Single LLM judge call (one-shot, no retry loop). Falls back to
-  Authority if no evaluator configured. Exception in evaluator propagates without automatic fallback.
+  Authority if no evaluator configured, or if the evaluator raises an exception (#1117).
 - **HumanEscalationResolver**: Returns `ESCALATED_TO_HUMAN` immediately. **Stub
   implementation** pending #37 -- no actual blocking for human input yet.
 - **HybridResolver**: Single LLM review call; deterministic fallback to Authority on ambiguity.
@@ -488,28 +488,14 @@ Five mechanisms protect against swarm drift (`communication/loop_prevention/guar
 2. Max delegation depth (default 5)
 3. Content deduplication (60s window)
 4. Per-pair rate limiting (10/min)
-5. Circuit breaker (3 bounces, 300s cooldown)
-
-**Known risk -- circuit breaker bounce count reset**: After cooldown, the state entry is
-evicted entirely, resetting the bounce count to 0. Slow-burn delegation patterns (>60s
-between delegations) can bypass all five guards after each cooldown expiry.
-
-Recommended mitigation -- two options:
-
-1. **Exponential backoff on cooldown**: instead of evicting the entry, retain it and
-   apply `cooldown_seconds = base_cooldown * 2^bounce_count`. Each bounce extends the
-   cooldown duration exponentially, making slow-burn bypass progressively harder.
-2. **Non-resetting global bounce counter**: store a per-pair lifetime bounce count
-   separate from the per-window circuit breaker. Once the lifetime count exceeds a
-   threshold (e.g., 10), escalate to a permanent circuit-open state requiring manual
-   reset.
-
-Option 1 is simpler to implement within `circuit_breaker.py` without breaking the
-existing eviction model. Option 2 is more robust against very long-horizon patterns.
-
-**Known risk -- in-memory state**: All guard state (circuit breaker, dedup window, rate
-limiter) is in-memory. Service restart resets all guardrails. Consider persisting circuit
-breaker state to SQLite for restart resilience.
+5. Circuit breaker (3 bounces, exponential backoff cooldown capped at `max_cooldown_seconds`)
+
+Circuit breaker uses exponential backoff: `cooldown = base * 2^(trip_count - 1)`,
+capped at `max_cooldown_seconds` (default 3600s). On cooldown expiry, the bounce count
+resets but the trip count is preserved, so successive trips produce progressively longer
+cooldowns (#1116). Circuit breaker state (trip count, bounce count) is persisted to SQLite
+via `CircuitBreakerStateRepository` so guardrails survive restarts. Dedup window and rate
+limiter remain in-memory (short-lived by design).
 
 ### Microservices Anti-Patterns: Assessment
 

@@ -257,6 +257,7 @@ addopts = ["--strict-markers", "--strict-config", "-ra", "--tb=short", "--dist=w
 filterwarnings = [
     "error",
     "ignore:Core Pydantic V1 functionality:UserWarning",
+    "ignore:Exception in thread.*_connection_worker_thread:pytest.PytestUnhandledThreadExceptionWarning",
 ]
 
 # ---------------------------------------------------------------------------

@@ -119,6 +119,11 @@ class MeetingTypeConfig(BaseModel):
         default_factory=MeetingProtocolConfig,
         description="Meeting protocol configuration",
     )
+    min_interval_seconds: int | None = Field(
+        default=None,
+        ge=1,
+        description="Minimum seconds between event-triggered meetings of this type",
+    )
 
     @model_validator(mode="after")
     def _validate_frequency_or_trigger(self) -> Self:
@@ -129,6 +134,9 @@ def _validate_frequency_or_trigger(self) -> Self:
         if self.frequency is None and self.trigger is None:
             msg = "Exactly one of frequency or trigger must be set"
             raise ValueError(msg)
+        if self.min_interval_seconds is not None and self.trigger is None:
+            msg = "min_interval_seconds requires trigger-based meetings"
+            raise ValueError(msg)
         return self
 
     @model_validator(mode="after")
@@ -235,6 +243,19 @@ class CircuitBreakerConfig(BaseModel):
         gt=0,
         description="Cooldown period in seconds",
     )
+    max_cooldown_seconds: int = Field(
+        default=3600,
+        gt=0,
+        description="Maximum cooldown period in seconds (caps exponential backoff)",
+    )
+
+    @model_validator(mode="after")
+    def _validate_cooldown_bounds(self) -> Self:
+        """Ensure the exponential backoff cap is not below the base cooldown."""
+        if self.max_cooldown_seconds < self.cooldown_seconds:
+            msg = "max_cooldown_seconds must be >= cooldown_seconds"
+            raise ValueError(msg)
+        return self
 
 
-
+
+    @model_validator(mode="after")
+    def _validate_cooldown_bounds(self) -> Self:
+        """Ensure the exponential backoff cap is not below the base cooldown."""
+        if self.max_cooldown_seconds < self.cooldown_seconds:
+            raise ValueError(
+                "max_cooldown_seconds must be greater than or equal to cooldown_seconds"
+            )
+        return self
-
+
+    @model_validator(mode="after")
+    def _validate_cooldown_bounds(self) -> Self:
+        """Ensure the exponential backoff cap is not below the base cooldown."""
+        if self.max_cooldown_seconds < self.cooldown_seconds:
+            raise ValueError(
+                "max_cooldown_seconds must be greater than or equal to cooldown_seconds"
+            )
+        return self
 class LoopPreventionConfig(BaseModel):

@@ -37,11 +37,11 @@
 from synthorg.observability import get_logger
 from synthorg.observability.events.conflict import (
     CONFLICT_AUTHORITY_FALLBACK,
+    CONFLICT_DEBATE_EVALUATOR_FAILED,
     CONFLICT_DEBATE_JUDGE_DECIDED,
     CONFLICT_DEBATE_STARTED,
     CONFLICT_HIERARCHY_ERROR,
     CONFLICT_LCM_LOOKUP,
-    CONFLICT_STRATEGY_ERROR,
 )
 
 logger = get_logger(__name__)
@@ -101,15 +101,37 @@ async def resolve(self, conflict: Conflict) -> ConflictResolution:
                     conflict,
                     judge_id,
                 )
+            except MemoryError, RecursionError:
-            except MemoryError, RecursionError:
+            except (MemoryError, RecursionError):
-            except MemoryError, RecursionError:
+            except (MemoryError, RecursionError):
+                raise
             except Exception:
                 logger.exception(
-                    CONFLICT_STRATEGY_ERROR,
+                    CONFLICT_DEBATE_EVALUATOR_FAILED,
                     conflict_id=conflict.id,
-                    strategy="debate",
-                    operation="judge_evaluate",
                     judge=judge_id,
                 )
-                raise
+                try:
+                    winning_agent_id, reasoning = self._authority_fallback(
+                        conflict,
+                    )
+                except ConflictHierarchyError:
+                    # Hierarchy tiebreak failed too -- fall back without
+                    # hierarchy so we always produce a resolution.
+                    logger.warning(
+                        CONFLICT_HIERARCHY_ERROR,
+                        conflict_id=conflict.id,
+                        note="authority fallback hierarchy failed; "
+                        "using seniority without hierarchy",
+                    )
+                    best = pick_highest_seniority(
+                        conflict,
+                        hierarchy=None,
+                    )
+                    winning_agent_id = best.agent_id
+                    reasoning = (
+                        f"Debate fallback: authority-based judging "
+                        f"(no hierarchy) -- {best.agent_id} "
+                        f"({best.agent_level}) has highest seniority"
+                    )
         else:
             logger.warning(
                 CONFLICT_AUTHORITY_FALLBACK,