diff --git a/.github/release-please-config.json b/.github/release-please-config.json index b6dd987b2a..ecc2145049 100644 --- a/.github/release-please-config.json +++ b/.github/release-please-config.json @@ -4,6 +4,7 @@ ".": { "release-type": "python", "package-name": "ai-company", + "include-component-in-tag": false, "bump-minor-pre-major": true, "bump-patch-for-minor-pre-major": true, "changelog-sections": [ diff --git a/CLAUDE.md b/CLAUDE.md index e99c953c7b..515ebab4bc 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -66,11 +66,11 @@ curl http://localhost:3000/api/v1/health # backend (via web proxy) ```text src/ai_company/ api/ # Litestar REST + WebSocket API (controllers, guards, channels) - budget/ # Cost tracking, budget enforcement (pre-flight/in-flight checks, auto-downgrade), billing periods, cost tiers, quota/subscription tracking, CFO cost optimization (anomaly detection, efficiency analysis, downgrade recommendations, approval decisions), spending reports + budget/ # Cost tracking, budget enforcement (pre-flight/in-flight checks, auto-downgrade), billing periods, cost tiers, quota/subscription tracking, CFO cost optimization (anomaly detection, efficiency analysis, downgrade recommendations, approval decisions), spending reports, budget errors (BudgetExhaustedError, DailyLimitExceededError, QuotaExhaustedError) cli/ # CLI interface (future — thin API wrapper if needed) communication/ # Message bus, dispatcher, messenger, channels, delegation, loop prevention, conflict resolution, meeting protocol config/ # YAML company config loading and validation - core/ # Shared domain models and base classes + core/ # Shared domain models, base classes, and resilience config (RetryConfig, RateLimiterConfig) engine/ # Agent orchestration, execution loops, parallel execution, task decomposition, routing, task assignment, task lifecycle, recovery, shutdown, workspace isolation, coordination error classification, and prompt policy validation hr/ # HR engine: hiring, firing, onboarding, offboarding, agent registry, performance tracking (task metrics, collaboration scoring, trend detection), promotion/demotion (criteria evaluation, approval strategies, model mapping) memory/ # Persistent agent memory (Mem0 initial, custom stack future — ADR-001), retrieval pipeline (ranking, injection, context formatting, non-inferable filtering), shared org memory (org/), consolidation/archival (consolidation/) diff --git a/DESIGN_SPEC.md b/DESIGN_SPEC.md index d362aebfc7..c489d8a459 100644 --- a/DESIGN_SPEC.md +++ b/DESIGN_SPEC.md @@ -2775,7 +2775,8 @@ ai-company/ │ │ ├── artifact.py # Produced work items │ │ ├── role.py # Role model │ │ ├── role_catalog.py # Role catalog -│ │ └── personality.py # Personality compatibility scoring +│ │ ├── personality.py # Personality compatibility scoring +│ │ └── resilience_config.py # RetryConfig, RateLimiterConfig (shared by config.schema + providers.resilience) │ ├── engine/ # Agent orchestration, execution loops, parallel execution, task decomposition, routing, task assignment, task lifecycle, recovery, shutdown, workspace isolation, coordination error classification, and prompt policy validation │ │ ├── errors.py # Engine error hierarchy │ │ ├── prompt.py # System prompt builder @@ -3048,7 +3049,6 @@ ai-company/ │ │ │ ├── router.py # Router orchestrator │ │ │ └── strategies.py # Routing strategies │ │ └── resilience/ # Resilience patterns -│ │ ├── config.py # RetryConfig, RateLimiterConfig │ │ ├── errors.py # RetryExhaustedError │ │ ├── rate_limiter.py # Token bucket rate limiter │ │ └── retry.py # RetryHandler with backoff @@ -3157,6 +3157,7 @@ ai-company/ │ │ ├── enums.py # Budget-related enums │ │ ├── billing.py # Billing period computation utilities │ │ ├── enforcer.py # BudgetEnforcer service (pre-flight, in-flight, auto-downgrade) +│ │ ├── errors.py # BudgetExhaustedError, DailyLimitExceededError, QuotaExhaustedError │ │ ├── optimizer.py # CostOptimizer service — anomaly detection, efficiency analysis, downgrade recommendations, approval decisions (M5) │ │ ├── optimizer_models.py # CostOptimizer domain models — anomaly, efficiency, downgrade, approval, config (M5) │ │ ├── quota.py # Quota/subscription models, degradation config, quota snapshots diff --git a/pyproject.toml b/pyproject.toml index 73865b467e..aa14594df4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "aiosqlite==0.22.1", "jinja2==3.1.6", "jsonschema==4.26.0", - "litellm==1.82.0", + "litellm==1.82.1", "litestar[standard,structlog,pydantic,brotli,prometheus]==2.21.1", "mcp==1.26.0", "pydantic==2.12.5", diff --git a/src/ai_company/budget/__init__.py b/src/ai_company/budget/__init__.py index e73eb24303..2972039f27 100644 --- a/src/ai_company/budget/__init__.py +++ b/src/ai_company/budget/__init__.py @@ -38,6 +38,11 @@ ) from ai_company.budget.enforcer import BudgetEnforcer from ai_company.budget.enums import BudgetAlertLevel +from ai_company.budget.errors import ( + BudgetExhaustedError, + DailyLimitExceededError, + QuotaExhaustedError, +) from ai_company.budget.hierarchy import ( BudgetHierarchy, DepartmentBudget, @@ -100,6 +105,7 @@ "BudgetAlertLevel", "BudgetConfig", "BudgetEnforcer", + "BudgetExhaustedError", "BudgetHierarchy", "CategoryBreakdown", "CoordinationEfficiency", @@ -113,6 +119,7 @@ "CostTierDefinition", "CostTiersConfig", "CostTracker", + "DailyLimitExceededError", "DegradationAction", "DegradationConfig", "DepartmentBudget", @@ -135,6 +142,7 @@ "ProviderCostModel", "ProviderDistribution", "QuotaCheckResult", + "QuotaExhaustedError", "QuotaLimit", "QuotaSnapshot", "QuotaTracker", diff --git a/src/ai_company/budget/enforcer.py b/src/ai_company/budget/enforcer.py index fe7cabe93c..02c645b7ab 100644 --- a/src/ai_company/budget/enforcer.py +++ b/src/ai_company/budget/enforcer.py @@ -10,13 +10,13 @@ from ai_company.budget.billing import billing_period_start, daily_period_start from ai_company.budget.enums import BudgetAlertLevel -from ai_company.budget.quota import QuotaCheckResult -from ai_company.constants import BUDGET_ROUNDING_PRECISION -from ai_company.engine.errors import ( +from ai_company.budget.errors import ( BudgetExhaustedError, DailyLimitExceededError, QuotaExhaustedError, ) +from ai_company.budget.quota import QuotaCheckResult +from ai_company.constants import BUDGET_ROUNDING_PRECISION from ai_company.observability import get_logger from ai_company.observability.events.budget import ( BUDGET_ALERT_THRESHOLD_CROSSED, diff --git a/src/ai_company/budget/errors.py b/src/ai_company/budget/errors.py new file mode 100644 index 0000000000..62d5220c1e --- /dev/null +++ b/src/ai_company/budget/errors.py @@ -0,0 +1,32 @@ +"""Budget-layer error hierarchy. + +Defines budget-specific exceptions in a leaf module with no intra-project +imports, preventing circular dependency chains when these exceptions are +needed by both the budget enforcer and the engine layer. +""" + + +class BudgetExhaustedError(Exception): + """Budget exhaustion signal. + + Used in two contexts: + + 1. Raised directly by :meth:`BudgetEnforcer.check_can_execute` + when pre-flight budget checks fail (monthly hard stop or daily + limit exceeded). + 2. Caught by the engine layer (``AgentEngine.run``) and converted + into an ``ExecutionResult`` with + ``TerminationReason.BUDGET_EXHAUSTED``. + """ + + +class DailyLimitExceededError(BudgetExhaustedError): + """Per-agent daily spending limit exceeded.""" + + +class QuotaExhaustedError(BudgetExhaustedError): + """Raised when provider quota is exhausted. + + Raised for all degradation strategies. Degradation routing + (FALLBACK/QUEUE) is tracked in M7. + """ diff --git a/src/ai_company/config/schema.py b/src/ai_company/config/schema.py index 60baa7b5bb..39beb30bc2 100644 --- a/src/ai_company/config/schema.py +++ b/src/ai_company/config/schema.py @@ -18,6 +18,7 @@ WorkflowHandoff, ) from ai_company.core.enums import AutonomyLevel, CompanyType, SeniorityLevel +from ai_company.core.resilience_config import RateLimiterConfig, RetryConfig from ai_company.core.role import CustomRole # noqa: TC001 from ai_company.core.types import NotBlankStr # noqa: TC001 from ai_company.hr.promotion.config import PromotionConfig @@ -35,94 +36,6 @@ logger = get_logger(__name__) -# ── Resilience config models ───────────────────────────────────── -# Defined here (not in providers.resilience) to avoid circular imports -# between config ↔ providers. Re-exported by providers.resilience.config. - - -class RetryConfig(BaseModel): - """Configuration for automatic retry of transient provider errors. - - Attributes: - max_retries: Maximum number of retry attempts (0 disables retries). - base_delay: Initial delay in seconds before the first retry. - max_delay: Upper bound on computed delay in seconds. - exponential_base: Multiplier for exponential backoff. - jitter: Whether to add random jitter to delay. - """ - - model_config = ConfigDict(frozen=True, allow_inf_nan=False) - - max_retries: int = Field( - default=3, - ge=0, - le=10, - description="Maximum number of retry attempts (0 disables retries)", - ) - base_delay: float = Field( - default=1.0, - gt=0.0, - description="Initial delay in seconds before the first retry", - ) - max_delay: float = Field( - default=60.0, - gt=0.0, - description="Upper bound on computed delay in seconds", - ) - exponential_base: float = Field( - default=2.0, - gt=1.0, - description="Multiplier for exponential backoff", - ) - jitter: bool = Field( - default=True, - description="Whether to add random jitter to delay", - ) - - @model_validator(mode="after") - def _validate_delay_ordering(self) -> Self: - """Ensure base_delay does not exceed max_delay.""" - if self.base_delay > self.max_delay: - msg = ( - f"base_delay ({self.base_delay}) must be" - f" <= max_delay ({self.max_delay})" - ) - logger.warning( - CONFIG_VALIDATION_FAILED, - model="RetryConfig", - field="base_delay/max_delay", - base_delay=self.base_delay, - max_delay=self.max_delay, - reason=msg, - ) - raise ValueError(msg) - return self - - -class RateLimiterConfig(BaseModel): - """Configuration for client-side rate limiting. - - Attributes: - max_requests_per_minute: Maximum requests per minute - (0 means unlimited). - max_concurrent: Maximum concurrent in-flight requests - (0 means unlimited). - """ - - model_config = ConfigDict(frozen=True, allow_inf_nan=False) - - max_requests_per_minute: int = Field( - default=0, - ge=0, - description="Maximum requests per minute (0 = unlimited)", - ) - max_concurrent: int = Field( - default=0, - ge=0, - description="Maximum concurrent in-flight requests (0 = unlimited)", - ) - - class ProviderModelConfig(BaseModel): """Configuration for a single LLM model within a provider. diff --git a/src/ai_company/core/resilience_config.py b/src/ai_company/core/resilience_config.py new file mode 100644 index 0000000000..57685a79e5 --- /dev/null +++ b/src/ai_company/core/resilience_config.py @@ -0,0 +1,97 @@ +"""Resilience configuration models (retry + rate limiting). + +Defined in ``core/`` to avoid circular imports between ``config.schema`` +and ``providers.resilience``. Both modules import from here. +""" + +from typing import Self + +from pydantic import BaseModel, ConfigDict, Field, model_validator + +from ai_company.observability import get_logger +from ai_company.observability.events.config import CONFIG_VALIDATION_FAILED + +logger = get_logger(__name__) + + +class RetryConfig(BaseModel): + """Configuration for automatic retry of transient provider errors. + + Attributes: + max_retries: Maximum number of retry attempts (0 disables retries). + base_delay: Initial delay in seconds before the first retry. + max_delay: Upper bound on computed delay in seconds. + exponential_base: Multiplier for exponential backoff. + jitter: Whether to add random jitter to delay. + """ + + model_config = ConfigDict(frozen=True, allow_inf_nan=False) + + max_retries: int = Field( + default=3, + ge=0, + le=10, + description="Maximum number of retry attempts (0 disables retries)", + ) + base_delay: float = Field( + default=1.0, + gt=0.0, + description="Initial delay in seconds before the first retry", + ) + max_delay: float = Field( + default=60.0, + gt=0.0, + description="Upper bound on computed delay in seconds", + ) + exponential_base: float = Field( + default=2.0, + gt=1.0, + description="Multiplier for exponential backoff", + ) + jitter: bool = Field( + default=True, + description="Whether to add random jitter to delay", + ) + + @model_validator(mode="after") + def _validate_delay_ordering(self) -> Self: + """Ensure base_delay does not exceed max_delay.""" + if self.base_delay > self.max_delay: + msg = ( + f"base_delay ({self.base_delay}) must be" + f" <= max_delay ({self.max_delay})" + ) + logger.warning( + CONFIG_VALIDATION_FAILED, + model="RetryConfig", + field="base_delay/max_delay", + base_delay=self.base_delay, + max_delay=self.max_delay, + reason=msg, + ) + raise ValueError(msg) + return self + + +class RateLimiterConfig(BaseModel): + """Configuration for client-side rate limiting. + + Attributes: + max_requests_per_minute: Maximum requests per minute + (0 means unlimited). + max_concurrent: Maximum concurrent in-flight requests + (0 means unlimited). + """ + + model_config = ConfigDict(frozen=True, allow_inf_nan=False) + + max_requests_per_minute: int = Field( + default=0, + ge=0, + description="Maximum requests per minute (0 = unlimited)", + ) + max_concurrent: int = Field( + default=0, + ge=0, + description="Maximum concurrent in-flight requests (0 = unlimited)", + ) diff --git a/src/ai_company/engine/__init__.py b/src/ai_company/engine/__init__.py index 138137888c..d9d43940c8 100644 --- a/src/ai_company/engine/__init__.py +++ b/src/ai_company/engine/__init__.py @@ -55,8 +55,6 @@ TaskStructureClassifier, ) from ai_company.engine.errors import ( - BudgetExhaustedError, - DailyLimitExceededError, DecompositionCycleError, DecompositionDepthError, DecompositionError, @@ -67,7 +65,6 @@ NoEligibleAgentError, ParallelExecutionError, PromptBuildError, - QuotaExhaustedError, ResourceConflictError, TaskAssignmentError, TaskRoutingError, @@ -171,12 +168,10 @@ "AuctionAssignmentStrategy", "AutoTopologyConfig", "BudgetChecker", - "BudgetExhaustedError", "ClassificationResult", "CleanupCallback", "CooperativeTimeoutStrategy", "CostOptimizedAssignmentStrategy", - "DailyLimitExceededError", "DecompositionContext", "DecompositionCycleError", "DecompositionDepthError", @@ -221,7 +216,6 @@ "ProgressCallback", "PromptBuildError", "PromptTokenEstimator", - "QuotaExhaustedError", "ReactLoop", "RecoveryResult", "RecoveryStrategy", diff --git a/src/ai_company/engine/agent_engine.py b/src/ai_company/engine/agent_engine.py index 132b616fb4..4fe1a84382 100644 --- a/src/ai_company/engine/agent_engine.py +++ b/src/ai_company/engine/agent_engine.py @@ -9,6 +9,7 @@ import time from typing import TYPE_CHECKING +from ai_company.budget.errors import BudgetExhaustedError from ai_company.core.enums import TaskStatus from ai_company.engine._validation import ( validate_agent, @@ -18,10 +19,7 @@ from ai_company.engine.classification.pipeline import classify_execution_errors from ai_company.engine.context import DEFAULT_MAX_TURNS, AgentContext from ai_company.engine.cost_recording import record_execution_costs -from ai_company.engine.errors import ( - BudgetExhaustedError, - ExecutionStateError, -) +from ai_company.engine.errors import ExecutionStateError from ai_company.engine.loop_protocol import ( ExecutionResult, TerminationReason, diff --git a/src/ai_company/engine/errors.py b/src/ai_company/engine/errors.py index 42a1c4a636..7f5fdb7bce 100644 --- a/src/ai_company/engine/errors.py +++ b/src/ai_company/engine/errors.py @@ -21,31 +21,6 @@ class MaxTurnsExceededError(EngineError): """ -class BudgetExhaustedError(EngineError): - """Budget exhaustion signal for the engine layer. - - Used in two contexts: - - 1. Raised directly by :meth:`BudgetEnforcer.check_can_execute` - when pre-flight budget checks fail (monthly hard stop or daily - limit exceeded). - 2. Available for converting ``TerminationReason.BUDGET_EXHAUSTED`` - loop results into a raised error at the engine layer. - """ - - -class DailyLimitExceededError(BudgetExhaustedError): - """Per-agent daily spending limit exceeded.""" - - -class QuotaExhaustedError(BudgetExhaustedError): - """Raised when provider quota is exhausted. - - Currently raised for all degradation strategies. Degradation routing - (FALLBACK/QUEUE) is planned for a future milestone. - """ - - class LoopExecutionError(EngineError): """Non-recoverable execution loop error for the engine layer. diff --git a/src/ai_company/providers/__init__.py b/src/ai_company/providers/__init__.py index 84c5952584..a198d4797d 100644 --- a/src/ai_company/providers/__init__.py +++ b/src/ai_company/providers/__init__.py @@ -38,8 +38,6 @@ from .registry import ProviderRegistry from .resilience import ( RateLimiter, - RateLimiterConfig, - RetryConfig, RetryExhaustedError, RetryHandler, ) @@ -107,9 +105,7 @@ "ProviderTimeoutError", "RateLimitError", "RateLimiter", - "RateLimiterConfig", "ResolvedModel", - "RetryConfig", "RetryExhaustedError", "RetryHandler", "RoleBasedStrategy", diff --git a/src/ai_company/providers/resilience/__init__.py b/src/ai_company/providers/resilience/__init__.py index d8e6a5f1f1..442e3757de 100644 --- a/src/ai_company/providers/resilience/__init__.py +++ b/src/ai_company/providers/resilience/__init__.py @@ -1,18 +1,15 @@ """Provider resilience infrastructure. -Exports retry handling, rate limiting, configuration models, -and the ``RetryExhaustedError`` for fallback-chain signaling. +Exports retry handling, rate limiting, and the +``RetryExhaustedError`` for fallback-chain signaling. """ -from .config import RateLimiterConfig, RetryConfig from .errors import RetryExhaustedError from .rate_limiter import RateLimiter from .retry import RetryHandler __all__ = [ "RateLimiter", - "RateLimiterConfig", - "RetryConfig", "RetryExhaustedError", "RetryHandler", ] diff --git a/src/ai_company/providers/resilience/config.py b/src/ai_company/providers/resilience/config.py deleted file mode 100644 index bd9ac0ba2f..0000000000 --- a/src/ai_company/providers/resilience/config.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Re-export resilience configuration models. - -Canonical definitions live in :mod:`ai_company.config.schema` to avoid -circular imports (config → providers → config). This module re-exports -them so consumers can use ``from ai_company.providers.resilience.config -import RetryConfig``. -""" - -from ai_company.config.schema import RateLimiterConfig, RetryConfig - -__all__ = ["RateLimiterConfig", "RetryConfig"] diff --git a/src/ai_company/providers/resilience/rate_limiter.py b/src/ai_company/providers/resilience/rate_limiter.py index 31428fa70e..0f3035166a 100644 --- a/src/ai_company/providers/resilience/rate_limiter.py +++ b/src/ai_company/providers/resilience/rate_limiter.py @@ -4,14 +4,13 @@ import math import time +from ai_company.core.resilience_config import RateLimiterConfig # noqa: TC001 from ai_company.observability import get_logger from ai_company.observability.events.provider import ( PROVIDER_RATE_LIMITER_PAUSED, PROVIDER_RATE_LIMITER_THROTTLED, ) -from .config import RateLimiterConfig # noqa: TC001 - logger = get_logger(__name__) diff --git a/src/ai_company/providers/resilience/retry.py b/src/ai_company/providers/resilience/retry.py index b461b4649a..877e2ff274 100644 --- a/src/ai_company/providers/resilience/retry.py +++ b/src/ai_company/providers/resilience/retry.py @@ -18,7 +18,7 @@ if TYPE_CHECKING: from collections.abc import Callable, Coroutine - from .config import RetryConfig + from ai_company.core.resilience_config import RetryConfig logger = get_logger(__name__) diff --git a/src/ai_company/providers/routing/_strategy_helpers.py b/src/ai_company/providers/routing/_strategy_helpers.py index 0d68be4e7a..ee82cbfead 100644 --- a/src/ai_company/providers/routing/_strategy_helpers.py +++ b/src/ai_company/providers/routing/_strategy_helpers.py @@ -4,7 +4,8 @@ selection logic. """ -from ai_company.config.schema import RoutingConfig, RoutingRuleConfig # noqa: TC001 +from typing import TYPE_CHECKING + from ai_company.core.role_catalog import get_seniority_info from ai_company.observability import get_logger from ai_company.observability.events.routing import ( @@ -18,6 +19,9 @@ from .models import ResolvedModel, RoutingDecision, RoutingRequest from .resolver import ModelResolver # noqa: TC001 +if TYPE_CHECKING: + from ai_company.config.schema import RoutingConfig, RoutingRuleConfig + logger = get_logger(__name__) diff --git a/src/ai_company/providers/routing/resolver.py b/src/ai_company/providers/routing/resolver.py index 76b6e264e9..0ba2250f8b 100644 --- a/src/ai_company/providers/routing/resolver.py +++ b/src/ai_company/providers/routing/resolver.py @@ -7,8 +7,8 @@ """ from types import MappingProxyType +from typing import TYPE_CHECKING -from ai_company.config.schema import ProviderConfig # noqa: TC001 from ai_company.observability import get_logger from ai_company.observability.events.routing import ( ROUTING_MODEL_RESOLUTION_FAILED, @@ -19,6 +19,9 @@ from .errors import ModelResolutionError from .models import ResolvedModel +if TYPE_CHECKING: + from ai_company.config.schema import ProviderConfig + logger = get_logger(__name__) diff --git a/src/ai_company/providers/routing/router.py b/src/ai_company/providers/routing/router.py index 357c8ecbc8..bbf0b582ab 100644 --- a/src/ai_company/providers/routing/router.py +++ b/src/ai_company/providers/routing/router.py @@ -4,7 +4,8 @@ Delegates to strategy implementations. """ -from ai_company.config.schema import ProviderConfig, RoutingConfig # noqa: TC001 +from typing import TYPE_CHECKING + from ai_company.observability import get_logger from ai_company.observability.events.routing import ( ROUTING_DECISION_MADE, @@ -18,6 +19,9 @@ from .resolver import ModelResolver from .strategies import STRATEGY_MAP +if TYPE_CHECKING: + from ai_company.config.schema import ProviderConfig, RoutingConfig + logger = get_logger(__name__) diff --git a/src/ai_company/providers/routing/strategies.py b/src/ai_company/providers/routing/strategies.py index 8cd5255291..0077fc1eaf 100644 --- a/src/ai_company/providers/routing/strategies.py +++ b/src/ai_company/providers/routing/strategies.py @@ -7,9 +7,8 @@ from collections.abc import Mapping # noqa: TC003 from types import MappingProxyType -from typing import Final, NoReturn, Protocol, runtime_checkable +from typing import TYPE_CHECKING, Final, NoReturn, Protocol, runtime_checkable -from ai_company.config.schema import RoutingConfig # noqa: TC001 from ai_company.core.enums import SeniorityLevel # noqa: TC001 from ai_company.core.role_catalog import get_seniority_info from ai_company.observability import get_logger @@ -35,6 +34,9 @@ from .models import RoutingDecision, RoutingRequest from .resolver import ModelResolver # noqa: TC001 +if TYPE_CHECKING: + from ai_company.config.schema import RoutingConfig + logger = get_logger(__name__) # ── Strategy name constants ────────────────────────────────────── diff --git a/tests/integration/providers/conftest.py b/tests/integration/providers/conftest.py index 38a260fe5f..377904b4c1 100644 --- a/tests/integration/providers/conftest.py +++ b/tests/integration/providers/conftest.py @@ -13,11 +13,13 @@ from litellm.types.utils import ( # type: ignore[attr-defined] ChatCompletionToolCallChunk, Delta, + ModelResponseStream, StreamingChoices, Usage, ) -from ai_company.config.schema import ProviderConfig, ProviderModelConfig, RetryConfig +from ai_company.config.schema import ProviderConfig, ProviderModelConfig +from ai_company.core.resilience_config import RetryConfig from ai_company.providers.enums import MessageRole from ai_company.providers.models import ( ChatMessage, @@ -165,9 +167,9 @@ def build_content_chunk( *, model: str = "test-model-001", chunk_id: str = "chunk_0", -) -> ModelResponse: +) -> ModelResponseStream: """Build a streaming chunk with text content.""" - return ModelResponse( + return ModelResponseStream( id=chunk_id, choices=[ StreamingChoices( @@ -177,7 +179,6 @@ def build_content_chunk( ), ], model=model, - stream=True, ) @@ -187,9 +188,9 @@ def build_usage_chunk( completion_tokens: int = 50, model: str = "test-model-001", chunk_id: str = "chunk_usage", -) -> ModelResponse: +) -> ModelResponseStream: """Build a streaming chunk with usage data and no choices.""" - return ModelResponse( + return ModelResponseStream( id=chunk_id, choices=[], usage=Usage( @@ -198,7 +199,6 @@ def build_usage_chunk( total_tokens=prompt_tokens + completion_tokens, ), model=model, - stream=True, ) @@ -210,7 +210,7 @@ def build_tool_call_delta_chunk( # noqa: PLR0913 arguments: str | None = None, model: str = "test-model-001", chunk_id: str = "chunk_tc", -) -> ModelResponse: +) -> ModelResponseStream: """Build a streaming chunk with a tool call delta.""" tc_delta = ChatCompletionToolCallChunk( index=index, @@ -220,7 +220,7 @@ def build_tool_call_delta_chunk( # noqa: PLR0913 ), type="function", ) - return ModelResponse( + return ModelResponseStream( id=chunk_id, choices=[ StreamingChoices( @@ -230,7 +230,6 @@ def build_tool_call_delta_chunk( # noqa: PLR0913 ), ], model=model, - stream=True, ) @@ -239,9 +238,9 @@ def build_finish_chunk( *, model: str = "test-model-001", chunk_id: str = "chunk_fin", -) -> ModelResponse: +) -> ModelResponseStream: """Build a streaming chunk with only a finish reason.""" - return ModelResponse( + return ModelResponseStream( id=chunk_id, choices=[ StreamingChoices( @@ -251,14 +250,13 @@ def build_finish_chunk( ), ], model=model, - stream=True, ) async def async_iter_chunks( - chunks: list[ModelResponse], -) -> AsyncIterator[ModelResponse]: - """Wrap a list of ``ModelResponse`` chunks into an ``AsyncIterator``.""" + chunks: list[ModelResponseStream], +) -> AsyncIterator[ModelResponseStream]: + """Wrap a list of ``ModelResponseStream`` chunks into an ``AsyncIterator``.""" for chunk in chunks: yield chunk diff --git a/tests/integration/providers/test_retry_integration.py b/tests/integration/providers/test_retry_integration.py index 642a5d3f24..2c675f8890 100644 --- a/tests/integration/providers/test_retry_integration.py +++ b/tests/integration/providers/test_retry_integration.py @@ -9,12 +9,8 @@ import pytest -from ai_company.config.schema import ( - ProviderConfig, - ProviderModelConfig, - RateLimiterConfig, - RetryConfig, -) +from ai_company.config.schema import ProviderConfig, ProviderModelConfig +from ai_company.core.resilience_config import RateLimiterConfig, RetryConfig from ai_company.providers.drivers.litellm_driver import LiteLLMDriver from ai_company.providers.enums import MessageRole from ai_company.providers.errors import ( diff --git a/tests/unit/budget/test_enforcer.py b/tests/unit/budget/test_enforcer.py index 76c6283e43..ac29539a60 100644 --- a/tests/unit/budget/test_enforcer.py +++ b/tests/unit/budget/test_enforcer.py @@ -17,12 +17,12 @@ BudgetConfig, ) from ai_company.budget.enforcer import BudgetEnforcer +from ai_company.budget.errors import BudgetExhaustedError, DailyLimitExceededError from ai_company.budget.tracker import CostTracker from ai_company.core.agent import AgentIdentity, ModelConfig from ai_company.core.enums import TaskStatus, TaskType from ai_company.core.task import Task from ai_company.engine.context import AgentContext -from ai_company.engine.errors import BudgetExhaustedError, DailyLimitExceededError from ai_company.observability.events.budget import BUDGET_ALERT_THRESHOLD_CROSSED from ai_company.providers.models import TokenUsage from ai_company.providers.routing.models import ResolvedModel diff --git a/tests/unit/budget/test_enforcer_quota.py b/tests/unit/budget/test_enforcer_quota.py index 1df53c447a..f5eed304e1 100644 --- a/tests/unit/budget/test_enforcer_quota.py +++ b/tests/unit/budget/test_enforcer_quota.py @@ -11,6 +11,7 @@ from ai_company.budget.config import BudgetAlertConfig, BudgetConfig from ai_company.budget.enforcer import BudgetEnforcer +from ai_company.budget.errors import QuotaExhaustedError from ai_company.budget.quota import ( QuotaCheckResult, QuotaLimit, @@ -19,7 +20,6 @@ ) from ai_company.budget.quota_tracker import QuotaTracker from ai_company.budget.tracker import CostTracker -from ai_company.engine.errors import QuotaExhaustedError pytestmark = pytest.mark.timeout(30) diff --git a/tests/unit/budget/test_errors.py b/tests/unit/budget/test_errors.py new file mode 100644 index 0000000000..499eb70af4 --- /dev/null +++ b/tests/unit/budget/test_errors.py @@ -0,0 +1,49 @@ +"""Tests for budget error hierarchy.""" + +import pytest + +from ai_company.budget.errors import ( + BudgetExhaustedError, + DailyLimitExceededError, + QuotaExhaustedError, +) + +pytestmark = pytest.mark.timeout(30) + + +@pytest.mark.unit +class TestBudgetErrorHierarchy: + """Verify inheritance relationships in the budget error hierarchy.""" + + def test_budget_exhausted_is_exception(self) -> None: + assert issubclass(BudgetExhaustedError, Exception) + + def test_daily_limit_is_budget_exhausted(self) -> None: + assert issubclass(DailyLimitExceededError, BudgetExhaustedError) + err = DailyLimitExceededError("daily limit hit") + assert isinstance(err, BudgetExhaustedError) + + def test_quota_exhausted_is_budget_exhausted(self) -> None: + assert issubclass(QuotaExhaustedError, BudgetExhaustedError) + err = QuotaExhaustedError("quota hit") + assert isinstance(err, BudgetExhaustedError) + + def test_budget_exhausted_not_engine_error(self) -> None: + """Budget errors are independent of the engine error hierarchy.""" + from ai_company.engine.errors import EngineError + + assert not issubclass(BudgetExhaustedError, EngineError) + assert not issubclass(DailyLimitExceededError, EngineError) + assert not issubclass(QuotaExhaustedError, EngineError) + + def test_message_preserved(self) -> None: + msg = "agent-1 budget exhausted" + err = BudgetExhaustedError(msg) + assert str(err) == msg + + def test_except_budget_exhausted_catches_subclasses(self) -> None: + """Ensure except BudgetExhaustedError catches all subtypes.""" + for exc_cls in (DailyLimitExceededError, QuotaExhaustedError): + msg = "subclass caught" + with pytest.raises(BudgetExhaustedError): + raise exc_cls(msg) diff --git a/tests/unit/config/conftest.py b/tests/unit/config/conftest.py index 597d612cbf..f05e50b05d 100644 --- a/tests/unit/config/conftest.py +++ b/tests/unit/config/conftest.py @@ -14,14 +14,13 @@ AgentConfig, ProviderConfig, ProviderModelConfig, - RateLimiterConfig, - RetryConfig, RootConfig, RoutingConfig, RoutingRuleConfig, TaskAssignmentConfig, ) from ai_company.core.company import CompanyConfig +from ai_company.core.resilience_config import RateLimiterConfig, RetryConfig from ai_company.hr.promotion.config import PromotionConfig from ai_company.memory.config import CompanyMemoryConfig from ai_company.memory.org.config import OrgMemoryConfig diff --git a/tests/unit/engine/test_agent_engine_budget.py b/tests/unit/engine/test_agent_engine_budget.py index 4c095a480c..324e2b78bb 100644 --- a/tests/unit/engine/test_agent_engine_budget.py +++ b/tests/unit/engine/test_agent_engine_budget.py @@ -10,9 +10,13 @@ BudgetConfig, ) from ai_company.budget.enforcer import BudgetEnforcer +from ai_company.budget.errors import ( + BudgetExhaustedError, + DailyLimitExceededError, + QuotaExhaustedError, +) from ai_company.budget.tracker import CostTracker from ai_company.engine.agent_engine import AgentEngine -from ai_company.engine.errors import BudgetExhaustedError, DailyLimitExceededError from ai_company.engine.loop_protocol import TerminationReason if TYPE_CHECKING: @@ -51,8 +55,9 @@ class TestEngineWithEnforcer: [ (BudgetExhaustedError, "Monthly budget exhausted"), (DailyLimitExceededError, "Daily limit exceeded"), + (QuotaExhaustedError, "Provider quota exhausted"), ], - ids=["monthly_exhausted", "daily_limit"], + ids=["monthly_exhausted", "daily_limit", "quota_exhausted"], ) async def test_preflight_budget_stop_returns_budget_exhausted( self, diff --git a/tests/unit/engine/test_errors.py b/tests/unit/engine/test_errors.py index d98346b4ce..38ba7ea75a 100644 --- a/tests/unit/engine/test_errors.py +++ b/tests/unit/engine/test_errors.py @@ -2,8 +2,8 @@ import pytest +from ai_company.budget.errors import BudgetExhaustedError from ai_company.engine.errors import ( - BudgetExhaustedError, EngineError, ExecutionStateError, LoopExecutionError, @@ -34,10 +34,10 @@ def test_prompt_build_error_is_engine_error(self) -> None: err = PromptBuildError("test") assert isinstance(err, EngineError) - def test_budget_exhausted_error_is_engine_error(self) -> None: - assert issubclass(BudgetExhaustedError, EngineError) + def test_budget_exhausted_error_is_not_engine_error(self) -> None: + assert not issubclass(BudgetExhaustedError, EngineError) err = BudgetExhaustedError("out of budget") - assert isinstance(err, EngineError) + assert isinstance(err, Exception) assert str(err) == "out of budget" def test_loop_execution_error_is_engine_error(self) -> None: diff --git a/tests/unit/providers/drivers/conftest.py b/tests/unit/providers/drivers/conftest.py index ecd89d22a7..fba9d14d85 100644 --- a/tests/unit/providers/drivers/conftest.py +++ b/tests/unit/providers/drivers/conftest.py @@ -5,12 +5,8 @@ import pytest -from ai_company.config.schema import ( - ProviderConfig, - ProviderModelConfig, - RateLimiterConfig, - RetryConfig, -) +from ai_company.config.schema import ProviderConfig, ProviderModelConfig +from ai_company.core.resilience_config import RateLimiterConfig, RetryConfig if TYPE_CHECKING: from collections.abc import AsyncIterator diff --git a/tests/unit/providers/resilience/conftest.py b/tests/unit/providers/resilience/conftest.py index 6adaf59c72..ec6e20f71e 100644 --- a/tests/unit/providers/resilience/conftest.py +++ b/tests/unit/providers/resilience/conftest.py @@ -2,6 +2,7 @@ import pytest +from ai_company.core.resilience_config import RateLimiterConfig, RetryConfig from ai_company.providers.errors import ( AuthenticationError, ProviderConnectionError, @@ -9,7 +10,6 @@ ProviderTimeoutError, RateLimitError, ) -from ai_company.providers.resilience.config import RateLimiterConfig, RetryConfig @pytest.fixture diff --git a/tests/unit/providers/resilience/test_config.py b/tests/unit/providers/resilience/test_config.py index 98b2381cf4..795f497680 100644 --- a/tests/unit/providers/resilience/test_config.py +++ b/tests/unit/providers/resilience/test_config.py @@ -3,7 +3,7 @@ import pytest from pydantic import ValidationError -from ai_company.providers.resilience.config import RateLimiterConfig, RetryConfig +from ai_company.core.resilience_config import RateLimiterConfig, RetryConfig pytestmark = pytest.mark.timeout(30) diff --git a/tests/unit/providers/resilience/test_rate_limiter.py b/tests/unit/providers/resilience/test_rate_limiter.py index ef52120585..e13ef23e38 100644 --- a/tests/unit/providers/resilience/test_rate_limiter.py +++ b/tests/unit/providers/resilience/test_rate_limiter.py @@ -7,11 +7,11 @@ import pytest import structlog +from ai_company.core.resilience_config import RateLimiterConfig from ai_company.observability.events.provider import ( PROVIDER_RATE_LIMITER_PAUSED, PROVIDER_RATE_LIMITER_THROTTLED, ) -from ai_company.providers.resilience.config import RateLimiterConfig from ai_company.providers.resilience.rate_limiter import RateLimiter pytestmark = pytest.mark.timeout(30) diff --git a/tests/unit/providers/resilience/test_retry.py b/tests/unit/providers/resilience/test_retry.py index 80c6181a21..98b0a05c24 100644 --- a/tests/unit/providers/resilience/test_retry.py +++ b/tests/unit/providers/resilience/test_retry.py @@ -5,6 +5,7 @@ import pytest import structlog +from ai_company.core.resilience_config import RetryConfig from ai_company.observability.events.provider import ( PROVIDER_CALL_ERROR, PROVIDER_RETRY_ATTEMPT, @@ -18,7 +19,6 @@ ProviderTimeoutError, RateLimitError, ) -from ai_company.providers.resilience.config import RetryConfig from ai_company.providers.resilience.errors import RetryExhaustedError from ai_company.providers.resilience.retry import RetryHandler diff --git a/uv.lock b/uv.lock index 2805dc3cd7..d12431845f 100644 --- a/uv.lock +++ b/uv.lock @@ -52,7 +52,7 @@ requires-dist = [ { name = "aiosqlite", specifier = "==0.22.1" }, { name = "jinja2", specifier = "==3.1.6" }, { name = "jsonschema", specifier = "==4.26.0" }, - { name = "litellm", specifier = "==1.82.0" }, + { name = "litellm", specifier = "==1.82.1" }, { name = "litestar", extras = ["brotli", "prometheus", "pydantic", "standard", "structlog"], specifier = "==2.21.1" }, { name = "mcp", specifier = "==1.26.0" }, { name = "pydantic", specifier = "==2.12.5" }, @@ -893,7 +893,7 @@ wheels = [ [[package]] name = "litellm" -version = "1.82.0" +version = "1.82.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -909,9 +909,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6c/00/49bb5c28e0dea0f5086229a2a08d5fdc6c8dc0d8e2acb2a2d1f7dd9f4b70/litellm-1.82.0.tar.gz", hash = "sha256:d388f52447daccbcaafa19a3e68d17b75f1374b5bf2cde680d65e1cd86e50d22", size = 16800355, upload-time = "2026-03-01T02:35:30.363Z" } +sdist = { url = "https://files.pythonhosted.org/packages/34/bd/6251e9a965ae2d7bc3342ae6c1a2d25dd265d354c502e63225451b135016/litellm-1.82.1.tar.gz", hash = "sha256:bc8427cdccc99e191e08e36fcd631c93b27328d1af789839eb3ac01a7d281890", size = 17197496, upload-time = "2026-03-10T09:10:04.438Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/89/eb28bfcf97d6b045c400e72eb047c381594467048c237dbb6c227764084c/litellm-1.82.0-py3-none-any.whl", hash = "sha256:5496b5d4532cccdc7a095c21cbac4042f7662021c57bc1d17be4e39838929e80", size = 14911978, upload-time = "2026-03-01T02:35:26.844Z" }, + { url = "https://files.pythonhosted.org/packages/57/77/0c6eca2cb049793ddf8ce9cdcd5123a35666c4962514788c4fc90edf1d3b/litellm-1.82.1-py3-none-any.whl", hash = "sha256:a9ec3fe42eccb1611883caaf8b1bf33c9f4e12163f94c7d1004095b14c379eb2", size = 15341896, upload-time = "2026-03-10T09:10:00.702Z" }, ] [[package]]