-
Notifications
You must be signed in to change notification settings - Fork 1
feat: implement model routing engine #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
78f4bff
7618335
99fa913
b17601b
50c17a3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,49 +22,49 @@ | |
| SeniorityInfo( | ||
| level=SeniorityLevel.JUNIOR, | ||
| authority_scope="Execute assigned tasks only", | ||
| typical_model_tier="haiku", | ||
| typical_model_tier="small", | ||
| cost_tier=CostTier.LOW, | ||
| ), | ||
|
Comment on lines
21
to
27
|
||
| SeniorityInfo( | ||
| level=SeniorityLevel.MID, | ||
| authority_scope="Execute and suggest improvements", | ||
| typical_model_tier="sonnet", | ||
| typical_model_tier="medium", | ||
| cost_tier=CostTier.MEDIUM, | ||
| ), | ||
| SeniorityInfo( | ||
| level=SeniorityLevel.SENIOR, | ||
| authority_scope="Execute, design, and review others", | ||
| typical_model_tier="sonnet", | ||
| typical_model_tier="medium", | ||
| cost_tier=CostTier.HIGH, | ||
| ), | ||
| SeniorityInfo( | ||
| level=SeniorityLevel.LEAD, | ||
| authority_scope="All above plus approve and delegate", | ||
| typical_model_tier="opus", | ||
| typical_model_tier="large", | ||
| cost_tier=CostTier.HIGH, | ||
| ), | ||
| SeniorityInfo( | ||
| level=SeniorityLevel.PRINCIPAL, | ||
| authority_scope="All above plus architectural decisions", | ||
| typical_model_tier="opus", | ||
| typical_model_tier="large", | ||
| cost_tier=CostTier.PREMIUM, | ||
| ), | ||
| SeniorityInfo( | ||
| level=SeniorityLevel.DIRECTOR, | ||
| authority_scope="Strategic decisions and budget authority", | ||
| typical_model_tier="opus", | ||
| typical_model_tier="large", | ||
| cost_tier=CostTier.PREMIUM, | ||
| ), | ||
| SeniorityInfo( | ||
| level=SeniorityLevel.VP, | ||
| authority_scope="Department-wide authority", | ||
| typical_model_tier="opus", | ||
| typical_model_tier="large", | ||
| cost_tier=CostTier.PREMIUM, | ||
| ), | ||
| SeniorityInfo( | ||
| level=SeniorityLevel.C_SUITE, | ||
| authority_scope="Company-wide authority and final approvals", | ||
| typical_model_tier="opus", | ||
| typical_model_tier="large", | ||
| cost_tier=CostTier.PREMIUM, | ||
| ), | ||
| ) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| """Model routing engine — strategy-based LLM model selection. | ||
|
|
||
| Exports the router, resolver, domain models, errors, strategies, | ||
| and the ``RoutingStrategy`` protocol. | ||
| """ | ||
|
|
||
| from .errors import ( | ||
| ModelResolutionError, | ||
| NoAvailableModelError, | ||
| RoutingError, | ||
| UnknownStrategyError, | ||
| ) | ||
| from .models import ResolvedModel, RoutingDecision, RoutingRequest | ||
| from .resolver import ModelResolver | ||
| from .router import ModelRouter | ||
| from .strategies import ( | ||
| CostAwareStrategy, | ||
| ManualStrategy, | ||
| RoleBasedStrategy, | ||
| RoutingStrategy, | ||
| SmartStrategy, | ||
| ) | ||
|
|
||
| # Public names re-exported by this package; entries are kept in alphabetical order. | ||
| __all__ = [ | ||
| "CostAwareStrategy", | ||
| "ManualStrategy", | ||
| "ModelResolutionError", | ||
| "ModelResolver", | ||
| "ModelRouter", | ||
| "NoAvailableModelError", | ||
| "ResolvedModel", | ||
| "RoleBasedStrategy", | ||
| "RoutingDecision", | ||
| "RoutingError", | ||
| "RoutingRequest", | ||
| "RoutingStrategy", | ||
| "SmartStrategy", | ||
| "UnknownStrategyError", | ||
| ] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| """Routing error hierarchy. | ||
|
|
||
| All routing errors extend ``ProviderError`` so the entire provider | ||
| layer shares a single exception tree. | ||
| """ | ||
|
|
||
| from ai_company.providers.errors import ProviderError | ||
|
|
||
|
|
||
| class RoutingError(ProviderError): | ||
| """Base exception for all model-routing errors.""" | ||
|
|
||
| # Class-level flag marking routing errors as not retryable. | ||
| # NOTE(review): presumably consulted by retry handling built around ProviderError — confirm. | ||
| is_retryable = False | ||
|
|
||
|
|
||
| class ModelResolutionError(RoutingError): | ||
| """Model alias or ID could not be found in any provider.""" | ||
|
|
||
| # Same value as the one set on RoutingError; restated explicitly on this subclass. | ||
| is_retryable = False | ||
|
|
||
|
|
||
| class NoAvailableModelError(RoutingError): | ||
| """All candidate models exhausted (primary + fallbacks).""" | ||
|
|
||
| # Same value as the one set on RoutingError; restated explicitly on this subclass. | ||
| is_retryable = False | ||
|
|
||
|
|
||
| class UnknownStrategyError(RoutingError): | ||
| """Configured strategy name is not recognized.""" | ||
|
|
||
| # Same value as the one set on RoutingError; restated explicitly on this subclass. | ||
| is_retryable = False |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| """Domain models for the routing engine.""" | ||
|
|
||
| from pydantic import BaseModel, ConfigDict, Field | ||
|
|
||
| from ai_company.core.enums import SeniorityLevel # noqa: TC001 | ||
| from ai_company.core.types import NotBlankStr # noqa: TC001 | ||
|
|
||
|
|
||
| class ResolvedModel(BaseModel): | ||
| """A fully resolved model reference. | ||
|
|
||
| Attributes: | ||
| provider_name: Provider that owns this model (e.g. ``"anthropic"``). | ||
| model_id: Concrete model identifier (e.g. ``"claude-sonnet-4-6"``). | ||
| alias: Short alias used in routing rules, if any (defaults to ``None``). | ||
| cost_per_1k_input: Cost per 1 000 input tokens in USD; must be >= 0, defaults to 0.0. | ||
| cost_per_1k_output: Cost per 1 000 output tokens in USD; must be >= 0, defaults to 0.0. | ||
| max_context: Maximum context window size in tokens; must be > 0, defaults to 200 000. | ||
| """ | ||
|
|
||
| # frozen=True: pydantic rejects attribute assignment on instances after construction. | ||
| model_config = ConfigDict(frozen=True) | ||
|
|
||
| provider_name: NotBlankStr = Field(description="Provider name") | ||
| model_id: NotBlankStr = Field(description="Model identifier") | ||
| alias: NotBlankStr | None = Field(default=None, description="Short alias") | ||
| cost_per_1k_input: float = Field( | ||
| default=0.0, | ||
| ge=0.0, | ||
| description="Cost per 1k input tokens in USD", | ||
| ) | ||
| cost_per_1k_output: float = Field( | ||
| default=0.0, | ||
| ge=0.0, | ||
| description="Cost per 1k output tokens in USD", | ||
| ) | ||
| max_context: int = Field( | ||
| default=200_000, | ||
| gt=0, | ||
| description="Maximum context window size in tokens", | ||
| ) | ||
|
|
||
|
|
||
| class RoutingRequest(BaseModel): | ||
| """Inputs to a routing decision. | ||
|
|
||
| Every field is optional and defaults to ``None``. | ||
|
|
||
| Attributes: | ||
| agent_level: Seniority level of the requesting agent. | ||
| task_type: Task type label (e.g. ``"development"``). | ||
| model_override: Explicit model reference for manual routing. | ||
| remaining_budget: Remaining cost budget in USD; must be >= 0 when provided. | ||
| """ | ||
|
|
||
| # frozen=True: pydantic rejects attribute assignment on instances after construction. | ||
| model_config = ConfigDict(frozen=True) | ||
|
|
||
| agent_level: SeniorityLevel | None = Field( | ||
| default=None, | ||
| description="Seniority level of the requesting agent", | ||
| ) | ||
| task_type: NotBlankStr | None = Field( | ||
| default=None, | ||
| description="Task type label", | ||
| ) | ||
| model_override: NotBlankStr | None = Field( | ||
| default=None, | ||
| description="Explicit model reference for manual routing", | ||
| ) | ||
| remaining_budget: float | None = Field( | ||
| default=None, | ||
| ge=0.0, | ||
| description="Remaining cost budget in USD", | ||
| ) | ||
|
Comment on lines
+83
to
+90
|
||
|
|
||
|
|
||
| class RoutingDecision(BaseModel): | ||
| """Output of a routing decision. | ||
|
|
||
| Attributes: | ||
| resolved_model: The chosen model. | ||
| strategy_used: Name of the strategy that produced this decision. | ||
| reason: Human-readable explanation. | ||
| fallbacks_tried: Model refs that were tried before the final choice | ||
| (defaults to an empty tuple). | ||
| """ | ||
|
|
||
| # frozen=True: pydantic rejects attribute assignment on instances after construction. | ||
| model_config = ConfigDict(frozen=True) | ||
|
|
||
| resolved_model: ResolvedModel = Field(description="The chosen model") | ||
| strategy_used: NotBlankStr = Field(description="Strategy name") | ||
| reason: NotBlankStr = Field(description="Human-readable explanation") | ||
| # NOTE(review): elements are plain ``str`` while the other string fields use | ||
| # NotBlankStr — confirm whether blank model refs should be rejected here too. | ||
| fallbacks_tried: tuple[str, ...] = Field( | ||
| default=(), | ||
| description="Model refs tried before the final choice", | ||
| ) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider restarting list numbering for the soft rules section.
The static analysis tool flagged that items 7-10 should restart from 1 after the new "Logging coverage suggestions" header, per MD029. However, if the cross-section numbering (1-6 for hard rules, 7-10 for soft rules) is intentional for easier reference in triage discussions, you may suppress this warning. Otherwise, consider restarting from 1.
🧰 Tools
🪛 markdownlint-cli2 (0.21.0)
[warning] 111-111: Ordered list item prefix
Expected: 1; Actual: 7; Style: 1/2/3
(MD029, ol-prefix)
[warning] 112-112: Ordered list item prefix
Expected: 2; Actual: 8; Style: 1/2/3
(MD029, ol-prefix)
[warning] 113-113: Ordered list item prefix
Expected: 3; Actual: 9; Style: 1/2/3
(MD029, ol-prefix)
[warning] 114-114: Ordered list item prefix
Expected: 4; Actual: 10; Style: 1/2/3
(MD029, ol-prefix)
🤖 Prompt for AI Agents