Skip to content
Merged
80 changes: 80 additions & 0 deletions .agents/configs/rate-limits.json.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"_comment": "Rate limit configuration per provider (t1330). Copy to ~/.config/aidevops/rate-limits.json and adjust for your plan.",
"_note": "Values are per-minute limits. Set to 0 to disable tracking for that metric.",
"_threshold": "warn_pct controls when a provider is flagged as throttle-risk (default 80%).",
"warn_pct": 80,
"window_minutes": 1,
"providers": {
"anthropic": {
"_comment": "Anthropic Claude API - limits vary by plan tier. These are conservative defaults for Pro/Team plans.",
"_docs": "https://docs.anthropic.com/en/api/rate-limits",
"requests_per_min": 50,
"tokens_per_min": 40000,
"input_tokens_per_min": 0,
"output_tokens_per_min": 0,
"billing_type": "subscription"
},
"openai": {
"_comment": "OpenAI API - limits vary by usage tier. Tier 1 defaults shown.",
"_docs": "https://platform.openai.com/docs/guides/rate-limits",
"requests_per_min": 500,
"tokens_per_min": 200000,
"input_tokens_per_min": 0,
"output_tokens_per_min": 0,
"billing_type": "token"
},
"google": {
"_comment": "Google Gemini API - limits vary by model and plan.",
"_docs": "https://ai.google.dev/gemini-api/docs/rate-limits",
"requests_per_min": 60,
"tokens_per_min": 1000000,
"input_tokens_per_min": 0,
"output_tokens_per_min": 0,
"billing_type": "token"
},
"deepseek": {
"_comment": "DeepSeek API - limits vary by plan.",
"_docs": "https://platform.deepseek.com/docs",
"requests_per_min": 60,
"tokens_per_min": 100000,
"input_tokens_per_min": 0,
"output_tokens_per_min": 0,
"billing_type": "token"
},
"openrouter": {
"_comment": "OpenRouter - limits depend on credits and plan.",
"_docs": "https://openrouter.ai/docs/limits",
"requests_per_min": 200,
"tokens_per_min": 500000,
"input_tokens_per_min": 0,
"output_tokens_per_min": 0,
"billing_type": "token"
},
"groq": {
"_comment": "Groq API - free tier limits shown. Paid plans are higher.",
"_docs": "https://console.groq.com/docs/rate-limits",
"requests_per_min": 30,
"tokens_per_min": 6000,
"input_tokens_per_min": 0,
"output_tokens_per_min": 0,
"billing_type": "token"
},
"opencode": {
"_comment": "OpenCode Zen proxy - inherits limits from underlying provider.",
"requests_per_min": 50,
"tokens_per_min": 40000,
"input_tokens_per_min": 0,
"output_tokens_per_min": 0,
"billing_type": "subscription"
},
"xai": {
"_comment": "xAI Grok API - limits vary by plan.",
"_docs": "https://docs.x.ai/docs/rate-limits",
"requests_per_min": 60,
"tokens_per_min": 100000,
"input_tokens_per_min": 0,
"output_tokens_per_min": 0,
"billing_type": "token"
}
}
}
133 changes: 120 additions & 13 deletions .agents/scripts/model-availability-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -849,8 +849,85 @@ _record_model_availability() {
# Tier Resolution with Fallback
# =============================================================================

# =============================================================================
# Rate Limit Awareness (t1330)
# =============================================================================

# Check if a provider is at throttle risk using observability data.
# Runs `observability-helper.sh rate-limits --provider <provider> --json` as a
# subprocess and reads the `.status` field of the first entry in its JSON output.
# Globals:   SCRIPT_DIR (read) - directory containing observability-helper.sh
# Arguments: $1 - provider name
# Outputs:   "ok", "warn", or "critical" on stdout
# Returns:   0=ok, 1=throttle-risk (warn), 2=critical
# Fails open: reports "ok" when the provider is empty, the helper is not
# executable, jq is not installed, or the query yields no usable status.
_check_provider_rate_limit_risk() {
  local provider="${1:-}"
  local obs_helper="${SCRIPT_DIR}/observability-helper.sh"

  # Without a provider the helper would be queried unfiltered and `.[0]` could
  # belong to any provider, so do not guess.
  if [[ -z "$provider" || ! -x "$obs_helper" ]] || ! command -v jq &>/dev/null; then
    echo "ok"
    return 0
  fi

  # Query rate-limit status as a subprocess to avoid variable conflicts.
  # The command is built as an array so it does not rely on word splitting
  # (an unquoted "timeout 5" string breaks when IFS excludes the space).
  local -a query_cmd=(bash "$obs_helper" rate-limits --provider "$provider" --json)

  # Timeout prevents blocking dispatch if the observability DB is slow
  # (gtimeout on macOS, timeout on Linux; no timeout if neither exists).
  if command -v gtimeout &>/dev/null; then
    query_cmd=(gtimeout 5 "${query_cmd[@]}")
  elif command -v timeout &>/dev/null; then
    query_cmd=(timeout 5 "${query_cmd[@]}")
  fi

  # `|| true` is intentional: a failed or timed-out query must not abort the
  # caller; an empty result falls back to "ok" below.
  local risk_status
  risk_status=$("${query_cmd[@]}" | jq -r '.[0].status // "ok"' || true)
  risk_status="${risk_status:-ok}"

  case "$risk_status" in
  critical)
    echo "critical"
    return 2
    ;;
  warn)
    echo "warn"
    return 1
    ;;
  *)
    # Any unrecognised status is treated as "ok".
    echo "ok"
    return 0
    ;;
  esac
}

# Extract provider from a model spec (provider/model or model).
# Arguments: $1 - model spec; either "provider/model_id" or a bare model name
# Outputs:   provider name on stdout. For "provider/model_id" this is the text
#            before the first "/". For a bare name it is looked up by prefix;
#            an empty line is printed when no prefix matches.
# Returns:   always 0
_extract_provider() {
  local model_spec="${1:-}"
  local provider=""

  if [[ "$model_spec" == */* ]]; then
    provider="${model_spec%%/*}"
  else
    # Prefix map for bare model names. Unknown names leave provider empty so
    # callers can skip provider-specific handling.
    case "$model_spec" in
    claude*) provider="anthropic" ;;
    gpt* | o1* | o3* | o4*) provider="openai" ;;
    gemini*) provider="google" ;;
    deepseek*) provider="deepseek" ;;
    llama*) provider="groq" ;;
    grok*) provider="xai" ;;
    esac
  fi

  # printf rather than echo so a spec such as "-n/model" is printed literally.
  printf '%s\n' "$provider"
  return 0
}

# =============================================================================
# Tier Resolution with Fallback
# =============================================================================

# Resolve the best available model for a given tier.
# Checks primary model first, falls back to secondary if primary is unavailable.
# Rate limit awareness (t1330): if primary provider is at throttle risk (>=warn_pct),
# prefer the fallback provider even if primary is technically available.
# If both fail, delegates to fallback-chain-helper.sh for extended chain resolution
# including gateway providers (OpenRouter, Cloudflare AI Gateway).
# Output: provider/model_id on stdout
Expand All @@ -871,6 +948,27 @@ resolve_tier() {
primary="${tier_spec%%|*}"
fallback="${tier_spec#*|}"

# Rate limit check (t1330): if primary provider is at throttle risk,
# try fallback first to avoid hitting rate limits
local primary_provider
primary_provider=$(_extract_provider "$primary")
if [[ -n "$primary_provider" ]]; then
local rl_risk
rl_risk=$(_check_provider_rate_limit_risk "$primary_provider") || true
rl_risk="${rl_risk:-ok}"
if [[ "$rl_risk" == "warn" || "$rl_risk" == "critical" ]]; then
[[ "$quiet" != "true" ]] && print_warning "$primary_provider: rate limit ${rl_risk} — preferring fallback for $tier"
# Try fallback first when primary is throttle-risk
if [[ -n "$fallback" && "$fallback" != "$primary" ]] && check_model_available "$fallback" "$force" "true"; then
echo "$fallback"
[[ "$quiet" != "true" ]] && print_success "Resolved $tier -> $fallback (rate-limit routing: $primary_provider at ${rl_risk})"
return 0
fi
# Fallback also unavailable — still try primary (better than nothing)
[[ "$quiet" != "true" ]] && print_warning "Fallback also unavailable, trying primary despite rate limit risk"
fi
fi

# Try primary
if check_model_available "$primary" "$force" "true"; then
echo "$primary"
Expand Down Expand Up @@ -1239,30 +1337,39 @@ cmd_rate_limits() {
fi

echo ""
echo "Rate Limit Status"
echo "================="
echo "Rate Limit Status (from API response headers)"
echo "============================================="
echo ""

local count
count=$(db_query "SELECT COUNT(*) FROM rate_limits;")

if [[ "$count" -eq 0 ]]; then
print_info "No rate limit data cached. Probe providers to collect rate limit headers."
return 0
else
printf " %-12s %-12s %-12s %-20s %-12s %-12s %-20s %-20s\n" \
"Provider" "Req Limit" "Req Left" "Req Reset" "Tok Limit" "Tok Left" "Tok Reset" "Checked"
printf " %-12s %-12s %-12s %-20s %-12s %-12s %-20s %-20s\n" \
"--------" "---------" "--------" "---------" "---------" "--------" "---------" "-------"

db_query "SELECT * FROM rate_limits ORDER BY provider;" |
while IFS='|' read -r prov rl rr rres tl tr tres checked _ttl; do
printf " %-12s %-12s %-12s %-20s %-12s %-12s %-20s %-20s\n" \
"$prov" "$rl" "$rr" "${rres:-n/a}" "$tl" "$tr" "${tres:-n/a}" "$checked"
done
fi

printf " %-12s %-12s %-12s %-20s %-12s %-12s %-20s %-20s\n" \
"Provider" "Req Limit" "Req Left" "Req Reset" "Tok Limit" "Tok Left" "Tok Reset" "Checked"
printf " %-12s %-12s %-12s %-20s %-12s %-12s %-20s %-20s\n" \
"--------" "---------" "--------" "---------" "---------" "--------" "---------" "-------"
echo ""

db_query "SELECT * FROM rate_limits ORDER BY provider;" |
while IFS='|' read -r prov rl rr rres tl tr tres checked _ttl; do
printf " %-12s %-12s %-12s %-20s %-12s %-12s %-20s %-20s\n" \
"$prov" "$rl" "$rr" "${rres:-n/a}" "$tl" "$tr" "${tres:-n/a}" "$checked"
done
# Also show observability-derived utilisation (t1330)
local obs_helper="${SCRIPT_DIR}/observability-helper.sh"
if [[ -x "$obs_helper" ]]; then
echo "Rate Limit Utilisation (from observability DB, t1330)"
echo "====================================================="
echo ""
bash "$obs_helper" rate-limits || true
fi

echo ""
return 0
}

Expand Down
Loading