Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions llama_stack/providers/remote/inference/anthropic/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,17 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import logging

from anthropic import AsyncAnthropic, NotFoundError

from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin

from .config import AnthropicConfig
from .models import MODEL_ENTRIES

# Use the project-wide structured logger (not the stdlib root-style logger)
# so Anthropic provider messages carry the "anthropic" category and honor
# the stack's central log configuration.
logger = get_logger(name=__name__, category="anthropic")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wrong logger

from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="anthropic")



class AnthropicInferenceAdapter(LiteLLMOpenAIMixin):
def __init__(self, config: AnthropicConfig) -> None:
Expand All @@ -19,9 +25,35 @@ def __init__(self, config: AnthropicConfig) -> None:
provider_data_api_key_field="anthropic_api_key",
)
self.config = config
self._client: AsyncAnthropic | None = None

    async def initialize(self) -> None:
        """Delegate startup to the LiteLLM mixin; no Anthropic-specific setup is needed."""
        await super().initialize()

async def shutdown(self) -> None:
# Clean up the client connection pool
if self._client:
await self._client.aclose()
self._client = None
await super().shutdown()

@property
def client(self) -> AsyncAnthropic:
if self._client is None:
api_key = self.config.api_key if self.config.api_key else "no-key"
self._client = AsyncAnthropic(api_key=api_key)
return self._client
Comment on lines +41 to +45
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will only work if the API key for anthropic is provided in the config. However, Llama Stack users can also provide their own API key in each request for this (any many other) providers. Our general pattern for providers that extend LiteLLM and any that support per-request credential passthrough via the x-llamastack-provider-data header is that we do not cache the clients ever, as that could lead to subsequent requests that do not send proper auth using previously sent auth from a different client.

I wonder more generally if the scope of this PR should be adjusted since #2835 landed? It provides a way to fetch clients and check model availability that should work for any of our LiteLLM based providers, I believe?

Copy link
Contributor Author

@r3v5 r3v5 Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @bbrowning for clarification. However, Anthropic is not fully OpenAI compatible in terms of retrieve specific model or list models api endpoints. OpenAI requires Bearer token while Anthropic has slightly different structure for API Key to use during calling these endpoints. That's why I used AsyncAnthropic.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@r3v5 what about the pattern in #2886 ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, @mattf! I will look on that.

Copy link
Contributor Author

@r3v5 r3v5 Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mattf your PR is nice! Great job 👏 Should I again cherry-pick from your PR and use your infrastructure? I didn’t realise litellm is so powerful 😁

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's a good idea


async def check_model_availability(self, model: str) -> bool:
try:
retrieved_model = await self.client.models.retrieve(model)
logger.info(f"Model {retrieved_model.id} is available on Anthropic")
return True

except NotFoundError:
logger.info(f"Model {model} was not found on Anthropic")

except Exception as e:
logger.error(f"Failed to check model availability for {model} on Anthropic: {e}")

return False
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ classifiers = [
]
dependencies = [
"aiohttp",
"anthropic>=0.58.2",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed? It's a provider dep, not a server one.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should go in llama_stack/providers/registry/inference.py

"fastapi>=0.115.0,<1.0", # server
"fire", # for MCP in LLS client
"httpx",
Expand Down
12 changes: 11 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ aiosqlite==0.21.0
# via llama-stack
annotated-types==0.7.0
# via pydantic
anthropic==0.58.2
# via llama-stack
anyio==4.8.0
# via
# anthropic
# httpx
# llama-api-client
# llama-stack-client
Expand Down Expand Up @@ -50,6 +53,7 @@ deprecated==1.2.18
# opentelemetry-semantic-conventions
distro==1.9.0
# via
# anthropic
# llama-api-client
# llama-stack-client
# openai
Expand Down Expand Up @@ -82,6 +86,7 @@ httpcore==1.0.9
# via httpx
httpx==0.28.1
# via
# anthropic
# llama-api-client
# llama-stack
# llama-stack-client
Expand All @@ -99,7 +104,9 @@ importlib-metadata==8.5.0
jinja2==3.1.6
# via llama-stack
jiter==0.8.2
# via openai
# via
# anthropic
# openai
jsonschema==4.23.0
# via llama-stack
jsonschema-specifications==2024.10.1
Expand Down Expand Up @@ -169,6 +176,7 @@ pycparser==2.22 ; platform_python_implementation != 'PyPy'
# via cffi
pydantic==2.10.6
# via
# anthropic
# fastapi
# llama-api-client
# llama-stack
Expand Down Expand Up @@ -220,6 +228,7 @@ six==1.17.0
# python-dateutil
sniffio==1.3.1
# via
# anthropic
# anyio
# llama-api-client
# llama-stack-client
Expand All @@ -243,6 +252,7 @@ tqdm==4.67.1
typing-extensions==4.12.2
# via
# aiosqlite
# anthropic
# anyio
# fastapi
# huggingface-hub
Expand Down
Loading
Loading