Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions docs/my-website/docs/providers/aws_sagemaker.md
Original file line number Diff line number Diff line change
Expand Up @@ -526,3 +526,98 @@ print(f"response: {response}")
```



## Nova Models on SageMaker

LiteLLM supports Amazon Nova models (Nova Micro, Nova Lite, Nova 2 Lite) deployed on SageMaker Inference real-time endpoints. These custom/fine-tuned Nova models use an OpenAI-compatible API format.

**Reference:** [AWS Blog - Amazon SageMaker Inference for Custom Amazon Nova Models](https://aws.amazon.com/blogs/aws/announcing-amazon-sagemaker-inference-for-custom-amazon-nova-models/)

### Usage

Use the `sagemaker_nova/` prefix with your SageMaker endpoint name:

```python
import litellm
import os

os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = "us-east-1"

# Basic chat completion
response = litellm.completion(
model="sagemaker_nova/my-nova-endpoint",
messages=[{"role": "user", "content": "Hello, how are you?"}],
temperature=0.7,
max_tokens=512,
)
print(response.choices[0].message.content)
```

### Streaming

```python
response = litellm.completion(
model="sagemaker_nova/my-nova-endpoint",
messages=[{"role": "user", "content": "Write a short poem"}],
stream=True,
stream_options={"include_usage": True},
)
for chunk in response:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="")
```

### Multimodal (Images)

Nova models on SageMaker support image inputs using base64 data URIs:

```python
response = litellm.completion(
model="sagemaker_nova/my-nova-endpoint",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "What's in this image?"},
{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}
]
}
],
)
```

### Proxy Config

```yaml
model_list:
- model_name: nova-micro
litellm_params:
model: sagemaker_nova/my-nova-micro-endpoint
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
aws_region_name: us-east-1
```

### Supported Parameters

All standard OpenAI parameters are supported, plus these Nova-specific parameters:

| Parameter | Type | Description |
|-----------|------|-------------|
| `top_k` | integer | Limits token selection to top K most likely tokens |
| `reasoning_effort` | `"low"` \| `"high"` | Reasoning effort level (Nova 2 Lite custom models only) |
| `allowed_token_ids` | array[int] | Restrict output to specified token IDs |
| `truncate_prompt_tokens` | integer | Truncate the prompt to N tokens if it exceeds the limit |

```python
response = litellm.completion(
model="sagemaker_nova/my-nova-endpoint",
messages=[{"role": "user", "content": "Think step by step: what is 2+2?"}],
top_k=40,
reasoning_effort="low",
logprobs=True,
top_logprobs=2,
)
```
9 changes: 3 additions & 6 deletions litellm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1464,12 +1464,9 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
from .llms.petals.completion.transformation import PetalsConfig as PetalsConfig
from .llms.ollama.chat.transformation import OllamaChatConfig as OllamaChatConfig
from .llms.ollama.completion.transformation import OllamaConfig as OllamaConfig
from .llms.sagemaker.completion.transformation import (
SagemakerConfig as SagemakerConfig,
)
from .llms.sagemaker.chat.transformation import (
SagemakerChatConfig as SagemakerChatConfig,
)
from .llms.sagemaker.completion.transformation import SagemakerConfig as SagemakerConfig
from .llms.sagemaker.chat.transformation import SagemakerChatConfig as SagemakerChatConfig
from .llms.sagemaker.nova.transformation import SagemakerNovaConfig as SagemakerNovaConfig
from .llms.cohere.chat.transformation import CohereChatConfig as CohereChatConfig
from .llms.anthropic.experimental_pass_through.messages.transformation import (
AnthropicMessagesConfig as AnthropicMessagesConfig,
Expand Down
5 changes: 5 additions & 0 deletions litellm/_lazy_imports_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@
"OllamaConfig",
"SagemakerConfig",
"SagemakerChatConfig",
"SagemakerNovaConfig",
"CohereChatConfig",
"AnthropicMessagesConfig",
"AmazonAnthropicClaudeMessagesConfig",
Expand Down Expand Up @@ -701,6 +702,10 @@
".llms.sagemaker.chat.transformation",
"SagemakerChatConfig",
),
"SagemakerNovaConfig": (
".llms.sagemaker.nova.transformation",
"SagemakerNovaConfig",
),
"CohereChatConfig": (".llms.cohere.chat.transformation", "CohereChatConfig"),
"AnthropicMessagesConfig": (
".llms.anthropic.experimental_pass_through.messages.transformation",
Expand Down
1 change: 1 addition & 0 deletions litellm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@
"azure_ai",
"sagemaker",
"sagemaker_chat",
"sagemaker_nova",
"bedrock",
"vllm",
"nlp_cloud",
Expand Down
10 changes: 7 additions & 3 deletions litellm/llms/sagemaker/chat/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def get_sync_custom_stream_wrapper(
streaming_response = CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="sagemaker_chat",
custom_llm_provider=custom_llm_provider,
logging_obj=logging_obj,
)
return streaming_response
Expand All @@ -180,8 +180,12 @@ async def get_async_custom_stream_wrapper(
signed_json_body: Optional[bytes] = None,
) -> CustomStreamWrapper:
if client is None or isinstance(client, HTTPHandler):
try:
llm_provider = LlmProviders(custom_llm_provider)
except ValueError:
llm_provider = LlmProviders.SAGEMAKER_CHAT
client = get_async_httpx_client(
llm_provider=LlmProviders.SAGEMAKER_CHAT, params={}
llm_provider=llm_provider, params={}
)

try:
Expand Down Expand Up @@ -210,7 +214,7 @@ async def get_async_custom_stream_wrapper(
streaming_response = CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="sagemaker_chat",
custom_llm_provider=custom_llm_provider,
logging_obj=logging_obj,
)
return streaming_response
1 change: 1 addition & 0 deletions litellm/llms/sagemaker/nova/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .transformation import SagemakerNovaConfig # noqa: F401
70 changes: 70 additions & 0 deletions litellm/llms/sagemaker/nova/transformation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""
Translate from OpenAI's `/v1/chat/completions` to SageMaker Nova Inference endpoints.

Nova models on SageMaker use OpenAI-compatible request/response format with
additional Nova-specific parameters (top_k, reasoning_effort, etc.).

Docs: https://docs.aws.amazon.com/nova/latest/nova2-userguide/nova-sagemaker-inference-api-reference.html
"""

from typing import List

from litellm.types.llms.openai import AllMessageValues

from ..chat.transformation import SagemakerChatConfig


class SagemakerNovaConfig(SagemakerChatConfig):
    """
    Config for Amazon Nova models deployed on SageMaker Inference endpoints.

    Nova uses OpenAI-compatible format (same as sagemaker_chat / HF Messages API)
    but with additional Nova-specific parameters and requires `stream: true` in
    the request body for streaming.

    NOTE: `reasoning_effort` is only supported by Nova 2 Lite custom models —
    passing it to a Nova Micro / Nova Lite endpoint will result in an API error.
    SageMaker endpoint names are opaque strings, so support cannot be looked up
    statically here; callers should only send it to endpoints that accept it.

    Usage:
        model="sagemaker_nova/<endpoint-name>"
    """

    @property
    def supports_stream_param_in_request_body(self) -> bool:
        """Nova expects `stream: true` in the request body for streaming."""
        return True

    def get_supported_openai_params(self, model: str) -> List:
        """
        Extend parent params with Nova-specific parameters.

        `reasoning_effort` is advertised unconditionally because the endpoint
        name alone does not reveal the underlying model; see class docstring
        for the Nova 2 Lite restriction.
        """
        params = super().get_supported_openai_params(model)
        nova_params = [
            "top_k",
            "reasoning_effort",  # Nova 2 Lite custom models only — see class docstring
            "allowed_token_ids",
            "truncate_prompt_tokens",
        ]
        for p in nova_params:
            if p not in params:
                params.append(p)
        return params

    def transform_request(
        self,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        headers: dict,
    ) -> dict:
        """
        Nova SageMaker endpoints do not accept 'model' in the request body.
        Only supported fields: messages, max_tokens, max_completion_tokens,
        temperature, top_p, top_k, stream, stream_options, logprobs,
        top_logprobs, reasoning_effort, allowed_token_ids, truncate_prompt_tokens.
        """
        request_body = super().transform_request(
            model=model,
            messages=messages,
            optional_params=optional_params,
            litellm_params=litellm_params,
            headers=headers,
        )
        # The endpoint already identifies the model; sending 'model' is rejected.
        request_body.pop("model", None)
        return request_body
6 changes: 4 additions & 2 deletions litellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3712,8 +3712,10 @@ def completion( # type: ignore # noqa: PLR0915
):
return _model_response
response = _model_response
elif custom_llm_provider == "sagemaker_chat":
elif custom_llm_provider in ("sagemaker_chat", "sagemaker_nova"):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sagemaker_nova not added to exception mapping

litellm/litellm_core_utils/exception_mapping_utils.py maps AWS-specific error strings (e.g. "Unable to locate credentials", context-window exceeded) to proper LiteLLM exception types, but its condition only covers "sagemaker" and "sagemaker_chat":

elif (
    custom_llm_provider == "sagemaker"
    or custom_llm_provider == "sagemaker_chat"
):

Because "sagemaker_nova" is missing, credential errors and context-window exceeded errors from a Nova endpoint will not be translated into BadRequestError / ContextWindowExceededError — they'll surface as raw, unmapped exceptions instead. The fix is to add sagemaker_nova to that condition (in exception_mapping_utils.py):

elif (
    custom_llm_provider == "sagemaker"
    or custom_llm_provider == "sagemaker_chat"
    or custom_llm_provider == "sagemaker_nova"
):

# boto3 reads keys from .env
# sagemaker_chat: HF Messages API endpoints
# sagemaker_nova: Nova models on SageMaker (OpenAI-compatible)
model_response = base_llm_http_handler.completion(
model=model,
stream=stream,
Expand All @@ -3723,7 +3725,7 @@ def completion( # type: ignore # noqa: PLR0915
model_response=model_response,
optional_params=optional_params,
litellm_params=litellm_params,
custom_llm_provider="sagemaker_chat",
custom_llm_provider=custom_llm_provider,
timeout=timeout,
headers=headers,
encoding=_get_encoding(),
Expand Down
1 change: 1 addition & 0 deletions litellm/types/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3131,6 +3131,7 @@ class LlmProviders(str, Enum):
AZURE_AI = "azure_ai"
SAGEMAKER = "sagemaker"
SAGEMAKER_CHAT = "sagemaker_chat"
SAGEMAKER_NOVA = "sagemaker_nova"
BEDROCK = "bedrock"
VLLM = "vllm"
NLP_CLOUD = "nlp_cloud"
Expand Down
1 change: 1 addition & 0 deletions litellm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7945,6 +7945,7 @@ def _build_provider_config_map() -> dict[LlmProviders, tuple[Callable, bool]]:
LlmProviders.VERTEX_AI_BETA: (lambda: litellm.VertexGeminiConfig(), False),
LlmProviders.CLOUDFLARE: (lambda: litellm.CloudflareChatConfig(), False),
LlmProviders.SAGEMAKER_CHAT: (lambda: litellm.SagemakerChatConfig(), False),
LlmProviders.SAGEMAKER_NOVA: (lambda: litellm.SagemakerNovaConfig(), False),
LlmProviders.SAGEMAKER: (lambda: litellm.SagemakerConfig(), False),
LlmProviders.FIREWORKS_AI: (lambda: litellm.FireworksAIConfig(), False),
LlmProviders.FRIENDLIAI: (lambda: litellm.FriendliaiChatConfig(), False),
Expand Down
Loading
Loading