From aceb6c47d86fbfdf2de539245de38d1d48491b9a Mon Sep 17 00:00:00 2001
From: Ryan H <3118399+ryanh-ai@users.noreply.github.com>
Date: Wed, 18 Feb 2026 22:36:07 -0800
Subject: [PATCH 1/3] feat: add sagemaker_nova provider for Nova models on
 SageMaker

Add support for custom/fine-tuned Amazon Nova models (Nova Micro, Nova Lite,
Nova 2 Lite) deployed on SageMaker Inference real-time endpoints.

Nova uses OpenAI-compatible request/response format with additional
Nova-specific parameters (top_k, reasoning_effort, allowed_token_ids,
truncate_prompt_tokens) and requires stream:true in the request body
when streaming. Nova endpoints also reject 'model' in the request body.

Changes:
- New provider: sagemaker_nova/<endpoint-name>
- SagemakerNovaConfig inherits from SagemakerChatConfig
- Override transform_request to strip 'model' from request body
- Override supports_stream_param_in_request_body (True for Nova)
- Extend get_supported_openai_params with Nova-specific params
- Refactored SagemakerChatConfig to use custom_llm_provider param
  instead of hardcoded strings (backwards-compatible)
- Consolidated main.py routing for sagemaker_chat and sagemaker_nova
- 22 unit tests + 9 integration tests (skip-gated)
- Documentation with SDK, streaming, multimodal, and proxy examples
- All tests verified against live SageMaker Nova endpoint
---
 .../docs/providers/aws_sagemaker.md           |  95 +++++
 litellm/__init__.py                           |   1 +
 litellm/_lazy_imports_registry.py             |   5 +
 litellm/constants.py                          |   1 +
 litellm/llms/sagemaker/chat/transformation.py |  10 +-
 litellm/llms/sagemaker/nova/__init__.py       |   1 +
 litellm/llms/sagemaker/nova/transformation.py |  73 ++++
 litellm/main.py                               |   6 +-
 litellm/types/utils.py                        |   1 +
 litellm/utils.py                              |   1 +
 .../test_sagemaker_nova_integration.py        | 276 ++++++++++++
 .../test_sagemaker_nova_transformation.py     | 393 ++++++++++++++++++
 12 files changed, 858 insertions(+), 5 deletions(-)
 create mode 100644 litellm/llms/sagemaker/nova/__init__.py
 create mode 100644 litellm/llms/sagemaker/nova/transformation.py
 create mode 100644 tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py
 create mode 100644 tests/test_litellm/llms/sagemaker/test_sagemaker_nova_transformation.py
diff --git a/docs/my-website/docs/providers/aws_sagemaker.md b/docs/my-website/docs/providers/aws_sagemaker.md
index bab475e7305..a2440c73d7d 100644
--- a/docs/my-website/docs/providers/aws_sagemaker.md
+++ b/docs/my-website/docs/providers/aws_sagemaker.md
@@ -526,3 +526,98 @@ print(f"response: {response}")
 ```
 
 
+
+## Nova Models on SageMaker
+
+LiteLLM supports Amazon Nova models (Nova Micro, Nova Lite, Nova 2 Lite) deployed on SageMaker Inference real-time endpoints. These custom/fine-tuned Nova models use an OpenAI-compatible API format.
+
+**Reference:** [AWS Blog - Amazon SageMaker Inference for Custom Amazon Nova Models](https://aws.amazon.com/blogs/aws/announcing-amazon-sagemaker-inference-for-custom-amazon-nova-models/)
+
+### Usage
+
+Use the `sagemaker_nova/` prefix with your SageMaker endpoint name:
+
+```python
+import litellm
+import os
+
+os.environ["AWS_ACCESS_KEY_ID"] = ""
+os.environ["AWS_SECRET_ACCESS_KEY"] = ""
+os.environ["AWS_REGION_NAME"] = "us-east-1"
+
+# Basic chat completion
+response = litellm.completion(
+    model="sagemaker_nova/my-nova-endpoint",
+    messages=[{"role": "user", "content": "Hello, how are you?"}],
+    temperature=0.7,
+    max_tokens=512,
+)
+print(response.choices[0].message.content)
+```
+
+### Streaming
+
+```python
+response = litellm.completion(
+    model="sagemaker_nova/my-nova-endpoint",
+    messages=[{"role": "user", "content": "Write a short poem"}],
+    stream=True,
+    stream_options={"include_usage": True},
+)
+for chunk in response:
+    if chunk.choices[0].delta.content:
+        print(chunk.choices[0].delta.content, end="")
+```
+
+### Multimodal (Images)
+
+Nova models on SageMaker support image inputs using base64 data URIs:
+
+```python
+response = litellm.completion(
+    model="sagemaker_nova/my-nova-endpoint",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What's in this image?"},
+                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}
+            ]
+        }
+    ],
+)
+```
+
+### Proxy Config
+
+```yaml
+model_list:
+  - model_name: nova-micro
+    litellm_params:
+      model: sagemaker_nova/my-nova-micro-endpoint
+      aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
+      aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
+      aws_region_name: us-east-1
+```
+
+### Supported Parameters
+
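+Standard OpenAI chat parameters accepted by Nova endpoints (`max_tokens`, `max_completion_tokens`, `temperature`, `top_p`, `stream`, `stream_options`, `logprobs`, `top_logprobs`) are supported, plus these Nova-specific parameters: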
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `top_k` | integer | Limits token selection to top K most likely tokens |
+| `reasoning_effort` | `"low"` \| `"high"` | Reasoning effort level (Nova 2 Lite custom models only) |
+| `allowed_token_ids` | array[int] | Restrict output to specified token IDs |
+| `truncate_prompt_tokens` | integer | Truncate prompt to N tokens if it exceeds limit |
+
+```python
+response = litellm.completion(
+    model="sagemaker_nova/my-nova-endpoint",
+    messages=[{"role": "user", "content": "Think step by step: what is 2+2?"}],
+    top_k=40,
+    reasoning_effort="low",
+    logprobs=True,
+    top_logprobs=2,
+)
+```
diff --git a/litellm/__init__.py b/litellm/__init__.py
index a994db85b11..6882f34b8de 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -1367,6 +1367,7 @@ def set_global_gitlab_config(config: Dict[str, Any]) -> None:
     from .llms.ollama.completion.transformation import OllamaConfig as OllamaConfig
     from .llms.sagemaker.completion.transformation import SagemakerConfig as SagemakerConfig
     from .llms.sagemaker.chat.transformation import SagemakerChatConfig as SagemakerChatConfig
+    from .llms.sagemaker.nova.transformation import SagemakerNovaConfig as SagemakerNovaConfig
     from .llms.cohere.chat.transformation import CohereChatConfig as CohereChatConfig
     from .llms.anthropic.experimental_pass_through.messages.transformation import AnthropicMessagesConfig as AnthropicMessagesConfig
     from .llms.bedrock.messages.invoke_transformations.anthropic_claude3_transformation import AmazonAnthropicClaudeMessagesConfig as AmazonAnthropicClaudeMessagesConfig
diff --git a/litellm/_lazy_imports_registry.py b/litellm/_lazy_imports_registry.py
index 943acc6320f..3278d7067d4 100644
--- a/litellm/_lazy_imports_registry.py
+++ b/litellm/_lazy_imports_registry.py
@@ -167,6 +167,7 @@
     "OllamaConfig",
     "SagemakerConfig",
     "SagemakerChatConfig",
+    "SagemakerNovaConfig",
     "CohereChatConfig",
     "AnthropicMessagesConfig",
     "AmazonAnthropicClaudeMessagesConfig",
@@ -694,6 +695,10 @@
         ".llms.sagemaker.chat.transformation",
         "SagemakerChatConfig",
     ),
+    "SagemakerNovaConfig": (
+        ".llms.sagemaker.nova.transformation",
+        "SagemakerNovaConfig",
+    ),
     "CohereChatConfig": (".llms.cohere.chat.transformation", "CohereChatConfig"),
     "AnthropicMessagesConfig": (
         ".llms.anthropic.experimental_pass_through.messages.transformation",
diff --git a/litellm/constants.py b/litellm/constants.py
index e4d3fa39e9d..89955220bf6 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -467,6 +467,7 @@
     "azure_ai",
     "sagemaker",
     "sagemaker_chat",
+    "sagemaker_nova",
     "bedrock",
     "vllm",
     "nlp_cloud",
diff --git a/litellm/llms/sagemaker/chat/transformation.py b/litellm/llms/sagemaker/chat/transformation.py
index 2b458fbc438..60e85c9f93b 100644
--- a/litellm/llms/sagemaker/chat/transformation.py
+++ b/litellm/llms/sagemaker/chat/transformation.py
@@ -160,7 +160,7 @@ def get_sync_custom_stream_wrapper(
         streaming_response = CustomStreamWrapper(
             completion_stream=completion_stream,
             model=model,
-            custom_llm_provider="sagemaker_chat",
+            custom_llm_provider=custom_llm_provider,
             logging_obj=logging_obj,
         )
         return streaming_response
@@ -180,8 +180,12 @@ async def get_async_custom_stream_wrapper(
         signed_json_body: Optional[bytes] = None,
     ) -> CustomStreamWrapper:
         if client is None or isinstance(client, HTTPHandler):
+            try:
+                llm_provider = LlmProviders(custom_llm_provider)
+            except ValueError:
+                llm_provider = LlmProviders.SAGEMAKER_CHAT
             client = get_async_httpx_client(
-                llm_provider=LlmProviders.SAGEMAKER_CHAT, params={}
+                llm_provider=llm_provider, params={}
             )
 
         try:
@@ -210,7 +214,7 @@ async def get_async_custom_stream_wrapper(
         streaming_response = CustomStreamWrapper(
             completion_stream=completion_stream,
             model=model,
-            custom_llm_provider="sagemaker_chat",
+            custom_llm_provider=custom_llm_provider,
             logging_obj=logging_obj,
         )
         return streaming_response
diff --git a/litellm/llms/sagemaker/nova/__init__.py b/litellm/llms/sagemaker/nova/__init__.py
new file mode 100644
index 00000000000..fdebd0b0e41
--- /dev/null
+++ b/litellm/llms/sagemaker/nova/__init__.py
@@ -0,0 +1 @@
+from .transformation import SagemakerNovaConfig  # noqa: F401
diff --git a/litellm/llms/sagemaker/nova/transformation.py b/litellm/llms/sagemaker/nova/transformation.py
new file mode 100644
index 00000000000..bab8c7033d8
--- /dev/null
+++ b/litellm/llms/sagemaker/nova/transformation.py
@@ -0,0 +1,73 @@
+"""
+Translate from OpenAI's `/v1/chat/completions` to SageMaker Nova Inference endpoints.
+
+Nova models on SageMaker use OpenAI-compatible request/response format with
+additional Nova-specific parameters (top_k, reasoning_effort, etc.).
+
+Docs: https://docs.aws.amazon.com/nova/latest/nova2-userguide/nova-sagemaker-inference-api-reference.html
+"""
+
+from typing import List
+
+from litellm.types.llms.openai import AllMessageValues
+
+from ..chat.transformation import SagemakerChatConfig
+
+
+class SagemakerNovaConfig(SagemakerChatConfig):
+    """
+    Config for Amazon Nova models deployed on SageMaker Inference endpoints.
+
+    Nova uses OpenAI-compatible format (same as sagemaker_chat / HF Messages API)
+    but with additional Nova-specific parameters and requires `stream: true` in
+    the request body for streaming.
+
+    Usage:
+        model="sagemaker_nova/<endpoint-name>"
+    """
+
+    @property
+    def supports_stream_param_in_request_body(self) -> bool:
+        """Nova expects `stream: true` in the request body for streaming."""
+        return True
+
+    def get_supported_openai_params(self, model: str) -> List:
+        """Return the parent's supported params plus Nova-specific ones."""
+        # Copy so the list returned by the parent is never mutated in place.
+        params = list(super().get_supported_openai_params(model))
+        nova_params = (
+            "top_k",
+            "reasoning_effort",
+            "allowed_token_ids",
+            "truncate_prompt_tokens",
+        )
+        # Append only the missing names so the result has no duplicates.
+        params.extend([p for p in nova_params if p not in params])
+        return params
+
+    def transform_request(
+        self,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        headers: dict,
+    ) -> dict:
+        """
+        Nova SageMaker endpoints do not accept 'model' in the request body.
+        Only supported fields: messages, max_tokens, max_completion_tokens,
+        temperature, top_p, top_k, stream, stream_options, logprobs,
+        top_logprobs, reasoning_effort, allowed_token_ids, truncate_prompt_tokens.
+        """
+        request_body = super().transform_request(
+            model=model,
+            messages=messages,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+        request_body.pop("model", None)
+        return request_body
+
+
+sagemaker_nova_config = SagemakerNovaConfig()
diff --git a/litellm/main.py b/litellm/main.py
index 80a2f74c571..e622c698a4a 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3623,8 +3623,10 @@ def completion(  # type: ignore # noqa: PLR0915
             ):
                 return _model_response
             response = _model_response
-        elif custom_llm_provider == "sagemaker_chat":
+        elif custom_llm_provider in ("sagemaker_chat", "sagemaker_nova"):
             # boto3 reads keys from .env
+            # sagemaker_chat: HF Messages API endpoints
+            # sagemaker_nova: Nova models on SageMaker (OpenAI-compatible)
             model_response = base_llm_http_handler.completion(
                 model=model,
                 stream=stream,
@@ -3634,7 +3636,7 @@ def completion(  # type: ignore # noqa: PLR0915
                 model_response=model_response,
                 optional_params=optional_params,
                 litellm_params=litellm_params,
-                custom_llm_provider="sagemaker_chat",
+                custom_llm_provider=custom_llm_provider,
                 timeout=timeout,
                 headers=headers,
                 encoding=_get_encoding(),
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index 1fc0e55dab3..d25e8440d8b 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -3076,6 +3076,7 @@ class LlmProviders(str, Enum):
     AZURE_AI = "azure_ai"
     SAGEMAKER = "sagemaker"
     SAGEMAKER_CHAT = "sagemaker_chat"
+    SAGEMAKER_NOVA = "sagemaker_nova"
     BEDROCK = "bedrock"
     VLLM = "vllm"
     NLP_CLOUD = "nlp_cloud"
diff --git a/litellm/utils.py b/litellm/utils.py
index 6a18fcc9e35..e5eb53e5758 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7839,6 +7839,7 @@ def _build_provider_config_map() -> dict[LlmProviders, tuple[Callable, bool]]:
             LlmProviders.VERTEX_AI_BETA: (lambda: litellm.VertexGeminiConfig(), False),
             LlmProviders.CLOUDFLARE: (lambda: litellm.CloudflareChatConfig(), False),
             LlmProviders.SAGEMAKER_CHAT: (lambda: litellm.SagemakerChatConfig(), False),
+            LlmProviders.SAGEMAKER_NOVA: (lambda: litellm.SagemakerNovaConfig(), False),
             LlmProviders.SAGEMAKER: (lambda: litellm.SagemakerConfig(), False),
             LlmProviders.FIREWORKS_AI: (lambda: litellm.FireworksAIConfig(), False),
             LlmProviders.FRIENDLIAI: (lambda: litellm.FriendliaiChatConfig(), False),
diff --git a/tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py b/tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py
new file mode 100644
index 00000000000..6f55bea38b9
--- /dev/null
+++ b/tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py
@@ -0,0 +1,276 @@
+"""
+Integration tests for SageMaker Nova provider.
+
+These tests require a live SageMaker Nova endpoint and AWS credentials.
+They are skipped by default — run manually with:
+
+    pytest tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py -v --no-header -rN
+
+Prerequisites:
+    export AWS_PROFILE=<your-profile>      # or set AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
+    export AWS_REGION_NAME=us-east-1
+    export SAGEMAKER_NOVA_ENDPOINT=<your-endpoint-name>
+"""
+
+import base64
+import io
+import json
+import os
+import struct
+import zlib
+
+import pytest
+
+import litellm
+
+ENDPOINT = os.environ.get("SAGEMAKER_NOVA_ENDPOINT", "")
+MODEL = f"sagemaker_nova/{ENDPOINT}"
+
+skip_if_no_endpoint = pytest.mark.skipif(
+    not ENDPOINT,
+    reason="SAGEMAKER_NOVA_ENDPOINT not set — skipping live integration tests",
+)
+
+
+def _make_test_png() -> str:
+    """Create a minimal 4x4 PNG (red border, blue center) and return base64."""
+
+    def chunk(ctype, data):
+        c = ctype + data
+        return (
+            struct.pack(">I", len(data))
+            + c
+            + struct.pack(">I", zlib.crc32(c) & 0xFFFFFFFF)
+        )
+
+    width, height = 4, 4
+    pixels = []
+    for y in range(height):
+        for x in range(width):
+            if 1 <= x <= 2 and 1 <= y <= 2:
+                pixels.append((0, 0, 255))
+            else:
+                pixels.append((255, 0, 0))
+
+    raw = b""
+    for y in range(height):
+        raw += b"\x00"
+        for x in range(width):
+            raw += bytes(pixels[y * width + x])
+
+    png = (
+        b"\x89PNG\r\n\x1a\n"
+        + chunk(
+            b"IHDR", struct.pack(">IIBBBBB", width, height, 8, 2, 0, 0, 0)
+        )
+        + chunk(b"IDAT", zlib.compress(raw))
+        + chunk(b"IEND", b"")
+    )
+    return base64.b64encode(png).decode()
+
+
+@skip_if_no_endpoint
+class TestSagemakerNovaIntegration:
+    """Live integration tests for sagemaker_nova provider."""
+
+    def test_should_complete_basic_single_turn(self):
+        """Basic single-turn chat completion."""
+        response = litellm.completion(
+            model=MODEL,
+            messages=[{"role": "user", "content": "What is 2+2? Reply in one word."}],
+            max_tokens=32,
+            temperature=0.1,
+        )
+        assert response.choices[0].message.content is not None
+        assert len(response.choices[0].message.content.strip()) > 0
+        assert response.choices[0].finish_reason == "stop"
+        assert response.usage.prompt_tokens > 0
+        assert response.usage.completion_tokens > 0
+        assert response.usage.total_tokens == (
+            response.usage.prompt_tokens + response.usage.completion_tokens
+        )
+
+    def test_should_complete_multi_turn_conversation(self):
+        """Multi-turn conversation maintains context."""
+        messages = [
+            {"role": "user", "content": "My name is Alice."},
+        ]
+        response1 = litellm.completion(
+            model=MODEL,
+            messages=messages,
+            max_tokens=64,
+            temperature=0.1,
+        )
+        assistant_msg = response1.choices[0].message.content
+        assert assistant_msg is not None
+
+        # Second turn — model should remember the name
+        messages.append({"role": "assistant", "content": assistant_msg})
+        messages.append({"role": "user", "content": "What is my name?"})
+
+        response2 = litellm.completion(
+            model=MODEL,
+            messages=messages,
+            max_tokens=64,
+            temperature=0.1,
+        )
+        answer = response2.choices[0].message.content.lower()
+        assert "alice" in answer, f"Expected 'alice' in response, got: {answer}"
+
+    def test_should_stream_response(self):
+        """Streaming yields multiple chunks with content and a finish_reason."""
+        response = litellm.completion(
+            model=MODEL,
+            messages=[{"role": "user", "content": "Count from 1 to 5."}],
+            max_tokens=64,
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+
+        chunks = []
+        full_content = ""
+        for chunk in response:
+            chunks.append(chunk)
+            if chunk.choices:  # usage-only chunks may carry no choices
+                full_content += chunk.choices[0].delta.content or ""
+
+        assert len(chunks) > 1, "Expected multiple streaming chunks"
+        assert len(full_content.strip()) > 0, "Expected non-empty streamed content"
+
+        # At least one chunk must report a finish_reason
+        final_chunks_with_finish = [
+            c for c in chunks if c.choices and c.choices[0].finish_reason is not None
+        ]
+        assert len(final_chunks_with_finish) > 0, "Expected at least one chunk with finish_reason"
+
+    def test_should_return_logprobs(self):
+        """Logprobs are returned when requested."""
+        response = litellm.completion(
+            model=MODEL,
+            messages=[{"role": "user", "content": "Say hello."}],
+            max_tokens=16,
+            temperature=0.1,
+            logprobs=True,
+            top_logprobs=3,
+        )
+        lp = response.choices[0].logprobs
+        assert lp is not None, "Expected logprobs in response"
+
+        content = lp.content if hasattr(lp, "content") else lp.get("content")
+        assert content is not None and len(content) > 0, "Expected logprobs content"
+
+        first_token = content[0]
+        assert "token" in first_token or hasattr(first_token, "token")
+        assert "logprob" in first_token or hasattr(first_token, "logprob")
+
+        top = first_token.get("top_logprobs") if isinstance(first_token, dict) else first_token.top_logprobs
+        assert top is not None and len(top) == 3, "Expected 3 top_logprobs"
+
+    def test_should_handle_multimodal_image_input(self):
+        """A base64 PNG data URI in the content array is described by the model."""
+        b64_image = _make_test_png()
+        response = litellm.completion(
+            model=MODEL,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": "What colors do you see in this image? List them.",
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/png;base64,{b64_image}"
+                            },
+                        },
+                    ],
+                }
+            ],
+            max_tokens=128,
+        )
+        assert response.choices[0].message.content is not None  # check before .lower()
+        content = response.choices[0].message.content.lower()
+        assert len(content) > 0
+        # The image has red and blue — model should mention at least one
+        assert "red" in content or "blue" in content, (
+            f"Expected 'red' or 'blue' in multimodal response, got: {content}"
+        )
+
+    def test_should_pass_nova_specific_params(self):
+        """Nova-specific parameters (top_k) are accepted."""
+        response = litellm.completion(
+            model=MODEL,
+            messages=[{"role": "user", "content": "Say hello."}],
+            max_tokens=32,
+            top_k=40,
+            temperature=0.7,
+        )
+        assert response.choices[0].message.content is not None
+        assert response.usage.total_tokens > 0
+
+    def test_should_respect_system_message(self):
+        """A system message should steer the style of the reply."""
+        response = litellm.completion(
+            model=MODEL,
+            messages=[
+                {
+                    "role": "system",
+                    "content": "You are a pirate. Always respond in pirate speak.",
+                },
+                {"role": "user", "content": "How are you today?"},
+            ],
+            max_tokens=128,
+            temperature=0.7,
+        )
+        assert response.choices[0].message.content is not None  # check before .lower()
+        content = response.choices[0].message.content.lower()
+        # Pirate-themed words likely in response
+        pirate_words = ["arr", "ahoy", "matey", "ye", "sail", "sea", "cap"]
+        assert any(
+            w in content for w in pirate_words
+        ), f"Expected pirate speak, got: {content}"
+
+
+NOVA2_ENDPOINT = os.environ.get("SAGEMAKER_NOVA2_LITE_ENDPOINT", "")
+NOVA2_MODEL = f"sagemaker_nova/{NOVA2_ENDPOINT}"
+
+skip_if_no_nova2_endpoint = pytest.mark.skipif(
+    not NOVA2_ENDPOINT,
+    reason="SAGEMAKER_NOVA2_LITE_ENDPOINT not set — requires Nova 2 Lite endpoint",
+)
+
+
+@skip_if_no_nova2_endpoint
+class TestSagemakerNova2LiteIntegration:
+    """
+    Integration tests requiring a Nova 2 Lite endpoint (reasoning_effort support).
+
+    Run with:
+        export SAGEMAKER_NOVA2_LITE_ENDPOINT=<your-nova-2-lite-endpoint>
+        pytest tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py::TestSagemakerNova2LiteIntegration -v
+    """
+
+    def test_should_accept_reasoning_effort_low(self):
+        """reasoning_effort='low' should be accepted by Nova 2 Lite."""
+        response = litellm.completion(
+            model=NOVA2_MODEL,
+            messages=[{"role": "user", "content": "What is 2+2?"}],
+            max_tokens=32,
+            reasoning_effort="low",
+        )
+        assert response.choices[0].message.content is not None
+        assert response.usage.total_tokens > 0
+
+    def test_should_accept_reasoning_effort_high(self):
+        """reasoning_effort='high' should be accepted by Nova 2 Lite."""
+        response = litellm.completion(
+            model=NOVA2_MODEL,
+            messages=[{"role": "user", "content": "Explain why the sky is blue."}],
+            max_tokens=256,
+            reasoning_effort="high",
+        )
+        assert response.choices[0].message.content is not None
+        assert len(response.choices[0].message.content) > 0
+        assert response.usage.completion_tokens > 0
diff --git a/tests/test_litellm/llms/sagemaker/test_sagemaker_nova_transformation.py b/tests/test_litellm/llms/sagemaker/test_sagemaker_nova_transformation.py
new file mode 100644
index 00000000000..8c468a1ff66
--- /dev/null
+++ b/tests/test_litellm/llms/sagemaker/test_sagemaker_nova_transformation.py
@@ -0,0 +1,393 @@
+"""
+Unit tests for SageMaker Nova transformation config.
+"""
+
+import json
+import pytest
+
+from litellm.llms.sagemaker.nova.transformation import SagemakerNovaConfig
+from litellm.types.utils import ModelResponse
+from litellm.utils import convert_to_model_response_object
+
+
+class TestSagemakerNovaConfig:
+    def setup_method(self):
+        self.config = SagemakerNovaConfig()
+
+    def test_should_support_stream_param_in_request_body(self):
+        """Nova requires stream: true in the request body."""
+        assert self.config.supports_stream_param_in_request_body is True
+
+    def test_should_include_nova_specific_params(self):
+        """Nova-specific params should be in the supported params list."""
+        params = self.config.get_supported_openai_params(model="my-nova-endpoint")
+        assert "top_k" in params
+        assert "reasoning_effort" in params
+        assert "allowed_token_ids" in params
+        assert "truncate_prompt_tokens" in params
+
+    def test_should_include_standard_openai_params(self):
+        """Standard OpenAI params from parent should still be present."""
+        params = self.config.get_supported_openai_params(model="my-nova-endpoint")
+        assert "temperature" in params
+        assert "max_tokens" in params
+        assert "top_p" in params
+        assert "stream" in params
+        assert "logprobs" in params
+        assert "top_logprobs" in params
+        assert "stream_options" in params
+
+    def test_should_map_nova_params_to_request(self):
+        """Nova-specific params should pass through to optional_params."""
+        optional_params = self.config.map_openai_params(
+            non_default_params={
+                "top_k": 40,
+                "reasoning_effort": "low",
+                "temperature": 0.7,
+            },
+            optional_params={},
+            model="my-nova-endpoint",
+            drop_params=False,
+        )
+        assert optional_params["top_k"] == 40
+        assert optional_params["reasoning_effort"] == "low"
+        assert optional_params["temperature"] == 0.7
+
+    def test_should_generate_correct_url_non_streaming(self):
+        """Non-streaming URL should use /invocations."""
+        url = self.config.get_complete_url(
+            api_base=None,
+            api_key=None,
+            model="my-nova-endpoint",
+            optional_params={"aws_region_name": "us-east-1"},
+            litellm_params={},
+            stream=False,
+        )
+        assert url == "https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/my-nova-endpoint/invocations"
+
+    def test_should_generate_correct_url_streaming(self):
+        """Streaming URL should use /invocations-response-stream."""
+        url = self.config.get_complete_url(
+            api_base=None,
+            api_key=None,
+            model="my-nova-endpoint",
+            optional_params={"aws_region_name": "us-east-1"},
+            litellm_params={},
+            stream=True,
+        )
+        assert url == "https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/my-nova-endpoint/invocations-response-stream"
+
+    def test_should_have_custom_stream_wrapper(self):
+        """Nova should use custom stream wrapper (AWS EventStream)."""
+        assert self.config.has_custom_stream_wrapper is True
+
+
+class TestSagemakerNovaResponseParsing:
+    """Test that Nova's OpenAI-compatible responses are correctly parsed."""
+
+    def test_should_parse_non_streaming_response(self):
+        """Nova non-streaming response should be parsed into ModelResponse."""
+        nova_response = {
+            "id": "chatcmpl-123e4567-e89b-12d3-a456-426614174000",
+            "object": "chat.completion",
+            "created": 1677652288,
+            "model": "nova-micro-custom",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "Hello! How can I help?",
+                        "refusal": None,
+                    },
+                    "finish_reason": "stop",
+                }
+            ],
+            "usage": {
+                "prompt_tokens": 9,
+                "completion_tokens": 12,
+                "total_tokens": 21,
+            },
+        }
+        result = convert_to_model_response_object(
+            response_object=nova_response,
+            model_response_object=ModelResponse(),
+        )
+        assert result.id == "chatcmpl-123e4567-e89b-12d3-a456-426614174000"
+        assert result.choices[0].message.content == "Hello! How can I help?"
+        assert result.choices[0].finish_reason == "stop"
+        assert result.usage.prompt_tokens == 9
+        assert result.usage.completion_tokens == 12
+        assert result.usage.total_tokens == 21
+
+    def test_should_parse_response_with_reasoning_content(self):
+        """Nova reasoning_content should be extracted correctly."""
+        nova_response = {
+            "id": "chatcmpl-reasoning-test",
+            "object": "chat.completion",
+            "created": 1677652288,
+            "model": "nova-2-lite-custom",
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "The answer is 4.",
+                        "reasoning_content": "Let me think: 2+2=4",
+                    },
+                    "finish_reason": "stop",
+                }
+            ],
+            "usage": {
+                "prompt_tokens": 15,
+                "completion_tokens": 20,
+                "total_tokens": 35,
+            },
+        }
+        result = convert_to_model_response_object(
+            response_object=nova_response,
+            model_response_object=ModelResponse(),
+        )
+        assert result.choices[0].message.content == "The answer is 4."
+        assert result.choices[0].message.reasoning_content == "Let me think: 2+2=4"
+
+    def test_should_parse_response_with_logprobs(self):
+        """
+        Per-token logprobs in a Nova payload survive response parsing.
+        """
+        # Assemble the payload bottom-up so each nested piece has a name.
+        first_token_entry = {
+            "token": "Hello",
+            "logprob": -0.5,
+            "top_logprobs": [
+                {"token": "Hello", "logprob": -0.5},
+                {"token": "Hi", "logprob": -1.2},
+            ],
+        }
+        only_choice = {
+            "index": 0,
+            "message": {"role": "assistant", "content": "Hello"},
+            "logprobs": {"content": [first_token_entry]},
+            "finish_reason": "stop",
+        }
+        token_counts = {
+            "prompt_tokens": 5,
+            "completion_tokens": 1,
+            "total_tokens": 6,
+        }
+        payload = {
+            "id": "chatcmpl-logprobs-test",
+            "object": "chat.completion",
+            "created": 1677652288,
+            "model": "nova-micro-custom",
+            "choices": [only_choice],
+            "usage": token_counts,
+        }
+
+        parsed = convert_to_model_response_object(
+            response_object=payload,
+            model_response_object=ModelResponse(),
+        )
+        # Only the first token entry is inspected below.
+        parsed_logprobs = parsed.choices[0].logprobs
+        assert parsed_logprobs is not None
+        assert parsed_logprobs["content"][0]["token"] == "Hello"
+
+    def test_should_parse_response_with_cached_tokens(self):
+        """cached_tokens under usage.prompt_tokens_details is parsed from Nova."""
+        cached_count = 10
+        only_choice = {
+            "index": 0,
+            "message": {"role": "assistant", "content": "Hi"},
+            "finish_reason": "stop",
+        }
+        # The usage block reports 10 of the 20 prompt tokens as cached.
+        token_counts = {
+            "prompt_tokens": 20,
+            "completion_tokens": 1,
+            "total_tokens": 21,
+            "prompt_tokens_details": {"cached_tokens": cached_count},
+        }
+        payload = {
+            "id": "chatcmpl-cached-test",
+            "object": "chat.completion",
+            "created": 1677652288,
+            "model": "nova-micro-custom",
+            "choices": [only_choice],
+            "usage": token_counts,
+        }
+        parsed = convert_to_model_response_object(
+            response_object=payload,
+            model_response_object=ModelResponse(),
+        )
+        prompt_details = parsed.usage.prompt_tokens_details
+        assert prompt_details.cached_tokens == cached_count
+
+
+class TestSagemakerChatBackwardsCompatibility:
+    """Guard existing sagemaker_chat behavior against SagemakerChatConfig changes."""
+
+    def setup_method(self):
+        from litellm.llms.sagemaker.chat import transformation as chat_transformation
+        self.config = chat_transformation.SagemakerChatConfig()
+
+    def test_should_not_support_stream_param_in_request_body(self):
+        """The chat config must keep reporting no `stream` request-body support."""
+        assert self.config.supports_stream_param_in_request_body is False
+
+    def test_should_generate_correct_urls(self):
+        """
+        Invocation and streaming URLs for sagemaker_chat keep their shape.
+        """
+        # Keyed by the `stream` flag; iterated in insertion order.
+        expected_url_by_stream_flag = {
+            False: "https://runtime.sagemaker.us-west-2.amazonaws.com/endpoints/my-hf-endpoint/invocations",
+            True: "https://runtime.sagemaker.us-west-2.amazonaws.com/endpoints/my-hf-endpoint/invocations-response-stream",
+        }
+
+        for stream_flag, expected_url in expected_url_by_stream_flag.items():
+            # A fresh optional_params dict is passed on every call.
+            built_url = self.config.get_complete_url(
+                api_base=None,
+                api_key=None,
+                model="my-hf-endpoint",
+                optional_params={"aws_region_name": "us-west-2"},
+                litellm_params={},
+                stream=stream_flag,
+            )
+            assert built_url == expected_url
+
+    def test_should_still_have_custom_stream_wrapper(self):
+        """The chat config must keep advertising a custom stream wrapper."""
+        assert self.config.has_custom_stream_wrapper is True
+
+    def test_should_not_include_nova_specific_params(self):
+        """None of the Nova-only parameters may appear for sagemaker_chat."""
+        supported = self.config.get_supported_openai_params(model="my-hf-endpoint")
+        nova_only_names = (
+            "top_k", "reasoning_effort", "allowed_token_ids", "truncate_prompt_tokens",
+        )
+        assert [name for name in nova_only_names if name in supported] == []
+
+    def test_should_preserve_standard_openai_params(self):
+        """The standard OpenAI parameters remain supported for sagemaker_chat."""
+        supported = self.config.get_supported_openai_params(model="my-hf-endpoint")
+        standard_names = ("temperature", "max_tokens", "top_p", "stream")
+        # Collect absentees so a failure names every missing parameter at once.
+        missing = [name for name in standard_names if name not in supported]
+        assert missing == []
+
+    def test_sync_stream_wrapper_uses_correct_provider_string(self):
+        """
+        Calling get_sync_custom_stream_wrapper with
+        custom_llm_provider="sagemaker_chat" must construct
+        CustomStreamWrapper with that same provider string.
+        """
+        from unittest import mock
+
+        fake_response = mock.MagicMock(status_code=200)
+        fake_response.iter_bytes.return_value = iter([])
+        fake_http_client = mock.MagicMock()
+        fake_http_client.post.return_value = fake_response
+
+        wrapper_path = "litellm.llms.sagemaker.chat.transformation.CustomStreamWrapper"
+        with mock.patch(wrapper_path) as wrapper_cls:
+            wrapper_cls.return_value = mock.MagicMock()
+            self.config.get_sync_custom_stream_wrapper(
+                model="my-hf-endpoint",
+                custom_llm_provider="sagemaker_chat",
+                logging_obj=mock.MagicMock(),
+                api_base="https://example.com",
+                headers={},
+                data={},
+                messages=[],
+                client=fake_http_client,
+            )
+        wrapper_cls.assert_called_once()
+        _, passed_kwargs = wrapper_cls.call_args
+        assert passed_kwargs["custom_llm_provider"] == "sagemaker_chat"
+
+    def test_async_stream_wrapper_uses_correct_provider_string(self):
+        """
+        Awaiting get_async_custom_stream_wrapper with
+        custom_llm_provider="sagemaker_chat" must construct
+        CustomStreamWrapper with that same provider string.
+        """
+        import asyncio
+        from unittest import mock
+
+        # Async byte stream that finishes without yielding anything.
+        async def no_chunks():
+            return
+            yield  # unreachable; its presence makes this an async generator
+
+        fake_response = mock.MagicMock(status_code=200)
+        fake_response.aiter_bytes.return_value = no_chunks()
+        fake_http_client = mock.AsyncMock()
+        fake_http_client.post.return_value = fake_response
+
+        wrapper_path = "litellm.llms.sagemaker.chat.transformation.CustomStreamWrapper"
+        with mock.patch(wrapper_path) as wrapper_cls:
+            wrapper_cls.return_value = mock.MagicMock()
+            pending_wrapper = self.config.get_async_custom_stream_wrapper(
+                model="my-hf-endpoint",
+                custom_llm_provider="sagemaker_chat",
+                logging_obj=mock.MagicMock(),
+                api_base="https://example.com",
+                headers={},
+                data={},
+                messages=[],
+                client=fake_http_client,
+            )
+            asyncio.run(pending_wrapper)
+
+        wrapper_cls.assert_called_once()
+        _, passed_kwargs = wrapper_cls.call_args
+        assert passed_kwargs["custom_llm_provider"] == "sagemaker_chat"
+
+    def test_async_stream_wrapper_llm_provider_enum_resolves(self):
+        """
+        LlmProviders("sagemaker_chat") must resolve by value to the
+        SAGEMAKER_CHAT member instead of raising ValueError.
+        """
+        from litellm.types.utils import LlmProviders
+        resolved_member = LlmProviders("sagemaker_chat")
+        assert resolved_member == LlmProviders.SAGEMAKER_CHAT
+
+
+class TestSagemakerNovaTransformRequest:
+    """Checks on the request body produced by SagemakerNovaConfig.transform_request."""
+
+    def setup_method(self):
+        self.config = SagemakerNovaConfig()
+
+    def test_should_not_include_model_in_request_body(self):
+        """The transformed body carries messages and params but no 'model' key."""
+        user_turn = {"role": "user", "content": "Hello"}
+        body = self.config.transform_request(
+            model="my-nova-endpoint",
+            messages=[user_turn],
+            optional_params={"temperature": 0.7},
+            litellm_params={},
+            headers={},
+        )
+        assert "messages" in body and "model" not in body
+        assert body["temperature"] == 0.7
+
+    def test_should_include_all_nova_params_in_request(self):
+        """Nova-specific optional params pass through into the request body."""
+        nova_params = {
+            "top_k": 40,
+            "max_tokens": 512,
+            "reasoning_effort": "low",
+        }
+        body = self.config.transform_request(
+            model="my-nova-endpoint",
+            messages=[{"role": "user", "content": "Hello"}],
+            optional_params=dict(nova_params),
+            litellm_params={},
+            headers={},
+        )
+        assert "model" not in body
+        for name, expected in nova_params.items():
+            assert body[name] == expected

From 57efcb9a6ba8520b38eaa6238bd8c21d99836d4f Mon Sep 17 00:00:00 2001
From: Ryan H <3118399+ryanh-ai@users.noreply.github.com>
Date: Wed, 18 Feb 2026 22:47:20 -0800
Subject: [PATCH 2/3] fix: move integration tests to tests/local_testing/ per
 test directory policy

---
 .../test_sagemaker_nova_integration.py                            | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{test_litellm/llms/sagemaker => local_testing}/test_sagemaker_nova_integration.py (100%)

diff --git a/tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py b/tests/local_testing/test_sagemaker_nova_integration.py
similarity index 100%
rename from tests/test_litellm/llms/sagemaker/test_sagemaker_nova_integration.py
rename to tests/local_testing/test_sagemaker_nova_integration.py

From cd4248bf0636e9fff42975b1860486374e8179f7 Mon Sep 17 00:00:00 2001
From: Ryan H <3118399+ryanh-ai@users.noreply.github.com>
Date: Sat, 14 Mar 2026 13:32:09 -0700
Subject: [PATCH 3/3] fix: remove unused module-level SagemakerNovaConfig
 instance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sagemaker_nova_config singleton was never imported or used — the
ProviderConfigManager creates its own instance via the lambda registered
in utils.py. Removing this leftover boilerplate.
---
 litellm/llms/sagemaker/nova/transformation.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/litellm/llms/sagemaker/nova/transformation.py b/litellm/llms/sagemaker/nova/transformation.py
index bab8c7033d8..41c20847b53 100644
--- a/litellm/llms/sagemaker/nova/transformation.py
+++ b/litellm/llms/sagemaker/nova/transformation.py
@@ -68,6 +68,3 @@ def transform_request(
         )
         request_body.pop("model", None)
         return request_body
-
-
-sagemaker_nova_config = SagemakerNovaConfig()