Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions docs/my-website/docs/prompt_management.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
---
title: Prompt Management with Responses API
---

# Prompt Management with Responses API

Use LiteLLM Prompt Management with `/v1/responses` by passing `prompt_id` and optional `prompt_variables`.

## Basic Usage

```bash
curl -X POST "http://localhost:4000/v1/responses" \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o",
"prompt_id": "my-responses-prompt",
"prompt_variables": {"topic": "large language models"},
"input": []
}'
```

Here `"input": []` is intentional: with an empty `input`, the request is served entirely from the prompt template's messages (after `prompt_variables` substitution).

## Multi-turn Follow-up in `input`

To send follow-up turns in a single request, pass the full message history (prior user and assistant turns) in `input`.

```bash
curl -X POST "http://localhost:4000/v1/responses" \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o",
"prompt_id": "my-responses-prompt",
"prompt_variables": {"topic": "large language models"},
"input": [
{"role": "user", "content": "Topic is LLMs. Start short."},
{"role": "assistant", "content": "Sure, go ahead."},
{"role": "user", "content": "Now give me 3 bullets and include pricing caveat."}
]
}'
```

## Notes

- Prompt template messages are merged with your `input` messages.
- Prompt variable substitution applies to prompt message content.
- Tool call payload fields are not substituted by prompt variables.
- For follow-ups with `previous_response_id`, include `prompt_id` again if you want prompt management applied on that turn.
19 changes: 18 additions & 1 deletion docs/my-website/docs/proxy/prompt_management.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ litellm_settings:
1. **At Startup**: When the proxy starts, it reads the `prompts` field from `config.yaml`
2. **Initialization**: Each prompt is initialized based on its `prompt_integration` type
3. **In-Memory Storage**: Prompts are stored in the `IN_MEMORY_PROMPT_REGISTRY`
4. **Access**: Use these prompts via the `/v1/chat/completions` endpoint with `prompt_id` in the request
4. **Access**: Use these prompts via `/v1/chat/completions` or `/v1/responses` with `prompt_id` in the request

### Using Config-Loaded Prompts

Expand All @@ -331,6 +331,23 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
}'
```

You can also use the same `prompt_id` with the Responses API:

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/responses' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-4o",
"prompt_id": "coding_assistant",
"prompt_variables": {
"language": "python",
"task": "create a web scraper"
},
"input": []
}'
```

As with the chat-completions example, `"input": []` means the request is served entirely from the prompt template's messages after variable substitution; pass message history in `input` for follow-up turns.

### Prompt Schema Reference

Each prompt in the `prompts` list requires:
Expand Down
1 change: 1 addition & 0 deletions docs/my-website/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ const sidebars = {
"proxy/realtime_webrtc",
"rerank",
"response_api",
"prompt_management",
"response_api_compact",
{
type: "category",
Expand Down
102 changes: 102 additions & 0 deletions litellm/responses/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
)
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
AllMessageValues,
PromptObject,
Reasoning,
ResponseIncludable,
Expand Down Expand Up @@ -463,6 +464,53 @@ async def aresponses(
# Update local_vars with detected provider (fixes #19782)
local_vars["custom_llm_provider"] = custom_llm_provider

#########################################################
# ASYNC PROMPT MANAGEMENT
# Run the async hook here so async-only prompt loggers are honoured.
# Then pop prompt_id from kwargs so the sync responses() path does NOT
# re-run the hook (which would double-prepend template messages).
# Pass merged_optional_params via an internal kwarg so responses()
# can apply them to local_vars without re-invoking the hook.
#########################################################
litellm_logging_obj = kwargs.get("litellm_logging_obj", None)
prompt_id = cast(Optional[str], kwargs.get("prompt_id", None))
prompt_variables = cast(Optional[dict], kwargs.get("prompt_variables", None))
original_model = model

if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and litellm_logging_obj.should_run_prompt_management_hooks(
prompt_id=prompt_id, non_default_params=kwargs
):
if isinstance(input, str):
client_input: List[AllMessageValues] = [
{"role": "user", "content": input}
]
else:
client_input = [
item # type: ignore[misc]
for item in input
if isinstance(item, dict) and "role" in item
]
(
model,
merged_input,
merged_optional_params,
) = await litellm_logging_obj.async_get_chat_completion_prompt(
model=model,
messages=client_input,
non_default_params=kwargs,
prompt_id=prompt_id,
prompt_variables=prompt_variables,
prompt_label=kwargs.get("prompt_label", None),
prompt_version=kwargs.get("prompt_version", None),
)
input = cast(Union[str, ResponseInputParam], merged_input)
if model != original_model:
_, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=model
)
kwargs.pop("prompt_id", None)
kwargs["_async_prompt_merged_params"] = merged_optional_params

func = partial(
responses,
input=input,
Expand Down Expand Up @@ -623,6 +671,60 @@ def responses(
if dynamic_api_base is not None:
litellm_params.api_base = dynamic_api_base

#########################################################
# PROMPT MANAGEMENT
# If aresponses() already ran the async hook, it pops prompt_id and
# passes the result via _async_prompt_merged_params — apply those
# directly and skip the sync hook to avoid double-merging.
#########################################################
_async_merged = kwargs.pop("_async_prompt_merged_params", None)
if _async_merged is not None:
for k, v in _async_merged.items():
local_vars[k] = v
Comment on lines +680 to +683
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 local_vars["model"] not refreshed in async fast-path (inconsistency with sync path)

In the sync prompt-management block (lines 718–719) the code explicitly writes:

local_vars["model"] = model
local_vars["input"] = input

In this _async_merged fast-path only the optional params from the template are applied to local_vars; model and input are not re-written. This is not a functional bug today — model is passed in as a function argument (captured correctly by locals()) and local_vars["input"] is overwritten at line 746 — but the inconsistency makes the code harder to reason about and could silently break if anything upstream reads local_vars["model"] before line 804.

Suggested change
_async_merged = kwargs.pop("_async_prompt_merged_params", None)
if _async_merged is not None:
for k, v in _async_merged.items():
local_vars[k] = v
_async_merged = kwargs.pop("_async_prompt_merged_params", None)
if _async_merged is not None:
# Keep model / input in sync with local_vars just as the sync path does
local_vars["model"] = model
local_vars["input"] = input
for k, v in _async_merged.items():
local_vars[k] = v

else:
prompt_id = cast(Optional[str], kwargs.get("prompt_id", None))
prompt_variables = cast(
Optional[dict], kwargs.get("prompt_variables", None)
)
original_model = model

if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and litellm_logging_obj.should_run_prompt_management_hooks(
prompt_id=prompt_id, non_default_params=kwargs
):
if isinstance(input, str):
client_input: List[AllMessageValues] = [
{"role": "user", "content": input}
]
else:
client_input = [
item # type: ignore[misc]
for item in input
if isinstance(item, dict) and "role" in item
]
(
model,
merged_input,
merged_optional_params,
) = litellm_logging_obj.get_chat_completion_prompt(
model=model,
messages=client_input,
non_default_params=kwargs,
prompt_id=prompt_id,
prompt_variables=prompt_variables,
prompt_label=kwargs.get("prompt_label", None),
prompt_version=kwargs.get("prompt_version", None),
)
input = cast(Union[str, ResponseInputParam], merged_input)
local_vars["input"] = input
local_vars["model"] = model
if model != original_model:
_, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=model
)
local_vars["custom_llm_provider"] = custom_llm_provider
for k, v in merged_optional_params.items():
local_vars[k] = v

#########################################################
# Update input and tools with provider-specific file IDs if managed files are used
#########################################################
Expand Down
Loading
Loading