Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions docs/my-website/docs/prompt_management.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
---
title: Prompt Management with Responses API
---

# Prompt Management with Responses API

Use LiteLLM Prompt Management with `/v1/responses` by passing `prompt_id` and optional `prompt_variables`.

## Basic Usage

```bash
curl -X POST "http://localhost:4000/v1/responses" \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o",
"prompt_id": "my-responses-prompt",
"prompt_variables": {"topic": "large language models"},
"input": []
}'
```

Here `"input": []` is intentional: with an empty `input`, the request is served entirely from the prompt template's messages (after `prompt_variables` substitution).

## Multi-turn Follow-up in `input`

To send follow-up turns in a single request, pass the full message history (prior user and assistant turns) in `input`.

```bash
curl -X POST "http://localhost:4000/v1/responses" \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o",
"prompt_id": "my-responses-prompt",
"prompt_variables": {"topic": "large language models"},
"input": [
{"role": "user", "content": "Topic is LLMs. Start short."},
{"role": "assistant", "content": "Sure, go ahead."},
{"role": "user", "content": "Now give me 3 bullets and include pricing caveat."}
]
}'
```

## Notes

- Prompt template messages are merged with your `input` messages.
- Prompt variable substitution applies to prompt message content.
- Tool call payload fields are not substituted by prompt variables.
- For follow-ups with `previous_response_id`, include `prompt_id` again if you want prompt management applied on that turn.
19 changes: 18 additions & 1 deletion docs/my-website/docs/proxy/prompt_management.md
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ litellm_settings:
1. **At Startup**: When the proxy starts, it reads the `prompts` field from `config.yaml`
2. **Initialization**: Each prompt is initialized based on its `prompt_integration` type
3. **In-Memory Storage**: Prompts are stored in the `IN_MEMORY_PROMPT_REGISTRY`
4. **Access**: Use these prompts via the `/v1/chat/completions` endpoint with `prompt_id` in the request
4. **Access**: Use these prompts via `/v1/chat/completions` or `/v1/responses` with `prompt_id` in the request

### Using Config-Loaded Prompts

Expand All @@ -331,6 +331,23 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
}'
```

You can also use the same `prompt_id` with the Responses API:

```bash
curl -L -X POST 'http://0.0.0.0:4000/v1/responses' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gpt-4o",
"prompt_id": "coding_assistant",
"prompt_variables": {
"language": "python",
"task": "create a web scraper"
},
"input": []
}'
```

As with the chat-completions example, `"input": []` means the request is served entirely from the prompt template's messages after variable substitution; pass message history in `input` for follow-up turns.

### Prompt Schema Reference

Each prompt in the `prompts` list requires:
Expand Down
1 change: 1 addition & 0 deletions docs/my-website/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ const sidebars = {
"proxy/realtime_webrtc",
"rerank",
"response_api",
"prompt_management",
"response_api_compact",
{
type: "category",
Expand Down
102 changes: 102 additions & 0 deletions litellm/responses/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
)
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
AllMessageValues,
PromptObject,
Reasoning,
ResponseIncludable,
Expand Down Expand Up @@ -463,6 +464,53 @@ async def aresponses(
# Update local_vars with detected provider (fixes #19782)
local_vars["custom_llm_provider"] = custom_llm_provider

#########################################################
# ASYNC PROMPT MANAGEMENT
# Run the async hook here so async-only prompt loggers are honoured.
# Then pop prompt_id from kwargs so the sync responses() path does NOT
# re-run the hook (which would double-prepend template messages).
# Pass merged_optional_params via an internal kwarg so responses()
# can apply them to local_vars without re-invoking the hook.
#########################################################
litellm_logging_obj = kwargs.get("litellm_logging_obj", None)
prompt_id = cast(Optional[str], kwargs.get("prompt_id", None))
prompt_variables = cast(Optional[dict], kwargs.get("prompt_variables", None))
original_model = model

if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and litellm_logging_obj.should_run_prompt_management_hooks(
prompt_id=prompt_id, non_default_params=kwargs
):
if isinstance(input, str):
client_input: List[AllMessageValues] = [
{"role": "user", "content": input}
]
else:
client_input = [
item # type: ignore[misc]
for item in input
if isinstance(item, dict) and "role" in item
]
(
model,
merged_input,
merged_optional_params,
) = await litellm_logging_obj.async_get_chat_completion_prompt(
model=model,
messages=client_input,
non_default_params=kwargs,
prompt_id=prompt_id,
prompt_variables=prompt_variables,
prompt_label=kwargs.get("prompt_label", None),
prompt_version=kwargs.get("prompt_version", None),
)
input = cast(Union[str, ResponseInputParam], merged_input)
if model != original_model:
_, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=model
)
kwargs.pop("prompt_id", None)
kwargs["_async_prompt_merged_params"] = merged_optional_params

func = partial(
responses,
input=input,
Expand Down Expand Up @@ -623,6 +671,60 @@ def responses(
if dynamic_api_base is not None:
litellm_params.api_base = dynamic_api_base

#########################################################
# PROMPT MANAGEMENT
# If aresponses() already ran the async hook, it pops prompt_id and
# passes the result via _async_prompt_merged_params — apply those
# directly and skip the sync hook to avoid double-merging.
#########################################################
_async_merged = kwargs.pop("_async_prompt_merged_params", None)
if _async_merged is not None:
for k, v in _async_merged.items():
local_vars[k] = v
Comment on lines +680 to +683
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 local_vars["model"] not refreshed in async fast-path (inconsistency with sync path)

In the sync prompt-management block (lines 718–719) the code explicitly writes:

local_vars["model"] = model
local_vars["input"] = input

In this _async_merged fast-path only the optional params from the template are applied to local_vars; model and input are not re-written. This is not a functional bug today — model is passed in as a function argument (captured correctly by locals()) and local_vars["input"] is overwritten at line 746 — but the inconsistency makes the code harder to reason about and could silently break if anything upstream reads local_vars["model"] before line 804.

Suggested change
_async_merged = kwargs.pop("_async_prompt_merged_params", None)
if _async_merged is not None:
for k, v in _async_merged.items():
local_vars[k] = v
_async_merged = kwargs.pop("_async_prompt_merged_params", None)
if _async_merged is not None:
# Keep model / input in sync with local_vars just as the sync path does
local_vars["model"] = model
local_vars["input"] = input
for k, v in _async_merged.items():
local_vars[k] = v

else:
prompt_id = cast(Optional[str], kwargs.get("prompt_id", None))
prompt_variables = cast(
Optional[dict], kwargs.get("prompt_variables", None)
)
original_model = model

if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and litellm_logging_obj.should_run_prompt_management_hooks(
prompt_id=prompt_id, non_default_params=kwargs
):
if isinstance(input, str):
client_input: List[AllMessageValues] = [
{"role": "user", "content": input}
]
else:
client_input = [
item # type: ignore[misc]
for item in input
if isinstance(item, dict) and "role" in item
]
(
model,
merged_input,
merged_optional_params,
) = litellm_logging_obj.get_chat_completion_prompt(
model=model,
messages=client_input,
non_default_params=kwargs,
prompt_id=prompt_id,
prompt_variables=prompt_variables,
prompt_label=kwargs.get("prompt_label", None),
prompt_version=kwargs.get("prompt_version", None),
)
input = cast(Union[str, ResponseInputParam], merged_input)
local_vars["input"] = input
local_vars["model"] = model
if model != original_model:
_, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=model
)
local_vars["custom_llm_provider"] = custom_llm_provider
for k, v in merged_optional_params.items():
local_vars[k] = v

#########################################################
# Update input and tools with provider-specific file IDs if managed files are used
#########################################################
Expand Down
Loading
Loading