Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions litellm/responses/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,8 +600,12 @@ def responses(
# Update input and tools with provider-specific file IDs if managed files are used
#########################################################
model_file_id_mapping = kwargs.get("model_file_id_mapping")
model_info_id = kwargs.get("model_info", {}).get("id") if isinstance(kwargs.get("model_info"), dict) else None

model_info_id = (
kwargs.get("model_info", {}).get("id")
if isinstance(kwargs.get("model_info"), dict)
else None
)

input = cast(
Union[str, ResponseInputParam],
update_responses_input_with_model_file_ids(
Expand All @@ -611,7 +615,7 @@ def responses(
),
)
local_vars["input"] = input

# Update tools with provider-specific file IDs if needed
if tools:
tools = cast(
Expand Down Expand Up @@ -696,7 +700,10 @@ def responses(
)
)

# Pre Call logging
# Pre Call logging - preserve metadata for custom callbacks
# When called from completion bridge (codex models), metadata is in litellm_metadata
metadata_for_callbacks = metadata or kwargs.get("litellm_metadata") or {}

litellm_logging_obj.update_environment_variables(
model=model,
user=user,
Expand All @@ -705,7 +712,7 @@ def responses(
**responses_api_request_params,
"aresponses": _is_async,
"litellm_call_id": litellm_call_id,
"metadata": metadata,
"metadata": metadata_for_callbacks,
},
custom_llm_provider=custom_llm_provider,
)
Expand Down
176 changes: 176 additions & 0 deletions tests/test_litellm/responses/test_metadata_codex_callback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""
Test that metadata is passed to custom callbacks during chat completion calls to codex models.

Fixes issue: Metadata is no longer passed to custom callback during chat completion
calls to codex models (#21204)

Codex models (gpt-5.1-codex, gpt-5.2-codex) use mode=responses and route through
responses_api_bridge. The bridge converts metadata to litellm_metadata. This test
verifies metadata is preserved for custom callbacks via kwargs['litellm_params']['metadata'].
"""

import asyncio
import os
import sys
from typing import Optional
from unittest.mock import AsyncMock, patch

sys.path.insert(0, os.path.abspath("../../.."))

import pytest

import litellm
from litellm.integrations.custom_logger import CustomLogger


def _make_mock_http_response(response_dict: dict):
"""Create a mock HTTP response that returns response_dict from .json()."""

class MockResponse:
def __init__(self, json_data, status_code=200):
self._json_data = json_data
self.status_code = status_code
self.text = str(json_data)
self.headers = {}

def json(self):
return self._json_data

return MockResponse(response_dict, 200)


class MetadataCaptureCallback(CustomLogger):
    """Test double for a custom logger: records the kwargs of the last success event."""

    def __init__(self):
        # None until async_log_success_event fires; then the raw kwargs dict
        # that litellm passed to the callback.
        self.captured_kwargs: Optional[dict] = None

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # Keep only kwargs — that is all the assertions below inspect.
        self.captured_kwargs = kwargs


@pytest.mark.asyncio
async def test_metadata_passed_to_custom_callback_codex_models():
    """
    Test that metadata passed to completion() is available in custom callback
    when using codex models (responses API bridge path).

    Codex models have mode=responses and route through responses_api_bridge,
    which passes litellm_metadata. The fix ensures this is preserved as
    litellm_params.metadata for callback compatibility.
    """
    from litellm.types.llms.openai import ResponsesAPIResponse

    # Minimal well-formed Responses API payload; model_construct skips
    # validation, so only the fields the logging path reads are populated.
    mock_response = ResponsesAPIResponse.model_construct(
        id="resp-test",
        created_at=0,
        output=[
            {
                "type": "message",
                "id": "msg-1",
                "status": "completed",
                "role": "assistant",
                "content": [{"type": "output_text", "text": "Hello!"}],
            }
        ],
        object="response",
        model="gpt-5.1-codex",
        status="completed",
        usage={
            "input_tokens": 5,
            "output_tokens": 10,
            "total_tokens": 15,
        },
    )

    test_metadata = {"foo": "bar", "trace_id": "test-123"}
    callback = MetadataCaptureCallback()
    # Snapshot the global callback list so it can be restored afterwards.
    original_callbacks = litellm.callbacks.copy() if litellm.callbacks else []
    litellm.callbacks = [callback]

    try:
        with patch(
            "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
            new_callable=AsyncMock,
        ) as mock_post:
            mock_post.return_value = _make_mock_http_response(
                mock_response.model_dump()
            )
            # gpt-5.1-codex has mode=responses - routes through responses bridge
            await litellm.acompletion(
                model="gpt-5.1-codex",
                messages=[{"role": "user", "content": "Hello"}],
                metadata=test_metadata,
            )

        # Success callbacks run asynchronously; give them time to fire.
        await asyncio.sleep(1)

        assert callback.captured_kwargs is not None, "Callback should have been invoked"

        litellm_params = callback.captured_kwargs.get("litellm_params", {})
        metadata = litellm_params.get("metadata") or {}

        assert "foo" in metadata, "metadata['foo'] should be accessible in callback"
        assert metadata["foo"] == "bar"
        assert metadata.get("trace_id") == "test-123"
    finally:
        # Fix: the original saved original_callbacks but never restored it,
        # leaking the capture callback into every later test in the session.
        litellm.callbacks = original_callbacks


@pytest.mark.asyncio
async def test_metadata_passed_via_litellm_metadata_responses_api():
    """
    Test that when calling responses() directly with litellm_metadata,
    metadata is preserved for custom callbacks.

    Uses HTTP mock since mock_response returns early before update_environment_variables.
    """
    from litellm.types.llms.openai import ResponsesAPIResponse

    # Minimal payload built without validation; only fields read by the
    # logging/callback path matter here.
    mock_response = ResponsesAPIResponse.model_construct(
        id="resp-test-2",
        created_at=0,
        output=[
            {
                "type": "message",
                "id": "msg-2",
                "status": "completed",
                "role": "assistant",
                "content": [{"type": "output_text", "text": "Hi there!"}],
            }
        ],
        object="response",
        model="gpt-4o",
        status="completed",
        usage={
            "input_tokens": 2,
            "output_tokens": 3,
            "total_tokens": 5,
        },
    )

    test_metadata = {"request_id": "req-456"}
    callback = MetadataCaptureCallback()
    # Fix: the original clobbered the global litellm.callbacks with no
    # save/restore at all; snapshot it and restore in the finally block.
    original_callbacks = litellm.callbacks.copy() if litellm.callbacks else []
    litellm.callbacks = [callback]

    try:
        with patch(
            "litellm.llms.custom_httpx.http_handler.AsyncHTTPHandler.post",
            new_callable=AsyncMock,
        ) as mock_post:
            mock_post.return_value = _make_mock_http_response(
                mock_response.model_dump()
            )
            await litellm.aresponses(
                model="gpt-4o",
                input="hi",
                litellm_metadata=test_metadata,
            )

        # Success callbacks run asynchronously; give them time to fire.
        await asyncio.sleep(1)

        assert callback.captured_kwargs is not None

        litellm_params = callback.captured_kwargs.get("litellm_params", {})
        metadata = litellm_params.get("metadata") or {}

        assert "request_id" in metadata
        assert metadata["request_id"] == "req-456"
    finally:
        litellm.callbacks = original_callbacks
Loading