What happened?

`response_format` is not supported for Hugging Face models, even though Hugging Face and TGI have this option: Merge Request
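For reference, here is the failing call as a runnable sketch, reconstructed from the traceback below. The base URL and API key are placeholders (the real values are not in the report), and the `City` field types are inferred from the JSON schema quoted in the error:

```python
import json

from openai import OpenAI
from pydantic import BaseModel


# Field types inferred from the schema echoed in the error message
# ("mayor" and "country" appear as type "number", i.e. float in Pydantic).
class City(BaseModel):
    mayor: float
    country: float
    population: int


api_key = "sk-placeholder"  # placeholder; the real key is not in the report

client = OpenAI(
    base_url="https://example.com/v1",  # LiteLLM proxy endpoint (redacted)
    api_key=api_key,
)

# Fails with BadRequestError / litellm.UnsupportedParamsError, because the
# huggingface provider rejects response_format even though TGI can enforce it.
chat_completion = client.chat.completions.create(
    model="Llama-3.3-70B",
    messages=[
        {
            "role": "system",
            "content": f"\nRespond to the users questions and answer them in the following format: {City.model_json_schema()}. ",
        },
        {"role": "user", "content": "Give me facts about London"},
    ],
    max_tokens=500,
    response_format={"type": "json_schema", "value": City.model_json_schema()},
)

json_response = json.loads(chat_completion.choices[0].message.content)
```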
Relevant log output

```
BadRequestError                           Traceback (most recent call last)
Cell In[83], line 19
     10     population: int
     13 client = OpenAI(
     14     base_url="https://example.com/v1",
     15     api_key=api_key,
     16 )
---> 19 chat_completion = client.chat.completions.create(
     20     model="Llama-3.3-70B",
     21     messages=[
     22         {
     23             "role": "system",
     24             "content": f"\nRespond to the users questions and answer them in the following format: {City.model_json_schema()}. ",
     25         },
     26         {
     27             "role": "user",
     28             "content": "Give me facts about London",
     29         },
     30     ],
     31     max_tokens=500,
     32     response_format={"type": "json_schema", "value": City.model_json_schema()},
     33 )
     35 json_response = chat_completion.choices[0].message.content
     36 json_response = json.loads(json_response)

File ~/miniconda3/envs/jupyter/lib/python3.10/site-packages/openai/_utils/_utils.py:279, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
    277     msg = f"Missing required argument: {quote(missing[0])}"
    278     raise TypeError(msg)
--> 279 return func(*args, **kwargs)

File ~/miniconda3/envs/jupyter/lib/python3.10/site-packages/openai/resources/chat/completions.py:859, in Completions.create(self, messages, model, audio, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, metadata, modalities, n, parallel_tool_calls, prediction, presence_penalty, reasoning_effort, response_format, seed, service_tier, stop, store, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
    817 @required_args(["messages", "model"], ["messages", "model", "stream"])
    818 def create(
    819     self,
   (...)
    856     timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    857 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
    858     validate_response_format(response_format)
--> 859     return self._post(
    860         "/chat/completions",
    861         body=maybe_transform(
    862             {
    863                 "messages": messages,
    864                 "model": model,
    865                 "audio": audio,
    866                 "frequency_penalty": frequency_penalty,
    867                 "function_call": function_call,
    868                 "functions": functions,
    869                 "logit_bias": logit_bias,
    870                 "logprobs": logprobs,
    871                 "max_completion_tokens": max_completion_tokens,
    872                 "max_tokens": max_tokens,
    873                 "metadata": metadata,
    874                 "modalities": modalities,
    875                 "n": n,
    876                 "parallel_tool_calls": parallel_tool_calls,
    877                 "prediction": prediction,
    878                 "presence_penalty": presence_penalty,
    879                 "reasoning_effort": reasoning_effort,
    880                 "response_format": response_format,
    881                 "seed": seed,
    882                 "service_tier": service_tier,
    883                 "stop": stop,
    884                 "store": store,
    885                 "stream": stream,
    886                 "stream_options": stream_options,
    887                 "temperature": temperature,
    888                 "tool_choice": tool_choice,
    889                 "tools": tools,
    890                 "top_logprobs": top_logprobs,
    891                 "top_p": top_p,
    892                 "user": user,
    893             },
    894             completion_create_params.CompletionCreateParams,
    895         ),
    896         options=make_request_options(
    897             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    898         ),
    899         cast_to=ChatCompletion,
    900         stream=stream or False,
    901         stream_cls=Stream[ChatCompletionChunk],
    902     )

File ~/miniconda3/envs/jupyter/lib/python3.10/site-packages/openai/_base_client.py:1283, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
   1269 def post(
   1270     self,
   1271     path: str,
   (...)
   1278     stream_cls: type[_StreamT] | None = None,
   1279 ) -> ResponseT | _StreamT:
   1280     opts = FinalRequestOptions.construct(
   1281         method="post", url=path, json_data=body, files=to_httpx_files(files), **options
   1282     )
-> 1283 return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File ~/miniconda3/envs/jupyter/lib/python3.10/site-packages/openai/_base_client.py:960, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
    957 else:
    958     retries_taken = 0
--> 960 return self._request(
    961     cast_to=cast_to,
    962     options=options,
    963     stream=stream,
    964     stream_cls=stream_cls,
    965     retries_taken=retries_taken,
    966 )

File ~/miniconda3/envs/jupyter/lib/python3.10/site-packages/openai/_base_client.py:1064, in SyncAPIClient._request(self, cast_to, options, retries_taken, stream, stream_cls)
   1061     err.response.read()
   1063     log.debug("Re-raising status error")
-> 1064     raise self._make_status_error_from_response(err.response) from None
   1066 return self._process_response(
   1067     cast_to=cast_to,
   1068     options=options,
   (...)
   1072     retries_taken=retries_taken,
   1073 )

BadRequestError: Error code: 400 - {'error': {'message': "litellm.UnsupportedParamsError: huggingface does not support parameters: {'response_format': {'type': 'json_schema', 'value': {'properties': {'mayor': {'title': 'Mayor', 'type': 'number'}, 'country': {'title': 'Country', 'type': 'number'}, 'population': {'title': 'Population', 'type': 'integer'}}, 'required': ['mayor', 'country', 'population'], 'title': 'City', 'type': 'object'}}}, for model=meta-llama/Meta-Llama-3-70B-Instruct. To drop these, set `litellm.drop_params=True` or for proxy:\n\n`litellm_settings:\n drop_params: true`\n\nReceived Model Group=Llama-3.3-70B\nAvailable Model Group Fallbacks=None", 'type': 'None', 'param': None, 'code': '400'}}
```
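The error message itself suggests a workaround: have LiteLLM drop parameters the target provider does not accept. A minimal sketch of the SDK-side variant it names; note that this silently discards `response_format`, so the schema is no longer enforced:

```python
import litellm

# Suggested by the error above: drop request parameters the provider does not
# support instead of raising UnsupportedParamsError. response_format is then
# silently ignored for Hugging Face models rather than enforced.
litellm.drop_params = True
```

For the proxy, the error quotes the equivalent config setting: `litellm_settings: drop_params: true`.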
Are you a ML Ops Team?

Yes

What LiteLLM version are you on ?

v1.58.0

Twitter / LinkedIn details

No response