Skip to content

Commit c5162ff

Browse files
valaisesmitya52
authored and committed
n_choices support
1 parent 49604bf commit c5162ff

File tree

2 files changed

+13
-16
lines changed

2 files changed

+13
-16
lines changed

refact_webgui/webgui/selfhost_fastapi_completions.py

+12-15
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,6 @@ def compose_usage_dict(model_dict, prompt_tokens_n, generated_tokens_n) -> Dict[
511511
model_dict = self._model_assigner.models_db_with_passthrough.get(post.model, {})
512512

513513
async def litellm_streamer():
514-
final_msg = {}
515514
generated_tokens_n = 0
516515
try:
517516
self._integrations_env_setup()
@@ -521,7 +520,8 @@ async def litellm_streamer():
521520
max_tokens=min(model_dict.get('T_out', post.max_tokens), post.max_tokens),
522521
tools=post.tools,
523522
tool_choice=post.tool_choice,
524-
stop=post.stop
523+
stop=post.stop,
524+
n=post.n,
525525
)
526526
finish_reason = None
527527
async for model_response in response:
@@ -533,18 +533,14 @@ async def litellm_streamer():
533533
if text := delta.get("content"):
534534
generated_tokens_n += litellm.token_counter(model_name, text=text)
535535

536-
if finish_reason:
537-
final_msg = data
538-
break
539-
540536
except json.JSONDecodeError:
541537
data = {"choices": [{"finish_reason": finish_reason}]}
542538
yield prefix + json.dumps(data) + postfix
543539

544-
if final_msg:
545-
usage_dict = compose_usage_dict(model_dict, prompt_tokens_n, generated_tokens_n)
546-
final_msg.update(usage_dict)
547-
yield prefix + json.dumps(final_msg) + postfix
540+
final_msg = {"choices": []}
541+
usage_dict = compose_usage_dict(model_dict, prompt_tokens_n, generated_tokens_n)
542+
final_msg.update(usage_dict)
543+
yield prefix + json.dumps(final_msg) + postfix
548544

549545
# NOTE: DONE needed by refact-lsp server
550546
yield prefix + "[DONE]" + postfix
@@ -563,15 +559,16 @@ async def litellm_non_streamer():
563559
max_tokens=min(model_dict.get('T_out', post.max_tokens), post.max_tokens),
564560
tools=post.tools,
565561
tool_choice=post.tool_choice,
566-
stop=post.stop
562+
stop=post.stop,
563+
n=post.n,
567564
)
568565
finish_reason = None
569566
try:
570567
data = model_response.dict()
571-
choice0 = data["choices"][0]
572-
if text := choice0.get("message", {}).get("content"):
573-
generated_tokens_n = litellm.token_counter(model_name, text=text)
574-
finish_reason = choice0["finish_reason"]
568+
for choice in data.get("choices", []):
569+
if text := choice.get("message", {}).get("content"):
570+
generated_tokens_n += litellm.token_counter(model_name, text=text)
571+
finish_reason = choice.get("finish_reason")
575572
usage_dict = compose_usage_dict(model_dict, prompt_tokens_n, generated_tokens_n)
576573
data.update(usage_dict)
577574
except json.JSONDecodeError:

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class PyPackage:
3737
"starlette==0.27.0", "uvicorn", "uvloop", "termcolor", "python-multipart", "more_itertools",
3838
"scyllapy==1.3.0", "pandas>=2.0.3",
3939
# NOTE: litellm has bug with anthropic streaming, so we're staying on this version for now
40-
"litellm==1.34.42",
40+
"litellm==1.42.0",
4141
],
4242
requires_packages=["refact_known_models", "refact_utils"],
4343
data=["webgui/static/*", "webgui/static/components/modals/*",

0 commit comments

Comments (0)