@@ -511,7 +511,6 @@ def compose_usage_dict(model_dict, prompt_tokens_n, generated_tokens_n) -> Dict[
511
511
model_dict = self ._model_assigner .models_db_with_passthrough .get (post .model , {})
512
512
513
513
async def litellm_streamer ():
514
- final_msg = {}
515
514
generated_tokens_n = 0
516
515
try :
517
516
self ._integrations_env_setup ()
@@ -521,7 +520,8 @@ async def litellm_streamer():
521
520
max_tokens = min (model_dict .get ('T_out' , post .max_tokens ), post .max_tokens ),
522
521
tools = post .tools ,
523
522
tool_choice = post .tool_choice ,
524
- stop = post .stop
523
+ stop = post .stop ,
524
+ n = post .n ,
525
525
)
526
526
finish_reason = None
527
527
async for model_response in response :
@@ -533,18 +533,14 @@ async def litellm_streamer():
533
533
if text := delta .get ("content" ):
534
534
generated_tokens_n += litellm .token_counter (model_name , text = text )
535
535
536
- if finish_reason :
537
- final_msg = data
538
- break
539
-
540
536
except json .JSONDecodeError :
541
537
data = {"choices" : [{"finish_reason" : finish_reason }]}
542
538
yield prefix + json .dumps (data ) + postfix
543
539
544
- if final_msg :
545
- usage_dict = compose_usage_dict (model_dict , prompt_tokens_n , generated_tokens_n )
546
- final_msg .update (usage_dict )
547
- yield prefix + json .dumps (final_msg ) + postfix
540
+ final_msg = { "choices" : []}
541
+ usage_dict = compose_usage_dict (model_dict , prompt_tokens_n , generated_tokens_n )
542
+ final_msg .update (usage_dict )
543
+ yield prefix + json .dumps (final_msg ) + postfix
548
544
549
545
# NOTE: the trailing "[DONE]" message is needed by the refact-lsp server
550
546
yield prefix + "[DONE]" + postfix
@@ -563,15 +559,16 @@ async def litellm_non_streamer():
563
559
max_tokens = min (model_dict .get ('T_out' , post .max_tokens ), post .max_tokens ),
564
560
tools = post .tools ,
565
561
tool_choice = post .tool_choice ,
566
- stop = post .stop
562
+ stop = post .stop ,
563
+ n = post .n ,
567
564
)
568
565
finish_reason = None
569
566
try :
570
567
data = model_response .dict ()
571
- choice0 = data [ "choices" ][ 0 ]
572
- if text := choice0 .get ("message" , {}).get ("content" ):
573
- generated_tokens_n = litellm .token_counter (model_name , text = text )
574
- finish_reason = choice0 [ "finish_reason" ]
568
+ for choice in data . get ( "choices" , []):
569
+ if text := choice .get ("message" , {}).get ("content" ):
570
+ generated_tokens_n + = litellm .token_counter (model_name , text = text )
571
+ finish_reason = choice . get ( "finish_reason" )
575
572
usage_dict = compose_usage_dict (model_dict , prompt_tokens_n , generated_tokens_n )
576
573
data .update (usage_dict )
577
574
except json .JSONDecodeError :
0 commit comments