@@ -850,7 +850,7 @@ def _executor_loop_pp(self):
850850 self .num_scheduled_requests = scheduled_batch .batch_size
851851
852852 logger .debug (
853- f'has { len (self .active_requests )} active_request , '
853+ f'has { len (self .active_requests )} active_requests , '
854854 f'scheduled { len (scheduled_batch .context_requests )} context requests and '
855855 f'{ len (scheduled_batch .generation_requests )} generation requests'
856856 )
@@ -1089,7 +1089,7 @@ def _prepare_and_schedule_batch(self):
10891089
10901090 self .num_scheduled_requests = scheduled_batch .batch_size
10911091 logger .debug (
1092- f'has { len (self .active_requests )} active_request , '
1092+ f'has { len (self .active_requests )} active_requests , '
10931093 f'scheduled { len (scheduled_batch .context_requests )} context requests and '
10941094 f'{ len (scheduled_batch .generation_requests )} generation requests' )
10951095 return scheduled_batch , iter_stats
@@ -1359,19 +1359,20 @@ def _executor_loop_overlap(self):
13591359 if target_inputs is not None :
13601360 self ._process_draft_results (scheduled_batch ,
13611361 draft_outputs , draft_batch )
1362- elif self .previous_batch is not None and not use_previous_draft_tokens :
1363- self ._update_requests (self .previous_batch .sample_state )
1362+ if target_inputs is None and self .previous_batch is not None and not use_previous_draft_tokens :
1363+ self ._update_requests (self .previous_batch .sample_state )
13641364
1365- if self .block_reuse_enabled and not self .kv_cache_manager .is_vswa and self .kv_cache_transceiver :
1366- for req in self .previous_batch .sample_state .scheduled_requests .context_requests :
1367- if req .is_context_only_request and (
1368- req .is_context_finished
1369- or req .is_finished_due_to_length ):
1370- block_id = self .kv_cache_manager .store_blocks_for_reuse (
1371- req , True )
1372- self .ctx_in_transmission_requests .append (
1373- (req , block_id ))
1365+ if self .block_reuse_enabled and not self .kv_cache_manager .is_vswa and self .kv_cache_transceiver :
1366+ for req in self .previous_batch .sample_state .scheduled_requests .context_requests :
1367+ if req .is_context_only_request and (
1368+ req .is_context_finished
1369+ or req .is_finished_due_to_length ):
1370+ block_id = self .kv_cache_manager .store_blocks_for_reuse (
1371+ req , True )
1372+ self .ctx_in_transmission_requests .append (
1373+ (req , block_id ))
13741374
1375+ if scheduled_batch .batch_size > 0 :
13751376 if self .guided_decoder is not None :
13761377 # add_batch must be called again to have updated new tokens.
13771378 self .guided_decoder .add_batch (scheduled_batch )
@@ -1387,9 +1388,10 @@ def _executor_loop_overlap(self):
13871388 scheduled_batch .context_requests
13881389 ) if self .kv_cache_transceiver else []
13891390
1390- if self .previous_batch is not None :
1391- self ._process_previous_batch ()
1391+ if self .previous_batch is not None :
1392+ self ._process_previous_batch ()
13921393
1394+ if scheduled_batch .batch_size > 0 :
13931395 if self .enable_iter_perf_stats :
13941396 iter_stats .inflight_batching_stats .num_ctx_tokens = self .model_engine .iter_states [
13951397 'num_ctx_tokens' ]
@@ -1862,7 +1864,17 @@ def _update_request_states_tp(self, scheduled_requests: ScheduledRequests):
18621864 request .context_chunk_size )
18631865 request .move_to_next_context_chunk ()
18641866 if request .context_remaining_length == 0 :
1865- request .state = LlmRequestState .GENERATION_IN_PROGRESS
1867+ if not self .disable_overlap_scheduler and request .will_complete_next_iteration (
1868+ ):
1869+ request .state = LlmRequestState .GENERATION_TO_COMPLETE
1870+ else :
1871+ request .state = LlmRequestState .GENERATION_IN_PROGRESS
1872+
1873+ for request in scheduled_requests .generation_requests :
1874+ if request .state != LlmRequestState .GENERATION_COMPLETE :
1875+ if not self .disable_overlap_scheduler and request .will_complete_next_iteration (
1876+ ):
1877+ request .state = LlmRequestState .GENERATION_TO_COMPLETE
18661878
18671879 def _update_request_states_star_attention (
18681880 self , scheduled_requests : ScheduledRequests ):
0 commit comments