@@ -144,7 +144,7 @@ def _prepare_pad_input_ids(
144144            seq_len  =  input_ids_i .size (0 )
145145            if  max_len  >  seq_len :
146146                logger .info (
147-                     "Left padding request ofla  length %d tokens to %d tokens." ,
147+                     "Left padding request of  length %d tokens to %d tokens." ,
148148                    seq_len , max_len )
149149            pads  =  torch .ones (max_len  -  seq_len ,
150150                              dtype = torch .long ,
@@ -220,9 +220,7 @@ def complete_warmup(self):
220220        """Turn off warmup mode once the warmup is complete""" 
221221        self .warmup_mode  =  False 
222222
223-     def  _update_states (
224-             self ,
225-             scheduler_output : SchedulerOutput ):
223+     def  _update_states (self , scheduler_output : SchedulerOutput ):
226224        # Update the states of the running/resumed requests. 
227225        # Update input_batch's `token_ids_cpu`, 
228226        # `num_tokens`. For continuous batching it cleans 
@@ -587,9 +585,9 @@ def __init__(
587585        # TODO: Remove this once we can prefill and decode 
588586        # in the same step 
589587        self .prefill_batch  =  InputBatch (
590-             # TODO: review this, currently we only support prefill for   
588+             # TODO: review this, currently we only support prefill for 
591589            # `batch_size=1` 
592-             max_num_reqs = 1 ,   
590+             max_num_reqs = 1 ,
593591            max_model_len = vllm_config .model_config .max_model_len ,
594592            device = self .device ,
595593            pin_memory = self .pin_memory ,
@@ -598,11 +596,11 @@ def __init__(
598596
599597        # Requests 
600598        self .requests : dict [str , CachedRequestData ] =  {}
601-          
599+ 
602600    def  _update_states (self , scheduler_output ):
603-          
601+ 
604602        super ()._update_states (scheduler_output )
605-          
603+ 
606604        # Continuous batching stuff 
607605        for  req_id  in  scheduler_output .finished_req_ids :
608606            if  req_id  in  self .req_ids2blocks :
@@ -611,7 +609,7 @@ def _update_states(self, scheduler_output):
611609                    self .free_blocks .append (freed_block )
612610                del  self .req_ids2blocks [req_id ]
613611                del  self .req_ids2left_pads [req_id ]
614-          
612+ 
615613        [self .input_batch .remove_request (req_id ) \
616614            for  req_id  in  scheduler_output .finished_req_ids ]
617615
@@ -636,9 +634,6 @@ def _prepare_prompt(
636634        # Internal state is managed here. 
637635        slot_mapping  =  []
638636
639-         # TODO: we are deactivating all, because we 
640-         # only encode or prefill at time. 
641-         # self.input_batch.deactivate_all_requests() 
642637        self .prefill_batch .clear_requests ()
643638
644639        for  request_data  in  new_requests :
0 commit comments