1515from vllm .transformers_utils .tokenizer_group import TokenizerGroup
1616from vllm .v1 .engine import EngineCoreOutput , EngineCoreRequest , FinishReason
1717from vllm .v1 .engine .detokenizer import IncrementalDetokenizer
18+ from vllm .v1 .engine .hidden_states import HiddenStatesProcessor
1819from vllm .v1 .engine .logprobs import LogprobsProcessor
1920from vllm .v1 .engine .parallel_sampling import ParentRequest
2021from vllm .v1 .metrics .stats import (IterationStats , LoRARequestStates ,
@@ -93,6 +94,7 @@ def __init__(
9394 arrival_time : float ,
9495 queue : Optional [RequestOutputCollector ],
9596 log_stats : bool ,
97+ hidden_states_processor : Optional [HiddenStatesProcessor ],
9698 ):
9799 self .request_id = request_id
98100 self .parent_req = parent_req
@@ -111,6 +113,7 @@ def __init__(
111113
112114 self .stats = RequestStateStats (
113115 arrival_time = arrival_time ) if log_stats else None
116+ self .hidden_states_processor = hidden_states_processor
114117
115118 @classmethod
116119 def from_new_request (
@@ -137,10 +140,12 @@ def from_new_request(
137140 request = request ,
138141 )
139142 max_tokens_param = sampling_params .max_tokens
143+ hidden_states_processor = HiddenStatesProcessor .from_new_request ()
140144 else :
141145 logprobs_processor = None
142146 detokenizer = None
143147 max_tokens_param = None
148+ hidden_states_processor = None
144149 assert request .pooling_params is not None
145150 output_kind = request .pooling_params .output_kind
146151
@@ -159,6 +164,7 @@ def from_new_request(
159164 arrival_time = request .arrival_time ,
160165 queue = queue ,
161166 log_stats = log_stats ,
167+ hidden_states_processor = hidden_states_processor ,
162168 )
163169
164170 def make_request_output (
@@ -204,7 +210,7 @@ def _new_request_output(
204210 finished : bool ,
205211 kv_transfer_params : Optional [dict [str , Any ]] = None ,
206212 ) -> Union [RequestOutput , PoolingRequestOutput ]:
207-
213+ # Seems to be where the outputs are processed
208214 first_output = outputs [0 ]
209215 if isinstance (first_output , PoolingOutput ):
210216 assert len (outputs ) == 1
@@ -215,17 +221,23 @@ def _new_request_output(
215221 finished = finished ,
216222 )
217223 assert self .logprobs_processor is not None
224+ assert self .hidden_states_processor is not None
218225 if self .output_kind == RequestOutputKind .DELTA :
219226 # Side effect: logprobs processor forgets prompt logprobs
220227 prompt_logprobs = self .logprobs_processor .pop_prompt_logprobs ()
228+ prompt_hidden_states = self .hidden_states_processor .pop_prompt_hidden_states (
229+ )
221230 else :
222231 prompt_logprobs = self .logprobs_processor .prompt_logprobs
232+ prompt_hidden_states = self .hidden_states_processor .prompt_hidden_states
223233
234+ # Prompt logprobs and prompt hidden states are attached to the RequestOutput here
224235 return RequestOutput (
225236 request_id = request_id ,
226237 prompt = self .prompt ,
227238 prompt_token_ids = self .prompt_token_ids ,
228239 prompt_logprobs = prompt_logprobs ,
240+ prompt_hidden_states = prompt_hidden_states ,
229241 outputs = cast (list [CompletionOutput ], outputs ),
230242 finished = finished ,
231243 kv_transfer_params = kv_transfer_params ,
@@ -399,6 +411,7 @@ def process_outputs(
399411 kv_transfer_params = engine_core_output .kv_transfer_params
400412 req_state .num_cached_tokens = engine_core_output .num_cached_tokens
401413 req_state .is_prefilling = False
414+ prompt_hidden_states = engine_core_output .prompt_hidden_states
402415
403416 if pooling_output is None :
404417 assert req_state .detokenizer is not None
@@ -414,8 +427,12 @@ def process_outputs(
414427 # if required.
415428 req_state .logprobs_processor .update_from_output (
416429 engine_core_output )
430+ assert req_state .hidden_states_processor is not None
431+ req_state .hidden_states_processor .update_from_output (
432+ engine_core_output )
417433
418434 # 4) Create and handle RequestOutput objects.
435+ print ("lxy here make_request_output" , prompt_hidden_states is None )
419436 if request_output := req_state .make_request_output (
420437 new_token_ids , pooling_output , finish_reason , stop_reason ,
421438 kv_transfer_params ):
0 commit comments