@@ -130,7 +130,13 @@ type Director struct {
130130 postResponsePlugins []PostResponse
131131}
132132
133- // HandleRequest orchestrates the request lifecycle.
133+ // HandleRequest orchestrates the request lifecycle:
134+ // 1. Parses request details.
135+ // 2. Calls admitRequest for admission control.
136+ // 3. Calls Scheduler.Schedule if request is approved.
137+ // 4. Calls prepareRequest to populate RequestContext with result and call PreRequest plugins.
138+ //
139+ // It always returns the requestContext even in the error case, as the request context is used in error handling.
134140func (d * Director ) HandleRequest (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
135141 logger := log .FromContext (ctx )
136142
@@ -197,13 +203,15 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
197203 if len (candidatePods ) == 0 {
198204 return reqCtx , errutil.Error {Code : errutil .ServiceUnavailable , Msg : "failed to find candidate pods for serving the request" }
199205 }
200- results , err := d .scheduler .Schedule (ctx , reqCtx .SchedulingRequest , candidatePods )
206+ result , err := d .scheduler .Schedule (ctx , reqCtx .SchedulingRequest , candidatePods )
201207 if err != nil {
202208 return reqCtx , errutil.Error {Code : errutil .InferencePoolResourceExhausted , Msg : fmt .Errorf ("failed to find target pod: %w" , err ).Error ()}
203209 }
204210
205- // --- 4. Prepare Request ---
206- reqCtx , err = d .prepareRequest (ctx , reqCtx , results )
211+ // --- 4. Prepare Request (Populates RequestContext and calls PreRequest plugins) ---
212+ // Insert target endpoint to instruct Envoy to route requests to the specified target pod and attach the port number.
213+ // Invoke PreRequest registered plugins.
214+ reqCtx , err = d .prepareRequest (ctx , reqCtx , result )
207215 if err != nil {
208216 return reqCtx , err
209217 }
@@ -279,7 +287,7 @@ func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMet
279287}
280288
281289// prepareRequest populates the RequestContext and calls the registered PreRequest plugins
282- // for allowing plugging customized logic based on the scheduling results .
290+ // to allow plugging in customized logic based on the scheduling result.
283291func (d * Director ) prepareRequest (ctx context.Context , reqCtx * handlers.RequestContext , result * schedulingtypes.SchedulingResult ) (* handlers.RequestContext , error ) {
284292 if result == nil || len (result .ProfileResults ) == 0 {
285293 return reqCtx , errutil.Error {Code : errutil .Internal , Msg : "empty scheduling results" }
@@ -657,7 +665,8 @@ func RandomWeightedDraw(logger logr.Logger, model *v1alpha2.InferenceModel, seed
657665}
658666
659667func (d * Director ) runPreRequestPlugins (ctx context.Context , request * schedulingtypes.LLMRequest , schedulingResult * schedulingtypes.SchedulingResult ,
660- targetPort int ) {
668+ targetPort int ,
669+ ) {
661670 for _ , plugin := range d .preRequestPlugins {
662671 log .FromContext (ctx ).V (logutil .DEBUG ).Info ("Running pre-request plugin" , "plugin" , plugin .TypedName ().Type )
663672 before := time .Now ()
0 commit comments