@@ -156,14 +156,11 @@ func NewDirectorWithConfig(
156156 config * Config ,
157157) * Director {
158158 return & Director {
159- datastore : datastore ,
160- scheduler : scheduler ,
161- admissionController : admissionController ,
162- preRequestPlugins : config .preRequestPlugins ,
163- postResponsePlugins : config .postResponsePlugins ,
164- postResponseChunkPlugins : config .postResponseChunkPlugins ,
165- postResponseCompletePlugins : config .postResponseCompletePlugins ,
166- defaultPriority : 0 , // define default priority explicitly
159+ datastore : datastore ,
160+ scheduler : scheduler ,
161+ admissionController : admissionController ,
162+ requestControlPlugins : * config ,
163+ defaultPriority : 0 , // define default priority explicitly
167164 }
168165}
169166
@@ -177,13 +174,10 @@ func NewDirectorWithConfig(
177174// - Preparing the request context for the Envoy ext_proc filter to route the request.
178175// - Running PostResponse plugins.
179176type Director struct {
180- datastore Datastore
181- scheduler Scheduler
182- admissionController AdmissionController
183- preRequestPlugins []PreRequest
184- postResponsePlugins []PostResponse
185- postResponseChunkPlugins []PostResponseChunk
186- postResponseCompletePlugins []PostResponseComplete
177+ datastore Datastore
178+ scheduler Scheduler
179+ admissionController AdmissionController
180+ requestControlPlugins Config
187181 // we just need a pointer to an int variable since priority is a pointer in InferenceObjective
188182 // no need to set this in the constructor, since the value we want is the default int val
189183 // and value types cannot be nil
@@ -391,36 +385,47 @@ func (d *Director) toSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []sch
391385 return pm
392386}
393387
394- // HandleResponseHeaders is called when the first chunk of the response arrives.
395- func (d * Director ) HandleResponse (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
396- logger := log .FromContext (ctx ).WithValues ("stage" , "headers" )
397- logger .V (logutil .DEBUG ).Info ("Entering HandleResponseHeaders" )
388+ // HandleResponseReceived is called when the response headers are received.
389+ func (d * Director ) HandleResponseReceived (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
390+ response := & Response {
391+ RequestId : reqCtx .Request .Headers [requtil .RequestIdHeaderKey ],
392+ Headers : reqCtx .Response .Headers ,
393+ }
398394
399- d .runPostResponsePlugins (ctx , reqCtx )
395+ // TODO: to extend fallback functionality, handle cases where target pod is unavailable
396+ // https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/1224
397+ d .runResponseReceivedPlugins (ctx , reqCtx .SchedulingRequest , response , reqCtx .TargetPod )
400398
401- logger .V (logutil .DEBUG ).Info ("Exiting HandleResponseHeaders" )
402399 return reqCtx , nil
403400}
404401
405- func (d * Director ) HandleResponseBodyChunk (ctx context.Context , reqCtx * handlers.RequestContext ) error {
402+ // HandleResponseBodyStreaming is called every time a chunk of the response body is received.
403+ func (d * Director ) HandleResponseBodyStreaming (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
406404 logger := log .FromContext (ctx ).WithValues ("stage" , "bodyChunk" )
407405 logger .V (logutil .TRACE ).Info ("Entering HandleResponseBodyChunk" )
406+ response := & Response {
407+ RequestId : reqCtx .Request .Headers [requtil .RequestIdHeaderKey ],
408+ Headers : reqCtx .Response .Headers ,
409+ }
408410
409- d .runPostResponseChunkPlugins (ctx , reqCtx )
411+ d .runResponseStreamingPlugins (ctx , reqCtx . SchedulingRequest , response , reqCtx . TargetPod )
410412 logger .V (logutil .TRACE ).Info ("Exiting HandleResponseBodyChunk" )
411- return nil
413+ return reqCtx , nil
412414}
413415
414416// HandleResponseBodyComplete is called when the response body is fully received.
415- // It runs the PostResponseComplete plugins.
416- func (d * Director ) HandleResponseBodyComplete (ctx context.Context , reqCtx * handlers.RequestContext ) error {
417+ func (d * Director ) HandleResponseBodyComplete (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
417418 logger := log .FromContext (ctx ).WithValues ("stage" , "bodyChunk" )
418419 logger .V (logutil .DEBUG ).Info ("Entering HandleResponseBodyComplete" )
420+ response := & Response {
421+ RequestId : reqCtx .Request .Headers [requtil .RequestIdHeaderKey ],
422+ Headers : reqCtx .Response .Headers ,
423+ }
419424
420- d .runPostResponseCompletePlugins (ctx , reqCtx )
425+ d .runResponseCompletePlugins (ctx , reqCtx . SchedulingRequest , response , reqCtx . TargetPod )
421426
422427 logger .V (logutil .DEBUG ).Info ("Exiting HandleResponseBodyComplete" )
423- return nil
428+ return reqCtx , nil
424429}
425430
426431func (d * Director ) GetRandomPod () * backend.Pod {
@@ -436,43 +441,44 @@ func (d *Director) GetRandomPod() *backend.Pod {
436441func (d * Director ) runPreRequestPlugins (ctx context.Context , request * schedulingtypes.LLMRequest ,
437442 schedulingResult * schedulingtypes.SchedulingResult , targetPort int ) {
438443 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
439- for _ , plugin := range d .preRequestPlugins {
440- loggerDebug .Info ("Running pre-request plugin" , "plugin" , plugin .TypedName ())
444+ for _ , plugin := range d .requestControlPlugins . preRequestPlugins {
445+ loggerDebug .Info ("Running PreRequest plugin" , "plugin" , plugin .TypedName ())
441446 before := time .Now ()
442447 plugin .PreRequest (ctx , request , schedulingResult , targetPort )
443448 metrics .RecordPluginProcessingLatency (PreRequestExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
444- loggerDebug .Info ("Completed running pre-request plugin successfully" , "plugin" , plugin .TypedName ())
449+ loggerDebug .Info ("Completed running PreRequest plugin successfully" , "plugin" , plugin .TypedName ())
445450 }
446451}
447452
448- func (d * Director ) runPostResponsePlugins (ctx context.Context , reqCtx * handlers. RequestContext ) {
453+ func (d * Director ) runResponseReceivedPlugins (ctx context.Context , request * schedulingtypes. LLMRequest , response * Response , targetPod * backend. Pod ) {
449454 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
450- for _ , plugin := range d .postResponsePlugins {
451- loggerDebug .Info ("Running post-response plugin" , "plugin" , plugin .TypedName ())
455+ for _ , plugin := range d .requestControlPlugins . responseReceivedPlugins {
456+ loggerDebug .Info ("Running ResponseReceived plugin" , "plugin" , plugin .TypedName ())
452457 before := time .Now ()
453- plugin .PostResponse (ctx , reqCtx )
454- metrics .RecordPluginProcessingLatency (PostResponseExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
455- loggerDebug .Info ("Completed running post-response plugin successfully" , "plugin" , plugin .TypedName ())
458+ plugin .ResponseReceived (ctx , request , response , targetPod )
459+ metrics .RecordPluginProcessingLatency (ResponseReceivedExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
460+ loggerDebug .Info ("Completed running ResponseReceived plugin successfully" , "plugin" , plugin .TypedName ())
456461 }
457462}
458463
459- func (d * Director ) runPostResponseChunkPlugins (ctx context.Context , reqCtx * handlers. RequestContext ) {
464+ func (d * Director ) runResponseStreamingPlugins (ctx context.Context , request * schedulingtypes. LLMRequest , response * Response , targetPod * backend. Pod ) {
460465 loggerTrace := log .FromContext (ctx ).V (logutil .TRACE )
461- for _ , plugin := range d .postResponseChunkPlugins {
462- loggerTrace .Info ("Running post-response chunk plugin" , "plugin" , plugin .TypedName (). Type )
466+ for _ , plugin := range d .requestControlPlugins . responseStreamingPlugins {
467+ loggerTrace .Info ("Running ResponseStreaming plugin" , "plugin" , plugin .TypedName ())
463468 before := time .Now ()
464- plugin .PostResponseChunk (ctx , reqCtx )
465- metrics .RecordPluginProcessingLatency (PostResponseChunkExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
469+ plugin .ResponseStreaming (ctx , request , response , targetPod )
470+ metrics .RecordPluginProcessingLatency (ResponseStreamingExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
471+ loggerTrace .Info ("Completed running ResponseStreaming plugin successfully" , "plugin" , plugin .TypedName ())
466472 }
467473}
468474
469- func (d * Director ) runPostResponseCompletePlugins (ctx context.Context , reqCtx * handlers. RequestContext ) {
475+ func (d * Director ) runResponseCompletePlugins (ctx context.Context , request * schedulingtypes. LLMRequest , response * Response , targetPod * backend. Pod ) {
470476 loggerDebug := log .FromContext (ctx ).V (logutil .DEBUG )
471- for _ , plugin := range d .postResponseCompletePlugins {
472- loggerDebug .Info ("Running post-response complete plugin" , "plugin" , plugin .TypedName (). Type )
477+ for _ , plugin := range d .requestControlPlugins . responseCompletePlugins {
478+ loggerDebug .Info ("Running ResponseComplete plugin" , "plugin" , plugin .TypedName ())
473479 before := time .Now ()
474- plugin .PostResponseComplete (ctx , reqCtx )
475- metrics .RecordPluginProcessingLatency (PostResponseCompleteExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
476- loggerDebug .Info ("Completed running post-response complete plugin successfully" , "plugin" , plugin .TypedName ())
480+ plugin .ResponseComplete (ctx , request , response , targetPod )
481+ metrics .RecordPluginProcessingLatency (ResponseCompleteExtensionPoint , plugin .TypedName ().Type , plugin .TypedName ().Name , time .Since (before ))
482+ loggerDebug .Info ("Completed running ResponseComplete plugin successfully" , "plugin" , plugin .TypedName ())
477483 }
478484}
0 commit comments