Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions cmd/epp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,12 @@ import (
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics/collectors"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/multi/prefix"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/scorer"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/filter"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker"
profilepicker "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile-picker"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/scorer"
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
envutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/env"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
Expand Down Expand Up @@ -196,20 +198,21 @@ func run() error {
queueScorerWeight := envutil.GetEnvInt("QUEUE_SCORE_WEIGHT", scorer.DefaultQueueScorerWeight, setupLog)
kvCacheScorerWeight := envutil.GetEnvInt("KV_CACHE_SCORE_WEIGHT", scorer.DefaultKVCacheScorerWeight, setupLog)

schedulerConfig := scheduling.NewSchedulerConfig().
schedulerProfile := framework.NewSchedulerProfile().
WithFilters(filter.NewSheddableCapacityFilter()).
WithScorers(scorer.NewWeightedScorer(&scorer.QueueScorer{}, queueScorerWeight),
scorer.NewWeightedScorer(&scorer.KVCacheScorer{}, kvCacheScorerWeight)).
WithScorers(framework.NewWeightedScorer(&scorer.QueueScorer{}, queueScorerWeight),
framework.NewWeightedScorer(&scorer.KVCacheScorer{}, kvCacheScorerWeight)).
WithPicker(picker.NewMaxScorePicker())

if prefixCacheScheduling == "true" {
prefixScorerWeight := envutil.GetEnvInt("PREFIX_CACHE_SCORE_WEIGHT", prefix.DefaultScorerWeight, setupLog)
if err := schedulerConfig.AddPlugins(scorer.NewWeightedScorer(prefix.New(loadPrefixCacheConfig()), prefixScorerWeight)); err != nil {
if err := schedulerProfile.AddPlugins(framework.NewWeightedScorer(prefix.New(loadPrefixCacheConfig()), prefixScorerWeight)); err != nil {
setupLog.Error(err, "Failed to register scheduler plugins")
return err
}
}

schedulerConfig := scheduling.NewSchedulerConfig(profilepicker.NewAllProfilesPicker(), map[string]*framework.SchedulerProfile{"schedulerv2": schedulerProfile})
scheduler = scheduling.NewSchedulerWithConfig(datastore, schedulerConfig)
}
serverRunner := &runserver.ExtProcServerRunner{
Expand Down
14 changes: 9 additions & 5 deletions pkg/epp/requestcontrol/director.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import (
)

type Scheduler interface {
Schedule(ctx context.Context, b *schedulingtypes.LLMRequest) (result *schedulingtypes.Result, err error)
Schedule(ctx context.Context, b *schedulingtypes.LLMRequest) (result map[string]*schedulingtypes.Result, err error)
OnResponse(ctx context.Context, resp *schedulingtypes.LLMResponse, targetPodName string)
}

Expand Down Expand Up @@ -108,23 +108,27 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
}

// Dispatch runs one or many scheduling cycles.
func (d *Director) Dispatch(ctx context.Context, llmReq *schedulingtypes.LLMRequest) ([]*schedulingtypes.Result, error) {
func (d *Director) Dispatch(ctx context.Context, llmReq *schedulingtypes.LLMRequest) (map[string]*schedulingtypes.Result, error) {
var err error
res, err := d.scheduler.Schedule(ctx, llmReq)
if err != nil {
return nil, errutil.Error{Code: errutil.InferencePoolResourceExhausted, Msg: fmt.Errorf("failed to find target pod: %w", err).Error()}
}

return []*schedulingtypes.Result{res}, nil
return res, nil // TODO handle multi cycle result after defining the PostDispatch extension point
}

func (d *Director) PostDispatch(ctx context.Context, reqCtx *handlers.RequestContext, results []*schedulingtypes.Result) (*handlers.RequestContext, error) {
func (d *Director) PostDispatch(ctx context.Context, reqCtx *handlers.RequestContext, results map[string]*schedulingtypes.Result) (*handlers.RequestContext, error) {
logger := log.FromContext(ctx)
// currently we only get a single result; will refactor to pluggably implement the PostDispatch extension point
if len(results) == 0 {
return reqCtx, errutil.Error{Code: errutil.Internal, Msg: "results must be greater than zero"}
}
targetPod := results[0].TargetPod.GetPod()
var targetPod *backend.Pod
// TODO should handle multi cycle results, this should be pluggable logic
for _, result := range results {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this will just set the final targetPod. Can we instead just index on the key & get the result that way? We can leave the todo to indicate this is a transitory state.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you mean doing this?

targetPod = results[key].TargetPod.GetPod()

instead of:

targetPod = result.TargetPod.GetPod()

not sure I got the intention. we have here a map from profile-name -> result.
this is a transitionary stage where only one profile is used, so only one result.
but since it's a map I must use the range loop.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, just do targetPod = results[key].TargetPod.GetPod() and drop the for loop.

Since we only end up with one result as is even if there are multiple profiles, it reads more obvious if we just use the key, I think.

Copy link
Contributor Author

@nirrozenbaum nirrozenbaum May 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so now the next question, what is the value of key in this line

targetPod = results[key].TargetPod.GetPod()?

which key value to use?
if we used default configuration it's just "default", but if we used schedulerv2, it's a different profile.
since we don't know the key, I'm using the loop...

Copy link
Contributor Author

@nirrozenbaum nirrozenbaum May 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I expect this to be solved as soon as we implement the extension point

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only one we set is schedulerv2 right?
line 215 in the main file of this PR

Copy link
Collaborator

@kfswain kfswain May 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that this may be a nit, but the for loop would look out of place since the iteration isn't quite being used

Copy link
Contributor Author

@nirrozenbaum nirrozenbaum May 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we used the env var to enable "schedulerv2" than the key is "schedulerv2".
if not and we use default configuration, the key is "default" (in NewScheduler func):
https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/epp/scheduling/scheduler.go#L73

this is why I didn't put here a const.

targetPod = result.TargetPod.GetPod()
}

pool, err := d.datastore.PoolGet()
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,19 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

package plugins
package framework

import (
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

const (
PreSchedulerPluginType = "PreSchedule"
ProfilePickerType = "ProfilePicker"
PreCyclePluginType = "PreCycle"
FilterPluginType = "Filter"
ScorerPluginType = "Scorer"
PostSchedulePluginType = "PostSchedule"
PickerPluginType = "Picker"
PostCyclePluginType = "PostCycle"
PostResponsePluginType = "PostResponse"
)

Expand All @@ -36,11 +37,18 @@ type Plugin interface {
Name() string
}

// PreSchedule is called when the scheduler receives a new request. It can be used for various
// initialization work.
type PreSchedule interface {
// ProfilePicker selects the SchedulerProfiles to run from a list of candidate profiles, while taking into consideration the request properties
// and the previously executed SchedulerProfile cycles along with their results.
type ProfilePicker interface {
Plugin
PreSchedule(ctx *types.SchedulingContext)
Pick(request *types.LLMRequest, profiles map[string]*SchedulerProfile, executionResults map[string]*types.Result) map[string]*SchedulerProfile
}

// PreCycle is called when the scheduler receives a new request and invokes a SchedulerProfile cycle.
// It can be used for various initialization work.
type PreCycle interface {
Plugin
PreCycle(ctx *types.SchedulingContext)
}

// Filter defines the interface for filtering a list of pods based on context.
Expand All @@ -62,10 +70,10 @@ type Picker interface {
Pick(ctx *types.SchedulingContext, scoredPods []*types.ScoredPod) *types.Result
}

// PostSchedule is called by the scheduler after it selects a targetPod for the request.
type PostSchedule interface {
// PostCycle is called by the scheduler after it selects a targetPod for the request in the SchedulerProfile cycle.
type PostCycle interface {
Plugin
PostSchedule(ctx *types.SchedulingContext, res *types.Result)
PostCycle(ctx *types.SchedulingContext, res *types.Result)
}

// PostResponse is called by the scheduler after a successful response was sent.
Expand Down
15 changes: 15 additions & 0 deletions pkg/epp/scheduling/framework/plugins/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Scheduling Plugins

This package contains the scheduling plugin implementations.

Plugins are organized by the following rule. Follow this rule when adding a new
plugin.

```
plugins/
|__ filter/ (Plugins that implement the Filter interface only.)
|__ scorer/ (Plugins that implement the Scorer interface only.)
|__ picker/ (Plugins that implement the Picker interface only.)
|__ multi/  (Plugins that implement multiple plugin interfaces.)
|____ prefix/ (Prefix cache aware scheduling plugin.)
```
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,31 @@ limitations under the License.
package filter

import (
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

// compile-time type validation
var _ plugins.Filter = &DecisionTreeFilter{}
// compile-time type assertion
var _ framework.Filter = &DecisionTreeFilter{}

// DecisionTreeFilter applies the current filter, and then recursively applies next filters
// depending on the success or failure of the current filter.
// It can be used to construct a flow chart algorithm.
type DecisionTreeFilter struct {
Current plugins.Filter
Current framework.Filter
// NextOnSuccess filter will be applied after successfully applying the current filter.
// The filtered results will be passed to the next filter.
NextOnSuccess plugins.Filter
NextOnSuccess framework.Filter
// NextOnFailure filter will be applied if current filter results in no pods.
// The original input will be passed to the next filter.
NextOnFailure plugins.Filter
NextOnFailure framework.Filter
// NextOnSuccessOrFailure is a convenience field to configure the next filter regardless of the
// success or failure of the current filter.
// NOTE: When using NextOnSuccessOrFailure, both nextOnSuccess and nextOnFailure SHOULD be nil.
// However if that's not the case, nextOnSuccess and nextOnFailure will be used, instead of
// NextOnSuccessOrFailure, in the success and failure scenarios, respectively.
NextOnSuccessOrFailure plugins.Filter
NextOnSuccessOrFailure framework.Filter
}

// Name returns the name of the filter.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@ import (
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

// compile-time type assertion
var _ framework.Filter = &filterAll{}

type filterAll struct{}

func (f *filterAll) Name() string {
Expand All @@ -44,7 +47,7 @@ func TestFilter(t *testing.T) {
tests := []struct {
name string
req *types.LLMRequest
filter plugins.Filter
filter framework.Filter
input []types.Pod
output []types.Pod
}{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ package filter
import (
"math"

"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

// compile-time type validation
var _ plugins.Filter = &LeastKVCacheFilter{}
// compile-time type assertion
var _ framework.Filter = &LeastKVCacheFilter{}

// NewLeastKVCacheFilter initializes a new LeastKVCacheFilter and returns its pointer.
func NewLeastKVCacheFilter() *LeastKVCacheFilter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ package filter
import (
"math"

"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

// compile-time type validation
var _ plugins.Filter = &LeastQueueFilter{}
// compile-time type assertion
var _ framework.Filter = &LeastQueueFilter{}

// NewLeastQueueFilter initializes a new LeastQueueFilter and returns its pointer.
func NewLeastQueueFilter() *LeastQueueFilter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ import (
"time"

"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

// compile-time type validation
var _ plugins.Filter = &LoraAffinityFilter{}
// compile-time type assertion
var _ framework.Filter = &LoraAffinityFilter{}

// NewLoraAffinityFilter initializes a new LoraAffinityFilter and returns its pointer.
func NewLoraAffinityFilter() *LoraAffinityFilter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ package filter

import (
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

// compile-time type validation
var _ plugins.Filter = &LowQueueFilter{}
// compile-time type assertion
var _ framework.Filter = &LowQueueFilter{}

// NewLowQueueFilter initializes a new LowQueueFilter and returns its pointer.
func NewLowQueueFilter() *LowQueueFilter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ package filter

import (
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/config"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

// compile-time type validation
var _ plugins.Filter = &SheddableCapacityFilter{}
// compile-time type assertion
var _ framework.Filter = &SheddableCapacityFilter{}

// NewSheddableCapacityFilter initializes a new SheddableCapacityFilter and returns its pointer.
func NewSheddableCapacityFilter() *SheddableCapacityFilter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
"github.com/cespare/xxhash/v2"
k8stypes "k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)
Expand Down Expand Up @@ -88,7 +88,7 @@ func (s ServerID) String() string {
return k8stypes.NamespacedName(s).String()
}

// compile-time type validation
// compile-time type assertion
var _ types.StateData = &schedulingContextState{}

// This is the state of this plugin to be used during a scheduling cycle.
Expand All @@ -113,10 +113,10 @@ func (s *schedulingContextState) Clone() types.StateData {
}
}

// compile-time type validation
var _ plugins.PreSchedule = &Plugin{}
var _ plugins.Scorer = &Plugin{}
var _ plugins.PostSchedule = &Plugin{}
// compile-time type assertion
var _ framework.PreCycle = &Plugin{}
var _ framework.Scorer = &Plugin{}
var _ framework.PostCycle = &Plugin{}

// New initializes a new prefix Plugin and returns its pointer.
func New(config Config) *Plugin {
Expand All @@ -132,20 +132,20 @@ func (m *Plugin) Name() string {
return "prefix-cache"
}

// PreSchedule initializes the prefix plugin state for the current scheduling cycle.
func (m *Plugin) PreSchedule(ctx *types.SchedulingContext) {
// PreCycle initializes the prefix plugin state for the current scheduling cycle.
func (m *Plugin) PreCycle(ctx *types.SchedulingContext) {
hashes := hashPrompt(ctx, m.HashBlockSize, m.MaxPrefixBlocksToMatch)
state := &schedulingContextState{
PrefixHashes: hashes,
PrefixCacheServers: m.matchLongestPrefix(ctx, hashes, DefaultNumServersToMatch),
}

ctx.CycleState.Write(types.StateKey(m.Name()), state)
ctx.Logger.V(logutil.TRACE).Info(fmt.Sprintf("PreSchedule, cached servers: %+v", state.PrefixCacheServers), "hashes", state.PrefixHashes)
ctx.Logger.V(logutil.TRACE).Info(fmt.Sprintf("PreCycle, cached servers: %+v", state.PrefixCacheServers), "hashes", state.PrefixHashes)
}

// PostSchedule records in the plugin cache the result of the scheduling selection.
func (m *Plugin) PostSchedule(ctx *types.SchedulingContext, res *types.Result) {
// PostCycle records in the plugin cache the result of the scheduling selection.
func (m *Plugin) PostCycle(ctx *types.SchedulingContext, res *types.Result) {
targetPod := res.TargetPod.GetPod()
state, err := m.getPrefixState(ctx.CycleState)
if err != nil {
Expand Down
Loading