diff --git a/pkg/epp/scheduling/plugins/filter/filter_test.go b/pkg/epp/scheduling/plugins/filter/filter_test.go index 1818c5209..d78452a62 100644 --- a/pkg/epp/scheduling/plugins/filter/filter_test.go +++ b/pkg/epp/scheduling/plugins/filter/filter_test.go @@ -131,8 +131,9 @@ func TestFilter(t *testing.T) { }, }, { - name: "lowQueueAndLessThanKVCacheThresholdPredicate", - filter: &HasCapacityFilter{queueThreshold: 0, kvCacheThreshold: 0.8}, + name: "SheddableCapacityFilter, sheddable request", + req: &types.LLMRequest{Critical: false}, + filter: &SheddableCapacityFilter{queueThreshold: 0, kvCacheThreshold: 0.8}, input: []types.Pod{ &types.PodMetrics{ // This pod should be returned. diff --git a/pkg/epp/scheduling/plugins/filter/has_capacity_filter.go b/pkg/epp/scheduling/plugins/filter/sheddable_capacity_filter.go similarity index 67% rename from pkg/epp/scheduling/plugins/filter/has_capacity_filter.go rename to pkg/epp/scheduling/plugins/filter/sheddable_capacity_filter.go index e6ff2ebfb..5a298a022 100644 --- a/pkg/epp/scheduling/plugins/filter/has_capacity_filter.go +++ b/pkg/epp/scheduling/plugins/filter/sheddable_capacity_filter.go @@ -23,29 +23,33 @@ import ( ) // compile-time type validation -var _ plugins.Filter = &HasCapacityFilter{} +var _ plugins.Filter = &SheddableCapacityFilter{} -// NewHasCapacityFilter returns a new HasCapacityFilter. -func NewHasCapacityFilter() *HasCapacityFilter { - return &HasCapacityFilter{ +// NewSheddableCapacityFilter returns a new SheddableCapacityFilter. +func NewSheddableCapacityFilter() *SheddableCapacityFilter { + return &SheddableCapacityFilter{ queueThreshold: config.Conf.QueueThresholdCritical, kvCacheThreshold: config.Conf.KVCacheThreshold, } } -// HasCapacityFilter filters only pods that has capacity for sheddable requests. -type HasCapacityFilter struct { +// SheddableCapacityFilter filters only pods that have capacity for sheddable requests. 
+type SheddableCapacityFilter struct { queueThreshold int kvCacheThreshold float64 } // Name returns the name of the filter. -func (f *HasCapacityFilter) Name() string { - return "has-capacity" +func (f *SheddableCapacityFilter) Name() string { + return "sheddable-capacity" } // Filter filters out pods that doesn't meet the filter criteria. -func (f *HasCapacityFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod { +func (f *SheddableCapacityFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod { + if ctx.Req.Critical { + return pods // Allow all pods to pass through if the request is critical, even if all pods reach their capacity. + } + filteredPods := []types.Pod{} for _, pod := range pods { diff --git a/pkg/epp/scheduling/plugins/filter/sheddable_request_filter.go b/pkg/epp/scheduling/plugins/filter/sheddable_request_filter.go deleted file mode 100644 index 7eb6326be..000000000 --- a/pkg/epp/scheduling/plugins/filter/sheddable_request_filter.go +++ /dev/null @@ -1,53 +0,0 @@ -/* -Copyright 2025 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package filter - -import ( - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins" - "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" -) - -// compile-time type validation -var _ plugins.Filter = &SheddableRequestFilter{} - -// NewSheddableRequestFilter returns a new SheddableRequestFilter. 
-func NewSheddableRequestFilter() *SheddableRequestFilter { - return &SheddableRequestFilter{ - hasCapacityFilter: NewHasCapacityFilter(), - } -} - -// LowQueueFilter returns pods that their waiting queue size is less than a configured threshold -type SheddableRequestFilter struct { - hasCapacityFilter *HasCapacityFilter -} - -// Name returns the name of the filter. -func (f *SheddableRequestFilter) Name() string { - return "sheddable-request" -} - -// Filter filters out pods that doesn't meet the filter criteria. -func (f *SheddableRequestFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod) []types.Pod { - if ctx.Req.Critical { - // Allow all pods to pass through if the request is critical, even if all pods reach their capacity. - return pods - } - - // Only allow pods that have enough capacity to handle the request. - return f.hasCapacityFilter.Filter(ctx, pods) -} diff --git a/pkg/epp/scheduling/scheduler.go b/pkg/epp/scheduling/scheduler.go index b96730437..2e85619af 100644 --- a/pkg/epp/scheduling/scheduler.go +++ b/pkg/epp/scheduling/scheduler.go @@ -66,7 +66,7 @@ func NewScheduler(datastore Datastore) *Scheduler { defaultConfig := &SchedulerConfig{ preSchedulePlugins: []plugins.PreSchedule{}, - filters: []plugins.Filter{filter.NewSheddableRequestFilter(), lowLatencyFilter}, + filters: []plugins.Filter{filter.NewSheddableCapacityFilter(), lowLatencyFilter}, scorers: map[plugins.Scorer]int{}, picker: &picker.RandomPicker{}, postSchedulePlugins: []plugins.PostSchedule{},