@@ -26,47 +26,11 @@ import (
2626 backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
2727 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
2828 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
29- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/filter"
30- "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/picker"
3129 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
3230 errutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/error"
3331 logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
3432)
3533
36- var (
37- lowLatencyFilter = & filter.DecisionTreeFilter {
38- Current : filter .LowQueueFilter ,
39- NextOnSuccess : & filter.DecisionTreeFilter {
40- Current : filter .LoRAAffinityFilter ,
41- NextOnSuccessOrFailure : & filter.DecisionTreeFilter {
42- Current : filter .LeastQueueFilter ,
43- NextOnSuccessOrFailure : & filter.DecisionTreeFilter {
44- Current : filter .LeastKVCacheFilter ,
45- },
46- },
47- },
48- NextOnFailure : & filter.DecisionTreeFilter {
49- Current : filter .LeastQueueFilter ,
50- NextOnSuccessOrFailure : & filter.DecisionTreeFilter {
51- Current : filter .LoRAAffinityFilter ,
52- NextOnSuccessOrFailure : & filter.DecisionTreeFilter {
53- Current : filter .LeastKVCacheFilter ,
54- },
55- },
56- },
57- }
58-
59- sheddableRequestFilter = & filter.DecisionTreeFilter {
60- // When there is at least one model server that's not queuing requests, and still has KV
61- // cache below a certain threshold, we consider this model server has capacity to handle
62- // a sheddable request without impacting critical requests.
63- Current : filter .HasCapacityFilter ,
64- NextOnSuccess : lowLatencyFilter ,
65- // If all pods are queuing or running above the KVCache threshold, we drop the sheddable
66- // request to make room for critical requests. for this, we don't define nextOnFailure.
67- }
68- )
69-
7034func NewScheduler (datastore Datastore ) * Scheduler {
7135 return NewSchedulerWithConfig (datastore , defaultConfig )
7236}
@@ -206,19 +170,3 @@ func (s *Scheduler) runPostSchedulePlugins(ctx *types.SchedulingContext, res *ty
206170 metrics .RecordSchedulerPluginProcessingLatency (plugins .PostSchedulePluginType , plugin .Name (), time .Since (before ))
207171 }
208172}
209-
210- type defaultPlugin struct {
211- picker.RandomPicker
212- }
213-
214- func (p * defaultPlugin ) Name () string {
215- return "DefaultPlugin"
216- }
217-
218- func (p * defaultPlugin ) Filter (ctx * types.SchedulingContext , pods []types.Pod ) []types.Pod {
219- if ctx .Req .Critical {
220- return lowLatencyFilter .Filter (ctx , pods )
221- }
222-
223- return sheddableRequestFilter .Filter (ctx , pods )
224- }
0 commit comments