
Commit 23a3787

Remove redundant SheddableCapacityFilter.
Admission control and capacity management are now handled in `requestcontrol.Director.PreDispatch` (and will soon be absorbed into the new Flow Controller), so this check is no longer a responsibility of the scheduling framework; it is already applied in kubernetes-sigs#805 before the scheduling layer is invoked.

This is not a no-op change. Previously, the `SheddableCapacityFilter` not only dropped sheddable requests when the system was at capacity, it also strictly filtered the pods that the rest of the scheduling plugins would consider as input. This change removes that strict filtering: as long as the system is not saturated, all pods are considered, so sheddable requests now follow the same scheduling path as critical requests, provided they are not dropped by the saturation-detection check in `PreDispatch`.
1 parent 1d6a81f commit 23a3787
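
To make the new ordering concrete, here is a minimal Go sketch of the flow described above. The names (`llmRequest`, `preDispatch`, `saturated`, `errDroppedSheddable`) are hypothetical stand-ins, not the actual `requestcontrol.Director.PreDispatch` or `saturationdetector` APIs, which are not part of this diff:

package main

import (
	"errors"
	"fmt"
)

// Hypothetical stand-ins; the real request type and saturation detector live
// in the epp packages and are not shown in this diff.
type llmRequest struct {
	TargetModel string
	Critical    bool
}

var errDroppedSheddable = errors.New("system saturated, sheddable request dropped")

// preDispatch sketches the ordering described in the commit message: the
// saturation check runs before the scheduler is invoked, and only sheddable
// (non-critical) requests can be dropped by it.
func preDispatch(req llmRequest, saturated bool) error {
	if !req.Critical && saturated {
		return errDroppedSheddable
	}
	// Critical requests, and sheddable requests on an unsaturated system,
	// continue to scheduling and see the full, unfiltered pod list.
	return nil
}

func main() {
	fmt.Println(preDispatch(llmRequest{TargetModel: "sheddable"}, true))  // dropped
	fmt.Println(preDispatch(llmRequest{TargetModel: "sheddable"}, false)) // <nil>
}

The point is only the ordering: saturation is evaluated once, before scheduling, and the scheduler itself no longer removes pods on capacity grounds.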

File tree

cmd/epp/main.go
pkg/epp/scheduling/framework/plugins/filter/filter_test.go
pkg/epp/scheduling/framework/plugins/filter/sheddable_capacity_filter.go
pkg/epp/scheduling/scheduler.go
pkg/epp/scheduling/scheduler_test.go

5 files changed: +3 / -219 lines

cmd/epp/main.go

Lines changed: 0 additions & 2 deletions
@@ -43,7 +43,6 @@ import (
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/saturationdetector"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
-	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/filter"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/multi/prefix"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/picker"
 	profilepicker "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework/plugins/profile-picker"
@@ -208,7 +207,6 @@ func run() error {
 	kvCacheScorerWeight := envutil.GetEnvInt("KV_CACHE_SCORE_WEIGHT", scorer.DefaultKVCacheScorerWeight, setupLog)

 	schedulerProfile := framework.NewSchedulerProfile().
-		WithFilters(filter.NewSheddableCapacityFilter()).
 		WithScorers(framework.NewWeightedScorer(&scorer.QueueScorer{}, queueScorerWeight),
			framework.NewWeightedScorer(&scorer.KVCacheScorer{}, kvCacheScorerWeight)).
 		WithPicker(picker.NewMaxScorePicker())

pkg/epp/scheduling/framework/plugins/filter/filter_test.go

Lines changed: 1 addition & 37 deletions
@@ -134,42 +134,6 @@ func TestFilter(t *testing.T) {
 				},
 			},
 		},
-		{
-			name:   "SheddableCapacityFilter, sheddable request",
-			req:    &types.LLMRequest{Critical: false},
-			filter: &SheddableCapacityFilter{queueThreshold: 0, kvCacheThreshold: 0.8},
-			input: []types.Pod{
-				&types.PodMetrics{
-					// This pod should be returned.
-					MetricsState: &backendmetrics.MetricsState{
-						WaitingQueueSize:    0,
-						KVCacheUsagePercent: 0,
-					},
-				},
-				&types.PodMetrics{
-					// Queue is non zero, despite low kv cache, should not return.
-					MetricsState: &backendmetrics.MetricsState{
-						WaitingQueueSize:    1,
-						KVCacheUsagePercent: 0.3,
-					},
-				},
-				&types.PodMetrics{
-					// High kv cache despite zero queue, should not return
-					MetricsState: &backendmetrics.MetricsState{
-						WaitingQueueSize:    0,
-						KVCacheUsagePercent: 1.0,
-					},
-				},
-			},
-			output: []types.Pod{
-				&types.PodMetrics{
-					MetricsState: &backendmetrics.MetricsState{
-						WaitingQueueSize:    0,
-						KVCacheUsagePercent: 0,
-					},
-				},
-			},
-		},
 	}

 	for _, test := range tests {
@@ -241,7 +205,7 @@ func TestLoRASoftAffinityDistribution(t *testing.T) {
 	// initialize LoraAffinityFilter
 	LoraAffinityFilter := NewLoraAffinityFilter()

-	for i := 0; i < numIterations; i++ {
+	for range numIterations {
 		result := LoraAffinityFilter.Filter(context.Background(), req, types.NewCycleState(), pods)

 		// Check which type of pod was returned

pkg/epp/scheduling/framework/plugins/filter/sheddable_capacity_filter.go

Lines changed: 0 additions & 64 deletions
This file was deleted.
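
The deleted filter's source is not shown here, but the test case removed from filter_test.go above pins down its behaviour: for non-critical (sheddable) requests it kept only pods whose waiting queue size and KV-cache usage were at or below its thresholds, and a request with no qualifying pods was shed. A rough, self-contained Go sketch of that behaviour, using approximate names (`podMetrics`, `sheddableCapacityFilter.filter`) rather than the deleted file's exact types:

package main

import "fmt"

// podMetrics is a stand-in for the framework's pod/metrics types; the real
// filter operated on types.Pod and backendmetrics.MetricsState.
type podMetrics struct {
	Name                string
	WaitingQueueSize    int
	KVCacheUsagePercent float64
}

// sheddableCapacityFilter approximates the deleted filter as exercised by the
// removed test: thresholds of 0 queued requests and 0.8 KV-cache usage.
type sheddableCapacityFilter struct {
	queueThreshold   int
	kvCacheThreshold float64
}

// filter keeps every pod for critical requests; for sheddable requests it
// keeps only pods with spare capacity. An empty result meant the request was
// shed by the scheduler.
func (f *sheddableCapacityFilter) filter(critical bool, pods []podMetrics) []podMetrics {
	if critical {
		return pods
	}
	kept := make([]podMetrics, 0, len(pods))
	for _, p := range pods {
		if p.WaitingQueueSize <= f.queueThreshold && p.KVCacheUsagePercent <= f.kvCacheThreshold {
			kept = append(kept, p)
		}
	}
	return kept
}

func main() {
	f := &sheddableCapacityFilter{queueThreshold: 0, kvCacheThreshold: 0.8}
	pods := []podMetrics{
		{Name: "pod1", WaitingQueueSize: 0, KVCacheUsagePercent: 0},   // kept
		{Name: "pod2", WaitingQueueSize: 1, KVCacheUsagePercent: 0.3}, // queued: dropped
		{Name: "pod3", WaitingQueueSize: 0, KVCacheUsagePercent: 1.0}, // KV cache full: dropped
	}
	fmt.Println(f.filter(false, pods)) // only pod1 survives
}

Under the new arrangement this per-pod pruning is gone; only the request-level saturation check in `PreDispatch` remains.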

pkg/epp/scheduling/scheduler.go

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ func NewScheduler(datastore Datastore) *Scheduler {
 	}

 	defaultProfile := framework.NewSchedulerProfile().
-		WithFilters(filter.NewSheddableCapacityFilter(), lowLatencyFilter).
+		WithFilters(lowLatencyFilter).
 		WithPicker(&picker.RandomPicker{})

 	profilePicker := profilepicker.NewAllProfilesPicker()

pkg/epp/scheduling/scheduler_test.go

Lines changed: 1 addition & 115 deletions
@@ -51,7 +51,7 @@ func TestSchedule(t *testing.T) {
 			err: true,
 		},
 		{
-			name: "critical request",
+			name: "finds optimal pod",
 			req: &types.LLMRequest{
 				TargetModel: "critical",
 				RequestId:   uuid.NewString(),
@@ -116,120 +116,6 @@ func TestSchedule(t *testing.T) {
 				},
 			},
 		},
-		{
-			name: "sheddable request, accepted",
-			req: &types.LLMRequest{
-				TargetModel: "sheddable",
-				RequestId:   uuid.NewString(),
-				Critical:    false,
-			},
-			// pod1 will be picked because it has capacity for the sheddable request.
-			input: []*backendmetrics.FakePodMetrics{
-				{
-					Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}},
-					Metrics: &backendmetrics.MetricsState{
-						WaitingQueueSize:    0,
-						KVCacheUsagePercent: 0.2,
-						MaxActiveModels:     2,
-						ActiveModels: map[string]int{
-							"foo": 1,
-							"bar": 1,
-						},
-					},
-				},
-				{
-					Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}},
-					Metrics: &backendmetrics.MetricsState{
-						WaitingQueueSize:    3,
-						KVCacheUsagePercent: 0.1,
-						MaxActiveModels:     2,
-						ActiveModels: map[string]int{
-							"foo":      1,
-							"critical": 1,
-						},
-					},
-				},
-				{
-					Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}},
-					Metrics: &backendmetrics.MetricsState{
-						WaitingQueueSize:    10,
-						KVCacheUsagePercent: 0.2,
-						MaxActiveModels:     2,
-						ActiveModels: map[string]int{
-							"foo": 1,
-						},
-					},
-				},
-			},
-			wantRes: map[string]*types.Result{
-				"default": {
-					TargetPod: &types.ScoredPod{
-						Pod: &types.PodMetrics{
-							Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}, Labels: make(map[string]string)},
-							MetricsState: &backendmetrics.MetricsState{
-								WaitingQueueSize:    0,
-								KVCacheUsagePercent: 0.2,
-								MaxActiveModels:     2,
-								ActiveModels: map[string]int{
-									"foo": 1,
-									"bar": 1,
-								},
-								WaitingModels: map[string]int{},
-							},
-						},
-					},
-				},
-			},
-		},
-		{
-			name: "sheddable request, dropped",
-			req: &types.LLMRequest{
-				TargetModel: "sheddable",
-				RequestId:   uuid.NewString(),
-				Critical:    false,
-			},
-			// All pods have higher KV cache thant the threshold, so the sheddable request will be
-			// dropped.
-			input: []*backendmetrics.FakePodMetrics{
-				{
-					Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}},
-					Metrics: &backendmetrics.MetricsState{
-						WaitingQueueSize:    10,
-						KVCacheUsagePercent: 0.9,
-						MaxActiveModels:     2,
-						ActiveModels: map[string]int{
-							"foo": 1,
-							"bar": 1,
-						},
-					},
-				},
-				{
-					Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}},
-					Metrics: &backendmetrics.MetricsState{
-						WaitingQueueSize:    3,
-						KVCacheUsagePercent: 0.85,
-						MaxActiveModels:     2,
-						ActiveModels: map[string]int{
-							"foo":      1,
-							"critical": 1,
-						},
-					},
-				},
-				{
-					Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}},
-					Metrics: &backendmetrics.MetricsState{
-						WaitingQueueSize:    10,
-						KVCacheUsagePercent: 0.85,
-						MaxActiveModels:     2,
-						ActiveModels: map[string]int{
-							"foo": 1,
-						},
-					},
-				},
-			},
-			wantRes: nil,
-			err:     true,
-		},
 	}

 	for _, test := range tests {
