Skip to content

Commit eb5da45

Browse files
committed
filter refactor finalizing
Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent e06b9a9 commit eb5da45

File tree

4 files changed

+8
-6
lines changed

4 files changed

+8
-6
lines changed

pkg/epp/scheduling/plugins/filter/least_kvcache_filter.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ func NewLeastKVCacheFilter() *LeastKVCacheFilter {
3636
// The intuition is that if there are multiple pods that share similar KV cache in the low range, we
3737
// should consider them all instead of the absolute minimum one. This worked better than picking the
3838
// least one as it gives more choices for the next filter, which on aggregate gave better results.
39-
// TODO: Compare this strategy with other strategies such as top K.
4039
type LeastKVCacheFilter struct{}
4140

4241
// Name returns the name of the filter.

pkg/epp/scheduling/plugins/filter/least_queue_filter.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ func NewLeastQueueFilter() *LeastQueueFilter {
3737
// we should consider them all instead of the absolute minimum one. This worked better than picking
3838
// the least one as it gives more choices for the next filter, which on aggregate gave better
3939
// results.
40-
// TODO: Compare this strategy with other strategies such as top K.
4140
type LeastQueueFilter struct{}
4241

4342
// Name returns the name of the filter.

pkg/epp/scheduling/plugins/filter/low_queue_filter.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,15 @@ var _ plugins.Filter = &LowQueueFilter{}
2727

2828
// NewLowQueueFilter returns a new LowQueueFilter.
2929
func NewLowQueueFilter() *LowQueueFilter {
30-
return &LowQueueFilter{}
30+
return &LowQueueFilter{
31+
queueingThresholdLoRA: config.Conf.QueueingThresholdLoRA,
32+
}
3133
}
3234

3335
// LowQueueFilter returns pods whose waiting queue size is less than or equal to a configured threshold
34-
type LowQueueFilter struct{}
36+
type LowQueueFilter struct {
37+
queueingThresholdLoRA int
38+
}
3539

3640
// Name returns the name of the filter.
3741
func (f *LowQueueFilter) Name() string {
@@ -43,7 +47,7 @@ func (f *LowQueueFilter) Filter(ctx *types.SchedulingContext, pods []types.Pod)
4347
filteredPods := []types.Pod{}
4448

4549
for _, pod := range pods {
46-
if pod.GetMetrics().WaitingQueueSize <= config.Conf.QueueingThresholdLoRA {
50+
if pod.GetMetrics().WaitingQueueSize <= f.queueingThresholdLoRA {
4751
filteredPods = append(filteredPods, pod)
4852
}
4953
}

pkg/epp/scheduling/scheduler.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func NewScheduler(datastore Datastore) *Scheduler {
6666

6767
defaultConfig := &SchedulerConfig{
6868
preSchedulePlugins: []plugins.PreSchedule{},
69-
filters: []plugins.Filter{&filter.SheddableRequestFilter{}, lowLatencyFilter},
69+
filters: []plugins.Filter{filter.NewSheddableRequestFilter(), lowLatencyFilter},
7070
scorers: map[plugins.Scorer]int{},
7171
picker: &picker.RandomPicker{},
7272
postSchedulePlugins: []plugins.PostSchedule{},

0 commit comments

Comments
 (0)