Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/epp/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ func (r *Runner) registerInTreePlugins() {
plugins.Register(picker.MaxScorePickerType, picker.MaxScorePickerFactory)
plugins.Register(picker.RandomPickerType, picker.RandomPickerFactory)
plugins.Register(profile.SingleProfileHandlerType, profile.SingleProfileHandlerFactory)
plugins.Register(scorer.KvCacheScorerType, scorer.KvCacheScorerFactory)
plugins.Register(scorer.KvCacheUtilizationScorerType, scorer.KvCacheUtilizationScorerFactory)
plugins.Register(scorer.QueueScorerType, scorer.QueueScorerFactory)
// register filter for test purpose only (used in conformance tests)
plugins.Register(testfilter.HeaderBasedTestingFilterType, testfilter.HeaderBasedTestingFilterFactory)
Expand Down
4 changes: 2 additions & 2 deletions config/charts/inferencepool/templates/epp-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ data:
kind: EndpointPickerConfig
plugins:
- type: queue-scorer
- type: kv-cache-scorer
- type: kv-cache-utilization-scorer
- type: prefix-cache-scorer
parameters:
hashBlockSize: 64
Expand All @@ -74,7 +74,7 @@ data:
plugins:
- pluginRef: queue-scorer
weight: 1
- pluginRef: kv-cache-scorer
- pluginRef: kv-cache-utilization-scorer
weight: 1
- pluginRef: prefix-cache-scorer
weight: 1
Expand Down
4 changes: 2 additions & 2 deletions config/manifests/inferencepool-resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ data:
kind: EndpointPickerConfig
plugins:
- type: queue-scorer
- type: kv-cache-scorer
- type: kv-cache-utilization-scorer
- type: prefix-cache-scorer
parameters:
hashBlockSize: 64
Expand All @@ -168,7 +168,7 @@ data:
plugins:
- pluginRef: queue-scorer
weight: 1
- pluginRef: kv-cache-scorer
- pluginRef: kv-cache-utilization-scorer
weight: 1
- pluginRef: prefix-cache-scorer
weight: 1
Expand Down
4 changes: 2 additions & 2 deletions pkg/epp/scheduling/framework/plugins/filter/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ func TestDecisionTreeFilterFactory(t *testing.T) {
loraAffinityFilter := NewLoraAffinityFilter(config.Conf.LoraAffinityThreshold)
lowQueueFilter := NewLowQueueFilter(config.Conf.QueueingThresholdLoRA)

kvCacheScorer := scorer.NewKVCacheScorer()
kvCacheScorer := scorer.NewKVCacheUtilizationScorer()

testHandle := utils.NewTestHandle(context.Background())

Expand Down Expand Up @@ -365,7 +365,7 @@ func TestDecisionTreeFilterFactory(t *testing.T) {
}

cmpOptions := cmpopts.IgnoreUnexported(LeastKVCacheFilter{}, LeastQueueFilter{},
LoraAffinityFilter{}, LowQueueFilter{}, scorer.KVCacheScorer{}, plugins.TypedName{})
LoraAffinityFilter{}, LowQueueFilter{}, scorer.KVCacheUtilizationScorer{}, plugins.TypedName{})

for _, test := range tests {
rawParameters := struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,42 +26,42 @@ import (
)

const (
KvCacheScorerType = "kv-cache-scorer"
KvCacheUtilizationScorerType = "kv-cache-utilization-scorer"
)

// compile-time type assertion
var _ framework.Scorer = &KVCacheScorer{}
var _ framework.Scorer = &KVCacheUtilizationScorer{}

// KvCacheScorerFactory defines the factory function for KVCacheScorer.
func KvCacheScorerFactory(name string, _ json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) {
return NewKVCacheScorer().WithName(name), nil
// KvCacheUtilizationScorerFactory defines the factory function for KVCacheUtilizationScorer.
func KvCacheUtilizationScorerFactory(name string, _ json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) {
return NewKVCacheUtilizationScorer().WithName(name), nil
}

// NewKVCacheScorer initializes a new KVCacheScorer and returns its pointer.
func NewKVCacheScorer() *KVCacheScorer {
return &KVCacheScorer{
typedName: plugins.TypedName{Type: KvCacheScorerType, Name: KvCacheScorerType},
// NewKVCacheUtilizationScorer initializes a new KVCacheUtilizationScorer and returns its pointer.
func NewKVCacheUtilizationScorer() *KVCacheUtilizationScorer {
return &KVCacheUtilizationScorer{
typedName: plugins.TypedName{Type: KvCacheUtilizationScorerType, Name: KvCacheUtilizationScorerType},
}
}

// KVCacheScorer scores list of candidate pods based on KV cache utilization.
type KVCacheScorer struct {
// KVCacheUtilizationScorer scores list of candidate pods based on KV cache utilization.
type KVCacheUtilizationScorer struct {
typedName plugins.TypedName
}

// TypedName returns the type and name tuple of this plugin instance.
func (s *KVCacheScorer) TypedName() plugins.TypedName {
func (s *KVCacheUtilizationScorer) TypedName() plugins.TypedName {
return s.typedName
}

// WithName sets the name of the scorer.
func (s *KVCacheScorer) WithName(name string) *KVCacheScorer {
func (s *KVCacheUtilizationScorer) WithName(name string) *KVCacheUtilizationScorer {
s.typedName.Name = name
return s
}

// Score returns the scoring result for the given list of pods based on context.
func (s *KVCacheScorer) Score(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
func (s *KVCacheUtilizationScorer) Score(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
scores := make(map[types.Pod]float64, len(pods))
for _, pod := range pods {
scores[pod] = 1 - pod.GetMetrics().KVCacheUsagePercent
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

func TestKvCacheScorer(t *testing.T) {
func TestKvCacheUtilizationScorer(t *testing.T) {
tests := []struct {
name string
pods []types.Pod
Expand Down Expand Up @@ -83,8 +83,7 @@ func TestKvCacheScorer(t *testing.T) {

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
scorer := &KVCacheScorer{}
scores := scorer.Score(context.Background(), types.NewCycleState(), &types.LLMRequest{}, test.pods)
scores := NewKVCacheUtilizationScorer().Score(context.Background(), types.NewCycleState(), &types.LLMRequest{}, test.pods)

for i, pod := range test.pods {
expectedScore := test.expectedScoresPod[i]
Expand Down
2 changes: 1 addition & 1 deletion site-src/guides/epp-configuration/config-text.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ Picks a random pod from the list of candidates.

Scores the candidate pods based on their KV cache utilization.

- *Type*: kv-cache-scorer
- *Type*: kv-cache-utilization-scorer
- *Parameters*: none

#### **QueueScorer**
Expand Down
4 changes: 2 additions & 2 deletions site-src/guides/inferencepool-rollout.md
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ data:
kind: EndpointPickerConfig
plugins:
- type: queue-scorer
- type: kv-cache-scorer
- type: kv-cache-utilization-scorer
- type: prefix-cache-scorer
parameters:
hashBlockSize: 64
Expand All @@ -370,7 +370,7 @@ data:
plugins:
- pluginRef: queue-scorer
weight: 1
- pluginRef: kv-cache-scorer
- pluginRef: kv-cache-utilization-scorer
weight: 1
- pluginRef: prefix-cache-scorer
weight: 1
Expand Down
4 changes: 2 additions & 2 deletions test/testdata/inferencepool-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ data:
kind: EndpointPickerConfig
plugins:
- type: queue-scorer
- type: kv-cache-scorer
- type: kv-cache-utilization-scorer
- type: prefix-cache-scorer
parameters:
hashBlockSize: 64
Expand All @@ -165,7 +165,7 @@ data:
plugins:
- pluginRef: queue-scorer
weight: 1
- pluginRef: kv-cache-scorer
- pluginRef: kv-cache-utilization-scorer
weight: 1
- pluginRef: prefix-cache-scorer
weight: 1
Expand Down