diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 53c24668b..04f5287be 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -375,7 +375,7 @@ func (r *Runner) registerInTreePlugins() { plugins.Register(picker.MaxScorePickerType, picker.MaxScorePickerFactory) plugins.Register(picker.RandomPickerType, picker.RandomPickerFactory) plugins.Register(profile.SingleProfileHandlerType, profile.SingleProfileHandlerFactory) - plugins.Register(scorer.KvCacheScorerType, scorer.KvCacheScorerFactory) + plugins.Register(scorer.KvCacheUtilizationScorerType, scorer.KvCacheUtilizationScorerFactory) plugins.Register(scorer.QueueScorerType, scorer.QueueScorerFactory) // register filter for test purpose only (used in conformance tests) plugins.Register(testfilter.HeaderBasedTestingFilterType, testfilter.HeaderBasedTestingFilterFactory) diff --git a/config/charts/inferencepool/templates/epp-config.yaml b/config/charts/inferencepool/templates/epp-config.yaml index 12cbd5859..929baa76b 100644 --- a/config/charts/inferencepool/templates/epp-config.yaml +++ b/config/charts/inferencepool/templates/epp-config.yaml @@ -59,7 +59,7 @@ data: kind: EndpointPickerConfig plugins: - type: queue-scorer - - type: kv-cache-scorer + - type: kv-cache-utilization-scorer - type: prefix-cache-scorer parameters: hashBlockSize: 64 @@ -74,7 +74,7 @@ data: plugins: - pluginRef: queue-scorer weight: 1 - - pluginRef: kv-cache-scorer + - pluginRef: kv-cache-utilization-scorer weight: 1 - pluginRef: prefix-cache-scorer weight: 1 diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml index 2e18011f7..fa49b25c7 100644 --- a/config/manifests/inferencepool-resources.yaml +++ b/config/manifests/inferencepool-resources.yaml @@ -153,7 +153,7 @@ data: kind: EndpointPickerConfig plugins: - type: queue-scorer - - type: kv-cache-scorer + - type: kv-cache-utilization-scorer - type: prefix-cache-scorer parameters: hashBlockSize: 64 @@ -168,7 +168,7 @@ data: plugins: - pluginRef: queue-scorer weight: 1 - - pluginRef: kv-cache-scorer + - pluginRef: kv-cache-utilization-scorer weight: 1 - pluginRef: prefix-cache-scorer weight: 1 diff --git a/pkg/epp/scheduling/framework/plugins/filter/filter_test.go b/pkg/epp/scheduling/framework/plugins/filter/filter_test.go index 0643fe245..11f7a6cd0 100644 --- a/pkg/epp/scheduling/framework/plugins/filter/filter_test.go +++ b/pkg/epp/scheduling/framework/plugins/filter/filter_test.go @@ -271,7 +271,7 @@ func TestDecisionTreeFilterFactory(t *testing.T) { loraAffinityFilter := NewLoraAffinityFilter(config.Conf.LoraAffinityThreshold) lowQueueFilter := NewLowQueueFilter(config.Conf.QueueingThresholdLoRA) - kvCacheScorer := scorer.NewKVCacheScorer() + kvCacheScorer := scorer.NewKVCacheUtilizationScorer() testHandle := utils.NewTestHandle(context.Background()) @@ -365,7 +365,7 @@ func TestDecisionTreeFilterFactory(t *testing.T) { } cmpOptions := cmpopts.IgnoreUnexported(LeastKVCacheFilter{}, LeastQueueFilter{}, - LoraAffinityFilter{}, LowQueueFilter{}, scorer.KVCacheScorer{}, plugins.TypedName{}) + LoraAffinityFilter{}, LowQueueFilter{}, scorer.KVCacheUtilizationScorer{}, plugins.TypedName{}) for _, test := range tests { rawParameters := struct { diff --git a/pkg/epp/scheduling/framework/plugins/scorer/kvcache.go b/pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization.go similarity index 54% rename from pkg/epp/scheduling/framework/plugins/scorer/kvcache.go rename to pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization.go index ec3cbb6b2..c58f63534 100644 --- a/pkg/epp/scheduling/framework/plugins/scorer/kvcache.go +++ b/pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization.go @@ -26,42 +26,42 @@ import ( ) const ( - KvCacheScorerType = "kv-cache-scorer" + KvCacheUtilizationScorerType = "kv-cache-utilization-scorer" ) // compile-time type assertion -var _ framework.Scorer = &KVCacheScorer{} +var _ framework.Scorer = &KVCacheUtilizationScorer{} -// KvCacheScorerFactory defines the factory function for KVCacheScorer. -func KvCacheScorerFactory(name string, _ json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) { - return NewKVCacheScorer().WithName(name), nil +// KvCacheUtilizationScorerFactory defines the factory function for KVCacheUtilizationScorer. +func KvCacheUtilizationScorerFactory(name string, _ json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) { + return NewKVCacheUtilizationScorer().WithName(name), nil } -// NewKVCacheScorer initializes a new KVCacheScorer and returns its pointer. -func NewKVCacheScorer() *KVCacheScorer { - return &KVCacheScorer{ - typedName: plugins.TypedName{Type: KvCacheScorerType, Name: KvCacheScorerType}, +// NewKVCacheUtilizationScorer initializes a new KVCacheUtilizationScorer and returns its pointer. +func NewKVCacheUtilizationScorer() *KVCacheUtilizationScorer { + return &KVCacheUtilizationScorer{ + typedName: plugins.TypedName{Type: KvCacheUtilizationScorerType, Name: KvCacheUtilizationScorerType}, } } -// KVCacheScorer scores list of candidate pods based on KV cache utilization. -type KVCacheScorer struct { +// KVCacheUtilizationScorer scores list of candidate pods based on KV cache utilization. +type KVCacheUtilizationScorer struct { typedName plugins.TypedName } // TypedName returns the type and name tuple of this plugin instance. -func (s *KVCacheScorer) TypedName() plugins.TypedName { +func (s *KVCacheUtilizationScorer) TypedName() plugins.TypedName { return s.typedName } // WithName sets the name of the scorer. -func (s *KVCacheScorer) WithName(name string) *KVCacheScorer { +func (s *KVCacheUtilizationScorer) WithName(name string) *KVCacheUtilizationScorer { s.typedName.Name = name return s } // Score returns the scoring result for the given list of pods based on context. -func (s *KVCacheScorer) Score(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 { +func (s *KVCacheUtilizationScorer) Score(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 { scores := make(map[types.Pod]float64, len(pods)) for _, pod := range pods { scores[pod] = 1 - pod.GetMetrics().KVCacheUsagePercent diff --git a/pkg/epp/scheduling/framework/plugins/scorer/kvcache_test.go b/pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization_test.go similarity index 94% rename from pkg/epp/scheduling/framework/plugins/scorer/kvcache_test.go rename to pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization_test.go index 81f6d0ae6..76aaeee31 100644 --- a/pkg/epp/scheduling/framework/plugins/scorer/kvcache_test.go +++ b/pkg/epp/scheduling/framework/plugins/scorer/kvcache_utilization_test.go @@ -27,7 +27,7 @@ import ( "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types" ) -func TestKvCacheScorer(t *testing.T) { +func TestKvCacheUtilizationScorer(t *testing.T) { tests := []struct { name string pods []types.Pod @@ -83,8 +83,7 @@ func TestKvCacheScorer(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - scorer := &KVCacheScorer{} - scores := scorer.Score(context.Background(), types.NewCycleState(), &types.LLMRequest{}, test.pods) + scores := NewKVCacheUtilizationScorer().Score(context.Background(), types.NewCycleState(), &types.LLMRequest{}, test.pods) for i, pod := range test.pods { expectedScore := test.expectedScoresPod[i] diff --git a/site-src/guides/epp-configuration/config-text.md b/site-src/guides/epp-configuration/config-text.md index ed184e014..ceb0ed69e 100644 --- a/site-src/guides/epp-configuration/config-text.md +++ b/site-src/guides/epp-configuration/config-text.md @@ -250,7 +250,7 @@ Picks a random pod from the list of candidates. Scores the candidate pods based on their KV cache utilization. -- *Type*: kv-cache-scorer +- *Type*: kv-cache-utilization-scorer - *Parameters*: none #### **QueueScorer** diff --git a/site-src/guides/inferencepool-rollout.md b/site-src/guides/inferencepool-rollout.md index 88bfe0736..30ce97da6 100644 --- a/site-src/guides/inferencepool-rollout.md +++ b/site-src/guides/inferencepool-rollout.md @@ -355,7 +355,7 @@ data: kind: EndpointPickerConfig plugins: - type: queue-scorer - - type: kv-cache-scorer + - type: kv-cache-utilization-scorer - type: prefix-cache-scorer parameters: hashBlockSize: 64 @@ -370,7 +370,7 @@ data: plugins: - pluginRef: queue-scorer weight: 1 - - pluginRef: kv-cache-scorer + - pluginRef: kv-cache-utilization-scorer weight: 1 - pluginRef: prefix-cache-scorer weight: 1 diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml index ac0bb00dc..3cd63f391 100644 --- a/test/testdata/inferencepool-e2e.yaml +++ b/test/testdata/inferencepool-e2e.yaml @@ -150,7 +150,7 @@ data: kind: EndpointPickerConfig plugins: - type: queue-scorer - - type: kv-cache-scorer + - type: kv-cache-utilization-scorer - type: prefix-cache-scorer parameters: hashBlockSize: 64 @@ -165,7 +165,7 @@ data: plugins: - pluginRef: queue-scorer weight: 1 - - pluginRef: kv-cache-scorer + - pluginRef: kv-cache-utilization-scorer weight: 1 - pluginRef: prefix-cache-scorer weight: 1