Skip to content

Commit 97a8239

Browse files
committed
Add support for plugin configuration in the InferencePool helm chart
1 parent 4ffb5f6 commit 97a8239

File tree

3 files changed

+110
-0
lines changed

3 files changed

+110
-0
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
{{- /*
ConfigMap holding endpoint picker (EPP) plugin configuration files.

Each key under `data` is one EndpointPickerConfig document:
  - default-plugins.yaml: filter-based scheduling built around a
    decision-tree filter, finished by a random picker.
  - plugins-v2.yaml: scorer-based scheduling (queue, kv-cache and
    prefix-cache scorers, weight 1 each), finished by a max-score picker.

Extra files can be supplied through `.Values.inferenceExtension.additionalConfigs`
(a map of file name -> file content). `with` is used instead of `hasKey` so an
empty or null value renders nothing, rather than emitting `{}` / `null` under
`data` and producing an invalid manifest.

NOTE(review): the decision-tree nesting below places `nextOnFailure` as a
sibling of the root `nextOnSuccess`; confirm against the intended tree.
*/ -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "gateway-api-inference-extension.name" . }}
  namespace: {{ .Release.Namespace }}
data:
  default-plugins.yaml: |
    apiVersion: inference.networking.x-k8s.io/v1alpha1
    kind: EndpointPickerConfig
    plugins:
    - type: low-queue-filter
      parameters:
        threshold: 128
    - type: lora-affinity-filter
      parameters:
        threshold: 0.999
    - type: least-queue-filter
    - type: least-kv-cache-filter
    - type: decision-tree-filter
      name: low-latency-filter
      parameters:
        current:
          pluginRef: low-queue-filter
        nextOnSuccess:
          decisionTree:
            current:
              pluginRef: lora-affinity-filter
            nextOnSuccessOrFailure:
              decisionTree:
                current:
                  pluginRef: least-queue-filter
                nextOnSuccessOrFailure:
                  decisionTree:
                    current:
                      pluginRef: least-kv-cache-filter
        nextOnFailure:
          decisionTree:
            current:
              pluginRef: least-queue-filter
            nextOnSuccessOrFailure:
              decisionTree:
                current:
                  pluginRef: lora-affinity-filter
                nextOnSuccessOrFailure:
                  decisionTree:
                    current:
                      pluginRef: least-kv-cache-filter
    - type: random-picker
      parameters:
        maxNumOfEndpoints: 1
    - type: single-profile-handler
    schedulingProfiles:
    - name: default
      plugins:
      - pluginRef: low-latency-filter
      - pluginRef: random-picker
  plugins-v2.yaml: |
    apiVersion: inference.networking.x-k8s.io/v1alpha1
    kind: EndpointPickerConfig
    plugins:
    - type: queue-scorer
    - type: kv-cache-scorer
    - type: prefix-cache-scorer
      parameters:
        hashBlockSize: 64
        maxPrefixBlocksToMatch: 256
        lruCapacityPerServer: 31250
    - type: max-score-picker
      parameters:
        maxNumOfEndpoints: 1
    - type: single-profile-handler
    schedulingProfiles:
    - name: default
      plugins:
      - pluginRef: queue-scorer
        weight: 1
      - pluginRef: kv-cache-scorer
        weight: 1
      - pluginRef: prefix-cache-scorer
        weight: 1
      - pluginRef: max-score-picker
  {{- with .Values.inferenceExtension.additionalConfigs }}
  {{- toYaml . | nindent 2 }}
  {{- end }}
config/charts/inferencepool/templates/epp-deployment.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ spec:
3535
- "9003"
3636
- -metricsPort
3737
- "9090"
38+
- -configFile
39+
- "/config/default-plugins.yaml"
3840
# https://pkg.go.dev/flag#hdr-Command_line_flag_syntax; space is only for non-bool flags
3941
- "-enablePprof={{ .Values.inferenceExtension.enablePprof }}"
4042
{{- if eq (.Values.inferencePool.modelServerType | default "vllm") "triton-tensorrt-llm" }}
@@ -69,3 +71,10 @@ spec:
6971
- name: {{ $key }}
7072
value: {{ $value | quote }}
7173
{{- end }}
74+
volumeMounts:
75+
- name: plugins-config-volume
76+
mountPath: "/config"
77+
volumes:
78+
- name: plugins-config-volume
79+
configMap:
80+
name: {{ include "gateway-api-inference-extension.name" . }}

config/charts/inferencepool/values.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,22 @@ inferenceExtension:
88
extProcPort: 9002
99
env: {}
1010
enablePprof: true # Enable pprof handlers for profiling and debugging
11+
# additionalConfigs:
12+
# custom-config.yaml: |
13+
# apiVersion: inference.networking.x-k8s.io/v1alpha1
14+
# kind: EndpointPickerConfig
15+
# plugins:
16+
# - type: custom-scorer
17+
# parameters:
18+
# custom-threshold: 64
19+
# - type: max-score-picker
20+
# - type: single-profile-handler
21+
# schedulingProfiles:
22+
# - name: default
23+
# plugins:
24+
# - pluginRef: custom-scorer
25+
# - pluginRef: max-score-picker
26+
1127
# Example environment variables:
1228
# env:
1329
# KV_CACHE_SCORE_WEIGHT: "1"

0 commit comments

Comments
 (0)