diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
index 82be6b85c..c7374bcd1 100644
--- a/config/charts/inferencepool/README.md
+++ b/config/charts/inferencepool/README.md
@@ -131,6 +131,7 @@ The following table list the configurable parameters of the chart.
 
 | **Parameter Name** | **Description** |
 |---------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
+| `inferencePool.apiVersion` | The API version of the InferencePool resource. Defaults to `inference.networking.k8s.io/v1`. This can be changed to `inference.networking.x-k8s.io/v1alpha2` to support older API versions. |
 | `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
 | `inferencePool.modelServerType` | Type of the model servers in the pool, valid options are [vllm, triton-tensorrt-llm], default is vllm. |
 | `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
index f01699a96..dce1ed45c 100644
--- a/config/charts/inferencepool/templates/epp-deployment.yaml
+++ b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -27,6 +27,11 @@ spec:
         - {{ .Release.Name }}
         - --pool-namespace
         - {{ .Release.Namespace }}
+        {{- /* Compare only the API group: apiVersion is "<group>/<version>", so comparing the whole value to a bare group name would never match and --pool-group would always be emitted. */}}
+        {{- if ne (split "/" .Values.inferencePool.apiVersion)._0 "inference.networking.k8s.io" }}
+        - --pool-group
+        - "{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"
+        {{- end }}
         - --zap-encoder
         - "json"
         - --config-file
diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml
index 92010c0d0..470063c79 100644
--- a/config/charts/inferencepool/templates/gke.yaml
+++ b/config/charts/inferencepool/templates/gke.yaml
@@ -9,7 +9,7 @@ metadata:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
 spec:
   targetRef:
-    group: "inference.networking.k8s.io"
+    group: "{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"
     kind: InferencePool
     name: {{ .Release.Name }}
   default:
@@ -28,7 +28,7 @@ metadata:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
 spec:
   targetRef:
-    group: "inference.networking.k8s.io"
+    group: "{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"
     kind: InferencePool
     name: {{ .Release.Name }}
   default:
diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml
index a8d891c32..4924e4325 100644
--- a/config/charts/inferencepool/templates/rbac.yaml
+++ b/config/charts/inferencepool/templates/rbac.yaml
@@ -40,9 +40,9 @@ metadata:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
 rules:
 - apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferenceobjectives", "inferencepools"]
+  resources: ["inferenceobjectives"]
   verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.k8s.io"]
+- apiGroups: ["{{ (split "/" .Values.inferencePool.apiVersion)._0 }}"]
   resources: ["inferencepools"]
   verbs: ["get", "watch", "list"]
 - apiGroups: [""]