diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 5a5663d1a..b6629d2b8 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -133,7 +133,9 @@ inferenceExtension: **Note:** Prometheus monitoring requires the Prometheus Operator and ServiceMonitor CRD to be installed in the cluster. -For GKE environments, monitoring is automatically configured when `provider.name` is set to `gke`. +For GKE environments, monitoring is enabled by setting `provider.name` to `gke` and `inferenceExtension.monitoring.gke.enabled` to `true`. This will create the necessary `PodMonitoring` and RBAC resources for metrics collection. + +If you are using a GKE Autopilot cluster, you also need to set `provider.gke.autopilot` to `true`. Then apply it with: @@ -174,8 +176,10 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.monitoring.interval` | Metrics scraping interval for monitoring. Defaults to `10s`. | | `inferenceExtension.monitoring.secret.name` | Name of the service account token secret for metrics authentication. Defaults to `inference-gateway-sa-metrics-reader-secret`. | | `inferenceExtension.monitoring.prometheus.enabled` | Enable Prometheus ServiceMonitor creation for EPP metrics collection. Defaults to `false`. | +| `inferenceExtension.monitoring.gke.enabled` | Enable GKE monitoring resources (`PodMonitoring` and RBAC). Defaults to `false`. | | `inferenceExtension.pluginsCustomConfig` | Custom config that is passed to EPP as inline yaml. | | `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`. | +| `provider.gke.autopilot` | Set to `true` if the cluster is a GKE Autopilot cluster. This is only used if `provider.name` is `gke`. Defaults to `false`. | ## Notes diff --git a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml index 9abee0fcd..df54b3475 100644 --- a/config/charts/inferencepool/templates/epp-sa-token-secret.yaml +++ b/config/charts/inferencepool/templates/epp-sa-token-secret.yaml @@ -1,4 +1,4 @@ -{{- if or .Values.inferenceExtension.monitoring.prometheus.enabled .Values.inferenceExtension.monitoring.gke.enabled }} +{{- if .Values.inferenceExtension.monitoring.prometheus.enabled }} apiVersion: v1 kind: Secret metadata: diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index f2296aafb..59e186a94 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -35,11 +35,44 @@ spec: timeoutSec: 300 # 5-minute timeout (adjust as needed) logging: enabled: true # log all requests by default +{{- if .Values.inferenceExtension.monitoring.gke.enabled }} +{{- $metricsReadSA := printf "%s-metrics-reader-sa" .Release.Name -}} +{{- $metricsReadSecretName := printf "%s-metrics-reader-secret" .Release.Name -}} +{{- $metricsReadRoleName := printf "%s-%s-metrics-reader" .Release.Namespace .Release.Name -}} +{{- $metricsReadRoleBindingName := printf "%s-%s-metrics-reader-role-binding" .Release.Namespace .Release.Name -}} +{{- $secretReadRoleName := printf "%s-metrics-reader-secret-read" .Release.Name -}} +{{- $gmpNamespace := "gmp-system" -}} +{{- $isAutopilot := false -}} +{{- with .Values.provider.gke }} + {{- $isAutopilot = .autopilot | default false -}} +{{- end }} +{{- if $isAutopilot -}} +{{- $gmpNamespace = "gke-gmp-system" -}} +{{- end -}} +{{- $gmpCollectorRoleBindingName := printf "%s:collector:%s-%s-metrics-reader-secret-read" $gmpNamespace .Release.Namespace .Release.Name -}} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ $metricsReadSA }} + namespace: {{ .Release.Namespace }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ $metricsReadSecretName }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} + annotations: + kubernetes.io/service-account.name: {{ $metricsReadSA }} +type: kubernetes.io/service-account-token --- apiVersion: monitoring.googleapis.com/v1 -kind: ClusterPodMonitoring +kind: PodMonitoring metadata: - name: {{ .Release.Namespace }}-{{ .Release.Name }} + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} spec: @@ -52,10 +85,58 @@ spec: type: Bearer credentials: secret: - name: {{ .Values.inferenceExtension.monitoring.secret.name }} + name: {{ $metricsReadSecretName }} key: token - namespace: {{ .Release.Namespace }} selector: matchLabels: {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ $metricsReadRoleName }} +rules: +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ $metricsReadRoleBindingName }} +subjects: +- kind: ServiceAccount + name: {{ $metricsReadSA }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ $metricsReadRoleName }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ $secretReadRoleName }} +rules: +- resources: + - secrets + apiGroups: [""] + verbs: ["get", "list", "watch"] + resourceNames: [{{ $metricsReadSecretName | quote }}] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ $gmpCollectorRoleBindingName }} + namespace: {{ .Release.Namespace }} +roleRef: + name: {{ $secretReadRoleName }} + kind: Role + apiGroup: rbac.authorization.k8s.io +subjects: +- name: collector + namespace: {{ $gmpNamespace }} + kind: ServiceAccount +{{- end }} {{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index f61b64e37..6476bd800 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -50,6 +50,9 @@ inferenceExtension: # Prometheus ServiceMonitor will be created when enabled for EPP metrics collection prometheus: enabled: false + + gke: + enabled: false inferencePool: targetPorts: @@ -67,3 +70,8 @@ inferencePool: provider: name: none + # GKE-specific configuration. + # This block is only used if name is "gke". + gke: + # Set to true if the cluster is an Autopilot cluster. + autopilot: false