Skip to content

Commit dc5e130

Browse files
committed
Watch InferencePools and configure nginx
This commit adds support for the control plane to watch InferencePools. A feature flag has been added to enable or disable processing of these resources; it is disabled by default. When an HTTPRoute references an InferencePool, we create a headless Service associated with that InferencePool and reference it internally in the graph config for that Route. This allows us to reuse all of our existing logic to get the endpoints and build the proper nginx config for those endpoints. In a future commit, the nginx config will be updated to handle the proper load balancing for the AI workloads, but for now we just use our default method of proxy_passing to the upstream.
1 parent c63f9fe commit dc5e130

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+2539
-133
lines changed

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@ install-gateway-crds: ## Install Gateway API CRDs
136136
uninstall-gateway-crds: ## Uninstall Gateway API CRDs
137137
kubectl kustomize $(SELF_DIR)config/crd/gateway-api/$(if $(filter true,$(ENABLE_EXPERIMENTAL)),experimental,standard) | kubectl delete -f -
138138

139+
.PHONY: install-inference-crds
140+
install-inference-crds: ## Install Gateway API Inference Extension CRDs
141+
kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl apply -f -
142+
143+
.PHONY: uninstall-inference-crds
144+
uninstall-inference-crds: ## Uninstall Gateway API Inference Extension CRDs
145+
kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl delete -f -
146+
139147
.PHONY: generate-manifests
140148
generate-manifests: ## Generate manifests using Helm.
141149
./scripts/generate-manifests.sh

charts/nginx-gateway-fabric/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
244244
| `nginx.usage.resolver` | The nameserver used to resolve the NGINX Plus usage reporting endpoint. Used with NGINX Instance Manager. | string | `""` |
245245
| `nginx.usage.secretName` | The name of the Secret containing the JWT for NGINX Plus usage reporting. Must exist in the same namespace that the NGINX Gateway Fabric control plane is running in (default namespace: nginx-gateway). | string | `"nplus-license"` |
246246
| `nginx.usage.skipVerify` | Disable client verification of the NGINX Plus usage reporting server certificate. | bool | `false` |
247-
| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
247+
| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
248248
| `nginxGateway.affinity` | The affinity of the NGINX Gateway Fabric control plane pod. | object | `{}` |
249249
| `nginxGateway.autoscaling` | Autoscaling configuration for the NGINX Gateway Fabric control plane. | object | `{"enable":false}` |
250250
| `nginxGateway.autoscaling.enable` | Enable or disable Horizontal Pod Autoscaler for the control plane. | bool | `false` |
@@ -256,6 +256,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
256256
| `nginxGateway.gatewayClassName` | The name of the GatewayClass that will be created as part of this release. Every NGINX Gateway Fabric must have a unique corresponding GatewayClass resource. NGINX Gateway Fabric only processes resources that belong to its class - i.e. resources whose "gatewayClassName" field is equal to the name of this class. | string | `"nginx"` |
257257
| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | string | `"gateway.nginx.org/nginx-gateway-controller"` |
258258
| `nginxGateway.gwAPIExperimentalFeatures.enable` | Enable the experimental features of Gateway API which are supported by NGINX Gateway Fabric. Requires the Gateway APIs installed from the experimental channel. | bool | `false` |
259+
| `nginxGateway.gwAPIInferenceExtension.enable` | Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. | bool | `false` |
259260
| `nginxGateway.image` | The image configuration for the NGINX Gateway Fabric control plane. | object | `{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"}` |
260261
| `nginxGateway.image.repository` | The NGINX Gateway Fabric image to use | string | `"ghcr.io/nginx/nginx-gateway-fabric"` |
261262
| `nginxGateway.kind` | The kind of the NGINX Gateway Fabric installation - currently, only deployment is supported. | string | `"deployment"` |

charts/nginx-gateway-fabric/templates/clusterrole.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,22 @@ rules:
129129
{{- end }}
130130
verbs:
131131
- update
132+
{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
133+
- apiGroups:
134+
- inference.networking.k8s.io
135+
resources:
136+
- inferencepools
137+
verbs:
138+
- get
139+
- list
140+
- watch
141+
- apiGroups:
142+
- inference.networking.k8s.io
143+
resources:
144+
- inferencepools/status
145+
verbs:
146+
- update
147+
{{- end }}
132148
{{- if .Values.nginxGateway.leaderElection.enable }}
133149
- apiGroups:
134150
- coordination.k8s.io

charts/nginx-gateway-fabric/templates/deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ spec:
9797
{{- if .Values.nginxGateway.gwAPIExperimentalFeatures.enable }}
9898
- --gateway-api-experimental-features
9999
{{- end }}
100+
{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
101+
- --gateway-api-inference-extension
102+
{{- end }}
100103
{{- if .Values.nginxGateway.snippetsFilters.enable }}
101104
- --snippets-filters
102105
{{- end }}

charts/nginx-gateway-fabric/values.schema.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,20 @@
831831
"title": "gwAPIExperimentalFeatures",
832832
"type": "object"
833833
},
834+
"gwAPIInferenceExtension": {
835+
"properties": {
836+
"enable": {
837+
"default": false,
838+
"description": "Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.",
839+
"required": [],
840+
"title": "enable",
841+
"type": "boolean"
842+
}
843+
},
844+
"required": [],
845+
"title": "gwAPIInferenceExtension",
846+
"type": "object"
847+
},
834848
"image": {
835849
"description": "The image configuration for the NGINX Gateway Fabric control plane.",
836850
"properties": {

charts/nginx-gateway-fabric/values.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ nginxGateway:
210210
# APIs installed from the experimental channel.
211211
enable: false
212212

213+
gwAPIInferenceExtension:
214+
# -- Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.
215+
enable: false
216+
213217
snippetsFilters:
214218
# -- Enable SnippetsFilters feature. SnippetsFilters allow inserting NGINX configuration into the generated NGINX
215219
# config for HTTPRoute and GRPCRoute resources.

cmd/gateway/commands.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ func createControllerCommand() *cobra.Command {
7474
leaderElectionLockNameFlag = "leader-election-lock-name"
7575
productTelemetryDisableFlag = "product-telemetry-disable"
7676
gwAPIExperimentalFlag = "gateway-api-experimental-features"
77+
gwAPIInferenceExtensionFlag = "gateway-api-inference-extension"
7778
nginxDockerSecretFlag = "nginx-docker-secret" //nolint:gosec // not credentials
7879
usageReportSecretFlag = "usage-report-secret"
7980
usageReportEndpointFlag = "usage-report-endpoint"
@@ -139,6 +140,7 @@ func createControllerCommand() *cobra.Command {
139140
}
140141

141142
gwExperimentalFeatures bool
143+
gwInferenceExtension bool
142144

143145
disableProductTelemetry bool
144146

@@ -264,6 +266,7 @@ func createControllerCommand() *cobra.Command {
264266
},
265267
Plus: plus,
266268
ExperimentalFeatures: gwExperimentalFeatures,
269+
InferenceExtension: gwInferenceExtension,
267270
ImageSource: imageSource,
268271
Flags: config.Flags{
269272
Names: flagKeys,
@@ -424,6 +427,14 @@ func createControllerCommand() *cobra.Command {
424427
"Requires the Gateway APIs installed from the experimental channel.",
425428
)
426429

430+
cmd.Flags().BoolVar(
431+
&gwInferenceExtension,
432+
gwAPIInferenceExtensionFlag,
433+
false,
434+
"Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route "+
435+
"traffic to AI workloads.",
436+
)
437+
427438
cmd.Flags().Var(
428439
&nginxDockerSecrets,
429440
nginxDockerSecretFlag,
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
apiVersion: kustomize.config.k8s.io/v1beta1
2+
kind: Kustomization
3+
resources:
4+
- https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd?timeout=120&ref=v1.0.0

0 commit comments

Comments
 (0)