Skip to content

Commit 032c0dc

Browse files
sjberman authored and salonichf5 committed
Watch InferencePools and configure nginx (#3894)
This commit adds support for the control plane to watch InferencePools. A feature flag has been added to enable/disable processing these resources. By default, it is disabled. When an HTTPRoute references an InferencePool, we will create a headless Service associated with that InferencePool, and reference it internally in the graph config for that Route. This allows us to use all of our existing logic to get the endpoints and build the proper nginx config for those endpoints. In a future commit, the nginx config will be updated to handle the proper load balancing for the AI workloads, but for now we just use our default methods by proxy_passing to the upstream.
1 parent 308136b commit 032c0dc

File tree

39 files changed

+2699
-64
lines changed

39 files changed

+2699
-64
lines changed

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,14 @@ install-gateway-crds: ## Install Gateway API CRDs
139139
uninstall-gateway-crds: ## Uninstall Gateway API CRDs
140140
kubectl kustomize $(SELF_DIR)config/crd/gateway-api/$(if $(filter true,$(ENABLE_EXPERIMENTAL)),experimental,standard) | kubectl delete -f -
141141

142+
.PHONY: install-inference-crds
143+
install-inference-crds: ## Install Gateway API Inference Extension CRDs
144+
kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl apply -f -
145+
146+
.PHONY: uninstall-inference-crds
147+
uninstall-inference-crds: ## Uninstall Gateway API Inference Extension CRDs
148+
kubectl kustomize $(SELF_DIR)config/crd/inference-extension | kubectl delete -f -
149+
142150
.PHONY: generate-manifests
143151
generate-manifests: ## Generate manifests using Helm.
144152
./scripts/generate-manifests.sh

charts/nginx-gateway-fabric/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
245245
| `nginx.usage.resolver` | The nameserver used to resolve the NGINX Plus usage reporting endpoint. Used with NGINX Instance Manager. | string | `""` |
246246
| `nginx.usage.secretName` | The name of the Secret containing the JWT for NGINX Plus usage reporting. Must exist in the same namespace that the NGINX Gateway Fabric control plane is running in (default namespace: nginx-gateway). | string | `"nplus-license"` |
247247
| `nginx.usage.skipVerify` | Disable client verification of the NGINX Plus usage reporting server certificate. | bool | `false` |
248-
| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
248+
| `nginxGateway` | The nginxGateway section contains configuration for the NGINX Gateway Fabric control plane deployment. | object | `{"affinity":{},"autoscaling":{"enable":false},"config":{"logging":{"level":"info"}},"configAnnotations":{},"extraVolumeMounts":[],"extraVolumes":[],"gatewayClassAnnotations":{},"gatewayClassName":"nginx","gatewayControllerName":"gateway.nginx.org/nginx-gateway-controller","gwAPIExperimentalFeatures":{"enable":false},"gwAPIInferenceExtension":{"enable":false},"image":{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"},"kind":"deployment","labels":{},"leaderElection":{"enable":true,"lockName":""},"lifecycle":{},"metrics":{"enable":true,"port":9113,"secure":false},"name":"","nodeSelector":{},"podAnnotations":{},"productTelemetry":{"enable":true},"readinessProbe":{"enable":true,"initialDelaySeconds":3,"port":8081},"replicas":1,"resources":{},"service":{"annotations":{},"labels":{}},"serviceAccount":{"annotations":{},"imagePullSecret":"","imagePullSecrets":[],"name":""},"snippetsFilters":{"enable":false},"terminationGracePeriodSeconds":30,"tolerations":[],"topologySpreadConstraints":[]}` |
249249
| `nginxGateway.affinity` | The affinity of the NGINX Gateway Fabric control plane pod. | object | `{}` |
250250
| `nginxGateway.autoscaling` | Autoscaling configuration for the NGINX Gateway Fabric control plane. | object | `{"enable":false}` |
251251
| `nginxGateway.autoscaling.enable` | Enable or disable Horizontal Pod Autoscaler for the control plane. | bool | `false` |
@@ -257,6 +257,7 @@ The following table lists the configurable parameters of the NGINX Gateway Fabri
257257
| `nginxGateway.gatewayClassName` | The name of the GatewayClass that will be created as part of this release. Every NGINX Gateway Fabric must have a unique corresponding GatewayClass resource. NGINX Gateway Fabric only processes resources that belong to its class - i.e. resources whose "gatewayClassName" field equals the class name. | string | `"nginx"` |
258258
| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | string | `"gateway.nginx.org/nginx-gateway-controller"` |
259259
| `nginxGateway.gwAPIExperimentalFeatures.enable` | Enable the experimental features of Gateway API which are supported by NGINX Gateway Fabric. Requires the Gateway APIs installed from the experimental channel. | bool | `false` |
260+
| `nginxGateway.gwAPIInferenceExtension.enable` | Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads. | bool | `false` |
260261
| `nginxGateway.image` | The image configuration for the NGINX Gateway Fabric control plane. | object | `{"pullPolicy":"Always","repository":"ghcr.io/nginx/nginx-gateway-fabric","tag":"edge"}` |
261262
| `nginxGateway.image.repository` | The NGINX Gateway Fabric image to use | string | `"ghcr.io/nginx/nginx-gateway-fabric"` |
262263
| `nginxGateway.kind` | The kind of the NGINX Gateway Fabric installation - currently, only deployment is supported. | string | `"deployment"` |

charts/nginx-gateway-fabric/templates/clusterrole.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,22 @@ rules:
147147
{{- end }}
148148
verbs:
149149
- update
150+
{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
151+
- apiGroups:
152+
- inference.networking.k8s.io
153+
resources:
154+
- inferencepools
155+
verbs:
156+
- get
157+
- list
158+
- watch
159+
- apiGroups:
160+
- inference.networking.k8s.io
161+
resources:
162+
- inferencepools/status
163+
verbs:
164+
- update
165+
{{- end }}
150166
{{- if .Values.nginxGateway.leaderElection.enable }}
151167
- apiGroups:
152168
- coordination.k8s.io

charts/nginx-gateway-fabric/templates/deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ spec:
100100
{{- if .Values.nginxGateway.gwAPIExperimentalFeatures.enable }}
101101
- --gateway-api-experimental-features
102102
{{- end }}
103+
{{- if .Values.nginxGateway.gwAPIInferenceExtension.enable }}
104+
- --gateway-api-inference-extension
105+
{{- end }}
103106
{{- if .Values.nginxGateway.snippetsFilters.enable }}
104107
- --snippets-filters
105108
{{- end }}

charts/nginx-gateway-fabric/values.schema.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,20 @@
838838
"title": "gwAPIExperimentalFeatures",
839839
"type": "object"
840840
},
841+
"gwAPIInferenceExtension": {
842+
"properties": {
843+
"enable": {
844+
"default": false,
845+
"description": "Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.",
846+
"required": [],
847+
"title": "enable",
848+
"type": "boolean"
849+
}
850+
},
851+
"required": [],
852+
"title": "gwAPIInferenceExtension",
853+
"type": "object"
854+
},
841855
"image": {
842856
"description": "The image configuration for the NGINX Gateway Fabric control plane.",
843857
"properties": {

charts/nginx-gateway-fabric/values.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ nginxGateway:
210210
# APIs installed from the experimental channel.
211211
enable: false
212212

213+
gwAPIInferenceExtension:
214+
# -- Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route traffic to AI workloads.
215+
enable: false
216+
213217
snippetsFilters:
214218
# -- Enable SnippetsFilters feature. SnippetsFilters allow inserting NGINX configuration into the generated NGINX
215219
# config for HTTPRoute and GRPCRoute resources.

cmd/gateway/commands.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ func createControllerCommand() *cobra.Command {
8585
leaderElectionLockNameFlag = "leader-election-lock-name"
8686
productTelemetryDisableFlag = "product-telemetry-disable"
8787
gwAPIExperimentalFlag = "gateway-api-experimental-features"
88+
gwAPIInferenceExtensionFlag = "gateway-api-inference-extension"
8889
nginxDockerSecretFlag = "nginx-docker-secret" //nolint:gosec // not credentials
8990
usageReportSecretFlag = "usage-report-secret"
9091
usageReportEndpointFlag = "usage-report-endpoint"
@@ -151,6 +152,7 @@ func createControllerCommand() *cobra.Command {
151152
}
152153

153154
gwExperimentalFeatures bool
155+
gwInferenceExtension bool
154156

155157
disableProductTelemetry bool
156158

@@ -270,6 +272,7 @@ func createControllerCommand() *cobra.Command {
270272
},
271273
Plus: plus,
272274
ExperimentalFeatures: gwExperimentalFeatures,
275+
InferenceExtension: gwInferenceExtension,
273276
ImageSource: imageSource,
274277
Flags: config.Flags{
275278
Names: flagKeys,
@@ -430,6 +433,14 @@ func createControllerCommand() *cobra.Command {
430433
"Requires the Gateway APIs installed from the experimental channel.",
431434
)
432435

436+
cmd.Flags().BoolVar(
437+
&gwInferenceExtension,
438+
gwAPIInferenceExtensionFlag,
439+
false,
440+
"Enable Gateway API Inference Extension support. Allows for configuring InferencePools to route "+
441+
"traffic to AI workloads.",
442+
)
443+
433444
cmd.Flags().Var(
434445
&nginxDockerSecrets,
435446
nginxDockerSecretFlag,
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
apiVersion: kustomize.config.k8s.io/v1beta1
2+
kind: Kustomization
3+
resources:
4+
- https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd?timeout=120&ref=v1.0.0

0 commit comments

Comments
 (0)