nginx · salonichf5 · Oct 17, 2025 · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -451,6 +451,7 @@ jobs:
       build-os: ${{ matrix.build-os }}
       production-release: ${{ inputs.is_production_release == true && (inputs.dry_run == false || inputs.dry_run == null) }}
       release_version: ${{ inputs.release_version }}
+      enable-inference-extension: true
     secrets: inherit
     permissions:
       contents: write

diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml
@@ -16,6 +16,9 @@ on:
       enable-experimental:
         required: true
         type: boolean
+      enable-inference-extension:
+        required: true
+        type: boolean
       production-release:
         required: false
         type: boolean
@@ -32,6 +35,7 @@ defaults:
 env:
   PLUS_USAGE_ENDPOINT: ${{ secrets.JWT_PLUS_REPORTING_ENDPOINT }}
   ENABLE_EXPERIMENTAL: ${{ inputs.enable-experimental }}
+  ENABLE_INFERENCE_EXTENSION: ${{ inputs.enable-inference-extension }}
 
 permissions:
   contents: read
@@ -194,3 +198,25 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: gh release upload ${{ github.ref_name }} conformance-profile.yaml --clobber
         working-directory: ./tests
+
+      - name: Run inference conformance tests
+        run: |
+          make run-inference-conformance-tests CONFORMANCE_TAG=${{ github.sha }} NGF_VERSION=${{ github.ref_name }} CLUSTER_NAME=${{ github.run_id }}
+          # Fail the job when the generated profile reports a core failure.
+          core_result=$(cat conformance-profile-inference.yaml | yq '.profiles[0].core.result')
+          if [ "${core_result}" == "failure" ]; then echo "Inference Conformance test failed, see above for details." && exit 2; fi
+        working-directory: ./tests
+
+      - name: Upload profile to GitHub
+        if: ${{ inputs.enable-experimental }} # add experimental flag to filter result upload
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: conformance-profile-inference-${{ inputs.image }}-${{ inputs.k8s-version }}-${{ steps.ngf-meta.outputs.version }}-${{ github.run_id }}
+          path: ./tests/conformance-profile-inference.yaml
+
+      - name: Upload profile to release
+        if: ${{ inputs.production-release && inputs.enable-experimental }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: gh release upload ${{ github.ref_name }} conformance-profile-inference.yaml --clobber
+        working-directory: ./tests
diff --git a/.gitignore b/.gitignore
@@ -13,6 +13,7 @@
 cover.html
 cmd-cover.html
 conformance-profile.yaml
+conformance-profile-inference.yaml
 
 # Dependency directories (remove the comment below to include it)
 # vendor/

diff --git a/Makefile b/Makefile
@@ -15,6 +15,7 @@ TELEMETRY_ENDPOINT=# if empty, NGF will report telemetry in its logs at debug le
 TELEMETRY_ENDPOINT_INSECURE = false
 
 ENABLE_EXPERIMENTAL ?= false
+ENABLE_INFERENCE_EXTENSION ?= false
 
 # go build flags - should not be overridden by the user
 GO_LINKER_FlAGS_VARS = -X main.version=${VERSION} -X main.telemetryReportPeriod=${TELEMETRY_REPORT_PERIOD} -X main.telemetryEndpoint=${TELEMETRY_ENDPOINT} -X main.telemetryEndpointInsecure=${TELEMETRY_ENDPOINT_INSECURE}
@@ -237,10 +238,16 @@ install-ngf-local-build-with-plus: check-for-plus-usage-endpoint build-images-wi
 
 .PHONY: helm-install-local
 helm-install-local: install-gateway-crds ## Helm install NGF on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build.
+	@if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \
+		$(MAKE) install-inference-crds; \
+	fi
 	helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PREFIX) --create-namespace --wait --set nginxGateway.image.pullPolicy=$(PULL_POLICY) --set nginx.service.type=$(NGINX_SERVICE_TYPE) --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=$(PULL_POLICY) --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway $(HELM_PARAMETERS)
 
 .PHONY: helm-install-local-with-plus
 helm-install-local-with-plus: check-for-plus-usage-endpoint install-gateway-crds ## Helm install NGF with NGINX Plus on configured kind cluster with local images. To build, load, and install with helm run make install-ngf-local-build-with-plus.
+	@if [ "$(ENABLE_INFERENCE_EXTENSION)" = "true" ]; then \
+		$(MAKE) install-inference-crds; \
+	fi
 	kubectl create namespace nginx-gateway || true
 	kubectl -n nginx-gateway create secret generic nplus-license --from-file $(PLUS_LICENSE_FILE) || true
 	helm install nginx-gateway $(CHART_DIR) --set nginx.image.repository=$(NGINX_PLUS_PREFIX) --wait --set nginxGateway.image.pullPolicy=$(PULL_POLICY) --set nginx.service.type=$(NGINX_SERVICE_TYPE) --set nginxGateway.image.repository=$(PREFIX) --set nginxGateway.image.tag=$(TAG) --set nginx.image.tag=$(TAG) --set nginx.image.pullPolicy=$(PULL_POLICY) --set nginxGateway.gwAPIExperimentalFeatures.enable=$(ENABLE_EXPERIMENTAL) -n nginx-gateway --set nginx.plus=true --set nginx.usage.endpoint=$(PLUS_USAGE_ENDPOINT) $(HELM_PARAMETERS)

diff --git a/cmd/gateway/endpoint_picker.go b/cmd/gateway/endpoint_picker.go
@@ -1,17 +1,20 @@
 package main
 
 import (
+	"crypto/tls"
 	"errors"
 	"fmt"
 	"io"
 	"net"
 	"net/http"
+	"strings"
 	"time"
 
 	corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
 	extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
 	"github.com/go-logr/logr"
 	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
 	"google.golang.org/grpc/credentials/insecure"
 	eppMetadata "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metadata"
 
@@ -34,7 +37,19 @@ func endpointPickerServer(handler http.Handler) error {
 // realExtProcClientFactory returns a factory that creates a new gRPC connection and client per request.
 func realExtProcClientFactory() extProcClientFactory {
 	return func(target string) (extprocv3.ExternalProcessorClient, func() error, error) {
-		conn, err := grpc.NewClient(target, grpc.WithTransportCredentials(insecure.NewCredentials()))
+		var opts []grpc.DialOption
+		enableTLS := true
+		insecureSkipVerify := true
+
+		if !enableTLS {
+			opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials()))
+		} else {
+			creds := credentials.NewTLS(&tls.Config{
+				InsecureSkipVerify: insecureSkipVerify, //nolint:gosec
+			})
+			opts = append(opts, grpc.WithTransportCredentials(creds))
+		}
+		conn, err := grpc.NewClient(target, opts...)
 		if err != nil {
 			return nil, nil, err
 		}
@@ -148,8 +163,15 @@ func buildHeaderRequest(r *http.Request) *extprocv3.ProcessingRequest {
 
 	for key, values := range r.Header {
 		for _, value := range values {
+			// Normalize header keys to lowercase for case-insensitive matching.
+			// This addresses the mismatch between Go's default HTTP header normalization (Title-Case)
+			// and EPP's expectation of lowercase header keys. Additionally, HTTP/2 — which gRPC uses —
+			// requires all header field names to be lowercase as specified in RFC 7540, Section 8.1.2:
+			// https://datatracker.ietf.org/doc/html/rfc7540#section-8.1.2
+			normalizedKey := strings.ToLower(key)
+
 			headerMap.Headers = append(headerMap.Headers, &corev3.HeaderValue{
-				Key:   key,
+				Key:   normalizedKey,
 				Value: value,
 			})
 		}

diff --git a/internal/controller/nginx/conf/nginx-plus.conf b/internal/controller/nginx/conf/nginx-plus.conf
@@ -12,8 +12,8 @@ events {
 http {
   include /etc/nginx/conf.d/*.conf;
   include /etc/nginx/mime.types;
-  js_import /usr/lib/nginx/modules/njs/httpmatches.js;
-  js_import /usr/lib/nginx/modules/njs/epp.js;
+  js_import modules/njs/httpmatches.js;
+  js_import modules/njs/epp.js;
 
   default_type application/octet-stream;
 

diff --git a/internal/controller/nginx/conf/nginx.conf b/internal/controller/nginx/conf/nginx.conf
@@ -12,8 +12,8 @@ events {
 http {
   include /etc/nginx/conf.d/*.conf;
   include /etc/nginx/mime.types;
-  js_import /usr/lib/nginx/modules/njs/httpmatches.js;
-  js_import /usr/lib/nginx/modules/njs/epp.js;
+  js_import modules/njs/httpmatches.js;
+  js_import modules/njs/epp.js;
 
   default_type application/octet-stream;
 

diff --git a/internal/controller/nginx/config/maps.go b/internal/controller/nginx/config/maps.go
@@ -186,37 +186,57 @@ func createAddHeadersMap(name string) shared.Map {
 // buildInferenceMaps creates maps for InferencePool Backends.
 func buildInferenceMaps(groups []dataplane.BackendGroup) []shared.Map {
 	inferenceMaps := make([]shared.Map, 0, len(groups))
+
 	for _, group := range groups {
 		for _, backend := range group.Backends {
-			if backend.EndpointPickerConfig != nil {
-				var defaultResult string
-				switch backend.EndpointPickerConfig.FailureMode {
-				// in FailClose mode, if the EPP is unavailable or returns an error,
-				// we return an invalid backend to ensure the request fails
-				case inference.EndpointPickerFailClose:
-					defaultResult = invalidBackendRef
-				// in FailOpen mode, if the EPP is unavailable or returns an error,
-				// we fall back to the upstream
-				case inference.EndpointPickerFailOpen:
-					defaultResult = backend.UpstreamName
-				}
-				params := []shared.MapParameter{
-					{
-						Value:  "~.+",
-						Result: "$inference_workload_endpoint",
-					},
-					{
-						Value:  "default",
-						Result: defaultResult,
-					},
-				}
-				backendVarName := strings.ReplaceAll(backend.UpstreamName, "-", "_")
-				inferenceMaps = append(inferenceMaps, shared.Map{
-					Source:     "$inference_workload_endpoint",
-					Variable:   fmt.Sprintf("$inference_backend_%s", backendVarName),
-					Parameters: params,
-				})
+			if backend.EndpointPickerConfig == nil || backend.EndpointPickerConfig.EndpointPickerRef == nil {
+				continue
+			}
+
+			// Decide the map's fallback ("default") result based on the EPP failure mode.
+			var defaultResult string
+			switch backend.EndpointPickerConfig.EndpointPickerRef.FailureMode {
+			// in FailClose mode, if the EPP is unavailable or returns an error,
+			// we return an invalid backend to ensure the request fails
+			case inference.EndpointPickerFailClose:
+				defaultResult = invalidBackendRef
+
+			// in FailOpen mode, if the EPP is unavailable or returns an error,
+			// we fall back to the upstream
+			case inference.EndpointPickerFailOpen:
+				defaultResult = backend.UpstreamName
 			}
+
+			// Build the ordered parameter list.
+			params := make([]shared.MapParameter, 0, 3)
+
+			// No endpoint was picked by the EPP (empty value): route to the inference pool upstream directly.
+			params = append(params, shared.MapParameter{
+				Value:  `""`,
+				Result: backend.UpstreamName,
+			})
+
+			// endpoint picked by the EPP is stored in $inference_workload_endpoint.
+			params = append(params, shared.MapParameter{
+				Value:  `~.+`,
+				Result: `$inference_workload_endpoint`,
+			})
+
+			// this is set based on EPP failure mode,
+			// if EPP is failOpen, we set the default to the inference pool upstream,
+			// if EPP is failClose, we set the default to invalidBackendRef.
+			params = append(params, shared.MapParameter{
+				Value:  "default",
+				Result: defaultResult,
+			})
+
+			backendVarName := strings.ReplaceAll(backend.UpstreamName, "-", "_")
+
+			inferenceMaps = append(inferenceMaps, shared.Map{
+				Source:     `$inference_workload_endpoint`,
+				Variable:   fmt.Sprintf("$inference_backend_%s", backendVarName),
+				Parameters: params,
+			})
 		}
 	}
 	return inferenceMaps

diff --git a/internal/controller/nginx/config/maps_test.go b/internal/controller/nginx/config/maps_test.go
@@ -73,8 +73,11 @@ func TestExecuteMaps(t *testing.T) {
 				Backends: []dataplane.Backend{
 					{
 						UpstreamName: "upstream1",
-						EndpointPickerConfig: &inference.EndpointPickerRef{
-							FailureMode: inference.EndpointPickerFailClose,
+						EndpointPickerConfig: &dataplane.EndpointPickerConfig{
+							NsName: "default",
+							EndpointPickerRef: &inference.EndpointPickerRef{
+								FailureMode: inference.EndpointPickerFailClose,
+							},
 						},
 					},
 				},
@@ -400,14 +403,20 @@ func TestBuildInferenceMaps(t *testing.T) {
 		Backends: []dataplane.Backend{
 			{
 				UpstreamName: "upstream1",
-				EndpointPickerConfig: &inference.EndpointPickerRef{
-					FailureMode: inference.EndpointPickerFailClose,
+				EndpointPickerConfig: &dataplane.EndpointPickerConfig{
+					NsName: "default",
+					EndpointPickerRef: &inference.EndpointPickerRef{
+						FailureMode: inference.EndpointPickerFailClose,
+					},
 				},
 			},
 			{
 				UpstreamName: "upstream2",
-				EndpointPickerConfig: &inference.EndpointPickerRef{
-					FailureMode: inference.EndpointPickerFailOpen,
+				EndpointPickerConfig: &dataplane.EndpointPickerConfig{
+					NsName: "default",
+					EndpointPickerRef: &inference.EndpointPickerRef{
+						FailureMode: inference.EndpointPickerFailOpen,
+					},
 				},
 			},
 			{
@@ -421,6 +430,22 @@ func TestBuildInferenceMaps(t *testing.T) {
 	g.Expect(maps).To(HaveLen(2))
 	g.Expect(maps[0].Source).To(Equal("$inference_workload_endpoint"))
 	g.Expect(maps[0].Variable).To(Equal("$inference_backend_upstream1"))
-	g.Expect(maps[0].Parameters[1].Result).To(Equal("invalid-backend-ref"))
-	g.Expect(maps[1].Parameters[1].Result).To(Equal("upstream2"))
+	g.Expect(maps[0].Parameters).To(HaveLen(3))
+	g.Expect(maps[0].Parameters[0].Value).To(Equal("\"\""))
+	g.Expect(maps[0].Parameters[0].Result).To(Equal("upstream1"))
+	g.Expect(maps[0].Parameters[1].Value).To(Equal("~.+"))
+	g.Expect(maps[0].Parameters[1].Result).To(Equal("$inference_workload_endpoint"))
+	g.Expect(maps[0].Parameters[2].Value).To(Equal("default"))
+	g.Expect(maps[0].Parameters[2].Result).To(Equal("invalid-backend-ref"))
+
+	// Check the second map
+	g.Expect(maps[1].Source).To(Equal("$inference_workload_endpoint"))
+	g.Expect(maps[1].Variable).To(Equal("$inference_backend_upstream2"))
+	g.Expect(maps[1].Parameters).To(HaveLen(3))
+	g.Expect(maps[1].Parameters[0].Value).To(Equal("\"\""))
+	g.Expect(maps[1].Parameters[0].Result).To(Equal("upstream2"))
+	g.Expect(maps[1].Parameters[1].Value).To(Equal("~.+"))
+	g.Expect(maps[1].Parameters[1].Result).To(Equal("$inference_workload_endpoint"))
+	g.Expect(maps[1].Parameters[2].Value).To(Equal("default"))
+	g.Expect(maps[1].Parameters[2].Result).To(Equal("upstream2"))
 }
diff --git a/internal/controller/nginx/config/servers.go b/internal/controller/nginx/config/servers.go
@@ -452,13 +452,18 @@ func createInternalLocationsForRule(
 			intLocation, match = initializeInternalMatchLocationWithInference(pathRuleIdx, matchRuleIdx, r.Match)
 			intInfLocation := initializeInternalInferenceRedirectLocation(pathRuleIdx, matchRuleIdx)
 			for _, b := range r.BackendGroup.Backends {
-				if b.EndpointPickerConfig != nil {
+				if b.EndpointPickerConfig != nil && b.EndpointPickerConfig.EndpointPickerRef != nil {
+					eppRef := b.EndpointPickerConfig.EndpointPickerRef
 					var portNum int
-					if b.EndpointPickerConfig.Port != nil {
-						portNum = int(b.EndpointPickerConfig.Port.Number)
+					if eppRef.Port != nil {
+						portNum = int(eppRef.Port.Number)
 					}
 					intInfLocation.EPPInternalPath = intLocation.Path
-					intInfLocation.EPPHost = string(b.EndpointPickerConfig.Name)
+					if b.EndpointPickerConfig.NsName != "" {
+						intInfLocation.EPPHost = string(eppRef.Name) + "." + b.EndpointPickerConfig.NsName
+					} else {
+						intInfLocation.EPPHost = string(eppRef.Name)
+					}
 					intInfLocation.EPPPort = portNum
 				}
 			}
@@ -506,14 +511,19 @@ func createInferenceLocationsForRule(
 			mirrorPercentage,
 		)
 		for _, b := range r.BackendGroup.Backends {
-			if b.EndpointPickerConfig != nil {
+			if b.EndpointPickerConfig != nil && b.EndpointPickerConfig.EndpointPickerRef != nil {
 				for i := range extLocations {
+					eppRef := b.EndpointPickerConfig.EndpointPickerRef
 					var portNum int
-					if b.EndpointPickerConfig.Port != nil {
-						portNum = int(b.EndpointPickerConfig.Port.Number)
+					if eppRef.Port != nil {
+						portNum = int(eppRef.Port.Number)
 					}
 					extLocations[i].EPPInternalPath = intLocation.Path
-					extLocations[i].EPPHost = string(b.EndpointPickerConfig.Name)
+					if b.EndpointPickerConfig.NsName != "" {
+						extLocations[i].EPPHost = string(eppRef.Name) + "." + b.EndpointPickerConfig.NsName
+					} else {
+						extLocations[i].EPPHost = string(eppRef.Name)
+					}
 					extLocations[i].EPPPort = portNum
 				}
 			}

diff --git a/internal/controller/nginx/config/servers_test.go b/internal/controller/nginx/config/servers_test.go
@@ -2457,11 +2457,14 @@ func TestCreateLocations_InferenceBackends(t *testing.T) {
 				UpstreamName: "test_foo_80",
 				Valid:        true,
 				Weight:       1,
-				EndpointPickerConfig: &inference.EndpointPickerRef{
-					Name: "test-epp",
-					Port: &inference.Port{
-						Number: 80,
+				EndpointPickerConfig: &dataplane.EndpointPickerConfig{
+					EndpointPickerRef: &inference.EndpointPickerRef{
+						Name: "test-epp",
+						Port: &inference.Port{
+							Number: 80,
+						},
 					},
+					NsName: hrNsName.Namespace,
 				},
 			},
 		},
@@ -2522,7 +2525,7 @@ func TestCreateLocations_InferenceBackends(t *testing.T) {
 					Path:            "= /inference",
 					Type:            http.InferenceExternalLocationType,
 					EPPInternalPath: "/_ngf-internal-rule0-route0-inference",
-					EPPHost:         "test-epp",
+					EPPHost:         "test-epp.test",
 					EPPPort:         80,
 				},
 				createDefaultRootLocation(),
@@ -2542,7 +2545,7 @@ func TestCreateLocations_InferenceBackends(t *testing.T) {
 					Path:            "/_ngf-internal-rule0-route0-inference",
 					Type:            http.InferenceInternalLocationType,
 					EPPInternalPath: "/_ngf-internal-rule0-route0",
-					EPPHost:         "test-epp",
+					EPPHost:         "test-epp.test",
 					EPPPort:         80,
 				},
 				{