This repository was archived by the owner on May 15, 2025. It is now read-only.
File tree Expand file tree Collapse file tree 7 files changed +169
-0
lines changed
deploy/components/inference-gateway Expand file tree Collapse file tree 7 files changed +169
-0
---
# Configuration for the endpoint picker, shipped as a single file entry
# (config.yaml). The endpoint-picker Deployment mounts this ConfigMap at
# /etc/endpoint-picker and points the binary at the file with --config-file.
#
# listening_port (9080) matches the container port and the Service port that
# the EnvoyFilter's ext_proc cluster name refers to.
#
# NOTE(review): routing_filters has no value, so it parses as null rather than
# an empty list, and ai-aware-router-pod is an unquoted boolean-looking value.
# Confirm that the picker's config loader accepts both as intended.
apiVersion: v1
kind: ConfigMap
metadata:
  name: endpoint-picker-config
data:
  # Literal block scalar: everything below is passed to the picker verbatim,
  # so no comments are placed inside it.
  config.yaml: |
    pod_selector:
      ai-aware-router-pod: true
    routing_filters:
    routing_scorers:
      - name: session-affinity
        weight: 60
      - name: route-by-active-lora
        weight: 50
    routing_header: x-ai-aware-router-routing
    session_id_header: x-ai-aware-router-session-id
    listening_port: 9080
    inference_port: 8000
---
# Runs the endpoint picker as a single-replica Deployment. The container
# serves gRPC on port 9080 and reads its settings from the
# endpoint-picker-config ConfigMap, mounted at /etc/endpoint-picker.
#
# NOTE(review): no resource requests/limits or readiness/liveness probes are
# declared here; confirm whether that is intentional for this component.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: endpoint-picker
spec:
  replicas: 1
  selector:
    matchLabels:
      app: endpoint-picker
  template:
    metadata:
      labels:
        app: endpoint-picker
    spec:
      # Identity that the endpoint-picker Role is bound to (pod get/list/watch).
      serviceAccountName: endpoint-picker
      volumes:
        - name: endpoint-picker-config
          configMap:
            name: endpoint-picker-config
      containers:
        - name: endpoint-picker
          # The tag is rewritten by the Kustomization's `images` entry.
          image: inference-router/router-ext-proc:latest
          args:
            - '--config-file'
            - '/etc/endpoint-picker/config.yaml'
          ports:
            - name: grpc
              containerPort: 9080
              protocol: TCP
          volumeMounts:
            - name: endpoint-picker-config
              mountPath: /etc/endpoint-picker
---
# Attaches the endpoint picker to Envoy as an external processor (ext_proc).
# The filter is inserted first in the HTTP filter chain of the HTTP connection
# manager and talks gRPC to the endpoint-picker Service on port 9080.
#
# REPLACE_NAMESPACE in the cluster name must be substituted with the namespace
# the endpoint-picker Service is deployed to before this manifest is applied.
#
# NOTE(review): there is no workloadSelector/targetRefs, so every gateway proxy
# in the namespace receives this patch. Consider pinning it to the
# inference-gateway workload; the pod label to select on depends on the Istio
# version in use - confirm before adding.
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
  name: endpoint-picker
spec:
  configPatches:
    - applyTo: HTTP_FILTER
      match:
        # Restrict the patch to gateway proxies. Without a context the patch
        # matches any proxy, so sidecars in the namespace would also get the
        # ext_proc filter on their HTTP connection managers.
        context: GATEWAY
        listener:
          filterChain:
            filter:
              name: "envoy.filters.network.http_connection_manager"
      patch:
        operation: INSERT_FIRST
        value:
          name: envoy.filters.http.ext_proc
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
            # Fail closed: do not let requests bypass the picker on error.
            failure_mode_allow: false
            # Let the processor change the processing mode per request.
            allow_mode_override: true
            processing_mode:
              request_header_mode: "SEND"
              response_header_mode: "SEND"
              # Bodies are buffered in full before being sent to the picker.
              request_body_mode: "BUFFERED"
              response_body_mode: "BUFFERED"
              request_trailer_mode: "SEND"
              response_trailer_mode: "SKIP"
            grpc_service:
              envoy_grpc:
                # Istio cluster name format: direction|port|subset|host.
                cluster_name: outbound|9080||endpoint-picker.REPLACE_NAMESPACE.svc.cluster.local
              timeout: 5s
---
# Kubernetes Gateway API Gateway using the `istio` gateway class. It exposes a
# single HTTP listener on port 80; the service-type annotation asks for a
# ClusterIP Service. Routes (e.g. an HTTPRoute) are not defined here.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: inference-gateway
  annotations:
    networking.istio.io/service-type: ClusterIP
  labels:
    # Istio revision label for this Gateway.
    istio.io/rev: istio-control-plane
spec:
  gatewayClassName: istio
  listeners:
    - name: default
      protocol: HTTP
      port: 80
# ------------------------------------------------------------------------------
# Inference Gateway
#
# This deploys a Gateway and the Endpoint Picker (EPP), and attaches the EPP to
# the Gateway with an EnvoyFilter.
#
# Add an HTTPRoute to route traffic to vLLM, or to a vLLM simulator.
#
# **WARNING**: The EnvoyFilter contains a placeholder (REPLACE_NAMESPACE) that
# must be replaced with the namespace of the EPP's Service. For now, use sed to
# replace it, e.g.:
#
#   $ kubectl kustomize deploy/components/inference-gateway \
#       | sed 's/REPLACE_NAMESPACE/mynamespace/g' \
#       | kubectl -n mynamespace apply -f -
#
# (Plain `g` is used rather than GNU sed's case-insensitive `gI`, so the
# command also works with BSD/macOS sed; the placeholder is upper-case.)
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - configmaps.yaml
  - deployments.yaml
  - services.yaml
  - rbac.yaml
  - gateways.yaml
  - envoy-filters.yaml

# Pins the endpoint picker image tag; the Deployment references the image
# with the `latest` tag.
images:
  - name: inference-router/router-ext-proc
    # Quoted so a version-like tag is always read as a string (e.g. a future
    # 1.10 would otherwise be parsed as the float 1.1).
    newTag: "0.0.1"
---
# Identity the endpoint-picker Deployment runs as (see serviceAccountName in
# its pod spec).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: endpoint-picker
---
# Namespaced, read-only access to Pods (get/list/watch) for the endpoint
# picker.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: endpoint-picker
rules:
  - apiGroups:
      - ''  # the core API group
    resources:
      - 'pods'
    verbs:
      - 'get'
      - 'list'
      - 'watch'
---
# Grants the endpoint-picker Role to the endpoint-picker ServiceAccount.
# The subject carries no namespace field.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: endpoint-picker-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: endpoint-picker
subjects:
  - kind: ServiceAccount
    name: endpoint-picker
---
# ClusterIP Service in front of the endpoint-picker pods. Port 9080 is the
# port referenced by the EnvoyFilter's ext_proc cluster name
# (outbound|9080||endpoint-picker.<namespace>.svc.cluster.local).
apiVersion: v1
kind: Service
metadata:
  name: endpoint-picker
spec:
  type: ClusterIP
  selector:
    app: endpoint-picker
  ports:
    - name: grpc
      port: 9080
      targetPort: 9080
      protocol: TCP
You can’t perform that action at this time.
0 commit comments