Skip to content
This repository was archived by the owner on May 15, 2025. It is now read-only.

First iteration of development deployments & environments #4

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions deploy/components/inference-gateway/configmaps.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Endpoint Picker (EPP) configuration. The endpoint-picker Deployment mounts
# this ConfigMap at /etc/endpoint-picker and reads config.yaml from there.
#
# The pod_selector value is quoted because Kubernetes label values are always
# strings; the simulator pods carry the label ai-aware-router-pod: "true".
#
# NOTE(review): routing_filters has no value (it parses as null) - confirm
# that no filters are intended here.
apiVersion: v1
kind: ConfigMap
metadata:
  name: endpoint-picker-config
data:
  config.yaml: |
    pod_selector:
      ai-aware-router-pod: "true"
    routing_filters:
    routing_scorers:
      - name: session-affinity
        weight: 60
      - name: route-by-active-lora
        weight: 50
    routing_header: x-ai-aware-router-routing
    session_id_header: x-ai-aware-router-session-id
    listening_port: 9080
    inference_port: 8000
32 changes: 32 additions & 0 deletions deploy/components/inference-gateway/deployments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Endpoint Picker (EPP): a single-replica Deployment exposing a gRPC port
# (9080) and loading its configuration from the endpoint-picker-config
# ConfigMap mounted at /etc/endpoint-picker.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: endpoint-picker
spec:
  replicas: 1
  selector:
    matchLabels:
      app: endpoint-picker
  template:
    metadata:
      labels:
        app: endpoint-picker
    spec:
      serviceAccountName: endpoint-picker
      # Config volume backed by the ConfigMap of the same name.
      volumes:
        - name: endpoint-picker-config
          configMap:
            name: endpoint-picker-config
      containers:
        - name: endpoint-picker
          image: inference-router/router-ext-proc:latest
          args:
            - '--config-file'
            - '/etc/endpoint-picker/config.yaml'
          ports:
            - name: grpc
              protocol: TCP
              containerPort: 9080
          volumeMounts:
            - name: endpoint-picker-config
              mountPath: /etc/endpoint-picker
31 changes: 31 additions & 0 deletions deploy/components/inference-gateway/envoy-filters.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Attaches the Endpoint Picker (EPP) to the Gateway as an Envoy external
# processor (ext_proc) inserted first in the HTTP filter chain.
#
# The patch is restricted to the GATEWAY context so it is applied only to
# gateway proxies, not to any sidecar proxies that may run in the namespace.
#
# REPLACE_NAMESPACE in cluster_name must be substituted with the namespace of
# the endpoint-picker Service before this manifest is applied.
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
  name: endpoint-picker
spec:
  configPatches:
    - applyTo: HTTP_FILTER
      match:
        context: GATEWAY
        listener:
          filterChain:
            filter:
              name: "envoy.filters.network.http_connection_manager"
      patch:
        operation: INSERT_FIRST
        value:
          name: envoy.filters.http.ext_proc
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
            # Requests fail when the external processor is unavailable.
            failure_mode_allow: false
            allow_mode_override: true
            processing_mode:
              request_header_mode: "SEND"
              response_header_mode: "SEND"
              request_body_mode: "BUFFERED"
              response_body_mode: "BUFFERED"
              request_trailer_mode: "SEND"
              response_trailer_mode: "SKIP"
            grpc_service:
              envoy_grpc:
                cluster_name: outbound|9080||endpoint-picker.REPLACE_NAMESPACE.svc.cluster.local
              timeout: 5s
14 changes: 14 additions & 0 deletions deploy/components/inference-gateway/gateways.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Gateway API Gateway served by the "istio" GatewayClass: one plain-HTTP
# listener on port 80, exposed through a ClusterIP Service.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: inference-gateway
  annotations:
    networking.istio.io/service-type: ClusterIP
  labels:
    # NOTE(review): presumably selects the Istio control plane revision that
    # manages this Gateway - confirm against the deployed Istio resource name.
    istio.io/rev: istio-control-plane
spec:
  gatewayClassName: istio
  listeners:
    - name: default
      protocol: HTTP
      port: 80
30 changes: 30 additions & 0 deletions deploy/components/inference-gateway/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# ------------------------------------------------------------------------------
# Inference Gateway
#
# This deploys a Gateway and the Endpoint Picker (EPP), and attaches the EPP to
# the Gateway with an EnvoyFilter.
#
# Add an HTTPRoute to route traffic to VLLM, or a VLLM simulator.
#
# **WARNING**: The EnvoyFilter contains a variable that needs to be replaced
# with the namespace to match the EPP's Service. For now use sed to replace it,
# e.g.:
#
# $ kubectl kustomize deploy/components/inference-gateway \
# | sed 's/REPLACE_NAMESPACE/mynamespace/gI' \
# | kubectl -n mynamespace apply -f -
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Pin the EPP image tag (the Deployment manifest references ":latest").
images:
  - name: inference-router/router-ext-proc
    newTag: "0.0.1"

# Manifests that make up the inference gateway component.
resources:
  - configmaps.yaml
  - deployments.yaml
  - services.yaml
  - rbac.yaml
  - gateways.yaml
  - envoy-filters.yaml
31 changes: 31 additions & 0 deletions deploy/components/inference-gateway/rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Identity under which the endpoint-picker Deployment runs.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: endpoint-picker
---
# Namespaced read-only access to Pods (core API group).
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: endpoint-picker
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
---
# Grants the endpoint-picker Role to the endpoint-picker ServiceAccount.
# NOTE(review): the subject carries no namespace, so it presumably relies on
# resolving to the RoleBinding's own namespace - confirm.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: endpoint-picker-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: endpoint-picker
subjects:
  - kind: ServiceAccount
    name: endpoint-picker

13 changes: 13 additions & 0 deletions deploy/components/inference-gateway/services.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# ClusterIP Service in front of the endpoint-picker pods (gRPC on 9080).
apiVersion: v1
kind: Service
metadata:
  name: endpoint-picker
spec:
  type: ClusterIP
  ports:
    - name: grpc
      port: 9080
      targetPort: 9080
      protocol: TCP
  selector:
    app: endpoint-picker
13 changes: 13 additions & 0 deletions deploy/components/istio-control-plane/control-plane.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Istio control plane managed by the Sail Operator.
# NOTE(review): the kustomization in this directory sets namePrefix "istio-",
# so the rendered name is presumably "istio-control-plane" - confirm.
apiVersion: sailoperator.io/v1
kind: Istio
metadata:
  name: control-plane
spec:
  version: v1.25-latest
  values:
    pilot:
      # Resource requests for the pilot component.
      resources:
        requests:
          memory: 1024Mi
          cpu: 100m

15 changes: 15 additions & 0 deletions deploy/components/istio-control-plane/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# ------------------------------------------------------------------------------
# Istio Control Plane
#
# This deploys an Istio control-plane for the entire cluster. This enables the
# creation of Gateways.
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Manifests that make up the Istio control plane component.
resources:
  - namespaces.yaml
  - control-plane.yaml

# Namespace and name prefix applied to the resources above.
namePrefix: istio-
namespace: istio-system
4 changes: 4 additions & 0 deletions deploy/components/istio-control-plane/namespaces.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Namespace for the Istio control plane.
# NOTE(review): declared as "system"; the kustomization in this directory
# (namespace: istio-system, namePrefix: istio-) presumably renders it as
# "istio-system" - confirm with `kubectl kustomize`.
kind: Namespace
apiVersion: v1
metadata:
  name: system
1 change: 1 addition & 0 deletions deploy/components/sail-operator/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
charts/
32 changes: 32 additions & 0 deletions deploy/components/sail-operator/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# ------------------------------------------------------------------------------
# Istio Sail Operator
#
# This deploys the Istio Sail Operator via Helm chart to enable the creation
# of Istio Control Planes, and ultimately Gateways. This will also deploy all
# the Istio and Gateway API CRDs.
#
# This is required on Kubernetes clusters, and on OpenShift clusters older
# than version 4.19 (OpenShift 4.19+ includes all of this by default).
#
# **Warning**: This needs to be deployed before, and separately from other
# components as it deploys CRDs. It can be deployed with:
#
# $ kubectl kustomize --enable-helm deploy/components/sail-operator/ \
# | kubectl apply --server-side --force-conflicts -f -
#
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: sail-operator

# Sail Operator Helm chart, rendered together with its CRDs.
helmCharts:
  - name: sail-operator
    repo: https://istio-ecosystem.github.io/sail-operator
    version: "1.25.1"
    namespace: sail-operator
    includeCRDs: true

# Gateway API CRDs (pinned to v1.2.1) plus the operator's namespace.
resources:
  - https://github.com/kubernetes-sigs/gateway-api/config/crd?ref=v1.2.1
  - namespaces.yaml
4 changes: 4 additions & 0 deletions deploy/components/sail-operator/namespaces.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Namespace that hosts the Sail Operator.
kind: Namespace
apiVersion: v1
metadata:
  name: sail-operator
86 changes: 86 additions & 0 deletions deploy/components/vllm-sim/deployments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# vLLM simulator instance listening on port 30801, serving model1 with the
# LoRA adapters lora1 and lora2.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-30801
  labels:
    app: vllm-30801
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-30801
  template:
    metadata:
      annotations:
        # NOTE(review): 127.0.0.1 is a loopback address, reachable only from
        # inside this pod - confirm this is what the router should use.
        ai-aware-router-address: '127.0.0.1:30801'
      labels:
        app: vllm-30801
        ai-aware-router-pod: 'true'
    spec:
      containers:
        - name: vllm
          image: vllm-sim/vllm-sim:latest
          ports:
            - containerPort: 30801
          args:
            - '--port=30801'
            - '--model=model1'
            - '--lora=lora1,lora2'
---
# vLLM simulator instance listening on port 30802, serving model1 with the
# LoRA adapters lora1 and lora2.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-30802
  labels:
    app: vllm-30802
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-30802
  template:
    metadata:
      annotations:
        # NOTE(review): 127.0.0.1 is a loopback address, reachable only from
        # inside this pod - confirm this is what the router should use.
        ai-aware-router-address: '127.0.0.1:30802'
      labels:
        app: vllm-30802
        ai-aware-router-pod: 'true'
    spec:
      containers:
        - name: vllm
          image: vllm-sim/vllm-sim:latest
          ports:
            - containerPort: 30802
          args:
            - '--port=30802'
            - '--model=model1'
            - '--lora=lora1,lora2'
---
# vLLM simulator instance listening on port 30803, serving model2 with the
# LoRA adapter lora3.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-30803
  labels:
    app: vllm-30803
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-30803
  template:
    metadata:
      annotations:
        # NOTE(review): 127.0.0.1 is a loopback address, reachable only from
        # inside this pod - confirm this is what the router should use.
        ai-aware-router-address: '127.0.0.1:30803'
      labels:
        app: vllm-30803
        ai-aware-router-pod: 'true'
    spec:
      containers:
        - name: vllm
          image: vllm-sim/vllm-sim:latest
          ports:
            - containerPort: 30803
          args:
            - '--port=30803'
            - '--model=model2'
            - '--lora=lora3'
17 changes: 17 additions & 0 deletions deploy/components/vllm-sim/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# ------------------------------------------------------------------------------
# VLLM Simulator
#
# This deploys a VLLM simulator which can be used to simulate inference for
# small environments (e.g. Kubernetes In Docker (KIND) clusters) or for simple
# tests.
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Pin the simulator image tag (the Deployments reference ":latest").
images:
  - name: vllm-sim/vllm-sim
    newTag: "0.0.2"

# Simulator Deployments and the Services in front of them.
resources:
  - deployments.yaml
  - services.yaml
38 changes: 38 additions & 0 deletions deploy/components/vllm-sim/services.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# ClusterIP Service for the vllm-30801 simulator pods.
apiVersion: v1
kind: Service
metadata:
  name: vllm-30801
spec:
  type: ClusterIP
  ports:
    - port: 30801
      targetPort: 30801
      protocol: TCP
  selector:
    app: vllm-30801
---
# ClusterIP Service for the vllm-30802 simulator pods.
apiVersion: v1
kind: Service
metadata:
  name: vllm-30802
spec:
  type: ClusterIP
  ports:
    - port: 30802
      targetPort: 30802
      protocol: TCP
  selector:
    app: vllm-30802
---
# ClusterIP Service for the vllm-30803 simulator pods.
apiVersion: v1
kind: Service
metadata:
  name: vllm-30803
spec:
  type: ClusterIP
  ports:
    - port: 30803
      targetPort: 30803
      protocol: TCP
  selector:
    app: vllm-30803
Loading