Expose the kind dev-environment Gateway on a host port via NodePort.

What this patch does:
  * deploy/environments/dev/kind/gateway.yaml (new): Kustomize patch that adds
    the annotation networking.istio.io/service-type: NodePort to the
    `inference-gateway` Gateway.
  * deploy/environments/dev/kind/services.yaml (new): NodePort Service
    `inference-gateway-istio` with fixed node ports (80 -> 30080,
    15021 -> 32021). 30080 is the containerPort mapped by the kind config below.
  * deploy/environments/dev/kind/kustomization.yaml: registers services.yaml as
    a resource and gateway.yaml as a patch.
  * scripts/kind-dev-env.sh: adds GATEWAY_HOST_PORT (default 30080), creates the
    kind cluster with an extraPortMappings entry (containerPort 30080 ->
    hostPort GATEWAY_HOST_PORT), and replaces the port-forward instructions
    with a direct curl against the mapped host port.

Review notes:
  * A cluster that already exists is re-used without being recreated, so the
    port mapping only takes effect on clusters created by this script version.
  * services.yaml pins `namespace: default` while the script exposes
    PROJECT_NAMESPACE; presumably these must agree -- TODO confirm.
  * Whitespace of context lines was reconstructed (script indentation assumed
    to be 4 spaces); adjust or apply with --ignore-whitespace if it differs.
  * Post-image blob ids on the `index` lines were not regenerated after adding
    trailing newlines and rewording the usage text.

diff --git a/deploy/environments/dev/kind/gateway.yaml b/deploy/environments/dev/kind/gateway.yaml
new file mode 100644
index 000000000..bf85e8269
--- /dev/null
+++ b/deploy/environments/dev/kind/gateway.yaml
@@ -0,0 +1,6 @@
+apiVersion: gateway.networking.k8s.io/v1
+kind: Gateway
+metadata:
+  name: inference-gateway
+  annotations:
+    networking.istio.io/service-type: NodePort
diff --git a/deploy/environments/dev/kind/kustomization.yaml b/deploy/environments/dev/kind/kustomization.yaml
index 57fa4b319..34d2a8210 100644
--- a/deploy/environments/dev/kind/kustomization.yaml
+++ b/deploy/environments/dev/kind/kustomization.yaml
@@ -13,6 +13,10 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 
 resources:
+- services.yaml
 - ../../../components/istio-control-plane/
 - ../../../components/vllm-sim/
 - ../../../components/inference-gateway/
+
+patches:
+- path: gateway.yaml
diff --git a/deploy/environments/dev/kind/services.yaml b/deploy/environments/dev/kind/services.yaml
new file mode 100644
index 000000000..853bfcca0
--- /dev/null
+++ b/deploy/environments/dev/kind/services.yaml
@@ -0,0 +1,28 @@
+apiVersion: v1
+kind: Service
+metadata:
+  annotations:
+    networking.istio.io/service-type: NodePort
+  labels:
+    gateway.istio.io/managed: istio.io-gateway-controller
+    gateway.networking.k8s.io/gateway-name: inference-gateway
+    istio.io/enable-inference-extproc: "true"
+  name: inference-gateway-istio
+  namespace: default
+spec:
+  type: NodePort
+  selector:
+    gateway.networking.k8s.io/gateway-name: inference-gateway
+  ports:
+  - appProtocol: tcp
+    name: status-port
+    port: 15021
+    protocol: TCP
+    targetPort: 15021
+    nodePort: 32021
+  - appProtocol: http
+    name: default
+    port: 80
+    protocol: TCP
+    targetPort: 80
+    nodePort: 30080
diff --git a/scripts/kind-dev-env.sh b/scripts/kind-dev-env.sh
index 3cad7fbad..89eb11195 100755
--- a/scripts/kind-dev-env.sh
+++ b/scripts/kind-dev-env.sh
@@ -22,6 +22,9 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Set the namespace to deploy the Gateway stack to
 : "${PROJECT_NAMESPACE:=default}"
 
+# Set the host port to map to the Gateway's inbound port (30080)
+: "${GATEWAY_HOST_PORT:=30080}"
+
 # ------------------------------------------------------------------------------
 # Setup & Requirement Checks
 # ------------------------------------------------------------------------------
@@ -63,7 +66,16 @@ done
 if kind get clusters 2>/dev/null | grep -q "^${CLUSTER_NAME}$"; then
     echo "Cluster '${CLUSTER_NAME}' already exists, re-using"
 else
-    kind create cluster --name "${CLUSTER_NAME}"
+    kind create cluster --name "${CLUSTER_NAME}" --config - << EOF
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+nodes:
+- role: control-plane
+  extraPortMappings:
+  - containerPort: 30080
+    hostPort: ${GATEWAY_HOST_PORT}
+    protocol: TCP
+EOF
 fi
 
 # Set the kubectl context to the kind cluster
@@ -126,13 +138,9 @@ You can watch the Endpoint Picker logs with:
 
   $ kubectl --context ${KUBE_CONTEXT} logs -f deployments/endpoint-picker
 
-You can use a port-forward to access the Gateway:
-
-  $ kubectl --context ${KUBE_CONTEXT} port-forward service/inference-gateway-istio 8080:80
-
-With that running in the background, you can make requests:
+The Gateway is exposed on localhost:${GATEWAY_HOST_PORT}, so you can make requests:
 
-  $ curl -s -w '\n' http://localhost:8080/v1/completions -H 'Content-Type: application/json' -d '{"model":"food-review","prompt":"hi","max_tokens":10,"temperature":0}' | jq
+  $ curl -s -w '\n' http://localhost:${GATEWAY_HOST_PORT}/v1/completions -H 'Content-Type: application/json' -d '{"model":"food-review","prompt":"hi","max_tokens":10,"temperature":0}' | jq
 
 -----------------------------------------
 EOF