From 5e36653bfc62d70fe382178ad6f74ac318d3672e Mon Sep 17 00:00:00 2001 From: Jiaxin Shan Date: Sat, 28 Dec 2024 23:07:22 -0800 Subject: [PATCH 1/3] Fix the outdated fields in inference pool Signed-off-by: Jiaxin Shan --- examples/poc/manifests/inferencepool-with-model.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/poc/manifests/inferencepool-with-model.yaml b/examples/poc/manifests/inferencepool-with-model.yaml index 329a90e28..bd3aaa423 100644 --- a/examples/poc/manifests/inferencepool-with-model.yaml +++ b/examples/poc/manifests/inferencepool-with-model.yaml @@ -4,8 +4,8 @@ metadata: labels: name: vllm-llama2-7b-pool spec: - targetPort: 8000 - modelServerSelector: + targetPortNumber: 8000 + selector: "app": "vllm-llama2-7b-pool" --- apiVersion: inference.networking.x-k8s.io/v1alpha1 From 423e412dbb6f97cf830c7fc43e2f208b8176ccf9 Mon Sep 17 00:00:00 2001 From: Jiaxin Shan Date: Sat, 28 Dec 2024 23:12:35 -0800 Subject: [PATCH 2/3] fix model routing configuration issues Signed-off-by: Jiaxin Shan --- examples/poc/manifests/inferencepool-with-model.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/poc/manifests/inferencepool-with-model.yaml b/examples/poc/manifests/inferencepool-with-model.yaml index bd3aaa423..f05823eaa 100644 --- a/examples/poc/manifests/inferencepool-with-model.yaml +++ b/examples/poc/manifests/inferencepool-with-model.yaml @@ -16,7 +16,7 @@ metadata: app.kubernetes.io/managed-by: kustomize name: inferencemodel-sample spec: - modelName: sql-lora + modelName: tweet-summary criticality: Critical poolRef: # this is the default val: @@ -25,6 +25,8 @@ spec: kind: InferencePool name: vllm-llama2-7b-pool targetModels: - - name: sql-lora-1fdg2 - weight: 100 + - name: tweet-summary-0 + weight: 50 + - name: tweet-summary-1 + weight: 50 From 7f9f860b9c685b1fa7cf371464eeb33d21ea8c62 Mon Sep 17 00:00:00 2001 From: Jiaxin Shan Date: Sat, 28 Dec 2024 23:13:15 -0800 Subject: [PATCH 3/3] Update to use latest gateway name Signed-off-by: Jiaxin Shan --- pkg/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/README.md b/pkg/README.md index 68d2378ae..7f2550378 100644 --- a/pkg/README.md +++ b/pkg/README.md @@ -43,7 +43,7 @@ The current manifests rely on Envoy Gateway [v1.2.1](https://gateway.envoyproxy. Wait until the gateway is ready. ```bash - IP=$(kubectl get gateway/instance-gateway -o jsonpath='{.status.addresses[0].value}') + IP=$(kubectl get gateway/inference-gateway -o jsonpath='{.status.addresses[0].value}') PORT=8081 curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{