kubernetes-sigs · k8s-ci-robot · Dec 30, 2024 · Dec 29, 2024 · Dec 29, 2024 · Dec 29, 2024
diff --git a/examples/poc/manifests/inferencepool-with-model.yaml b/examples/poc/manifests/inferencepool-with-model.yaml
@@ -4,8 +4,8 @@ metadata:
   labels:
   name: vllm-llama2-7b-pool
 spec:
-  targetPort: 8000
-  modelServerSelector:
+  targetPortNumber: 8000
+  selector:
     "app": "vllm-llama2-7b-pool"
 ---
 apiVersion: inference.networking.x-k8s.io/v1alpha1
@@ -16,7 +16,7 @@ metadata:
     app.kubernetes.io/managed-by: kustomize
   name: inferencemodel-sample
 spec:
-  modelName: sql-lora
+  modelName: tweet-summary
   criticality: Critical
   poolRef:
     # this is the default val:
@@ -25,6 +25,8 @@ spec:
     kind: InferencePool
     name: vllm-llama2-7b-pool
   targetModels:
-  - name: sql-lora-1fdg2
-    weight: 100
+  - name: tweet-summary-0
+    weight: 50
+  - name: tweet-summary-1
+    weight: 50
 
diff --git a/pkg/README.md b/pkg/README.md
@@ -43,7 +43,7 @@ The current manifests rely on Envoy Gateway [v1.2.1](https://gateway.envoyproxy.
    Wait until the gateway is ready.
 
    ```bash
-   IP=$(kubectl get gateway/instance-gateway -o jsonpath='{.status.addresses[0].value}')
+   IP=$(kubectl get gateway/inference-gateway -o jsonpath='{.status.addresses[0].value}')
    PORT=8081
 
    curl -i ${IP}:${PORT}/v1/completions -H 'Content-Type: application/json' -d '{