Commit d2eeec1

more

Signed-off-by: bitliu <[email protected]>

1 parent d26e4d5 commit d2eeec1

5 files changed: +19 -25 lines changed

website/docs/installation/configuration.md

Lines changed: 12 additions & 12 deletions
````diff
@@ -159,10 +159,13 @@ Configure your LLM servers:
 vllm_endpoints:
   - name: "my_endpoint"
     address: "127.0.0.1"  # Your server IP - MUST be IP address format
-    port: 8000            # Your server port
-    models:
-      - "llama2-7b"       # Model name - must match vLLM --served-model-name
-    weight: 1             # Load balancing weight
+    port: 8000            # Your server port
+    weight: 1             # Load balancing weight
+
+# Model configuration - maps models to endpoints
+model_config:
+  "llama2-7b":            # Model name - must match vLLM --served-model-name
+    preferred_endpoints: ["my_endpoint"]
 ```
 
 #### Address Format Requirements
@@ -204,11 +207,12 @@ The model names in the `models` array must **exactly match** the `--served-model
 # vLLM server command:
 vllm serve meta-llama/Llama-2-7b-hf --served-model-name llama2-7b
 
-# config.yaml must use the same name:
-vllm_endpoints:
-  - models: ["llama2-7b"]  # ✅ Matches --served-model-name
-
+# config.yaml must reference the model in model_config:
 model_config:
+  "llama2-7b":  # ✅ Matches --served-model-name
+    preferred_endpoints: ["your-endpoint"]
+
+vllm_endpoints:
   "llama2-7b":  # ✅ Matches --served-model-name
     # ... configuration
 ```
@@ -683,12 +687,10 @@ vllm_endpoints:
   - name: "math_endpoint"
     address: "192.168.1.10"  # Math server IP
     port: 8000
-    models: ["math-model"]
     weight: 1
   - name: "general_endpoint"
     address: "192.168.1.20"  # General server IP
     port: 8000
-    models: ["general-model"]
     weight: 1
 
 categories:
@@ -711,12 +713,10 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "192.168.1.30"  # Primary server IP
     port: 8000
-    models: ["my-model"]
     weight: 2  # Higher weight = more traffic
   - name: "endpoint2"
     address: "192.168.1.31"  # Secondary server IP
     port: 8000
-    models: ["my-model"]
     weight: 1
 ```
````
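
Taken together, the configuration.md changes split one block into two: an endpoint entry now carries only address, port, and weight, while the model-to-endpoint mapping moves under `model_config` via `preferred_endpoints`. A minimal sketch of the resulting shape, assembled from the added lines above:

```yaml
# Endpoints now describe only where a server lives and its traffic share
vllm_endpoints:
  - name: "my_endpoint"
    address: "127.0.0.1"  # MUST be IP address format
    port: 8000
    weight: 1             # Load balancing weight

# model_config maps each served model to the endpoints that can serve it
model_config:
  "llama2-7b":            # Must match vLLM --served-model-name
    preferred_endpoints: ["my_endpoint"]
```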

website/docs/installation/installation.md

Lines changed: 7 additions & 8 deletions
````diff
@@ -109,13 +109,11 @@ Edit `config/config.yaml` to point to your LLM endpoints:
 vllm_endpoints:
   - name: "your-endpoint"
     address: "127.0.0.1"  # MUST be IP address (IPv4 or IPv6)
-    port: 11434           # Replace with your port
-    models:
-      - "your-model-name" # Replace with your model
+    port: 11434           # Replace with your port
     weight: 1
 
 model_config:
-  "your-model-name":
+  "your-model-name":  # Replace with your model name
     pii_policy:
       allow_by_default: false  # Deny all PII by default
       pii_types_allowed: ["EMAIL_ADDRESS", "PERSON", "GPE", "PHONE_NUMBER"]  # Only allow these specific PII types
@@ -146,11 +144,12 @@ The model name in your configuration **must exactly match** the `--served-model-
 # When starting vLLM server:
 vllm serve microsoft/phi-4 --port 11434 --served-model-name your-model-name
 
-# The config.yaml must use the same name:
-vllm_endpoints:
-  - models: ["your-model-name"]  # ✅ Must match --served-model-name
-
+# The config.yaml must reference the model in model_config:
 model_config:
+  "your-model-name":  # ✅ Must match --served-model-name
+    preferred_endpoints: ["your-endpoint"]
+
+vllm_endpoints:
   "your-model-name":  # ✅ Must match --served-model-name
     # ... configuration
 ```
````

website/docs/training/model-performance-eval.md

Lines changed: 0 additions & 2 deletions
````diff
@@ -69,11 +69,9 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 11434
-    models: ["phi4"]  # ✅ Matches --served_model_name phi4
   - name: "endpoint2"
     address: "127.0.0.1"
     port: 11435
-    models: ["qwen3-0.6B"]  # ✅ Matches --served_model_name qwen3-0.6B
 
 model_config:
   "phi4":  # ✅ Matches --served_model_name phi4
````

website/docs/tutorials/content-safety/pii-detection.md

Lines changed: 0 additions & 2 deletions
````diff
@@ -61,11 +61,9 @@ vllm_endpoints:
   - name: secure-model
     address: "127.0.0.1"
     port: 8080
-    models: ["secure-llm"]
   - name: general-model
     address: "127.0.0.1"
     port: 8081
-    models: ["general-llm"]
 
 # Model-specific configurations
 model_config:
````

website/docs/tutorials/intelligent-route/reasoning.md

Lines changed: 0 additions & 1 deletion
````diff
@@ -34,7 +34,6 @@ vllm_endpoints:
   - name: "endpoint1"
     address: "127.0.0.1"
     port: 8000
-    models: ["deepseek-v31", "qwen3-30b", "openai/gpt-oss-20b"]  # Must match --served-model-name
     weight: 1
 
 # Reasoning family configurations (how to express reasoning for a family)
````
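
Here one endpoint served three models in a single `models` list. The hunk is deletion-only, so the replacement mapping isn't shown; under this commit's scheme it would presumably become one `model_config` entry per model, as in this sketch (every `preferred_endpoints` value is an assumption):

```yaml
model_config:
  "deepseek-v31":
    preferred_endpoints: ["endpoint1"]  # assumed; not shown in this diff
  "qwen3-30b":
    preferred_endpoints: ["endpoint1"]
  "openai/gpt-oss-20b":
    preferred_endpoints: ["endpoint1"]
```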
