@@ -147,18 +147,28 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
 };

 const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
+	const runCommand = [
+		"",
+		"# Call the server using curl:",
+		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
+		`	-H "Content-Type: application/json" \\`,
+		`	--data '{`,
+		`		"model": "${model.id}",`,
+		`		"messages": [`,
+		`			{"role": "user", "content": "Hello!"}`,
+		`		]`,
+		`	}'`,
+	];
 	return [
 		{
 			title: "Install from pip",
-			setup: ["# Install vLLM from pip", "pip install vllm"].join("\n"),
-			content: ["# Load and run the model:", `python -m vllm.entrypoints.openai.api_server --model "${model.id}"`].join(
-				"\n"
-			),
+			setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
+			content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n"),
 		},
 		{
 			title: "Use Docker images",
 			setup: [
-				"# Deploy with linux and docker (needs Docker installed) a gated model (please, request access in Hugginface's model repo): ",
+				"# Deploy with Docker on Linux:",
 				`docker run --runtime nvidia --gpus all \\`,
 				`	--name my_vllm_container \\`,
 				`	-v ~/.cache/huggingface:/root/.cache/huggingface \\`,
@@ -170,17 +180,8 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 			].join("\n"),
 			content: [
 				"# Load and run the model:",
-				`docker exec -it my_vllm_container bash -c "python -m vllm.entrypoints.openai.api_server --model ${model.id} --dtype auto --api-key token-abc123"`,
-				"# Call the server using curl",
-				"curl -X POST 'http://localhost:8000/v1/chat/completions' \\",
-				`	-H "Content-Type: application/json" \\`,
-				`	-H "Authorization: Bearer token-abc123" \\`,
-				`	--data '{`,
-				`		"model": "${model.id}"`,
-				`		"messages": [`,
-				`			{"role": "user", "content": "Hello!"}`,
-				`		]`,
-				`	}'`,
+				`docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
+				...runCommand,
 			].join("\n"),
 		},
 	];
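
For a quick sanity check, here is a self-contained sketch of the pip variant after this change, runnable as-is to eyeball the rendered snippet. `ModelData` and `LocalAppSnippet` are reduced stand-ins for the real huggingface.js types (only the fields this snippet touches are modeled), and the model id passed at the end is a hypothetical placeholder:

```ts
// Reduced stand-ins for the real huggingface.js types (assumption: only the
// fields this snippet reads are modeled).
type ModelData = { id: string };
type LocalAppSnippet = { title: string; setup: string; content: string };

const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
	// Shared curl example, spliced into both the pip and Docker snippets.
	const runCommand = [
		"",
		"# Call the server using curl:",
		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
		`	-H "Content-Type: application/json" \\`,
		`	--data '{`,
		`		"model": "${model.id}",`,
		`		"messages": [`,
		`			{"role": "user", "content": "Hello!"}`,
		`		]`,
		`	}'`,
	];
	return [
		{
			title: "Install from pip",
			setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
			content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n"),
		},
	];
};

// Hypothetical model id, purely for illustration.
console.log(snippetVllm({ id: "some-org/some-model" })[0].content);
```

Hoisting the curl call into a single `runCommand` array keeps the example identical across the pip and Docker snippets instead of maintaining two diverging copies.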