@@ -58,11 +58,30 @@ export type LocalApp = {
 }
 );
 
-// eslint-disable-next-line @typescript-eslint/no-unused-vars
-function isGgufModel(model: ModelData) {
+function isGgufModel(model: ModelData): boolean {
 	return model.tags.includes("gguf");
 }
 
+function isAwqModel(model: ModelData): boolean {
+	return model.config?.quantization_config?.quant_method === "awq";
+}
+
+function isGptqModel(model: ModelData): boolean {
+	return model.config?.quantization_config?.quant_method === "gptq";
+}
+
+function isAqlmModel(model: ModelData): boolean {
+	return model.config?.quantization_config?.quant_method === "aqlm";
+}
+
+function isMarlinModel(model: ModelData): boolean {
+	return model.config?.quantization_config?.quant_method === "marlin";
+}
+
+function isTransformersModel(model: ModelData): boolean {
+	return model.tags.includes("transformers");
+}
+
 function isLlamaCppGgufModel(model: ModelData) {
 	return !!model.gguf?.context_length;
 }
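Note (not part of the diff): the new helpers above key off `config.quantization_config.quant_method` from the hub metadata. A minimal sketch of how they would evaluate against a hypothetical model record, with field names assumed to match the optional chain used above:

// Hypothetical, illustration only: a ModelData-shaped object with just the
// fields the helpers above actually read.
const exampleModel = {
	id: "example-org/example-awq-model", // hypothetical repo id
	tags: ["transformers", "safetensors"],
	config: { quantization_config: { quant_method: "awq" } },
} as unknown as ModelData;

isAwqModel(exampleModel); // true  -> quant_method === "awq"
isGptqModel(exampleModel); // false -> quant_method is not "gptq"
isTransformersModel(exampleModel); // true  -> "transformers" tag present
isGgufModel(exampleModel); // false -> no "gguf" tag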
@@ -127,6 +146,47 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
 	];
 };
 
+const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
+	const runCommand = [
+		"",
+		"# Call the server using curl:",
+		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
+		`	-H "Content-Type: application/json" \\`,
+		`	--data '{`,
+		`		"model": "${model.id}",`,
+		`		"messages": [`,
+		`			{"role": "user", "content": "Hello!"}`,
+		`		]`,
+		`	}'`,
+	];
+	return [
+		{
+			title: "Install from pip",
+			setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
+			content: ["# Load and run the model:", `vllm serve "${model.id}"`, ...runCommand].join("\n"),
+		},
+		{
+			title: "Use Docker images",
+			setup: [
+				"# Deploy with docker on Linux:",
+				`docker run --runtime nvidia --gpus all \\`,
+				`	--name my_vllm_container \\`,
+				`	-v ~/.cache/huggingface:/root/.cache/huggingface \\`,
+				`	--env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
+				`	-p 8000:8000 \\`,
+				`	--ipc=host \\`,
+				`	vllm/vllm-openai:latest \\`,
+				`	--model ${model.id}`,
+			].join("\n"),
+			content: [
+				"# Load and run the model:",
+				`docker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
+				...runCommand,
+			].join("\n"),
+		},
+	];
+};
+
 /**
  * Add your new local app here.
  *
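Usage sketch (not part of the diff): `snippetVllm` returns two snippets, one pip-based and one Docker-based; the model id below is just an example:

// Sketch only: assumes a ModelData-shaped object with at least an `id` field.
const vllmSnippets = snippetVllm({ id: "example-org/example-model" } as unknown as ModelData);
vllmSnippets.map((s) => s.title); // ["Install from pip", "Use Docker images"]
vllmSnippets[0].content.startsWith("# Load and run the model:"); // true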
@@ -146,6 +206,19 @@ export const LOCAL_APPS = {
 		displayOnModelPage: isLlamaCppGgufModel,
 		snippet: snippetLlamacpp,
 	},
+	vllm: {
+		prettyLabel: "vLLM",
+		docsUrl: "https://docs.vllm.ai",
+		mainTask: "text-generation",
+		displayOnModelPage: (model: ModelData) =>
+			isAwqModel(model) ||
+			isGptqModel(model) ||
+			isAqlmModel(model) ||
+			isMarlinModel(model) ||
+			isGgufModel(model) ||
+			isTransformersModel(model),
+		snippet: snippetVllm,
+	},
 	lmstudio: {
 		prettyLabel: "LM Studio",
 		docsUrl: "https://lmstudio.ai",
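Finally, a hedged sketch (not part of the diff) of how a caller might consume the new registry entry, assuming the `LOCAL_APPS` record and predicates defined in this file:

// Sketch: collect the local apps whose display predicate matches a model,
// e.g. the new "vllm" entry for an AWQ/GPTQ/AQLM/Marlin/GGUF/transformers model.
function localAppsForModel(model: ModelData) {
	return Object.entries(LOCAL_APPS)
		.filter(([, app]) => app.displayOnModelPage(model))
		.map(([key, app]) => ({ key, label: app.prettyLabel }));
}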