
Commit 2123430

Validation for config.quantization_config.quant_method

1 parent 82233de · commit 2123430

File tree

2 files changed: +7 −3 lines changed

packages/tasks/src/local-apps.ts

Lines changed: 3 additions & 3 deletions

@@ -76,11 +76,11 @@ docker run --runtime nvidia --gpus all \
 	vllm/vllm-openai:latest \
 	--model ${model.id}
 `,
-	`
+	`
 ## Load and run the model
 docker exec -it my_vllm_container bash -c "python -m vllm.entrypoints.openai.api_server --model ${model.id} --dtype auto --api-key token-abc123"
 `,
-	`
+	`
 ## Call the server using the official OpenAI Python client library, or any other HTTP client
 from openai import OpenAI
 client = OpenAI(
@@ -117,7 +117,7 @@ export const LOCAL_APPS = {
 		displayOnModelPage: isGgufModel,
 		snippet: snippetLlamacpp,
 	},
-	"vllm": {
+	vllm: {
 		prettyLabel: "vLLM",
 		docsUrl: "https://docs.vllm.ai",
 		mainTask: "text-generation",

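The commit message describes validating config.quantization_config.quant_method before the vLLM entry is offered for a model. As a minimal sketch of what such a check could look like, assuming a hypothetical helper named isVllmCompatible and a supported-method list drawn from the comment added in model-data.ts below (neither the name nor the exact list is code from this commit):

import type { ModelData } from "./model-data";

// Sketch only: the helper name and the method list are assumptions,
// not the implementation shipped in this commit.
const VLLM_QUANT_METHODS = ["awq", "gptq", "aqlm", "marlin"];

const isVllmCompatible = (model: ModelData): boolean => {
	const quantMethod = model.config?.quantization_config?.quant_method;
	// Unquantized models pass; quantized ones must use a method vLLM can load.
	return quantMethod === undefined || VLLM_QUANT_METHODS.includes(quantMethod);
};

A predicate along these lines could back the entry's displayOnModelPage field, the same way isGgufModel backs the llama.cpp entry in the hunk above.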
packages/tasks/src/model-data.ts

Lines changed: 4 additions & 0 deletions
@@ -52,6 +52,10 @@ export interface ModelData {
 			bits?: number;
 			load_in_4bit?: boolean;
 			load_in_8bit?: boolean;
+			/**
+			 * awq, gptq, aqlm, marlin, … Used by vLLM
+			 */
+			quant_method?: string;
 		};
 		tokenizer_config?: TokenizerConfig;
 		adapter_transformers?: {
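The new optional field mirrors the quant_method key that quantized checkpoints typically carry in their config.json under quantization_config. A hedged illustration of the object shape the field is meant to capture (the values are invented):

// Illustration only; the values are invented.
// A quantized checkpoint's config.json nests the method name under
// quantization_config, which the interface above now models.
const parsedQuantizationConfig: {
	bits?: number;
	load_in_4bit?: boolean;
	load_in_8bit?: boolean;
	quant_method?: string;
} = {
	bits: 4,
	quant_method: "awq", // e.g. awq, gptq, aqlm, marlin, …
};

A consumer such as the vLLM snippet can then read model.config?.quantization_config?.quant_method and fall back gracefully when the key is absent.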

0 commit comments