
Commit 2123430

Validation for config.quantization_config.quant_method

1 parent 82233de · commit 2123430

File tree

2 files changed: +7 −3 lines changed

packages/tasks/src/local-apps.ts

Lines changed: 3 additions & 3 deletions

@@ -76,11 +76,11 @@ docker run --runtime nvidia --gpus all \
 	vllm/vllm-openai:latest \
 	--model ${model.id}
 `,
-	`
+	`
 ## Load and run the model
 docker exec -it my_vllm_container bash -c "python -m vllm.entrypoints.openai.api_server --model ${model.id} --dtype auto --api-key token-abc123"
 `,
-	`
+	`
 ## Call the server using the official OpenAI Python client library, or any other HTTP client
 from openai import OpenAI
 client = OpenAI(
@@ -117,7 +117,7 @@ export const LOCAL_APPS = {
 		displayOnModelPage: isGgufModel,
 		snippet: snippetLlamacpp,
 	},
-	"vllm": {
+	vllm: {
 		prettyLabel: "vLLM",
 		docsUrl: "https://docs.vllm.ai",
 		mainTask: "text-generation",

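The commit message describes validating config.quantization_config.quant_method before the vLLM entry is offered for a model. As a minimal sketch of what such a check could look like, assuming a hypothetical helper named isVllmCompatible and a supported-method list drawn from the comment added in model-data.ts below (neither the name nor the exact list is code from this commit):

import type { ModelData } from "./model-data";

// Sketch only: the helper name and the method list are assumptions,
// not the implementation shipped in this commit.
const VLLM_QUANT_METHODS = ["awq", "gptq", "aqlm", "marlin"];

const isVllmCompatible = (model: ModelData): boolean => {
	const quantMethod = model.config?.quantization_config?.quant_method;
	// Unquantized models pass; quantized ones must use a method vLLM can load.
	return quantMethod === undefined || VLLM_QUANT_METHODS.includes(quantMethod);
};

A predicate along these lines could back the entry's displayOnModelPage field, the same way isGgufModel backs the llama.cpp entry in the hunk above.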
packages/tasks/src/model-data.ts

Lines changed: 4 additions & 0 deletions
@@ -52,6 +52,10 @@ export interface ModelData {
 			bits?: number;
 			load_in_4bit?: boolean;
 			load_in_8bit?: boolean;
+			/**
+			 * awq, gptq, aqlm, marlin, … Used by vLLM
+			 */
+			quant_method?: string;
 		};
 		tokenizer_config?: TokenizerConfig;
 		adapter_transformers?: {
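The new optional field mirrors the quant_method key that quantized checkpoints typically carry in their config.json under quantization_config. A hedged illustration of the object shape the field is meant to capture (the values are invented):

// Illustration only; the values are invented.
// A quantized checkpoint's config.json nests the method name under
// quantization_config, which the interface above now models.
const parsedQuantizationConfig: {
	bits?: number;
	load_in_4bit?: boolean;
	load_in_8bit?: boolean;
	quant_method?: string;
} = {
	bits: 4,
	quant_method: "awq", // e.g. awq, gptq, aqlm, marlin, …
};

A consumer such as the vLLM snippet can then read model.config?.quantization_config?.quant_method and fall back gracefully when the key is absent.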

0 commit comments