
Commit b5f882c

Mixtral 8x7B support (#2011)
Co-authored-by: Pierre Stock <[email protected]>
Co-authored-by: Zhuohan Li <[email protected]>
1 parent 2e8fc0d commit b5f882c

4 files changed: +538 −0 lines changed

README.md

Lines changed: 1 addition & 0 deletions

@@ -60,6 +60,7 @@ vLLM seamlessly supports many Hugging Face models, including the following architectures:
 - InternLM (`internlm/internlm-7b`, `internlm/internlm-chat-7b`, etc.)
 - LLaMA & LLaMA-2 (`meta-llama/Llama-2-70b-hf`, `lmsys/vicuna-13b-v1.3`, `young-geng/koala`, `openlm-research/open_llama_13b`, etc.)
 - Mistral (`mistralai/Mistral-7B-v0.1`, `mistralai/Mistral-7B-Instruct-v0.1`, etc.)
+- Mixtral (`mistralai/Mixtral-8x7B-v0.1`, `mistralai/Mixtral-8x7B-Instruct-v0.1`, etc.)
 - MPT (`mosaicml/mpt-7b`, `mosaicml/mpt-30b`, etc.)
 - OPT (`facebook/opt-66b`, `facebook/opt-iml-max-30b`, etc.)
 - Phi-1.5 (`microsoft/phi-1_5`, etc.)
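As a usage note (not part of this commit): once the Mixtral entry is registered, a checkpoint can be served through vLLM's standard offline-inference API. The sketch below is illustrative; the `tensor_parallel_size` value is an assumption and depends on available GPU memory.

```python
# Illustrative only (not from this commit): load Mixtral through vLLM's
# offline-inference API. tensor_parallel_size=2 is an assumption; Mixtral
# 8x7B needs enough aggregate GPU memory for ~47B parameters in fp16/bf16.
from vllm import LLM, SamplingParams

llm = LLM(model="mistralai/Mixtral-8x7B-Instruct-v0.1", tensor_parallel_size=2)
sampling = SamplingParams(temperature=0.8, max_tokens=128)
outputs = llm.generate(["Explain mixture-of-experts in one sentence."], sampling)
print(outputs[0].outputs[0].text)
```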

vllm/model_executor/model_loader.py

Lines changed: 1 addition & 0 deletions

@@ -33,6 +33,7 @@
     "LlamaForCausalLM": LlamaForCausalLM,
     "LLaMAForCausalLM": LlamaForCausalLM,  # For decapoda-research/llama-*
     "MistralForCausalLM": MistralForCausalLM,
+    "MixtralForCausalLM": MixtralForCausalLM,
     # transformers's mpt class has lower case
     "MptForCausalLM": MPTForCausalLM,
     "MPTForCausalLM": MPTForCausalLM,

vllm/model_executor/models/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -10,6 +10,7 @@
 from vllm.model_executor.models.internlm import InternLMForCausalLM
 from vllm.model_executor.models.llama import LlamaForCausalLM
 from vllm.model_executor.models.mistral import MistralForCausalLM
+from vllm.model_executor.models.mixtral import MixtralForCausalLM
 from vllm.model_executor.models.mpt import MPTForCausalLM
 from vllm.model_executor.models.opt import OPTForCausalLM
 from vllm.model_executor.models.phi_1_5 import PhiForCausalLM
@@ -35,5 +36,6 @@
     "PhiForCausalLM",
     "QWenLMHeadModel",
     "MistralForCausalLM",
+    "MixtralForCausalLM",
     "YiForCausalLM",
 ]
