From cf75991cac5390ee463491205c5d25a73955b09c Mon Sep 17 00:00:00 2001
From: Radek Pilar
Date: Tue, 12 Dec 2023 17:03:44 +0100
Subject: [PATCH 1/3] convert : typo fix, add additional hyperparameters, use
 LLaMA arch for Mixtral-instruct

---
 convert-hf-to-gguf.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index bced1f5617a0f..770e10eb96300 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -77,8 +77,18 @@ def set_gguf_parameters(self):
             self.gguf_writer.add_embedding_length(n_embd)
         if (n_ff := self.hparams.get("intermediate_size")) is not None:
             self.gguf_writer.add_feed_forward_length(n_ff)
-        if (n_head := self.hparams.get("num_attention_head")) is not None:
+        if (n_head := self.hparams.get("num_attention_heads")) is not None:
             self.gguf_writer.add_head_count(n_head)
+        if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
+            self.gguf_writer.add_head_count_kv(n_head_kv)
+
+        if (n_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
+            self.gguf_writer.add_layer_norm_rms_eps(n_rms_eps)
+        if (n_experts := self.hparams.get("num_local_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+        if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
+            self.gguf_writer.add_expert_used_count(n_experts_used)
+
         self.gguf_writer.add_parallel_residual(self.hparams.get("use_parallel_residual", True))
 
     def write_tensors(self):
@@ -207,6 +217,8 @@ def _get_model_architecture(self) -> gguf.MODEL_ARCH:
             return gguf.MODEL_ARCH.STABLELM
         if arch == "QWenLMHeadModel":
             return gguf.MODEL_ARCH.QWEN
+        if arch == "MixtralForCausalLM":
+            return gguf.MODEL_ARCH.LLAMA
 
         raise NotImplementedError(f'Architecture "{arch}" not supported!')
 

From d6f74975a40beba94ed6f55c964c35aae66f5314 Mon Sep 17 00:00:00 2001
From: Radek Pilar
Date: Tue, 12 Dec 2023 17:05:37 +0100
Subject: [PATCH 2/3] convert : use sentencepiece tokenizer for Mixtral-instruct

---
 convert-hf-to-gguf.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 770e10eb96300..40358409ff394 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -180,6 +180,8 @@ def from_model_architecture(model_architecture):
             return StableLMModel
         if model_architecture == "QWenLMHeadModel":
             return QwenModel
+        if model_architecture == "MixtralForCausalLM":
+            return MixtralModel
         return Model
 
     def _is_model_safetensors(self) -> bool:
@@ -848,6 +850,11 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
         self.gguf_writer.add_layer_norm_eps(1e-5)
 
+class MixtralModel(Model):
+    def set_vocab(self):
+        self._set_vocab_sentencepiece()
+
+
 
 class QwenModel(Model):
     @staticmethod

From facb81b83cd6c9f65b9cccc62ccd3685e1904865 Mon Sep 17 00:00:00 2001
From: Radek Pilar
Date: Tue, 12 Dec 2023 19:57:17 +0100
Subject: [PATCH 3/3] convert : make flake8 happy

---
 convert-hf-to-gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 40358409ff394..e46a7813a78e9 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -850,12 +850,12 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
         self.gguf_writer.add_layer_norm_eps(1e-5)
 
+
 class MixtralModel(Model):
     def set_vocab(self):
         self._set_vocab_sentencepiece()
 
 
-
 class QwenModel(Model):
     @staticmethod
     def token_bytes_to_string(b):
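
Note (not part of the patches themselves): PATCH 1/3 relies on Mixtral's
config.json using the standard Hugging Face hyperparameter names, so the base
Model.set_gguf_parameters() only needs the extra lookups added above. Below is
a minimal, self-contained sketch of that key-to-GGUF mapping; the checkpoint
directory path is a hypothetical placeholder, and the right-hand side names
the GGUFWriter method the patch calls for each key.

import json

# Hypothetical path to a locally downloaded Mixtral checkpoint directory.
with open("Mixtral-8x7B-Instruct-v0.1/config.json") as f:
    hparams = json.load(f)

# config.json keys forwarded by PATCH 1/3, paired with the gguf_writer
# method that records each value in the output GGUF file.
mapping = {
    "num_attention_heads": "add_head_count",
    "num_key_value_heads": "add_head_count_kv",      # grouped-query attention
    "rms_norm_eps":        "add_layer_norm_rms_eps",
    "num_local_experts":   "add_expert_count",       # 8 for Mixtral 8x7B
    "num_experts_per_tok": "add_expert_used_count",  # 2 for Mixtral 8x7B
}

for key, writer_method in mapping.items():
    # Same guard style as the patch: silently skip keys absent from config.
    if (value := hparams.get(key)) is not None:
        print(f"{key} = {value}  ->  gguf_writer.{writer_method}(...)")

PATCH 2/3 then maps the "MixtralForCausalLM" architecture string to the new
MixtralModel class, whose only override is set_vocab(): Mixtral ships a
SentencePiece tokenizer.model like LLaMA rather than a BPE vocabulary, so
everything else is inherited from the base Model conversion path, and the
tensor layout reuses gguf.MODEL_ARCH.LLAMA as set up in PATCH 1/3.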