From 87daf40964ac100bb7e0330408fd5be3e27c08a7 Mon Sep 17 00:00:00 2001 From: pockers21 Date: Thu, 29 May 2025 10:40:00 +0800 Subject: [PATCH 1/3] convert: add eagle2 draft arch --- convert_hf_to_gguf.py | 17 +++++++++++++++++ gguf-py/gguf/constants.py | 19 +++++++++++++++++++ gguf-py/gguf/tensor_mapping.py | 4 ++++ 3 files changed, 40 insertions(+) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 8fcff0de7d6ba..9358b44226871 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2711,6 +2711,23 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] yield from super().modify_tensors(data_torch, name, bid) +@ModelBase.register("Eagle2DraftForCausalLM") +class Eagle2DraftModel(TextModel): + model_arch = gguf.MODEL_ARCH.EAGLE2_DRAFT + + def set_vocab(self): + try: + self._set_vocab_sentencepiece() + except FileNotFoundError: + self._set_vocab_gpt2() + + def set_gguf_parameters(self): + super().set_gguf_parameters() + if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]: + if self.hparams["rope_scaling"].get("type") == "yarn": + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) + self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"]) + self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"]) @ModelBase.register( "Qwen2VLModel", diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 635b61f224b1e..4055ad17f76fe 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -300,6 +300,7 @@ class MODEL_ARCH(IntEnum): QWEN2VL = auto() QWEN3 = auto() QWEN3MOE = auto() + EAGLE2_DRAFT = auto() PHI2 = auto() PHI3 = auto() PHIMOE = auto() @@ -360,6 +361,7 @@ class MODEL_TENSOR(IntEnum): TOKEN_EMBD_NORM = auto() TOKEN_TYPES = auto() POS_EMBD = auto() + FC = auto() OUTPUT = auto() OUTPUT_NORM = auto() ROPE_FREQS = auto() @@ -580,6 +582,7 @@ class 
MODEL_TENSOR(IntEnum): MODEL_ARCH.QWEN2VL: "qwen2vl", MODEL_ARCH.QWEN3: "qwen3", MODEL_ARCH.QWEN3MOE: "qwen3moe", + MODEL_ARCH.EAGLE2_DRAFT: "eagle2-draft", MODEL_ARCH.PHI2: "phi2", MODEL_ARCH.PHI3: "phi3", MODEL_ARCH.PHIMOE: "phimoe", @@ -640,6 +643,7 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm", MODEL_TENSOR.TOKEN_TYPES: "token_types", MODEL_TENSOR.POS_EMBD: "position_embd", + MODEL_TENSOR.FC: "fc", MODEL_TENSOR.OUTPUT_NORM: "output_norm", MODEL_TENSOR.OUTPUT: "output", MODEL_TENSOR.ROPE_FREQS: "rope_freqs", @@ -1207,6 +1211,21 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, ], + MODEL_ARCH.EAGLE2_DRAFT: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.FC, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + + ], MODEL_ARCH.QWEN2MOE: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.OUTPUT_NORM, diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 48167dd648c0a..3addec6ed1627 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -58,6 +58,10 @@ class TensorNameMap: "wpe", # gpt2 ), + #eagle2 draft model + MODEL_TENSOR.FC: ( + "model.fc", + ), # Output MODEL_TENSOR.OUTPUT: ( "embed_out", # gptneox From 4b4975c3969d7fce38c77b163448c8678626a37b Mon Sep 17 00:00:00 2001 From: pockers21 Date: Fri, 30 May 2025 14:08:18 +0800 Subject: [PATCH 2/3] fix: resolve code formatting issues --- convert_hf_to_gguf.py | 2 ++ gguf-py/gguf/tensor_mapping.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 9358b44226871..9eada7c674da5 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2711,6 +2711,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] yield from 
super().modify_tensors(data_torch, name, bid) + @ModelBase.register("Eagle2DraftForCausalLM") class Eagle2DraftModel(TextModel): model_arch = gguf.MODEL_ARCH.EAGLE2_DRAFT @@ -2729,6 +2730,7 @@ def set_gguf_parameters(self): self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"]) self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"]) + @ModelBase.register( "Qwen2VLModel", "Qwen2VLForConditionalGeneration", diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 3addec6ed1627..455bd685d0999 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -58,9 +58,9 @@ class TensorNameMap: "wpe", # gpt2 ), - #eagle2 draft model + # eagle2 draft model MODEL_TENSOR.FC: ( - "model.fc", + "model.fc", ), # Output MODEL_TENSOR.OUTPUT: ( From ec22e4b42e3fdd60974ccbe76fa6d084a4d4ed5c Mon Sep 17 00:00:00 2001 From: pockers21 Date: Sun, 1 Jun 2025 18:01:45 +0800 Subject: [PATCH 3/3] refactor: make Eagle2DraftModel inherit from Qwen2Model instead of TextModel --- convert_hf_to_gguf.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 9eada7c674da5..b45f30fd37b22 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2713,23 +2713,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter @ModelBase.register("Eagle2DraftForCausalLM") -class Eagle2DraftModel(TextModel): +class Eagle2DraftModel(Qwen2Model): model_arch = gguf.MODEL_ARCH.EAGLE2_DRAFT - def set_vocab(self): - try: - self._set_vocab_sentencepiece() - except FileNotFoundError: - self._set_vocab_gpt2() - - def set_gguf_parameters(self): - super().set_gguf_parameters() - if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]: - 
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) - self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"]) - self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"]) - @ModelBase.register( "Qwen2VLModel",