
Commit 97b9cc3

slaren authored and pull[bot] committed
gguf : add rope_freq_base parameter for CodeLlama (#2769)
1 parent bcdaeee · commit 97b9cc3

File tree: 3 files changed, +48 −21 lines

  convert.py
  gguf.py
  llama.cpp

convert.py

Lines changed: 25 additions & 18 deletions
@@ -104,6 +104,8 @@ class Params:
     n_head_kv:  int
     f_norm_eps: float
 
+    f_rope_freq_base: Optional[float] = None
+
     ftype: Optional[GGMLFileType] = None
 
     # path to the directory containing the model files
@@ -194,15 +196,16 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
 def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
     config = json.load(open(config_path))
 
-    n_vocab    = config["vocab_size"] if "vocab_size" in config else -1
-    n_embd     = config["dim"]
-    n_layer    = config["n_layers"]
-    n_mult     = config["multiple_of"]
-    n_ctx      = 2048 if config["norm_eps"] == 1e-06 else 4096 # hack to determine LLaMA v1 vs v2
-    n_ff       = -1
-    n_head     = config["n_heads"]
-    n_head_kv  = config["n_kv_heads"] if "n_kv_heads" in config else n_head
-    f_norm_eps = config["norm_eps"]
+    n_vocab          = config["vocab_size"] if "vocab_size" in config else -1
+    n_embd           = config["dim"]
+    n_layer          = config["n_layers"]
+    n_mult           = config["multiple_of"]
+    n_ctx            = 2048 if config["norm_eps"] == 1e-06 else 4096 # hack to determine LLaMA v1 vs v2
+    n_ff             = -1
+    n_head           = config["n_heads"]
+    n_head_kv        = config["n_kv_heads"] if "n_kv_heads" in config else n_head
+    f_norm_eps       = config["norm_eps"]
+    f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None
 
     if n_vocab == -1:
         n_vocab = model["tok_embeddings.weight"].shape[0]
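For context, CodeLlama ships the larger RoPE base as a rope_theta field in params.json, which the new last assignment above picks up. A minimal sketch of that lookup with an illustrative config (the field values are assumptions for the example, not taken from this commit):

# Illustrative CodeLlama-style params.json contents (values are assumptions);
# older LLaMA configs simply omit "rope_theta".
config = {
    "dim": 4096,
    "multiple_of": 256,
    "n_heads": 32,
    "n_layers": 32,
    "norm_eps": 1e-05,
    "vocab_size": 32016,
    "rope_theta": 1000000.0,
}

# The new line in loadOriginalParamsJson forwards the value only when present.
f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None
print(f_rope_freq_base)  # 1000000.0 here; None for configs without the field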
@@ -211,15 +214,16 @@ def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
         n_ff = model["layers.0.feed_forward.w1.weight"].shape[0]
 
     return Params(
-        n_vocab    = n_vocab,
-        n_embd     = n_embd,
-        n_mult     = n_mult,
-        n_layer    = n_layer,
-        n_ctx      = n_ctx,
-        n_ff       = n_ff,
-        n_head     = n_head,
-        n_head_kv  = n_head_kv,
-        f_norm_eps = f_norm_eps,
+        n_vocab          = n_vocab,
+        n_embd           = n_embd,
+        n_mult           = n_mult,
+        n_layer          = n_layer,
+        n_ctx            = n_ctx,
+        n_ff             = n_ff,
+        n_head           = n_head,
+        n_head_kv        = n_head_kv,
+        f_norm_eps       = f_norm_eps,
+        f_rope_freq_base = f_rope_freq_base,
     )
 
     @staticmethod
@@ -754,6 +758,9 @@ def add_meta_arch(self, params: Params) -> None:
         self.gguf.add_head_count_kv     (params.n_head_kv)
         self.gguf.add_layer_norm_rms_eps(params.f_norm_eps)
 
+        if params.f_rope_freq_base:
+            self.gguf.add_rope_freq_base(params.f_rope_freq_base)
+
         if params.ftype:
             self.gguf.add_file_type(params.ftype)
 
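The guard in add_meta_arch is a plain truthiness check, so the metadata key is emitted only when the conversion actually found a rope_theta value; LLaMA 1/2 conversions keep producing files without it. A small sketch of that behaviour, where write_key is a hypothetical stand-in for the GGUFWriter call and "llama" is the architecture prefix used for LLaMA-family models:

from typing import Callable, Optional

def maybe_write_rope_freq_base(f_rope_freq_base: Optional[float],
                               write_key: Callable[[str, float], None]) -> None:
    # Same shape as the new add_meta_arch branch: None (and 0.0) fall through,
    # so no "<arch>.rope.freq_base" key is written for models that never set one.
    if f_rope_freq_base:
        write_key("llama.rope.freq_base", f_rope_freq_base)

maybe_write_rope_freq_base(1000000.0, lambda k, v: print(k, "=", v))  # llama.rope.freq_base = 1000000.0
maybe_write_rope_freq_base(None,      lambda k, v: print(k, "=", v))  # prints nothing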

gguf.py

Lines changed: 5 additions & 1 deletion
@@ -47,6 +47,7 @@
 
 # RoPE
 KEY_ROPE_DIMENSION_COUNT = "{arch}.rope.dimension_count"
+KEY_ROPE_FREQ_BASE       = "{arch}.rope.freq_base"
 KEY_ROPE_SCALE_LINEAR    = "{arch}.rope.scale_linear"
 
 # tokenization
@@ -663,7 +664,10 @@ def add_rope_dimension_count(self, count: int):
         self.add_uint32(
             KEY_ROPE_DIMENSION_COUNT.format(arch=self.arch), count)
 
-    def add_rope_scale_linear(self, value: float):
+    def add_rope_freq_base(self, value: float):
+        self.add_float32(KEY_ROPE_FREQ_BASE.format(arch=self.arch), value)
+
+    def add_rope_scale_linear(self, value: float):
         self.add_float32(KEY_ROPE_SCALE_LINEAR.format(arch=self.arch), value)
 
     def add_tokenizer_model(self, model: str):
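The new setter follows the same pattern as the existing RoPE keys: format the architecture name into the key template and store the value as a 32-bit float. A self-contained sketch of that pattern (MiniWriter is an illustrative stand-in, not the real GGUFWriter class):

KEY_ROPE_FREQ_BASE = "{arch}.rope.freq_base"

class MiniWriter:
    # Illustrative stand-in for GGUFWriter: collects key/value pairs in a dict
    # instead of serializing them into a GGUF file.
    def __init__(self, arch: str):
        self.arch = arch
        self.kv = {}

    def add_float32(self, key: str, value: float):
        self.kv[key] = value

    def add_rope_freq_base(self, value: float):
        # Same body as the method added in gguf.py above.
        self.add_float32(KEY_ROPE_FREQ_BASE.format(arch=self.arch), value)

writer = MiniWriter("llama")
writer.add_rope_freq_base(1000000.0)
print(writer.kv)  # {'llama.rope.freq_base': 1000000.0}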

llama.cpp

Lines changed: 18 additions & 2 deletions
@@ -195,6 +195,7 @@ enum llm_kv {
     LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
 
     LLM_KV_ROPE_DIMENSION_COUNT,
+    LLM_KV_ROPE_FREQ_BASE,
     LLM_KV_ROPE_SCALE_LINEAR,
 
     LLM_KV_TOKENIZER_MODEL,
@@ -238,6 +239,7 @@ static std::map<llm_kv, std::string> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,   "%s.attention.layer_norm_rms_epsilon" },
 
     { LLM_KV_ROPE_DIMENSION_COUNT,          "%s.rope.dimension_count" },
+    { LLM_KV_ROPE_FREQ_BASE,                "%s.rope.freq_base"       },
     { LLM_KV_ROPE_SCALE_LINEAR,             "%s.rope.scale_linear"    },
 
     { LLM_KV_TOKENIZER_MODEL,               "tokenizer.ggml.model"    },
@@ -1561,12 +1563,26 @@ static void llm_load_hparams(
     hparams.n_head_kv = hparams.n_head;
     GGUF_GET_KEY(ctx, hparams.n_head_kv, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_ATTENTION_HEAD_COUNT_KV));
 
-    // TODO: manually setting rope scale should override this
+    // TODO: manually setting rope freq base and scale should override this
+    // FIXME: partial fix when the param specified is not the default value, but
+    //        will not work for overriding the model value to the params default
+
+    llama_context_params defaults = llama_context_default_params();
+
+    // rope_freq_base
+    {
+        float ropebase = 10000.0f;
+        GGUF_GET_KEY(ctx, ropebase, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_FREQ_BASE));
+        if (ropebase != 10000.0f && rope_freq_base == defaults.rope_freq_base) {
+            rope_freq_base = ropebase;
+        }
+    }
+
     // rope_freq_scale (inverse of the kv) is optional
     {
         float ropescale = 1.0f;
         GGUF_GET_KEY(ctx, ropescale, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_SCALE_LINEAR));
-        if (ropescale != 1.0f) {
+        if (ropescale != 1.0f && rope_freq_scale == defaults.rope_freq_scale) {
            rope_freq_scale = 1.0f/ropescale;
        }
    }
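On the loading side, the file's value is adopted only when it differs from the 10000.0 default and the caller has not already overridden rope_freq_base; the chosen base then drives the usual RoPE frequency schedule. A hedged Python sketch of both pieces (the precedence rule mirrors the hunk above; the frequency formula is the standard RoPE schedule, written out here for illustration with an assumed rotation dimension of 128):

from typing import Optional

DEFAULT_ROPE_FREQ_BASE = 10000.0  # mirrors llama_context_default_params().rope_freq_base

def pick_rope_freq_base(file_value: Optional[float], user_value: float) -> float:
    # Mirrors llm_load_hparams: the GGUF key wins only if it is present, differs
    # from 10000.0, and the caller left rope_freq_base at the default.
    if (file_value is not None
            and file_value != DEFAULT_ROPE_FREQ_BASE
            and user_value == DEFAULT_ROPE_FREQ_BASE):
        return file_value
    return user_value

def rope_frequencies(freq_base: float, n_rot: int = 128):
    # Standard RoPE schedule: rotated pair i uses frequency freq_base ** (-2*i / n_rot).
    return [freq_base ** (-2.0 * i / n_rot) for i in range(n_rot // 2)]

base = pick_rope_freq_base(file_value=1000000.0, user_value=DEFAULT_ROPE_FREQ_BASE)
print(base)                           # 1000000.0 -> the file's CodeLlama base is used
print(rope_frequencies(base)[:3])     # the larger base yields lower per-pair frequencies
print(rope_frequencies(10000.0)[:3])  # (longer wavelengths) than the 10000.0 default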
