@@ -105,6 +105,7 @@ class Params:
     f_norm_eps: float

     f_rope_freq_base: Optional[float] = None
+    f_rope_scale: Optional[float] = None

     ftype: Optional[GGMLFileType] = None

@@ -169,6 +170,11 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
         f_norm_eps = config["rms_norm_eps"]
         f_rope_freq_base = config["rope_theta"] if "rope_theta" in config else None

+        if "rope_scaling" in config and config["rope_scaling"].get("type") == "linear":
+            f_rope_scale = config["rope_scaling"].get("factor")
+        else:
+            f_rope_scale = None
+
         n_mult = Params.find_n_mult(n_ff, n_embd)

         if "max_sequence_length" in config:
@@ -190,6 +196,7 @@ def loadHFTransformerJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
             n_head_kv        = n_head_kv,
             f_norm_eps       = f_norm_eps,
             f_rope_freq_base = f_rope_freq_base,
+            f_rope_scale     = f_rope_scale,
         )

     # LLaMA v2 70B params.json
@@ -773,6 +780,9 @@ def add_meta_arch(self, params: Params) -> None:
         if params.f_rope_freq_base:
             self.gguf.add_rope_freq_base(params.f_rope_freq_base)

+        if params.f_rope_scale:
+            self.gguf.add_rope_scale_linear(params.f_rope_scale)
+
         if params.ftype:
             self.gguf.add_file_type(params.ftype)

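Note that `add_meta_arch` guards the write with a plain truthiness check, so a scale of `None` simply skips the GGUF metadata entry. A rough sketch of that behavior with a hypothetical stand-in writer (only the `add_rope_scale_linear` method name comes from the patch):

    # Hypothetical stand-in writer; only the method name mirrors the patch.
    from typing import Optional

    class StubGGUFWriter:
        def add_rope_scale_linear(self, scale: float) -> None:
            print(f"writing rope scale {scale}")

    def write_rope_scale(gguf: StubGGUFWriter, f_rope_scale: Optional[float]) -> None:
        if f_rope_scale:  # None (or 0.0) is skipped entirely
            gguf.add_rope_scale_linear(f_rope_scale)

    write_rope_scale(StubGGUFWriter(), 2.0)   # prints: writing rope scale 2.0
    write_rope_scale(StubGGUFWriter(), None)  # writes nothing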