
Commit 6eeb4d9

convert: remove most of the n_mult usage in convert.py (#3098)
1 parent 21ac3a1 · commit 6eeb4d9

File tree: 1 file changed (+1, -17 lines)

convert.py

Lines changed: 1 addition & 17 deletions
@@ -145,7 +145,6 @@ def type_for_tensor(self, name: str, tensor: LazyTensor) -> DataType:
 class Params:
     n_vocab: int
     n_embd: int
-    n_mult: int
     n_layer: int
     n_ctx: int
     n_ff: int
@@ -161,15 +160,6 @@ class Params:
     # path to the directory containing the model files
     path_model: Path | None = None

-    @staticmethod
-    def find_n_mult(n_ff: int, n_embd: int) -> int:
-        # hardcoded magic range
-        for n_mult in range(8192, 1, -1):
-            calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
-            if calc_ff == n_ff:
-                return n_mult
-        raise Exception(f"failed to find n_mult for (n_ff={n_ff}, n_embd={n_embd}).")
-
     @staticmethod
     def guessed(model: LazyModel) -> Params:
         # try transformer naming first
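
Note on the helper removed above: it only brute-forced an n_mult whose rounding rule reproduces the already-known n_ff, i.e. it inverted (((8*n_embd) // 3 + n_mult - 1) // n_mult) * n_mult. A minimal standalone sketch of that search (not part of this commit; the n_embd=4096, n_ff=11008 inputs are the usual LLaMA-7B sizes, used here only as example values):

def find_n_mult(n_ff: int, n_embd: int) -> int:
    # same search as the removed code: scan a hardcoded range of candidate
    # multiples and return the first one whose rounding rule reproduces n_ff
    for n_mult in range(8192, 1, -1):
        calc_ff = (((8 * n_embd) // 3 + n_mult - 1) // n_mult) * n_mult
        if calc_ff == n_ff:
            return n_mult
    raise Exception(f"failed to find n_mult for (n_ff={n_ff}, n_embd={n_embd}).")

print(find_n_mult(n_ff=11008, n_embd=4096))  # prints 5504 for these example sizes

The result is just an artifact of the round-trip: the converter already has n_ff, which is all it needs, and widths that do not fit the pattern made the search raise.
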
@@ -197,7 +187,6 @@ def guessed(model: LazyModel) -> Params:
         return Params(
             n_vocab = n_vocab,
             n_embd = n_embd,
-            n_mult = n_mult,
             n_layer = n_layer,
             n_ctx = -1,
             n_ff = n_ff,
@@ -225,8 +214,6 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         else:
             f_rope_scale = None

-        n_mult = Params.find_n_mult(n_ff, n_embd)
-
         if "max_sequence_length" in config:
             n_ctx = config["max_sequence_length"]
         elif "max_position_embeddings" in config:
@@ -238,7 +225,6 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         return Params(
             n_vocab = n_vocab,
             n_embd = n_embd,
-            n_mult = n_mult,
             n_layer = n_layer,
             n_ctx = n_ctx,
             n_ff = n_ff,
@@ -250,15 +236,14 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         )

     # LLaMA v2 70B params.json
-    # {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1
+    # {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1}
     @staticmethod
     def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
         config = json.load(open(config_path))

         n_vocab = config["vocab_size"] if "vocab_size" in config else -1
         n_embd = config["dim"]
         n_layer = config["n_layers"]
-        n_mult = config["multiple_of"]
         n_ff = -1
         n_head = config["n_heads"]
         n_head_kv = config["n_kv_heads"] if "n_kv_heads" in config else n_head
@@ -285,7 +270,6 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
         return Params(
             n_vocab = n_vocab,
             n_embd = n_embd,
-            n_mult = n_mult,
             n_layer = n_layer,
             n_ctx = n_ctx,
             n_ff = n_ff,
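
The fixed params.json comment above also hints at why carrying n_mult is unnecessary: in the reference LLaMA-2 sizing (as commonly described, not shown in this diff), "multiple_of" and "ffn_dim_multiplier" only feed into the feed-forward width, and here n_ff is set to -1 and presumably recovered later from the model tensors rather than from those fields. A sketch of that sizing rule, using the 70B values from the comment:

# Sketch of the assumed reference LLaMA-2 feed-forward sizing; the values
# below come from the 70B params.json comment in the hunk above.
dim = 8192
multiple_of = 4096
ffn_dim_multiplier = 1.3

hidden = int(2 * (4 * dim) / 3)                 # take 2/3 of 4*dim
hidden = int(ffn_dim_multiplier * hidden)       # apply the optional scale factor
n_ff = multiple_of * ((hidden + multiple_of - 1) // multiple_of)  # round up to a multiple
print(n_ff)  # 28672 for these values
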

0 commit comments
