@@ -48,28 +48,27 @@ class Model:
 
     dir_model: Path
     ftype: int
-    fname_out: Path
     is_big_endian: bool
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
-    gguf_writer: gguf.GGUFWriter
     block_count: int
     tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
+    fname_out: Path
+    gguf_writer: gguf.GGUFWriter
 
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
-    def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
         self.dir_model = dir_model
         self.ftype = ftype
-        self.fname_out = fname_out
         self.is_big_endian = is_big_endian
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
@@ -79,7 +78,6 @@ def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian:
         if not self.is_safetensors:
             self.part_names = Model.get_model_part_names(self.dir_model, ".bin")
         self.hparams = Model.load_hparams(self.dir_model)
-        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
         self.tensor_names = None
@@ -92,6 +90,11 @@ def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian:
             else:
                 logger.info(f"choosing --outtype bf16 from first tensor type ({first_tensor.dtype})")
                 self.ftype = gguf.LlamaFileType.MOSTLY_BF16
+        ftype_up: str = self.ftype.name.partition("_")[2].upper()
+        ftype_lw: str = ftype_up.lower()
+        # allow templating the file name with the output ftype, useful with the "auto" ftype
+        self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
+        self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
 
     @classmethod
     def __init_subclass__(cls):
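The templating added above relies on two details: `LlamaFileType` member names follow an `ALL_*`/`MOSTLY_*` pattern, so `partition("_")[2]` strips the prefix, and `str.format` ignores unused arguments, so a file name with no placeholder passes through unchanged. A minimal sketch of that behavior (the enum here is a stand-in for `gguf.LlamaFileType` with illustrative values):

```python
from enum import IntEnum
from pathlib import Path

class LlamaFileType(IntEnum):
    # stand-in for gguf.LlamaFileType; member names mirror the real enum,
    # values are illustrative
    ALL_F32 = 0
    MOSTLY_F16 = 1
    MOSTLY_BF16 = 32

def template_fname(fname_out: Path, ftype: LlamaFileType) -> Path:
    # "MOSTLY_BF16".partition("_") -> ("MOSTLY", "_", "BF16")
    ftype_up = ftype.name.partition("_")[2].upper()
    ftype_lw = ftype_up.lower()
    # unused format arguments are ignored; a name without any placeholder
    # is returned as-is
    return fname_out.parent / fname_out.name.format(
        ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)

print(template_fname(Path("ggml-model-{ftype}.gguf"), LlamaFileType.MOSTLY_BF16))
# ggml-model-bf16.gguf
print(template_fname(Path("model-{}.gguf"), LlamaFileType.ALL_F32))
# model-f32.gguf  (the bare positional placeholder also works)
print(template_fname(Path("model.gguf"), LlamaFileType.MOSTLY_F16))
# model.gguf  (no placeholder, name unchanged)
```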
@@ -2400,11 +2403,11 @@ def parse_args() -> argparse.Namespace:
     )
     parser.add_argument(
         "--outfile", type=Path,
-        help="path to write to; default: based on input",
+        help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16", "bf16", "auto-f16"], default="f16",
-        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, auto-f16 for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
+        "--outtype", type=str, choices=["f32", "f16", "bf16", "auto"], default="f16",
+        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16, auto for the highest-fidelity 16-bit float type depending on the first loaded tensor type",
     )
     parser.add_argument(
         "--bigendian", action="store_true",
@@ -2462,14 +2465,14 @@ def main() -> None:
         "f32": gguf.LlamaFileType.ALL_F32,
         "f16": gguf.LlamaFileType.MOSTLY_F16,
         "bf16": gguf.LlamaFileType.MOSTLY_BF16,
-        "auto-f16": gguf.LlamaFileType.GUESSED,  # TODO: use a more appropriate "auto" type
+        "auto": gguf.LlamaFileType.GUESSED,
     }
 
     if args.outfile is not None:
         fname_out = args.outfile
     else:
         # output in the same directory as the model by default
-        fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'
+        fname_out = dir_model / 'ggml-model-{ftype}.gguf'
 
     logger.info(f"Loading model: {dir_model.name}")
 
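Note that the new default is a plain string, not an f-string: the `{ftype}` placeholder is deliberately left unexpanded here so that `Model.__init__` can fill it in after `auto` has been resolved to a concrete file type. A minimal sketch of the difference (variable names are illustrative, and the real constructor passes more format arguments):

```python
from pathlib import Path

outtype = "auto"  # what args.outtype holds with --outtype auto
old_default = Path(f'ggml-model-{outtype}.gguf')  # baked in too early: ggml-model-auto.gguf
new_default = Path('ggml-model-{ftype}.gguf')     # placeholder survives until ...
# ... Model.__init__ formats it with the resolved type:
print(new_default.name.format(ftype="bf16"))      # ggml-model-bf16.gguf
```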
@@ -2488,13 +2491,13 @@ def main() -> None:
     model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
 
     if args.vocab_only:
-        logger.info(f"Exporting model vocab to '{fname_out}'")
+        logger.info(f"Exporting model vocab to '{model_instance.fname_out}'")
         model_instance.write_vocab()
     else:
-        logger.info(f"Exporting model to '{fname_out}'")
+        logger.info(f"Exporting model to '{model_instance.fname_out}'")
         model_instance.write()
 
-    logger.info(f"Model successfully exported to '{fname_out}'")
+    logger.info(f"Model successfully exported to '{model_instance.fname_out}'")
 
 
 if __name__ == '__main__':