@@ -1432,17 +1432,18 @@ def load_vocab(
     return vocab, special_vocab


-def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
+def default_output_file(model_paths: list[Path], file_type: GGMLFileType) -> Path:
     namestr = {
-        GGMLFileType.AllF32:    "f32",
+        GGMLFileType.AllF32: "f32",
         GGMLFileType.MostlyF16: "f16",
-        GGMLFileType.MostlyQ8_0:"q8_0",
+        GGMLFileType.MostlyQ8_0: "q8_0",
     }[file_type]
     ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
     if ret in model_paths:
         sys.stderr.write(
             f"Error: Default output path ({ret}) would overwrite the input. "
-            "Please explicitly specify a path using --outfile.\n")
+            "Please explicitly specify a path using --out-file.\n"
+        )
         sys.exit(1)
     return ret

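Side note (not part of the commit): a minimal sketch of what the renamed default_output_file computes, assuming it runs inside convert.py where GGMLFileType is defined; the model path is made up for illustration.

    from pathlib import Path

    # A hypothetical single-file model converted at f16: the default output
    # path is placed next to the input as ggml-model-f16.gguf.
    paths = [Path("models/7B/consolidated.00.pth")]
    out = default_output_file(paths, GGMLFileType.MostlyF16)
    assert out == Path("models/7B/ggml-model-f16.gguf")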
@@ -1452,29 +1453,107 @@ def do_dump_model(model_plus: ModelPlus) -> None:
     print(f"model_plus.format = {model_plus.format!r}")
     print(f"model_plus.vocab = {model_plus.vocab!r}")
     for name, lazy_tensor in model_plus.model.items():
-        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
+        print(
+            f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}"
+        )


-def main(args_in: list[str] | None = None) -> None:
+def get_argument_parser() -> argparse.ArgumentParser:
     output_choices = ["f32", "f16"]
     if np.uint32(1) == np.uint32(1).newbyteorder("<"):
         # We currently only support Q8_0 output on little endian systems.
         output_choices.append("q8_0")
-    parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
-    parser.add_argument("--awq-path", type=Path, help="Path to scale awq cache file", default=None)
-    parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
-    parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
-    parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
-    parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
-    parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
-    parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
-    parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
-    parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
-    parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default=DEFAULT_CONCURRENCY)
-    parser.add_argument("--bigendian", action="store_true", help="model is executed on big endian machine")
-    parser.add_argument("--padvocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
-
-    args = parser.parse_args(args_in)
+
+    parser = argparse.ArgumentParser(
+        description="Convert a LLaMa model to a GGML compatible file"
+    )
+
+    parser.add_argument(
+        "model",
+        type=Path,
+        help="Directory containing the model file or the model file itself (*.pth, *.pt, *.bin)",
+    )
+
+    parser.add_argument(
+        "--awq-path",
+        type=Path,
+        help="Path to the Activation-aware Weight Quantization cache file",
+        default=None,
+    )
+
+    parser.add_argument(
+        "--dump",
+        action="store_true",
+        help="Display the model content without converting it",
+    )
+
+    parser.add_argument(
+        "--dump-single",
+        action="store_true",
+        help="Display the content of a single model file without conversion",
+    )
+
+    parser.add_argument(
+        "--vocab-only",
+        action="store_true",
+        help="Extract and output only the vocabulary",
+    )
+
+    parser.add_argument(
+        "--out-type",
+        choices=output_choices,
+        help="Output format - note: q8_0 may be very slow (default: f16 or f32 based on input)",
+    )
+
+    parser.add_argument(
+        "--vocab-dir",
+        type=Path,
+        help="Directory containing the tokenizer.model, if separate from the model file",
+    )
+
+    parser.add_argument(
+        "--vocab-type",
+        choices=["spm", "bpe", "hfft"],  # hfft: Hugging Face Fast Tokenizer
+        default="spm",
+        help="The vocabulary format used to define the tokenizer model (default: spm)",
+    )
+
+    parser.add_argument(
+        "--pad-vocab",
+        action="store_true",
+        help="Add padding tokens when the model's vocabulary size exceeds the tokenizer metadata",
+    )
+
+    parser.add_argument(
+        "--out-file",
+        type=Path,
+        help="Specify the path for the output file (default is based on input)",
+    )
+
+    parser.add_argument(
+        "--ctx", type=int, help="Model training context (default is based on input)"
+    )
+
+    parser.add_argument(
+        "--concurrency",
+        type=int,
+        help=f"Concurrency used for conversion (default: {DEFAULT_CONCURRENCY})",
+        default=DEFAULT_CONCURRENCY,
+    )
+
+    parser.add_argument(
+        "--big-endian",
+        action="store_true",
+        help="Indicate that the model is executed on a big-endian machine",
+    )
+
+    return parser
+
+
+def main(argv: list[str] | None = None) -> None:
+    parser = get_argument_parser()
+    args = parser.parse_args(argv)
+
     if args.awq_path:
         sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
         from awq.apply_awq import add_scale_weights
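Side note (not part of the commit): because argument construction now lives in get_argument_parser() and main() takes an explicit argv list instead of implicitly reading sys.argv, the converter can be exercised programmatically, e.g. from a test. The model directory below is hypothetical, and the snippet assumes it runs inside convert.py.

    from pathlib import Path

    # Build the parser exactly as the CLI entry point does, then parse a
    # synthetic argv; argparse maps --out-type to the out_type attribute.
    parser = get_argument_parser()
    args = parser.parse_args(["models/7B", "--out-type", "f16"])
    assert args.out_type == "f16"
    assert isinstance(args.model, Path)

    # Equivalent to running: python convert.py models/7B --vocab-only
    main(["models/7B", "--vocab-only"])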