@@ -1555,8 +1555,9 @@ def main(argv: Optional[list[str]] = None) -> None:
     args = parser.parse_args(argv)
 
     if args.awq_path:
-        sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
+        sys.path.insert(1, str(Path(__file__).resolve().parent / "awq-py"))
         from awq.apply_awq import add_scale_weights
+
         tmp_model_path = args.model / "weighted_model"
         if tmp_model_path.is_dir():
             print(f"{tmp_model_path} exists as a weighted model.")
@@ -1575,74 +1576,83 @@ def main(argv: Optional[list[str]] = None) -> None:
     if not args.vocab_only:
         model_plus = load_some_model(args.model)
     else:
-        model_plus = ModelPlus(model = {}, paths = [args.model / 'dummy'], format = 'none', vocab = None)
+        model_plus = ModelPlus(
+            model={}, paths=[args.model / "dummy"], format="none", vocab=None
+        )
 
     if args.dump:
         do_dump_model(model_plus)
         return
+
     endianess = gguf.GGUFEndian.LITTLE
-    if args.bigendian:
+    if args.big_endian:
         endianess = gguf.GGUFEndian.BIG
 
     params = Params.load(model_plus)
     if params.n_ctx == -1:
         if args.ctx is None:
-            raise Exception("The model doesn't have a context size, and you didn't specify one with --ctx\n"
-                            "Please specify one with --ctx:\n"
-                            " - LLaMA v1: --ctx 2048\n"
-                            " - LLaMA v2: --ctx 4096\n")
+            raise Exception(
+                "The model doesn't have a context size, and you didn't specify one with --ctx\n"
+                "Please specify one with --ctx:\n"
+                " - LLaMA v1: --ctx 2048\n"
+                " - LLaMA v2: --ctx 4096\n"
+            )
         params.n_ctx = args.ctx
 
-    if args.outtype:
+    if args.out_type:
         params.ftype = {
             "f32": GGMLFileType.AllF32,
             "f16": GGMLFileType.MostlyF16,
             "q8_0": GGMLFileType.MostlyQ8_0,
-        }[args.outtype]
+        }[args.out_type]
 
     print(f"params = {params}")
 
-    vocab: Vocab
+    model_parent_path = model_plus.paths[0].parent
+    vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
+    vocab_factory = VocabFactory(vocab_path)
+    vocab, special_vocab = vocab_factory.load_vocab(args.vocab_type, model_parent_path)
+
     if args.vocab_only:
-        if not args.outfile:
-            raise ValueError("need --outfile if using --vocab-only")
-        # FIXME: Try to respect vocab_dir somehow?
-        vocab = VocabLoader(params, args.vocab_dir or args.model)
-        special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
-                                          load_merges = True,
-                                          n_vocab = vocab.vocab_size)
-        outfile = args.outfile
-        OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
-                                    endianess = endianess, pad_vocab = args.padvocab)
-        print(f"Wrote {outfile}")
+        if not args.out_file:
+            raise ValueError("need --out-file if using --vocab-only")
+        out_file = args.out_file
+        OutputFile.write_vocab_only(
+            out_file,
+            params,
+            vocab,
+            special_vocab,
+            endianess=endianess,
+            pad_vocab=args.pad_vocab,
+        )
+        print(f"Wrote {out_file}")
         return
 
     if model_plus.vocab is not None and args.vocab_dir is None:
         vocab = model_plus.vocab
-    else:
-        vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
-        vocab = VocabLoader(params, vocab_dir)
-
-    # FIXME: Try to respect vocab_dir somehow?
-    print(f"Vocab info: {vocab}")
-    special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
-                                      load_merges = True,
-                                      n_vocab = vocab.vocab_size)
-
-    print(f"Special vocab info: {special_vocab}")
-    model = model_plus.model
-    model = convert_model_names(model, params)
-    ftype = pick_output_type(model, args.outtype)
-    model = convert_to_output_type(model, ftype)
-    outfile = args.outfile or default_outfile(model_plus.paths, ftype)
 
-    params.ftype = ftype
-    print(f"Writing {outfile}, format {ftype}")
+    model = model_plus.model
+    model = convert_model_names(model, params)
+    ftype = pick_output_type(model, args.out_type)
+    model = convert_to_output_type(model, ftype)
+    out_file = args.out_file or default_output_file(model_plus.paths, ftype)
 
-    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
-                         concurrency = args.concurrency, endianess = endianess, pad_vocab = args.padvocab)
-    print(f"Wrote {outfile}")
+    params.ftype = ftype
+    print(f"Writing {out_file}, format {ftype}")
+
+    OutputFile.write_all(
+        out_file,
+        ftype,
+        params,
+        model,
+        vocab,
+        special_vocab,
+        concurrency=args.concurrency,
+        endianess=endianess,
+        pad_vocab=args.pad_vocab,
+    )
+    print(f"Wrote {out_file}")
 
 
-if __name__ == '__main__':
-    main()
+if __name__ == "__main__":
+    main(sys.argv[1:])  # Exclude the first element (script name) from sys.argv
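
With `main()` now taking an explicit `argv` and the `__main__` guard passing `sys.argv[1:]`, the converter can be driven from another script or a test without patching `sys.argv`. A minimal sketch, assuming the script is importable as `convert` and that the renamed argparse dests correspond to `--out-file` / `--out-type` flags; the model path and output name below are placeholders:

```python
# Hypothetical programmatic invocation of the refactored entry point.
# `convert` is assumed to be convert.py on the import path; the paths are
# placeholders, and "f16" is one of the ftype choices listed in the diff.
import convert

convert.main([
    "models/llama-7b",                               # model directory (placeholder)
    "--out-type", "f16",                             # -> args.out_type -> GGMLFileType.MostlyF16
    "--out-file", "models/llama-7b/model-f16.gguf",  # -> args.out_file
])
```

Note also the vocab path precedence introduced above: `Path(args.vocab_dir or args.model or model_parent_path)` falls back left to right, so an explicit `--vocab-dir` wins over the model directory, which in turn wins over the parent of the first loaded model file.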