1 file changed: +7 -3 lines changed

```diff
@@ -7,6 +7,7 @@
 import enum
 import faulthandler
 import functools
+import hashlib
 import itertools
 import json
 import math
```
```diff
@@ -1630,6 +1631,11 @@ def write_llama3_tokenizer(tokenizer_path, input_tokenizer_path):
     tokenizer.save_pretrained(tokenizer_path)
     return tokenizer
 
+def is_llama3_tokenizer(tokenizer_path) -> bool:
+    llama3_tokenizer_model_hash: str = "82e9d31979e92ab929cd544440f129d9ecd797b69e327f80f17e1c50d5551b55"
+    with open(tokenizer_path, "rb") as f:
+        tokenizer_hash = hashlib.sha256(f.read()).hexdigest()
+    return llama3_tokenizer_model_hash == tokenizer_hash
 
 def main(args_in: list[str] | None = None) -> None:
     output_choices = ["f32", "f16"]
```
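For reference, the detection added above keys on one exact artifact: the SHA-256 digest of `tokenizer.model` must equal the pinned hash of the Llama 3 tokenizer. Below is a minimal standalone sketch of the same check; the chunked file reading and the constant name are illustrative choices, not part of the patch:

```python
import hashlib

# Known SHA-256 of the Llama 3 tokenizer.model, copied from the patch above.
LLAMA3_TOKENIZER_SHA256 = "82e9d31979e92ab929cd544440f129d9ecd797b69e327f80f17e1c50d5551b55"

def sha256_of_file(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash a file in fixed-size chunks so large files need not fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

def is_llama3_tokenizer(tokenizer_path: str) -> bool:
    """True only when the file's digest matches the pinned Llama 3 hash."""
    return sha256_of_file(tokenizer_path) == LLAMA3_TOKENIZER_SHA256
```

Pinning to a digest rather than a filename means any modified or truncated tokenizer fails the check, at the cost of needing a new hash whenever upstream ships a different `tokenizer.model`.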
```diff
@@ -1667,12 +1673,10 @@ def main(args_in: list[str] | None = None) -> None:
     metadata = Metadata.load(args.metadata)
 
     #TODO: add more bandaids for llama 3 detection
-    try:
+    if is_llama3_tokenizer(os.path.join(args.model, "tokenizer.model")):
         global is_llama3_model
         write_llama3_tokenizer(args.model, os.path.join(args.model, "tokenizer.model"))
         is_llama3_model = True
-    except:
-        pass
 
     if args.get_outfile:
         model_plus = load_some_model(args.model)
```
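This second hunk replaces a bare `try`/`except: pass`, which silently swallowed every error raised inside `write_llama3_tokenizer`, with an explicit check, so only a byte-identical Llama 3 tokenizer takes this path and genuine conversion failures now propagate. One behavioral difference: `is_llama3_tokenizer` opens the file unconditionally, so a model directory without a `tokenizer.model` now raises `FileNotFoundError` where the old code fell through quietly. A sketch of a defensive variant (the guard is an illustrative addition, not part of the patch):

```python
import os

def is_llama3_tokenizer_safe(tokenizer_path: str) -> bool:
    # Illustrative variant, not in the patch: treat a missing tokenizer.model
    # as "not Llama 3" instead of letting open() raise FileNotFoundError.
    if not os.path.isfile(tokenizer_path):
        return False
    return is_llama3_tokenizer(tokenizer_path)
```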