#!/usr/bin/env python3

import sys
import os
import shutil
import subprocess
from pathlib import Path

+ def run_command (command_list , cwd = None , check = True ):
10
+ print (f"Executing: { ' ' .join (map (str , command_list ))} " )
11
+ try :
12
+ process = subprocess .run (command_list , cwd = cwd , check = check , capture_output = False , text = True )
13
+ return process
14
+ except subprocess .CalledProcessError as e :
15
+ print (f"Error executing command: { ' ' .join (map (str , e .cmd ))} " )
16
+ print (f"Return code: { e .returncode } " )
17
+ raise
18
+
19
+ def main ():
20
+ if len (sys .argv ) < 2 :
21
+ script_name = Path (sys .argv [0 ]).name
22
+ print (f"Usage: python { script_name } <model-directory>" )
23
+ sys .exit (1 )
24
+
25
+ model_dir_arg = sys .argv [1 ]
26
+ model_dir = Path (model_dir_arg ).resolve ()
27
+
28
+ if not model_dir .is_dir ():
29
+ print (f"Error: Model directory '{ model_dir } ' not found or is not a directory." )
30
+ sys .exit (1 )
31
+
32
+ utils_dir = Path (__file__ ).parent .resolve ()
33
+ project_root_dir = utils_dir .parent
34
+
35
+ preprocess_script = utils_dir / "preprocess-huggingface-bitnet.py"
36
+ convert_script = utils_dir / "convert-ms-to-gguf-bitnet.py"
37
+
38
+ llama_quantize_binary = project_root_dir / "build" / "bin" / "llama-quantize"
39
+
40
+ input_file = model_dir / "model.safetensors"
41
+ input_backup_file = model_dir / "model.safetensors.backup"
42
+ preprocessed_output_file = model_dir / "model.safetensors"
43
+
44
+ gguf_f32_output = model_dir / "ggml-model-f32-bitnet.gguf"
45
+ gguf_i2s_output = model_dir / "ggml-model-i2s-bitnet.gguf"
46
+
47
+ if not preprocess_script .is_file ():
48
+ print (f"Error: Preprocess script not found at '{ preprocess_script } '" )
49
+ sys .exit (1 )
50
+ if not convert_script .is_file ():
51
+ print (f"Error: Convert script not found at '{ convert_script } '" )
52
+ sys .exit (1 )
53
+ if not llama_quantize_binary .is_file ():
54
+ print (f"Error: llama-quantize binary not found at '{ llama_quantize_binary } '" )
55
+ sys .exit (1 )
56
+
57
+ if not input_file .is_file ():
58
+ print (f"Error: Input safetensors file not found at '{ input_file } '" )
59
+ sys .exit (1 )
60
+
61
+ try :
62
+ print (f"Backing up '{ input_file } ' to '{ input_backup_file } '" )
63
+ if input_backup_file .exists ():
64
+ print (f"Warning: Removing existing backup file '{ input_backup_file } '" )
65
+ input_backup_file .unlink ()
66
+ shutil .move (input_file , input_backup_file )
67
+
68
+ print ("Preprocessing huggingface checkpoint..." )
69
+ cmd_preprocess = [
70
+ sys .executable ,
71
+ str (preprocess_script ),
72
+ "--input" , str (input_backup_file ),
73
+ "--output" , str (preprocessed_output_file )
74
+ ]
75
+ run_command (cmd_preprocess )
76
+
77
+ print ("Converting to GGUF (f32)..." )
78
+ cmd_convert = [
79
+ sys .executable ,
80
+ str (convert_script ),
81
+ str (model_dir ),
82
+ "--vocab-type" , "bpe" ,
83
+ "--outtype" , "f32" ,
84
+ "--concurrency" , "1" ,
85
+ "--outfile" , str (gguf_f32_output )
86
+ ]
87
+ run_command (cmd_convert )
88
+
89
+ print ("Quantizing model to I2_S..." )
90
+ cmd_quantize = [
91
+ str (llama_quantize_binary ),
92
+ str (gguf_f32_output ),
93
+ str (gguf_i2s_output ),
94
+ "I2_S" ,
95
+ "1"
96
+ ]
97
+ run_command (cmd_quantize )
98
+
99
+ print ("Convert successfully." )
100
+
101
+ except Exception as e :
102
+ print (f"An error occurred: { e } " )
103
+ finally :
104
+ print ("Cleaning up intermediate files..." )
105
+ if preprocessed_output_file .exists () and preprocessed_output_file != input_backup_file :
106
+ print (f"Removing preprocessed file: { preprocessed_output_file } " )
107
+ try :
108
+ preprocessed_output_file .unlink ()
109
+ except OSError as e :
110
+ print (f"Warning: Could not remove { preprocessed_output_file } : { e } " )
111
+
112
+ if gguf_f32_output .exists ():
113
+ print (f"Removing f32 GGUF: { gguf_f32_output } " )
114
+ try :
115
+ gguf_f32_output .unlink ()
116
+ except OSError as e :
117
+ print (f"Warning: Could not remove { gguf_f32_output } : { e } " )
118
+
119
+ if input_backup_file .exists ():
120
+ if not input_file .exists ():
121
+ print (f"Restoring original '{ input_file } ' from '{ input_backup_file } '" )
122
+ try :
123
+ shutil .move (input_backup_file , input_file )
124
+ except Exception as e :
125
+ print (f"Warning: Could not restore { input_file } from backup: { e } " )
126
+ else :
127
+ print (f"Removing backup '{ input_backup_file } ' as original '{ input_file } ' should be present." )
128
+ try :
129
+ input_backup_file .unlink ()
130
+ except OSError as e :
131
+ print (f"Warning: Could not remove backup { input_backup_file } : { e } " )
132
+
133
+ if __name__ == "__main__" :
134
+ main ()