
Commit ac43576

make-ggml.py : compatibility with more models and GGUF (#3290)
* Resync my fork with new llama.cpp commits
* examples : rename to use dash instead of underscore
* New model conversions

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 20c7e1e commit ac43576

File tree

1 file changed (+19, -14)

examples/make-ggml.py

Lines changed: 19 additions & 14 deletions
@@ -1,22 +1,25 @@
 #!/usr/bin/env python3
 """
-This script converts Hugging Face llama models to GGML and quantizes them.
+This script converts Hugging Face Llama, StarCoder, Falcon, Baichuan, and GPT-NeoX models to GGUF and quantizes them.
 
 Usage:
-python make-ggml.py --model {model_dir_or_hf_repo_name} [--outname {output_name} (Optional)] [--outdir {output_directory} (Optional)] [--quants {quant_types} (Optional)] [--keep_fp16 (Optional)]
+python make-ggml.py {model_dir_or_hf_repo_name} --model_type {model_type} [--outname {output_name} (Optional)] [--outdir {output_directory} (Optional)] [--quants {quant_types} (Optional)] [--keep_fp16 (Optional)]
 
 Arguments:
-- --model: (Required) The directory of the downloaded Hugging Face model or the name of the Hugging Face model repository. If the model directory does not exist, it will be downloaded from the Hugging Face model hub.
+- model: (Required) The directory of the downloaded Hugging Face model or the name of the Hugging Face model repository. If the model directory does not exist, it will be downloaded from the Hugging Face model hub.
+- --model_type: (Required) The type of the model to be converted. Choose from llama, starcoder, falcon, baichuan, or gptneox.
 - --outname: (Optional) The name of the output model. If not specified, the last part of the model directory path or the Hugging Face model repo name will be used.
 - --outdir: (Optional) The directory where the output model(s) will be stored. If not specified, '../models/{outname}' will be used.
 - --quants: (Optional) The types of quantization to apply. This should be a space-separated list. The default is 'Q4_K_M Q5_K_S'.
 - --keep_fp16: (Optional) If specified, the FP16 model will not be deleted after the quantized models are created.
 
-Quant types:
+Old quant types (some base model types require these):
 - Q4_0: small, very high quality loss - legacy, prefer using Q3_K_M
 - Q4_1: small, substantial quality loss - legacy, prefer using Q3_K_L
 - Q5_0: medium, balanced quality - legacy, prefer using Q4_K_M
 - Q5_1: medium, low quality loss - legacy, prefer using Q5_K_M
+
+New quant types (recommended):
 - Q2_K: smallest, extreme quality loss - not recommended
 - Q3_K: alias for Q3_K_M
 - Q3_K_S: very small, very high quality loss
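
As an illustration of the new argument shape in this hunk (positional model plus a required --model_type), here is a minimal, self-contained sketch of how the parser behaves; the model path passed in is a hypothetical placeholder, not something from this commit:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('model')
parser.add_argument('--model_type', required=True,
                    choices=['llama', 'starcoder', 'falcon', 'baichuan', 'gptneox'])
parser.add_argument('--quants', nargs='*', default=["Q4_K_M", "Q5_K_S"])

# Example: a (hypothetical) local model dir with the default quant types.
args = parser.parse_args(['./my-llama-model', '--model_type', 'llama'])
print(args.model, args.model_type, args.quants)
# -> ./my-llama-model llama ['Q4_K_M', 'Q5_K_S']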
@@ -40,9 +43,7 @@
 import os
 from huggingface_hub import snapshot_download
 
-def main(model, outname, outdir, quants, keep_fp16):
-    ggml_version = "v3"
-
+def main(model, model_type, outname, outdir, quants, keep_fp16):
     if not os.path.isdir(model):
         print(f"Model not found at {model}. Downloading...")
         try:
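
The hunk above cuts off at the try block; for context, a minimal sketch of what the download fallback amounts to, assuming huggingface_hub is installed (the error handling here is illustrative, not copied from the file):

import os
from huggingface_hub import snapshot_download

def ensure_model(model: str) -> str:
    # If `model` is not a local directory, treat it as a HF repo id and fetch it.
    if not os.path.isdir(model):
        print(f"Model not found at {model}. Downloading...")
        try:
            # snapshot_download returns the local path of the cached repo
            model = snapshot_download(repo_id=model)
        except Exception as e:
            raise SystemExit(f"Download failed: {e}")
    return model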
@@ -63,31 +64,35 @@ def main(model, outname, outdir, quants, keep_fp16):
     print("Building llama.cpp")
     subprocess.run(f"cd .. && make quantize", shell=True, check=True)
 
-    fp16 = f"{outdir}/{outname}.ggml{ggml_version}.fp16.bin"
+    fp16 = f"{outdir}/{outname}.gguf.fp16.bin"
 
-    print(f"Making unquantised GGML at {fp16}")
+    print(f"Making unquantised GGUF at {fp16}")
     if not os.path.isfile(fp16):
-        subprocess.run(f"python3 ../convert.py {model} --outtype f16 --outfile {fp16}", shell=True, check=True)
+        if model_type != "llama":
+            subprocess.run(f"python3 ../convert-{model_type}-hf-to-gguf.py {model} 1 --outfile {fp16}", shell=True, check=True)
+        else:
+            subprocess.run(f"python3 ../convert.py {model} --outtype f16 --outfile {fp16}", shell=True, check=True)
     else:
         print(f"Unquantised GGML already exists at: {fp16}")
 
     print("Making quants")
     for type in quants:
-        outfile = f"{outdir}/{outname}.ggml{ggml_version}.{type}.bin"
+        outfile = f"{outdir}/{outname}.gguf.{type}.bin"
         print(f"Making {type} : {outfile}")
         subprocess.run(f"../quantize {fp16} {outfile} {type}", shell=True, check=True)
 
     if not keep_fp16:
         os.remove(fp16)
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Convert/Quantize HF to GGML. If you have the HF model downloaded already, pass the path to the model dir. Otherwise, pass the Hugging Face model repo name. You need to be in the /examples folder for it to work.')
-    parser.add_argument('--model', required=True, help='Downloaded model dir or Hugging Face model repo name')
+    parser = argparse.ArgumentParser(description='Convert/Quantize HF models to GGUF. If you have the HF model downloaded already, pass the path to the model dir. Otherwise, pass the Hugging Face model repo name. You need to be in the /examples folder for it to work.')
+    parser.add_argument('model', help='Downloaded model dir or Hugging Face model repo name')
+    parser.add_argument('--model_type', required=True, choices=['llama', 'starcoder', 'falcon', 'baichuan', 'gptneox'], help='Type of the model to be converted. Choose from llama, starcoder, falcon, baichuan, or gptneox.')
     parser.add_argument('--outname', default=None, help='Output model(s) name')
     parser.add_argument('--outdir', default=None, help='Output directory')
     parser.add_argument('--quants', nargs='*', default=["Q4_K_M", "Q5_K_S"], help='Quant types')
     parser.add_argument('--keep_fp16', action='store_true', help='Keep fp16 model', default=False)
 
     args = parser.parse_args()
 
-    main(args.model, args.outname, args.outdir, args.quants, args.keep_fp16)
+    main(args.model, args.model_type, args.outname, args.outdir, args.quants, args.keep_fp16)
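
The core of the change is the converter dispatch: non-llama architectures go through their dedicated convert-<model_type>-hf-to-gguf.py script, while llama models keep using the general convert.py. A minimal sketch of that branch, mirroring the diff; the relative paths assume llama.cpp's examples/ layout of this era, and treating the positional 1 as the ftype argument selecting f16 output in those converter scripts is an assumption, not stated in the commit:

import subprocess

def convert_to_gguf(model: str, model_type: str, fp16: str) -> None:
    if model_type != "llama":
        # Each non-llama architecture has its own HF-to-GGUF converter script;
        # the trailing "1" is assumed to be the ftype flag picking f16 output.
        cmd = f"python3 ../convert-{model_type}-hf-to-gguf.py {model} 1 --outfile {fp16}"
    else:
        # Llama models use the general converter with an explicit f16 outtype.
        cmd = f"python3 ../convert.py {model} --outtype f16 --outfile {fp16}"
    subprocess.run(cmd, shell=True, check=True)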
