Skip to content

Commit 226cea2

Browse files
committed
refactor: Improve code organization, argument parsing, and user interface
- Renamed 'default_outfile' to 'default_output_file' for clarity. - Refactored argument parser setup into 'get_argument_parser' function. - Introduced descriptive comments for each argument in the parser. - Added '--vocab-type' argument with choices ["spm", "bpe", "hfft"] for vocabulary processing. - Improved flag naming consistency: '--outfile' to '--out-file', '--outtype' to '--out-type', '--bigendian' to '--big-endian', and '--padvocab' to '--pad-vocab'. - Enhanced error handling to prevent overwriting input data in 'default_output_file'. - Made 'argv' in 'main' an optional parameter for flexibility. - Introduced dynamic import for 'awq.apply_awq' based on 'args.awq_path' for conditional dependency. These changes enhance code clarity, organization, and the user interface of the script, aligning it with Python best practices and improving maintainability.
1 parent 8aa5818 commit 226cea2

File tree

1 file changed

+100
-21
lines changed

1 file changed

+100
-21
lines changed

convert.py

Lines changed: 100 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,17 +1432,18 @@ def load_vocab(
14321432
return vocab, special_vocab
14331433

14341434

def default_output_file(model_paths: list[Path], file_type: GGMLFileType) -> Path:
    """Derive a default ``.gguf`` output path next to the first input model.

    The file name encodes the output tensor format (f32 / f16 / q8_0).
    Exits the process with status 1 if the derived path would overwrite
    one of the input files, since that would destroy the model being read.

    Args:
        model_paths: Paths of the input model files; the default output is
            placed in the parent directory of the first one.
        file_type: The chosen GGML output format.

    Returns:
        The path ``<dir>/ggml-model-<fmt>.gguf``.
    """
    format_suffix = {
        GGMLFileType.AllF32: "f32",
        GGMLFileType.MostlyF16: "f16",
        GGMLFileType.MostlyQ8_0: "q8_0",
    }[file_type]
    candidate = model_paths[0].parent / f"ggml-model-{format_suffix}.gguf"
    if candidate in model_paths:
        # Refuse to clobber an input file; the user must pick a path.
        sys.stderr.write(
            f"Error: Default output path ({candidate}) would overwrite the input. "
            "Please explicitly specify a path using --out-file.\n"
        )
        sys.exit(1)
    return candidate
14481449

@@ -1452,29 +1453,107 @@ def do_dump_model(model_plus: ModelPlus) -> None:
14521453
print(f"model_plus.format = {model_plus.format!r}")
14531454
print(f"model_plus.vocab = {model_plus.vocab!r}")
14541455
for name, lazy_tensor in model_plus.model.items():
1455-
print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
1456+
print(
1457+
f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}"
1458+
)
14561459

14571460

def get_argument_parser() -> ArgumentParser:
    """Build the command-line parser for the LLaMa -> GGML conversion script.

    The renamed flags keep their old spellings as hidden aliases
    (--outfile, --outtype, --bigendian, --padvocab) so that command
    lines written against the previous flag names keep working; the
    parsed attribute names follow the new spellings (e.g. args.out_file).
    """
    output_choices = ["f32", "f16"]
    if np.uint32(1) == np.uint32(1).newbyteorder("<"):
        # We currently only support Q8_0 output on little endian systems.
        output_choices.append("q8_0")

    parser = argparse.ArgumentParser(
        description="Convert a LLaMa model to a GGML compatible file"
    )

    parser.add_argument(
        "model",
        type=Path,
        help="Directory containing the model file or the model file itself (*.pth, *.pt, *.bin)",
    )

    parser.add_argument(
        "--awq-path",
        type=Path,
        help="Path to the Activation-aware Weight Quantization cache file",
        default=None,
    )

    parser.add_argument(
        "--dump",
        action="store_true",
        help="Display the model content without converting it",
    )

    parser.add_argument(
        "--dump-single",
        action="store_true",
        help="Display the content of a single model file without conversion",
    )

    parser.add_argument(
        "--vocab-only",
        action="store_true",
        help="Extract and output only the vocabulary",
    )

    # "--outtype" retained as an alias for backward compatibility.
    parser.add_argument(
        "--out-type",
        "--outtype",
        choices=output_choices,
        help="Output format - note: q8_0 may be very slow (default: f16 or f32 based on input)",
    )

    parser.add_argument(
        "--vocab-dir",
        type=Path,
        help="Directory containing the tokenizer.model, if separate from the model file",
    )

    parser.add_argument(
        "--vocab-type",
        choices=["spm", "bpe", "hfft"],  # hfft: Hugging Face Fast Tokenizer
        default="spm",
        help="The vocabulary format used to define the tokenizer model (default: spm)",
    )

    # "--padvocab" retained as an alias for backward compatibility.
    parser.add_argument(
        "--pad-vocab",
        "--padvocab",
        action="store_true",
        help="Add padding tokens when the model's vocabulary size exceeds the tokenizer metadata",
    )

    # "--outfile" retained as an alias for backward compatibility.
    parser.add_argument(
        "--out-file",
        "--outfile",
        type=Path,
        help="Specify the path for the output file (default is based on input)",
    )

    parser.add_argument(
        "--ctx", type=int, help="Model training context (default is based on input)"
    )

    parser.add_argument(
        "--concurrency",
        type=int,
        help=f"Concurrency used for conversion (default: {DEFAULT_CONCURRENCY})",
        default=DEFAULT_CONCURRENCY,
    )

    # "--bigendian" retained as an alias for backward compatibility.
    parser.add_argument(
        "--big-endian",
        "--bigendian",
        action="store_true",
        help="Indicate that the model is executed on a big-endian machine",
    )

    return parser
1552+
1553+
def main(argv: Optional[list[str]] = None) -> None:
1554+
parser = get_argument_parser()
1555+
args = parser.parse_args(argv)
1556+
14781557
if args.awq_path:
14791558
sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
14801559
from awq.apply_awq import add_scale_weights

0 commit comments

Comments
 (0)