diff --git a/benchmarks/kernels/benchmark_paged_attention.py b/benchmarks/kernels/benchmark_paged_attention.py
index e6f4e9e6b971..a5355f4c13d3 100644
--- a/benchmarks/kernels/benchmark_paged_attention.py
+++ b/benchmarks/kernels/benchmark_paged_attention.py
@@ -165,7 +165,7 @@ def run_cuda_benchmark(num_iters: int, profile: bool = False) -> float:
                         choices=["v1", "v2"],
                         default="v2")
     parser.add_argument("--batch-size", type=int, default=8)
-    parser.add_argument("--seq_len", type=int, default=4096)
+    parser.add_argument("--seq-len", type=int, default=4096)
     parser.add_argument("--num-query-heads", type=int, default=64)
     parser.add_argument("--num-kv-heads", type=int, default=8)
     parser.add_argument("--head-size",
diff --git a/examples/aqlm_example.py b/examples/aqlm_example.py
index e7c17fa0362a..3a63003ab4ba 100644
--- a/examples/aqlm_example.py
+++ b/examples/aqlm_example.py
@@ -17,7 +17,7 @@ def main():
                         type=int,
                         default=0,
                         help='known good models by index, [0-4]')
-    parser.add_argument('--tensor_parallel_size',
+    parser.add_argument('--tensor-parallel-size',
                         '-t',
                         type=int,
                         default=1,
diff --git a/examples/fp8/extract_scales.py b/examples/fp8/extract_scales.py
index 1eb961a5a76e..e007a3bc0821 100644
--- a/examples/fp8/extract_scales.py
+++ b/examples/fp8/extract_scales.py
@@ -327,7 +327,7 @@ def main(args):
         "--quantization-param-path ). This is only used "
         "if the KV cache dtype is FP8 and on ROCm (AMD GPU).")
     parser.add_argument(
-        "--quantized_model",
+        "--quantized-model",
         help="Specify the directory containing a single quantized HF model. "
         "It is expected that the quantization format is FP8_E4M3, for use "
         "on ROCm (AMD GPU).",
@@ -339,18 +339,18 @@ def main(args):
         choices=["auto", "safetensors", "npz", "pt"],
         default="auto")
     parser.add_argument(
-        "--output_dir",
+        "--output-dir",
         help="Optionally specify the output directory. By default the "
         "KV cache scaling factors will be saved in the model directory, "
         "however you can override this behavior here.",
         default=None)
     parser.add_argument(
-        "--output_name",
+        "--output-name",
         help="Optionally specify the output filename.",
         # TODO: Change this once additional scaling factors are enabled
         default="kv_cache_scales.json")
     parser.add_argument(
-        "--tp_size",
+        "--tp-size",
         help="Optionally specify the tensor-parallel (TP) size that the "
         "quantized model should correspond to. If specified, during KV "
         "cache scaling factor extraction the observed TP size will be "
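
Note: these renames only change the CLI spelling of the flags from underscores to dashes; argparse derives the Namespace attribute (the `dest`) by replacing dashes with underscores, so code that reads the parsed arguments is unaffected. A minimal standalone sketch of this behavior, using the `--seq-len` flag from the first hunk (the assertion value is illustrative, not from the diff):

```python
import argparse

# argparse converts dashes in long option names to underscores when
# building the Namespace attribute, so "--seq-len" still parses into
# args.seq_len. Only the command-line spelling changes.
parser = argparse.ArgumentParser()
parser.add_argument("--seq-len", type=int, default=4096)
args = parser.parse_args(["--seq-len", "2048"])

# Downstream code reading the parsed namespace needs no changes.
assert args.seq_len == 2048
```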