diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py index 0f79b7e79d38..033551d07c39 100644 --- a/vllm/engine/metrics.py +++ b/vllm/engine/metrics.py @@ -145,7 +145,7 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig): documentation="Histogram of number of tokens per engine_step.", labelnames=labelnames, buckets=[ - 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, 16192 + 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384 ]) self.histogram_time_to_first_token = self._histogram_cls( name="vllm:time_to_first_token_seconds", diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index e2e0b305e81f..7051c681b1a0 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -233,8 +233,8 @@ def __init__(self, vllm_config: VllmConfig, engine_index: int = 0): name="vllm:iteration_tokens_total", documentation="Histogram of number of tokens per engine_step.", buckets=[ - 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, - 16192 + 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, + 16384 ], labelnames=labelnames).labels(*labelvalues)