@@ -259,21 +259,6 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
             documentation="Number of emitted tokens.",
             labelnames=labelnames))

-        # Deprecated in favor of vllm:prompt_tokens_total
-        self.gauge_avg_prompt_throughput = self._gauge_cls(
-            name="vllm:avg_prompt_throughput_toks_per_s",
-            documentation="Average prefill throughput in tokens/s.",
-            labelnames=labelnames,
-            multiprocess_mode="sum",
-        )
-        # Deprecated in favor of vllm:generation_tokens_total
-        self.gauge_avg_generation_throughput = self._gauge_cls(
-            name="vllm:avg_generation_throughput_toks_per_s",
-            documentation="Average generation throughput in tokens/s.",
-            labelnames=labelnames,
-            multiprocess_mode="sum",
-        )
-
         # end-metrics-definitions
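This hunk drops the two deprecated throughput gauges; the counters they were deprecated in favor of (vllm:prompt_tokens_total, vllm:generation_tokens_total) already exist, and throughput becomes a query-time derivation. A minimal sketch of that counter pattern with prometheus_client (metric names follow the diff; the model_name label, documentation strings, and increment values here are illustrative):

# Sketch: raw token counters replace client-side throughput gauges.
from prometheus_client import Counter

counter_prompt_tokens = Counter(
    "vllm:prompt_tokens_total",
    "Number of prefill tokens processed.",
    labelnames=["model_name"])
counter_generation_tokens = Counter(
    "vllm:generation_tokens_total",
    "Number of generation tokens processed.",
    labelnames=["model_name"])

# Each iteration logs raw counts only; no division by wall-clock time.
counter_prompt_tokens.labels(model_name="my-model").inc(128)
counter_generation_tokens.labels(model_name="my-model").inc(32)

# Throughput is then derived at query time, e.g. in Grafana:
#   rate(vllm:prompt_tokens_total[30s])
#   rate(vllm:generation_tokens_total[30s])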
@@ -635,20 +620,6 @@ def _log_prometheus(self, stats: Stats) -> None:
         self._log_histogram(self.metrics.histogram_max_tokens_request,
                             stats.max_tokens_requests)

-    def _log_prometheus_interval(self, prompt_throughput: float,
-                                 generation_throughput: float) -> None:
-        # Logs metrics to prometheus that are computed every logging_interval.
-        # Support legacy gauge metrics that make throughput calculations on
-        # the vLLM side. Moving forward, we should use counters like
-        # counter_prompt_tokens, counter_generation_tokens
-        # Which log raw data and calculate summaries using rate() on the
-        # grafana/prometheus side. See
-        # https://github.com/vllm-project/vllm/pull/2316#discussion_r1464204666
-        self.metrics.gauge_avg_prompt_throughput.labels(
-            **self.labels).set(prompt_throughput)
-        self.metrics.gauge_avg_generation_throughput.labels(
-            **self.labels).set(generation_throughput)
-
     def log(self, stats: Stats):
         """Logs to prometheus and tracked stats every iteration."""
         # Log to prometheus.
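The deleted helper pushed precomputed averages into gauges; the comment it carried points at the replacement: counters plus rate() on the Grafana/Prometheus side. What rate() computes, in miniature (a toy sketch of the delta-over-time idea only; Prometheus additionally handles counter resets and extrapolation):

# Toy illustration of rate() semantics (not the Prometheus implementation).
def windowed_rate(counter_start: float, counter_end: float,
                  window_seconds: float) -> float:
    # Throughput over the window = counter delta / elapsed seconds.
    return (counter_end - counter_start) / window_seconds

# e.g. vllm:generation_tokens_total rose from 10_000 to 16_000 over 30s:
assert windowed_rate(10_000, 16_000, 30.0) == 200.0  # tokens/s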
@@ -664,20 +635,6 @@ def log(self, stats: Stats):
         # Log locally every local_interval seconds.
         if local_interval_elapsed(stats.now, self.last_local_log,
                                   self.local_interval):
-            # Compute summary metrics for tracked stats (and log them
-            # to promethus if applicable).
-            prompt_throughput = get_throughput(self.num_prompt_tokens,
-                                               now=stats.now,
-                                               last_log=self.last_local_log)
-            generation_throughput = get_throughput(
-                self.num_generation_tokens,
-                now=stats.now,
-                last_log=self.last_local_log)
-
-            self._log_prometheus_interval(
-                prompt_throughput=prompt_throughput,
-                generation_throughput=generation_throughput)
-
             if self.spec_decode_metrics is not None:
                 self._log_gauge(
                     self.metrics.gauge_spec_decode_draft_acceptance_rate,
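For reference, the get_throughput helper that the deleted lines called divides the tokens accumulated since the last local log by the elapsed wall time; roughly (a sketch reconstructed from the call sites above, not quoted from the commit):

import time
from typing import List

import numpy as np

def get_throughput(tracked_stats: List[int], now: float,
                   last_log: float) -> float:
    # Sum of per-iteration token counts over seconds since the last log.
    return float(np.sum(tracked_stats) / (now - last_log))

# e.g. iterations logged 128, 256, and 128 tokens over a 2-second window:
start = time.monotonic()
print(get_throughput([128, 256, 128], now=start + 2.0, last_log=start))  # 256.0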