@@ -259,21 +259,6 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
             documentation="Number of emitted tokens.",
             labelnames=labelnames))
 
-        # Deprecated in favor of vllm:prompt_tokens_total
-        self.gauge_avg_prompt_throughput = self._gauge_cls(
-            name="vllm:avg_prompt_throughput_toks_per_s",
-            documentation="Average prefill throughput in tokens/s.",
-            labelnames=labelnames,
-            multiprocess_mode="sum",
-        )
-        # Deprecated in favor of vllm:generation_tokens_total
-        self.gauge_avg_generation_throughput = self._gauge_cls(
-            name="vllm:avg_generation_throughput_toks_per_s",
-            documentation="Average generation throughput in tokens/s.",
-            labelnames=labelnames,
-            multiprocess_mode="sum",
-        )
-
 
 # end-metrics-definitions
 
@@ -635,20 +620,6 @@ def _log_prometheus(self, stats: Stats) -> None:
         self._log_histogram(self.metrics.histogram_max_tokens_request,
                             stats.max_tokens_requests)
 
-    def _log_prometheus_interval(self, prompt_throughput: float,
-                                 generation_throughput: float) -> None:
-        # Logs metrics to prometheus that are computed every logging_interval.
-        # Support legacy gauge metrics that make throughput calculations on
-        # the vLLM side. Moving forward, we should use counters like
-        # counter_prompt_tokens, counter_generation_tokens
-        # Which log raw data and calculate summaries using rate() on the
-        # grafana/prometheus side. See
-        # https://github.com/vllm-project/vllm/pull/2316#discussion_r1464204666
-        self.metrics.gauge_avg_prompt_throughput.labels(
-            **self.labels).set(prompt_throughput)
-        self.metrics.gauge_avg_generation_throughput.labels(
-            **self.labels).set(generation_throughput)
-
     def log(self, stats: Stats):
         """Logs to prometheus and tracked stats every iteration."""
         # Log to prometheus.
@@ -664,20 +635,6 @@ def log(self, stats: Stats):
         # Log locally every local_interval seconds.
         if local_interval_elapsed(stats.now, self.last_local_log,
                                   self.local_interval):
-            # Compute summary metrics for tracked stats (and log them
-            # to promethus if applicable).
-            prompt_throughput = get_throughput(self.num_prompt_tokens,
-                                               now=stats.now,
-                                               last_log=self.last_local_log)
-            generation_throughput = get_throughput(
-                self.num_generation_tokens,
-                now=stats.now,
-                last_log=self.last_local_log)
-
-            self._log_prometheus_interval(
-                prompt_throughput=prompt_throughput,
-                generation_throughput=generation_throughput)
-
             if self.spec_decode_metrics is not None:
                 self._log_gauge(
                     self.metrics.gauge_spec_decode_draft_acceptance_rate,
0 commit comments