Skip to content

Commit b7ca51b

Browse files
committed
[Metrics] Add --show-hidden-metrics-for-version CLI arg
Add some infrastructure to help us deprecate and remove metrics in a less user-hostile way. Our deprecation process will now be: 1) Deprecate the metric in 0.N.0 - document the deprecation in release notes, user-facing docs, and the help text in `/metrics`; 2) Hide the metric in 0.N+1.0 - users can still re-enable the metric using `--show-hidden-metrics-for-version=0.N` as an escape hatch; 3) Remove the metric completely in 0.N+2.0. `--show-hidden-metrics-for-version` takes a version string argument (in `major.minor` form) so that users cannot fall into the habit of always enabling all deprecated metrics, which would defeat the purpose. This approach is copied directly from kubernetes/kubernetes#85270 Signed-off-by: Mark McLoughlin <[email protected]>
1 parent 367cb8c commit b7ca51b

File tree

7 files changed

+95
-1
lines changed

7 files changed

+95
-1
lines changed

docs/source/serving/metrics.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,11 @@ The following metrics are exposed:
3636
:language: python
3737
:start-after: begin-metrics-definitions
3838
:::
39+
40+
The following metrics are deprecated and due to be removed in a future version:
41+
42+
- *(No metrics are currently deprecated)*
43+
44+
Note: when metrics are deprecated in version `X.Y`, they are hidden in version `X.Y+1`
45+
but can be re-enabled using the `--show-hidden-metrics-for-version=X.Y` escape hatch,
46+
and are then removed in version `X.Y+2`.

tests/test_version.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
3+
from unittest.mock import patch
4+
5+
import pytest
6+
7+
from vllm import version
8+
9+
10+
def test_version_is_defined():
    """The version module must expose a ``__version__`` that is not None."""
    current = version.__version__
    assert current is not None
12+
13+
14+
def test_version_tuple():
    """``__version_tuple__`` must have three, four, or five components."""
    component_count = len(version.__version_tuple__)
    assert component_count in (3, 4, 5)
16+
17+
18+
@pytest.mark.parametrize(
    "version_tuple, version_str, expected",
    [
        # A dev tree (version tuple starting with 0, 0) matches any string.
        ((0, 0, "dev"), "0.0", True),
        ((0, 0, "dev"), "foobar", True),
        # Only the immediately preceding minor version matches.
        ((0, 7, 4), "0.6", True),
        ((0, 7, 4), "0.5", False),
        ((0, 7, 4), "0.7", False),
        ((1, 2, 3), "1.1", True),
        ((1, 2, 3), "1.0", False),
        ((1, 2, 3), "1.2", False),
        # Major-version rollover is not handled: for 1.0.0 the "previous
        # minor" is computed as "1.-1", so no real 0.x version matches.
        ((1, 0, 0), "1.-1", True),
        ((1, 0, 0), "0.9", False),
        ((1, 0, 0), "0.17", False),
    ],
)
def test_prev_minor_version_was(version_tuple, version_str, expected):
    """Check _prev_minor_version_was() against a patched version tuple."""
    with patch("vllm.version.__version_tuple__", version_tuple):
        matched = version._prev_minor_version_was(version_str)
        assert matched == expected

vllm/config.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2602,7 +2602,9 @@ def __post_init__(self):
26022602

26032603
@dataclass
26042604
class ObservabilityConfig:
2605-
"""Configuration for observability."""
2605+
"""Configuration for observability - metrics and tracing."""
2606+
show_hidden_metrics: bool = False
2607+
26062608
otlp_traces_endpoint: Optional[str] = None
26072609

26082610
# Collecting detailed timing information for each request can be expensive.

vllm/engine/arg_utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import torch
1111

1212
import vllm.envs as envs
13+
from vllm import version
1314
from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
1415
DecodingConfig, DeviceConfig, HfOverrides,
1516
KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
@@ -188,6 +189,7 @@ class EngineArgs:
188189
qlora_adapter_name_or_path: Optional[str] = None
189190
disable_logprobs_during_spec_decoding: Optional[bool] = None
190191

192+
show_hidden_metrics_for_version: Optional[str] = None
191193
otlp_traces_endpoint: Optional[str] = None
192194
collect_detailed_traces: Optional[str] = None
193195
disable_async_output_proc: bool = False
@@ -905,6 +907,18 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
905907
default=None,
906908
help='Name or path of the QLoRA adapter.')
907909

910+
parser.add_argument('--show-hidden-metrics-for-version',
911+
type=str,
912+
default=None,
913+
help='Enable deprecated Prometheus metrics that '
914+
'have been hidden since the specified version. '
915+
'For example, if a previously deprecated metric '
916+
'has been hidden since the v0.7.0 release, you '
917+
'use --show-hidden-metrics-for-version=0.7 as a '
918+
'temporary escape hatch while you migrate to new '
919+
'metrics. The metric is likely to be removed '
920+
'completely in an upcoming release.')
921+
908922
parser.add_argument(
909923
'--otlp-traces-endpoint',
910924
type=str,
@@ -1305,6 +1319,11 @@ def create_engine_config(self,
13051319
decoding_config = DecodingConfig(
13061320
guided_decoding_backend=self.guided_decoding_backend)
13071321

1322+
show_hidden_metrics = False
1323+
if self.show_hidden_metrics_for_version is not None:
1324+
show_hidden_metrics = version._prev_minor_version_was(
1325+
self.show_hidden_metrics_for_version)
1326+
13081327
detailed_trace_modules = []
13091328
if self.collect_detailed_traces is not None:
13101329
detailed_trace_modules = self.collect_detailed_traces.split(",")
@@ -1314,6 +1333,7 @@ def create_engine_config(self,
13141333
f"Invalid module {m} in collect_detailed_traces. "
13151334
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
13161335
observability_config = ObservabilityConfig(
1336+
show_hidden_metrics=show_hidden_metrics,
13171337
otlp_traces_endpoint=self.otlp_traces_endpoint,
13181338
collect_model_forward_time="model" in detailed_trace_modules
13191339
or "all" in detailed_trace_modules,

vllm/engine/metrics.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,11 @@ def __init__(self, local_interval: float, labels: Dict[str, str],
517517
self.metrics = self._metrics_cls(labelnames=list(labels.keys()),
518518
vllm_config=vllm_config)
519519

520+
# Use this flag to hide metrics that were deprecated in
521+
# a previous release and which will be removed in a future release
522+
self.show_hidden_metrics = \
523+
vllm_config.observability_config.show_hidden_metrics
524+
520525
def _log_gauge(self, gauge, data: Union[int, float]) -> None:
521526
# Convenience function for logging to gauge.
522527
gauge.labels(**self.labels).set(data)

vllm/v1/metrics/loggers.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ class PrometheusStatLogger(StatLoggerBase):
9595
def __init__(self, vllm_config: VllmConfig):
9696
self._unregister_vllm_metrics()
9797

98+
# Use this flag to hide metrics that were deprecated in
99+
# a previous release and which will be removed in a future release
100+
self.show_hidden_metrics = \
101+
vllm_config.observability_config.show_hidden_metrics
102+
98103
labelnames = ["model_name"]
99104
labelvalues = [vllm_config.model_config.served_model_name]
100105

vllm/version.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,21 @@
1111

1212
__version__ = "dev"
1313
__version_tuple__ = (0, 0, __version__)
14+
15+
16+
def _prev_minor_version_was(version_str):
    """Tell whether ``version_str`` names the previous minor version.

    The check is an exact string comparison against
    ``"<major>.<minor - 1>"`` built from ``__version_tuple__``. For
    example, if the current version is 0.7.4, the only ``version_str``
    that matches is ``'0.6'``.

    In a dev tree (version tuple starting with ``(0, 0)``) every
    ``version_str`` matches.

    Used for --show-hidden-metrics-for-version.
    """
    # A dev tree has no meaningful release number, so accept anything.
    is_dev_tree = __version_tuple__[0:2] == (0, 0)
    if is_dev_tree:
        return True

    major = __version_tuple__[0]
    minor = __version_tuple__[1]
    # NOTE: major-version rollover is not handled - for an X.0 release the
    # expected string is "X.-1", which no real previous version matches.
    expected = f"{major}.{minor - 1}"
    return version_str == expected

0 commit comments

Comments
 (0)