Skip to content

Commit 7b301ad

Browse files
fangpings (Fangping Shi)
authored and committed
[Bugfix] Fix incorrect kv cache metrics in grafana.json (vllm-project#27133)
Signed-off-by: Fangping Shi <[email protected]>
Co-authored-by: Fangping Shi <[email protected]>
1 parent 664bff9 commit 7b301ad

File tree

3 files changed

+6
-6
lines changed

3 files changed

+6
-6
lines changed

examples/online_serving/dashboards/perses/performance_statistics.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -530,7 +530,7 @@ spec:
530530
name: accelerators-thanos-querier-datasource
531531
# Multiply by 100 so we can read it as a percentage without setting a unit (avoids CUE unit conflicts)
532532
query: >
533-
100 * avg(vllm:gpu_cache_usage_perc)
533+
100 * avg(vllm:kv_cache_usage_perc)
534534
535535
"18":
536536
kind: Panel

examples/online_serving/dashboards/perses/query_statistics.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ spec:
9898
kind: PrometheusTimeSeriesQuery
9999
spec:
100100
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
101-
query: avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0)
101+
query: avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0)
102102
minStep: "15s"
103103

104104
core_running_ts:
@@ -168,7 +168,7 @@ spec:
168168
spec:
169169
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
170170
# multiply by 100 to present percentage; omit format.unit to avoid schema conflicts
171-
query: (avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
171+
query: (avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
172172
minStep: "15s"
173173

174174
core_kv_usage_pct_ts:
@@ -187,7 +187,7 @@ spec:
187187
kind: PrometheusTimeSeriesQuery
188188
spec:
189189
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
190-
query: (avg by (service) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
190+
query: (avg by (service) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
191191
minStep: "15s"
192192

193193
# --- Per-Pod breakdowns (works on Simulator & Real) ---
@@ -246,7 +246,7 @@ spec:
246246
spec:
247247
datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource }
248248
# if your exporter labels kv metric with pod (the sim does), this works; otherwise it will just return empty
249-
query: (avg by (pod) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
249+
query: (avg by (pod) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0)
250250
minStep: "15s"
251251

252252
# --- Real vLLM only (zeros on simulator) ---

examples/online_serving/prometheus_grafana/grafana.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -852,7 +852,7 @@
852852
"uid": "${DS_PROMETHEUS}"
853853
},
854854
"editorMode": "code",
855-
"expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}",
855+
"expr": "vllm:kv_cache_usage_perc{model_name=\"$model_name\"}",
856856
"instant": false,
857857
"legendFormat": "GPU Cache Usage",
858858
"range": true,

0 commit comments

Comments
 (0)