Skip to content

Commit ec4bdc0

Browse files
committed
Auto merge of #3684 - pietroalbini:more-metrics, r=jtgeibel
Add more instance-level metrics. Now that we have support for collecting metrics, this PR adds the following ones: * `response_times`: how long preparing responses takes, grouped by endpoint. Note that this does not create a metric series for each visited URL: for example, all the downloads will be counted as `/crates/:crate_id/:version/download`. * `responses_by_status_code_total`: how many responses we sent, grouped by HTTP status code. * `downloads_not_counted_total`: how many download counts aren't persisted yet. This also had to implement histogram support, needed by `response_times`.
2 parents 529c446 + 674fb79 commit ec4bdc0

File tree

6 files changed

+158
-5
lines changed

6 files changed

+158
-5
lines changed

src/downloads_counter.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,10 @@ impl DownloadsCounter {
199199
pub fn shards_count(&self) -> usize {
200200
self.inner.shards().len()
201201
}
202+
203+
pub(crate) fn pending_count(&self) -> i64 {
204+
self.pending_count.load(Ordering::SeqCst)
205+
}
202206
}
203207

204208
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]

src/metrics/instance.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
2020
use crate::util::errors::AppResult;
2121
use crate::{app::App, db::DieselPool};
22-
use prometheus::{proto::MetricFamily, IntCounter, IntGauge, IntGaugeVec};
22+
use prometheus::{
23+
proto::MetricFamily, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
24+
};
2325

2426
metrics! {
2527
pub struct InstanceMetrics {
@@ -33,10 +35,17 @@ metrics! {
3335
/// Number of requests currently being processed
3436
pub requests_in_flight: IntGauge,
3537

38+
/// Response times of our endpoints
39+
pub response_times: HistogramVec["endpoint"],
40+
/// Number of responses per status code
41+
pub responses_by_status_code_total: IntCounterVec["status"],
42+
3643
/// Number of download requests that were served with an unconditional redirect.
3744
pub downloads_unconditional_redirects_total: IntCounter,
3845
/// Number of download requests with a non-canonical crate name.
3946
pub downloads_non_canonical_crate_name_total: IntCounter,
47+
/// Number of download requests that are not counted yet.
48+
downloads_not_counted_total: IntGauge,
4049
}
4150

4251
// All instance metrics will be prefixed with this namespace.
@@ -51,6 +60,9 @@ impl InstanceMetrics {
5160
self.refresh_pool_stats("follower", follower)?;
5261
}
5362

63+
self.downloads_not_counted_total
64+
.set(app.downloads_counter.pending_count());
65+
5466
Ok(self.registry.gather())
5567
}
5668

src/metrics/log_encoder.rs

Lines changed: 118 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,27 @@ fn families_to_json_events(families: &[MetricFamily]) -> Vec<VectorEvent<'_>> {
5858
MetricType::GAUGE => VectorMetricData::Gauge {
5959
value: metric.get_gauge().get_value(),
6060
},
61+
MetricType::HISTOGRAM => {
62+
let histogram = metric.get_histogram();
63+
64+
// We need to convert from cumulative counts (used by the Prometheus library)
65+
// to plain counts (used by Vector).
66+
let mut buckets = Vec::new();
67+
let mut last_cumulative_count = 0;
68+
for bucket in histogram.get_bucket() {
69+
buckets.push(VectorHistogramBucket {
70+
upper_limit: bucket.get_upper_bound(),
71+
count: bucket.get_cumulative_count() - last_cumulative_count,
72+
});
73+
last_cumulative_count = bucket.get_cumulative_count();
74+
}
75+
76+
VectorMetricData::AggregatedHistogram {
77+
count: histogram.get_sample_count(),
78+
sum: histogram.get_sample_sum(),
79+
buckets,
80+
}
81+
}
6182
other => {
6283
panic!("unsupported metric type: {:?}", other)
6384
}
@@ -96,15 +117,30 @@ struct VectorMetric<'a> {
96117
#[derive(Serialize, Debug, PartialEq)]
97118
#[serde(rename_all = "snake_case")]
98119
enum VectorMetricData {
99-
Counter { value: f64 },
100-
Gauge { value: f64 },
120+
AggregatedHistogram {
121+
buckets: Vec<VectorHistogramBucket>,
122+
count: u64,
123+
sum: f64,
124+
},
125+
Counter {
126+
value: f64,
127+
},
128+
Gauge {
129+
value: f64,
130+
},
131+
}
132+
133+
#[derive(Serialize, Debug, PartialEq)]
134+
struct VectorHistogramBucket {
135+
upper_limit: f64,
136+
count: u64,
101137
}
102138

103139
#[cfg(test)]
104140
mod tests {
105141
use super::*;
106142
use anyhow::Error;
107-
use prometheus::{IntCounter, IntGauge, IntGaugeVec, Opts, Registry};
143+
use prometheus::{Histogram, HistogramOpts, IntCounter, IntGauge, IntGaugeVec, Opts, Registry};
108144

109145
#[test]
110146
fn test_counter_to_json() -> Result<(), Error> {
@@ -175,6 +211,85 @@ mod tests {
175211
Ok(())
176212
}
177213

214+
#[test]
215+
fn test_histogram_to_json() -> Result<(), Error> {
216+
let histogram = Histogram::with_opts(HistogramOpts::new(
217+
"sample_histogram",
218+
"sample_histogram help message",
219+
))?;
220+
let registry = Registry::new();
221+
registry.register(Box::new(histogram.clone()))?;
222+
223+
let mut value = 0.0;
224+
while value < 11.0 {
225+
histogram.observe(value);
226+
value += 0.001;
227+
}
228+
229+
assert_eq!(
230+
vec![VectorEvent {
231+
metric: VectorMetric {
232+
data: VectorMetricData::AggregatedHistogram {
233+
buckets: vec![
234+
VectorHistogramBucket {
235+
upper_limit: 0.005,
236+
count: 6,
237+
},
238+
VectorHistogramBucket {
239+
upper_limit: 0.01,
240+
count: 4,
241+
},
242+
VectorHistogramBucket {
243+
upper_limit: 0.025,
244+
count: 15,
245+
},
246+
VectorHistogramBucket {
247+
upper_limit: 0.05,
248+
count: 25,
249+
},
250+
VectorHistogramBucket {
251+
upper_limit: 0.1,
252+
count: 50,
253+
},
254+
VectorHistogramBucket {
255+
upper_limit: 0.25,
256+
count: 150,
257+
},
258+
VectorHistogramBucket {
259+
upper_limit: 0.5,
260+
count: 250,
261+
},
262+
VectorHistogramBucket {
263+
upper_limit: 1.0,
264+
count: 500,
265+
},
266+
VectorHistogramBucket {
267+
upper_limit: 2.5,
268+
count: 1501,
269+
},
270+
VectorHistogramBucket {
271+
upper_limit: 5.0,
272+
count: 2499,
273+
},
274+
VectorHistogramBucket {
275+
upper_limit: 10.0,
276+
count: 5001,
277+
},
278+
],
279+
count: 11001,
280+
sum: 60505.50000000138,
281+
},
282+
kind: "absolute",
283+
name: "sample_histogram",
284+
tags: IndexMap::new(),
285+
}
286+
}],
287+
families_to_json_events(&registry.gather())
288+
);
289+
290+
Ok(())
291+
}
292+
178293
#[test]
179294
fn test_metric_with_tags_to_json() -> Result<(), Error> {
180295
let gauge_vec = IntGaugeVec::new(

src/metrics/macros.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ macro_rules! load_metric_type {
5959
use prometheus::$name;
6060
impl crate::metrics::macros::MetricFromOpts for $name {
6161
fn from_opts(opts: prometheus::Opts) -> Result<Self, prometheus::Error> {
62-
$name::with_opts(opts)
62+
$name::with_opts(opts.into())
6363
}
6464
}
6565
};

src/metrics/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,6 @@ mod service;
1111

1212
load_metric_type!(IntGauge as single);
1313
load_metric_type!(IntCounter as single);
14+
load_metric_type!(IntCounterVec as vec);
1415
load_metric_type!(IntGaugeVec as vec);
16+
load_metric_type!(HistogramVec as vec);

src/middleware/update_metrics.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use super::app::RequestApp;
22
use super::prelude::*;
3+
use conduit_router::RoutePattern;
34

45
#[derive(Debug, Default)]
56
pub(super) struct UpdateMetrics;
@@ -19,6 +20,25 @@ impl Middleware for UpdateMetrics {
1920
metrics.requests_in_flight.dec();
2021
metrics.requests_total.inc();
2122

23+
let endpoint = req
24+
.extensions()
25+
.find::<RoutePattern>()
26+
.map(|p| p.pattern())
27+
.unwrap_or("<unknown>");
28+
metrics
29+
.response_times
30+
.with_label_values(&[endpoint])
31+
.observe(req.elapsed().as_millis() as f64 / 1000.0);
32+
33+
let status = match &res {
34+
Ok(res) => res.status().as_u16(),
35+
Err(_) => 500,
36+
};
37+
metrics
38+
.responses_by_status_code_total
39+
.with_label_values(&[&status.to_string()])
40+
.inc();
41+
2242
res
2343
}
2444
}

0 commit comments

Comments
 (0)