From faf8377665bac190c60b7136c2adbdf962b6bc3b Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Thu, 3 Jun 2021 13:25:15 +0200 Subject: [PATCH 1/4] add support for histograms in log_encoder --- src/metrics/log_encoder.rs | 121 ++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 3 deletions(-) diff --git a/src/metrics/log_encoder.rs b/src/metrics/log_encoder.rs index 543886defdc..4d0d572996b 100644 --- a/src/metrics/log_encoder.rs +++ b/src/metrics/log_encoder.rs @@ -58,6 +58,27 @@ fn families_to_json_events(families: &[MetricFamily]) -> Vec> { MetricType::GAUGE => VectorMetricData::Gauge { value: metric.get_gauge().get_value(), }, + MetricType::HISTOGRAM => { + let histogram = metric.get_histogram(); + + // We need to convert from cumulative counts (used by the Prometheus library) + // to plain counts (used by Vector). + let mut buckets = Vec::new(); + let mut last_cumulative_count = 0; + for bucket in histogram.get_bucket() { + buckets.push(VectorHistogramBucket { + upper_limit: bucket.get_upper_bound(), + count: bucket.get_cumulative_count() - last_cumulative_count, + }); + last_cumulative_count = bucket.get_cumulative_count(); + } + + VectorMetricData::AggregatedHistogram { + count: histogram.get_sample_count(), + sum: histogram.get_sample_sum(), + buckets, + } + } other => { panic!("unsupported metric type: {:?}", other) } @@ -96,15 +117,30 @@ struct VectorMetric<'a> { #[derive(Serialize, Debug, PartialEq)] #[serde(rename_all = "snake_case")] enum VectorMetricData { - Counter { value: f64 }, - Gauge { value: f64 }, + AggregatedHistogram { + buckets: Vec, + count: u64, + sum: f64, + }, + Counter { + value: f64, + }, + Gauge { + value: f64, + }, +} + +#[derive(Serialize, Debug, PartialEq)] +struct VectorHistogramBucket { + upper_limit: f64, + count: u64, } #[cfg(test)] mod tests { use super::*; use anyhow::Error; - use prometheus::{IntCounter, IntGauge, IntGaugeVec, Opts, Registry}; + use prometheus::{Histogram, HistogramOpts, IntCounter, IntGauge, IntGaugeVec, Opts, Registry}; #[test] fn test_counter_to_json() -> Result<(), Error> { @@ -175,6 +211,85 @@ mod tests { Ok(()) } + #[test] + fn test_histogram_to_json() -> Result<(), Error> { + let histogram = Histogram::with_opts(HistogramOpts::new( + "sample_histogram", + "sample_histogram help message", + ))?; + let registry = Registry::new(); + registry.register(Box::new(histogram.clone()))?; + + let mut value = 0.0; + while value < 11.0 { + histogram.observe(value); + value += 0.001; + } + + assert_eq!( + vec![VectorEvent { + metric: VectorMetric { + data: VectorMetricData::AggregatedHistogram { + buckets: vec![ + VectorHistogramBucket { + upper_limit: 0.005, + count: 6, + }, + VectorHistogramBucket { + upper_limit: 0.01, + count: 4, + }, + VectorHistogramBucket { + upper_limit: 0.025, + count: 15, + }, + VectorHistogramBucket { + upper_limit: 0.05, + count: 25, + }, + VectorHistogramBucket { + upper_limit: 0.1, + count: 50, + }, + VectorHistogramBucket { + upper_limit: 0.25, + count: 150, + }, + VectorHistogramBucket { + upper_limit: 0.5, + count: 250, + }, + VectorHistogramBucket { + upper_limit: 1.0, + count: 500, + }, + VectorHistogramBucket { + upper_limit: 2.5, + count: 1501, + }, + VectorHistogramBucket { + upper_limit: 5.0, + count: 2499, + }, + VectorHistogramBucket { + upper_limit: 10.0, + count: 5001, + }, + ], + count: 11001, + sum: 60505.50000000138, + }, + kind: "absolute", + name: "sample_histogram", + tags: IndexMap::new(), + } + }], + families_to_json_events(®istry.gather()) + ); + + Ok(()) + } + #[test] fn test_metric_with_tags_to_json() -> Result<(), Error> { let gauge_vec = IntGaugeVec::new( From ae201a8d764e787757d6b24b503cdbb18a57b70e Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Thu, 3 Jun 2021 14:15:00 +0200 Subject: [PATCH 2/4] track response times per endpoint --- src/metrics/instance.rs | 5 ++++- src/metrics/macros.rs | 2 +- src/metrics/mod.rs | 1 + src/middleware/update_metrics.rs | 11 +++++++++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/metrics/instance.rs b/src/metrics/instance.rs index 49cba20869b..2e4223db4ad 100644 --- a/src/metrics/instance.rs +++ b/src/metrics/instance.rs @@ -19,7 +19,7 @@ use crate::util::errors::AppResult; use crate::{app::App, db::DieselPool}; -use prometheus::{proto::MetricFamily, IntCounter, IntGauge, IntGaugeVec}; +use prometheus::{proto::MetricFamily, IntCounter, IntGauge, IntGaugeVec, HistogramVec}; metrics! { pub struct InstanceMetrics { @@ -33,6 +33,9 @@ metrics! { /// Number of requests currently being processed pub requests_in_flight: IntGauge, + /// Response times of our endpoints + pub response_times: HistogramVec["endpoint"], + /// Number of download requests that were served with an unconditional redirect. pub downloads_unconditional_redirects_total: IntCounter, /// Number of download requests with a non-canonical crate name. diff --git a/src/metrics/macros.rs b/src/metrics/macros.rs index 1b1f06a0fa7..e088bf7e7ea 100644 --- a/src/metrics/macros.rs +++ b/src/metrics/macros.rs @@ -59,7 +59,7 @@ macro_rules! load_metric_type { use prometheus::$name; impl crate::metrics::macros::MetricFromOpts for $name { fn from_opts(opts: prometheus::Opts) -> Result { - $name::with_opts(opts) + $name::with_opts(opts.into()) } } }; diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 722f1f18c4f..83346c9b7ef 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -12,3 +12,4 @@ mod service; load_metric_type!(IntGauge as single); load_metric_type!(IntCounter as single); load_metric_type!(IntGaugeVec as vec); +load_metric_type!(HistogramVec as vec); diff --git a/src/middleware/update_metrics.rs b/src/middleware/update_metrics.rs index ef1f03ed673..5d064c71381 100644 --- a/src/middleware/update_metrics.rs +++ b/src/middleware/update_metrics.rs @@ -1,5 +1,6 @@ use super::app::RequestApp; use super::prelude::*; +use conduit_router::RoutePattern; #[derive(Debug, Default)] pub(super) struct UpdateMetrics; @@ -19,6 +20,16 @@ impl Middleware for UpdateMetrics { metrics.requests_in_flight.dec(); metrics.requests_total.inc(); + let endpoint = req + .extensions() + .find::() + .map(|p| p.pattern()) + .unwrap_or(""); + metrics + .response_times + .with_label_values(&[endpoint]) + .observe(req.elapsed().as_millis() as f64 / 1000.0); + res } } From 92ca57afc173995ec36fee39bf8759c5be974b34 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Mon, 7 Jun 2021 15:20:46 +0200 Subject: [PATCH 3/4] track responses by status code --- src/metrics/instance.rs | 6 +++++- src/metrics/mod.rs | 1 + src/middleware/update_metrics.rs | 9 +++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/metrics/instance.rs b/src/metrics/instance.rs index 2e4223db4ad..ab40fa1bd0e 100644 --- a/src/metrics/instance.rs +++ b/src/metrics/instance.rs @@ -19,7 +19,9 @@ use crate::util::errors::AppResult; use crate::{app::App, db::DieselPool}; -use prometheus::{proto::MetricFamily, IntCounter, IntGauge, IntGaugeVec, HistogramVec}; +use prometheus::{ + proto::MetricFamily, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, +}; metrics! { pub struct InstanceMetrics { @@ -35,6 +37,8 @@ metrics! { /// Response times of our endpoints pub response_times: HistogramVec["endpoint"], + /// Nmber of responses per status code + pub responses_by_status_code_total: IntCounterVec["status"], /// Number of download requests that were served with an unconditional redirect. pub downloads_unconditional_redirects_total: IntCounter, diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index 83346c9b7ef..09843dc8876 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -11,5 +11,6 @@ mod service; load_metric_type!(IntGauge as single); load_metric_type!(IntCounter as single); +load_metric_type!(IntCounterVec as vec); load_metric_type!(IntGaugeVec as vec); load_metric_type!(HistogramVec as vec); diff --git a/src/middleware/update_metrics.rs b/src/middleware/update_metrics.rs index 5d064c71381..0d34e6b58cf 100644 --- a/src/middleware/update_metrics.rs +++ b/src/middleware/update_metrics.rs @@ -30,6 +30,15 @@ impl Middleware for UpdateMetrics { .with_label_values(&[endpoint]) .observe(req.elapsed().as_millis() as f64 / 1000.0); + let status = match &res { + Ok(res) => res.status().as_u16(), + Err(_) => 500, + }; + metrics + .responses_by_status_code_total + .with_label_values(&[&status.to_string()]) + .inc(); + res } } From 674fb79de552ede27dd74bdad60618e105471bf0 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Mon, 7 Jun 2021 15:23:33 +0200 Subject: [PATCH 4/4] track uncounted downloads --- src/downloads_counter.rs | 4 ++++ src/metrics/instance.rs | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/src/downloads_counter.rs b/src/downloads_counter.rs index 3d96be419ac..18cd38c8da2 100644 --- a/src/downloads_counter.rs +++ b/src/downloads_counter.rs @@ -199,6 +199,10 @@ impl DownloadsCounter { pub fn shards_count(&self) -> usize { self.inner.shards().len() } + + pub(crate) fn pending_count(&self) -> i64 { + self.pending_count.load(Ordering::SeqCst) + } } #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] diff --git a/src/metrics/instance.rs b/src/metrics/instance.rs index ab40fa1bd0e..e813f342ead 100644 --- a/src/metrics/instance.rs +++ b/src/metrics/instance.rs @@ -44,6 +44,8 @@ metrics! { pub downloads_unconditional_redirects_total: IntCounter, /// Number of download requests with a non-canonical crate name. pub downloads_non_canonical_crate_name_total: IntCounter, + /// Number of download requests that are not counted yet. + downloads_not_counted_total: IntGauge, } // All instance metrics will be prefixed with this namespace. @@ -58,6 +60,9 @@ impl InstanceMetrics { self.refresh_pool_stats("follower", follower)?; } + self.downloads_not_counted_total + .set(app.downloads_counter.pending_count()); + Ok(self.registry.gather()) }