refactor: [torrust#1598] make recalculate udp avg connect processing time metric and update atomic

josecelano · josecelano · commit d50948ea1a5a · 2025-06-19T11:19:57.000+01:00
It also fixes a division-by-zero bug when the metric is updated before
the counter for the number of connections has been increased.

It only avoids the division by zero. I will properly fix it with an
independent request counter for the moving average calculation.
diff --git a/packages/udp-tracker-server/src/statistics/event/handler/response_sent.rs b/packages/udp-tracker-server/src/statistics/event/handler/response_sent.rs
@@ -16,26 +16,13 @@ pub async fn handle_event(
     let (result_label_value, kind_label_value) = match kind {
         UdpResponseKind::Ok { req_kind } => match req_kind {
             UdpRequestKind::Connect => {
-                let new_avg = stats_repository
-                    .recalculate_udp_avg_connect_processing_time_ns(req_processing_time)
-                    .await;
-
-                tracing::debug!("Updating average processing time metric for connect requests: {} ns", new_avg);
-
                 let mut label_set = LabelSet::from(context.clone());
                 label_set.upsert(label_name!("request_kind"), LabelValue::new(&req_kind.to_string()));
-                match stats_repository
-                    .set_gauge(
-                        &metric_name!(UDP_TRACKER_SERVER_PERFORMANCE_AVG_PROCESSING_TIME_NS),
-                        &label_set,
-                        new_avg,
-                        now,
-                    )
-                    .await
-                {
-                    Ok(()) => {}
-                    Err(err) => tracing::error!("Failed to set gauge: {}", err),
-                }
+
+                let _new_avg = stats_repository
+                    .recalculate_udp_avg_connect_processing_time_ns(req_processing_time, &label_set, now)
+                    .await;
+
                 (LabelValue::new("ok"), UdpRequestKind::Connect.into())
             }
             UdpRequestKind::Announce { announce_request } => {
diff --git a/packages/udp-tracker-server/src/statistics/metrics.rs b/packages/udp-tracker-server/src/statistics/metrics.rs
@@ -51,14 +51,23 @@ impl Metrics {
 
 impl Metrics {
     #[allow(clippy::cast_precision_loss)]
-    pub fn recalculate_udp_avg_connect_processing_time_ns(&self, req_processing_time: Duration) -> f64 {
+    pub fn recalculate_udp_avg_connect_processing_time_ns(
+        &mut self,
+        req_processing_time: Duration,
+        label_set: &LabelSet,
+        now: DurationSinceUnixEpoch,
+    ) -> f64 {
         let req_processing_time = req_processing_time.as_nanos() as f64;
         let udp_connections_handled = (self.udp4_connections_handled() + self.udp6_connections_handled()) as f64;
 
         let previous_avg = self.udp_avg_connect_processing_time_ns();
 
-        // Moving average: https://en.wikipedia.org/wiki/Moving_average
-        let new_avg = previous_avg as f64 + (req_processing_time - previous_avg as f64) / udp_connections_handled;
+        let new_avg = if udp_connections_handled == 0.0 {
+            req_processing_time
+        } else {
+            // Moving average: https://en.wikipedia.org/wiki/Moving_average
+            previous_avg as f64 + (req_processing_time - previous_avg as f64) / udp_connections_handled
+        };
 
         tracing::debug!(
             "Recalculated UDP average connect processing time: {} ns (previous: {} ns, req_processing_time: {} ns, udp_connections_handled: {})",
@@ -68,9 +77,25 @@ impl Metrics {
             udp_connections_handled
         );
 
+        self.update_udp_avg_connect_processing_time_ns(new_avg, label_set, now);
+
         new_avg
     }
 
+    fn update_udp_avg_connect_processing_time_ns(&mut self, new_avg: f64, label_set: &LabelSet, now: DurationSinceUnixEpoch) {
+        tracing::debug!("Updating average processing time metric for connect requests: {} ns", new_avg);
+
+        match self.set_gauge(
+            &metric_name!(UDP_TRACKER_SERVER_PERFORMANCE_AVG_PROCESSING_TIME_NS),
+            label_set,
+            new_avg,
+            now,
+        ) {
+            Ok(()) => {}
+            Err(err) => tracing::error!("Failed to set gauge: {}", err),
+        }
+    }
+
     #[allow(clippy::cast_precision_loss)]
     pub fn recalculate_udp_avg_announce_processing_time_ns(&self, req_processing_time: Duration) -> f64 {
         let req_processing_time = req_processing_time.as_nanos() as f64;
diff --git a/packages/udp-tracker-server/src/statistics/repository.rs b/packages/udp-tracker-server/src/statistics/repository.rs
@@ -73,10 +73,15 @@ impl Repository {
         result
     }
 
-    pub async fn recalculate_udp_avg_connect_processing_time_ns(&self, req_processing_time: Duration) -> f64 {
-        let stats_lock = self.stats.write().await;
+    pub async fn recalculate_udp_avg_connect_processing_time_ns(
+        &self,
+        req_processing_time: Duration,
+        label_set: &LabelSet,
+        now: DurationSinceUnixEpoch,
+    ) -> f64 {
+        let mut stats_lock = self.stats.write().await;
 
-        let new_avg = stats_lock.recalculate_udp_avg_connect_processing_time_ns(req_processing_time);
+        let new_avg = stats_lock.recalculate_udp_avg_connect_processing_time_ns(req_processing_time, label_set, now);
 
         drop(stats_lock);
 
@@ -338,7 +343,9 @@ mod tests {
 
         // Calculate new average with processing time of 2000ns
         let processing_time = Duration::from_nanos(2000);
-        let new_avg = repo.recalculate_udp_avg_connect_processing_time_ns(processing_time).await;
+        let new_avg = repo
+            .recalculate_udp_avg_connect_processing_time_ns(processing_time, &connect_labels, now)
+            .await;
 
         // Moving average: previous_avg + (new_value - previous_avg) / total_connections
         // 1000 + (2000 - 1000) / 3 = 1000 + 333.33 = 1333.33
@@ -436,17 +443,25 @@ mod tests {
     #[tokio::test]
     async fn recalculate_average_methods_should_handle_zero_connections_gracefully() {
         let repo = Repository::new();
+        let now = CurrentClock::now();
 
         // Test with zero connections (should not panic, should handle division by zero)
         let processing_time = Duration::from_nanos(1000);
 
-        let connect_avg = repo.recalculate_udp_avg_connect_processing_time_ns(processing_time).await;
+        let connect_labels = LabelSet::from([("request_kind", "connect")]);
+        let connect_avg = repo
+            .recalculate_udp_avg_connect_processing_time_ns(processing_time, &connect_labels, now)
+            .await;
+
+        let _announce_labels = LabelSet::from([("request_kind", "announce")]);
         let announce_avg = repo.recalculate_udp_avg_announce_processing_time_ns(processing_time).await;
+
+        let _scrape_labels = LabelSet::from([("request_kind", "scrape")]);
         let scrape_avg = repo.recalculate_udp_avg_scrape_processing_time_ns(processing_time).await;
 
         // With 0 total connections, the formula becomes 0 + (1000 - 0) / 0
         // This should handle the division by zero case gracefully
-        assert!(connect_avg.is_infinite() || connect_avg.is_nan());
+        assert!((connect_avg - 1000.0).abs() < f64::EPSILON);
         assert!(announce_avg.is_infinite() || announce_avg.is_nan());
         assert!(scrape_avg.is_infinite() || scrape_avg.is_nan());
     }
@@ -500,7 +515,10 @@ mod tests {
 
         // Test with very large processing time
         let large_duration = Duration::from_secs(1); // 1 second = 1,000,000,000 ns
-        let new_avg = repo.recalculate_udp_avg_connect_processing_time_ns(large_duration).await;
+        let connect_labels = LabelSet::from([("request_kind", "connect")]);
+        let new_avg = repo
+            .recalculate_udp_avg_connect_processing_time_ns(large_duration, &connect_labels, now)
+            .await;
 
         // Should handle large numbers without overflow
         assert!(new_avg > 0.0);
@@ -575,6 +593,7 @@ mod tests {
     #[tokio::test]
     async fn it_should_handle_moving_average_calculation_before_any_connections_are_recorded() {
         let repo = Repository::new();
+        let connect_labels = LabelSet::from([("request_kind", "connect")]);
         let now = CurrentClock::now();
 
         // This test checks the behavior of `recalculate_udp_avg_connect_processing_time_ns``
@@ -591,12 +610,13 @@ mod tests {
 
         // First calculation: no connections recorded yet, should result in infinity
         let processing_time_1 = Duration::from_nanos(2000);
-        let avg_1 = repo.recalculate_udp_avg_connect_processing_time_ns(processing_time_1).await;
+        let avg_1 = repo
+            .recalculate_udp_avg_connect_processing_time_ns(processing_time_1, &connect_labels, now)
+            .await;
 
-        // Division by zero: 1000 + (2000 - 1000) / 0 = infinity
         assert!(
-            avg_1.is_infinite(),
-            "First calculation should be infinite due to division by zero"
+            (avg_1 - 2000.0).abs() < f64::EPSILON,
+            "First calculation should be 2000, but got {avg_1}"
         );
 
         // Now add one connection and try again
@@ -605,10 +625,17 @@ mod tests {
             .await
             .unwrap();
 
-        // Second calculation: 1 connection, but previous average is infinity
+        // Second calculation: 1 connection
         let processing_time_2 = Duration::from_nanos(3000);
-        let avg_2 = repo.recalculate_udp_avg_connect_processing_time_ns(processing_time_2).await;
+        let connect_labels = LabelSet::from([("request_kind", "connect")]);
+        let avg_2 = repo
+            .recalculate_udp_avg_connect_processing_time_ns(processing_time_2, &connect_labels, now)
+            .await;
 
+        // There is one connection, so the average should be:
+        // 2000 + (3000 - 2000) / 1 = 2000 + 1000 = 3000
+        // This is because one connection is not counted yet in the average calculation,
+        // so the average is simply the processing time of the second connection.
         assert!(
             (avg_2 - 3000.0).abs() < f64::EPSILON,
             "Second calculation should be 3000ns, but got {avg_2}"