From 6a17ea5c2c860ae6d22a37615a31b7c9beaf7390 Mon Sep 17 00:00:00 2001 From: Larry Safran Date: Thu, 19 Dec 2024 11:36:27 -0800 Subject: [PATCH 1/2] Fix retry timer backoff duration. --- .../main/java/io/grpc/xds/client/ControlPlaneClient.java | 7 ++----- .../test/java/io/grpc/xds/GrpcXdsClientImplTestBase.java | 1 + 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/xds/src/main/java/io/grpc/xds/client/ControlPlaneClient.java b/xds/src/main/java/io/grpc/xds/client/ControlPlaneClient.java index 047f8a2e315..32b7b33488a 100644 --- a/xds/src/main/java/io/grpc/xds/client/ControlPlaneClient.java +++ b/xds/src/main/java/io/grpc/xds/client/ControlPlaneClient.java @@ -451,12 +451,9 @@ private void handleRpcStreamClosed(Status status) { // FakeClock in tests isn't thread-safe. Schedule the retry timer before notifying callbacks // to avoid TSAN races, since tests may wait until callbacks are called but then would run // concurrently with the stopwatch and schedule. - - long elapsed = stopwatch.elapsed(TimeUnit.NANOSECONDS); - long delayNanos = Math.max(0, retryBackoffPolicy.nextBackoffNanos() - elapsed); - rpcRetryTimer = - syncContext.schedule(new RpcRetryTask(), delayNanos, TimeUnit.NANOSECONDS, timeService); + syncContext.schedule(new RpcRetryTask(), retryBackoffPolicy.nextBackoffNanos(), + TimeUnit.NANOSECONDS, timeService); Status newStatus = status; if (responseReceived) { diff --git a/xds/src/test/java/io/grpc/xds/GrpcXdsClientImplTestBase.java b/xds/src/test/java/io/grpc/xds/GrpcXdsClientImplTestBase.java index f1c114673b5..1b0c363d94f 100644 --- a/xds/src/test/java/io/grpc/xds/GrpcXdsClientImplTestBase.java +++ b/xds/src/test/java/io/grpc/xds/GrpcXdsClientImplTestBase.java @@ -3524,6 +3524,7 @@ public void streamClosedAndRetryWithBackoff() { call.verifyRequest(EDS, EDS_RESOURCE, "", "", NODE); // Management server closes the RPC stream with an error. + fakeClock.forwardNanos(1000L); // Make sure retry isn't based on stopwatch 0 call.sendError(Status.UNKNOWN.asException()); verify(ldsResourceWatcher, Mockito.timeout(1000).times(1)) .onError(errorCaptor.capture()); From c2c84b58c341df619c3b209cf728cdd92f91ac95 Mon Sep 17 00:00:00 2001 From: Larry Safran Date: Thu, 19 Dec 2024 13:38:15 -0800 Subject: [PATCH 2/2] Reset stopwatch when we had results on AdsStream rather than change the delay calculation logic. --- .../main/java/io/grpc/xds/client/ControlPlaneClient.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xds/src/main/java/io/grpc/xds/client/ControlPlaneClient.java b/xds/src/main/java/io/grpc/xds/client/ControlPlaneClient.java index 32b7b33488a..9c7d744816a 100644 --- a/xds/src/main/java/io/grpc/xds/client/ControlPlaneClient.java +++ b/xds/src/main/java/io/grpc/xds/client/ControlPlaneClient.java @@ -446,14 +446,18 @@ private void handleRpcStreamClosed(Status status) { // Reset the backoff sequence if had received a response, or backoff sequence // has never been initialized. retryBackoffPolicy = backoffPolicyProvider.get(); + stopwatch.reset(); } // FakeClock in tests isn't thread-safe. Schedule the retry timer before notifying callbacks // to avoid TSAN races, since tests may wait until callbacks are called but then would run // concurrently with the stopwatch and schedule. + + long elapsed = stopwatch.elapsed(TimeUnit.NANOSECONDS); + long delayNanos = Math.max(0, retryBackoffPolicy.nextBackoffNanos() - elapsed); + rpcRetryTimer = - syncContext.schedule(new RpcRetryTask(), retryBackoffPolicy.nextBackoffNanos(), - TimeUnit.NANOSECONDS, timeService); + syncContext.schedule(new RpcRetryTask(), delayNanos, TimeUnit.NANOSECONDS, timeService); Status newStatus = status; if (responseReceived) {