diff --git a/lib/api/backoff.dart b/lib/api/backoff.dart
index 640509d24d..f81630529c 100644
--- a/lib/api/backoff.dart
+++ b/lib/api/backoff.dart
@@ -5,13 +5,10 @@ import 'dart:math';
 /// Call the constructor before a loop starts, and call [wait] in each iteration
 /// of the loop. Do not re-use the instance after exiting the loop.
 class BackoffMachine {
-  BackoffMachine();
-
-  static const double _firstDurationMs = 100;
-  static const double _durationCeilingMs = 10 * 1000;
-  static const double _base = 2;
-
-  DateTime? _startTime;
+  BackoffMachine({
+    this.firstBound = const Duration(milliseconds: 100),
+    this.maxBound = const Duration(seconds: 10),
+  }) : assert(firstBound <= maxBound);
 
   /// How many waits have completed so far.
   ///
@@ -20,19 +17,42 @@ class BackoffMachine {
   int get waitsCompleted => _waitsCompleted;
   int _waitsCompleted = 0;
 
-  /// A future that resolves after the appropriate duration.
+  /// The upper bound on the duration of the first wait.
+  ///
+  /// The actual duration will vary randomly up to this value; see [wait].
+  final Duration firstBound;
+
+  /// The maximum upper bound on the duration of each wait,
+  /// even after many waits.
+  ///
+  /// The actual durations will vary randomly up to this value; see [wait].
+  final Duration maxBound;
+
+  /// The factor the bound is multiplied by at each wait,
+  /// until it reaches [maxBound].
+  ///
+  /// This factor determines the bound on a given wait
+  /// as a multiple of the *bound* that applied to the previous wait,
+  /// not the (random) previous wait duration itself.
+  static const double base = 2;
+
+  /// A future that resolves after an appropriate backoff time,
+  /// with jitter applied to capped exponential growth.
   ///
-  /// The popular exponential backoff strategy is to increase the duration
-  /// exponentially with the number of sleeps completed, with a base of 2,
-  /// until a ceiling is reached. E.g., if the first duration is 100ms and
-  /// the ceiling is 10s = 10000ms, the sequence is, in ms:
+  /// Each [wait] computes an upper bound on its wait duration,
+  /// in a sequence growing exponentially from [firstBound]
+  /// to a cap of [maxBound] by factors of [base].
+  /// With their default values, this sequence is, in seconds:
   ///
-  ///   100, 200, 400, 800, 1600, 3200, 6400, 10000, 10000, 10000, ...
+  ///   0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 10, 10, 10, ...
   ///
-  /// Instead of using this strategy directly, we also apply "jitter".
-  /// We use capped exponential backoff for the *upper bound* on a random
-  /// duration, where the lower bound is always zero. Mitigating "bursts" is
-  /// the goal of any "jitter" strategy, and the larger the range of randomness,
+  /// To provide jitter, the actual wait duration is chosen randomly
+  /// on the whole interval from zero up to the computed upper bound.
+  ///
+  /// This jitter strategy with a lower bound of zero is reported to be more
+  /// effective than some widespread strategies that use narrower intervals.
+  /// The purpose of jitter is to mitigate "bursts" where many clients make
+  /// requests in a short period; the larger the range of randomness,
   /// the smoother the bursts. Keeping the lower bound at zero
   /// maximizes the range while preserving a capped exponential shape on
   /// the expected value. Greg discusses this in more detail at:
@@ -44,16 +64,29 @@ class BackoffMachine {
   /// Because in the real world any delay takes nonzero time, this mainly
   /// affects tests that use fake time, and keeps their behavior more realistic.
   Future wait() async {
-    _startTime ??= DateTime.now();
-
-    final durationMs =
-      Random().nextDouble() // "Jitter"
-        * min(_durationCeilingMs,
-              _firstDurationMs * pow(_base, _waitsCompleted));
-
-    await Future.delayed(Duration(
-      microseconds: max(1, (1000 * durationMs).round())));
-
+    // Scale in floating point and check for overflow before building a
+    // [Duration]: `Duration * num` rounds its product to an int, and rounding
+    // throws on infinity, which this product reaches once [_waitsCompleted]
+    // is large enough (on the order of a thousand waits with [base] of 2).
+    final scaledMicros =
+      firstBound.inMicroseconds * pow(base, _waitsCompleted);
+    final bound = scaledMicros.isFinite
+      ? _minDuration(maxBound, Duration(microseconds: scaledMicros.round()))
+      : maxBound;
+    // Floor the jittered duration at one microsecond so it is never zero.
+    final duration = _maxDuration(const Duration(microseconds: 1),
+                                  bound * Random().nextDouble());
+    await Future.delayed(duration);
     _waitsCompleted++;
   }
 }
+
+/// The lesser of two durations; [a] when they are equal.
+Duration _minDuration(Duration a, Duration b) {
+  return a <= b ? a : b;
+}
+
+/// The greater of two durations; [a] when they are equal.
+Duration _maxDuration(Duration a, Duration b) {
+  return a >= b ? a : b;
+}
diff --git a/test/api/backoff_test.dart b/test/api/backoff_test.dart
index 0b66d5028f..22cadee001 100644
--- a/test/api/backoff_test.dart
+++ b/test/api/backoff_test.dart
@@ -15,18 +15,35 @@ Future measureWait(Future future) async {
 }
 
 void main() {
-  test('BackoffMachine timeouts are random from zero to 100ms, 200ms, 400ms, ...', () {
+  List expectedBounds({
+    required int length,
+    required Duration firstBound,
+    required Duration maxBound,
+  }) {
+    return List.generate(length, growable: false, (completed) {
+      return Duration(microseconds:
+        min(maxBound.inMicroseconds,
+            (firstBound.inMicroseconds
+             * pow(BackoffMachine.base, completed)).round()));
+    });
+  }
+
+  void checkEmpirically({
+      required Duration firstBound, required Duration maxBound}) {
     // This is a randomized test. [numTrials] is chosen so that the failure
     // probability < 1e-9. There are 2 * 11 assertions, and each one has a
     // failure probability < 1e-12; see below.
     const numTrials = 100;
-    final expectedMaxDurations = [
-      100, 200, 400, 800, 1600, 3200, 6400, 10000, 10000, 10000, 10000,
-    ].map((ms) => Duration(milliseconds: ms)).toList();
+    final expectedMaxDurations = expectedBounds(length: 11,
+      firstBound: firstBound, maxBound: maxBound);
+
+    // Check an assumption used in our failure-probability estimates.
+    assert(2 * expectedMaxDurations.length < 1000);
 
-    final trialResults = List.generate(numTrials, (_) =>
-      awaitFakeAsync((async) async {
-        final backoffMachine = BackoffMachine();
+    final trialResults = List.generate(numTrials, (_) {
+      return awaitFakeAsync((async) async {
+        final backoffMachine = BackoffMachine(firstBound: firstBound,
+          maxBound: maxBound);
         final results = [];
         for (int i = 0; i < expectedMaxDurations.length; i++) {
           final duration = await measureWait(backoffMachine.wait());
@@ -34,7 +51,8 @@ void main() {
         }
         check(async.pendingTimers).isEmpty();
         return results;
-      }));
+      });
+    });
 
     for (int i = 0; i < expectedMaxDurations.length; i++) {
       Duration maxFromAllTrials = trialResults[0][i];
@@ -51,6 +69,50 @@ void main() {
       check(minFromAllTrials).isLessThan(   expectedMax * 0.25);
       check(maxFromAllTrials).isGreaterThan(expectedMax * 0.75);
     }
+  }
+
+  test('BackoffMachine timeouts are random from zero to the intended bounds', () {
+    checkEmpirically(firstBound: const Duration(milliseconds: 100),
+                     maxBound:   const Duration(seconds: 10));
+  });
+
+  test('BackoffMachine timeouts, varying firstBound and maxBound', () {
+    checkEmpirically(firstBound: const Duration(seconds: 5),
+                     maxBound:   const Duration(seconds: 300));
+  });
+
+  test('BackoffMachine timeouts, maxBound equal to firstBound', () {
+    checkEmpirically(firstBound: const Duration(seconds: 1),
+                     maxBound:   const Duration(seconds: 1));
+  });
+
+  test('BackoffMachine keeps working after very many waits', () {
+    // Past roughly a thousand waits, the exponentially scaled bound overflows
+    // to infinity; the bound must then stay at maxBound instead of throwing.
+    awaitFakeAsync((async) async {
+      final backoffMachine = BackoffMachine(
+        firstBound: const Duration(microseconds: 1),
+        maxBound: const Duration(milliseconds: 1));
+      for (int i = 0; i < 1100; i++) {
+        await backoffMachine.wait();
+      }
+      check(backoffMachine.waitsCompleted).equals(1100);
+      check(async.pendingTimers).isEmpty();
+    });
+  });
+
+  test('BackoffMachine default firstBound and maxBound', () {
+    final backoffMachine = BackoffMachine();
+    check(backoffMachine.firstBound).equals(const Duration(milliseconds: 100));
+    check(backoffMachine.maxBound).equals(const Duration(seconds: 10));
+
+    // This check on expectedBounds acts as a cross-check on the
+    // other test cases above, confirming what it is they're checking for.
+    final bounds = expectedBounds(length: 11,
+      firstBound: backoffMachine.firstBound, maxBound: backoffMachine.maxBound);
+    check(bounds.map((d) => d.inMilliseconds)).deepEquals([
+      100, 200, 400, 800, 1600, 3200, 6400, 10000, 10000, 10000, 10000,
+    ]);
   });
 
   test('BackoffMachine timeouts are always positive', () {