Skip to content

backoff: Make duration bounds customizable #975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 50 additions & 27 deletions lib/api/backoff.dart
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,10 @@ import 'dart:math';
/// Call the constructor before a loop starts, and call [wait] in each iteration
/// of the loop. Do not re-use the instance after exiting the loop.
class BackoffMachine {
BackoffMachine();

static const double _firstDurationMs = 100;
static const double _durationCeilingMs = 10 * 1000;
static const double _base = 2;

DateTime? _startTime;
BackoffMachine({
this.firstBound = const Duration(milliseconds: 100),
this.maxBound = const Duration(seconds: 10),
}) : assert(firstBound <= maxBound);

/// How many waits have completed so far.
///
Expand All @@ -20,19 +17,42 @@ class BackoffMachine {
int get waitsCompleted => _waitsCompleted;
int _waitsCompleted = 0;

/// A future that resolves after the appropriate duration.
/// The upper bound on the duration of the first wait.
///
/// The actual duration will vary randomly up to this value; see [wait].
final Duration firstBound;

/// The maximum upper bound on the duration of each wait,
/// even after many waits.
///
/// The actual durations will vary randomly up to this value; see [wait].
final Duration maxBound;

/// The factor the bound is multiplied by at each wait,
/// until it reaches [maxBound].
///
/// This factor determines the bound on a given wait
/// as a multiple of the *bound* that applied to the previous wait,
/// not the (random) previous wait duration itself.
static const double base = 2;

/// A future that resolves after an appropriate backoff time,
/// with jitter applied to capped exponential growth.
///
/// The popular exponential backoff strategy is to increase the duration
/// exponentially with the number of sleeps completed, with a base of 2,
/// until a ceiling is reached. E.g., if the first duration is 100ms and
/// the ceiling is 10s = 10000ms, the sequence is, in ms:
/// Each [wait] computes an upper bound on its wait duration,
/// in a sequence growing exponentially from [firstBound]
/// to a cap of [maxBound] by factors of [base].
/// With their default values, this sequence is, in seconds:
///
/// 100, 200, 400, 800, 1600, 3200, 6400, 10000, 10000, 10000, ...
/// 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 10, 10, 10, ...
///
/// Instead of using this strategy directly, we also apply "jitter".
/// We use capped exponential backoff for the *upper bound* on a random
/// duration, where the lower bound is always zero. Mitigating "bursts" is
/// the goal of any "jitter" strategy, and the larger the range of randomness,
/// To provide jitter, the actual wait duration is chosen randomly
/// on the whole interval from zero up to the computed upper bound.
///
/// This jitter strategy with a lower bound of zero is reported to be more
/// effective than some widespread strategies that use narrower intervals.
/// The purpose of jitter is to mitigate "bursts" where many clients make
/// requests in a short period; the larger the range of randomness,
/// the smoother the bursts. Keeping the lower bound at zero
/// maximizes the range while preserving a capped exponential shape on
/// the expected value. Greg discusses this in more detail at:
Expand All @@ -44,16 +64,19 @@ class BackoffMachine {
/// Because in the real world any delay takes nonzero time, this mainly
/// affects tests that use fake time, and keeps their behavior more realistic.
Future<void> wait() async {
  // Upper bound for this wait: firstBound * base^(waits completed so far),
  // capped at maxBound.  The product is computed as a plain number of
  // microseconds instead of with Duration's `*` operator, because that
  // operator rounds its result to an int, and rounding throws
  // UnsupportedError once `pow` has overflowed to infinity (which happens
  // after roughly a thousand waits with a base of 2).
  final num scaledMicros =
      firstBound.inMicroseconds * pow(base, _waitsCompleted);
  final Duration bound;
  if (scaledMicros.isNaN) {
    // Zero times infinity: a zero firstBound stays zero at every wait.
    bound = firstBound;
  } else if (scaledMicros < maxBound.inMicroseconds) {
    bound = Duration(microseconds: scaledMicros.round());
  } else {
    // The cap has been reached; this branch also absorbs an infinite product.
    bound = maxBound;
  }

  // Jitter: pick a random duration between zero and the bound, but never
  // less than one microsecond, so that the wait is always positive.
  final duration = _maxDuration(const Duration(microseconds: 1),
      bound * Random().nextDouble());
  await Future<void>.delayed(duration);
  _waitsCompleted++;
}
}

/// Returns the shorter of [a] and [b].
///
/// When the two durations are equal, [a] is returned.
Duration _minDuration(Duration a, Duration b) {
  if (b < a) {
    return b;
  }
  return a;
}

/// Returns the longer of [a] and [b].
///
/// When the two durations are equal, [a] is returned.
Duration _maxDuration(Duration a, Duration b) {
  if (a < b) {
    return b;
  }
  return a;
}
63 changes: 55 additions & 8 deletions test/api/backoff_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,44 @@ Future<Duration> measureWait(Future<void> future) async {
}

void main() {
test('BackoffMachine timeouts are random from zero to 100ms, 200ms, 400ms, ...', () {
// Builds the sequence of upper bounds expected for the first [length] waits:
// [firstBound] scaled by successive powers of [BackoffMachine.base],
// with each entry capped at [maxBound].
List<Duration> expectedBounds({
  required int length,
  required Duration firstBound,
  required Duration maxBound,
}) {
  final capMicros = maxBound.inMicroseconds;
  final startMicros = firstBound.inMicroseconds;
  return List.generate(length, growable: false, (waitIndex) {
    final grownMicros =
        (startMicros * pow(BackoffMachine.base, waitIndex)).round();
    return Duration(microseconds: min(capMicros, grownMicros));
  });
}

void checkEmpirically({
required Duration firstBound, required Duration maxBound}) {
// This is a randomized test. [numTrials] is chosen so that the failure
// probability < 1e-9. There are 2 * 11 assertions, and each one has a
// failure probability < 1e-12; see below.
const numTrials = 100;
final expectedMaxDurations = [
100, 200, 400, 800, 1600, 3200, 6400, 10000, 10000, 10000, 10000,
].map((ms) => Duration(milliseconds: ms)).toList();
final expectedMaxDurations = expectedBounds(length: 11,
firstBound: firstBound, maxBound: maxBound);

// Check an assumption used in our failure-probability estimates.
assert(2 * expectedMaxDurations.length < 1000);

final trialResults = List.generate(numTrials, (_) =>
awaitFakeAsync((async) async {
final backoffMachine = BackoffMachine();
final trialResults = List.generate(numTrials, (_) {
return awaitFakeAsync((async) async {
final backoffMachine = BackoffMachine(firstBound: firstBound,
maxBound: maxBound);
final results = <Duration>[];
for (int i = 0; i < expectedMaxDurations.length; i++) {
final duration = await measureWait(backoffMachine.wait());
results.add(duration);
}
check(async.pendingTimers).isEmpty();
return results;
}));
});
});

for (int i = 0; i < expectedMaxDurations.length; i++) {
Duration maxFromAllTrials = trialResults[0][i];
Expand All @@ -51,6 +69,35 @@ void main() {
check(minFromAllTrials).isLessThan( expectedMax * 0.25);
check(maxFromAllTrials).isGreaterThan(expectedMax * 0.75);
}
}

test('BackoffMachine timeouts are random from zero to the intended bounds', () {
checkEmpirically(firstBound: const Duration(milliseconds: 100),
maxBound: const Duration(seconds: 10));
});

test('BackoffMachine timeouts, varying firstBound and maxBound', () {
checkEmpirically(firstBound: const Duration(seconds: 5),
maxBound: const Duration(seconds: 300));
});

test('BackoffMachine timeouts, maxBound equal to firstBound', () {
checkEmpirically(firstBound: const Duration(seconds: 1),
maxBound: const Duration(seconds: 1));
});

test('BackoffMachine default firstBound and maxBound', () {
final backoffMachine = BackoffMachine();
check(backoffMachine.firstBound).equals(const Duration(milliseconds: 100));
check(backoffMachine.maxBound).equals(const Duration(seconds: 10));

// This check on expectedBounds acts as a cross-check on the
// other test cases above, confirming what it is they're checking for.
final bounds = expectedBounds(length: 11,
firstBound: backoffMachine.firstBound, maxBound: backoffMachine.maxBound);
check(bounds.map((d) => d.inMilliseconds)).deepEquals([
100, 200, 400, 800, 1600, 3200, 6400, 10000, 10000, 10000, 10000,
]);
});

test('BackoffMachine timeouts are always positive', () {
Expand Down