diff --git a/src/DataProtection/DataProtection/src/Error.cs b/src/DataProtection/DataProtection/src/Error.cs index 6de1d7aa3bd6..14e33e994eda 100644 --- a/src/DataProtection/DataProtection/src/Error.cs +++ b/src/DataProtection/DataProtection/src/Error.cs @@ -97,4 +97,9 @@ public static InvalidOperationException KeyRingProvider_DefaultKeyRevoked(Guid i var message = string.Format(CultureInfo.CurrentCulture, Resources.KeyRingProvider_DefaultKeyRevoked, id); return new InvalidOperationException(message); } + + public static InvalidOperationException KeyRingProvider_RefreshFailedOnOtherThread() + { + return new InvalidOperationException(Resources.KeyRingProvider_RefreshFailedOnOtherThread); + } } diff --git a/src/DataProtection/DataProtection/src/KeyManagement/KeyRingProvider.cs b/src/DataProtection/DataProtection/src/KeyManagement/KeyRingProvider.cs index b2b704a27bb4..a36477fd4aa8 100644 --- a/src/DataProtection/DataProtection/src/KeyManagement/KeyRingProvider.cs +++ b/src/DataProtection/DataProtection/src/KeyManagement/KeyRingProvider.cs @@ -6,6 +6,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Threading; +using System.Threading.Tasks; using Microsoft.AspNetCore.Cryptography; using Microsoft.AspNetCore.DataProtection.KeyManagement.Internal; using Microsoft.Extensions.Logging; @@ -16,13 +17,17 @@ namespace Microsoft.AspNetCore.DataProtection.KeyManagement; internal sealed class KeyRingProvider : ICacheableKeyRingProvider, IKeyRingProvider { + private const string DisableAsyncKeyRingUpdateSwitchKey = "Microsoft.AspNetCore.DataProtection.KeyManagement.DisableAsyncKeyRingUpdate"; + private CacheableKeyRing? _cacheableKeyRing; private readonly object _cacheableKeyRingLockObj = new object(); + private Task? _cacheableKeyRingTask; // Also covered by _cacheableKeyRingLockObj private readonly IDefaultKeyResolver _defaultKeyResolver; private readonly bool _autoGenerateKeys; private readonly TimeSpan _newKeyLifetime; private readonly IKeyManager _keyManager; private readonly ILogger _logger; + private readonly bool _disableAsyncKeyRingUpdate; public KeyRingProvider( IKeyManager keyManager, @@ -52,6 +57,8 @@ public KeyRingProvider( // We will automatically refresh any unknown keys for 2 minutes see https://github.com/dotnet/aspnetcore/issues/3975 AutoRefreshWindowEnd = DateTime.UtcNow.AddMinutes(2); + + AppContext.TryGetSwitch(DisableAsyncKeyRingUpdateSwitchKey, out _disableAsyncKeyRingUpdate); } // for testing @@ -195,6 +202,19 @@ internal IKeyRing RefreshCurrentKeyRing() internal IKeyRing GetCurrentKeyRingCore(DateTime utcNow, bool forceRefresh = false) { + // We're making a big, scary change to the way this cache is updated: now threads + // only block during computation of the new value if no old value is available + // (or if they force it). We'll leave the old code in place, behind an appcontext + // switch in case it turns out to have unwelcome emergent behavior. + // TODO: Delete one of these codepaths in 10.0. + return _disableAsyncKeyRingUpdate + ? GetCurrentKeyRingCoreOld(utcNow, forceRefresh) + : GetCurrentKeyRingCoreNew(utcNow, forceRefresh); + } + + private IKeyRing GetCurrentKeyRingCoreOld(DateTime utcNow, bool forceRefresh) + { + // DateTimes are only meaningfully comparable if they share the same Kind - require Utc for consistency Debug.Assert(utcNow.Kind == DateTimeKind.Utc); // Can we return the cached keyring to the caller? @@ -292,6 +312,149 @@ internal IKeyRing GetCurrentKeyRingCore(DateTime utcNow, bool forceRefresh = fal } } + private IKeyRing GetCurrentKeyRingCoreNew(DateTime utcNow, bool forceRefresh) + { + // DateTimes are only meaningfully comparable if they share the same Kind - require Utc for consistency + Debug.Assert(utcNow.Kind == DateTimeKind.Utc); + + // The 99% and perf-critical case is that there is no task in-flight and the cached + // key ring is valid. We do what we can to avoid unnecessary overhead (locking, + // context switching, etc) on this path. + + CacheableKeyRing? existingCacheableKeyRing = null; + + // Can we return the cached keyring to the caller? + if (!forceRefresh) + { + existingCacheableKeyRing = Volatile.Read(ref _cacheableKeyRing); + if (CacheableKeyRing.IsValid(existingCacheableKeyRing, utcNow)) + { + return existingCacheableKeyRing.KeyRing; + } + } + + // If work kicked off by a previous caller has completed, we should use those results + // We check this outside the lock to reduce contention in the common case (no task). + // Logically, it would probably make more sense to check this before checking whether + // the cache is valid - there could be a newer value available - but keeping that path + // fast is more important. The next forced refresh or cache expiration will cause the + // new value to be picked up. + var existingTask = Volatile.Read(ref _cacheableKeyRingTask); + if (existingTask is not null && existingTask.IsCompleted) + { + var taskKeyRing = GetKeyRingFromCompletedTask(existingTask, utcNow); // Throws if the task failed + if (taskKeyRing is not null) + { + return taskKeyRing; + } + } + + // The cached keyring hasn't been created or must be refreshed. We'll allow one thread to + // create a task to update the keyring, and all threads will continue to use the existing cached + // keyring while the first thread performs the update. There is an exception: if there + // is no usable existing cached keyring, all callers must block until the keyring exists. + lock (_cacheableKeyRingLockObj) + { + // Update existingTask, in case we're not the first to acquire the lock + existingTask = Volatile.Read(ref _cacheableKeyRingTask); + if (existingTask is null) + { + // If there's no existing task, make one now + // PERF: Closing over utcNow substantially slows down the fast case (valid cache) in micro-benchmarks + // (closing over `this` for CacheableKeyRingProvider doesn't seem impactful) + existingTask = Task.Factory.StartNew( + utcNow => CacheableKeyRingProvider.GetCacheableKeyRing((DateTime)utcNow!), + utcNow, + CancellationToken.None, // GetKeyRingFromCompletedTask will need to react if this becomes cancellable + TaskCreationOptions.DenyChildAttach, + TaskScheduler.Default); + Volatile.Write(ref _cacheableKeyRingTask, existingTask); + } + } + + if (existingCacheableKeyRing is not null) + { + Debug.Assert(!forceRefresh, "Read cached key ring even though forceRefresh is true"); + Debug.Assert(!CacheableKeyRing.IsValid(existingCacheableKeyRing, utcNow), "Should already have returned a valid cached key ring"); + return existingCacheableKeyRing.KeyRing; + } + + // Since there's no cached key ring we can return, we have to wait. It's not ideal to wait for a task we + // just scheduled, but it makes the code a lot simpler (compared to having a separate, synchronous code path). + // Cleverness: swallow any exceptions - they'll be surfaced by GetKeyRingFromCompletedTask, if appropriate. + existingTask + .ContinueWith(static _ => { }, TaskScheduler.Default) + .Wait(); + + var newKeyRing = GetKeyRingFromCompletedTask(existingTask, utcNow); // Throws if the task failed (winning thread only) + if (newKeyRing is null) + { + // Another thread won - check whether it cached a new key ring + var newCacheableKeyRing = Volatile.Read(ref _cacheableKeyRing); + if (newCacheableKeyRing is null) + { + // There will have been a better exception from the winning thread + throw Error.KeyRingProvider_RefreshFailedOnOtherThread(); + } + + newKeyRing = newCacheableKeyRing.KeyRing; + } + + return newKeyRing; + } + + /// + /// Returns null if another thread already processed the completed task. + /// Otherwise, if the given completed task completed successfully, clears the task + /// and either caches and returns the resulting key ring or throws, according to the + /// successfulness of the task. + /// + private IKeyRing? GetKeyRingFromCompletedTask(Task task, DateTime utcNow) + { + Debug.Assert(task.IsCompleted); + + lock (_cacheableKeyRingLockObj) + { + // If the parameter doesn't match the field, another thread has already consumed the task (and it's reflected in _cacheableKeyRing) + if (!ReferenceEquals(task, Volatile.Read(ref _cacheableKeyRingTask))) + { + return null; + } + + Volatile.Write(ref _cacheableKeyRingTask, null); + + if (task.Status == TaskStatus.RanToCompletion) + { + var newCacheableKeyRing = task.Result; + Volatile.Write(ref _cacheableKeyRing, newCacheableKeyRing); + + // An unconsumed task result is considered to satisfy forceRefresh. One could quibble that this isn't really + // a forced refresh, but we'll still return a key ring newer than the one the caller was dissatisfied with. + return newCacheableKeyRing.KeyRing; + } + + Debug.Assert(!task.IsCanceled, "How did a task with no cancellation token get canceled?"); + Debug.Assert(task.Exception is not null, "Task should have either completed successfully or with an exception"); + var exception = task.Exception; + + var existingCacheableKeyRing = Volatile.Read(ref _cacheableKeyRing); + if (existingCacheableKeyRing is not null && !CacheableKeyRing.IsValid(existingCacheableKeyRing, utcNow)) + { + // If reading failed, we probably don't want to try again for a little bit, so slightly extend the + // lifetime of the current cache entry + Volatile.Write(ref _cacheableKeyRing, existingCacheableKeyRing.WithTemporaryExtendedLifetime(utcNow)); + + _logger.ErrorOccurredWhileRefreshingKeyRing(exception); // This one mentions the no-retry window + } + else + { + _logger.ErrorOccurredWhileReadingKeyRing(exception); + } + + throw exception.InnerExceptions.Count == 1 ? exception.InnerExceptions[0] : exception; + } + } + private static TimeSpan GetRefreshPeriodWithJitter(TimeSpan refreshPeriod) { // We'll fudge the refresh period up to -20% so that multiple applications don't try to diff --git a/src/DataProtection/DataProtection/src/Resources.resx b/src/DataProtection/DataProtection/src/Resources.resx index aa1eb24fea14..7a07e901aa21 100644 --- a/src/DataProtection/DataProtection/src/Resources.resx +++ b/src/DataProtection/DataProtection/src/Resources.resx @@ -190,6 +190,9 @@ The key ring's default data protection key {0:B} has been revoked. + + Another thread failed to refresh the key ring. + {0} must not be negative. diff --git a/src/DataProtection/DataProtection/test/Microsoft.AspNetCore.DataProtection.Tests/KeyManagement/KeyRingProviderTests.cs b/src/DataProtection/DataProtection/test/Microsoft.AspNetCore.DataProtection.Tests/KeyManagement/KeyRingProviderTests.cs index 243bdcd443af..c5c689c1685c 100644 --- a/src/DataProtection/DataProtection/test/Microsoft.AspNetCore.DataProtection.Tests/KeyManagement/KeyRingProviderTests.cs +++ b/src/DataProtection/DataProtection/test/Microsoft.AspNetCore.DataProtection.Tests/KeyManagement/KeyRingProviderTests.cs @@ -661,7 +661,7 @@ public void GetCurrentKeyRing_KeyRingCached_AfterExpiration_ClearsCache() // Act var retVal1 = keyRingProvider.GetCurrentKeyRingCore(now); - var retVal2 = keyRingProvider.GetCurrentKeyRingCore(now + TimeSpan.FromHours(1)); + var retVal2 = keyRingProvider.GetCurrentKeyRingCore(now + TimeSpan.FromHours(1), forceRefresh: true); // Assert - underlying provider only should have been called once Assert.Same(expectedKeyRing1, retVal1); @@ -718,46 +718,6 @@ public void GetCurrentKeyRing_NoExistingKeyRing_HoldsAllThreadsUntilKeyRingCreat mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(It.IsAny()), Times.Once); } - [Fact] - public void GetCurrentKeyRing_WithExpiredExistingKeyRing_AllowsOneThreadToUpdate_ReturnsExistingKeyRingToOtherCallersWithoutBlocking() - { - // Arrange - var originalKeyRing = new Mock().Object; - var originalKeyRingTime = StringToDateTime("2015-03-01 00:00:00Z"); - var updatedKeyRing = new Mock().Object; - var updatedKeyRingTime = StringToDateTime("2015-03-02 00:00:00Z"); - var mockCacheableKeyRingProvider = new Mock(); - var keyRingProvider = CreateKeyRingProvider(mockCacheableKeyRingProvider.Object); - - // In this test, the foreground thread acquires the critial section in GetCurrentKeyRing, - // and the background thread returns the original key ring rather than blocking while - // waiting for the foreground thread to update the key ring. - - TimeSpan testTimeout = TimeSpan.FromSeconds(10); - IKeyRing keyRingReturnedToBackgroundThread = null; - - mockCacheableKeyRingProvider.Setup(o => o.GetCacheableKeyRing(originalKeyRingTime)) - .Returns(new CacheableKeyRing(CancellationToken.None, StringToDateTime("2015-03-02 00:00:00Z"), originalKeyRing)); - mockCacheableKeyRingProvider.Setup(o => o.GetCacheableKeyRing(updatedKeyRingTime)) - .Returns(dto => - { - // at this point we're inside the critical section - spawn the background thread now - var backgroundGetKeyRingTask = Task.Run(() => - { - keyRingReturnedToBackgroundThread = keyRingProvider.GetCurrentKeyRingCore(updatedKeyRingTime); - }); - Assert.True(backgroundGetKeyRingTask.Wait(testTimeout), "Test timed out."); - - return new CacheableKeyRing(CancellationToken.None, StringToDateTime("2015-03-03 00:00:00Z"), updatedKeyRing); - }); - - // Assert - underlying provider only should have been called once with the updated time (by the foreground thread) - Assert.Same(originalKeyRing, keyRingProvider.GetCurrentKeyRingCore(originalKeyRingTime)); - Assert.Same(updatedKeyRing, keyRingProvider.GetCurrentKeyRingCore(updatedKeyRingTime)); - Assert.Same(originalKeyRing, keyRingReturnedToBackgroundThread); - mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(updatedKeyRingTime), Times.Once); - } - [Fact] public void GetCurrentKeyRing_WithExpiredExistingKeyRing_UpdateFails_ThrowsButCachesOldKeyRing() { @@ -779,9 +739,9 @@ public void GetCurrentKeyRing_WithExpiredExistingKeyRing_UpdateFails_ThrowsButCa // Act & assert Assert.Same(originalKeyRing, keyRingProvider.GetCurrentKeyRingCore(originalKeyRingTime)); cts.Cancel(); // invalidate the key ring - ExceptionAssert.Throws(() => keyRingProvider.GetCurrentKeyRingCore(throwKeyRingTime), "How exceptional."); - Assert.Same(originalKeyRing, keyRingProvider.GetCurrentKeyRingCore(throwKeyRingTime)); - Assert.Same(updatedKeyRing, keyRingProvider.GetCurrentKeyRingCore(updatedKeyRingTime)); + ExceptionAssert.Throws(() => keyRingProvider.GetCurrentKeyRingCore(throwKeyRingTime, forceRefresh: true), "How exceptional."); // forceRefresh to wait for exception + Assert.Same(originalKeyRing, keyRingProvider.GetCurrentKeyRingCore(throwKeyRingTime)); // Seeing the exception didn't clobber the cache + Assert.Same(updatedKeyRing, keyRingProvider.GetCurrentKeyRingCore(updatedKeyRingTime, forceRefresh: true)); // forceRefresh to wait for updated value mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(originalKeyRingTime), Times.Once); mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(throwKeyRingTime), Times.Once); mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(updatedKeyRingTime), Times.Once); @@ -844,6 +804,141 @@ private static ICacheableKeyRingProvider SetupCreateCacheableKeyRingTestAndCreat return CreateKeyRingProvider(mockKeyManager.Object, mockDefaultKeyResolver.Object, keyManagementOptions); } + [Theory] + [InlineData(true)] + [InlineData(false)] + public async Task MultipleThreadsForceRefresh(bool failsToReadKeyRing) + { + const int taskCount = 10; + var now = StringToDateTime("2015-03-01 00:00:00Z"); + + var expectedKeyRing = new Mock(); + var expectedException = new InvalidOperationException(nameof(MultipleThreadsForceRefresh)); + + var mockCacheableKeyRingProvider = new Mock(); + if (failsToReadKeyRing) + { + mockCacheableKeyRingProvider + .Setup(o => o.GetCacheableKeyRing(now)) + .Throws(expectedException); + } + else + { + mockCacheableKeyRingProvider + .Setup(o => o.GetCacheableKeyRing(now)) + .Returns(new CacheableKeyRing( + expirationToken: CancellationToken.None, + expirationTime: now.AddDays(1), + keyRing: expectedKeyRing.Object)); + } + + var keyRingProvider = CreateKeyRingProvider(mockCacheableKeyRingProvider.Object); + + var tasks = new Task[taskCount]; + for (var i = 0; i < taskCount; i++) + { + tasks[i] = Task.Run(() => + { + var keyRing = keyRingProvider.GetCurrentKeyRingCore(now, forceRefresh: true); + return keyRing; + }); + } + + if (failsToReadKeyRing) + { + await Task.WhenAll(tasks).ContinueWith(static _ => { }, TaskScheduler.Default); // Swallow exceptions - we'll inspect individual tasks + Assert.All(tasks, task => Assert.NotNull(task.Exception)); + + // We expect only one task to have thrown expectedException, but it's possible that multiple + // threads made it into the critical section (in sequence, obviously) and each saw expectedException. + // This check is descriptive, rather than normative - it would probably be preferable to have all of + // them see expectedException, but it's presently not propagated to threads that simply wait. + Assert.InRange(tasks.Count(task => ReferenceEquals(expectedException, task.Exception.InnerException)), 1, taskCount); + } + else + { + var actualKeyRings = await Task.WhenAll(tasks); + Assert.All(actualKeyRings, actualKeyRing => ReferenceEquals(expectedKeyRing, actualKeyRing)); + } + + // We'd like there to be exactly one call, but it's possible that the first thread will actually + // release the critical section before the last thread attempts to acquire it and the work will + // be redone (by design, since the refresh is forced). + // Even asserting < taskCount is probabilistic - it's possible, though very unlikely, that each + // thread could finish before the next even attempts to enter the criticial section. If this + // proves to be flaky, we could increase taskCount or intentionally slow down GetCacheableKeyRing. + mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(It.IsAny()), Times.AtMost(taskCount - 1)); + } + + [Fact] + public async Task MultipleThreadsSeeExpiredCachedValue() + { + const int taskCount = 10; + var time1 = StringToDateTime("2015-03-01 00:00:00Z"); + var time2 = time1.AddHours(1); + + var expectedKeyRing1 = new Mock().Object; + var expectedKeyRing2 = new Mock().Object; + + var cts = new CancellationTokenSource(); + + var mockCacheableKeyRingProvider = new Mock(); + mockCacheableKeyRingProvider + .Setup(o => o.GetCacheableKeyRing(time1)) + .Returns(new CacheableKeyRing( + expirationToken: cts.Token, + expirationTime: time1.AddDays(1), + keyRing: expectedKeyRing1)); + mockCacheableKeyRingProvider + .Setup(o => o.GetCacheableKeyRing(time2)) + .Returns(new CacheableKeyRing( + expirationToken: CancellationToken.None, + expirationTime: time2.AddDays(1), + keyRing: expectedKeyRing2)); + + var keyRingProvider = CreateKeyRingProvider(mockCacheableKeyRingProvider.Object); + + Assert.Same(expectedKeyRing1, keyRingProvider.GetCurrentKeyRingCore(time1)); // Ensure the cache is populated + + cts.Cancel(); // Invalidate (but don't clear) the cached value + + var tasks = new Task[taskCount]; + for (var i = 0; i < taskCount; i++) + { + tasks[i] = Task.Run(() => + { + var keyRing = keyRingProvider.GetCurrentKeyRingCore(time2); + return keyRing; + }); + } + + var actualKeyRings = await Task.WhenAll(tasks); + Assert.All(actualKeyRings, actualKeyRing => ReferenceEquals(expectedKeyRing1, actualKeyRing)); + + mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(time1), Times.Once); + + // We'd like there to be exactly one call, but it's possible that the first thread will actually + // release the critical section before the last thread attempts to acquire it and the work will + // be redone (by design, since the refresh is forced). + // Even asserting < taskCount is probabilistic - it's possible, though very unlikely, that each + // thread could finish before the next even attempts to enter the criticial section. If this + // proves to be flaky, we could increase taskCount or intentionally slow down GetCacheableKeyRing. + mockCacheableKeyRingProvider.Verify(o => o.GetCacheableKeyRing(time2), Times.AtMost(taskCount - 1)); + + // Verify that the updated value eventually becomes available (5 seconds max) + for (var i = 0; i < 10; i++) + { + var updatedKeyRing = keyRingProvider.GetCurrentKeyRingCore(time2); + if (ReferenceEquals(expectedKeyRing2, updatedKeyRing)) + { + break; + } + + Assert.Same(expectedKeyRing1, updatedKeyRing); + await Task.Delay(500); + } + } + private static KeyRingProvider CreateKeyRingProvider(ICacheableKeyRingProvider cacheableKeyRingProvider) { var mockEncryptorFactory = new Mock();