66using System . Diagnostics ;
77using System . Diagnostics . CodeAnalysis ;
88using System . Threading ;
9+ using System . Threading . Tasks ;
910using Microsoft . AspNetCore . Cryptography ;
1011using Microsoft . AspNetCore . DataProtection . KeyManagement . Internal ;
1112using Microsoft . Extensions . Logging ;
@@ -16,13 +17,17 @@ namespace Microsoft.AspNetCore.DataProtection.KeyManagement;
1617
1718internal sealed class KeyRingProvider : ICacheableKeyRingProvider , IKeyRingProvider
1819{
20+ private const string DisableAsyncKeyRingUpdateSwitchKey = "Microsoft.AspNetCore.DataProtection.KeyManagement.DisableAsyncKeyRingUpdate" ;
21+
1922 private CacheableKeyRing ? _cacheableKeyRing ;
2023 private readonly object _cacheableKeyRingLockObj = new object ( ) ;
24+ private Task < CacheableKeyRing > ? _cacheableKeyRingTask ; // Also covered by _cacheableKeyRingLockObj
2125 private readonly IDefaultKeyResolver _defaultKeyResolver ;
2226 private readonly bool _autoGenerateKeys ;
2327 private readonly TimeSpan _newKeyLifetime ;
2428 private readonly IKeyManager _keyManager ;
2529 private readonly ILogger _logger ;
30+ private readonly bool _disableAsyncKeyRingUpdate ;
2631
2732 public KeyRingProvider (
2833 IKeyManager keyManager ,
@@ -52,6 +57,8 @@ public KeyRingProvider(
5257
5358 // We will automatically refresh any unknown keys for 2 minutes see https://github.com/dotnet/aspnetcore/issues/3975
5459 AutoRefreshWindowEnd = DateTime . UtcNow . AddMinutes ( 2 ) ;
60+
61+ AppContext . TryGetSwitch ( DisableAsyncKeyRingUpdateSwitchKey , out _disableAsyncKeyRingUpdate ) ;
5562 }
5663
5764 // for testing
@@ -195,6 +202,19 @@ internal IKeyRing RefreshCurrentKeyRing()
195202
196203 internal IKeyRing GetCurrentKeyRingCore ( DateTime utcNow , bool forceRefresh = false )
197204 {
205+ // We're making a big, scary change to the way this cache is updated: now threads
206+ // only block during computation of the new value if no old value is available
207+ // (or if they force it). We'll leave the old code in place, behind an appcontext
208+ // switch in case it turns out to have unwelcome emergent behavior.
209+ // TODO: Delete one of these codepaths in 10.0.
210+ return _disableAsyncKeyRingUpdate
211+ ? GetCurrentKeyRingCoreOld ( utcNow , forceRefresh )
212+ : GetCurrentKeyRingCoreNew ( utcNow , forceRefresh ) ;
213+ }
214+
215+ private IKeyRing GetCurrentKeyRingCoreOld ( DateTime utcNow , bool forceRefresh )
216+ {
217+ // DateTimes are only meaningfully comparable if they share the same Kind - require Utc for consistency
198218 Debug . Assert ( utcNow . Kind == DateTimeKind . Utc ) ;
199219
200220 // Can we return the cached keyring to the caller?
@@ -292,6 +312,149 @@ internal IKeyRing GetCurrentKeyRingCore(DateTime utcNow, bool forceRefresh = fal
292312 }
293313 }
294314
315+ private IKeyRing GetCurrentKeyRingCoreNew ( DateTime utcNow , bool forceRefresh )
316+ {
317+ // DateTimes are only meaningfully comparable if they share the same Kind - require Utc for consistency
318+ Debug . Assert ( utcNow . Kind == DateTimeKind . Utc ) ;
319+
320+ // The 99% and perf-critical case is that there is no task in-flight and the cached
321+ // key ring is valid. We do what we can to avoid unnecessary overhead (locking,
322+ // context switching, etc) on this path.
323+
324+ CacheableKeyRing ? existingCacheableKeyRing = null ;
325+
326+ // Can we return the cached keyring to the caller?
327+ if ( ! forceRefresh )
328+ {
329+ existingCacheableKeyRing = Volatile . Read ( ref _cacheableKeyRing ) ;
330+ if ( CacheableKeyRing . IsValid ( existingCacheableKeyRing , utcNow ) )
331+ {
332+ return existingCacheableKeyRing . KeyRing ;
333+ }
334+ }
335+
336+ // If work kicked off by a previous caller has completed, we should use those results
337+ // We check this outside the lock to reduce contention in the common case (no task).
338+ // Logically, it would probably make more sense to check this before checking whether
339+ // the cache is valid - there could be a newer value available - but keeping that path
340+ // fast is more important. The next forced refresh or cache expiration will cause the
341+ // new value to be picked up.
342+ var existingTask = Volatile . Read ( ref _cacheableKeyRingTask ) ;
343+ if ( existingTask is not null && existingTask . IsCompleted )
344+ {
345+ var taskKeyRing = GetKeyRingFromCompletedTask ( existingTask , utcNow ) ; // Throws if the task failed
346+ if ( taskKeyRing is not null )
347+ {
348+ return taskKeyRing ;
349+ }
350+ }
351+
352+ // The cached keyring hasn't been created or must be refreshed. We'll allow one thread to
353+ // create a task to update the keyring, and all threads will continue to use the existing cached
354+ // keyring while the first thread performs the update. There is an exception: if there
355+ // is no usable existing cached keyring, all callers must block until the keyring exists.
356+ lock ( _cacheableKeyRingLockObj )
357+ {
358+ // Update existingTask, in case we're not the first to acquire the lock
359+ existingTask = Volatile . Read ( ref _cacheableKeyRingTask ) ;
360+ if ( existingTask is null )
361+ {
362+ // If there's no existing task, make one now
363+ // PERF: Closing over utcNow substantially slows down the fast case (valid cache) in micro-benchmarks
364+ // (closing over `this` for CacheableKeyRingProvider doesn't seem impactful)
365+ existingTask = Task . Factory . StartNew (
366+ utcNow => CacheableKeyRingProvider . GetCacheableKeyRing ( ( DateTime ) utcNow ! ) ,
367+ utcNow ,
368+ CancellationToken . None , // GetKeyRingFromCompletedTask will need to react if this becomes cancellable
369+ TaskCreationOptions . DenyChildAttach ,
370+ TaskScheduler . Default ) ;
371+ Volatile . Write ( ref _cacheableKeyRingTask , existingTask ) ;
372+ }
373+ }
374+
375+ if ( existingCacheableKeyRing is not null )
376+ {
377+ Debug . Assert ( ! forceRefresh , "Read cached key ring even though forceRefresh is true" ) ;
378+ Debug . Assert ( ! CacheableKeyRing . IsValid ( existingCacheableKeyRing , utcNow ) , "Should already have returned a valid cached key ring" ) ;
379+ return existingCacheableKeyRing . KeyRing ;
380+ }
381+
382+ // Since there's no cached key ring we can return, we have to wait. It's not ideal to wait for a task we
383+ // just scheduled, but it makes the code a lot simpler (compared to having a separate, synchronous code path).
384+ // Cleverness: swallow any exceptions - they'll be surfaced by GetKeyRingFromCompletedTask, if appropriate.
385+ existingTask
386+ . ContinueWith ( static _ => { } , TaskScheduler . Default )
387+ . Wait ( ) ;
388+
389+ var newKeyRing = GetKeyRingFromCompletedTask ( existingTask , utcNow ) ; // Throws if the task failed (winning thread only)
390+ if ( newKeyRing is null )
391+ {
392+ // Another thread won - check whether it cached a new key ring
393+ var newCacheableKeyRing = Volatile . Read ( ref _cacheableKeyRing ) ;
394+ if ( newCacheableKeyRing is null )
395+ {
396+ // There will have been a better exception from the winning thread
397+ throw Error . KeyRingProvider_RefreshFailedOnOtherThread ( ) ;
398+ }
399+
400+ newKeyRing = newCacheableKeyRing . KeyRing ;
401+ }
402+
403+ return newKeyRing ;
404+ }
405+
406+ /// <summary>
407+ /// Returns null if another thread already processed the completed task.
408+ /// Otherwise, if the given completed task completed successfully, clears the task
409+ /// and either caches and returns the resulting key ring or throws, according to the
410+ /// successfulness of the task.
411+ /// </summary>
412+ private IKeyRing ? GetKeyRingFromCompletedTask ( Task < CacheableKeyRing > task , DateTime utcNow )
413+ {
414+ Debug . Assert ( task . IsCompleted ) ;
415+
416+ lock ( _cacheableKeyRingLockObj )
417+ {
418+ // If the parameter doesn't match the field, another thread has already consumed the task (and it's reflected in _cacheableKeyRing)
419+ if ( ! ReferenceEquals ( task , Volatile . Read ( ref _cacheableKeyRingTask ) ) )
420+ {
421+ return null ;
422+ }
423+
424+ Volatile . Write ( ref _cacheableKeyRingTask , null ) ;
425+
426+ if ( task . Status == TaskStatus . RanToCompletion )
427+ {
428+ var newCacheableKeyRing = task . Result ;
429+ Volatile . Write ( ref _cacheableKeyRing , newCacheableKeyRing ) ;
430+
431+ // An unconsumed task result is considered to satisfy forceRefresh. One could quibble that this isn't really
432+ // a forced refresh, but we'll still return a key ring newer than the one the caller was dissatisfied with.
433+ return newCacheableKeyRing . KeyRing ;
434+ }
435+
436+ Debug . Assert ( ! task . IsCanceled , "How did a task with no cancellation token get canceled?" ) ;
437+ Debug . Assert ( task . Exception is not null , "Task should have either completed successfully or with an exception" ) ;
438+ var exception = task . Exception ;
439+
440+ var existingCacheableKeyRing = Volatile . Read ( ref _cacheableKeyRing ) ;
441+ if ( existingCacheableKeyRing is not null && ! CacheableKeyRing . IsValid ( existingCacheableKeyRing , utcNow ) )
442+ {
443+ // If reading failed, we probably don't want to try again for a little bit, so slightly extend the
444+ // lifetime of the current cache entry
445+ Volatile . Write ( ref _cacheableKeyRing , existingCacheableKeyRing . WithTemporaryExtendedLifetime ( utcNow ) ) ;
446+
447+ _logger . ErrorOccurredWhileRefreshingKeyRing ( exception ) ; // This one mentions the no-retry window
448+ }
449+ else
450+ {
451+ _logger . ErrorOccurredWhileReadingKeyRing ( exception ) ;
452+ }
453+
454+ throw exception . InnerExceptions . Count == 1 ? exception . InnerExceptions [ 0 ] : exception ;
455+ }
456+ }
457+
295458 private static TimeSpan GetRefreshPeriodWithJitter ( TimeSpan refreshPeriod )
296459 {
297460 // We'll fudge the refresh period up to -20% so that multiple applications don't try to
0 commit comments