@@ -2,13 +2,11 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.

-using System;
 using System.Collections.Concurrent;
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Threading;
-using System.Threading.Tasks;

 namespace System.Net.Sockets
 {
@@ -56,22 +54,40 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error)

         private static readonly object s_lock = new object();

-        // In debug builds, force there to be 2 engines. In release builds, use half the number of processors when
-        // there are at least 6. The lower bound is to avoid using multiple engines on systems which aren't servers.
-#pragma warning disable CA1802 // const works for debug, but needs to be static readonly for release
-        private static readonly int s_engineCount =
-#if DEBUG
-            2;
-#else
-            Environment.ProcessorCount >= 6 ? Environment.ProcessorCount / 2 : 1;
-#endif
-#pragma warning restore CA1802
+        private static readonly int s_maxEngineCount = GetEngineCount();
+
+        private static int GetEngineCount()
+        {
+            // The responsibility of SocketAsyncEngine is to get notifications from epoll|kqueue
+            // and schedule the corresponding work items to the ThreadPool (socket reads and writes).
+            //
+            // Using TechEmpower benchmarks that generate a LOT of SMALL socket reads and writes under a VERY HIGH load,
+            // we have observed that a single engine is capable of keeping up to thirty x64 and eight ARM64 CPU cores busy.
+            //
+            // The vast majority of real-life scenarios are never going to generate such a huge load (hundreds of thousands of requests per second),
+            // so a single producer should almost always be enough.
+            //
+            // We want to be sure that we can handle extreme loads, which is why we have chosen these values.
+            //
+            // It's impossible to predict every possible scenario, so the value can also be configured via an environment variable.
+            if (uint.TryParse(Environment.GetEnvironmentVariable("DOTNET_SYSTEM_NET_SOCKETS_THREAD_COUNT"), out uint count))
+            {
+                return (int)count;
+            }
+
+            Architecture architecture = RuntimeInformation.ProcessArchitecture;
+            int coresPerEngine = architecture == Architecture.Arm64 || architecture == Architecture.Arm
+                ? 8
+                : 30;
+
+            return Math.Max(1, (int)Math.Round(Environment.ProcessorCount / (double)coresPerEngine));
+        }

         //
         // The current engines. We replace an engine when it runs out of "handle" values.
         // Must be accessed under s_lock.
         //
-        private static readonly SocketAsyncEngine?[] s_currentEngines = new SocketAsyncEngine?[s_engineCount];
+        private static readonly SocketAsyncEngine?[] s_currentEngines = new SocketAsyncEngine?[s_maxEngineCount];
         private static int s_allocateFromEngine = 0;

         private readonly IntPtr _port;
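For reference, a minimal standalone sketch of the heuristic introduced above (the EngineCountSketch class and its Main are illustrative, not part of the patch): one engine per roughly 30 x64 cores or 8 ARM cores, with the DOTNET_SYSTEM_NET_SOCKETS_THREAD_COUNT environment variable taking precedence.

using System;
using System.Runtime.InteropServices;

internal static class EngineCountSketch
{
    // Mirrors the patch's heuristic: an explicit override wins; otherwise round
    // ProcessorCount / coresPerEngine and never go below one engine.
    private static int GetEngineCount()
    {
        if (uint.TryParse(Environment.GetEnvironmentVariable("DOTNET_SYSTEM_NET_SOCKETS_THREAD_COUNT"), out uint count))
        {
            return (int)count;
        }

        Architecture arch = RuntimeInformation.ProcessArchitecture;
        int coresPerEngine = arch == Architecture.Arm64 || arch == Architecture.Arm ? 8 : 30;

        return Math.Max(1, (int)Math.Round(Environment.ProcessorCount / (double)coresPerEngine));
    }

    private static void Main()
    {
        // e.g. a 16-core x64 box prints 1, a 64-core x64 box prints 2, an 8-core ARM64 box prints 1.
        Console.WriteLine($"{Environment.ProcessorCount} logical cores -> {GetEngineCount()} engine(s)");
    }
}

Note that Math.Round defaults to banker's rounding, so for example a 75-core x64 machine (75 / 30 = 2.5) gets two engines rather than three.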
@@ -106,7 +122,7 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error)
         //
         private static readonly IntPtr MaxHandles = IntPtr.Size == 4 ? (IntPtr)int.MaxValue : (IntPtr)long.MaxValue;
 #endif
-        private static readonly IntPtr MinHandlesForAdditionalEngine = s_engineCount == 1 ? MaxHandles : (IntPtr)32;
+        private static readonly IntPtr MinHandlesForAdditionalEngine = s_maxEngineCount == 1 ? MaxHandles : (IntPtr)32;

         //
         // Sentinel handle value to identify events from the "shutdown pipe," used to signal an event loop to stop
@@ -129,7 +145,7 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error)
         //
         // Maps handle values to SocketAsyncContext instances.
         //
-        private readonly ConcurrentDictionary<IntPtr, SocketAsyncContext> _handleToContextMap = new ConcurrentDictionary<IntPtr, SocketAsyncContext>();
+        private readonly ConcurrentDictionary<IntPtr, SocketAsyncContextWrapper> _handleToContextMap = new ConcurrentDictionary<IntPtr, SocketAsyncContextWrapper>();

         //
         // Queue of events generated by EventLoop() that would be processed by the thread pool
@@ -197,7 +213,7 @@ private static void AllocateToken(SocketAsyncContext context, out SocketAsyncEng
                 // Round-robin to the next engine once we have sufficient sockets on this one.
                 if (!engine.HasLowNumberOfSockets)
                 {
-                    s_allocateFromEngine = (s_allocateFromEngine + 1) % s_engineCount;
+                    s_allocateFromEngine = (s_allocateFromEngine + 1) % s_maxEngineCount;
                 }
             }
         }
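The allocation policy itself is unchanged here; only the modulus now uses s_maxEngineCount. As a minimal sketch of that round-robin step (hypothetical names, not the patch itself): an engine keeps receiving sockets while it is lightly loaded, otherwise allocation advances to the next slot and wraps around.

using System;

internal static class RoundRobinSketch
{
    // Stay on the current engine while it still has a low number of sockets;
    // otherwise move to the next slot, wrapping at the engine count.
    public static int NextEngine(int current, bool hasLowNumberOfSockets, int maxEngineCount)
        => hasLowNumberOfSockets ? current : (current + 1) % maxEngineCount;

    private static void Main()
    {
        Console.WriteLine(NextEngine(current: 1, hasLowNumberOfSockets: false, maxEngineCount: 2)); // prints 0
        Console.WriteLine(NextEngine(current: 1, hasLowNumberOfSockets: true, maxEngineCount: 2));  // prints 1
    }
}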
@@ -208,7 +224,8 @@ private IntPtr AllocateHandle(SocketAsyncContext context)
             Debug.Assert(!IsFull, "Expected !IsFull");

             IntPtr handle = _nextHandle;
-            _handleToContextMap.TryAdd(handle, context);
+            Debug.Assert(handle != ShutdownHandle, "ShutdownHandle must not be added to the dictionary");
+            _handleToContextMap.TryAdd(handle, new SocketAsyncContextWrapper(context));

             _nextHandle = IntPtr.Add(_nextHandle, 1);
             _outstandingHandles = IntPtr.Add(_outstandingHandles, 1);
@@ -318,8 +335,10 @@ private void EventLoop()
         {
             bool shutdown = false;
             Interop.Sys.SocketEvent* buffer = _buffer;
-            ConcurrentDictionary<IntPtr, SocketAsyncContext> handleToContextMap = _handleToContextMap;
+            ConcurrentDictionary<IntPtr, SocketAsyncContextWrapper> handleToContextMap = _handleToContextMap;
             ConcurrentQueue<SocketIOEvent> eventQueue = _eventQueue;
+            IntPtr shutdownHandle = ShutdownHandle;
+            SocketAsyncContext? context = null;
             while (!shutdown)
             {
                 int numEvents = EventBufferCount;
@@ -333,38 +352,36 @@ private void EventLoop()
                 Debug.Assert(numEvents > 0, $"Unexpected numEvents: {numEvents}");

                 bool enqueuedEvent = false;
-                for (int i = 0; i < numEvents; i++)
+                foreach (var socketEvent in new ReadOnlySpan<Interop.Sys.SocketEvent>(buffer, numEvents))
                 {
-                    IntPtr handle = buffer[i].Data;
-                    if (handle == ShutdownHandle)
-                    {
-                        shutdown = true;
-                    }
-                    else
+                    IntPtr handle = socketEvent.Data;
+
+                    if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper) && (context = contextWrapper.Context) != null)
                     {
                         Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}");
-                        handleToContextMap.TryGetValue(handle, out SocketAsyncContext? context);
-                        if (context != null)
+
+                        Interop.Sys.SocketEvents events = context.HandleSyncEventsSpeculatively(socketEvent.Events);
+                        if (events != Interop.Sys.SocketEvents.None)
                         {
-                            Interop.Sys.SocketEvents events = buffer[i].Events;
-                            events = context.HandleSyncEventsSpeculatively(events);
-                            if (events != Interop.Sys.SocketEvents.None)
-                            {
-                                var ev = new SocketIOEvent(context, events);
-                                eventQueue.Enqueue(ev);
-                                enqueuedEvent = true;
-
-                                // This is necessary when the JIT generates unoptimized code (debug builds, live debugging,
-                                // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as
-                                // such code may keep the stack location live for longer than necessary
-                                ev = default;
-                            }
+                            var ev = new SocketIOEvent(context, events);
+                            eventQueue.Enqueue(ev);
+                            enqueuedEvent = true;

                             // This is necessary when the JIT generates unoptimized code (debug builds, live debugging,
                             // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as
                             // such code may keep the stack location live for longer than necessary
-                            context = null;
+                            ev = default;
                         }
+
+                        // This is necessary when the JIT generates unoptimized code (debug builds, live debugging,
+                        // quick JIT, etc.) to ensure that the context does not remain referenced by this method, as
+                        // such code may keep the stack location live for longer than necessary
+                        context = null;
+                        contextWrapper = default;
+                    }
+                    else if (handle == shutdownHandle)
+                    {
+                        shutdown = true;
                     }
                 }

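Two things change in this hunk: the shutdown-handle check moves into the else branch so the common path performs the dictionary lookup first, and indexed access through the raw Interop.Sys.SocketEvent* pointer is replaced by a foreach over a ReadOnlySpan<T> constructed from that pointer. A minimal sketch of the span-over-native-buffer pattern, assuming a hypothetical FakeEvent struct in place of Interop.Sys.SocketEvent (compile with unsafe code enabled):

using System;
using System.Runtime.InteropServices;

internal static class SpanOverNativeBufferSketch
{
    // Blittable stand-in for Interop.Sys.SocketEvent.
    private struct FakeEvent
    {
        public IntPtr Data;
        public int Events;
    }

    private static unsafe void Main()
    {
        const int count = 4;
        FakeEvent* buffer = (FakeEvent*)Marshal.AllocHGlobal(count * sizeof(FakeEvent));
        try
        {
            for (int i = 0; i < count; i++)
            {
                buffer[i] = new FakeEvent { Data = (IntPtr)i, Events = i * 10 };
            }

            // Same shape as the EventLoop change: wrap the raw pointer plus a length in a span,
            // then foreach over it instead of doing per-iteration pointer arithmetic.
            foreach (FakeEvent e in new ReadOnlySpan<FakeEvent>(buffer, count))
            {
                Console.WriteLine($"handle {e.Data} -> events {e.Events}");
            }
        }
        finally
        {
            Marshal.FreeHGlobal((IntPtr)buffer);
        }
    }
}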
@@ -488,6 +505,18 @@ private bool TryRegister(SafeSocketHandle socket, IntPtr handle, out Interop.Err
             return error == Interop.Error.SUCCESS;
         }

+        // A struct wrapper is used to improve the performance of the epoll thread's hot path by up to 3% in some TechEmpower benchmarks.
+        // The goal is to get a dedicated generic instantiation and call:
+        // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.Net.Sockets.SocketAsyncContextWrapper]::TryGetValueInternal(!0,int32,!1&)
+        // instead of the shared one:
+        // System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.__Canon]::TryGetValueInternal(!0,int32,!1&)
+        private readonly struct SocketAsyncContextWrapper
+        {
+            public SocketAsyncContextWrapper(SocketAsyncContext context) => Context = context;
+
+            internal SocketAsyncContext Context { get; }
+        }
+
         private readonly struct SocketIOEvent
         {
             public SocketAsyncContext Context { get; }
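The wrapper trick generalizes to any ConcurrentDictionary whose value is a reference type. A hedged sketch with hypothetical Payload/PayloadWrapper types: because the wrapper is a value type, the JIT produces a dedicated ConcurrentDictionary<IntPtr, PayloadWrapper> instantiation instead of the System.__Canon instantiation shared by all reference-type arguments, avoiding the shared generic code on the lookup path. Whether that is measurable depends on the workload; the ~3% figure above comes from the patch's TechEmpower runs.

using System;
using System.Collections.Concurrent;

internal sealed class Payload
{
    public int Value;
}

// Wrapping the reference type in a readonly struct makes the dictionary's TValue a value type,
// which gets its own generic instantiation rather than the shared __Canon-based one.
internal readonly struct PayloadWrapper
{
    public PayloadWrapper(Payload payload) => Payload = payload;

    public Payload Payload { get; }
}

internal static class WrapperSketch
{
    private static readonly ConcurrentDictionary<IntPtr, PayloadWrapper> s_map =
        new ConcurrentDictionary<IntPtr, PayloadWrapper>();

    private static void Main()
    {
        s_map.TryAdd((IntPtr)1, new PayloadWrapper(new Payload { Value = 42 }));

        if (s_map.TryGetValue((IntPtr)1, out PayloadWrapper wrapper))
        {
            Console.WriteLine(wrapper.Payload.Value); // prints 42
        }
    }
}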