Skip to content

Commit 88a78d2

Browse files
authored
Change epoll thread count (#35800)
* use struct wrapper for better perf
* check the most common case first
* don't access static variable in a loop
* use Span instead of raw pointers
* change the heuristic, single epoll thread is not always enough
* simplify the heuristic and add a comment
* apply the naming suggestions
1 parent 08b244e commit 88a78d2

File tree

2 files changed

+72
-42
lines changed

2 files changed

+72
-42
lines changed

src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncContext.Unix.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1995,7 +1995,8 @@ public SocketError SendFileAsync(SafeFileHandle fileHandle, long offset, long co
19951995
// be scheduled instead. It's not functionally incorrect to schedule the release of a synchronous operation, just it may
19961996
// lead to thread pool starvation issues if the synchronous operations are blocking thread pool threads (typically not
19971997
// advised) and more threads are not immediately available to run work items that would release those operations.
1998-
public unsafe Interop.Sys.SocketEvents HandleSyncEventsSpeculatively(Interop.Sys.SocketEvents events)
1998+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1999+
public Interop.Sys.SocketEvents HandleSyncEventsSpeculatively(Interop.Sys.SocketEvents events)
19992000
{
20002001
if ((events & Interop.Sys.SocketEvents.Error) != 0)
20012002
{

src/libraries/System.Net.Sockets/src/System/Net/Sockets/SocketAsyncEngine.Unix.cs

Lines changed: 70 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,11 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5-
using System;
65
using System.Collections.Concurrent;
76
using System.Diagnostics;
87
using System.Runtime.CompilerServices;
98
using System.Runtime.InteropServices;
109
using System.Threading;
11-
using System.Threading.Tasks;
1210

1311
namespace System.Net.Sockets
1412
{
@@ -56,22 +54,40 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error)
5654

5755
private static readonly object s_lock = new object();
5856

59-
// In debug builds, force there to be 2 engines. In release builds, use half the number of processors when
60-
// there are at least 6. The lower bound is to avoid using multiple engines on systems which aren't servers.
61-
#pragma warning disable CA1802 // const works for debug, but needs to be static readonly for release
62-
private static readonly int s_engineCount =
63-
#if DEBUG
64-
2;
65-
#else
66-
Environment.ProcessorCount >= 6 ? Environment.ProcessorCount / 2 : 1;
67-
#endif
68-
#pragma warning restore CA1802
57+
private static readonly int s_maxEngineCount = GetEngineCount();
58+
59+
/// <summary>
/// Computes the maximum number of engines (epoll/kqueue event loops) that may be created.
/// </summary>
/// <returns>
/// The value of the DOTNET_SYSTEM_NET_SOCKETS_THREAD_COUNT environment variable when it parses as an
/// integer in the range [1, int.MaxValue]; otherwise a value derived from the processor count and
/// the process architecture. The result is always at least 1.
/// </returns>
private static int GetEngineCount()
{
    // The responsibility of SocketAsyncEngine is to get notifications from epoll|kqueue
    // and schedule corresponding work items to ThreadPool (socket reads and writes).
    //
    // Using TechEmpower benchmarks that generate a LOT of SMALL socket reads and writes under a VERY HIGH load
    // we have observed that a single engine is capable of keeping busy up to thirty x64 and eight ARM64 CPU Cores.
    //
    // The vast majority of real-life scenarios are never going to generate such a huge load (hundreds of thousands of requests per second)
    // and having a single producer should almost always be enough.
    //
    // We want to be sure that we can handle extreme loads and that's why we have decided to use these values.
    //
    // It's impossible to predict all possible scenarios so we have added a possibility to configure this value using environment variables.
    //
    // The result is used both as an array length and as a modulo divisor, so an override of 0
    // (empty array, division by zero) or one above int.MaxValue (negative after the cast) would
    // break engine allocation. Such values are ignored and the default heuristic is used instead.
    if (uint.TryParse(Environment.GetEnvironmentVariable("DOTNET_SYSTEM_NET_SOCKETS_THREAD_COUNT"), out uint count)
        && count >= 1
        && count <= int.MaxValue)
    {
        return (int)count;
    }

    Architecture architecture = RuntimeInformation.ProcessArchitecture;
    int coresPerEngine = architecture == Architecture.Arm64 || architecture == Architecture.Arm
        ? 8
        : 30;

    // Round to the nearest whole engine, but never go below a single engine.
    return Math.Max(1, (int)Math.Round(Environment.ProcessorCount / (double)coresPerEngine));
}
6985

7086
//
7187
// The current engines. We replace an engine when it runs out of "handle" values.
7288
// Must be accessed under s_lock.
7389
//
74-
private static readonly SocketAsyncEngine?[] s_currentEngines = new SocketAsyncEngine?[s_engineCount];
90+
private static readonly SocketAsyncEngine?[] s_currentEngines = new SocketAsyncEngine?[s_maxEngineCount];
7591
private static int s_allocateFromEngine = 0;
7692

7793
private readonly IntPtr _port;
@@ -106,7 +122,7 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error)
106122
//
107123
private static readonly IntPtr MaxHandles = IntPtr.Size == 4 ? (IntPtr)int.MaxValue : (IntPtr)long.MaxValue;
108124
#endif
109-
private static readonly IntPtr MinHandlesForAdditionalEngine = s_engineCount == 1 ? MaxHandles : (IntPtr)32;
125+
private static readonly IntPtr MinHandlesForAdditionalEngine = s_maxEngineCount == 1 ? MaxHandles : (IntPtr)32;
110126

111127
//
112128
// Sentinel handle value to identify events from the "shutdown pipe," used to signal an event loop to stop
@@ -129,7 +145,7 @@ public bool TryRegister(SafeSocketHandle socket, out Interop.Error error)
129145
//
130146
// Maps handle values to SocketAsyncContext instances.
131147
//
132-
private readonly ConcurrentDictionary<IntPtr, SocketAsyncContext> _handleToContextMap = new ConcurrentDictionary<IntPtr, SocketAsyncContext>();
148+
private readonly ConcurrentDictionary<IntPtr, SocketAsyncContextWrapper> _handleToContextMap = new ConcurrentDictionary<IntPtr, SocketAsyncContextWrapper>();
133149

134150
//
135151
// Queue of events generated by EventLoop() that would be processed by the thread pool
@@ -197,7 +213,7 @@ private static void AllocateToken(SocketAsyncContext context, out SocketAsyncEng
197213
// Round-robin to the next engine once we have sufficient sockets on this one.
198214
if (!engine.HasLowNumberOfSockets)
199215
{
200-
s_allocateFromEngine = (s_allocateFromEngine + 1) % s_engineCount;
216+
s_allocateFromEngine = (s_allocateFromEngine + 1) % s_maxEngineCount;
201217
}
202218
}
203219
}
@@ -208,7 +224,8 @@ private IntPtr AllocateHandle(SocketAsyncContext context)
208224
Debug.Assert(!IsFull, "Expected !IsFull");
209225

210226
IntPtr handle = _nextHandle;
211-
_handleToContextMap.TryAdd(handle, context);
227+
Debug.Assert(handle != ShutdownHandle, "ShutdownHandle must not be added to the dictionary");
228+
_handleToContextMap.TryAdd(handle, new SocketAsyncContextWrapper(context));
212229

213230
_nextHandle = IntPtr.Add(_nextHandle, 1);
214231
_outstandingHandles = IntPtr.Add(_outstandingHandles, 1);
@@ -318,8 +335,10 @@ private void EventLoop()
318335
{
319336
bool shutdown = false;
320337
Interop.Sys.SocketEvent* buffer = _buffer;
321-
ConcurrentDictionary<IntPtr, SocketAsyncContext> handleToContextMap = _handleToContextMap;
338+
ConcurrentDictionary<IntPtr, SocketAsyncContextWrapper> handleToContextMap = _handleToContextMap;
322339
ConcurrentQueue<SocketIOEvent> eventQueue = _eventQueue;
340+
IntPtr shutdownHandle = ShutdownHandle;
341+
SocketAsyncContext? context = null;
323342
while (!shutdown)
324343
{
325344
int numEvents = EventBufferCount;
@@ -333,38 +352,36 @@ private void EventLoop()
333352
Debug.Assert(numEvents > 0, $"Unexpected numEvents: {numEvents}");
334353

335354
bool enqueuedEvent = false;
336-
for (int i = 0; i < numEvents; i++)
355+
foreach (var socketEvent in new ReadOnlySpan<Interop.Sys.SocketEvent>(buffer, numEvents))
337356
{
338-
IntPtr handle = buffer[i].Data;
339-
if (handle == ShutdownHandle)
340-
{
341-
shutdown = true;
342-
}
343-
else
357+
IntPtr handle = socketEvent.Data;
358+
359+
if (handleToContextMap.TryGetValue(handle, out SocketAsyncContextWrapper contextWrapper) && (context = contextWrapper.Context) != null)
344360
{
345361
Debug.Assert(handle.ToInt64() < MaxHandles.ToInt64(), $"Unexpected values: handle={handle}, MaxHandles={MaxHandles}");
346-
handleToContextMap.TryGetValue(handle, out SocketAsyncContext? context);
347-
if (context != null)
362+
363+
Interop.Sys.SocketEvents events = context.HandleSyncEventsSpeculatively(socketEvent.Events);
364+
if (events != Interop.Sys.SocketEvents.None)
348365
{
349-
Interop.Sys.SocketEvents events = buffer[i].Events;
350-
events = context.HandleSyncEventsSpeculatively(events);
351-
if (events != Interop.Sys.SocketEvents.None)
352-
{
353-
var ev = new SocketIOEvent(context, events);
354-
eventQueue.Enqueue(ev);
355-
enqueuedEvent = true;
356-
357-
// This is necessary when the JIT generates unoptimized code (debug builds, live debugging,
358-
// quick JIT, etc.) to ensure that the context does not remain referenced by this method, as
359-
// such code may keep the stack location live for longer than necessary
360-
ev = default;
361-
}
366+
var ev = new SocketIOEvent(context, events);
367+
eventQueue.Enqueue(ev);
368+
enqueuedEvent = true;
362369

363370
// This is necessary when the JIT generates unoptimized code (debug builds, live debugging,
364371
// quick JIT, etc.) to ensure that the context does not remain referenced by this method, as
365372
// such code may keep the stack location live for longer than necessary
366-
context = null;
373+
ev = default;
367374
}
375+
376+
// This is necessary when the JIT generates unoptimized code (debug builds, live debugging,
377+
// quick JIT, etc.) to ensure that the context does not remain referenced by this method, as
378+
// such code may keep the stack location live for longer than necessary
379+
context = null;
380+
contextWrapper = default;
381+
}
382+
else if (handle == shutdownHandle)
383+
{
384+
shutdown = true;
368385
}
369386
}
370387

@@ -488,6 +505,18 @@ private bool TryRegister(SafeSocketHandle socket, IntPtr handle, out Interop.Err
488505
return error == Interop.Error.SUCCESS;
489506
}
490507

508+
// struct wrapper is used in order to improve the performance of the epoll thread hot path by up to 3% in some TechEmpower benchmarks
509+
// the goal is to have a dedicated generic instantiation and use:
510+
// System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.Net.Sockets.SocketAsyncContextWrapper]::TryGetValueInternal(!0,int32,!1&)
511+
// instead of:
512+
// System.Collections.Concurrent.ConcurrentDictionary`2[System.IntPtr,System.__Canon]::TryGetValueInternal(!0,int32,!1&)
513+
/// <summary>
/// Value-type holder for a single <see cref="SocketAsyncContext"/> reference, used as the value
/// type of the handle-to-context dictionary.
/// </summary>
private readonly struct SocketAsyncContextWrapper
{
    // The wrapped context; left at its default (null) value for a default-initialized wrapper.
    private readonly SocketAsyncContext _context;

    /// <summary>Wraps the given context.</summary>
    /// <param name="context">The context to store.</param>
    public SocketAsyncContextWrapper(SocketAsyncContext context)
    {
        _context = context;
    }

    /// <summary>Gets the context supplied to the constructor.</summary>
    internal SocketAsyncContext Context => _context;
}
519+
491520
private readonly struct SocketIOEvent
492521
{
493522
public SocketAsyncContext Context { get; }

0 commit comments

Comments
 (0)