From 97fc5768304ce480345e6c34f12436f131cce9a4 Mon Sep 17 00:00:00 2001 From: coldWater Date: Tue, 12 Mar 2024 16:59:07 +0800 Subject: [PATCH 1/4] refactor createServerWaitGroup --- modules/graceful/manager.go | 2 +- modules/graceful/manager_common.go | 4 +-- modules/graceful/manager_unix.go | 45 ++++++++++++++--------------- modules/graceful/manager_windows.go | 24 +++++++++------ 4 files changed, 40 insertions(+), 35 deletions(-) diff --git a/modules/graceful/manager.go b/modules/graceful/manager.go index f3f412863ac3f..29c42e35ca9d8 100644 --- a/modules/graceful/manager.go +++ b/modules/graceful/manager.go @@ -233,7 +233,7 @@ func (g *Manager) setStateTransition(old, new state) bool { // At the moment the total number of servers (numberOfServersToCreate) are pre-defined as a const before global init, // so this function MUST be called if a server is not used. func (g *Manager) InformCleanup() { - g.createServerWaitGroup.Done() + g.createServerCond.Signal() } // Done allows the manager to be viewed as a context.Context, it returns a channel that is closed when the server is finished terminating diff --git a/modules/graceful/manager_common.go b/modules/graceful/manager_common.go index 27196e1531a84..65f8031a49202 100644 --- a/modules/graceful/manager_common.go +++ b/modules/graceful/manager_common.go @@ -42,8 +42,8 @@ type Manager struct { terminateCtxCancel context.CancelFunc managerCtxCancel context.CancelFunc runningServerWaitGroup sync.WaitGroup - createServerWaitGroup sync.WaitGroup terminateWaitGroup sync.WaitGroup + createServerCond sync.Cond shutdownRequested chan struct{} toRunAtShutdown []func() @@ -52,7 +52,7 @@ type Manager struct { func newGracefulManager(ctx context.Context) *Manager { manager := &Manager{ctx: ctx, shutdownRequested: make(chan struct{})} - manager.createServerWaitGroup.Add(numberOfServersToCreate) + manager.createServerCond.L = &sync.Mutex{} manager.prepare(ctx) manager.start() return manager diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index edf5fc248f99e..43da1752bb58e 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -57,20 +57,28 @@ func (g *Manager) start() { // Handle clean up of unused provided listeners and delayed start-up startupDone := make(chan struct{}) go func() { - defer close(startupDone) - // Wait till we're done getting all the listeners and then close the unused ones - func() { - // FIXME: there is a fundamental design problem of the "manager" and the "wait group". - // If nothing has started, the "Wait" just panics: sync: WaitGroup is reused before previous Wait has returned - // There is no clear solution besides a complete rewriting of the "manager" - defer func() { - _ = recover() - }() - g.createServerWaitGroup.Wait() + defer func() { + close(startupDone) + // Ignore the error here there's not much we can do with it, they're logged in the CloseProvidedListeners function + _ = CloseProvidedListeners() }() - // Ignore the error here there's not much we can do with it, they're logged in the CloseProvidedListeners function - _ = CloseProvidedListeners() - g.notify(readyMsg) + // Wait till we're done getting all the listeners and then close the unused ones + ready := 0 + for { + g.createServerCond.L.Lock() + g.createServerCond.Wait() + g.createServerCond.L.Unlock() + select { + case <-g.IsShutdown(): + return + default: + } + ready++ + if ready >= numberOfServersToCreate { + g.notify(readyMsg) + return + } + } }() if setting.StartupTimeout > 0 { go func() { @@ -78,16 +86,7 @@ func (g *Manager) start() { case <-startupDone: return case <-g.IsShutdown(): - func() { - // When WaitGroup counter goes negative it will panic - we don't care about this so we can just ignore it. - defer func() { - _ = recover() - }() - // Ensure that the createServerWaitGroup stops waiting - for { - g.createServerWaitGroup.Done() - } - }() + g.createServerCond.Signal() return case <-time.After(setting.StartupTimeout): log.Error("Startup took too long! Shutting down") diff --git a/modules/graceful/manager_windows.go b/modules/graceful/manager_windows.go index ecf30af3f3005..d62f5f5027718 100644 --- a/modules/graceful/manager_windows.go +++ b/modules/graceful/manager_windows.go @@ -150,15 +150,21 @@ func (g *Manager) awaitServer(limit time.Duration) bool { c := make(chan struct{}) go func() { defer close(c) - func() { - // FIXME: there is a fundamental design problem of the "manager" and the "wait group". - // If nothing has started, the "Wait" just panics: sync: WaitGroup is reused before previous Wait has returned - // There is no clear solution besides a complete rewriting of the "manager" - defer func() { - _ = recover() - }() - g.createServerWaitGroup.Wait() - }() + ready := 0 + for { + g.createServerCond.L.Lock() + g.createServerCond.Wait() + g.createServerCond.L.Unlock() + select { + case <-g.IsShutdown(): + return + default: + } + ready++ + if ready >= numberOfServersToCreate { + return + } + } }() if limit > 0 { select { From 947b49e77acd270265fc923813baaab57ac1d54b Mon Sep 17 00:00:00 2001 From: coldWater Date: Tue, 12 Mar 2024 17:16:04 +0800 Subject: [PATCH 2/4] fix --- modules/graceful/manager_windows.go | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/modules/graceful/manager_windows.go b/modules/graceful/manager_windows.go index d62f5f5027718..aa0c01291bd36 100644 --- a/modules/graceful/manager_windows.go +++ b/modules/graceful/manager_windows.go @@ -149,7 +149,6 @@ hammerLoop: func (g *Manager) awaitServer(limit time.Duration) bool { c := make(chan struct{}) go func() { - defer close(c) ready := 0 for { g.createServerCond.L.Lock() @@ -162,26 +161,24 @@ func (g *Manager) awaitServer(limit time.Duration) bool { } ready++ if ready >= numberOfServersToCreate { + close(c) return } } }() + + var tc <-chan time.Time if limit > 0 { - select { - case <-c: - return true // completed normally - case <-time.After(limit): - return false // timed out - case <-g.IsShutdown(): - return false - } - } else { - select { - case <-c: - return true // completed normally - case <-g.IsShutdown(): - return false - } + tc = time.After(limit) + } + select { + case <-c: + return true // completed normally + case <-tc: + return false // timed out + case <-g.IsShutdown(): + g.createServerCond.Signal() + return false } } From ce0fba88b4a28d469a4ed336f24ab4eed82a4d2d Mon Sep 17 00:00:00 2001 From: coldWater Date: Wed, 13 Mar 2024 05:17:52 +0800 Subject: [PATCH 3/4] update --- modules/graceful/manager.go | 3 +++ modules/graceful/manager_common.go | 1 + modules/graceful/manager_unix.go | 17 ++++++++--------- modules/graceful/manager_windows.go | 17 ++++++++--------- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/modules/graceful/manager.go b/modules/graceful/manager.go index 29c42e35ca9d8..3f1115066a054 100644 --- a/modules/graceful/manager.go +++ b/modules/graceful/manager.go @@ -233,6 +233,9 @@ func (g *Manager) setStateTransition(old, new state) bool { // At the moment the total number of servers (numberOfServersToCreate) are pre-defined as a const before global init, // so this function MUST be called if a server is not used. func (g *Manager) InformCleanup() { + g.createServerCond.L.Lock() + defer g.createServerCond.L.Unlock() + g.createdServer++ g.createServerCond.Signal() } diff --git a/modules/graceful/manager_common.go b/modules/graceful/manager_common.go index 65f8031a49202..f6dbcc748dc80 100644 --- a/modules/graceful/manager_common.go +++ b/modules/graceful/manager_common.go @@ -44,6 +44,7 @@ type Manager struct { runningServerWaitGroup sync.WaitGroup terminateWaitGroup sync.WaitGroup createServerCond sync.Cond + createdServer int shutdownRequested chan struct{} toRunAtShutdown []func() diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index 43da1752bb58e..0392daa8c3482 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -63,21 +63,20 @@ func (g *Manager) start() { _ = CloseProvidedListeners() }() // Wait till we're done getting all the listeners and then close the unused ones - ready := 0 + g.createServerCond.L.Lock() for { - g.createServerCond.L.Lock() - g.createServerCond.Wait() - g.createServerCond.L.Unlock() + if g.createdServer >= numberOfServersToCreate { + g.createServerCond.L.Unlock() + g.notify(readyMsg) + return + } select { case <-g.IsShutdown(): + g.createServerCond.L.Unlock() return default: } - ready++ - if ready >= numberOfServersToCreate { - g.notify(readyMsg) - return - } + g.createServerCond.Wait() } }() if setting.StartupTimeout > 0 { diff --git a/modules/graceful/manager_windows.go b/modules/graceful/manager_windows.go index aa0c01291bd36..d776e0e9f9e20 100644 --- a/modules/graceful/manager_windows.go +++ b/modules/graceful/manager_windows.go @@ -149,21 +149,20 @@ hammerLoop: func (g *Manager) awaitServer(limit time.Duration) bool { c := make(chan struct{}) go func() { - ready := 0 + g.createServerCond.L.Lock() for { - g.createServerCond.L.Lock() - g.createServerCond.Wait() - g.createServerCond.L.Unlock() + if g.createdServer >= numberOfServersToCreate { + g.createServerCond.L.Unlock() + close(c) + return + } select { case <-g.IsShutdown(): + g.createServerCond.L.Unlock() return default: } - ready++ - if ready >= numberOfServersToCreate { - close(c) - return - } + g.createServerCond.Wait() } }() From e183889876133e0358c1e6da20a2ca59c1ba36c9 Mon Sep 17 00:00:00 2001 From: coldWater <254244460@qq.com> Date: Fri, 15 Mar 2024 13:51:12 +0800 Subject: [PATCH 4/4] Update comment --- modules/graceful/manager_unix.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index 0392daa8c3482..f49c42650ccb0 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -59,10 +59,10 @@ func (g *Manager) start() { go func() { defer func() { close(startupDone) - // Ignore the error here there's not much we can do with it, they're logged in the CloseProvidedListeners function + // Close the unused listeners and ignore the error here there's not much we can do with it, they're logged in the CloseProvidedListeners function _ = CloseProvidedListeners() }() - // Wait till we're done getting all the listeners and then close the unused ones + // Wait for all servers to be created g.createServerCond.L.Lock() for { if g.createdServer >= numberOfServersToCreate {