From ea199c6ba841d0500718d571fdfde33aed605bf4 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 24 Mar 2022 14:27:38 +0000 Subject: [PATCH 1/3] Add pprof labels in processes and for lifecycles Use pprof labelling to help identify goroutines with stacks. Signed-off-by: Andrew Thornton --- modules/graceful/manager.go | 44 +++++++++-------------------- modules/graceful/manager_unix.go | 11 ++++++++ modules/graceful/manager_windows.go | 11 ++++++++ modules/process/manager.go | 19 +++++++------ 4 files changed, 46 insertions(+), 39 deletions(-) diff --git a/modules/graceful/manager.go b/modules/graceful/manager.go index f78357360746d..9455c9d3cac67 100644 --- a/modules/graceful/manager.go +++ b/modules/graceful/manager.go @@ -6,6 +6,7 @@ package graceful import ( "context" + "runtime/pprof" "sync" "time" @@ -62,7 +63,6 @@ type WithCallback func(callback func()) // Similarly the callback function provided to atTerminate must return once termination is complete. // Please note that use of the atShutdown and atTerminate callbacks will create go-routines that will wait till their respective signals // - users must therefore be careful to only call these as necessary. -// If run is not expected to run indefinitely RunWithShutdownChan is likely to be more appropriate. type RunnableWithShutdownFns func(atShutdown, atTerminate func(func())) // RunWithShutdownFns takes a function that has both atShutdown and atTerminate callbacks @@ -70,7 +70,6 @@ type RunnableWithShutdownFns func(atShutdown, atTerminate func(func())) // Similarly the callback function provided to atTerminate must return once termination is complete. // Please note that use of the atShutdown and atTerminate callbacks will create go-routines that will wait till their respective signals // - users must therefore be careful to only call these as necessary. -// If run is not expected to run indefinitely RunWithShutdownChan is likely to be more appropriate. func (g *Manager) RunWithShutdownFns(run RunnableWithShutdownFns) { g.runningServerWaitGroup.Add(1) defer g.runningServerWaitGroup.Done() @@ -98,32 +97,6 @@ func (g *Manager) RunWithShutdownFns(run RunnableWithShutdownFns) { }) } -// RunnableWithShutdownChan is a runnable with functions to run at shutdown and terminate. -// After the atShutdown channel is closed, the main function must return once shutdown is complete. -// (Optionally IsHammer may be waited for instead however, this should be avoided if possible.) -// The callback function provided to atTerminate must return once termination is complete. -// Please note that use of the atTerminate function will create a go-routine that will wait till terminate - users must therefore be careful to only call this as necessary. -type RunnableWithShutdownChan func(atShutdown <-chan struct{}, atTerminate WithCallback) - -// RunWithShutdownChan takes a function that has channel to watch for shutdown and atTerminate callbacks -// After the atShutdown channel is closed, the main function must return once shutdown is complete. -// (Optionally IsHammer may be waited for instead however, this should be avoided if possible.) -// The callback function provided to atTerminate must return once termination is complete. -// Please note that use of the atTerminate function will create a go-routine that will wait till terminate - users must therefore be careful to only call this as necessary. -func (g *Manager) RunWithShutdownChan(run RunnableWithShutdownChan) { - g.runningServerWaitGroup.Add(1) - defer g.runningServerWaitGroup.Done() - defer func() { - if err := recover(); err != nil { - log.Critical("PANIC during RunWithShutdownChan: %v\nStacktrace: %s", err, log.Stack(2)) - g.doShutdown() - } - }() - run(g.IsShutdown(), func(atTerminate func()) { - g.RunAtTerminate(atTerminate) - }) -} - // RunWithShutdownContext takes a function that has a context to watch for shutdown. // After the provided context is Done(), the main function must return once shutdown is complete. // (Optionally the HammerContext may be obtained and waited for however, this should be avoided if possible.) @@ -136,7 +109,9 @@ func (g *Manager) RunWithShutdownContext(run func(context.Context)) { g.doShutdown() } }() - run(g.ShutdownContext()) + ctx := g.ShutdownContext() + pprof.SetGoroutineLabels(ctx) // We don't have a label to restore back to but I think this is fine + run(ctx) } // RunAtTerminate adds to the terminate wait group and creates a go-routine to run the provided function at termination @@ -157,7 +132,7 @@ func (g *Manager) RunAtTerminate(terminate func()) { } // RunAtShutdown creates a go-routine to run the provided function at shutdown -func (g *Manager) RunAtShutdown(ctx context.Context, shutdown func()) { +func (g *Manager) RunAtShutdown(done context.Context, shutdown func()) { g.lock.Lock() defer g.lock.Unlock() g.toRunAtShutdown = append(g.toRunAtShutdown, @@ -168,7 +143,7 @@ func (g *Manager) RunAtShutdown(ctx context.Context, shutdown func()) { } }() select { - case <-ctx.Done(): + case <-done.Done(): return default: shutdown() @@ -198,6 +173,8 @@ func (g *Manager) doShutdown() { } g.lock.Lock() g.shutdownCtxCancel() + atShutdownCtx := pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "post-shutdown")) + pprof.SetGoroutineLabels(atShutdownCtx) for _, fn := range g.toRunAtShutdown { go fn() } @@ -227,6 +204,8 @@ func (g *Manager) doHammerTime(d time.Duration) { default: log.Warn("Setting Hammer condition") g.hammerCtxCancel() + atHammerCtx := pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "post-hammer")) + pprof.SetGoroutineLabels(atHammerCtx) for _, fn := range g.toRunAtHammer { go fn() } @@ -244,6 +223,9 @@ func (g *Manager) doTerminate() { default: log.Warn("Terminating") g.terminateCtxCancel() + atTerminateCtx := pprof.WithLabels(g.doneCtx, pprof.Labels("graceful-lifecycle", "post-terminate")) + pprof.SetGoroutineLabels(atTerminateCtx) + for _, fn := range g.toRunAtTerminate { go fn() } diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index 99e84d73e834b..64dce32eeec85 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -12,6 +12,7 @@ import ( "errors" "os" "os/signal" + "runtime/pprof" "sync" "syscall" "time" @@ -60,6 +61,16 @@ func (g *Manager) start(ctx context.Context) { g.hammerCtx, g.hammerCtxCancel = context.WithCancel(ctx) g.doneCtx, g.doneCtxCancel = context.WithCancel(ctx) + // Next add pprof labels to these contexts + g.terminateCtx = pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "with-terminate")) + g.shutdownCtx = pprof.WithLabels(g.shutdownCtx, pprof.Labels("graceful-lifecycle", "with-shutdown")) + g.hammerCtx = pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "with-hammer")) + g.doneCtx = pprof.WithLabels(g.doneCtx, pprof.Labels("graceful-lifecycle", "with-manager")) + + // Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager + pprof.SetGoroutineLabels(g.doneCtx) + defer pprof.SetGoroutineLabels(ctx) + // Set the running state & handle signals g.setState(stateRunning) go g.handleSignals(ctx) diff --git a/modules/graceful/manager_windows.go b/modules/graceful/manager_windows.go index e5f5541ed3c3c..6c2ac0480f1b9 100644 --- a/modules/graceful/manager_windows.go +++ b/modules/graceful/manager_windows.go @@ -11,6 +11,7 @@ package graceful import ( "context" "os" + "runtime/pprof" "strconv" "sync" "time" @@ -73,6 +74,16 @@ func (g *Manager) start() { g.hammerCtx, g.hammerCtxCancel = context.WithCancel(g.ctx) g.doneCtx, g.doneCtxCancel = context.WithCancel(g.ctx) + // Next add pprof labels to these contexts + g.terminateCtx = pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "with-terminate")) + g.shutdownCtx = pprof.WithLabels(g.shutdownCtx, pprof.Labels("graceful-lifecycle", "with-shutdown")) + g.hammerCtx = pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "with-hammer")) + g.doneCtx = pprof.WithLabels(g.doneCtx, pprof.Labels("graceful-lifecycle", "with-manager")) + + // Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager + pprof.SetGoroutineLabels(g.doneCtx) + defer pprof.SetGoroutineLabels(ctx) + // Make channels g.shutdownRequested = make(chan struct{}) diff --git a/modules/process/manager.go b/modules/process/manager.go index d9d2f8c3e5f9c..50dbbbe6c807c 100644 --- a/modules/process/manager.go +++ b/modules/process/manager.go @@ -11,6 +11,7 @@ import ( "fmt" "io" "os/exec" + "runtime/pprof" "sort" "strconv" "sync" @@ -66,11 +67,9 @@ func GetManager() *Manager { // Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the // process table. func (pm *Manager) AddContext(parent context.Context, description string) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) { - parentPID := GetParentPID(parent) - ctx, cancel = context.WithCancel(parent) - pid, finished := pm.Add(parentPID, description, cancel) + ctx, pid, finished := pm.Add(ctx, description, cancel) return &Context{ Context: ctx, @@ -87,11 +86,9 @@ func (pm *Manager) AddContext(parent context.Context, description string) (ctx c // Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the // process table. func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Duration, description string) (ctx context.Context, cancel context.CancelFunc, finshed FinishedFunc) { - parentPID := GetParentPID(parent) - ctx, cancel = context.WithTimeout(parent, timeout) - pid, finshed := pm.Add(parentPID, description, cancel) + ctx, pid, finshed := pm.Add(ctx, description, cancel) return &Context{ Context: ctx, @@ -100,7 +97,9 @@ func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Durati } // Add create a new process -func (pm *Manager) Add(parentPID IDType, description string, cancel context.CancelFunc) (IDType, FinishedFunc) { +func (pm *Manager) Add(ctx context.Context, description string, cancel context.CancelFunc) (context.Context, IDType, FinishedFunc) { + parentPID := GetParentPID(ctx) + pm.mutex.Lock() start, pid := pm.nextPID() @@ -120,6 +119,7 @@ func (pm *Manager) Add(parentPID IDType, description string, cancel context.Canc finished := func() { cancel() pm.remove(process) + pprof.SetGoroutineLabels(ctx) } if parent != nil { @@ -128,7 +128,10 @@ func (pm *Manager) Add(parentPID IDType, description string, cancel context.Canc pm.processes[pid] = process pm.mutex.Unlock() - return pid, finished + pprofCtx := pprof.WithLabels(ctx, pprof.Labels("process-description", description, "ppid", string(parentPID), "pid", string(pid))) + pprof.SetGoroutineLabels(pprofCtx) + + return pprofCtx, pid, finished } // nextPID will return the next available PID. pm.mutex should already be locked. From 55be52f24c9ffd2b5662bc14bb2cfddb9ec096ba Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 24 Mar 2022 15:09:13 +0000 Subject: [PATCH 2/3] as per review Signed-off-by: Andrew Thornton --- modules/graceful/manager.go | 4 ++-- modules/graceful/manager_windows.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/graceful/manager.go b/modules/graceful/manager.go index 9455c9d3cac67..ca937e80a9a9d 100644 --- a/modules/graceful/manager.go +++ b/modules/graceful/manager.go @@ -132,7 +132,7 @@ func (g *Manager) RunAtTerminate(terminate func()) { } // RunAtShutdown creates a go-routine to run the provided function at shutdown -func (g *Manager) RunAtShutdown(done context.Context, shutdown func()) { +func (g *Manager) RunAtShutdown(ctx context.Context, shutdown func()) { g.lock.Lock() defer g.lock.Unlock() g.toRunAtShutdown = append(g.toRunAtShutdown, @@ -143,7 +143,7 @@ func (g *Manager) RunAtShutdown(done context.Context, shutdown func()) { } }() select { - case <-done.Done(): + case <-ctx.Done(): return default: shutdown() diff --git a/modules/graceful/manager_windows.go b/modules/graceful/manager_windows.go index 6c2ac0480f1b9..13dd225318956 100644 --- a/modules/graceful/manager_windows.go +++ b/modules/graceful/manager_windows.go @@ -82,7 +82,7 @@ func (g *Manager) start() { // Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager pprof.SetGoroutineLabels(g.doneCtx) - defer pprof.SetGoroutineLabels(ctx) + defer pprof.SetGoroutineLabels(g.ctx) // Make channels g.shutdownRequested = make(chan struct{}) From 891e8544a2133ef228faed83ea3942f7da625654 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 24 Mar 2022 15:26:07 +0000 Subject: [PATCH 3/3] as per review Signed-off-by: Andrew Thornton --- modules/graceful/manager.go | 12 ++++++------ modules/graceful/manager_unix.go | 10 +++++----- modules/graceful/manager_windows.go | 10 +++++----- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/modules/graceful/manager.go b/modules/graceful/manager.go index ca937e80a9a9d..8766cfca0efb4 100644 --- a/modules/graceful/manager.go +++ b/modules/graceful/manager.go @@ -191,7 +191,7 @@ func (g *Manager) doShutdown() { g.doTerminate() g.WaitForTerminate() g.lock.Lock() - g.doneCtxCancel() + g.managerCtxCancel() g.lock.Unlock() }() } @@ -223,7 +223,7 @@ func (g *Manager) doTerminate() { default: log.Warn("Terminating") g.terminateCtxCancel() - atTerminateCtx := pprof.WithLabels(g.doneCtx, pprof.Labels("graceful-lifecycle", "post-terminate")) + atTerminateCtx := pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "post-terminate")) pprof.SetGoroutineLabels(atTerminateCtx) for _, fn := range g.toRunAtTerminate { @@ -313,20 +313,20 @@ func (g *Manager) InformCleanup() { // Done allows the manager to be viewed as a context.Context, it returns a channel that is closed when the server is finished terminating func (g *Manager) Done() <-chan struct{} { - return g.doneCtx.Done() + return g.managerCtx.Done() } // Err allows the manager to be viewed as a context.Context done at Terminate func (g *Manager) Err() error { - return g.doneCtx.Err() + return g.managerCtx.Err() } // Value allows the manager to be viewed as a context.Context done at Terminate func (g *Manager) Value(key interface{}) interface{} { - return g.doneCtx.Value(key) + return g.managerCtx.Value(key) } // Deadline returns nil as there is no fixed Deadline for the manager, it allows the manager to be viewed as a context.Context func (g *Manager) Deadline() (deadline time.Time, ok bool) { - return g.doneCtx.Deadline() + return g.managerCtx.Deadline() } diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index 64dce32eeec85..6fbb2bda29ab0 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -30,11 +30,11 @@ type Manager struct { shutdownCtx context.Context hammerCtx context.Context terminateCtx context.Context - doneCtx context.Context + managerCtx context.Context shutdownCtxCancel context.CancelFunc hammerCtxCancel context.CancelFunc terminateCtxCancel context.CancelFunc - doneCtxCancel context.CancelFunc + managerCtxCancel context.CancelFunc runningServerWaitGroup sync.WaitGroup createServerWaitGroup sync.WaitGroup terminateWaitGroup sync.WaitGroup @@ -59,16 +59,16 @@ func (g *Manager) start(ctx context.Context) { g.terminateCtx, g.terminateCtxCancel = context.WithCancel(ctx) g.shutdownCtx, g.shutdownCtxCancel = context.WithCancel(ctx) g.hammerCtx, g.hammerCtxCancel = context.WithCancel(ctx) - g.doneCtx, g.doneCtxCancel = context.WithCancel(ctx) + g.managerCtx, g.managerCtxCancel = context.WithCancel(ctx) // Next add pprof labels to these contexts g.terminateCtx = pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "with-terminate")) g.shutdownCtx = pprof.WithLabels(g.shutdownCtx, pprof.Labels("graceful-lifecycle", "with-shutdown")) g.hammerCtx = pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "with-hammer")) - g.doneCtx = pprof.WithLabels(g.doneCtx, pprof.Labels("graceful-lifecycle", "with-manager")) + g.managerCtx = pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "with-manager")) // Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager - pprof.SetGoroutineLabels(g.doneCtx) + pprof.SetGoroutineLabels(g.managerCtx) defer pprof.SetGoroutineLabels(ctx) // Set the running state & handle signals diff --git a/modules/graceful/manager_windows.go b/modules/graceful/manager_windows.go index 13dd225318956..66baddfa38ed1 100644 --- a/modules/graceful/manager_windows.go +++ b/modules/graceful/manager_windows.go @@ -41,11 +41,11 @@ type Manager struct { shutdownCtx context.Context hammerCtx context.Context terminateCtx context.Context - doneCtx context.Context + managerCtx context.Context shutdownCtxCancel context.CancelFunc hammerCtxCancel context.CancelFunc terminateCtxCancel context.CancelFunc - doneCtxCancel context.CancelFunc + managerCtxCancel context.CancelFunc runningServerWaitGroup sync.WaitGroup createServerWaitGroup sync.WaitGroup terminateWaitGroup sync.WaitGroup @@ -72,16 +72,16 @@ func (g *Manager) start() { g.terminateCtx, g.terminateCtxCancel = context.WithCancel(g.ctx) g.shutdownCtx, g.shutdownCtxCancel = context.WithCancel(g.ctx) g.hammerCtx, g.hammerCtxCancel = context.WithCancel(g.ctx) - g.doneCtx, g.doneCtxCancel = context.WithCancel(g.ctx) + g.managerCtx, g.managerCtxCancel = context.WithCancel(g.ctx) // Next add pprof labels to these contexts g.terminateCtx = pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "with-terminate")) g.shutdownCtx = pprof.WithLabels(g.shutdownCtx, pprof.Labels("graceful-lifecycle", "with-shutdown")) g.hammerCtx = pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "with-hammer")) - g.doneCtx = pprof.WithLabels(g.doneCtx, pprof.Labels("graceful-lifecycle", "with-manager")) + g.managerCtx = pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "with-manager")) // Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager - pprof.SetGoroutineLabels(g.doneCtx) + pprof.SetGoroutineLabels(g.managerCtx) defer pprof.SetGoroutineLabels(g.ctx) // Make channels