From 03ef142a9d148577fe5d1877cee307a85860dfb9 Mon Sep 17 00:00:00 2001 From: ltdk Date: Sat, 10 Sep 2022 22:54:38 -0400 Subject: [PATCH 1/7] Implement systemd-notify protocol This PR adds support for the systemd notify protocol. Several status messages are provided. We should likely add a common notify/status message for graceful. Replaces #21140 Signed-off-by: Andrew Thornton --- contrib/systemd/gitea.service | 2 +- modules/graceful/manager_unix.go | 57 ++++++++++++++++++++++++++++++-- modules/graceful/net_unix.go | 51 ++++++++++++++++++++++++---- modules/graceful/restart_unix.go | 10 ++++-- 4 files changed, 108 insertions(+), 12 deletions(-) diff --git a/contrib/systemd/gitea.service b/contrib/systemd/gitea.service index 79c34564bc977..2eb951756838c 100644 --- a/contrib/systemd/gitea.service +++ b/contrib/systemd/gitea.service @@ -56,7 +56,7 @@ After=network.target #LimitMEMLOCK=infinity #LimitNOFILE=65535 RestartSec=2s -Type=simple +Type=notify User=git Group=git WorkingDirectory=/var/lib/gitea/ diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index ba1dbd38f6a08..005e18bea877c 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -12,6 +12,7 @@ import ( "os" "os/signal" "runtime/pprof" + "strconv" "sync" "syscall" "time" @@ -46,7 +47,7 @@ type Manager struct { func newGracefulManager(ctx context.Context) *Manager { manager := &Manager{ - isChild: len(os.Getenv(listenFDs)) > 0 && os.Getppid() > 1, + isChild: len(os.Getenv(listenFDsEnv)) > 0 && os.Getppid() > 1, lock: &sync.RWMutex{}, } manager.createServerWaitGroup.Add(numberOfServersToCreate) @@ -54,6 +55,40 @@ func newGracefulManager(ctx context.Context) *Manager { return manager } +type systemdNotifyMsg string + +const ( + readyMsg systemdNotifyMsg = "READY=1" + stoppingMsg systemdNotifyMsg = "STOPPING=1" + reloadingMsg systemdNotifyMsg = "RELOADING=1" +) + +func statusMsg(msg string) systemdNotifyMsg { + return systemdNotifyMsg("STATUS=" + 
msg) +} + +func pidMsg() systemdNotifyMsg { + return systemdNotifyMsg("MAINPID=" + strconv.Itoa(os.Getpid())) +} + +// Notify systemd of status via the notify protocol +func (g *Manager) notify(msg systemdNotifyMsg) { + conn, err := getNotifySocket() + if err != nil { + // the err is logged in getNotifySocket + return + } + if conn == nil { + return + } + defer conn.Close() + + if _, err = conn.Write([]byte(msg)); err != nil { + log.Warn("Failed to notify NOTIFY_SOCKET: %v", err) + return + } +} + func (g *Manager) start(ctx context.Context) { // Make contexts g.terminateCtx, g.terminateCtxCancel = context.WithCancel(ctx) @@ -73,6 +108,8 @@ func (g *Manager) start(ctx context.Context) { // Set the running state & handle signals g.setState(stateRunning) + g.notify(statusMsg("Starting Gitea")) + g.notify(pidMsg()) go g.handleSignals(g.managerCtx) // Handle clean up of unused provided listeners and delayed start-up @@ -90,6 +127,7 @@ func (g *Manager) start(ctx context.Context) { go func() { select { case <-startupDone: + g.notify(readyMsg) return case <-g.IsShutdown(): func() { @@ -105,6 +143,8 @@ func (g *Manager) start(ctx context.Context) { return case <-time.After(setting.StartupTimeout): log.Error("Startup took too long! Shutting down") + g.notify(statusMsg("Startup took too long! Shutting down")) + g.notify(stoppingMsg) g.doShutdown() } }() @@ -137,6 +177,7 @@ func (g *Manager) handleSignals(ctx context.Context) { g.DoGracefulRestart() case syscall.SIGUSR1: log.Warn("PID %d. Received SIGUSR1. 
Releasing and reopening logs", pid) + g.notify(statusMsg("Releasing and reopening logs")) if err := log.ReleaseReopen(); err != nil { log.Error("Error whilst releasing and reopening logs: %v", err) } @@ -170,6 +211,9 @@ func (g *Manager) doFork() error { } g.forked = true g.lock.Unlock() + + g.notify(reloadingMsg) + // We need to move the file logs to append pids setting.RestartLogsWithPIDSuffix() @@ -192,18 +236,27 @@ func (g *Manager) DoGracefulRestart() { } } else { log.Info("PID: %d. Not set restartable. Shutting down...", os.Getpid()) - + g.notify(stoppingMsg) g.doShutdown() } } // DoImmediateHammer causes an immediate hammer func (g *Manager) DoImmediateHammer() { + g.notify(statusMsg("Sending immediate hammer")) g.doHammerTime(0 * time.Second) } // DoGracefulShutdown causes a graceful shutdown func (g *Manager) DoGracefulShutdown() { + g.lock.Lock() + if !g.forked { + g.lock.Unlock() + g.notify(stoppingMsg) + } else { + g.lock.Unlock() + g.notify(statusMsg("shutting down after fork")) + } g.doShutdown() } diff --git a/modules/graceful/net_unix.go b/modules/graceful/net_unix.go index c7524a79dbc94..32ce69338aa58 100644 --- a/modules/graceful/net_unix.go +++ b/modules/graceful/net_unix.go @@ -21,9 +21,11 @@ import ( ) const ( - listenFDs = "LISTEN_FDS" - startFD = 3 - unlinkFDs = "GITEA_UNLINK_FDS" + listenFDsEnv = "LISTEN_FDS" + startFD = 3 + unlinkFDsEnv = "GITEA_UNLINK_FDS" + + notifySocketEnv = "NOTIFY_SOCKET" ) // In order to keep the working directory the same as when we started we record @@ -38,6 +40,8 @@ var ( activeListenersToUnlink = []bool{} providedListeners = []net.Listener{} activeListeners = []net.Listener{} + + notifySocketAddr string ) func getProvidedFDs() (savedErr error) { @@ -46,17 +50,17 @@ func getProvidedFDs() (savedErr error) { mutex.Lock() defer mutex.Unlock() - numFDs := os.Getenv(listenFDs) + numFDs := os.Getenv(listenFDsEnv) if numFDs == "" { return } n, err := strconv.Atoi(numFDs) if err != nil { - savedErr = fmt.Errorf("%s is not 
a number: %s. Err: %v", listenFDs, numFDs, err) + savedErr = fmt.Errorf("%s is not a number: %s. Err: %v", listenFDsEnv, numFDs, err) return } - fdsToUnlinkStr := strings.Split(os.Getenv(unlinkFDs), ",") + fdsToUnlinkStr := strings.Split(os.Getenv(unlinkFDsEnv), ",") providedListenersToUnlink = make([]bool, n) for _, fdStr := range fdsToUnlinkStr { i, err := strconv.Atoi(fdStr) @@ -84,6 +88,18 @@ func getProvidedFDs() (savedErr error) { savedErr = fmt.Errorf("Error getting provided socket fd %d: %v", i, err) return } + + notifySocketAddr = os.Getenv(notifySocketEnv) + if notifySocketAddr != "" { + log.Debug("Systemd Notify Socket provided: %s", notifySocketAddr) + savedErr = os.Unsetenv(notifySocketEnv) + if savedErr != nil { + log.Warn("Unable to Unset the NOTIFY_SOCKET environment variable: %v", savedErr) + return + } + } else { + log.Trace("No Systemd Notify Socket provided") + } }) return savedErr } @@ -255,3 +271,26 @@ func getActiveListenersToUnlink() []bool { copy(listenersToUnlink, activeListenersToUnlink) return listenersToUnlink } + +func getNotifySocket() (*net.UnixConn, error) { + if err := getProvidedFDs(); err != nil { + return nil, err + } + + if notifySocketAddr == "" { + return nil, nil + } + + socketAddr := &net.UnixAddr{ + Name: notifySocketAddr, + Net: "unixgram", + } + + notifySocket, err := net.DialUnix(socketAddr.Net, nil, socketAddr) + if err != nil { + log.Warn("failed to dial NOTIFY_SOCKET %s: %v", socketAddr, err) + return nil, err + } + + return notifySocket, nil +} diff --git a/modules/graceful/restart_unix.go b/modules/graceful/restart_unix.go index 1d0d1059e9fd1..ababc25f5a715 100644 --- a/modules/graceful/restart_unix.go +++ b/modules/graceful/restart_unix.go @@ -70,11 +70,15 @@ func RestartProcess() (int, error) { // Pass on the environment and replace the old count key with the new one. 
var env []string for _, v := range os.Environ() { - if !strings.HasPrefix(v, listenFDs+"=") { + if !strings.HasPrefix(v, listenFDsEnv+"=") { env = append(env, v) } } - env = append(env, fmt.Sprintf("%s=%d", listenFDs, len(listeners))) + env = append(env, fmt.Sprintf("%s=%d", listenFDsEnv, len(listeners))) + + if notifySocketAddr != "" { + env = append(env, fmt.Sprintf("%s=%s", notifySocketEnv, notifySocketAddr)) + } sb := &strings.Builder{} for i, unlink := range getActiveListenersToUnlink() { @@ -87,7 +91,7 @@ func RestartProcess() (int, error) { unlinkStr := sb.String() if len(unlinkStr) > 0 { unlinkStr = unlinkStr[:len(unlinkStr)-1] - env = append(env, fmt.Sprintf("%s=%s", unlinkFDs, unlinkStr)) + env = append(env, fmt.Sprintf("%s=%s", unlinkFDsEnv, unlinkStr)) } allFiles := append([]*os.File{os.Stdin, os.Stdout, os.Stderr}, files...) From 2d2b41aaf4dd04ef6cfcda8f190a7637b22c1ba2 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 2 Oct 2022 18:36:56 +0100 Subject: [PATCH 2/7] Handle watchdog Signed-off-by: Andrew Thornton --- modules/graceful/manager_unix.go | 10 ++++++++ modules/graceful/net_unix.go | 41 ++++++++++++++++++++++++++++++-- modules/graceful/restart_unix.go | 6 +++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index 005e18bea877c..518e660d0d223 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -61,6 +61,7 @@ const ( readyMsg systemdNotifyMsg = "READY=1" stoppingMsg systemdNotifyMsg = "STOPPING=1" reloadingMsg systemdNotifyMsg = "RELOADING=1" + watchdogMsg systemdNotifyMsg = "WATCHDOG=1" ) func statusMsg(msg string) systemdNotifyMsg { @@ -167,6 +168,13 @@ func (g *Manager) handleSignals(ctx context.Context) { syscall.SIGTSTP, ) + watchdogTimeout := getWatchdogTimeout() + t := &time.Ticker{} + if watchdogTimeout != 0 { + g.notify(watchdogMsg) + t = time.NewTicker(watchdogTimeout / 2) + } + pid := syscall.Getpid() for { 
select { @@ -195,6 +203,8 @@ func (g *Manager) handleSignals(ctx context.Context) { default: log.Info("PID %d. Received %v.", pid, sig) } + case <-t.C: + g.notify(watchdogMsg) case <-ctx.Done(): log.Warn("PID: %d. Background context for manager closed - %v - Shutting down...", pid, ctx.Err()) g.DoGracefulShutdown() diff --git a/modules/graceful/net_unix.go b/modules/graceful/net_unix.go index 32ce69338aa58..239142013de63 100644 --- a/modules/graceful/net_unix.go +++ b/modules/graceful/net_unix.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" "sync" + "time" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -25,7 +26,8 @@ const ( startFD = 3 unlinkFDsEnv = "GITEA_UNLINK_FDS" - notifySocketEnv = "NOTIFY_SOCKET" + notifySocketEnv = "NOTIFY_SOCKET" + watchdogTimeoutEnv = "WATCHDOG_USEC" ) // In order to keep the working directory the same as when we started we record @@ -42,6 +44,7 @@ var ( activeListeners = []net.Listener{} notifySocketAddr string + watchdogTimeout time.Duration ) func getProvidedFDs() (savedErr error) { @@ -89,6 +92,7 @@ func getProvidedFDs() (savedErr error) { return } + // now handle some additional systemd provided things notifySocketAddr = os.Getenv(notifySocketEnv) if notifySocketAddr != "" { log.Debug("Systemd Notify Socket provided: %s", notifySocketAddr) @@ -97,6 +101,29 @@ func getProvidedFDs() (savedErr error) { log.Warn("Unable to Unset the NOTIFY_SOCKET environment variable: %v", savedErr) return } + + // We don't handle WATCHDOG_PID + timeoutStr := os.Getenv(watchdogTimeoutEnv) + if timeoutStr != "" { + savedErr = os.Unsetenv(watchdogTimeoutEnv) + if savedErr != nil { + log.Warn("Unable to Unset the WATCHDOG_USEC environment variable: %v", savedErr) + return + } + + s, err := strconv.ParseInt(timeoutStr, 10, 64) + if err != nil { + log.Error("Unable to parse the provided WATCHDOG_USEC: %v", err) + savedErr = fmt.Errorf("unable to parse the provided WATCHDOG_USEC: %w", err) + return + } + if s <= 0 { + 
log.Error("Unable to parse the provided WATCHDOG_USEC: %s should be a positive number", timeoutStr) + savedErr = fmt.Errorf("unable to parse the provided WATCHDOG_USEC: %s should be a positive number", timeoutStr) + return + } + watchdogTimeout = time.Duration(s) * time.Microsecond + } } else { log.Trace("No Systemd Notify Socket provided") } @@ -274,7 +301,8 @@ func getActiveListenersToUnlink() []bool { func getNotifySocket() (*net.UnixConn, error) { if err := getProvidedFDs(); err != nil { - return nil, err + // This error will be logged elsewhere + return nil, nil } if notifySocketAddr == "" { @@ -294,3 +322,12 @@ func getNotifySocket() (*net.UnixConn, error) { return notifySocket, nil } + +func getWatchdogTimeout() time.Duration { + if err := getProvidedFDs(); err != nil { + // This error will be logged elsewhere + return 0 + } + + return watchdogTimeout +} diff --git a/modules/graceful/restart_unix.go b/modules/graceful/restart_unix.go index ababc25f5a715..cec40dbc51293 100644 --- a/modules/graceful/restart_unix.go +++ b/modules/graceful/restart_unix.go @@ -16,6 +16,7 @@ import ( "strings" "sync" "syscall" + "time" ) var killParent sync.Once @@ -80,6 +81,11 @@ func RestartProcess() (int, error) { env = append(env, fmt.Sprintf("%s=%s", notifySocketEnv, notifySocketAddr)) } + if watchdogTimeout != 0 { + watchdogStr := strconv.FormatInt(int64(watchdogTimeout/time.Millisecond), 10) + env = append(env, fmt.Sprintf("%s=%s", watchdogTimeoutEnv, watchdogStr)) + } + sb := &strings.Builder{} for i, unlink := range getActiveListenersToUnlink() { if !unlink { From a9baaaf4f7640f24e760f9c00d94e991b74c9570 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 2 Oct 2022 19:44:34 +0100 Subject: [PATCH 3/7] placate lint Signed-off-by: Andrew Thornton --- modules/graceful/manager_unix.go | 2 +- modules/graceful/net_unix.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index 
518e660d0d223..8538ea2c9c4bc 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -72,7 +72,7 @@ func pidMsg() systemdNotifyMsg { return systemdNotifyMsg("MAINPID=" + strconv.Itoa(os.Getpid())) } -// Notify systemd of status via the notify protocol +// Notify systemd of status via the notify protocol func (g *Manager) notify(msg systemdNotifyMsg) { conn, err := getNotifySocket() if err != nil { diff --git a/modules/graceful/net_unix.go b/modules/graceful/net_unix.go index 239142013de63..2a327339367a9 100644 --- a/modules/graceful/net_unix.go +++ b/modules/graceful/net_unix.go @@ -102,7 +102,7 @@ func getProvidedFDs() (savedErr error) { return } - // We don't handle WATCHDOG_PID + // FIXME: We don't handle WATCHDOG_PID timeoutStr := os.Getenv(watchdogTimeoutEnv) if timeoutStr != "" { savedErr = os.Unsetenv(watchdogTimeoutEnv) From fb8283789b6725c195ce79ffabfb769e2f2cf7c7 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 10 Oct 2022 18:09:34 +0100 Subject: [PATCH 4/7] get notify address first Signed-off-by: Andrew Thornton --- modules/graceful/net_unix.go | 77 +++++++++++++++++------------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/modules/graceful/net_unix.go b/modules/graceful/net_unix.go index 2a327339367a9..d36be07bb50b8 100644 --- a/modules/graceful/net_unix.go +++ b/modules/graceful/net_unix.go @@ -52,46 +52,6 @@ func getProvidedFDs() (savedErr error) { once.Do(func() { mutex.Lock() defer mutex.Unlock() - - numFDs := os.Getenv(listenFDsEnv) - if numFDs == "" { - return - } - n, err := strconv.Atoi(numFDs) - if err != nil { - savedErr = fmt.Errorf("%s is not a number: %s. 
Err: %v", listenFDsEnv, numFDs, err) - return - } - - fdsToUnlinkStr := strings.Split(os.Getenv(unlinkFDsEnv), ",") - providedListenersToUnlink = make([]bool, n) - for _, fdStr := range fdsToUnlinkStr { - i, err := strconv.Atoi(fdStr) - if err != nil || i < 0 || i >= n { - continue - } - providedListenersToUnlink[i] = true - } - - for i := startFD; i < n+startFD; i++ { - file := os.NewFile(uintptr(i), fmt.Sprintf("listener_FD%d", i)) - - l, err := net.FileListener(file) - if err == nil { - // Close the inherited file if it's a listener - if err = file.Close(); err != nil { - savedErr = fmt.Errorf("error closing provided socket fd %d: %s", i, err) - return - } - providedListeners = append(providedListeners, l) - continue - } - - // If needed we can handle packetconns here. - savedErr = fmt.Errorf("Error getting provided socket fd %d: %v", i, err) - return - } - // now handle some additional systemd provided things notifySocketAddr = os.Getenv(notifySocketEnv) if notifySocketAddr != "" { @@ -127,6 +87,43 @@ func getProvidedFDs() (savedErr error) { } else { log.Trace("No Systemd Notify Socket provided") } + + if numFDs := os.Getenv(listenFDsEnv); numFDs != "" { + n, err := strconv.Atoi(numFDs) + if err != nil { + savedErr = fmt.Errorf("%s is not a number: %s. 
Err: %v", listenFDsEnv, numFDs, err) + return + } + + fdsToUnlinkStr := strings.Split(os.Getenv(unlinkFDsEnv), ",") + providedListenersToUnlink = make([]bool, n) + for _, fdStr := range fdsToUnlinkStr { + i, err := strconv.Atoi(fdStr) + if err != nil || i < 0 || i >= n { + continue + } + providedListenersToUnlink[i] = true + } + + for i := startFD; i < n+startFD; i++ { + file := os.NewFile(uintptr(i), fmt.Sprintf("listener_FD%d", i)) + + l, err := net.FileListener(file) + if err == nil { + // Close the inherited file if it's a listener + if err = file.Close(); err != nil { + savedErr = fmt.Errorf("error closing provided socket fd %d: %s", i, err) + return + } + providedListeners = append(providedListeners, l) + continue + } + + // If needed we can handle packetconns here. + savedErr = fmt.Errorf("Error getting provided socket fd %d: %v", i, err) + return + } + } }) return savedErr } From eb6fae29a61babfc9c57df3c32245e70b028d4d2 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 10 Oct 2022 18:54:22 +0100 Subject: [PATCH 5/7] move notify to cleanup routine Signed-off-by: Andrew Thornton --- modules/graceful/manager_unix.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index 8538ea2c9c4bc..87e79dce3cebb 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -123,12 +123,12 @@ func (g *Manager) start(ctx context.Context) { // Ignore the error here there's not much we can do with it // They're logged in the CloseProvidedListeners function _ = CloseProvidedListeners() + g.notify(readyMsg) }() if setting.StartupTimeout > 0 { go func() { select { case <-startupDone: - g.notify(readyMsg) return case <-g.IsShutdown(): func() { From d0c499ddb9d40fa8147214065b5ac1615fb3d14c Mon Sep 17 00:00:00 2001 From: zeripath Date: Mon, 10 Oct 2022 19:00:59 +0100 Subject: [PATCH 6/7] Update modules/graceful/manager_unix.go --- modules/graceful/manager_unix.go | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/graceful/manager_unix.go b/modules/graceful/manager_unix.go index 87e79dce3cebb..4087f2ccba31c 100644 --- a/modules/graceful/manager_unix.go +++ b/modules/graceful/manager_unix.go @@ -265,7 +265,7 @@ func (g *Manager) DoGracefulShutdown() { g.notify(stoppingMsg) } else { g.lock.Unlock() - g.notify(statusMsg("shutting down after fork")) + g.notify(statusMsg("Shutting down after fork")) } g.doShutdown() } From db2f35d3026d9b3f8b3ab8ff1c2383d28c179404 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 19 Oct 2022 20:48:38 +0100 Subject: [PATCH 7/7] Add WatchdogSec to gitea service Signed-off-by: Andrew Thornton --- contrib/systemd/gitea.service | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/systemd/gitea.service b/contrib/systemd/gitea.service index 57851012de472..c097fb0d17632 100644 --- a/contrib/systemd/gitea.service +++ b/contrib/systemd/gitea.service @@ -62,6 +62,7 @@ WorkingDirectory=/var/lib/gitea/ ExecStart=/usr/local/bin/gitea web --config /etc/gitea/app.ini Restart=always Environment=USER=git HOME=/home/git GITEA_WORK_DIR=/var/lib/gitea +WatchdogSec=30s # If you install Git to directory prefix other than default PATH (which happens # for example if you install other versions of Git side-to-side with # distribution version), uncomment below line and add that prefix to PATH