From 70f396d060e9135d16534286b5fc8e6e733bfa2d Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Thu, 2 Oct 2025 17:31:10 +0200 Subject: [PATCH 01/15] upssched: introduce passing of NOTIFYMSG to CMDSCRIPT [#3105] Signed-off-by: Jim Klimov --- NEWS.adoc | 3 + clients/upssched-cmd | 4 +- clients/upssched.c | 172 +++++++++++++++++++++++++++-------- conf/upssched.conf.sample.in | 11 ++- docs/man/upssched.conf.txt | 10 +- docs/man/upssched.txt | 14 ++- scripts/misc/notifyme-debug | 2 +- 7 files changed, 165 insertions(+), 51 deletions(-) diff --git a/NEWS.adoc b/NEWS.adoc index d62589886f..a9d8b34bae 100644 --- a/NEWS.adoc +++ b/NEWS.adoc @@ -120,6 +120,9 @@ https://github.com/networkupstools/nut/milestone/12 * Introduced `upssched -l` mode to list currently tracked timers. [#3097] * Make use of `setproctag()` and `getproctag()` to report parent/child process names. [#3084] + * Introduced optional passing of `NOTIFYMSG` text (normally originating + from `upsmon` which calls `upssched`) as an environment variable into + the ultimately executed `CMDSCRIPT` processes. [#3105] - `configure` script options: * Introduced `--with-python{,2,3}-modules-dir` to specify PyNUT(Client) diff --git a/clients/upssched-cmd b/clients/upssched-cmd index 160f50584c..59ad5236d3 100755 --- a/clients/upssched-cmd +++ b/clients/upssched-cmd @@ -17,8 +17,8 @@ echo "`date -u`: $0: THIS IS A SAMPLE SCRIPT, PLEASE TAILOR IT FOR YOUR DEPLOYMENT OF NUT!" >&2 logger -t upssched-cmd "THIS IS A SAMPLE SCRIPT, PLEASE TAILOR IT FOR YOUR DEPLOYMENT OF NUT!" 
-printf "`date -u`: UPSNAME='%s'\tNOTIFYTYPE='%s'\targs=%s\n" "$UPSNAME" "$NOTIFYTYPE" "$@" >&2 -printf "UPSNAME='%s' NOTIFYTYPE='%s' args=%s\n" "$UPSNAME" "$NOTIFYTYPE" "$@" | logger -t upssched-cmd-received-NOTIFYTYPE +printf "`date -u`: UPSNAME='%s'\tNOTIFYTYPE='%s'\tNOTIFYMSG='%s'\targs=%s\n" "$UPSNAME" "$NOTIFYTYPE" "$NOTIFYMSG" "$*" >&2 +printf "UPSNAME='%s' NOTIFYTYPE='%s' NOTIFYMSG='%s' args=%s\n" "$UPSNAME" "$NOTIFYTYPE" "$NOTIFYMSG" "$*" | logger -t upssched-cmd-received-NOTIFYTYPE #set diff --git a/clients/upssched.c b/clients/upssched.c index d458315e22..bcef61b7f0 100644 --- a/clients/upssched.c +++ b/clients/upssched.c @@ -28,6 +28,8 @@ /* design notes for the curious: * * 1. we get called with a ups_name and notify_type from upsmon + * (and notify_msg via first non-option argv[] element if + * present and not trivial) * 2. the config file is searched for an AT condition that matches * 3. the conditions on any matching lines are parsed * @@ -73,6 +75,7 @@ typedef struct ttype_s { time_t etime; char **upsnames; /* List of unique UPSNAME values that commanded to start this timer name */ char **notifytypes; /* List of unique NOTIFYTYPE values that commanded to start this timer name */ + char **notifymsgs; /* List of unique NOTIFYMSG values that commanded to start this timer name */ struct ttype_s *next; } ttype_t; @@ -83,7 +86,7 @@ static int nut_debug_level_args = 0, nut_debug_level_env = 0, nut_debug_level_co static int list_timers = 0; /* ups name and notify type (string) as received from upsmon */ -static const char *ups_name, *notify_type, *prog = NULL; +static const char *ups_name = NULL, *notify_type = NULL, *notify_msg = NULL, *prog = NULL; #ifdef WIN32 static OVERLAPPED connect_overlapped; @@ -221,8 +224,8 @@ static char* collect_string(char **string_arr, char *logtag, char *sep, size_t * static void exec_cmd_timer(ttype_t *item) { - char *upsnames = NULL, *notifytypes = NULL; - size_t upsnames_count = 0, notifytypes_count = 0; + char *upsnames 
= NULL, *notifytypes = NULL, *notifymsgs = NULL; + size_t upsnames_count = 0, notifytypes_count = 0, notifymsgs_count = 0; if (!item || !item->name || !(*(item->name))) { upsdebugx(1, "%s: SKIP bad call with null arg or its command name", __func__); @@ -238,15 +241,22 @@ static void exec_cmd_timer(ttype_t *item) notifytypes = collect_string(item->notifytypes, "NOTIFYTYPE", ",", NULL, &notifytypes_count); } + if (item->notifymsgs && *(item->notifymsgs) && **(item->notifymsgs)) { + notifymsgs = collect_string(item->notifymsgs, "NOTIFYMSG", ".\t", NULL, &notifymsgs_count); + } + if (upsnames) setenv("UPSNAME", upsnames, 1); if (notifytypes) setenv("NOTIFYTYPE", notifytypes, 1); + if (notifymsgs) + setenv("NOTIFYMSG", notifymsgs, 1); + if (nut_debug_level) - upslogx(LOG_INFO, "Executing command by timer: %s\t[%s]\t[%s]", - item->name, NUT_STRARG(notifytypes), NUT_STRARG(upsnames)); + upslogx(LOG_INFO, "Executing command by timer: %s\t[%s]\t[%s]\t[%s]", + item->name, NUT_STRARG(notifytypes), NUT_STRARG(upsnames), NUT_STRARG(notifymsgs)); exec_cmd(item->name); upsdebugx(3, "%s: returned from exec_cmd()", __func__); @@ -261,6 +271,11 @@ static void exec_cmd_timer(ttype_t *item) free(notifytypes); } + if (notifymsgs) { + unsetenv("NOTIFYMSG"); + free(notifymsgs); + } + upsdebugx(3, "%s: done", __func__); } @@ -295,6 +310,14 @@ static void removetimer(ttype_t *tfind) free(tmp->notifytypes); } + if (tmp->notifymsgs) { + char **ps; + for (ps = tmp->notifymsgs; ps != NULL && *ps != NULL; ps++) { + free(*ps); + } + free(tmp->notifymsgs); + } + upsdebugx(3, "%s: forgetting %s", __func__, tmp->name); free(tmp->name); free(tmp); @@ -367,7 +390,7 @@ static void checktimers(void) upsdebugx(3, "%s: done", __func__); } -static void start_timer(const char *name, const char *ofsstr, const char *notifytype, const char *upsname, int shared_timer) +static void start_timer(const char *name, const char *ofsstr, const char *notifytype, const char *upsname, const char *notifymsg, int shared_timer) { 
time_t now; long ofs; @@ -393,8 +416,8 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify while (tmp) { if (tmp->name && !strcmp(tmp->name, name)) { if (nut_debug_level) - upslogx(LOG_INFO, "Append data to shared timer: %s\t[%s]\t[%s]\t(will elapse in %g seconds)", - name, NUT_STRARG(notifytype), NUT_STRARG(upsname), + upslogx(LOG_INFO, "Append data to shared timer: %s\t[%s]\t[%s]\t[%s]\t(will elapse in %g seconds)", + name, NUT_STRARG(notifytype), NUT_STRARG(upsname), NUT_STRARG(notifymsg), difftime(tmp->etime, now)); /* FIXME? Consider only the first hit as the shared timer? @@ -423,6 +446,29 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify } } + if (notifymsg && *notifymsg) { + if (tmp->notifymsgs) { + char **ps = NULL; + size_t count = 0; /* amount of non-NULL entries, if we get to the end */ + + for (ps = tmp->notifymsgs; ps != NULL && *ps != NULL ; ps++) { + count++; + if (!strcmp(*ps, notifymsg)) + break; + } + + if (ps == NULL || *ps == NULL) { + tmp->notifymsgs = xrealloc(tmp->notifymsgs, count + 2); + tmp->notifymsgs[count] = xstrdup(notifymsg); + tmp->notifymsgs[count + 1] = NULL; + } + } else { + tmp->notifymsgs = xcalloc(2, sizeof(char*)); + tmp->notifymsgs[0] = xstrdup(notifymsg); + tmp->notifymsgs[1] = NULL; + } + } + if (upsname && *upsname) { if (tmp->upsnames) { char **ps = NULL; @@ -456,8 +502,8 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify } if (nut_debug_level) - upslogx(LOG_INFO, "New timer: %s\t[%s]\t[%s]\t(will elapse in %ld seconds)", - name, NUT_STRARG(notifytype), NUT_STRARG(upsname), ofs); + upslogx(LOG_INFO, "New timer: %s\t[%s]\t[%s]\t[%s]\t(will elapse in %ld seconds)", + name, NUT_STRARG(notifytype), NUT_STRARG(upsname), NUT_STRARG(notifymsg), ofs); /* now add to the queue */ if (!shared_timer) { @@ -473,6 +519,7 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify tmp->name = xstrdup(name); 
tmp->etime = now + ofs; tmp->notifytypes = NULL; + tmp->notifymsgs = NULL; tmp->upsnames = NULL; tmp->next = NULL; @@ -482,6 +529,12 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify tmp->notifytypes[1] = NULL; } + if (notifymsg && *notifymsg) { + tmp->notifymsgs = xcalloc(2, sizeof(char*)); + tmp->notifymsgs[0] = xstrdup(notifymsg); + tmp->notifymsgs[1] = NULL; + } + if (upsname && *upsname) { tmp->upsnames = xcalloc(2, sizeof(char*)); tmp->upsnames[0] = xstrdup(upsname); @@ -494,7 +547,7 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify thead = tmp; } -static void cancel_timer(const char *name, const char *cname, const char *notifytype, const char *upsname, int do_cancel_matched) +static void cancel_timer(const char *name, const char *cname, const char *notifytype, const char *upsname, const char *notifymsg, int do_cancel_matched) { ttype_t *tmp; size_t removed = 0; @@ -505,6 +558,7 @@ static void cancel_timer(const char *name, const char *cname, const char *notify for (tmp = thead; tmp != NULL; tmp = tmp->next) { if (!strcmp(tmp->name, name)) { /* match */ + /* Note we do not match "notifymsg" as it likely differs */ if (!do_cancel_matched || ( (!notifytype || !(*notifytype)) && (!upsname || !(*upsname)) ) @@ -553,8 +607,13 @@ static void cancel_timer(const char *name, const char *cname, const char *notify } } - if (nut_debug_level) - upslogx(LOG_INFO, "Cancelling timer: %s", name); + if (nut_debug_level) { + if (notifymsg && *notifymsg) { + upslogx(LOG_INFO, "Cancelling timer: %s: %s", name, notifymsg); + } else { + upslogx(LOG_INFO, "Cancelling timer: %s", name); + } + } removetimer(tmp); removed++; @@ -969,7 +1028,7 @@ static int sock_arg(conn_t *conn) /* LIST-TIMERS (no args expected now) * returns a list with tab-separated values for: - * NAME TO_ABS TO_REL NOTIFYTYPES UPSNAMES + * NAME TO_ABS TO_REL NOTIFYTYPES UPSNAMES NOTIFYMSGS_TABSEP */ if (!strcmp(conn->ctx.arglist[0], "LIST-TIMERS")) 
{ ttype_t *item = thead; @@ -1003,6 +1062,20 @@ static int sock_arg(conn_t *conn) s = collect_string(item->upsnames, "UPSNAME", ",", NULL, NULL); } + if (s && *s) { + send_to_one(conn, "%s\t", s); + } else { + send_to_one(conn, "\"\"\t"); + } + if (s) { + free(s); + } + + s = NULL; + if (item->notifymsgs && *(item->notifymsgs) && **(item->notifymsgs)) { + s = collect_string(item->notifymsgs, "NOTIFYMSG", ".\t", NULL, NULL); + } + if (s && *s) { send_to_one(conn, "%s\n", s); } else { @@ -1020,7 +1093,7 @@ static int sock_arg(conn_t *conn) return 1; } - /* CANCEL [] [ [] [ ] */ { /* scoping */ int do_cancel = !strcmp(conn->ctx.arglist[0], "CANCEL"), do_cancel_matched = !strcmp(conn->ctx.arglist[0], "CANCEL-MATCHED"); @@ -1028,34 +1101,39 @@ static int sock_arg(conn_t *conn) if (do_cancel || do_cancel_matched) { /* "cmd" may be present and empty, this is handled in the method */ if (conn->ctx.numargs < 3) - cancel_timer(conn->ctx.arglist[1], NULL, NULL, NULL, do_cancel_matched); + cancel_timer(conn->ctx.arglist[1], NULL, + NULL, NULL, NULL, do_cancel_matched); else - if (conn->ctx.numargs < 5) - cancel_timer(conn->ctx.arglist[1], conn->ctx.arglist[2], NULL, NULL, do_cancel_matched); + if (conn->ctx.numargs < 6) + cancel_timer(conn->ctx.arglist[1], conn->ctx.arglist[2], + NULL, NULL, NULL, do_cancel_matched); else cancel_timer(conn->ctx.arglist[1], conn->ctx.arglist[2], - conn->ctx.arglist[3], conn->ctx.arglist[4], do_cancel_matched); + conn->ctx.arglist[3], conn->ctx.arglist[4], + conn->ctx.arglist[5], do_cancel_matched); send_to_one(conn, "OK\n"); return 1; } } - if (conn->ctx.numargs < 5) + if (conn->ctx.numargs < 6) return 0; - /* START */ + /* START */ if (!strcmp(conn->ctx.arglist[0], "START")) { start_timer(conn->ctx.arglist[1], conn->ctx.arglist[2], - conn->ctx.arglist[3], conn->ctx.arglist[4], 0); + conn->ctx.arglist[3], conn->ctx.arglist[4], + conn->ctx.arglist[5], 0); send_to_one(conn, "OK\n"); return 1; } - /* START-SHARED */ + /* START-SHARED */ if 
(!strcmp(conn->ctx.arglist[0], "START-SHARED")) { start_timer(conn->ctx.arglist[1], conn->ctx.arglist[2], - conn->ctx.arglist[3], conn->ctx.arglist[4], 1); + conn->ctx.arglist[3], conn->ctx.arglist[4], + conn->ctx.arglist[5], 1); send_to_one(conn, "OK\n"); return 1; } @@ -1293,6 +1371,7 @@ static void start_daemon(TYPE_FD lockfd) * CMDSCRIPT to run */ unsetenv("NOTIFYTYPE"); unsetenv("UPSNAME"); + unsetenv("NOTIFYMSG"); /* now watch for activity */ upsdebugx(2, "Timer daemon waiting for connections on pipefd %d", @@ -1404,6 +1483,7 @@ static void start_daemon(TYPE_FD lockfd) * CMDSCRIPT to run */ unsetenv("NOTIFYTYPE"); unsetenv("UPSNAME"); + unsetenv("NOTIFYMSG"); /* now watch for activity */ @@ -1587,7 +1667,7 @@ static void sendcmd(const char *cmd, const char *arg1, const char *arg2) int i; ssize_t ret; size_t enclen, buflen; - char buf[SMALLBUF], enc[SMALLBUF + 8]; + char buf[LARGEBUF], enc[LARGEBUF + 8]; #ifndef WIN32 int ret_s; struct timeval tv; @@ -1621,6 +1701,9 @@ static void sendcmd(const char *cmd, const char *arg1, const char *arg2) snprintfcat(buf, sizeof(buf), " \"%s\"", ups_name? pconf_encode(ups_name, enc, sizeof(enc)) : ""); + snprintfcat(buf, sizeof(buf), " \"%s\"", + notify_msg ? 
pconf_encode(notify_msg, enc, sizeof(enc)) : ""); + snprintf(enc, sizeof(enc), "%s\n", buf); /* Sanity checks, for static analyzers to sleep well */ @@ -1895,33 +1978,37 @@ static void parse_at(const char *ntype, const char *un, const char *cmd, /* if command is valid, send it to the daemon (which may start it) */ if (!strcmp(cmd, "START-TIMER")) { - upsdebugx(1, "%s: processing %s\t[%s]\t[%s]\t[%s]\t[%s]", __func__, cmd, + upsdebugx(1, "%s: processing %s\t[%s]\t[%s]\t[%s]\t[%s]\t[%s]", __func__, cmd, NUT_STRARG(ca1), NUT_STRARG(ca2), - NUT_STRARG(notify_type), NUT_STRARG(ups_name)); + NUT_STRARG(notify_type), NUT_STRARG(ups_name), + NUT_STRARG(notify_msg)); sendcmd("START", ca1, ca2); return; } if (!strcmp(cmd, "START-TIMER-SHARED")) { - upsdebugx(1, "%s: processing %s\t[%s]\t[%s]\t[%s]\t[%s]", __func__, cmd, + upsdebugx(1, "%s: processing %s\t[%s]\t[%s]\t[%s]\t[%s]\t[%s]", __func__, cmd, NUT_STRARG(ca1), NUT_STRARG(ca2), - NUT_STRARG(notify_type), NUT_STRARG(ups_name)); + NUT_STRARG(notify_type), NUT_STRARG(ups_name), + NUT_STRARG(notify_msg)); sendcmd("START-SHARED", ca1, ca2); return; } if (!strcmp(cmd, "CANCEL-TIMER")) { - upsdebugx(1, "%s: processing %s\t[%s]\t[%s]\t[%s]\t[%s]", __func__, cmd, + upsdebugx(1, "%s: processing %s\t[%s]\t[%s]\t[%s]\t[%s]\t[%s]", __func__, cmd, NUT_STRARG(ca1), NUT_STRARG(ca2), - NUT_STRARG(notify_type), NUT_STRARG(ups_name)); + NUT_STRARG(notify_type), NUT_STRARG(ups_name), + NUT_STRARG(notify_msg)); sendcmd("CANCEL", ca1, ca2); return; } if (!strcmp(cmd, "EXECUTE")) { - upsdebugx(1, "%s: processing %s\t[%s]\t[%s]\t[%s]\t[%s]", __func__, cmd, + upsdebugx(1, "%s: processing %s\t[%s]\t[%s]\t[%s]\t[%s]\t[%s]", __func__, cmd, NUT_STRARG(ca1), NUT_STRARG(ca2), - NUT_STRARG(notify_type), NUT_STRARG(ups_name)); + NUT_STRARG(notify_type), NUT_STRARG(ups_name), + NUT_STRARG(notify_msg)); if (ca1[0] == '\0') { upslogx(LOG_ERR, "Empty EXECUTE command argument"); @@ -1935,9 +2022,10 @@ static void parse_at(const char *ntype, const char *un, 
const char *cmd, return; } - upslogx(LOG_ERR, "Invalid command: %s\t[%s]\t[%s]\t[%s]\t[%s]", cmd, + upslogx(LOG_ERR, "Invalid command: %s\t[%s]\t[%s]\t[%s]\t[%s]\t[%s]", cmd, NUT_STRARG(ca1), NUT_STRARG(ca2), - NUT_STRARG(notify_type), NUT_STRARG(ups_name)); + NUT_STRARG(notify_type), NUT_STRARG(ups_name), + NUT_STRARG(notify_msg)); } static int conf_arg(size_t numargs, char **arg) @@ -2085,7 +2173,7 @@ static void help(const char *arg_progname) printf("upssched: upsmon's scheduling helper for offset timers\n"); printf("Practical behavior is managed by UPSNAME and NOTIFYTYPE envvars\n"); - printf("\nUsage: %s [OPTIONS]\n\n", arg_progname); + printf("\nUsage: %s [OPTIONS] [NOTIFYMSG]\n\n", arg_progname); printf(" -D raise debugging level (NOTE: keeps reporting when daemonized)\n"); printf(" -V display the version of this software\n"); printf(" -h display this help\n"); @@ -2100,7 +2188,7 @@ static void help(const char *arg_progname) int main(int argc, char **argv) { - int i; + int i, argn = 0; if (argc > 0) prog = xbasename(argv[0]); @@ -2108,6 +2196,7 @@ int main(int argc, char **argv) prog = "upssched"; while ((i = getopt(argc, argv, "+DVhl")) != -1) { + argn++; switch (i) { case 'D': nut_debug_level_args++; @@ -2153,6 +2242,9 @@ int main(int argc, char **argv) ups_name = getenv("UPSNAME"); notify_type = getenv("NOTIFYTYPE"); + upsdebugx(2, "Remaining argn=%d of argc=%d", argn, argc); + if (argc > argn + 1 && *argv[argn + 1]) + notify_msg = argv[argn + 1]; if ((!list_timers) && ((!ups_name) || (!notify_type))) { printf("Error: environment variables UPSNAME and NOTIFYTYPE must be set.\n"); @@ -2160,6 +2252,12 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + upsdebugx(1, "Handling NOTIFYTYPE='%s' for UPSNAME='%s'", notify_type, ups_name); + if (notify_msg) + upsdebugx(1, "Got a NOTIFYMSG from command line: %s", notify_msg); + else + upsdebugx(1, "Did not get any NOTIFYMSG from command line"); + /* see if this matches anything in the config file */ /* This 
is actually the processing loop: * checkconf -> conf_arg -> parse_at -> sendcmd -> daemon if needed diff --git a/conf/upssched.conf.sample.in b/conf/upssched.conf.sample.in index 1d2961a1ce..bfb9b99f68 100644 --- a/conf/upssched.conf.sample.in +++ b/conf/upssched.conf.sample.in @@ -118,10 +118,10 @@ CMDSCRIPT @BINDIR@/upssched-cmd # Start a timer called that will trigger after # seconds, calling your CMDSCRIPT with as the first # argument. Each invocation checks if the was already -# started, and if so -- appends the current event's `UPSNAME` and -# `NOTIFYTYPE` to the list of unique values it would report via -# environment variables (as a comma-separated string) when the -# timer does execute. +# started, and if so -- appends the current event's `UPSNAME`, +# `NOTIFYTYPE` and `NOTIFYMSG` to the list of unique values it would +# report via environment variables (comma-separated names and types, +# tab-separated message sentences) when the timer does execute. # # NOTE: Currently this updates the first seen instance with the # (in case you managed to start many). @@ -156,7 +156,8 @@ CMDSCRIPT @BINDIR@/upssched-cmd # - CANCEL-TIMER-MATCHED [cmd] # # Similar to the above, but tries to only cancel the if it -# refers to the `UPSNAME` and `NOTIFYTYPE` values passed by caller. +# refers to the `UPSNAME` and `NOTIFYTYPE` values passed by caller (the +# `NOTIFYMSG` is ignored in this context). # # 1) If any UPS (*) reverts to utility power, then stop the timer before it # triggers ONLY if that UPS is associated with the already scheduled timer: diff --git a/docs/man/upssched.conf.txt b/docs/man/upssched.conf.txt index 33d3b2ca57..d9992e9853 100644 --- a/docs/man/upssched.conf.txt +++ b/docs/man/upssched.conf.txt @@ -98,10 +98,11 @@ gone for 10 seconds Start a timer of 'interval' seconds. When it triggers, it will pass the argument 'timername' as an argument to your CMDSCRIPT. 
Each invocation checks if the 'timername' was already -started, and if so -- appends the current event's `UPSNAME` and -`NOTIFYTYPE` to the list of unique values it would report via -environment variables (as a comma-separated string) when the -timer does execute. +started, and if so -- appends the current event's `UPSNAME`, +`NOTIFYTYPE` and `NOTIFYMSG` to the list of unique values it +would report via environment variables (as a comma-separated +string for `UPSNAME` and `NOTIFYTYPE`, and tab-separated +sentences for `NOTIFYMSG`) when the timer does execute. + NOTE: Currently this updates the first seen instance with the 'timername' (in case you managed to start many). @@ -131,6 +132,7 @@ stop the timer before it triggers *CANCEL-TIMER-MATCHED* 'timername' ['cmd'];; Similar to the above, but tries to only cancel the 'timername' if it refers to the `UPSNAME` and `NOTIFYTYPE` values passed by caller. +The `NOTIFYMSG` is ignored in this context. + Example: + diff --git a/docs/man/upssched.txt b/docs/man/upssched.txt index ef8ebd0bd9..2849965faa 100644 --- a/docs/man/upssched.txt +++ b/docs/man/upssched.txt @@ -9,7 +9,7 @@ upssched - Timer helper for scheduling events from upsmon SYNOPSIS -------- -*upssched* +*upssched* [OPTIONS] [NOTIFYMSG] NOTE: *upssched* should be run from linkman:upsmon[8] via the NOTIFYCMD. You should never run it directly during normal operations. @@ -17,7 +17,8 @@ You should never run it directly during normal operations. *upssched* -l List currently tracked timer events, if any. Report as a TAB-separated -table of: 'NAME', 'TIMEOUT_ABS', 'TIMEOUT_REL', 'NOTIFYTYPE', 'UPSNAME'. +table of: 'NAME', 'TIMEOUT_ABS', 'TIMEOUT_REL', 'NOTIFYTYPE', 'UPSNAME', +`NOTIFYMSG`. DESCRIPTION ----------- @@ -27,6 +28,15 @@ relative to events being monitored by linkman:upsmon[8]. The original purpose was to allow for a shutdown to occur after some fixed period on battery, but there are other uses that are possible. 
+OPTIONS +------- + +*NOTIFYMSG*:: +Optionally pass a text message (typically originates from linkman:upsmon[8] +call to `upssched` as its `NOTIFYCMD`) as an environment variable named +`NOTIFYMSG` to the `CMDSCRIPT` launched by `upssched` immediately or after +a timer expires. + INTEGRATION ----------- diff --git a/scripts/misc/notifyme-debug b/scripts/misc/notifyme-debug index ea1ad37b09..550e62e4f2 100755 --- a/scripts/misc/notifyme-debug +++ b/scripts/misc/notifyme-debug @@ -24,5 +24,5 @@ if [ -n "${TOP_BUILDDIR}" -a -x "${TOP_BUILDDIR}/clients/upssched" ] ; then if [ "${NUT_DEBUG_LEVEL-}" -gt 0 ] 2>/dev/null ; then printf '%s: %s\t%s\t[%s]:\targs: %s\t(%s arg tokens)\n' "`date -u`" "$0" "${NOTIFYTYPE-}" "${UPSNAME-}" "$*" "$#" >&2 fi - "${TOP_BUILDDIR}/clients/upssched" + "${TOP_BUILDDIR}/clients/upssched" "$@" fi From 595be136871b166831c3fd1e8bfad95ac5afda3e Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Thu, 2 Oct 2025 17:33:04 +0200 Subject: [PATCH 02/15] docs/man/upssched.txt: list common and unique options Signed-off-by: Jim Klimov --- docs/man/upssched.txt | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/man/upssched.txt b/docs/man/upssched.txt index 2849965faa..face7658f0 100644 --- a/docs/man/upssched.txt +++ b/docs/man/upssched.txt @@ -28,9 +28,30 @@ relative to events being monitored by linkman:upsmon[8]. The original purpose was to allow for a shutdown to occur after some fixed period on battery, but there are other uses that are possible. +COMMON OPTIONS +-------------- + +*-h*:: +Show the command-line help message. + +*-V*:: +Show NUT version banner. More details may be available if you also +`export NUT_DEBUG_LEVEL=1` or greater verbosity level. + +*-D*:: +Raise the debugging level. Use this option multiple times for more details. 
+ OPTIONS ------- +By default `upssched` processes its configuration file and executes or queues +calls to its `CMDSCRIPT`, or cancels some previously queued item(s), based on +configuration and the `NOTIFYTYPE` it receives. One exception to this is the +queue listing mode `-l`. + +*-l*:: +List pending timers (if any) and exit. + *NOTIFYMSG*:: Optionally pass a text message (typically originates from linkman:upsmon[8] call to `upssched` as its `NOTIFYCMD`) as an environment variable named From 929daa26495ca2fdc6738b434cec8706f1409cac Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Thu, 2 Oct 2025 17:47:57 +0200 Subject: [PATCH 03/15] clients/upsmon.c: add debug logging to NOTIFYCMD (WIN32) [#3097] Signed-off-by: Jim Klimov --- clients/upsmon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/clients/upsmon.c b/clients/upsmon.c index 8a862f3307..378a382ef6 100644 --- a/clients/upsmon.c +++ b/clients/upsmon.c @@ -280,6 +280,7 @@ static unsigned __stdcall async_notify(LPVOID param) if (notifycmd != NULL) { snprintf(exec, sizeof(exec), "%s \"%s\"", notifycmd, data->notice); + upsdebugx(6, "%s: Calling NOTIFYCMD: %s", __func__, exec); if (data->upsname) setenv("UPSNAME", data->upsname, 1); else From 6900725c15c56f220dd8834cc449b3e9232bed5c Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Thu, 2 Oct 2025 17:56:53 +0200 Subject: [PATCH 04/15] tests/NIT/upssched.conf.in: rename parameters passed to "EXECUTE" methods, for less confusing dev-testing logs [#3105] Signed-off-by: Jim Klimov --- tests/NIT/upssched.conf.in | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/NIT/upssched.conf.in b/tests/NIT/upssched.conf.in index abf5129f48..5bc3a821a1 100644 --- a/tests/NIT/upssched.conf.in +++ b/tests/NIT/upssched.conf.in @@ -15,7 +15,7 @@ LOCKFN @NUT_STATEPATH@/upssched.lock # ============================================================================ # info2client -AT ONLINE * EXECUTE ONLINE +AT ONLINE * EXECUTE ONLINE-HANDLER # info2admin 
only AT ONLINE * CANCEL-TIMER BATT-STATUS-1s AT ONLINE * CANCEL-TIMER BATT-STATUS-2s @@ -41,7 +41,7 @@ AT ONLINE * START-TIMER LINE-STATUS-30 1800 AT ONLINE * START-TIMER LINE-STATUS-60 3600 # # info2client -AT ONBATT * EXECUTE ONBATT +AT ONBATT * EXECUTE ONBATT-HANDLER # info2admin only AT ONBATT * CANCEL-TIMER LINE-STATUS-1s AT ONBATT * CANCEL-TIMER LINE-STATUS-2s @@ -70,10 +70,10 @@ AT ONBATT * START-TIMER BATT-STATUS-60 3600 ######################### # info2client -AT REPLBATT * ONBATT * EXECUTE REPLBATT -AT NOCOMM * EXECUTE NOCOMM -AT FSD * EXECUTE FSD -AT SHUTDOWN * EXECUTE SHUTDOWN +AT REPLBATT * ONBATT * EXECUTE REPLBATT-HANDLER +AT NOCOMM * EXECUTE NOCOMM-HANDLER +AT FSD * EXECUTE FSD-HANDLER +AT SHUTDOWN * EXECUTE SHUTDOWN-HANDLER # info2admin only AT LOWBATT * EXECUTE LOWBATT-INFO From 4fed2e3a18bd1c94c71b09dafa2135615a3e0220 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Thu, 2 Oct 2025 18:04:50 +0200 Subject: [PATCH 05/15] clients/upssched.c: simplify loop processing [#3097] Signed-off-by: Jim Klimov --- clients/upssched.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clients/upssched.c b/clients/upssched.c index bcef61b7f0..b01787ae52 100644 --- a/clients/upssched.c +++ b/clients/upssched.c @@ -296,7 +296,7 @@ static void removetimer(ttype_t *tfind) if (tmp->upsnames) { char **ps; - for (ps = tmp->upsnames; ps != NULL && *ps != NULL; ps++) { + for (ps = tmp->upsnames; *ps != NULL; ps++) { free(*ps); } free(tmp->upsnames); @@ -304,7 +304,7 @@ static void removetimer(ttype_t *tfind) if (tmp->notifytypes) { char **ps; - for (ps = tmp->notifytypes; ps != NULL && *ps != NULL; ps++) { + for (ps = tmp->notifytypes; *ps != NULL; ps++) { free(*ps); } free(tmp->notifytypes); @@ -312,7 +312,7 @@ static void removetimer(ttype_t *tfind) if (tmp->notifymsgs) { char **ps; - for (ps = tmp->notifymsgs; ps != NULL && *ps != NULL; ps++) { + for (ps = tmp->notifymsgs; *ps != NULL; ps++) { free(*ps); } free(tmp->notifymsgs); @@ -428,7 
+428,7 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify char **ps = NULL; size_t count = 0; /* amount of non-NULL entries, if we get to the end */ - for (ps = tmp->notifytypes; ps != NULL && *ps != NULL ; ps++) { + for (ps = tmp->notifytypes; *ps != NULL ; ps++) { count++; if (!strcmp(*ps, notifytype)) break; @@ -451,7 +451,7 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify char **ps = NULL; size_t count = 0; /* amount of non-NULL entries, if we get to the end */ - for (ps = tmp->notifymsgs; ps != NULL && *ps != NULL ; ps++) { + for (ps = tmp->notifymsgs; *ps != NULL ; ps++) { count++; if (!strcmp(*ps, notifymsg)) break; @@ -474,7 +474,7 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify char **ps = NULL; size_t count = 0; /* amount of non-NULL entries, if we get to the end */ - for (ps = tmp->upsnames; ps != NULL && *ps != NULL ; ps++) { + for (ps = tmp->upsnames; *ps != NULL ; ps++) { count++; if (!strcmp(*ps, upsname)) break; @@ -576,7 +576,7 @@ static void cancel_timer(const char *name, const char *cname, const char *notify upsdebugx(2, "%s: do not cancel timer %s due to lack of NOTIFYTYPE in it", __func__, name); continue; } - for (ps = tmp->notifytypes; ps != NULL && *ps != NULL ; ps++) { + for (ps = tmp->notifytypes; *ps != NULL ; ps++) { if (!strcmp(*ps, notifytype)) { matched = 1; break; @@ -594,7 +594,7 @@ static void cancel_timer(const char *name, const char *cname, const char *notify upsdebugx(2, "%s: do not cancel timer %s due to lack of UPSNAME in it", __func__, name); continue; } - for (ps = tmp->upsnames; ps != NULL && *ps != NULL ; ps++) { + for (ps = tmp->upsnames; *ps != NULL ; ps++) { if (!strcmp(*ps, upsname)) { matched = 1; break; From b5528ad261feba9c5697071b3a787bf330bbae9b Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Mon, 6 Oct 2025 22:36:34 +0200 Subject: [PATCH 06/15] clients/upsmon.{c,h}, NEWS.adoc: Make sure `FSD` notifications are 
always issued at latest when shutdown handling just starts (or earlier) [#3003, #3110] Signed-off-by: Jim Klimov --- NEWS.adoc | 11 +++++++++++ clients/upsmon.c | 27 +++++++++++++++++++++++---- clients/upsmon.h | 4 +++- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/NEWS.adoc b/NEWS.adoc index a9d8b34bae..85b026a970 100644 --- a/NEWS.adoc +++ b/NEWS.adoc @@ -100,6 +100,17 @@ https://github.com/networkupstools/nut/milestone/12 * Introduced a `SHUTDOWN_HOSTSYNC` notification message, to report that the primary `upsmon` initiated the shutdown and has some secondaries to wait for first. [#3084] + * Make sure an `FSD` notification is issued for each UPS when this primary + `upsmon` instance sets it (and does not return to usual data processing + loop to see and report it like secondaries do). This allows a `NOTIFYCMD` + such as `upssched` on the primary to handle the pending power outage + (e.g. begin stopping heavy services) even while `upsmon` waits for the + secondaries to complete their shutdowns and log out of the `upsd` data + server. [issue #3003, PR #3110] ++ +NOTE: If using `upssched` and monitoring multiple UPSes, consider setting up +a `START-TIMER-SHARED` rule with a short (approx. 1 second) timeout to group +several `FSD` notifications into one executed action. [PR #3097] - `upssched` tool updates: * Previously in PR #2896 (NUT releases v2.8.3 and v2.8.4) the `UPSNAME` and diff --git a/clients/upsmon.c b/clients/upsmon.c index 378a382ef6..0d4f5d3c94 100644 --- a/clients/upsmon.c +++ b/clients/upsmon.c @@ -1133,8 +1133,20 @@ static void setfsd(utype_t *ups) return; } - if (!strncmp(buf, "OK", 2)) + if (!strncmp(buf, "OK", 2)) { + upsdebugx(1, "%s: data server confirmed setting FSD for UPS [%s]", __func__, ups->sys); + + /* Let NOTIFYCMD (if any) know, and have a chance to react */ + if (ups->lastfsdnotify) { + /* e.g. 
upsd was still alive with a latched FSD + * status when this upsmon instance started */ + upsdebugx(2, "%s: not notifying about FSD for UPS [%s] because it was recently reported already", __func__, ups->sys); + } else { + time(&(ups->lastfsdnotify)); + do_notify(ups, NOTIFY_FSD, NULL); + } return; + } /* protocol error: upsd said something other than "OK" */ upslogx(LOG_ERR, "FSD set on UPS %s failed: %s", ups->sys, buf); @@ -1843,9 +1855,12 @@ static void ups_fsd(utype_t *ups) upsdebugx(3, "%s: %s (first time)", __func__, ups->sys); - /* must have changed from !FSD to FSD, so notify */ + /* must have changed from !FSD to FSD, so notify; avoid duplicates though */ - do_notify(ups, NOTIFY_FSD, NULL); + if (!(ups->lastfsdnotify)) { + time(&(ups->lastfsdnotify)); + do_notify(ups, NOTIFY_FSD, NULL); + } setflag(&ups->status, ST_FSD); } @@ -2113,6 +2128,8 @@ static void addups(int reloading, const char *sys, const char *pvs, tmp->lastrbwarn = 0; tmp->lastncwarn = 0; + tmp->lastfsdnotify = 0; + tmp->offsince = 0; tmp->oblbsince = 0; tmp->oversince = 0; @@ -2868,8 +2885,10 @@ static void parse_status(utype_t *ups, char *status, char *buzzword, char *buzzw /* clear these out early if they disappear */ if (!strstr(status, "LB")) clearflag(&ups->status, ST_LOWBATT); - if (!strstr(status, "FSD")) + if (!strstr(status, "FSD")) { clearflag(&ups->status, ST_FSD); + ups->lastfsdnotify = 0; + } /* similar to above - clear these flags and send notifications */ if (!strstr(status, "CAL")) diff --git a/clients/upsmon.h b/clients/upsmon.h index 118f554a63..4007cb1b40 100644 --- a/clients/upsmon.h +++ b/clients/upsmon.h @@ -92,10 +92,12 @@ typedef struct { int pollfail_log_throttle_count; /* How many pollfreq loops this UPS was in this state since last logged report? 
*/ time_t lastpoll; /* time of last successful poll */ - time_t lastnoncrit; /* time of last non-crit poll */ + time_t lastnoncrit; /* time of last non-crit poll */ time_t lastrbwarn; /* time of last REPLBATT warning*/ time_t lastncwarn; /* time of last NOCOMM warning */ + time_t lastfsdnotify; /* time of last FSD notification (when first discovering the state, or setting it - avoid duplicate notification); 0 initially or if that state clears */ + time_t offsince; /* time of recent entry into OFF state */ time_t oblbsince; /* time of recent entry into OB LB state (normally this causes immediate shutdown alert, unless we are configured to delay it) */ time_t oversince; /* time of recent entry into OVER state */ From 16ac6510a74c5adb740f4bcc891e94b13624f5d0 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Mon, 6 Oct 2025 23:00:50 +0200 Subject: [PATCH 07/15] docs/man/upssched.conf.txt, docs/man/upssched.txt: note that command execution is not async in upssched Signed-off-by: Jim Klimov --- docs/man/upssched.conf.txt | 7 +++++++ docs/man/upssched.txt | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/docs/man/upssched.conf.txt b/docs/man/upssched.conf.txt index d9992e9853..b0d38f04c8 100644 --- a/docs/man/upssched.conf.txt +++ b/docs/man/upssched.conf.txt @@ -18,6 +18,13 @@ IMPORTANT NOTES * Contents of this file should be pure ASCII (character codes not in range would be ignored with a warning message). +* Command execution is synchronous (with the called tool process in case + of `EXECUTE` directive, or with the timer process). Consider using your + system shell abilities like `&` to send long-duration handling to the + background and let `upssched` timer daemon continue. This should not + impact `upsmon` daemon, which handles each notification in a separate + sub-process (and so not a problem for immediate `EXECUTE` events). 
+ CONFIGURATION DIRECTIVES ------------------------ diff --git a/docs/man/upssched.txt b/docs/man/upssched.txt index face7658f0..ae09850300 100644 --- a/docs/man/upssched.txt +++ b/docs/man/upssched.txt @@ -81,6 +81,14 @@ If you also want to continue writing to the syslog, just add it in: For a full list of notify flags, see the linkman:upsmon[8] documentation. +Please note that command execution is synchronous (with the called `upssched` +tool process in case of `EXECUTE` directive, or with the timer process). +Consider using your system shell abilities like `&` to send long-duration +handling to the background and let `upssched` timer daemon continue. +This should not impact `upsmon` daemon, which handles each notification +in a separate sub-process (and so not a problem for immediate `EXECUTE` +events). + CONFIGURATION ------------- From 355caae1bdd5849860d744affe54ecc3fa1a7e50 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Mon, 6 Oct 2025 23:08:35 +0200 Subject: [PATCH 08/15] docs/man/upssched.txt: suggest using FSD notification for early shutdown activity on the upsmon primary system [#3003] Signed-off-by: Jim Klimov --- docs/man/upssched.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/man/upssched.txt b/docs/man/upssched.txt index ae09850300..3b790fbb0f 100644 --- a/docs/man/upssched.txt +++ b/docs/man/upssched.txt @@ -104,6 +104,24 @@ to shut down the slaves in a controlled manner. Be sure you cancel the timer if power returns (ONLINE). +EARLY PREPARATION FOR A SHUTDOWN ON UPSMON PRIMARY INSTANCE +----------------------------------------------------------- + +The linkman:upsmon[8] primary instance is responsible for telling the UPS(es) +to power off at the end of emergency shutdown. As such, if there are several +clients, the primary instance raises an "FSD" (Forced Shut Down) flag on the +data server for each UPS it manages, and waits for secondary instances to log +off (or for a timeout to expire). 
If there are activities that should happen +on the primary upsmon's computer during shutdown which take a long time, you +can use the `FSD` notification to begin those operations while the primary +`upsmon` instance waits for the secondaries to complete their shutdowns. + +If you have several UPSes, you may want to combine several notifications with +the `START-TIMER-SHARED` directive (with a short timeout), so you only react +once. Alternately, if the needed activity varies by the UPS (e.g. custom +remote-device shutdown scripts), you may actually want to use `EXECUTE` rules +right away (and dispatch further work in your `CMDSCRIPT`). + DEBOUNCING EVENTS ----------------- From 0c5fae926160174f44ddb53cdfeac23f630465da Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 7 Oct 2025 10:12:06 +0200 Subject: [PATCH 09/15] docs/man/upsmon.txt: reword intro (and name) of UPS CONNECTION TYPES AND UPSMON ROLES section Signed-off-by: Jim Klimov --- docs/man/upsmon.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/man/upsmon.txt b/docs/man/upsmon.txt index 521946b0dd..cfe1953cc3 100644 --- a/docs/man/upsmon.txt +++ b/docs/man/upsmon.txt @@ -394,16 +394,16 @@ This design allows you to lose some of your power supplies in a redundant power environment without bringing down the entire system, while still working properly for smaller systems. -UPS TYPES ---------- +UPS CONNECTION TYPES AND UPSMON ROLES +------------------------------------- *upsmon* and linkman:upsd[8] don't always run on the same system. When they -do, any UPSes that are directly attached to the upsmon host should be -monitored in "primary" mode. This makes upsmon take charge of that equipment, -and it will wait for the "secondary" systems to disconnect before shutting -down the local system. This allows the distant systems (monitoring over -the network) to shut down cleanly before `upsdrvctl shutdown` runs locally -and turns them all off. 
+do, any UPSes that are directly attached to that upsmon host should be +monitored in "primary" mode, which makes that upsmon instance take charge +of that equipment, and it will wait for the "secondary" systems to disconnect +before shutting down the local system. This allows the distant systems (just +monitoring over the network) to shut down cleanly before `upsdrvctl shutdown` +runs locally on the primary system and turns them all off. When upsmon runs as a secondary, it is relying on the distant system to tell it about the state of the UPS. When that UPS goes critical (on battery From a4af58209dc139ead6afcff9b0b13752bc16793e Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 7 Oct 2025 10:12:51 +0200 Subject: [PATCH 10/15] docs/man/upsmon.txt: from UPS CONNECTION TYPES AND UPSMON ROLES section, refer to TIMED SHUTDOWNS section Signed-off-by: Jim Klimov --- docs/man/upsmon.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/man/upsmon.txt b/docs/man/upsmon.txt index cfe1953cc3..f309f37eeb 100644 --- a/docs/man/upsmon.txt +++ b/docs/man/upsmon.txt @@ -423,7 +423,9 @@ should break somehow. This defaults to 15 seconds. If your primary system is shutting down too quickly, set the FINALDELAY interval to something greater than the default 15 seconds. Don't set this too high, or your UPS battery may run out of power before the -primary upsmon process shuts down that system. +primary upsmon process shuts down that system. If you do need more time, +consider starting the shutdown after a short time on battery, for details +see the Timed Shutdowns section. 
TIMED SHUTDOWNS --------------- From 266da08bdd39d9a4f2257dd918c25ef42f05c651 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 7 Oct 2025 10:26:27 +0200 Subject: [PATCH 11/15] docs/man/upsmon.txt: in SIMULATING POWER FAILURES section, refer to NIT scripts as a source of inspiration Signed-off-by: Jim Klimov --- docs/man/upsmon.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/man/upsmon.txt b/docs/man/upsmon.txt index f309f37eeb..2365e7121e 100644 --- a/docs/man/upsmon.txt +++ b/docs/man/upsmon.txt @@ -544,6 +544,16 @@ crawling under a desk to find the plug. Note you can also use a dummy SHUTDOWNCMD setting to just report that the systems would shut down at this point, without actually disrupting their work. +For inspiration, you can see the setup done by the NUT Integration Tests suite +under the `tests/NIT` directory in NUT sources, including references to the +shutdown and notification scripts which only log the activity (you may have +to configure at least a trivial NUT build and run `make check-NIT-sandbox` to +generate some of the configuration files -- or inspect the `nit.sh` script +which pieces them together). Notably, see `scripts/misc/notifyme-debug` as +not only a logger, but optionally a wrapper for `upssched` (in test runs), +and `clients/upssched-cmd` as a sample implementation of a linkman:upssched[8] +`CMDSCRIPT` which also focuses on logging. + WARNING: After such "dummy" experiments you may have to restart the NUT data server `upsd` to clear its "FSD" flag for the devices and clients involved, and make sure no files named by `POWERDOWNFLAG` option (e.g. 
`/etc/killpower`) From c14d5d75ace3199e17a14173a1d2d95a6ceca1fb Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 7 Oct 2025 10:31:07 +0200 Subject: [PATCH 12/15] docs/man/upsmon.txt, docs/nut.dict: introduce SHUTDOWN ACTIVITY WORKFLOW section [#3003, #3110] Signed-off-by: Jim Klimov --- docs/man/upsmon.txt | 49 +++++++++++++++++++++++++++++++++++++++++++++ docs/nut.dict | 4 +++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/docs/man/upsmon.txt b/docs/man/upsmon.txt index 2365e7121e..05c20e29c1 100644 --- a/docs/man/upsmon.txt +++ b/docs/man/upsmon.txt @@ -427,6 +427,8 @@ primary upsmon process shuts down that system. If you do need more time, consider starting the shutdown after a short time on battery, for details see the Timed Shutdowns section. +For a more technical take, please see the Shutdown Activity Workflow section. + TIMED SHUTDOWNS --------------- @@ -502,6 +504,8 @@ by starting another copy of the program with `-c fsd` command line argument. This is useful when you want to initiate a shutdown before the critical stage through some external means, such as linkman:upssched[8]. +For a more technical take, please see the Shutdown Activity Workflow section. + WARNING: Please note that by design, since we require power-cycling the load and don't want some systems to be powered off while others remain running if the "wall power" returns at the wrong moment as usual, the "FSD" @@ -559,6 +563,51 @@ server `upsd` to clear its "FSD" flag for the devices and clients involved, and make sure no files named by `POWERDOWNFLAG` option (e.g. `/etc/killpower`) remain on the `upsmon primary` systems under test. +SHUTDOWN ACTIVITY WORKFLOW +-------------------------- + +Looking into `clients/upsmon.c` sources as the ultimate authority, you +can find the chain of events during a forced shutdown, as outlined below. 
This can +help make sense of the timing variables involved, and notifications sent +(which you may want to handle, perhaps with linkman:upssched[8]): + +* The path to shutdown activity of a host starts when its locally running + `upsmon` client (in any role) decides the power situation is critical, + e.g. by having too few "healthy" power supplies in a real outage, and/or + by seeing `FSD` among `ups.status` tokens -- possibly still "latched" in + the `upsd` data server while you start a new `upsmon` instance, or when + you call `upsmon -c fsd` on that system to simulate the outage; +* Such `upsmon` instance ends up in `forceshutdown()` method; +* There it loops over all UPSes it `MONITOR`s as a `primary`, and calls the + `setfsd()` method for each (causing a local `FSD` notification, if one was + not sent earlier); +* If there were no such UPSes -- we are a secondary, and go into `doshutdown()` + method immediately; +* Otherwise we are a primary, and only go into `doshutdown()` method after + first completing the `sync_secondaries()` method, which: + * runs an infinite loop until either there are no other persistent clients + logged on to the data server (`upsd`) for each UPS we are a primary for, + or until `HOSTSYNC` timeout elapses; + * should issue a `SHUTDOWN_HOSTSYNC` notification if it is going to wait + at all (if there were other clients seen on first loop cycle); +* Finally, in the `doshutdown()` method, it: + * issues a `SHUTDOWN` notification; + * waits for `FINALDELAY`; + * starts the timer for `SHUTDOWNEXIT` (which may be used to force `upsmon` + process to linger after calling `SHUTDOWNCMD` -- e.g. 
can be used by a + secondary to block the primary from cutting power too early for some use + cases, like safely parking some external machinery); + * calls the `SHUTDOWNCMD` (either directly in mono-process mode, or by + telling the `root`-privileged part to do so in the common case); + * optionally lingers, if `SHUTDOWNEXIT` is so configured; + * ultimately exits the daemon (also causes the `root`-privileged part to exit, + by breaking the communications pipe between them). + +Note that if your `upsmon` does split into privileged and unprivileged parts, +all notifications run in the unprivileged context (your handling scripts may +have to `sudo` explicitly if/when/where you want to do something to the system). +Only `SHUTDOWNCMD` is called in privileged context. + DEAD UPSES ---------- diff --git a/docs/nut.dict b/docs/nut.dict index aba9ef023f..af914be15d 100644 --- a/docs/nut.dict +++ b/docs/nut.dict @@ -1,4 +1,4 @@ -personal_ws-1.1 en 3555 utf-8 +personal_ws-1.1 en 3557 utf-8 AAC AAS ABI @@ -1961,6 +1961,7 @@ docinfo docs dod domxml +doshutdown dotnet downloadable dpkg @@ -3043,6 +3044,7 @@ servicebypass setFeature setaux setflags +setfsd setgid setinfo setpci From e63365cdba9f4e50d5862bc5661153d9291fa48d Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 7 Oct 2025 10:33:18 +0200 Subject: [PATCH 13/15] tests/NIT/upssched.conf.in: add handling for SHUTDOWN_HOSTSYNC [#3084] Signed-off-by: Jim Klimov --- tests/NIT/upssched.conf.in | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/NIT/upssched.conf.in b/tests/NIT/upssched.conf.in index 5bc3a821a1..2a6d630c09 100644 --- a/tests/NIT/upssched.conf.in +++ b/tests/NIT/upssched.conf.in @@ -74,6 +74,7 @@ AT REPLBATT * ONBATT * EXECUTE REPLBATT-HANDLER AT NOCOMM * EXECUTE NOCOMM-HANDLER AT FSD * EXECUTE FSD-HANDLER AT SHUTDOWN * EXECUTE SHUTDOWN-HANDLER +AT SHUTDOWN_HOSTSYNC * EXECUTE SHUTDOWN_HOSTSYNC-HANDLER # info2admin only AT LOWBATT * EXECUTE LOWBATT-INFO From 
99d74b513009d026adf10458b642e61784fb5dd8 Mon Sep 
17 00:00:00 2001 From: Jim Klimov Date: Tue, 7 Oct 2025 10:40:36 +0200 Subject: [PATCH 14/15] conf/upsmon.conf.sample.in: align NOTIFYFLAG samples with NOTIFYMSG definitions [#3084] Also helps NIT generate an upsmon.conf to report those events in tests. Signed-off-by: Jim Klimov --- conf/upsmon.conf.sample.in | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/conf/upsmon.conf.sample.in b/conf/upsmon.conf.sample.in index 0dc09dc48c..1a0be42969 100644 --- a/conf/upsmon.conf.sample.in +++ b/conf/upsmon.conf.sample.in @@ -402,6 +402,7 @@ POWERDOWNFLAG "@POWERDOWNFLAG@" # NOTIFYFLAG COMMOK SYSLOG+WALL # NOTIFYFLAG COMMBAD SYSLOG+WALL # NOTIFYFLAG SHUTDOWN SYSLOG+WALL +# NOTIFYFLAG SHUTDOWN_HOSTSYNC SYSLOG+WALL # NOTIFYFLAG REPLBATT SYSLOG+WALL # NOTIFYFLAG NOCOMM SYSLOG+WALL # NOTIFYFLAG NOPARENT SYSLOG+WALL @@ -413,9 +414,17 @@ POWERDOWNFLAG "@POWERDOWNFLAG@" # NOTIFYFLAG NOTBYPASS SYSLOG+WALL # NOTIFYFLAG ECO SYSLOG+WALL # NOTIFYFLAG NOTECO SYSLOG+WALL +# # NOTIFYFLAG ALARM SYSLOG+WALL # NOTIFYFLAG NOTALARM SYSLOG+WALL # +# NOTIFYFLAG OVER SYSLOG+WALL +# NOTIFYFLAG NOTOVER SYSLOG+WALL +# NOTIFYFLAG TRIM SYSLOG+WALL +# NOTIFYFLAG NOTTRIM SYSLOG+WALL +# NOTIFYFLAG BOOST SYSLOG+WALL +# NOTIFYFLAG NOTBOOST SYSLOG+WALL +# # NOTIFYFLAG OTHER SYSLOG+WALL # NOTIFYFLAG NOTOTHER SYSLOG+WALL # From 7bbedea5aa0bcb6297b8e6844c5b972e4ff423bf Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 7 Oct 2025 13:44:54 +0200 Subject: [PATCH 15/15] clients/upssched.c: start_timer(): drop unneeded NULLness checks [#3105] Signed-off-by: Jim Klimov --- clients/upssched.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clients/upssched.c b/clients/upssched.c index b01787ae52..fe8f914a12 100644 --- a/clients/upssched.c +++ b/clients/upssched.c @@ -434,7 +434,7 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify break; } - if (ps == NULL || *ps == NULL) { + if (*ps == NULL) { tmp->notifytypes = xrealloc(tmp->notifytypes, count + 
2); tmp->notifytypes[count] = xstrdup(notifytype); tmp->notifytypes[count + 1] = NULL; @@ -457,7 +457,7 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify break; } - if (ps == NULL || *ps == NULL) { + if (*ps == NULL) { tmp->notifymsgs = xrealloc(tmp->notifymsgs, count + 2); tmp->notifymsgs[count] = xstrdup(notifymsg); tmp->notifymsgs[count + 1] = NULL; @@ -480,7 +480,7 @@ static void start_timer(const char *name, const char *ofsstr, const char *notify break; } - if (ps == NULL || *ps == NULL) { + if (*ps == NULL) { tmp->upsnames = xrealloc(tmp->upsnames, count + 2); tmp->upsnames[count] = xstrdup(upsname); tmp->upsnames[count + 1] = NULL;