diff --git a/NEWS.adoc b/NEWS.adoc index 928c9aef6f..d28fb2224e 100644 --- a/NEWS.adoc +++ b/NEWS.adoc @@ -428,25 +428,34 @@ as part of https://github.com/networkupstools/nut/issues/1410 solution. - Clarified documentation in codebase according to end-user feedback [#1721, #1750 and others over time] - - Several fixes for `upsmon` behavior [#1761, #1680...], including new - ability to configure default POWERDOWNFLAG location -- packagers are - encouraged to pick optimal location for their distributions (which - remains mounted at least read-only late in shutdown) and a new optional - POLLFAIL_LOG_THROTTLE_MAX setting [#529, #506] - - - Also `upsmon` should now recognize `OFF` and `BYPASS` flags in `ups.status` - and report that these states begin or end. The `OFF` state usually means - than an administrative action happened to power off the load, but the UPS - device is still alive and communicating (USB, SNMP, etc.); corresponding - `MONITOR`'ed amount of power sources are considered not being "fed" for - the power value calculation purposes. The `BYPASS` state is now treated - similarly to `ONBATT`: currently this UPS "feeds" its load, but if later - communications fail, it is considered dead. This may have unintended - consequences for devices (or NUT drivers) that do not report these modes - correctly (e.g. an APC calibration routine seems to start with a few - seconds of "OFF" state), so the reported status is only considered as a - loss of feed if it persists for more than `OFFDURATION` seconds. [#2044, - #2104] + - upsmon client changes include: + * Several fixes for `upsmon` behavior [#1761, #1680...], including new + ability to configure default POWERDOWNFLAG location -- packagers are + encouraged to pick optimal location for their distributions (which + remains mounted at least read-only late in shutdown) and a new optional + POLLFAIL_LOG_THROTTLE_MAX setting [#529, #506] + * Also `upsmon` should now recognize `OFF` and `BYPASS` flags in `ups.status` + and report that these states begin or end. The `OFF` state usually means + than an administrative action happened to power off the load, but the UPS + device is still alive and communicating (USB, SNMP, etc.); corresponding + `MONITOR`'ed amount of power sources are considered not being "fed" for + the power value calculation purposes. The `BYPASS` state is now treated + similarly to `ONBATT`: currently this UPS "feeds" its load, but if later + communications fail, it is considered dead. This may have unintended + consequences for devices (or NUT drivers) that do not report these modes + correctly (e.g. an APC calibration routine seems to start with a few + seconds of "OFF" state), so the reported status is only considered as a + loss of feed if it persists for more than `OFFDURATION` seconds. [#2044, + #2104] + * Introduced `SHUTDOWNEXIT no` configuration toggle for systems which + require a long time to stop their workload such as virtual machines. + Since the disconnection of a "secondary" client is treated by the + "primary" system as permission to proceed with its own shutdown and + power-off for the UPS, the original (now merely default) behavior to + call `SHUTDOWNCMD` and immediately exit could be counter-productive. + An optional delay can also be introduced. [#2133] + * Note there were other changes detailed below which impacted several NUT + programs, including `upsmon`. - Extended Linux systemd support with optional notifications about daemon state (READY, RELOADING, STOPPING) and watchdog keep-alive messages [#1590] diff --git a/clients/upsmon.c b/clients/upsmon.c index 75e366052f..bcdbba5a2c 100644 --- a/clients/upsmon.c +++ b/clients/upsmon.c @@ -112,6 +112,7 @@ static char *certpasswd = NULL; static int certverify = 0; /* don't verify by default */ static int forcessl = 0; /* don't require ssl by default */ +static int shutdownexitdelay = 0; /* by default doshutdown() exits immediately */ static int userfsd = 0, pipefd[2]; /* Should we run "all in one" (e.g. as root) or split * into two upsmon processes for some more security? */ @@ -811,6 +812,28 @@ static void doshutdown(void) shutdowncmd); } + if (shutdownexitdelay == 0) { + upsdebugx(1, + "Exiting upsmon immediately " + "after initiating shutdown, by default"); + } else + if (shutdownexitdelay < 0) { + upslogx(LOG_WARNING, + "Configured to not exit upsmon " + "after initiating shutdown"); + /* Technically, here we sleep until SIGTERM or poweroff */ + do { + sleep(1); + } while (!exit_flag); + } else { + upslogx(LOG_WARNING, + "Configured to only exit upsmon %d sec " + "after initiating shutdown", shutdownexitdelay); + do { + sleep(1); + shutdownexitdelay--; + } while (!exit_flag && shutdownexitdelay); + } exit(EXIT_SUCCESS); } @@ -1581,6 +1604,28 @@ static int parse_conf_arg(size_t numargs, char **arg) return 1; } + /* SHUTDOWNEXIT */ + if (!strcmp(arg[0], "SHUTDOWNEXIT")) { + if (!strcasecmp(arg[1], "on") + || !strcasecmp(arg[1], "yes") + || !strcasecmp(arg[1], "true")) { + shutdownexitdelay = 0; + } else + if (!strcasecmp(arg[1], "off") + || !strcasecmp(arg[1], "no") + || !strcasecmp(arg[1], "false")) { + shutdownexitdelay = -1; + } else { + if (!str_to_int(arg[1], &shutdownexitdelay, 10)) { + upslogx(LOG_WARNING, + "SHUTDOWNEXIT value not recognized, " + "defaulting to 'yes'"); + shutdownexitdelay = 0; + } + } + return 1; + } + /* POWERDOWNFLAG */ if (!strcmp(arg[0], "POWERDOWNFLAG")) { checkmode(arg[0], powerdownflag, arg[1], reload_flag); diff --git a/conf/upsmon.conf.sample.in b/conf/upsmon.conf.sample.in index e26b487c91..89bfb87baf 100644 --- a/conf/upsmon.conf.sample.in +++ b/conf/upsmon.conf.sample.in @@ -159,6 +159,39 @@ MINSUPPLIES 1 SHUTDOWNCMD "/sbin/shutdown -h +0" +# -------------------------------------------------------------------------- +# SHUTDOWNEXIT +# +# After initiating shutdown, should this upsmon daemon itself exit? +# By doing so NUT secondary systems can tell the NUT primary that +# it can proceed with its own shutdown and eventually tell the UPS +# to cut power for the load. ("Yes" by default) +# +# Some "secondary" systems with workloads that take considerable time +# to stop (e.g. virtual machines or large databases) can benefit from +# reporting (by virtue of logging off the data server) that they are +# ready for the "primary" system to begin its own shutdown and eventually +# to tell the UPS to cut the power - not as soon as they have triggered +# their own shutdown, but at a later point (e.g. when the upsmon service +# is stopped AFTER the heavier workloads). +# +# Note that the actual ability to complete such shutdown depends on the +# remaining battery run-time at the moment when UPS power state becomes +# considered critical and the shutdowns begin. You may also have to tune +# HOSTSYNC on the NUT primary to be long enough for those secondaries to +# stop their services. In practice, it may be worthwhile to investigate +# ways to trigger shutdowns earlier on these systems, e.g. by setting up +# `upssched` integration, or `dummy-ups` driver with overrides for stricter +# `battery.charge` or `battery.runtime` triggers than used by the rest of +# your servers. +# +# This option supports Boolean-style strings (yes/on/true or no/off/false) +# or numbers to define a delay (in seconds) between calling SHUTDOWNCMD +# and exiting the daemon. Zero means immediate exit (default), negative +# values mean never exiting on its own accord. +# +#SHUTDOWNEXIT yes + # -------------------------------------------------------------------------- # NOTIFYCMD # diff --git a/docs/man/upsmon.conf.txt b/docs/man/upsmon.conf.txt index faf3135c9c..7ad81c1dd3 100644 --- a/docs/man/upsmon.conf.txt +++ b/docs/man/upsmon.conf.txt @@ -384,6 +384,36 @@ together, i.e.: SHUTDOWNCMD "/sbin/shutdown -h +0" +*SHUTDOWNEXIT* 'boolean|number':: + +After initiating shutdown, should this upsmon daemon itself exit? +By doing so NUT secondary systems can tell the NUT primary that +it can proceed with its own shutdown and eventually tell the UPS +to cut power for the load. ("Yes" by default) ++ +Some "secondary" systems with workloads that take considerable time +to stop (e.g. virtual machines or large databases) can benefit from +reporting (by virtue of logging off the data server) that they are +ready for the "primary" system to begin its own shutdown and eventually +to tell the UPS to cut the power - not as soon as they have triggered +their own shutdown, but at a later point (e.g. when the upsmon service +is stopped AFTER the heavier workloads). ++ +Note that the actual ability to complete such shutdown depends on the +remaining battery run-time at the moment when UPS power state becomes +considered critical and the shutdowns begin. You may also have to tune +`HOSTSYNC` on the NUT primary to be long enough for those secondaries to +stop their services. In practice, it may be worthwhile to investigate +ways to trigger shutdowns earlier on these systems, e.g. by setting up +`upssched` integration, or `dummy-ups` driver with overrides for stricter +`battery.charge` or `battery.runtime` triggers than used by the rest of +your servers. ++ +This option supports Boolean-style strings (yes/on/true or no/off/false) +or numbers to define a delay (in seconds) between calling `SHUTDOWNCMD` +and exiting the daemon. Zero means immediate exit (default), negative +values mean never exiting on its own accord. + *CERTPATH* 'certificate file or database':: When compiled with SSL support, you can enter the certificate path here. diff --git a/docs/nut.dict b/docs/nut.dict index 16b9ffb2f0..6431e49749 100644 --- a/docs/nut.dict +++ b/docs/nut.dict @@ -1,4 +1,4 @@ -personal_ws-1.1 en 3255 utf-8 +personal_ws-1.1 en 3257 utf-8 AAC AAS ABI @@ -1111,6 +1111,7 @@ SG SGI SHA SHUTDOWNCMD +SHUTDOWNEXIT SIG SIGHUP SIGINT @@ -1324,7 +1325,6 @@ UPS's UPSCONN UPSDESC UPSHOST -upsid UPSIMAGEPATH UPSLC UPSOutletSystemOutletDelayBeforeReboot @@ -3096,6 +3096,7 @@ upsfetch upsgone upsh upshandler +upsid upsidentmodel upsimage upsload