summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorChris Down <chris@chrisdown.name>2022-11-01 00:17:21 +0000
committerCraig Small <csmall@dropbear.xyz>2023-01-15 04:05:40 +0000
commit866abacf8805a74fb7c59cae1f64963e0a540b14 (patch)
tree56ad0980a9a39cbfa8eabbe8406ffbcb98729ac2 /src
parent1db14dafd95e29b1d9aca5dc1775598aabc0a24b (diff)
downloadprocps-ng-866abacf8805a74fb7c59cae1f64963e0a540b14.tar.gz
pgrep: Support matching on the presence of a userspace signal handler
In production we've had several incidents over the years where a process has a signal handler registered for SIGHUP or one of the SIGUSR signals which can be used to signal a request to reload configs, rotate log files, and the like. While this may seem harmless enough, what we've seen happen repeatedly is something like the following: 1. A process is using SIGHUP/SIGUSR[12] to request some application-handled state change -- reloading configs, rotating a log file, etc; 2. This kind of request is deprecated and removed, so the signal handler is removed. However, a site where the signal might be sent from is missed (often logrotate or a service manager); 3. Because the default disposition of these signals is terminal, sooner or later these applications are going to be sent SIGHUP or similar and end up unexpectedly killed. I know for a fact that we're not the only organisation experiencing this: in general, signal use is pretty tricky to reason about and safely remove because of the fairly aggressive SIG_DFL behaviour for some common signals, especially for SIGHUP which has a particularly ambiguous meaning. Especially in a large, highly interconnected codebase, reasoning about signal interactions between system configuration and applications can be highly complex, and it's inevitable that on occasion a callsite will be missed. In some cases the right call to avoid this will be to migrate services towards other forms of IPC for this purpose, but inevitably there will be some services which must continue using signals, so we need a safe way to support them. This patch adds support for the -H/--require-handler flag, which matches on processes with a userspace handler present for the signal being sent. With this flag we can enforce that all SIGHUP reload cases and SIGUSR equivalents use --require-handler. This effectively mitigates the case we've seen time and time again where SIGHUP is used to rotate log files or reload configs, but the sending site is mistakenly left present after the removal of signal handler, resulting in unintended termination of the process. Signed-off-by: Chris Down <chris@chrisdown.name>
Diffstat (limited to 'src')
-rw-r--r--src/pgrep.c49
1 files changed, 39 insertions, 10 deletions
diff --git a/src/pgrep.c b/src/pgrep.c
index eeebf3e..7d0cc93 100644
--- a/src/pgrep.c
+++ b/src/pgrep.c
@@ -75,12 +75,13 @@ enum pids_item Items[] = {
PIDS_CMDLINE,
PIDS_STATE,
PIDS_TIME_ELAPSED,
- PIDS_CGROUP_V
+ PIDS_CGROUP_V,
+ PIDS_SIGCATCH
};
enum rel_items {
EU_PID, EU_PPID, EU_PGRP, EU_EUID, EU_RUID, EU_RGID, EU_SESSION,
EU_TGID, EU_STARTTIME, EU_TTYNAME, EU_CMD, EU_CMDLINE, EU_STA, EU_ELAPSED,
- EU_CGROUP
+ EU_CGROUP, EU_SIGCATCH
};
#define grow_size(x) do { \
if ((x) < 0 || (size_t)(x) >= INT_MAX / 5 / sizeof(struct el)) \
@@ -119,6 +120,7 @@ static int opt_echo = 0;
static int opt_threads = 0;
static pid_t opt_ns_pid = 0;
static bool use_sigqueue = false;
+static bool require_handler = false;
static union sigval sigval = {0};
static const char *opt_delim = "\n";
@@ -157,7 +159,7 @@ static int __attribute__ ((__noreturn__)) usage(int opt)
fputs(_(" -w, --lightweight list all TID\n"), fp);
break;
case PKILL:
- fputs(_(" -<sig>, --signal <sig> signal to send (either number or name)\n"), fp);
+ fputs(_(" -H, --require-handler match only if signal handler is present\n"), fp);
fputs(_(" -q, --queue <value> integer value to be sent with the signal\n"), fp);
fputs(_(" -e, --echo display what is killed\n"), fp);
break;
@@ -167,6 +169,7 @@ static int __attribute__ ((__noreturn__)) usage(int opt)
break;
#endif
}
+ fputs(_(" -<sig>, --signal <sig> signal to send (either number or name)\n"), fp);
fputs(_(" -c, --count count of matching processes\n"), fp);
fputs(_(" -f, --full use full process name to match\n"), fp);
fputs(_(" -g, --pgroup <PGID,...> match listed process group IDs\n"), fp);
@@ -212,7 +215,7 @@ static struct el *get_our_ancestors(void)
while (!done) {
struct pids_info *info = NULL;
- if (procps_pids_new(&info, Items, 15) < 0)
+ if (procps_pids_new(&info, Items, 16) < 0)
xerrx(EXIT_FATAL, _("Unable to create pid info structure"));
if (i == size) {
@@ -472,6 +475,24 @@ static int match_numlist (long value, const struct el *restrict list)
return found;
}
+static unsigned long long unhex (const char *restrict in)
+{
+ unsigned long long ret;
+ char *rem;
+ errno = 0;
+ ret = strtoull(in, &rem, 16);
+ if (errno || *rem != '\0') {
+ xwarnx(_("not a hex string: %s"), in);
+ return 0;
+ }
+ return ret;
+}
+
+static int match_signal_handler (const char *restrict sigcgt, const int signal)
+{
+ return sigcgt && (((1UL << (signal - 1)) & unhex(sigcgt)) != 0);
+}
+
static int match_strlist (const char *restrict value, const struct el *restrict list)
{
int found = 0;
@@ -648,7 +669,7 @@ static struct el * select_procs (int *num)
_("Error reading reference namespace information\n"));
}
- if (procps_pids_new(&info, Items, 15) < 0)
+ if (procps_pids_new(&info, Items, 16) < 0)
xerrx(EXIT_FATAL,
_("Unable to create pid info structure"));
which = PIDS_FETCH_TASKS_ONLY;
@@ -691,6 +712,8 @@ static struct el * select_procs (int *num)
match = 0;
else if (opt_cgroup && ! match_cgroup_list (PIDS_GETSTV(CGROUP), opt_cgroup))
match = 0;
+ else if (require_handler && ! match_signal_handler (PIDS_GETSTR(SIGCATCH), opt_signal))
+ match = 0;
task_cmdline = PIDS_GETSTR(CMDLINE);
@@ -796,6 +819,7 @@ static int pidfd_open (pid_t pid, unsigned int flags)
static void parse_opts (int argc, char **argv)
{
char opts[64] = "";
+ int sig;
int opt;
int criteria_count = 0;
@@ -808,6 +832,7 @@ static void parse_opts (int argc, char **argv)
static const struct option longopts[] = {
{"signal", required_argument, NULL, SIGNAL_OPTION},
{"ignore-ancestors", no_argument, NULL, 'A'},
+ {"require-handler", no_argument, NULL, 'H'},
{"count", no_argument, NULL, 'c'},
{"cgroup", required_argument, NULL, CGROUP_OPTION},
{"delimiter", required_argument, NULL, 'd'},
@@ -840,6 +865,10 @@ static void parse_opts (int argc, char **argv)
{NULL, 0, NULL, 0}
};
+ sig = signal_option(&argc, argv);
+ if (-1 < sig)
+ opt_signal = sig;
+
#ifdef ENABLE_PIDWAIT
if (strcmp (program_invocation_short_name, "pidwait") == 0 ||
strcmp (program_invocation_short_name, "lt-pidwait") == 0) {
@@ -849,18 +878,14 @@ static void parse_opts (int argc, char **argv)
#endif
if (strcmp (program_invocation_short_name, "pkill") == 0 ||
strcmp (program_invocation_short_name, "lt-pkill") == 0) {
- int sig;
prog_mode = PKILL;
- sig = signal_option(&argc, argv);
- if (-1 < sig)
- opt_signal = sig;
strcat (opts, "eq:");
} else {
strcat (opts, "lad:vw");
prog_mode = PGREP;
}
- strcat (opts, "LF:cfinoxP:O:Ag:s:u:U:G:t:r:?Vh");
+ strcat (opts, "LF:cfinoxP:O:AHg:s:u:U:G:t:r:?Vh");
while ((opt = getopt_long (argc, argv, opts, longopts, NULL)) != -1) {
switch (opt) {
@@ -1017,6 +1042,10 @@ static void parse_opts (int argc, char **argv)
usage ('?');
++criteria_count;
break;
+ case 'H':
+ require_handler = true;
+ ++criteria_count;
+ break;
case 'h':
case '?':
usage (opt);