diff options
author | Chen Jingpiao <chenjingpiao@gmail.com> | 2018-05-04 16:35:40 +0800 |
---|---|---|
committer | Chen Jingpiao <chenjingpiao@gmail.com> | 2018-08-14 22:08:42 +0800 |
commit | 27cb070fbf240964d8c02605e185610735c4a900 (patch) | |
tree | aed6a93695543b7e34736e8ba2fd52087ed22184 | |
parent | 6a56ba6d95bb9f39396ef8c7cdda21bddc336294 (diff) | |
download | strace-27cb070fbf240964d8c02605e185610735c4a900.tar.gz |
Introduce seccomp-assisted syscall filtering
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | filter_seccomp.c | 380 | ||||
-rw-r--r-- | filter_seccomp.h | 57 | ||||
-rw-r--r-- | strace.c | 38 |
4 files changed, 473 insertions, 4 deletions
diff --git a/Makefile.am b/Makefile.am index 9e5eef276..4e21511d7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -149,6 +149,8 @@ strace_SOURCES = \ file_ioctl.c \ filter_qualify.c \ filter.h \ + filter_seccomp.c \ + filter_seccomp.h \ flock.c \ flock.h \ fs_x_ioctl.c \ diff --git a/filter_seccomp.c b/filter_seccomp.c new file mode 100644 index 000000000..940415869 --- /dev/null +++ b/filter_seccomp.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2018 Chen Jingpiao <chenjingpiao@gmail.com> + * Copyright (c) 2018 The strace developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "defs.h" + +#include "ptrace.h" +#include <sys/prctl.h> +#include <sys/wait.h> +#include <linux/audit.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <asm/unistd.h> +#include <signal.h> + +#include "filter_seccomp.h" +#include "number_set.h" + +bool enable_seccomp_filter = false; +bool seccomp_before_ptrace; + +static void +check_seccomp_order_do_child(void) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_W + BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_getuid, 0, 1), + BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRACE), + BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW) + }; + + struct sock_fprog prog = { + .len = ARRAY_SIZE(filter), + .filter = filter + }; + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) < 0) + perror_func_msg_and_die("ptrace(PTRACE_TRACEME, ..."); + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) + perror_func_msg_and_die("prctl(PR_SET_NO_NEW_PRIVS, 1, ..."); + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0) + perror_func_msg_and_die("prctl(PR_SET_SECCOMP)"); + kill(getpid(), SIGSTOP); + syscall(__NR_getuid); + pause(); + _exit(0); +} + +static void +check_seccomp_order_tracer(int pid) +{ + int status, tracee_pid, flags = 0; + + while (1) { + errno = 0; + tracee_pid = waitpid(pid, &status, 0); + if (tracee_pid <= 0) { + if (errno == EINTR) + continue; + perror_func_msg_and_die("unexpected wait result %d", + tracee_pid); + } + if (flags == 0) { + if (ptrace(PTRACE_SETOPTIONS, pid, 0, + PTRACE_O_TRACESECCOMP) < 0) + perror_func_msg_and_die("ptrace(PTRACE_SETOPTIONS, ..."); + if (ptrace(PTRACE_SYSCALL, pid, NULL, NULL) < 0) + perror_func_msg_and_die("ptrace(PTRACE_SYSCALL, ..."); + } else if (flags == 1) { + if ((status >> 16) == PTRACE_EVENT_SECCOMP) + seccomp_before_ptrace = true; + else + seccomp_before_ptrace = false; + kill(pid, SIGKILL); + } else { + if (WIFSIGNALED(status)) + break; + + error_func_msg_and_die("unexpected wait status %#x", + status); + 
} + flags++; + } +} + +static void +check_seccomp_order(void) +{ + int pid; + + pid = fork(); + if (pid < 0) + perror_func_msg_and_die("fork"); + + if (pid == 0) + check_seccomp_order_do_child(); + + check_seccomp_order_tracer(pid); +} + +static bool +traced_by_seccomp(unsigned int scno, unsigned int p) +{ + return !sysent_vec[p][scno].sys_func + || sysent_vec[p][scno].sys_flags & TRACE_INDIRECT_SUBCALL + || is_number_in_set_array(scno, trace_set, p) + || strcmp("execve", sysent_vec[p][scno].sys_name) == 0 + || strcmp("execveat", sysent_vec[p][scno].sys_name) == 0 +#if defined SPARC || defined SPARC64 + || strcmp("execv", sysent_vec[p][scno].sys_name) == 0 +#endif + || strcmp("socketcall", sysent_vec[p][scno].sys_name) == 0 + || strcmp("ipc", sysent_vec[p][scno].sys_name) == 0 +#ifdef LINUX_MIPSO32 + || strcmp("syscall", sysent_vec[p][scno].sys_name) == 0 +#endif + ; +} + +static void +check_bpf_instruction_number(void) +{ + for (unsigned int p = 0; p < SUPPORTED_PERSONALITIES; ++p) { + unsigned int lower = UINT_MAX, count = 0; + + for (unsigned int i = 0; i < nsyscall_vec[p]; ++i) { + if (traced_by_seccomp(i, p)) { + if (lower == UINT_MAX) + lower = i; + continue; + } + if (lower == UINT_MAX) + continue; + if (lower + 1 == i) + count++; + else + count += 2; + lower = UINT_MAX; + } + if (lower != UINT_MAX) + count += 2; + if (count > SECCOMP_TRACE_SYSCALL_MAX) { + enable_seccomp_filter = false; + break; + } + } +} + +void +check_seccomp_filter(void) +{ + if (!enable_seccomp_filter) + goto end; +#ifdef SECCOMP_MODE_FILTER + int rc; + + if (NOMMU_SYSTEM) { + enable_seccomp_filter = false; + goto end; + } + + rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, 0, 0); + if (rc < 0 && errno == EINVAL) + enable_seccomp_filter = false; + else + enable_seccomp_filter = true; + if (enable_seccomp_filter) + check_bpf_instruction_number(); + if (enable_seccomp_filter) + check_seccomp_order(); +#else + enable_seccomp_filter = false; +#endif +end: + debug_msg("seccomp-filter: 
%s", + enable_seccomp_filter ? "enable" : "disable"); +} + +static unsigned short +bpf_add_traced_syscall(struct sock_filter *filter, + unsigned int lower, unsigned int upper) +{ + if (lower + 1 == upper) { + /* filter[X].jt will set when return instruction added */ + SET_BPF_JUMP(filter, BPF_JMP + BPF_JEQ + BPF_K, lower, 0, 0); + return 1; + } else { + SET_BPF_JUMP(filter, BPF_JMP + BPF_JGE + BPF_K, lower, 0, 1); + ++filter; + /* filter[X].jf will set when return instruction added */ + SET_BPF_JUMP(filter, BPF_JMP + BPF_JGE + BPF_K, upper, 0, 0); + return 2; + } +} + +static void +dump_seccomp_bpf(const struct sock_filter *filter, unsigned short len) +{ + for (unsigned int i = 0; i < len; ++i) { + if (filter[i].code == BPF_LD + BPF_W + BPF_ABS) { + debug_msg("STMT(BPF_LD + BPF_W + BPF_ABS, %u)", filter[i].k); + } else if (filter[i].code == BPF_RET + BPF_K) { + debug_msg("STMT(BPF_RET + BPF_K, %u)", filter[i].k); + } else if (filter[i].code == BPF_JMP + BPF_JEQ + BPF_K) { + debug_msg("JUMP(BPF_JMP + BPF_JEQ + BPF_K, %u, %u, %u)", + filter[i].jt, filter[i].jf, filter[i].k); + } else if (filter[i].code == BPF_JMP + BPF_JGE + BPF_K) { + debug_msg("JUMP(BPF_JMP + BPF_JGE + BPF_K, %u, %u, %u)", + filter[i].jt, filter[i].jf, filter[i].k); + } else { + debug_msg("STMT(%u, %u, %u, %u)", + filter[i].code, filter[i].jt, + filter[i].jf, filter[i].k); + } + } +} + +static unsigned short +init_sock_filter(struct sock_filter *filter) +{ + unsigned short pos = 0; +#if SUPPORTED_PERSONALITIES > 1 + unsigned int audit_arch_vec[] = { +# if defined X86_64 + AUDIT_ARCH_X86_64, + AUDIT_ARCH_I386, + AUDIT_ARCH_X86_64 +# elif SUPPORTED_PERSONALITIES == 2 + AUDIT_ARCH_X86_64, + AUDIT_ARCH_I386 +# endif + }; +#endif + unsigned int syscall_bit_vec[] = { +#if defined X86_64 + 0, 0, __X32_SYSCALL_BIT +#elif defined X32 + __X32_SYSCALL_BIT, 0 +#elif SUPPORTED_PERSONALITIES == 2 + 0, 0 +#else + 0 +#endif + }; + +#if SUPPORTED_PERSONALITIES > 1 + SET_BPF_STMT(&filter[pos++], BPF_LD + BPF_W + 
BPF_ABS, + offsetof(struct seccomp_data, arch)); +#endif + for (unsigned int p = 0; p < SUPPORTED_PERSONALITIES; ++p) { + unsigned int lower = UINT_MAX; + unsigned short previous = pos, start, end; + +#if SUPPORTED_PERSONALITIES > 1 + /* filter[X].jf will set when return instruction added */ + SET_BPF_JUMP(&filter[pos++], BPF_JMP + BPF_JEQ + BPF_K, + audit_arch_vec[p], 0, 0); +#endif + SET_BPF_STMT(&filter[pos++], BPF_LD + BPF_W + BPF_ABS, + offsetof(struct seccomp_data, nr)); + + start = pos; + for (unsigned int i = 0; i < nsyscall_vec[p]; ++i) { + if (traced_by_seccomp(i, p)) { + if (lower == UINT_MAX) + lower = i; + continue; + } + if (lower == UINT_MAX) + continue; + pos += bpf_add_traced_syscall(filter + pos, + lower + syscall_bit_vec[p], + i + syscall_bit_vec[p]); + lower = UINT_MAX; + } + if (lower != UINT_MAX) + pos += bpf_add_traced_syscall(filter + pos, + lower + syscall_bit_vec[p], + nsyscall_vec[p] + syscall_bit_vec[p]); + end = pos; + +#ifdef X86_64 + if (p == 0) { + SET_BPF_JUMP(&filter[pos++], BPF_JMP + BPF_JGE + BPF_K, + __X32_SYSCALL_BIT, 0, 2); + SET_BPF_STMT(&filter[pos++], BPF_LD + BPF_W + BPF_ABS, + offsetof(struct seccomp_data, arch)); + SET_BPF_JUMP(&filter[pos++], BPF_JMP + BPF_JEQ + BPF_K, + AUDIT_ARCH_X86_64, 3, 0); + + SET_BPF_STMT(&filter[pos++], BPF_LD + BPF_W + BPF_ABS, + offsetof(struct seccomp_data, nr)); + } +#endif + SET_BPF_JUMP(&filter[pos++], BPF_JMP + BPF_JGE + BPF_K, + nsyscall_vec[p] + syscall_bit_vec[p], 1, 0); + + SET_BPF_STMT(&filter[pos++], BPF_RET + BPF_K, + SECCOMP_RET_ALLOW); + SET_BPF_STMT(&filter[pos++], BPF_RET + BPF_K, + SECCOMP_RET_TRACE); + filter[previous].jf = pos - previous - 1; + for (unsigned int i = start; i < end; ++i) { + if (BPF_CLASS(filter[i].code) != BPF_JMP) + continue; + if (BPF_OP(filter[i].code) == BPF_JEQ) + filter[i].jt = pos - i - 2; + else if (BPF_OP(filter[i].code) == BPF_JGE + && filter[i].jf == 0) + filter[i].jf = pos - i - 2; + } + } +#if SUPPORTED_PERSONALITIES > 1 + 
SET_BPF_STMT(&filter[pos++], BPF_RET + BPF_K, SECCOMP_RET_TRACE); +#endif + + dump_seccomp_bpf(filter, pos); + + return pos; +} + +static void +do_seccomp(struct sock_fprog *prog) +{ + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog) < 0) + perror_msg_and_die("prctl"); +} + +void +init_seccomp_filter(void) +{ + struct sock_filter filter[SECCOMP_BPF_MAXINSNS]; + unsigned short len; + + len = init_sock_filter(filter); + + struct sock_fprog prog = { + .len = len, + .filter = filter + }; + + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) + perror_msg_and_die("prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)"); + + do_seccomp(&prog); +} + +int +seccomp_filter_restart_operator(const struct tcb *tcp) +{ + if (tcp + && (tcp->flags & TCB_INSYSCALL) + && traced_by_seccomp(tcp->scno, current_personality)) + return PTRACE_SYSCALL; + return PTRACE_CONT; +} diff --git a/filter_seccomp.h b/filter_seccomp.h new file mode 100644 index 000000000..d9f23e724 --- /dev/null +++ b/filter_seccomp.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 Chen Jingpiao <chenjingpiao@gmail.com> + * Copyright (c) 2018 The strace developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STRACE_SECCOMP_FILTER_H +#define STRACE_SECCOMP_FILTER_H + +#include "defs.h" + +#ifdef HAVE_LINUX_SECCOMP_H +# include <linux/seccomp.h> +#endif + +#define SECCOMP_TRACE_SYSCALL_MAX (SUPPORTED_PERSONALITIES * 150) +#define SECCOMP_BPF_MAXINSNS (SECCOMP_TRACE_SYSCALL_MAX + 200) + +extern bool enable_seccomp_filter; +extern bool seccomp_before_ptrace; + +extern void check_seccomp_filter(void); +extern void init_seccomp_filter(void); +extern int seccomp_filter_restart_operator(const struct tcb *); + +#define SET_BPF(filter, code, jt, jf, k) \ + (*(filter) = (struct sock_filter) { code, jt, jf, k }) + +#define SET_BPF_STMT(filter, code, k) \ + SET_BPF(filter, code, 0, 0, k) + +#define SET_BPF_JUMP(filter, code, k, jt, jf) \ + SET_BPF(filter, code, jt, jf, k) + +#endif /* !STRACE_SECCOMP_FILTER_H */ diff --git a/strace.c b/strace.c --- a/strace.c +++ b/strace.c @@ -51,6 +51,7 @@ #endif #include <asm/unistd.h> +#include "filter_seccomp.h" #include "largefile_wrappers.h" #include "mmap_cache.h" #include "number_set.h" @@ -302,6 +303,8 @@ Startup:\n\ \n\ Miscellaneous:\n\ -d enable debug output to stderr\n\ + -n enable enable seccomp filtering\n\ + disable disable seccomp filtering\n\ -v verbose mode: print unabbreviated argv, stat, termios, etc. 
args\n\ -h print help message\n\ -V print version\n\ @@ -1222,6 +1225,8 @@ exec_or_die(void) if (params_for_tracee.child_sa.sa_handler != SIG_DFL) sigaction(SIGCHLD, &params_for_tracee.child_sa, NULL); + if (enable_seccomp_filter) + init_seccomp_filter(); execv(params->pathname, params->argv); perror_msg_and_die("exec"); } @@ -1602,7 +1607,7 @@ init(int argc, char *argv[]) #ifdef ENABLE_STACKTRACE "k" #endif - "a:Ab:cCdDe:E:fFhiI:o:O:p:P:qrs:S:tTu:vVwxX:yz")) != EOF) { + "a:Ab:cCdDe:E:fFhiI:no:O:p:P:qrs:S:tTu:vVwxX:yz")) != EOF) { switch (c) { case 'a': acolumn = string_to_uint(optarg); @@ -1704,6 +1709,9 @@ init(int argc, char *argv[]) case 'u': username = optarg; break; + case 'n': + enable_seccomp_filter = true; + break; case 'v': qualify("abbrev=none"); break; @@ -1814,7 +1822,12 @@ init(int argc, char *argv[]) run_gid = getgid(); } - if (followfork) + if (enable_seccomp_filter) { + check_seccomp_filter(); + ptrace_setoptions |= PTRACE_O_TRACESECCOMP; + } + + if (followfork || enable_seccomp_filter) ptrace_setoptions |= PTRACE_O_TRACECLONE | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK; @@ -2316,6 +2329,11 @@ next_event(int *pstatus, siginfo_t *si) return TE_NEXT; } + if (!followfork && enable_seccomp_filter && pid != strace_child) { + ptrace(PTRACE_CONT, pid, 0, WSTOPSIG(*pstatus)); + return TE_NEXT; + } + if (debug_flag) print_debug_info(pid, status); @@ -2440,8 +2458,13 @@ trace_syscall(struct tcb *tcp, unsigned int *sig) static bool dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si) { - unsigned int restart_op = PTRACE_SYSCALL; unsigned int restart_sig = 0; + unsigned int restart_op; + + if (enable_seccomp_filter) + restart_op = seccomp_filter_restart_operator(current_tcp); + else + restart_op = PTRACE_SYSCALL; switch (ret) { case TE_BREAK: @@ -2454,7 +2477,11 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si) break; case TE_SECCOMP: - break; + if (seccomp_before_ptrace) { + restart_op = PTRACE_SYSCALL; + break; + } + 
ATTRIBUTE_FALLTHROUGH; case TE_SYSCALL_STOP: if (trace_syscall(current_tcp, &restart_sig) < 0) { @@ -2471,6 +2498,9 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si) */ return true; } + if (enable_seccomp_filter) + restart_op = (current_tcp->flags & TCB_INSYSCALL) + ? PTRACE_SYSCALL : PTRACE_CONT; break; case TE_SIGNAL_DELIVERY_STOP: |