/* * Copyright (c) 1991, 1992 Paul Kranenburg * Copyright (c) 1993 Branko Lankester * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey * Copyright (c) 1996-1999 Wichert Akkerman * Copyright (c) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation * Linux for s390 port by D.J. Barrow * * Copyright (c) 1999-2023 The strace developers. * All rights reserved. * * SPDX-License-Identifier: LGPL-2.1-or-later */ #include "defs.h" #include "get_personality.h" #include "mmap_notify.h" #include "native_defs.h" #include "ptrace.h" #include "ptrace_syscall_info.h" #include "nsig.h" #include "number_set.h" #include "delay.h" #include "poke.h" #include "retval.h" #include #include /* for struct iovec */ #include /* for PR_SET_NAME */ #include /* for __X32_SYSCALL_BIT */ #include "scno.h" #include "regs.h" #if defined(SPARC64) # undef PTRACE_GETREGS # define PTRACE_GETREGS PTRACE_GETREGS64 # undef PTRACE_SETREGS # define PTRACE_SETREGS PTRACE_SETREGS64 #endif #include "sen.h" #include "xstring.h" #include "syscallent_base_nr.h" /* Define these shorthand notations to simplify the syscallent files. */ #include "sysent_shorthand_defs.h" #define SEN(syscall_name) SEN_ ## syscall_name, SYS_FUNC_NAME(sys_ ## syscall_name) const struct_sysent sysent0[] = { #include "syscallent.h" }; #if SUPPORTED_PERSONALITIES > 1 # include PERSONALITY1_INCLUDE_FUNCS static const struct_sysent sysent1[] = { # include "syscallent1.h" }; #endif #if SUPPORTED_PERSONALITIES > 2 # include PERSONALITY2_INCLUDE_FUNCS static const struct_sysent sysent2[] = { # include "syscallent2.h" }; #endif /* Now undef them since short defines cause wicked namespace pollution. */ #include "sysent_shorthand_undefs.h" const char *const errnoent[] = { #include "errnoent.h" }; const char *const signalent[] = { #include "signalent.h" }; /* * `ioctlent[012].h' files are automatically generated by the auxiliary * program `ioctlsort', such that the list is sorted by the `code' field. * This has the side-effect of resolving the _IO.. macros into * plain integers, eliminating the need to include here everything * in "/usr/include". */ const struct_ioctlent ioctlent0[] = { #include "ioctlent0.h" }; #if SUPPORTED_PERSONALITIES > 1 static const struct_ioctlent ioctlent1[] = { # include "ioctlent1.h" }; # include PERSONALITY0_INCLUDE_PRINTERS_DECLS static const struct_printers printers0 = { # include PERSONALITY0_INCLUDE_PRINTERS_DEFS }; # include PERSONALITY1_INCLUDE_PRINTERS_DECLS static const struct_printers printers1 = { # include PERSONALITY1_INCLUDE_PRINTERS_DEFS }; #endif #if SUPPORTED_PERSONALITIES > 2 static const struct_ioctlent ioctlent2[] = { # include "ioctlent2.h" }; # include PERSONALITY2_INCLUDE_PRINTERS_DECLS static const struct_printers printers2 = { # include PERSONALITY2_INCLUDE_PRINTERS_DEFS }; #endif enum { nsyscalls0 = ARRAY_SIZE(sysent0) #if SUPPORTED_PERSONALITIES > 1 , nsyscalls1 = ARRAY_SIZE(sysent1) # if SUPPORTED_PERSONALITIES > 2 , nsyscalls2 = ARRAY_SIZE(sysent2) # endif #endif }; enum { nioctlents0 = ARRAY_SIZE(ioctlent0) #if SUPPORTED_PERSONALITIES > 1 , nioctlents1 = ARRAY_SIZE(ioctlent1) # if SUPPORTED_PERSONALITIES > 2 , nioctlents2 = ARRAY_SIZE(ioctlent2) # endif #endif }; #if SUPPORTED_PERSONALITIES > 1 const struct_sysent *sysent = sysent0; const struct_ioctlent *ioctlent = ioctlent0; const struct_printers *printers = &printers0; #endif const unsigned int nerrnos = ARRAY_SIZE(errnoent); const unsigned int nsignals = ARRAY_SIZE(signalent); unsigned nsyscalls = nsyscalls0; unsigned nioctlents = nioctlents0; const unsigned int nsyscall_vec[SUPPORTED_PERSONALITIES] = { nsyscalls0, #if SUPPORTED_PERSONALITIES > 1 nsyscalls1, #endif #if SUPPORTED_PERSONALITIES > 2 nsyscalls2, #endif }; const struct_sysent *const sysent_vec[SUPPORTED_PERSONALITIES] = { sysent0, #if SUPPORTED_PERSONALITIES > 1 sysent1, #endif #if SUPPORTED_PERSONALITIES > 2 sysent2, #endif }; const char *const personality_names[] = PERSONALITY_NAMES; static_assert(ARRAY_SIZE(personality_names) == SUPPORTED_PERSONALITIES, "ARRAY_SIZE(personality_names) != SUPPORTED_PERSONALITIES"); #if SUPPORTED_PERSONALITIES > 1 unsigned current_personality; # ifndef current_wordsize unsigned current_wordsize = PERSONALITY0_WORDSIZE; static const int personality_wordsize[SUPPORTED_PERSONALITIES] = { PERSONALITY0_WORDSIZE, PERSONALITY1_WORDSIZE, # if SUPPORTED_PERSONALITIES > 2 PERSONALITY2_WORDSIZE, # endif }; # endif # ifndef current_klongsize unsigned current_klongsize = PERSONALITY0_KLONGSIZE; static const int personality_klongsize[SUPPORTED_PERSONALITIES] = { PERSONALITY0_KLONGSIZE, PERSONALITY1_KLONGSIZE, # if SUPPORTED_PERSONALITIES > 2 PERSONALITY2_KLONGSIZE, # endif }; # endif void set_personality(unsigned int personality) { if (personality == current_personality) return; if (personality >= SUPPORTED_PERSONALITIES) error_msg_and_die("Requested switch to unsupported personality " "%u", personality); nsyscalls = nsyscall_vec[personality]; sysent = sysent_vec[personality]; switch (personality) { case 0: ioctlent = ioctlent0; nioctlents = nioctlents0; printers = &printers0; break; case 1: ioctlent = ioctlent1; nioctlents = nioctlents1; printers = &printers1; break; # if SUPPORTED_PERSONALITIES > 2 case 2: ioctlent = ioctlent2; nioctlents = nioctlents2; printers = &printers2; break; # endif } current_personality = personality; # ifndef current_wordsize current_wordsize = personality_wordsize[personality]; # endif # ifndef current_klongsize current_klongsize = personality_klongsize[personality]; # endif } static void update_personality(struct tcb *tcp, unsigned int personality) { static bool need_mpers_warning[] = { false, !HAVE_PERSONALITY_1_MPERS, !HAVE_PERSONALITY_2_MPERS }; set_personality(personality); if (personality == tcp->currpers) return; tcp->currpers = personality; if (!is_number_in_set(QUIET_PERSONALITY, quiet_set)) { printleader(tcp); tprintf_string("[ Process PID=%d runs in %s mode. ]\n", tcp->pid, personality_names[personality]); line_ended(); } if (need_mpers_warning[personality]) { error_msg("WARNING: Proper structure decoding for this " "personality is not supported, please consider " "building strace with mpers support enabled."); need_mpers_warning[personality] = false; } # if defined(ENABLE_STACKTRACE) && !defined(USE_LIBUNWIND) if (stack_trace_enabled) { unwind_tcb_fin(tcp); unwind_tcb_init(tcp); } # endif } #endif /* SUPPORTED_PERSONALITIES > 1 */ #ifdef SYS_socket_subcall static void decode_socket_subcall(struct tcb *tcp) { const int call = tcp->u_arg[0]; if (call < 1 || call >= SYS_socket_nsubcalls) return; const kernel_ulong_t scno = SYS_socket_subcall + call; const unsigned int nargs = sysent[scno].nargs; uint64_t buf[nargs]; if (umoven(tcp, tcp->u_arg[1], nargs * current_wordsize, buf) < 0) return; tcp->scno = scno; tcp->qual_flg = qual_flags(scno); tcp->s_ent = &sysent[scno]; for (unsigned int i = 0; i < nargs; ++i) tcp->u_arg[i] = (sizeof(uint32_t) == current_wordsize) ? ((uint32_t *) (void *) buf)[i] : buf[i]; } #endif /* SYS_socket_subcall */ #ifdef SYS_ipc_subcall static void decode_ipc_subcall(struct tcb *tcp) { unsigned int call = tcp->u_arg[0]; const unsigned int version = call >> 16; if (version) { # if defined S390 || defined S390X return; # else # ifdef SPARC64 if (current_wordsize == 8) return; # endif set_tcb_priv_ulong(tcp, version); call &= 0xffff; # endif } switch (call) { case 1: case 2: case 3: case 4: case 11: case 12: case 13: case 14: case 21: case 22: case 23: case 24: break; default: return; } tcp->scno = SYS_ipc_subcall + call; tcp->qual_flg = qual_flags(tcp->scno); tcp->s_ent = &sysent[tcp->scno]; const unsigned int n = n_args(tcp); for (unsigned int i = 0; i < n; ++i) tcp->u_arg[i] = tcp->u_arg[i + 1]; } #endif /* SYS_ipc_subcall */ #ifdef SYS_syscall_subcall /* The implementation is architecture specific. */ static void decode_syscall_subcall(struct tcb *); #endif /* SYS_syscall_subcall */ static void dumpio(struct tcb *tcp) { int fd = tcp->u_arg[0]; if (fd < 0) return; if (is_number_in_set(fd, write_set)) { switch (tcp_sysent(tcp)->sen) { case SEN_write: case SEN_pwrite: case SEN_send: case SEN_sendto: case SEN_mq_timedsend_time32: case SEN_mq_timedsend_time64: dumpstr(tcp, tcp->u_arg[1], tcp->u_arg[2]); break; case SEN_writev: case SEN_pwritev: case SEN_pwritev2: case SEN_vmsplice: dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1], -1); break; case SEN_sendmsg: dumpiov_in_msghdr(tcp, tcp->u_arg[1], -1); break; case SEN_sendmmsg: dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]); break; } } if (syserror(tcp)) return; if (is_number_in_set(fd, read_set)) { switch (tcp_sysent(tcp)->sen) { case SEN_read: case SEN_pread: case SEN_recv: case SEN_recvfrom: case SEN_mq_timedreceive_time32: case SEN_mq_timedreceive_time64: dumpstr(tcp, tcp->u_arg[1], tcp->u_rval); return; case SEN_readv: case SEN_preadv: case SEN_preadv2: dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1], tcp->u_rval); return; case SEN_recvmsg: dumpiov_in_msghdr(tcp, tcp->u_arg[1], tcp->u_rval); return; case SEN_recvmmsg: case SEN_recvmmsg_time32: case SEN_recvmmsg_time64: dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]); return; } } } static const char * err_name(uint64_t err) { return err < nerrnos ? errnoent[err] : NULL; } void print_err(int64_t err, bool negated) { const char *str = err_name(negated ? -err : err); if (!str || xlat_verbose(xlat_verbosity) != XLAT_STYLE_ABBREV) tprintf_string(negated ? "%" PRId64 : "%" PRIu64, err); if (!str || xlat_verbose(xlat_verbosity) == XLAT_STYLE_RAW) return; (xlat_verbose(xlat_verbosity) == XLAT_STYLE_ABBREV ? tprintf_string : tprintf_comment)("%s%s", negated ? "-" : "", str); } static void print_err_ret(kernel_ulong_t ret, unsigned long u_error) { const char *u_error_str = err_name(u_error); tprints_sysret_next("retval"); PRINT_VAL_D(ret); if (u_error_str) { tprints_sysret_next("error"); tprints_string(u_error_str); tprints_sysret_string("strerror", strerror(u_error)); } else { tprints_sysret_next("strerror"); tprintf_string("(errno %lu)", u_error); } } static long get_regs(struct tcb *); static int get_syscall_args(struct tcb *); static int get_syscall_result(struct tcb *); static void get_error(struct tcb *, bool); static void set_error(struct tcb *, unsigned long); static void set_success(struct tcb *, kernel_long_t); static int arch_get_scno(struct tcb *); static int arch_check_scno(struct tcb *); static int arch_set_scno(struct tcb *, kernel_ulong_t); static int arch_get_syscall_args(struct tcb *); static void arch_get_error(struct tcb *, bool); static int arch_set_error(struct tcb *); static int arch_set_success(struct tcb *); #if MAX_ARGS > 6 static void arch_get_syscall_args_extra(struct tcb *, unsigned int); #endif struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES]; static struct inject_opts * tcb_inject_opts(struct tcb *tcp) { return (scno_in_range(tcp->scno) && tcp->inject_vec[current_personality]) ? &tcp->inject_vec[current_personality][tcp->scno] : NULL; } static long tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo) { if (!tcp->inject_vec[current_personality]) { tcp->inject_vec[current_personality] = xarraydup(inject_vec[current_personality], nsyscalls, sizeof(**inject_vec)); } struct inject_opts *opts = tcb_inject_opts(tcp); if (!opts || opts->first == 0 || opts->last == 0) return 0; if (opts->last != INJECT_LAST_INF) --opts->last; --opts->first; if (opts->first != 0) return 0; opts->first = opts->step; if (!recovering(tcp)) { if (opts->data.flags & INJECT_F_SIGNAL) *signo = opts->data.signo; if (opts->data.flags & (INJECT_F_ERROR | INJECT_F_RETVAL)) { kernel_long_t scno = (opts->data.flags & INJECT_F_SYSCALL) ? (kernel_long_t) shuffle_scno(opts->data.scno) : -1; if (!arch_set_scno(tcp, scno)) { tcp->flags |= TCB_TAMPERED; if (scno != -1) tcp->flags |= TCB_TAMPERED_NO_FAIL; #if ARCH_NEEDS_SET_ERROR_FOR_SCNO_TAMPERING /* * So far it's just a workaround for hppa, * but let's pretend it could be used elsewhere. */ else { kernel_long_t rval = (opts->data.flags & INJECT_F_RETVAL) ? ENOSYS : retval_get(opts->data.rval_idx); tcp->u_error = 0; /* force reset */ set_error(tcp, rval); } #endif } } if (opts->data.flags & INJECT_F_POKE_ENTER) poke_tcb(tcp, opts->data.poke_idx, true); if (opts->data.flags & INJECT_F_POKE_EXIT) tcp->flags |= TCB_INJECT_POKE_EXIT; if (opts->data.flags & INJECT_F_DELAY_ENTER) delay_tcb(tcp, opts->data.delay_idx, true); if (opts->data.flags & INJECT_F_DELAY_EXIT) tcp->flags |= TCB_INJECT_DELAY_EXIT; } return 0; } static long tamper_with_syscall_exiting(struct tcb *tcp) { struct inject_opts *opts = tcb_inject_opts(tcp); if (!opts) return 0; if (inject_poke_exit(tcp)) poke_tcb(tcp, opts->data.poke_idx, false); if (inject_delay_exit(tcp)) delay_tcb(tcp, opts->data.delay_idx, false); if (!syscall_tampered(tcp)) return 0; if (!syserror(tcp) ^ !!syscall_tampered_nofail(tcp)) { error_msg("Failed to tamper with process %d: unexpectedly got" " %serror (return value %#" PRI_klx ", error %lu)", tcp->pid, syscall_tampered_nofail(tcp) ? "" : "no ", tcp->u_rval, tcp->u_error); return 1; } if (opts->data.flags & INJECT_F_RETVAL) set_success(tcp, retval_get(opts->data.rval_idx)); else set_error(tcp, retval_get(opts->data.rval_idx)); return 0; } /* * Returns: * 0: "ignore this ptrace stop", bail out silently. * 1: ok, decoded; call * syscall_entering_finish(tcp, syscall_entering_trace(tcp, ...)). * other: error; call syscall_entering_finish(tcp, res), where res is the value * returned. */ int syscall_entering_decode(struct tcb *tcp) { int res = get_scno(tcp); if (res == 0) return res; if (res != 1 || (res = get_syscall_args(tcp)) != 1) { printleader(tcp); tprints_arg_begin(tcp_sysent(tcp)->sys_name); /* * " " will be added later by the code which * detects ptrace errors. */ return res; } #ifdef SYS_syscall_subcall if (tcp_sysent(tcp)->sen == SEN_syscall) decode_syscall_subcall(tcp); #endif #if defined SYS_ipc_subcall \ || defined SYS_socket_subcall switch (tcp_sysent(tcp)->sen) { # ifdef SYS_ipc_subcall case SEN_ipc: decode_ipc_subcall(tcp); break; # endif # ifdef SYS_socket_subcall case SEN_socketcall: decode_socket_subcall(tcp); break; # endif } #endif return 1; } int syscall_entering_trace(struct tcb *tcp, unsigned int *sig) { if (hide_log(tcp)) { /* * Restrain from fault injection * while the tracee executes strace code. */ tcp->qual_flg &= ~QUAL_INJECT; switch (tcp_sysent(tcp)->sen) { case SEN_execve: case SEN_execveat: case SEN_execv: /* * First exec* syscall makes the log visible. */ tcp->flags &= ~TCB_HIDE_LOG; /* * Check whether this exec* syscall succeeds. */ tcp->flags |= TCB_CHECK_EXEC_SYSCALL; break; } } if (hide_log(tcp) || !traced(tcp) || ((tracing_paths || tracing_fds) && !pathtrace_match(tcp))) { tcp->flags |= TCB_FILTERED; return 0; } tcp->flags &= ~TCB_FILTERED; if (inject(tcp)) tamper_with_syscall_entering(tcp, sig); if (cflag == CFLAG_ONLY_STATS) { return 0; } #ifdef ENABLE_STACKTRACE if (stack_trace_enabled && !check_exec_syscall(tcp) && tcp_sysent(tcp)->sys_flags & STACKTRACE_CAPTURE_ON_ENTER) { unwind_tcb_capture(tcp); } #endif if (!is_complete_set(status_set, NUMBER_OF_STATUSES)) strace_open_memstream(tcp); printleader(tcp); tprints_arg_begin(tcp_sysent(tcp)->sys_name); int res = raw(tcp) ? printargs(tcp) : tcp_sysent(tcp)->sys_func(tcp); fflush(tcp->outf); return res; } void syscall_entering_finish(struct tcb *tcp, int res) { tcp->flags |= TCB_INSYSCALL; tcp->sys_func_rval = res; /* Measure the entrance time as late as possible to avoid errors. */ if ((Tflag || cflag) && !filtered(tcp)) clock_gettime(CLOCK_MONOTONIC, &tcp->etime); /* Start tracking system time */ if (cflag) { if (debug_flag) { struct timespec dt; ts_sub(&dt, &tcp->stime, &tcp->ltime); if (ts_nz(&dt)) debug_func_msg("pid %d: %.9f seconds of system " "time spent since the last " "syscall exit", tcp->pid, ts_float(&dt)); } tcp->ltime = tcp->stime; } } /* Returns: * 0: "bail out". * 1: ok. * -1: error in one of ptrace ops. * * If not 0, call syscall_exiting_trace(tcp, res), where res is the return * value. Anyway, call syscall_exiting_finish(tcp) then. */ int syscall_exiting_decode(struct tcb *tcp, struct timespec *pts) { /* Measure the exit time as early as possible to avoid errors. */ if ((Tflag || cflag) && !filtered(tcp)) clock_gettime(CLOCK_MONOTONIC, pts); if (tcp_sysent(tcp)->sys_flags & MEMORY_MAPPING_CHANGE) mmap_notify_report(tcp); if ((tcp_sysent(tcp)->sys_flags & COMM_CHANGE) && !syserror(tcp) && (tcp_sysent(tcp)->sen != SEN_prctl || tcp->u_arg[0] == PR_SET_NAME)) maybe_load_task_comm(tcp); if (filtered(tcp)) return 0; if (check_exec_syscall(tcp)) { /* The check failed, hide the log. */ tcp->flags |= TCB_HIDE_LOG; } #if SUPPORTED_PERSONALITIES > 1 update_personality(tcp, tcp->currpers); #endif return get_syscall_result(tcp); } void print_syscall_resume(struct tcb *tcp) { /* If not in -ff mode, and printing_tcp != tcp, * then the log currently does not end with output * of _our syscall entry_, but with something else. * We need to say which syscall's return is this. * * Forced reprinting via TCB_REPRINT is used only by * "strace -ff -oLOG test/threaded_execve" corner case. * It's the only case when -ff mode needs reprinting. */ if ((!output_separately && printing_tcp != tcp && !tcp->staged_output_data) || (tcp->flags & TCB_REPRINT)) { tcp->flags &= ~TCB_REPRINT; printleader(tcp); tprintf_string("<... %s resumed>", tcp_sysent(tcp)->sys_name); } printing_tcp = tcp; } static void print_injected_note(struct tcb *tcp) { if (syscall_tampered(tcp) && syscall_tampered_poked(tcp)) tprints_sysret_string("inject", "INJECTED: args, retval"); else if (syscall_tampered_poked(tcp)) tprints_sysret_string("inject", "INJECTED: args"); else if (syscall_tampered(tcp)) tprints_sysret_string("inject", "INJECTED"); if (syscall_tampered_delayed(tcp)) tprints_sysret_string("delay", "DELAYED"); } static void print_erestart(const char *err_short, const char *err_long) { tprints_sysret_next("retval"); tprint_sysret_pseudo_rval(); tprints_sysret_next("error"); tprints_string(err_short); tprints_sysret_string("strerror", err_long); } int syscall_exiting_trace(struct tcb *tcp, struct timespec *ts, int res) { if (syscall_tampered(tcp) || inject_delay_exit(tcp) || inject_poke_exit(tcp)) tamper_with_syscall_exiting(tcp); if (cflag != CFLAG_ONLY_STATS) print_syscall_resume(tcp); tcp->s_prev_ent = NULL; if (res != 1) { /* There was an error in one of prior ptrace ops. */ bool status_filtering = !is_complete_set(status_set, NUMBER_OF_STATUSES); bool publish = status_filtering ? is_number_in_set(STATUS_UNAVAILABLE, status_set) : true; if (cflag && publish) count_syscall(tcp, ts); if (cflag != CFLAG_ONLY_STATS) { tprint_arg_end(); tprint_space(); tabto(); tprint_sysret_begin(); tprints_sysret_next("retval"); tprint_sysret_pseudo_rval(); tprints_sysret_next("return"); tprints_string(""); tprint_sysret_end(); tprint_newline(); if (status_filtering) strace_close_memstream(tcp, publish); line_ended(); } return res; } tcp->s_prev_ent = tcp->s_ent; int sys_res = 0; if (cflag != CFLAG_ONLY_STATS) { if (raw(tcp)) { /* sys_res = printargs(tcp); - but it's nop on sysexit */ } else { if (tcp->sys_func_rval & RVAL_DECODED) sys_res = tcp->sys_func_rval; else sys_res = tcp_sysent(tcp)->sys_func(tcp); } } if (!is_complete_set(status_set, NUMBER_OF_STATUSES)) { bool publish = syserror(tcp) && is_number_in_set(STATUS_FAILED, status_set); publish |= !syserror(tcp) && is_number_in_set(STATUS_SUCCESSFUL, status_set); if (cflag != CFLAG_ONLY_STATS) strace_close_memstream(tcp, publish); if (!publish) { if (cflag != CFLAG_ONLY_STATS) line_ended(); return 0; } } if (syscall_limit != -1) syscall_limit--; if (cflag) { count_syscall(tcp, ts); if (cflag == CFLAG_ONLY_STATS) return 0; } tprint_arg_end(); tprint_space(); tabto(); tprint_sysret_begin(); if (raw(tcp)) { if (tcp->u_error) { print_err_ret(tcp->u_rval, tcp->u_error); } else { tprints_sysret_next("retval"); PRINT_VAL_X(tcp->u_rval); } } else if (!(sys_res & RVAL_NONE) && tcp->u_error) { switch (tcp->u_error) { /* Blocked signals do not interrupt any syscalls. * In this case syscalls don't return ERESTARTfoo codes. * * Deadly signals set to SIG_DFL interrupt syscalls * and kill the process regardless of which of the codes below * is returned by the interrupted syscall. * In some cases, kernel forces a kernel-generated deadly * signal to be unblocked and set to SIG_DFL (and thus cause * death) if it is blocked or SIG_IGNed: for example, SIGSEGV * or SIGILL. (The alternative is to leave process spinning * forever on the faulty instruction - not useful). * * SIG_IGNed signals and non-deadly signals set to SIG_DFL * (for example, SIGCHLD, SIGWINCH) interrupt syscalls, * but kernel will always restart them. */ case ERESTARTSYS: /* Most common type of signal-interrupted syscall exit code. * The system call will be restarted with the same arguments * if SA_RESTART is set; otherwise, it will fail with EINTR. */ print_erestart("ERESTARTSYS", "To be restarted if SA_RESTART is set"); break; case ERESTARTNOINTR: /* Rare. For example, fork() returns this if interrupted. * SA_RESTART is ignored (assumed set): the restart is unconditional. */ print_erestart("ERESTARTNOINTR", "To be restarted"); break; case ERESTARTNOHAND: /* pause(), rt_sigsuspend() etc use this code. * SA_RESTART is ignored (assumed not set): * syscall won't restart (will return EINTR instead) * even after signal with SA_RESTART set. However, * after SIG_IGN or SIG_DFL signal it will restart * (thus the name "restart only if has no handler"). */ print_erestart("ERESTARTNOHAND", "To be restarted if no handler"); break; case ERESTART_RESTARTBLOCK: /* Syscalls like nanosleep(), poll() which can't be * restarted with their original arguments use this * code. Kernel will execute restart_syscall() instead, * which changes arguments before restarting syscall. * SA_RESTART is ignored (assumed not set) similarly * to ERESTARTNOHAND. (Kernel can't honor SA_RESTART * since restart data is saved in "restart block" * in task struct, and if signal handler uses a syscall * which in turn saves another such restart block, * old data is lost and restart becomes impossible) */ print_erestart("ERESTART_RESTARTBLOCK", "Interrupted by signal"); break; default: print_err_ret(tcp->u_rval, tcp->u_error); break; } } else { tprints_sysret_next("retval"); if (sys_res & RVAL_NONE) tprint_sysret_pseudo_rval(); else { switch (sys_res & RVAL_MASK) { case RVAL_HEX: #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG if (current_klongsize < sizeof(tcp->u_rval)) { PRINT_VAL_X((unsigned int) tcp->u_rval); } else #endif { PRINT_VAL_X(tcp->u_rval); } break; case RVAL_OCTAL: { unsigned long long mode = zero_extend_signed_to_ull(tcp->u_rval); #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG if (current_klongsize < sizeof(tcp->u_rval)) mode = (unsigned int) mode; #endif print_numeric_ll_umode_t(mode); break; } case RVAL_UDECIMAL: #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG if (current_klongsize < sizeof(tcp->u_rval)) { PRINT_VAL_U((unsigned int) tcp->u_rval); } else #endif { PRINT_VAL_U(tcp->u_rval); } break; case RVAL_FD: /* * printfd accepts int as fd and it makes * little sense to pass negative fds to it. */ if ((current_klongsize < sizeof(tcp->u_rval)) || ((kernel_ulong_t) tcp->u_rval <= INT_MAX)) { printfd(tcp, tcp->u_rval); } else { PRINT_VAL_D(tcp->u_rval); } break; case RVAL_TID: case RVAL_SID: case RVAL_TGID: case RVAL_PGID: { #define _(_t) [RVAL_##_t - RVAL_TID] = PT_##_t static const enum pid_type types[] = { _(TID), _(SID), _(TGID), _(PGID), }; #undef _ printpid(tcp, tcp->u_rval, types[(sys_res & RVAL_MASK) - RVAL_TID]); break; } default: tprint_sysret_pseudo_rval(); error_msg("invalid rval format"); break; } } } if ((sys_res & RVAL_STR) && tcp->auxstr) tprints_sysret_string("retstr", tcp->auxstr); print_injected_note(tcp); if (Tflag) { tprints_sysret_next("time"); tprint_associated_info_begin(); ts_sub(ts, ts, &tcp->etime); PRINT_VAL_D(ts->tv_sec); if (Tflag_width) { tprintf_string(".%0*ld", Tflag_width, (long) ts->tv_nsec / Tflag_scale); } tprint_associated_info_end(); } tprint_sysret_end(); tprint_newline(); dumpio(tcp); line_ended(); #ifdef ENABLE_STACKTRACE if (stack_trace_enabled) unwind_tcb_print(tcp); #endif return 0; } void syscall_exiting_finish(struct tcb *tcp) { tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED | TCB_INJECT_DELAY_EXIT | TCB_INJECT_POKE_EXIT | TCB_TAMPERED_DELAYED | TCB_TAMPERED_POKED); tcp->sys_func_rval = 0; free_tcb_priv_data(tcp); #ifdef ENABLE_SECONTEXT tcp->last_dirfd = AT_FDCWD; #endif if (cflag) tcp->ltime = tcp->stime; } bool is_erestart(struct tcb *tcp) { switch (tcp->u_error) { case ERESTARTSYS: case ERESTARTNOINTR: case ERESTARTNOHAND: case ERESTART_RESTARTBLOCK: return true; default: return false; } } static unsigned long saved_u_error; void temporarily_clear_syserror(struct tcb *tcp) { saved_u_error = tcp->u_error; tcp->u_error = 0; } void restore_cleared_syserror(struct tcb *tcp) { tcp->u_error = saved_u_error; } static struct_ptrace_syscall_info ptrace_sci; static bool ptrace_syscall_info_is_valid(void) { return ptrace_get_syscall_info_supported && ptrace_sci.op <= PTRACE_SYSCALL_INFO_SECCOMP; } #define XLAT_MACROS_ONLY #include "xlat/nt_descriptor_types.h" #undef XLAT_MACROS_ONLY #define ARCH_MIGHT_USE_SET_REGS 1 #include "arch_regs.c" #if HAVE_ARCH_GETRVAL2 # include "arch_getrval2.c" #endif #include "getregs_old.h" #ifdef HAVE_GETREGS_OLD /* Either getregs_old() or set_regs() */ # undef ARCH_MIGHT_USE_SET_REGS # define ARCH_MIGHT_USE_SET_REGS 0 #endif #undef ptrace_getregset_or_getregs #undef ptrace_setregset_or_setregs #ifdef ARCH_REGS_FOR_GETREGSET # define ptrace_getregset_or_getregs ptrace_getregset static long ptrace_getregset(pid_t pid) { # ifdef ARCH_IOVEC_FOR_GETREGSET /* variable iovec */ ARCH_IOVEC_FOR_GETREGSET.iov_len = sizeof(ARCH_REGS_FOR_GETREGSET); return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &ARCH_IOVEC_FOR_GETREGSET); # else /* constant iovec */ static struct iovec io = { .iov_base = &ARCH_REGS_FOR_GETREGSET, .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET) }; return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &io); # endif } # if ARCH_MIGHT_USE_SET_REGS # define ptrace_setregset_or_setregs ptrace_setregset static int ptrace_setregset(pid_t pid) { # ifdef ARCH_IOVEC_FOR_GETREGSET /* variable iovec */ return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &ARCH_IOVEC_FOR_GETREGSET); # else /* constant iovec */ static struct iovec io = { .iov_base = &ARCH_REGS_FOR_GETREGSET, .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET) }; return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &io); # endif } # endif /* ARCH_MIGHT_USE_SET_REGS */ #elif defined ARCH_REGS_FOR_GETREGS # define ptrace_getregset_or_getregs ptrace_getregs static long ptrace_getregs(pid_t pid) { # if defined SPARC || defined SPARC64 /* SPARC systems have the meaning of data and addr reversed */ return ptrace(PTRACE_GETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0); # else return ptrace(PTRACE_GETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS); # endif } # if ARCH_MIGHT_USE_SET_REGS # define ptrace_setregset_or_setregs ptrace_setregs static int ptrace_setregs(pid_t pid) { # if defined SPARC || defined SPARC64 /* SPARC systems have the meaning of data and addr reversed */ return ptrace(PTRACE_SETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0); # else return ptrace(PTRACE_SETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS); # endif } # endif /* ARCH_MIGHT_USE_SET_REGS */ #endif /* ARCH_REGS_FOR_GETREGSET || ARCH_REGS_FOR_GETREGS */ static long get_regs_error = -1; void clear_regs(struct tcb *tcp) { ptrace_sci.op = 0xff; get_regs_error = -1; } static long get_regs(struct tcb *const tcp) { #ifdef ptrace_getregset_or_getregs if (get_regs_error != -1) return get_regs_error; # ifdef HAVE_GETREGS_OLD /* * Try PTRACE_GETREGSET/PTRACE_GETREGS first, * fallback to getregs_old. */ static int use_getregs_old; if (use_getregs_old < 0) { return get_regs_error = ptrace_getregset_or_getregs(tcp->pid); } else if (use_getregs_old == 0) { get_regs_error = ptrace_getregset_or_getregs(tcp->pid); if (get_regs_error >= 0) { use_getregs_old = -1; return get_regs_error; } if (errno == EPERM || errno == ESRCH) return get_regs_error; use_getregs_old = 1; } return get_regs_error = getregs_old(tcp); # else /* !HAVE_GETREGS_OLD */ /* Assume that PTRACE_GETREGSET/PTRACE_GETREGS works. */ get_regs_error = ptrace_getregset_or_getregs(tcp->pid); # if defined ARCH_PERSONALITY_0_IOV_SIZE if (get_regs_error) return get_regs_error; switch (ARCH_IOVEC_FOR_GETREGSET.iov_len) { case ARCH_PERSONALITY_0_IOV_SIZE: update_personality(tcp, 0); break; case ARCH_PERSONALITY_1_IOV_SIZE: update_personality(tcp, 1); break; default: { static bool printed = false; if (!printed) { error_msg("Unsupported regset size returned by " "PTRACE_GETREGSET: %zu", ARCH_IOVEC_FOR_GETREGSET.iov_len); printed = true; } update_personality(tcp, 0); } } # endif /* ARCH_PERSONALITY_0_IOV_SIZE */ return get_regs_error; # endif /* !HAVE_GETREGS_OLD */ #else /* !ptrace_getregset_or_getregs */ # warning get_regs is not implemented for this architecture yet return 0; #endif /* !ptrace_getregset_or_getregs */ } #ifdef ptrace_setregset_or_setregs static int set_regs(pid_t pid) { return ptrace_setregset_or_setregs(pid); } #endif /* ptrace_setregset_or_setregs */ struct sysent_buf { struct tcb *tcp; struct_sysent ent; char buf[sizeof("syscall_0x") + sizeof(kernel_ulong_t) * 2]; }; static void free_sysent_buf(void *ptr) { struct sysent_buf *s = ptr; s->tcp->s_prev_ent = s->tcp->s_ent = NULL; free(ptr); } static bool strace_get_syscall_info(struct tcb *tcp) { /* * ptrace_get_syscall_info_supported should have been checked * by the caller. */ if (ptrace_sci.op == 0xff) { const size_t size = sizeof(ptrace_sci); if (ptrace(PTRACE_GET_SYSCALL_INFO, tcp->pid, (void *) size, &ptrace_sci) < 0) { get_regs_error = -2; return false; } #if SUPPORTED_PERSONALITIES > 1 int newpers = get_personality_from_syscall_info(&ptrace_sci); if (newpers >= 0) update_personality(tcp, newpers); #endif } if (entering(tcp)) { if (ptrace_sci.op == PTRACE_SYSCALL_INFO_EXIT) { error_msg("pid %d: entering" ", ptrace_syscall_info.op == %u", tcp->pid, ptrace_sci.op); /* TODO: handle this. */ } } else { if (ptrace_sci.op == PTRACE_SYSCALL_INFO_ENTRY) { error_msg("pid %d: exiting" ", ptrace_syscall_info.op == %u", tcp->pid, ptrace_sci.op); /* TODO: handle this. */ } } return true; } bool get_instruction_pointer(struct tcb *tcp, kernel_ulong_t *ip) { if (get_regs_error < -1) return false; if (ptrace_get_syscall_info_supported) { if (!strace_get_syscall_info(tcp)) return false; *ip = (kernel_ulong_t) ptrace_sci.instruction_pointer; return true; } #if defined ARCH_PC_REG if (get_regs(tcp) < 0) return false; *ip = (kernel_ulong_t) ARCH_PC_REG; return true; #elif defined ARCH_PC_PEEK_ADDR if (upeek(tcp, ARCH_PC_PEEK_ADDR, ip) < 0) return false; return true; #else # error Neither ARCH_PC_REG nor ARCH_PC_PEEK_ADDR is defined #endif } bool get_stack_pointer(struct tcb *tcp, kernel_ulong_t *sp) { if (get_regs_error < -1) return false; if (ptrace_get_syscall_info_supported) { if (!strace_get_syscall_info(tcp)) return false; *sp = (kernel_ulong_t) ptrace_sci.stack_pointer; return true; } #if defined ARCH_SP_REG if (get_regs(tcp) < 0) return false; *sp = (kernel_ulong_t) ARCH_SP_REG; return true; #elif defined ARCH_SP_PEEK_ADDR if (upeek(tcp, ARCH_SP_PEEK_ADDR, sp) < 0) return false; return true; #else return false; #endif } static int get_syscall_regs(struct tcb *tcp) { if (get_regs_error != -1) return get_regs_error; if (ptrace_get_syscall_info_supported) return strace_get_syscall_info(tcp) ? 0 : get_regs_error; return get_regs(tcp); } const struct_sysent stub_sysent = { .nargs = MAX_ARGS, .sys_flags = MEMORY_MAPPING_CHANGE, .sen = SEN_printargs, .sys_func = printargs, .sys_name = "???", }; /* * Returns: * 0: "ignore this ptrace stop", syscall_entering_decode() should return a "bail * out silently" code. * 1: ok, continue in syscall_entering_decode(). * other: error, syscall_entering_decode() should print error indicator * ("???" etc) and return an appropriate code. */ int get_scno(struct tcb *tcp) { tcp->scno = -1; tcp->s_ent = NULL; tcp->qual_flg = QUAL_RAW | DEFAULT_QUAL_FLAGS; if (get_syscall_regs(tcp) < 0) return -1; if (ptrace_syscall_info_is_valid()) { /* Apply arch-specific workarounds. */ int rc = arch_check_scno(tcp); if (rc != 1) return rc; tcp->scno = ptrace_sci.entry.nr; } else { int rc = arch_get_scno(tcp); if (rc != 1) return rc; } tcp->true_scno = tcp->scno; tcp->scno = shuffle_scno(tcp->scno); if (scno_is_valid(tcp->scno)) { tcp->s_ent = &sysent[tcp->scno]; tcp->qual_flg = qual_flags(tcp->scno); } else { struct sysent_buf *s = xzalloc(sizeof(*s)); s->tcp = tcp; s->ent = stub_sysent; s->ent.sys_name = s->buf; xsprintf(s->buf, "syscall_%#" PRI_klx, shuffle_scno(tcp->scno)); tcp->s_ent = &s->ent; set_tcb_priv_data(tcp, s, free_sysent_buf); debug_msg("pid %d invalid syscall %#" PRI_klx, tcp->pid, shuffle_scno(tcp->scno)); } /* * We refrain from argument decoding during recovering * as tracee memory mappings has changed and the registers * are very likely pointing to garbage already. */ if (recovering(tcp)) tcp->qual_flg |= QUAL_RAW; return 1; } static int get_syscall_args(struct tcb *tcp) { if (ptrace_syscall_info_is_valid()) { const unsigned int n = MIN(ARRAY_SIZE(tcp->u_arg), ARRAY_SIZE(ptrace_sci.entry.args)); for (unsigned int i = 0; i < n; ++i) tcp->u_arg[i] = ptrace_sci.entry.args[i]; #if SUPPORTED_PERSONALITIES > 1 if (tcp_sysent(tcp)->sys_flags & COMPAT_SYSCALL_TYPES) { for (unsigned int i = 0; i < n; ++i) tcp->u_arg[i] = (uint32_t) tcp->u_arg[i]; } #endif /* * So far it's just a workaround for mips o32, * but let's pretend it could be used elsewhere. */ #if MAX_ARGS > 6 arch_get_syscall_args_extra(tcp, n); #endif return 1; } return arch_get_syscall_args(tcp); } #ifdef ptrace_getregset_or_getregs # define get_syscall_result_regs get_syscall_regs #else static int get_syscall_result_regs(struct tcb *); #endif /* Returns: * 1: ok, continue in syscall_exiting_trace(). * -1: error, syscall_exiting_trace() should print error indicator * ("???" etc) and bail out. */ static int get_syscall_result(struct tcb *tcp) { if (get_syscall_result_regs(tcp) < 0) return -1; get_error(tcp, (!(tcp_sysent(tcp)->sys_flags & SYSCALL_NEVER_FAILS) || syscall_tampered(tcp)) && !syscall_tampered_nofail(tcp)); return 1; } static void get_error(struct tcb *tcp, const bool check_errno) { if (ptrace_syscall_info_is_valid()) { if (ptrace_sci.exit.is_error) { tcp->u_rval = -1; tcp->u_error = -ptrace_sci.exit.rval; } else { tcp->u_error = 0; tcp->u_rval = ptrace_sci.exit.rval; } } else { tcp->u_error = 0; arch_get_error(tcp, check_errno); } } static void set_error(struct tcb *tcp, unsigned long new_error) { const unsigned long old_error = tcp->u_error; if (new_error == old_error || new_error > MAX_ERRNO_VALUE) return; #ifdef ptrace_setregset_or_setregs /* if we are going to invoke set_regs, call get_regs first */ if (get_regs(tcp) < 0) return; #endif tcp->u_error = new_error; if (arch_set_error(tcp)) { tcp->u_error = old_error; /* arch_set_error does not update u_rval */ } else { if (ptrace_syscall_info_is_valid()) tcp->u_rval = -1; else get_error(tcp, !(tcp_sysent(tcp)->sys_flags & SYSCALL_NEVER_FAILS)); } } static void set_success(struct tcb *tcp, kernel_long_t new_rval) { const kernel_long_t old_rval = tcp->u_rval; #ifdef ptrace_setregset_or_setregs /* if we are going to invoke set_regs, call get_regs first */ if (get_regs(tcp) < 0) return; #endif tcp->u_rval = new_rval; if (arch_set_success(tcp)) { tcp->u_rval = old_rval; /* arch_set_success does not update u_error */ } else { if (ptrace_syscall_info_is_valid()) tcp->u_error = 0; else get_error(tcp, !(tcp_sysent(tcp)->sys_flags & SYSCALL_NEVER_FAILS)); } } #include "get_scno.c" #include "check_scno.c" #include "set_scno.c" #include "get_syscall_args.c" #ifndef ptrace_getregset_or_getregs # include "get_syscall_result.c" #endif #include "get_error.c" #include "set_error.c" #ifdef HAVE_GETREGS_OLD # include "getregs_old.c" #endif #include "shuffle_scno.c"