From 514540cd9e2601e80ccac5202077bb423b987e60 Mon Sep 17 00:00:00 2001 From: Eugene Syromyatnikov Date: Fri, 8 Jul 2022 12:25:21 +0200 Subject: [wip] Add support for wildcard path filters Resolves: https://github.com/strace/strace/issues/117 --- doc/strace.1.in | 46 ++++++++++++++++++++++++++++++++++++++++++++++ src/defs.h | 32 +++++++++++++++++++++++++++++--- src/pathtrace.c | 32 ++++++++++++++++++++++++++++---- src/strace.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------ src/util.c | 40 ++++++++++++++++++++++++++++------------ 5 files changed, 179 insertions(+), 25 deletions(-) diff --git a/doc/strace.1.in b/doc/strace.1.in index fdc7f11ee..8b92bedf7 100644 --- a/doc/strace.1.in +++ b/doc/strace.1.in @@ -753,11 +753,57 @@ with path provided in and the path .I is not unlinked from the file descriptor. +.TQ +.BR str +Indicates that +.I path_filter +is supposed to be interpreted as a literal string. +.TQ +.BR glob +Indicates that +.I path_filter +is supposed to be interpreted as a wildcard pattern +(as defined in +.BR glob (7) +manual page). +.TQ +.BR glob\-path +Indicates that +.I path_filter +is supposed to be interpreted as a wildcard pattern +with wildcards matching parts of the path and not only file name +(see the description of +.B FNM_PATHNAME +flag in the +.BR fnmatch (3) +manual page). +.TQ +.BR glob\-all +Indicates that +.I path_filter +is supposed to be interpreted as a wildcard pattern +with wildcards matching the leading dots of the file names +(see the description of +.B FNM_PERIOD +flag in the +.BR fnmatch (3) +manual page). +.TQ +.BR glob\-all\-path ", " glob\-path\-all +Indicates that +.I path_filter +is supposed to be interpreted as a wildcard pattern +with wildcards matching parts of the path and the leading dots. .RE .IP If neither of .BR pathstr ", " fd-deleted ", or " fd-not-deleted qualifiers has been provided, all three are assumed. 
+If neither of +.BR str ", " glob ", " glob\-all ", " glob\-path ", or " glob\-all\-path +qualifiers has been provided, +.B str +is assumed. .IP .I path_filter is a byte string in which the following escape sequences are recognised: diff --git a/src/defs.h b/src/defs.h index e42a00730..18211de7e 100644 --- a/src/defs.h +++ b/src/defs.h @@ -514,21 +514,41 @@ extern int Tflag_width; extern bool iflag; extern bool count_wallclock; +enum { + PTF_TYPE_BITS = 2, + PTF_TYPE_MASK = MASK32(PTF_TYPE_BITS), +}; + +#define ptf_type(flags_) ((flags_) & PTF_TYPE_MASK) + enum path_trace_flag_bits { - PTF_PATH_STR_BIT, + PTF_PATH_STR_BIT = PTF_TYPE_BITS, PTF_FD_NOT_DELETED_BIT, PTF_FD_DELETED_BIT, + + PTF_GLOB_PATH_BIT, + PTF_GLOB_ALL_BIT, }; enum path_trace_flags { + /* filter types */ + PTF_TYPE_STR = 1, + PTF_TYPE_GLOB, + + /* filter options */ FLAG(PTF_PATH_STR), FLAG(PTF_FD_NOT_DELETED), FLAG(PTF_FD_DELETED), + + /* glob options */ + FLAG(PTF_GLOB_PATH), + FLAG(PTF_GLOB_ALL), }; struct path_set_item { const char *path; enum path_trace_flags flags; + int fnmatch_flags; }; /* are we filtering traces based on paths? */ @@ -865,6 +885,11 @@ str_strip_prefix_len(const char *str, const char *prefix, size_t prefix_len) */ extern int strnncmp(const char *s1, const char *s2, size_t n1, size_t n2); +enum string_unescape_style { + SUE_LITERAL, + SUE_GLOB, +}; + /** * Un-escapes a string that is escaped using the following rules: * * A backslash followed by '\', 'a', 'b', 'f', 'n', 'r', 't', 'v', @@ -894,8 +919,9 @@ extern int strnncmp(const char *s1, const char *s2, size_t n1, size_t n2); * the instr). */ extern int string_unescape(const char *instr, char *outstr, - const unsigned int size, const char *escape_chars, - unsigned int *outsize); + const unsigned int size, + const enum string_unescape_style, + const char *escape_chars, unsigned int *outsize); /** String is '\0'-terminated. 
*/ # define QUOTE_0_TERMINATED 0x01 diff --git a/src/pathtrace.c b/src/pathtrace.c index 6836816ce..28ae5bdda 100644 --- a/src/pathtrace.c +++ b/src/pathtrace.c @@ -8,6 +8,7 @@ */ #include "defs.h" +#include <fnmatch.h> #include #include #include @@ -30,8 +31,16 @@ static bool pathmatch(const char *path, struct path_set *set, enum path_trace_flags flags) { for (unsigned int i = 0; i < set->num_selected; ++i) { - if (strcmp(path, set->paths_selected[i].path)) - continue; + enum path_trace_flags type = + ptf_type(set->paths_selected[i].flags); + if (type == PTF_TYPE_STR) { + if (strcmp(path, set->paths_selected[i].path)) + continue; + } else if (type == PTF_TYPE_GLOB) { + if (fnmatch(set->paths_selected[i].path, path, + set->paths_selected[i].fnmatch_flags)) + continue; + } if (flags & set->paths_selected[i].flags & PTF_DELETED_MASK) return true; @@ -81,9 +90,24 @@ storepath(const char *path, enum path_trace_flags flags, struct path_set *set) xgrowarray(set->paths_selected, &set->size, sizeof(set->paths_selected[0])); + if (!(flags & PTF_DELETED_MASK)) + flags |= PTF_DELETED_MASK; + if (!ptf_type(flags)) + flags |= PTF_TYPE_STR; + + if (ptf_type(flags) == PTF_TYPE_GLOB) { + int fnmf = 0; + + if (!(flags & PTF_GLOB_PATH)) + fnmf |= FNM_PATHNAME; + if (!(flags & PTF_GLOB_ALL)) + fnmf |= FNM_PERIOD; + + set->paths_selected[set->num_selected].fnmatch_flags = fnmf; + } + set->paths_selected[set->num_selected].path = path; - set->paths_selected[set->num_selected].flags = - flags | (!(flags & PTF_DELETED_MASK) ? 
PTF_DELETED_MASK : 0); + set->paths_selected[set->num_selected].flags = flags; set->num_selected++; } diff --git a/src/strace.c b/src/strace.c index 0c295d6b2..a622431fa 100644 --- a/src/strace.c +++ b/src/strace.c @@ -341,7 +341,12 @@ Filtering:\n\ specify a path filter entry\n\ qualifiers: path-str, pathstr (PATH_STR is used as a string),\n\ fd-deleted (fd is unlinked from PATH_STR),\n\ - fd-not-deleted (fd that is linked to PATH_STR)\n\ + fd-not-deleted (fd that is linked to PATH_STR),\n\ + str (PATH_STR is a literal string),\n\ + glob (PATH_STR is a glob expression),\n\ + glob-all (glob with wildcards matching leading dots),\n\ + glob-path (glob with wildcards matching slashes),\n\ + glob-all-path, glob-path-all (glob-all + glob-path)\n\ path_str: string with \\\\. \\:, \\a, \\b, \\f, \\n, \\r, \\t, \\v, \\0nnn, \\xnn\n\ escape sequences.\n\ -P PATH, --trace-path=PATH\n\ @@ -2162,10 +2167,25 @@ add_path_trace(struct pathtrace *pt, const char *path, /* Checks if a string is equal to some expected string literal */ #define CHECK_STR(str_, chk_, sz_) \ (!strnncmp((str_), (chk_), (sz_), sizeof(chk_) - 1)) +#define SET_PTF_TYPE(type_) \ + do { \ + if (ptf_type(flags) && (ptf_type(flags) != (type_))) { \ + error_msg_and_die("cannot set path filter type" \ + " to '%s' for '%s' since the type " \ + "'%s' is set already", \ + type_names[(type_)], optarg, \ + type_names[ptf_type(flags)]); \ + } \ + flags |= (type_); \ + } while (0) static void parse_path_filter_arg(struct pathtrace *pt, char *optarg) { + static const char *type_names[] = { + [PTF_TYPE_STR] = "string", + [PTF_TYPE_GLOB] = "glob", + }; char *arg = optarg; enum path_trace_flags flags = 0; @@ -2185,7 +2205,11 @@ parse_path_filter_arg(struct pathtrace *pt, char *optarg) pos += 1; - if (CHECK_STR(arg, "pathstr:", pos - arg)) { + if (CHECK_STR(arg, "str:", pos - arg)) { + SET_PTF_TYPE(PTF_TYPE_STR); + } else if (CHECK_STR(arg, "glob:", pos - arg)) { + SET_PTF_TYPE(PTF_TYPE_GLOB); + } else if (CHECK_STR(arg, 
"pathstr:", pos - arg)) { flags |= PTF_PATH_STR; } else if (CHECK_STR(arg, "path-str:", pos - arg)) { flags |= PTF_PATH_STR; @@ -2193,9 +2217,19 @@ parse_path_filter_arg(struct pathtrace *pt, char *optarg) flags |= PTF_FD_DELETED; } else if (CHECK_STR(arg, "fd-not-deleted:", pos - arg)) { flags |= PTF_FD_NOT_DELETED; + } else if (CHECK_STR(arg, "glob-path:", pos - arg)) { + SET_PTF_TYPE(PTF_TYPE_GLOB); + flags |= PTF_GLOB_PATH; + } else if (CHECK_STR(arg, "glob-all:", pos - arg)) { + SET_PTF_TYPE(PTF_TYPE_GLOB); + flags |= PTF_GLOB_ALL; + } else if (CHECK_STR(arg, "glob-path-all:", pos - arg) || + CHECK_STR(arg, "glob-all-path:", pos - arg)) { + SET_PTF_TYPE(PTF_TYPE_GLOB); + flags |= PTF_GLOB_ALL | PTF_GLOB_PATH; } else { error_msg_and_die("invalid path trace filter qualifier:" - " '%*s'", (int) (pos - arg), arg); + " '%.*s'", (int) (pos - arg), arg); } arg = pos; @@ -2203,14 +2237,22 @@ parse_path_filter_arg(struct pathtrace *pt, char *optarg) unsigned int argsz; size_t arglen = strlen(arg); - int ret = string_unescape(arg, arg, arglen, ":", &argsz); + int ret = string_unescape(arg, arg, arglen, + ptf_type(flags) == PTF_TYPE_GLOB + ? 
SUE_GLOB : SUE_LITERAL, + ":", &argsz); if (ret == INT_MIN) { error_msg_and_die("path trace filter argument is too big" " (size is %zu)", arglen); } else if (ret < 0) { - error_msg_and_die("invalid escaping: \\%c at position %d", - arg[-ret], -ret); + if (arg[-ret]) { + error_msg_and_die("invalid escaping: \\%c at position %d", + arg[-ret], -ret); + } else { + error_msg_and_die("backslash at the end of string" + " at position %d", -ret); + } } arglen = strlen(arg); diff --git a/src/util.c b/src/util.c index ee54be574..6be895951 100644 --- a/src/util.c +++ b/src/util.c @@ -855,8 +855,10 @@ strnncmp(const char *s1, const char *s2, size_t n1, size_t n2) int string_unescape(const char *instr, char *outstr, const unsigned int size, + const enum string_unescape_style style, const char *escape_chars, unsigned int *outsize) { + enum { UNINT_CHAR = 256 }; const char *inpos = instr; char *outpos = outstr; enum { @@ -867,6 +869,7 @@ string_unescape(const char *instr, char *outstr, const unsigned int size, } state = NORMAL; size_t cnt = 0; size_t val = 0; + uint16_t outchr; if ((size > INT_MAX) || ((uintptr_t) instr + size < (uintptr_t) instr)) return INT_MIN; @@ -885,27 +888,38 @@ string_unescape(const char *instr, char *outstr, const unsigned int size, break; case ESC: + outchr = UNINT_CHAR; switch (*inpos) { - case '\\': *outpos++ = '\\'; break; - case 'a': *outpos++ = '\a'; break; - case 'b': *outpos++ = '\b'; break; - case 'f': *outpos++ = '\f'; break; - case 'n': *outpos++ = '\n'; break; - case 'r': *outpos++ = '\r'; break; - case 't': *outpos++ = '\t'; break; - case 'v': *outpos++ = '\v'; break; + case '\\': outchr = '\\'; break; + case 'a': outchr = '\a'; break; + case 'b': outchr = '\b'; break; + case 'f': outchr = '\f'; break; + case 'n': outchr = '\n'; break; + case 'r': outchr = '\r'; break; + case 't': outchr = '\t'; break; + case 'v': outchr = '\v'; break; case '0': state = ESC_OCT; cnt = val = 0; break; case 'x': state = ESC_HEX; cnt = val = 0; break; default: - 
if (strchr(escape_chars, *inpos)) - *outpos++ = *inpos; + if ((style == SUE_GLOB) || + strchr(escape_chars, *inpos)) + outchr = *inpos; else return -(inpos - instr); } + + if (outchr != UNINT_CHAR) { + if (style == SUE_GLOB) + *outpos++ = '\\'; + *outpos++ = outchr; + } + break; case ESC_OCT: if (cnt >= 3 || *inpos < '0' || *inpos > '7') { + if (style == SUE_GLOB) + *outpos++ = '\\'; *outpos++ = val; state = NORMAL; inpos -= 1; @@ -928,6 +942,8 @@ string_unescape(const char *instr, char *outstr, const unsigned int size, state = NORMAL; if (state == NORMAL) { + if (style == SUE_GLOB) + *outpos++ = '\\'; *outpos++ = val; inpos -= 1; } else { @@ -940,12 +956,12 @@ string_unescape(const char *instr, char *outstr, const unsigned int size, inpos += 1; } out: - if (outpos - outstr < size) + if ((size_t) (outpos - outstr) < size) *outpos = '\0'; if (outsize) *outsize = outpos - outstr; - return inpos - instr < size ? inpos - instr : 0; + return (size_t) (inpos - instr) < size ? inpos - instr : 0; } /* -- cgit v1.2.1