diff options
Diffstat (limited to 'lib/glob')
-rw-r--r-- | lib/glob/collsyms.h | 129 | ||||
-rw-r--r-- | lib/glob/fnmatch.c | 816 | ||||
-rw-r--r-- | lib/glob/fnmatch.h | 22 | ||||
-rw-r--r-- | lib/glob/glob.c | 285 | ||||
-rw-r--r-- | lib/glob/glob.h | 1 |
5 files changed, 1002 insertions, 251 deletions
diff --git a/lib/glob/collsyms.h b/lib/glob/collsyms.h new file mode 100644 index 00000000..4f90083c --- /dev/null +++ b/lib/glob/collsyms.h @@ -0,0 +1,129 @@ +/* collsyms.h -- collating symbol names and their corresponding characters + (in ascii) as given by POSIX.2 in table 2.8. */ + +/* Copyright (C) 1997 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + Bash is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with Bash; see the file COPYING. If not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _COLLSYMS_H_ +# define _COLLSYSMS_H_ + +/* The upper-case letters, lower-case letters, and digits are omitted from + this table. The digits are not included in the table in the POSIX.2 + spec. The upper and lower case letters are translated by the code + in fnmatch.c:collsym(). 
*/ + +typedef struct _collsym { + char *name; + char code; +} COLLSYM; + +static COLLSYM posix_collsyms[] = +{ + "NUL", '\0', + "SOH", '\001', + "STX", '\002', + "ETX", '\003', + "EOT", '\004', + "ENQ", '\005', + "ACK", '\006', +#ifdef __STDC__ + "alert", '\a', +#else + "alert", '\007', +#endif + "backspace", '\b', + "tab", '\t', + "newline", '\n', + "vertical-tab", '\v', + "form-feed", '\f', + "carriage-return", '\r', + "SO", '\016', + "SI", '\017', + "DLE", '\020', + "DC1", '\021', + "DC2", '\022', + "DC3", '\023', + "DC4", '\024', + "NAK", '\025', + "SYN", '\026', + "ETB", '\027', + "CAN", '\030', + "EM", '\031', + "SUB", '\032', + "ESC", '\033', + "IS4", '\034', + "IS3", '\035', + "IS2", '\036', + "IS1", '\037', + "space", ' ', + "exclamation-mark", '!', + "quotation-mark", '"', + "number-sign", '#', + "dollar-sign", '$', + "percent-sign", '%', + "ampersand", '&', + "apostrophe", '\'', + "left-parenthesis", '(', + "right-parenthesis", ')', + "asterisk", '*', + "plus-sign", '+', + "comma", ',', + "hyphen", '-', + "minus", '-', /* extension from POSIX.2 */ + "dash", '-', /* extension from POSIX.2 */ + "period", '.', + "slash", '/', + "solidus", '/', /* extension from POSIX.2 */ + "zero", '0', + "one", '1', + "two", '2', + "three", '3', + "four", '4', + "five", '5', + "six", '6', + "seven", '7', + "eight", '8', + "nine", '9', + "colon", ':', + "semicolon", ';', + "less-than-sign", '<', + "equals-sign", '=', + "greater-than-sign", '>', + "question-mark", '?', + "commercial-at", '@', + /* upper-case letters omitted */ + "left-square-bracket",'[', + "backslash", '\\', + "reverse-solidus", '\\', + "right-square-bracket", ']', + "circumflex", '^', + "circumflex-accent", '^', /* extension from POSIX.2 */ + "underscore", '_', + "grave-accent", '`', + /* lower-case letters omitted */ + "left-brace", '{', /* extension from POSIX.2 */ + "left-curly-bracket", '{', + "vertical-line", '|', + "right-brace", '}', /* extension from POSIX.2 */ + "right-curly-bracket", '}', + 
"tilde", '~', + "DEL", '\177', + 0, 0, +}; + +#endif diff --git a/lib/glob/fnmatch.c b/lib/glob/fnmatch.c index 2f8eb240..b5fdcc16 100644 --- a/lib/glob/fnmatch.c +++ b/lib/glob/fnmatch.c @@ -1,238 +1,738 @@ -/* Copyright (C) 1991 Free Software Foundation, Inc. -This file is part of the GNU C Library. +/* fnmatch.c -- ksh-like extended pattern matching for the shell and filename + globbing. */ + +/* Copyright (C) 1991, 1997 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + Bash is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with Bash; see the file COPYING. If not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#include <config.h> + +#include "fnmatch.h" +#include "collsyms.h" +#include <ctype.h> -The GNU C Library is free software; you can redistribute it and/or -modify it under the terms of the GNU Library General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. +static int gmatch (); +static char *brackmatch (); +#ifdef EXTENDED_GLOB +static int extmatch (); +#endif + +#if !defined (isascii) +# define isascii(c) ((unsigned int)(c) <= 0177) +#endif -The GNU C Library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Library General Public License for more details. 
+/* Note that these evaluate C many times. */ -You should have received a copy of the GNU Library General Public -License along with the GNU C Library; see the file COPYING.LIB. If -not, write to the Free Software Foundation, Inc., 675 Mass Ave, -Cambridge, MA 02139, USA. */ +#define ISUPPER(c) (isascii (c) && isupper (c)) +#define ISLOWER(c) (isascii (c) && islower (c)) -#include <errno.h> -#include "fnmatch.h" +#ifndef isblank +# define isblank(c) ((c) == ' ' || (c) == '\t') +#endif -#if !defined (__GNU_LIBRARY__) && !defined (STDC_HEADERS) -# if !defined (errno) -extern int errno; -# endif /* !errno */ +#ifndef isgraph +# define isgraph(c) ((c) != ' ' && isprint((c))) #endif -/* Match STRING against the filename pattern PATTERN, returning zero if - it matches, FNM_NOMATCH if not. */ +#ifndef isxdigit +# define isxdigit(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F')) +#endif + +# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c)) + +#ifndef STREQ +#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0) +#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0) +#endif + +#if defined (HAVE_STRCOLL) +static int rangecmp (c1, c2) + int c1, c2; +{ + static char s1[2] = { ' ', '\0' }; + static char s2[2] = { ' ', '\0' }; + int ret; + + /* Eight bits only. Period. 
*/ + c1 &= 0xFF; + c2 &= 0xFF; + + if (c1 == c2) + return (0); + + s1[0] = c1; + s2[0] = c2; + + if ((ret = strcoll (s1, s2)) != 0) + return ret; + return (c1 - c2); +} +#else /* !HAVE_STRCOLL */ +# define rangecmp(c1, c2) ((c1) - (c2)) +#endif /* !HAVE_STRCOLL */ + +#if defined (HAVE_STRCOLL) +static int collequiv (c1, c2) + int c1, c2; +{ + return (rangecmp (c1, c2) == 0); +} +#else +# define collequiv(c1, c2) ((c1) == (c2)) +#endif + +static int +collsym (s, len) + char *s; + int len; +{ + register struct _collsym *csp; + + for (csp = posix_collsyms; csp->name; csp++) + { + if (STREQN(csp->name, s, len) && csp->name[len] == '\0') + return (csp->code); + } + if (len == 1) + return s[0]; + return -1; +} + int fnmatch (pattern, string, flags) char *pattern; char *string; int flags; { - register char *p = pattern, *n = string; - register char c; + char *se, *pe; - if ((flags & ~__FNM_FLAGS) != 0) - { - errno = EINVAL; - return (-1); - } + if (string == 0 || pattern == 0) + return FNM_NOMATCH; + + se = string + strlen (string); + pe = pattern + strlen (pattern); + + return (gmatch (string, se, pattern, pe, flags)); +} + +/* Match STRING against the filename pattern PATTERN, returning zero if + it matches, FNM_NOMATCH if not. */ +static int +gmatch (string, se, pattern, pe, flags) + char *string, *se; + char *pattern, *pe; + int flags; +{ + register char *p, *n; /* pattern, string */ + register char c; /* current pattern character */ + register char sc; /* current string character */ - while ((c = *p++) != '\0') + p = pattern; + n = string; + + if (string == 0 || pattern == 0) + return FNM_NOMATCH; + + while (p < pe) { + c = *p++; + c = FOLD (c); + + sc = n < se ? *n : '\0'; + +#ifdef EXTENDED_GLOB + if ((flags & FNM_EXTMATCH) && *p == '(' && + (c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */ + /* extmatch () will handle recursively calling gmatch, so we can + just return what extmatch() returns. 
*/ + return (extmatch (c, n, se, p, pe, flags)); +#endif + switch (c) { - case '?': - if (*n == '\0') - return (FNM_NOMATCH); - else if ((flags & FNM_PATHNAME) && *n == '/') + case '?': /* Match single character */ + if (sc == '\0') + return FNM_NOMATCH; + else if ((flags & FNM_PATHNAME) && sc == '/') /* If we are matching a pathname, `?' can never match a `/'. */ - return (FNM_NOMATCH); - else if ((flags & FNM_PERIOD) && *n == '.' && + return FNM_NOMATCH; + else if ((flags & FNM_PERIOD) && sc == '.' && (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) /* `?' cannot match a `.' if it is the first character of the string or if it is the first character following a slash and we are matching a pathname. */ - return (FNM_NOMATCH); + return FNM_NOMATCH; break; - case '\\': - if (!(flags & FNM_NOESCAPE)) + case '\\': /* backslash escape removes special meaning */ + if (p == pe) + return FNM_NOMATCH; + + if ((flags & FNM_NOESCAPE) == 0) { c = *p++; - if (c == '\0') - return (FNM_NOMATCH); + /* A trailing `\' cannot match. */ + if (p > pe) + return FNM_NOMATCH; + c = FOLD (c); } - if (*n != c) - return (FNM_NOMATCH); + if (FOLD (sc) != c) + return FNM_NOMATCH; break; - case '*': - if ((flags & FNM_PERIOD) && *n == '.' && + case '*': /* Match zero or more characters */ + if (p == pe) + return 0; + + if ((flags & FNM_PERIOD) && sc == '.' && (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) /* `*' cannot match a `.' if it is the first character of the string or if it is the first character following a slash and we are matching a pathname. */ - return (FNM_NOMATCH); + return FNM_NOMATCH; /* Collapse multiple consecutive, `*' and `?', but make sure that one character of the string is consumed for each `?'. */ - for (c = *p++; c == '?' || c == '*'; c = *p++) + for (c = *p++; (c == '?' || c == '*'); c = *p++) { - if ((flags & FNM_PATHNAME) && *n == '/') + if ((flags & FNM_PATHNAME) && sc == '/') /* A slash does not match a wildcard under FNM_PATHNAME. 
*/ - return (FNM_NOMATCH); + return FNM_NOMATCH; else if (c == '?') { - if (*n == '\0') - return (FNM_NOMATCH); + if (sc == '\0') + return FNM_NOMATCH; /* One character of the string is consumed in matching this ? wildcard, so *??? won't match if there are fewer than three characters. */ n++; + sc = n < se ? *n : '\0'; } + +#ifdef EXTENDED_GLOB + /* Handle ******(patlist) */ + if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(') /*)*/ + return (extmatch (c, n, se, p, pe, flags)); +#endif + if (p == pe) + break; } - if (c == '\0') + /* If we've hit the end of the pattern and the last character of + the pattern was handled by the loop above, we've succeeded. + Otherwise, we need to match that last character. */ + if (p == pe && (c == '?' || c == '*')) return (0); /* General case, use recursion. */ { - char c1 = (!(flags & FNM_NOESCAPE) && c == '\\') ? *p : c; - for (--p; *n != '\0'; ++n) + char c1; + + c1 = ((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c; + c1 = FOLD (c1); + for (--p; n < se; ++n) /* Only call fnmatch if the first character indicates a possible match. */ - if ((c == '[' || *n == c1) && - fnmatch (p, n, flags & ~FNM_PERIOD) == 0) + if ((c == '[' || FOLD (*n) == c1) && + gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0) return (0); - return (FNM_NOMATCH); + return FNM_NOMATCH; } case '[': { - /* Nonzero if the sense of the character class is inverted. */ - register int not; - - if (*n == '\0') - return (FNM_NOMATCH); + if (sc == '\0' || n == se) + return FNM_NOMATCH; /* A character class cannot match a `.' if it is the first character of the string or if it is the first character following a slash and we are matching a pathname. */ - if ((flags & FNM_PERIOD) && *n == '.' && + if ((flags & FNM_PERIOD) && sc == '.' 
&& (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) return (FNM_NOMATCH); - /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that - is not preceded by a backslash and is not part of a bracket - expression produces undefined results.' This implementation - treats the `[' as just a character to be matched if there is - not a closing `]'. This code will have to be changed when - POSIX.2 character classes are implemented. */ - { - register char *np; - - for (np = p; np && *np && *np != ']'; np++) - ; - - if (np && !*np) - { - if (*n != '[') - return (FNM_NOMATCH); - break; - } - } - - not = (*p == '!' || *p == '^'); - if (not) - ++p; - - c = *p++; - for (;;) - { - register char cstart, cend; - - /* Initialize cstart and cend in case `-' is the last - character of the pattern. */ - cstart = cend = c; - - if (!(flags & FNM_NOESCAPE) && c == '\\') - { - if (*p == '\0') - return FNM_NOMATCH; - cstart = cend = *p++; - } - - if (c == '\0') - /* [ (unterminated) loses. */ - return (FNM_NOMATCH); - - c = *p++; - - if ((flags & FNM_PATHNAME) && c == '/') - /* [/] can never match. */ - return (FNM_NOMATCH); - - /* This introduces a range, unless the `-' is the last - character of the class. Find the end of the range - and move past it. */ - if (c == '-' && *p != ']') - { - cend = *p++; - if (!(flags & FNM_NOESCAPE) && cend == '\\') - cend = *p++; - if (cend == '\0') - return (FNM_NOMATCH); - - c = *p++; - } - - if (*n >= cstart && *n <= cend) - goto matched; - - if (c == ']') - break; - } - if (!not) - return (FNM_NOMATCH); - break; - - matched: - /* Skip the rest of the [...] that already matched. */ - while (c != ']') - { - if (c == '\0') - /* [... (unterminated) loses. */ - return (FNM_NOMATCH); - - c = *p++; - if (!(flags & FNM_NOESCAPE) && c == '\\') - { - if (*p == '\0') - return FNM_NOMATCH; - /* XXX 1003.2d11 is unclear if this is right. 
*/ - ++p; - } - } - if (not) - return (FNM_NOMATCH); + p = brackmatch (p, sc, flags); + if (p == 0) + return FNM_NOMATCH; } break; default: - if (c != *n) + if (c != FOLD (sc)) return (FNM_NOMATCH); } ++n; } - if (*n == '\0') + if (n == se) return (0); + if ((flags & FNM_LEADING_DIR) && *n == '/') + /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ + return 0; + + return (FNM_NOMATCH); +} + +/* Parse a bracket expression collating symbol ([.sym.]) starting at P, find + the value of the symbol, and move P past the collating symbol expression. + The value is returned in *VP, if VP is not null. */ +static char * +parse_collsym (p, vp) + char *p; + int *vp; +{ + register int pc; + int val; + + p++; /* move past the `.' */ + + for (pc = 0; p[pc]; pc++) + if (p[pc] == '.' && p[pc+1] == ']') + break; + val = collsym (p, pc); + if (vp) + *vp = val; + return (p + pc + 2); +} + +static char * +brackmatch (p, test, flags) + char *p; + unsigned char test; + int flags; +{ + register char cstart, cend, c; + register int not; /* Nonzero if the sense of the character class is inverted. */ + int pc, brcnt; + char *savep; + + test = FOLD (test); + + savep = p; + + /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the + circumflex (`^') in its role in a `nonmatching list'. A bracket + expression starting with an unquoted circumflex character produces + unspecified results. This implementation treats the two identically. */ + if (not = (*p == '!' || *p == '^')) + ++p; + + c = *p++; + for (;;) + { + /* Initialize cstart and cend in case `-' is the last + character of the pattern. */ + cstart = cend = c; + + /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find + the end of the equivalence class, move the pattern pointer past + it, and check for equivalence. XXX - this handles only + single-character equivalence classes, which is wrong, or at + least incomplete. 
*/ + if (c == '[' && *p == '=' && p[2] == '=' && p[3] == ']') + { + pc = FOLD (p[1]); + p += 4; + if (collequiv (test, pc)) + goto matched; + else + { + c = *p++; + if (c == '\0') + return ((test == '[') ? savep : (char *)0); + c = FOLD (c); + continue; + } + } + + /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */ + if (c == '[' && *p == ':') + { + pc = 0; /* make sure invalid char classes don't match. */ + if (STREQN (p+1, "alnum:]", 7)) + { pc = isalnum (test); p += 8; } + else if (STREQN (p+1, "alpha:]", 7)) + { pc = isalpha (test); p += 8; } + else if (STREQN (p+1, "blank:]", 7)) + { pc = isblank (test); p += 8; } + else if (STREQN (p+1, "cntrl:]", 7)) + { pc = iscntrl (test); p += 8; } + else if (STREQN (p+1, "digit:]", 7)) + { pc = isdigit (test); p += 8; } + else if (STREQN (p+1, "graph:]", 7)) + { pc = isgraph (test); p += 8; } + else if (STREQN (p+1, "lower:]", 7)) + { pc = ISLOWER (test); p += 8; } + else if (STREQN (p+1, "print:]", 7)) + { pc = isprint (test); p += 8; } + else if (STREQN (p+1, "punct:]", 7)) + { pc = ispunct (test); p += 8; } + else if (STREQN (p+1, "space:]", 7)) + { pc = isspace (test); p += 8; } + else if (STREQN (p+1, "upper:]", 7)) + { pc = ISUPPER (test); p += 8; } + else if (STREQN (p+1, "xdigit:]", 8)) + { pc = isxdigit (test); p += 9; } + else if (STREQN (p+1, "ascii:]", 7)) + { pc = isascii (test); p += 8; } + if (pc) + goto matched; + else + { + /* continue the loop here, since this expression can't be + the first part of a range expression. */ + c = *p++; + if (c == '\0') + return ((test == '[') ? savep : (char *)0); + else if (c == ']') + break; + c = FOLD (c); + continue; + } + } + + /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of + the symbol name, make sure it is terminated by `.]', translate + the name to a character using the external table, and do the + comparison. 
*/ + if (c == '[' && *p == '.') + { + p = parse_collsym (p, &pc); + /* An invalid collating symbol cannot be the first point of a + range. If it is, we set cstart to one greater than `test', + so any comparisons later will fail. */ + cstart = (pc == -1) ? test + 1 : pc; + } + + if (!(flags & FNM_NOESCAPE) && c == '\\') + { + if (*p == '\0') + return (char *)0; + cstart = cend = *p++; + } + + cstart = cend = FOLD (cstart); + + /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that + is not preceded by a backslash and is not part of a bracket + expression produces undefined results.' This implementation + treats the `[' as just a character to be matched if there is + not a closing `]'. */ + if (c == '\0') + return ((test == '[') ? savep : (char *)0); + + c = *p++; + c = FOLD (c); + + if ((flags & FNM_PATHNAME) && c == '/') + /* [/] can never match when matching a pathname. */ + return (char *)0; + + /* This introduces a range, unless the `-' is the last + character of the class. Find the end of the range + and move past it. */ + if (c == '-' && *p != ']') + { + cend = *p++; + if (!(flags & FNM_NOESCAPE) && cend == '\\') + cend = *p++; + if (cend == '\0') + return (char *)0; + if (cend == '[' && *p == '.') + { + p = parse_collsym (p, &pc); + /* An invalid collating symbol cannot be the second part of a + range expression. If we get one, we set cend to one fewer + than the test character to make sure the range test fails. */ + cend = (pc == -1) ? test - 1 : pc; + } + cend = FOLD (cend); + + c = *p++; + + /* POSIX.2 2.8.3.2: ``The ending range point shall collate + equal to or higher than the starting range point; otherwise + the expression shall be treated as invalid.'' Note that this + applies to only the range expression; the rest of the bracket + expression is still checked for matches. 
*/ + if (rangecmp (cstart, cend) > 0) + { + if (c == ']') + break; + c = FOLD (c); + continue; + } + } + + if (rangecmp (test, cstart) >= 0 && rangecmp (test, cend) <= 0) + goto matched; + + if (c == ']') + break; + } + /* No match. */ + return (!not ? (char *)0 : p); + +matched: + /* Skip the rest of the [...] that already matched. */ + brcnt = (c != ']') + (c == '[' && (*p == '=' || *p == ':' || *p == '.')); + while (brcnt > 0) + { + /* A `[' without a matching `]' is just another character to match. */ + if (c == '\0') + return ((test == '[') ? savep : (char *)0); + + c = *p++; + if (c == '[' && (*p == '=' || *p == ':' || *p == '.')) + brcnt++; + else if (c == ']') + brcnt--; + else if (!(flags & FNM_NOESCAPE) && c == '\\') + { + if (*p == '\0') + return (char *)0; + /* XXX 1003.2d11 is unclear if this is right. */ + ++p; + } + } + return (not ? (char *)0 : p); +} + +#if defined (EXTENDED_GLOB) +/* ksh-like extended pattern matching: + + [?*+@!](pat-list) + + where pat-list is a list of one or more patterns separated by `|'. Operation + is as follows: + + ?(patlist) match zero or one of the given patterns + *(patlist) match zero or more of the given patterns + +(patlist) match one or more of the given patterns + @(patlist) match exactly one of the given patterns + !(patlist) match anything except one of the given patterns +*/ + +/* Scan a pattern starting at STRING and ending at END, keeping track of + embedded () and []. If DELIM is 0, we scan until a matching `)' + because we're scanning a `patlist'. Otherwise, we scan until we see + DELIM. In all cases, we never scan past END. The return value is the + first character after the matching DELIM. 
*/ +static char * +patscan (string, end, delim) + char *string, *end; + int delim; +{ + int pnest, bnest; + char *s, c; + + pnest = bnest = 0; + for (s = string; c = *s; s++) + { + switch (c) + { + case '\0': + return ((char *)0); + case '[': + bnest++; + break; + case ']': + if (bnest) + bnest--; + break; + case '(': + if (bnest == 0) + pnest++; + break; + case ')': + if (bnest == 0) + pnest--; + if (pnest <= 0) + return ++s; + break; + case '|': + if (bnest == 0 && pnest == 0 && delim == '|') + return ++s; + break; + } + } + return (char *)0; +} + +/* Return 0 if dequoted pattern matches S in the current locale. */ +static int +strcompare (p, pe, s, se) + char *p, *pe, *s, *se; +{ + int ret; + char c1, c2; + + c1 = *pe; + c2 = *se; + + *pe = *se = '\0'; +#if defined (HAVE_STRCOLL) + ret = strcoll (p, s); +#else + ret = strcmp (p, s); +#endif + + *pe = c1; + *se = c2; + + return (ret == 0 ? ret : FNM_NOMATCH); +} + +/* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or + 0 on success. This is handed the entire rest of the pattern and string + the first time an extended pattern specifier is encountered, so it calls + gmatch recursively. */ +static int +extmatch (xc, s, se, p, pe, flags) + int xc; /* select which operation */ + char *s, *se; + char *p, *pe; + int flags; +{ + char *prest; /* pointer to rest of pattern */ + char *psub; /* pointer to sub-pattern */ + char *pnext; /* pointer to next sub-pattern */ + char *srest; /* pointer to rest of string */ + int m1, m2; + + switch (xc) + { + case '+': /* match one or more occurrences */ + case '*': /* match zero or more occurrences */ + prest = patscan (p, pe, 0); + if (prest == 0) + /* If PREST is 0, we failed to scan a valid pattern. In this + case, we just want to compare the two as strings. */ + return (strcompare (p - 1, pe, s, se)); + + /* If we can get away with no matches, don't even bother. Just + call gmatch on the rest of the pattern and return success if + it succeeds. 
*/ + if (xc == '*' && (gmatch (s, se, prest, pe, flags) == 0)) + return 0; + + /* OK, we have to do this the hard way. First, we make sure one of + the subpatterns matches, then we try to match the rest of the + string. */ + for (psub = p + 1; ; psub = pnext) + { + pnext = patscan (psub, pe, '|'); + for (srest = s; srest <= se; srest++) + { + /* Match this substring (S -> SREST) against this + subpattern (psub -> pnext - 1) */ + m1 = gmatch (s, srest, psub, pnext - 1, flags) == 0; + /* OK, we matched a subpattern, so make sure the rest of the + string matches the rest of the pattern. Also handle + multiple matches of the pattern. */ + if (m1) + m2 = (gmatch (srest, se, prest, pe, flags) == 0) || + (s != srest && gmatch (srest, se, p - 1, pe, flags) == 0); + if (m1 && m2) + return (0); + } + if (pnext == prest) + break; + } + return (FNM_NOMATCH); + + case '?': /* match zero or one of the patterns */ + case '@': /* match exactly one of the patterns */ + prest = patscan (p, pe, 0); + if (prest == 0) + return (strcompare (p - 1, pe, s, se)); + + /* If we can get away with no matches, don't even bother. Just + call gmatch on the rest of the pattern and return success if + it succeeds. */ + if (xc == '?' && (gmatch (s, se, prest, pe, flags) == 0)) + return 0; + + /* OK, we have to do this the hard way. First, we see if one of + the subpatterns matches, then, if it does, we try to match the + rest of the string. */ + for (psub = p + 1; ; psub = pnext) + { + pnext = patscan (psub, pe, '|'); + srest = (prest == pe) ? 
se : s; + for ( ; srest <= se; srest++) + { + if (gmatch (s, srest, psub, pnext - 1, flags) == 0 && + gmatch (srest, se, prest, pe, flags) == 0) + return (0); + } + if (pnext == prest) + break; + } + return (FNM_NOMATCH); + + case '!': /* match anything *except* one of the patterns */ + prest = patscan (p, pe, 0); + if (prest == 0) + return (strcompare (p - 1, pe, s, se)); + + for (srest = s; srest <= se; srest++) + { + m1 = 0; + for (psub = p + 1; ; psub = pnext) + { + pnext = patscan (psub, pe, '|'); + /* If one of the patterns matches, just bail immediately. */ + if (m1 = (gmatch (s, srest, psub, pnext - 1, flags) == 0)) + break; + if (pnext == prest) + break; + } + if (m1 == 0 && gmatch (srest, se, prest, pe, flags) == 0) + return (0); + } + return (FNM_NOMATCH); + } + return (FNM_NOMATCH); } +#endif /* EXTENDED_GLOB */ + +#ifdef TEST +main (c, v) + int c; + char **v; +{ + char *string, *pat; + + string = v[1]; + pat = v[2]; + + if (fnmatch (pat, string, 0) == 0) + { + printf ("%s matches %s\n", string, pat); + exit (0); + } + else + { + printf ("%s does not match %s\n", string, pat); + exit (1); + } +} +#endif diff --git a/lib/glob/fnmatch.h b/lib/glob/fnmatch.h index 62c8c8fa..ac0ba202 100644 --- a/lib/glob/fnmatch.h +++ b/lib/glob/fnmatch.h @@ -17,14 +17,24 @@ not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef _FNMATCH_H - #define _FNMATCH_H 1 +/* We #undef these before defining them because some losing systems + (HP-UX A.08.07 for example) define these in <unistd.h>. */ +#undef FNM_PATHNAME +#undef FNM_NOESCAPE +#undef FNM_PERIOD + /* Bits set in the FLAGS argument to `fnmatch'. */ -#define FNM_PATHNAME (1 << 0)/* No wildcard can ever match `/'. */ -#define FNM_NOESCAPE (1 << 1)/* Backslashes don't quote special chars. */ -#define FNM_PERIOD (1 << 2)/* Leading `.' is matched only explicitly. 
*/ -#define __FNM_FLAGS (FNM_PATHNAME|FNM_NOESCAPE|FNM_PERIOD) +/* standard flags */ +#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */ +#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */ +#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */ + +/* extended flags */ +#define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */ +#define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */ +#define FNM_EXTMATCH (1 << 5) /* Use ksh-like extended matching. */ /* Value returned by `fnmatch' if STRING does not match PATTERN. */ #define FNM_NOMATCH 1 @@ -33,4 +43,4 @@ Cambridge, MA 02139, USA. */ returning zero if it matches, FNM_NOMATCH if not. */ extern int fnmatch(); -#endif /* fnmatch.h */ +#endif /* _FNMATCH_H */ diff --git a/lib/glob/glob.c b/lib/glob/glob.c index 6d2f58fa..6a9679fc 100644 --- a/lib/glob/glob.c +++ b/lib/glob/glob.c @@ -25,20 +25,29 @@ #pragma alloca #endif /* _AIX && RISC6000 && !__GNUC__ */ +#if defined (SHELL) +# include "bashtypes.h" +#else +# include <sys/types.h> +#endif + #if defined (HAVE_UNISTD_H) # include <unistd.h> #endif -#if defined (HAVE_STDLIB_H) -# include <stdlib.h> +#if defined (SHELL) +# include "bashansi.h" #else -# if defined (SHELL) -# include "ansi_stdlib.h" -# endif /* SHELL */ +# if defined (HAVE_STDLIB_H) +# include <stdlib.h> +# endif +# if defined (HAVE_STRING_H) +# include <string.h> +# else /* !HAVE_STRING_H */ +# include <strings.h> +# endif /* !HAVE_STRING_H */ #endif -#include <sys/types.h> - #if defined (HAVE_DIRENT_H) # include <dirent.h> # define D_NAMLEN(d) strlen ((d)->d_name) @@ -66,27 +75,25 @@ # define REAL_DIR_ENTRY(dp) (dp->d_ino != 0) #endif /* _POSIX_SOURCE */ -#if defined (HAVE_STRING_H) -# include <string.h> -#else /* !HAVE_STRING_H */ -# include <strings.h> -#endif /* !HAVE_STRING_H */ - #if !defined (HAVE_BCOPY) # define bcopy(s, d, n) ((void) memcpy ((d), (s), (n))) #endif /* !HAVE_BCOPY */ -/* If the opendir () on your 
system lets you open non-directory files, - then we consider that not robust. */ -#if defined (OPENDIR_NOT_ROBUST) -# if defined (SHELL) -# include "posixstat.h" -# else /* !SHELL */ -# include <sys/stat.h> -# endif /* !SHELL */ -#endif /* OPENDIR_NOT_ROBUST */ - -#include "memalloc.h" +#if defined (SHELL) +# include "posixstat.h" +#else /* !SHELL */ +# include <sys/stat.h> +#endif /* !SHELL */ + +#include "filecntl.h" +#if !defined (F_OK) +# define F_OK 0 +#endif + +#if defined (SHELL) +# include "memalloc.h" +#endif + #include "fnmatch.h" #if !defined (HAVE_STDLIB_H) && !defined (SHELL) @@ -104,14 +111,20 @@ extern void free (); #if defined (SHELL) extern void throw_to_top_level (); +extern int test_eaccess (); extern int interrupt_state; +extern int extended_glob; #endif /* SHELL */ /* Global variable which controls whether or not * matches .*. Non-zero means don't match .*. */ int noglob_dot_filenames = 1; +/* Global variable which controls whether or not filename globbing + is done without regard to case. */ +int glob_ignore_case = 0; + /* Global variable to return to signify an error in globbing. */ char *glob_error_return; @@ -120,9 +133,12 @@ int glob_pattern_p (pattern) char *pattern; { - register char *p = pattern; + register char *p; register char c; - int open = 0; + int bopen; + + p = pattern; + bopen = 0; while ((c = *p++) != '\0') switch (c) @@ -132,13 +148,20 @@ glob_pattern_p (pattern) return (1); case '[': /* Only accept an open brace if there is a close */ - open++; /* brace to match it. Bracket expressions must be */ + bopen++; /* brace to match it. Bracket expressions must be */ continue; /* complete, according to Posix.2 */ case ']': - if (open) + if (bopen) return (1); continue; + case '+': /* extended matching operators */ + case '@': + case '!': + if (*p == '(') /*) */ + return (1); + continue; + case '\\': if (*p++ == '\0') return (0); @@ -168,6 +191,35 @@ dequote_pathname (pathname) } + +/* Test whether NAME exists. 
*/ + +#if defined (HAVE_LSTAT) +# define GLOB_TESTNAME(name) (lstat (name, &finfo)) +#else /* !HAVE_LSTAT */ +# if defined (SHELL) && !defined (AFS) +# define GLOB_TESTNAME(name) (test_eaccess (nextname, F_OK)) +# else /* !SHELL || AFS */ +# define GLOB_TESTNAME(name) (access (nextname, F_OK)) +# endif /* !SHELL || AFS */ +#endif /* !HAVE_LSTAT */ + +/* Return 0 if DIR is a directory, -1 otherwise. */ +static int +glob_testdir (dir) + char *dir; +{ + struct stat finfo; + + if (stat (dir, &finfo) < 0) + return (-1); + + if (S_ISDIR (finfo.st_mode) == 0) + return (-1); + + return (0); +} + /* Return a vector of names of files in directory DIR whose names match glob pattern PAT. The names are not in any particular order. @@ -204,103 +256,161 @@ glob_vector (pat, dir) int lose, skip; register char **name_vector; register unsigned int i; -#if defined (OPENDIR_NOT_ROBUST) - struct stat finfo; - - if (stat (dir, &finfo) < 0) - return ((char **) &glob_error_return); - - if (!S_ISDIR (finfo.st_mode)) - return ((char **) &glob_error_return); -#endif /* OPENDIR_NOT_ROBUST */ - - d = opendir (dir); - if (d == NULL) - return ((char **) &glob_error_return); + int flags; /* Flags passed to fnmatch (). */ lastlink = 0; - count = 0; - lose = 0; - skip = 0; + count = lose = skip = 0; /* If PAT is empty, skip the loop, but return one (empty) filename. */ - if (!pat || !*pat) + if (pat == 0 || *pat == '\0') { + if (glob_testdir (dir) < 0) + return ((char **) &glob_error_return); + nextlink = (struct globval *)alloca (sizeof (struct globval)); - nextlink->next = lastlink; + nextlink->next = (struct globval *)0; nextname = (char *) malloc (1); - if (!nextname) + if (nextname == 0) lose = 1; else { lastlink = nextlink; nextlink->name = nextname; nextname[0] = '\0'; - count++; + count = 1; } + skip = 1; } - /* Scan the directory, finding all names that match. - For each name that matches, allocate a struct globval - on the stack and store the name in it. 
- Chain those structs together; lastlink is the front of the chain. */ - while (!skip) + /* If the filename pattern (PAT) does not contain any globbing characters, + we can dispense with reading the directory, and just see if there is + a filename `DIR/PAT'. If there is, and we can access it, just make the + vector to return and bail immediately. */ + if (skip == 0 && glob_pattern_p (pat) == 0) { - int flags; /* Flags passed to fnmatch (). */ -#if defined (SHELL) - /* Make globbing interruptible in the bash shell. */ - if (interrupt_state) + int dirlen; + struct stat finfo; + + if (glob_testdir (dir) < 0) + return ((char **) &glob_error_return); + + dirlen = strlen (dir); + nextname = (char *)malloc (dirlen + strlen (pat) + 2); + if (nextname == 0) + lose = 1; + else { - closedir (d); - lose = 1; - goto lost; + strcpy (nextname, dir); + nextname[dirlen++] = '/'; + strcpy (nextname + dirlen, pat); + + if (GLOB_TESTNAME (nextname) >= 0) + { + free (nextname); + nextlink = (struct globval *)alloca (sizeof (struct globval)); + nextlink->next = (struct globval *)0; + nextname = (char *) malloc (strlen (pat) + 1); + if (nextname == 0) + lose = 1; + else + { + lastlink = nextlink; + nextlink->name = nextname; + strcpy (nextname, pat); + count = 1; + } + } + else + free (nextname); } -#endif /* SHELL */ - - dp = readdir (d); - if (dp == NULL) - break; - /* If this directory entry is not to be used, try again. */ - if (!REAL_DIR_ENTRY (dp)) - continue; + skip = 1; + } - /* If a dot must be explicity matched, check to see if they do. */ - if (noglob_dot_filenames && dp->d_name[0] == '.' && pat[0] != '.' && - (pat[0] != '\\' || pat[1] != '.')) - continue; + if (skip == 0) + { + /* Open the directory, punting immediately if we cannot. If opendir + is not robust (i.e., it opens non-directories successfully), test + that DIR is a directory and punt if it's not. 
*/ +#if defined (OPENDIR_NOT_ROBUST) + if (glob_testdir (dir) < 0) + return ((char **) &glob_error_return); +#endif + d = opendir (dir); + if (d == NULL) + return ((char **) &glob_error_return); + + /* Compute the flags that will be passed to fnmatch(). We don't + need to do this every time through the loop. */ flags = (noglob_dot_filenames ? FNM_PERIOD : 0) | FNM_PATHNAME; - if (fnmatch (pat, dp->d_name, flags) != FNM_NOMATCH) +#ifdef FNM_CASEFOLD + if (glob_ignore_case) + flags |= FNM_CASEFOLD; +#endif + +#ifdef SHELL + if (extended_glob) + flags |= FNM_EXTMATCH; +#endif + + /* Scan the directory, finding all names that match. + For each name that matches, allocate a struct globval + on the stack and store the name in it. + Chain those structs together; lastlink is the front of the chain. */ + while (1) { - nextlink = (struct globval *) alloca (sizeof (struct globval)); - nextlink->next = lastlink; - nextname = (char *) malloc (D_NAMLEN (dp) + 1); - if (nextname == NULL) +#if defined (SHELL) + /* Make globbing interruptible in the shell. */ + if (interrupt_state) { lose = 1; break; } - lastlink = nextlink; - nextlink->name = nextname; - bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1); - ++count; +#endif /* SHELL */ + + dp = readdir (d); + if (dp == NULL) + break; + + /* If this directory entry is not to be used, try again. */ + if (REAL_DIR_ENTRY (dp) == 0) + continue; + + /* If a dot must be explicity matched, check to see if they do. */ + if (noglob_dot_filenames && dp->d_name[0] == '.' && pat[0] != '.' 
&& + (pat[0] != '\\' || pat[1] != '.')) + continue; + + if (fnmatch (pat, dp->d_name, flags) != FNM_NOMATCH) + { + nextlink = (struct globval *) alloca (sizeof (struct globval)); + nextlink->next = lastlink; + nextname = (char *) malloc (D_NAMLEN (dp) + 1); + if (nextname == NULL) + { + lose = 1; + break; + } + lastlink = nextlink; + nextlink->name = nextname; + bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1); + ++count; + } } + + (void) closedir (d); } - (void) closedir (d); - if (!lose) + if (lose == 0) { name_vector = (char **) malloc ((count + 1) * sizeof (char *)); lose |= name_vector == NULL; } /* Have we run out of memory? */ -#if defined (SHELL) - lost: -#endif if (lose) { /* Here free the strings we have got. */ @@ -313,7 +423,8 @@ glob_vector (pat, dir) if (interrupt_state) throw_to_top_level (); #endif /* SHELL */ - return (NULL); + + return ((char **)NULL); } /* Copy the name pointers from the linked list into the vector. */ diff --git a/lib/glob/glob.h b/lib/glob/glob.h index a72dede1..ac83f1ef 100644 --- a/lib/glob/glob.h +++ b/lib/glob/glob.h @@ -26,5 +26,6 @@ extern char **glob_filename __P((char *)); extern char *glob_error_return; extern int noglob_dot_filenames; +extern int glob_ignore_case; #endif /* _GLOB_H_ */ |