diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2020-05-31 10:14:41 -0700 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2020-05-31 10:15:31 -0700 |
commit | 67306f600fe6a3bcf3fbb6d8bf4b8953b74a8fb7 (patch) | |
tree | bc875a8d73eeb3dec77730a8699c6a929e1210b6 /lib/fnmatch.c | |
parent | 9de2f9ad0a12f0c048e9c9e573f77f35288f0c43 (diff) | |
download | gnulib-67306f600fe6a3bcf3fbb6d8bf4b8953b74a8fb7.tar.gz |
fnmatch: merge from glibc
Also, merge in Gnulib’s more-recent methods of making it easier
to share between Gnulib and glibc.
* lib/fnmatch.c: Reorder includes to match glibc better.
Include libc-config.h instead of config.h.
Include alloca.h only if _LIBC || HAVE_ALLOCA.
Do not include "../locale/elem-hash.h" if _LIBC.
Define macros for btowc, etc. if _LIBC. All uses simplified.
Define FALLTHROUGH if _LIBC, instead of including attribute.h.
Include intprops.h, since glibc has it now.
(SIZE_MAX): Remove; use (size_t) -1 instead.
Omit the "Comment out all this code" ifdef, since Gnulib
has never really needed it.
(STREQ): Remove; no longer used.
(__libc_use_alloca, alloca, alloca_account): Define as
needed if !_LIBC.
(ISWCTYPE): Remove; all uses replaced by iswctype.
(HANDLE_MULTIBYTE): Remove. All uses removed by assuming true.
(internal_function): Remove. All uses removed.
(STRUCT): New macro.
(WIDE_CHAR_VERSION): Define to 0 instead of leaving undefined.
(WMEMCMP): New macro.
(FINDIDX): Define if _LIBC, and include <locale/weight.h>
and <locale/weightwc.h>.
(fnmatch): Prefer __glibc_likely and __glibc_unlikely to
__builtin_expect. Check for integer overflow more
systematically. Account for alloca storage better when
recursive. Use strnlen instead of strlen for efficiency.
* lib/fnmatch_loop.c: Include stdint.h if _LIBC, for int32_t etc.
(struct STRUCT): New type.
(FCT, EXT): New ENDS and ALLOCA_USED args.
All callers changed.
(FCT): Prefer __glibc_unlikely to __builtin_expect.
Simplify by assuming WIDE_CHAR_SUPPORT.
Copy _LIBC code from glibc without worrying Gnulib compatibility.
Cast cold to UCHAR to avoid signedness warning.
(END): Check for invalid pattern.
(EXT): Improve alloca/malloc checking (taken from glibc),
and improve it some more by using intprops.h and checking
for integer overflow and using bool for booleans.
* lib/libc-config.h (compat_symbol): New macro.
(versioned_symbol): Make it ‘extern int dummy’ so that it’s
acceptable to non-GCC when a trailing semicolon is added.
* modules/fnmatch (Depends-on): Add alloca-opt, intprops,
libc-config, strnlen. Remove alloca.
Diffstat (limited to 'lib/fnmatch.c')
-rw-r--r-- | lib/fnmatch.c | 392 |
1 files changed, 216 insertions, 176 deletions
diff --git a/lib/fnmatch.c b/lib/fnmatch.c index db4da5e6a6..4d017cfebe 100644 --- a/lib/fnmatch.c +++ b/lib/fnmatch.c @@ -1,20 +1,22 @@ -/* Copyright (C) 1991-1993, 1996-2007, 2009-2020 Free Software Foundation, Inc. +/* Copyright (C) 1991-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, see <https://www.gnu.org/licenses/>. */ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ #ifndef _LIBC -# include <config.h> +# include <libc-config.h> #endif /* Enable GNU extensions in fnmatch.h. */ @@ -24,83 +26,87 @@ #include <fnmatch.h> -#include <alloca.h> #include <assert.h> -#include <ctype.h> #include <errno.h> -#include <stddef.h> -#include <stdbool.h> -#include <stdlib.h> +#include <ctype.h> #include <string.h> +#include <stdlib.h> +#if defined _LIBC || HAVE_ALLOCA +# include <alloca.h> +#endif #include <wchar.h> #include <wctype.h> +#include <stddef.h> +#include <stdbool.h> /* We need some of the locale data (the collation sequence information) but there is no interface to get this information in general. Therefore we support a correct implementation only in glibc. */ #ifdef _LIBC # include "../locale/localeinfo.h" -# include "../locale/elem-hash.h" # include "../locale/coll-lookup.h" # include <shlib-compat.h> # define CONCAT(a,b) __CONCAT(a,b) +# define btowc __btowc +# define iswctype __iswctype # define mbsrtowcs __mbsrtowcs +# define mempcpy __mempcpy +# define strnlen __strnlen +# define towlower __towlower +# define wcscat __wcscat +# define wcslen __wcslen +# define wctype __wctype +# define wmemchr __wmemchr +# define wmempcpy __wmempcpy # define fnmatch __fnmatch extern int fnmatch (const char *pattern, const char *string, int flags); #endif -#ifndef SIZE_MAX -# define SIZE_MAX ((size_t) -1) +#ifdef _LIBC +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif +#else +# include "attribute.h" #endif -#include "attribute.h" -#include "flexmember.h" +#include <intprops.h> +#include <flexmember.h> /* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set. */ #define NO_LEADING_PERIOD(flags) \ ((flags & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD)) -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself, and have not detected a bug - in the library. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand 'configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#if defined _LIBC || !defined __GNU_LIBRARY__ || !HAVE_FNMATCH_GNU - - -# define STREQ(s1, s2) (strcmp (s1, s2) == 0) +#ifndef _LIBC +# if HAVE_ALLOCA +/* The OS usually guarantees only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + allocate anything larger than 4096 bytes. Also care for the possibility + of a few compiler-allocated temporary stack slots. */ +# define __libc_use_alloca(n) ((n) < 4032) +# else +/* Just use malloc. */ +# define __libc_use_alloca(n) false +# undef alloca +# define alloca(n) malloc (n) +# endif +# define alloca_account(size, avar) ((avar) += (size), alloca (size)) +#endif /* Provide support for user-defined character classes, based on the functions from ISO C 90 amendment 1. */ -# ifdef CHARCLASS_NAME_MAX -# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX -# else +#ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +#else /* This shouldn't happen but some implementation might still have this problem. Use a reasonable default value. */ -# define CHAR_CLASS_MAX_LENGTH 256 -# endif - -# ifdef _LIBC -# define IS_CHAR_CLASS(string) __wctype (string) -# else -# define IS_CHAR_CLASS(string) wctype (string) -# endif - -# ifdef _LIBC -# define ISWCTYPE(WC, WT) __iswctype (WC, WT) -# else -# define ISWCTYPE(WC, WT) iswctype (WC, WT) -# endif +# define CHAR_CLASS_MAX_LENGTH 256 +#endif -# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC -/* In this case we are implementing the multibyte character handling. */ -# define HANDLE_MULTIBYTE 1 -# endif +#define IS_CHAR_CLASS(string) wctype (string) /* Avoid depending on library functions or files whose names are inconsistent. */ @@ -108,60 +114,53 @@ extern int fnmatch (const char *pattern, const char *string, int flags); /* Global variable. */ static int posixly_correct; -# ifndef internal_function -/* Inside GNU libc we mark some function in a special way. In other - environments simply ignore the marking. */ -# define internal_function -# endif - /* Note that this evaluates C many times. */ -# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c)) -# define CHAR char -# define UCHAR unsigned char -# define INT int -# define FCT internal_fnmatch -# define EXT ext_match -# define END end_pattern -# define L_(CS) CS -# ifdef _LIBC -# define BTOWC(C) __btowc (C) -# else -# define BTOWC(C) btowc (C) -# endif -# define STRLEN(S) strlen (S) -# define STRCAT(D, S) strcat (D, S) -# ifdef _LIBC -# define MEMPCPY(D, S, N) __mempcpy (D, S, N) -# else -# define MEMPCPY(D, S, N) mempcpy (D, S, N) -# endif -# define MEMCHR(S, C, N) memchr (S, C, N) -# include "fnmatch_loop.c" - - -# if HANDLE_MULTIBYTE -# define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c)) -# define CHAR wchar_t -# define UCHAR wint_t -# define INT wint_t -# define FCT internal_fnwmatch -# define EXT ext_wmatch -# define END end_wpattern -# define L_(CS) L##CS -# define BTOWC(C) (C) -# ifdef _LIBC -# define STRLEN(S) __wcslen (S) -# define STRCAT(D, S) __wcscat (D, S) -# define MEMPCPY(D, S, N) __wmempcpy (D, S, N) -# else -# define STRLEN(S) wcslen (S) -# define STRCAT(D, S) wcscat (D, S) -# define MEMPCPY(D, S, N) wmempcpy (D, S, N) -# endif -# define MEMCHR(S, C, N) wmemchr (S, C, N) -# define WIDE_CHAR_VERSION 1 - -# undef IS_CHAR_CLASS +#define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c)) +#define CHAR char +#define UCHAR unsigned char +#define INT int +#define FCT internal_fnmatch +#define EXT ext_match +#define END end_pattern +#define STRUCT fnmatch_struct +#define L_(CS) CS +#define BTOWC(C) btowc (C) +#define STRLEN(S) strlen (S) +#define STRCAT(D, S) strcat (D, S) +#define MEMPCPY(D, S, N) mempcpy (D, S, N) +#define MEMCHR(S, C, N) memchr (S, C, N) +#define WIDE_CHAR_VERSION 0 +#ifdef _LIBC +# include <locale/weight.h> +# define FINDIDX findidx +#endif +#include "fnmatch_loop.c" + + +#define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c)) +#define CHAR wchar_t +#define UCHAR wint_t +#define INT wint_t +#define FCT internal_fnwmatch +#define EXT ext_wmatch +#define END end_wpattern +#define L_(CS) L##CS +#define BTOWC(C) (C) +#define STRLEN(S) wcslen (S) +#define STRCAT(D, S) wcscat (D, S) +#define MEMPCPY(D, S, N) wmempcpy (D, S, N) +#define MEMCHR(S, C, N) wmemchr (S, C, N) +#define WIDE_CHAR_VERSION 1 +#ifdef _LIBC +/* Change the name the header defines so it doesn't conflict with + the <locale/weight.h> version included above. */ +# define findidx findidxwc +# include <locale/weightwc.h> +# undef findidx +# define FINDIDX findidxwc +#endif + +#undef IS_CHAR_CLASS /* We have to convert the wide character string in a multibyte string. But we know that the character class names consist of alphanumeric characters from the portable character set, and since the wide character encoding @@ -177,11 +176,11 @@ is_char_class (const wchar_t *wcs) do { /* Test for a printable character from the portable character set. */ -# ifdef _LIBC +#ifdef _LIBC if (*wcs < 0x20 || *wcs > 0x7e || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60) return (wctype_t) 0; -# else +#else switch (*wcs) { case L' ': case L'!': case L'"': case L'#': case L'%': @@ -208,7 +207,7 @@ is_char_class (const wchar_t *wcs) default: return (wctype_t) 0; } -# endif +#endif /* Avoid overrunning the buffer. */ if (cp == s + CHAR_CLASS_MAX_LENGTH) @@ -220,96 +219,137 @@ is_char_class (const wchar_t *wcs) *cp = '\0'; -# ifdef _LIBC - return __wctype (s); -# else return wctype (s); -# endif } -# define IS_CHAR_CLASS(string) is_char_class (string) +#define IS_CHAR_CLASS(string) is_char_class (string) -# include "fnmatch_loop.c" -# endif +#include "fnmatch_loop.c" int fnmatch (const char *pattern, const char *string, int flags) { -# if HANDLE_MULTIBYTE -# define ALLOCA_LIMIT 2000 - if (__builtin_expect (MB_CUR_MAX, 1) != 1) + if (__glibc_unlikely (MB_CUR_MAX != 1)) { mbstate_t ps; - size_t patsize; - size_t strsize; - size_t totsize; + size_t n; + const char *p; + wchar_t *wpattern_malloc = NULL; wchar_t *wpattern; + wchar_t *wstring_malloc = NULL; wchar_t *wstring; - int res; + size_t alloca_used = 0; - /* Calculate the size needed to convert the strings to - wide characters. */ + /* Convert the strings into wide characters. */ memset (&ps, '\0', sizeof (ps)); - patsize = mbsrtowcs (NULL, &pattern, 0, &ps) + 1; - if (__builtin_expect (patsize != 0, 1)) + p = pattern; + n = strnlen (pattern, 1024); + if (__glibc_likely (n < 1024)) { + wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t), + alloca_used); + n = mbsrtowcs (wpattern, &p, n + 1, &ps); + if (__glibc_unlikely (n == (size_t) -1)) + /* Something wrong. + XXX Do we have to set 'errno' to something which mbsrtows hasn't + already done? */ + return -1; + if (p) + { + memset (&ps, '\0', sizeof (ps)); + goto prepare_wpattern; + } + } + else + { + prepare_wpattern: + n = mbsrtowcs (NULL, &pattern, 0, &ps); + if (__glibc_unlikely (n == (size_t) -1)) + /* Something wrong. + XXX Do we have to set 'errno' to something which mbsrtows hasn't + already done? */ + return -1; + if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t))) + { + __set_errno (ENOMEM); + return -2; + } + wpattern_malloc = wpattern + = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t)); assert (mbsinit (&ps)); - strsize = mbsrtowcs (NULL, &string, 0, &ps) + 1; - if (__builtin_expect (strsize != 0, 1)) + if (wpattern == NULL) + return -2; + (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps); + } + + assert (mbsinit (&ps)); + n = strnlen (string, 1024); + p = string; + if (__glibc_likely (n < 1024)) + { + wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t), + alloca_used); + n = mbsrtowcs (wstring, &p, n + 1, &ps); + if (__glibc_unlikely (n == (size_t) -1)) { - assert (mbsinit (&ps)); - totsize = patsize + strsize; - if (__builtin_expect (! (patsize <= totsize - && totsize <= SIZE_MAX / sizeof (wchar_t)), - 0)) - { - errno = ENOMEM; - return -1; - } - - /* Allocate room for the wide characters. */ - if (__builtin_expect (totsize < ALLOCA_LIMIT, 1)) - wpattern = (wchar_t *) alloca (totsize * sizeof (wchar_t)); - else - { - wpattern = malloc (totsize * sizeof (wchar_t)); - if (__builtin_expect (! wpattern, 0)) - { - errno = ENOMEM; - return -1; - } - } - wstring = wpattern + patsize; - - /* Convert the strings into wide characters. */ - mbsrtowcs (wpattern, &pattern, patsize, &ps); - assert (mbsinit (&ps)); - mbsrtowcs (wstring, &string, strsize, &ps); - - res = internal_fnwmatch (wpattern, wstring, wstring + strsize - 1, - flags & FNM_PERIOD, flags); - - if (__builtin_expect (! (totsize < ALLOCA_LIMIT), 0)) - free (wpattern); - return res; + /* Something wrong. + XXX Do we have to set 'errno' to something which + mbsrtows hasn't already done? */ + free_return: + free (wpattern_malloc); + return -1; + } + if (p) + { + memset (&ps, '\0', sizeof (ps)); + goto prepare_wstring; } } - } + else + { + prepare_wstring: + n = mbsrtowcs (NULL, &string, 0, &ps); + if (__glibc_unlikely (n == (size_t) -1)) + /* Something wrong. + XXX Do we have to set 'errno' to something which mbsrtows hasn't + already done? */ + goto free_return; + if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t))) + { + free (wpattern_malloc); + __set_errno (ENOMEM); + return -2; + } -# endif /* HANDLE_MULTIBYTE */ + wstring_malloc = wstring + = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t)); + if (wstring == NULL) + { + free (wpattern_malloc); + return -2; + } + assert (mbsinit (&ps)); + (void) mbsrtowcs (wstring, &string, n + 1, &ps); + } + + int res = internal_fnwmatch (wpattern, wstring, wstring + n, + flags & FNM_PERIOD, flags, NULL, + alloca_used); + + free (wstring_malloc); + free (wpattern_malloc); + + return res; + } return internal_fnmatch (pattern, string, string + strlen (string), - flags & FNM_PERIOD, flags); + flags & FNM_PERIOD, flags, NULL, 0); } -# ifdef _LIBC -# undef fnmatch +#undef fnmatch versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3); -# if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3) +#if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3) strong_alias (__fnmatch, __fnmatch_old) compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0); -# endif +#endif libc_hidden_ver (__fnmatch, fnmatch) -# endif - -#endif /* _LIBC or not __GNU_LIBRARY__. */ |