summary refs log tree commit diff
path: root/lib/fnmatch.c
diff options
context:
space:
mode:
author Paul Eggert <eggert@cs.ucla.edu> 2020-05-31 10:14:41 -0700
committer Paul Eggert <eggert@cs.ucla.edu> 2020-05-31 10:15:31 -0700
commit 67306f600fe6a3bcf3fbb6d8bf4b8953b74a8fb7 (patch)
tree bc875a8d73eeb3dec77730a8699c6a929e1210b6 /lib/fnmatch.c
parent 9de2f9ad0a12f0c048e9c9e573f77f35288f0c43 (diff)
download gnulib-67306f600fe6a3bcf3fbb6d8bf4b8953b74a8fb7.tar.gz
fnmatch: merge from glibc
Also, merge in Gnulib’s more-recent methods of making it easier to share between Gnulib and glibc. * lib/fnmatch.c: Reorder includes to match glibc better. Include libc-config.h instead of config.h. Include alloca.h only if _LIBC || HAVE_ALLOCA. Do not include "../locale/elem-hash.h" if _LIBC. Define macros for btowc, etc. if _LIBC. All uses simplified. Define FALLTHROUGH if _LIBC, instead of including attribute.h. Include intprops.h, since glibc has it now. (SIZE_MAX): Remove; use (size_t) -1 instead. Omit the "Comment out all this code" ifdef, since Gnulib has never really needed it. (STREQ): Remove; no longer used. (__libc_use_alloca, alloca, alloca_account): Define as needed if !_LIBC. (ISWCTYPE): Remove; all uses replaced by iswctype. (HANDLE_MULTIBYTE): Remove. All uses removed by assuming true. (internal_function): Remove. All uses removed. (STRUCT): New macro. (WIDE_CHAR_VERSION): Define to 0 instead of leaving undefined. (WMEMCMP): New macro. (FINDIDX): Define if _LIBC, and include <locale/weight.h> and <locale/weightwc.h>. (fnmatch): Prefer __glibc_likely and __glibc_unlikely to __builtin_expect. Check for integer overflow more systematically. Account for alloca storage better when recursive. Use strnlen instead of strlen for efficiency. * lib/fnmatch_loop.c: Include stdint.h if _LIBC, for int32_t etc. (struct STRUCT): New type. (FCT, EXT): New ENDS and ALLOCA_USED args. All callers changed. (FCT): Prefer __glibc_unlikely to __builtin_expect. Simplify by assuming WIDE_CHAR_SUPPORT. Copy _LIBC code from glibc without worrying about Gnulib compatibility. Cast cold to UCHAR to avoid signedness warning. (END): Check for invalid pattern. (EXT): Improve alloca/malloc checking (taken from glibc), and improve it some more by using intprops.h and checking for integer overflow and using bool for booleans. * lib/libc-config.h (compat_symbol): New macro. (versioned_symbol): Make it ‘extern int dummy’ so that it’s acceptable to non-GCC when a trailing semicolon is added.
* modules/fnmatch (Depends-on): Add alloca-opt, intprops, libc-config, strnlen. Remove alloca.
Diffstat (limited to 'lib/fnmatch.c')
-rw-r--r-- lib/fnmatch.c 392
1 files changed, 216 insertions, 176 deletions
diff --git a/lib/fnmatch.c b/lib/fnmatch.c
index db4da5e6a6..4d017cfebe 100644
--- a/lib/fnmatch.c
+++ b/lib/fnmatch.c
@@ -1,20 +1,22 @@
-/* Copyright (C) 1991-1993, 1996-2007, 2009-2020 Free Software Foundation, Inc.
+/* Copyright (C) 1991-2020 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
- This program is distributed in the hope that it will be useful,
+ The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, see <https://www.gnu.org/licenses/>. */
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
#ifndef _LIBC
-# include <config.h>
+# include <libc-config.h>
#endif
/* Enable GNU extensions in fnmatch.h. */
@@ -24,83 +26,87 @@
#include <fnmatch.h>
-#include <alloca.h>
#include <assert.h>
-#include <ctype.h>
#include <errno.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <stdlib.h>
+#include <ctype.h>
#include <string.h>
+#include <stdlib.h>
+#if defined _LIBC || HAVE_ALLOCA
+# include <alloca.h>
+#endif
#include <wchar.h>
#include <wctype.h>
+#include <stddef.h>
+#include <stdbool.h>
/* We need some of the locale data (the collation sequence information)
but there is no interface to get this information in general. Therefore
we support a correct implementation only in glibc. */
#ifdef _LIBC
# include "../locale/localeinfo.h"
-# include "../locale/elem-hash.h"
# include "../locale/coll-lookup.h"
# include <shlib-compat.h>
# define CONCAT(a,b) __CONCAT(a,b)
+# define btowc __btowc
+# define iswctype __iswctype
# define mbsrtowcs __mbsrtowcs
+# define mempcpy __mempcpy
+# define strnlen __strnlen
+# define towlower __towlower
+# define wcscat __wcscat
+# define wcslen __wcslen
+# define wctype __wctype
+# define wmemchr __wmemchr
+# define wmempcpy __wmempcpy
# define fnmatch __fnmatch
extern int fnmatch (const char *pattern, const char *string, int flags);
#endif
-#ifndef SIZE_MAX
-# define SIZE_MAX ((size_t) -1)
+#ifdef _LIBC
+# if __GNUC__ < 7
+# define FALLTHROUGH ((void) 0)
+# else
+# define FALLTHROUGH __attribute__ ((__fallthrough__))
+# endif
+#else
+# include "attribute.h"
#endif
-#include "attribute.h"
-#include "flexmember.h"
+#include <intprops.h>
+#include <flexmember.h>
/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set. */
#define NO_LEADING_PERIOD(flags) \
((flags & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))
-/* Comment out all this code if we are using the GNU C Library, and are not
- actually compiling the library itself, and have not detected a bug
- in the library. This code is part of the GNU C
- Library, but also included in many other GNU distributions. Compiling
- and linking in this code is a waste when using the GNU C library
- (especially if it is a shared library). Rather than having every GNU
- program understand 'configure --with-gnu-libc' and omit the object files,
- it is simpler to just do this in the source for each such file. */
-
-#if defined _LIBC || !defined __GNU_LIBRARY__ || !HAVE_FNMATCH_GNU
-
-
-# define STREQ(s1, s2) (strcmp (s1, s2) == 0)
+#ifndef _LIBC
+# if HAVE_ALLOCA
+/* The OS usually guarantees only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ allocate anything larger than 4096 bytes. Also care for the possibility
+ of a few compiler-allocated temporary stack slots. */
+# define __libc_use_alloca(n) ((n) < 4032)
+# else
+/* Just use malloc. */
+# define __libc_use_alloca(n) false
+# undef alloca
+# define alloca(n) malloc (n)
+# endif
+# define alloca_account(size, avar) ((avar) += (size), alloca (size))
+#endif
/* Provide support for user-defined character classes, based on the functions
from ISO C 90 amendment 1. */
-# ifdef CHARCLASS_NAME_MAX
-# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
-# else
+#ifdef CHARCLASS_NAME_MAX
+# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
+#else
/* This shouldn't happen but some implementation might still have this
problem. Use a reasonable default value. */
-# define CHAR_CLASS_MAX_LENGTH 256
-# endif
-
-# ifdef _LIBC
-# define IS_CHAR_CLASS(string) __wctype (string)
-# else
-# define IS_CHAR_CLASS(string) wctype (string)
-# endif
-
-# ifdef _LIBC
-# define ISWCTYPE(WC, WT) __iswctype (WC, WT)
-# else
-# define ISWCTYPE(WC, WT) iswctype (WC, WT)
-# endif
+# define CHAR_CLASS_MAX_LENGTH 256
+#endif
-# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC
-/* In this case we are implementing the multibyte character handling. */
-# define HANDLE_MULTIBYTE 1
-# endif
+#define IS_CHAR_CLASS(string) wctype (string)
/* Avoid depending on library functions or files
whose names are inconsistent. */
@@ -108,60 +114,53 @@ extern int fnmatch (const char *pattern, const char *string, int flags);
/* Global variable. */
static int posixly_correct;
-# ifndef internal_function
-/* Inside GNU libc we mark some function in a special way. In other
- environments simply ignore the marking. */
-# define internal_function
-# endif
-
/* Note that this evaluates C many times. */
-# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
-# define CHAR char
-# define UCHAR unsigned char
-# define INT int
-# define FCT internal_fnmatch
-# define EXT ext_match
-# define END end_pattern
-# define L_(CS) CS
-# ifdef _LIBC
-# define BTOWC(C) __btowc (C)
-# else
-# define BTOWC(C) btowc (C)
-# endif
-# define STRLEN(S) strlen (S)
-# define STRCAT(D, S) strcat (D, S)
-# ifdef _LIBC
-# define MEMPCPY(D, S, N) __mempcpy (D, S, N)
-# else
-# define MEMPCPY(D, S, N) mempcpy (D, S, N)
-# endif
-# define MEMCHR(S, C, N) memchr (S, C, N)
-# include "fnmatch_loop.c"
-
-
-# if HANDLE_MULTIBYTE
-# define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c))
-# define CHAR wchar_t
-# define UCHAR wint_t
-# define INT wint_t
-# define FCT internal_fnwmatch
-# define EXT ext_wmatch
-# define END end_wpattern
-# define L_(CS) L##CS
-# define BTOWC(C) (C)
-# ifdef _LIBC
-# define STRLEN(S) __wcslen (S)
-# define STRCAT(D, S) __wcscat (D, S)
-# define MEMPCPY(D, S, N) __wmempcpy (D, S, N)
-# else
-# define STRLEN(S) wcslen (S)
-# define STRCAT(D, S) wcscat (D, S)
-# define MEMPCPY(D, S, N) wmempcpy (D, S, N)
-# endif
-# define MEMCHR(S, C, N) wmemchr (S, C, N)
-# define WIDE_CHAR_VERSION 1
-
-# undef IS_CHAR_CLASS
+#define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
+#define CHAR char
+#define UCHAR unsigned char
+#define INT int
+#define FCT internal_fnmatch
+#define EXT ext_match
+#define END end_pattern
+#define STRUCT fnmatch_struct
+#define L_(CS) CS
+#define BTOWC(C) btowc (C)
+#define STRLEN(S) strlen (S)
+#define STRCAT(D, S) strcat (D, S)
+#define MEMPCPY(D, S, N) mempcpy (D, S, N)
+#define MEMCHR(S, C, N) memchr (S, C, N)
+#define WIDE_CHAR_VERSION 0
+#ifdef _LIBC
+# include <locale/weight.h>
+# define FINDIDX findidx
+#endif
+#include "fnmatch_loop.c"
+
+
+#define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c))
+#define CHAR wchar_t
+#define UCHAR wint_t
+#define INT wint_t
+#define FCT internal_fnwmatch
+#define EXT ext_wmatch
+#define END end_wpattern
+#define L_(CS) L##CS
+#define BTOWC(C) (C)
+#define STRLEN(S) wcslen (S)
+#define STRCAT(D, S) wcscat (D, S)
+#define MEMPCPY(D, S, N) wmempcpy (D, S, N)
+#define MEMCHR(S, C, N) wmemchr (S, C, N)
+#define WIDE_CHAR_VERSION 1
+#ifdef _LIBC
+/* Change the name the header defines so it doesn't conflict with
+ the <locale/weight.h> version included above. */
+# define findidx findidxwc
+# include <locale/weightwc.h>
+# undef findidx
+# define FINDIDX findidxwc
+#endif
+
+#undef IS_CHAR_CLASS
/* We have to convert the wide character string in a multibyte string. But
we know that the character class names consist of alphanumeric characters
from the portable character set, and since the wide character encoding
@@ -177,11 +176,11 @@ is_char_class (const wchar_t *wcs)
do
{
/* Test for a printable character from the portable character set. */
-# ifdef _LIBC
+#ifdef _LIBC
if (*wcs < 0x20 || *wcs > 0x7e
|| *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60)
return (wctype_t) 0;
-# else
+#else
switch (*wcs)
{
case L' ': case L'!': case L'"': case L'#': case L'%':
@@ -208,7 +207,7 @@ is_char_class (const wchar_t *wcs)
default:
return (wctype_t) 0;
}
-# endif
+#endif
/* Avoid overrunning the buffer. */
if (cp == s + CHAR_CLASS_MAX_LENGTH)
@@ -220,96 +219,137 @@ is_char_class (const wchar_t *wcs)
*cp = '\0';
-# ifdef _LIBC
- return __wctype (s);
-# else
return wctype (s);
-# endif
}
-# define IS_CHAR_CLASS(string) is_char_class (string)
+#define IS_CHAR_CLASS(string) is_char_class (string)
-# include "fnmatch_loop.c"
-# endif
+#include "fnmatch_loop.c"
int
fnmatch (const char *pattern, const char *string, int flags)
{
-# if HANDLE_MULTIBYTE
-# define ALLOCA_LIMIT 2000
- if (__builtin_expect (MB_CUR_MAX, 1) != 1)
+ if (__glibc_unlikely (MB_CUR_MAX != 1))
{
mbstate_t ps;
- size_t patsize;
- size_t strsize;
- size_t totsize;
+ size_t n;
+ const char *p;
+ wchar_t *wpattern_malloc = NULL;
wchar_t *wpattern;
+ wchar_t *wstring_malloc = NULL;
wchar_t *wstring;
- int res;
+ size_t alloca_used = 0;
- /* Calculate the size needed to convert the strings to
- wide characters. */
+ /* Convert the strings into wide characters. */
memset (&ps, '\0', sizeof (ps));
- patsize = mbsrtowcs (NULL, &pattern, 0, &ps) + 1;
- if (__builtin_expect (patsize != 0, 1))
+ p = pattern;
+ n = strnlen (pattern, 1024);
+ if (__glibc_likely (n < 1024))
{
+ wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
+ alloca_used);
+ n = mbsrtowcs (wpattern, &p, n + 1, &ps);
+ if (__glibc_unlikely (n == (size_t) -1))
+ /* Something wrong.
+ XXX Do we have to set 'errno' to something which mbsrtowcs hasn't
+ already done? */
+ return -1;
+ if (p)
+ {
+ memset (&ps, '\0', sizeof (ps));
+ goto prepare_wpattern;
+ }
+ }
+ else
+ {
+ prepare_wpattern:
+ n = mbsrtowcs (NULL, &pattern, 0, &ps);
+ if (__glibc_unlikely (n == (size_t) -1))
+ /* Something wrong.
+ XXX Do we have to set 'errno' to something which mbsrtowcs hasn't
+ already done? */
+ return -1;
+ if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
+ {
+ __set_errno (ENOMEM);
+ return -2;
+ }
+ wpattern_malloc = wpattern
+ = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
assert (mbsinit (&ps));
- strsize = mbsrtowcs (NULL, &string, 0, &ps) + 1;
- if (__builtin_expect (strsize != 0, 1))
+ if (wpattern == NULL)
+ return -2;
+ (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
+ }
+
+ assert (mbsinit (&ps));
+ n = strnlen (string, 1024);
+ p = string;
+ if (__glibc_likely (n < 1024))
+ {
+ wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
+ alloca_used);
+ n = mbsrtowcs (wstring, &p, n + 1, &ps);
+ if (__glibc_unlikely (n == (size_t) -1))
{
- assert (mbsinit (&ps));
- totsize = patsize + strsize;
- if (__builtin_expect (! (patsize <= totsize
- && totsize <= SIZE_MAX / sizeof (wchar_t)),
- 0))
- {
- errno = ENOMEM;
- return -1;
- }
-
- /* Allocate room for the wide characters. */
- if (__builtin_expect (totsize < ALLOCA_LIMIT, 1))
- wpattern = (wchar_t *) alloca (totsize * sizeof (wchar_t));
- else
- {
- wpattern = malloc (totsize * sizeof (wchar_t));
- if (__builtin_expect (! wpattern, 0))
- {
- errno = ENOMEM;
- return -1;
- }
- }
- wstring = wpattern + patsize;
-
- /* Convert the strings into wide characters. */
- mbsrtowcs (wpattern, &pattern, patsize, &ps);
- assert (mbsinit (&ps));
- mbsrtowcs (wstring, &string, strsize, &ps);
-
- res = internal_fnwmatch (wpattern, wstring, wstring + strsize - 1,
- flags & FNM_PERIOD, flags);
-
- if (__builtin_expect (! (totsize < ALLOCA_LIMIT), 0))
- free (wpattern);
- return res;
+ /* Something wrong.
+ XXX Do we have to set 'errno' to something which
+ mbsrtowcs hasn't already done? */
+ free_return:
+ free (wpattern_malloc);
+ return -1;
+ }
+ if (p)
+ {
+ memset (&ps, '\0', sizeof (ps));
+ goto prepare_wstring;
}
}
- }
+ else
+ {
+ prepare_wstring:
+ n = mbsrtowcs (NULL, &string, 0, &ps);
+ if (__glibc_unlikely (n == (size_t) -1))
+ /* Something wrong.
+ XXX Do we have to set 'errno' to something which mbsrtowcs hasn't
+ already done? */
+ goto free_return;
+ if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
+ {
+ free (wpattern_malloc);
+ __set_errno (ENOMEM);
+ return -2;
+ }
-# endif /* HANDLE_MULTIBYTE */
+ wstring_malloc = wstring
+ = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
+ if (wstring == NULL)
+ {
+ free (wpattern_malloc);
+ return -2;
+ }
+ assert (mbsinit (&ps));
+ (void) mbsrtowcs (wstring, &string, n + 1, &ps);
+ }
+
+ int res = internal_fnwmatch (wpattern, wstring, wstring + n,
+ flags & FNM_PERIOD, flags, NULL,
+ alloca_used);
+
+ free (wstring_malloc);
+ free (wpattern_malloc);
+
+ return res;
+ }
return internal_fnmatch (pattern, string, string + strlen (string),
- flags & FNM_PERIOD, flags);
+ flags & FNM_PERIOD, flags, NULL, 0);
}
-# ifdef _LIBC
-# undef fnmatch
+#undef fnmatch
versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3);
-# if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3)
+#if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3)
strong_alias (__fnmatch, __fnmatch_old)
compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0);
-# endif
+#endif
libc_hidden_ver (__fnmatch, fnmatch)
-# endif
-
-#endif /* _LIBC or not __GNU_LIBRARY__. */