summary | refs | log | tree | commit | diff
path: root/regex.c
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2010-07-16 13:09:56 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2010-07-16 13:09:56 +0300
commit bc70de7b3302d5a81515b901cae376b8b51d2004 (patch)
tree d36d6743e65697f6923b79d0ea8f9f9bf4ef7398 /regex.c
parent b9e4a1fd4c8c8753ab8a9887bab55f03efe1e3e2 (diff)
download gawk-bc70de7b3302d5a81515b901cae376b8b51d2004.tar.gz
Move to gawk-3.1.0. (tag: gawk-3.1.0)
Diffstat (limited to 'regex.c')
-rw-r--r-- regex.c 628
1 files changed, 369 insertions, 259 deletions
diff --git a/regex.c b/regex.c
index 8c7f4fe2..1da69e2c 100644
--- a/regex.c
+++ b/regex.c
@@ -2,8 +2,7 @@
version 0.12.
(Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
-
- Copyright (C) 1993,1994,1995,1996,1997,1999,2000 Free Software Foundation, Inc.
+ Copyright (C) 1993, 94, 95, 96, 97, 98, 99, 2000 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +19,7 @@
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* AIX requires this to be the first thing in the file. */
-#if defined (_AIX) && !defined (REGEX_MALLOC)
+#if defined _AIX && !defined REGEX_MALLOC
#pragma alloca
#endif
@@ -28,25 +27,61 @@
#define _GNU_SOURCE
#ifdef HAVE_CONFIG_H
-#include <config.h>
+# include <config.h>
#endif
-#if defined(STDC_HEADERS) && !defined(emacs)
-#include <stddef.h>
+#ifndef PARAMS
+# if defined __GNUC__ || (defined __STDC__ && __STDC__)
+# define PARAMS(args) args
+# else
+# define PARAMS(args) ()
+# endif /* GCC. */
+#endif /* Not PARAMS. */
+
+#if defined STDC_HEADERS && !defined emacs
+# include <stddef.h>
#else
/* We need this for `regex.h', and perhaps for the Emacs include files. */
-#include <sys/types.h>
+# include <sys/types.h>
#endif
+#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
+
/* For platform which support the ISO C amendement 1 functionality we
support user defined character classes. */
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
-# include <wctype.h>
+#if defined _LIBC || WIDE_CHAR_SUPPORT
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
# include <wchar.h>
+# include <wctype.h>
+#endif
+
+#ifdef _LIBC
+/* We have to keep the namespace clean. */
+# define regfree(preg) __regfree (preg)
+# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
+# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
+# define regerror(errcode, preg, errbuf, errbuf_size) \
+ __regerror(errcode, preg, errbuf, errbuf_size)
+# define re_set_registers(bu, re, nu, st, en) \
+ __re_set_registers (bu, re, nu, st, en)
+# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
+ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+# define re_match(bufp, string, size, pos, regs) \
+ __re_match (bufp, string, size, pos, regs)
+# define re_search(bufp, string, size, startpos, range, regs) \
+ __re_search (bufp, string, size, startpos, range, regs)
+# define re_compile_pattern(pattern, length, bufp) \
+ __re_compile_pattern (pattern, length, bufp)
+# define re_set_syntax(syntax) __re_set_syntax (syntax)
+# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
+ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
+# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
+
+#define btowc __btowc
#endif
/* This is for other GNU distributions with internationalized messages. */
-#if HAVE_LIBINTL_H || defined (_LIBC)
+#if HAVE_LIBINTL_H || defined _LIBC
# include <libintl.h>
#else
# define gettext(msgid) (msgid)
@@ -55,79 +90,83 @@
#ifndef gettext_noop
/* This define is so xgettext can find the internationalizable
strings. */
-#define gettext_noop(String) String
+# define gettext_noop(String) String
#endif
/* The `emacs' switch turns on certain matching commands
that make sense only in Emacs. */
#ifdef emacs
-#include "lisp.h"
-#include "buffer.h"
-#include "syntax.h"
+# include "lisp.h"
+# include "buffer.h"
+# include "syntax.h"
#else /* not emacs */
/* If we are not linking with Emacs proper,
we can't use the relocating allocator
even if config.h says that we can. */
-#undef REL_ALLOC
+# undef REL_ALLOC
-#if defined (STDC_HEADERS) || defined (_LIBC)
-#include <stdlib.h>
-#else
+# if defined STDC_HEADERS || defined _LIBC
+# include <stdlib.h>
+# else
char *malloc ();
char *realloc ();
-#endif
+# endif
/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
If nothing else has been done, use the method below. */
-#ifdef INHIBIT_STRING_HEADER
-#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
-#if !defined (bzero) && !defined (bcopy)
-#undef INHIBIT_STRING_HEADER
-#endif
-#endif
-#endif
+# ifdef INHIBIT_STRING_HEADER
+# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
+# if !defined bzero && !defined bcopy
+# undef INHIBIT_STRING_HEADER
+# endif
+# endif
+# endif
/* This is the normal way of making sure we have a bcopy and a bzero.
This is used in most programs--a few other programs avoid this
by defining INHIBIT_STRING_HEADER. */
-#ifndef INHIBIT_STRING_HEADER
-#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
-#include <string.h>
-#ifndef bcmp
-#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
-#endif
-#ifndef bcopy
-#define bcopy(s, d, n) memcpy ((d), (s), (n))
-#endif
-#ifndef bzero
-#define bzero(s, n) memset ((s), 0, (n))
-#endif
-#else
-#include <strings.h>
-#endif
-#endif
+# ifndef INHIBIT_STRING_HEADER
+# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
+# include <string.h>
+# ifndef bzero
+# ifndef _LIBC
+# define bzero(s, n) (memset (s, '\0', n), (s))
+# else
+# define bzero(s, n) __bzero (s, n)
+# endif
+# endif
+# else
+# include <strings.h>
+# ifndef memcmp
+# define memcmp(s1, s2, n) bcmp (s1, s2, n)
+# endif
+# ifndef memcpy
+# define memcpy(d, s, n) (bcopy (s, d, n), (d))
+# endif
+# endif
+# endif
/* Define the syntax stuff for \<, \>, etc. */
/* This must be nonzero for the wordchar and notwordchar pattern
commands in re_match_2. */
-#ifndef Sword
-#define Sword 1
-#endif
+# ifndef Sword
+# define Sword 1
+# endif
-#ifdef SWITCH_ENUM_BUG
-#define SWITCH_ENUM_CAST(x) ((int)(x))
-#else
-#define SWITCH_ENUM_CAST(x) (x)
-#endif
+# ifdef SWITCH_ENUM_BUG
+# define SWITCH_ENUM_CAST(x) ((int)(x))
+# else
+# define SWITCH_ENUM_CAST(x) (x)
+# endif
#endif /* not emacs */
/* Get the interface, including the syntax bits. */
-#include "regex.h"
+#include <regex.h>
/* isalpha etc. are used for the character classes. */
#include <ctype.h>
@@ -141,25 +180,28 @@ char *realloc ();
STDC_HEADERS is defined, then autoconf has verified that the ctype
macros don't need to be guarded with references to isascii. ...
Defining isascii to 1 should let any compiler worth its salt
- eliminate the && through constant folding." */
+ eliminate the && through constant folding."
+ Solaris defines some of these symbols so we must undefine them first. */
-#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
-#define ISASCII(c) 1
+#undef ISASCII
+#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
+# define ISASCII(c) 1
#else
-#define ISASCII(c) isascii(c)
+# define ISASCII(c) isascii(c)
#endif
#ifdef isblank
-#define ISBLANK(c) (ISASCII (c) && isblank (c))
+# define ISBLANK(c) (ISASCII (c) && isblank (c))
#else
-#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
-#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
+# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
#else
-#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
+# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
#endif
+#undef ISPRINT
#define ISPRINT(c) (ISASCII (c) && isprint (c))
#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
#define ISALNUM(c) (ISASCII (c) && isalnum (c))
@@ -171,8 +213,14 @@ char *realloc ();
#define ISUPPER(c) (ISASCII (c) && isupper (c))
#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
+#ifdef _tolower
+# define TOLOWER(c) _tolower(c)
+#else
+# define TOLOWER(c) tolower(c)
+#endif
+
#ifndef NULL
-#define NULL (void *)0
+# define NULL (void *)0
#endif
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
@@ -181,10 +229,10 @@ char *realloc ();
(Per Bothner suggested the basic approach.) */
#undef SIGN_EXTEND_CHAR
#if __STDC__
-#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
#else /* not __STDC__ */
/* As in Harbison and Steele. */
-#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#endif
#ifndef emacs
@@ -236,74 +284,67 @@ init_syntax_once ()
#ifdef REGEX_MALLOC
-#define REGEX_ALLOCATE malloc
-#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
-#define REGEX_FREE free
+# define REGEX_ALLOCATE malloc
+# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE free
#else /* not REGEX_MALLOC */
/* Emacs already defines alloca, sometimes. */
-#ifndef alloca
+# ifndef alloca
/* Make alloca work the best possible way. */
-#ifdef __GNUC__
-#define alloca __builtin_alloca
-#else /* not __GNUC__ */
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#else /* not __GNUC__ or HAVE_ALLOCA_H */
-#if 0 /* It is a bad idea to declare alloca. We always cast the result. */
-#ifndef _AIX /* Already did AIX, up at the top. */
-char *alloca ();
-#endif /* not _AIX */
-#endif
-#endif /* not HAVE_ALLOCA_H */
-#endif /* not __GNUC__ */
+# ifdef __GNUC__
+# define alloca __builtin_alloca
+# else /* not __GNUC__ */
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# endif /* HAVE_ALLOCA_H */
+# endif /* not __GNUC__ */
-#endif /* not alloca */
+# endif /* not alloca */
-#define REGEX_ALLOCATE alloca
+# define REGEX_ALLOCATE alloca
/* Assumes a `char *destination' variable. */
-#define REGEX_REALLOCATE(source, osize, nsize) \
+# define REGEX_REALLOCATE(source, osize, nsize) \
(destination = (char *) alloca (nsize), \
- bcopy (source, destination, osize), \
- destination)
+ memcpy (destination, source, osize))
/* No need to do anything to free, after alloca. */
-#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
#endif /* not REGEX_MALLOC */
/* Define how to allocate the failure stack. */
-#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
+#if defined REL_ALLOC && defined REGEX_MALLOC
-#define REGEX_ALLOCATE_STACK(size) \
+# define REGEX_ALLOCATE_STACK(size) \
r_alloc (&failure_stack_ptr, (size))
-#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
r_re_alloc (&failure_stack_ptr, (nsize))
-#define REGEX_FREE_STACK(ptr) \
+# define REGEX_FREE_STACK(ptr) \
r_alloc_free (&failure_stack_ptr)
#else /* not using relocating allocator */
-#ifdef REGEX_MALLOC
+# ifdef REGEX_MALLOC
-#define REGEX_ALLOCATE_STACK malloc
-#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
-#define REGEX_FREE_STACK free
+# define REGEX_ALLOCATE_STACK malloc
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+# define REGEX_FREE_STACK free
-#else /* not REGEX_MALLOC */
+# else /* not REGEX_MALLOC */
-#define REGEX_ALLOCATE_STACK alloca
+# define REGEX_ALLOCATE_STACK alloca
-#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything. */
-#define REGEX_FREE_STACK(arg)
+# define REGEX_FREE_STACK(arg)
-#endif /* not REGEX_MALLOC */
+# endif /* not REGEX_MALLOC */
#endif /* not using relocating allocator */
@@ -333,7 +374,12 @@ typedef char boolean;
#define false 0
#define true 1
-static int re_match_2_internal ();
+static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
+ const char *string1, int size1,
+ const char *string2, int size2,
+ int pos,
+ struct re_registers *regs,
+ int stop));
/* These are the command codes that appear in compiled regular
expressions. Some opcodes are followed by argument bytes. A
@@ -515,10 +561,10 @@ extract_number (dest, source)
*dest += temp << 8;
}
-#ifndef EXTRACT_MACROS /* To debug the macros. */
-#undef EXTRACT_NUMBER
-#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
-#endif /* not EXTRACT_MACROS */
+# ifndef EXTRACT_MACROS /* To debug the macros. */
+# undef EXTRACT_NUMBER
+# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+# endif /* not EXTRACT_MACROS */
#endif /* DEBUG */
@@ -543,11 +589,11 @@ extract_number_and_incr (destination, source)
*source += 2;
}
-#ifndef EXTRACT_MACROS
-#undef EXTRACT_NUMBER_AND_INCR
-#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+# ifndef EXTRACT_MACROS
+# undef EXTRACT_NUMBER_AND_INCR
+# define EXTRACT_NUMBER_AND_INCR(dest, src) \
extract_number_and_incr (&dest, &src)
-#endif /* not EXTRACT_MACROS */
+# endif /* not EXTRACT_MACROS */
#endif /* DEBUG */
@@ -560,21 +606,21 @@ extract_number_and_incr (destination, source)
#ifdef DEBUG
/* We use standard I/O for debugging. */
-#include <stdio.h>
+# include <stdio.h>
/* It is useful to test things that ``must'' be true when debugging. */
-#include <assert.h>
+# include <assert.h>
-static int debug = 0;
+static int debug;
-#define DEBUG_STATEMENT(e) e
-#define DEBUG_PRINT1(x) if (debug) printf (x)
-#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
-#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
-#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
-#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+# define DEBUG_STATEMENT(e) e
+# define DEBUG_PRINT1(x) if (debug) printf (x)
+# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
if (debug) print_partial_compiled_pattern (s, e)
-#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
if (debug) print_double_string (w, s1, sz1, s2, sz2)
@@ -794,7 +840,7 @@ print_partial_compiled_pattern (start, end)
case wordend:
printf ("/wordend");
-#ifdef emacs
+# ifdef emacs
case before_dot:
printf ("/before_dot");
break;
@@ -818,7 +864,7 @@ print_partial_compiled_pattern (start, end)
mcnt = *p++;
printf ("/%d", mcnt);
break;
-#endif /* emacs */
+# endif /* emacs */
case wordchar:
printf ("/wordchar");
@@ -911,16 +957,16 @@ printchar (c)
#else /* not DEBUG */
-#undef assert
-#define assert(e)
+# undef assert
+# define assert(e)
-#define DEBUG_STATEMENT(e)
-#define DEBUG_PRINT1(x)
-#define DEBUG_PRINT2(x1, x2)
-#define DEBUG_PRINT3(x1, x2, x3)
-#define DEBUG_PRINT4(x1, x2, x3, x4)
-#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
-#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+# define DEBUG_STATEMENT(e)
+# define DEBUG_PRINT1(x)
+# define DEBUG_PRINT2(x1, x2)
+# define DEBUG_PRINT3(x1, x2, x3)
+# define DEBUG_PRINT4(x1, x2, x3, x4)
+# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
#endif /* not DEBUG */
@@ -929,7 +975,8 @@ printchar (c)
syntax, so it can be changed between regex compilations. */
/* This has no initializer because initialized variables in Emacs
become read-only after dumping. */
-reg_syntax_t re_syntax_options;
+reg_syntax_t re_syntax_options = 0; /* Gawk: initialize to force this one,
+ not the one in the C library. */
/* Specify the precise syntax of regexps for compilation. This provides
@@ -954,12 +1001,17 @@ re_set_syntax (syntax)
#endif /* DEBUG */
return ret;
}
+#ifdef _LIBC
+weak_alias (__re_set_syntax, re_set_syntax)
+#endif
/* This table gives an error message for each of the error codes listed
in regex.h. Obviously the order here has to be same as there.
POSIX doesn't require that we do anything for REG_NOERROR,
but why not be nice? */
+/* Gawk: 12 Dec 2000 --- revert to array of char * for use with K&R Compilers. */
+
static const char *re_error_msgid[] =
{
gettext_noop ("Success"), /* REG_NOERROR */
@@ -1006,7 +1058,7 @@ static const char *re_error_msgid[] =
/* When using GNU C, we are not REALLY using the C alloca, no matter
what config.h may say. So don't take precautions for it. */
#ifdef __GNUC__
-#undef C_ALLOCA
+# undef C_ALLOCA
#endif
/* The match routines may not allocate if (1) they would do it with malloc
@@ -1014,8 +1066,8 @@ static const char *re_error_msgid[] =
Note that if REL_ALLOC is defined, matching would not use malloc for the
failure stack, but we would still use it for the register vectors;
so REL_ALLOC should not affect this. */
-#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
-#undef MATCH_MAY_ALLOCATE
+#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
+# undef MATCH_MAY_ALLOCATE
#endif
@@ -1028,7 +1080,7 @@ static const char *re_error_msgid[] =
when matching. If this number is exceeded, we allocate more
space, so it is not a hard limit. */
#ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
+# define INIT_FAILURE_ALLOC 5
#endif
/* Roughly the maximum number of failure points on the stack. Would be
@@ -1038,13 +1090,13 @@ static const char *re_error_msgid[] =
#ifdef INT_IS_16BIT
-#if defined (MATCH_MAY_ALLOCATE)
+# if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. */
long int re_max_failures = 4000;
-#else
+# else
long int re_max_failures = 2000;
-#endif
+# endif
union fail_stack_elt
{
@@ -1063,13 +1115,13 @@ typedef struct
#else /* not INT_IS_16BIT */
-#if defined (MATCH_MAY_ALLOCATE)
+# if defined MATCH_MAY_ALLOCATE
/* 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. */
int re_max_failures = 20000;
-#else
+# else
int re_max_failures = 2000;
-#endif
+# endif
union fail_stack_elt
{
@@ -1097,10 +1149,10 @@ typedef struct
Do `return -2' if the alloc fails. */
#ifdef MATCH_MAY_ALLOCATE
-#define INIT_FAIL_STACK() \
+# define INIT_FAIL_STACK() \
do { \
fail_stack.stack = (fail_stack_elt_t *) \
- REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
\
if (fail_stack.stack == NULL) \
return -2; \
@@ -1109,14 +1161,14 @@ typedef struct
fail_stack.avail = 0; \
} while (0)
-#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
#else
-#define INIT_FAIL_STACK() \
+# define INIT_FAIL_STACK() \
do { \
fail_stack.avail = 0; \
} while (0)
-#define RESET_FAIL_STACK()
+# define RESET_FAIL_STACK()
#endif
@@ -1177,11 +1229,11 @@ typedef struct
/* Used to omit pushing failure point id's when we're not debugging. */
#ifdef DEBUG
-#define DEBUG_PUSH PUSH_FAILURE_INT
-#define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
+# define DEBUG_PUSH PUSH_FAILURE_INT
+# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
#else
-#define DEBUG_PUSH(item)
-#define DEBUG_POP(item_addr)
+# define DEBUG_PUSH(item)
+# define DEBUG_POP(item_addr)
#endif
@@ -1189,8 +1241,8 @@ typedef struct
if we ever fail back to it.
Requires variables fail_stack, regstart, regend, reg_info, and
- num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
- declared.
+ num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
+ be declared.
Does `return FAILURE_CODE' if runs out of memory. */
@@ -1202,7 +1254,7 @@ typedef struct
/* Can't be int, since there is not a shred of a guarantee that int \
is wide enough to hold a value of something to which pointer can \
be assigned */ \
- s_reg_t this_reg; \
+ active_reg_t this_reg; \
\
DEBUG_STATEMENT (failure_id++); \
DEBUG_STATEMENT (nfailure_points_pushed++); \
@@ -1210,7 +1262,7 @@ typedef struct
DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
\
- DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
\
/* Ensure we have enough space allocated for what we will push. */ \
@@ -1231,16 +1283,17 @@ typedef struct
for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
this_reg++) \
{ \
- DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
DEBUG_STATEMENT (num_regs_pushed++); \
\
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
PUSH_FAILURE_POINTER (regstart[this_reg]); \
\
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
PUSH_FAILURE_POINTER (regend[this_reg]); \
\
- DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" info: %p\n ", \
+ reg_info[this_reg].word.pointer); \
DEBUG_PRINT2 (" match_null=%d", \
REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
@@ -1252,17 +1305,17 @@ typedef struct
PUSH_FAILURE_ELT (reg_info[this_reg].word); \
} \
\
- DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
+ DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
PUSH_FAILURE_INT (lowest_active_reg); \
\
- DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
+ DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
PUSH_FAILURE_INT (highest_active_reg); \
\
- DEBUG_PRINT2 (" Pushing pattern 0x%x:\n", pattern_place); \
+ DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
PUSH_FAILURE_POINTER (pattern_place); \
\
- DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
+ DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
size2); \
DEBUG_PRINT1 ("'\n"); \
@@ -1278,9 +1331,9 @@ typedef struct
/* Individual items aside from the registers. */
#ifdef DEBUG
-#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
#else
-#define NUM_NONREG_ITEMS 4
+# define NUM_NONREG_ITEMS 4
#endif
/* We push at most this many items on the stack. */
@@ -1314,8 +1367,8 @@ typedef struct
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{ \
- DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
- s_reg_t this_reg; \
+ DEBUG_STATEMENT (unsigned failure_id;) \
+ active_reg_t this_reg; \
const unsigned char *string_temp; \
\
assert (!FAIL_STACK_EMPTY ()); \
@@ -1337,34 +1390,35 @@ typedef struct
if (string_temp != NULL) \
str = (const char *) string_temp; \
\
- DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT2 (" Popping string %p: `", str); \
DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
DEBUG_PRINT1 ("'\n"); \
\
pat = (unsigned char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" Popping pattern 0x%x:\n", pat); \
+ DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
\
/* Restore register info. */ \
high_reg = (active_reg_t) POP_FAILURE_INT (); \
- DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
\
low_reg = (active_reg_t) POP_FAILURE_INT (); \
- DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
\
if (1) \
for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
{ \
- DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
\
reg_info[this_reg].word = POP_FAILURE_ELT (); \
- DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" info: %p\n", \
+ reg_info[this_reg].word.pointer); \
\
regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
\
regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
} \
else \
{ \
@@ -1470,7 +1524,7 @@ static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
string passed to us by the user to an unsigned char that we can use
as an array index (in, e.g., `translate'). */
#ifndef PATFETCH
-#define PATFETCH(c) \
+# define PATFETCH(c) \
do {if (p == pend) return REG_EEND; \
c = (unsigned char) *p++; \
if (translate) c = (unsigned char) translate[c]; \
@@ -1493,7 +1547,7 @@ static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
`char *', to avoid warnings when a string constant is passed. But
when we use a character as a subscript we must make it unsigned. */
#ifndef TRANSLATE
-#define TRANSLATE(d) \
+# define TRANSLATE(d) \
(translate ? (char) translate[(unsigned char) (d)] : (d))
#endif
@@ -1561,15 +1615,15 @@ static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
reallocating to 0 bytes. Such thing is not going to work too well.
You have been warned!! */
-#if defined(_MSC_VER) && !defined(WIN32)
+#if defined _MSC_VER && !defined WIN32
/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
The REALLOC define eliminates a flurry of conversion warnings,
but is not required. */
-#define MAX_BUF_SIZE 65500L
-#define REALLOC(p,s) realloc ((p), (size_t) (s))
+# define MAX_BUF_SIZE 65500L
+# define REALLOC(p,s) realloc ((p), (size_t) (s))
#else
-#define MAX_BUF_SIZE (1L << 16)
-#define REALLOC(p,s) realloc ((p), (s))
+# define MAX_BUF_SIZE (1L << 16)
+# define REALLOC(p,s) realloc ((p), (s))
#endif
/* Extend the buffer by twice its current size via realloc and
@@ -1691,7 +1745,7 @@ typedef struct
} \
}
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
and the functions from ISO C amendement 1. */
# ifdef CHARCLASS_NAME_MAX
@@ -1702,7 +1756,11 @@ typedef struct
# define CHAR_CLASS_MAX_LENGTH 256
# endif
-# define IS_CHAR_CLASS(string) wctype (string)
+# ifdef _LIBC
+# define IS_CHAR_CLASS(string) __wctype (string)
+# else
+# define IS_CHAR_CLASS(string) wctype (string)
+# endif
#else
# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
@@ -1878,7 +1936,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Always count groups, whether or not bufp->no_sub is set. */
bufp->re_nsub = 0;
-#if !defined (emacs) && !defined (SYNTAX_TABLE)
+#if !defined emacs && !defined SYNTAX_TABLE
/* Initialize the syntax table. */
init_syntax_once ();
#endif
@@ -2179,25 +2237,28 @@ regex_compile (pattern, size, syntax, bufp)
for (;;)
{
PATFETCH (c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
+ if ((c == ':' && *p == ']') || p == pend)
break;
- str[c1++] = c;
+ if (c1 < CHAR_CLASS_MAX_LENGTH)
+ str[c1++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
}
str[c1] = '\0';
- /* If isn't a word bracketed by `[:' and:`]':
+ /* If isn't a word bracketed by `[:' and `:]':
undo the ending character, the letters, and leave
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
{
-#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || WIDE_CHAR_SUPPORT
boolean is_lower = STREQ (str, "lower");
boolean is_upper = STREQ (str, "upper");
wctype_t wt;
int ch;
- wt = wctype (str);
+ wt = IS_CHAR_CLASS (str);
if (wt == 0)
FREE_STACK_RETURN (REG_ECTYPE);
@@ -2209,8 +2270,13 @@ regex_compile (pattern, size, syntax, bufp)
for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
{
+# ifdef _LIBC
+ if (__iswctype (__btowc (ch), wt))
+ SET_LIST_BIT (ch);
+# else
if (iswctype (btowc (ch), wt))
SET_LIST_BIT (ch);
+# endif
if (translate && (is_upper || is_lower)
&& (ISUPPER (ch) || ISLOWER (ch)))
@@ -2392,10 +2458,12 @@ regex_compile (pattern, size, syntax, bufp)
if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
if (COMPILE_STACK_EMPTY)
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_backslash;
- else
- FREE_STACK_RETURN (REG_ERPAREN);
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
handle_close:
if (fixup_alt_jump)
@@ -2412,10 +2480,12 @@ regex_compile (pattern, size, syntax, bufp)
/* See similar code for backslashed left paren above. */
if (COMPILE_STACK_EMPTY)
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_char;
- else
- FREE_STACK_RETURN (REG_ERPAREN);
+ {
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ FREE_STACK_RETURN (REG_ERPAREN);
+ }
/* Since we just checked for an empty stack above, this
``can't happen''. */
@@ -2690,7 +2760,7 @@ regex_compile (pattern, size, syntax, bufp)
case 'w':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
laststart = b;
BUF_PUSH (wordchar);
@@ -2698,7 +2768,7 @@ regex_compile (pattern, size, syntax, bufp)
case 'W':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
laststart = b;
BUF_PUSH (notwordchar);
@@ -2706,37 +2776,37 @@ regex_compile (pattern, size, syntax, bufp)
case '<':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordbeg);
break;
case '>':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordend);
break;
case 'b':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (wordbound);
break;
case 'B':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (notwordbound);
break;
case '`':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (begbuf);
break;
case '\'':
- if (re_syntax_options & RE_NO_GNU_OPS)
+ if (syntax & RE_NO_GNU_OPS)
goto normal_char;
BUF_PUSH (endbuf);
break;
@@ -2855,7 +2925,7 @@ regex_compile (pattern, size, syntax, bufp)
{
fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
-#ifdef emacs
+# ifdef emacs
if (! fail_stack.stack)
fail_stack.stack
= (fail_stack_elt_t *) xmalloc (fail_stack.size
@@ -2865,7 +2935,7 @@ regex_compile (pattern, size, syntax, bufp)
= (fail_stack_elt_t *) xrealloc (fail_stack.stack,
(fail_stack.size
* sizeof (fail_stack_elt_t)));
-#else /* not emacs */
+# else /* not emacs */
if (! fail_stack.stack)
fail_stack.stack
= (fail_stack_elt_t *) malloc (fail_stack.size
@@ -2875,7 +2945,7 @@ regex_compile (pattern, size, syntax, bufp)
= (fail_stack_elt_t *) realloc (fail_stack.stack,
(fail_stack.size
* sizeof (fail_stack_elt_t)));
-#endif /* not emacs */
+# endif /* not emacs */
}
regex_grow_registers (num_regs);
@@ -3374,6 +3444,9 @@ re_compile_fastmap (bufp)
RESET_FAIL_STACK ();
return 0;
} /* re_compile_fastmap */
+#ifdef _LIBC
+weak_alias (__re_compile_fastmap, re_compile_fastmap)
+#endif
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
@@ -3409,6 +3482,9 @@ re_set_registers (bufp, regs, num_regs, starts, ends)
regs->start = regs->end = (regoff_t *) 0;
}
}
+#ifdef _LIBC
+weak_alias (__re_set_registers, re_set_registers)
+#endif
/* Searching routines. */
@@ -3425,6 +3501,9 @@ re_search (bufp, string, size, startpos, range, regs)
return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
regs, size);
}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
/* Using the compiled pattern in BUFP->buffer, first tries to match the
@@ -3478,7 +3557,11 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
/* If the search isn't to be a backwards one, don't waste time in a
search for a pattern that must be anchored. */
- if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+ if (bufp->used > 0 && range > 0
+ && ((re_opcode_t) bufp->buffer[0] == begbuf
+ /* `begline' is like `begbuf' if it cannot match at newlines. */
+ || ((re_opcode_t) bufp->buffer[0] == begline
+ && !bufp->newline_anchor)))
{
if (startpos > 0)
return -1;
@@ -3554,9 +3637,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
val = re_match_2_internal (bufp, string1, size1, string2, size2,
startpos, regs, stop);
#ifndef REGEX_MALLOC
-#ifdef C_ALLOCA
+# ifdef C_ALLOCA
alloca (0);
-#endif
+# endif
#endif
if (val >= 0)
@@ -3581,6 +3664,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
}
return -1;
} /* re_search_2 */
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
/* This converts PTR, a pointer into one of the search strings `string1'
and `string2' into an offset from the beginning of that string. */
@@ -3633,8 +3719,8 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
/* Free everything we malloc. */
#ifdef MATCH_MAY_ALLOCATE
-#define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
-#define FREE_VARIABLES() \
+# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
+# define FREE_VARIABLES() \
do { \
REGEX_FREE_STACK (fail_stack.stack); \
FREE_VAR (regstart); \
@@ -3648,7 +3734,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
FREE_VAR (reg_info_dummy); \
} while (0)
#else
-#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
+# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
#endif /* not MATCH_MAY_ALLOCATE */
/* These values must meet several constraints. They must not be valid
@@ -3675,13 +3761,16 @@ re_match (bufp, string, size, pos, regs)
{
int result = re_match_2_internal (bufp, NULL, 0, string, size,
pos, regs, size);
-#ifndef REGEX_MALLOC
-#ifdef C_ALLOCA
+# ifndef REGEX_MALLOC
+# ifdef C_ALLOCA
alloca (0);
-#endif
-#endif
+# endif
+# endif
return result;
}
+# ifdef _LIBC
+weak_alias (__re_match, re_match)
+# endif
#endif /* not emacs */
static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
@@ -3721,12 +3810,15 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
int result = re_match_2_internal (bufp, string1, size1, string2, size2,
pos, regs, stop);
#ifndef REGEX_MALLOC
-#ifdef C_ALLOCA
+# ifdef C_ALLOCA
alloca (0);
-#endif
+# endif
#endif
return result;
}
+#ifdef _LIBC
+weak_alias (__re_match_2, re_match_2)
+#endif
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
@@ -3777,7 +3869,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
fail_stack_type fail_stack;
#endif
#ifdef DEBUG
- static unsigned failure_id = 0;
+ static unsigned failure_id;
unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
#endif
@@ -4471,7 +4563,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
past them. */
if (translate
? bcmp_translate (d, d2, mcnt, translate)
- : bcmp (d, d2, mcnt))
+ : memcmp (d, d2, mcnt))
goto fail;
d += mcnt, d2 += mcnt;
@@ -4693,7 +4785,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
{
int not = (re_opcode_t) p1[3] == charset_not;
- if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ if (c < (unsigned) (p1[4] * BYTEWIDTH)
&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
not = !not;
@@ -4708,26 +4800,15 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
}
else if ((re_opcode_t) *p2 == charset)
{
-#ifdef DEBUG
- register unsigned char c
- = *p2 == (unsigned char) endline ? '\n' : p2[2];
-#endif
-
-#if 0
+ /* We win if the first character of the loop is not part
+ of the charset. */
if ((re_opcode_t) p1[3] == exactn
- && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
- && (p2[2 + p1[5] / BYTEWIDTH]
- & (1 << (p1[5] % BYTEWIDTH)))))
-#else
- if ((re_opcode_t) p1[3] == exactn
- && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
- && (p2[2 + p1[4] / BYTEWIDTH]
- & (1 << (p1[4] % BYTEWIDTH)))))
-#endif
- {
- p[-3] = (unsigned char) pop_failure_jump;
- DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
- c, p1[5]);
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
+ && (p2[2 + p1[5] / BYTEWIDTH]
+ & (1 << (p1[5] % BYTEWIDTH)))))
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
}
else if ((re_opcode_t) p1[3] == charset_not)
@@ -4837,7 +4918,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
/* It doesn't matter what we push for the string here. What
the code at `fail' tests is the value for the pattern. */
- PUSH_FAILURE_POINT (0, 0, -2);
+ PUSH_FAILURE_POINT (NULL, NULL, -2);
goto unconditional_jump;
@@ -4850,7 +4931,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
/* See comments just above at `dummy_failure_jump' about the
two zeroes. */
- PUSH_FAILURE_POINT (0, 0, -2);
+ PUSH_FAILURE_POINT (NULL, NULL, -2);
break;
/* Have to succeed matching what follows at least n times.
@@ -5420,11 +5501,14 @@ re_compile_pattern (pattern, length, bufp)
return NULL;
return gettext (re_error_msgid[(int) ret]);
}
+#ifdef _LIBC
+weak_alias (__re_compile_pattern, re_compile_pattern)
+#endif
/* Entry points compatible with 4.2 BSD regex library. We don't define
them unless specifically requested. */
-#if defined (_REGEX_RE_COMP) || defined (_LIBC)
+#if defined _REGEX_RE_COMP || defined _LIBC
/* BSD has one and only one pattern buffer. */
static struct re_pattern_buffer re_comp_buf;
@@ -5452,12 +5536,12 @@ re_comp (s)
{
re_comp_buf.buffer = (unsigned char *) malloc (200);
if (re_comp_buf.buffer == NULL)
- return gettext (re_error_msgid[(int) REG_ESPACE]);
+ return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
re_comp_buf.allocated = 200;
re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
if (re_comp_buf.fastmap == NULL)
- return gettext (re_error_msgid[(int) REG_ESPACE]);
+ return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
}
/* Since `re_exec' always passes NULL for the `regs' argument, we
@@ -5505,7 +5589,8 @@ re_exec (s)
REG_EXTENDED bit in CFLAGS is set; otherwise, to
RE_SYNTAX_POSIX_BASIC;
`newline_anchor' to REG_NEWLINE being set in CFLAGS;
- `fastmap' and `fastmap_accurate' to zero;
+ `fastmap' to an allocated space for the fastmap;
+ `fastmap_accurate' to zero;
`re_nsub' to the number of subexpressions in PATTERN.
PATTERN is the address of the pattern string.
@@ -5547,11 +5632,8 @@ regcomp (preg, pattern, cflags)
preg->allocated = 0;
preg->used = 0;
- /* Don't bother to use a fastmap when searching. This simplifies the
- REG_NEWLINE case: if we used a fastmap, we'd have to put all the
- characters after newlines into the fastmap. This way, we just try
- every character. */
- preg->fastmap = 0;
+ /* Try to allocate space for the fastmap. */
+ preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
if (cflags & REG_ICASE)
{
@@ -5565,7 +5647,7 @@ regcomp (preg, pattern, cflags)
/* Map uppercase characters to corresponding lowercase ones. */
for (i = 0; i < CHAR_SET_SIZE; i++)
- preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
+ preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
}
else
preg->translate = NULL;
@@ -5591,8 +5673,24 @@ regcomp (preg, pattern, cflags)
unmatched close-group: both are REG_EPAREN. */
if (ret == REG_ERPAREN) ret = REG_EPAREN;
+ if (ret == REG_NOERROR && preg->fastmap)
+ {
+ /* Compute the fastmap now, since regexec cannot modify the pattern
+ buffer. */
+ if (re_compile_fastmap (preg) == -2)
+ {
+ /* Some error occurred while computing the fastmap, just forget
+ about it. */
+ free (preg->fastmap);
+ preg->fastmap = NULL;
+ }
+ }
+
return (int) ret;
}
+#ifdef _LIBC
+weak_alias (__regcomp, regcomp)
+#endif
/* regexec searches for a given pattern, specified by PREG, in the
@@ -5639,10 +5737,10 @@ regexec (preg, string, nmatch, pmatch, eflags)
if (want_reg_info)
{
regs.num_regs = nmatch;
- regs.start = TALLOC (nmatch, regoff_t);
- regs.end = TALLOC (nmatch, regoff_t);
- if (regs.start == NULL || regs.end == NULL)
+ regs.start = TALLOC (nmatch * 2, regoff_t);
+ if (regs.start == NULL)
return (int) REG_NOMATCH;
+ regs.end = regs.start + nmatch;
}
/* Perform the searching operation. */
@@ -5666,12 +5764,14 @@ regexec (preg, string, nmatch, pmatch, eflags)
/* If we needed the temporary register info, free the space now. */
free (regs.start);
- free (regs.end);
}
/* We want zero return to mean success, unlike `re_search'. */
return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
}
+#ifdef _LIBC
+weak_alias (__regexec, regexec)
+#endif
/* Returns a message corresponding to an error code, ERRCODE, returned
@@ -5704,15 +5804,22 @@ regerror (errcode, preg, errbuf, errbuf_size)
{
if (msg_size > errbuf_size)
{
- strncpy (errbuf, msg, errbuf_size - 1);
+#if defined HAVE_MEMPCPY || defined _LIBC
+ *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
+#else
+ memcpy (errbuf, msg, errbuf_size - 1);
errbuf[errbuf_size - 1] = 0;
+#endif
}
else
- strcpy (errbuf, msg);
+ memcpy (errbuf, msg, msg_size);
}
return msg_size;
}
+#ifdef _LIBC
+weak_alias (__regerror, regerror)
+#endif
/* Free dynamically allocated space used by PREG. */
@@ -5740,5 +5847,8 @@ regfree (preg)
free (preg->translate);
preg->translate = NULL;
}
+#ifdef _LIBC
+weak_alias (__regfree, regfree)
+#endif
#endif /* not emacs */