summaryrefslogtreecommitdiff
path: root/src/regex-emacs.c
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2018-08-05 18:41:20 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2018-08-05 19:36:09 -0700
commit3a6abe65c1324361bf0efcb65df61d22a39cfaaf (patch)
tree90ecb27f9ecbb8a0f8d9b24cf67a809b52b0b32d /src/regex-emacs.c
parentd904cc83f3036db96107a3976cee1a0112547de6 (diff)
downloademacs-3a6abe65c1324361bf0efcb65df61d22a39cfaaf.tar.gz
Simplify regex-emacs code by assuming Emacs
* src/regex-emacs.c: Omit no-longer-needed AIX code. Don’t ignore GCC warnings. Include regex-emacs.h immediately after config.h, to test that it’s independent. Omit the "#ifndef emacs" and "#ifdef REGEX_MALLOC" and "#if WIDE_CHAR_SUPPORT" or "#ifdef _REGEX_RE_COMP", code, as we are no longer interested in compiling outside Emacs (with or without debugging or native wide char support) or in avoiding alloca. (REGEX_EMACS_DEBUG, regex_emacs_debug): Rename from DEBUG and debug, to avoid collision with other DEBUGS. All uses changed. In debugging output, change %ld and %zd to %zu when appropriate. No need to include stddef.h, stdlib.h, sys/types.h, wchar.h, wctype.h, locale/localeinfo.h, locale/elem-hash.h, langinfo.h, libintl.h, unistd.h, stdbool.h, string.h, stdio.h, assert.h. All uses of assert changed to eassert. (RE_DUP_MAX, reg_syntax_t, RE_BACKSLASH_ESCAPE_IN_LISTS) (RE_BK_PLUS_QM, RE_CHAR_CLASSES, RE_CONTEXT_INDEP_ANCHORS) (RE_CONTEXT_INDEP_OPS, RE_CONTEXT_INVALID_OPS, RE_DOT_NEWLINE) (RE_DOT_NOT_NULL, RE_HAT_LISTS_NOT_NEWLINE, RE_INTERVALS) (RE_LIMITED_OPS, RE_NEWLINE_ALT, RE_NO_BK_BRACES) (RE_NO_BK_PARENS, RE_NO_BK_REFS, RE_NO_BK_VBAR) (RE_NO_EMPTY_RANGES, RE_UNMATCHED_RIGHT_PAREN_ORD) (RE_NO_POSIX_BACKTRACKING, RE_NO_GNU_OPS, RE_FRUGAL) (RE_SHY_GROUPS, RE_NO_NEWLINE_ANCHOR, RE_SYNTAX_EMACS) (REG_NOERROR, REG_NOMATCH, REG_BADPAT, REG_ECOLLATE) (REG_ECTYPE, REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN) (REG_EBRACE, REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT) (REG_EEND, REG_ESIZE, REG_ERPAREN, REG_ERANGEX, REG_ESIZEBR) (reg_errcode_t, REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED) (RE_NREGS, RE_TRANSLATE, RE_TRANSLATE_P): Move here from regex-emacs.h. (RE_NREGS): Define unconditionally. (boolean): Remove. All uses replaced by bool. (WIDE_CHAR_SUPPORT, regfree, regexec, regcomp, regerror): (re_set_syntax, re_syntax_options, WEAK_ALIAS, gettext, gettext_noop): Remove. All uses removed. (malloc, realloc, free): Do not redefine. Adjust all callers to use xmalloc, xrealloc, xfree instead. (re_error_msgid): Use C99 to avoid need to keep in same order as reg_error_t. (REGEX_USE_SAFE_ALLOCA): Simplify by using USE_SAFE_ALLOCA. (REGEX_ALLOCATE, REGEX_REALLOCATE, REGEX_FREE, REGEX_ALLOCATE_STACK) (REGEX_REALLOCATE_STACK, REGEX_FREE_STACK): Remove. All callers changed to use the non-REGEX_MALLOC version. (REGEX_TALLOC): Remove. All callers changed to use SAFE_ALLOCA. (re_set_syntax): Remove; unused. (MATCH_MAY_ALLOCATE): Remove; now always true. All uses simplified. (INIT_FAILURE_ALLOC): Define unconditionally. (re_compile_fastmap): Now static. (re_compile_pattern): Avoid unnecessary cast. * src/regex-emacs.h (EMACS_REGEX_H): Renamed from _REGEX_H to avoid possible collision with glibc. Don’t include sys/types.h. All uses of ssize_t changed to ptrdiff_t. Don’t worry about C++ or VMS. Assume emacs is defined and that _REGEX_RE_COMP and WIDE_CHAR_SUPPORT are not. Define struct re_registers before including lisp.h. (REG_ENOSYS, RE_TRANSLATE_TYPE): Remove; all uses replaced by Lisp_Object. (regoff_t): Remove. All uses replaced with ptrdiff_t. (re_match, regcomp, regexec, regerror, regfree): Remove decl of nonexistent functions. (RE_DEBUG, RE_SYNTAX_AWK, RE_SYNTAX_GNU_AWK) (RE_SYNTAX_POSIX_AWK, RE_SYNTAX_GREP, RE_SYNTAX_EGREP) (RE_SYNTAX_POSIX_EGREP, RE_SYNTAX_ED, RE_SYNTAX_SED) (_RE_SYNTAX_POSIX_COMMON, RE_SYNTAX_POSIX_BASIC) (RE_SYNTAX_POSIX_MINIMAL_BASIC, RE_SYNTAX_POSIX_EXTENDED) (RE_SYNTAX_POSIX_MINIMAL_EXTENDED, REG_EXTENDED, REG_ICASE) (REG_NEWLINE, REG_NOSUB, REG_NOTBOL, REG_NOTEOL, regmatch_t): Remove; unused. * src/search.c (Fset_match_data): Simplify range test now that we know it’s ptrdiff_t.
Diffstat (limited to 'src/regex-emacs.c')
-rw-r--r--src/regex-emacs.c2013
1 files changed, 442 insertions, 1571 deletions
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index 08fc8c67f1c..eb5970ffcf1 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -21,159 +21,187 @@
- structure the opcode space into opcode+flag.
- merge with glibc's regex.[ch].
- replace (succeed_n + jump_n + set_number_at) with something that doesn't
- need to modify the compiled regexp so that re_match can be reentrant.
+ need to modify the compiled regexp so that re_search can be reentrant.
- get rid of on_failure_jump_smart by doing the optimization in re_comp
- rather than at run-time, so that re_match can be reentrant.
+ rather than at run-time, so that re_search can be reentrant.
*/
-/* AIX requires this to be the first thing in the file. */
-#if defined _AIX && !defined REGEX_MALLOC
- #pragma alloca
-#endif
-
-/* Ignore some GCC warnings for now. This section should go away
- once the Emacs and Gnulib regex code is merged. */
-#if 4 < __GNUC__ + (5 <= __GNUC_MINOR__) || defined __clang__
-# pragma GCC diagnostic ignored "-Wstrict-overflow"
-# ifndef emacs
-# pragma GCC diagnostic ignored "-Wunused-function"
-# pragma GCC diagnostic ignored "-Wunused-macros"
-# pragma GCC diagnostic ignored "-Wunused-result"
-# pragma GCC diagnostic ignored "-Wunused-variable"
-# endif
-#endif
-
-#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__) && ! defined __clang__
-# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-
#include <config.h>
-#include <stddef.h>
-#include <stdlib.h>
-
-#ifdef emacs
-/* We need this for `regex-emacs.h', and perhaps for the Emacs include
- files. */
-# include <sys/types.h>
-#endif
-
-/* Whether to use ISO C Amendment 1 wide char functions.
- Those should not be used for Emacs since it uses its own. */
-#if defined _LIBC
-#define WIDE_CHAR_SUPPORT 1
-#else
-#define WIDE_CHAR_SUPPORT \
- (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
-#endif
+/* Get the interface, including the syntax bits. */
+#include "regex-emacs.h"
-/* For platform which support the ISO C amendment 1 functionality we
- support user defined character classes. */
-#if WIDE_CHAR_SUPPORT
-/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
-# include <wchar.h>
-# include <wctype.h>
-#endif
+#include <stdlib.h>
-#ifdef _LIBC
-/* We have to keep the namespace clean. */
-# define regfree(preg) __regfree (preg)
-# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
-# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
-# define regerror(err_code, preg, errbuf, errbuf_size) \
- __regerror (err_code, preg, errbuf, errbuf_size)
-# define re_set_registers(bu, re, nu, st, en) \
- __re_set_registers (bu, re, nu, st, en)
-# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
- __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
-# define re_match(bufp, string, size, pos, regs) \
- __re_match (bufp, string, size, pos, regs)
-# define re_search(bufp, string, size, startpos, range, regs) \
- __re_search (bufp, string, size, startpos, range, regs)
-# define re_compile_pattern(pattern, length, bufp) \
- __re_compile_pattern (pattern, length, bufp)
-# define re_set_syntax(syntax) __re_set_syntax (syntax)
-# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
- __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
-# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
-
-/* Make sure we call libc's function even if the user overrides them. */
-# define btowc __btowc
-# define iswctype __iswctype
-# define wctype __wctype
-
-# define WEAK_ALIAS(a,b) weak_alias (a, b)
-
-/* We are also using some library internals. */
-# include <locale/localeinfo.h>
-# include <locale/elem-hash.h>
-# include <langinfo.h>
-#else
-# define WEAK_ALIAS(a,b)
-#endif
+#include "character.h"
+#include "buffer.h"
-/* This is for other GNU distributions with internationalized messages. */
-#if HAVE_LIBINTL_H || defined _LIBC
-# include <libintl.h>
-#else
-# define gettext(msgid) (msgid)
-#endif
+#include "syntax.h"
+#include "category.h"
-#ifndef gettext_noop
-/* This define is so xgettext can find the internationalizable
- strings. */
-# define gettext_noop(String) String
+/* Maximum number of duplicates an interval can allow. Some systems
+ define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+# undef RE_DUP_MAX
#endif
-
-/* The `emacs' switch turns on certain matching commands
- that make sense only in Emacs. */
-#ifdef emacs
-
-# include "lisp.h"
-# include "character.h"
-# include "buffer.h"
-
-# include "syntax.h"
-# include "category.h"
+/* Repeat counts are stored in opcodes as 2 byte integers. This was
+ previously limited to 7fff because the parsing code uses signed
+ ints. But Emacs only runs on 32 bit platforms anyway. */
+#define RE_DUP_MAX (0xffff)
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings where historically chosen so
+ that Emacs syntax had the value 0.
+ The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned long reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+
+/* If this bit is set, do not process the GNU regex operators.
+ If not set, then the GNU regex operators are recognized. */
+#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
+
+/* If this bit is set, then *?, +? and ?? match non greedily. */
+#define RE_FRUGAL (RE_NO_GNU_OPS << 1)
+
+/* If this bit is set, then (?:...) is treated as a shy group. */
+#define RE_SHY_GROUPS (RE_FRUGAL << 1)
+
+/* If this bit is set, ^ and $ only match at beg/end of buffer. */
+#define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+/* extern reg_syntax_t re_syntax_options; */
+/* Define combinations of the above bits for the standard possibilities. */
+#define RE_SYNTAX_EMACS \
+ (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL)
/* Make syntax table lookup grant data in gl_state. */
-# define SYNTAX(c) syntax_property (c, 1)
-
-# ifdef malloc
-# undef malloc
-# endif
-# define malloc xmalloc
-# ifdef realloc
-# undef realloc
-# endif
-# define realloc xrealloc
-# ifdef free
-# undef free
-# endif
-# define free xfree
+#define SYNTAX(c) syntax_property (c, 1)
/* Converts the pointer to the char to BEG-based offset from the start. */
-# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
+#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
result to get the right base index. */
-# define POS_AS_IN_BUFFER(p) \
+#define POS_AS_IN_BUFFER(p) \
((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
-# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
-# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
-# define RE_STRING_CHAR(p, multibyte) \
+#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
+#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
+#define RE_STRING_CHAR(p, multibyte) \
(multibyte ? (STRING_CHAR (p)) : (*(p)))
-# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
+#define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
(multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p)))
-# define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
+#define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
-# define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
+#define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
/* Set C a (possibly converted to multibyte) character before P. P
points into a string which is the virtual concatenation of STR1
(which ends at END1) or STR2 (which ends at END2). */
-# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
+#define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
do { \
if (target_multibyte) \
{ \
@@ -191,7 +219,7 @@
/* Set C a (possibly converted to multibyte) character at P, and set
LEN to the byte length of that character. */
-# define GET_CHAR_AFTER(c, p, len) \
+#define GET_CHAR_AFTER(c, p, len) \
do { \
if (target_multibyte) \
(c) = STRING_CHAR_AND_LENGTH (p, len); \
@@ -202,235 +230,66 @@
(c) = RE_CHAR_TO_MULTIBYTE (c); \
} \
} while (0)
-
-#else /* not emacs */
-
-/* If we are not linking with Emacs proper,
- we can't use the relocating allocator
- even if config.h says that we can. */
-# undef REL_ALLOC
-
-# include <unistd.h>
-
-/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
-
-static ATTRIBUTE_MALLOC void *
-xmalloc (size_t size)
-{
- void *val = malloc (size);
- if (!val && size)
- {
- write (STDERR_FILENO, "virtual memory exhausted\n", 25);
- exit (1);
- }
- return val;
-}
-
-static void *
-xrealloc (void *block, size_t size)
-{
- void *val;
- /* We must call malloc explicitly when BLOCK is 0, since some
- reallocs don't do this. */
- if (! block)
- val = malloc (size);
- else
- val = realloc (block, size);
- if (!val && size)
- {
- write (STDERR_FILENO, "virtual memory exhausted\n", 25);
- exit (1);
- }
- return val;
-}
-
-# ifdef malloc
-# undef malloc
-# endif
-# define malloc xmalloc
-# ifdef realloc
-# undef realloc
-# endif
-# define realloc xrealloc
-
-# include <stdbool.h>
-# include <string.h>
-
-/* Define the syntax stuff for \<, \>, etc. */
-
-/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
-enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
-
-/* Dummy macros for non-Emacs environments. */
-# define MAX_MULTIBYTE_LENGTH 1
-# define RE_MULTIBYTE_P(x) 0
-# define RE_TARGET_MULTIBYTE_P(x) 0
-# define WORD_BOUNDARY_P(c1, c2) (0)
-# define BYTES_BY_CHAR_HEAD(p) (1)
-# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
-# define STRING_CHAR(p) (*(p))
-# define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
-# define CHAR_STRING(c, s) (*(s) = (c), 1)
-# define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p))
-# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len)
-# define RE_CHAR_TO_MULTIBYTE(c) (c)
-# define RE_CHAR_TO_UNIBYTE(c) (c)
-# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
- (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
-# define GET_CHAR_AFTER(c, p, len) \
- (c = *p, len = 1)
-# define CHAR_BYTE8_P(c) (0)
-# define CHAR_LEADING_CODE(c) (c)
-
-#endif /* not emacs */
-
-#ifndef RE_TRANSLATE
-# define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
-# define RE_TRANSLATE_P(TBL) (TBL)
-#endif
-/* Get the interface, including the syntax bits. */
-#include "regex-emacs.h"
-
/* isalpha etc. are used for the character classes. */
#include <ctype.h>
-#ifdef emacs
-
/* 1 if C is an ASCII character. */
-# define IS_REAL_ASCII(c) ((c) < 0200)
+#define IS_REAL_ASCII(c) ((c) < 0200)
/* 1 if C is a unibyte character. */
-# define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
+#define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
/* The Emacs definitions should not be directly affected by locales. */
/* In Emacs, these are only used for single-byte characters. */
-# define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
-# define ISCNTRL(c) ((c) < ' ')
-# define ISXDIGIT(c) (0 <= char_hexdigit (c))
+#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define ISCNTRL(c) ((c) < ' ')
+#define ISXDIGIT(c) (0 <= char_hexdigit (c))
/* The rest must handle multibyte characters. */
-# define ISBLANK(c) (IS_REAL_ASCII (c) \
+#define ISBLANK(c) (IS_REAL_ASCII (c) \
? ((c) == ' ' || (c) == '\t') \
: blankp (c))
-# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
+#define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
: graphicp (c))
-# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
+#define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
: printablep (c))
-# define ISALNUM(c) (IS_REAL_ASCII (c) \
+#define ISALNUM(c) (IS_REAL_ASCII (c) \
? (((c) >= 'a' && (c) <= 'z') \
|| ((c) >= 'A' && (c) <= 'Z') \
|| ((c) >= '0' && (c) <= '9')) \
: alphanumericp (c))
-# define ISALPHA(c) (IS_REAL_ASCII (c) \
+#define ISALPHA(c) (IS_REAL_ASCII (c) \
? (((c) >= 'a' && (c) <= 'z') \
|| ((c) >= 'A' && (c) <= 'Z')) \
: alphabeticp (c))
-# define ISLOWER(c) lowercasep (c)
+#define ISLOWER(c) lowercasep (c)
-# define ISPUNCT(c) (IS_REAL_ASCII (c) \
+#define ISPUNCT(c) (IS_REAL_ASCII (c) \
? ((c) > ' ' && (c) < 0177 \
&& !(((c) >= 'a' && (c) <= 'z') \
|| ((c) >= 'A' && (c) <= 'Z') \
|| ((c) >= '0' && (c) <= '9'))) \
: SYNTAX (c) != Sword)
-# define ISSPACE(c) (SYNTAX (c) == Swhitespace)
+#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
-# define ISUPPER(c) uppercasep (c)
-
-# define ISWORD(c) (SYNTAX (c) == Sword)
-
-#else /* not emacs */
-
-/* 1 if C is an ASCII character. */
-# define IS_REAL_ASCII(c) ((c) < 0200)
-
-/* This distinction is not meaningful, except in Emacs. */
-# define ISUNIBYTE(c) 1
-
-# ifdef isblank
-# define ISBLANK(c) isblank (c)
-# else
-# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-# endif
-# ifdef isgraph
-# define ISGRAPH(c) isgraph (c)
-# else
-# define ISGRAPH(c) (isprint (c) && !isspace (c))
-# endif
-
-/* Solaris defines ISPRINT so we must undefine it first. */
-# undef ISPRINT
-# define ISPRINT(c) isprint (c)
-# define ISDIGIT(c) isdigit (c)
-# define ISALNUM(c) isalnum (c)
-# define ISALPHA(c) isalpha (c)
-# define ISCNTRL(c) iscntrl (c)
-# define ISLOWER(c) islower (c)
-# define ISPUNCT(c) ispunct (c)
-# define ISSPACE(c) isspace (c)
-# define ISUPPER(c) isupper (c)
-# define ISXDIGIT(c) isxdigit (c)
-
-# define ISWORD(c) ISALPHA (c)
-
-# ifdef _tolower
-# define TOLOWER(c) _tolower (c)
-# else
-# define TOLOWER(c) tolower (c)
-# endif
-
-/* How many characters in the character set. */
-# define CHAR_SET_SIZE 256
-
-# ifdef SYNTAX_TABLE
-
-extern char *re_syntax_table;
-
-# else /* not SYNTAX_TABLE */
-
-static char re_syntax_table[CHAR_SET_SIZE];
-
-static void
-init_syntax_once (void)
-{
- register int c;
- static int done = 0;
-
- if (done)
- return;
-
- memset (re_syntax_table, 0, sizeof re_syntax_table);
-
- for (c = 0; c < CHAR_SET_SIZE; ++c)
- if (ISALNUM (c))
- re_syntax_table[c] = Sword;
-
- re_syntax_table['_'] = Ssymbol;
-
- done = 1;
-}
+#define ISUPPER(c) uppercasep (c)
-# endif /* not SYNTAX_TABLE */
-
-# define SYNTAX(c) re_syntax_table[(c)]
-
-#endif /* not emacs */
+#define ISWORD(c) (SYNTAX (c) == Sword)
#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
-/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
- use `alloca' instead of `malloc'. This is because using malloc in
+/* Use alloca instead of malloc. This is because using malloc in
re_search* or re_match* could cause memory leaks when C-g is used
in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
via `record_xmalloc' which uses `unwind_protect' to ensure the
@@ -442,64 +301,17 @@ init_syntax_once (void)
not functions -- `alloca'-allocated space disappears at the end of the
function it is called in. */
-#ifdef REGEX_MALLOC
-
-# define REGEX_ALLOCATE malloc
-# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
-# define REGEX_FREE free
-
-#else /* not REGEX_MALLOC */
-
-# ifdef emacs
/* This may be adjusted in main(), if the stack is successfully grown. */
ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
/* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */
-# define REGEX_USE_SAFE_ALLOCA \
- ptrdiff_t sa_avail = emacs_re_safe_alloca; \
- ptrdiff_t sa_count = SPECPDL_INDEX ()
-
-# define REGEX_SAFE_FREE() SAFE_FREE ()
-# define REGEX_ALLOCATE SAFE_ALLOCA
-# else
-# include <alloca.h>
-# define REGEX_ALLOCATE alloca
-# endif
+#define REGEX_USE_SAFE_ALLOCA \
+ USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca
/* Assumes a `char *destination' variable. */
-# define REGEX_REALLOCATE(source, osize, nsize) \
- (destination = REGEX_ALLOCATE (nsize), \
+#define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = SAFE_ALLOCA (nsize), \
memcpy (destination, source, osize))
-/* No need to do anything to free, after alloca. */
-# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
-
-#endif /* not REGEX_MALLOC */
-
-#ifndef REGEX_USE_SAFE_ALLOCA
-# define REGEX_USE_SAFE_ALLOCA ((void) 0)
-# define REGEX_SAFE_FREE() ((void) 0)
-#endif
-
-/* Define how to allocate the failure stack. */
-
-#if defined REL_ALLOC && defined REGEX_MALLOC
-
-# define REGEX_ALLOCATE_STACK(size) \
- r_alloc (&failure_stack_ptr, (size))
-# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
- r_re_alloc (&failure_stack_ptr, (nsize))
-# define REGEX_FREE_STACK(ptr) \
- r_alloc_free (&failure_stack_ptr)
-
-#else /* not using relocating allocator */
-
-# define REGEX_ALLOCATE_STACK(size) REGEX_ALLOCATE (size)
-# define REGEX_REALLOCATE_STACK(source, o, n) REGEX_REALLOCATE (source, o, n)
-# define REGEX_FREE_STACK(ptr) REGEX_FREE (ptr)
-
-#endif /* not using relocating allocator */
-
-
/* True if `size1' is non-NULL and PTR is pointing anywhere inside
`string1' or just past its end. This works if PTR is NULL, which is
a good thing. */
@@ -507,30 +319,21 @@ ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
(size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
/* (Re)Allocate N items of type T using malloc, or fail. */
-#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
-#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
-#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+#define TALLOC(n, t) ((t *) xmalloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) xrealloc (addr, (n) * sizeof (t)))
#define BYTEWIDTH 8 /* In bits. */
-#ifndef emacs
-# undef max
-# undef min
-# define max(a, b) ((a) > (b) ? (a) : (b))
-# define min(a, b) ((a) < (b) ? (a) : (b))
-#endif
-
/* Type of source-pattern and string chars. */
typedef const unsigned char re_char;
-typedef char boolean;
-
-static regoff_t re_match_2_internal (struct re_pattern_buffer *bufp,
+static void re_compile_fastmap (struct re_pattern_buffer *);
+static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp,
re_char *string1, size_t size1,
re_char *string2, size_t size2,
- ssize_t pos,
+ ptrdiff_t pos,
struct re_registers *regs,
- ssize_t stop);
+ ptrdiff_t stop);
/* These are the command codes that appear in compiled regular
expressions. Some opcodes are followed by argument bytes. A
@@ -592,8 +395,7 @@ typedef enum
/* Fail unless at end of line. */
endline,
- /* Succeeds if at beginning of buffer (if emacs) or at beginning
- of string to be matched (if not). */
+ /* Succeeds if at beginning of buffer. */
begbuf,
/* Analogously, for end of buffer/string. */
@@ -658,10 +460,9 @@ typedef enum
syntaxspec,
/* Matches any character whose syntax is not that specified. */
- notsyntaxspec
+ notsyntaxspec,
-#ifdef emacs
- , at_dot, /* Succeeds if at point. */
+ at_dot, /* Succeeds if at point. */
/* Matches any character whose category-set contains the specified
category. The operator is followed by a byte which contains a
@@ -672,7 +473,6 @@ typedef enum
specified category. The operator is followed by a byte which
contains the category code (mnemonic ASCII character). */
notcategoryspec
-#endif /* emacs */
} re_opcode_t;
/* Common operations on the compiled pattern. */
@@ -760,12 +560,10 @@ extract_number_and_incr (re_char **source)
and the 2 bytes of flags at the start of the range table. */
#define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
-#ifdef emacs
/* Extract the bit flags that start a range table. */
#define CHARSET_RANGE_TABLE_BITS(p) \
((p)[2 + CHARSET_BITMAP_SIZE (p)] \
+ (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
-#endif
/* Return the address of end of RANGE_TABLE. COUNT is number of
ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
@@ -774,29 +572,23 @@ extract_number_and_incr (re_char **source)
#define CHARSET_RANGE_TABLE_END(range_table, count) \
((range_table) + (count) * 2 * 3)
-/* If DEBUG is defined, Regex prints many voluminous messages about what
- it is doing (if the variable `debug' is nonzero). If linked with the
- main program in `iregex.c', you can enter patterns and strings
- interactively. And if linked with the main program in `main.c' and
- the other test files, you can run the already-written tests. */
+/* If REGEX_EMACS_DEBUG is defined, print many voluminous messages
+ (if the variable regex_emacs_debug is positive). */
-#ifdef DEBUG
+#ifdef REGEX_EMACS_DEBUG
/* We use standard I/O for debugging. */
# include <stdio.h>
-/* It is useful to test things that ``must'' be true when debugging. */
-# include <assert.h>
-
-static int debug = -100000;
+static int regex_emacs_debug = -100000;
# define DEBUG_STATEMENT(e) e
-# define DEBUG_PRINT(...) if (debug > 0) printf (__VA_ARGS__)
+# define DEBUG_PRINT(...) if (regex_emacs_debug > 0) printf (__VA_ARGS__)
# define DEBUG_COMPILES_ARGUMENTS
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
- if (debug > 0) print_partial_compiled_pattern (s, e)
+ if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
- if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
+ if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
/* Print the fastmap in human-readable form. */
@@ -1085,7 +877,7 @@ print_compiled_pattern (struct re_pattern_buffer *bufp)
re_char *buffer = bufp->buffer;
print_partial_compiled_pattern (buffer, buffer + bufp->used);
- printf ("%ld bytes used/%ld bytes allocated.\n",
+ printf ("%zu bytes used/%zu bytes allocated.\n",
bufp->used, bufp->allocated);
if (bufp->fastmap_accurate && bufp->fastmap)
@@ -1131,146 +923,100 @@ print_double_string (re_char *where, re_char *string1, ssize_t size1,
}
}
-#else /* not DEBUG */
-
-# undef assert
-# define assert(e)
+#else /* not REGEX_EMACS_DEBUG */
# define DEBUG_STATEMENT(e)
# define DEBUG_PRINT(...)
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
-#endif /* not DEBUG */
+#endif /* not REGEX_EMACS_DEBUG */
-#ifndef emacs
-
-/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
- also be assigned to arbitrarily: each pattern buffer stores its own
- syntax, so it can be changed between regex compilations. */
-/* This has no initializer because initialized variables in Emacs
- become read-only after dumping. */
-reg_syntax_t re_syntax_options;
-
-
-/* Specify the precise syntax of regexps for compilation. This provides
- for compatibility for various utilities which historically have
- different, incompatible syntaxes.
-
- The argument SYNTAX is a bit mask comprised of the various bits
- defined in regex-emacs.h. We return the old syntax. */
-
-reg_syntax_t
-re_set_syntax (reg_syntax_t syntax)
+typedef enum
{
- reg_syntax_t ret = re_syntax_options;
-
- re_syntax_options = syntax;
- return ret;
-}
-WEAK_ALIAS (__re_set_syntax, re_set_syntax)
-
-#endif
-
-/* This table gives an error message for each of the error codes listed
- in regex-emacs.h. Obviously the order here has to be same as there.
- POSIX doesn't require that we do anything for REG_NOERROR,
- but why not be nice? */
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) An older version of this code supported the POSIX
+ API; this version continues to use these names internally. */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN, /* Unmatched ) or \); not returned from regcomp. */
+ REG_ERANGEX, /* Range striding over charsets. */
+ REG_ESIZEBR /* n or m too big in \{n,m\} */
+} reg_errcode_t;
static const char *re_error_msgid[] =
{
- gettext_noop ("Success"), /* REG_NOERROR */
- gettext_noop ("No match"), /* REG_NOMATCH */
- gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
- gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
- gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
- gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
- gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
- gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
- gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
- gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
- gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
- gettext_noop ("Invalid range end"), /* REG_ERANGE */
- gettext_noop ("Memory exhausted"), /* REG_ESPACE */
- gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
- gettext_noop ("Premature end of regular expression"), /* REG_EEND */
- gettext_noop ("Regular expression too big"), /* REG_ESIZE */
- gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
- gettext_noop ("Range striding over charsets"), /* REG_ERANGEX */
- gettext_noop ("Invalid content of \\{\\}, repetitions too big") /* REG_ESIZEBR */
+ [REG_NOERROR] = "Success",
+ [REG_NOMATCH] = "No match",
+ [REG_BADPAT] = "Invalid regular expression",
+ [REG_ECOLLATE] = "Invalid collation character",
+ [REG_ECTYPE] = "Invalid character class name",
+ [REG_EESCAPE] = "Trailing backslash",
+ [REG_ESUBREG] = "Invalid back reference",
+ [REG_EBRACK] = "Unmatched [ or [^",
+ [REG_EPAREN] = "Unmatched ( or \\(",
+ [REG_EBRACE] = "Unmatched \\{",
+ [REG_BADBR] = "Invalid content of \\{\\}",
+ [REG_ERANGE] = "Invalid range end",
+ [REG_ESPACE] = "Memory exhausted",
+ [REG_BADRPT] = "Invalid preceding regular expression",
+ [REG_EEND] = "Premature end of regular expression",
+ [REG_ESIZE] = "Regular expression too big",
+ [REG_ERPAREN] = "Unmatched ) or \\)",
+ [REG_ERANGEX ] = "Range striding over charsets",
+ [REG_ESIZEBR ] = "Invalid content of \\{\\}",
};
-
-/* Whether to allocate memory during matching. */
-
-/* Define MATCH_MAY_ALLOCATE to allow the searching and matching
- functions allocate memory for the failure stack and registers.
- Normally should be defined, because otherwise searching and
- matching routines will have much smaller memory resources at their
- disposal, and therefore might fail to handle complex regexps.
- Therefore undefine MATCH_MAY_ALLOCATE only in the following
- exceptional situations:
-
- . When running on a system where memory is at premium.
- . When alloca cannot be used at all, perhaps due to bugs in
- its implementation, or its being unavailable, or due to a
- very small stack size. This requires to define REGEX_MALLOC
- to use malloc instead, which in turn could lead to memory
- leaks if search is interrupted by a signal. (For these
- reasons, defining REGEX_MALLOC when building Emacs
- automatically undefines MATCH_MAY_ALLOCATE, but outside
- Emacs you may not care about memory leaks.) If you want to
- prevent the memory leaks, undefine MATCH_MAY_ALLOCATE.
- . When code that calls the searching and matching functions
- cannot allow memory allocation, for whatever reasons. */
-
-/* Normally, this is fine. */
-#define MATCH_MAY_ALLOCATE
-
-/* The match routines may not allocate if (1) they would do it with malloc
- and (2) it's not safe for them to use malloc.
- Note that if REL_ALLOC is defined, matching would not use malloc for the
- failure stack, but we would still use it for the register vectors;
- so REL_ALLOC should not affect this. */
-#if defined REGEX_MALLOC && defined emacs
-# undef MATCH_MAY_ALLOCATE
-#endif
-/* While regex matching of a single compiled pattern isn't reentrant
- (because we compile regexes to bytecode programs, and the bytecode
- programs are self-modifying), the regex machinery must nevertheless
- be reentrant with respect to _different_ patterns, and we do that
- by avoiding global variables and using MATCH_MAY_ALLOCATE. */
-#if !defined MATCH_MAY_ALLOCATE && defined emacs
-# error "Emacs requires MATCH_MAY_ALLOCATE"
-#endif
+/* For 'regs_allocated'. */
+enum { REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED };
+/* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ 're_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+enum { RE_NREGS = 30 };
+/* The searching and matching functions allocate memory for the
+ failure stack and registers. Otherwise searching and matching
+ routines would have much smaller memory resources at their
+ disposal, and therefore might fail to handle complex regexps. */
+
/* Failure stack declarations and macros; both re_compile_fastmap and
re_match_2 use a failure stack. These have to be macros because of
- REGEX_ALLOCATE_STACK. */
+ SAFE_ALLOCA. */
/* Approximate number of failure points for which to initially allocate space
when matching. If this number is exceeded, we allocate more
space, so it is not a hard limit. */
-#ifndef INIT_FAILURE_ALLOC
-# define INIT_FAILURE_ALLOC 20
-#endif
+#define INIT_FAILURE_ALLOC 20
/* Roughly the maximum number of failure points on the stack. Would be
exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
This is a variable only so users of regex can assign to it; we never
change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE
before using it, so it should probably be a byte-count instead. */
-# if defined MATCH_MAY_ALLOCATE
/* Note that 4400 was enough to cause a crash on Alpha OSF/1,
whose default stack limit is 2mb. In order for a larger
value to work reliably, you have to try to make it accord
with the process stack limit. */
size_t emacs_re_max_failures = 40000;
-# else
-size_t emacs_re_max_failures = 4000;
-# endif
union fail_stack_elt
{
@@ -1292,33 +1038,17 @@ typedef struct
#define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
-/* Define macros to initialize and free the failure stack.
- Do `return -2' if the alloc fails. */
+/* Define macros to initialize and free the failure stack. */
-#ifdef MATCH_MAY_ALLOCATE
-# define INIT_FAIL_STACK() \
+#define INIT_FAIL_STACK() \
do { \
fail_stack.stack = \
- REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
- * sizeof (fail_stack_elt_t)); \
- \
- if (fail_stack.stack == NULL) \
- return -2; \
- \
+ SAFE_ALLOCA (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
+ * sizeof (fail_stack_elt_t)); \
fail_stack.size = INIT_FAILURE_ALLOC; \
fail_stack.avail = 0; \
fail_stack.frame = 0; \
} while (0)
-#else
-# define INIT_FAIL_STACK() \
- do { \
- fail_stack.avail = 0; \
- fail_stack.frame = 0; \
- } while (0)
-
-# define RETALLOC_IF(addr, n, t) \
- if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
-#endif
/* Double the size of FAIL_STACK, up to a limit
@@ -1327,7 +1057,7 @@ typedef struct
Return 1 if succeeds, and 0 if either ran out of memory
allocating space for it or it was already too large.
- REGEX_REALLOCATE_STACK requires `destination' be declared. */
+ REGEX_REALLOCATE requires `destination' be declared. */
/* Factor to increase the failure stack size by
when we increase it.
@@ -1340,18 +1070,15 @@ typedef struct
(((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \
? 0 \
: ((fail_stack).stack \
- = REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+ = REGEX_REALLOCATE ((fail_stack).stack, \
(fail_stack).size * sizeof (fail_stack_elt_t), \
min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \
* sizeof (fail_stack_elt_t)), \
- \
- (fail_stack).stack == NULL \
- ? 0 \
- : ((fail_stack).size \
- = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
- ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \
- 1)))
+ ((fail_stack).size \
+ = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
+ ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)))), \
+ 1))
/* Push a pointer value onto the failure stack.
@@ -1385,8 +1112,8 @@ typedef struct
while (REMAINING_AVAIL_SLOTS <= space) { \
if (!GROW_FAIL_STACK (fail_stack)) \
return -2; \
- DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\
- DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\
+ DEBUG_PRINT ("\n Doubled stack; size now: %zu\n", (fail_stack).size);\
+ DEBUG_PRINT (" slots available: %zu\n", REMAINING_AVAIL_SLOTS);\
}
/* Push register NUM onto the stack. */
@@ -1424,7 +1151,7 @@ do { \
if (pfreg == -1) \
{ \
/* It's a counter. */ \
- /* Here, we discard `const', making re_match non-reentrant. */ \
+ /* Discard 'const', making re_search non-reentrant. */ \
unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \
pfreg = POP_FAILURE_INT (); \
STORE_NUMBER (ptr, pfreg); \
@@ -1442,14 +1169,14 @@ do { \
/* Check that we are not stuck in an infinite loop. */
#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
do { \
- ssize_t failure = TOP_FAILURE_HANDLE (); \
+ ptrdiff_t failure = TOP_FAILURE_HANDLE (); \
/* Check for infinite matching loops */ \
while (failure > 0 \
&& (FAILURE_STR (failure) == string_place \
|| FAILURE_STR (failure) == NULL)) \
{ \
- assert (FAILURE_PAT (failure) >= bufp->buffer \
- && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
+ eassert (FAILURE_PAT (failure) >= bufp->buffer \
+ && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
if (FAILURE_PAT (failure) == pat_cur) \
{ \
cycle = 1; \
@@ -1478,14 +1205,14 @@ do { \
\
DEBUG_STATEMENT (nfailure_points_pushed++); \
DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \
- DEBUG_PRINT (" Before push, next avail: %zd\n", (fail_stack).avail); \
- DEBUG_PRINT (" size: %zd\n", (fail_stack).size);\
+ DEBUG_PRINT (" Before push, next avail: %zu\n", (fail_stack).avail); \
+ DEBUG_PRINT (" size: %zu\n", (fail_stack).size);\
\
ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \
\
DEBUG_PRINT ("\n"); \
\
- DEBUG_PRINT (" Push frame index: %zd\n", fail_stack.frame); \
+ DEBUG_PRINT (" Push frame index: %zu\n", fail_stack.frame); \
PUSH_FAILURE_INT (fail_stack.frame); \
\
DEBUG_PRINT (" Push string %p: \"", string_place); \
@@ -1523,12 +1250,12 @@ do { \
#define POP_FAILURE_POINT(str, pat) \
do { \
- assert (!FAIL_STACK_EMPTY ()); \
+ eassert (!FAIL_STACK_EMPTY ()); \
\
/* Remove failure points and point to how many regs pushed. */ \
DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \
- DEBUG_PRINT (" Before pop, next avail: %zd\n", fail_stack.avail); \
- DEBUG_PRINT (" size: %zd\n", fail_stack.size); \
+ DEBUG_PRINT (" Before pop, next avail: %zu\n", fail_stack.avail); \
+ DEBUG_PRINT (" size: %zu\n", fail_stack.size); \
\
/* Pop the saved registers. */ \
while (fail_stack.frame < fail_stack.avail) \
@@ -1547,10 +1274,10 @@ do { \
DEBUG_PRINT ("\"\n"); \
\
fail_stack.frame = POP_FAILURE_INT (); \
- DEBUG_PRINT (" Popping frame index: %zd\n", fail_stack.frame); \
+ DEBUG_PRINT (" Popping frame index: %zu\n", fail_stack.frame); \
\
- assert (fail_stack.avail >= 0); \
- assert (fail_stack.frame <= fail_stack.avail); \
+ eassert (fail_stack.avail >= 0); \
+ eassert (fail_stack.frame <= fail_stack.avail); \
\
DEBUG_STATEMENT (nfailure_points_popped++); \
} while (0) /* POP_FAILURE_POINT */
@@ -1563,12 +1290,8 @@ do { \
/* Subroutine declarations and macros for regex_compile. */
static reg_errcode_t regex_compile (re_char *pattern, size_t size,
-#ifdef emacs
bool posix_backtracking,
const char *whitespace_regexp,
-#else
- reg_syntax_t syntax,
-#endif
struct re_pattern_buffer *bufp);
static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
@@ -1576,10 +1299,10 @@ static void insert_op1 (re_opcode_t op, unsigned char *loc,
int arg, unsigned char *end);
static void insert_op2 (re_opcode_t op, unsigned char *loc,
int arg1, int arg2, unsigned char *end);
-static boolean at_begline_loc_p (re_char *pattern, re_char *p,
- reg_syntax_t syntax);
-static boolean at_endline_loc_p (re_char *p, re_char *pend,
- reg_syntax_t syntax);
+static bool at_begline_loc_p (re_char *pattern, re_char *p,
+ reg_syntax_t syntax);
+static bool at_endline_loc_p (re_char *p, re_char *pend,
+ reg_syntax_t syntax);
static re_char *skip_one_char (re_char *p);
static int analyze_first (re_char *p, re_char *pend,
char *fastmap, const int multibyte);
@@ -1595,14 +1318,15 @@ static int analyze_first (re_char *p, re_char *pend,
} while (0)
-/* If `translate' is non-null, return translate[D], else just D. We
+#define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C)
+#define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0)))
+
+/* If `translate' is non-zero, return translate[D], else just D. We
cast the subscript to translate because some data is declared as
`char *', to avoid warnings when a string constant is passed. But
when we use a character as a subscript we must make it unsigned. */
-#ifndef TRANSLATE
-# define TRANSLATE(d) \
+#define TRANSLATE(d) \
(RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
-#endif
/* Macros for outputting the compiled pattern into `buffer'. */
@@ -1677,8 +1401,6 @@ static int analyze_first (re_char *p, re_char *pend,
if (laststart_set) laststart_off = laststart - old_buffer; \
if (pending_exact_set) pending_exact_off = pending_exact - old_buffer; \
RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \
- if (bufp->buffer == NULL) \
- return REG_ESPACE; \
unsigned char *new_buffer = bufp->buffer; \
b = new_buffer + b_off; \
begalt = new_buffer + begalt_off; \
@@ -1729,12 +1451,6 @@ typedef struct
/* The next available element. */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
-
-/* Explicit quit checking is needed for Emacs, which uses polling to
- process input events. */
-#ifndef emacs
-static void maybe_quit (void) {}
-#endif
/* Structure to manage work area for range table. */
struct range_table_work_area
@@ -1745,8 +1461,6 @@ struct range_table_work_area
int bits; /* flag to record character classes */
};
-#ifdef emacs
-
/* Make sure that WORK_AREA can hold more N multibyte characters.
This is used only in set_image_of_range and set_image_of_range_1.
It expects WORK_AREA to be a pointer.
@@ -1773,13 +1487,11 @@ struct range_table_work_area
(work_area).table[(work_area).used++] = (range_end); \
} while (0)
-#endif /* emacs */
-
/* Free allocated memory for WORK_AREA. */
#define FREE_RANGE_TABLE_WORK_AREA(work_area) \
do { \
if ((work_area).table) \
- free ((work_area).table); \
+ xfree ((work_area).table); \
} while (0)
#define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
@@ -1807,8 +1519,6 @@ struct range_table_work_area
#define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
-#ifdef emacs
-
/* Store characters in the range FROM to TO in the bitmap at B (for
ASCII and unibyte characters) and WORK_AREA (for multibyte
characters) while translating them and paying attention to the
@@ -1912,8 +1622,6 @@ struct range_table_work_area
} \
} while (0)
-#endif /* emacs */
-
/* Get the next unsigned number in the uncompiled pattern. */
#define GET_INTERVAL_COUNT(num) \
do { \
@@ -1936,8 +1644,6 @@ struct range_table_work_area
} \
} while (0)
-#if ! WIDE_CHAR_SUPPORT
-
/* Parse a character class, i.e. string such as "[:name:]". *strp
points to the string to be parsed and limit is length, in bytes, of
that string.
@@ -2031,7 +1737,7 @@ re_wctype_parse (const unsigned char **strp, unsigned limit)
}
/* True if CH is in the char class CC. */
-boolean
+bool
re_iswctype (int ch, re_wctype_t cc)
{
switch (cc)
@@ -2084,7 +1790,6 @@ re_wctype_to_bit (re_wctype_t cc)
abort ();
}
}
-#endif
/* Filling in the work area of a range. */
@@ -2094,288 +1799,16 @@ static void
extend_range_table_work_area (struct range_table_work_area *work_area)
{
work_area->allocated += 16 * sizeof (int);
- work_area->table = realloc (work_area->table, work_area->allocated);
+ work_area->table = xrealloc (work_area->table, work_area->allocated);
}
-
-#if 0
-#ifdef emacs
-
-/* Carefully find the ranges of codes that are equivalent
- under case conversion to the range start..end when passed through
- TRANSLATE. Handle the case where non-letters can come in between
- two upper-case letters (which happens in Latin-1).
- Also handle the case of groups of more than 2 case-equivalent chars.
-
- The basic method is to look at consecutive characters and see
- if they can form a run that can be handled as one.
-
- Returns -1 if successful, REG_ESPACE if ran out of space. */
-
-static int
-set_image_of_range_1 (struct range_table_work_area *work_area,
- re_wchar_t start, re_wchar_t end,
- RE_TRANSLATE_TYPE translate)
-{
- /* `one_case' indicates a character, or a run of characters,
- each of which is an isolate (no case-equivalents).
- This includes all ASCII non-letters.
-
- `two_case' indicates a character, or a run of characters,
- each of which has two case-equivalent forms.
- This includes all ASCII letters.
-
- `strange' indicates a character that has more than one
- case-equivalent. */
-
- enum case_type {one_case, two_case, strange};
-
- /* Describe the run that is in progress,
- which the next character can try to extend.
- If run_type is strange, that means there really is no run.
- If run_type is one_case, then run_start...run_end is the run.
- If run_type is two_case, then the run is run_start...run_end,
- and the case-equivalents end at run_eqv_end. */
-
- enum case_type run_type = strange;
- int run_start, run_end, run_eqv_end;
-
- Lisp_Object eqv_table;
-
- if (!RE_TRANSLATE_P (translate))
- {
- EXTEND_RANGE_TABLE (work_area, 2);
- work_area->table[work_area->used++] = (start);
- work_area->table[work_area->used++] = (end);
- return -1;
- }
-
- eqv_table = XCHAR_TABLE (translate)->extras[2];
-
- for (; start <= end; start++)
- {
- enum case_type this_type;
- int eqv = RE_TRANSLATE (eqv_table, start);
- int minchar, maxchar;
-
- /* Classify this character */
- if (eqv == start)
- this_type = one_case;
- else if (RE_TRANSLATE (eqv_table, eqv) == start)
- this_type = two_case;
- else
- this_type = strange;
-
- if (start < eqv)
- minchar = start, maxchar = eqv;
- else
- minchar = eqv, maxchar = start;
-
- /* Can this character extend the run in progress? */
- if (this_type == strange || this_type != run_type
- || !(minchar == run_end + 1
- && (run_type == two_case
- ? maxchar == run_eqv_end + 1 : 1)))
- {
- /* No, end the run.
- Record each of its equivalent ranges. */
- if (run_type == one_case)
- {
- EXTEND_RANGE_TABLE (work_area, 2);
- work_area->table[work_area->used++] = run_start;
- work_area->table[work_area->used++] = run_end;
- }
- else if (run_type == two_case)
- {
- EXTEND_RANGE_TABLE (work_area, 4);
- work_area->table[work_area->used++] = run_start;
- work_area->table[work_area->used++] = run_end;
- work_area->table[work_area->used++]
- = RE_TRANSLATE (eqv_table, run_start);
- work_area->table[work_area->used++]
- = RE_TRANSLATE (eqv_table, run_end);
- }
- run_type = strange;
- }
-
- if (this_type == strange)
- {
- /* For a strange character, add each of its equivalents, one
- by one. Don't start a range. */
- do
- {
- EXTEND_RANGE_TABLE (work_area, 2);
- work_area->table[work_area->used++] = eqv;
- work_area->table[work_area->used++] = eqv;
- eqv = RE_TRANSLATE (eqv_table, eqv);
- }
- while (eqv != start);
- }
-
- /* Add this char to the run, or start a new run. */
- else if (run_type == strange)
- {
- /* Initialize a new range. */
- run_type = this_type;
- run_start = start;
- run_end = start;
- run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
- }
- else
- {
- /* Extend a running range. */
- run_end = minchar;
- run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
- }
- }
-
- /* If a run is still in progress at the end, finish it now
- by recording its equivalent ranges. */
- if (run_type == one_case)
- {
- EXTEND_RANGE_TABLE (work_area, 2);
- work_area->table[work_area->used++] = run_start;
- work_area->table[work_area->used++] = run_end;
- }
- else if (run_type == two_case)
- {
- EXTEND_RANGE_TABLE (work_area, 4);
- work_area->table[work_area->used++] = run_start;
- work_area->table[work_area->used++] = run_end;
- work_area->table[work_area->used++]
- = RE_TRANSLATE (eqv_table, run_start);
- work_area->table[work_area->used++]
- = RE_TRANSLATE (eqv_table, run_end);
- }
-
- return -1;
-}
-
-#endif /* emacs */
-
-/* Record the image of the range start..end when passed through
- TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end)
- and is not even necessarily contiguous.
- Normally we approximate it with the smallest contiguous range that contains
- all the chars we need. However, for Latin-1 we go to extra effort
- to do a better job.
-
- This function is not called for ASCII ranges.
-
- Returns -1 if successful, REG_ESPACE if ran out of space. */
-
-static int
-set_image_of_range (struct range_table_work_area *work_area,
- re_wchar_t start, re_wchar_t end,
- RE_TRANSLATE_TYPE translate)
-{
- re_wchar_t cmin, cmax;
-
-#ifdef emacs
- /* For Latin-1 ranges, use set_image_of_range_1
- to get proper handling of ranges that include letters and nonletters.
- For a range that includes the whole of Latin-1, this is not necessary.
- For other character sets, we don't bother to get this right. */
- if (RE_TRANSLATE_P (translate) && start < 04400
- && !(start < 04200 && end >= 04377))
- {
- int newend;
- int tem;
- newend = end;
- if (newend > 04377)
- newend = 04377;
- tem = set_image_of_range_1 (work_area, start, newend, translate);
- if (tem > 0)
- return tem;
-
- start = 04400;
- if (end < 04400)
- return -1;
- }
-#endif
-
- EXTEND_RANGE_TABLE (work_area, 2);
- work_area->table[work_area->used++] = (start);
- work_area->table[work_area->used++] = (end);
-
- cmin = -1, cmax = -1;
-
- if (RE_TRANSLATE_P (translate))
- {
- int ch;
-
- for (ch = start; ch <= end; ch++)
- {
- re_wchar_t c = TRANSLATE (ch);
- if (! (start <= c && c <= end))
- {
- if (cmin == -1)
- cmin = c, cmax = c;
- else
- {
- cmin = min (cmin, c);
- cmax = max (cmax, c);
- }
- }
- }
-
- if (cmin != -1)
- {
- EXTEND_RANGE_TABLE (work_area, 2);
- work_area->table[work_area->used++] = (cmin);
- work_area->table[work_area->used++] = (cmax);
- }
- }
-
- return -1;
-}
-#endif /* 0 */
-
-#ifndef MATCH_MAY_ALLOCATE
-
-/* If we cannot allocate large objects within re_match_2_internal,
- we make the fail stack and register vectors global.
- The fail stack, we grow to the maximum size when a regexp
- is compiled.
- The register vectors, we adjust in size each time we
- compile a regexp, according to the number of registers it needs. */
-
-static fail_stack_type fail_stack;
-
-/* Size with which the following vectors are currently allocated.
- That is so we can make them bigger as needed,
- but never make them smaller. */
-static int regs_allocated_size;
-
-static re_char ** regstart, ** regend;
-static re_char **best_regstart, **best_regend;
-
-/* Make the register vectors big enough for NUM_REGS registers,
- but don't make them smaller. */
-
-static
-regex_grow_registers (int num_regs)
-{
- if (num_regs > regs_allocated_size)
- {
- RETALLOC_IF (regstart, num_regs, re_char *);
- RETALLOC_IF (regend, num_regs, re_char *);
- RETALLOC_IF (best_regstart, num_regs, re_char *);
- RETALLOC_IF (best_regend, num_regs, re_char *);
-
- regs_allocated_size = num_regs;
- }
-}
-
-#endif /* not MATCH_MAY_ALLOCATE */
-static boolean group_in_compile_stack (compile_stack_type compile_stack,
- regnum_t regnum);
+static bool group_in_compile_stack (compile_stack_type, regnum_t);
/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
Returns one of error codes defined in `regex-emacs.h', or zero for success.
- If WHITESPACE_REGEXP is given (only #ifdef emacs), it is used instead of
- a space character in PATTERN.
+ If WHITESPACE_REGEXP is given, it is used instead of a space
+ character in PATTERN.
Assumes the `allocated' (and perhaps `buffer') and `translate'
fields are set in BUFP on entry.
@@ -2404,42 +1837,33 @@ do { \
#define FREE_STACK_RETURN(value) \
do { \
FREE_RANGE_TABLE_WORK_AREA (range_table_work); \
- free (compile_stack.stack); \
+ xfree (compile_stack.stack); \
return value; \
} while (0)
static reg_errcode_t
regex_compile (re_char *pattern, size_t size,
-#ifdef emacs
-# define syntax RE_SYNTAX_EMACS
bool posix_backtracking,
const char *whitespace_regexp,
-#else
- reg_syntax_t syntax,
-# define posix_backtracking (!(syntax & RE_NO_POSIX_BACKTRACKING))
-#endif
struct re_pattern_buffer *bufp)
{
+ reg_syntax_t syntax = RE_SYNTAX_EMACS;
+
/* We fetch characters from PATTERN here. */
- register re_wchar_t c, c1;
+ int c, c1;
/* Points to the end of the buffer, where we should append. */
- register unsigned char *b;
+ unsigned char *b;
/* Keeps track of unclosed groups. */
compile_stack_type compile_stack;
/* Points to the current (ending) position in the pattern. */
-#ifdef AIX
- /* `const' makes AIX compiler fail. */
- unsigned char *p = pattern;
-#else
re_char *p = pattern;
-#endif
re_char *pend = pattern + size;
/* How to translate the characters in the pattern. */
- RE_TRANSLATE_TYPE translate = bufp->translate;
+ Lisp_Object translate = bufp->translate;
/* Address of the count-byte of the most recently inserted `exactn'
command. This makes it possible to tell if a new exact-match
@@ -2468,9 +1892,8 @@ regex_compile (re_char *pattern, size_t size,
struct range_table_work_area range_table_work;
/* If the object matched can contain multibyte characters. */
- const boolean multibyte = RE_MULTIBYTE_P (bufp);
+ bool multibyte = RE_MULTIBYTE_P (bufp);
-#ifdef emacs
/* Nonzero if we have pushed down into a subpattern. */
int in_subpattern = 0;
@@ -2479,26 +1902,22 @@ regex_compile (re_char *pattern, size_t size,
re_char *main_p;
re_char *main_pattern;
re_char *main_pend;
-#endif
-#ifdef DEBUG
- debug++;
+#ifdef REGEX_EMACS_DEBUG
+ regex_emacs_debug++;
DEBUG_PRINT ("\nCompiling pattern: ");
- if (debug > 0)
+ if (regex_emacs_debug > 0)
{
- unsigned debug_count;
+ size_t debug_count;
for (debug_count = 0; debug_count < size; debug_count++)
putchar (pattern[debug_count]);
putchar ('\n');
}
-#endif /* DEBUG */
+#endif
/* Initialize the compile stack. */
compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
- if (compile_stack.stack == NULL)
- return REG_ESPACE;
-
compile_stack.size = INIT_COMPILE_STACK_SIZE;
compile_stack.avail = 0;
@@ -2506,9 +1925,6 @@ regex_compile (re_char *pattern, size_t size,
range_table_work.allocated = 0;
/* Initialize the pattern buffer. */
-#ifndef emacs
- bufp->syntax = syntax;
-#endif
bufp->fastmap_accurate = 0;
bufp->not_bol = bufp->not_eol = 0;
bufp->used_syntax = 0;
@@ -2521,11 +1937,6 @@ regex_compile (re_char *pattern, size_t size,
/* Always count groups, whether or not bufp->no_sub is set. */
bufp->re_nsub = 0;
-#if !defined emacs && !defined SYNTAX_TABLE
- /* Initialize the syntax table. */
- init_syntax_once ();
-#endif
-
if (bufp->allocated == 0)
{
if (bufp->buffer)
@@ -2538,8 +1949,6 @@ regex_compile (re_char *pattern, size_t size,
{ /* Caller did not allocate a buffer. Do it for them. */
bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
}
- if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
-
bufp->allocated = INIT_BUF_SIZE;
}
@@ -2550,7 +1959,6 @@ regex_compile (re_char *pattern, size_t size,
{
if (p == pend)
{
-#ifdef emacs
/* If this is the end of an included regexp,
pop back to the main regexp and try again. */
if (in_subpattern)
@@ -2561,7 +1969,6 @@ regex_compile (re_char *pattern, size_t size,
pend = main_pend;
continue;
}
-#endif
/* If this is the end of the main regexp, we are done. */
break;
}
@@ -2570,7 +1977,6 @@ regex_compile (re_char *pattern, size_t size,
switch (c)
{
-#ifdef emacs
case ' ':
{
re_char *p1 = p;
@@ -2603,7 +2009,6 @@ regex_compile (re_char *pattern, size_t size,
pend = p + strlen (whitespace_regexp);
break;
}
-#endif
case '^':
{
@@ -2654,8 +2059,8 @@ regex_compile (re_char *pattern, size_t size,
{
/* 1 means zero (many) matches is allowed. */
- boolean zero_times_ok = 0, many_times_ok = 0;
- boolean greedy = 1;
+ bool zero_times_ok = false, many_times_ok = false;
+ bool greedy = true;
/* If there is a sequence of repetition chars, collapse it
down to just one (the right one). We can't combine
@@ -2666,7 +2071,7 @@ regex_compile (re_char *pattern, size_t size,
{
if ((syntax & RE_FRUGAL)
&& c == '?' && (zero_times_ok || many_times_ok))
- greedy = 0;
+ greedy = false;
else
{
zero_times_ok |= c != '+';
@@ -2705,13 +2110,13 @@ regex_compile (re_char *pattern, size_t size,
{
if (many_times_ok)
{
- boolean simple = skip_one_char (laststart) == b;
+ bool simple = skip_one_char (laststart) == b;
size_t startoffset = 0;
re_opcode_t ofj =
/* Check if the loop can match the empty string. */
(simple || !analyze_first (laststart, b, NULL, 0))
? on_failure_jump : on_failure_jump_loop;
- assert (skip_one_char (laststart) <= b);
+ eassert (skip_one_char (laststart) <= b);
if (!zero_times_ok && simple)
{ /* Since simple * loops can be made faster by using
@@ -2744,7 +2149,7 @@ regex_compile (re_char *pattern, size_t size,
else
{
/* A simple ? pattern. */
- assert (zero_times_ok);
+ eassert (zero_times_ok);
GET_BUFFER_SPACE (3);
INSERT_JUMP (on_failure_jump, laststart, b + 3);
b += 3;
@@ -2756,7 +2161,7 @@ regex_compile (re_char *pattern, size_t size,
GET_BUFFER_SPACE (7); /* We might use less. */
if (many_times_ok)
{
- boolean emptyp = analyze_first (laststart, b, NULL, 0);
+ bool emptyp = analyze_first (laststart, b, NULL, 0);
/* The non-greedy multiple match looks like
a repeat..until: we only need a conditional jump
@@ -2831,10 +2236,9 @@ regex_compile (re_char *pattern, size_t size,
/* Read in characters and ranges, setting map bits. */
for (;;)
{
- boolean escaped_char = false;
const unsigned char *p2 = p;
re_wctype_t cc;
- re_wchar_t ch;
+ int ch;
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@@ -2849,15 +2253,6 @@ regex_compile (re_char *pattern, size_t size,
if (p == pend)
FREE_STACK_RETURN (REG_EBRACK);
-#ifndef emacs
- for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
- if (re_iswctype (btowc (ch), cc))
- {
- c = TRANSLATE (ch);
- if (c < (1 << BYTEWIDTH))
- SET_LIST_BIT (c);
- }
-#else /* emacs */
/* Most character classes in a multibyte match just set
a flag. Exceptions are is_blank, is_digit, is_cntrl, and
is_xdigit, since they can only match ASCII characters.
@@ -2884,7 +2279,7 @@ regex_compile (re_char *pattern, size_t size,
}
SET_RANGE_TABLE_WORK_AREA_BIT
(range_table_work, re_wctype_to_bit (cc));
-#endif /* emacs */
+
/* In most cases the matching rule for char classes only
uses the syntax table for multibyte chars, so that the
content of the syntax-table is not hardcoded in the
@@ -2908,7 +2303,6 @@ regex_compile (re_char *pattern, size_t size,
if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
PATFETCH (c);
- escaped_char = true;
}
else
{
@@ -2927,13 +2321,12 @@ regex_compile (re_char *pattern, size_t size,
/* Fetch the character which ends the range. */
PATFETCH (c1);
-#ifdef emacs
+
if (CHAR_BYTE8_P (c1)
&& ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
/* Treat the range from a multibyte character to
raw-byte character as empty. */
c = c1 + 1;
-#endif /* emacs */
}
else
/* Range from C to C. */
@@ -2947,15 +2340,6 @@ regex_compile (re_char *pattern, size_t size,
}
else
{
-#ifndef emacs
- /* Set the range into bitmap */
- for (; c <= c1; c++)
- {
- ch = TRANSLATE (c);
- if (ch < (1 << BYTEWIDTH))
- SET_LIST_BIT (ch);
- }
-#else /* emacs */
if (c < 128)
{
ch = min (127, c1);
@@ -2982,7 +2366,6 @@ regex_compile (re_char *pattern, size_t size,
SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
}
}
-#endif /* emacs */
}
}
@@ -3007,8 +2390,7 @@ regex_compile (re_char *pattern, size_t size,
/* Indicate the existence of range table. */
laststart[1] |= 0x80;
- /* Store the character class flag bits into the range table.
- If not in emacs, these flag bits are always 0. */
+ /* Store the character class flag bits into the range table. */
*b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
*b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
@@ -3127,8 +2509,6 @@ regex_compile (re_char *pattern, size_t size,
{
RETALLOC (compile_stack.stack, compile_stack.size << 1,
compile_stack_elt_t);
- if (compile_stack.stack == NULL) return REG_ESPACE;
-
compile_stack.size <<= 1;
}
@@ -3184,7 +2564,7 @@ regex_compile (re_char *pattern, size_t size,
/* Since we just checked for an empty stack above, this
``can't happen''. */
- assert (compile_stack.avail != 0);
+ eassert (compile_stack.avail != 0);
{
/* We don't just want to restore into `regnum', because
later groups should continue to be numbered higher,
@@ -3410,7 +2790,7 @@ regex_compile (re_char *pattern, size_t size,
unfetch_interval:
/* If an invalid interval, match the characters as literals. */
- assert (beg_interval);
+ eassert (beg_interval);
p = beg_interval;
beg_interval = NULL;
@@ -3419,13 +2799,12 @@ regex_compile (re_char *pattern, size_t size,
if (!(syntax & RE_NO_BK_BRACES))
{
- assert (p > pattern && p[-1] == '\\');
+ eassert (p > pattern && p[-1] == '\\');
goto normal_backslash;
}
else
goto normal_char;
-#ifdef emacs
case '=':
laststart = b;
BUF_PUSH (at_dot);
@@ -3454,8 +2833,6 @@ regex_compile (re_char *pattern, size_t size,
PATFETCH (c);
BUF_PUSH_2 (notcategoryspec, c);
break;
-#endif /* emacs */
-
case 'w':
if (syntax & RE_NO_GNU_OPS)
@@ -3607,7 +2984,7 @@ regex_compile (re_char *pattern, size_t size,
c1 = RE_CHAR_TO_MULTIBYTE (c);
if (! CHAR_BYTE8_P (c1))
{
- re_wchar_t c2 = TRANSLATE (c1);
+ int c2 = TRANSLATE (c1);
if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
c = c1;
@@ -3638,41 +3015,18 @@ regex_compile (re_char *pattern, size_t size,
/* We have succeeded; set the length of the buffer. */
bufp->used = b - bufp->buffer;
-#ifdef DEBUG
- if (debug > 0)
+#ifdef REGEX_EMACS_DEBUG
+ if (regex_emacs_debug > 0)
{
re_compile_fastmap (bufp);
DEBUG_PRINT ("\nCompiled pattern: \n");
print_compiled_pattern (bufp);
}
- debug--;
-#endif /* DEBUG */
-
-#ifndef MATCH_MAY_ALLOCATE
- /* Initialize the failure stack to the largest possible stack. This
- isn't necessary unless we're trying to avoid calling alloca in
- the search and match routines. */
- {
- int num_regs = bufp->re_nsub + 1;
-
- if (fail_stack.size < emacs_re_max_failures * TYPICAL_FAILURE_SIZE)
- {
- fail_stack.size = emacs_re_max_failures * TYPICAL_FAILURE_SIZE;
- falk_stack.stack = realloc (fail_stack.stack,
- fail_stack.size * sizeof *falk_stack.stack);
- }
-
- regex_grow_registers (num_regs);
- }
-#endif /* not MATCH_MAY_ALLOCATE */
+ regex_emacs_debug--;
+#endif
FREE_STACK_RETURN (REG_NOERROR);
-#ifdef emacs
-# undef syntax
-#else
-# undef posix_backtracking
-#endif
} /* regex_compile */
/* Subroutines for `regex_compile'. */
@@ -3733,11 +3087,11 @@ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned cha
after an alternative or a begin-subexpression. We assume there is at
least one character before the ^. */
-static boolean
+static bool
at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
{
re_char *prev = p - 2;
- boolean odd_backslashes;
+ bool odd_backslashes;
/* After a subexpression? */
if (*prev == '(')
@@ -3774,11 +3128,11 @@ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
/* The dual of at_begline_loc_p. This one is for $. We assume there is
at least one character after the $, i.e., `P < PEND'. */
-static boolean
+static bool
at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax)
{
re_char *next = p;
- boolean next_backslash = *next == '\\';
+ bool next_backslash = *next == '\\';
re_char *next_next = p + 1 < pend ? p + 1 : 0;
return
@@ -3794,10 +3148,10 @@ at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax)
/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
false if it's not. */
-static boolean
+static bool
group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
{
- ssize_t this_element;
+ ptrdiff_t this_element;
for (this_element = compile_stack.avail - 1;
this_element >= 0;
@@ -3823,13 +3177,13 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
const int multibyte)
{
int j, k;
- boolean not;
+ bool not;
/* If all elements for base leading-codes in fastmap is set, this
flag is set true. */
- boolean match_any_multibyte_characters = false;
+ bool match_any_multibyte_characters = false;
- assert (p);
+ eassert (p);
/* The loop below works as follows:
- It has a working-list kept in the PATTERN_STACK and which basically
@@ -3920,7 +3274,6 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
fastmap[j] = 1;
-#ifdef emacs
if (/* Any leading code can possibly start a character
which doesn't match the specified set of characters. */
not
@@ -3966,20 +3319,11 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
fastmap[j] = 1;
}
}
-#endif
break;
case syntaxspec:
case notsyntaxspec:
if (!fastmap) break;
-#ifndef emacs
- not = (re_opcode_t)p[-1] == notsyntaxspec;
- k = *p++;
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if ((SYNTAX (j) == (enum syntaxcode) k) ^ not)
- fastmap[j] = 1;
- break;
-#else /* emacs */
/* This match depends on text properties. These end with
aborting optimizations. */
return -1;
@@ -4008,7 +3352,6 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
`continue'. */
case at_dot:
-#endif /* !emacs */
case no_op:
case begline:
case endline:
@@ -4066,7 +3409,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
case jump_n:
/* This code simply does not properly handle forward jump_n. */
- DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0));
+ DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0));
p += 4;
/* jump_n can either jump or fall through. The (backward) jump
case has already been handled, so we only need to look at the
@@ -4075,7 +3418,7 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
case succeed_n:
/* If N == 0, it should be an on_failure_jump_loop instead. */
- DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
+ DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j > 0));
p += 4;
/* We only care about one iteration of the loop, so we don't
need to consider the case where this behaves like an
@@ -4126,13 +3469,13 @@ analyze_first (re_char *p, re_char *pend, char *fastmap,
Returns 0 if we succeed, -2 if an internal error. */
-int
+static void
re_compile_fastmap (struct re_pattern_buffer *bufp)
{
char *fastmap = bufp->fastmap;
int analysis;
- assert (fastmap && bufp->buffer);
+ eassert (fastmap && bufp->buffer);
memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */
bufp->fastmap_accurate = 1; /* It will be when we're done. */
@@ -4140,14 +3483,13 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used,
fastmap, RE_MULTIBYTE_P (bufp));
bufp->can_be_null = (analysis != 0);
- return 0;
} /* re_compile_fastmap */
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
this memory for recording register information. STARTS and ENDS
must be allocated using the malloc library routine, and must each
- be at least NUM_REGS * sizeof (regoff_t) bytes long.
+ be at least NUM_REGS * sizeof (ptrdiff_t) bytes long.
If NUM_REGS == 0, then subsequent matches should allocate their own
register data.
@@ -4157,7 +3499,8 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
freeing the old data. */
void
-re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, unsigned int num_regs, regoff_t *starts, regoff_t *ends)
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
+ unsigned int num_regs, ptrdiff_t *starts, ptrdiff_t *ends)
{
if (num_regs)
{
@@ -4173,21 +3516,19 @@ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, uns
regs->start = regs->end = 0;
}
}
-WEAK_ALIAS (__re_set_registers, re_set_registers)
/* Searching routines. */
/* Like re_search_2, below, but only one string is specified, and
doesn't let you say where to stop matching. */
-regoff_t
+ptrdiff_t
re_search (struct re_pattern_buffer *bufp, const char *string, size_t size,
- ssize_t startpos, ssize_t range, struct re_registers *regs)
+ ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs)
{
return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
regs, size);
}
-WEAK_ALIAS (__re_search, re_search)
/* Head address of virtual concatenation of string. */
#define HEAD_ADDR_VSTRING(P) \
@@ -4218,21 +3559,21 @@ WEAK_ALIAS (__re_search, re_search)
found, -1 if no match, or -2 if error (such as failure
stack overflow). */
-regoff_t
+ptrdiff_t
re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
- const char *str2, size_t size2, ssize_t startpos, ssize_t range,
- struct re_registers *regs, ssize_t stop)
+ const char *str2, size_t size2, ptrdiff_t startpos, ptrdiff_t range,
+ struct re_registers *regs, ptrdiff_t stop)
{
- regoff_t val;
+ ptrdiff_t val;
re_char *string1 = (re_char *) str1;
re_char *string2 = (re_char *) str2;
- register char *fastmap = bufp->fastmap;
- register RE_TRANSLATE_TYPE translate = bufp->translate;
+ char *fastmap = bufp->fastmap;
+ Lisp_Object translate = bufp->translate;
size_t total_size = size1 + size2;
- ssize_t endpos = startpos + range;
- boolean anchored_start;
+ ptrdiff_t endpos = startpos + range;
+ bool anchored_start;
/* Nonzero if we are searching multibyte string. */
- const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp);
+ bool multibyte = RE_TARGET_MULTIBYTE_P (bufp);
/* Check for out-of-range STARTPOS. */
if (startpos < 0 || startpos > total_size)
@@ -4256,7 +3597,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
range = 0;
}
-#ifdef emacs
/* In a forward search for something that starts with \=.
don't keep searching past point. */
if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
@@ -4265,7 +3605,6 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
if (range < 0)
return -1;
}
-#endif /* emacs */
/* Update the fastmap now if not correct already. */
if (fastmap && !bufp->fastmap_accurate)
@@ -4274,14 +3613,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
/* See whether the pattern is anchored. */
anchored_start = (bufp->buffer[0] == begline);
-#ifdef emacs
gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
{
- ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
+ ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
}
-#endif
/* Loop through the string, looking for a place to start matching. */
for (;;)
@@ -4304,14 +3641,14 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
the first null string. */
if (fastmap && startpos < total_size && !bufp->can_be_null)
{
- register re_char *d;
- register re_wchar_t buf_ch;
+ re_char *d;
+ int buf_ch;
d = POS_ADDR_VSTRING (startpos);
if (range > 0) /* Searching forwards. */
{
- ssize_t irange = range, lim = 0;
+ ptrdiff_t irange = range, lim = 0;
if (startpos < size1 && startpos + range >= size1)
lim = range - (size1 - startpos);
@@ -4336,11 +3673,9 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
else
while (range > lim)
{
- register re_wchar_t ch, translated;
-
buf_ch = *d;
- ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
- translated = RE_TRANSLATE (translate, ch);
+ int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
+ int translated = RE_TRANSLATE (translate, ch);
if (translated != ch
&& (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
buf_ch = ch;
@@ -4383,11 +3718,9 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
}
else
{
- register re_wchar_t ch, translated;
-
buf_ch = *d;
- ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
- translated = TRANSLATE (ch);
+ int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
+ int translated = TRANSLATE (ch);
if (translated != ch
&& (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
buf_ch = ch;
@@ -4457,13 +3790,12 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
}
return -1;
} /* re_search_2 */
-WEAK_ALIAS (__re_search_2, re_search_2)
/* Declarations and macros for re_match_2. */
static int bcmp_translate (re_char *s1, re_char *s2,
- register ssize_t len,
- RE_TRANSLATE_TYPE translate,
+ ptrdiff_t len,
+ Lisp_Object translate,
const int multibyte);
/* This converts PTR, a pointer into one of the search strings `string1'
@@ -4531,29 +3863,6 @@ static int bcmp_translate (re_char *s1, re_char *s2,
|| WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
-/* Free everything we malloc. */
-#ifdef MATCH_MAY_ALLOCATE
-# define FREE_VAR(var) \
- do { \
- if (var) \
- { \
- REGEX_FREE (var); \
- var = NULL; \
- } \
- } while (0)
-# define FREE_VARIABLES() \
- do { \
- REGEX_FREE_STACK (fail_stack.stack); \
- FREE_VAR (regstart); \
- FREE_VAR (regend); \
- FREE_VAR (best_regstart); \
- FREE_VAR (best_regend); \
- REGEX_SAFE_FREE (); \
- } while (0)
-#else
-# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
-#endif /* not MATCH_MAY_ALLOCATE */
-
/* Optimization routines. */
@@ -4586,10 +3895,8 @@ skip_one_char (re_char *p)
case syntaxspec:
case notsyntaxspec:
-#ifdef emacs
case categoryspec:
case notcategoryspec:
-#endif /* emacs */
p++;
break;
@@ -4623,7 +3930,7 @@ skip_noops (re_char *p, re_char *pend)
return p;
}
}
- assert (p == pend);
+ eassert (p == pend);
return p;
}
@@ -4656,11 +3963,10 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte)
&& p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
return !not;
}
-#ifdef emacs
else if (rtp)
{
int class_bits = CHARSET_RANGE_TABLE_BITS (p);
- re_wchar_t range_start, range_end;
+ int range_start, range_end;
/* Sort tests by the most commonly used classes with some adjustment to which
tests are easiest to perform. Take a look at comment in re_wctype_parse
@@ -4691,7 +3997,7 @@ execute_charset (re_char **pp, unsigned c, unsigned corig, bool unibyte)
return !not;
}
}
-#endif /* emacs */
+
return not;
}
@@ -4701,11 +4007,11 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
re_char *p2)
{
re_opcode_t op2;
- const boolean multibyte = RE_MULTIBYTE_P (bufp);
+ bool multibyte = RE_MULTIBYTE_P (bufp);
unsigned char *pend = bufp->buffer + bufp->used;
- assert (p1 >= bufp->buffer && p1 < pend
- && p2 >= bufp->buffer && p2 <= pend);
+ eassert (p1 >= bufp->buffer && p1 < pend
+ && p2 >= bufp->buffer && p2 <= pend);
/* Skip over open/close-group commands.
If what follows this loop is a ...+ construct,
@@ -4716,8 +4022,8 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
is only used in the case where p1 is a simple match operator. */
/* p1 = skip_noops (p1, pend); */
- assert (p1 >= bufp->buffer && p1 < pend
- && p2 >= bufp->buffer && p2 <= pend);
+ eassert (p1 >= bufp->buffer && p1 < pend
+ && p2 >= bufp->buffer && p2 <= pend);
op2 = p2 == pend ? succeed : *p2;
@@ -4736,7 +4042,7 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
case endline:
case exactn:
{
- register re_wchar_t c
+ int c
= (re_opcode_t) *p2 == endline ? '\n'
: RE_STRING_CHAR (p2 + 2, multibyte);
@@ -4866,12 +4172,10 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
|| (re_opcode_t) *p1 == syntaxspec)
&& p1[1] == Sword);
-#ifdef emacs
case categoryspec:
return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
case notcategoryspec:
return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
-#endif /* emacs */
default:
;
@@ -4884,20 +4188,6 @@ mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
/* Matching routines. */
-#ifndef emacs /* Emacs never uses this. */
-/* re_match is like re_match_2 except it takes only a single string. */
-
-regoff_t
-re_match (struct re_pattern_buffer *bufp, const char *string,
- size_t size, ssize_t pos, struct re_registers *regs)
-{
- regoff_t result = re_match_2_internal (bufp, NULL, 0, (re_char *) string,
- size, pos, regs, size);
- return result;
-}
-WEAK_ALIAS (__re_match, re_match)
-#endif /* not emacs */
-
/* re_match_2 matches the compiled pattern in BUFP against the
the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
and SIZE2, respectively). We start matching at POS, and stop
@@ -4911,34 +4201,31 @@ WEAK_ALIAS (__re_match, re_match)
failure stack overflowing). Otherwise, we return the length of the
matched substring. */
-regoff_t
+ptrdiff_t
re_match_2 (struct re_pattern_buffer *bufp, const char *string1,
- size_t size1, const char *string2, size_t size2, ssize_t pos,
- struct re_registers *regs, ssize_t stop)
+ size_t size1, const char *string2, size_t size2, ptrdiff_t pos,
+ struct re_registers *regs, ptrdiff_t stop)
{
- regoff_t result;
+ ptrdiff_t result;
-#ifdef emacs
- ssize_t charpos;
+ ptrdiff_t charpos;
gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
-#endif
result = re_match_2_internal (bufp, (re_char *) string1, size1,
(re_char *) string2, size2,
pos, regs, stop);
return result;
}
-WEAK_ALIAS (__re_match_2, re_match_2)
/* This is a separate function so that we can force an alloca cleanup
afterwards. */
-static regoff_t
+static ptrdiff_t
re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
size_t size1, re_char *string2, size_t size2,
- ssize_t pos, struct re_registers *regs, ssize_t stop)
+ ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
{
/* General temporaries. */
int mcnt;
@@ -4965,13 +4252,13 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
re_char *pend = p + bufp->used;
/* We use this to map every character in the string. */
- RE_TRANSLATE_TYPE translate = bufp->translate;
+ Lisp_Object translate = bufp->translate;
- /* Nonzero if BUFP is setup from a multibyte regex. */
- const boolean multibyte = RE_MULTIBYTE_P (bufp);
+ /* True if BUFP is setup from a multibyte regex. */
+ bool multibyte = RE_MULTIBYTE_P (bufp);
- /* Nonzero if STRING1/STRING2 are multibyte. */
- const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
+ /* True if STRING1/STRING2 are multibyte. */
+ bool target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
/* Failure point stack. Each place that can handle a failure further
down the line pushes a failure point on this stack. It consists of
@@ -4980,19 +4267,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
registers, and, finally, two char *'s. The first char * is where
to resume scanning the pattern; the second one is where to resume
scanning the strings. */
-#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
fail_stack_type fail_stack;
-#endif
#ifdef DEBUG_COMPILES_ARGUMENTS
unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
#endif
-#if defined REL_ALLOC && defined REGEX_MALLOC
- /* This holds the pointer to the failure stack, when
- it is allocated relocatably. */
- fail_stack_elt_t *failure_stack_ptr;
-#endif
-
/* We fill all the registers internally, independent of what we
return, for use in backreferences. The number here includes
an element for register zero. */
@@ -5005,18 +4284,14 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
matching and the regnum-th regend points to right after where we
stopped matching the regnum-th subexpression. (The zeroth register
keeps track of what the whole pattern matches.) */
-#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
- re_char **regstart, **regend;
-#endif
+ re_char **regstart UNINIT, **regend UNINIT;
/* The following record the register info as found in the above
variables when we find a match better than any we've seen before.
This happens as we backtrack through the failure points, which in
turn happens only if we have not yet matched the entire string. */
unsigned best_regs_set = false;
-#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
- re_char **best_regstart, **best_regend;
-#endif
+ re_char **best_regstart UNINIT, **best_regend UNINIT;
/* Logically, this is `best_regend[0]'. But we don't want to have to
allocate space for that if we're not allocating space for anything
@@ -5039,7 +4314,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
INIT_FAIL_STACK ();
-#ifdef MATCH_MAY_ALLOCATE
/* Do not bother to initialize all the register variables if there are
no groups in the pattern, as it takes a fair amount of time. If
there are groups, we include space for register 0 (the whole
@@ -5047,29 +4321,16 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
array indexing. We should fix this. */
if (bufp->re_nsub)
{
- regstart = REGEX_TALLOC (num_regs, re_char *);
- regend = REGEX_TALLOC (num_regs, re_char *);
- best_regstart = REGEX_TALLOC (num_regs, re_char *);
- best_regend = REGEX_TALLOC (num_regs, re_char *);
-
- if (!(regstart && regend && best_regstart && best_regend))
- {
- FREE_VARIABLES ();
- return -2;
- }
+ regstart = SAFE_ALLOCA (num_regs * 4 * sizeof *regstart);
+ regend = regstart + num_regs;
+ best_regstart = regend + num_regs;
+ best_regend = best_regstart + num_regs;
}
- else
- {
- /* We must initialize all our variables to NULL, so that
- `FREE_VARIABLES' doesn't try to free them. */
- regstart = regend = best_regstart = best_regend = NULL;
- }
-#endif /* MATCH_MAY_ALLOCATE */
/* The starting position is bogus. */
if (pos < 0 || pos > size1 + size2)
{
- FREE_VARIABLES ();
+ SAFE_FREE ();
return -1;
}
@@ -5229,13 +4490,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
extra element beyond `num_regs' for the `-1' marker
GNU code uses. */
regs->num_regs = max (RE_NREGS, num_regs + 1);
- regs->start = TALLOC (regs->num_regs, regoff_t);
- regs->end = TALLOC (regs->num_regs, regoff_t);
- if (regs->start == NULL || regs->end == NULL)
- {
- FREE_VARIABLES ();
- return -2;
- }
+ regs->start = TALLOC (regs->num_regs, ptrdiff_t);
+ regs->end = TALLOC (regs->num_regs, ptrdiff_t);
bufp->regs_allocated = REGS_REALLOCATE;
}
else if (bufp->regs_allocated == REGS_REALLOCATE)
@@ -5245,21 +4501,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
if (regs->num_regs < num_regs + 1)
{
regs->num_regs = num_regs + 1;
- RETALLOC (regs->start, regs->num_regs, regoff_t);
- RETALLOC (regs->end, regs->num_regs, regoff_t);
- if (regs->start == NULL || regs->end == NULL)
- {
- FREE_VARIABLES ();
- return -2;
- }
+ RETALLOC (regs->start, regs->num_regs, ptrdiff_t);
+ RETALLOC (regs->end, regs->num_regs, ptrdiff_t);
}
}
else
- {
- /* These braces fend off a "empty body in an else-statement"
- warning under GCC when assert expands to nothing. */
- assert (bufp->regs_allocated == REGS_FIXED);
- }
+ eassert (bufp->regs_allocated == REGS_FIXED);
/* Convert the pointer data in `regstart' and `regend' to
indices. Register zero has to be set differently,
@@ -5301,7 +4548,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt);
- FREE_VARIABLES ();
+ SAFE_FREE ();
return dcnt;
}
@@ -5328,33 +4575,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
/* Remember the start point to rollback upon failure. */
dfail = d;
-#ifndef emacs
- /* This is written out as an if-else so we don't waste time
- testing `translate' inside the loop. */
- if (RE_TRANSLATE_P (translate))
- do
- {
- PREFETCH ();
- if (RE_TRANSLATE (translate, *d) != *p++)
- {
- d = dfail;
- goto fail;
- }
- d++;
- }
- while (--mcnt);
- else
- do
- {
- PREFETCH ();
- if (*d++ != *p++)
- {
- d = dfail;
- goto fail;
- }
- }
- while (--mcnt);
-#else /* emacs */
/* The cost of testing `translate' is comparatively small. */
if (target_multibyte)
do
@@ -5419,7 +4639,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
d++;
}
while (--mcnt);
-#endif
+
break;
@@ -5427,7 +4647,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
case anychar:
{
int buf_charlen;
- re_wchar_t buf_ch;
+ int buf_ch;
reg_syntax_t syntax;
DEBUG_PRINT ("EXECUTING anychar.\n");
@@ -5437,11 +4657,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
target_multibyte);
buf_ch = TRANSLATE (buf_ch);
-#ifdef emacs
syntax = RE_SYNTAX_EMACS;
-#else
- syntax = bufp->syntax;
-#endif
if ((!(syntax & RE_DOT_NEWLINE) && buf_ch == '\n')
|| ((syntax & RE_DOT_NOT_NULL) && buf_ch == '\000'))
@@ -5460,7 +4676,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
int len;
/* Whether matching against a unibyte character. */
- boolean unibyte_char = false;
+ bool unibyte_char = false;
DEBUG_PRINT ("EXECUTING charset%s.\n",
(re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
@@ -5530,10 +4746,10 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
case stop_memory:
DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p);
- assert (!REG_UNSET (regstart[*p]));
+ eassert (!REG_UNSET (regstart[*p]));
/* Strictly speaking, there should be code such as:
- assert (REG_UNSET (regend[*p]));
+ eassert (REG_UNSET (regend[*p]));
PUSH_FAILURE_REGSTOP ((unsigned int)*p);
But the only info to be pushed is regend[*p] and it is known to
@@ -5557,7 +4773,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
followed by the numeric value of <digit> as the register number. */
case duplicate:
{
- register re_char *d2, *dend2;
+ re_char *d2, *dend2;
int regno = *p++; /* Get which register to match against. */
DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno);
@@ -5719,7 +4935,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
mcnt, p + mcnt);
- assert ((re_opcode_t)p[-4] == no_op);
+ eassert ((re_opcode_t)p[-4] == no_op);
{
int cycle = 0;
CHECK_INFINITE_LOOP (p - 4, d);
@@ -5788,7 +5004,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
mcnt, p + mcnt);
{
re_char *p1 = p; /* Next operation. */
- /* Here, we discard `const', making re_match non-reentrant. */
+ /* Discard 'const', making re_search non-reentrant. */
unsigned char *p2 = (unsigned char *) p + mcnt; /* Jump dest. */
unsigned char *p3 = (unsigned char *) p - 3; /* opcode location. */
@@ -5799,9 +5015,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
/* Ensure this is indeed the trivial kind of loop
we are expecting. */
- assert (skip_one_char (p1) == p2 - 3);
- assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
- DEBUG_STATEMENT (debug += 2);
+ eassert (skip_one_char (p1) == p2 - 3);
+ eassert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
+ DEBUG_STATEMENT (regex_emacs_debug += 2);
if (mutually_exclusive_p (bufp, p1, p2))
{
/* Use a fast `on_failure_keep_string_jump' loop. */
@@ -5815,7 +5031,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
DEBUG_PRINT (" smart default => slow loop.\n");
*p3 = (unsigned char) on_failure_jump;
}
- DEBUG_STATEMENT (debug -= 2);
+ DEBUG_STATEMENT (regex_emacs_debug -= 2);
}
break;
@@ -5840,7 +5056,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
/* Originally, mcnt is how many times we HAVE to succeed. */
if (mcnt != 0)
{
- /* Here, we discard `const', making re_match non-reentrant. */
+ /* Discard 'const', making re_search non-reentrant. */
unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */
mcnt--;
p += 4;
@@ -5859,7 +5075,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
/* Originally, this is how many times we CAN jump. */
if (mcnt != 0)
{
- /* Here, we discard `const', making re_match non-reentrant. */
+ /* Discard 'const', making re_search non-reentrant. */
unsigned char *p2 = (unsigned char *) p + 2; /* counter loc. */
mcnt--;
PUSH_NUMBER (p2, mcnt);
@@ -5876,7 +5092,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
DEBUG_PRINT ("EXECUTING set_number_at.\n");
EXTRACT_NUMBER_AND_INCR (mcnt, p);
- /* Here, we discard `const', making re_match non-reentrant. */
+ /* Discard 'const', making re_search non-reentrant. */
p2 = (unsigned char *) p + mcnt;
/* Signedness doesn't matter since we only copy MCNT's bits. */
EXTRACT_NUMBER_AND_INCR (mcnt, p);
@@ -5888,7 +5104,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
case wordbound:
case notwordbound:
{
- boolean not = (re_opcode_t) *(p - 1) == notwordbound;
+ bool not = (re_opcode_t) *(p - 1) == notwordbound;
DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : "");
/* We SUCCEED (or FAIL) in one of the following cases: */
@@ -5900,19 +5116,15 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
- re_wchar_t c1, c2;
+ int c1, c2;
int s1, s2;
int dummy;
-#ifdef emacs
- ssize_t offset = PTR_TO_OFFSET (d - 1);
- ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ptrdiff_t offset = PTR_TO_OFFSET (d - 1);
+ ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
-#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
-#ifdef emacs
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
-#endif
PREFETCH_NOLIMIT ();
GET_CHAR_AFTER (c2, d, dummy);
s2 = SYNTAX (c2);
@@ -5942,14 +5154,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
- re_wchar_t c1, c2;
+ int c1, c2;
int s1, s2;
int dummy;
-#ifdef emacs
- ssize_t offset = PTR_TO_OFFSET (d);
- ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ptrdiff_t offset = PTR_TO_OFFSET (d);
+ ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
-#endif
PREFETCH ();
GET_CHAR_AFTER (c2, d, dummy);
s2 = SYNTAX (c2);
@@ -5962,9 +5172,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
if (!AT_STRINGS_BEG (d))
{
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
-#ifdef emacs
UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
-#endif
s1 = SYNTAX (c1);
/* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
@@ -5987,14 +5195,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
- re_wchar_t c1, c2;
+ int c1, c2;
int s1, s2;
int dummy;
-#ifdef emacs
- ssize_t offset = PTR_TO_OFFSET (d) - 1;
- ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ptrdiff_t offset = PTR_TO_OFFSET (d) - 1;
+ ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
-#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
@@ -6007,9 +5213,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
PREFETCH_NOLIMIT ();
GET_CHAR_AFTER (c2, d, dummy);
-#ifdef emacs
UPDATE_SYNTAX_TABLE_FORWARD (charpos);
-#endif
s2 = SYNTAX (c2);
/* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
@@ -6032,13 +5236,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
- re_wchar_t c1, c2;
+ int c1, c2;
int s1, s2;
-#ifdef emacs
- ssize_t offset = PTR_TO_OFFSET (d);
- ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ptrdiff_t offset = PTR_TO_OFFSET (d);
+ ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
-#endif
PREFETCH ();
c2 = RE_STRING_CHAR (d, target_multibyte);
s2 = SYNTAX (c2);
@@ -6051,9 +5253,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
if (!AT_STRINGS_BEG (d))
{
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
-#ifdef emacs
UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
-#endif
s1 = SYNTAX (c1);
/* ... and S1 is Sword or Ssymbol. */
@@ -6075,13 +5275,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
/* C1 is the character before D, S1 is the syntax of C1, C2
is the character at D, and S2 is the syntax of C2. */
- re_wchar_t c1, c2;
+ int c1, c2;
int s1, s2;
-#ifdef emacs
- ssize_t offset = PTR_TO_OFFSET (d) - 1;
- ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ptrdiff_t offset = PTR_TO_OFFSET (d) - 1;
+ ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
-#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
@@ -6094,9 +5292,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
PREFETCH_NOLIMIT ();
c2 = RE_STRING_CHAR (d, target_multibyte);
-#ifdef emacs
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
-#endif
s2 = SYNTAX (c2);
/* ... and S2 is Sword or Ssymbol. */
@@ -6109,21 +5305,19 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
case syntaxspec:
case notsyntaxspec:
{
- boolean not = (re_opcode_t) *(p - 1) == notsyntaxspec;
+ bool not = (re_opcode_t) *(p - 1) == notsyntaxspec;
mcnt = *p++;
DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "",
mcnt);
PREFETCH ();
-#ifdef emacs
{
- ssize_t offset = PTR_TO_OFFSET (d);
- ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ ptrdiff_t offset = PTR_TO_OFFSET (d);
+ ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (pos1);
}
-#endif
{
int len;
- re_wchar_t c;
+ int c;
GET_CHAR_AFTER (c, d, len);
if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
@@ -6133,7 +5327,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
}
break;
-#ifdef emacs
case at_dot:
DEBUG_PRINT ("EXECUTING at_dot.\n");
if (PTR_BYTE_POS (d) != PT_BYTE)
@@ -6143,7 +5336,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
case categoryspec:
case notcategoryspec:
{
- boolean not = (re_opcode_t) *(p - 1) == notcategoryspec;
+ bool not = (re_opcode_t) *(p - 1) == notcategoryspec;
mcnt = *p++;
DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n",
not ? "not" : "", mcnt);
@@ -6151,7 +5344,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
{
int len;
- re_wchar_t c;
+ int c;
GET_CHAR_AFTER (c, d, len);
if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
goto fail;
@@ -6160,8 +5353,6 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
}
break;
-#endif /* emacs */
-
default:
abort ();
}
@@ -6180,11 +5371,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
switch (*pat++)
{
case on_failure_keep_string_jump:
- assert (str == NULL);
+ eassert (str == NULL);
goto continue_failure_jump;
case on_failure_jump_nastyloop:
- assert ((re_opcode_t)pat[-2] == no_op);
+ eassert ((re_opcode_t)pat[-2] == no_op);
PUSH_FAILURE_POINT (pat - 2, str);
FALLTHROUGH;
case on_failure_jump_loop:
@@ -6204,7 +5395,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
abort ();
}
- assert (p >= bufp->buffer && p <= pend);
+ eassert (p >= bufp->buffer && p <= pend);
if (d >= string1 && d <= end1)
dend = end_match_1;
@@ -6216,7 +5407,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
if (best_regs_set)
goto restore_best_regs;
- FREE_VARIABLES ();
+ SAFE_FREE ();
return -1; /* Failure to match. */
}
@@ -6227,8 +5418,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
bytes; nonzero otherwise. */
static int
-bcmp_translate (re_char *s1, re_char *s2, ssize_t len,
- RE_TRANSLATE_TYPE translate, const int target_multibyte)
+bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len,
+ Lisp_Object translate, int target_multibyte)
{
re_char *p1 = s1, *p2 = s2;
re_char *p1_end = s1 + len;
@@ -6239,7 +5430,7 @@ bcmp_translate (re_char *s1, re_char *s2, ssize_t len,
while (p1 < p1_end && p2 < p2_end)
{
int p1_charlen, p2_charlen;
- re_wchar_t p1_ch, p2_ch;
+ int p1_ch, p2_ch;
GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
@@ -6270,9 +5461,7 @@ bcmp_translate (re_char *s1, re_char *s2, ssize_t len,
const char *
re_compile_pattern (const char *pattern, size_t length,
-#ifdef emacs
bool posix_backtracking, const char *whitespace_regexp,
-#endif
struct re_pattern_buffer *bufp)
{
reg_errcode_t ret;
@@ -6282,334 +5471,16 @@ re_compile_pattern (const char *pattern, size_t length,
bufp->regs_allocated = REGS_UNALLOCATED;
/* And GNU code determines whether or not to get register information
- by passing null for the REGS argument to re_match, etc., not by
+ by passing null for the REGS argument to re_search, etc., not by
setting no_sub. */
bufp->no_sub = 0;
ret = regex_compile ((re_char *) pattern, length,
-#ifdef emacs
posix_backtracking,
whitespace_regexp,
-#else
- re_syntax_options,
-#endif
bufp);
if (!ret)
return NULL;
- return gettext (re_error_msgid[(int) ret]);
-}
-WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
-
-/* Entry points compatible with 4.2 BSD regex library. We don't define
- them unless specifically requested. */
-
-#if defined _REGEX_RE_COMP || defined _LIBC
-
-/* BSD has one and only one pattern buffer. */
-static struct re_pattern_buffer re_comp_buf;
-
-char *
-# ifdef _LIBC
-/* Make these definitions weak in libc, so POSIX programs can redefine
- these names if they don't use our functions, and still use
- regcomp/regexec below without link errors. */
-weak_function
-# endif
-re_comp (const char *s)
-{
- reg_errcode_t ret;
-
- if (!s)
- {
- if (!re_comp_buf.buffer)
- /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
- return (char *) gettext ("No previous regular expression");
- return 0;
- }
-
- if (!re_comp_buf.buffer)
- {
- re_comp_buf.buffer = malloc (200);
- if (re_comp_buf.buffer == NULL)
- /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
- return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
- re_comp_buf.allocated = 200;
-
- re_comp_buf.fastmap = malloc (1 << BYTEWIDTH);
- if (re_comp_buf.fastmap == NULL)
- /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
- return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
- }
-
- /* Since `re_exec' always passes NULL for the `regs' argument, we
- don't need to initialize the pattern buffer fields which affect it. */
-
- ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
-
- if (!ret)
- return NULL;
-
- /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
- return (char *) gettext (re_error_msgid[(int) ret]);
-}
-
-
-int
-# ifdef _LIBC
-weak_function
-# endif
-re_exec (const char *s)
-{
- const size_t len = strlen (s);
- return re_search (&re_comp_buf, s, len, 0, len, 0) >= 0;
+ return re_error_msgid[ret];
}
-#endif /* _REGEX_RE_COMP */
-
-/* POSIX.2 functions. Don't define these for Emacs. */
-
-#ifndef emacs
-
-/* regcomp takes a regular expression as a string and compiles it.
-
- PREG is a regex_t *. We do not expect any fields to be initialized,
- since POSIX says we shouldn't. Thus, we set
-
- `buffer' to the compiled pattern;
- `used' to the length of the compiled pattern;
- `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
- REG_EXTENDED bit in CFLAGS is set; otherwise, to
- RE_SYNTAX_POSIX_BASIC;
- `fastmap' to an allocated space for the fastmap;
- `fastmap_accurate' to zero;
- `re_nsub' to the number of subexpressions in PATTERN.
-
- PATTERN is the address of the pattern string.
-
- CFLAGS is a series of bits which affect compilation.
-
- If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
- use POSIX basic syntax.
-
- If REG_NEWLINE is set, then . and [^...] don't match newline.
- Also, regexec will try a match beginning after every newline.
-
- If REG_ICASE is set, then we considers upper- and lowercase
- versions of letters to be equivalent when matching.
-
- If REG_NOSUB is set, then when PREG is passed to regexec, that
- routine will report only success or failure, and nothing about the
- registers.
-
- It returns 0 if it succeeds, nonzero if it doesn't. (See regex-emacs.h for
- the return codes and their meanings.) */
-
-reg_errcode_t
-regcomp (regex_t *_Restrict_ preg, const char *_Restrict_ pattern,
- int cflags)
-{
- reg_errcode_t ret;
- reg_syntax_t syntax
- = (cflags & REG_EXTENDED) ?
- RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
-
- /* regex_compile will allocate the space for the compiled pattern. */
- preg->buffer = 0;
- preg->allocated = 0;
- preg->used = 0;
-
- /* Try to allocate space for the fastmap. */
- preg->fastmap = malloc (1 << BYTEWIDTH);
-
- if (cflags & REG_ICASE)
- {
- unsigned i;
-
- preg->translate = malloc (CHAR_SET_SIZE * sizeof *preg->translate);
- if (preg->translate == NULL)
- return (int) REG_ESPACE;
-
- /* Map uppercase characters to corresponding lowercase ones. */
- for (i = 0; i < CHAR_SET_SIZE; i++)
- preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
- }
- else
- preg->translate = NULL;
-
- /* If REG_NEWLINE is set, newlines are treated differently. */
- if (cflags & REG_NEWLINE)
- { /* REG_NEWLINE implies neither . nor [^...] match newline. */
- syntax &= ~RE_DOT_NEWLINE;
- syntax |= RE_HAT_LISTS_NOT_NEWLINE;
- }
- else
- syntax |= RE_NO_NEWLINE_ANCHOR;
-
- preg->no_sub = !!(cflags & REG_NOSUB);
-
- /* POSIX says a null character in the pattern terminates it, so we
- can use strlen here in compiling the pattern. */
- ret = regex_compile ((re_char *) pattern, strlen (pattern), syntax, preg);
-
- /* POSIX doesn't distinguish between an unmatched open-group and an
- unmatched close-group: both are REG_EPAREN. */
- if (ret == REG_ERPAREN)
- ret = REG_EPAREN;
-
- if (ret == REG_NOERROR && preg->fastmap)
- { /* Compute the fastmap now, since regexec cannot modify the pattern
- buffer. */
- re_compile_fastmap (preg);
- if (preg->can_be_null)
- { /* The fastmap can't be used anyway. */
- free (preg->fastmap);
- preg->fastmap = NULL;
- }
- }
- return ret;
-}
-WEAK_ALIAS (__regcomp, regcomp)
-
-
-/* regexec searches for a given pattern, specified by PREG, in the
- string STRING.
-
- If NMATCH is zero or REG_NOSUB was set in the cflags argument to
- `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
- least NMATCH elements, and we set them to the offsets of the
- corresponding matched substrings.
-
- EFLAGS specifies `execution flags' which affect matching: if
- REG_NOTBOL is set, then ^ does not match at the beginning of the
- string; if REG_NOTEOL is set, then $ does not match at the end.
-
- We return 0 if we find a match and REG_NOMATCH if not. */
-
-reg_errcode_t
-regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string,
- size_t nmatch, regmatch_t pmatch[_Restrict_arr_], int eflags)
-{
- regoff_t ret;
- struct re_registers regs;
- regex_t private_preg;
- size_t len = strlen (string);
- boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
-
- private_preg = *preg;
-
- private_preg.not_bol = !!(eflags & REG_NOTBOL);
- private_preg.not_eol = !!(eflags & REG_NOTEOL);
-
- /* The user has told us exactly how many registers to return
- information about, via `nmatch'. We have to pass that on to the
- matching routines. */
- private_preg.regs_allocated = REGS_FIXED;
-
- if (want_reg_info)
- {
- regs.num_regs = nmatch;
- regs.start = TALLOC (nmatch * 2, regoff_t);
- if (regs.start == NULL)
- return REG_NOMATCH;
- regs.end = regs.start + nmatch;
- }
-
- /* Instead of using not_eol to implement REG_NOTEOL, we could simply
- pass (&private_preg, string, len + 1, 0, len, ...) pretending the string
- was a little bit longer but still only matching the real part.
- This works because the `endline' will check for a '\n' and will find a
- '\0', correctly deciding that this is not the end of a line.
- But it doesn't work out so nicely for REG_NOTBOL, since we don't have
- a convenient '\0' there. For all we know, the string could be preceded
- by '\n' which would throw things off. */
-
- /* Perform the searching operation. */
- ret = re_search (&private_preg, string, len,
- /* start: */ 0, /* range: */ len,
- want_reg_info ? &regs : 0);
-
- /* Copy the register information to the POSIX structure. */
- if (want_reg_info)
- {
- if (ret >= 0)
- {
- unsigned r;
-
- for (r = 0; r < nmatch; r++)
- {
- pmatch[r].rm_so = regs.start[r];
- pmatch[r].rm_eo = regs.end[r];
- }
- }
-
- /* If we needed the temporary register info, free the space now. */
- free (regs.start);
- }
-
- /* We want zero return to mean success, unlike `re_search'. */
- return ret >= 0 ? REG_NOERROR : REG_NOMATCH;
-}
-WEAK_ALIAS (__regexec, regexec)
-
-
-/* Returns a message corresponding to an error code, ERR_CODE, returned
- from either regcomp or regexec. We don't use PREG here.
-
- ERR_CODE was previously called ERRCODE, but that name causes an
- error with msvc8 compiler. */
-
-size_t
-regerror (int err_code, const regex_t *preg, char *errbuf, size_t errbuf_size)
-{
- const char *msg;
- size_t msg_size;
-
- if (err_code < 0
- || err_code >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
- /* Only error codes returned by the rest of the code should be passed
- to this routine. If we are given anything else, or if other regex
- code generates an invalid error code, then the program has a bug.
- Dump core so we can fix it. */
- abort ();
-
- msg = gettext (re_error_msgid[err_code]);
-
- msg_size = strlen (msg) + 1; /* Includes the null. */
-
- if (errbuf_size != 0)
- {
- if (msg_size > errbuf_size)
- {
- memcpy (errbuf, msg, errbuf_size - 1);
- errbuf[errbuf_size - 1] = 0;
- }
- else
- strcpy (errbuf, msg);
- }
-
- return msg_size;
-}
-WEAK_ALIAS (__regerror, regerror)
-
-
-/* Free dynamically allocated space used by PREG. */
-
-void
-regfree (regex_t *preg)
-{
- free (preg->buffer);
- preg->buffer = NULL;
-
- preg->allocated = 0;
- preg->used = 0;
-
- free (preg->fastmap);
- preg->fastmap = NULL;
- preg->fastmap_accurate = 0;
-
- free (preg->translate);
- preg->translate = NULL;
-}
-WEAK_ALIAS (__regfree, regfree)
-
-#endif /* not emacs */