summaryrefslogtreecommitdiff
path: root/src/regex.c
diff options
context:
space:
mode:
authorAlan Mackenzie <acm@muc.de>2017-02-05 16:28:53 +0000
committerAlan Mackenzie <acm@muc.de>2017-02-05 16:28:53 +0000
commitd5514332d4a6092673ce1f78fadcae0c57f7be64 (patch)
tree1780337154904dcfad8ecfa76614b47c082160dd /src/regex.c
parentcecc25c68f5a1834c356e18259aa2af402a70ce1 (diff)
parentde3336051ef74e0c3069374ced5b5fc7bb9fba15 (diff)
downloademacs-d5514332d4a6092673ce1f78fadcae0c57f7be64.tar.gz
Merge branch 'master' into comment-cache
Diffstat (limited to 'src/regex.c')
-rw-r--r--src/regex.c96
1 files changed, 55 insertions, 41 deletions
diff --git a/src/regex.c b/src/regex.c
index f1686cf700c..db3f0c16a2d 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -2,7 +2,7 @@
0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993-2016 Free Software Foundation, Inc.
+ Copyright (C) 1993-2017 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -310,11 +310,12 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
|| ((c) >= 'a' && (c) <= 'f') \
|| ((c) >= 'A' && (c) <= 'F'))
-/* This is only used for single-byte characters. */
-# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-
/* The rest must handle multibyte characters. */
+# define ISBLANK(c) (IS_REAL_ASCII (c) \
+ ? ((c) == ' ' || (c) == '\t') \
+ : blankp (c))
+
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
: graphicp (c))
@@ -430,9 +431,12 @@ init_syntax_once (void)
/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
use `alloca' instead of `malloc'. This is because using malloc in
- re_search* or re_match* could cause memory leaks when C-g is used in
- Emacs; also, malloc is slower and causes storage fragmentation. On
- the other hand, malloc is more portable, and easier to debug.
+ re_search* or re_match* could cause memory leaks when C-g is used
+ in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
+ via `record_xmalloc' which uses `unwind_protect' to ensure the
+ memory is freed even in case of non-local exits); also, malloc is
+ slower and causes storage fragmentation. On the other hand, malloc
+ is more portable, and easier to debug.
Because we sometimes use alloca, some routines have to be macros,
not functions -- `alloca'-allocated space disappears at the end of the
@@ -447,7 +451,13 @@ init_syntax_once (void)
#else /* not REGEX_MALLOC */
# ifdef emacs
-# define REGEX_USE_SAFE_ALLOCA USE_SAFE_ALLOCA
+/* This may be adjusted in main(), if the stack is successfully grown. */
+ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
+/* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca. */
+# define REGEX_USE_SAFE_ALLOCA \
+ ptrdiff_t sa_avail = emacs_re_safe_alloca; \
+ ptrdiff_t sa_count = SPECPDL_INDEX (); bool sa_must_free = false
+
# define REGEX_SAFE_FREE() SAFE_FREE ()
# define REGEX_ALLOCATE SAFE_ALLOCA
# else
@@ -1195,24 +1205,28 @@ static const char *re_error_msgid[] =
gettext_noop ("Range striding over charsets") /* REG_ERANGEX */
};
-/* Avoiding alloca during matching, to placate r_alloc. */
-
-/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
- searching and matching functions should not call alloca. On some
- systems, alloca is implemented in terms of malloc, and if we're
- using the relocating allocator routines, then malloc could cause a
- relocation, which might (if the strings being searched are in the
- ralloc heap) shift the data out from underneath the regexp
- routines.
-
- Here's another reason to avoid allocation: Emacs
- processes input from X in a signal handler; processing X input may
- call malloc; if input arrives while a matching routine is calling
- malloc, then we're scrod. But Emacs can't just block input while
- calling matching routines; then we don't notice interrupts when
- they come in. So, Emacs blocks input around all regexp calls
- except the matching calls, which it leaves unprotected, in the
- faith that they will not malloc. */
+/* Whether to allocate memory during matching. */
+
+/* Define MATCH_MAY_ALLOCATE to allow the searching and matching
+ functions allocate memory for the failure stack and registers.
+ Normally should be defined, because otherwise searching and
+ matching routines will have much smaller memory resources at their
+ disposal, and therefore might fail to handle complex regexps.
+ Therefore undefine MATCH_MAY_ALLOCATE only in the following
+ exceptional situations:
+
+ . When running on a system where memory is at premium.
+ . When alloca cannot be used at all, perhaps due to bugs in
+ its implementation, or its being unavailable, or due to a
+ very small stack size. This requires to define REGEX_MALLOC
+ to use malloc instead, which in turn could lead to memory
+ leaks if search is interrupted by a signal. (For these
+ reasons, defining REGEX_MALLOC when building Emacs
+ automatically undefines MATCH_MAY_ALLOCATE, but outside
+ Emacs you may not care about memory leaks.) If you want to
+ prevent the memory leaks, undefine MATCH_MAY_ALLOCATE.
+ . When code that calls the searching and matching functions
+ cannot allow memory allocation, for whatever reasons. */
/* Normally, this is fine. */
#define MATCH_MAY_ALLOCATE
@@ -1249,9 +1263,9 @@ static const char *re_error_msgid[] =
whose default stack limit is 2mb. In order for a larger
value to work reliably, you have to try to make it accord
with the process stack limit. */
-size_t re_max_failures = 40000;
+size_t emacs_re_max_failures = 40000;
# else
-size_t re_max_failures = 4000;
+size_t emacs_re_max_failures = 4000;
# endif
union fail_stack_elt
@@ -1304,7 +1318,7 @@ typedef struct
/* Double the size of FAIL_STACK, up to a limit
- which allows approximately `re_max_failures' items.
+ which allows approximately `emacs_re_max_failures' items.
Return 1 if succeeds, and 0 if either ran out of memory
allocating space for it or it was already too large.
@@ -1319,23 +1333,20 @@ typedef struct
#define FAIL_STACK_GROWTH_FACTOR 4
#define GROW_FAIL_STACK(fail_stack) \
- (((fail_stack).size * sizeof (fail_stack_elt_t) \
- >= re_max_failures * TYPICAL_FAILURE_SIZE) \
+ (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \
? 0 \
: ((fail_stack).stack \
= REGEX_REALLOCATE_STACK ((fail_stack).stack, \
(fail_stack).size * sizeof (fail_stack_elt_t), \
- min (re_max_failures * TYPICAL_FAILURE_SIZE, \
- ((fail_stack).size * sizeof (fail_stack_elt_t) \
- * FAIL_STACK_GROWTH_FACTOR))), \
+ min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
+ ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \
+ * sizeof (fail_stack_elt_t)), \
\
(fail_stack).stack == NULL \
? 0 \
: ((fail_stack).size \
- = (min (re_max_failures * TYPICAL_FAILURE_SIZE, \
- ((fail_stack).size * sizeof (fail_stack_elt_t) \
- * FAIL_STACK_GROWTH_FACTOR)) \
- / sizeof (fail_stack_elt_t)), \
+ = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
+ ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))), \
1)))
@@ -1790,6 +1801,7 @@ struct range_table_work_area
#define BIT_ALNUM 0x80
#define BIT_GRAPH 0x100
#define BIT_PRINT 0x200
+#define BIT_BLANK 0x400
/* Set the bit for character C in a list. */
@@ -2066,8 +2078,9 @@ re_wctype_to_bit (re_wctype_t cc)
case RECC_SPACE: return BIT_SPACE;
case RECC_GRAPH: return BIT_GRAPH;
case RECC_PRINT: return BIT_PRINT;
+ case RECC_BLANK: return BIT_BLANK;
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
- case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
+ case RECC_UNIBYTE: case RECC_ERROR: return 0;
default:
abort ();
}
@@ -3641,9 +3654,9 @@ regex_compile (const_re_char *pattern, size_t size,
{
int num_regs = bufp->re_nsub + 1;
- if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
+ if (fail_stack.size < emacs_re_max_failures * TYPICAL_FAILURE_SIZE)
{
- fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
+ fail_stack.size = emacs_re_max_failures * TYPICAL_FAILURE_SIZE;
falk_stack.stack = realloc (fail_stack.stack,
fail_stack.size * sizeof *falk_stack.stack);
}
@@ -4658,6 +4671,7 @@ execute_charset (const_re_char **pp, unsigned c, unsigned corig, bool unibyte)
(class_bits & BIT_ALNUM && ISALNUM (c)) ||
(class_bits & BIT_ALPHA && ISALPHA (c)) ||
(class_bits & BIT_SPACE && ISSPACE (c)) ||
+ (class_bits & BIT_BLANK && ISBLANK (c)) ||
(class_bits & BIT_WORD && ISWORD (c)) ||
((class_bits & BIT_UPPER) &&
(ISUPPER (c) || (corig != c &&