summary | refs | log | tree | commit | diff
path: root/src/regex.c
diff options
context:
space:
mode:
author: Noam Postavsky <npostavs@gmail.com> 2016-10-19 20:23:50 -0400
committer: Noam Postavsky <npostavs@gmail.com> 2016-10-21 22:24:54 -0400
commit: ad66b3fadb7ae22a4cbb82bb1507c39ceadf3897 (patch)
tree: bc3857bb1d0eeccfd16a0fb3e4d8cb44a9ebec56 /src/regex.c
parent: 5a26c9b0e1b0d9a2de35e0a8b0a803017e70def0 (diff)
download: emacs-ad66b3fadb7ae22a4cbb82bb1507c39ceadf3897.tar.gz
Fix handling of allocation in regex matching
`re_match_2_internal' uses pointers to the Lisp objects that it searches. Since it may call malloc when growing the "fail stack", these pointers may be invalidated while searching, resulting in memory corruption (Bug #24358). To fix this, we check the pointer that the Lisp object (as specified by re_match_object) points to before and after growing the stack, and update existing pointers accordingly. * src/regex.c (STR_BASE_PTR): New macro. (ENSURE_FAIL_STACK, re_search_2): Use it to convert pointers into offsets before possible malloc call, and back into pointers again afterwards. (POS_AS_IN_BUFFER): Add explanatory comment about punning trick. * src/search.c (search_buffer): Instead of storing search location as pointers, store them as offsets and recompute the corresponding address for each call to `re_search_2'. (string_match_1, fast_string_match_internal, fast_looking_at): * src/dired.c (directory_files_internal): Set `re_match_object' to Qnil after calling `re_search' or `re_match_2'. * src/regex.h (re_match_object): Mention new usage in commentary.
Diffstat (limited to 'src/regex.c')
-rw-r--r-- src/regex.c 76
1 files changed, 73 insertions, 3 deletions
diff --git a/src/regex.c b/src/regex.c
index 164eb4612ae..1346ef401cb 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -152,6 +152,8 @@
/* Converts the pointer to the char to BEG-based offset from the start. */
# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
+/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
+ result to get the right base index. */
# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
@@ -1436,11 +1438,62 @@ typedef struct
#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
#define TOP_FAILURE_HANDLE() fail_stack.frame
+#ifdef emacs
+#define STR_BASE_PTR(obj) \
+ (NILP (obj) ? current_buffer->text->beg : \
+ STRINGP (obj) ? SDATA (obj) : \
+ NULL)
+#else
+#define STR_BASE_PTR(obj) NULL
+#endif
#define ENSURE_FAIL_STACK(space) \
while (REMAINING_AVAIL_SLOTS <= space) { \
+ re_char* orig_base = STR_BASE_PTR (re_match_object); \
+ ptrdiff_t string1_off, end1_off, end_match_1_off; \
+ ptrdiff_t string2_off, end2_off, end_match_2_off; \
+ ptrdiff_t d_off, dend_off, dfail_off; \
+ if (orig_base) \
+ { \
+ if (string1) \
+ { \
+ string1_off = string1 - orig_base; \
+ end1_off = end1 - orig_base; \
+ end_match_1_off = end_match_1 - orig_base; \
+ } \
+ if (string2) \
+ { \
+ string2_off = string2 - orig_base; \
+ end2_off = end2 - orig_base; \
+ end_match_2_off = end_match_2 - orig_base; \
+ } \
+ d_off = d - orig_base; \
+ dend_off = dend - orig_base; \
+ dfail_off = dfail - orig_base; \
+ } \
if (!GROW_FAIL_STACK (fail_stack)) \
- return -2; \
+ return -2; \
+ /* GROW_FAIL_STACK may call malloc and relocate the string */ \
+ /* pointers. */ \
+ re_char* new_base = STR_BASE_PTR (re_match_object); \
+ if (new_base && new_base != orig_base) \
+ { \
+ if (string1) \
+ { \
+ string1 = new_base + string1_off; \
+ end1 = new_base + end1_off; \
+ end_match_1 = new_base + end_match_1_off; \
+ } \
+ if (string2) \
+ { \
+ string2 = new_base + string2_off; \
+ end2 = new_base + end2_off; \
+ end_match_2 = new_base + end_match_2_off; \
+ } \
+ d = new_base + d_off; \
+ dend = new_base + dend_off; \
+ dfail = new_base + dfail_off; \
+ } \
DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\
DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\
}
@@ -4443,6 +4496,16 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
&& !bufp->can_be_null)
return -1;
+ /* re_match_2_internal may allocate, causing a relocation of the
+ lisp text object that we're searching. */
+ ptrdiff_t offset1, offset2;
+ re_char *orig_base = STR_BASE_PTR (re_match_object);
+ if (orig_base)
+ {
+ if (string1) offset1 = string1 - orig_base;
+ if (string2) offset2 = string2 - orig_base;
+ }
+
val = re_match_2_internal (bufp, string1, size1, string2, size2,
startpos, regs, stop);
@@ -4452,6 +4515,13 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
if (val == -2)
return -2;
+ re_char *new_base = STR_BASE_PTR (re_match_object);
+ if (new_base && new_base != orig_base)
+ {
+ if (string1) string1 = offset1 + new_base;
+ if (string2) string2 = offset2 + new_base;
+ }
+
advance:
if (!range)
break;
@@ -4887,8 +4957,8 @@ WEAK_ALIAS (__re_match, re_match)
#endif /* not emacs */
#ifdef emacs
-/* In Emacs, this is the string or buffer in which we
- are matching. It is used for looking up syntax properties. */
+/* In Emacs, this is the string or buffer in which we are matching.
+ See the declaration in regex.h for details. */
Lisp_Object re_match_object;
#endif