summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2020-02-12 17:36:44 +0000
committerSteve Hay <steve.m.hay@googlemail.com>2020-02-12 17:37:06 +0000
commit9067ea008cc26b58a0502acd0e9cda2a89acb22a (patch)
tree3f9628d9f841cebf023154599cf685281db98e35
parent284d721f55846bff21c6b143cb5139d4e64e8c41 (diff)
downloadperl-9067ea008cc26b58a0502acd0e9cda2a89acb22a.tar.gz
PATCH: [perl #134329] Use after free in regcomp.c
A compiled regex is composed of nodes, forming a linked list, with normally a maximum of 16 bits used to specify the offset of the next link. For patterns that require more space than this, the nodes that jump around are replaced with ones that have wider offsets. Most nodes are unaffected, as they just contain the offset of the next node, and that number is always small. The jump nodes are the ones affected. When compiling a pattern, the 16 bit mechanism is used, until it overflows, at which point the pattern is recompiled with the long jumps instead. When I rewrote the compiler last year to make it generally one pass, I noticed a lot of the cases where a node was added didn't check if the result overflowed (the function that does this returns FALSE in that case). I presumed the prior authors knew better, and did not change things, except to put in a bogus value in the link (offset) field that should cause a crash if it were used. That's what's happening in this ticket. But seeing this example, it's clear that the return value should be checked every time, because you can reach the limit at any time. This commit changes to do that, and to require the function's return value to not be ignored, to guard against future changes. My guess is that the reason it generally worked when there were multiple passes is that the first pass didn't do anything except count space, and that at some point before the end of the pass the return value did get checked, so by the time the nodes were allocated for real, it knew enough to use the long jumps. (cherry picked from commit 3b2e5620ed4a6b341f97ffd1d4b6466cc2c4bc5b)
-rw-r--r--MANIFEST1
-rw-r--r--embed.fnc4
-rw-r--r--proto.h8
-rw-r--r--regcomp.c109
-rw-r--r--t/re/bigfuzzy_not_utf8.tbin0 -> 36399 bytes
5 files changed, 88 insertions, 34 deletions
diff --git a/MANIFEST b/MANIFEST
index 7068d5f0cd..2656e61250 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5870,6 +5870,7 @@ t/porting/test_bootstrap.t Test that the instructions for test bootstrapping are
t/porting/utils.t Check that utility scripts still compile
t/re/alpha_assertions.t See if things like '(*postive_lookahed:...) work properly
t/re/anyof.t See if bracketed char classes [...] compile properly
+t/re/bigfuzzy_not_utf8.t Big and ugly tests not storable as UTF-8
t/re/charset.t See if regex modifiers like /d, /u work properly
t/re/fold_grind.pl Core file to see if regex case folding works properly
t/re/fold_grind_8.t Wrapper for fold_grind.pl for /l testing with a UTF-8 locale
diff --git a/embed.fnc b/embed.fnc
index 45597f67b6..aedb4baef1 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2425,7 +2425,7 @@ Es |void |reginsert |NN RExC_state_t *pRExC_state \
|const U8 op \
|const regnode_offset operand \
|const U32 depth
-Es |bool |regtail |NN RExC_state_t * pRExC_state \
+EsR |bool |regtail |NN RExC_state_t * pRExC_state \
|NN const regnode_offset p \
|NN const regnode_offset val \
|const U32 depth
@@ -2559,7 +2559,7 @@ Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie\
Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\
|NULLOK HV* widecharmap|NN AV *revcharmap\
|U32 next_alloc|U32 depth
-Es |bool |regtail_study |NN RExC_state_t *pRExC_state \
+EsR |bool |regtail_study |NN RExC_state_t *pRExC_state \
|NN regnode_offset p|NN const regnode_offset val|U32 depth
# endif
#endif
diff --git a/proto.h b/proto.h
index 0f8feed187..141ddbaee6 100644
--- a/proto.h
+++ b/proto.h
@@ -4452,9 +4452,11 @@ PERL_CALLCONV int Perl_re_indentf(pTHX_ const char *fmt, U32 depth, ...);
assert(fmt)
STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
-STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth);
+STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth)
+ __attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_REGTAIL_STUDY \
assert(pRExC_state); assert(p); assert(val)
+
# endif
# if defined(PERL_IN_REGEXEC_C)
STATIC void S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8, const char *start, const char *end, const char *blurb);
@@ -5587,9 +5589,11 @@ STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 o
STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp)
-STATIC bool S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth);
+STATIC bool S_regtail(pTHX_ RExC_state_t * pRExC_state, const regnode_offset p, const regnode_offset val, const U32 depth)
+ __attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_REGTAIL \
assert(pRExC_state); assert(p); assert(val)
+
STATIC void S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, struct scan_data_t *data, SSize_t *minlenp, int is_inf);
#define PERL_ARGS_ASSERT_SCAN_COMMIT \
assert(pRExC_state); assert(data); assert(minlenp)
diff --git a/regcomp.c b/regcomp.c
index bf84ac46b9..fce1e6fa45 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -11307,10 +11307,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
return 0;
}
- REGTAIL(pRExC_state, ret, atomic);
+ if (! REGTAIL(pRExC_state, ret, atomic)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
- REGTAIL(pRExC_state, atomic,
- reg_node(pRExC_state, SRCLOSE));
+ if (! REGTAIL(pRExC_state, atomic, reg_node(pRExC_state,
+ SRCLOSE)))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
RExC_in_script_run = 0;
return ret;
@@ -11769,7 +11774,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
RExC_flags & RXf_PMf_COMPILETIME
);
FLAGS(REGNODE_p(ret)) = 2;
- REGTAIL(pRExC_state, ret, eval);
+ if (! REGTAIL(pRExC_state, ret, eval)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
/* deal with the length of this later - MJD */
return ret;
}
@@ -11822,7 +11829,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
tail = reg(pRExC_state, 1, &flag, depth+1);
RETURN_FAIL_ON_RESTART(flag, flagp);
- REGTAIL(pRExC_state, ret, tail);
+ if (! REGTAIL(pRExC_state, ret, tail)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
goto insert_if;
}
else if ( RExC_parse[0] == '<' /* (?(<NAME>)...) */
@@ -11914,15 +11923,22 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
nextchar(pRExC_state);
insert_if:
- REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0));
+ if (! REGTAIL(pRExC_state, ret, reganode(pRExC_state,
+ IFTHEN, 0)))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
br = regbranch(pRExC_state, &flags, 1, depth+1);
if (br == 0) {
RETURN_FAIL_ON_RESTART(flags,flagp);
FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
(UV) flags);
} else
- REGTAIL(pRExC_state, br, reganode(pRExC_state,
- LONGJMP, 0));
+ if (! REGTAIL(pRExC_state, br, reganode(pRExC_state,
+ LONGJMP, 0)))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
c = UCHARAT(RExC_parse);
nextchar(pRExC_state);
if (flags&HASWIDTH)
@@ -11939,7 +11955,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
(UV) flags);
}
- REGTAIL(pRExC_state, ret, lastbr);
+ if (! REGTAIL(pRExC_state, ret, lastbr)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
if (flags&HASWIDTH)
*flagp |= HASWIDTH;
c = UCHARAT(RExC_parse);
@@ -11954,16 +11972,26 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
vFAIL("Switch (?(condition)... contains too many branches");
}
ender = reg_node(pRExC_state, TAIL);
- REGTAIL(pRExC_state, br, ender);
+ if (! REGTAIL(pRExC_state, br, ender)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
if (lastbr) {
- REGTAIL(pRExC_state, lastbr, ender);
- REGTAIL(pRExC_state, REGNODE_OFFSET(
- NEXTOPER(
- NEXTOPER(REGNODE_p(lastbr)))),
- ender);
+ if (! REGTAIL(pRExC_state, lastbr, ender)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
+ if (! REGTAIL(pRExC_state,
+ REGNODE_OFFSET(
+ NEXTOPER(
+ NEXTOPER(REGNODE_p(lastbr)))),
+ ender))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
}
else
- REGTAIL(pRExC_state, ret, ender);
+ if (! REGTAIL(pRExC_state, ret, ender)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
#if 0 /* Removing this doesn't cause failures in the test suite -- khw */
RExC_size++; /* XXX WHY do we need this?!!
For large programs it seems to be required
@@ -12113,7 +12141,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
*flagp |= flags&SIMPLE;
}
if (is_open) { /* Starts with OPEN. */
- REGTAIL(pRExC_state, ret, br); /* OPEN -> first. */
+ if (! REGTAIL(pRExC_state, ret, br)) { /* OPEN -> first. */
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
}
else if (paren != '?') /* Not Conditional */
ret = br;
@@ -12121,12 +12151,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
lastbr = br;
while (*RExC_parse == '|') {
if (RExC_use_BRANCHJ) {
+ bool shut_gcc_up;
+
ender = reganode(pRExC_state, LONGJMP, 0);
/* Append to the previous. */
- REGTAIL(pRExC_state,
- REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
- ender);
+ shut_gcc_up = REGTAIL(pRExC_state,
+ REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
+ ender);
+ PERL_UNUSED_VAR(shut_gcc_up);
}
nextchar(pRExC_state);
if (freeze_paren) {
@@ -12237,9 +12270,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
is_nothing= 0;
}
else if (op == BRANCHJ) {
- REGTAIL_STUDY(pRExC_state,
- REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))),
- ender);
+ bool shut_gcc_up = REGTAIL_STUDY(pRExC_state,
+ REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))),
+ ender);
+ PERL_UNUSED_VAR(shut_gcc_up);
/* for now we always disable this optimisation * /
if ( OP(NEXTOPER(NEXTOPER(br))) != NOTHING
|| regnext(NEXTOPER(NEXTOPER(br))) != REGNODE_p(ender))
@@ -12551,7 +12585,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
const regnode_offset w = reg_node(pRExC_state, WHILEM);
FLAGS(REGNODE_p(w)) = 0;
- REGTAIL(pRExC_state, ret, w);
+ if (! REGTAIL(pRExC_state, ret, w)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
if (RExC_use_BRANCHJ) {
reginsert(pRExC_state, LONGJMP, ret, depth+1);
reginsert(pRExC_state, NOTHING, ret, depth+1);
@@ -12566,7 +12602,11 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
if (RExC_use_BRANCHJ)
NEXT_OFF(REGNODE_p(ret)) = 3; /* Go over NOTHING to
LONGJMP. */
- REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
+ if (! REGTAIL(pRExC_state, ret, reg_node(pRExC_state,
+ NOTHING)))
+ {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
RExC_whilem_seen++;
MARK_NAUGHTY_EXP(1, 4); /* compound interest */
}
@@ -12638,16 +12678,22 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
if (*RExC_parse == '?') {
nextchar(pRExC_state);
reginsert(pRExC_state, MINMOD, ret, depth+1);
- REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE);
+ if (! REGTAIL(pRExC_state, ret, ret + NODE_STEP_REGNODE)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
}
else if (*RExC_parse == '+') {
regnode_offset ender;
nextchar(pRExC_state);
ender = reg_node(pRExC_state, SUCCEED);
- REGTAIL(pRExC_state, ret, ender);
+ if (! REGTAIL(pRExC_state, ret, ender)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
reginsert(pRExC_state, SUSPEND, ret, depth+1);
ender = reg_node(pRExC_state, TAIL);
- REGTAIL(pRExC_state, ret, ender);
+ if (! REGTAIL(pRExC_state, ret, ender)) {
+ REQUIRE_BRANCHJ(flagp, 0);
+ }
}
if (ISMULT2(RExC_parse)) {
@@ -19822,8 +19868,8 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
}
else {
if (val - scan > U16_MAX) {
- /* Since not all callers check the return value, populate this with
- * something that won't loop and will likely lead to a crash if
+ /* Populate this with something that won't loop and will likely
+ * lead to a crash if the caller ignores the failure return, and
* execution continues */
NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
return FALSE;
@@ -19934,6 +19980,9 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
}
else {
if (val - scan > U16_MAX) {
+ /* Populate this with something that won't loop and will likely
+ * lead to a crash if the caller ignores the failure return, and
+ * execution continues */
NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
return FALSE;
}
diff --git a/t/re/bigfuzzy_not_utf8.t b/t/re/bigfuzzy_not_utf8.t
new file mode 100644
index 0000000000..b4dfd150a9
--- /dev/null
+++ b/t/re/bigfuzzy_not_utf8.t
Binary files differ