diff options
author | Hugo van der Sanden <hv@crypt.org> | 2014-12-20 12:21:27 +0000 |
---|---|---|
committer | Hugo van der Sanden <hv@crypt.org> | 2014-12-20 12:53:20 +0000 |
commit | 99807a43ff8067f13ddb17de59a7e1331a0051ea (patch) | |
tree | 5bcebaac9752503871da350d5bd7a74f38c3fa43 /regcomp.c | |
parent | 9745959a89f3a201c789b8e4ce494405f95b2a7a (diff) | |
download | perl-99807a43ff8067f13ddb17de59a7e1331a0051ea.tar.gz |
protect RExC_naughty changes behind macros
Avoid overflow, and add a bit of explanation.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 42 |
1 files changed, 27 insertions, 15 deletions
@@ -225,7 +225,6 @@ struct RExC_state_t { #define RExC_emit_dummy (pRExC_state->emit_dummy) #define RExC_emit_start (pRExC_state->emit_start) #define RExC_emit_bound (pRExC_state->emit_bound) -#define RExC_naughty (pRExC_state->naughty) #define RExC_sawback (pRExC_state->sawback) #define RExC_seen (pRExC_state->seen) #define RExC_size (pRExC_state->size) @@ -255,6 +254,19 @@ struct RExC_state_t { #define RExC_frame_last (pRExC_state->frame_last) #define RExC_frame_count (pRExC_state->frame_count) +/* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set + * a flag to disable back-off on the fixed/floating substrings - if it's + * a high complexity pattern we assume the benefit of avoiding a full match + * is worth the cost of checking for the substrings even if they rarely help. + */ +#define RExC_naughty (pRExC_state->naughty) +#define TOO_NAUGHTY (10) +#define MARK_NAUGHTY(add) \ + if (RExC_naughty < TOO_NAUGHTY) \ + RExC_naughty += (add) +#define MARK_NAUGHTY_EXP(exp, add) \ + if (RExC_naughty < TOO_NAUGHTY) \ + RExC_naughty += RExC_naughty / (exp) + (add) #define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?') #define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \ @@ -6911,7 +6923,7 @@ reStudy: if (UTF) SvUTF8_on(rx); /* Unicode in it? */ ri->regstclass = NULL; - if (RExC_naughty >= 10) /* Probably an expensive pattern. */ + if (RExC_naughty >= TOO_NAUGHTY) /* Probably an expensive pattern. */ r->intflags |= PREGf_NAUGHTY; scan = ri->program + 1; /* First BRANCH. */ @@ -10727,7 +10739,9 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) if (chain == NULL) /* First piece. */ *flagp |= flags&SPSTART; else { - RExC_naughty++; + /* FIXME adding one for every branch after the first is probably + * excessive now we have TRIE support. (hv) */ + MARK_NAUGHTY(1); REGTAIL(pRExC_state, chain, latest); } chain = latest; @@ -10859,8 +10873,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) do_curly: if ((flags&SIMPLE)) { - if (RExC_naughty < I32_MAX / 2) - RExC_naughty += 2 + RExC_naughty / 2; + MARK_NAUGHTY_EXP(2, 2); reginsert(pRExC_state, CURLY, ret, depth+1); Set_Node_Offset(ret, parse_start+1); /* MJD */ Set_Node_Cur_Length(ret, parse_start); @@ -10886,8 +10899,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING)); if (SIZE_ONLY) RExC_whilem_seen++, RExC_extralen += 3; - if (RExC_naughty < I32_MAX / 4) - RExC_naughty += 4 + RExC_naughty; /* compound interest */ + MARK_NAUGHTY_EXP(1, 4); /* compound interest */ } ret->flags = 0; @@ -10937,7 +10949,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) if (op == '*' && (flags&SIMPLE)) { reginsert(pRExC_state, STAR, ret, depth+1); ret->flags = 0; - RExC_naughty += 4; + MARK_NAUGHTY(4); RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN; } else if (op == '*') { @@ -10947,7 +10959,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) else if (op == '+' && (flags&SIMPLE)) { reginsert(pRExC_state, PLUS, ret, depth+1); ret->flags = 0; - RExC_naughty += 3; + MARK_NAUGHTY(3); RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN; } else if (op == '+') { @@ -11118,7 +11130,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p, nextchar(pRExC_state); *node_p = reg_node(pRExC_state, REG_ANY); *flagp |= HASWIDTH|SIMPLE; - RExC_naughty++; + MARK_NAUGHTY(1); Set_Node_Length(*node_p, 1); /* MJD */ return 1; } @@ -11635,7 +11647,7 @@ tryagain: else ret = reg_node(pRExC_state, REG_ANY); *flagp |= HASWIDTH|SIMPLE; - RExC_naughty++; + MARK_NAUGHTY(1); Set_Node_Length(ret, 1); /* MJD */ break; case '[': @@ -13814,7 +13826,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, RExC_parse++; invert = TRUE; allow_multi_folds = FALSE; - RExC_naughty++; + MARK_NAUGHTY(1); if (skip_white) { RExC_parse = regpatws(pRExC_state, RExC_parse, FALSE /* means don't recognize comments */ ); @@ -14804,7 +14816,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, if (! LOC && value == '\n') { op = REG_ANY; /* Optimize [^\n] */ *flagp |= HASWIDTH|SIMPLE; - RExC_naughty++; + MARK_NAUGHTY(1); } } else if (value < 256 || UTF) { @@ -15311,7 +15323,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, if (end == UV_MAX) { op = SANY; *flagp |= HASWIDTH|SIMPLE; - RExC_naughty++; + MARK_NAUGHTY(1); } else if (end == '\n' - 1 && invlist_iternext(cp_list, &start, &end) @@ -15319,7 +15331,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, { op = REG_ANY; *flagp |= HASWIDTH|SIMPLE; - RExC_naughty++; + MARK_NAUGHTY(1); } } invlist_iterfinish(cp_list); |