diff options
author | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2019-06-10 16:41:22 +0000 |
---|---|---|
committer | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2019-06-10 16:41:22 +0000 |
commit | 223899c5055f2f9def65be3132a11e6f2fbd3776 (patch) | |
tree | d6c714de7faea0358f77f1fe289dc3b4a91219a4 /src/pcre2_compile.c | |
parent | ab143663e4004f483db857196dd4e9ea44360fe3 (diff) | |
download | pcre2-223899c5055f2f9def65be3132a11e6f2fbd3776.tar.gz |
Allow (*ACCEPT) to be quantified.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1101 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src/pcre2_compile.c')
-rw-r--r-- | src/pcre2_compile.c | 44 |
1 files changed, 33 insertions, 11 deletions
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index cf24101..739c919 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1419,9 +1419,6 @@ the result is "not a repeat quantifier". */ EXIT: if (yield || *errorcodeptr != 0) *ptrptr = p; return yield; - - - } @@ -2450,8 +2447,9 @@ must be last. */ enum { RANGE_NO, RANGE_STARTED, RANGE_OK_ESCAPED, RANGE_OK_LITERAL }; -/* Only in 32-bit mode can there be literals > META_END. A macros encapsulates -the storing of literal values in the parsed pattern. */ +/* Only in 32-bit mode can there be literals > META_END. A macro encapsulates +the storing of literal values in the main parsed pattern, where they can always +be quantified. */ #if PCRE2_CODE_UNIT_WIDTH == 32 #define PARSED_LITERAL(c, p) \ @@ -2474,6 +2472,7 @@ uint32_t delimiter; uint32_t namelen; uint32_t class_range_state; uint32_t *verblengthptr = NULL; /* Value avoids compiler warning */ +uint32_t *verbstartptr = NULL; uint32_t *previous_callout = NULL; uint32_t *parsed_pattern = cb->parsed_pattern; uint32_t *parsed_pattern_end = cb->parsed_pattern_end; @@ -2640,13 +2639,15 @@ while (ptr < ptrend) switch(c) { - default: - PARSED_LITERAL(c, parsed_pattern); + default: /* Don't use PARSED_LITERAL() because it */ +#if PCRE2_CODE_UNIT_WIDTH == 32 /* sets okquantifier. */ + if (c >= META_END) *parsed_pattern++ = META_BIGVALUE; +#endif + *parsed_pattern++ = c; break; - + case CHAR_RIGHT_PARENTHESIS: inverbname = FALSE; - okquantifier = FALSE; /* Was probably set by literals */ /* This is the length in characters */ verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1); /* But the limit on the length is in code units */ @@ -3135,6 +3136,21 @@ while (ptr < ptrend) goto FAILED_BACK; } + /* Most (*VERB)s are not allowed to be quantified, but an ungreedy + quantifier can be useful for (*ACCEPT) - meaning "succeed on backtrack", a + sort of negated (*COMMIT). We therefore allow (*ACCEPT) to be quantified by + wrapping it in non-capturing brackets, but we have to allow for a preceding + (*MARK) for when (*ACCEPT) has an argument. */ + + if (parsed_pattern[-1] == META_ACCEPT) + { + uint32_t *p; + for (p = parsed_pattern - 1; p >= verbstartptr; p--) p[1] = p[0]; + *verbstartptr = META_NOCAPTURE; + parsed_pattern[1] = META_KET; + parsed_pattern += 2; + } + /* Now we can put the quantifier into the parsed pattern vector. At this stage, we have only the basic quantifier. The check for a following + or ? modifier happens at the top of the loop, after any intervening comments @@ -3775,6 +3791,12 @@ while (ptr < ptrend) goto FAILED; } + /* Remember where this verb, possibly with a preceding (*MARK), starts, + for handling quantified (*ACCEPT). */ + + verbstartptr = parsed_pattern; + okquantifier = (verbs[i].meta == META_ACCEPT); + /* It appears that Perl allows any characters whatsoever, other than a closing parenthesis, to appear in arguments ("names"), so we no longer insist on letters, digits, and underscores. Perl does not, however, do @@ -9503,10 +9525,10 @@ if (pattern == NULL) if (ccontext == NULL) ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context)); - + /* PCRE2_MATCH_INVALID_UTF implies UTF */ -if ((options & PCRE2_MATCH_INVALID_UTF) != 0) options |= PCRE2_UTF; +if ((options & PCRE2_MATCH_INVALID_UTF) != 0) options |= PCRE2_UTF; /* Check that all undefined public option bits are zero. */ |