diff options
author | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2017-04-18 12:32:52 +0000 |
---|---|---|
committer | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2017-04-18 12:32:52 +0000 |
commit | 7a654c9328e342a531e068ca9e236e8e794e9534 (patch) | |
tree | 57d59915163e03bef30bf8cc03b2f3be0a26c291 /src/pcre2_compile.c | |
parent | f2c52afa4e7625b8680b5858f7ea8f007856c336 (diff) | |
download | pcre2-7a654c9328e342a531e068ca9e236e8e794e9534.tar.gz |
Implement PCRE2_EXTENDED_MORE and friends.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@758 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src/pcre2_compile.c')
-rw-r--r-- | src/pcre2_compile.c | 69 |
1 files changed, 47 insertions, 22 deletions
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index b4d7608..aa682d1 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -160,7 +160,7 @@ the length of compiled items varies with this. In the real compile phase, this workspace is not currently used. */ -#define COMPILE_WORK_SIZE (2048*LINK_SIZE) /* Size in code units */ +#define COMPILE_WORK_SIZE (3000*LINK_SIZE) /* Size in code units */ #define C16_WORK_SIZE \ ((COMPILE_WORK_SIZE * sizeof(PCRE2_UCHAR))/sizeof(uint16_t)) @@ -695,7 +695,8 @@ static int posix_substitutes[] = { #define PUBLIC_COMPILE_OPTIONS \ (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ - PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ + PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED| \ + PCRE2_EXTENDED_MORE|PCRE2_FIRSTLINE| \ PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \ PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ @@ -2226,12 +2227,17 @@ typedef struct nest_save { uint16_t reset_group; uint16_t max_group; uint16_t flags; + uint32_t options; } nest_save; -#define NSF_RESET 0x0001u -#define NSF_EXTENDED 0x0002u -#define NSF_DUPNAMES 0x0004u -#define NSF_CONDASSERT 0x0008u +#define NSF_RESET 0x0001u +#define NSF_CONDASSERT 0x0002u + +/* These options (changeable within the pattern) are tracked during parsing. +The rest are put into META_OPTIONS items and used when compiling. */ + +#define PARSE_TRACKED_OPTIONS \ + (PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_DUPNAMES) /* States used for analyzing ranges in character classes. The two OK values must be last. */ @@ -2291,6 +2297,10 @@ creating a nest_save that spans the end of the workspace. */ end_nests = (nest_save *)((char *)end_nests - ((cb->workspace_size * sizeof(PCRE2_UCHAR)) % sizeof(nest_save))); + +/* PCRE2_EXTENDED_MORE implies PCRE2_EXTENDED */ + +if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED; /* Now scan the pattern */ @@ -2907,7 +2917,8 @@ while (ptr < ptrend) /* Process a regular character class. If the first character is '^', set the negation flag. If the first few characters (either before or after ^) - are \Q\E or \E we skip them too. This makes for compatibility with Perl. */ + are \Q\E or \E or space or tab in extended-more mode, we skip them too. + This makes for compatibility with Perl. */ negate_class = FALSE; while (ptr < ptrend) @@ -2922,6 +2933,9 @@ while (ptr < ptrend) else break; } + else if ((options & PCRE2_EXTENDED_MORE) != 0 && + (c == CHAR_SPACE || c == CHAR_HT)) /* Note: just these two */ + continue; else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) negate_class = TRUE; else break; @@ -2955,7 +2969,7 @@ while (ptr < ptrend) for (;;) { BOOL char_is_literal = TRUE; - + /* Inside \Q...\E everything is literal except \E */ if (inescq) @@ -2968,6 +2982,12 @@ while (ptr < ptrend) } goto CLASS_LITERAL; } + + /* Skip over space and tab (only) in extended-more mode. */ + + if ((options & PCRE2_EXTENDED_MORE) != 0 && + (c == CHAR_SPACE || c == CHAR_HT)) + goto CLASS_CONTINUE; /* Handle POSIX class names. Perl allows a negation extension of the form [:^name:]. A square bracket that doesn't match the syntax is @@ -3387,8 +3407,7 @@ while (ptr < ptrend) } top_nest->nest_depth = nest_depth; top_nest->flags = 0; - if ((options & PCRE2_EXTENDED) != 0) top_nest->flags |= NSF_EXTENDED; - if ((options & PCRE2_DUPNAMES) != 0) top_nest->flags |= NSF_DUPNAMES; + top_nest->options = options & PARSE_TRACKED_OPTIONS; /* Start of non-capturing group that resets the capture count for each branch. */ @@ -3403,9 +3422,9 @@ while (ptr < ptrend) ptr++; } - /* Scan for options imsxJU. We need to keep track of (?x) and (?J) for - use while scanning. The other options are used during the compiling - phases. */ + /* Scan for options imsxJU. Some of them are tracked during parsing (see + PARSE_TRACKED_OPTIONS) as they are local to groups. Others are not needed + till compile time. */ else { @@ -3429,8 +3448,14 @@ while (ptr < ptrend) case CHAR_i: *optset |= PCRE2_CASELESS; break; case CHAR_m: *optset |= PCRE2_MULTILINE; break; case CHAR_s: *optset |= PCRE2_DOTALL; break; - case CHAR_x: *optset |= PCRE2_EXTENDED; break; case CHAR_U: *optset |= PCRE2_UNGREEDY; break; + + /* If x appears twice it sets the extended extended option. */ + + case CHAR_x: + *optset |= ((*optset & PCRE2_EXTENDED) != 0)? + PCRE2_EXTENDED_MORE : PCRE2_EXTENDED; + break; default: errorcode = ERR11; @@ -3439,6 +3464,10 @@ while (ptr < ptrend) } } options = (options | set) & (~unset); + + /* Unsetting extended should also get rid of extended-more. */ + + if ((options & PCRE2_EXTENDED) == 0) options &= ~PCRE2_EXTENDED_MORE; /* If the options ended with ')' this is not the start of a nested group with option changes, so the options change at this level. @@ -3916,8 +3945,7 @@ while (ptr < ptrend) } top_nest->nest_depth = nest_depth; top_nest->flags = NSF_CONDASSERT; - if ((options & PCRE2_EXTENDED) != 0) top_nest->flags |= NSF_EXTENDED; - if ((options & PCRE2_DUPNAMES) != 0) top_nest->flags |= NSF_DUPNAMES; + top_nest->options = options & PARSE_TRACKED_OPTIONS; } break; @@ -4038,20 +4066,17 @@ while (ptr < ptrend) break; /* End of group; reset the capture count to the maximum if we are in a (?| - group and/or reset the extended and dupnames options. Disallow quantifier - for a condition that is an assertion. */ + group and/or reset the options that are tracked during parsing. Disallow + quantifier for a condition that is an assertion. */ case CHAR_RIGHT_PARENTHESIS: okquantifier = TRUE; if (top_nest != NULL && top_nest->nest_depth == nest_depth) { + options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options; if ((top_nest->flags & NSF_RESET) != 0 && top_nest->max_group > cb->bracount) cb->bracount = top_nest->max_group; - if ((top_nest->flags & NSF_EXTENDED) != 0) options |= PCRE2_EXTENDED; - else options &= ~PCRE2_EXTENDED; - if ((top_nest->flags & NSF_DUPNAMES) != 0) options |= PCRE2_DUPNAMES; - else options &= ~PCRE2_DUPNAMES; if ((top_nest->flags & NSF_CONDASSERT) != 0) okquantifier = FALSE; if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL; |