diff options
author | zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> | 2017-05-24 10:14:43 +0000 |
---|---|---|
committer | zherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069> | 2017-05-24 10:14:43 +0000 |
commit | 292895b249c9e24dd537f17b7859f9a77c5faef8 (patch) | |
tree | 8aeb2598d9025da6ce1ecb175afc983b274037d5 /src/pcre2_convert.c | |
parent | 5e7da1ba26b69e72ec94fda8a58515a8b8d110e8 (diff) | |
download | pcre2-292895b249c9e24dd537f17b7859f9a77c5faef8.tar.gz |
Rework character range parsing in glob conversion.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@802 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src/pcre2_convert.c')
-rw-r--r-- | src/pcre2_convert.c | 252 |
1 files changed, 150 insertions, 102 deletions
diff --git a/src/pcre2_convert.c b/src/pcre2_convert.c index 0cf74a8..1d055f8 100644 --- a/src/pcre2_convert.c +++ b/src/pcre2_convert.c @@ -423,6 +423,47 @@ out->output_size = output_size; } +/* Prints the separator into the output. + +Arguments: + out output context + separator glob separator + with_escape backslash is needed before separator +*/ + +static void +convert_glob_print_separator(pcre2_output_context *out, + PCRE2_UCHAR separator, BOOL with_escape) +{ +if (with_escape) + convert_glob_write(out, CHAR_BACKSLASH); + +convert_glob_write(out, separator); +} + + +/* Prints a wildcard into the output. + +Arguments: + out output context + separator glob separator + with_escape backslash is needed before separator +*/ + +static void +convert_glob_print_wildcard(pcre2_output_context *out, + PCRE2_UCHAR separator, BOOL with_escape) +{ +out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; +out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; +convert_glob_write_str(out, 2); + +convert_glob_print_separator(out, separator, with_escape); + +convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET); +} + + /* Parse a posix class. Arguments: @@ -519,77 +560,89 @@ Returns: 0 => success static int convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end, - pcre2_output_context *out, PCRE2_UCHAR separator, BOOL with_escape) + pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator, + BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep) { +BOOL is_negative = FALSE; +BOOL separator_seen = FALSE; +BOOL has_prev_c; PCRE2_SPTR pattern = *from; -PCRE2_UCHAR c; +PCRE2_SPTR char_start = NULL; +uint32_t c, prev_c; int result, len; +(void)utf; /* Avoid compiler warning. */ + if (pattern >= pattern_end) { *from = pattern; return ERROR_MISSING_SQUARE_BRACKET; } -c = *pattern; - -if (c == CHAR_EXCLAMATION_MARK - || c == CHAR_CIRCUMFLEX_ACCENT) - { - out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; - out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; - len = 2; - } -else - { - out->out_str[0] = CHAR_LEFT_PARENTHESIS; - out->out_str[1] = CHAR_QUESTION_MARK; - out->out_str[2] = CHAR_EXCLAMATION_MARK; - len = 3; - } - -if (with_escape) - { - out->out_str[len] = CHAR_BACKSLASH; - len++; - } - -out->out_str[len] = (uint8_t) separator; - -convert_glob_write_str(out, len + 1); - -if (c == CHAR_EXCLAMATION_MARK - || c == CHAR_CIRCUMFLEX_ACCENT) +if (*pattern == CHAR_EXCLAMATION_MARK + || *pattern == CHAR_CIRCUMFLEX_ACCENT) { pattern++; + if (pattern >= pattern_end) { *from = pattern; return ERROR_MISSING_SQUARE_BRACKET; } - c = *pattern; + + is_negative = TRUE; + + out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; + out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; + len = 2; + + if (!no_wildsep) + { + if (with_escape) + { + out->out_str[len] = CHAR_BACKSLASH; + len++; + } + out->out_str[len] = (uint8_t) separator; + } + + convert_glob_write_str(out, len + 1); } else - { - out->out_str[0] = CHAR_RIGHT_PARENTHESIS; - out->out_str[1] = CHAR_LEFT_SQUARE_BRACKET; - convert_glob_write_str(out, 2); - } + convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET); + +has_prev_c = FALSE; +prev_c = 0; -if (c == CHAR_MINUS || c == CHAR_RIGHT_SQUARE_BRACKET) +if (*pattern == CHAR_RIGHT_SQUARE_BRACKET) { - convert_glob_write(out, CHAR_BACKSLASH); - convert_glob_write(out, c); + convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET); + has_prev_c = TRUE; + prev_c = CHAR_RIGHT_SQUARE_BRACKET; pattern++; } while (pattern < pattern_end) { - c = *pattern++; + char_start = pattern; + GETCHARINCTEST(c, pattern); if (c == CHAR_RIGHT_SQUARE_BRACKET) { convert_glob_write(out, c); + + if (!is_negative && !no_wildsep && separator_seen) + { + out->out_str[0] = CHAR_LEFT_PARENTHESIS; + out->out_str[1] = CHAR_QUESTION_MARK; + out->out_str[2] = CHAR_LESS_THAN_SIGN; + out->out_str[3] = CHAR_EXCLAMATION_MARK; + convert_glob_write_str(out, 4); + + convert_glob_print_separator(out, separator, with_escape); + convert_glob_write(out, CHAR_RIGHT_PARENTHESIS); + } + *from = pattern; return 0; } @@ -605,32 +658,64 @@ while (pattern < pattern_end) pattern = *from; - /* A dash after a character class is a normal character. */ - if (pattern >= pattern_end || *pattern != CHAR_MINUS) - continue; - - c = CHAR_MINUS; - pattern++; + has_prev_c = FALSE; + prev_c = 0; + separator_seen = TRUE; + continue; } - else if (c == CHAR_MINUS) + else if (c == CHAR_MINUS && has_prev_c && + *pattern != CHAR_RIGHT_SQUARE_BRACKET) { convert_glob_write(out, CHAR_MINUS); - c = *pattern++; - if (c == CHAR_BACKSLASH) + char_start = pattern; + GETCHARINCTEST(c, pattern); + + if (pattern >= pattern_end) break; + + if (escape != 0 && c == escape) + { + char_start = pattern; + GETCHARINCTEST(c, pattern); + } + else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON) + { + *from = pattern; + return PCRE2_ERROR_CONVERT_SYNTAX; + } + + if (prev_c > c) { + *from = pattern; + return PCRE2_ERROR_CONVERT_SYNTAX; + } + + if (prev_c < separator && separator < c) separator_seen = TRUE; + + has_prev_c = FALSE; + prev_c = 0; + } + else + { + if (escape != 0 && c == escape) + { + char_start = pattern; + GETCHARINCTEST(c, pattern); + if (pattern >= pattern_end) break; - c = *pattern++; } + + has_prev_c = TRUE; + prev_c = c; } - else if (c == CHAR_BACKSLASH) - c = *pattern++; if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET || c == CHAR_BACKSLASH || c == CHAR_MINUS) convert_glob_write(out, CHAR_BACKSLASH); - convert_glob_write(out, c); + if (c == separator) separator_seen = TRUE; + + do convert_glob_write(out, *char_start++); while (char_start < pattern); } *from = pattern; @@ -638,47 +723,6 @@ return ERROR_MISSING_SQUARE_BRACKET; } -/* Prints the separator into the output. - -Arguments: - out output context - separator glob separator - with_escape backslash is needed before separator -*/ - -static void -convert_glob_print_separator(pcre2_output_context *out, - PCRE2_UCHAR separator, BOOL with_escape) -{ -if (with_escape) - convert_glob_write(out, CHAR_BACKSLASH); - -convert_glob_write(out, separator); -} - - -/* Prints a wildcard into the output. - -Arguments: - out output context - separator glob separator - with_escape backslash is needed before separator -*/ - -static void -convert_glob_print_wildcard(pcre2_output_context *out, - PCRE2_UCHAR separator, BOOL with_escape) -{ -out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET; -out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT; -convert_glob_write_str(out, 2); - -convert_glob_print_separator(out, separator, with_escape); - -convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET); -} - - /* Prints a (*COMMIT) into the output. Arguments: @@ -727,8 +771,8 @@ pcre2_output_context out; PCRE2_SPTR pattern_start = pattern; PCRE2_SPTR pattern_end = pattern + plength; PCRE2_UCHAR separator = ccontext->glob_separator; +PCRE2_UCHAR escape = ccontext->glob_escape; PCRE2_UCHAR c; -BOOL no_escape = ccontext->glob_escape == 0; BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0; BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0; BOOL in_atomic = FALSE; @@ -736,14 +780,16 @@ BOOL after_starstar = FALSE; BOOL with_escape, is_start; int result, len; -(void)utf; /* Avoid compiler warning */ +(void)utf; /* Avoid compiler warning. */ -if (separator >= 128) +#ifdef SUPPORT_UNICODE +if (utf && (separator >= 128 || escape >= 128)) { - /* Currently only ASCII separators are supported. */ + /* Currently only ASCII characters are supported. */ *bufflenptr = 0; return PCRE2_ERROR_CONVERT_SYNTAX; } +#endif with_escape = strchr(pcre2_escaped_literals, separator) != NULL; @@ -809,7 +855,7 @@ while (pattern < pattern_end) break; } - if (!no_escape && *pattern == ccontext->glob_escape) + if (escape != 0 && *pattern == escape) { pattern++; if (pattern >= pattern_end) @@ -908,6 +954,8 @@ while (pattern < pattern_end) out.out_str[0] = CHAR_ASTERISK; out.out_str[1] = CHAR_QUESTION_MARK; + if (pattern >= pattern_end) + out.out_str[1] = CHAR_PLUS; convert_glob_write_str(&out, 2); continue; } @@ -924,12 +972,12 @@ while (pattern < pattern_end) if (c == CHAR_LEFT_SQUARE_BRACKET) { result = convert_glob_parse_range(&pattern, pattern_end, - &out, separator, with_escape); + &out, utf, separator, with_escape, escape, no_wildsep); if (result != 0) break; continue; } - if (!no_escape && c == ccontext->glob_escape) + if (escape != 0 && c == escape) { if (pattern >= pattern_end) { |