diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-04-10 16:21:55 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-04-10 16:36:24 -0600 |
commit | f6a766d53013594a5dc80a234f754c6b75c3b724 (patch) | |
tree | c7b1063fcaf22d870beaff1c5babc14017b73d55 /regcomp.c | |
parent | ff3f26d2308508ca78054f72cc6eb2819386898d (diff) | |
download | perl-f6a766d53013594a5dc80a234f754c6b75c3b724.tar.gz |
PATCH: partial [perl #86972]: Allow /(?aia)/
This allows a second regex 'a' modifier in the infix form to not have to
be contiguous with the first, and improves the message if there are extra
modifiers.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 48 |
1 files changed, 35 insertions, 13 deletions
@@ -7070,7 +7070,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) { U32 posflags = 0, negflags = 0; U32 *flagsp = &posflags; - bool has_charset_modifier = 0; + char has_charset_modifier = '\0'; regex_charset cs = (RExC_utf8 || RExC_uni_semantics) ? REGEX_UNICODE_CHARSET : REGEX_DEPENDS_CHARSET; @@ -7082,40 +7082,50 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) switch (*RExC_parse) { CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp); case LOCALE_PAT_MOD: - if (has_charset_modifier || flagsp == &negflags) { + if (has_charset_modifier) { + goto excess_modifier; + } + else if (flagsp == &negflags) { goto fail_modifiers; } cs = REGEX_LOCALE_CHARSET; - has_charset_modifier = 1; + has_charset_modifier = LOCALE_PAT_MOD; RExC_contains_locale = 1; break; case UNICODE_PAT_MOD: - if (has_charset_modifier || flagsp == &negflags) { + if (has_charset_modifier) { + goto excess_modifier; + } + else if (flagsp == &negflags) { goto fail_modifiers; } cs = REGEX_UNICODE_CHARSET; - has_charset_modifier = 1; + has_charset_modifier = UNICODE_PAT_MOD; break; case ASCII_RESTRICT_PAT_MOD: - if (has_charset_modifier || flagsp == &negflags) { + if (flagsp == &negflags) { goto fail_modifiers; } - if (*(RExC_parse + 1) == ASCII_RESTRICT_PAT_MOD) { + if (has_charset_modifier) { + if (cs != REGEX_ASCII_RESTRICTED_CHARSET) { + goto excess_modifier; + } /* Doubled modifier implies more restricted */ - cs = REGEX_ASCII_MORE_RESTRICTED_CHARSET; - RExC_parse++; - } + cs = REGEX_ASCII_MORE_RESTRICTED_CHARSET; + } else { cs = REGEX_ASCII_RESTRICTED_CHARSET; } - has_charset_modifier = 1; + has_charset_modifier = ASCII_RESTRICT_PAT_MOD; break; case DEPENDS_PAT_MOD: if (has_use_defaults - || has_charset_modifier || flagsp == &negflags) { goto fail_modifiers; + } + else if (has_charset_modifier) { + goto excess_modifier; } /* The dual charset means unicode semantics if the @@ -7125,8 +7135,20 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) cs = (RExC_utf8 || RExC_uni_semantics) ? REGEX_UNICODE_CHARSET : REGEX_DEPENDS_CHARSET; - has_charset_modifier = 1; + has_charset_modifier = DEPENDS_PAT_MOD; break; + excess_modifier: + RExC_parse++; + if (has_charset_modifier == ASCII_RESTRICT_PAT_MOD) { + vFAIL2("Regexp modifier \"/%c\" may appear a maximum of twice", ASCII_RESTRICT_PAT_MOD); + } + else if (has_charset_modifier == *(RExC_parse - 1)) { + vFAIL2("Regexp modifier \"/%c\" may not appear twice", *(RExC_parse - 1)); + } + else { + vFAIL3("Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", has_charset_modifier, *(RExC_parse - 1)); + } + /*NOTREACHED*/ case ONCE_PAT_MOD: /* 'o' */ case GLOBAL_PAT_MOD: /* 'g' */ if (SIZE_ONLY && ckWARN(WARN_REGEXP)) { |