diff options
-rw-r--r-- | pod/perlre.pod | 15 | ||||
-rw-r--r-- | regcomp.c | 18 | ||||
-rw-r--r-- | t/op/re_tests | 29 |
3 files changed, 60 insertions, 2 deletions
diff --git a/pod/perlre.pod b/pod/perlre.pod index f6fdc29eea..c72a71c0ac 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -277,6 +277,8 @@ C<)> in the comment. =item C<(?:pattern)> +=item C<(?imsx-imsx:pattern)> + This is for clustering, not capturing; it groups subexpressions like "()", but doesn't make backreferences as "()" does. So @@ -288,6 +290,15 @@ is like but doesn't spit out extra fields. +The letters between C<?> and C<:> act as flags modifiers, see +L<C<(?imsx-imsx)>>. In particular, + + /(?s-i:more.*than).*million/i + +is equivalent to more verbose + + /(?:(?s-i)more.*than).*million/i + =item C<(?=pattern)> A zero-width positive lookahead assertion. For example, C</\w+(?=\t)/> @@ -448,7 +459,7 @@ Say, matches a chunk of non-parentheses, possibly included in parentheses themselves. -=item C<(?imsx)> +=item C<(?imsx-imsx)> One or more embedded pattern-match modifiers. This is particularly useful for patterns that are specified in a table somewhere, some of @@ -464,6 +475,8 @@ pattern. For example: $pattern = "(?i)foobar"; if ( /$pattern/ ) { } +Letters after C<-> switch modifiers off. + These modifiers are localized inside an enclosing group (if any). Say, ( (?i) blah ) \s+ \1 @@ -982,6 +982,9 @@ reg(I32 paren, I32 *flagp) /* Make an OPEN node, if parenthesized. */ if (paren) { if (*regcomp_parse == '?') { + U16 posflags = 0, negflags = 0; + U16 *flagsp = &posflags; + regcomp_parse++; paren = *regcomp_parse++; ret = NULL; /* For look-ahead/behind. 
*/ @@ -1117,11 +1120,24 @@ reg(I32 paren, I32 *flagp) break; default: --regcomp_parse; + parse_flags: while (*regcomp_parse && strchr("iogcmsx", *regcomp_parse)) { if (*regcomp_parse != 'o') - pmflag(&regflags, *regcomp_parse); + pmflag(flagsp, *regcomp_parse); + ++regcomp_parse; + } + if (*regcomp_parse == '-') { + flagsp = &negflags; ++regcomp_parse; + goto parse_flags; } + regflags |= posflags; + regflags &= ~negflags; + if (*regcomp_parse == ':') { + regcomp_parse++; + paren = ':'; + break; + } unknown: if (*regcomp_parse != ')') FAIL2("Sequence (?%c...) not recognized", *regcomp_parse); diff --git a/t/op/re_tests b/t/op/re_tests index 78d89be11c..7ac20c3852 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -359,6 +359,35 @@ a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce ((?i)a)b Ab y $&:$1 Ab:A (?:(?i)a)b aB n - - ((?i)a)b aB n - - +(?i:a)b ab y $& ab +((?i:a))b ab y $&:$1 ab:a +(?i:a)b Ab y $& Ab +((?i:a))b Ab y $&:$1 Ab:A +(?i:a)b aB n - - +((?i:a))b aB n - - +'(?:(?-i)a)b'i ab y $& ab +'((?-i)a)b'i ab y $&:$1 ab:a +'(?:(?-i)a)b'i aB y $& aB +'((?-i)a)b'i aB y $&:$1 aB:a +'(?:(?-i)a)b'i Ab n - - +'((?-i)a)b'i Ab n - - +'(?:(?-i)a)b'i aB y $& aB +'((?-i)a)b'i aB y $1 a +'(?:(?-i)a)b'i AB n - - +'((?-i)a)b'i AB n - - +'(?-i:a)b'i ab y $& ab +'((?-i:a))b'i ab y $&:$1 ab:a +'(?-i:a)b'i aB y $& aB +'((?-i:a))b'i aB y $&:$1 aB:a +'(?-i:a)b'i Ab n - - +'((?-i:a))b'i Ab n - - +'(?-i:a)b'i aB y $& aB +'((?-i:a))b'i aB y $1 a +'(?-i:a)b'i AB n - - +'((?-i:a))b'i AB n - - +'((?-i:a.))b'i a\nB n - - +'((?s-i:a.))b'i a\nB y $1 a\n +'((?s-i:a.))b'i B\nB n - - (?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b))) cabbbb y $& cabbbb (?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb))) caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb y $& caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb '(ab)\d\1'i Ab4ab y $1 Ab |