summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pod/perlre.pod15
-rw-r--r--regcomp.c18
-rw-r--r--t/op/re_tests29
3 files changed, 60 insertions, 2 deletions
diff --git a/pod/perlre.pod b/pod/perlre.pod
index f6fdc29eea..c72a71c0ac 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -277,6 +277,8 @@ C<)> in the comment.
=item C<(?:pattern)>
+=item C<(?imsx-imsx:pattern)>
+
This is for clustering, not capturing; it groups subexpressions like
"()", but doesn't make backreferences as "()" does. So
@@ -288,6 +290,15 @@ is like
but doesn't spit out extra fields.
+The letters between C<?> and C<:> act as flag modifiers; see
+L<C<(?imsx-imsx)>>. In particular,
+
+ /(?s-i:more.*than).*million/i
+
+is equivalent to the more verbose
+
+ /(?:(?s-i)more.*than).*million/i
+
=item C<(?=pattern)>
A zero-width positive lookahead assertion. For example, C</\w+(?=\t)/>
@@ -448,7 +459,7 @@ Say,
matches a chunk of non-parentheses, possibly included in parentheses
themselves.
-=item C<(?imsx)>
+=item C<(?imsx-imsx)>
One or more embedded pattern-match modifiers. This is particularly
useful for patterns that are specified in a table somewhere, some of
@@ -464,6 +475,8 @@ pattern. For example:
$pattern = "(?i)foobar";
if ( /$pattern/ ) { }
+Letters after a C<-> switch the corresponding modifiers off.
+
These modifiers are localized inside an enclosing group (if any). Say,
( (?i) blah ) \s+ \1
diff --git a/regcomp.c b/regcomp.c
index 6292466929..663933da64 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -982,6 +982,9 @@ reg(I32 paren, I32 *flagp)
/* Make an OPEN node, if parenthesized. */
if (paren) {
if (*regcomp_parse == '?') {
+ U16 posflags = 0, negflags = 0;
+ U16 *flagsp = &posflags;
+
regcomp_parse++;
paren = *regcomp_parse++;
ret = NULL; /* For look-ahead/behind. */
@@ -1117,11 +1120,24 @@ reg(I32 paren, I32 *flagp)
break;
default:
--regcomp_parse;
+ parse_flags:
while (*regcomp_parse && strchr("iogcmsx", *regcomp_parse)) {
if (*regcomp_parse != 'o')
- pmflag(&regflags, *regcomp_parse);
+ pmflag(flagsp, *regcomp_parse);
+ ++regcomp_parse;
+ }
+ if (*regcomp_parse == '-') {
+ flagsp = &negflags;
++regcomp_parse;
+ goto parse_flags;
}
+ regflags |= posflags;
+ regflags &= ~negflags;
+ if (*regcomp_parse == ':') {
+ regcomp_parse++;
+ paren = ':';
+ break;
+ }
unknown:
if (*regcomp_parse != ')')
FAIL2("Sequence (?%c...) not recognized", *regcomp_parse);
diff --git a/t/op/re_tests b/t/op/re_tests
index 78d89be11c..7ac20c3852 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -359,6 +359,35 @@ a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce
((?i)a)b Ab y $&:$1 Ab:A
(?:(?i)a)b aB n - -
((?i)a)b aB n - -
+(?i:a)b ab y $& ab
+((?i:a))b ab y $&:$1 ab:a
+(?i:a)b Ab y $& Ab
+((?i:a))b Ab y $&:$1 Ab:A
+(?i:a)b aB n - -
+((?i:a))b aB n - -
+'(?:(?-i)a)b'i ab y $& ab
+'((?-i)a)b'i ab y $&:$1 ab:a
+'(?:(?-i)a)b'i aB y $& aB
+'((?-i)a)b'i aB y $&:$1 aB:a
+'(?:(?-i)a)b'i Ab n - -
+'((?-i)a)b'i Ab n - -
+'(?:(?-i)a)b'i aB y $& aB
+'((?-i)a)b'i aB y $1 a
+'(?:(?-i)a)b'i AB n - -
+'((?-i)a)b'i AB n - -
+'(?-i:a)b'i ab y $& ab
+'((?-i:a))b'i ab y $&:$1 ab:a
+'(?-i:a)b'i aB y $& aB
+'((?-i:a))b'i aB y $&:$1 aB:a
+'(?-i:a)b'i Ab n - -
+'((?-i:a))b'i Ab n - -
+'(?-i:a)b'i aB y $& aB
+'((?-i:a))b'i aB y $1 a
+'(?-i:a)b'i AB n - -
+'((?-i:a))b'i AB n - -
+'((?-i:a.))b'i a\nB n - -
+'((?s-i:a.))b'i a\nB y $1 a\n
+'((?s-i:a.))b'i B\nB n - -
(?:c|d)(?:)(?:a(?:)(?:b)(?:b(?:))(?:b(?:)(?:b))) cabbbb y $& cabbbb
(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb))) caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb y $& caaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
'(ab)\d\1'i Ab4ab y $1 Ab