diff options
-rw-r--r-- | op_reg_common.h | 2 | ||||
-rw-r--r-- | pod/perlfunc.pod | 3 | ||||
-rw-r--r-- | pod/perllocale.pod | 2 | ||||
-rw-r--r-- | pod/perlop.pod | 17 | ||||
-rw-r--r-- | pod/perlre.pod | 22 | ||||
-rw-r--r-- | pod/perlreapi.pod | 2 | ||||
-rw-r--r-- | pod/perlreref.pod | 5 | ||||
-rw-r--r-- | pod/perlretut.pod | 6 | ||||
-rw-r--r-- | regcomp.c | 2 | ||||
-rw-r--r-- | regexec.c | 2 | ||||
-rw-r--r-- | universal.c | 2 |
11 files changed, 47 insertions, 18 deletions
diff --git a/op_reg_common.h b/op_reg_common.h index 3edd4d8fa2..956a5b8a23 100644 --- a/op_reg_common.h +++ b/op_reg_common.h @@ -23,7 +23,7 @@ #define RXf_PMf_STD_PMMOD_SHIFT 0 -/* The bits need to be ordered so that the msix are contiguous starting at bit +/* The bits need to be ordered so that the msixn are contiguous starting at bit * RXf_PMf_STD_PMMOD_SHIFT, followed by the p. See STD_PAT_MODS and * INT_PAT_MODS in regexp.h for the reason contiguity is needed */ /* Make sure to update lib/re.pm when changing these! */ diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index ce945eb050..9dc4cc6c60 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -7000,7 +7000,8 @@ uses empty string matches as separators to produce the output list of its component characters. As a special case for C<split>, the empty pattern given in -L<match operator|perlop/"m/PATTERN/msixpodualgc"> syntax (C<//>) specifically matches the empty string, which is contrary to its usual +L<match operator|perlop/"m/PATTERN/msixpodualngc"> syntax (C<//>) +specifically matches the empty string, which is contrary to its usual interpretation as the last successful match. If PATTERN is C</^/>, then it is treated as if it used the diff --git a/pod/perllocale.pod b/pod/perllocale.pod index 3b2d79dcc8..a44ffbc948 100644 --- a/pod/perllocale.pod +++ b/pod/perllocale.pod @@ -239,7 +239,7 @@ is. =item * Regular expression patterns can be compiled using -L<qrE<sol>E<sol>|perlop/qrE<sol>STRINGE<sol>msixpodual> with actual +L<qrE<sol>E<sol>|perlop/qrE<sol>STRINGE<sol>msixpodualn> with actual matching deferred to later. Again, it is whether or not the compilation was done within the scope of C<use locale> that determines the match behavior, not if the matches are done within such a scope or not. diff --git a/pod/perlop.pod b/pod/perlop.pod index dc6d2cfdc1..d4b0cd102b 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1629,14 +1629,14 @@ matching and related activities. 
=over 8 -=item qr/STRING/msixpodual +=item qr/STRING/msixpodualn X<qr> X</i> X</m> X</o> X</s> X</x> X</p> This operator quotes (and possibly compiles) its I<STRING> as a regular expression. I<STRING> is interpolated the same way as I<PATTERN> in C<m/PATTERN/>. If "'" is used as the delimiter, no interpolation is done. Returns a Perl value which may be used instead of the -corresponding C</STRING/msixpodual> expression. The returned value is a +corresponding C</STRING/msixpodualn> expression. The returned value is a normalized version of the original pattern. It magically differs from a string containing the same characters: C<ref(qr/x/)> returns "Regexp"; however, dereferencing it is not well defined (you currently get the @@ -1699,9 +1699,10 @@ Options (specified by the following modifiers) are: l Use the locale. u Use Unicode rules. d Use Unicode or native charset, as in 5.12 and earlier. + n Non-capture mode. Don't let () fill in $1, $2, etc... If a precompiled pattern is embedded in a larger pattern then the effect -of "msixpluad" will be propagated appropriately. The effect the "o" +of "msixpluadn" will be propagated appropriately. The effect the "o" modifier has is not propagated, being restricted to those patterns explicitly using it. @@ -1715,12 +1716,12 @@ for a detailed look at the semantics of regular expressions. In particular, all modifiers except the largely obsolete C</o> are further explained in L<perlre/Modifiers>. C</o> is described in the next section. -=item m/PATTERN/msixpodualgc +=item m/PATTERN/msixpodualngc X<m> X<operator, match> X<regexp, options> X<regexp> X<regex, options> X<regex> X</m> X</s> X</i> X</x> X</p> X</o> X</g> X</c> -=item /PATTERN/msixpodualgc +=item /PATTERN/msixpodualngc Searches a string for a pattern match, and in scalar context returns true if it succeeds, false if it fails. 
If no string is specified @@ -1973,10 +1974,10 @@ Here is the output (split into several lines): lowercase line-noise lowercase lowercase line-noise lowercase lowercase line-noise MiXeD line-noise. That's all! -=item m?PATTERN?msixpodualgc +=item m?PATTERN?msixpodualngc X<?> X<operator, match-once> -=item ?PATTERN?msixpodualgc +=item ?PATTERN?msixpodualngc This is just like the C<m/PATTERN/> search, except that it matches only once between calls to the reset() operator. This is a useful @@ -2005,7 +2006,7 @@ but the resulting C<?PATTERN?> syntax is deprecated, will warn on usage and might be removed from a future stable release of Perl (without further notice!). -=item s/PATTERN/REPLACEMENT/msixpodualgcer +=item s/PATTERN/REPLACEMENT/msixpodualngcer X<substitute> X<substitution> X<replace> X<regexp, replace> X<regexp, substitute> X</m> X</s> X</i> X</x> X</p> X</o> X</g> X</c> X</e> X</r> diff --git a/pod/perlre.pod b/pod/perlre.pod index f11e5ff268..ff8cb18789 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -102,6 +102,26 @@ These modifiers, all new in 5.14, affect which character-set rules (Unicode, etc.) are used, as described below in L</Character set modifiers>. +=item n +X</n> X<regex, non-capture> X<regexp, non-capture> +X<regular expression, non-capture> + +Prevent the grouping metacharacters C<()> from capturing. This modifier, +new in 5.22, will stop C<$1>, C<$2>, etc. from being filled in. + + "hello" =~ /(hi|hello)/; # $1 is "hello" + "hello" =~ /(hi|hello)/n; # $1 is undef + +This is equivalent to putting C<?:> at the beginning of every capturing group: + + "hello" =~ /(?:hi|hello)/; # $1 is undef + +C</n> can be negated on a per-group basis. Alternatively, named captures +may still be used. 
+ + "hello" =~ /(?-n:(hi|hello))/n; # $1 is "hello" + "hello" =~ /(?<greet>hi|hello)/n; # $1 is "hello", $+{greet} is "hello" + =item Other Modifiers There are a number of flags that can be found at the end of regular @@ -117,7 +137,7 @@ L<perlretut/"Using regular expressions in Perl"> are: Substitution-specific modifiers described in -L<perlop/"s/PATTERN/REPLACEMENT/msixpodualgcer"> are: +L<perlop/"s/PATTERN/REPLACEMENT/msixpodualngcer"> are: e - evaluate the right-hand side as an expression ee - evaluate the right side as a string then eval the result diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod index 3e25626cc5..c11ff9e52b 100644 --- a/pod/perlreapi.pod +++ b/pod/perlreapi.pod @@ -97,7 +97,7 @@ stringify everything using the snippet above, but that doesn't mean other engines have to. The C<flags> parameter is a bitfield which indicates which of the -C<msixp> flags the regex was compiled with. It also contains +C<msixpn> flags the regex was compiled with. It also contains additional info, such as if C<use locale> is in effect. The C<eogc> flags are stripped out before being passed to the comp diff --git a/pod/perlreref.pod b/pod/perlreref.pod index 97d64cb5a0..7ae8f6cfdf 100644 --- a/pod/perlreref.pod +++ b/pod/perlreref.pod @@ -21,7 +21,7 @@ false if the match succeeds, and true if it fails. $var !~ /foo/; -C<m/pattern/msixpogcdual> searches a string for a pattern match, +C<m/pattern/msixpogcdualn> searches a string for a pattern match, applying the given options. m Multiline mode - ^ and $ match internal lines @@ -39,13 +39,14 @@ applying the given options. u match according to Unicode rules d match according to native rules unless something indicates Unicode + n Non-capture mode. Don't let () fill in $1, $2, etc... If 'pattern' is an empty string, the last I<successfully> matched regex is used. Delimiters other than '/' may be used for both this operator and the following ones. The leading C<m> can be omitted if the delimiter is '/'. 
-C<qr/pattern/msixpodual> lets you store a regex in a variable, +C<qr/pattern/msixpodualn> lets you store a regex in a variable, or pass one around. Modifiers as for C<m//>, and are stored within the regex. diff --git a/pod/perlretut.pod b/pod/perlretut.pod index 79400fc19d..957b29686d 100644 --- a/pod/perlretut.pod +++ b/pod/perlretut.pod @@ -958,6 +958,12 @@ required for some reason: @num = split /(a|b)+/, $x; # @num = ('12','a','34','a','5') @num = split /(?:a|b)+/, $x; # @num = ('12','34','5') +In Perl 5.22 and later, all groups within a regexp can be set to +non-capturing by using the new C</n> flag: + + "hello" =~ /(hi|hello)/n; # $1 is not set! + +See L<perlre/"n"> for more information. =head2 Matching repetitions diff --git a/regcomp.c b/regcomp.c --- a/regcomp.c +++ b/regcomp.c @@ -6785,7 +6785,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, == REG_RUN_ON_COMMENT_SEEN); U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD) >> RXf_PMf_STD_PMMOD_SHIFT); - const char *fptr = STD_PAT_MODS; /*"msix"*/ + const char *fptr = STD_PAT_MODS; /*"msixn"*/ char *p; /* Allocate for the worst case, which is all the std flags are turned * on. If more precision is desired, we could do a population count of diff --git a/regexec.c b/regexec.c --- a/regexec.c +++ b/regexec.c @@ -5529,7 +5529,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) assert(!(scan->flags & ~RXf_PMf_COMPILETIME)); re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL, rex->engine, NULL, NULL, - /* copy /msix etc to inner pattern */ + /* copy /msixn etc to inner pattern */ ARG2L(scan), pm_flags); diff --git a/universal.c b/universal.c index 5d78fd6327..17ec475e26 100644 --- a/universal.c +++ b/universal.c @@ -966,7 +966,7 @@ XS(XS_re_regexp_pattern) XSRETURN(2); } else { /* Scalar, so use the string that Perl would return */ - /* return the pattern in (?msix:..) format */ + /* return the pattern in (?msixn:..) format */ #if PERL_VERSION >= 11 pattern = sv_2mortal(newSVsv(MUTABLE_SV(re))); #else |