summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2010-10-18 17:59:50 -0700
committerFather Chrysostomos <sprout@cpan.org>2010-10-21 05:53:40 -0700
commit1e2159890b8bf881fbc717f671f87ba2dec1da46 (patch)
tree13eb0b5586141bec3d77958ac79459837b8d74be
parenta7aaec61655ef1580eb319cf234db0f3d5c9981e (diff)
downloadperl-1e2159890b8bf881fbc717f671f87ba2dec1da46.tar.gz
[perl #78072] use re '/xism';
-rw-r--r--MANIFEST1
-rw-r--r--ext/re/re.pm82
-rw-r--r--ext/re/t/reflags.t116
-rw-r--r--op.c13
-rw-r--r--op_reg_common.h2
-rw-r--r--perl.h2
6 files changed, 216 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
index cfe75a635c..237cb5ac69 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3312,6 +3312,7 @@ ext/re/re.xs re extension external subroutines
ext/re/t/lexical_debug.pl generate debug output for lexical re 'debug'
ext/re/t/lexical_debug.t test that lexical re 'debug' works
ext/re/t/qr.t test that qr// is a Regexp
+ext/re/t/reflags.t see if re '/xism' pragma works
ext/re/t/re_funcs.t See if exportable 're' funcs in re.xs work
ext/re/t/regop.pl generate debug output for various patterns
ext/re/t/regop.t test RE optimizations by scraping debug output
diff --git a/ext/re/re.pm b/ext/re/re.pm
index 90e31f3ff9..881323222d 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -16,6 +16,20 @@ my %bitmask = (
eval => 0x00200000, # HINT_RE_EVAL
);
+my $flags_hint = 0x02000000; # HINT_RE_FLAGS
+my $PMMOD_SHIFT = 0;
+my %reflags = (
+ m => 1 << ($PMMOD_SHIFT + 0),
+ s => 1 << ($PMMOD_SHIFT + 1),
+ i => 1 << ($PMMOD_SHIFT + 2),
+ x => 1 << ($PMMOD_SHIFT + 3),
+ p => 1 << ($PMMOD_SHIFT + 4),
+# special cases:
+ l => 1 << ($PMMOD_SHIFT + 5),
+ u => 1 << ($PMMOD_SHIFT + 6),
+ d => 0,
+);
+
sub setcolor {
eval { # Ignore errors
require Term::Cap;
@@ -96,6 +110,7 @@ sub bits {
require Carp;
Carp::carp("Useless use of \"re\" pragma");
}
+ ARG:
foreach my $idx (0..$#_){
my $s=$_[$idx];
if ($s eq 'Debug' or $s eq 'Debugcolor') {
@@ -125,6 +140,33 @@ sub bits {
} elsif ($EXPORT_OK{$s}) {
require Exporter;
re->export_to_level(2, 're', $s);
+ } elsif ($s =~ s/^\///) {
+ my $reflags = $^H{reflags} || 0;
+ for(split//, $s) {
+ if (/[dul]/) {
+ if ($on) {
+ $^H{reflags_dul} = $reflags{$_};
+ }
+ else {
+ delete $^H{reflags_dul}
+ if defined $^H{reflags_dul}
+ && $^H{reflags_dul} == $reflags{$_};
+ }
+ } elsif (exists $reflags{$_}) {
+ $on
+ ? $reflags |= $reflags{$_}
+ : ($reflags &= ~$reflags{$_});
+ } else {
+ require Carp;
+ Carp::carp(
+ qq'Unknown regular expression flag "$_"'
+ );
+ next ARG;
+ }
+ }
+ ($^H{reflags} = $reflags or defined $^H{reflags_dul})
+ ? $^H |= $flags_hint
+ : ($^H &= ~$flags_hint);
} else {
require Carp;
Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ",
@@ -170,6 +212,11 @@ re - Perl pragma to alter regular expression behaviour
/foo${pat}bar/; # disallowed (with or without -T switch)
}
+ use re '/ix';
+ "FOO" =~ / foo /; # /ix implied
+ no re '/x';
+ "FOO" =~ /foo/; # just /i implied
+
use re 'debug'; # output debugging info during
/^(.*)$/s; # compile and run time
@@ -220,6 +267,41 @@ interpolation. Thus:
I<is> allowed if $pat is a precompiled regular expression, even
if $pat contains C<(?{ ... })> assertions or C<(??{ ... })> subexpressions.
+=head2 '/flags' mode
+
+When C<use re '/flags'> is specified, the given flags are automatically
+added to every regular expression till the end of the lexical scope.
+
+C<no re '/flags'> will turn off the effect of C<use re '/flags'> for the
+given flags.
+
+For example, if you want all your regular expressions to have /msx on by
+default, simply put
+
+ use re '/msx';
+
+at the top of your code.
+
+The /dul flags cancel each other out. So, in this example,
+
+ use re "/u";
+ "ss" =~ /\xdf/;
+ use re "/d";
+ "ss" =~ /\xdf/;
+
+The second C<use re> does an implicit C<no re '/u'>.
+
+Turning on the /l and /u flags with C<use re> takes precedence over the
+C<locale> pragma and the 'unicode_strings' C<feature>, for regular
+expressions. Turning off one of these flags when it is active reverts to
+the behaviour specified by whatever other pragmata are in scope. For
+example:
+
+ use feature "unicode_strings";
+ no re "/u"; # does nothing
+ use re "/l";
+ no re "/l"; # reverts to unicode_strings behaviour
+
=head2 'debug' mode
When C<use re 'debug'> is in effect, perl emits debugging messages when
diff --git a/ext/re/t/reflags.t b/ext/re/t/reflags.t
new file mode 100644
index 0000000000..26e8f05235
--- /dev/null
+++ b/ext/re/t/reflags.t
@@ -0,0 +1,116 @@
+#!./perl
+
+BEGIN {
+ require Config;
+ if (($Config::Config{'extensions'} !~ /\bre\b/) ){
+ print "1..0 # Skip -- Perl configured without re module\n";
+ exit 0;
+ }
+}
+
+use strict;
+
+use Test::More tests => 32;
+
+use re '/i';
+ok "Foo" =~ /foo/, 'use re "/i"';
+no re '/i';
+ok "Foo" !~ /foo/, 'no re "/i"';
+use re '/x';
+ok "foo" =~ / foo /, 'use re "/x"';
+no re '/x';
+ok "foo" !~ / foo /, 'no re "/x"';
+use re '/s';
+ok "\n" =~ /./, 'use re "/s"';
+no re '/s';
+ok "\n" !~ /./, 'no re "/s"';
+use re '/m';
+ok "\nfoo" =~ /^foo/, 'use re "/m"';
+no re '/m';
+ok "\nfoo" !~ /^foo/, 'no re "/m"';
+
+use re '/xism';
+ok qr// =~ /(?=.*x)(?=.*i)(?=.*s)(?=.*m)/, 'use re "/multiple"';
+no re '/ix';
+ok qr// =~ /(?!.*x)(?!.*i)(?=.*s)(?=.*m)/, 'no re "/i" only turns off /ix';
+no re '/sm';
+
+{
+ use re '/x';
+ ok 'frelp' =~ /f r e l p/, "use re '/x' in a lexical scope"
+}
+ok 'f r e l p' =~ /f r e l p/,
+ "use re '/x' turns off when it drops out of scope";
+
+SKIP: {
+ if (
+ !$Config::Config{d_setlocale}
+ || $Config::Config{ccflags} =~ /\bD?NO_LOCALE\b/
+ ) {
+ skip "no locale support", 7
+ }
+ use locale;
+ use re '/u';
+ is qr//, '(?^u:)', 'use re "/u" with active locale';
+ no re '/u';
+ is qr//, '(?^l:)', 'no re "/u" reverts to /l with locale in scope';
+ no re '/l';
+ is qr//, '(?^l:)', 'no re "/l" is a no-op with locale in scope';
+ use re '/d';
+ is qr//, '(?^:)', 'use re "/d" with locale in scope';
+ no re '/l';
+ no re '/u';
+ is qr//, '(?^:)',
+ 'no re "/l" and "/u" are no-ops when not on (locale scope)';
+ no re "/d";
+ is qr//, '(?^l:)', 'no re "/d" reverts to /l with locale in scope';
+ use re "/u";
+ no re "/d";
+ is qr//, '(?^u:)', 'no re "/d" is a no-op when not on (locale scope)';
+}
+
+{
+ use feature "unicode_strings";
+ use re '/d';
+ is qr//, '(?^:)', 'use re "/d" in Unicode scope';
+ no re '/d';
+ is qr//, '(?^u:)', 'no re "/d" reverts to /u in Unicode scope';
+ no re '/u';
+ is qr//, '(?^u:)', 'no re "/u" is a no-op in Unicode scope';
+ no re '/d';
+ is qr//, '(?^u:)', 'no re "/d" is a no-op when not on';
+ use re '/u';
+ no feature 'unicode_strings';
+ is qr//, '(?^u:)', 'use re "/u" is not tied to unicode_strings feature';
+}
+
+use re '/u';
+is qr//, '(?^u:)', 'use re "/u"';
+no re '/u';
+is qr//, '(?^:)', 'no re "/u" reverts to /d';
+no re '/u';
+is qr//, '(?^:)', 'no re "/u" is a no-op when not on';
+no re '/d';
+is qr//, '(?^:)', 'no re "/d" is a no-op when not on';
+
+{
+ local $SIG{__WARN__} = sub {
+ ok $_[0] =~ /Unknown regular expression flag "\x{100}"/,
+ "warning with unknown regexp flags in use re '/flags'"
+ };
+ import re "/\x{100}"
+}
+
+# use re '/flags' in combination with explicit flags
+use re '/xi';
+ok "A\n\n" =~ / a.$/sm, 'use re "/xi" in combination with explicit /sm';
+{
+ local $::TODO = "test requires perl 5.16 syntax";
+ # (remove the evals, the quotes, and the ‘no warnings’ when removing the
+ # to-do notice)
+ no warnings;
+ use re '/u';
+ is eval 'qr//d', '(?^:)', 'explicit /d in re "/u" scope';
+ use re '/d';
+ is eval 'qr//u', '(?^u:)', 'explicit /u in re "/d" scope';
+}
diff --git a/op.c b/op.c
index 528ecac1dc..acffe22457 100644
--- a/op.c
+++ b/op.c
@@ -3734,6 +3734,19 @@ Perl_newPMOP(pTHX_ I32 type, I32 flags)
else if ((! (PL_hints & HINT_BYTES)) && (PL_hints & HINT_UNI_8_BIT)) {
pmop->op_pmflags |= RXf_PMf_UNICODE;
}
+ if (PL_hints & HINT_RE_FLAGS) {
+ SV *reflags = Perl_refcounted_he_fetch(aTHX_
+ PL_compiling.cop_hints_hash, 0, STR_WITH_LEN("reflags"), 0, 0
+ );
+ if (reflags && SvOK(reflags)) pmop->op_pmflags |= SvIV(reflags);
+ reflags = Perl_refcounted_he_fetch(aTHX_
+ PL_compiling.cop_hints_hash, 0, STR_WITH_LEN("reflags_dul"), 0, 0
+ );
+ if (reflags && SvOK(reflags)) {
+ pmop->op_pmflags &= ~(RXf_PMf_LOCALE|RXf_PMf_UNICODE);
+ pmop->op_pmflags |= SvIV(reflags);
+ }
+ }
#ifdef USE_ITHREADS
diff --git a/op_reg_common.h b/op_reg_common.h
index ce12da542f..5b49ec769a 100644
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -18,11 +18,13 @@
/* This tells where the first of these bits is. Setting it to 0 saved cycles
* and memory. I (khw) think the code will work if changed back, but haven't
* tested it */
+/* Make sure to update lib/re.pm when changing this! */
#define RXf_PMf_STD_PMMOD_SHIFT 0
/* The bits need to be ordered so that the msix are contiguous starting at bit
* RXf_PMf_STD_PMMOD_SHIFT, followed by the p. See STD_PAT_MODS and
* INT_PAT_MODS in regexp.h for the reason contiguity is needed */
+/* Make sure to update lib/re.pm when changing these! */
#define RXf_PMf_MULTILINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+0)) /* /m */
#define RXf_PMf_SINGLELINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+1)) /* /s */
#define RXf_PMf_FOLD (1 << (RXf_PMf_STD_PMMOD_SHIFT+2)) /* /i */
diff --git a/perl.h b/perl.h
index a680e763ce..ccdc0782fd 100644
--- a/perl.h
+++ b/perl.h
@@ -4823,6 +4823,8 @@ enum { /* pass one of these to get_vtbl */
#define HINT_NO_AMAGIC 0x01000000 /* overloading pragma */
+#define HINT_RE_FLAGS 0x02000000 /* re '/xism' pragma */
+
/* The following are stored in $^H{sort}, not in PL_hints */
#define HINT_SORT_SORT_BITS 0x000000FF /* allow 256 different ones */
#define HINT_SORT_QUICKSORT 0x00000001