diff options
Diffstat (limited to 'ext/re/re.pm')
-rw-r--r-- | ext/re/re.pm | 341 |
1 files changed, 218 insertions, 123 deletions
diff --git a/ext/re/re.pm b/ext/re/re.pm index dfdfe8668a..e9d710f1b4 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -1,6 +1,170 @@ package re; -our $VERSION = 0.06_03; +# pragma for controlling the regex engine +use strict; +use warnings; + +our $VERSION = "0.06_03"; +our @ISA = qw(Exporter); +our @EXPORT_OK = qw(is_regexp regexp_pattern); +our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK; + +# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING *** +# +# If you modify these values see comment below! + +my %bitmask = ( + taint => 0x00100000, # HINT_RE_TAINT + eval => 0x00200000, # HINT_RE_EVAL +); + +# - File::Basename contains a literal for 'taint' as a fallback. If +# taint is changed here, File::Basename must be updated as well. +# +# - ExtUtils::ParseXS uses a hardcoded +# BEGIN { $^H |= 0x00200000 } +# in it to allow re.xs to be built. So if 'eval' is changed here then +# ExtUtils::ParseXS must be changed as well. +# +# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING *** + +sub setcolor { + eval { # Ignore errors + require Term::Cap; + + my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning. + my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue'; + my @props = split /,/, $props; + my $colors = join "\t", map {$terminal->Tputs($_,1)} @props; + + $colors =~ s/\0//g; + $ENV{PERL_RE_COLORS} = $colors; + }; + if ($@) { + $ENV{PERL_RE_COLORS} ||= qq'\t\t> <\t> <\t\t'; + } + +} + +my %flags = ( + COMPILE => 0x0000FF, + PARSE => 0x000001, + OPTIMISE => 0x000002, + TRIEC => 0x000004, + DUMP => 0x000008, + + EXECUTE => 0x00FF00, + INTUIT => 0x000100, + MATCH => 0x000200, + TRIEE => 0x000400, + + EXTRA => 0xFF0000, + TRIEM => 0x010000, + OFFSETS => 0x020000, + OFFSETSDBG => 0x040000, + STATE => 0x080000, + OPTIMISEM => 0x100000, + STACK => 0x280000, +); +$flags{ALL} = -1; +$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE}; +$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE}; +$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE}; +$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE}; +$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC}; + +my $installed; +my $installed_error; + +sub _do_install { + if ( ! defined($installed) ) { + require XSLoader; + $installed = eval { XSLoader::load('re', $VERSION) } || 0; + $installed_error = $@; + } +} + +sub _load_unload { + my ($on)= @_; + if ($on) { + _do_install(); + if ( ! $installed ) { + die "'re' not installed!? ($installed_error)"; + } else { + # We call install() every time, as if we didn't, we wouldn't + # "see" any changes to the color environment var since + # the last time it was called. + + # install() returns an integer, which if casted properly + # in C resolves to a structure containing the regex + # hooks. Setting it to a random integer will guarantee + # segfaults. + $^H{regcomp} = install(); + } + } else { + delete $^H{regcomp}; + } +} + +sub bits { + my $on = shift; + my $bits = 0; + unless (@_) { + require Carp; + Carp::carp("Useless use of \"re\" pragma"); + } + foreach my $idx (0..$#_){ + my $s=$_[$idx]; + if ($s eq 'Debug' or $s eq 'Debugcolor') { + setcolor() if $s =~/color/i; + ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS}; + for my $idx ($idx+1..$#_) { + if ($flags{$_[$idx]}) { + if ($on) { + ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]}; + } else { + ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]}; + } + } else { + require Carp; + Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ", + join(", ",sort keys %flags ) ); + } + } + _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS}); + last; + } elsif ($s eq 'debug' or $s eq 'debugcolor') { + setcolor() if $s =~/color/i; + _load_unload($on); + } elsif (exists $bitmask{$s}) { + $bits |= $bitmask{$s}; + } elsif ($EXPORT_OK{$s}) { + _do_install(); + require Exporter; + re->export_to_level(2, 're', $s); + } else { + require Carp; + Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ", + join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask), + ")"); + } + } + $bits; +} + +sub import { + shift; + $^H |= bits(1, @_); +} + +sub unimport { + shift; + $^H &= ~ bits(0, @_); +} + +1; + +__END__ =head1 NAME @@ -33,17 +197,29 @@ re - Perl pragma to alter regular expression behaviour use re qw(Debug All); # Finer tuned debugging options. use re qw(Debug More); no re qw(Debug ALL); # Turn of all re debugging in this scope + + use re qw(is_regexp regexp_pattern); # import utility functions + my ($pat,$mods)=regexp_pattern(qr/foo/i); + if (is_regexp($obj)) { + print "Got regexp: ", + scalar regexp_pattern($obj); # just as perl would stringify it + } # but no hassle with blessed re's. + (We use $^X in these examples because it's tainted by default.) =head1 DESCRIPTION +=head2 'taint' mode + When C<use re 'taint'> is in effect, and a tainted string is the target of a regex, the regex memories (or values returned by the m// operator in list context) are tainted. This feature is useful when regex operations on tainted data aren't meant to extract safe substrings, but to perform other transformations. +=head2 'eval' mode + When C<use re 'eval'> is in effect, a regex is allowed to contain C<(?{ ... })> zero-width assertions even if regular expression contains variable interpolation. That is normally disallowed, since it is a @@ -60,6 +236,8 @@ interpolation. Thus: I<is> allowed if $pat is a precompiled regular expression, even if $pat contains C<(?{ ... })> assertions. +=head2 'debug' mode + When C<use re 'debug'> is in effect, perl emits debugging messages when compiling and using regular expressions. The output is the same as that obtained by running a C<-DDEBUGGING>-enabled perl interpreter with the @@ -71,6 +249,14 @@ comma-separated list of C<termcap> properties to use for highlighting strings on/off, pre-point part on/off. See L<perldebug/"Debugging regular expressions"> for additional info. +As of 5.9.5 the directive C<use re 'debug'> and its equivalents are +lexically scoped, as the other directives are. However they have both +compile-time and run-time effects. + +See L<perlmodlib/Pragmatic Modules>. + +=head2 'Debug' mode + Similarly C<use re 'Debug'> produces debugging output, the difference being that it allows the fine tuning of what debugging output will be emitted. Options are divided into three groups, those related to @@ -208,141 +394,50 @@ As of 5.9.5 the directive C<use re 'debug'> and its equivalents are lexically scoped, as the other directives are. However they have both compile-time and run-time effects. -See L<perlmodlib/Pragmatic Modules>. +=head2 Exportable Functions -=cut +As of perl 5.9.5 're' debug contains a number of utility functions that +may be optionally exported into the callers namespace. They are listed +below. -# N.B. File::Basename contains a literal for 'taint' as a fallback. If -# taint is changed here, File::Basename must be updated as well. -my %bitmask = ( -taint => 0x00100000, # HINT_RE_TAINT -eval => 0x00200000, # HINT_RE_EVAL -); +=over 4 -sub setcolor { - eval { # Ignore errors - require Term::Cap; +=item is_regexp($ref) - my $terminal = Tgetent Term::Cap ({OSPEED => 9600}); # Avoid warning. - my $props = $ENV{PERL_RE_TC} || 'md,me,so,se,us,ue'; - my @props = split /,/, $props; - my $colors = join "\t", map {$terminal->Tputs($_,1)} @props; +Returns true if the argument is a compiled regular expression as returned +by C<qr//>, false if it is not. - $colors =~ s/\0//g; - $ENV{PERL_RE_COLORS} = $colors; - }; - if ($@) { - $ENV{PERL_RE_COLORS}||=qq'\t\t> <\t> <\t\t' - } +This function will not be confused by overloading or blessing. In +internals terms this extracts the regexp pointer out of the +PERL_MAGIC_qr structure so it it cannot be fooled. -} +=item regexp_pattern($ref) -my %flags = ( - COMPILE => 0x0000FF, - PARSE => 0x000001, - OPTIMISE => 0x000002, - TRIEC => 0x000004, - DUMP => 0x000008, +If the argument is a compiled regular expression as returned by C<qr//> +then this function returns the pattern. - EXECUTE => 0x00FF00, - INTUIT => 0x000100, - MATCH => 0x000200, - TRIEE => 0x000400, +In list context it returns a two element list, the first element +containing the pattern and the second containing the modifiers used when +the pattern was compiled. - EXTRA => 0xFF0000, - TRIEM => 0x010000, - OFFSETS => 0x020000, - OFFSETSDBG => 0x040000, - STATE => 0x080000, - OPTIMISEM => 0x100000, - STACK => 0x280000, -); -$flags{ALL} = -1; -$flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE}; -$flags{Extra} = $flags{EXECUTE} | $flags{COMPILE}; -$flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE}; -$flags{State} = $flags{DUMP} | $flags{EXECUTE} | $flags{STATE}; -$flags{TRIE} = $flags{DUMP} | $flags{EXECUTE} | $flags{TRIEC}; + my ($pat,$mods)=regexp_pattern($ref); -my $installed; -my $installed_error; +In scalar context it returns the same as perl would when strigifying a +raw qr// with the same pattern inside. If the argument is not a +compiled reference then this routine returns false but defined in scalar +context, and the empty list in list context. Thus the following -sub _load_unload { - my ($on)= @_; - if ($on) { - if ( ! defined($installed) ) { - require XSLoader; - $installed = eval { XSLoader::load('re') } || 0; - $installed_error = $@; - } - if ( ! $installed ) { - die "'re' not installed!? ($installed_error)"; - } else { - # We call install() every time, as if we didn't, we wouldn't - # "see" any changes to the color environment var since - # the last time it was called. + if (regexp_pattern($ref) eq '(?i-xsm:foo)') - # install() returns an integer, which if casted properly - # in C resolves to a structure containing the regex - # hooks. Setting it to a random integer will guarantee - # segfaults. - $^H{regcomp} = install(); - } - } else { - delete $^H{regcomp}; - } -} +will be warning free regardless of what $ref actually is. -sub bits { - my $on = shift; - my $bits = 0; - unless (@_) { - require Carp; - Carp::carp("Useless use of \"re\" pragma"); - } - foreach my $idx (0..$#_){ - my $s=$_[$idx]; - if ($s eq 'Debug' or $s eq 'Debugcolor') { - setcolor() if $s =~/color/i; - ${^RE_DEBUG_FLAGS} = 0 unless defined ${^RE_DEBUG_FLAGS}; - for my $idx ($idx+1..$#_) { - if ($flags{$_[$idx]}) { - if ($on) { - ${^RE_DEBUG_FLAGS} |= $flags{$_[$idx]}; - } else { - ${^RE_DEBUG_FLAGS} &= ~ $flags{$_[$idx]}; - } - } else { - require Carp; - Carp::carp("Unknown \"re\" Debug flag '$_[$idx]', possible flags: ", - join(", ",sort keys %flags ) ); - } - } - _load_unload($on ? 1 : ${^RE_DEBUG_FLAGS}); - last; - } elsif ($s eq 'debug' or $s eq 'debugcolor') { - setcolor() if $s =~/color/i; - _load_unload($on); - } elsif (exists $bitmask{$s}) { - $bits |= $bitmask{$s}; - } else { - require Carp; - Carp::carp("Unknown \"re\" subpragma '$s' (known ones are: ", - join(', ', map {qq('$_')} 'debug', 'debugcolor', sort keys %bitmask), - ")"); - } - } - $bits; -} +Like c<is_regexp> this function will not be confused by overloading +or blessing of the object. -sub import { - shift; - $^H |= bits(1, @_); -} +=back -sub unimport { - shift; - $^H &= ~ bits(0, @_); -} +=head1 SEE ALSO -1; +L<perlmodlib/Pragmatic Modules>. + +=cut |