diff options
author | Robin Houston <robin@cpan.org> | 2001-05-15 20:09:35 +0100 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-05-16 12:55:33 +0000 |
commit | b7dad2dc25528ee3fc23c1bca51116d48f988b04 (patch) | |
tree | ee4f0c4cae615db67ed421c0e10d0f6f96f2e81b /ext/B | |
parent | f03b998d08e7c001de73bf7d09048e2e120c56df (diff) | |
download | perl-b7dad2dc25528ee3fc23c1bca51116d48f988b04.tar.gz |
More on /x regexes
Message-ID: <20010515190935.A27268@penderel>
p4raw-id: //depot/perl@10120
Diffstat (limited to 'ext/B')
-rw-r--r-- | ext/B/B/Deparse.pm | 65 |
1 files changed, 53 insertions, 12 deletions
diff --git a/ext/B/B/Deparse.pm b/ext/B/B/Deparse.pm index 79a005e6a3..7b8bf13d1c 100644 --- a/ext/B/B/Deparse.pm +++ b/ext/B/B/Deparse.pm @@ -2962,13 +2962,12 @@ sub uninterp { return $str; } -# the same, but treat $|, $), $( and $ at the end of the string differently -sub re_uninterp { - my($str) = @_; - +{ +my $bal; +BEGIN { use re "eval"; # Matches any string which is balanced with respect to {braces} - my $bal = qr( + $bal = qr( (?: [^\\{}] | \\\\ @@ -2976,6 +2975,11 @@ sub re_uninterp { | \{(??{$bal})\} )* )x; +} + +# the same, but treat $|, $), $( and $ at the end of the string differently +sub re_uninterp { + my($str) = @_; $str =~ s/ ( ^|\G # $1 @@ -2998,11 +3002,34 @@ sub re_uninterp { return $str; } +# This is for regular expressions with the /x modifier +# We have to leave comments unmangled. sub re_uninterp_extended { - my ($str) = @_; - $str =~ s/^([^#]*)/re_uninterp($1)/emg; + my($str) = @_; + + $str =~ s/ + ( ^|\G # $1 + | [^\\] + ) + + ( # $2 + (?:\\\\)* + ) + + ( # $3 + ( \(\?\??\{$bal\}\) # $4 (skip over (?{}) and (??{}) blocks) + | \#[^\n]* # (skip over comments) + ) + | [\$\@] + (?!\||\)|\(|$) + | \\[uUlLQE] + ) + + /length($4) ? "$1$2$4" : "$1$2\\$3"/xeg; + return $str; } +} # character escapes, but not delimiters that might need to be escaped sub escape_str { # ASCII, UTF8 @@ -3020,6 +3047,8 @@ sub escape_str { # ASCII, UTF8 return $str; } +# For regexes with the /x modifier. +# Leave whitespace unmangled. sub escape_extended_re { my($str) = @_; $str =~ s/(.)/ord($1)>255 ? sprintf("\\x{%x}", ord($1)) : $1/eg; @@ -3153,7 +3182,7 @@ sub dq { my $first = $self->dq($op->first); my $last = $self->dq($op->last); - # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]" + # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]" ($last =~ /^[A-Z\\\^\[\]_?]/ && $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc || ($last =~ /^[{\[\w_]/ && @@ -3655,10 +3684,17 @@ sub pp_subst { $repl = $self->dq($repl); } } + my $extended = ($op->pmflags & PMf_EXTENDED); if (null $kid) { - $re = re_uninterp(escape_str(re_unback($op->precomp))); + my $unbacked = re_unback($op->precomp); + if ($extended) { + $re = re_uninterp_extended(escape_extended_re($unbacked)); + } + else { + $re = re_uninterp(escape_str($unbacked)); + } } else { - $re = $self->deparse($kid, 1); + ($re) = $self->regcomp($kid, 1, $extended); } $flags .= "e" if $op->pmflags & PMf_EVAL; $flags .= "g" if $op->pmflags & PMf_GLOBAL; @@ -3666,7 +3702,7 @@ sub pp_subst { $flags .= "m" if $op->pmflags & PMf_MULTILINE; $flags .= "o" if $op->pmflags & PMf_KEEP; $flags .= "s" if $op->pmflags & PMf_SINGLELINE; - $flags .= "x" if $op->pmflags & PMf_EXTENDED; + $flags .= "x" if $extended; $flags = $substwords{$flags} if $substwords{$flags}; if ($binop) { return $self->maybe_parens("$var =~ s" @@ -4081,11 +4117,16 @@ The obvious fix doesn't work, because these are different: =item * Constants (other than simple strings or numbers) don't work properly. -Examples that fail include: +Pathological examples that fail (and probably always will) include: use constant E2BIG => ($!=7); use constant x=>\$x; print x +The following could (and should) be made to work: + + use constant regex => qr/blah/; + print regex; + =item * An input file that uses source filtering probably won't be deparsed into |