summaryrefslogtreecommitdiff
path: root/mad/P5re.pm
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2006-03-09 22:03:55 +0000
committerNicholas Clark <nick@ccl4.org>2006-03-09 22:03:55 +0000
commit6a28abbc8c08ff5da570415ad3f8a343b51e103d (patch)
tree4a8c897e11e6ccb3b7d2824f1472042a20980cd3 /mad/P5re.pm
parent32d45c1d3bb497e6d65453056058531c637f7772 (diff)
downloadperl-6a28abbc8c08ff5da570415ad3f8a343b51e103d.tar.gz
Add the Perl 5 to Perl 5 convertor scripts.
p4raw-id: //depot/perl@27453
Diffstat (limited to 'mad/P5re.pm')
-rwxr-xr-xmad/P5re.pm650
1 files changed, 650 insertions, 0 deletions
diff --git a/mad/P5re.pm b/mad/P5re.pm
new file mode 100755
index 0000000000..24037ecc0d
--- /dev/null
+++ b/mad/P5re.pm
@@ -0,0 +1,650 @@
+#!/usr/bin/perl
+
+# Copyright (C) 2005, Larry Wall
+# This software may be copied under the same terms as Perl.
+
+package P5re;
+
+use strict;
+use warnings;
+
+our @EXPORT_OK = qw(re re2xml qr2xml);
+
+my $indent = 0;
+my $in = "";
+my $delim = 1;
+my $debug = 0;
+my $maxbrack;
+
+our $extended;
+our $insensitive;
+our $singleline;
+our $multiline;
+
+my %xmlish = (
+ chr(0x00) => "STUPIDXML(#x00)",
+ chr(0x01) => "STUPIDXML(#x01)",
+ chr(0x02) => "STUPIDXML(#x02)",
+ chr(0x03) => "STUPIDXML(#x03)",
+ chr(0x04) => "STUPIDXML(#x04)",
+ chr(0x05) => "STUPIDXML(#x05)",
+ chr(0x06) => "STUPIDXML(#x06)",
+ chr(0x07) => "STUPIDXML(#x07)",
+ chr(0x08) => "STUPIDXML(#x08)",
+ chr(0x09) => "&#9;",
+ chr(0x0a) => "&#10;",
+ chr(0x0b) => "STUPIDXML(#x0b)",
+ chr(0x0c) => "STUPIDXML(#x0c)",
+ chr(0x0d) => "&#13;",
+ chr(0x0e) => "STUPIDXML(#x0e)",
+ chr(0x0f) => "STUPIDXML(#x0f)",
+ chr(0x10) => "STUPIDXML(#x10)",
+ chr(0x11) => "STUPIDXML(#x11)",
+ chr(0x12) => "STUPIDXML(#x12)",
+ chr(0x13) => "STUPIDXML(#x13)",
+ chr(0x14) => "STUPIDXML(#x14)",
+ chr(0x15) => "STUPIDXML(#x15)",
+ chr(0x16) => "STUPIDXML(#x16)",
+ chr(0x17) => "STUPIDXML(#x17)",
+ chr(0x18) => "STUPIDXML(#x18)",
+ chr(0x19) => "STUPIDXML(#x19)",
+ chr(0x1a) => "STUPIDXML(#x1a)",
+ chr(0x1b) => "STUPIDXML(#x1b)",
+ chr(0x1c) => "STUPIDXML(#x1c)",
+ chr(0x1d) => "STUPIDXML(#x1d)",
+ chr(0x1e) => "STUPIDXML(#x1e)",
+ chr(0x1f) => "STUPIDXML(#x1f)",
+ chr(0x7f) => "STUPIDXML(#x7f)",
+ chr(0x80) => "STUPIDXML(#x80)",
+ chr(0x81) => "STUPIDXML(#x81)",
+ chr(0x82) => "STUPIDXML(#x82)",
+ chr(0x83) => "STUPIDXML(#x83)",
+ chr(0x84) => "STUPIDXML(#x84)",
+ chr(0x86) => "STUPIDXML(#x86)",
+ chr(0x87) => "STUPIDXML(#x87)",
+ chr(0x88) => "STUPIDXML(#x88)",
+ chr(0x89) => "STUPIDXML(#x89)",
+ chr(0x90) => "STUPIDXML(#x90)",
+ chr(0x91) => "STUPIDXML(#x91)",
+ chr(0x92) => "STUPIDXML(#x92)",
+ chr(0x93) => "STUPIDXML(#x93)",
+ chr(0x94) => "STUPIDXML(#x94)",
+ chr(0x95) => "STUPIDXML(#x95)",
+ chr(0x96) => "STUPIDXML(#x96)",
+ chr(0x97) => "STUPIDXML(#x97)",
+ chr(0x98) => "STUPIDXML(#x98)",
+ chr(0x99) => "STUPIDXML(#x99)",
+ chr(0x9a) => "STUPIDXML(#x9a)",
+ chr(0x9b) => "STUPIDXML(#x9b)",
+ chr(0x9c) => "STUPIDXML(#x9c)",
+ chr(0x9d) => "STUPIDXML(#x9d)",
+ chr(0x9e) => "STUPIDXML(#x9e)",
+ chr(0x9f) => "STUPIDXML(#x9f)",
+ '<' => "&lt;",
+ '>' => "&gt;",
+ '&' => "&amp;",
+ '"' => "&#34;", # XML idiocy
+);
+
+sub xmlquote {
+ my $text = shift;
+ $text =~ s/(.)/$xmlish{$1} || $1/seg;
+ return $text;
+}
+
+sub text {
+ my $self = shift;
+ return xmlquote($self->{text});
+}
+
+sub rep {
+ my $self = shift;
+ return xmlquote($self->{rep});
+}
+
+sub xmlkids {
+ my $self = shift;
+ my $array = $self->{Kids};
+ my $ret = "";
+ $indent += 2;
+ $in = ' ' x $indent;
+ foreach my $chunk (@$array) {
+ if (ref $chunk eq "ARRAY") {
+ die;
+ }
+ elsif (ref $chunk) {
+ $ret .= $chunk->xml();
+ }
+ else {
+ warn $chunk;
+ }
+ }
+ $indent -= 2;
+ $in = ' ' x $indent;
+ return $ret;
+};
+
+package P5re::RE; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ my %flags = @_;
+ if ($flags{indent}) {
+ $indent = delete $flags{indent} || 0;
+ $in = ' ' x $indent;
+ }
+
+ my $kind = $self->{kind};
+
+ my $first = $self->{Kids}[0];
+ if ($first and ref $first eq 'P5re::Mod') {
+ for my $c (qw(i m s x)) {
+ next unless defined $first->{$c};
+ $self->{$c} = $first->{$c};
+ delete $first->{$c};
+ }
+ }
+
+ my $modifiers = "";
+ foreach my $k (sort keys %$self) {
+ next if $k eq 'kind' or $k eq "Kids";
+ my $v = $self->{$k};
+ $k =~ s/^[A-Z]//;
+ $modifiers .= " $k=\"$v\"";
+ }
+ my $text = "$in<$kind$modifiers>\n";
+ $text .= $self->xmlkids();
+ $text .= "$in</$kind>\n";
+ return $text;
+}
+
+package P5re::Alt; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ my $text = "$in<alt>\n";
+ $text .= $self->xmlkids();
+ $text .= "$in</alt>\n";
+ return $text;
+}
+
+#package P5re::Atom; our @ISA = 'P5re';
+#
+#sub xml {
+# my $self = shift;
+# my $text = "$in<atom>\n";
+# $text .= $self->xmlkids();
+# $text .= "$in</atom>\n";
+# return $text;
+#}
+
+package P5re::Quant; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ my $q = $self->{rep};
+ my $min = $self->{min};
+ my $max = $self->{max};
+ my $greedy = $self->{greedy};
+ my $text = "$in<quant rep=\"$q\" min=\"$min\" max=\"$max\" greedy=\"$greedy\">\n";
+ $text .= $self->xmlkids();
+ $text .= "$in</quant>\n";
+ return $text;
+}
+
+package P5re::White; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ return "$in<white text=\"" . $self->text() . "\" />\n";
+}
+
+package P5re::Char; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ return "$in<char text=\"" . $self->text() . "\" />\n";
+}
+
+package P5re::Comment; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ return "$in<comment rep=\"" . $self->rep() . "\" />\n";
+}
+
+package P5re::Mod; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ my $modifiers = "";
+ foreach my $k (sort keys %$self) {
+ next if $k eq 'kind' or $k eq "Kids";
+ my $v = $self->{$k};
+ $k =~ s/^[A-Z]//;
+ $modifiers .= " $k=\"$v\"";
+ }
+ return "$in<mod$modifiers />\n";
+}
+
+package P5re::Meta; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ my $sem = "";
+ if ($self->{sem}) {
+ $sem = 'sem="' . $self->{sem} . '" '
+ }
+ return "$in<meta rep=\"" . $self->rep() . "\" $sem/>\n";
+}
+
+package P5re::Back; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ return "$in<backref to=\"" . P5re::xmlquote($self->{to}) . "\"/>\n";
+}
+
+package P5re::Var; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ return "$in<var name=\"" . $self->{name} . "\" />\n";
+}
+
+package P5re::Closure; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ return "$in<closure rep=\"" . P5re::xmlquote($self->{rep}) . "\" />\n";
+}
+
+package P5re::CClass; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ my $neg = $self->{neg} ? "negated" : "normal";
+ my $text = "$in<cclass match=\"$neg\">\n";
+ $text .= $self->xmlkids();
+ $text .= "$in</cclass>\n";
+ return $text;
+}
+
+package P5re::Range; our @ISA = 'P5re';
+
+sub xml {
+ my $self = shift;
+ my $text = "$in<range>\n";
+ $text .= $self->xmlkids();
+ $text .= "$in</range>\n";
+ return $text;
+}
+
+package P5re;
+
+unless (caller) {
+ while (<>) {
+ chomp;
+ print qr2xml($_);
+ print "#######################################\n";
+ }
+}
+
+sub qrparse {
+ my $qr = shift;
+ my $mod;
+ if ($qr =~ /^s/) {
+ $qr =~ s/^(?:\w*)(\W)((?:\\.|.)*?)\1(.*)\1(\w*)$/$2/;
+ $mod = $4;
+ }
+ else {
+ $qr =~ s/^(?:\w*)(\W)(.*)\1(\w*)$/$2/;
+ $mod = $3;
+ }
+ substr($qr,0,0) = "(?$mod)" if defined $mod and $mod ne "";
+ return parse($qr,@_);
+}
+
+sub qr2xml {
+ return qrparse(@_)->xml();
+}
+
+sub re2xml {
+ my $re = shift;
+ return parse($re,@_)->xml();
+}
+
+sub parse {
+ local($_) = shift;
+ my %flags = @_;
+ $maxbrack = 0;
+ $indent = delete $flags{indent} || 0;
+ $in = ' ' x $indent;
+ warn "$_\n" if $debug;
+ my $re = re('re');
+ @$re{keys %flags} = values %flags;
+ return $re;
+}
+
+sub re {
+ my $kind = shift;
+
+ my $oldextended = $extended;
+ my $oldinsensitive = $insensitive;
+ my $oldmultiline = $multiline;
+ my $oldsingleline = $singleline;
+
+ local $extended = $extended;
+ local $insensitive = $insensitive;
+ local $multiline = $multiline;
+ local $singleline = $singleline;
+
+ my $first = alt();
+
+ my $re;
+ if (not /^\|/) {
+ $first->{kind} = $kind;
+ $re = bless $first, "P5re::RE"; # rebless to remove single alt
+ }
+ else {
+ my @alts = ($first);
+
+ while (s/^\|//) {
+ push(@alts, alt());
+ }
+ $re = bless { Kids => [@alts], kind => $kind }, "P5re::RE";
+ }
+
+ $re->{x} = $oldextended || 0;
+ $re->{i} = $oldinsensitive || 0;
+ $re->{m} = $oldmultiline || 0;
+ $re->{s} = $oldsingleline || 0;
+ return $re;
+}
+
+sub alt {
+ my @quants;
+
+ my $quant;
+ while ($quant = quant()) {
+ if (@quants and
+ ref $quant eq ref $quants[-1] and
+ exists $quants[-1]{text} and
+ exists $quant->{text} )
+ {
+ $quants[-1]{text} .= $quant->{text};
+ }
+ else {
+ push(@quants, $quant);
+ }
+ }
+ return bless { Kids => [@quants] }, "P5re::Alt";
+}
+
+sub quant {
+ my $atom = atom();
+ return 0 unless $atom;
+# $atom = bless { Kids => [$atom] }, "P5re::Atom";
+ if (s/^(([*+?])(\??)|\{(\d+)(?:(,)(\d*))?\}(\??))//) {
+ my $min = 0;
+ my $max = "Inf";
+ my $greed = 1;
+ if ($2) {
+ if ($2 eq '+') {
+ $min = 1;
+ }
+ elsif ($2 eq '?') {
+ $max = 1;
+ }
+ $greed = 0 if $3;
+ }
+ elsif (defined $4) {
+ $min = $4;
+ if ($5) {
+ $max = $6 if $6;
+ }
+ else {
+ $max = $min;
+ }
+ $greed = 0 if $7;
+ }
+ $greed = "na" if $min == $max;
+ return bless { Kids => [$atom],
+ rep => $1,
+ min => $min,
+ max => $max,
+ greedy => $greed
+ }, "P5re::Quant";
+ }
+ return $atom;
+}
+
+sub atom {
+ my $re;
+ if ($_ eq "") { return 0 }
+ if (/^[)|]/) { return 0 }
+
+ # whitespace is special because we don't know if /x is in effect
+ if ($extended) {
+ if (s/^(?=\s|#)(\s*(?:#.*)?)//) { return bless { text => $1 }, "P5re::White"; }
+ }
+
+ # all the parenthesized forms
+ if (s/^\(//) {
+ if (s/^\?://) {
+ $re = re('bracket');
+ }
+ elsif (s/^(\?#.*?)\)/)/) {
+ $re = bless { rep => "($1)" }, "P5re::Comment";
+ }
+ elsif (s/^\?=//) {
+ $re = re('lookahead');
+ }
+ elsif (s/^\?!//) {
+ $re = re('neglookahead');
+ }
+ elsif (s/^\?<=//) {
+ $re = re('lookbehind');
+ }
+ elsif (s/^\?<!//) {
+ $re = re('neglookbehind');
+ }
+ elsif (s/^\?>//) {
+ $re = re('nobacktrack');
+ }
+ elsif (s/^(\?\??\{.*?\})\)/)/) {
+ $re = bless { rep => "($1)" }, "P5re::Closure";
+ }
+ elsif (s/^(\?\(\d+\))//) {
+ my $mods = $1;
+ $re = re('conditional');
+ $re->{Arep} = "$mods";
+ }
+ elsif (s/^\?(?=\(\?)//) {
+ my $mods = $1;
+ my $cond = atom();
+ $re = re('conditional');
+ unshift(@{$re->{Kids}}, $cond);
+ }
+ elsif (s/^(\?[-\w]+)://) {
+ my $mods = $1;
+ local $extended = $extended;
+ local $insensitive = $insensitive;
+ local $multiline = $multiline;
+ local $singleline = $singleline;
+ setmods($mods);
+ $re = re('bracket');
+ $re->{Arep} = "($mods)";
+ $re->{x} = $extended || 0;
+ $re->{i} = $insensitive || 0;
+ $re->{m} = $multiline || 0;
+ $re->{s} = $singleline || 0;
+ }
+ elsif (s/^(\?[-\w]+)//) {
+ my $mods = $1;
+ $re = bless { Arep => "($mods)" }, "P5re::Mod";
+ setmods($mods);
+ $re->{x} = $extended || 0;
+ $re->{i} = $insensitive || 0;
+ $re->{m} = $multiline || 0;
+ $re->{s} = $singleline || 0;
+ }
+ elsif (s/^\?//) {
+ $re = re('UNRECOGNIZED');
+ }
+ else {
+ my $brack = ++$maxbrack;
+ $re = re('capture');
+ $re->{Ato} = $brack;
+ }
+
+ if (not s/^\)//) { warn "Expected right paren at: '$_'" }
+ return $re;
+ }
+
+ # special meta
+ if (s/^\.//) {
+ my $s = $singleline ? '.' : '\N';
+ return bless { rep => '.', sem => $s }, "P5re::Meta";
+ }
+ if (s/^\^//) {
+ my $s = $multiline ? '^^' : '^';
+ return bless { rep => '^', sem => $s }, "P5re::Meta";
+ }
+ if (s/^\$(?:$|(?=[|)]))//) {
+ my $s = $multiline ? '$$' : '$';
+ return bless { rep => '$', sem => $s }, "P5re::Meta";
+ }
+ if (s/^([\$\@](\w+|.))//) { # XXX need to handle subscripts here
+ return bless { name => $1 }, "P5re::Var";
+ }
+
+ # character classes
+ if (s/^\[//) {
+ my $re = cclass();
+ if (not s/^\]//) { warn "Expected right bracket at: '$_'" }
+ return $re;
+ }
+
+ # backwhacks
+ if (/^\\([1-9]\d*)/ and $1 <= $maxbrack) {
+ my $to = $1;
+ onechar();
+ return bless { to => $to }, "P5re::Back";
+ }
+
+ # backwhacks
+ if (/^\\(?=\w)/) {
+ return bless { rep => onechar() }, "P5re::Meta";
+ }
+
+ # backwhacks
+ if (s/^\\(.)//) {
+ return bless { text => $1 }, "P5re::Char";
+ }
+
+ # optimization, would happen anyway
+ if (s/^(\w+)//) { return bless { text => $1 }, "P5re::Char"; }
+
+ # random character
+ if (s/^(.)//) { return bless { text => $1 }, "P5re::Char"; }
+}
+
+sub cclass {
+ my @cclass;
+ my $cclass = "";
+ my $neg = 0;
+ if (s/^\^//) { $neg = 1 }
+ if (s/^([\]\-])//) { $cclass .= $1 }
+
+ while ($_ ne "" and not /^\]/) {
+ # backwhacks
+ if (/^\\(?=.)|.-/) {
+ my $o1 = onecharobj();
+ if ($cclass ne "") {
+ push @cclass, bless { text => $cclass }, "P5re::Char";
+ $cclass = "";
+ }
+
+ if (s/^-(?=[^]])//) {
+ my $o2 = onecharobj();
+ push @cclass, bless { Kids => [$o1, $o2] }, "P5re::Range";
+ }
+ else {
+ push @cclass, $o1;
+ }
+ }
+ elsif (s/^(\[([:=.])\^?\w*\2\])//) {
+ if ($cclass ne "") {
+ push @cclass, bless { text => $cclass }, "P5re::Char";
+ $cclass = "";
+ }
+ push @cclass, bless { rep => $1 }, "P5re::Meta";
+ }
+ else {
+ $cclass .= onechar();
+ }
+ }
+
+ if ($cclass ne "") {
+ push @cclass, bless { text => $cclass }, "P5re::Char";
+ }
+ return bless { Kids => [@cclass], neg => $neg }, "P5re::CClass";
+}
+
+sub onecharobj {
+ my $ch = onechar();
+ if ($ch =~ /^\\/) {
+ $ch = bless { rep => $ch }, "P5re::Meta";
+ }
+ else {
+ $ch = bless { text => $ch }, "P5re::Char";
+ }
+}
+
+sub onechar {
+ die "Oops, short cclass" unless s/^(.)//;
+ my $ch = $1;
+ if ($ch eq '\\') {
+ if (s/^([rntf]|[0-7]{1,4})//) { $ch .= $1 }
+ elsif (s/^(x[0-9a-fA-f]{1,2})//) { $ch .= $1 }
+ elsif (s/^(x\{[0-9a-fA-f]+\})//) { $ch .= $1 }
+ elsif (s/^([NpP]\{.*?\})//) { $ch .= $1 }
+ elsif (s/^([cpP].)//) { $ch .= $1 }
+ elsif (s/^(.)//) { $ch .= $1 }
+ else {
+ die "Oops, short backwhack";
+ }
+ }
+ return $ch;
+}
+
+sub setmods {
+ my $mods = shift;
+ if ($mods =~ /\-.*x/) {
+ $extended = 0;
+ }
+ elsif ($mods =~ /x/) {
+ $extended = 1;
+ }
+ if ($mods =~ /\-.*i/) {
+ $insensitive = 0;
+ }
+ elsif ($mods =~ /i/) {
+ $insensitive = 1;
+ }
+ if ($mods =~ /\-.*m/) {
+ $multiline = 0;
+ }
+ elsif ($mods =~ /m/) {
+ $multiline = 1;
+ }
+ if ($mods =~ /\-.*s/) {
+ $singleline = 0;
+ }
+ elsif ($mods =~ /s/) {
+ $singleline = 1;
+ }
+}
+
+1;