From 87d86da5f7830848364c3d8df2f53c8ddee4414a Mon Sep 17 00:00:00 2001 From: Todd Rinaldo Date: Thu, 15 Jul 2010 14:24:07 -0400 Subject: Clean up older Locale::Maketext internals to no longer do crazy string eval tricks. --- dist/Locale-Maketext/lib/Locale/Maketext.pm | 327 ++++++++++++++++++++- dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm | 326 +------------------- .../lib/Locale/Maketext/GutsLoader.pm | 57 +--- 3 files changed, 347 insertions(+), 363 deletions(-) (limited to 'dist') diff --git a/dist/Locale-Maketext/lib/Locale/Maketext.pm b/dist/Locale-Maketext/lib/Locale/Maketext.pm index 7a10ffb91e..929a70e38b 100644 --- a/dist/Locale-Maketext/lib/Locale/Maketext.pm +++ b/dist/Locale-Maketext/lib/Locale/Maketext.pm @@ -10,6 +10,21 @@ use I18N::LangTags 0.30 (); BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } } # define the constant 'DEBUG' at compile-time +# turn on utf8 if we have it (this is what GutsLoader.pm used to do essentially ) +# use if (exists $INC{'utf8.pm'} || eval 'use utf8'), 'utf8'; +BEGIN { + + # if we have it || we can load it + if ( exists $INC{'utf8.pm'} || eval { local $SIG{'__DIE__'};require utf8; } ) { + utf8->import(); + DEBUG and warn " utf8 on for _compile()\n"; + } + else { + DEBUG and warn " utf8 not available for _compile() ($INC{'utf8.pm'})\n$@\n"; + } +} + + $VERSION = '1.15'; @ISA = (); @@ -404,10 +419,6 @@ sub _add_supers { # ########################################################################### -use Locale::Maketext::GutsLoader; - -########################################################################### - my %tried = (); # memoization of whether we've used this module, or found it unusable. @@ -470,4 +481,312 @@ sub _lex_refs { # report the lexicon references for this handle's class sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity! +#-------------------------------------------------------------------------- + +sub _compile { + # This big scary routine compiles an entry. + # It returns either a coderef if there's brackety bits in this, or + # otherwise a ref to a scalar. + + my $target = ref($_[0]) || $_[0]; + + my(@code); + my(@c) = (''); # "chunks" -- scratch. + my $call_count = 0; + my $big_pile = ''; + { + my $in_group = 0; # start out outside a group + my($m, @params); # scratch + + while($_[1] =~ # Iterate over chunks. + m/\G( + [^\~\[\]]+ # non-~[] stuff + | + ~. # ~[, ~], ~~, ~other + | + \[ # [ presumably opening a group + | + \] # ] presumably closing a group + | + ~ # terminal ~ ? + | + $ + )/xgs + ) { + DEBUG>2 and warn qq{ "$1"\n}; + + if($1 eq '[' or $1 eq '') { # "[" or end + # Whether this is "[" or end, force processing of any + # preceding literal. + if($in_group) { + if($1 eq '') { + $target->_die_pointing($_[1], 'Unterminated bracket group'); + } + else { + $target->_die_pointing($_[1], 'You can\'t nest bracket groups'); + } + } + else { + if ($1 eq '') { + DEBUG>2 and warn " [end-string]\n"; + } + else { + $in_group = 1; + } + die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity + if(length $c[-1]) { + # Now actually processing the preceding literal + $big_pile .= $c[-1]; + if($USE_LITERALS and ( + (ord('A') == 65) + ? $c[-1] !~ m/[^\x20-\x7E]/s + # ASCII very safe chars + : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s + # EBCDIC very safe chars + )) { + # normal case -- all very safe chars + $c[-1] =~ s/'/\\'/g; + push @code, q{ '} . $c[-1] . "',\n"; + $c[-1] = ''; # reuse this slot + } + else { + push @code, ' $c[' . $#c . "],\n"; + push @c, ''; # new chunk + } + } + # else just ignore the empty string. + } + + } + elsif($1 eq ']') { # "]" + # close group -- go back in-band + if($in_group) { + $in_group = 0; + + DEBUG>2 and warn " --Closing group [$c[-1]]\n"; + + # And now process the group... + + if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { + DEBUG>2 and warn " -- (Ignoring)\n"; + $c[-1] = ''; # reset out chink + next; + } + + #$c[-1] =~ s/^\s+//s; + #$c[-1] =~ s/\s+$//s; + ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/ + + # A bit of a hack -- we've turned "~,"'s into DELs, so turn + # 'em into real commas here. + if (ord('A') == 65) { # ASCII, etc + foreach($m, @params) { tr/\x7F/,/ } + } + else { # EBCDIC (1047, 0037, POSIX-BC) + # Thanks to Peter Prymmer for the EBCDIC handling + foreach($m, @params) { tr/\x07/,/ } + } + + # Special-case handling of some method names: + if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) { + # Treat [_1,...] as [,_1,...], etc. + unshift @params, $m; + $m = ''; + } + elsif($m eq '*') { + $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars" + } + elsif($m eq '#') { + $m = 'numf'; # "#" for "number": [#,_1] for "the number _1" + } + + # Most common case: a simple, legal-looking method name + if($m eq '') { + # 0-length method name means to just interpolate: + push @code, ' ('; + } + elsif($m =~ /^\w+(?:\:\:\w+)*$/s + and $m !~ m/(?:^|\:)\d/s + # exclude starting a (sub)package or symbol with a digit + ) { + # Yes, it even supports the demented (and undocumented?) + # $obj->Foo::bar(...) syntax. + $target->_die_pointing( + $_[1], q{Can't use "SUPER::" in a bracket-group method}, + 2 + length($c[-1]) + ) + if $m =~ m/^SUPER::/s; + # Because for SUPER:: to work, we'd have to compile this into + # the right package, and that seems just not worth the bother, + # unless someone convinces me otherwise. + + push @code, ' $_[0]->' . $m . '('; + } + else { + # TODO: implement something? or just too icky to consider? + $target->_die_pointing( + $_[1], + "Can't use \"$m\" as a method name in bracket group", + 2 + length($c[-1]) + ); + } + + pop @c; # we don't need that chunk anymore + ++$call_count; + + foreach my $p (@params) { + if($p eq '_*') { + # Meaning: all parameters except $_[0] + $code[-1] .= ' @_[1 .. $#_], '; + # and yes, that does the right thing for all @_ < 3 + } + elsif($p =~ m/^_(-?\d+)$/s) { + # _3 meaning $_[3] + $code[-1] .= '$_[' . (0 + $1) . '], '; + } + elsif($USE_LITERALS and ( + (ord('A') == 65) + ? $p !~ m/[^\x20-\x7E]/s + # ASCII very safe chars + : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s + # EBCDIC very safe chars + )) { + # Normal case: a literal containing only safe characters + $p =~ s/'/\\'/g; + $code[-1] .= q{'} . $p . q{', }; + } + else { + # Stow it on the chunk-stack, and just refer to that. + push @c, $p; + push @code, ' $c[' . $#c . '], '; + } + } + $code[-1] .= "),\n"; + + push @c, ''; + } + else { + $target->_die_pointing($_[1], q{Unbalanced ']'}); + } + + } + elsif(substr($1,0,1) ne '~') { + # it's stuff not containing "~" or "[" or "]" + # i.e., a literal blob + $c[-1] .= $1; + + } + elsif($1 eq '~~') { # "~~" + $c[-1] .= '~'; + + } + elsif($1 eq '~[') { # "~[" + $c[-1] .= '['; + + } + elsif($1 eq '~]') { # "~]" + $c[-1] .= ']'; + + } + elsif($1 eq '~,') { # "~," + if($in_group) { + # This is a hack, based on the assumption that no-one will actually + # want a DEL inside a bracket group. Let's hope that's it's true. + if (ord('A') == 65) { # ASCII etc + $c[-1] .= "\x7F"; + } + else { # EBCDIC (cp 1047, 0037, POSIX-BC) + $c[-1] .= "\x07"; + } + } + else { + $c[-1] .= '~,'; + } + + } + elsif($1 eq '~') { # possible only at string-end, it seems. + $c[-1] .= '~'; + + } + else { + # It's a "~X" where X is not a special character. + # Consider it a literal ~ and X. + $c[-1] .= $1; + } + } + } + + if($call_count) { + undef $big_pile; # Well, nevermind that. + } + else { + # It's all literals! Ahwell, that can happen. + # So don't bother with the eval. Return a SCALAR reference. + return \$big_pile; + } + + die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity + DEBUG and warn scalar(@c), " chunks under closure\n"; + if(@code == 0) { # not possible? + DEBUG and warn "Empty code\n"; + return \''; + } + elsif(@code > 1) { # most cases, presumably! + unshift @code, "join '',\n"; + } + unshift @code, "use strict; sub {\n"; + push @code, "}\n"; + + DEBUG and warn @code; + my $sub = eval(join '', @code); + die "$@ while evalling" . join('', @code) if $@; # Should be impossible. + return $sub; +} + +#-------------------------------------------------------------------------- + +sub _die_pointing { + # This is used by _compile to throw a fatal error + my $target = shift; # class name + # ...leaving $_[0] the error-causing text, and $_[1] the error message + + my $i = index($_[0], "\n"); + + my $pointy; + my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; + if($pos < 1) { + $pointy = "^=== near there\n"; + } + else { # we need to space over + my $first_tab = index($_[0], "\t"); + if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { + # No tabs, or the first tab is harmlessly after where we will point to, + # AND we're far enough from the margin that we can draw a proper arrow. + $pointy = ('=' x $pos) . "^ near there\n"; + } + else { + # tabs screw everything up! + $pointy = substr($_[0],0,$pos); + $pointy =~ tr/\t //cd; + # make everything into whitespace, but preseving tabs + $pointy .= "^=== near there\n"; + } + } + + my $errmsg = "$_[1], in\:\n$_[0]"; + + if($i == -1) { + # No newline. + $errmsg .= "\n" . $pointy; + } + elsif($i == (length($_[0]) - 1) ) { + # Already has a newline at end. + $errmsg .= $pointy; + } + else { + # don't bother with the pointy bit, I guess. + } + Carp::croak( "$errmsg via $target, as used" ); +} + 1; diff --git a/dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm b/dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm index 9af292c61c..f4d1cd1572 100644 --- a/dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm +++ b/dist/Locale-Maketext/lib/Locale/Maketext/Guts.pm @@ -1,328 +1,20 @@ package Locale::Maketext::Guts; -$VERSION = '1.13'; +use Locale::Maketext; -BEGIN { - # Just so we're nice and define SOMETHING in "our" package. - *zorp = sub { return scalar @_ } unless defined &zorp; -} +=head1 NAME -package Locale::Maketext; -use strict; -use vars qw($USE_LITERALS $GUTSPATH); +Locale::Maketext::Guts - Deprecated module to load Locale::Maketext utf8 code -BEGIN { - $GUTSPATH = __FILE__; - *DEBUG = sub () {0} unless defined &DEBUG; -} +=head1 SYNOPSIS -use utf8; + # Do this instead please + use Locale::Maketext -sub _compile { - # This big scary routine compiles an entry. - # It returns either a coderef if there's brackety bits in this, or - # otherwise a ref to a scalar. +=head1 DESCRIPTION - my $target = ref($_[0]) || $_[0]; +Previously majic was done to load Locale::Maketext when utf8 was unavailable. The subs this module provided were merged back into Locale::Maketext - my(@code); - my(@c) = (''); # "chunks" -- scratch. - my $call_count = 0; - my $big_pile = ''; - { - my $in_group = 0; # start out outside a group - my($m, @params); # scratch - - while($_[1] =~ # Iterate over chunks. - m/\G( - [^\~\[\]]+ # non-~[] stuff - | - ~. # ~[, ~], ~~, ~other - | - \[ # [ presumably opening a group - | - \] # ] presumably closing a group - | - ~ # terminal ~ ? - | - $ - )/xgs - ) { - DEBUG>2 and print qq{ "$1"\n}; - - if($1 eq '[' or $1 eq '') { # "[" or end - # Whether this is "[" or end, force processing of any - # preceding literal. - if($in_group) { - if($1 eq '') { - $target->_die_pointing($_[1], 'Unterminated bracket group'); - } - else { - $target->_die_pointing($_[1], 'You can\'t nest bracket groups'); - } - } - else { - if ($1 eq '') { - DEBUG>2 and print " [end-string]\n"; - } - else { - $in_group = 1; - } - die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity - if(length $c[-1]) { - # Now actually processing the preceding literal - $big_pile .= $c[-1]; - if($USE_LITERALS and ( - (ord('A') == 65) - ? $c[-1] !~ m/[^\x20-\x7E]/s - # ASCII very safe chars - : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s - # EBCDIC very safe chars - )) { - # normal case -- all very safe chars - $c[-1] =~ s/'/\\'/g; - push @code, q{ '} . $c[-1] . "',\n"; - $c[-1] = ''; # reuse this slot - } - else { - push @code, ' $c[' . $#c . "],\n"; - push @c, ''; # new chunk - } - } - # else just ignore the empty string. - } - - } - elsif($1 eq ']') { # "]" - # close group -- go back in-band - if($in_group) { - $in_group = 0; - - DEBUG>2 and print " --Closing group [$c[-1]]\n"; - - # And now process the group... - - if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { - DEBUG > 2 and print " -- (Ignoring)\n"; - $c[-1] = ''; # reset out chink - next; - } - - #$c[-1] =~ s/^\s+//s; - #$c[-1] =~ s/\s+$//s; - ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/ - - # A bit of a hack -- we've turned "~,"'s into DELs, so turn - # 'em into real commas here. - if (ord('A') == 65) { # ASCII, etc - foreach($m, @params) { tr/\x7F/,/ } - } - else { # EBCDIC (1047, 0037, POSIX-BC) - # Thanks to Peter Prymmer for the EBCDIC handling - foreach($m, @params) { tr/\x07/,/ } - } - - # Special-case handling of some method names: - if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) { - # Treat [_1,...] as [,_1,...], etc. - unshift @params, $m; - $m = ''; - } - elsif($m eq '*') { - $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars" - } - elsif($m eq '#') { - $m = 'numf'; # "#" for "number": [#,_1] for "the number _1" - } - - # Most common case: a simple, legal-looking method name - if($m eq '') { - # 0-length method name means to just interpolate: - push @code, ' ('; - } - elsif($m =~ /^\w+(?:\:\:\w+)*$/s - and $m !~ m/(?:^|\:)\d/s - # exclude starting a (sub)package or symbol with a digit - ) { - # Yes, it even supports the demented (and undocumented?) - # $obj->Foo::bar(...) syntax. - $target->_die_pointing( - $_[1], q{Can't use "SUPER::" in a bracket-group method}, - 2 + length($c[-1]) - ) - if $m =~ m/^SUPER::/s; - # Because for SUPER:: to work, we'd have to compile this into - # the right package, and that seems just not worth the bother, - # unless someone convinces me otherwise. - - push @code, ' $_[0]->' . $m . '('; - } - else { - # TODO: implement something? or just too icky to consider? - $target->_die_pointing( - $_[1], - "Can't use \"$m\" as a method name in bracket group", - 2 + length($c[-1]) - ); - } - - pop @c; # we don't need that chunk anymore - ++$call_count; - - foreach my $p (@params) { - if($p eq '_*') { - # Meaning: all parameters except $_[0] - $code[-1] .= ' @_[1 .. $#_], '; - # and yes, that does the right thing for all @_ < 3 - } - elsif($p =~ m/^_(-?\d+)$/s) { - # _3 meaning $_[3] - $code[-1] .= '$_[' . (0 + $1) . '], '; - } - elsif($USE_LITERALS and ( - (ord('A') == 65) - ? $p !~ m/[^\x20-\x7E]/s - # ASCII very safe chars - : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s - # EBCDIC very safe chars - )) { - # Normal case: a literal containing only safe characters - $p =~ s/'/\\'/g; - $code[-1] .= q{'} . $p . q{', }; - } - else { - # Stow it on the chunk-stack, and just refer to that. - push @c, $p; - push @code, ' $c[' . $#c . '], '; - } - } - $code[-1] .= "),\n"; - - push @c, ''; - } - else { - $target->_die_pointing($_[1], q{Unbalanced ']'}); - } - - } - elsif(substr($1,0,1) ne '~') { - # it's stuff not containing "~" or "[" or "]" - # i.e., a literal blob - $c[-1] .= $1; - - } - elsif($1 eq '~~') { # "~~" - $c[-1] .= '~'; - - } - elsif($1 eq '~[') { # "~[" - $c[-1] .= '['; - - } - elsif($1 eq '~]') { # "~]" - $c[-1] .= ']'; - - } - elsif($1 eq '~,') { # "~," - if($in_group) { - # This is a hack, based on the assumption that no-one will actually - # want a DEL inside a bracket group. Let's hope that's it's true. - if (ord('A') == 65) { # ASCII etc - $c[-1] .= "\x7F"; - } - else { # EBCDIC (cp 1047, 0037, POSIX-BC) - $c[-1] .= "\x07"; - } - } - else { - $c[-1] .= '~,'; - } - - } - elsif($1 eq '~') { # possible only at string-end, it seems. - $c[-1] .= '~'; - - } - else { - # It's a "~X" where X is not a special character. - # Consider it a literal ~ and X. - $c[-1] .= $1; - } - } - } - - if($call_count) { - undef $big_pile; # Well, nevermind that. - } - else { - # It's all literals! Ahwell, that can happen. - # So don't bother with the eval. Return a SCALAR reference. - return \$big_pile; - } - - die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity - DEBUG and warn scalar(@c), " chunks under closure\n"; - if(@code == 0) { # not possible? - DEBUG and warn "Empty code\n"; - return \''; - } - elsif(@code > 1) { # most cases, presumably! - unshift @code, "join '',\n"; - } - unshift @code, "use strict; sub {\n"; - push @code, "}\n"; - - DEBUG and warn @code; - my $sub = eval(join '', @code); - die "$@ while evalling" . join('', @code) if $@; # Should be impossible. - return $sub; -} - -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -sub _die_pointing { - # This is used by _compile to throw a fatal error - my $target = shift; # class name - # ...leaving $_[0] the error-causing text, and $_[1] the error message - - my $i = index($_[0], "\n"); - - my $pointy; - my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; - if($pos < 1) { - $pointy = "^=== near there\n"; - } - else { # we need to space over - my $first_tab = index($_[0], "\t"); - if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { - # No tabs, or the first tab is harmlessly after where we will point to, - # AND we're far enough from the margin that we can draw a proper arrow. - $pointy = ('=' x $pos) . "^ near there\n"; - } - else { - # tabs screw everything up! - $pointy = substr($_[0],0,$pos); - $pointy =~ tr/\t //cd; - # make everything into whitespace, but preseving tabs - $pointy .= "^=== near there\n"; - } - } - - my $errmsg = "$_[1], in\:\n$_[0]"; - - if($i == -1) { - # No newline. - $errmsg .= "\n" . $pointy; - } - elsif($i == (length($_[0]) - 1) ) { - # Already has a newline at end. - $errmsg .= $pointy; - } - else { - # don't bother with the pointy bit, I guess. - } - Carp::croak( "$errmsg via $target, as used" ); -} +=cut 1; - diff --git a/dist/Locale-Maketext/lib/Locale/Maketext/GutsLoader.pm b/dist/Locale-Maketext/lib/Locale/Maketext/GutsLoader.pm index daa9840260..421d538d09 100644 --- a/dist/Locale-Maketext/lib/Locale/Maketext/GutsLoader.pm +++ b/dist/Locale-Maketext/lib/Locale/Maketext/GutsLoader.pm @@ -1,49 +1,22 @@ package Locale::Maketext::GutsLoader; -$VERSION = '1.13'; +use Locale::Maketext; -use strict; sub zorp { return scalar @_ } -BEGIN { - $Locale::Maketext::GutsLoader::GUTSPATH = __FILE__; - *Locale::Maketext::DEBUG = sub () {0} - unless defined &Locale::Maketext::DEBUG; -} - -# -# This whole drama is so that we can load the utf8'd code -# in Locale::Maketext::Guts, but if that fails, snip the -# utf8 and then try THAT. -# - -$Locale::Maketext::GUTSPATH = ''; -Locale::Maketext::DEBUG and warn "Requiring Locale::Maketext::Guts...\n"; -eval 'require Locale::Maketext::Guts'; - -if ($@) { - my $path = $Locale::Maketext::GUTSPATH; - - die "Can't load Locale::Maketext::Guts\nAborting" unless $path; - - die "No readable file $Locale::Maketext::GutsLoader::GUTSPATH\nAborting" - unless -e $path and -f _ and -r _; - - open(IN, $path) or die "Can't read-open $path\nAborting"; - - my $source; - { local $/; $source = ; } - close(IN); - unless( $source =~ s/\b(use utf8)/# $1/ ) { - Locale::Maketext::DEBUG and - print "I didn't see 'use utf8' in $path\n"; - } - eval $source; - die "Can't compile $path\n...The error I got was:\n$@\nAborting" if $@; - Locale::Maketext::DEBUG and warn "Non-utf8'd Locale::Maketext::Guts fine\n"; -} -else { - Locale::Maketext::DEBUG and warn "Loaded Locale::Maketext::Guts fine\n"; -} +=head1 NAME + +Locale::Maketext::GutsLoader - Deprecated module to load Locale::Maketext utf8 code + +=head1 SYNOPSIS + + # Do this instead please + use Locale::Maketext + +=head1 DESCRIPTION + +Previously majic was done to load Locale::Maketext when utf8 was unavailable. The subs this module provided were merged back into Locale::Maketext + +=cut 1; -- cgit v1.2.1