diff options
author | Todd Rinaldo <toddr@cpanel.net> | 2010-07-15 14:24:07 -0400 |
---|---|---|
committer | Jesse Vincent <jesse@bestpractical.com> | 2010-07-15 14:25:14 -0400 |
commit | 87d86da5f7830848364c3d8df2f53c8ddee4414a (patch) | |
tree | a1b0c681b6df1885aa0917aca1946fcd3c1a5fb3 /dist/Locale-Maketext/lib/Locale/Maketext.pm | |
parent | 67d00ddd69566625e96a71ba8e8a51d9ce0a0310 (diff) | |
download | perl-87d86da5f7830848364c3d8df2f53c8ddee4414a.tar.gz |
Clean up older Locale::Maketext internals to no longer do crazy string
eval tricks.
Diffstat (limited to 'dist/Locale-Maketext/lib/Locale/Maketext.pm')
-rw-r--r-- | dist/Locale-Maketext/lib/Locale/Maketext.pm | 327 |
1 files changed, 323 insertions, 4 deletions
diff --git a/dist/Locale-Maketext/lib/Locale/Maketext.pm b/dist/Locale-Maketext/lib/Locale/Maketext.pm index 7a10ffb91e..929a70e38b 100644 --- a/dist/Locale-Maketext/lib/Locale/Maketext.pm +++ b/dist/Locale-Maketext/lib/Locale/Maketext.pm @@ -10,6 +10,21 @@ use I18N::LangTags 0.30 (); BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } } # define the constant 'DEBUG' at compile-time +# turn on utf8 if we have it (this is what GutsLoader.pm used to do essentially ) +# use if (exists $INC{'utf8.pm'} || eval 'use utf8'), 'utf8'; +BEGIN { + + # if we have it || we can load it + if ( exists $INC{'utf8.pm'} || eval { local $SIG{'__DIE__'};require utf8; } ) { + utf8->import(); + DEBUG and warn " utf8 on for _compile()\n"; + } + else { + DEBUG and warn " utf8 not available for _compile() ($INC{'utf8.pm'})\n$@\n"; + } +} + + $VERSION = '1.15'; @ISA = (); @@ -404,10 +419,6 @@ sub _add_supers { # ########################################################################### -use Locale::Maketext::GutsLoader; - -########################################################################### - my %tried = (); # memoization of whether we've used this module, or found it unusable. @@ -470,4 +481,312 @@ sub _lex_refs { # report the lexicon references for this handle's class sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity! +#-------------------------------------------------------------------------- + +sub _compile { + # This big scary routine compiles an entry. + # It returns either a coderef if there's brackety bits in this, or + # otherwise a ref to a scalar. + + my $target = ref($_[0]) || $_[0]; + + my(@code); + my(@c) = (''); # "chunks" -- scratch. + my $call_count = 0; + my $big_pile = ''; + { + my $in_group = 0; # start out outside a group + my($m, @params); # scratch + + while($_[1] =~ # Iterate over chunks. + m/\G( + [^\~\[\]]+ # non-~[] stuff + | + ~. # ~[, ~], ~~, ~other + | + \[ # [ presumably opening a group + | + \] # ] presumably closing a group + | + ~ # terminal ~ ? + | + $ + )/xgs + ) { + DEBUG>2 and warn qq{ "$1"\n}; + + if($1 eq '[' or $1 eq '') { # "[" or end + # Whether this is "[" or end, force processing of any + # preceding literal. + if($in_group) { + if($1 eq '') { + $target->_die_pointing($_[1], 'Unterminated bracket group'); + } + else { + $target->_die_pointing($_[1], 'You can\'t nest bracket groups'); + } + } + else { + if ($1 eq '') { + DEBUG>2 and warn " [end-string]\n"; + } + else { + $in_group = 1; + } + die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity + if(length $c[-1]) { + # Now actually processing the preceding literal + $big_pile .= $c[-1]; + if($USE_LITERALS and ( + (ord('A') == 65) + ? $c[-1] !~ m/[^\x20-\x7E]/s + # ASCII very safe chars + : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s + # EBCDIC very safe chars + )) { + # normal case -- all very safe chars + $c[-1] =~ s/'/\\'/g; + push @code, q{ '} . $c[-1] . "',\n"; + $c[-1] = ''; # reuse this slot + } + else { + push @code, ' $c[' . $#c . "],\n"; + push @c, ''; # new chunk + } + } + # else just ignore the empty string. + } + + } + elsif($1 eq ']') { # "]" + # close group -- go back in-band + if($in_group) { + $in_group = 0; + + DEBUG>2 and warn " --Closing group [$c[-1]]\n"; + + # And now process the group... + + if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { + DEBUG>2 and warn " -- (Ignoring)\n"; + $c[-1] = ''; # reset out chink + next; + } + + #$c[-1] =~ s/^\s+//s; + #$c[-1] =~ s/\s+$//s; + ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/ + + # A bit of a hack -- we've turned "~,"'s into DELs, so turn + # 'em into real commas here. + if (ord('A') == 65) { # ASCII, etc + foreach($m, @params) { tr/\x7F/,/ } + } + else { # EBCDIC (1047, 0037, POSIX-BC) + # Thanks to Peter Prymmer for the EBCDIC handling + foreach($m, @params) { tr/\x07/,/ } + } + + # Special-case handling of some method names: + if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) { + # Treat [_1,...] as [,_1,...], etc. + unshift @params, $m; + $m = ''; + } + elsif($m eq '*') { + $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars" + } + elsif($m eq '#') { + $m = 'numf'; # "#" for "number": [#,_1] for "the number _1" + } + + # Most common case: a simple, legal-looking method name + if($m eq '') { + # 0-length method name means to just interpolate: + push @code, ' ('; + } + elsif($m =~ /^\w+(?:\:\:\w+)*$/s + and $m !~ m/(?:^|\:)\d/s + # exclude starting a (sub)package or symbol with a digit + ) { + # Yes, it even supports the demented (and undocumented?) + # $obj->Foo::bar(...) syntax. + $target->_die_pointing( + $_[1], q{Can't use "SUPER::" in a bracket-group method}, + 2 + length($c[-1]) + ) + if $m =~ m/^SUPER::/s; + # Because for SUPER:: to work, we'd have to compile this into + # the right package, and that seems just not worth the bother, + # unless someone convinces me otherwise. + + push @code, ' $_[0]->' . $m . '('; + } + else { + # TODO: implement something? or just too icky to consider? + $target->_die_pointing( + $_[1], + "Can't use \"$m\" as a method name in bracket group", + 2 + length($c[-1]) + ); + } + + pop @c; # we don't need that chunk anymore + ++$call_count; + + foreach my $p (@params) { + if($p eq '_*') { + # Meaning: all parameters except $_[0] + $code[-1] .= ' @_[1 .. $#_], '; + # and yes, that does the right thing for all @_ < 3 + } + elsif($p =~ m/^_(-?\d+)$/s) { + # _3 meaning $_[3] + $code[-1] .= '$_[' . (0 + $1) . '], '; + } + elsif($USE_LITERALS and ( + (ord('A') == 65) + ? $p !~ m/[^\x20-\x7E]/s + # ASCII very safe chars + : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s + # EBCDIC very safe chars + )) { + # Normal case: a literal containing only safe characters + $p =~ s/'/\\'/g; + $code[-1] .= q{'} . $p . q{', }; + } + else { + # Stow it on the chunk-stack, and just refer to that. + push @c, $p; + push @code, ' $c[' . $#c . '], '; + } + } + $code[-1] .= "),\n"; + + push @c, ''; + } + else { + $target->_die_pointing($_[1], q{Unbalanced ']'}); + } + + } + elsif(substr($1,0,1) ne '~') { + # it's stuff not containing "~" or "[" or "]" + # i.e., a literal blob + $c[-1] .= $1; + + } + elsif($1 eq '~~') { # "~~" + $c[-1] .= '~'; + + } + elsif($1 eq '~[') { # "~[" + $c[-1] .= '['; + + } + elsif($1 eq '~]') { # "~]" + $c[-1] .= ']'; + + } + elsif($1 eq '~,') { # "~," + if($in_group) { + # This is a hack, based on the assumption that no-one will actually + # want a DEL inside a bracket group. Let's hope that's it's true. + if (ord('A') == 65) { # ASCII etc + $c[-1] .= "\x7F"; + } + else { # EBCDIC (cp 1047, 0037, POSIX-BC) + $c[-1] .= "\x07"; + } + } + else { + $c[-1] .= '~,'; + } + + } + elsif($1 eq '~') { # possible only at string-end, it seems. + $c[-1] .= '~'; + + } + else { + # It's a "~X" where X is not a special character. + # Consider it a literal ~ and X. + $c[-1] .= $1; + } + } + } + + if($call_count) { + undef $big_pile; # Well, nevermind that. + } + else { + # It's all literals! Ahwell, that can happen. + # So don't bother with the eval. Return a SCALAR reference. + return \$big_pile; + } + + die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity + DEBUG and warn scalar(@c), " chunks under closure\n"; + if(@code == 0) { # not possible? + DEBUG and warn "Empty code\n"; + return \''; + } + elsif(@code > 1) { # most cases, presumably! + unshift @code, "join '',\n"; + } + unshift @code, "use strict; sub {\n"; + push @code, "}\n"; + + DEBUG and warn @code; + my $sub = eval(join '', @code); + die "$@ while evalling" . join('', @code) if $@; # Should be impossible. + return $sub; +} + +#-------------------------------------------------------------------------- + +sub _die_pointing { + # This is used by _compile to throw a fatal error + my $target = shift; # class name + # ...leaving $_[0] the error-causing text, and $_[1] the error message + + my $i = index($_[0], "\n"); + + my $pointy; + my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; + if($pos < 1) { + $pointy = "^=== near there\n"; + } + else { # we need to space over + my $first_tab = index($_[0], "\t"); + if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { + # No tabs, or the first tab is harmlessly after where we will point to, + # AND we're far enough from the margin that we can draw a proper arrow. + $pointy = ('=' x $pos) . "^ near there\n"; + } + else { + # tabs screw everything up! + $pointy = substr($_[0],0,$pos); + $pointy =~ tr/\t //cd; + # make everything into whitespace, but preseving tabs + $pointy .= "^=== near there\n"; + } + } + + my $errmsg = "$_[1], in\:\n$_[0]"; + + if($i == -1) { + # No newline. + $errmsg .= "\n" . $pointy; + } + elsif($i == (length($_[0]) - 1) ) { + # Already has a newline at end. + $errmsg .= $pointy; + } + else { + # don't bother with the pointy bit, I guess. + } + Carp::croak( "$errmsg via $target, as used" ); +} + 1; |