summary | refs | log | tree | commit | diff
path: root/dist/Locale-Maketext/lib/Locale/Maketext.pm
diff options
context:
space:
mode:
author: Todd Rinaldo <toddr@cpanel.net> 2010-07-15 14:24:07 -0400
committer: Jesse Vincent <jesse@bestpractical.com> 2010-07-15 14:25:14 -0400
commit: 87d86da5f7830848364c3d8df2f53c8ddee4414a (patch)
tree: a1b0c681b6df1885aa0917aca1946fcd3c1a5fb3 /dist/Locale-Maketext/lib/Locale/Maketext.pm
parent: 67d00ddd69566625e96a71ba8e8a51d9ce0a0310 (diff)
download: perl-87d86da5f7830848364c3d8df2f53c8ddee4414a.tar.gz
Clean up older Locale::Maketext internals to no longer do crazy string
eval tricks.
Diffstat (limited to 'dist/Locale-Maketext/lib/Locale/Maketext.pm')
-rw-r--r-- dist/Locale-Maketext/lib/Locale/Maketext.pm 327
1 files changed, 323 insertions, 4 deletions
diff --git a/dist/Locale-Maketext/lib/Locale/Maketext.pm b/dist/Locale-Maketext/lib/Locale/Maketext.pm
index 7a10ffb91e..929a70e38b 100644
--- a/dist/Locale-Maketext/lib/Locale/Maketext.pm
+++ b/dist/Locale-Maketext/lib/Locale/Maketext.pm
@@ -10,6 +10,21 @@ use I18N::LangTags 0.30 ();
BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
# define the constant 'DEBUG' at compile-time
+# turn on utf8 if we have it (this is what GutsLoader.pm used to do essentially )
+# use if (exists $INC{'utf8.pm'} || eval 'use utf8'), 'utf8';
+BEGIN {
+    # Switch on the utf8 pragma for the rest of this file at compile time,
+    # provided utf8.pm is already loaded or can be loaded now.
+
+    # Already loaded?  Then there is nothing to probe.
+    my $utf8_ready = exists $INC{'utf8.pm'};
+
+    unless ($utf8_ready) {
+        # Try to load it; any __DIE__ handler is disabled for the attempt
+        # so that a failed require stays quiet.
+        $utf8_ready = eval { local $SIG{'__DIE__'}; require utf8; };
+    }
+
+    if ($utf8_ready) {
+        utf8->import();
+        DEBUG and warn " utf8 on for _compile()\n";
+    }
+    else {
+        DEBUG and warn " utf8 not available for _compile() ($INC{'utf8.pm'})\n$@\n";
+    }
+}
+
+
$VERSION = '1.15';
@ISA = ();
@@ -404,10 +419,6 @@ sub _add_supers {
#
###########################################################################
-use Locale::Maketext::GutsLoader;
-
-###########################################################################
-
my %tried = ();
# memoization of whether we've used this module, or found it unusable.
@@ -470,4 +481,312 @@ sub _lex_refs { # report the lexicon references for this handle's class
sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity!
+#--------------------------------------------------------------------------
+
+sub _compile {
+    # This big scary routine compiles an entry.
+    # It returns either a coderef if there's brackety bits in this, or
+    # otherwise a ref to a scalar.
+    #
+    # Arguments (read straight from @_, never copied):
+    #   $_[0]  the class name or object on whose behalf we compile; only
+    #          used to call _die_pointing when the text is malformed
+    #   $_[1]  the text to compile; it is scanned with m/\G.../g, so its
+    #          pos() is advanced as a side effect
+    #
+    # Returns:
+    #   a SCALAR ref to the literal text when no bracket group produced a
+    #   call, otherwise a CODE ref built by eval'ing the generated source.
+    #   The generated sub refers to the lexical @c declared below for any
+    #   chunk that could not be embedded as a quoted literal.
+    #
+    # Dies (through _die_pointing) on unterminated, nested or unbalanced
+    # bracket groups and on unusable method names.
+
+    my $target = ref($_[0]) || $_[0];
+
+    my(@code);
+    my(@c) = (''); # "chunks" -- scratch.
+    my $call_count = 0;
+    my $big_pile = '';
+    {
+        my $in_group = 0; # start out outside a group
+        my($m, @params); # scratch
+
+        while($_[1] =~ # Iterate over chunks.
+            m/\G(
+                [^\~\[\]]+ # non-~[] stuff
+                |
+                ~. # ~[, ~], ~~, ~other
+                |
+                \[ # [ presumably opening a group
+                |
+                \] # ] presumably closing a group
+                |
+                ~ # terminal ~ ?
+                |
+                $
+            )/xgs
+        ) {
+            DEBUG>2 and warn qq{ "$1"\n};
+
+            if($1 eq '[' or $1 eq '') { # "[" or end
+                # Whether this is "[" or end, force processing of any
+                # preceding literal.
+                if($in_group) {
+                    if($1 eq '') {
+                        $target->_die_pointing($_[1], 'Unterminated bracket group');
+                    }
+                    else {
+                        $target->_die_pointing($_[1], 'You can\'t nest bracket groups');
+                    }
+                }
+                else {
+                    if ($1 eq '') {
+                        DEBUG>2 and warn " [end-string]\n";
+                    }
+                    else {
+                        $in_group = 1;
+                    }
+                    die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity
+                    if(length $c[-1]) {
+                        # Now actually processing the preceding literal
+                        $big_pile .= $c[-1];
+                        if($USE_LITERALS and (
+                            (ord('A') == 65)
+                            ? $c[-1] !~ m/[^\x20-\x7E]/s
+                            # ASCII very safe chars
+                            : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
+                            # EBCDIC very safe chars
+                        )) {
+                            # normal case -- all very safe chars
+                            # The chunk goes into single-quoted source, so
+                            # both backslash and single quote must be
+                            # escaped; escaping only the quote would let a
+                            # backslash in the text swallow the closing
+                            # quote or collapse "\\" into "\".
+                            $c[-1] =~ s/(?=['\\])/\\/g;
+                            push @code, q{ '} . $c[-1] . "',\n";
+                            $c[-1] = ''; # reuse this slot
+                        }
+                        else {
+                            push @code, ' $c[' . $#c . "],\n";
+                            push @c, ''; # new chunk
+                        }
+                    }
+                    # else just ignore the empty string.
+                }
+
+            }
+            elsif($1 eq ']') { # "]"
+                # close group -- go back in-band
+                if($in_group) {
+                    $in_group = 0;
+
+                    DEBUG>2 and warn " --Closing group [$c[-1]]\n";
+
+                    # And now process the group...
+
+                    if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
+                        DEBUG>2 and warn " -- (Ignoring)\n";
+                        $c[-1] = ''; # reset our chunk
+                        next;
+                    }
+
+                    #$c[-1] =~ s/^\s+//s;
+                    #$c[-1] =~ s/\s+$//s;
+                    ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/
+
+                    # A bit of a hack -- we've turned "~,"'s into DELs, so turn
+                    # 'em into real commas here.
+                    if (ord('A') == 65) { # ASCII, etc
+                        foreach($m, @params) { tr/\x7F/,/ }
+                    }
+                    else { # EBCDIC (1047, 0037, POSIX-BC)
+                        # Thanks to Peter Prymmer for the EBCDIC handling
+                        foreach($m, @params) { tr/\x07/,/ }
+                    }
+
+                    # Special-case handling of some method names:
+                    if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) {
+                        # Treat [_1,...] as [,_1,...], etc.
+                        unshift @params, $m;
+                        $m = '';
+                    }
+                    elsif($m eq '*') {
+                        $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
+                    }
+                    elsif($m eq '#') {
+                        $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
+                    }
+
+                    # Most common case: a simple, legal-looking method name
+                    if($m eq '') {
+                        # 0-length method name means to just interpolate:
+                        push @code, ' (';
+                    }
+                    elsif($m =~ /^\w+(?:\:\:\w+)*$/s
+                        and $m !~ m/(?:^|\:)\d/s
+                        # exclude starting a (sub)package or symbol with a digit
+                    ) {
+                        # Yes, it even supports the demented (and undocumented?)
+                        # $obj->Foo::bar(...) syntax.
+                        # NOTE(review): because the name comes from the text
+                        # being compiled, a package-qualified name lets that
+                        # text call any sub as a method on the handle --
+                        # worth confirming that lexicon text is trusted.
+                        $target->_die_pointing(
+                            $_[1], q{Can't use "SUPER::" in a bracket-group method},
+                            2 + length($c[-1])
+                        )
+                        if $m =~ m/^SUPER::/s;
+                        # Because for SUPER:: to work, we'd have to compile this into
+                        # the right package, and that seems just not worth the bother,
+                        # unless someone convinces me otherwise.
+
+                        push @code, ' $_[0]->' . $m . '(';
+                    }
+                    else {
+                        # TODO: implement something? or just too icky to consider?
+                        $target->_die_pointing(
+                            $_[1],
+                            "Can't use \"$m\" as a method name in bracket group",
+                            2 + length($c[-1])
+                        );
+                    }
+
+                    pop @c; # we don't need that chunk anymore
+                    ++$call_count;
+
+                    foreach my $p (@params) {
+                        if($p eq '_*') {
+                            # Meaning: all parameters except $_[0]
+                            $code[-1] .= ' @_[1 .. $#_], ';
+                            # and yes, that does the right thing for all @_ < 3
+                        }
+                        elsif($p =~ m/^_(-?\d+)$/s) {
+                            # _3 meaning $_[3]
+                            $code[-1] .= '$_[' . (0 + $1) . '], ';
+                        }
+                        elsif($USE_LITERALS and (
+                            (ord('A') == 65)
+                            ? $p !~ m/[^\x20-\x7E]/s
+                            # ASCII very safe chars
+                            : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
+                            # EBCDIC very safe chars
+                        )) {
+                            # Normal case: a literal containing only safe characters
+                            # Escape backslash as well as single quote, for the
+                            # same reason as for literal chunks: the value is
+                            # emitted inside single quotes.
+                            $p =~ s/(?=['\\])/\\/g;
+                            $code[-1] .= q{'} . $p . q{', };
+                        }
+                        else {
+                            # Stow it on the chunk-stack, and just refer to that.
+                            push @c, $p;
+                            push @code, ' $c[' . $#c . '], ';
+                        }
+                    }
+                    $code[-1] .= "),\n";
+
+                    push @c, '';
+                }
+                else {
+                    $target->_die_pointing($_[1], q{Unbalanced ']'});
+                }
+
+            }
+            elsif(substr($1,0,1) ne '~') {
+                # it's stuff not containing "~" or "[" or "]"
+                # i.e., a literal blob
+                $c[-1] .= $1;
+
+            }
+            elsif($1 eq '~~') { # "~~"
+                $c[-1] .= '~';
+
+            }
+            elsif($1 eq '~[') { # "~["
+                $c[-1] .= '[';
+
+            }
+            elsif($1 eq '~]') { # "~]"
+                $c[-1] .= ']';
+
+            }
+            elsif($1 eq '~,') { # "~,"
+                if($in_group) {
+                    # This is a hack, based on the assumption that no-one will actually
+                    # want a DEL inside a bracket group. Let's hope that it's true.
+                    if (ord('A') == 65) { # ASCII etc
+                        $c[-1] .= "\x7F";
+                    }
+                    else { # EBCDIC (cp 1047, 0037, POSIX-BC)
+                        $c[-1] .= "\x07";
+                    }
+                }
+                else {
+                    $c[-1] .= '~,';
+                }
+
+            }
+            elsif($1 eq '~') { # possible only at string-end, it seems.
+                $c[-1] .= '~';
+
+            }
+            else {
+                # It's a "~X" where X is not a special character.
+                # Consider it a literal ~ and X.
+                $c[-1] .= $1;
+            }
+        }
+    }
+
+    if($call_count) {
+        undef $big_pile; # Well, nevermind that.
+    }
+    else {
+        # It's all literals! Ahwell, that can happen.
+        # So don't bother with the eval. Return a SCALAR reference.
+        return \$big_pile;
+    }
+
+    die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity
+    DEBUG and warn scalar(@c), " chunks under closure\n";
+    if(@code == 0) { # not possible?
+        DEBUG and warn "Empty code\n";
+        return \'';
+    }
+    elsif(@code > 1) { # most cases, presumably!
+        unshift @code, "join '',\n";
+    }
+    unshift @code, "use strict; sub {\n";
+    push @code, "}\n";
+
+    DEBUG and warn @code;
+    # The string eval runs in this scope, so the generated sub can see @c.
+    my $sub = eval(join '', @code);
+    die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
+    return $sub;
+}
+
+#--------------------------------------------------------------------------
+
+sub _die_pointing {
+    # This is used by _compile to throw a fatal error (via Carp::croak)
+    # that shows the offending text with a marker under the trouble spot.
+    #
+    # Arguments, once the class name has been shifted off:
+    #   $_[0]  the error-causing text; its pos() is read to locate the
+    #          problem, which is why it is used in place and not copied
+    #   $_[1]  the error message
+    #   $_[2]  optional count of characters to back up from pos()
+    #
+    # Never returns.
+    my $target = shift; # class name
+    # ...leaving $_[0] the error-causing text, and $_[1] the error message
+
+    my $i = index($_[0], "\n");
+
+    # pos() is undef when no m//g scan is active on the text; treat that
+    # as "at the very start" instead of doing arithmetic on undef.
+    my $here = pos($_[0]);
+    $here = 0 unless defined $here;
+
+    my $pointy;
+    my $pos = $here - (defined($_[2]) ? $_[2] : 0) - 1;
+    if($pos < 1) {
+        $pointy = "^=== near there\n";
+    }
+    else { # we need to space over
+        my $first_tab = index($_[0], "\t");
+        if($pos > 2 and ( -1 == $first_tab or $first_tab > $here)) {
+            # No tabs, or the first tab is harmlessly after where we will point to,
+            # AND we're far enough from the margin that we can draw a proper arrow.
+            $pointy = ('=' x $pos) . "^ near there\n";
+        }
+        else {
+            # tabs screw everything up!
+            $pointy = substr($_[0],0,$pos);
+            # Make everything into spaces, but preserving tabs, so the
+            # caret keeps the same column as the text above it.  (Deleting
+            # the non-whitespace instead would pull the caret to the left.)
+            $pointy =~ tr/\t/ /c;
+            $pointy .= "^=== near there\n";
+        }
+    }
+
+    my $errmsg = "$_[1], in\:\n$_[0]";
+
+    if($i == -1) {
+        # No newline.
+        $errmsg .= "\n" . $pointy;
+    }
+    elsif($i == (length($_[0]) - 1) ) {
+        # Already has a newline at end.
+        $errmsg .= $pointy;
+    }
+    else {
+        # don't bother with the pointy bit, I guess.
+    }
+    Carp::croak( "$errmsg via $target, as used" );
+}
+
1;