Integrate Locale::Maketext 1.01 from Sean Burke.

p4raw-id: //depot/perl@10229
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-05-27 13:50:57 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-05-27 13:50:57 +0000
commit: 9378c5814a1c38be33358baa5cfd56712c3b71d4 (patch)
tree: 712d3a8e3142e76139998b3f83f430343ce1e173 /lib/Locale
parent: 4b053158ffba5bda82094dc0b0cd80c9d2867b97 (diff)
download: perl-9378c5814a1c38be33358baa5cfd56712c3b71d4.tar.gz
3 files changed, 2724 insertions, 0 deletions
diff --git a/lib/Locale/Maketext.pm b/lib/Locale/Maketext.pm
new file mode 100644
index 0000000000..a39383fc30
--- /dev/null
+++ b/lib/Locale/Maketext.pm
@@ -0,0 +1,646 @@
+
+# Time-stamp: "2001-05-25 07:49:06 MDT"
+
+require 5;
+package Locale::Maketext;
+use strict;
+use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS
+             $USE_LITERALS);
+use Carp ();
+use I18N::LangTags 0.21 ();
+
+#--------------------------------------------------------------------------
+
+BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
+ # define the constant 'DEBUG' at compile-time, unless one already exists
+
+$VERSION = "1.01";
+@ISA = ();
+
+$MATCH_SUPERS = 1;  # get_handle also tries super-languages of each tag
+$USING_LANGUAGE_TAGS = 1;
+ # Turning this off is somewhat of a security risk in that little or no
+ # checking will be done on the legality of tokens passed to the
+ # eval("require $module") in _try_use.  If you turn this off, you have
+ # to do your own taint checking.
+
+$USE_LITERALS = 1 unless defined $USE_LITERALS;
+ # a hint to _compile: inline printable-ASCII text as quoted literals.
+
+my %isa_scan = ();  # class name => arrayref of lexicon hashrefs (_lex_refs)
+
+###########################################################################
+
+sub quant {
+  # Quantify a noun: "<formatted number> <noun form agreeing with it>".
+  my($handle, $num, @forms) = @_;
+
+  unless(@forms) { return $num }  # what should this mean?
+  if(@forms > 2 and $num == 0) { return $forms[2] }  # special zeroth case
+  my $number_part = $handle->numf($num);
+  my $noun_part   = $handle->numerate($num, @forms);
+  return "$number_part $noun_part";
+   # Most human languages put the number phrase before the qualified phrase.
+}
+
+
+sub numerate {
+  # Return this lexical item in a form appropriate to the number $num.
+  # One form given: the plural is made by appending "s".  Two or more
+  # forms: the first is singular, the second plural.  None: empty string.
+  my($handle, $num, @forms) = @_;
+  my $is_singular = ($num == 1);
+
+  return '' if @forms == 0;
+  return $forms[0] if $is_singular;
+  return $forms[0] . 's' if @forms == 1;  # very cheap hack.
+  return $forms[1];
+}
+
+#--------------------------------------------------------------------------
+
+sub numf {
+  # Format $num for display, with separators between groups of three digits.
+  my $handle = $_[0];
+  my $num    = $_[1];
+  my $modest_whole_number =
+    ($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num));
+  if($modest_whole_number) {
+    $num += 0;  # Normal stringification; %G would make ten million 1E+007
+  } else {
+    $num = CORE::sprintf("%G", $num);
+     # "CORE::" is there to avoid confusion with this package's sub sprintf.
+  }
+  1 while $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s;  # right from perlfaq5
+   # The initial \d+ gobbles as many digits as it can, and then backtracks
+   #  to un-eat the rightmost three; the comma is inserted there.
+  if(ref($handle) and $handle->{'numf_comma'}) { $num =~ tr<.,><,.> }
+  return $num;
+}
+
+sub sprintf {
+  no integer;
+  my $handle = shift;  # invocant; not otherwise needed
+  my $format = shift;
+  return CORE::sprintf($format, @_);  # "CORE::" so we don't call ourselves
+}
+
+#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
+
+use integer; # vroom vroom... applies to the whole rest of the module
+
+sub language_tag {
+  # Last component of the class name, lowercased, with "_" turned into "-".
+  my $class = ref($_[0]) || $_[0];
+  $class =~ m/([^':]+)(?:::)?$/s or return undef;
+  (my $tag = lc $1) =~ tr<_><->;
+  return $tag;
+}
+
+sub encoding {
+  # The handle's 'encoding' attribute when it has a true one; else Latin-1.
+  my $handle = shift;
+  my $declared = ref($handle) && $handle->{'encoding'};
+  return $declared if $declared;
+  return "iso-8859-1";
+}
+
+#--------------------------------------------------------------------------
+
+sub fallback_languages { return qw(i-default en en-US) }  # tried last by get_handle
+
+sub fallback_language_classes { return; }  # none by default; override at will
+
+#--------------------------------------------------------------------------
+
+sub fail_with { # get/set accessor for the handle's 'fail' attribute
+  my $handle = shift;
+  ref($handle) or return;  # called on a class: nothing to get or set
+  if(@_) { $handle->{'fail'} = $_[0] }
+  return $handle->{'fail'};
+}
+
+#--------------------------------------------------------------------------
+
+sub failure_handler_auto {
+  # Meant to be used like:
+  #  $handle->fail_with('failure_handler_auto')
+  # Treats the unfound phrase as its own Bracket Notation value: compiles it
+  # (caching the result in $handle->{'failure_lex'}) and evaluates it with
+  # the given parameters.  Croaks if evaluating the compiled phrase dies.
+
+  my($handle, $phrase, @params) = @_;
+  my $lex = ($handle->{'failure_lex'} ||= {});
+
+  # Take the value from the cache on a hit too, not only when compiling.
+  my $value = ($lex->{$phrase} ||= $handle->_compile($phrase));
+  # _compile returns a scalar ref for an all-literal phrase.
+  return $$value if ref($value) eq 'SCALAR';
+  return $value unless ref($value) eq 'CODE';
+
+  {
+    local $SIG{'__DIE__'};
+    # Pass only the caller's parameters, so that _1 is the first of them.
+    eval { $value = &$value($handle, @params) };
+  }
+  return $value unless $@;
+
+  # An exception was thrown in the call to $value (say, the sub tried
+  # calling a method that doesn't exist), so scream:
+  my $err = $@;
+  # pretty up the error message
+  $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?>
+           <\n in bracket code [compiled line $1],>s;
+  Carp::croak "Error in maketexting \"$phrase\":\n$err as used";
+}
+
+#==========================================================================
+
+sub new {
+  # Nothing fancy: an empty hash, blessed and then passed through ->init.
+  my $invocant = shift;
+  my $handle = bless {}, (ref($invocant) || $invocant);
+  $handle->init;
+  return $handle;
+}
+
+sub init { return; } # no-op; called by new on each fresh handle
+
+###########################################################################
+
+sub maketext {
+  # Find the lexicon entry for $phrase among the handle's class and its
+  # superclasses, compile it on first use, and return the text it yields.
+  # Remember, this can fail.  Failure is controllable many ways.
+  Carp::croak "maketext requires at least one parameter" unless @_ > 1;
+
+  my $handle = shift;
+  my $phrase = shift;
+   # ...leaving just the phrase's parameters in @_
+
+  # Look up the value:
+
+  my $lexicons = $isa_scan{ref($handle) || $handle} || $handle->_lex_refs;
+  my $value;
+  foreach my $h_r (@$lexicons) {
+    print "* Looking up \"$phrase\" in $h_r\n" if DEBUG;
+    if(exists $h_r->{$phrase}) {
+      print "  Found \"$phrase\" in $h_r\n" if DEBUG;
+      $value = $h_r->{$phrase};
+      # Nonref means it's not yet compiled.  Compile and replace.
+      $value = $h_r->{$phrase} = $handle->_compile($value) unless ref $value;
+      last;
+    }
+    if($phrase !~ m/^_/s and $h_r->{'_AUTO'}) {
+      # it's an auto lex, and this is an autoable key!
+      print "  Automaking \"$phrase\" into $h_r\n" if DEBUG;
+      $value = $h_r->{$phrase} = $handle->_compile($phrase);
+      last;
+    }
+    print "  Not found in $h_r, nor automakable\n" if DEBUG > 1;
+    # else keep looking
+  }
+
+  if(not defined $value) {
+    print "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle,
+      " fails.\n" if DEBUG;
+
+    my $fail = ref($handle) ? $handle->{'fail'} : undef;
+    unless($fail) {
+      # All we know how to do is this;
+      Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed");
+    }
+
+    print "WARNING0: maketext fails looking for <$phrase>\n" if DEBUG;
+    # Either way, if the handler ever returns, it should return a good value.
+    if(ref($fail) eq 'CODE') { # it's a sub reference
+      return &{$fail}($handle, $phrase, @_);
+    }
+    return $handle->$fail($phrase, @_); # it's a method name
+  }
+
+  # A scalar ref is plain text; anything else that isn't code goes back as-is.
+  return $$value if ref($value) eq 'SCALAR';
+  return $value unless ref($value) eq 'CODE';
+
+  {
+    local $SIG{'__DIE__'};
+    eval { $value = &$value($handle, @_) };
+  }
+  return $value unless $@;
+
+  # If we make it here, there was an exception thrown in the
+  #  call to $value, and so scream:
+  # (Rather unexpected, but suppose that the sub tried calling
+  #  a method that didn't exist.)
+
+  my $err = $@;
+  # pretty up the error message
+  $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?>
+           <\n in bracket code [compiled line $1],>s;
+  Carp::croak "Error in maketexting \"$phrase\":\n$err as used";
+}
+
+###########################################################################
+
+sub get_handle {  # This is a constructor and, yes, it CAN FAIL.
+  # Args: the project's l10n base class, then language tags (none = guess
+  # from the environment).  Returns a new handle, or undef on failure.
+  my($base_class, @languages) = @_;
+  $base_class = ref($base_class) || $base_class;
+   # Complain if they use __PACKAGE__ as a project base class?
+
+  unless(@languages) {  # Calling with no args is magical!  wooo, magic!
+    if(length( $ENV{'REQUEST_METHOD'} || '' )) { # I'm a CGI
+      my $in = $ENV{'HTTP_ACCEPT_LANGUAGE'} || '';
+        # supposedly that works under mod_perl, too.
+      $in =~ s<\([^\)]*\)><>g; # Kill parens'd things -- just a hack.
+        # ("[^\)]*", so that the text between the parens is matched too.)
+      @languages = &I18N::LangTags::extract_language_tags($in) if length $in;
+        # ...which untaints, incidentally.
+    } else { # Not running as a CGI: try to puzzle out from the environment
+      if(length( $ENV{'LANG'} || '' )) {
+        push @languages, split m/[,:]/, $ENV{'LANG'};
+         # LANG can be only /one/ locale as far as I know, but what the hey.
+      }
+      if(length( $ENV{'LANGUAGE'} || '' )) {
+        push @languages, split m/[,:]/, $ENV{'LANGUAGE'};
+      }
+      print "Noting ENV LANG ", join(',', @languages),"\n" if DEBUG;
+      # Those are really locale IDs, but they get xlated a few lines down.
+      
+      if(&_try_use('Win32::Locale')) {
+        # If we have that module installed...
+        push @languages, Win32::Locale::get_language()
+         if defined &Win32::Locale::get_language;
+      }
+    }
+  }
+
+  #------------------------------------------------------------------------
+  print "Lgs1: ", map("<$_>", @languages), "\n" if DEBUG;
+
+  if($USING_LANGUAGE_TAGS) {
+    @languages = map &I18N::LangTags::locale2language_tag($_), @languages;
+     # if it's a lg tag, fine, pass thru (untainted)
+     # if it's a locale ID, try converting to a lg tag (untainted),
+     # otherwise nix it.
+
+    push @languages, map &I18N::LangTags::super_languages($_), @languages
+     if $MATCH_SUPERS;
+
+    @languages =  map { $_, &I18N::LangTags::alternate_language_tags($_) }
+                      @languages;    # catch alternation
+
+    push @languages, $base_class->fallback_languages;
+     # You are free to override fallback_languages to return empty-list!
+
+    @languages =  # final bit of processing:
+      map {
+        my $it = $_;  # copy
+        $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _
+        $it =~ tr<_a-z0-9><>cd;  # remove all but a-z0-9_
+        $it;
+      } @languages
+    ;
+  }
+  print "Lgs2: ", map("<$_>", @languages), "\n" if DEBUG > 1;
+
+  push @languages, $base_class->fallback_language_classes;
+   # You are free to override that to return whatever.
+
+  # Try each candidate class in order; the first one that loads wins.
+  my %seen = ();
+  foreach my $module_name ( map { $base_class . "::" . $_ }  @languages )
+  {
+    next unless length $module_name; # sanity
+    next if $seen{$module_name}++        # Already been here, and it was no-go
+            || !&_try_use($module_name); # Try to use() it, but can't it.
+    return($module_name->new); # Make it!
+  }
+
+  return undef; # Fail!
+}
+
+###########################################################################
+#
+# This is where most people should stop reading.
+#
+###########################################################################
+
+sub _compile {
+  # This big scary routine compiles an entry ($_[1], in Bracket Notation).
+  # It returns either a coderef if there's brackety bits in this, or
+  #  otherwise a ref to a scalar.  Malformed input croaks via _die_pointing.
+  
+  my $target = ref($_[0]) || $_[0];
+  
+  my(@code);
+  my(@c) = (''); # "chunks" -- scratch.
+  my $call_count = 0;
+  my $big_pile = '';
+  {
+    my $in_group = 0; # start out outside a group
+    my($m, @params); # scratch
+    
+    while($_[1] =~  # Iterate over chunks.
+     m<\G(
+       [^\~\[\]]+  # non-~[] stuff
+       |
+       ~.       # ~[, ~], ~~, ~other
+       |
+       \x5B        # [
+       |
+       \x5D        # ]
+       |
+       ~           # terminal ~?
+       |
+       $
+     )>xgs
+    ) {
+      print "  \"$1\"\n" if DEBUG > 2;
+
+      if($1 eq '[' or $1 eq '') {       # "[" or end
+        # Whether this is "[" or end, force processing of any
+        #  preceding literal.
+        if($in_group) {
+          if($1 eq '') {
+            $target->_die_pointing($_[1], "Unterminated bracket group");
+          } else {
+            $target->_die_pointing($_[1], "You can't nest bracket groups");
+          }
+        } else {
+          if($1 eq '') {
+            print "   [end-string]\n" if DEBUG > 2;
+          } else {
+            $in_group = 1;
+          }
+          die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity
+          if(length $c[-1]) {
+            # Now actually processing the preceding literal
+            $big_pile .= $c[-1];
+            if($USE_LITERALS and $c[-1] !~ m<[^\x20-\x7E]>s) {
+              # normal case -- all very safe chars
+              $c[-1] =~ s/'/\\'/g;
+              push @code, q{ '} . $c[-1] . "',\n";
+              $c[-1] = ''; # reuse this slot
+            } else {
+              push @code, ' $c[' . $#c . "],\n";
+              push @c, ''; # new chunk
+            }
+          }
+           # else just ignore the empty string.
+        }
+
+      } elsif($1 eq ']') {  # "]"
+        # close group -- go back in-band
+        if($in_group) {
+          $in_group = 0;
+          
+          print "   --Closing group [$c[-1]]\n" if DEBUG > 2;
+          
+          # And now process the group...
+          
+          if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
+            DEBUG > 2 and print "   -- (Ignoring)\n";
+            $c[-1] = ''; # reset our chunk
+            next;
+          }
+          
+           #$c[-1] =~ s/^\s+//s;
+           #$c[-1] =~ s/\s+$//s;
+          ($m,@params) = split(",", $c[-1], -1);  # was /\s*,\s*/
+          
+          foreach($m, @params) { tr/\x7F/,/ }
+           # A bit of a hack -- we've turned "~,"'s into \x7F's, so turn
+           #  'em into real commas here.
+          
+          if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) {
+            # Treat [_1,...] as [,_1,...], etc.
+            unshift @params, $m;
+            $m = '';
+          }
+
+          # Most common case: a simple, legal-looking method name
+          if($m eq '') {
+            # 0-length method name means to just interpolate:
+            push @code, ' (';
+          } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s
+            and $m !~ m<(?:^|\:)\d>s
+             # exclude starting a (sub)package or symbol with a digit 
+          ) {
+            # Yes, it even supports the demented (and undocumented?)
+            #  $obj->Foo::bar(...) syntax.
+            $target->_die_pointing(
+              $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method",
+              2 + length($c[-1])
+            )
+             if $m =~ m/^SUPER::/s;
+              # Because for SUPER:: to work, we'd have to compile this into
+              #  the right package, and that seems just not worth the bother,
+              #  unless someone convinces me otherwise.
+            
+            push @code, ' $_[0]->' . $m . '(';
+          } else {
+            # TODO: implement something?  or just too icky to consider?
+            $target->_die_pointing(
+             $_[1],
+             "Can't use \"$m\" as a method name in bracket group",
+             2 + length($c[-1])
+            );
+          }
+          
+          pop @c; # we don't need that chunk anymore
+          ++$call_count;
+          
+          foreach my $p (@params) {
+            if($p eq '_*') {
+              # Meaning: all parameters except $_[0]
+              $code[-1] .= ' @_[1 .. $#_], ';
+               # and yes, that does the right thing for all @_ < 3
+            } elsif($p =~ m<^_(-?\d+)$>s) {
+              # _3 meaning $_[3]
+              $code[-1] .= '$_[' . (0 + $1) . '], ';
+            } elsif($USE_LITERALS and $p !~ m<[^\x20-\x7E]>s) {
+              # Normal case: a literal containing only safe characters
+              $p =~ s/'/\\'/g;
+              $code[-1] .= q{'} . $p . q{', };
+            } else {
+              # Stow it on the chunk-stack, and just refer to that.
+              push @c, $p;
+              push @code, ' $c[' . $#c . "], ";
+            }
+          }
+          $code[-1] .= "),\n";
+
+          push @c, '';
+        } else {
+          $target->_die_pointing($_[1], "Unbalanced ']'");
+        }
+        
+      } elsif(substr($1,0,1) ne '~') {
+        # it's stuff not containing "~" or "[" or "]"
+        # i.e., a literal blob
+        $c[-1] .= $1;
+        
+      } elsif($1 eq '~~') { # "~~"
+        $c[-1] .= '~';
+        
+      } elsif($1 eq '~[') { # "~["
+        $c[-1] .= '[';
+        
+      } elsif($1 eq '~]') { # "~]"
+        $c[-1] .= ']';
+
+      } elsif($1 eq '~,') { # "~,"
+        if($in_group) {
+          $c[-1] .= "\x7F";
+           # This is a hack, based on the assumption that no-one will actually
+           # want a \x7f inside a bracket group.  Let's hope that it's true.
+        } else {
+          $c[-1] .= '~,';
+        }
+        
+      } elsif($1 eq '~') { # possible only at string-end, it seems.
+        $c[-1] .= '~';
+        
+      } else {
+        # It's a "~X" where X is not a special character.
+        # Consider it a literal ~ and X.
+        $c[-1] .= $1;
+      }
+    }
+  }
+
+  if($call_count) {
+    undef $big_pile; # Well, nevermind that.
+  } else {
+    # It's all literals!  Ahwell, that can happen.
+    # So don't bother with the eval.  Return a SCALAR reference.
+    return \$big_pile;
+  }
+
+  die "Last chunk isn't null??" if @c and length $c[-1]; # sanity
+  print scalar(@c), " chunks under closure\n" if DEBUG;
+  if(@code == 0) { # not possible?
+    print "Empty code\n" if DEBUG;
+    return \'';
+  } elsif(@code > 1) { # most cases, presumably!
+    unshift @code, "join '',\n";
+  }
+  unshift @code, "use strict; sub {\n";
+  push @code, "}\n";
+
+  print @code if DEBUG;
+  my $sub = eval(join '', @code);
+  die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
+  return $sub;
+}
+
+# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+sub _die_pointing {
+  # This is used by _compile to throw a fatal error
+  my $target = shift; # class name
+  # ...leaving $_[0] the error-causing text (whose pos() marks how far the
+  # parse got), $_[1] the message, and $_[2] an optional number of extra
+  # characters to back the pointer up by.  Never returns: always croaks.
+  my $i = index($_[0], "\n");
+  my $pointy;
+  my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
+  if($pos < 1) {
+    $pointy = "^=== near there\n";
+  } else { # we need to space over
+    my $first_tab = index($_[0], "\t");
+    if($pos > 2 and ( -1 == $first_tab  or  $first_tab > pos($_[0]))) {
+      # No tabs, or the first tab is harmlessly after where we will point to,
+      # AND we're far enough from the margin that we can draw a proper arrow.
+      $pointy = ('=' x $pos) . "^ near there\n";
+    } else {
+      # tabs screw everything up!
+      $pointy = substr($_[0],0,$pos);
+      $pointy =~ tr/\t/ /c;
+       # make everything into whitespace, but preserving tabs
+      $pointy .= "^=== near there\n";
+    }
+  }
+  
+  my $errmsg = "$_[1], in\:\n$_[0]";
+  
+  if($i == -1) {
+    # No newline.
+    $errmsg .= "\n" . $pointy;
+  } elsif($i == (length($_[0]) - 1)  ) {
+    # Already has a newline at end.
+    $errmsg .= $pointy;
+  } else {
+    # don't bother with the pointy bit, I guess.
+  }
+  Carp::croak( "$errmsg via $target, as used" );
+}
+
+###########################################################################
+
+my %tried = ();
+  # memoization of whether we've used this module, or found it unusable.
+
+sub _try_use {   # Basically a wrapper around "require Modulename"
+  # "Many men have tried..."  "They tried and failed?"  "They tried and died."
+  # Returns 1 if the named class is (or now has been) loaded, else 0.
+  return $tried{$_[0]} if exists $tried{$_[0]};  # memoization
+  my $module = $_[0];   # ASSUME sane module name!
+  { no strict 'refs';
+    return($tried{$module} = 1)
+     if %{$module . "::Lexicon"} or @{$module . "::ISA"};
+    # weird case: we never use'd it, but there it is!
+  }
+   # (Plain truth tests above: defined() on a hash or array is an error now.)
+  print " About to use $module ...\n" if DEBUG;
+  {
+    local $SIG{'__DIE__'};
+    eval "require $module"; # NOTE: string eval -- hence the ASSUME above.
+  }
+  if($@) {
+    print "Error using $module \: $@\n" if DEBUG > 1;
+    return $tried{$module} = 0;
+  } else {
+    print " OK, $module is used\n" if DEBUG;
+    return $tried{$module} = 1;
+  }
+}
+
+#--------------------------------------------------------------------------
+
+sub _lex_refs {  # report the lexicon references for this handle's class
+  # Returns an arrayREF of %Lexicon hashrefs: the class's own first, then
+  # its superclasses', depth-first.  $_[1] may be a hashref of seen classes.
+  no strict 'refs';
+  my($invocant, $seen_r) = @_;
+  my $class = ref($invocant) || $invocant;
+  print "Lex refs lookup on $class\n" if DEBUG > 1;
+  return $isa_scan{$class} if exists $isa_scan{$class};  # memoization!
+
+  $seen_r = {} unless ref $seen_r;
+  my @lex_refs;
+  my $own_lexicon = *{$class . '::Lexicon'}{'HASH'};
+  if(defined $own_lexicon) {
+    push @lex_refs, $own_lexicon;
+    print "%" . $class . "::Lexicon contains ",
+         scalar(keys %$own_lexicon), " entries\n" if DEBUG;
+  }
+
+  foreach my $superclass (@{$class . "::ISA"}) {
+    print " Super-class search into $superclass\n" if DEBUG;
+    next if $seen_r->{$superclass}++;
+    push @lex_refs, @{ _lex_refs($superclass, $seen_r) };  # call myself
+  }
+  $isa_scan{$class} = \@lex_refs; # save for next time
+  return \@lex_refs;
+}
+
+sub clear_isa_scan { %isa_scan = (); return } # forget the memoized _lex_refs results
+
+###########################################################################
+1;
+
+
diff --git a/lib/Locale/Maketext.pod b/lib/Locale/Maketext.pod
new file mode 100644
index 0000000000..b28a9d83c8
--- /dev/null
+++ b/lib/Locale/Maketext.pod
@@ -0,0 +1,1302 @@
+
+# Time-stamp: "2001-05-25 07:50:08 MDT"
+
+=head1 NAME
+
+Locale::Maketext -- framework for localization
+
+=head1 SYNOPSIS
+
+  package MyProgram;
+  use strict;
+  use MyProgram::L10N;
+   # ...which inherits from Locale::Maketext
+  my $lh = MyProgram::L10N->get_handle() || die "What language?";
+  ...
+  # And then any messages your program emits, like:
+  warn $lh->maketext( "Can't open file [_1]: [_2]\n", $f, $! );
+  ...
+
+=head1 DESCRIPTION
+
+It is a common feature of applications (whether run directly,
+or via the Web) for them to be "localized" -- i.e., for them
+to present an English interface to an English-speaker, a German
+interface to a German-speaker, and so on for all languages it's
+programmed with.  Locale::Maketext
+is a framework for software localization; it provides you with the
+tools for organizing and accessing the bits of text and text-processing
+code that you need for producing localized applications.
+
+In order to make sense of Maketext and how all its
+components fit together, you should probably
+go read L<Locale::Maketext::TPJ13|Locale::Maketext::TPJ13>, and
+I<then> read the following documentation.
+
+You may also want to read over the source for C<File::Findgrep>
+and its constituent modules -- they are a complete (if small)
+example application that uses Maketext.
+
+=head1 QUICK OVERVIEW
+
+The basic design of Locale::Maketext is object-oriented, and
+Locale::Maketext is an abstract base class, from which you
+derive a "project class".
+The project class (with a name like "TkBocciBall::Localize",
+which you then use in your module) is in turn the base class
+for all the "language classes" for your project
+(with names "TkBocciBall::Localize::it", 
+"TkBocciBall::Localize::en",
+"TkBocciBall::Localize::fr", etc.).
+
+A language class is
+a class containing a lexicon of phrases as class data,
+and possibly also some methods that are of use in interpreting
+phrases in the lexicon, or otherwise dealing with text in that
+language.
+
+An object belonging to a language class is called a "language
+handle"; it's typically a flyweight object.
+
+The normal course of action is to call:
+
+  use TkBocciBall::Localize;  # the localization project class
+  $lh = TkBocciBall::Localize->get_handle();
+   # Depending on the user's locale, etc., this will
+   # make a language handle from among the classes available,
+   # and any defaults that you declare.
+  die "Couldn't make a language handle??" unless $lh;
+
+From then on, you use the C<maketext> function to access
+entries in whatever lexicon(s) belong to the language handle
+you got.  So, this:
+
+  print $lh->maketext("You won!"), "\n";
+
+...emits the right text for this language.  If the object
+in C<$lh> belongs to class "TkBocciBall::Localize::fr" and
+%TkBocciBall::Localize::fr::Lexicon contains C<("You won!"
+=E<gt> "Tu as gagnE<eacute>!")>, then the above
+code happily tells the user "Tu as gagnE<eacute>!".
+
+=head1 METHODS
+
+Locale::Maketext offers a variety of methods, which fall
+into three categories:
+
+=over
+
+=item *
+
+Methods to do with constructing language handles.
+
+=item *
+
+C<maketext> and other methods to do with accessing %Lexicon data
+for a given language handle.
+
+=item *
+
+Methods that you may find it handy to use, from routines of
+yours that you put in %Lexicon entries.
+
+=back
+
+These are covered in the following section.
+
+=head2 Construction Methods
+
+These are to do with constructing a language handle:
+
+=over
+
+=item $lh = YourProjClass->get_handle( ...langtags... ) || die "lg-handle?";
+
+This tries loading classes based on the language-tags you give (like
+C<("en-US", "sk", "kon", "es-MX", "ja", "i-klingon")>), and for the first class
+that succeeds, returns YourProjClass::I<language>->new().
+
+If it runs thru the entire given list of language-tags, and finds no classes
+for those exact terms, it then tries "superordinate" language classes.
+So if no "en-US" class (i.e., YourProjClass::en_us)
+was found, nor classes for anything else in that list, we then try
+its superordinate, "en" (i.e., YourProjClass::en), and so on thru 
+the other language-tags in the given list: "es".
+(The other language-tags in our example list
+happen to have no superordinates.)
+
+If none of those language-tags leads to loadable classes, we then
+try classes derived from YourProjClass->fallback_languages() and
+then if nothing comes of that, we use classes named by
+YourProjClass->fallback_language_classes().  Then in the (probably
+quite unlikely) event that that fails, we just return undef.
+
+=item $lh = YourProjClass->get_handleB<()> || die "lg-handle?";
+
+When C<get_handle> is called with an empty parameter list, magic happens:
+
+If C<get_handle> senses that it's running in a program that was
+invoked as a CGI, then it tries to get language-tags out of the
+environment variable "HTTP_ACCEPT_LANGUAGE", and it pretends that
+those were the languages passed as parameters to C<get_handle>.
+
+Otherwise (i.e., if not a CGI), this tries various OS-specific ways
+to get the language-tags for the current locale/language, and then
+pretends that those were the value(s) passed to C<get_handle>.
+
+Currently this OS-specific stuff consists of looking in the environment
+variables "LANG" and "LANGUAGE"; and on MSWin machines (where those
+variables are typically unused), this also tries using
+the module Win32::Locale to get a language-tag for whatever language/locale
+is currently selected in the "Regional Settings" (or "International"?)
+Control Panel.  I welcome further
+suggestions for making this do the Right Thing under other operating
+systems that support localization.
+
+If you're using localization in an application that keeps a configuration
+file, you might consider something like this in your project class:
+
+  sub get_handle_via_config {
+    my $class = $_[0];
+    my $preferred_language = $Config_settings{'language'};
+    my $lh;
+    if($preferred_language) {
+      $lh = $class->get_handle($preferred_language)
+       || die "No language handle for \"$preferred_language\" or the like";
+    } else {
+      # Config file missing, maybe?
+      $lh = $class->get_handle()
+       || die "Can't get a language handle";
+    }
+    return $lh;
+  }
+
+=item $lh = YourProjClass::langname->new();
+
+This constructs a language handle.  You usually B<don't> call this
+directly, but instead let C<get_handle> find a language class to C<use>
+and to then call ->new on.
+
+=item $lh->init();
+
+This is called by ->new to initialize newly-constructed language handles.
+If you define an init method in your class, remember that it's usually
+considered a good idea to call $lh->SUPER::init in it (presumably at the
+beginning), so that all classes get a chance to initialize a new object
+however they see fit.
+
+=item YourProjClass->fallback_languages()
+
+C<get_handle> appends the return value of this to the end of
+whatever list of languages you pass C<get_handle>.  Unless
+you override this method, your project class
+will inherit Locale::Maketext's C<fallback_languages>, which
+currently returns C<('i-default', 'en', 'en-US')>.
+("i-default" is defined in RFC 2277).
+
+This method (by having it return the name
+of a language-tag that has an existing language class)
+can be used for making sure that
+C<get_handle> will always manage to construct a language
+handle (assuming your language classes are in an appropriate
+@INC directory).  Or you can use the next method:
+
+=item YourProjClass->fallback_language_classes()
+
+C<get_handle> appends the return value of this to the end
+of the list of classes it will try using.  Unless
+you override this method, your project class
+will inherit Locale::Maketext's C<fallback_language_classes>,
+which currently returns an empty list, C<()>.
+By setting this to some value (namely, the name of a loadable
+language class), you can be sure that
+C<get_handle> will always manage to construct a language
+handle.
+
+=back
+
+=head2 The "maketext" Method
+
+This is the most important method in Locale::Maketext:
+
+$text = $lh->maketext(I<key>, ...parameters for this phrase...);
+
+This looks in the %Lexicon of the language handle
+$lh and all its superclasses, looking
+for an entry whose key is the string I<key>.  Assuming such
+an entry is found, various things then happen, depending on the
+value found:
+
+If the value is a scalarref, the scalar is dereferenced and returned
+(and any parameters are ignored).
+If the value is a coderef, we return &$value($lh, ...parameters...).
+If the value is a string that I<doesn't> look like it's in Bracket Notation,
+we return it (after replacing it with a scalarref, in its %Lexicon).
+If the value I<does> look like it's in Bracket Notation, then we compile
+it into a sub, replace the string in the %Lexicon with the new coderef,
+and then we return &$new_sub($lh, ...parameters...).
+
+Bracket Notation is discussed in a later section.  Note
+that trying to compile a string into Bracket Notation can throw
+an exception if the string is not syntactically valid (say, by not
+balancing brackets right.)
+
+Also, calling &$coderef($lh, ...parameters...) can throw any sort of
+exception (if, say, code in that sub tries to divide by zero).  But
+a very common exception occurs when you have Bracket
+Notation text that says to call a method "foo", but there is no such
+method.  (E.g., "You have [quaB<tn>,_1,ball]." will throw an exception
+on trying to call $lh->quaB<tn>($_[1],'ball') -- you presumably meant
+"quant".)  C<maketext> catches these exceptions, but only to make the
+error message more readable, at which point it rethrows the exception.
+
+An exception I<may> be thrown if I<key> is not found in any
+of $lh's %Lexicon hashes.  What happens if a key is not found,
+is discussed in a later section, "Controlling Lookup Failure".
+
+Note that you might find it useful in some cases to override
+the C<maketext> method with an "after method", if you want to
+translate encodings, or even scripts:
+
+    package YrProj::zh_cn; # Chinese with PRC-style glyphs
+    use base ('YrProj::zh_tw');  # Taiwan-style
+    sub maketext {
+      my $self = shift(@_);
+      my $value = $self->SUPER::maketext(@_);
+      return Chineeze::taiwan2mainland($value);
+    }
+
+Or you may want to override it with something that traps
+any exceptions, if that's critical to your program:
+
+  sub maketext {
+    my($lh, @stuff) = @_;
+    my $out;
+    eval { $out = $lh->SUPER::maketext(@stuff) };
+    return $out unless $@;
+    ...otherwise deal with the exception...
+  }
+
+Other than those two situations, I don't imagine that
+it's useful to override the C<maketext> method.  (If
+you run into a situation where it is useful, I'd be
+interested in hearing about it.)
+
+=over
+
+=item $lh->fail_with I<or> $lh->fail_with(I<PARAM>)
+
+=item $lh->failure_handler_auto
+
+These two methods are discussed in the section "Controlling
+Lookup Failure".
+
+=back
+
+=head2 Utility Methods
+
+These are methods that you may find it handy to use, generally
+from %Lexicon routines of yours (whether expressed as
+Bracket Notation or not).
+
+=over
+
+=item $language->quant($number, $singular)
+
+=item $language->quant($number, $singular, $plural)
+
+=item $language->quant($number, $singular, $plural, $negative)
+
+This is generally meant to be called from inside Bracket Notation
+(which is discussed later), as in 
+
+     "Your search matched [quant,_1,document]!"
+
+It's for I<quantifying> a noun (i.e., saying how much of it there is,
+while giving the correct form of it).  The behavior of this method is
+handy for English and a few other Western European languages, and you
+should override it for languages where it's not suitable.  You can feel
+free to read the source, but the current implementation is basically
+as this pseudocode describes:
+
+     if $number is 0 and there's a $negative,
+        return $negative;
+     elsif $number is 1,
+        return "1 $singular";
+     elsif there's a $plural,
+        return "$number $plural";
+     else
+        return "$number " . $singular . "s";
+     #
+     # ...except that we actually call numf to
+     #  stringify $number before returning it.
+
+So for English (with Bracket Notation)
+C<"...[quant,_1,file]..."> is fine (for 0 it returns "0 files",
+for 1 it returns "1 file", and for more it returns "2 files", etc.)
+
+But for "directory", you'd want C<"[quant,_1,directory,directories]">
+so that our elementary C<quant> method doesn't think that the
+plural of "directory" is "directorys".  And you might find that the
+output may sound better if you specify a negative form, as in:
+
+     "[quant,_1,file,files,No files] matched your query.\n"
+
+Remember to keep in mind verb agreement (or adjectives too, in
+other languages), as in:
+
+     "[quant,_1,document] were matched.\n"
+
+Because if _1 is one, you get "1 document B<were> matched".
+An acceptable hack here is to do something like this:
+
+     "[quant,_1,document was,documents were] matched.\n"
+
+=item $language->numf($number)
+
+This returns the given number formatted nicely according to
+this language's conventions.  Maketext's default method is
+mostly to just take the normal string form of the number
+(applying sprintf "%G" for only very large numbers), and then
+to add commas as necessary.  (Except that
+we apply C<tr/,./.,/> if $language->{'numf_comma'} is true;
+that's a bit of a hack that's useful for languages that express
+two million as "2.000.000" and not as "2,000,000").
+
+If you want anything fancier, consider overriding this with something
+that uses L<Number::Format|Number::Format>, or does something else
+entirely.
+
+Note that numf is called by quant for stringifying all quantifying
+numbers.
+
+=item $language->sprintf($format, @items)
+
+This is just a wrapper around Perl's normal C<sprintf> function.
+It's provided so that you can use "sprintf" in Bracket Notation:
+
+     "Couldn't access datanode [sprintf,%10s=~[%s~],_1,_2]!\n"
+
+returning...
+
+     Couldn't access datanode      Stuff=[thangamabob]!
+
+=item $language->language_tag()
+
+Currently this just takes the last bit of C<ref($language)>, turns
+underscores to dashes, and returns it.  So if $language is
+an object of class Hee::HOO::Haw::en_us, $language->language_tag()
+returns "en-us".  (Yes, the usual representation for that language
+tag is "en-US", but case is I<never> considered meaningful in
+language-tag comparison.)
+
+You may override this as you like; Maketext doesn't use it for
+anything.
+
+=item $language->encoding()
+
+Currently this isn't used for anything, but it's provided
+(with default value of
+C<(ref($language) && $language-E<gt>{'encoding'}) or "iso-8859-1">
+) as a sort of suggestion that it may be useful/necessary to
+associate encodings with your language handles (whether on a
+per-class or even per-handle basis.)
+
+=back
+
+=head2 Language Handle Attributes and Internals
+
+A language handle is a flyweight object -- i.e., it doesn't (necessarily)
+carry any data of interest, other than just being a member of
+whatever class it belongs to.
+
+A language handle is implemented as a blessed hash.  Subclasses of yours
+can store whatever data you want in the hash.  Currently the only hash
+entry used by any crucial Maketext method is "fail", so feel free to
+use anything else as you like.
+
+B<Remember: Don't be afraid to read the Maketext source if there's
+any point on which this documentation is unclear.>  This documentation
+is vastly longer than the module source itself.
+
+=over
+
+=back
+
+=head1 LANGUAGE CLASS HIERARCHIES
+
+These are Locale::Maketext's assumptions about the class
+hierarchy formed by all your language classes:
+
+=over
+
+=item *
+
+You must have a project base class, which you load, and
+which you then use as the first argument in
+the call to YourProjClass->get_handle(...).  It should derive
+(whether directly or indirectly) from Locale::Maketext.
+It B<doesn't matter> how you name this class, altho assuming this
+is the localization component of your Super Mega Program,
+good names for your project class might be
+SuperMegaProgram::Localization, SuperMegaProgram::L10N,
+SuperMegaProgram::I18N, SuperMegaProgram::International,
+or even SuperMegaProgram::Languages or SuperMegaProgram::Messages.
+
+=item *
+
+Language classes are what YourProjClass->get_handle will try to load.
+It will look for them by taking each language-tag (B<skipping> it
+if it doesn't look like a language-tag or locale-tag!), turning it to
+all lowercase, turning any dashes to underscores, and appending it
+to YourProjClass . "::".  So this:
+
+  $lh = YourProjClass->get_handle(
+    'en-US', 'fr', 'kon', 'i-klingon', 'i-klingon-romanized'
+  );
+
+will try loading the classes 
+YourProjClass::en_us (note lowercase!), YourProjClass::fr, 
+YourProjClass::kon,
+YourProjClass::i_klingon
+and YourProjClass::i_klingon_romanized.  (And it'll stop at the
+first one that actually loads.)
+
+=item *
+
+I assume that each language class derives (directly or indirectly)
+from your project class, and also defines its @ISA, its %Lexicon,
+or both.  But I anticipate no dire consequences if these assumptions
+do not hold.
+
+=item *
+
+Language classes may derive from other language classes (altho they
+should have "use I<Thatclassname>" or "use base qw(I<...classes...>)").
+They may derive from the project
+class.  They may derive from some other class altogether.  Or via
+multiple inheritance, they may derive from any mixture of these.
+
+=item *
+
+I foresee no problems with having multiple inheritance in
+your hierarchy of language classes.  (As usual, however, Perl will
+complain bitterly if you have a cycle in the hierarchy: i.e., if
+any class is its own ancestor.)
+
+=back
+
+=head1 ENTRIES IN EACH LEXICON
+
+A typical %Lexicon entry is meant to signify a phrase,
+taking some number (0 or more) of parameters.  An entry
+is meant to be accessed via
+a string I<key> in $lh->maketext(I<key>, ...parameters...),
+which should return a string that is generally meant to
+be used for "output" to the user -- regardless of whether
+this actually means printing to STDOUT, writing to a file,
+or putting into a GUI widget.
+
+While the key must be a string value (since that's a basic
+restriction that Perl places on hash keys), the value in
+the lexicon can currently be of several types:
+a defined scalar, scalarref, or coderef.  The use of these is
+explained above, in the section 'The "maketext" Method', and
+Bracket Notation for strings is discussed in the next section.
+
+While you can use arbitrary unique IDs for lexicon keys
+(like "_min_larger_max_error"), it is often
+useful if an entry's key is itself a valid value, like
+this example error message:
+
+  "Minimum ([_1]) is larger than maximum ([_2])!\n",
+
+Compare this code that uses an arbitrary ID...
+
+  die $lh->maketext( "_min_larger_max_error", $min, $max )
+   if $min > $max;
+
+...to this code that uses a key-as-value:
+
+  die $lh->maketext(
+   "Minimum ([_1]) is larger than maximum ([_2])!\n",
+   $min, $max
+  ) if $min > $max;
+
+The second is, in short, more readable.  In particular, it's obvious
+that the number of parameters you're feeding to that phrase (two) is
+the number of parameters that it I<wants> to be fed.  (Since you see
+_1 and a _2 being used in the key there.)
+
+Also, once a project is otherwise
+complete and you start to localize it, you can scrape together
+all the various keys you use, and pass them to a translator; and then
+the translator's work will go faster if what he's presented is this:
+
+ "Minimum ([_1]) is larger than maximum ([_2])!\n",
+  => "",   # fill in something here, Jacques!
+
+rather than this more cryptic mess:
+
+ "_min_larger_max_error"
+  => "",   # fill in something here, Jacques
+
+I think that keys as lexicon values makes the completed lexicon
+entries more readable:
+
+ "Minimum ([_1]) is larger than maximum ([_2])!\n",
+  => "Le minimum ([_1]) est plus grand que le maximum ([_2])!\n",
+
+Also, having valid values as keys becomes very useful if you set
+up an _AUTO lexicon.  _AUTO lexicons are discussed in a later
+section.
+
+I almost always use keys that are themselves
+valid lexicon values.  One notable exception is when the value is
+quite long.  For example, to get the screenful of data that
+a command-line program might return when given an unknown switch,
+I often just use a key "_USAGE_MESSAGE".  At that point I then go
+and immediately define that lexicon entry in the
+ProjectClass::L10N::en lexicon (since English is always my "project
+language"):
+
+  '_USAGE_MESSAGE' => <<'EOSTUFF',
+  ...long long message...
+  EOSTUFF
+
+and then I can use it as:
+
+  getopt('oDI', \%opts) or die $lh->maketext('_USAGE_MESSAGE');
+
+Incidentally,
+note that each class's C<%Lexicon> inherits-and-extends
+the lexicons in its superclasses.  This is not because these are
+special hashes I<per se>, but because you access them via the
+C<maketext> method, which looks for entries across all the
+C<%Lexicon>'s in a language class I<and> all its ancestor classes.
+(This is because the idea of "class data" isn't directly implemented
+in Perl, but is instead left to individual class-systems to implement
+as they see fit.)
+
+Note that you may have things stored in a lexicon
+besides just phrases for output:  for example, if your program
+takes input from the keyboard, asking a "(Y/N)" question,
+you probably need to know what the equivalent of "Y[es]/N[o]" is
+in whatever language.  You probably also need to know what
+the equivalents of the answers "y" and "n" are.  You can
+store that information in the lexicon (say, under the keys
+"~answer_y" and "~answer_n", and the long forms as
+"~answer_yes" and "~answer_no", where "~" is just an ad-hoc
+character meant to indicate to programmers/translators that
+these are not phrases for output).
+
+Or instead of storing this in the language class's lexicon,
+you can (and, in some cases, really should) represent the same bit
+of knowledge as code in a method in the language class.  (That
+leaves a tidy distinction between the lexicon as the things we
+know how to I<say>, and the rest of the things in the lexicon class
+as things that we know how to I<do>.)  Consider
+this example of a processor for responses to French "oui/non"
+questions:
+
+  sub y_or_n {
+    return undef unless defined $_[1] and length $_[1];
+    my $answer = lc $_[1];  # smash case
+    return 1 if $answer eq 'o' or $answer eq 'oui';
+    return 0 if $answer eq 'n' or $answer eq 'non';
+    return undef;
+  }
+
+...which you'd then call in a construct like this:
+
+  my $response;
+  until(defined $response) {
+    print $lh->maketext("Open the pod bay door (y/n)? ");
+    $response = $lh->y_or_n( get_input_from_keyboard_somehow() );
+  }
+  if($response) { $pod_bay_door->open()         }
+  else          { $pod_bay_door->leave_closed() }
+
+Other data worth storing in a lexicon might be things like
+filenames for language-targeted resources:
+
+  ...
+  "_main_splash_png"
+    => "/styles/en_us/main_splash.png",
+  "_main_splash_imagemap"
+    => "/styles/en_us/main_splash.incl",
+  "_general_graphics_path"
+    => "/styles/en_us/",
+  "_alert_sound"
+    => "/styles/en_us/hey_there.wav",
+  "_forward_icon"
+   => "left_arrow.png",
+  "_backward_icon"
+   => "right_arrow.png",
+  # In some other languages, left equals
+  #  BACKwards, and right is FOREwards.
+  ...
+
+You might want to do the same thing for expressing key bindings
+or the like (since hardwiring "q" as the binding for the function
+that quits a screen/menu/program is useful only if your language
+happens to associate "q" with "quit"!)
+
+=head1 BRACKET NOTATION
+
+Bracket Notation is a crucial feature of Locale::Maketext.  I mean
+Bracket Notation to provide a replacement for sprintf formatting.
+Everything you do with Bracket Notation could be done with a sub block,
+but bracket notation is meant to be much more concise.
+
+Bracket Notation is like a miniature "template" system (in the sense
+of L<Text::Template|Text::Template>, not in the sense of C++ templates),
+where normal text is passed thru basically as is, but text in special
+regions is specially interpreted.  In Bracket Notation, you use brackets
+("[...]" -- not "{...}"!) to note sections that are specially interpreted.
+
+For example, here all the areas that are taken literally are underlined with
+a "^", and all the in-bracket special regions are underlined with an X:
+
+  "Minimum ([_1]) is larger than maximum ([_2])!\n",
+   ^^^^^^^^^ XX ^^^^^^^^^^^^^^^^^^^^^^^^^^ XX ^^^^
+
+When that string is compiled from bracket notation into a real Perl sub,
+it's basically turned into:
+
+  sub {
+    my $lh = $_[0];
+    my @params = @_;
+    return join '',
+      "Minimum (",
+      ...some code here...
+      ") is larger than maximum (",
+      ...some code here...
+      ")!\n",
+  }
+  # to be called by $lh->maketext(KEY, params...)
+   
+In other words, text outside bracket groups is turned into string
+literals.  Text in brackets is rather more complex, and currently follows
+these rules:
+
+=over
+
+=item *
+
+Bracket groups that are empty, or which consist only of whitespace,
+are ignored.  (Examples: "[]", "[    ]", or a [ and a ] with returns
+and/or tabs and/or spaces between them.)
+
+Otherwise, each group is taken to be a comma-separated group of items,
+and each item is interpreted as follows:
+
+=item *
+
+An item that is "_I<digits>" or "_-I<digits>" is interpreted as
+$_[I<value>].  I.e., "_1" is replaced with $_[1], and "_-3" is interpreted
+as $_[-3] (in which case @_ should have at least three elements in it).
+Note that $_[0] is the language handle, and is typically not named
+directly.
+
+=item *
+
+An item "_*" is interpreted to mean "all of @_ except $_[0]".
+I.e., C<@_[1..$#_]>.  Note that this is an empty list in the case
+of calls like $lh->maketext(I<key>) where there are no
+parameters (except $_[0], the language handle).
+
+=item *
+
+Otherwise, each item is interpreted as a string literal.
+
+=back
+
+The group as a whole is interpreted as follows:
+
+=over
+
+=item *
+
+If the first item in a bracket group looks like a method name,
+then that group is interpreted like this:
+
+  $lh->that_method_name(
+    ...rest of items in this group...
+  ),
+
+=item *
+
+If the first item in a bracket group is empty-string, or "_*"
+or "_I<digits>" or "_-I<digits>", then that group is interpreted
+as just the interpolation of all its items:
+
+  join('',
+    ...rest of items in this group...
+  ),
+
+Examples:  "[_1]" and "[,_1]", which are synonymous; and
+"[,ID-(,_4,-,_2,)]", which compiles as
+C<join "", "ID-(", $_[4], "-", $_[2], ")">.
+
+=item *
+
+Otherwise this bracket group is invalid.  For example, in the group
+"[!@#,whatever]", the first item C<"!@#"> is neither empty-string,
+"_I<number>", "_-I<number>", "_*", nor a valid method name; and so
+Locale::Maketext will throw an exception if you try compiling an
+expression containing this bracket group.
+
+=back
+
+Note, incidentally, that items in each group are comma-separated,
+not C</\s*,\s*/>-separated.  That is, you might expect that this
+bracket group:
+
+  "Hoohah [foo, _1 , bar ,baz]!"
+
+would compile to this:
+
+  sub {
+    my $lh = $_[0];
+    return join '',
+      "Hoohah ",
+      $lh->foo( $_[1], "bar", "baz"),
+      "!",
+  }
+
+But it actually compiles as this:
+
+  sub {
+    my $lh = $_[0];
+    return join '',
+      "Hoohah ",
+      $lh->foo(" _1 ", " bar ", "baz"),  #!!!
+      "!",
+  }
+
+In the notation discussed so far, the characters "[" and "]" are given
+special meaning, for opening and closing bracket groups, and "," has
+a special meaning inside bracket groups, where it separates items in the
+group.  This begs the question of how you'd express a literal "[" or
+"]" in a Bracket Notation string, and how you'd express a literal
+comma inside a bracket group.  For this purpose I've adopted "~" (tilde)
+as an escape character:  "~[" means a literal '[' character anywhere
+in Bracket Notation (i.e., regardless of whether you're in a bracket
+group or not), and ditto for "~]" meaning a literal ']', and "~," meaning
+a literal comma.  (Altho "," means a literal comma outside of
+bracket groups -- it's only inside bracket groups that commas are special.)
+
+And on the off chance you need a literal tilde in a bracket expression,
+you get it with "~~".
+
+Currently, an unescaped "~" before a character
+other than a bracket or a comma is taken to mean just a "~" and that
+character.  I.e., "~X" means the same as "~~X" -- i.e., one literal tilde,
+and then one literal "X".  However, by using "~X", you are assuming that
+no future version of Maketext will use "~X" as a magic escape sequence.
+In practice this is not a great problem, since first off you can just
+write "~~X" and not worry about it; second off, I doubt I'll add lots
+of new magic characters to bracket notation; and third off, you
+aren't likely to want literal "~" characters in your messages anyway,
+since it's not a character with wide use in natural language text.
+
+Brackets must be balanced -- every openbracket must have
+one matching closebracket, and vice versa.  So these are all B<invalid>:
+
+  "I ate [quant,_1,rhubarb pie."
+  "I ate [quant,_1,rhubarb pie[."
+  "I ate quant,_1,rhubarb pie]."
+  "I ate quant,_1,rhubarb pie[."
+
+Currently, bracket groups do not nest.  That is, you B<cannot> say:
+
+  "Foo [bar,baz,[quux,quuux]]\n";
+
+If you need a notation that's that powerful, use normal Perl:
+
+  %Lexicon = (
+    ...
+    "some_key" => sub {
+      my $lh = $_[0];
+      join '',
+        "Foo ",
+        $lh->bar('baz', $lh->quux('quuux')),
+        "\n",
+    },
+    ...
+  );
+
+Or write the "bar" method so you don't need to pass it the
+output from calling quux.
+
+I do not anticipate that you will need (or particularly want)
+to nest bracket groups, but you are welcome to email me with
+convincing (real-life) arguments to the contrary.
+
+=head1 AUTO LEXICONS
+
+If maketext goes to look in an individual %Lexicon for an entry
+for I<key> (where I<key> does not start with an underscore), and
+sees none, B<but does see> an entry of "_AUTO" => I<some_true_value>,
+then we actually define $Lexicon{I<key>} = I<key> right then and there,
+and then use that value as if it had been there all
+along.  This happens before we even look in any superclass %Lexicons!
+
+(This is meant to be somewhat like the AUTOLOAD mechanism in
+Perl's function call system -- or, looked at another way,
+like the L<AutoLoader|AutoLoader> module.)
+
+I can picture all sorts of circumstances where you just
+do not want lookup to be able to fail (since failing
+normally means that maketext throws a C<die>, altho
+see the next section for greater control over that).  But
+here's one circumstance where _AUTO lexicons are meant to
+be I<especially> useful:
+
+As you're writing an application, you decide as you go what messages
+you need to emit.  Normally you'd go to write this:
+
+  if(-e $filename) {
+    go_process_file($filename)
+  } else {
+    print "Couldn't find file \"$filename\"!\n";
+  }
+
+but since you anticipate localizing this, you write:
+
+  use ThisProject::I18N;
+  my $lh = ThisProject::I18N->get_handle();
+   # For the moment, assume that things are set up so
+   # that we load class ThisProject::I18N::en
+   # and that that's the class that $lh belongs to.
+  ...
+  if(-e $filename) {
+    go_process_file($filename)
+  } else {
+    print $lh->maketext(
+      "Couldn't find file \"[_1]\"!\n", $filename
+    );
+  }
+
+Now, right after you've just written the above lines, you'd
+normally have to go open the file 
+ThisProject/I18N/en.pm, and immediately add an entry:
+
+  "Couldn't find file \"[_1]\"!\n"
+  => "Couldn't find file \"[_1]\"!\n",
+
+But I consider that somewhat of a distraction from the work
+of getting the main code working -- to say nothing of the fact
+that I often have to play with the program a few times before
+I can decide exactly what wording I want in the messages (which
+in this case would require me to go changing three lines of code:
+the call to maketext with that key, and then the two lines in
+ThisProject/I18N/en.pm).
+
+However, if you set "_AUTO => 1" in the %Lexicon in
+ThisProject/I18N/en.pm (assuming that English (en) is
+the language that all your programmers will be using for this
+project's internal message keys), then you don't ever have to
+go adding lines like this
+
+  "Couldn't find file \"[_1]\"!\n"
+  => "Couldn't find file \"[_1]\"!\n",
+
+to ThisProject/I18N/en.pm, because if _AUTO is true there,
+then just looking for an entry with the key "Couldn't find
+file \"[_1]\"!\n" in that lexicon will cause it to be added,
+with that value!
+
+Note that the reason that keys that start with "_"
+are immune to _AUTO isn't anything generally magical about
+the underscore character -- I just wanted a way to have most
+lexicon keys be autoable, except for possibly a few, and I
+arbitrarily decided to use a leading underscore as a signal
+to distinguish those few.
+
+=head1 CONTROLLING LOOKUP FAILURE
+
+If you call $lh->maketext(I<key>, ...parameters...),
+and there's no entry I<key> in $lh's class's %Lexicon, nor
+in the superclass %Lexicon hash, I<and> if we can't auto-make
+I<key> (because either it starts with a "_", or because none
+of its lexicons have C<_AUTO =E<gt> 1,>), then we have
+failed to find a normal way to maketext I<key>.  What then
+happens in these failure conditions, depends on the $lh object's
+"fail" attribute.
+
+If the language handle has no "fail" attribute, maketext
+will simply throw an exception (i.e., it calls C<die>, mentioning
+the I<key> whose lookup failed, and naming the line number where
+the calling $lh->maketext(I<key>,...) was).
+
+If the language handle has a "fail" attribute whose value is a
+coderef, then $lh->maketext(I<key>,...params...) gives up and calls:
+
+  return &{$that_subref}($lh, $key, @params);
+
+Otherwise, the "fail" attribute's value should be a string denoting
+a method name, so that $lh->maketext(I<key>,...params...) can
+give up with:
+
+  return $lh->$that_method_name($key, @params);
+
+The "fail" attribute can be accessed with the C<fail_with> method:
+
+  # Set to a coderef:
+  $lh->fail_with( \&failure_handler );
+
+  # Set to a method name:
+  $lh->fail_with( 'failure_method' );
+  
+  # Set to nothing (i.e., so failure throws a plain exception)
+  $lh->fail_with( undef );
+  
+  # Simply read:
+  $handler = $lh->fail_with();
+
+Now, as to what you may want to do with these handlers:  Maybe you'd
+want to log what key failed for what class, and then die.  Maybe
+you don't like C<die> and instead you want to send the error message
+to STDOUT (or wherever) and then merely C<exit()>.
+
+Or maybe you don't want to C<die> at all!  Maybe you could use a
+handler like this:
+
+  # Make all lookups fall back onto an English value,
+  #  but after we log it for later fingerpointing.
+  my $lh_backup = ThisProject->get_handle('en');
+  open(LEX_FAIL_LOG, ">>wherever/lex.log") || die "GNAARGH $!";
+  sub lex_fail {
+    my($failing_lh, $key, @params) = @_;
+    print LEX_FAIL_LOG scalar(localtime), "\t",
+       ref($failing_lh), "\t", $key, "\n";
+    return $lh_backup->maketext($key,@params);
+  }
+
+Some users have expressed that they think this whole mechanism of
+having a "fail" attribute at all, seems a rather pointless complication.
+But I want Locale::Maketext to be usable for software projects of I<any>
+scale and type; and different software projects have different ideas
+of what the right thing is to do in failure conditions.  I could simply
+say that failure always throws an exception, and that if you want to be
+careful, you'll just have to wrap every call to $lh->maketext in an
+S<eval { }>.  However, I want programmers to reserve the right (via
+the "fail" attribute) to treat lookup failure as something other than
+an exception of the same level of severity as a config file being
+unreadable, or some essential resource being inaccessible.
+
+One possibly useful value for the "fail" attribute is the method name
+"failure_handler_auto".  This is a method defined in class
+Locale::Maketext itself.  You set it with:
+
+  $lh->fail_with('failure_handler_auto');
+
+Then when you call $lh->maketext(I<key>, ...parameters...) and
+there's no I<key> in any of those lexicons, maketext gives up with
+
+  return $lh->failure_handler_auto($key, @params);
+
+But failure_handler_auto, instead of dying or anything, compiles
+$key, caching it in $lh->{'failure_lex'}{$key} = $compiled,
+and then calls the compiled value, and returns that.  (I.e., if
+$key looks like bracket notation, $compiled is a sub, and we return
+&{$compiled}(@params); but if $key is just a plain string, we just
+return that.)
+
+The effect of using "failure_handler_auto"
+is like an AUTO lexicon, except that it 1) compiles $key even if
+it starts with "_", and 2) you have a record in the new hashref
+$lh->{'failure_lex'} of all the keys that have failed for
+this object.  This should avoid your program dying -- as long
+as your keys aren't actually invalid as bracket code, and as
+long as they don't try calling methods that don't exist.
+
+"failure_handler_auto" may not be exactly what you want, but I
+hope it at least shows you that maketext failure can be mitigated
+in any number of very flexible ways.  If you can formalize exactly
+what you want, you should be able to express that as a failure
+handler.  You can even make it default for every object of a given
+class, by setting it in that class's init:
+
+  sub init {
+    my $lh = $_[0];  # a newborn handle
+    $lh->SUPER::init();
+    $lh->fail_with('my_clever_failure_handler');
+    return;
+  }
+  sub my_clever_failure_handler {
+    ...you clever things here...
+  }
+
+=head1 HOW TO USE MAKETEXT
+
+Here is a brief checklist on how to use Maketext to localize
+applications:
+
+=over
+
+=item *
+
+Decide what system you'll use for lexicon keys.  If you insist,
+you can use opaque IDs (if you're nostalgic for C<catgets>),
+but I have better suggestions in the
+section "Entries in Each Lexicon", above.  Assuming you opt for
+meaningful keys that double as values (like "Minimum ([_1]) is
+larger than maximum ([_2])!\n"), you'll have to settle on what
+language those should be in.  For the sake of argument, I'll
+call this English, specifically American English, "en-US".
+
+=item *
+
+Create a class for your localization project.  This is
+the name of the class that you'll use in the idiom:
+
+  use Projname::L10N;
+  my $lh = Projname::L10N->get_handle(...) || die "Language?";
+
+Assuming you call your class Projname::L10N, create a class
+consisting minimally of:
+
+  package Projname::L10N;
+  use base qw(Locale::Maketext);
+  ...any methods you might want all your languages to share...
+  
+  # And, assuming you want the base class to be an _AUTO lexicon,
+  # as is discussed a few sections up:
+  %Lexicon = ('_AUTO' => 1);
+  
+  1;
+
+=item *
+
+Create a class for the language your internal keys are in.  Name
+the class after the language-tag for that language, in lowercase,
+with dashes changed to underscores.  Assuming your project's first
+language is US English, you should call this Projname::L10N::en_us.
+It should consist minimally of:
+
+  package Projname::L10N::en_us;
+  use base qw(Projname::L10N);
+  %Lexicon = (
+    '_AUTO' => 1,
+  );
+  1;
+
+(For the rest of this section, I'll assume that this "first
+language class" of Projname::L10N::en_us has
+an _AUTO lexicon.)
+
+=item *
+
+Go and write your program.  Everywhere in your program where 
+you would say:
+
+  print "Foobar $thing stuff\n";
+
+instead do it thru maketext, using no variable interpolation in
+the key:
+
+  print $lh->maketext("Foobar [_1] stuff\n", $thing);
+
+If you get tired of constantly saying C<print $lh-E<gt>maketext>,
+consider making a functional wrapper for it, like so:
+
+  use Projname::L10N;
+  use vars qw($lh);
+  $lh = Projname::L10N->get_handle(...) || die "Language?";
+  sub pmt (@) { print( $lh->maketext(@_)) }
+   # "pmt" is short for "Print MakeText"
+  $Carp::Verbose = 1;
+   # so if maketext fails, we see what made the call to pmt
+
+Besides whole phrases meant for output, anything language-dependent
+should be put into the class Projname::L10N::en_us,
+whether as methods, or as lexicon entries -- this is discussed
+in the section "Entries in Each Lexicon", above.
+
+=item *
+
+Once the program is otherwise done, and once its localization for
+the first language works right (via the data and methods in
+Projname::L10N::en_us), you can get together the data for translation.
+If your first language lexicon isn't an _AUTO lexicon, then you already
+have all the messages explicitly in the lexicon (or else you'd be
+getting exceptions thrown when you call $lh->maketext to get
+messages that aren't in there).  But if you were (advisedly) lazy and are
+using an _AUTO lexicon, then you've got to make a list of all the phrases
+that you've so far been letting _AUTO generate for you.  There are very
+many ways to assemble such a list.  The most straightforward is to simply
+grep the source for every occurrence of "maketext" (or calls
+to wrappers around it, like the above C<pmt> function), and to log the
+following phrase.
+
+=item *
+
+You may at this point want to consider whether your base class
+(Projname::L10N) that all lexicons inherit from (Projname::L10N::en,
+Projname::L10N::es, etc.) should be an _AUTO lexicon.  It may be true
+that in theory, all needed messages will be in each language class;
+but in the presumably unlikely or "impossible" case of lookup failure,
+you should consider whether your program should throw an exception,
+emit text in English (or whatever your project's first language is),
+or some more complex solution as described in the section
+"Controlling Lookup Failure", above.
+
+=item *
+
+Submit all messages/phrases/etc. to translators.
+
+(You may, in fact, want to start with localizing to I<one> other language
+at first, if you're not sure that you've properly abstracted the
+language-dependent parts of your code.)
+
+Translators may request clarification of the situation in which a
+particular phrase is found.  For example, in English we are entirely happy
+saying "I<n> files found", regardless of whether we mean "I looked for files,
+and found I<n> of them" or the rather distinct situation of "I looked for
+something else (like lines in files), and along the way I saw I<n>
+files."  This may involve rethinking things that you thought quite clear:
+should "Edit" on a toolbar be a noun ("editing") or a verb ("to edit")?  Is
+there already a conventionalized way to express that menu option, separate
+from the target language's normal word for "to edit"?
+
+In all cases where the very common phenomenon of quantification
+(saying "I<N> files", for B<any> value of N)
+is involved, each translator should make clear what dependencies the
+number causes in the sentence.  In many cases, dependency is
+limited to words adjacent to the number, in places where you might
+expect them ("I found the-?PLURAL I<N>
+empty-?PLURAL directory-?PLURAL"), but in some cases there are
+unexpected dependencies ("I found-?PLURAL ..."!) as well as long-distance
+dependencies ("The I<N> directory-?PLURAL could not be deleted-?PLURAL"!).
+
+Remind the translators to consider the case where N is 0:
+"0 files found" isn't exactly natural-sounding in any language, but it
+may be unacceptable in many -- or it may condition special
+kinds of agreement (similar to English "I didN'T find ANY files").
+
+Remember to ask your translators about numeral formatting in their
+language, so that you can override the C<numf> method as
+appropriate.  Typical variables in number formatting are:  what to
+use as a decimal point (comma? period?); what to use as a thousands
+separator (space? nonbreaking space? comma? period? small
+middot? prime? apostrophe?); and even whether the so-called "thousands
+separator" is actually for every third digit -- I've heard reports of
+two hundred thousand being expressible as "2,00,000" for some Indian
+(Subcontinental) languages, besides the less surprising "S<200 000>",
+"200.000", "200,000", and "200'000".  Also, using a set of numeral
+glyphs other than the usual ASCII "0"-"9" might be appreciated, as via
+C<tr/0-9/\x{0966}-\x{096F}/> for getting digits in Devanagari script
+(for Hindi, Konkani, others).
+
+The basic C<quant> method that Locale::Maketext provides should be
+good for many languages.  For some languages, it might be useful
+to modify it (or its constituent C<numerate> method)
+to take a plural form in the two-argument call to C<quant>
+(as in "[quant,_1,files]") if
+it's all-around easier to infer the singular form from the plural, than
+to infer the plural form from the singular.
+
+But for other languages (as is discussed at length
+in L<Locale::Maketext::TPJ13|Locale::Maketext::TPJ13>), simple
+C<quant>/C<numerate> is not enough.  For the particularly problematic
+Slavic languages, what you may need is a method which you provide
+with the number, the citation form of the noun to quantify, and
+the case and gender that the sentence's syntax projects onto that
+noun slot.  The method would then be responsible for determining
+what grammatical number that numeral projects onto its noun phrase,
+and what case and gender it may override the normal case and gender
+with; and then it would look up the noun in a lexicon providing
+all needed inflected forms.
+
+=item *
+
+You may also wish to discuss with the translators the question of
+how to relate different subforms of the same language tag,
+considering how this reacts with C<get_handle>'s treatment of
+these.  For example, if a user accepts interfaces in "en, fr", and
+you have interfaces available in "en-US" and "fr", what should
+they get?  You may wish to resolve this by establishing that "en"
+and "en-US" are effectively synonymous, by having one class
+zero-derive from the other.
+
+For some languages this issue may never come up (Danish is rarely
+expressed as "da-DK", but instead is just "da").  And for other
+languages, the whole concept of a "generic" form may verge on
+being uselessly vague, particularly for interfaces involving voice
+media in forms of Arabic or Chinese.
+
+=item *
+
+Once you've localized your program/site/etc. for all desired
+languages, be sure to show the result (whether live, or via
+screenshots) to the translators.  Once they approve, make every
+effort to have it then checked by at least one other speaker of
+that language.  This holds true even when (or especially when) the
+translation is done by one of your own programmers.  Some
+kinds of systems may be harder to find testers for than others,
+depending on the amount of domain-specific jargon and concepts
+involved -- it's easier to find people who can tell you whether
+they approve of your translation for "delete this message" in an
+email-via-Web interface, than to find people who can give you
+an informed opinion on your translation for "attribute value"
+in an XML query tool's interface.
+
+=back
+
+=head1 SEE ALSO
+
+I recommend reading all of these:
+
+L<Locale::Maketext::TPJ13|Locale::Maketext::TPJ13> -- my I<The Perl
+Journal> article about Maketext.  It explains many important concepts
+underlying Locale::Maketext's design, and some insight into why
+Maketext is better than the plain old approach of just having 
+message catalogs that are just databases of sprintf formats.
+
+L<File::Findgrep|File::Findgrep> is a sample application/module
+that uses Locale::Maketext to localize its messages.
+
+L<I18N::LangTags|I18N::LangTags>.
+
+L<Win32::Locale|Win32::Locale>.
+
+RFC 3066, I<Tags for the Identification of Languages>,
+is at http://sunsite.dk/RFC/rfc/rfc3066.html
+
+RFC 2277, I<IETF Policy on Character Sets and Languages>
+is at http://sunsite.dk/RFC/rfc/rfc2277.html -- much of it is
+just things of interest to protocol designers, but it explains
+some basic concepts, like the distinction between locales and
+language-tags.
+
+The manual for GNU C<gettext>.  The gettext dist is available in
+C<ftp://prep.ai.mit.edu/pub/gnu/> -- get
+a recent gettext tarball and look in its "doc/" directory, there's
+an easily browsable HTML version in there.  The
+gettext documentation asks lots of questions worth thinking
+about, even if some of their answers are sometimes wonky,
+particularly where they start talking about pluralization.
+
+The Locale/Maketext.pm source.  Observe that the module is much
+shorter than its documentation!
+
+=head1 COPYRIGHT AND DISCLAIMER
+
+Copyright (c) 1999-2001 Sean M. Burke.  All rights reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+This program is distributed in the hope that it will be useful, but
+without any warranty; without even the implied warranty of
+merchantability or fitness for a particular purpose.
+
+=head1 AUTHOR
+
+Sean M. Burke C<sburke@cpan.org>
+
+=cut
+
+# Zing!
diff --git a/lib/Locale/Maketext/TPJ13.pod b/lib/Locale/Maketext/TPJ13.pod
new file mode 100644
index 0000000000..db22478215
--- /dev/null
+++ b/lib/Locale/Maketext/TPJ13.pod
@@ -0,0 +1,776 @@
+
+# This document contains text in Perl "POD" format.
+# Use a POD viewer like perldoc or perlman to render it.
+
+=head1 NAME
+
+Locale::Maketext::TPJ13 -- article about software localization
+
+=head1 SYNOPSIS
+
+  # This is an article, not a module.
+
+=head1 DESCRIPTION
+
+The following article by Sean M. Burke and Jordan Lachler
+first appeared in I<The Perl
+Journal> #13 and is copyright 1999 The Perl Journal. It appears
+courtesy of Jon Orwant and The Perl Journal.  This document may be
+distributed under the same terms as Perl itself.
+
+=head1 Localization and Perl: gettext breaks, Maketext fixes
+
+by Sean M. Burke and Jordan Lachler
+
+This article points out cases where gettext (a common system for
+localizing software interfaces -- i.e., making them work in the user's
+language of choice) fails because of basic differences between human
+languages.  This article then describes Maketext, a new system capable
+of correctly treating these differences.
+
+=head2 A Localization Horror Story: It Could Happen To You
+
+=over
+
+"There are a number of languages spoken by human beings in this
+world."
+
+-- Harald Tveit Alvestrand, in RFC 1766, "Tags for the
+Identification of Languages"
+
+=back
+
+Imagine that your task for the day is to localize a piece of software
+-- and luckily for you, the only output the program emits is two
+messages, like this:
+
+  I scanned 12 directories.
+
+  Your query matched 10 files in 4 directories.
+
+So how hard could that be?  You look at the code that
+produces the first item, and it reads:
+
+  printf("I scanned %g directories.",
+         $directory_count);
+
+You think about that, and realize that it doesn't even work right for
+English, as it can produce this output:
+
+  I scanned 1 directories.
+
+So you rewrite it to read:
+
+  printf("I scanned %g %s.",
+         $directory_count,
+         $directory_count == 1 ?
+           "directory" : "directories",
+  );
+
+...which does the Right Thing.  (In case you don't recall, "%g" is for
+locale-specific number interpolation, and "%s" is for string
+interpolation.)
+
+But you still have to localize it for all the languages you're
+producing this software for, so you pull Locale::gettext off of CPAN
+so you can access the C<gettext> C functions you've heard are standard
+for localization tasks.
+
+And you write:
+
+  printf(gettext("I scanned %g %s."),
+         $dir_scan_count,
+         $dir_scan_count == 1 ?
+           gettext("directory") : gettext("directories"),
+  );
+
+But you then read in the gettext manual (Drepper, Miller, and Pinard 1995)
+that this is not a good idea, since how a single word like "directory"
+or "directories" is translated may depend on context -- and this is
+true, since in a case language like German or Russian, you may need
+these words with a different case ending in the first instance (where the
+word is the object of a verb) than in the second instance, which you haven't even
+gotten to yet (where the word is the object of a preposition, "in %g
+directories") -- assuming these keep the same syntax when translated
+into those languages.
+
+So, on the advice of the gettext manual, you rewrite:
+
+  printf( $dir_scan_count == 1 ?
+           gettext("I scanned %g directory.") :
+           gettext("I scanned %g directories."),
+         $dir_scan_count );
+
+So, you email your various translators (the boss decides that the
+languages du jour are Chinese, Arabic, Russian, and Italian, so you
+have one translator for each), asking for translations for "I scanned
+%g directory." and "I scanned %g directories.".  When they reply,
+you'll put that in the lexicons for gettext to use when it localizes
+your software, so that when the user is running under the "zh"
+(Chinese) locale, gettext("I scanned %g directory.") will return the
+appropriate Chinese text, with a "%g" in there where printf can then
+interpolate $dir_scan_count.
+
+Your Chinese translator emails right back -- he says both of these
+phrases translate to the same thing in Chinese, because, in linguistic
+jargon, Chinese "doesn't have number as a grammatical category" --
+whereas English does.  That is, English has grammatical rules that
+refer to "number", i.e., whether something is grammatically singular
+or plural; and one of these rules is the one that forces nouns to take
+a plural suffix (generally "s") when in a plural context, as they are when
+they follow a number other than "one" (including, oddly enough, "zero").
+Chinese has no such rules, and so has just the one phrase where English
+has two.  But, no problem, you can have this one Chinese phrase appear
+as the translation for the two English phrases in the "zh" gettext
+lexicon for your program.
+
+Emboldened by this, you dive into the second phrase that your software
+needs to output: "Your query matched 10 files in 4 directories.".  You notice
+that if you want to treat phrases as indivisible, as the gettext
+manual wisely advises, you need four cases now, instead of two, to
+cover the permutations of singular and plural on the two items,
+$directory_count and $file_count.  So you try this:
+
+  printf( $file_count == 1 ?
+    ( $directory_count == 1 ?
+     gettext("Your query matched %g file in %g directory.") :
+     gettext("Your query matched %g file in %g directories.") ) :
+    ( $directory_count == 1 ?
+     gettext("Your query matched %g files in %g directory.") :
+     gettext("Your query matched %g files in %g directories.") ),
+   $file_count, $directory_count,
+  );
+
+(The case of "1 file in 2 [or more] directories" could, I suppose,
+occur in the case of symlinking or something of the sort.)
+
+It occurs to you that this is not the prettiest code you've ever
+written, but this seems the way to go.  You mail off to the
+translators asking for translations for these four cases.  The
+Chinese guy replies with the one phrase that these all translate to in
+Chinese, and that phrase has two "%g"s in it, as it should -- but
+there's a problem.  He translates it word-for-word back: "To your
+question, in %g directories you would find %g answers."  The "%g"
+slots are in an order reverse to what they are in English.  You wonder
+how you'll get gettext to handle that.
+
+But you put it aside for the moment, and optimistically hope that the
+other translators won't have this problem, and that their languages
+will be better behaved -- i.e., that they will be just like English.
+
+But the Arabic translator is the next to write back.  First off, your
+code for "I scanned %g directory." or "I scanned %g directories."
+assumes there's only singular or plural.  But, to use linguistic
+jargon again, Arabic has grammatical number, like English (but unlike
+Chinese), but it's a three-term category: singular, dual, and plural.
+In other words, the way you say "directory" depends on whether there's
+one directory, or I<two> of them, or I<more than two> of them.  Your
+test of C<($directory_count == 1)> no longer does the job.  And it means
+that where English's grammatical category of number necessitates
+only the two permutations of the first sentence based on "directory
+[singular]" and "directories [plural]", Arabic has three -- and,
+worse, in the second sentence ("Your query matched %g file in %g
+directory."), where English has four, Arabic has nine.  You sense
+an unwelcome, exponential trend taking shape.
+
+Your Italian translator emails you back and says that "I searched 0
+directories" (a possible English output of your program) is stilted,
+and if you think that's fine English, that's your problem, but that
+I<just will not do> in the language of Dante.  He insists that where
+$directory_count is 0, your program should produce the Italian text
+for "I I<didn't> scan I<any> directories.".  And ditto for "I didn't
+match any files in any directories", although he says the last part
+about "in any directories" should probably just be left off.
+
+You wonder how you'll get gettext to handle this; to accommodate the
+ways Arabic, Chinese, and Italian deal with numbers in just these few
+very simple phrases, you need to write code that will ask gettext for
+different queries depending on whether the numerical values in
+question are 1, 2, more than 2, or in some cases 0, and you still haven't
+figured out the problem with the different word order in Chinese.
+
+Then your Russian translator calls on the phone, to I<personally> tell
+you the bad news about how really unpleasant your life is about to
+become:
+
+Russian, like German or Latin, is an inflectional language; that is, nouns
+and adjectives have to take endings that depend on their case
+(i.e., nominative, accusative, genitive, etc...) -- which is roughly a matter of
+what role they have in syntax of the sentence --
+as well as on the grammatical gender (i.e., masculine, feminine, neuter)
+and number (i.e., singular or plural) of the noun, as well as on the
+declension class of the noun.  But unlike with most other inflected languages,
+putting a number-phrase (like "ten" or "forty-three", or their Arabic
+numeral equivalents) in front of a noun in Russian can change the case and
+number that noun is, and therefore the endings you have to put on it.
+
+He elaborates:  In "I scanned %g directories", you'd I<expect>
+"directories" to be in the accusative case (since it is the direct
+object in the sentence) and the plural number,
+except where $directory_count is 1, then you'd expect the singular, of
+course.  Just like Latin or German.  I<But!>  Where $directory_count %
+10 is 1 ("%" for modulo, remember), assuming $directory_count is an
+integer, and except where $directory_count % 100 is 11, "directories"
+is forced to become grammatically singular, which means it gets the
+ending for the accusative singular...  You begin to visualize the code
+it'd take to test for the problem so far, I<and still work for Chinese
+and Arabic and Italian>, and how many gettext items that'd take, but
+he keeps going...  But where $directory_count % 10 is 2, 3, or 4
+(except where $directory_count % 100 is 12, 13, or 14), the word for
+"directories" is forced to be genitive singular -- which means another
+ending... The room begins to spin around you, slowly at first...  But
+with I<all other> integer values, since "directory" is an inanimate
+noun, when preceded by a number and in the nominative or accusative
+cases (as it is here, just your luck!), it does stay plural, but it is
+forced into the genitive case -- yet another ending...  And
+you never hear him get to the part about how you're going to run into
+similar (but maybe subtly different) problems with other Slavic
+languages like Polish, because the floor comes up to meet you, and you
+fade into unconsciousness.
+
+
+The above cautionary tale relates how an attempt at localization can
+lead from programmer consternation, to program obfuscation, to a need
+for sedation.  But careful evaluation shows that your choice of tools
+merely needed further consideration.
+
+=head2 The Linguistic View
+
+=over
+
+"It is more complicated than you think." 
+
+-- The Eighth Networking Truth, from RFC 1925
+
+=back
+
+The field of Linguistics has expended a great deal of effort over the
+past century trying to find grammatical patterns which hold across
+languages; it's been a constant process
+of people making generalizations that should apply to all languages,
+only to find out that, all too often, these generalizations fail --
+sometimes failing for just a few languages, sometimes whole classes of
+languages, and sometimes nearly every language in the world except
+English.  Broad statistical trends are evident in what the "average
+language" is like as far as what its rules can look like, must look
+like, and cannot look like.  But the "average language" is just as
+unreal a concept as the "average person" -- it runs up against the
+fact no language (or person) is, in fact, average.  The wisdom of past
+experience leads us to believe that any given language can do whatever
+it wants, in any order, with appeal to any kind of grammatical
+categories it wants -- case, number, tense, real or metaphoric
+characteristics of the things that words refer to, arbitrary or
+predictable classifications of words based on what endings or prefixes
+they can take, degree or means of certainty about the truth of
+statements expressed, and so on, ad infinitum.
+
+Mercifully, most localization tasks are a matter of finding ways to
+translate whole phrases, generally sentences, where the context is
+relatively set, and where the only variation in content is I<usually>
+in a number being expressed -- as in the example sentences above.
+Translating specific, fully-formed sentences is, in practice, fairly
+foolproof -- which is good, because that's what's in the phrasebooks
+that so many tourists rely on.  Now, a given phrase (whether in a
+phrasebook or in a gettext lexicon) in one language I<might> have a
+greater or lesser applicability than that phrase's translation into
+another language -- for example, strictly speaking, in Arabic, the
+"your" in "Your query matched..." would take a different form
+depending on whether the user is male or female; so the Arabic
+translation "your[feminine] query" is applicable in fewer cases than
+the corresponding English phrase, which doesn't distinguish the user's
+gender.  (In practice, it's not feasible to have a program know the
+user's gender, so the masculine "you" in Arabic is usually used, by
+default.)
+
+But in general, such surprises are rare when entire sentences are
+being translated, especially when the functional context is restricted
+to that of a computer interacting with a user either to convey a fact
+or to prompt for a piece of information.  So, for purposes of
+localization, translation by phrase (generally by sentence) is both the
+simplest and the least problematic.
+
+=head2 Breaking gettext
+
+=over
+
+"It Has To Work."
+
+-- First Networking Truth, RFC 1925
+
+=back
+
+Consider that sentences in a tourist phrasebook are of two types: ones
+like "How do I get to the marketplace?" that don't have any blanks to
+fill in, and ones like "How much do these ___ cost?", where there's
+one or more blanks to fill in (and these are usually linked to a
+list of words that you can put in that blank: "fish", "potatoes",
+"tomatoes", etc.)  The ones with no blanks are no problem, but the
+fill-in-the-blank ones may not be really straightforward. If it's a
+Swahili phrasebook, for example, the authors probably didn't bother to
+tell you the complicated ways that the verb "cost" changes its
+inflectional prefix depending on the noun you're putting in the blank.
+The trader in the marketplace will still understand what you're saying if
+you say "how much do these potatoes cost?" with the wrong
+inflectional prefix on "cost".  After all, I<you> can't speak proper Swahili,
+I<you're> just a tourist.  But while tourists can be stupid, computers
+are supposed to be smart; the computer should be able to fill in the
+blank, and still have the results be grammatical.
+
+In other words, a phrasebook entry takes some values as parameters
+(the things that you fill in the blank or blanks), and provides a value
+based on these parameters, where the way you get that final value from
+the given values can, properly speaking, involve an arbitrarily
+complex series of operations.  (In the case of Chinese, it'd be not at
+all complex, at least in cases like the examples at the beginning of
+this article; whereas in the case of Russian it'd be a rather complex
+series of operations.  And in some languages, the
+complexity could be spread around differently: while the act of
+putting a number-expression in front of a noun phrase might not be
+complex by itself, it may change how you have to, for example, inflect
+a verb elsewhere in the sentence.  This is what in syntax is called
+"long-distance dependencies".)
+
+This talk of parameters and arbitrary complexity is just another way
+to say that an entry in a phrasebook is what in a programming language
+would be called a "function".  Just so you don't miss it, this is the
+crux of this article: I<A phrase is a function; a phrasebook is a
+bunch of functions.>
+
+The reason that using gettext runs into walls (as in the above
+second-person horror story) is that you're trying to use a string (or
+worse, a choice among a bunch of strings) to do what you really need a
+function for -- which is futile.  Performing (s)printf interpolation
+on the strings which you get back from gettext does allow you to do I<some>
+common things passably well... sometimes... sort of; but, to paraphrase
+what some people say about C<csh> script programming, "it fools you
+into thinking you can use it for real things, but you can't, and you
+don't discover this until you've already spent too much time trying,
+and by then it's too late."
+
+=head2 Replacing gettext
+
+So, what needs to replace gettext is a system that supports lexicons
+of functions instead of lexicons of strings.  An entry in a lexicon
+from such a system should I<not> look like this:
+
+  "J'ai trouv\xE9 %g fichiers dans %g r\xE9pertoires"
+
+[\xE9 is e-acute in Latin-1.  Some pod renderers would
+scream if I used the actual character here. -- SB]
+
+but instead like this, bearing in mind that this is just a first stab:
+
+  sub I_found_X1_files_in_X2_directories {
+    my( $files, $dirs ) = @_[0,1];
+    $files = sprintf("%g %s", $files,
+      $files == 1 ? 'fichier' : 'fichiers');
+    $dirs = sprintf("%g %s", $dirs,
+      $dirs == 1 ? "r\xE9pertoire" : "r\xE9pertoires");
+    return "J'ai trouv\xE9 $files dans $dirs.";
+  }
+
+Now, there's no particularly obvious way to store anything but strings
+in a gettext lexicon; so it looks like we just have to start over and
+make something better, from scratch.  I call my shot at a
+gettext-replacement system "Maketext", or, in CPAN terms,
+Locale::Maketext.
+
+When designing Maketext, I chose to plan its main features in terms of
+"buzzword compliance".  And here are the buzzwords:
+
+=head2 Buzzwords: Abstraction and Encapsulation
+
+The complexity of the language you're trying to output a phrase in is
+entirely abstracted inside (and encapsulated within) the Maketext module
+for that interface.  When you call:
+
+  print $lang->maketext("You have [quant,_1,piece] of new mail.",
+                       scalar(@messages));
+
+you don't know (and in fact can't easily find out) whether this will
+involve lots of figuring, as in Russian (if $lang is a handle to the
+Russian module), or relatively little, as in Chinese.  That kind of
+abstraction and encapsulation may encourage other pleasant buzzwords
+like modularization and stratification, depending on what design
+decisions you make.
+
+=head2 Buzzword: Isomorphism
+
+"Isomorphism" means "having the same structure or form"; in discussions
+of program design, the word takes on the special, specific meaning that
+your implementation of a solution to a problem I<has the same
+structure> as, say, an informal verbal description of the solution, or
+maybe of the problem itself.  Isomorphism is, all things considered,
+a good thing -- it's what problem-solving (and solution-implementing)
+should look like.
+
+What's wrong with gettext-using code like this...
+
+  printf( $file_count == 1 ?
+    ( $directory_count == 1 ?
+     "Your query matched %g file in %g directory." :
+     "Your query matched %g file in %g directories." ) :
+    ( $directory_count == 1 ?
+     "Your query matched %g files in %g directory." :
+     "Your query matched %g files in %g directories." ),
+   $file_count, $directory_count,
+  );
+
+is first off that it's not well abstracted -- these ways of testing
+for grammatical number (as in the expressions like C<foo == 1 ?
+singular_form : plural_form>) should be abstracted to each language
+module, since how you get grammatical number is language-specific.
+
+But second off, it's not isomorphic -- the "solution" (i.e., the
+phrasebook entries) for Chinese maps from these four English phrases to
+the one Chinese phrase that fits for all of them.  In other words, the
+informal solution would be "The way to say what you want in Chinese is
+with the one phrase 'For your question, in Y directories you would
+find X files'" -- and so the implemented solution should be,
+isomorphically, just a straightforward way to spit out that one
+phrase, with numerals properly interpolated.  It shouldn't have to map
+from the complexity of other languages to the simplicity of this one.
+
+=head2 Buzzword: Inheritance
+
+There's a great deal of reuse possible for sharing of phrases between
+modules for related dialects, or for sharing of auxiliary functions
+between related languages.  (By "auxiliary functions", I mean
+functions that don't produce phrase-text, but which, say, return an
+answer to "does this number require a plural noun after it?".  Such
+auxiliary functions would be used in the internal logic of functions
+that actually do produce phrase-text.)
+
+In the case of sharing phrases, consider that you have an interface
+already localized for American English (probably by having been
+written with that as the native locale, but that's incidental).
+Localizing it for UK English should, in practical terms, be just a
+matter of running it past a British person with the instructions to
+indicate what few phrases would benefit from a change in spelling or
+possibly minor rewording.  In that case, you should be able to put in
+the UK English localization module I<only> those phrases that are
+UK-specific, and for all the rest, I<inherit> from the American
+English module.  (And I expect this same situation would apply with
+Brazilian and Continental Portuguese, possibly with some I<very>
+closely related languages like Czech and Slovak, and possibly with the
+slightly different "versions" of written Mandarin Chinese, as I hear exist in
+Taiwan and mainland China.)
+
+As to sharing of auxiliary functions, consider the problem of Russian
+numbers from the beginning of this article; obviously, you'd want to
+write only once the hairy code that, given a numeric value, would
+return some specification of which case and number a given quantified
+noun should use.  But suppose that you discover, while localizing an
+interface for, say, Ukrainian (a Slavic language related to Russian,
+spoken by several million people, many of whom would be relieved to
+find that your Web site's or software's interface is available in
+their language), that the rules in Ukrainian are the same as in Russian
+for quantification, and probably for many other grammatical functions.
+While there may well be no phrases in common between Russian and
+Ukrainian, you could still choose to have the Ukrainian module inherit
+from the Russian module, just for the sake of inheriting all the
+various grammatical methods.  Or, probably better organizationally,
+you could move those functions to a module called C<_E_Slavic> or
+something, which Russian and Ukrainian could inherit useful functions
+from, but which would (presumably) provide no lexicon.
+
+=head2 Buzzword: Concision
+
+Okay, concision isn't a buzzword.  But it should be, so I decree that
+as a new buzzword, "concision" means that simple common things should
+be expressible in very few lines (or maybe even just a few characters)
+of code -- call it a special case of "making simple things easy and
+hard things possible", and see also the role it played in the
+MIDI::Simple language, discussed elsewhere in this issue [TPJ#13].
+
+Consider our first stab at an entry in our "phrasebook of functions":
+
+  sub I_found_X1_files_in_X2_directories {
+    my( $files, $dirs ) = @_[0,1];
+    $files = sprintf("%g %s", $files,
+      $files == 1 ? 'fichier' : 'fichiers');
+    $dirs = sprintf("%g %s", $dirs,
+      $dirs == 1 ? "r\xE9pertoire" : "r\xE9pertoires");
+    return "J'ai trouv\xE9 $files dans $dirs.";
+  }
+
+You may sense that a lexicon (to use a non-committal catch-all term for a
+collection of things you know how to say, regardless of whether they're
+phrases or words) consisting of functions I<expressed> as above would
+make for rather long-winded and repetitive code -- even if you wisely
+rewrote this to have quantification (as we call adding a number
+expression to a noun phrase) be a function called like:
+
+  sub I_found_X1_files_in_X2_directories {
+    my( $files, $dirs ) = @_[0,1];
+    $files = quant($files, "fichier");
+    $dirs =  quant($dirs,  "r\xE9pertoire");
+    return "J'ai trouv\xE9 $files dans $dirs.";
+  }
+
+And you may also sense that you do not want to bother your translators
+with having to write Perl code -- you'd much rather that they spend
+their I<very costly time> on just translation.  And this is to say
+nothing of the near impossibility of finding a commercial translator
+who would know even simple Perl.
+
+In a first-hack implementation of Maketext, each language-module's
+lexicon looked like this:
+
+ %Lexicon = (
+   "I found %g files in %g directories"
+   => sub {
+      my( $files, $dirs ) = @_[0,1];
+      $files = quant($files, "fichier");
+      $dirs =  quant($dirs,  "r\xE9pertoire");
+      return "J'ai trouv\xE9 $files dans $dirs.";
+    },
+  ... and so on with other phrase => sub mappings ...
+ );
+
+but I immediately went looking for some more concise way to basically
+denote the same phrase-function -- a way that would also serve to
+concisely denote I<most> phrase-functions in the lexicon for I<most>
+languages.  After much time and even some actual thought, I decided on
+this system:
+
+* Where a value in a %Lexicon hash is a contentful string instead of
+an anonymous sub (or, conceivably, a coderef), it would be interpreted
+as a sort of shorthand expression of what the sub does.  When accessed
+for the first time in a session, it is parsed, turned into Perl code,
+and then eval'd into an anonymous sub; then that sub replaces the
+original string in that lexicon.  (That way, the work of parsing and
+evaling the shorthand form for a given phrase is done no more than
+once per session.)
+
+* Calls to C<maketext> (as Maketext's main function is called) happen
+thru a "language session handle", notionally very much like an IO
+handle, in that you open one at the start of the session, and use it
+for "sending signals" to an object in order to have it return the text
+you want.
+
+So, this:
+
+  $lang->maketext("You have [quant,_1,piece] of new mail.",
+                 scalar(@messages));
+
+basically means this: look in the lexicon for $lang (which may inherit
+from any number of other lexicons), and find the function that we
+happen to associate with the string "You have [quant,_1,piece] of new
+mail" (which is, and should be, a functioning "shorthand" for this
+function in the native locale -- English in this case).  If you find
+such a function, call it with $lang as its first parameter (as if it
+were a method), and then a copy of scalar(@messages) as its second,
+and then return that value.  If that function was found, but was in
+string shorthand instead of being a fully specified function, parse it
+and make it into a function before calling it the first time.
+
+* The shorthand uses code in brackets to indicate method calls that
+should be performed.  A full explanation is not in order here, but a
+few examples will suffice:
+
+  "You have [quant,_1,piece] of new mail."
+
+The above code is shorthand for, and will be interpreted as,
+this:
+
+  sub {
+    my $handle = $_[0];
+    my(@params) = @_;
+    return join '',
+      "You have ",
+      $handle->quant($params[1], 'piece'),
+      " of new mail.";
+  }
+
+where "quant" is the name of a method you're using to quantify the
+noun "piece" with the number $params[1].
+
+A string with no brackety calls, like this:
+
+  "Your search expression was malformed."
+
+is somewhat of a degenerate case, and just gets turned into:
+
+  sub { return "Your search expression was malformed." }
+
+However, not everything you can write in Perl code can be written in
+the above shorthand system -- not by a long shot.  For example, consider
+the Italian translator from the beginning of this article, who wanted
+the Italian for "I didn't find any files" as a special case, instead
+of "I found 0 files".  That couldn't be specified (at least not easily
+or simply) in our shorthand system, and it would have to be written
+out in full, like this:
+
+  sub {  # pretend the English strings are in Italian
+    my($handle, $files, $dirs) = @_[0,1,2];
+    return "I didn't find any files" unless $files;
+    return join '',
+      "I found ",
+      $handle->quant($files, 'file'),
+      " in ",
+      $handle->quant($dirs,  'directory'),
+      ".";
+  }
+
+Next to a lexicon full of shorthand code, that sort of sticks out like a
+sore thumb -- but this I<is> a special case, after all; and at least
+it's possible, if not as concise as usual.
+
+As to how you'd implement the Russian example from the beginning of
+the article, well, There's More Than One Way To Do It, but it could be
+something like this (using English words for Russian, just so you know
+what's going on):
+
+  "I [quant,_1,directory,accusative] scanned."
+
+This shifts the burden of complexity off to the quant method.  That
+method's parameters are: the numeric value it's going to use to
+quantify something; the Russian word it's going to quantify; and the
+parameter "accusative", which you're using to mean that this
+sentence's syntax wants a noun in the accusative case there, although
+that quantification method may have to overrule, for grammatical
+reasons you may recall from the beginning of this article.
+
+Now, the Russian quant method here is responsible not only for
+implementing the strange logic necessary for figuring out how Russian
+number-phrases impose case and number on their noun-phrases, but also
+for inflecting the Russian word for "directory".  How that inflection
+is to be carried out is no small issue, and among the solutions I've
+seen, some (like variations on a simple lookup in a hash where all
+possible forms are provided for all necessary words) are
+straightforward but I<can> become cumbersome when you need to inflect
+more than a few dozen words; and other solutions (like using
+algorithms to model the inflections, storing only root forms and
+irregularities) I<can> involve more overhead than is justifiable for
+all but the largest lexicons.
+
+Mercifully, this design decision becomes crucial only in the hairiest
+of inflected languages, of which Russian is by no means the I<worst> case
+scenario, but is worse than most.  Most languages have simpler
+inflection systems; for example, in English or Swahili, there are
+generally no more than two possible inflected forms for a given noun
+("error/errors"; "kosa/makosa"), and the
+rules for producing these forms are fairly simple -- or at least,
+simple rules can be formulated that work for most words, and you can
+then treat the exceptions as just "irregular", at least relative to
+your ad hoc rules.  A simpler inflection system (simpler rules, fewer
+forms) means that design decisions are less crucial to maintaining
+sanity, whereas the same decisions could incur
+overhead-versus-scalability problems in languages like Russian.  It
+may I<also> be likely that code (possibly in Perl, as with
+Lingua::EN::Inflect, for English nouns) has already
+been written for the language in question, whether simple or complex.
+
+Moreover, a third possibility may even be simpler than anything
+discussed above: "Just require that all possible (or at least
+applicable) forms be provided in the call to the given language's quant
+method, as in:"
+
+  "I found [quant,_1,file,files]."
+
+That way, quant just has to choose which form it needs, without having
+to look up or generate anything.  While possibly not optimal for
+Russian, this should work well for most other languages, where
+quantification is not as complicated an operation.
+
+=head2 The Devil in the Details
+
+There's plenty more to Maketext than described above -- for example,
+there's the details of how language tags ("en-US", "x-cree", "fi",
+etc.) or locale IDs ("en_US") interact with actual module naming
+("BogoQuery/Locale/en_us.pm"), and what magic can ensue; there's the
+details of how to record (and possibly negotiate) what character
+encoding Maketext will return text in (UTF8? Latin-1? KOI8?).  There's
+the interesting fact that Maketext is for localization, but nowhere
+actually has a "C<use locale;>" anywhere in it.  For the curious,
+there's the somewhat frightening details of how I actually
+implement something like data inheritance so that searches across
+modules' %Lexicon hashes can parallel how Perl implements method
+inheritance.
+
+And, most importantly, there's all the practical details of how to
+actually go about deriving from Maketext so you can use it for your
+interfaces, and the various tools and conventions for starting out and
+maintaining individual language modules.
+
+That is all covered in the documentation for Locale::Maketext and the
+modules that come with it, available in CPAN.  After having read this
+article, which covers the why's of Maketext, the documentation,
+which covers the how's of it, should be quite straightforward.
+
+=head2 The Proof in the Pudding: Localizing Web Sites
+
+Maketext and gettext have a notable difference: gettext is in C,
+accessible thru C library calls, whereas Maketext is in Perl, and
+really can't work without a Perl interpreter (although I suppose
+something like it could be written for C).  Accidents of history (and
+not necessarily lucky ones) have made C++ the most common language for
+the implementation of applications like word processors, Web browsers,
+and even many in-house applications like custom query systems.  Current
+conditions make it somewhat unlikely that the next one of any of these
+kinds of applications will be written in Perl, albeit clearly more for
+reasons of custom and inertia than out of consideration of what is the
+right tool for the job.
+
+However, other accidents of history have made Perl a well-accepted
+language for design of server-side programs (generally in CGI form)
+for Web site interfaces.  Localization of static pages in Web sites is
+trivial, feasible either with simple language-negotiation features in
+servers like Apache, or with some kind of server-side inclusions of
+language-appropriate text into layout templates.  However, I think
+that the localization of Perl-based search systems (or other kinds of
+dynamic content) in Web sites, be they public or access-restricted,
+is where Maketext will see the greatest use.
+
+I presume that it would be only the exceptional Web site that gets
+localized for English I<and> Chinese I<and> Italian I<and> Arabic
+I<and> Russian, to recall the languages from the beginning of this
+article -- to say nothing of German, Spanish, French, Japanese,
+Finnish, and Hindi, to name a few languages that benefit from large
+numbers of programmers or Web viewers or both.
+
+However, the ever-increasing internationalization of the Web (whether
+measured in terms of amount of content, of numbers of content writers
+or programmers, or of size of content audiences) makes it increasingly
+likely that the interface to the average Web-based dynamic content
+service will be localized for two or maybe three languages.  It is my
+hope that Maketext will make that task as simple as possible, and will
+remove previous barriers to localization for languages dissimilar to
+English.
+
+ __END__
+
+Sean M. Burke (sburkeE<64>cpan.org) has a Master's in linguistics
+from Northwestern University; he specializes in language technology.
+Jordan Lachler (lachlerE<64>unm.edu) is a PhD student in the Department of
+Linguistics at the University of New Mexico; he specializes in
+morphology and pedagogy of North American native languages.
+
+=head2 References
+
+Alvestrand, Harald Tveit.  1995.  I<RFC 1766: Tags for the
+Identification of Languages.>
+C<ftp://ftp.isi.edu/in-notes/rfc1766.txt>
+[Now see RFC 3066.]
+
+Callon, Ross, editor.  1996.  I<RFC 1925: The Twelve
+Networking Truths.>
+C<ftp://ftp.isi.edu/in-notes/rfc1925.txt>
+
+Drepper, Ulrich, Peter Miller,
+and FranE<ccedil>ois Pinard.  1995-2001.  GNU
+C<gettext>.  Available in C<ftp://prep.ai.mit.edu/pub/gnu/>, with
+extensive docs in the distribution tarball.  [Since
+I wrote this article in 1998, I now see that the
+gettext docs are now trying more to come to terms with
+plurality.  Whether useful conclusions have come from it
+is another question altogether. -- SMB, May 2001]
+
+Forbes, Nevill.  1964.  I<Russian Grammar.>  Third Edition, revised
+by J. C. Dumbreck.  Oxford University Press.
+
+=cut
+
+#End
+
author	Jarkko Hietaniemi <jhi@iki.fi>	2001-05-27 13:50:57 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2001-05-27 13:50:57 +0000
commit	9378c5814a1c38be33358baa5cfd56712c3b71d4 (patch)
tree	712d3a8e3142e76139998b3f83f430343ce1e173 /lib/Locale
parent	4b053158ffba5bda82094dc0b0cd80c9d2867b97 (diff)
download	perl-9378c5814a1c38be33358baa5cfd56712c3b71d4.tar.gz