diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2002-02-19 13:49:52 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2002-02-19 13:49:52 +0000 |
commit | 6b14ceb78325f07f9267f4d0b22adc748311a9a0 (patch) | |
tree | 6de8dd195cd1e3aa0c3fe5a060f93fc6b62b275a /lib/Locale/Script.pm | |
parent | 5d6b5b93661d8e778bcde761acff338795b8e86c (diff) | |
download | perl-6b14ceb78325f07f9267f4d0b22adc748311a9a0.tar.gz |
Upgrade to Locale::Codes 2.01.
p4raw-id: //depot/perl@14770
Diffstat (limited to 'lib/Locale/Script.pm')
-rw-r--r-- | lib/Locale/Script.pm | 305 |
1 files changed, 34 insertions, 271 deletions
diff --git a/lib/Locale/Script.pm b/lib/Locale/Script.pm index a7168fe929..6b75afbc07 100644 --- a/lib/Locale/Script.pm +++ b/lib/Locale/Script.pm @@ -1,111 +1,13 @@ -#----------------------------------------------------------------------- - -=head1 NAME - -Locale::Script - ISO codes for script identification (ISO 15924) - -=head1 SYNOPSIS - - use Locale::Script; - use Locale::Constants; - - $script = code2script('ph'); # 'Phoenician' - $code = script2code('Tibetan'); # 'bo' - $code3 = script2code('Tibetan', - LOCALE_CODE_ALPHA_3); # 'bod' - $codeN = script2code('Tibetan', - LOCALE_CODE_ALPHA_NUMERIC); # 330 - - @codes = all_script_codes(); - @scripts = all_script_names(); - -=cut - -#----------------------------------------------------------------------- +# +# Locale::Script - ISO codes for script identification (ISO 15924) +# +# $Id: Script.pm,v 2.1 2002/02/06 04:07:10 neilb Exp $ +# package Locale::Script; use strict; require 5.002; -#----------------------------------------------------------------------- - -=head1 DESCRIPTION - -The C<Locale::Script> module provides access to the ISO -codes for identifying scripts, as defined in ISO 15924. -For example, Egyptian hieroglyphs are denoted by the two-letter -code 'eg', the three-letter code 'egy', and the numeric code 050. - -You can either access the codes via the conversion routines -(described below), or with the two functions which return lists -of all script codes or all script names. - -There are three different code sets you can use for identifying -scripts: - -=over 4 - -=item B<alpha-2> - -Two letter codes, such as 'bo' for Tibetan. -This code set is identified with the symbol C<LOCALE_CODE_ALPHA_2>. - -=item B<alpha-3> - -Three letter codes, such as 'ell' for Greek. -This code set is identified with the symbol C<LOCALE_CODE_ALPHA_3>. - -=item B<numeric> - -Numeric codes, such as 410 for Hiragana. -This code set is identified with the symbol C<LOCALE_CODE_NUMERIC>. - -=back - -All of the routines take an optional additional argument -which specifies the code set to use. -If not specified, it defaults to the two-letter codes. -This is partly for backwards compatibility (previous versions -of Locale modules only supported the alpha-2 codes), and -partly because they are the most widely used codes. - -The alpha-2 and alpha-3 codes are not case-dependent, -so you can use 'BO', 'Bo', 'bO' or 'bo' for Tibetan. -When a code is returned by one of the functions in -this module, it will always be lower-case. - -=head2 SPECIAL CODES - -The standard defines various special codes. - -=over 4 - -=item * - -The standard reserves codes in the ranges B<qa> - B<qt>, -B<qaa> - B<qat>, and B<900> - B<919>, for private use. - -=item * - -B<zx>, B<zxx>, and B<997>, are the codes for unwritten languages. - -=item * - -B<zy>, B<zyy>, and B<998>, are the codes for an undetermined script. - -=item * - -B<zz>, B<zzz>, and B<999>, are the codes for an uncoded script. - -=back - -The private codes are not recognised by Locale::Script, -but the others are. - -=cut - -#----------------------------------------------------------------------- - require Exporter; use Carp; use Locale::Constants; @@ -115,7 +17,7 @@ use Locale::Constants; # Public Global Variables #----------------------------------------------------------------------- use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); -$VERSION = sprintf("%d.%02d", q$Revision: 2.0 $ =~ /(\d+)\.(\d+)/); +$VERSION = sprintf("%d.%02d", q$Revision: 2.1 $ =~ /(\d+)\.(\d+)/); @ISA = qw(Exporter); @EXPORT = qw(code2script script2code all_script_codes all_script_names @@ -130,54 +32,9 @@ my $COUNTRIES = []; #======================================================================= - -=head1 CONVERSION ROUTINES - -There are three conversion routines: C<code2script()>, C<script2code()>, -and C<script_code2code()>. - -=over 8 - -=item code2script( CODE, [ CODESET ] ) - -This function takes a script code and returns a string -which contains the name of the script identified. -If the code is not a valid script code, as defined by ISO 15924, -then C<undef> will be returned: - - $script = code2script('cy'); # Cyrillic - -=item script2code( STRING, [ CODESET ] ) - -This function takes a script name and returns the corresponding -script code, if such exists. -If the argument could not be identified as a script name, -then C<undef> will be returned: - - $code = script2code('Gothic', LOCALE_CODE_ALPHA_3); - # $code will now be 'gth' - -The case of the script name is not important. -See the section L<KNOWN BUGS AND LIMITATIONS> below. - -=item script_code2code( CODE, CODESET, CODESET ) - -This function takes a script code from one code set, -and returns the corresponding code from another code set. - - $alpha2 = script_code2code('jwi', - LOCALE_CODE_ALPHA_3 => LOCALE_CODE_ALPHA_2); - # $alpha2 will now be 'jw' (Javanese) - -If the code passed is not a valid script code in -the first code set, or if there isn't a code for the -corresponding script in the second code set, -then C<undef> will be returned. - -=back - -=cut - +# +# code2script ( CODE [, CODESET ] ) +# #======================================================================= sub code2script { @@ -216,6 +73,12 @@ sub code2script } } + +#======================================================================= +# +# script2code ( SCRIPT [, CODESET ] ) +# +#======================================================================= sub script2code { my $script = shift; @@ -237,6 +100,12 @@ sub script2code } } + +#======================================================================= +# +# script_code2code ( CODE, IN-CODESET, OUT-CODESET ) +# +#======================================================================= sub script_code2code { (@_ == 3) or croak "script_code2code() takes 3 arguments!"; @@ -244,7 +113,7 @@ sub script_code2code my $code = shift; my $inset = shift; my $outset = shift; - my $outcode = shift; + my $outcode; my $script; @@ -255,32 +124,12 @@ sub script_code2code return $outcode; } -#======================================================================= - -=head1 QUERY ROUTINES - -There are two function which can be used to obtain a list of all codes, -or all script names: - -=over 8 - -=item C<all_script_codes ( [ CODESET ] )> - -Returns a list of all two-letter script codes. -The codes are guaranteed to be all lower-case, -and not in any particular order. - -=item C<all_script_names ( [ CODESET ] )> - -Returns a list of all script names for which there is a corresponding -script code in the specified code set. -The names are capitalised, and not returned in any particular order. - -=back - -=cut #======================================================================= +# +# all_script_codes() +# +#======================================================================= sub all_script_codes { my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; @@ -288,6 +137,12 @@ sub all_script_codes return keys %{ $CODES->[$codeset] }; } + +#======================================================================= +# +# all_script_names() +# +#======================================================================= sub all_script_names { my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; @@ -296,102 +151,10 @@ sub all_script_names } -#----------------------------------------------------------------------- - -=head1 EXAMPLES - -The following example illustrates use of the C<code2script()> function. -The user is prompted for a script code, and then told the corresponding -script name: - - $| = 1; # turn off buffering - - print "Enter script code: "; - chop($code = <STDIN>); - $script = code2script($code, LOCALE_CODE_ALPHA_2); - if (defined $script) - { - print "$code = $script\n"; - } - else - { - print "'$code' is not a valid script code!\n"; - } - - -=head1 KNOWN BUGS AND LIMITATIONS - -=over 4 - -=item * - -When using C<script2code()>, the script name must currently appear -exactly as it does in the source of the module. For example, - - script2code('Egyptian hieroglyphs') - -will return B<eg>, as expected. But the following will all return C<undef>: - - script2code('hieroglyphs') - script2code('Egyptian Hieroglypics') - -If there's need for it, a future version could have variants -for script names. - -=item * - -In the current implementation, all data is read in when the -module is loaded, and then held in memory. -A lazy implementation would be more memory friendly. - -=back - -=head1 SEE ALSO - -=over 4 - -=item Locale::Language - -ISO two letter codes for identification of language (ISO 639). - -=item Locale::Currency - -ISO three letter codes for identification of currencies -and funds (ISO 4217). - -=item Locale::Country - -ISO three letter codes for identification of countries (ISO 3166) - -=item ISO 15924 - -The ISO standard which defines these codes. - -=item http://www.evertype.com/standards/iso15924/ - -Home page for ISO 15924. - - -=back - - -=head1 AUTHOR - -Neil Bowers E<lt>neil@bowers.comE<gt> - -=head1 COPYRIGHT - -Copyright (c) 2002 Neil Bowers. - -This module is free software; you can redistribute it and/or -modify it under the same terms as Perl itself. - -=cut - -#----------------------------------------------------------------------- - #======================================================================= +# # initialisation code - stuff the DATA into the ALPHA2 hash +# #======================================================================= { my ($alpha2, $alpha3, $numeric); |