# # Locale::Script - ISO codes for script identification (ISO 15924) # # $Id: Script.pm,v 2.2 2002/07/10 16:33:28 neilb Exp $ # package Locale::Script; use strict; require 5.002; require Exporter; use Carp; use Locale::Constants; #----------------------------------------------------------------------- # Public Global Variables #----------------------------------------------------------------------- use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); $VERSION = sprintf("%d.%02d", q$Revision: 2.2 $ =~ /(\d+)\.(\d+)/); @ISA = qw(Exporter); @EXPORT = qw(code2script script2code all_script_codes all_script_names script_code2code LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC); #----------------------------------------------------------------------- # Private Global Variables #----------------------------------------------------------------------- my $CODES = []; my $COUNTRIES = []; #======================================================================= # # code2script ( CODE [, CODESET ] ) # #======================================================================= sub code2script { my $code = shift; my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; return undef unless defined $code; #------------------------------------------------------------------- # Make sure the code is in the right form before we use it # to look up the corresponding script. # We have to sprintf because the codes are given as 3-digits, # with leading 0's. Eg 070 for Egyptian demotic. #------------------------------------------------------------------- if ($codeset == LOCALE_CODE_NUMERIC) { return undef if ($code =~ /\D/); $code = sprintf("%.3d", $code); } else { $code = lc($code); } if (exists $CODES->[$codeset]->{$code}) { return $CODES->[$codeset]->{$code}; } else { #--------------------------------------------------------------- # no such script code! #--------------------------------------------------------------- return undef; } } #======================================================================= # # script2code ( SCRIPT [, CODESET ] ) # #======================================================================= sub script2code { my $script = shift; my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; return undef unless defined $script; $script = lc($script); if (exists $COUNTRIES->[$codeset]->{$script}) { return $COUNTRIES->[$codeset]->{$script}; } else { #--------------------------------------------------------------- # no such script! #--------------------------------------------------------------- return undef; } } #======================================================================= # # script_code2code ( CODE, IN-CODESET, OUT-CODESET ) # #======================================================================= sub script_code2code { (@_ == 3) or croak "script_code2code() takes 3 arguments!"; my $code = shift; my $inset = shift; my $outset = shift; my $outcode; my $script; return undef if $inset == $outset; $script = code2script($code, $inset); return undef if not defined $script; $outcode = script2code($script, $outset); return $outcode; } #======================================================================= # # all_script_codes() # #======================================================================= sub all_script_codes { my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; return keys %{ $CODES->[$codeset] }; } #======================================================================= # # all_script_names() # #======================================================================= sub all_script_names { my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; return values %{ $CODES->[$codeset] }; } #======================================================================= # # initialisation code - stuff the DATA into the ALPHA2 hash # #======================================================================= { my ($alpha2, $alpha3, $numeric); my $script; while () { next unless /\S/; chop; ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4); $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script; $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2; if ($alpha3) { $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script; $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3; } if ($numeric) { $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script; $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric; } } close(DATA); } 1; __DATA__ am:ama:130:Aramaic ar:ara:160:Arabic av:ave:151:Avestan bh:bhm:300:Brahmi (Ashoka) bi:bid:372:Buhid bn:ben:325:Bengali bo:bod:330:Tibetan bp:bpm:285:Bopomofo br:brl:570:Braille bt:btk:365:Batak bu:bug:367:Buginese (Makassar) by:bys:550:Blissymbols ca:cam:358:Cham ch:chu:221:Old Church Slavonic ci:cir:291:Cirth cm:cmn:402:Cypro-Minoan co:cop:205:Coptic cp:cpr:403:Cypriote syllabary cy:cyr:220:Cyrillic ds:dsr:250:Deserel (Mormon) dv:dvn:315:Devanagari (Nagari) ed:egd:070:Egyptian demotic eg:egy:050:Egyptian hieroglyphs eh:egh:060:Egyptian hieratic el:ell:200:Greek eo:eos:210:Etruscan and Oscan et:eth:430:Ethiopic gl:glg:225:Glagolitic gm:gmu:310:Gurmukhi gt:gth:206:Gothic gu:guj:320:Gujarati ha:han:500:Han ideographs he:heb:125:Hebrew hg:hgl:420:Hangul hm:hmo:450:Pahawh Hmong ho:hoo:371:Hanunoo hr:hrg:410:Hiragana hu:hun:176:Old Hungarian runic hv:hvn:175:Kok Turki runic hy:hye:230:Armenian iv:ivl:610:Indus Valley ja:jap:930:(alias for Han + Hiragana + Katakana) jl:jlg:445:Cherokee syllabary jw:jwi:360:Javanese ka:kam:241:Georgian (Mxedruli) kh:khn:931:(alias for Hangul + Han) kk:kkn:411:Katakana km:khm:354:Khmer kn:kan:345:Kannada kr:krn:357:Karenni (Kayah Li) ks:kst:305:Kharoshthi kx:kax:240:Georgian (Xucuri) la:lat:217:Latin lf:laf:215:Latin (Fraktur variant) lg:lag:216:Latin (Gaelic variant) lo:lao:356:Lao lp:lpc:335:Lepcha (Rong) md:mda:140:Mandaean me:mer:100:Meroitic mh:may:090:Mayan hieroglyphs ml:mlm:347:Malayalam mn:mon:145:Mongolian my:mya:350:Burmese na:naa:400:Linear A nb:nbb:401:Linear B og:ogm:212:Ogham or:ory:327:Oriya os:osm:260:Osmanya ph:phx:115:Phoenician ph:pah:150:Pahlavi pl:pld:282:Pollard Phonetic pq:pqd:295:Klingon plQaD pr:prm:227:Old Permic ps:pst:600:Phaistos Disk rn:rnr:211:Runic (Germanic) rr:rro:620:Rongo-rongo sa:sar:110:South Arabian si:sin:348:Sinhala sj:syj:137:Syriac (Jacobite variant) sl:slb:440:Unified Canadian Aboriginal Syllabics sn:syn:136:Syriac (Nestorian variant) sw:sww:281:Shavian (Shaw) sy:syr:135:Syriac (Estrangelo) ta:tam:346:Tamil tb:tbw:373:Tagbanwa te:tel:340:Telugu tf:tfn:120:Tifnagh tg:tag:370:Tagalog th:tha:352:Thai tn:tna:170:Thaana tw:twr:290:Tengwar va:vai:470:Vai vs:vsp:280:Visible Speech xa:xas:000:Cuneiform, Sumero-Akkadian xf:xfa:105:Cuneiform, Old Persian xk:xkn:412:(alias for Hiragana + Katakana) xu:xug:106:Cuneiform, Ugaritic yi:yii:460:Yi zx:zxx:997:Unwritten language zy:zyy:998:Undetermined script zz:zzz:999:Uncoded script