summaryrefslogtreecommitdiff
path: root/lib/locale.t
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2002-03-27 23:48:15 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2002-03-27 23:48:15 +0000
commitef4a39e599a07c97b0213b41bfa69f4a6f4a17ed (patch)
treee41527ecd5558d8b2c680ca4126516bafad5685e /lib/locale.t
parent3eb9224ade6d1fb49a2cd92811272334fefa9fea (diff)
downloadperl-ef4a39e599a07c97b0213b41bfa69f4a6f4a17ed.tar.gz
Try to handle UTF-8 locales.
p4raw-id: //depot/perl@15561
Diffstat (limited to 'lib/locale.t')
-rw-r--r--lib/locale.t97
1 files changed, 45 insertions, 52 deletions
diff --git a/lib/locale.t b/lib/locale.t
index e9ba3fad21..ee75c650e9 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -308,7 +308,7 @@ if ($^O eq 'os390') {
$locales =~ s/Thai:th:th:11 tis620\n//;
}
-sub in_utf8 () { $^H & 0x08 }
+sub in_utf8 () { $^H & 0x08 || (${^OPEN} || "") =~ /:utf8/ }
if (in_utf8) {
require "lib/locale/utf8";
@@ -430,8 +430,6 @@ if (-x "/usr/bin/locale" && open(LOCALES, "/usr/bin/locale -a 2>/dev/null|")) {
setlocale(LC_ALL, "C");
-sub utf8locale { $_[0] =~ /utf-?8/i }
-
@Locale = sort @Locale;
debug "# Locales = @Locale\n";
@@ -520,18 +518,19 @@ foreach $Locale (@Locale) {
# Test \w.
- if (utf8locale($Locale)) {
- # utf8 and locales do not mix.
- debug "# skipping UTF-8 locale '$Locale'\n";
- push @utf8locale, $Locale;
- @utf8skip{99..102} = ();
- } else {
- my $word = join('', @Neoalpha);
+ my $word = join('', @Neoalpha);
- $word =~ /^(\w+)$/;
-
- tryneoalpha($Locale, 99, $1 eq $word);
+ if ($Locale =~ /utf-?8/i) {
+ debug "# unknown whether locale and Unicode have the same \\w, skipping test 99 for locale '$Locale'\n";
+ push @{$Okay{99}}, $Locale;
+ } else {
+ if ($word =~ /^(\w+)$/) {
+ tryneoalpha($Locale, 99, 1);
+ } else {
+ tryneoalpha($Locale, 99, 0);
+ }
}
+
# Cross-check the whole 8-bit character set.
for (map { chr } 0..255) {
@@ -712,50 +711,44 @@ foreach $Locale (@Locale) {
# case-insensitively the UPPER, and does the UPPER match
# case-insensitively the lc of the UPPER. And vice versa.
{
- if (utf8locale($Locale)) {
- # utf8 and locales do not mix.
- debug "# skipping UTF-8 locale '$Locale'\n";
- push @utf8locale, $Locale;
- $utf8skip{117}++;
- } else {
- use locale;
- no utf8;
- my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
-
- my @f = ();
- foreach my $x (keys %UPPER) {
- my $y = lc $x;
- next unless uc $y eq $x;
- print "# UPPER $x lc $y ",
- $x =~ /$y/i ? 1 : 0, " ",
- $y =~ /$x/i ? 1 : 0, "\n" if 0;
- # If $x and $y contain regular expression characters
- # AND THEY lowercase (/i) to regular expression characters,
- # regcomp() will be mightily confused. No, the \Q doesn't
- # help here (maybe regex engine internal lowercasing
- # is done after the \Q?) An example of this happening is
- # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
- # the chr(173) (the "[") is the lowercase of the chr(235).
- # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
- # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
- # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
- # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
- # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
- # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
- if ($x =~ $re || $y =~ $re) {
- print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
- next;
- }
- # With utf8 both will fail since the locale concept
- # of upper/lower does not work well in Unicode.
- push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
+ use locale;
+ no utf8;
+ my $re = qr/[\[\(\{\*\+\?\|\^\$\\]/;
+
+ my @f = ();
+ foreach my $x (keys %UPPER) {
+ my $y = lc $x;
+ next unless uc $y eq $x;
+ print "# UPPER $x lc $y ",
+ $x =~ /$y/i ? 1 : 0, " ",
+ $y =~ /$x/i ? 1 : 0, "\n" if 0;
+ # If $x and $y contain regular expression characters
+ # AND THEY lowercase (/i) to regular expression characters,
+ # regcomp() will be mightily confused. No, the \Q doesn't
+ # help here (maybe regex engine internal lowercasing
+ # is done after the \Q?) An example of this happening is
+ # the bg_BG (Bulgarian) locale under EBCDIC (OS/390 USS):
+ # the chr(173) (the "[") is the lowercase of the chr(235).
+ # Similarly losing EBCDIC locales include cs_cz, cs_CZ,
+ # el_gr, el_GR, en_us.IBM-037 (!), en_US.IBM-037 (!),
+ # et_ee, et_EE, hr_hr, hr_HR, hu_hu, hu_HU, lt_LT,
+ # mk_mk, mk_MK, nl_nl.IBM-037, nl_NL.IBM-037,
+ # pl_pl, pl_PL, ro_ro, ro_RO, ru_ru, ru_RU,
+ # sk_sk, sk_SK, sl_si, sl_SI, tr_tr, tr_TR.
+ if ($x =~ $re || $y =~ $re) {
+ print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
+ next;
}
+ # With utf8 both will fail since the locale concept
+ # of upper/lower does not work well in Unicode.
+ push @f, $x unless $x =~ /$y/i == $y =~ /$x/i;
+
foreach my $x (keys %lower) {
my $y = uc $x;
next unless lc $y eq $x;
print "# lower $x uc $y ",
- $x =~ /$y/i ? 1 : 0, " ",
- $y =~ /$x/i ? 1 : 0, "\n" if 0;
+ $x =~ /$y/i ? 1 : 0, " ",
+ $y =~ /$x/i ? 1 : 0, "\n" if 0;
if ($x =~ $re || $y =~ $re) { # See above.
print "# Regex characters in '$x' or '$y', skipping test 117 for locale '$Locale'\n";
next;