summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2003-01-16 01:58:39 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2003-01-16 01:58:39 +0000
commitfde18df140d5f64815bdd632a127ecd5ce3d97fa (patch)
tree80f64f24787f44508939d3f4b0912ac97d9cf9dc
parentcd2d1bacbf7960cece81f64bfbaaedda360c78aa (diff)
downloadperl-fde18df140d5f64815bdd632a127ecd5ce3d97fa.tar.gz
Make the locale-induced UTF-8-ification of the standard filehandles (STDIN, STDOUT, STDERR)
and of the default file open layer explicit (requested with either -C or PERL_UTF8_LOCALE), instead of implicit (and unasked-for). p4raw-id: //depot/perl@18490
-rw-r--r--embedvar.h4
-rw-r--r--gv.c15
-rw-r--r--intrpvar.h2
-rw-r--r--locale.c39
-rw-r--r--mg.c18
-rw-r--r--perl.c11
-rw-r--r--perlapi.h4
-rw-r--r--pod/perlrun.pod20
-rw-r--r--pod/perlunicode.pod21
-rw-r--r--pod/perluniintro.pod16
-rw-r--r--pod/perlvar.pod25
11 files changed, 101 insertions, 74 deletions
diff --git a/embedvar.h b/embedvar.h
index cc709260cc..202cea0b91 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -413,10 +413,10 @@
#define PL_utf8_toupper (vTHX->Iutf8_toupper)
#define PL_utf8_upper (vTHX->Iutf8_upper)
#define PL_utf8_xdigit (vTHX->Iutf8_xdigit)
+#define PL_utf8locale (vTHX->Iutf8locale)
#define PL_uudmap (vTHX->Iuudmap)
#define PL_wantutf8 (vTHX->Iwantutf8)
#define PL_warnhook (vTHX->Iwarnhook)
-#define PL_widesyscalls (vTHX->Iwidesyscalls)
#define PL_xiv_arenaroot (vTHX->Ixiv_arenaroot)
#define PL_xiv_root (vTHX->Ixiv_root)
#define PL_xnv_arenaroot (vTHX->Ixnv_arenaroot)
@@ -702,10 +702,10 @@
#define PL_Iutf8_toupper PL_utf8_toupper
#define PL_Iutf8_upper PL_utf8_upper
#define PL_Iutf8_xdigit PL_utf8_xdigit
+#define PL_Iutf8locale PL_utf8locale
#define PL_Iuudmap PL_uudmap
#define PL_Iwantutf8 PL_wantutf8
#define PL_Iwarnhook PL_warnhook
-#define PL_Iwidesyscalls PL_widesyscalls
#define PL_Ixiv_arenaroot PL_xiv_arenaroot
#define PL_Ixiv_root PL_xiv_root
#define PL_Ixnv_arenaroot PL_xnv_arenaroot
diff --git a/gv.c b/gv.c
index 08dd7c3811..8dfa932e42 100644
--- a/gv.c
+++ b/gv.c
@@ -974,9 +974,15 @@ Perl_gv_fetchpv(pTHX_ const char *nambeg, I32 add, I32 sv_type)
goto ro_magicalize;
else
break;
+ case '\025':
+ if (len > 1 && strNE(name, "\025TF8_LOCALE"))
+ break;
+ goto ro_magicalize;
+
case '\027': /* $^W & $^WARNING_BITS */
- if (len > 1 && strNE(name, "\027ARNING_BITS")
- && strNE(name, "\027IDE_SYSTEM_CALLS"))
+ if (len > 1
+ && strNE(name, "\027ARNING_BITS")
+ )
break;
goto magicalize;
@@ -1793,10 +1799,13 @@ Perl_is_gv_magical(pTHX_ char *name, STRLEN len, U32 flags)
goto yes;
}
break;
+ case '\025':
+ if (len > 1 && strEQ(name, "\025TF8_LOCALE"))
+ goto yes;
case '\027': /* $^W & $^WARNING_BITS */
if (len == 1
|| (len == 12 && strEQ(name, "\027ARNING_BITS"))
- || (len == 17 && strEQ(name, "\027IDE_SYSTEM_CALLS")))
+ )
{
goto yes;
}
diff --git a/intrpvar.h b/intrpvar.h
index f44cccefcd..7320725412 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -48,7 +48,7 @@ The C variable which corresponds to Perl's $^W warning variable.
*/
PERLVAR(Idowarn, U8)
-PERLVAR(Iwidesyscalls, bool) /* wide system calls */
+PERLVAR(Iutf8locale, bool) /* utf8 locale detected */
PERLVAR(Idoextract, bool)
PERLVAR(Isawampersand, bool) /* must save all match strings */
PERLVAR(Iunsafe, bool)
diff --git a/locale.c b/locale.c
index c03451ba1d..9d522444ca 100644
--- a/locale.c
+++ b/locale.c
@@ -475,7 +475,7 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
#ifdef USE_PERLIO
{
- /* Set PL_wantutf8 to TRUE if using PerlIO _and_
+ /* Set PL_utf8locale to TRUE if using PerlIO _and_
any of the following are true:
- nl_langinfo(CODESET) contains /^utf-?8/i
- $ENV{LC_ALL} contains /^utf-?8/i
@@ -487,37 +487,44 @@ Perl_init_i18nl10n(pTHX_ int printwarn)
it overrides LC_MESSAGES for GNU gettext, and it also
can have more than one locale, separated by spaces,
in case you need to know.)
- If PL_wantutf8 is true, perl.c:S_parse_body()
- will turn on the PerlIO :utf8 discipline on STDIN, STDOUT,
- STDERR, _and_ the default open discipline.
+ If PL_utf8locale and PL_wantutf8 (set by -C or by
+ $ENV{PERL_UTF8_LOCALE}) are both true, perl.c:S_parse_body() will turn on
+ the PerlIO :utf8 layer on STDIN, STDOUT, STDERR, _and_ the default open layer.
*/
- bool wantutf8 = FALSE;
+ bool utf8locale = FALSE;
char *codeset = NULL;
#if defined(HAS_NL_LANGINFO) && defined(CODESET)
codeset = nl_langinfo(CODESET);
#endif
if (codeset)
- wantutf8 = (ibcmp(codeset, "UTF-8", 5) == 0 ||
- ibcmp(codeset, "UTF8", 4) == 0);
+ utf8locale = (ibcmp(codeset, "UTF-8", 5) == 0 ||
+ ibcmp(codeset, "UTF8", 4) == 0);
#if defined(USE_LOCALE)
else { /* nl_langinfo(CODESET) is supposed to correctly
* interpret the locale environment variables,
* but just in case it fails, let's do this manually. */
if (lang)
- wantutf8 = (ibcmp(lang, "UTF-8", 5) == 0 ||
- ibcmp(lang, "UTF8", 4) == 0);
+ utf8locale = (ibcmp(lang, "UTF-8", 5) == 0 ||
+ ibcmp(lang, "UTF8", 4) == 0);
#ifdef USE_LOCALE_CTYPE
if (curctype)
- wantutf8 = (ibcmp(curctype, "UTF-8", 5) == 0 ||
- ibcmp(curctype, "UTF8", 4) == 0);
+ utf8locale = (ibcmp(curctype, "UTF-8", 5) == 0 ||
+ ibcmp(curctype, "UTF8", 4) == 0);
#endif
if (lc_all)
- wantutf8 = (ibcmp(lc_all, "UTF-8", 5) == 0 ||
- ibcmp(lc_all, "UTF8", 4) == 0);
-#endif /* USE_LOCALE */
+ utf8locale = (ibcmp(lc_all, "UTF-8", 5) == 0 ||
+ ibcmp(lc_all, "UTF8", 4) == 0);
}
- if (wantutf8)
- PL_wantutf8 = TRUE;
+#endif /* USE_LOCALE */
+ if (utf8locale)
+ PL_utf8locale = TRUE;
+ }
+ /* Set PL_wantutf8 to $ENV{PERL_UTF8_LOCALE} if using PerlIO.
+ This is an alternative to using the -C command line switch
+ (the -C if present will override this). */
+ {
+ char *p = PerlEnv_getenv("PERL_UTF8_LOCALE");
+ PL_wantutf8 = p ? (bool) atoi(p) : FALSE;
}
#endif
diff --git a/mg.c b/mg.c
index bdf204bd8f..72c8fdfcd8 100644
--- a/mg.c
+++ b/mg.c
@@ -662,7 +662,11 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
? (PL_taint_warn || PL_unsafe ? -1 : 1)
: 0);
break;
- case '\027': /* ^W & $^WARNING_BITS & ^WIDE_SYSTEM_CALLS */
+ case '\025': /* $^UTF8_LOCALE */
+ if (strEQ(mg->mg_ptr, "\025TF8_LOCALE"))
+ sv_setiv(sv, (IV) (PL_wantutf8 && PL_utf8locale));
+ break;
+ case '\027': /* ^W & $^WARNING_BITS */
if (*(mg->mg_ptr+1) == '\0')
sv_setiv(sv, (IV)((PL_dowarn & G_WARN_ON) ? TRUE : FALSE));
else if (strEQ(mg->mg_ptr+1, "ARNING_BITS")) {
@@ -679,8 +683,6 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
}
SvPOK_only(sv);
}
- else if (strEQ(mg->mg_ptr+1, "IDE_SYSTEM_CALLS"))
- sv_setiv(sv, (IV)PL_widesyscalls);
break;
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '&':
@@ -1925,7 +1927,13 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
PL_basetime = (Time_t)(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
#endif
break;
- case '\027': /* ^W & $^WARNING_BITS & ^WIDE_SYSTEM_CALLS */
+ case '\025': /* $^UTF8_LOCALE */
+ if (SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv))
+ PL_wantutf8 = PL_utf8locale;
+ else
+ PL_wantutf8 = FALSE;
+ break;
+ case '\027': /* ^W & $^WARNING_BITS */
if (*(mg->mg_ptr+1) == '\0') {
if ( ! (PL_dowarn & G_WARN_ALL_MASK)) {
i = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv);
@@ -1967,8 +1975,6 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
}
}
}
- else if (strEQ(mg->mg_ptr+1, "IDE_SYSTEM_CALLS"))
- PL_widesyscalls = (bool)SvTRUE(sv);
break;
case '.':
if (PL_localizing) {
diff --git a/perl.c b/perl.c
index 8b73d25fad..3493cd806f 100644
--- a/perl.c
+++ b/perl.c
@@ -1355,10 +1355,11 @@ print \" \\@INC:\\n @INC\\n\";");
if (!PL_do_undump)
init_postdump_symbols(argc,argv,env);
- /* PL_wantutf8 is conditionally turned on by
+ /* PL_utf8locale is conditionally turned on by
* locale.c:Perl_init_i18nl10n() if the environment
- * look like the user wants to use UTF-8. */
- if (PL_wantutf8) { /* Requires init_predump_symbols(). */
+ * looks like the user wants to use UTF-8.
+ * PL_wantutf8 is turned on by -C or by $ENV{PERL_UTF8_LOCALE}. */
+ if (PL_utf8locale && PL_wantutf8) { /* Requires init_predump_symbols(). */
IO* io;
PerlIO* fp;
SV* sv;
@@ -2156,7 +2157,7 @@ Perl_moreswitches(pTHX_ char *s)
return s + numlen;
}
case 'C':
- PL_widesyscalls = TRUE;
+ PL_wantutf8 = TRUE; /* Can be set earlier by $ENV{PERL_UTF8_LOCALE}. */
s++;
return s;
case 'F':
@@ -3397,7 +3398,7 @@ Perl_init_argv_symbols(pTHX_ register int argc, register char **argv)
for (; argc > 0; argc--,argv++) {
SV *sv = newSVpv(argv[0],0);
av_push(GvAVn(PL_argvgv),sv);
- if (PL_widesyscalls)
+ if (PL_wantutf8)
(void)sv_utf8_decode(sv);
}
}
diff --git a/perlapi.h b/perlapi.h
index 451a4d9ad0..ff344ab642 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -584,14 +584,14 @@ END_EXTERN_C
#define PL_utf8_upper (*Perl_Iutf8_upper_ptr(aTHX))
#undef PL_utf8_xdigit
#define PL_utf8_xdigit (*Perl_Iutf8_xdigit_ptr(aTHX))
+#undef PL_utf8locale
+#define PL_utf8locale (*Perl_Iutf8locale_ptr(aTHX))
#undef PL_uudmap
#define PL_uudmap (*Perl_Iuudmap_ptr(aTHX))
#undef PL_wantutf8
#define PL_wantutf8 (*Perl_Iwantutf8_ptr(aTHX))
#undef PL_warnhook
#define PL_warnhook (*Perl_Iwarnhook_ptr(aTHX))
-#undef PL_widesyscalls
-#define PL_widesyscalls (*Perl_Iwidesyscalls_ptr(aTHX))
#undef PL_xiv_arenaroot
#define PL_xiv_arenaroot (*Perl_Ixiv_arenaroot_ptr(aTHX))
#undef PL_xiv_root
diff --git a/pod/perlrun.pod b/pod/perlrun.pod
index 72517122a4..46e18493d4 100644
--- a/pod/perlrun.pod
+++ b/pod/perlrun.pod
@@ -266,11 +266,21 @@ An alternate delimiter may be specified using B<-F>.
=item B<-C>
-enables Perl to use the native wide character APIs on the target system.
-The magic variable C<${^WIDE_SYSTEM_CALLS}> reflects the state of
-this switch. See L<perlvar/"${^WIDE_SYSTEM_CALLS}">.
-
-This feature is currently only implemented on the Win32 platform.
+enables Perl to use the Unicode APIs on the target system.
+
+As of Perl 5.8.1, if C<-C> is used and the locale settings (the LC_ALL,
+LC_CTYPE, and LANG environment variables) indicate a UTF-8 locale,
+STDIN, STDOUT, and STDERR are all expected to be in UTF-8,
+and C<:utf8> is the default file open layer.
+See L<perluniintro>, L<perlfunc/open>, and L<open> for more information.
+The magic variable C<${^UTF8_LOCALE}> reflects this state;
+see L<perlvar/"${^UTF8_LOCALE}">. (Another way of enabling this is to
+set the environment variable PERL_UTF8_LOCALE to a true value.)
+
+(In Perls earlier than 5.8.1 the C<-C> switch was a Win32-only switch
+that enabled the use of Unicode-aware "wide system call" Win32 APIs.
+This feature was practically unused, however, and the command line
+switch was therefore "recycled".)
=item B<-c>
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index ee8b6efe7e..1d3f84626f 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -67,13 +67,6 @@ character data. Such data may come from filehandles, from calls to
external programs, from information provided by the system (such as %ENV),
or from literals and constants in the source text.
-On Windows platforms, if the C<-C> command line switch is used or the
-${^WIDE_SYSTEM_CALLS} global flag is set to C<1>, all system calls
-will use the corresponding wide-character APIs. This feature is
-available only on Windows to conform to the API standard already
-established for that platform--and there are very few non-Windows
-platforms that have Unicode-aware APIs.
-
The C<bytes> pragma will always, regardless of platform, force byte
semantics in a particular lexical scope. See L<bytes>.
@@ -1050,10 +1043,14 @@ there are a couple of exceptions:
=item *
-If your locale environment variables (LANGUAGE, LC_ALL, LC_CTYPE, LANG)
-contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching),
-the default encodings of your STDIN, STDOUT, and STDERR, and of
-B<any subsequent file open>, are considered to be UTF-8.
+If your locale environment variables (LC_ALL, LC_CTYPE, LANG)
+contain the strings 'UTF-8' or 'UTF8' (matched case-insensitively)
+B<and> you enable using UTF-8 either by using the C<-C> command line
+switch or setting the PERL_UTF8_LOCALE environment variable to a true
+value, then the default encodings of your STDIN, STDOUT, and STDERR,
+and of B<any subsequent file open>, are considered to be UTF-8.
+See L<perluniintro>, L<perlfunc/open>, and L<open> for more
+information. The magic variable C<${^UTF8_LOCALE}> will also be true.
=item *
@@ -1410,6 +1407,6 @@ the UTF-8 flag:
=head1 SEE ALSO
L<perluniintro>, L<encoding>, L<Encode>, L<open>, L<utf8>, L<bytes>,
-L<perlretut>, L<perlvar/"${^WIDE_SYSTEM_CALLS}">
+L<perlretut>, L<perlvar/"${^UTF8_LOCALE}">
=cut
diff --git a/pod/perluniintro.pod b/pod/perluniintro.pod
index 21f0fa7600..3a2346004c 100644
--- a/pod/perluniintro.pod
+++ b/pod/perluniintro.pod
@@ -172,13 +172,15 @@ To output UTF-8, use the C<:utf8> output layer. Prepending
to this sample program ensures that the output is completely UTF-8,
and removes the program's warning.
-If your locale environment variables (C<LANGUAGE>, C<LC_ALL>,
-C<LC_CTYPE>, C<LANG>) contain the strings 'UTF-8' or 'UTF8',
-regardless of case, then the default encoding of your STDIN, STDOUT,
-and STDERR and of B<any subsequent file open>, is UTF-8. Note that
-this means that Perl expects other software to work, too: if Perl has
-been led to believe that STDIN should be UTF-8, but then STDIN coming
-in from another command is not UTF-8, Perl will complain about the
+If your locale environment variables (C<LC_ALL>, C<LC_CTYPE>, C<LANG>)
+contain the strings 'UTF-8' or 'UTF8' (matched case-insensitively)
+B<and> you enable using UTF-8 either by using the C<-C> command line
+switch or by setting the PERL_UTF8_LOCALE environment variable to
+a true value, then the default encoding of your STDIN, STDOUT, and
+STDERR, and of B<any subsequent file open>, is UTF-8. Note that this
+means that Perl expects other software to work, too: if Perl has been
+led to believe that STDIN should be UTF-8, but then STDIN coming in
+from another command is not UTF-8, Perl will complain about the
malformed UTF-8.
All features that combine Unicode and I/O also require using the new
diff --git a/pod/perlvar.pod b/pod/perlvar.pod
index 08235c2cb4..7621be0c0d 100644
--- a/pod/perlvar.pod
+++ b/pod/perlvar.pod
@@ -1109,6 +1109,16 @@ Reflects if taint mode is on or off. 1 for on (the program was run with
B<-T>), 0 for off, -1 when only taint warnings are enabled (i.e. with
B<-t> or B<-TU>). This variable is read-only.
+=item ${^UTF8_LOCALE}
+
+True if the locale settings indicate the use of UTF-8 B<and> the use of
+UTF-8 was enabled either by the C<-C> command line switch or by setting
+the PERL_UTF8_LOCALE environment variable to a true value.
+This variable is read-only. If true, STDIN, STDOUT, and STDERR are
+expected to be in UTF-8, and C<:utf8> is the default
+file open layer. See L<perluniintro>, L<perlfunc/open>, and L<open>
+for more information.
+
=item $PERL_VERSION
=item $^V
@@ -1148,21 +1158,6 @@ related to the B<-w> switch.) See also L<warnings>.
The current set of warning checks enabled by the C<use warnings> pragma.
See the documentation of C<warnings> for more details.
-=item ${^WIDE_SYSTEM_CALLS}
-
-Global flag that enables system calls made by Perl to use wide character
-APIs native to the system, if available. This is currently only implemented
-on the Windows platform.
-
-This can also be enabled from the command line using the C<-C> switch.
-
-The initial value is typically C<0> for compatibility with Perl versions
-earlier than 5.6, but may be automatically set to C<1> by Perl if the system
-provides a user-settable default (e.g., C<$ENV{LC_CTYPE}>).
-
-The C<bytes> pragma always overrides the effect of this flag in the current
-lexical scope. See L<bytes>.
-
=item $EXECUTABLE_NAME
=item $^X