summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGurusamy Sarathy <gsar@cpan.org>2000-01-31 20:19:34 +0000
committerGurusamy Sarathy <gsar@cpan.org>2000-01-31 20:19:34 +0000
commitd41ff1b8ad987cfcb928deba4254681c1a4c0e36 (patch)
tree1fe5e3007d4c0adad93b501f54394bf383983d52
parent426c1a18afb84430666a6b4f0111dbc3205bd349 (diff)
downloadperl-d41ff1b8ad987cfcb928deba4254681c1a4c0e36.tar.gz
introduce $^U, a global bit to indicate whether system
calls should using widechar APIs; chr and sprintf "%c" also follow this flag in the absense of "use byte"; "use utf8" sets $^U=1 (this appears kludgey) p4raw-id: //depot/perl@4937
-rw-r--r--embedvar.h4
-rw-r--r--gv.c1
-rw-r--r--intrpvar.h3
-rw-r--r--lib/charnames.pm16
-rw-r--r--lib/utf8.pm2
-rw-r--r--mg.c6
-rw-r--r--op.c2
-rw-r--r--op.h3
-rw-r--r--perlapi.h2
-rw-r--r--pod/perlvar.pod35
-rw-r--r--pp.c2
-rw-r--r--sv.c8
-rw-r--r--sv.h11
-rw-r--r--t/lib/charnames.t2
-rw-r--r--win32/win32.h8
15 files changed, 83 insertions, 22 deletions
diff --git a/embedvar.h b/embedvar.h
index 342f5433b2..c9a0cec8e7 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -196,6 +196,7 @@
#define PL_argvoutgv (PERL_GET_INTERP->Iargvoutgv)
#define PL_basetime (PERL_GET_INTERP->Ibasetime)
#define PL_beginav (PERL_GET_INTERP->Ibeginav)
+#define PL_bigchar (PERL_GET_INTERP->Ibigchar)
#define PL_bitcount (PERL_GET_INTERP->Ibitcount)
#define PL_bufend (PERL_GET_INTERP->Ibufend)
#define PL_bufptr (PERL_GET_INTERP->Ibufptr)
@@ -460,6 +461,7 @@
#define PL_argvoutgv (vTHX->Iargvoutgv)
#define PL_basetime (vTHX->Ibasetime)
#define PL_beginav (vTHX->Ibeginav)
+#define PL_bigchar (vTHX->Ibigchar)
#define PL_bitcount (vTHX->Ibitcount)
#define PL_bufend (vTHX->Ibufend)
#define PL_bufptr (vTHX->Ibufptr)
@@ -861,6 +863,7 @@
#define PL_argvoutgv (aTHXo->interp.Iargvoutgv)
#define PL_basetime (aTHXo->interp.Ibasetime)
#define PL_beginav (aTHXo->interp.Ibeginav)
+#define PL_bigchar (aTHXo->interp.Ibigchar)
#define PL_bitcount (aTHXo->interp.Ibitcount)
#define PL_bufend (aTHXo->interp.Ibufend)
#define PL_bufptr (aTHXo->interp.Ibufptr)
@@ -1126,6 +1129,7 @@
#define PL_Iargvoutgv PL_argvoutgv
#define PL_Ibasetime PL_basetime
#define PL_Ibeginav PL_beginav
+#define PL_Ibigchar PL_bigchar
#define PL_Ibitcount PL_bitcount
#define PL_Ibufend PL_bufend
#define PL_Ibufptr PL_bufptr
diff --git a/gv.c b/gv.c
index acd85012e7..b8fef0d8b0 100644
--- a/gv.c
+++ b/gv.c
@@ -837,6 +837,7 @@ Perl_gv_fetchpv(pTHX_ const char *nambeg, I32 add, I32 sv_type)
case '\017': /* $^O */
case '\020': /* $^P */
case '\024': /* $^T */
+ case '\025': /* $^U */
if (len > 1)
break;
goto magicalize;
diff --git a/intrpvar.h b/intrpvar.h
index 2dde0dc4cd..869897dff6 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -44,7 +44,8 @@ The C variable which corresponds to Perl's $^W warning variable.
=cut
*/
-PERLVAR(Idowarn, bool)
+PERLVAR(Idowarn, U8)
+PERLVAR(Ibigchar, bool)
PERLVAR(Idoextract, bool)
PERLVAR(Isawampersand, bool) /* must save all match strings */
PERLVAR(Iunsafe, bool)
diff --git a/lib/charnames.pm b/lib/charnames.pm
index bd97983abc..59350b2df9 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -29,17 +29,15 @@ sub charnames {
}
die "Unknown charname '$name'" unless @off;
- # use caller 'encoding'; # Does not work at compile time?
-
my $ord = hex substr $txt, $off[0] - 4, 4;
- if ($^H & 0x8) {
- use utf8;
- return chr $ord;
+ if ($^H & 0x10) { # "use byte" in effect?
+ use byte;
+ return chr $ord if $ord <= 255;
+ my $hex = sprintf '%X=0%o', $ord, $ord;
+ my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
+ die "Character 0x$hex with name '$fname' is above 0xFF";
}
- return chr $ord if $ord <= 255;
- my $hex = sprintf '%X=0%o', $ord, $ord;
- my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2;
- die "Character 0x$hex with name '$fname' is above 0xFF";
+ return chr $ord;
}
sub import {
diff --git a/lib/utf8.pm b/lib/utf8.pm
index 5ddd4ba21a..691de0d630 100644
--- a/lib/utf8.pm
+++ b/lib/utf8.pm
@@ -1,5 +1,7 @@
package utf8;
+$^U = 1;
+
sub import {
$^H |= 0x00000008;
$enc{caller()} = $_[1] if $_[1];
diff --git a/mg.c b/mg.c
index 3ba3d08883..f0c3bf3d8c 100644
--- a/mg.c
+++ b/mg.c
@@ -567,6 +567,9 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
sv_setiv(sv, (IV)PL_basetime);
#endif
break;
+ case '\025': /* ^U */
+ sv_setiv(sv, (IV)PL_bigchar);
+ break;
case '\027': /* ^W & $^Warnings*/
if (*(mg->mg_ptr+1) == '\0')
sv_setiv(sv, (IV)((PL_dowarn & G_WARN_ON) ? TRUE : FALSE));
@@ -1707,6 +1710,9 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
PL_basetime = (Time_t)(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
#endif
break;
+ case '\025': /* ^U */
+ PL_bigchar = SvTRUE(sv);
+ break;
case '\027': /* ^W & $^Warnings */
if (*(mg->mg_ptr+1) == '\0') {
if ( ! (PL_dowarn & G_WARN_ALL_MASK)) {
diff --git a/op.c b/op.c
index fdfdf27b93..6bb7876683 100644
--- a/op.c
+++ b/op.c
@@ -3401,7 +3401,7 @@ Perl_newSTATEOP(pTHX_ I32 flags, char *label, OP *o)
cop->op_ppaddr = PL_ppaddr[ OP_NEXTSTATE ];
}
cop->op_flags = flags;
- cop->op_private = (PL_hints & HINT_UTF8);
+ cop->op_private = (PL_hints & (HINT_UTF8|HINT_BYTE));
#ifdef NATIVE_HINTS
cop->op_private |= NATIVE_HINTS;
#endif
diff --git a/op.h b/op.h
index 8bc82769e7..2360f9b381 100644
--- a/op.h
+++ b/op.h
@@ -107,6 +107,9 @@ Deprecated. Use C<GIMME_V> instead.
: G_SCALAR) \
: dowantarray())
+/* NOTE: OP_NEXTSTATE, OP_DBSTATE, and OP_SETSTATE (i.e. COPs) carry lower
+ * bits of PL_hints in op_private */
+
/* Private for lvalues */
#define OPpLVAL_INTRO 128 /* Lvalue must be localized or lvalue sub */
diff --git a/perlapi.h b/perlapi.h
index b2b8a32666..22117ed6a1 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -130,6 +130,8 @@ START_EXTERN_C
#define PL_basetime (*Perl_Ibasetime_ptr(aTHXo))
#undef PL_beginav
#define PL_beginav (*Perl_Ibeginav_ptr(aTHXo))
+#undef PL_bigchar
+#define PL_bigchar (*Perl_Ibigchar_ptr(aTHXo))
#undef PL_bitcount
#define PL_bitcount (*Perl_Ibitcount_ptr(aTHXo))
#undef PL_bufend
diff --git a/pod/perlvar.pod b/pod/perlvar.pod
index 309425179f..3393fd930f 100644
--- a/pod/perlvar.pod
+++ b/pod/perlvar.pod
@@ -856,6 +856,41 @@ The time at which the program began running, in seconds since the
epoch (beginning of 1970). The values returned by the B<-M>, B<-A>,
and B<-C> filetests are based on this value.
+=item $^U
+
+Global flag that switches on Unicode character support in the Perl
+interpreter. The initial value is usually C<0> for compatibility
+with Perl versions earlier than 5.6, but may be automatically set
+to C<1> by Perl if the system provides a user-settable default
+(e.g., C<$ENV{LC_CTYPE}>). It is also implicitly set to C<1>
+whenever the utf8 pragma is loaded.
+
+Setting it to C<1> has the following effects:
+
+=over
+
+=item *
+
+C<chr> produces UTF-8 encoded Unicode characters. These are the same
+as the corresponding ASCII characters if the argument is less than 128.
+
+=item *
+
+The C<%c> format in C<sprintf> generates a UTF-8 encoded Unicode
+character. This is the same as the corresponding ASCII character
+if the argument is less than 128.
+
+=item *
+
+Any system calls made by Perl will use wide character APIs native to
+the system, if available. This is currently only implemented on the
+Windows platform.
+
+=back
+
+The C<byte> pragma overrides the value of this flag in the current
+lexical scope. See L<byte>.
+
=item $^V
The revision, version, and subversion of the Perl interpreter, represented
diff --git a/pp.c b/pp.c
index 45654a9445..aec5073e93 100644
--- a/pp.c
+++ b/pp.c
@@ -2202,7 +2202,7 @@ PP(pp_chr)
SvUTF8_off(TARG); /* decontaminate */
(void)SvUPGRADE(TARG,SVt_PV);
- if (value >= 128 && !IN_BYTE) {
+ if (value >= 128 && PL_bigchar && !IN_BYTE) {
SvGROW(TARG,8);
tmps = SvPVX(TARG);
tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value);
diff --git a/sv.c b/sv.c
index d76752fcf9..0697d8ed88 100644
--- a/sv.c
+++ b/sv.c
@@ -3046,7 +3046,7 @@ Perl_sv_catpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN
Move(ptr,SvPVX(sv)+tlen,len,char);
SvCUR(sv) += len;
*SvEND(sv) = '\0';
- (void)SvPOK_only(sv); /* validate pointer */
+ (void)SvPOK_only_UTF8(sv); /* validate pointer */
SvTAINT(sv);
}
@@ -3083,6 +3083,8 @@ Perl_sv_catsv(pTHX_ SV *dstr, register SV *sstr)
return;
if (s = SvPV(sstr, len))
sv_catpvn(dstr,s,len);
+ if (SvUTF8(sstr))
+ SvUTF8_on(dstr);
}
/*
@@ -3125,7 +3127,7 @@ Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr)
ptr = SvPVX(sv);
Move(ptr,SvPVX(sv)+tlen,len+1,char);
SvCUR(sv) += len;
- (void)SvPOK_only(sv); /* validate pointer */
+ (void)SvPOK_only_UTF8(sv); /* validate pointer */
SvTAINT(sv);
}
@@ -5828,7 +5830,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
uv = va_arg(*args, int);
else
uv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0;
- if (uv >= 128 && !IN_BYTE) {
+ if (uv >= 128 && PL_bigchar && !IN_BYTE) {
eptr = (char*)utf8buf;
elen = uv_to_utf8((U8*)eptr, uv) - utf8buf;
is_utf = TRUE;
diff --git a/sv.h b/sv.h
index 91fd17bceb..d8cd48789c 100644
--- a/sv.h
+++ b/sv.h
@@ -503,9 +503,10 @@ Set the length of the string which is in the SV. See C<SvCUR>.
#define SvOK(sv) (SvFLAGS(sv) & SVf_OK)
#define SvOK_off(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \
- SVf_IVisUV), \
+ SVf_IVisUV|SVf_UTF8), \
SvOOK_off(sv))
-#define SvOK_off_exc_UV(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC), \
+#define SvOK_off_exc_UV(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \
+ SVf_UTF8), \
SvOOK_off(sv))
#define SvOKp(sv) (SvFLAGS(sv) & (SVp_IOK|SVp_NOK|SVp_POK))
@@ -547,7 +548,11 @@ Set the length of the string which is in the SV. See C<SvCUR>.
#define SvPOK(sv) (SvFLAGS(sv) & SVf_POK)
#define SvPOK_on(sv) (SvFLAGS(sv) |= (SVf_POK|SVp_POK))
#define SvPOK_off(sv) (SvFLAGS(sv) &= ~(SVf_POK|SVp_POK))
-#define SvPOK_only(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC|SVf_IVisUV), \
+#define SvPOK_only(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \
+ SVf_IVisUV|SVf_UTF8), \
+ SvFLAGS(sv) |= (SVf_POK|SVp_POK))
+#define SvPOK_only_UTF8(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \
+ SVf_IVisUV), \
SvFLAGS(sv) |= (SVf_POK|SVp_POK))
#define SvOOK(sv) (SvFLAGS(sv) & SVf_OOK)
diff --git a/t/lib/charnames.t b/t/lib/charnames.t
index 9775b141b2..84949896ac 100644
--- a/t/lib/charnames.t
+++ b/t/lib/charnames.t
@@ -16,7 +16,7 @@ print "not " unless "Here\N{EXCLAMATION MARK}?" eq "Here\041?";
print "ok 1\n";
{
- no utf8; # UTEST can switch it on
+ use byte; # UTEST can switch utf8 on
print "# \$res=$res \$\@='$@'\nnot "
if $res = eval <<'EOE'
diff --git a/win32/win32.h b/win32/win32.h
index 69a4caf063..4fed26a911 100644
--- a/win32/win32.h
+++ b/win32/win32.h
@@ -422,12 +422,14 @@ struct interp_intern {
/* Use CP_UTF8 when mode is UTF8 */
#define A2WHELPER(lpa, lpw, nBytes)\
- lpw[0] = 0, MultiByteToWideChar((IN_UTF8) ? CP_UTF8 : CP_ACP, 0, lpa, -1, lpw, (nBytes/sizeof(WCHAR)))
+ lpw[0] = 0, MultiByteToWideChar((IN_BYTE) ? CP_ACP : CP_UTF8, 0, \
+ lpa, -1, lpw, (nBytes/sizeof(WCHAR)))
#define W2AHELPER(lpw, lpa, nChars)\
- lpa[0] = '\0', WideCharToMultiByte((IN_UTF8) ? CP_UTF8 : CP_ACP, 0, lpw, -1, (LPSTR)lpa, nChars, NULL, NULL)
+ lpa[0] = '\0', WideCharToMultiByte((IN_BYTE) ? CP_ACP : CP_UTF8, 0, \
+ lpw, -1, (LPSTR)lpa, nChars, NULL, NULL)
-#define USING_WIDE() (PerlEnv_os_id() == VER_PLATFORM_WIN32_NT)
+#define USING_WIDE() (PL_bigchar && PerlEnv_os_id() == VER_PLATFORM_WIN32_NT)
#ifdef USE_ITHREADS
# define PERL_WAIT_FOR_CHILDREN \