diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 2000-01-31 20:19:34 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 2000-01-31 20:19:34 +0000 |
commit | d41ff1b8ad987cfcb928deba4254681c1a4c0e36 (patch) | |
tree | 1fe5e3007d4c0adad93b501f54394bf383983d52 | |
parent | 426c1a18afb84430666a6b4f0111dbc3205bd349 (diff) | |
download | perl-d41ff1b8ad987cfcb928deba4254681c1a4c0e36.tar.gz |
introduce $^U, a global bit to indicate whether system
calls should using widechar APIs; chr and sprintf "%c" also
follow this flag in the absense of "use byte"; "use utf8"
sets $^U=1 (this appears kludgey)
p4raw-id: //depot/perl@4937
-rw-r--r-- | embedvar.h | 4 | ||||
-rw-r--r-- | gv.c | 1 | ||||
-rw-r--r-- | intrpvar.h | 3 | ||||
-rw-r--r-- | lib/charnames.pm | 16 | ||||
-rw-r--r-- | lib/utf8.pm | 2 | ||||
-rw-r--r-- | mg.c | 6 | ||||
-rw-r--r-- | op.c | 2 | ||||
-rw-r--r-- | op.h | 3 | ||||
-rw-r--r-- | perlapi.h | 2 | ||||
-rw-r--r-- | pod/perlvar.pod | 35 | ||||
-rw-r--r-- | pp.c | 2 | ||||
-rw-r--r-- | sv.c | 8 | ||||
-rw-r--r-- | sv.h | 11 | ||||
-rw-r--r-- | t/lib/charnames.t | 2 | ||||
-rw-r--r-- | win32/win32.h | 8 |
15 files changed, 83 insertions, 22 deletions
diff --git a/embedvar.h b/embedvar.h index 342f5433b2..c9a0cec8e7 100644 --- a/embedvar.h +++ b/embedvar.h @@ -196,6 +196,7 @@ #define PL_argvoutgv (PERL_GET_INTERP->Iargvoutgv) #define PL_basetime (PERL_GET_INTERP->Ibasetime) #define PL_beginav (PERL_GET_INTERP->Ibeginav) +#define PL_bigchar (PERL_GET_INTERP->Ibigchar) #define PL_bitcount (PERL_GET_INTERP->Ibitcount) #define PL_bufend (PERL_GET_INTERP->Ibufend) #define PL_bufptr (PERL_GET_INTERP->Ibufptr) @@ -460,6 +461,7 @@ #define PL_argvoutgv (vTHX->Iargvoutgv) #define PL_basetime (vTHX->Ibasetime) #define PL_beginav (vTHX->Ibeginav) +#define PL_bigchar (vTHX->Ibigchar) #define PL_bitcount (vTHX->Ibitcount) #define PL_bufend (vTHX->Ibufend) #define PL_bufptr (vTHX->Ibufptr) @@ -861,6 +863,7 @@ #define PL_argvoutgv (aTHXo->interp.Iargvoutgv) #define PL_basetime (aTHXo->interp.Ibasetime) #define PL_beginav (aTHXo->interp.Ibeginav) +#define PL_bigchar (aTHXo->interp.Ibigchar) #define PL_bitcount (aTHXo->interp.Ibitcount) #define PL_bufend (aTHXo->interp.Ibufend) #define PL_bufptr (aTHXo->interp.Ibufptr) @@ -1126,6 +1129,7 @@ #define PL_Iargvoutgv PL_argvoutgv #define PL_Ibasetime PL_basetime #define PL_Ibeginav PL_beginav +#define PL_Ibigchar PL_bigchar #define PL_Ibitcount PL_bitcount #define PL_Ibufend PL_bufend #define PL_Ibufptr PL_bufptr @@ -837,6 +837,7 @@ Perl_gv_fetchpv(pTHX_ const char *nambeg, I32 add, I32 sv_type) case '\017': /* $^O */ case '\020': /* $^P */ case '\024': /* $^T */ + case '\025': /* $^U */ if (len > 1) break; goto magicalize; diff --git a/intrpvar.h b/intrpvar.h index 2dde0dc4cd..869897dff6 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -44,7 +44,8 @@ The C variable which corresponds to Perl's $^W warning variable. =cut */ -PERLVAR(Idowarn, bool) +PERLVAR(Idowarn, U8) +PERLVAR(Ibigchar, bool) PERLVAR(Idoextract, bool) PERLVAR(Isawampersand, bool) /* must save all match strings */ PERLVAR(Iunsafe, bool) diff --git a/lib/charnames.pm b/lib/charnames.pm index bd97983abc..59350b2df9 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -29,17 +29,15 @@ sub charnames { } die "Unknown charname '$name'" unless @off; - # use caller 'encoding'; # Does not work at compile time? - my $ord = hex substr $txt, $off[0] - 4, 4; - if ($^H & 0x8) { - use utf8; - return chr $ord; + if ($^H & 0x10) { # "use byte" in effect? + use byte; + return chr $ord if $ord <= 255; + my $hex = sprintf '%X=0%o', $ord, $ord; + my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2; + die "Character 0x$hex with name '$fname' is above 0xFF"; } - return chr $ord if $ord <= 255; - my $hex = sprintf '%X=0%o', $ord, $ord; - my $fname = substr $txt, $off[0] + 2, $off[1] - $off[0] - 2; - die "Character 0x$hex with name '$fname' is above 0xFF"; + return chr $ord; } sub import { diff --git a/lib/utf8.pm b/lib/utf8.pm index 5ddd4ba21a..691de0d630 100644 --- a/lib/utf8.pm +++ b/lib/utf8.pm @@ -1,5 +1,7 @@ package utf8; +$^U = 1; + sub import { $^H |= 0x00000008; $enc{caller()} = $_[1] if $_[1]; @@ -567,6 +567,9 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) sv_setiv(sv, (IV)PL_basetime); #endif break; + case '\025': /* ^U */ + sv_setiv(sv, (IV)PL_bigchar); + break; case '\027': /* ^W & $^Warnings*/ if (*(mg->mg_ptr+1) == '\0') sv_setiv(sv, (IV)((PL_dowarn & G_WARN_ON) ? TRUE : FALSE)); @@ -1707,6 +1710,9 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg) PL_basetime = (Time_t)(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv)); #endif break; + case '\025': /* ^U */ + PL_bigchar = SvTRUE(sv); + break; case '\027': /* ^W & $^Warnings */ if (*(mg->mg_ptr+1) == '\0') { if ( ! (PL_dowarn & G_WARN_ALL_MASK)) { @@ -3401,7 +3401,7 @@ Perl_newSTATEOP(pTHX_ I32 flags, char *label, OP *o) cop->op_ppaddr = PL_ppaddr[ OP_NEXTSTATE ]; } cop->op_flags = flags; - cop->op_private = (PL_hints & HINT_UTF8); + cop->op_private = (PL_hints & (HINT_UTF8|HINT_BYTE)); #ifdef NATIVE_HINTS cop->op_private |= NATIVE_HINTS; #endif @@ -107,6 +107,9 @@ Deprecated. Use C<GIMME_V> instead. : G_SCALAR) \ : dowantarray()) +/* NOTE: OP_NEXTSTATE, OP_DBSTATE, and OP_SETSTATE (i.e. COPs) carry lower + * bits of PL_hints in op_private */ + /* Private for lvalues */ #define OPpLVAL_INTRO 128 /* Lvalue must be localized or lvalue sub */ @@ -130,6 +130,8 @@ START_EXTERN_C #define PL_basetime (*Perl_Ibasetime_ptr(aTHXo)) #undef PL_beginav #define PL_beginav (*Perl_Ibeginav_ptr(aTHXo)) +#undef PL_bigchar +#define PL_bigchar (*Perl_Ibigchar_ptr(aTHXo)) #undef PL_bitcount #define PL_bitcount (*Perl_Ibitcount_ptr(aTHXo)) #undef PL_bufend diff --git a/pod/perlvar.pod b/pod/perlvar.pod index 309425179f..3393fd930f 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -856,6 +856,41 @@ The time at which the program began running, in seconds since the epoch (beginning of 1970). The values returned by the B<-M>, B<-A>, and B<-C> filetests are based on this value. +=item $^U + +Global flag that switches on Unicode character support in the Perl +interpreter. The initial value is usually C<0> for compatibility +with Perl versions earlier than 5.6, but may be automatically set +to C<1> by Perl if the system provides a user-settable default +(e.g., C<$ENV{LC_CTYPE}>). It is also implicitly set to C<1> +whenever the utf8 pragma is loaded. + +Setting it to C<1> has the following effects: + +=over + +=item * + +C<chr> produces UTF-8 encoded Unicode characters. These are the same +as the corresponding ASCII characters if the argument is less than 128. + +=item * + +The C<%c> format in C<sprintf> generates a UTF-8 encoded Unicode +character. This is the same as the corresponding ASCII character +if the argument is less than 128. + +=item * + +Any system calls made by Perl will use wide character APIs native to +the system, if available. This is currently only implemented on the +Windows platform. + +=back + +The C<byte> pragma overrides the value of this flag in the current +lexical scope. See L<byte>. + =item $^V The revision, version, and subversion of the Perl interpreter, represented @@ -2202,7 +2202,7 @@ PP(pp_chr) SvUTF8_off(TARG); /* decontaminate */ (void)SvUPGRADE(TARG,SVt_PV); - if (value >= 128 && !IN_BYTE) { + if (value >= 128 && PL_bigchar && !IN_BYTE) { SvGROW(TARG,8); tmps = SvPVX(TARG); tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value); @@ -3046,7 +3046,7 @@ Perl_sv_catpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN Move(ptr,SvPVX(sv)+tlen,len,char); SvCUR(sv) += len; *SvEND(sv) = '\0'; - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -3083,6 +3083,8 @@ Perl_sv_catsv(pTHX_ SV *dstr, register SV *sstr) return; if (s = SvPV(sstr, len)) sv_catpvn(dstr,s,len); + if (SvUTF8(sstr)) + SvUTF8_on(dstr); } /* @@ -3125,7 +3127,7 @@ Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr) ptr = SvPVX(sv); Move(ptr,SvPVX(sv)+tlen,len+1,char); SvCUR(sv) += len; - (void)SvPOK_only(sv); /* validate pointer */ + (void)SvPOK_only_UTF8(sv); /* validate pointer */ SvTAINT(sv); } @@ -5828,7 +5830,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV uv = va_arg(*args, int); else uv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0; - if (uv >= 128 && !IN_BYTE) { + if (uv >= 128 && PL_bigchar && !IN_BYTE) { eptr = (char*)utf8buf; elen = uv_to_utf8((U8*)eptr, uv) - utf8buf; is_utf = TRUE; @@ -503,9 +503,10 @@ Set the length of the string which is in the SV. See C<SvCUR>. #define SvOK(sv) (SvFLAGS(sv) & SVf_OK) #define SvOK_off(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \ - SVf_IVisUV), \ + SVf_IVisUV|SVf_UTF8), \ SvOOK_off(sv)) -#define SvOK_off_exc_UV(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC), \ +#define SvOK_off_exc_UV(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \ + SVf_UTF8), \ SvOOK_off(sv)) #define SvOKp(sv) (SvFLAGS(sv) & (SVp_IOK|SVp_NOK|SVp_POK)) @@ -547,7 +548,11 @@ Set the length of the string which is in the SV. See C<SvCUR>. #define SvPOK(sv) (SvFLAGS(sv) & SVf_POK) #define SvPOK_on(sv) (SvFLAGS(sv) |= (SVf_POK|SVp_POK)) #define SvPOK_off(sv) (SvFLAGS(sv) &= ~(SVf_POK|SVp_POK)) -#define SvPOK_only(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC|SVf_IVisUV), \ +#define SvPOK_only(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \ + SVf_IVisUV|SVf_UTF8), \ + SvFLAGS(sv) |= (SVf_POK|SVp_POK)) +#define SvPOK_only_UTF8(sv) (SvFLAGS(sv) &= ~(SVf_OK|SVf_AMAGIC| \ + SVf_IVisUV), \ SvFLAGS(sv) |= (SVf_POK|SVp_POK)) #define SvOOK(sv) (SvFLAGS(sv) & SVf_OOK) diff --git a/t/lib/charnames.t b/t/lib/charnames.t index 9775b141b2..84949896ac 100644 --- a/t/lib/charnames.t +++ b/t/lib/charnames.t @@ -16,7 +16,7 @@ print "not " unless "Here\N{EXCLAMATION MARK}?" eq "Here\041?"; print "ok 1\n"; { - no utf8; # UTEST can switch it on + use byte; # UTEST can switch utf8 on print "# \$res=$res \$\@='$@'\nnot " if $res = eval <<'EOE' diff --git a/win32/win32.h b/win32/win32.h index 69a4caf063..4fed26a911 100644 --- a/win32/win32.h +++ b/win32/win32.h @@ -422,12 +422,14 @@ struct interp_intern { /* Use CP_UTF8 when mode is UTF8 */ #define A2WHELPER(lpa, lpw, nBytes)\ - lpw[0] = 0, MultiByteToWideChar((IN_UTF8) ? CP_UTF8 : CP_ACP, 0, lpa, -1, lpw, (nBytes/sizeof(WCHAR))) + lpw[0] = 0, MultiByteToWideChar((IN_BYTE) ? CP_ACP : CP_UTF8, 0, \ + lpa, -1, lpw, (nBytes/sizeof(WCHAR))) #define W2AHELPER(lpw, lpa, nChars)\ - lpa[0] = '\0', WideCharToMultiByte((IN_UTF8) ? CP_UTF8 : CP_ACP, 0, lpw, -1, (LPSTR)lpa, nChars, NULL, NULL) + lpa[0] = '\0', WideCharToMultiByte((IN_BYTE) ? CP_ACP : CP_UTF8, 0, \ + lpw, -1, (LPSTR)lpa, nChars, NULL, NULL) -#define USING_WIDE() (PerlEnv_os_id() == VER_PLATFORM_WIN32_NT) +#define USING_WIDE() (PL_bigchar && PerlEnv_os_id() == VER_PLATFORM_WIN32_NT) #ifdef USE_ITHREADS # define PERL_WAIT_FOR_CHILDREN \ |