diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-05-31 00:16:44 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-05-31 00:16:44 +0000 |
commit | 0064a8a9866779dceb087452b9bfaa733c51adce (patch) | |
tree | 3fdfd380d1e3b371bed489f787cf1c3a69e22234 | |
parent | 9c5ffd7c3fe1ab64d3e7d06810ac3ab42426718b (diff) | |
download | perl-0064a8a9866779dceb087452b9bfaa733c51adce.tar.gz |
Salvage bits and pieces from the experimental 'utf8 everywhere'
patch: rename HINT_BYTE and IN_BYTE to HINT_BYTES and IN_BYTES
to match the pragma name; various robustness cleanups.
p4raw-id: //depot/perl@10339
-rw-r--r-- | lib/utf8_heavy.pl | 4 | ||||
-rw-r--r-- | mg.c | 6 | ||||
-rw-r--r-- | perl.h | 2 | ||||
-rw-r--r-- | pp.c | 2 | ||||
-rw-r--r-- | pp_ctl.c | 2 | ||||
-rw-r--r-- | pp_sys.c | 4 | ||||
-rw-r--r-- | regexec.c | 6 | ||||
-rw-r--r-- | sv.c | 8 | ||||
-rw-r--r-- | toke.c | 8 | ||||
-rw-r--r-- | utf8.c | 2 | ||||
-rw-r--r-- | utf8.h | 8 | ||||
-rw-r--r-- | utfebcdic.h | 4 |
12 files changed, 31 insertions, 25 deletions
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 2327d3d818..a843737164 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -36,7 +36,9 @@ sub SWASHNEW { } { - $list ||= ($caller ne 'main' && eval { $caller->$type(); }) + $list ||= + ( exists &{"${caller}::${type}"} && + eval { $caller->$type() } ) || do "$file.pl" || do "$encoding/$file.pl" || do "$encoding/Is/${type}.pl" @@ -375,9 +375,11 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg) if (i > 0 && DO_UTF8(PL_reg_sv)) { char *b = rx->subbeg; - i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i)); + if (b) + i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i)); } - sv_setiv(sv,i); + + sv_setiv(sv, i); } } return 0; @@ -2891,7 +2891,7 @@ enum { /* pass one of these to get_vtbl */ #define HINT_INTEGER 0x00000001 #define HINT_STRICT_REFS 0x00000002 /* #define HINT_notused4 0x00000004 */ -#define HINT_BYTE 0x00000008 +#define HINT_BYTES 0x00000008 /* #define HINT_notused10 0x00000010 */ /* Note: 20,40,80 used for NATIVE_HINTS */ @@ -3001,7 +3001,7 @@ PP(pp_chr) (void)SvUPGRADE(TARG,SVt_PV); - if (value > 255 && !IN_BYTE) { + if (value > 255 && !IN_BYTES) { SvGROW(TARG, UNISKIP(value)+1); tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value); SvCUR_set(TARG, tmps - SvPVX(TARG)); @@ -323,7 +323,7 @@ PP(pp_formline) NV value; bool gotsome = FALSE; STRLEN len; - STRLEN fudge = SvCUR(tmpForm) * (IN_BYTE ? 1 : 3) + 1; + STRLEN fudge = SvCUR(tmpForm) * (IN_BYTES ? 1 : 3) + 1; bool item_is_utf = FALSE; if (!SvMAGICAL(tmpForm) || !SvCOMPILED(tmpForm)) { @@ -1535,7 +1535,7 @@ PP(pp_sysread) io = GvIO(gv); if (!io || !IoIFP(io)) goto say_undef; - if ((fp_utf8 = PerlIO_isutf8(IoIFP(io))) && !IN_BYTE) { + if ((fp_utf8 = PerlIO_isutf8(IoIFP(io))) && !IN_BYTES) { buffer = SvPVutf8_force(bufsv, blen); /* UTF8 may not have been set if they are all low bytes */ SvUTF8_on(bufsv); @@ -1663,7 +1663,7 @@ PP(pp_sysread) SvCUR_set(bufsv, count+(buffer - SvPVX(bufsv))); *SvEND(bufsv) = '\0'; (void)SvPOK_only(bufsv); - if (fp_utf8 && !IN_BYTE) { + if (fp_utf8 && !IN_BYTES) { /* Look at utf8 we got back and count the characters */ char *bend = buffer + count; while (buffer < bend) { @@ -966,7 +966,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta else { U8 *r = reghop3((U8*)s, -1, (U8*)startpos); - tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0); + if (s > (char*)r) + tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0); } tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0); @@ -1009,7 +1010,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta else { U8 *r = reghop3((U8*)s, -1, (U8*)startpos); - tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0); + if (s > (char*)r) + tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0); } tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0); @@ -3067,7 +3067,7 @@ Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok) if (fail_ok) return FALSE; #ifdef USE_BYTES_DOWNGRADES - else if (IN_BYTE) { + else if (IN_BYTES) { U8 *d = s; U8 *e = (U8 *) SvEND(sv); int first = 1; @@ -4893,7 +4893,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) pv2 = SvPV(sv2, cur2); /* do not utf8ize the comparands as a side-effect */ - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { bool is_utf8 = TRUE; /* UTF-8ness differs */ if (PL_hints & HINT_UTF8_DISTINCT) @@ -4960,7 +4960,7 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) pv2 = SvPV(sv2, cur2); /* do not utf8ize the comparands as a side-effect */ - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) { + if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { if (PL_hints & HINT_UTF8_DISTINCT) return SvUTF8(sv1) ? 1 : -1; @@ -7265,7 +7265,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV uv = args ? va_arg(*args, int) : SvIVx(argsv); if ((uv > 255 || (!UNI_IS_INVARIANT(uv) && SvUTF8(sv))) - && !IN_BYTE) { + && !IN_BYTES) { eptr = (char*)utf8buf; elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf; is_utf = TRUE; @@ -3867,7 +3867,7 @@ Perl_yylex(pTHX) CLINE; yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0)); yylval.opval->op_private = OPpCONST_BARE; - if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len)) + if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) SvUTF8_on(((SVOP*)yylval.opval)->op_sv); TERM(WORD); } @@ -4028,7 +4028,7 @@ Perl_yylex(pTHX) if (*s == '=' && s[1] == '>') { CLINE; sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf); - if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len)) + if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len)) SvUTF8_on(((SVOP*)yylval.opval)->op_sv); TERM(WORD); } @@ -4204,7 +4204,7 @@ Perl_yylex(pTHX) } #endif #ifdef PERLIO_LAYERS - if (UTF && !IN_BYTE) + if (UTF && !IN_BYTES) PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8"); #endif PL_rsfp = Nullfp; @@ -6532,7 +6532,7 @@ retval: Renew(SvPVX(tmpstr), SvLEN(tmpstr), char); } SvREFCNT_dec(herewas); - if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) + if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) SvUTF8_on(tmpstr); PL_lex_stuff = tmpstr; yylval.ival = op_type; @@ -507,7 +507,7 @@ Perl_utf8_length(pTHX_ U8 *s, U8 *e) U8 t = UTF8SKIP(s); if (e - s < t) - Perl_croak(aTHX_ "panic: utf8_length: s=%p (%02X) e=%p l=%d - unaligned end",s,*s,e,t); + Perl_croak(aTHX_ "panic: utf8_length: unaligned end"); s += t; len++; } @@ -111,10 +111,10 @@ END_EXTERN_C * (that is, the two high bits are set). Otherwise we risk loading in the * heavy-duty SWASHINIT and SWASHGET routines unnecessarily. */ -#define isIDFIRST_lazy_if(p,c) ((IN_BYTE || (!c || (*((U8*)p) < 0xc0))) \ +#define isIDFIRST_lazy_if(p,c) ((IN_BYTES || (!c || (*((U8*)p) < 0xc0))) \ ? isIDFIRST(*(p)) \ : isIDFIRST_utf8((U8*)p)) -#define isALNUM_lazy_if(p,c) ((IN_BYTE || (!c || (*((U8*)p) < 0xc0))) \ +#define isALNUM_lazy_if(p,c) ((IN_BYTES || (!c || (*((U8*)p) < 0xc0))) \ ? isALNUM(*(p)) \ : isALNUM_utf8((U8*)p)) @@ -129,8 +129,8 @@ END_EXTERN_C #define UTF8_MAXLEN 13 /* how wide can a single UTF8 encoded character become */ /* #define IN_UTF8 (PL_curcop->op_private & HINT_UTF8) */ -#define IN_BYTE (PL_curcop->op_private & HINT_BYTE) -#define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTE) +#define IN_BYTES (PL_curcop->op_private & HINT_BYTES) +#define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTES) #define UTF8_ALLOW_EMPTY 0x0001 #define UTF8_ALLOW_CONTINUATION 0x0002 diff --git a/utfebcdic.h b/utfebcdic.h index 0dd73d2bb0..2c56006ff6 100644 --- a/utfebcdic.h +++ b/utfebcdic.h @@ -234,10 +234,10 @@ END_EXTERN_C * unnecessarily. */ -#define isIDFIRST_lazy_if(p,c) ((IN_BYTE || (!c || UTF8_IS_INVARIANT(*p))) \ +#define isIDFIRST_lazy_if(p,c) ((IN_BYTES || (!c || UTF8_IS_INVARIANT(*p))) \ ? isIDFIRST(*(p)) \ : isIDFIRST_utf8((U8*)p)) -#define isALNUM_lazy_if(p,c) ((IN_BYTE || (!c || UTF8_IS_INVARIANT(*p))) \ +#define isALNUM_lazy_if(p,c) ((IN_BYTES || (!c || UTF8_IS_INVARIANT(*p))) \ ? isALNUM(*(p)) \ : isALNUM_utf8((U8*)p)) |