summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-05-31 00:16:44 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-05-31 00:16:44 +0000
commit: 0064a8a9866779dceb087452b9bfaa733c51adce (patch)
tree: 3fdfd380d1e3b371bed489f787cf1c3a69e22234
parent: 9c5ffd7c3fe1ab64d3e7d06810ac3ab42426718b (diff)
download: perl-0064a8a9866779dceb087452b9bfaa733c51adce.tar.gz
Salvage bits and pieces from the experimental 'utf8 everywhere'
patch: rename HINT_BYTE and IN_BYTE to HINT_BYTES and IN_BYTES to match the pragma name; various robustness cleanups. p4raw-id: //depot/perl@10339
-rw-r--r-- lib/utf8_heavy.pl 4
-rw-r--r-- mg.c 6
-rw-r--r-- perl.h 2
-rw-r--r-- pp.c 2
-rw-r--r-- pp_ctl.c 2
-rw-r--r-- pp_sys.c 4
-rw-r--r-- regexec.c 6
-rw-r--r-- sv.c 8
-rw-r--r-- toke.c 8
-rw-r--r-- utf8.c 2
-rw-r--r-- utf8.h 8
-rw-r--r-- utfebcdic.h 4
12 files changed, 31 insertions, 25 deletions
diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl
index 2327d3d818..a843737164 100644
--- a/lib/utf8_heavy.pl
+++ b/lib/utf8_heavy.pl
@@ -36,7 +36,9 @@ sub SWASHNEW {
}
{
- $list ||= ($caller ne 'main' && eval { $caller->$type(); })
+ $list ||=
+ ( exists &{"${caller}::${type}"} &&
+ eval { $caller->$type() } )
|| do "$file.pl"
|| do "$encoding/$file.pl"
|| do "$encoding/Is/${type}.pl"
diff --git a/mg.c b/mg.c
index 4b6d25de18..b9a027e476 100644
--- a/mg.c
+++ b/mg.c
@@ -375,9 +375,11 @@ Perl_magic_regdatum_get(pTHX_ SV *sv, MAGIC *mg)
if (i > 0 && DO_UTF8(PL_reg_sv)) {
char *b = rx->subbeg;
- i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i));
+ if (b)
+ i = Perl_utf8_length(aTHX_ (U8*)b, (U8*)(b+i));
}
- sv_setiv(sv,i);
+
+ sv_setiv(sv, i);
}
}
return 0;
diff --git a/perl.h b/perl.h
index 9283e8e2e5..7286921c9b 100644
--- a/perl.h
+++ b/perl.h
@@ -2891,7 +2891,7 @@ enum { /* pass one of these to get_vtbl */
#define HINT_INTEGER 0x00000001
#define HINT_STRICT_REFS 0x00000002
/* #define HINT_notused4 0x00000004 */
-#define HINT_BYTE 0x00000008
+#define HINT_BYTES 0x00000008
/* #define HINT_notused10 0x00000010 */
/* Note: 20,40,80 used for NATIVE_HINTS */
diff --git a/pp.c b/pp.c
index 4bb8d3e377..6328a6cbd5 100644
--- a/pp.c
+++ b/pp.c
@@ -3001,7 +3001,7 @@ PP(pp_chr)
(void)SvUPGRADE(TARG,SVt_PV);
- if (value > 255 && !IN_BYTE) {
+ if (value > 255 && !IN_BYTES) {
SvGROW(TARG, UNISKIP(value)+1);
tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value);
SvCUR_set(TARG, tmps - SvPVX(TARG));
diff --git a/pp_ctl.c b/pp_ctl.c
index 736f84d9d2..032be2e33d 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -323,7 +323,7 @@ PP(pp_formline)
NV value;
bool gotsome = FALSE;
STRLEN len;
- STRLEN fudge = SvCUR(tmpForm) * (IN_BYTE ? 1 : 3) + 1;
+ STRLEN fudge = SvCUR(tmpForm) * (IN_BYTES ? 1 : 3) + 1;
bool item_is_utf = FALSE;
if (!SvMAGICAL(tmpForm) || !SvCOMPILED(tmpForm)) {
diff --git a/pp_sys.c b/pp_sys.c
index 062cee0d65..e64fbef820 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -1535,7 +1535,7 @@ PP(pp_sysread)
io = GvIO(gv);
if (!io || !IoIFP(io))
goto say_undef;
- if ((fp_utf8 = PerlIO_isutf8(IoIFP(io))) && !IN_BYTE) {
+ if ((fp_utf8 = PerlIO_isutf8(IoIFP(io))) && !IN_BYTES) {
buffer = SvPVutf8_force(bufsv, blen);
/* UTF8 may not have been set if they are all low bytes */
SvUTF8_on(bufsv);
@@ -1663,7 +1663,7 @@ PP(pp_sysread)
SvCUR_set(bufsv, count+(buffer - SvPVX(bufsv)));
*SvEND(bufsv) = '\0';
(void)SvPOK_only(bufsv);
- if (fp_utf8 && !IN_BYTE) {
+ if (fp_utf8 && !IN_BYTES) {
/* Look at utf8 we got back and count the characters */
char *bend = buffer + count;
while (buffer < bend) {
diff --git a/regexec.c b/regexec.c
index b8659f23b3..c795a5b3d5 100644
--- a/regexec.c
+++ b/regexec.c
@@ -966,7 +966,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
else {
U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
- tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
+ if (s > (char*)r)
+ tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
}
tmp = ((OP(c) == BOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
@@ -1009,7 +1010,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
else {
U8 *r = reghop3((U8*)s, -1, (U8*)startpos);
- tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
+ if (s > (char*)r)
+ tmp = (I32)utf8n_to_uvchr(r, s - (char*)r, 0, 0);
}
tmp = ((OP(c) == NBOUND ?
isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
diff --git a/sv.c b/sv.c
index 2279ea093b..6b933b112c 100644
--- a/sv.c
+++ b/sv.c
@@ -3067,7 +3067,7 @@ Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok)
if (fail_ok)
return FALSE;
#ifdef USE_BYTES_DOWNGRADES
- else if (IN_BYTE) {
+ else if (IN_BYTES) {
U8 *d = s;
U8 *e = (U8 *) SvEND(sv);
int first = 1;
@@ -4893,7 +4893,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
pv2 = SvPV(sv2, cur2);
/* do not utf8ize the comparands as a side-effect */
- if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) {
+ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
bool is_utf8 = TRUE;
/* UTF-8ness differs */
if (PL_hints & HINT_UTF8_DISTINCT)
@@ -4960,7 +4960,7 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
pv2 = SvPV(sv2, cur2);
/* do not utf8ize the comparands as a side-effect */
- if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) {
+ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
if (PL_hints & HINT_UTF8_DISTINCT)
return SvUTF8(sv1) ? 1 : -1;
@@ -7265,7 +7265,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
uv = args ? va_arg(*args, int) : SvIVx(argsv);
if ((uv > 255 ||
(!UNI_IS_INVARIANT(uv) && SvUTF8(sv)))
- && !IN_BYTE) {
+ && !IN_BYTES) {
eptr = (char*)utf8buf;
elen = uvchr_to_utf8((U8*)eptr, uv) - utf8buf;
is_utf = TRUE;
diff --git a/toke.c b/toke.c
index 54fcd1617a..851b7591b8 100644
--- a/toke.c
+++ b/toke.c
@@ -3867,7 +3867,7 @@ Perl_yylex(pTHX)
CLINE;
yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
yylval.opval->op_private = OPpCONST_BARE;
- if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+ if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
TERM(WORD);
}
@@ -4028,7 +4028,7 @@ Perl_yylex(pTHX)
if (*s == '=' && s[1] == '>') {
CLINE;
sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
- if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+ if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
TERM(WORD);
}
@@ -4204,7 +4204,7 @@ Perl_yylex(pTHX)
}
#endif
#ifdef PERLIO_LAYERS
- if (UTF && !IN_BYTE)
+ if (UTF && !IN_BYTES)
PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
#endif
PL_rsfp = Nullfp;
@@ -6532,7 +6532,7 @@ retval:
Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
}
SvREFCNT_dec(herewas);
- if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
+ if (UTF && !IN_BYTES && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
SvUTF8_on(tmpstr);
PL_lex_stuff = tmpstr;
yylval.ival = op_type;
diff --git a/utf8.c b/utf8.c
index f692164e2e..1c1a5d4f70 100644
--- a/utf8.c
+++ b/utf8.c
@@ -507,7 +507,7 @@ Perl_utf8_length(pTHX_ U8 *s, U8 *e)
U8 t = UTF8SKIP(s);
if (e - s < t)
- Perl_croak(aTHX_ "panic: utf8_length: s=%p (%02X) e=%p l=%d - unaligned end",s,*s,e,t);
+ Perl_croak(aTHX_ "panic: utf8_length: unaligned end");
s += t;
len++;
}
diff --git a/utf8.h b/utf8.h
index 8898cf164c..baee5ae2c4 100644
--- a/utf8.h
+++ b/utf8.h
@@ -111,10 +111,10 @@ END_EXTERN_C
* (that is, the two high bits are set). Otherwise we risk loading in the
* heavy-duty SWASHINIT and SWASHGET routines unnecessarily.
*/
-#define isIDFIRST_lazy_if(p,c) ((IN_BYTE || (!c || (*((U8*)p) < 0xc0))) \
+#define isIDFIRST_lazy_if(p,c) ((IN_BYTES || (!c || (*((U8*)p) < 0xc0))) \
? isIDFIRST(*(p)) \
: isIDFIRST_utf8((U8*)p))
-#define isALNUM_lazy_if(p,c) ((IN_BYTE || (!c || (*((U8*)p) < 0xc0))) \
+#define isALNUM_lazy_if(p,c) ((IN_BYTES || (!c || (*((U8*)p) < 0xc0))) \
? isALNUM(*(p)) \
: isALNUM_utf8((U8*)p))
@@ -129,8 +129,8 @@ END_EXTERN_C
#define UTF8_MAXLEN 13 /* how wide can a single UTF8 encoded character become */
/* #define IN_UTF8 (PL_curcop->op_private & HINT_UTF8) */
-#define IN_BYTE (PL_curcop->op_private & HINT_BYTE)
-#define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTE)
+#define IN_BYTES (PL_curcop->op_private & HINT_BYTES)
+#define DO_UTF8(sv) (SvUTF8(sv) && !IN_BYTES)
#define UTF8_ALLOW_EMPTY 0x0001
#define UTF8_ALLOW_CONTINUATION 0x0002
diff --git a/utfebcdic.h b/utfebcdic.h
index 0dd73d2bb0..2c56006ff6 100644
--- a/utfebcdic.h
+++ b/utfebcdic.h
@@ -234,10 +234,10 @@ END_EXTERN_C
* unnecessarily.
*/
-#define isIDFIRST_lazy_if(p,c) ((IN_BYTE || (!c || UTF8_IS_INVARIANT(*p))) \
+#define isIDFIRST_lazy_if(p,c) ((IN_BYTES || (!c || UTF8_IS_INVARIANT(*p))) \
? isIDFIRST(*(p)) \
: isIDFIRST_utf8((U8*)p))
-#define isALNUM_lazy_if(p,c) ((IN_BYTE || (!c || UTF8_IS_INVARIANT(*p))) \
+#define isALNUM_lazy_if(p,c) ((IN_BYTES || (!c || UTF8_IS_INVARIANT(*p))) \
? isALNUM(*(p)) \
: isALNUM_utf8((U8*)p))