summaryrefslogtreecommitdiff
path: root/pp.c
diff options
context:
space:
mode:
authorNick Ing-Simmons <nik@tiuk.ti.com>2001-03-10 11:55:43 +0000
committerNick Ing-Simmons <nik@tiuk.ti.com>2001-03-10 11:55:43 +0000
commit9041c2e396c8c7de7680a2007dc341a9f65be0d0 (patch)
tree19075254fbc0495a697b5e15ca1f19a99e02ac77 /pp.c
parent2ef28da1578e18cf36b9a30b71ac471521d2b507 (diff)
downloadperl-9041c2e396c8c7de7680a2007dc341a9f65be0d0.tar.gz
EBCDIC sanity - phase I
- rename utf8/uv functions to indicate what sort of uv they provide (uvuni/uvchr) - use utf8n_xxxx (c.f. pvn) for forms which take length. - back out vN.N and $^V exceptions to e2a/a2e - make "locale" isxxx macros be uvchr (may be redundant?) Not clear yet that toUPPER_uni et. al. return being handled correctly. The tr// and rexexp stuff still needs an audit, assumption is they are working in Unicode space. Need to provide v5.6 names for XS modules (decide is uni or chr ?). p4raw-id: //depot/perlio@9096
Diffstat (limited to 'pp.c')
-rw-r--r--pp.c49
1 files changed, 28 insertions, 21 deletions
diff --git a/pp.c b/pp.c
index 4c21f1b5b4..e02e002093 100644
--- a/pp.c
+++ b/pp.c
@@ -1281,7 +1281,7 @@ PP(pp_subtract)
UV result;
register UV buv;
bool buvok = SvUOK(TOPs);
-
+
if (buvok)
buv = SvUVX(TOPs);
else {
@@ -2138,7 +2138,7 @@ PP(pp_complement)
send = tmps + len;
while (tmps < send) {
- UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
+ UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
tmps += UTF8SKIP(tmps);
targlen += UNISKIP(~c);
nchar++;
@@ -2152,9 +2152,9 @@ PP(pp_complement)
if (nwide) {
Newz(0, result, targlen + 1, U8);
while (tmps < send) {
- UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
+ UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
tmps += UTF8SKIP(tmps);
- result = uv_to_utf8(result, ~c);
+ result = uvchr_to_utf8(result, ~c);
}
*result = '\0';
result -= targlen;
@@ -2164,7 +2164,7 @@ PP(pp_complement)
else {
Newz(0, result, nchar + 1, U8);
while (tmps < send) {
- U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+ U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY);
tmps += UTF8SKIP(tmps);
*result++ = ~c;
}
@@ -2934,7 +2934,7 @@ PP(pp_ord)
STRLEN len;
U8 *s = (U8*)SvPVx(argsv, len);
- XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff));
+ XPUSHu(DO_UTF8(argsv) ? utf8_to_uvchr(s, 0) : (*s & 0xff));
RETURN;
}
@@ -2948,7 +2948,7 @@ PP(pp_chr)
if (value > 255 && !IN_BYTE) {
SvGROW(TARG, UNISKIP(value)+1);
- tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value);
+ tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value);
SvCUR_set(TARG, tmps - SvPVX(TARG));
*tmps = '\0';
(void)SvPOK_only(TARG);
@@ -2997,17 +2997,17 @@ PP(pp_ucfirst)
STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tend;
- UV uv = utf8_to_uv(s, slen, &ulen, 0);
+ UV uv;
if (PL_op->op_private & OPpLOCALE) {
TAINT;
SvTAINTED_on(sv);
- uv = toTITLE_LC_uni(uv);
+ uv = toTITLE_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0));
}
else
uv = toTITLE_utf8(s);
- tend = uv_to_utf8(tmpbuf, uv);
+ tend = uvchr_to_utf8(tmpbuf, uv);
if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) {
dTARGET;
@@ -3056,17 +3056,17 @@ PP(pp_lcfirst)
STRLEN ulen;
U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tend;
- UV uv = utf8_to_uv(s, slen, &ulen, 0);
+ UV uv;
if (PL_op->op_private & OPpLOCALE) {
TAINT;
SvTAINTED_on(sv);
- uv = toLOWER_LC_uni(uv);
+ uv = toLOWER_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0));
}
else
uv = toLOWER_utf8(s);
- tend = uv_to_utf8(tmpbuf, uv);
+ tend = uvchr_to_utf8(tmpbuf, uv);
if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) {
dTARGET;
@@ -3133,13 +3133,13 @@ PP(pp_uc)
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
+ d = uvchr_to_utf8(d, toUPPER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0)));
s += ulen;
}
}
else {
while (s < send) {
- d = uv_to_utf8(d, toUPPER_utf8( s ));
+ d = uvchr_to_utf8(d, toUPPER_utf8( s ));
s += UTF8SKIP(s);
}
}
@@ -3207,13 +3207,13 @@ PP(pp_lc)
TAINT;
SvTAINTED_on(TARG);
while (s < send) {
- d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
+ d = uvchr_to_utf8(d, toLOWER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0)));
s += ulen;
}
}
else {
while (s < send) {
- d = uv_to_utf8(d, toLOWER_utf8(s));
+ d = uvchr_to_utf8(d, toLOWER_utf8(s));
s += UTF8SKIP(s);
}
}
@@ -3967,7 +3967,7 @@ PP(pp_reverse)
continue;
}
else {
- if (!utf8_to_uv_simple(s, 0))
+ if (!utf8_to_uvchr(s, 0))
break;
up = (char*)s;
s += UTF8SKIP(s);
@@ -4046,7 +4046,14 @@ PP(pp_unpack)
STRLEN llen;
STRLEN rlen;
register char *pat = SvPV(left, llen);
+#if 0
+ /* Packed side is assumed to be octets - so force downgrade if it
+ has been UTF-8 encoded by accident
+ */
+ register char *s = SvPVbyte(right, rlen);
+#else
register char *s = SvPV(right, rlen);
+#endif
char *strend = s + rlen;
char *strbeg = s;
register char *patend = pat + llen;
@@ -4355,7 +4362,7 @@ PP(pp_unpack)
if (checksum) {
while (len-- > 0 && s < strend) {
STRLEN alen;
- auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
+ auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
along = alen;
s += along;
if (checksum > 32)
@@ -4369,7 +4376,7 @@ PP(pp_unpack)
EXTEND_MORTAL(len);
while (len-- > 0 && s < strend) {
STRLEN alen;
- auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
+ auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
along = alen;
s += along;
sv = NEWSV(37, 0);
@@ -5407,7 +5414,7 @@ PP(pp_pack)
fromstr = NEXTFROM;
auint = SvUV(fromstr);
SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1);
- SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint)
+ SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint)
- SvPVX(cat));
}
*SvEND(cat) = '\0';