EBCDIC sanity - phase I

- rename utf8/uv functions to indicate what sort of uv they provide (uvuni/uvchr)
- use utf8n_xxxx (cf. pvn) for forms which take a length.
- back out vN.N and $^V exceptions to e2a/a2e
- make "locale" isxxx macros be uvchr (may be redundant?)

Not clear yet that the return values of toUPPER_uni et al. are being handled correctly.
The tr// and regexp stuff still needs an audit; the assumption is that they are working in Unicode space.
Need to provide v5.6 names for XS modules (decide whether uni or chr?).

p4raw-id: //depot/perlio@9096
author: Nick Ing-Simmons <nik@tiuk.ti.com> 2001-03-10 11:55:43 +0000
committer: Nick Ing-Simmons <nik@tiuk.ti.com> 2001-03-10 11:55:43 +0000
commit: 9041c2e396c8c7de7680a2007dc341a9f65be0d0 (patch)
tree: 19075254fbc0495a697b5e15ca1f19a99e02ac77 /pp.c
parent: 2ef28da1578e18cf36b9a30b71ac471521d2b507 (diff)
download: perl-9041c2e396c8c7de7680a2007dc341a9f65be0d0.tar.gz
1 files changed, 28 insertions, 21 deletions
diff --git a/pp.c b/pp.c
index 4c21f1b5b4..e02e002093 100644
--- a/pp.c
+++ b/pp.c
@@ -1281,7 +1281,7 @@ PP(pp_subtract)
 	    UV result;
 	    register UV buv;
 	    bool buvok = SvUOK(TOPs);
-	    
+	
 	    if (buvok)
 		buv = SvUVX(TOPs);
 	    else {
@@ -2138,7 +2138,7 @@ PP(pp_complement)
 
 	  send = tmps + len;
 	  while (tmps < send) {
-	    UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
+	    UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
 	    tmps += UTF8SKIP(tmps);
 	    targlen += UNISKIP(~c);
 	    nchar++;
@@ -2152,9 +2152,9 @@ PP(pp_complement)
 	  if (nwide) {
 	      Newz(0, result, targlen + 1, U8);
 	      while (tmps < send) {
-		  UV c = utf8_to_uv(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
+		  UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV);
 		  tmps += UTF8SKIP(tmps);
-		  result = uv_to_utf8(result, ~c);
+		  result = uvchr_to_utf8(result, ~c);
 	      }
 	      *result = '\0';
 	      result -= targlen;
@@ -2164,7 +2164,7 @@ PP(pp_complement)
 	  else {
 	      Newz(0, result, nchar + 1, U8);
 	      while (tmps < send) {
-		  U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+		  U8 c = (U8)utf8n_to_uvchr(tmps, 0, &l, UTF8_ALLOW_ANY);
 		  tmps += UTF8SKIP(tmps);
 		  *result++ = ~c;
 	      }
@@ -2934,7 +2934,7 @@ PP(pp_ord)
     STRLEN len;
     U8 *s = (U8*)SvPVx(argsv, len);
 
-    XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff));
+    XPUSHu(DO_UTF8(argsv) ? utf8_to_uvchr(s, 0) : (*s & 0xff));
     RETURN;
 }
 
@@ -2948,7 +2948,7 @@ PP(pp_chr)
 
     if (value > 255 && !IN_BYTE) {
 	SvGROW(TARG, UNISKIP(value)+1);
-	tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value);
+	tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value);
 	SvCUR_set(TARG, tmps - SvPVX(TARG));
 	*tmps = '\0';
 	(void)SvPOK_only(TARG);
@@ -2997,17 +2997,17 @@ PP(pp_ucfirst)
 	STRLEN ulen;
 	U8 tmpbuf[UTF8_MAXLEN+1];
 	U8 *tend;
-	UV uv = utf8_to_uv(s, slen, &ulen, 0);
+	UV uv;
 
 	if (PL_op->op_private & OPpLOCALE) {
 	    TAINT;
 	    SvTAINTED_on(sv);
-	    uv = toTITLE_LC_uni(uv);
+	    uv = toTITLE_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0));
 	}
 	else
 	    uv = toTITLE_utf8(s);
 	
-	tend = uv_to_utf8(tmpbuf, uv);
+	tend = uvchr_to_utf8(tmpbuf, uv);
 
 	if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) {
 	    dTARGET;
@@ -3056,17 +3056,17 @@ PP(pp_lcfirst)
 	STRLEN ulen;
 	U8 tmpbuf[UTF8_MAXLEN+1];
 	U8 *tend;
-	UV uv = utf8_to_uv(s, slen, &ulen, 0);
+	UV uv;
 
 	if (PL_op->op_private & OPpLOCALE) {
 	    TAINT;
 	    SvTAINTED_on(sv);
-	    uv = toLOWER_LC_uni(uv);
+	    uv = toLOWER_LC_uvchr(utf8n_to_uvchr(s, slen, &ulen, 0));
 	}
 	else
 	    uv = toLOWER_utf8(s);
 	
-	tend = uv_to_utf8(tmpbuf, uv);
+	tend = uvchr_to_utf8(tmpbuf, uv);
 
 	if (!SvPADTMP(sv) || tend - tmpbuf != ulen || SvREADONLY(sv)) {
 	    dTARGET;
@@ -3133,13 +3133,13 @@ PP(pp_uc)
 		TAINT;
 		SvTAINTED_on(TARG);
 		while (s < send) {
-		    d = uv_to_utf8(d, toUPPER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
+		    d = uvchr_to_utf8(d, toUPPER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0)));
 		    s += ulen;
 		}
 	    }
 	    else {
 		while (s < send) {
-		    d = uv_to_utf8(d, toUPPER_utf8( s ));
+		    d = uvchr_to_utf8(d, toUPPER_utf8( s ));
 		    s += UTF8SKIP(s);
 		}
 	    }
@@ -3207,13 +3207,13 @@ PP(pp_lc)
 		TAINT;
 		SvTAINTED_on(TARG);
 		while (s < send) {
-		    d = uv_to_utf8(d, toLOWER_LC_uni( utf8_to_uv(s, len, &ulen, 0)));
+		    d = uvchr_to_utf8(d, toLOWER_LC_uvchr( utf8n_to_uvchr(s, len, &ulen, 0)));
 		    s += ulen;
 		}
 	    }
 	    else {
 		while (s < send) {
-		    d = uv_to_utf8(d, toLOWER_utf8(s));
+		    d = uvchr_to_utf8(d, toLOWER_utf8(s));
 		    s += UTF8SKIP(s);
 		}
 	    }
@@ -3967,7 +3967,7 @@ PP(pp_reverse)
 			continue;
 		    }
 		    else {
-			if (!utf8_to_uv_simple(s, 0))
+			if (!utf8_to_uvchr(s, 0))
 			    break;
 			up = (char*)s;
 			s += UTF8SKIP(s);
@@ -4046,7 +4046,14 @@ PP(pp_unpack)
     STRLEN llen;
     STRLEN rlen;
     register char *pat = SvPV(left, llen);
+#if 0
+    /* Packed side is assumed to be octets - so force downgrade if it
+       has been UTF-8 encoded by accident
+     */
+    register char *s = SvPVbyte(right, rlen);
+#else
     register char *s = SvPV(right, rlen);
+#endif
     char *strend = s + rlen;
     char *strbeg = s;
     register char *patend = pat + llen;
@@ -4355,7 +4362,7 @@ PP(pp_unpack)
 	    if (checksum) {
 		while (len-- > 0 && s < strend) {
 		    STRLEN alen;
-		    auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
+		    auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
 		    along = alen;
 		    s += along;
 		    if (checksum > 32)
@@ -4369,7 +4376,7 @@ PP(pp_unpack)
 		EXTEND_MORTAL(len);
 		while (len-- > 0 && s < strend) {
 		    STRLEN alen;
-		    auint = utf8_to_uv((U8*)s, strend - s, &alen, 0);
+		    auint = utf8n_to_uvchr((U8*)s, strend - s, &alen, 0);
 		    along = alen;
 		    s += along;
 		    sv = NEWSV(37, 0);
@@ -5407,7 +5414,7 @@ PP(pp_pack)
 		fromstr = NEXTFROM;
 		auint = SvUV(fromstr);
 		SvGROW(cat, SvCUR(cat) + UTF8_MAXLEN + 1);
-		SvCUR_set(cat, (char*)uv_to_utf8((U8*)SvEND(cat),auint)
+		SvCUR_set(cat, (char*)uvchr_to_utf8((U8*)SvEND(cat),auint)
 			       - SvPVX(cat));
 	    }
 	    *SvEND(cat) = '\0';
author	Nick Ing-Simmons <nik@tiuk.ti.com>	2001-03-10 11:55:43 +0000
committer	Nick Ing-Simmons <nik@tiuk.ti.com>	2001-03-10 11:55:43 +0000
commit	9041c2e396c8c7de7680a2007dc341a9f65be0d0 (patch)
tree	19075254fbc0495a697b5e15ca1f19a99e02ac77 /pp.c
parent	2ef28da1578e18cf36b9a30b71ac471521d2b507 (diff)
download	perl-9041c2e396c8c7de7680a2007dc341a9f65be0d0.tar.gz