Make \x{...} consistently produce UTF-8.

Subject: Re: \x{...} is confused Message-ID: <20001029193648.A6287@pembro4.pmb.ox.ac.uk> p4raw-id: //depot/perl@7485
author: Simon Cozens <simon@netthink.co.uk> 2000-10-29 19:36:48 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2000-10-29 21:00:47 +0000
commit: 89491803eada141bfe112702c189849c457eac87 (patch)
tree: 701fe90c1f6d9cd3d308f33706c76eeccda86114 /toke.c
parent: 39ca283eb96f3f63e2aa42b33b559e2ab96b0231 (diff)
download: perl-89491803eada141bfe112702c189849c457eac87.tar.gz
1 files changed, 20 insertions, 19 deletions
diff --git a/toke.c b/toke.c
index b007de4550..274e506b3b 100644
--- a/toke.c
+++ b/toke.c
@@ -1187,13 +1187,13 @@ S_scan_const(pTHX_ char *start)
     register char *d = SvPVX(sv);		/* destination for copies */
     bool dorange = FALSE;			/* are we in a translit range? */
     bool didrange = FALSE;		        /* did we just finish a range? */
-    bool has_utf = FALSE;			/* embedded \x{} */
+    bool has_utf8 = FALSE;			/* embedded \x{} */
     UV uv;
 
     I32 utf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
 	? (PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
 	: UTF;
-    I32 thisutf = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
+    I32 this_utf8 = (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op)
 	? (PL_sublex_info.sub_op->op_private & (PL_lex_repl ?
 						OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF))
 	: UTF;
@@ -1327,7 +1327,7 @@ S_scan_const(pTHX_ char *start)
 
 	/* (now in tr/// code again) */
 
-	if (*s & 0x80 && thisutf) {
+	if (*s & 0x80 && this_utf8) {
 	    STRLEN len;
 	    UV uv;
 
@@ -1343,7 +1343,7 @@ S_scan_const(pTHX_ char *start)
 		while (len--)
 		    *d++ = *s++;
 	    }
-	    has_utf = TRUE;
+	    has_utf8 = TRUE;
 	    continue;
 	}
 
@@ -1416,9 +1416,10 @@ S_scan_const(pTHX_ char *start)
 			yyerror("Missing right brace on \\x{}");
 			e = s;
 		    }
-		    {
+		    else {
 			STRLEN len = 1;		/* allow underscores */
 			uv = (UV)scan_hex(s + 1, e - s - 1, &len);
+			has_utf8 = TRUE;
 		    }
 		    s = e + 1;
 		}
@@ -1435,8 +1436,8 @@ S_scan_const(pTHX_ char *start)
 		 * There will always enough room in sv since such escapes will
 		 * be longer than any utf8 sequence they can end up as
 		 */
-		if (uv > 127) {
-		    if (!thisutf && !has_utf && uv > 255) {
+		if (uv > 127 || has_utf8) {
+		    if (!this_utf8 && !has_utf8 && uv > 255) {
 		        /* might need to recode whatever we have accumulated so far
 			 * if it contains any hibit chars
 			 */
@@ -1468,9 +1469,9 @@ S_scan_const(pTHX_ char *start)
                         }
                     }
 
-                    if (thisutf || uv > 255) {
+                    if (has_utf8 || uv > 255) {
 		        d = (char*)uv_to_utf8((U8*)d, uv);
-			has_utf = TRUE;
+			this_utf8 = TRUE;
                     }
 		    else {
 		        *d++ = (char)uv;
@@ -1499,7 +1500,7 @@ S_scan_const(pTHX_ char *start)
 		    res = new_constant( Nullch, 0, "charnames", 
 					res, Nullsv, "\\N{...}" );
 		    str = SvPV(res,len);
-		    if (!has_utf && SvUTF8(res)) {
+		    if (!has_utf8 && SvUTF8(res)) {
 			char *ostart = SvPVX(sv);
 			SvCUR_set(sv, d - ostart);
 			SvPOK_on(sv);
@@ -1508,7 +1509,7 @@ S_scan_const(pTHX_ char *start)
 			/* this just broke our allocation above... */
 			SvGROW(sv, send - start);
 			d = SvPVX(sv) + SvCUR(sv);
-			has_utf = TRUE;
+			has_utf8 = TRUE;
 		    }
 		    if (len > e - s + 4) {
 			char *odest = SvPVX(sv);
@@ -1587,7 +1588,7 @@ S_scan_const(pTHX_ char *start)
     *d = '\0';
     SvCUR_set(sv, d - SvPVX(sv));
     SvPOK_on(sv);
-    if (has_utf)
+    if (has_utf8)
 	SvUTF8_on(sv);
 
     /* shrink the sv if we allocated more than we used */
@@ -6553,7 +6554,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
     register char term;			/* terminating character */
     register char *to;			/* current position in the sv's data */
     I32 brackets = 1;			/* bracket nesting level */
-    bool has_utf = FALSE;		/* is there any utf8 content? */
+    bool has_utf8 = FALSE;		/* is there any utf8 content? */
 
     /* skip space before the delimiter */
     if (isSPACE(*s))
@@ -6565,7 +6566,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
     /* after skipping whitespace, the next character is the terminator */
     term = *s;
     if ((term & 0x80) && UTF)
-	has_utf = TRUE;
+	has_utf8 = TRUE;
 
     /* mark where we are */
     PL_multi_start = CopLINE(PL_curcop);
@@ -6611,8 +6612,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 		   have found the terminator */
 		else if (*s == term)
 		    break;
-		else if (!has_utf && (*s & 0x80) && UTF)
-		    has_utf = TRUE;
+		else if (!has_utf8 && (*s & 0x80) && UTF)
+		    has_utf8 = TRUE;
 		*to = *s;
 	    }
 	}
@@ -6640,8 +6641,8 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 		    break;
 		else if (*s == PL_multi_open)
 		    brackets++;
-		else if (!has_utf && (*s & 0x80) && UTF)
-		    has_utf = TRUE;
+		else if (!has_utf8 && (*s & 0x80) && UTF)
+		    has_utf8 = TRUE;
 		*to = *s;
 	    }
 	}
@@ -6701,7 +6702,7 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
 
     if (keep_delims)
 	sv_catpvn(sv, s, 1);
-    if (has_utf)
+    if (has_utf8)
 	SvUTF8_on(sv);
     PL_multi_end = CopLINE(PL_curcop);
     s++;
author	Simon Cozens <simon@netthink.co.uk>	2000-10-29 19:36:48 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2000-10-29 21:00:47 +0000
commit	89491803eada141bfe112702c189849c457eac87 (patch)
tree	701fe90c1f6d9cd3d308f33706c76eeccda86114 /toke.c
parent	39ca283eb96f3f63e2aa42b33b559e2ab96b0231 (diff)
download	perl-89491803eada141bfe112702c189849c457eac87.tar.gz