Patch from Inaba Hiroto:

- Canonical UTF-8 hash keys: if a key string for a hash is UTF8-on, try to downgrade the string and use the downgraded form if unicode::distinct is not in effect. For this task, I added a function bytes_from_utf8() to utf8.c. It resembles utf8_to_bytes(), but the latter is not convenient for this task. Made a test for it and added it to t/op/each.t. - Changed do_print() in doio.c to apply sv_utf8_(downgrade|upgrade) to a mortal copy of the argument SV, and changed test 18 of t/io/utf8.t, which expected print() to upgrade its argument. - Re-implemented sv_eq() with bytes_from_utf8(). - Some bug fixes: - tr/// does not handle UTF-8 ranges (\x{}-\x{}). - A \ before a raw UTF-8 character produced a "Malformed UTF-8 character" warning. - "\x{100}\N{CENT SIGN}" is malformed. Added tests for these 3. - And one silly bug (by me) with the qu operator. p4raw-id: //depot/perl@8583
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-28 19:28:40 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-28 19:28:40 +0000
commit: f9a6324217cffea75ff769ddd313748c0613a128 (patch)
tree: 9fb5b4ade5877ba969d093cfe37ec605de62d8dc /toke.c
parent: 9ee2bb1a7c54b1866ff07ab9c157254810ee5205 (diff)
download: perl-f9a6324217cffea75ff769ddd313748c0613a128.tar.gz
1 files changed, 19 insertions, 6 deletions
diff --git a/toke.c b/toke.c
index 1b41dccb4b..a85a7252a9 100644
--- a/toke.c
+++ b/toke.c
@@ -1388,8 +1388,7 @@ S_scan_const(pTHX_ char *start)
 			       "Unrecognized escape \\%c passed through",
 			       *s);
 		    /* default action is to copy the quoted character */
-		    *d++ = *s++;
-		    continue;
+		    goto default_action;
 		}
 
 	    /* \132 indicates an octal constant */
@@ -1479,6 +1478,13 @@ S_scan_const(pTHX_ char *start)
                     if (has_utf8 || uv > 255) {
 		        d = (char*)uv_to_utf8((U8*)d, uv);
 			has_utf8 = TRUE;
+			if (PL_lex_inwhat == OP_TRANS &&
+			    PL_sublex_info.sub_op) {
+			    PL_sublex_info.sub_op->op_private |=
+				(PL_lex_repl ? OPpTRANS_FROM_UTF
+					     : OPpTRANS_TO_UTF);
+			    utf = TRUE;
+			}
                     }
 		    else {
 		        *d++ = (char)uv;
@@ -1506,6 +1512,8 @@ S_scan_const(pTHX_ char *start)
 		    res = newSVpvn(s + 1, e - s - 1);
 		    res = new_constant( Nullch, 0, "charnames",
 					res, Nullsv, "\\N{...}" );
+		    if (has_utf8)
+			sv_utf8_upgrade(res);
 		    str = SvPV(res,len);
 		    if (!has_utf8 && SvUTF8(res)) {
 			char *ostart = SvPVX(sv);
@@ -1588,8 +1596,7 @@ S_scan_const(pTHX_ char *start)
 	    continue;
 	} /* end if (backslash) */
 
-       /* (now in tr/// code again) */
-
+    default_action:
        if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) {
            STRLEN len = (STRLEN) -1;
            UV uv;
@@ -1608,10 +1615,15 @@ S_scan_const(pTHX_ char *start)
                    *d++ = *s++;
            }
            has_utf8 = TRUE;
+	   if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
+	       PL_sublex_info.sub_op->op_private |=
+		   (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
+	       utf = TRUE;
+	   }
            continue;
        }
 
-	*d++ = *s++;
+       *d++ = *s++;
     } /* while loop to process each character */
 
     /* terminate the string and set up the sv */
@@ -4742,7 +4754,8 @@ Perl_yylex(pTHX)
 	case KEY_qq:
 	case KEY_qu:
 	    s = scan_str(s,FALSE,FALSE);
-	    if (tmp == KEY_qu && is_utf8_string((U8*)s, SvCUR(PL_lex_stuff)))
+	    if (tmp == KEY_qu &&
+		is_utf8_string((U8*)SvPVX(PL_lex_stuff), SvCUR(PL_lex_stuff)))
 		SvUTF8_on(PL_lex_stuff);
 	    if (!s)
 		missingterm((char*)0);
author	Jarkko Hietaniemi <jhi@iki.fi>	2001-01-28 19:28:40 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2001-01-28 19:28:40 +0000
commit	f9a6324217cffea75ff769ddd313748c0613a128 (patch)
tree	9fb5b4ade5877ba969d093cfe37ec605de62d8dc /toke.c
parent	9ee2bb1a7c54b1866ff07ab9c157254810ee5205 (diff)
download	perl-f9a6324217cffea75ff769ddd313748c0613a128.tar.gz