summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-05 15:02:38 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-05 15:02:38 +0000
commit: 301d3d20746da77e204598157e568e8c22a220b1 (patch)
tree: 39602e782c8435a8344c1a72723b5da8c9feba11
parent: 65e2654b3cf08ae676040518f84657b8a84698e3 (diff)
download: perl-301d3d20746da77e204598157e568e8c22a220b1.tar.gz
"\x{FF}\xFF" was broken: the \xFF was appended in its
raw 8-bit form to the UTF-8 string.
p4raw-id: //depot/perl@8330
-rw-r--r-- toke.c 22
1 file changed, 15 insertions, 7 deletions
diff --git a/toke.c b/toke.c
index 9b2389645d..09a2e489c9 100644
--- a/toke.c
+++ b/toke.c
@@ -1397,16 +1397,21 @@ S_scan_const(pTHX_ char *start)
NUM_ESCAPE_INSERT:
/* Insert oct or hex escaped character.
- * There will always enough room in sv since such escapes will
- * be longer than any utf8 sequence they can end up as
- */
+ * There will always enough room in sv since such
+ * escapes will be longer than any UT-F8 sequence
+ * they can end up as. */
if (uv > 127) {
if (!has_utf8 && (to_be_utf8 || uv > 255)) {
- /* might need to recode whatever we have accumulated so far
- * if it contains any hibit chars
+ /* Might need to recode whatever we have
+ * accumulated so far if it contains any
+ * hibit chars.
+ *
+ * (Can't we keep track of that and avoid
+ * this rescan? --jhi)
*/
int hicount = 0;
char *c;
+
for (c = SvPVX(sv); c < d; c++) {
if (UTF8_IS_CONTINUED(*c))
hicount++;
@@ -1416,7 +1421,10 @@ S_scan_const(pTHX_ char *start)
char *src, *dst;
U8 tmpbuf[UTF8_MAXLEN+1];
U8 *tmpend;
- d = SvGROW(sv, SvCUR(sv) + hicount + 1) + (d - old_pvx);
+
+ d = SvGROW(sv,
+ SvCUR(sv) + hicount + 1) +
+ (d - old_pvx);
src = d - 1;
d += hicount;
@@ -1436,7 +1444,7 @@ S_scan_const(pTHX_ char *start)
}
}
- if (to_be_utf8 || uv > 255) {
+ if (to_be_utf8 || (has_utf8 && uv > 127) || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
has_utf8 = TRUE;
}