summary | refs | log | tree | commit | diff
path: root/toke.c
diff options
context:
space:
mode:
author: Brian Fraser <fraserbn@gmail.com> 2011-07-06 10:41:10 -0300
committer: Father Chrysostomos <sprout@cpan.org> 2011-10-06 13:01:10 -0700
commit: 204e6232679d0d412347fddd9e5bd0e529da73d5 (patch)
tree: f277f72f11f914e9b6c9874e5e48c22d56ba27a1 /toke.c
parent: a00b390b6689672af8817e28321f92e70369c0d4 (diff)
download: perl-204e6232679d0d412347fddd9e5bd0e529da73d5.tar.gz
mro UTF8 cleanup.
This patch also duplicates the existing mro tests with copies that use Unicode in identifiers, to exercise the mro code. Because those new tests trigger it, the patch also fixes a bug in the parsing of *{...}: if the first character inside the braces is a non-ASCII Unicode identifier character, the contents are now implicitly quoted when they are just an identifier (just as with ASCII identifiers), instead of being parsed as a bareword that would violate strict subs.
Diffstat (limited to 'toke.c')
-rw-r--r-- toke.c 18
1 files changed, 14 insertions, 4 deletions
diff --git a/toke.c b/toke.c
index a85b698d5d..53c6759ed6 100644
--- a/toke.c
+++ b/toke.c
@@ -8705,9 +8705,19 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
}
else if (ck_uni)
check_uni();
- if (s < send)
- *d = *s++;
- d[1] = '\0';
+ if (s < send) {
+ if (UTF) {
+ const STRLEN skip = UTF8SKIP(s);
+ STRLEN i;
+ d[skip] = '\0';
+ for ( i = 0; i < skip; i++ )
+ d[i] = *s++;
+ }
+ else {
+ *d = *s++;
+ d[1] = '\0';
+ }
+ }
if (*d == '^' && *s && isCONTROLVAR(*s)) {
*d = toCTRL(*s);
s++;
@@ -8723,7 +8733,7 @@ S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRL
}
}
if (isIDFIRST_lazy_if(d,UTF)) {
- d++;
+ d += UTF8SKIP(d);
if (UTF) {
char *end = s;
while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {