summary | refs | log | tree | commit | diff
path: root/doop.c
diff options
context:
space:
mode:
author: Nicholas Clark <nick@ccl4.org> 2004-01-15 00:03:04 +0000
committer: Nicholas Clark <nick@ccl4.org> 2004-01-15 00:03:04 +0000
commit: c4c87a065d5684a07ac86a151149508724e14d4e (patch)
tree: 7a31550bcb46ef9edc1895fe18e15e2246b03e2c /doop.c
parent: 2e038148849ab01588058f0158033f74c84f47f5 (diff)
download: perl-c4c87a065d5684a07ac86a151149508724e14d4e.tar.gz
Make chomp heed the utf8 flags on the target string and $/
[Fixes #24888]
More work still needed to make chomp heed the encoding pragma.

p4raw-id: //depot/perl@22155
Diffstat (limited to 'doop.c')
-rw-r--r-- doop.c 23
1 files changed, 23 insertions, 0 deletions
diff --git a/doop.c b/doop.c
index ea64ff8fb4..6724aca814 100644
--- a/doop.c
+++ b/doop.c
@@ -1008,6 +1008,7 @@ Perl_do_chomp(pTHX_ register SV *sv)
STRLEN len;
STRLEN n_a;
char *s;
+ char *temp_buffer = NULL;
if (RsSNARF(PL_rs))
return 0;
@@ -1059,6 +1060,27 @@ Perl_do_chomp(pTHX_ register SV *sv)
else {
STRLEN rslen;
char *rsptr = SvPV(PL_rs, rslen);
+ if (SvUTF8(PL_rs) != SvUTF8(sv)) {
+ /* Assumption is that rs is shorter than the scalar. */
+ if (SvUTF8(PL_rs)) {
+ /* RS is utf8, scalar is 8 bit. */
+ bool is_utf8 = TRUE;
+ temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
+ &rslen, &is_utf8);
+ if (is_utf8) {
+ /* Cannot downgrade, therefore cannot possibly match
+ */
+ assert (temp_buffer == rsptr);
+ temp_buffer = NULL;
+ goto nope;
+ }
+ rsptr = temp_buffer;
+ } else {
+ /* RS is 8 bit, scalar is utf8. */
+ temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
+ rsptr = temp_buffer;
+ }
+ }
if (rslen == 1) {
if (*s != *rsptr)
goto nope;
@@ -1081,6 +1103,7 @@ Perl_do_chomp(pTHX_ register SV *sv)
SvSETMAGIC(sv);
}
nope:
+ Safefree(temp_buffer);
return count;
}