diff options
author | Nicholas Clark <nick@ccl4.org> | 2004-01-15 00:03:04 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2004-01-15 00:03:04 +0000 |
commit | c4c87a065d5684a07ac86a151149508724e14d4e (patch) | |
tree | 7a31550bcb46ef9edc1895fe18e15e2246b03e2c /doop.c | |
parent | 2e038148849ab01588058f0158033f74c84f47f5 (diff) | |
download | perl-c4c87a065d5684a07ac86a151149508724e14d4e.tar.gz |
Make chomp heed the utf8 flags on the target string and $/
[Fixes #24888]
More work is still needed to make chomp heed the encoding pragma.
p4raw-id: //depot/perl@22155
Diffstat (limited to 'doop.c')
-rw-r--r-- | doop.c | 23 |
1 file changed, 23 insertions, 0 deletions
@@ -1008,6 +1008,7 @@ Perl_do_chomp(pTHX_ register SV *sv) STRLEN len; STRLEN n_a; char *s; + char *temp_buffer = NULL; if (RsSNARF(PL_rs)) return 0; @@ -1059,6 +1060,27 @@ Perl_do_chomp(pTHX_ register SV *sv) else { STRLEN rslen; char *rsptr = SvPV(PL_rs, rslen); + if (SvUTF8(PL_rs) != SvUTF8(sv)) { + /* Assumption is that rs is shorter than the scalar. */ + if (SvUTF8(PL_rs)) { + /* RS is utf8, scalar is 8 bit. */ + bool is_utf8 = TRUE; + temp_buffer = (char*)bytes_from_utf8((U8*)rsptr, + &rslen, &is_utf8); + if (is_utf8) { + /* Cannot downgrade, therefore cannot possibly match + */ + assert (temp_buffer == rsptr); + temp_buffer = NULL; + goto nope; + } + rsptr = temp_buffer; + } else { + /* RS is 8 bit, scalar is utf8. */ + temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen); + rsptr = temp_buffer; + } + } if (rslen == 1) { if (*s != *rsptr) goto nope; @@ -1081,6 +1103,7 @@ Perl_do_chomp(pTHX_ register SV *sv) SvSETMAGIC(sv); } nope: + Safefree(temp_buffer); return count; } |