summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doop.c23
-rwxr-xr-xt/op/chop.t28
2 files changed, 50 insertions, 1 deletions
diff --git a/doop.c b/doop.c
index ea64ff8fb4..6724aca814 100644
--- a/doop.c
+++ b/doop.c
@@ -1008,6 +1008,7 @@ Perl_do_chomp(pTHX_ register SV *sv)
STRLEN len;
STRLEN n_a;
char *s;
+ char *temp_buffer = NULL;
if (RsSNARF(PL_rs))
return 0;
@@ -1059,6 +1060,27 @@ Perl_do_chomp(pTHX_ register SV *sv)
else {
STRLEN rslen;
char *rsptr = SvPV(PL_rs, rslen);
+ if (SvUTF8(PL_rs) != SvUTF8(sv)) {
+ /* Assumption is that rs is shorter than the scalar. */
+ if (SvUTF8(PL_rs)) {
+ /* RS is utf8, scalar is 8 bit. */
+ bool is_utf8 = TRUE;
+ temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
+ &rslen, &is_utf8);
+ if (is_utf8) {
+ /* Cannot downgrade, therefore cannot possibly match
+ */
+ assert (temp_buffer == rsptr);
+ temp_buffer = NULL;
+ goto nope;
+ }
+ rsptr = temp_buffer;
+ } else {
+ /* RS is 8 bit, scalar is utf8. */
+ temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
+ rsptr = temp_buffer;
+ }
+ }
if (rslen == 1) {
if (*s != *rsptr)
goto nope;
@@ -1081,6 +1103,7 @@ Perl_do_chomp(pTHX_ register SV *sv)
SvSETMAGIC(sv);
}
nope:
+ Safefree(temp_buffer);
return count;
}
diff --git a/t/op/chop.t b/t/op/chop.t
index 87700de929..68025b7f3a 100755
--- a/t/op/chop.t
+++ b/t/op/chop.t
@@ -6,7 +6,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 51;
+plan tests => 91;
$_ = 'abc';
$c = do foo();
@@ -183,3 +183,29 @@ ok($@ =~ /Can\'t modify.*chop.*in.*assignment/);
eval 'chomp($x, $y) = (1, 2);';
ok($@ =~ /Can\'t modify.*chom?p.*in.*assignment/);
+my @chars = ("N", "\xd3", substr ("\xd4\x{100}", 0, 1), chr 1296);
+foreach my $start (@chars) {
+ foreach my $end (@chars) {
+ local $/ = $end;
+ my $message = "start=" . ord ($start) . " end=" . ord $end;
+ my $string = $start . $end;
+ chomp $string;
+ is ($string, $start, $message);
+
+ my $end_utf8 = $end;
+ utf8::encode ($end_utf8);
+ next if $end_utf8 eq $end;
+
+ # $end ne $end_utf8, so these should not chomp.
+ $string = $start . $end_utf8;
+ my $chomped = $string;
+ chomp $chomped;
+ is ($chomped, $string, "$message (end as bytes)");
+
+ $/ = $end_utf8;
+ $string = $start . $end;
+ $chomped = $string;
+ chomp $chomped;
+ is ($chomped, $string, "$message (\$/ as bytes)");
+ }
+}