diff options
author | Nicholas Clark <nick@ccl4.org> | 2006-04-28 16:34:14 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2006-04-28 16:34:14 +0000 |
commit | 923318002ce505ec75344304b64394034456b5b8 (patch) | |
tree | 833a99dd20cac6fddcd316164035d1d87b67b8f4 | |
parent | 2bfc3eabdb0f2b7bb8953cf725f1f43ee9e85d30 (diff) | |
download | perl-923318002ce505ec75344304b64394034456b5b8.tar.gz |
Fix bug 34297 (length of overloaded UTF-8 strings)
p4raw-id: //depot/perl@28006
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | pp.c | 17 | ||||
-rw-r--r-- | t/uni/overload.t | 44 |
3 files changed, 61 insertions, 1 deletions
@@ -3532,6 +3532,7 @@ t/uni/chomp.t See if Unicode chomp works t/uni/class.t See if Unicode classes work (\p) t/uni/fold.t See if Unicode folding works t/uni/lower.t See if Unicode casing works +t/uni/overload.t See if Unicode overloading works t/uni/sprintf.t See if Unicode sprintf works t/uni/title.t See if Unicode casing works t/uni/tr_7jis.t See if Unicode tr/// works @@ -2950,7 +2950,22 @@ PP(pp_length) dVAR; dSP; dTARGET; SV * const sv = TOPs; - if (DO_UTF8(sv)) + if (SvAMAGIC(sv)) { + /* For an overloaded scalar, we can't know in advance if it's going to + be UTF-8 or not. Also, we can't call sv_len_utf8 as it likes to + cache the length. Maybe that should be a documented feature of it. + */ + STRLEN len; + const char *const p = SvPV_const(sv, len); + + if (DO_UTF8(sv)) { + SETi(Perl_utf8_length(aTHX_ p, p + len)); + } + else + SETi(len); + + } + else if (DO_UTF8(sv)) SETi(sv_len_utf8(sv)); else SETi(sv_len(sv)); diff --git a/t/uni/overload.t b/t/uni/overload.t new file mode 100644 index 0000000000..9338f75816 --- /dev/null +++ b/t/uni/overload.t @@ -0,0 +1,44 @@ +#!perl -w + +BEGIN { + if ($ENV{'PERL_CORE'}){ + chdir 't'; + @INC = '../lib'; + } +} + +use Test::More tests => 8; + +package UTF8Field; +use strict; + +use overload '""' => 'stringify'; + +sub new { + my $class = shift; + return bless [shift, 0], $class; +} + +sub stringify { + my $self = shift; + $self->[1] = ! $self->[1]; + if ($self->[1]) { + utf8::downgrade($self->[0]); + } else { + utf8::upgrade($self->[0]); + } + $self->[0]; +} + +package main; + +# Bug 34297 +foreach my $t ("ASCII", "B\366se") { + my $length = length $t; + + my $u = UTF8Field->new($t); + is (length $u, $length, "length of '$t'"); + is (length $u, $length, "length of '$t'"); + is (length $u, $length, "length of '$t'"); + is (length $u, $length, "length of '$t'"); +} |