summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2006-04-28 16:34:14 +0000
committerNicholas Clark <nick@ccl4.org>2006-04-28 16:34:14 +0000
commit923318002ce505ec75344304b64394034456b5b8 (patch)
tree833a99dd20cac6fddcd316164035d1d87b67b8f4
parent2bfc3eabdb0f2b7bb8953cf725f1f43ee9e85d30 (diff)
downloadperl-923318002ce505ec75344304b64394034456b5b8.tar.gz
Fix bug 34297 (length of overloaded UTF-8 strings)
p4raw-id: //depot/perl@28006
-rw-r--r--MANIFEST1
-rw-r--r--pp.c17
-rw-r--r--t/uni/overload.t44
3 files changed, 61 insertions, 1 deletions
diff --git a/MANIFEST b/MANIFEST
index d130e2e39d..7d34252a2c 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3532,6 +3532,7 @@ t/uni/chomp.t See if Unicode chomp works
t/uni/class.t See if Unicode classes work (\p)
t/uni/fold.t See if Unicode folding works
t/uni/lower.t See if Unicode casing works
+t/uni/overload.t See if Unicode overloading works
t/uni/sprintf.t See if Unicode sprintf works
t/uni/title.t See if Unicode casing works
t/uni/tr_7jis.t See if Unicode tr/// works
diff --git a/pp.c b/pp.c
index 356bfec80c..718f0f0c83 100644
--- a/pp.c
+++ b/pp.c
@@ -2950,7 +2950,22 @@ PP(pp_length)
dVAR; dSP; dTARGET;
SV * const sv = TOPs;
- if (DO_UTF8(sv))
+ if (SvAMAGIC(sv)) {
+ /* For an overloaded scalar, we can't know in advance if it's going to
+ be UTF-8 or not. Also, we can't call sv_len_utf8 as it likes to
+ cache the length. Maybe that should be a documented feature of it.
+ */
+ STRLEN len;
+ const char *const p = SvPV_const(sv, len);
+
+ if (DO_UTF8(sv)) {
+ SETi(Perl_utf8_length(aTHX_ p, p + len));
+ }
+ else
+ SETi(len);
+
+ }
+ else if (DO_UTF8(sv))
SETi(sv_len_utf8(sv));
else
SETi(sv_len(sv));
diff --git a/t/uni/overload.t b/t/uni/overload.t
new file mode 100644
index 0000000000..9338f75816
--- /dev/null
+++ b/t/uni/overload.t
@@ -0,0 +1,44 @@
+#!perl -w
+
+BEGIN {
+ if ($ENV{'PERL_CORE'}){
+ chdir 't';
+ @INC = '../lib';
+ }
+}
+
+use Test::More tests => 8;
+
+package UTF8Field;
+use strict;
+
+use overload '""' => 'stringify';
+
+sub new {
+ my $class = shift;
+ return bless [shift, 0], $class;
+}
+
+sub stringify {
+ my $self = shift;
+ $self->[1] = ! $self->[1];
+ if ($self->[1]) {
+ utf8::downgrade($self->[0]);
+ } else {
+ utf8::upgrade($self->[0]);
+ }
+ $self->[0];
+}
+
+package main;
+
+# Bug 34297
+foreach my $t ("ASCII", "B\366se") {
+ my $length = length $t;
+
+ my $u = UTF8Field->new($t);
+ is (length $u, $length, "length of '$t'");
+ is (length $u, $length, "length of '$t'");
+ is (length $u, $length, "length of '$t'");
+ is (length $u, $length, "length of '$t'");
+}