summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: David Mitchell <davem@iabyn.com> 2011-03-19 19:26:49 +0000
committer: David Mitchell <davem@iabyn.com> 2011-03-19 19:41:55 +0000
commit: 75da9d4c616bae3e6791af93d2ced52dc8080f06 (patch)
tree: 5c0043963158345e1bb1c1c21526aa604fa9f06f /lib
parent: 912c63ed00375338703043928cac3c740d00cc9d (diff)
download: perl-75da9d4c616bae3e6791af93d2ced52dc8080f06.tar.gz
reset pos and utf8 cache when de/encoding utf8 str
When using utf8::upgrade, utf8::downgrade, utf8::encode, utf8::decode, or the underlying C-level functions sv_utf8_upgrade_flags_grow, sv_utf8_downgrade, sv_utf8_encode, sv_utf8_decode and sv_recode_to_utf8, update the position of the pos magic, if any, and clear the utf8 length/position-mapping cache. This fixes [perl #80190].
Diffstat (limited to 'lib')
-rw-r--r-- lib/utf8.t 48
1 files changed, 48 insertions, 0 deletions
diff --git a/lib/utf8.t b/lib/utf8.t
index 715ca3e7e1..ae81ccdc46 100644
--- a/lib/utf8.t
+++ b/lib/utf8.t
@@ -484,4 +484,52 @@ SKIP: {
}
}
+# #80190 update pos, and cached length/position-mapping after
+# utf8 upgrade/downgrade, encode/decode
+
+for my $pos (0..5) { # run the whole sequence once for each starting pos 0..5 in the 6-character string
+
+ my $pos1 = ($pos >= 3) ? 2 : ($pos >= 1) ? 1 : 0; # pos expected once the 6 bytes are decoded into 3 chars (0 -> 0, 1-2 -> 1, 3-5 -> 2)
+ my $pos2 = ($pos1 == 2) ? 3 : $pos1; # pos expected after encoding those 3 chars back to 6 bytes (0 -> 0, 1 -> 1, 2 -> 3)
+
+ my $p; # NOTE(review): $p is not used anywhere in this loop body
+ my $s = "A\xc8\x81\xe8\xab\x86\x{100}"; # trailing \x{100} is a wide char; presumably there so $s starts out in the upgraded (utf8) internal form - confirm
+ chop($s); # drop the \x{100} again, leaving the six chars below 0x100
+
+ pos($s) = $pos;
+ # also sets cache
+ is(length($s), 6, "(pos $pos) len before utf8::downgrade");
+ is(pos($s), $pos, "(pos $pos) pos before utf8::downgrade");
+ utf8::downgrade($s); # first pass ("D" in the labels below): length, pos and contents must all be unchanged
+ is(length($s), 6, "(pos $pos) len after utf8::downgrade");
+ is(pos($s), $pos, "(pos $pos) pos after utf8::downgrade");
+ is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after utf8::downgrade");
+ utf8::decode($s); # the 6 bytes become 3 chars, so pos must move to $pos1
+ is(length($s), 3, "(pos $pos) len after D; utf8::decode");
+ is(pos($s), $pos1, "(pos $pos) pos after D; utf8::decode");
+ is($s, "A\x{201}\x{8ac6}", "(pos $pos) str after D; utf8::decode");
+ utf8::encode($s); # back to 6 bytes, so pos must move to $pos2
+ is(length($s), 6, "(pos $pos) len after D; utf8::encode");
+ is(pos($s), $pos2, "(pos $pos) pos after D; utf8::encode");
+ is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after D; utf8::encode");
+
+ $s = "A\xc8\x81\xe8\xab\x86"; # second pass ("U" in the labels below): same six chars, this time without the \x{100}/chop step
+
+ pos($s) = $pos;
+ is(length($s), 6, "(pos $pos) len before utf8::upgrade");
+ is(pos($s), $pos, "(pos $pos) pos before utf8::upgrade");
+ utf8::upgrade($s); # length, pos and contents must all be unchanged
+ is(length($s), 6, "(pos $pos) len after utf8::upgrade");
+ is(pos($s), $pos, "(pos $pos) pos after utf8::upgrade");
+ is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after utf8::upgrade");
+ utf8::decode($s); # same expectations as the decode step of the first pass
+ is(length($s), 3, "(pos $pos) len after U; utf8::decode");
+ is(pos($s), $pos1, "(pos $pos) pos after U; utf8::decode");
+ is($s, "A\x{201}\x{8ac6}", "(pos $pos) str after U; utf8::decode");
+ utf8::encode($s); # same expectations as the encode step of the first pass
+ is(length($s), 6, "(pos $pos) len after U; utf8::encode");
+ is(pos($s), $pos2, "(pos $pos) pos after U; utf8::encode");
+ is($s, "A\xc8\x81\xe8\xab\x86","(pos $pos) str after U; utf8::encode");
+}
+
done_testing();