summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul "LeoNerd" Evans <leonerd@leonerd.org.uk>2012-02-02 22:17:46 +0000
committerÆvar Arnfjörð Bjarmason <avar@cpan.org>2012-02-11 22:22:25 +0000
commit6b75a72293a16ce81c0d49fc63f3584b89eceb01 (patch)
tree8d848c7339a36bda14adf4598fa799ba34916131
parenta920efd2f60663f60fa956fac6d376d8645921f7 (diff)
downloadperl-6b75a72293a16ce81c0d49fc63f3584b89eceb01.tar.gz
If strptime() is called with legacy string but UTF-8 format, then upgrade the string to match; taking care to handle pos() counts both sides
-rw-r--r--ext/POSIX/POSIX.xs22
-rw-r--r--ext/POSIX/t/time.t27
2 files changed, 47 insertions, 2 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index f28ab9e336..49c504d812 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -1895,6 +1895,18 @@ strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, y
croak("str is not a reference to a mutable scalar");
}
+ if(!SvUTF8(str) && SvUTF8(fmt)) {
+ /* fmt is UTF-8, str is not. Upgrade a local copy of it, and
+ * take care to update str_offset to match. */
+ str = sv_mortalcopy(str);
+ sv_utf8_upgrade_nomg(str);
+
+ if(str_offset) {
+ U8 *bytes = SvPV_nolen(str);
+ str_offset = utf8_hop(bytes, str_offset) - bytes;
+ }
+ }
+
str_c = SvPV_nolen(str);
remains = strptime(str_c + str_offset, SvPV_nolen(fmt), &tm);
@@ -1907,10 +1919,18 @@ strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, y
XSRETURN(0);
if(strref) {
+ if(str != strref) {
+ /* str is a UTF-8 upgraded copy of the original non-UTF-8
+ * string the caller referred us to in strref */
+ str_offset = utf8_distance(remains, str_c);
+ }
+ else {
+ str_offset = remains - str_c;
+ }
if(!posmg)
posmg = sv_magicext(strref, NULL, PERL_MAGIC_regex_global,
&PL_vtbl_mglob, NULL, 0);
- posmg->mg_len = remains - str_c;
+ posmg->mg_len = str_offset;
}
if(tm.tm_mday > -1 && tm.tm_mon > -1 && tm.tm_year > -1) {
diff --git a/ext/POSIX/t/time.t b/ext/POSIX/t/time.t
index 959f675e4d..4fedcc0f3c 100644
--- a/ext/POSIX/t/time.t
+++ b/ext/POSIX/t/time.t
@@ -4,7 +4,7 @@ use strict;
use Config;
use POSIX;
-use Test::More tests => 33;
+use Test::More tests => 38;
# go to UTC to avoid DST issues around the world when testing. SUS3 says that
# null should get you UTC, but some environments want the explicit names.
@@ -98,6 +98,31 @@ pos($str) = 10;
is_deeply(\@time, [0, 0, 0, 1, 12-1, 2012-1900, 6, 335, 0], 'strptime() starts SCALAR ref at pos()');
is(pos($str), 20, 'strptime() updates pos() magic on SCALAR ref');
+{
+ # Latin-1 vs. UTF-8 strings
+ my $date = "2012\x{e9}02\x{e9}01";
+ utf8::upgrade my $date_U = $date;
+ my $fmt = "%Y\x{e9}%m\x{e9}%d";
+ utf8::upgrade my $fmt_U = $fmt;
+
+ my @want = (undef, undef, undef, 1, 2-1, 2012-1900, 3, 31, 0);
+
+ is_deeply([POSIX::strptime($date, $fmt_U)], \@want, 'strptime() legacy date, UTF-8 fmt');
+ is_deeply([POSIX::strptime($date_U, $fmt_U)], \@want, 'strptime() UTF-8 date, UTF-8 fmt');
+
+ my $str = "\x{ea} $date \x{ea}";
+ pos($str) = 2;
+
+ is_deeply([POSIX::strptime(\$str, $fmt_U)], \@want, 'strptime() legacy data SCALAR ref, UTF-8 fmt');
+ is(pos($str), 12, 'pos() of legacy data SCALAR after strptime() UTF-8 fmt');
+
+ # High (>U+FF) strings
+ my $date_UU = "2012\x{1234}02\x{1234}01";
+ my $fmt_UU = "%Y\x{1234}%m\x{1234}%d";
+
+ is_deeply([POSIX::strptime($date_UU, $fmt_UU)], \@want, 'strptime() on non-Latin-1 Unicode');
+}
+
eval { POSIX::strptime({}, "format") };
like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on HASH ref');