diff options
Diffstat (limited to 'ext')
-rw-r--r-- | ext/POSIX/POSIX.xs | 22 | ||||
-rw-r--r-- | ext/POSIX/t/time.t | 27 |
2 files changed, 47 insertions, 2 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index f28ab9e336..49c504d812 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -1895,6 +1895,18 @@ strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, y croak("str is not a reference to a mutable scalar"); } + if(!SvUTF8(str) && SvUTF8(fmt)) { + /* fmt is UTF-8, str is not. Upgrade a local copy of it, and + * take care to update str_offset to match. */ + str = sv_mortalcopy(str); + sv_utf8_upgrade_nomg(str); + + if(str_offset) { + U8 *bytes = SvPV_nolen(str); + str_offset = utf8_hop(bytes, str_offset) - bytes; + } + } + str_c = SvPV_nolen(str); remains = strptime(str_c + str_offset, SvPV_nolen(fmt), &tm); @@ -1907,10 +1919,18 @@ strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, y XSRETURN(0); if(strref) { + if(str != strref) { + /* str is a UTF-8 upgraded copy of the original non-UTF-8 + * string the caller referred us to in strref */ + str_offset = utf8_distance(remains, str_c); + } + else { + str_offset = remains - str_c; + } if(!posmg) posmg = sv_magicext(strref, NULL, PERL_MAGIC_regex_global, &PL_vtbl_mglob, NULL, 0); - posmg->mg_len = remains - str_c; + posmg->mg_len = str_offset; } if(tm.tm_mday > -1 && tm.tm_mon > -1 && tm.tm_year > -1) { diff --git a/ext/POSIX/t/time.t b/ext/POSIX/t/time.t index 959f675e4d..4fedcc0f3c 100644 --- a/ext/POSIX/t/time.t +++ b/ext/POSIX/t/time.t @@ -4,7 +4,7 @@ use strict; use Config; use POSIX; -use Test::More tests => 33; +use Test::More tests => 38; # go to UTC to avoid DST issues around the world when testing. SUS3 says that # null should get you UTC, but some environments want the explicit names. @@ -98,6 +98,31 @@ pos($str) = 10; is_deeply(\@time, [0, 0, 0, 1, 12-1, 2012-1900, 6, 335, 0], 'strptime() starts SCALAR ref at pos()'); is(pos($str), 20, 'strptime() updates pos() magic on SCALAR ref'); +{ + # Latin-1 vs. UTF-8 strings + my $date = "2012\x{e9}02\x{e9}01"; + utf8::upgrade my $date_U = $date; + my $fmt = "%Y\x{e9}%m\x{e9}%d"; + utf8::upgrade my $fmt_U = $fmt; + + my @want = (undef, undef, undef, 1, 2-1, 2012-1900, 3, 31, 0); + + is_deeply([POSIX::strptime($date, $fmt_U)], \@want, 'strptime() legacy date, UTF-8 fmt'); + is_deeply([POSIX::strptime($date_U, $fmt_U)], \@want, 'strptime() UTF-8 date, UTF-8 fmt'); + + my $str = "\x{ea} $date \x{ea}"; + pos($str) = 2; + + is_deeply([POSIX::strptime(\$str, $fmt_U)], \@want, 'strptime() legacy data SCALAR ref, UTF-8 fmt'); + is(pos($str), 12, 'pos() of legacy data SCALAR after strptime() UTF-8 fmt'); + + # High (>U+FF) strings + my $date_UU = "2012\x{1234}02\x{1234}01"; + my $fmt_UU = "%Y\x{1234}%m\x{1234}%d"; + + is_deeply([POSIX::strptime($date_UU, $fmt_UU)], \@want, 'strptime() on non-Latin-1 Unicode'); +} + eval { POSIX::strptime({}, "format") }; like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on HASH ref'); |