diff options
author | Paul "LeoNerd" Evans <leonerd@leonerd.org.uk> | 2012-02-02 22:17:46 +0000 |
---|---|---|
committer | Ævar Arnfjörð Bjarmason <avar@cpan.org> | 2012-02-11 22:22:25 +0000 |
commit | 6b75a72293a16ce81c0d49fc63f3584b89eceb01 (patch) | |
tree | 8d848c7339a36bda14adf4598fa799ba34916131 | |
parent | a920efd2f60663f60fa956fac6d376d8645921f7 (diff) | |
download | perl-6b75a72293a16ce81c0d49fc63f3584b89eceb01.tar.gz |
If strptime() is called with legacy string but UTF-8 format, then upgrade the string to match; taking care to handle pos() counts both sides
-rw-r--r-- | ext/POSIX/POSIX.xs | 22 | ||||
-rw-r--r-- | ext/POSIX/t/time.t | 27 |
2 files changed, 47 insertions, 2 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index f28ab9e336..49c504d812 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -1895,6 +1895,18 @@ strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, y croak("str is not a reference to a mutable scalar"); } + if(!SvUTF8(str) && SvUTF8(fmt)) { + /* fmt is UTF-8, str is not. Upgrade a local copy of it, and + * take care to update str_offset to match. */ + str = sv_mortalcopy(str); + sv_utf8_upgrade_nomg(str); + + if(str_offset) { + U8 *bytes = SvPV_nolen(str); + str_offset = utf8_hop(bytes, str_offset) - bytes; + } + } + str_c = SvPV_nolen(str); remains = strptime(str_c + str_offset, SvPV_nolen(fmt), &tm); @@ -1907,10 +1919,18 @@ strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, y XSRETURN(0); if(strref) { + if(str != strref) { + /* str is a UTF-8 upgraded copy of the original non-UTF-8 + * string the caller referred us to in strref */ + str_offset = utf8_distance(remains, str_c); + } + else { + str_offset = remains - str_c; + } if(!posmg) posmg = sv_magicext(strref, NULL, PERL_MAGIC_regex_global, &PL_vtbl_mglob, NULL, 0); - posmg->mg_len = remains - str_c; + posmg->mg_len = str_offset; } if(tm.tm_mday > -1 && tm.tm_mon > -1 && tm.tm_year > -1) { diff --git a/ext/POSIX/t/time.t b/ext/POSIX/t/time.t index 959f675e4d..4fedcc0f3c 100644 --- a/ext/POSIX/t/time.t +++ b/ext/POSIX/t/time.t @@ -4,7 +4,7 @@ use strict; use Config; use POSIX; -use Test::More tests => 33; +use Test::More tests => 38; # go to UTC to avoid DST issues around the world when testing. SUS3 says that # null should get you UTC, but some environments want the explicit names. @@ -98,6 +98,31 @@ pos($str) = 10; is_deeply(\@time, [0, 0, 0, 1, 12-1, 2012-1900, 6, 335, 0], 'strptime() starts SCALAR ref at pos()'); is(pos($str), 20, 'strptime() updates pos() magic on SCALAR ref'); +{ + # Latin-1 vs. UTF-8 strings + my $date = "2012\x{e9}02\x{e9}01"; + utf8::upgrade my $date_U = $date; + my $fmt = "%Y\x{e9}%m\x{e9}%d"; + utf8::upgrade my $fmt_U = $fmt; + + my @want = (undef, undef, undef, 1, 2-1, 2012-1900, 3, 31, 0); + + is_deeply([POSIX::strptime($date, $fmt_U)], \@want, 'strptime() legacy date, UTF-8 fmt'); + is_deeply([POSIX::strptime($date_U, $fmt_U)], \@want, 'strptime() UTF-8 date, UTF-8 fmt'); + + my $str = "\x{ea} $date \x{ea}"; + pos($str) = 2; + + is_deeply([POSIX::strptime(\$str, $fmt_U)], \@want, 'strptime() legacy data SCALAR ref, UTF-8 fmt'); + is(pos($str), 12, 'pos() of legacy data SCALAR after strptime() UTF-8 fmt'); + + # High (>U+FF) strings + my $date_UU = "2012\x{1234}02\x{1234}01"; + my $fmt_UU = "%Y\x{1234}%m\x{1234}%d"; + + is_deeply([POSIX::strptime($date_UU, $fmt_UU)], \@want, 'strptime() on non-Latin-1 Unicode'); +} + eval { POSIX::strptime({}, "format") }; like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on HASH ref'); |