diff options
author | Paul "LeoNerd" Evans <leonerd@leonerd.org.uk> | 2012-02-02 22:44:00 +0000 |
---|---|---|
committer | Ævar Arnfjörð Bjarmason <avar@cpan.org> | 2012-02-11 22:22:25 +0000 |
commit | 216961170a26689e3730db4200ec8167f699fce0 (patch) | |
tree | 5e1c0763ad7e55273c300d8cf199d5c9b8fe377a | |
parent | 6b75a72293a16ce81c0d49fc63f3584b89eceb01 (diff) | |
download | perl-216961170a26689e3730db4200ec8167f699fce0.tar.gz |
If strptime() is called with a UTF-8 string but a legacy format, then downgrade the string to match, taking care to handle pos() counts on both sides
-rw-r--r-- | ext/POSIX/POSIX.xs | 38 | ||||
-rw-r--r-- | ext/POSIX/t/time.t | 9 |
2 files changed, 38 insertions, 9 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index 49c504d812..b01c1bd31a 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -1860,6 +1860,7 @@ strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, y PPCODE: { const char *str_c; + const U8 *orig_bytes; SV *strref = NULL; MAGIC *posmg = NULL; int str_offset = 0; @@ -1895,19 +1896,37 @@ strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, y croak("str is not a reference to a mutable scalar"); } + /* If fmt and str differ in UTF-8ness then take a temporary copy + * of and regrade it to match fmt, taking care to update the + * offset in both cases. */ if(!SvUTF8(str) && SvUTF8(fmt)) { - /* fmt is UTF-8, str is not. Upgrade a local copy of it, and - * take care to update str_offset to match. */ str = sv_mortalcopy(str); sv_utf8_upgrade_nomg(str); + str_c = SvPV_nolen(str); + if(str_offset) { - U8 *bytes = SvPV_nolen(str); - str_offset = utf8_hop(bytes, str_offset) - bytes; + str_offset = utf8_hop(str_c, str_offset) - (U8*)str_c; } } + else if(SvUTF8(str) && !SvUTF8(fmt)) { + str = sv_mortalcopy(str); + /* If downgrade fails then str must have contained characters + * that could not possibly be matched by fmt */ + if(!sv_utf8_downgrade(str, 1)) + XSRETURN(0); - str_c = SvPV_nolen(str); + str_c = SvPV_nolen(str); + + if(str_offset) { + orig_bytes = SvPV_nolen(strref); + str_offset = utf8_distance(orig_bytes + str_offset, orig_bytes); + } + } + else { + /* else it doesn't matter if both or neither are, because they'll match */ + str_c = SvPV_nolen(str); + } remains = strptime(str_c + str_offset, SvPV_nolen(fmt), &tm); @@ -1920,9 +1939,12 @@ strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, y if(strref) { if(str != strref) { - /* str is a UTF-8 upgraded copy of the original non-UTF-8 - * string the caller referred us to in strref */ - str_offset = utf8_distance(remains, str_c); + if(SvUTF8(str)) + /* str is a UTF-8 upgraded copy 
of the original non-UTF-8 + * string the caller referred us to in strref */ + str_offset = utf8_distance(remains, str_c); + else + str_offset = utf8_hop(orig_bytes, remains - str_c) - orig_bytes; } else { str_offset = remains - str_c; diff --git a/ext/POSIX/t/time.t b/ext/POSIX/t/time.t index 4fedcc0f3c..f6954b3695 100644 --- a/ext/POSIX/t/time.t +++ b/ext/POSIX/t/time.t @@ -4,7 +4,7 @@ use strict; use Config; use POSIX; -use Test::More tests => 38; +use Test::More tests => 41; # go to UTC to avoid DST issues around the world when testing. SUS3 says that # null should get you UTC, but some environments want the explicit names. @@ -107,6 +107,7 @@ is(pos($str), 20, 'strptime() updates pos() magic on SCALAR ref'); my @want = (undef, undef, undef, 1, 2-1, 2012-1900, 3, 31, 0); + is_deeply([POSIX::strptime($date_U, $fmt )], \@want, 'strptime() UTF-8 date, legacy fmt'); is_deeply([POSIX::strptime($date, $fmt_U)], \@want, 'strptime() legacy date, UTF-8 fmt'); is_deeply([POSIX::strptime($date_U, $fmt_U)], \@want, 'strptime() UTF-8 date, UTF-8 fmt'); @@ -116,6 +117,12 @@ is(pos($str), 20, 'strptime() updates pos() magic on SCALAR ref'); is_deeply([POSIX::strptime(\$str, $fmt_U)], \@want, 'strptime() legacy data SCALAR ref, UTF-8 fmt'); is(pos($str), 12, 'pos() of legacy data SCALAR after strptime() UTF-8 fmt'); + utf8::upgrade my $str_U = $str; + pos($str_U) = 2; + + is_deeply([POSIX::strptime(\$str_U, $fmt)], \@want, 'strptime() UTF-8 data SCALAR ref, legacy fmt'); + is(pos($str_U), 12, 'pos() of UTF-8 data SCALAR after strptime() legacy fmt'); + # High (>U+FF) strings my $date_UU = "2012\x{1234}02\x{1234}01"; my $fmt_UU = "%Y\x{1234}%m\x{1234}%d"; |