diff options
-rw-r--r-- | ext/B/t/concise-xs.t | 2 | ||||
-rw-r--r-- | ext/POSIX/POSIX.xs | 147 | ||||
-rw-r--r-- | ext/POSIX/lib/POSIX.pm | 18 | ||||
-rw-r--r-- | ext/POSIX/lib/POSIX.pod | 59 | ||||
-rw-r--r-- | ext/POSIX/t/export.t | 4 | ||||
-rw-r--r-- | ext/POSIX/t/time.t | 82 | ||||
-rw-r--r-- | pod/perldelta.pod | 8 |
7 files changed, 311 insertions, 9 deletions
diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t index efd0cf7788..a09e0decc7 100644 --- a/ext/B/t/concise-xs.t +++ b/ext/B/t/concise-xs.t @@ -193,7 +193,7 @@ my $testpkgs = { XS => [qw/ write wctomb wcstombs uname tzset tzname ttyname tmpnam times tcsetpgrp tcsendbreak tcgetpgrp tcflush tcflow tcdrain tanh tan - sysconf strxfrm strtoul strtol strtod + sysconf strxfrm strtoul strtol strtod strptime strftime strcoll sinh sigsuspend sigprocmask sigpending sigaction setuid setsid setpgid setlocale setgid read pipe pause pathconf diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index 7e30a82839..89d543e5cb 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -13,6 +13,9 @@ #define PERL_NO_GET_CONTEXT +/* Solaris needs this in order not to zero out all the untouched fields in strptime() */ +#define _STRPTIME_DONTZERO + #include "EXTERN.h" #define PERLIO_NOT_STDIO 1 #include "perl.h" @@ -1842,6 +1845,150 @@ strftime(fmt, sec, min, hour, mday, mon, year, wday = -1, yday = -1, isdst = -1) } void +strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, yday=-1, isdst=-1) + SV * str + SV * fmt + int sec + int min + int hour + int mday + int mon + int year + int wday + int yday + int isdst + PPCODE: + { + const char *str_c; + int returning_pos = 0; /* true if caller wants us to set pos() marker on str */ + SV *orig_str = NULL; /* caller's original SV* if we have had to regrade it */ + const U8 *orig_bytes; /* SvPV of orig_str */ + MAGIC *posmg = NULL; + STRLEN str_offset = 0; + struct tm tm; + char *remains; + + init_tm(&tm); /* XXX workaround - see init_tm() in core util.c */ + tm.tm_sec = sec; + tm.tm_min = min; + tm.tm_hour = hour; + tm.tm_mday = mday; + tm.tm_mon = mon; + tm.tm_year = year; + tm.tm_wday = wday; + tm.tm_yday = yday; + tm.tm_isdst = isdst; + + if(SvROK(str) && !SvOBJECT(SvRV(str))) { + SV *ref = SvRV(str); + + if(SvTYPE(ref) > SVt_PVMG || SvREADONLY(ref)) + croak("str is not a reference to a mutable scalar"); + + str = ref; + returning_pos = 1; + + if(SvTYPE(str) >= SVt_PVMG && SvMAGIC(str)) + posmg = mg_find(str, PERL_MAGIC_regex_global); + + if(posmg) + str_offset = posmg->mg_len; + } + else if(SvROK(str) && SvTYPE(SvRV(str)) == SVt_REGEXP) { + croak("str is not a reference to a mutable scalar"); + } + + /* If fmt and str differ in UTF-8ness then take a temporary copy + * of and regrade it to match fmt, taking care to update the + * offset in both cases. */ + if(!SvUTF8(str) && SvUTF8(fmt)) { + orig_str = str; + str = sv_mortalcopy(str); + sv_utf8_upgrade_nomg(str); + + str_c = SvPV_nolen(str); + + if(str_offset) { + str_offset = utf8_hop(str_c, str_offset) - (U8*)str_c; + } + } + else if(SvUTF8(str) && !SvUTF8(fmt)) { + orig_str = str; + str = sv_mortalcopy(str); + /* If downgrade fails then str must have contained characters + * that could not possibly be matched by fmt */ + if(!sv_utf8_downgrade(str, 1)) + XSRETURN(0); + + str_c = SvPV_nolen(str); + + if(str_offset) { + orig_bytes = SvPV_nolen(orig_str); + str_offset = utf8_distance(orig_bytes + str_offset, orig_bytes); + } + } + else { + /* else it doesn't matter if both or neither are, because they'll match */ + str_c = SvPV_nolen(str); + } + + remains = strptime(str_c + str_offset, SvPV_nolen(fmt), &tm); + + if(!remains) + /* failed parse */ + XSRETURN(0); + if(remains[0] && !returning_pos) + /* leftovers - without ref we can't signal this so this is a failure */ + XSRETURN(0); + + if(returning_pos) { + if(orig_str) { + if(SvUTF8(str)) + /* str is a UTF-8 upgraded copy of the original non-UTF-8 + * string the caller referred us to in orig_str */ + str_offset = utf8_distance(remains, str_c); + else + str_offset = utf8_hop(orig_bytes, remains - str_c) - orig_bytes; + + str = orig_str; + } + else { + str_offset = remains - str_c; + } + if(!posmg) + posmg = sv_magicext(str, NULL, PERL_MAGIC_regex_global, + &PL_vtbl_mglob, NULL, 0); + posmg->mg_len = str_offset; + } + + if(tm.tm_mday > -1 && tm.tm_mon > -1 && tm.tm_year > -1) { + /* if we leave sec/min/hour == -1, then these will be + * normalised to the previous day */ + int was_sec = tm.tm_sec; tm.tm_sec = 0; + int was_min = tm.tm_min; tm.tm_min = 0; + int was_hour = tm.tm_hour; tm.tm_hour = 0; + + if(mktime(&tm) == (time_t)-1) + XSRETURN(0); + + tm.tm_sec = was_sec; + tm.tm_min = was_min; + tm.tm_hour = was_hour; + } + + EXTEND(SP, 9); + PUSHs(tm.tm_sec != -1 ? sv_2mortal(newSViv(tm.tm_sec)) : &PL_sv_undef); + PUSHs(tm.tm_min != -1 ? sv_2mortal(newSViv(tm.tm_min)) : &PL_sv_undef); + PUSHs(tm.tm_hour != -1 ? sv_2mortal(newSViv(tm.tm_hour)) : &PL_sv_undef); + PUSHs(tm.tm_mday != -1 ? sv_2mortal(newSViv(tm.tm_mday)) : &PL_sv_undef); + PUSHs(tm.tm_mon != -1 ? sv_2mortal(newSViv(tm.tm_mon)) : &PL_sv_undef); + PUSHs(tm.tm_year != -1 ? sv_2mortal(newSViv(tm.tm_year)) : &PL_sv_undef); + PUSHs(tm.tm_wday != -1 ? sv_2mortal(newSViv(tm.tm_wday)) : &PL_sv_undef); + PUSHs(tm.tm_yday != -1 ? sv_2mortal(newSViv(tm.tm_yday)) : &PL_sv_undef); + PUSHs(tm.tm_isdst!= -1 ? sv_2mortal(newSViv(tm.tm_isdst)): &PL_sv_undef); + } + +void tzset() PPCODE: my_tzset(aTHX); diff --git a/ext/POSIX/lib/POSIX.pm b/ext/POSIX/lib/POSIX.pm index ec5c076294..e1ba950689 100644 --- a/ext/POSIX/lib/POSIX.pm +++ b/ext/POSIX/lib/POSIX.pm @@ -4,7 +4,7 @@ use warnings; our ($AUTOLOAD, %SIGRT); -our $VERSION = '1.28'; +our $VERSION = '1.29'; require XSLoader; @@ -360,7 +360,7 @@ our %EXPORT_TAGS = ( tcflow tcflush tcgetattr tcsendbreak tcsetattr )], time_h => [qw(CLK_TCK CLOCKS_PER_SEC NULL asctime clock ctime - difftime mktime strftime tzset tzname)], + difftime mktime strftime strptime tzset tzname)], unistd_h => [qw(F_OK NULL R_OK SEEK_CUR SEEK_END SEEK_SET STDERR_FILENO STDIN_FILENO STDOUT_FILENO W_OK X_OK @@ -386,13 +386,21 @@ our %EXPORT_TAGS = ( # De-duplicate the export list: my %export; @export{map {@$_} values %EXPORT_TAGS} = (); - # Doing the de-dup with a temporary hash has the advantage that the SVs in - # @EXPORT are actually shared hash key scalars, which will save some memory. - our @EXPORT = keys %export; our @EXPORT_OK = (qw(close lchown nice open pipe read sleep times write printf sprintf), grep {!exists $export{$_}} keys %reimpl, keys %replacement); + + # Symbols that should not be exported by default because they are recently + # added. It would upset too much of CPAN to export these by default + foreach (qw(strptime)) { + delete $export{$_}; + push @EXPORT_OK, $_; + } + + # Doing the de-dup with a temporary hash has the advantage that the SVs in + # @EXPORT are actually shared hash key scalars, which will save some memory. + our @EXPORT = keys %export; } require Exporter; diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod index f935ae0574..b24df0ab85 100644 --- a/ext/POSIX/lib/POSIX.pod +++ b/ext/POSIX/lib/POSIX.pod @@ -1349,6 +1349,65 @@ strncpy() is C-specific, use C<=> instead, see L<perlop>. strpbrk() is C-specific, use regular expressions instead, see L<perlre>. +=item strptime + +Parse date and time information from a string. Returns a 9-element list of +time and date information. + +Synopsis: + + (sec, min, hour, mday, mon, year, wday, yday, isdst) = + strptime(str, fmt, [@init]) + +Optionally, an existing 9-element list of time and date informaiton may be +passed to initialise the structure before parsing. Any fields not parsed by +the format will be left as initialised. + +The month (C<mon>), weekday (C<wday>), and yearday (C<yday>) begin at zero. +I.e. January is 0, not 1; Sunday is 0, not 1; January 1st is 0, not 1. The +year (C<year>) is given in years since 1900. I.e., the year 1995 is 95; the +year 2001 is 101. Consult your system's C<strftime()> manpage for details +about these and the other arguments. + +If you want your code to be portable, your format (C<fmt>) argument +should use only the conversion specifiers defined by the ANSI C +standard (C89, to play safe). These are C<aAbBcdHIjmMpSUwWxXyYZ%>. +But even then, the results of some of the conversion specifiers are +non-portable. For example, the specifiers C<aAbBcpZ> change according +to the locale settings of the user, and both how to set locales (the +locale names) and what output to expect are non-standard. +The specifier C<c> changes according to the timezone settings of the +user and the timezone computation rules of the operating system. +The C<Z> specifier is notoriously unportable since the names of +timezones are non-standard. Sticking to the numeric specifiers is the +safest route. + +The return values are made consistent as though by calling C<mktime()> +before they are returned, if all of the C<mday>, C<mon> and C<year> fields +are valid. + +The string for Tuesday, December 12, 1995. + + @time = POSIX::strptime( "Tuesday, December 12, 1995", + "%A, %B %d, %Y", 0, 0, 0 ); + + local $, = ", "; + print @time, "\n"; + +If the input string is not valid, or not consumed completely by the format, +then an error occurs; indicated by C<strptime()> returning an empty list. + +By passing a reference to a string as the value to parse, C<strptime()> will +use the C<pos()> position to start the parse, and to return the position where +it finished. In this situation, it is not an error if the entire input is not +consumed by the format. + + $str = "18:05:29 is the time"; + @time = POSIX::strptime( \$str, "%H:%M:%S" ); + local $, = ", "; + print @time[0..2], "\n"; + print pos($str) . "\n"; + =item strrchr strrchr() is C-specific, see L<perlfunc/rindex> instead. diff --git a/ext/POSIX/t/export.t b/ext/POSIX/t/export.t index 07d428eb1a..0753178f63 100644 --- a/ext/POSIX/t/export.t +++ b/ext/POSIX/t/export.t @@ -102,8 +102,8 @@ my %expect = ( getpgrp getppid getpwnam getpwuid gmtime kill lchown link localtime log mkdir nice open opendir pipe printf rand read readdir rename rewinddir rmdir sin sleep sprintf sqrt - srand stat system time times umask unlink utime wait - waitpid write)], + srand stat strptime system time times umask unlink utime + wait waitpid write)], ); plan (tests => 2 * keys %expect); diff --git a/ext/POSIX/t/time.t b/ext/POSIX/t/time.t index 90b54caa47..f6954b3695 100644 --- a/ext/POSIX/t/time.t +++ b/ext/POSIX/t/time.t @@ -4,7 +4,7 @@ use strict; use Config; use POSIX; -use Test::More tests => 19; +use Test::More tests => 41; # go to UTC to avoid DST issues around the world when testing. SUS3 says that # null should get you UTC, but some environments want the explicit names. @@ -68,6 +68,86 @@ is(ord strftime($ss, POSIX::localtime(time)), 223, 'Format string has correct character'); unlike($ss, qr/\w/, 'Still not internally UTF-8 encoded'); +my @time = POSIX::strptime("2011-12-18 12:34:56", "%Y-%m-%d %H:%M:%S"); +is_deeply(\@time, [56, 34, 12, 18, 12-1, 2011-1900, 0, 351, 0], 'strptime() all 6 fields'); + +@time = POSIX::strptime("2011-12-18", "%Y-%m-%d", 1, 23, 4); +is_deeply(\@time, [1, 23, 4, 18, 12-1, 2011-1900, 0, 351, 0], 'strptime() all date fields with passed time'); + +@time = POSIX::strptime("2011-12-18", "%Y-%m-%d"); +is_deeply(\@time, [undef, undef, undef, 18, 12-1, 2011-1900, 0, 351, 0], 'strptime() all date fields with no time'); + +# tm_year == 6 => 1906, which is a negative time_t. Lets use 106 as 2006 instead +@time = POSIX::strptime("12:34:56", "%H:%M:%S", 1, 2, 3, 4, 5, 106); +is_deeply(\@time, [56, 34, 12, 4, 5, 106, 0, 154, 1], 'strptime() all time fields with passed date'); + +@time = POSIX::strptime("July 4", "%b %d"); +is_deeply([@time[3,4]], [4, 7-1], 'strptime() partial yields correct mday/mon'); + +@time = POSIX::strptime("Foobar", "%H:%M:%S"); +is(scalar @time, 0, 'strptime() invalid input yields empty list'); + +my $str; +@time = POSIX::strptime(\($str = "01:02:03"), "%H:%M:%S", -1,-1,-1, 1,0,70); +is_deeply(\@time, [3, 2, 1, 1, 0, 70, 4, 0, 0], 'strptime() parses SCALAR ref'); +is(pos($str), 8, 'strptime() sets pos() magic on SCALAR ref'); + +$str = "Text with 2012-12-01 datestamp"; +pos($str) = 10; +@time = POSIX::strptime(\$str, "%Y-%m-%d", 0, 0, 0); +is_deeply(\@time, [0, 0, 0, 1, 12-1, 2012-1900, 6, 335, 0], 'strptime() starts SCALAR ref at pos()'); +is(pos($str), 20, 'strptime() updates pos() magic on SCALAR ref'); + +{ + # Latin-1 vs. UTF-8 strings + my $date = "2012\x{e9}02\x{e9}01"; + utf8::upgrade my $date_U = $date; + my $fmt = "%Y\x{e9}%m\x{e9}%d"; + utf8::upgrade my $fmt_U = $fmt; + + my @want = (undef, undef, undef, 1, 2-1, 2012-1900, 3, 31, 0); + + is_deeply([POSIX::strptime($date_U, $fmt )], \@want, 'strptime() UTF-8 date, legacy fmt'); + is_deeply([POSIX::strptime($date, $fmt_U)], \@want, 'strptime() legacy date, UTF-8 fmt'); + is_deeply([POSIX::strptime($date_U, $fmt_U)], \@want, 'strptime() UTF-8 date, UTF-8 fmt'); + + my $str = "\x{ea} $date \x{ea}"; + pos($str) = 2; + + is_deeply([POSIX::strptime(\$str, $fmt_U)], \@want, 'strptime() legacy data SCALAR ref, UTF-8 fmt'); + is(pos($str), 12, 'pos() of legacy data SCALAR after strptime() UTF-8 fmt'); + + utf8::upgrade my $str_U = $str; + pos($str_U) = 2; + + is_deeply([POSIX::strptime(\$str_U, $fmt)], \@want, 'strptime() UTF-8 data SCALAR ref, legacy fmt'); + is(pos($str_U), 12, 'pos() of UTF-8 data SCALAR after strptime() legacy fmt'); + + # High (>U+FF) strings + my $date_UU = "2012\x{1234}02\x{1234}01"; + my $fmt_UU = "%Y\x{1234}%m\x{1234}%d"; + + is_deeply([POSIX::strptime($date_UU, $fmt_UU)], \@want, 'strptime() on non-Latin-1 Unicode'); +} + +eval { POSIX::strptime({}, "format") }; +like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on HASH ref'); + +eval { POSIX::strptime(\"boo", "format") }; +like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on const literal ref'); + +eval { POSIX::strptime(qr/boo!/, "format") }; +like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on Regexp'); + +$str = bless [], "WithStringOverload"; +{ + package WithStringOverload; + use overload '""' => sub { return "2012-02-01" }; +} + +@time = POSIX::strptime($str, "%Y-%m-%d", 0, 0, 0); +is_deeply(\@time, [0, 0, 0, 1, 2-1, 2012-1900, 3, 31, 0], 'strptime() allows object with string overload'); + setlocale(LC_TIME, $orig_loc) || die "Cannot setlocale() back to orig: $!"; # clock() seems to have different definitions of what it does between POSIX diff --git a/pod/perldelta.pod b/pod/perldelta.pod index fb4166b73f..07a684a49c 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -368,6 +368,14 @@ The only change is to fix a formatting error in the Pod. L<Version::Requirements> has been upgraded from version 0.101021 to version 0.101022. +=item * + +L<POSIX> has been upgraded from version 1.28 to version 1.29.. + +It now has a wrapper for the C<strptime(3)> function, it's not +exported on C<use POSIX;> due to backwards compatibility concerns, it +has to be explicitly requested with C<use POSIX qw(strptime)>. + =back =head2 Removed Modules and Pragmata |