summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorÆvar Arnfjörð Bjarmason <avar@cpan.org>2012-02-11 22:31:03 +0000
committerÆvar Arnfjörð Bjarmason <avar@cpan.org>2012-02-11 22:49:05 +0000
commit0e582130ad8fc3afc6514f60b7a513c550379b7d (patch)
treec1c629113ce462fad2f0f9da0b41753fa0eb6c90
parenta748fe11f70695552294fe4e31343b2dacb59db2 (diff)
parent423a1dfc8c367cb58e7dcef73a81b4ec7a8b8810 (diff)
downloadperl-0e582130ad8fc3afc6514f60b7a513c550379b7d.tar.gz
Merge branch 'avar/POSIX-strptime' into blead
Merge my rebased version of Paul "LeoNerd" Evans's branch to blead after I'd cherry-picked the unrelated a748fe1 commit out of it. This may or may not be the perfect implementation of strptime, but it seems to work well enough for me, the bugs that have been raised against it have been addressed, and it's going to work a hell of a lot better than not having any strptime support at all.
-rw-r--r--ext/B/t/concise-xs.t2
-rw-r--r--ext/POSIX/POSIX.xs147
-rw-r--r--ext/POSIX/lib/POSIX.pm18
-rw-r--r--ext/POSIX/lib/POSIX.pod59
-rw-r--r--ext/POSIX/t/export.t4
-rw-r--r--ext/POSIX/t/time.t82
-rw-r--r--pod/perldelta.pod8
7 files changed, 311 insertions, 9 deletions
diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t
index efd0cf7788..a09e0decc7 100644
--- a/ext/B/t/concise-xs.t
+++ b/ext/B/t/concise-xs.t
@@ -193,7 +193,7 @@ my $testpkgs = {
XS => [qw/ write wctomb wcstombs uname tzset tzname
ttyname tmpnam times tcsetpgrp tcsendbreak
tcgetpgrp tcflush tcflow tcdrain tanh tan
- sysconf strxfrm strtoul strtol strtod
+ sysconf strxfrm strtoul strtol strtod strptime
strftime strcoll sinh sigsuspend sigprocmask
sigpending sigaction setuid setsid setpgid
setlocale setgid read pipe pause pathconf
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index 7e30a82839..89d543e5cb 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -13,6 +13,9 @@
#define PERL_NO_GET_CONTEXT
+/* Solaris needs this in order not to zero out all the untouched fields in strptime() */
+#define _STRPTIME_DONTZERO
+
#include "EXTERN.h"
#define PERLIO_NOT_STDIO 1
#include "perl.h"
@@ -1842,6 +1845,150 @@ strftime(fmt, sec, min, hour, mday, mon, year, wday = -1, yday = -1, isdst = -1)
}
void
+strptime(str, fmt, sec=-1, min=-1, hour=-1, mday=-1, mon=-1, year=-1, wday=-1, yday=-1, isdst=-1)
+ SV * str
+ SV * fmt
+ int sec
+ int min
+ int hour
+ int mday
+ int mon
+ int year
+ int wday
+ int yday
+ int isdst
+ PPCODE:
+ {
+ const char *str_c;
+ int returning_pos = 0; /* true if caller wants us to set pos() marker on str */
+ SV *orig_str = NULL; /* caller's original SV* if we have had to regrade it */
+ const U8 *orig_bytes; /* SvPV of orig_str */
+ MAGIC *posmg = NULL;
+ STRLEN str_offset = 0;
+ struct tm tm;
+ char *remains;
+
+ init_tm(&tm); /* XXX workaround - see init_tm() in core util.c */
+ tm.tm_sec = sec;
+ tm.tm_min = min;
+ tm.tm_hour = hour;
+ tm.tm_mday = mday;
+ tm.tm_mon = mon;
+ tm.tm_year = year;
+ tm.tm_wday = wday;
+ tm.tm_yday = yday;
+ tm.tm_isdst = isdst;
+
+ if(SvROK(str) && !SvOBJECT(SvRV(str))) {
+ SV *ref = SvRV(str);
+
+ if(SvTYPE(ref) > SVt_PVMG || SvREADONLY(ref))
+ croak("str is not a reference to a mutable scalar");
+
+ str = ref;
+ returning_pos = 1;
+
+ if(SvTYPE(str) >= SVt_PVMG && SvMAGIC(str))
+ posmg = mg_find(str, PERL_MAGIC_regex_global);
+
+ if(posmg)
+ str_offset = posmg->mg_len;
+ }
+ else if(SvROK(str) && SvTYPE(SvRV(str)) == SVt_REGEXP) {
+ croak("str is not a reference to a mutable scalar");
+ }
+
+ /* If fmt and str differ in UTF-8ness then take a temporary copy
+ * of and regrade it to match fmt, taking care to update the
+ * offset in both cases. */
+ if(!SvUTF8(str) && SvUTF8(fmt)) {
+ orig_str = str;
+ str = sv_mortalcopy(str);
+ sv_utf8_upgrade_nomg(str);
+
+ str_c = SvPV_nolen(str);
+
+ if(str_offset) {
+ str_offset = utf8_hop(str_c, str_offset) - (U8*)str_c;
+ }
+ }
+ else if(SvUTF8(str) && !SvUTF8(fmt)) {
+ orig_str = str;
+ str = sv_mortalcopy(str);
+ /* If downgrade fails then str must have contained characters
+ * that could not possibly be matched by fmt */
+ if(!sv_utf8_downgrade(str, 1))
+ XSRETURN(0);
+
+ str_c = SvPV_nolen(str);
+
+ if(str_offset) {
+ orig_bytes = SvPV_nolen(orig_str);
+ str_offset = utf8_distance(orig_bytes + str_offset, orig_bytes);
+ }
+ }
+ else {
+ /* else it doesn't matter if both or neither are, because they'll match */
+ str_c = SvPV_nolen(str);
+ }
+
+ remains = strptime(str_c + str_offset, SvPV_nolen(fmt), &tm);
+
+ if(!remains)
+ /* failed parse */
+ XSRETURN(0);
+ if(remains[0] && !returning_pos)
+ /* leftovers - without ref we can't signal this so this is a failure */
+ XSRETURN(0);
+
+ if(returning_pos) {
+ if(orig_str) {
+ if(SvUTF8(str))
+ /* str is a UTF-8 upgraded copy of the original non-UTF-8
+ * string the caller referred us to in orig_str */
+ str_offset = utf8_distance(remains, str_c);
+ else
+ str_offset = utf8_hop(orig_bytes, remains - str_c) - orig_bytes;
+
+ str = orig_str;
+ }
+ else {
+ str_offset = remains - str_c;
+ }
+ if(!posmg)
+ posmg = sv_magicext(str, NULL, PERL_MAGIC_regex_global,
+ &PL_vtbl_mglob, NULL, 0);
+ posmg->mg_len = str_offset;
+ }
+
+ if(tm.tm_mday > -1 && tm.tm_mon > -1 && tm.tm_year > -1) {
+ /* if we leave sec/min/hour == -1, then these will be
+ * normalised to the previous day */
+ int was_sec = tm.tm_sec; tm.tm_sec = 0;
+ int was_min = tm.tm_min; tm.tm_min = 0;
+ int was_hour = tm.tm_hour; tm.tm_hour = 0;
+
+ if(mktime(&tm) == (time_t)-1)
+ XSRETURN(0);
+
+ tm.tm_sec = was_sec;
+ tm.tm_min = was_min;
+ tm.tm_hour = was_hour;
+ }
+
+ EXTEND(SP, 9);
+ PUSHs(tm.tm_sec != -1 ? sv_2mortal(newSViv(tm.tm_sec)) : &PL_sv_undef);
+ PUSHs(tm.tm_min != -1 ? sv_2mortal(newSViv(tm.tm_min)) : &PL_sv_undef);
+ PUSHs(tm.tm_hour != -1 ? sv_2mortal(newSViv(tm.tm_hour)) : &PL_sv_undef);
+ PUSHs(tm.tm_mday != -1 ? sv_2mortal(newSViv(tm.tm_mday)) : &PL_sv_undef);
+ PUSHs(tm.tm_mon != -1 ? sv_2mortal(newSViv(tm.tm_mon)) : &PL_sv_undef);
+ PUSHs(tm.tm_year != -1 ? sv_2mortal(newSViv(tm.tm_year)) : &PL_sv_undef);
+ PUSHs(tm.tm_wday != -1 ? sv_2mortal(newSViv(tm.tm_wday)) : &PL_sv_undef);
+ PUSHs(tm.tm_yday != -1 ? sv_2mortal(newSViv(tm.tm_yday)) : &PL_sv_undef);
+ PUSHs(tm.tm_isdst!= -1 ? sv_2mortal(newSViv(tm.tm_isdst)): &PL_sv_undef);
+ }
+
+void
tzset()
PPCODE:
my_tzset(aTHX);
diff --git a/ext/POSIX/lib/POSIX.pm b/ext/POSIX/lib/POSIX.pm
index ec5c076294..e1ba950689 100644
--- a/ext/POSIX/lib/POSIX.pm
+++ b/ext/POSIX/lib/POSIX.pm
@@ -4,7 +4,7 @@ use warnings;
our ($AUTOLOAD, %SIGRT);
-our $VERSION = '1.28';
+our $VERSION = '1.29';
require XSLoader;
@@ -360,7 +360,7 @@ our %EXPORT_TAGS = (
tcflow tcflush tcgetattr tcsendbreak tcsetattr )],
time_h => [qw(CLK_TCK CLOCKS_PER_SEC NULL asctime clock ctime
- difftime mktime strftime tzset tzname)],
+ difftime mktime strftime strptime tzset tzname)],
unistd_h => [qw(F_OK NULL R_OK SEEK_CUR SEEK_END SEEK_SET
STDERR_FILENO STDIN_FILENO STDOUT_FILENO W_OK X_OK
@@ -386,13 +386,21 @@ our %EXPORT_TAGS = (
# De-duplicate the export list:
my %export;
@export{map {@$_} values %EXPORT_TAGS} = ();
- # Doing the de-dup with a temporary hash has the advantage that the SVs in
- # @EXPORT are actually shared hash key scalars, which will save some memory.
- our @EXPORT = keys %export;
our @EXPORT_OK = (qw(close lchown nice open pipe read sleep times write
printf sprintf),
grep {!exists $export{$_}} keys %reimpl, keys %replacement);
+
+ # Symbols that should not be exported by default because they are recently
+ # added. It would upset too much of CPAN to export these by default
+ foreach (qw(strptime)) {
+ delete $export{$_};
+ push @EXPORT_OK, $_;
+ }
+
+ # Doing the de-dup with a temporary hash has the advantage that the SVs in
+ # @EXPORT are actually shared hash key scalars, which will save some memory.
+ our @EXPORT = keys %export;
}
require Exporter;
diff --git a/ext/POSIX/lib/POSIX.pod b/ext/POSIX/lib/POSIX.pod
index f935ae0574..b24df0ab85 100644
--- a/ext/POSIX/lib/POSIX.pod
+++ b/ext/POSIX/lib/POSIX.pod
@@ -1349,6 +1349,65 @@ strncpy() is C-specific, use C<=> instead, see L<perlop>.
strpbrk() is C-specific, use regular expressions instead,
see L<perlre>.
+=item strptime
+
+Parse date and time information from a string. Returns a 9-element list of
+time and date information.
+
+Synopsis:
+
+ (sec, min, hour, mday, mon, year, wday, yday, isdst) =
+ strptime(str, fmt, [@init])
+
+Optionally, an existing 9-element list of time and date informaiton may be
+passed to initialise the structure before parsing. Any fields not parsed by
+the format will be left as initialised.
+
+The month (C<mon>), weekday (C<wday>), and yearday (C<yday>) begin at zero.
+I.e. January is 0, not 1; Sunday is 0, not 1; January 1st is 0, not 1. The
+year (C<year>) is given in years since 1900. I.e., the year 1995 is 95; the
+year 2001 is 101. Consult your system's C<strftime()> manpage for details
+about these and the other arguments.
+
+If you want your code to be portable, your format (C<fmt>) argument
+should use only the conversion specifiers defined by the ANSI C
+standard (C89, to play safe). These are C<aAbBcdHIjmMpSUwWxXyYZ%>.
+But even then, the results of some of the conversion specifiers are
+non-portable. For example, the specifiers C<aAbBcpZ> change according
+to the locale settings of the user, and both how to set locales (the
+locale names) and what output to expect are non-standard.
+The specifier C<c> changes according to the timezone settings of the
+user and the timezone computation rules of the operating system.
+The C<Z> specifier is notoriously unportable since the names of
+timezones are non-standard. Sticking to the numeric specifiers is the
+safest route.
+
+The return values are made consistent as though by calling C<mktime()>
+before they are returned, if all of the C<mday>, C<mon> and C<year> fields
+are valid.
+
+The string for Tuesday, December 12, 1995.
+
+ @time = POSIX::strptime( "Tuesday, December 12, 1995",
+ "%A, %B %d, %Y", 0, 0, 0 );
+
+ local $, = ", ";
+ print @time, "\n";
+
+If the input string is not valid, or not consumed completely by the format,
+then an error occurs; indicated by C<strptime()> returning an empty list.
+
+By passing a reference to a string as the value to parse, C<strptime()> will
+use the C<pos()> position to start the parse, and to return the position where
+it finished. In this situation, it is not an error if the entire input is not
+consumed by the format.
+
+ $str = "18:05:29 is the time";
+ @time = POSIX::strptime( \$str, "%H:%M:%S" );
+ local $, = ", ";
+ print @time[0..2], "\n";
+ print pos($str) . "\n";
+
=item strrchr
strrchr() is C-specific, see L<perlfunc/rindex> instead.
diff --git a/ext/POSIX/t/export.t b/ext/POSIX/t/export.t
index 07d428eb1a..0753178f63 100644
--- a/ext/POSIX/t/export.t
+++ b/ext/POSIX/t/export.t
@@ -102,8 +102,8 @@ my %expect = (
getpgrp getppid getpwnam getpwuid gmtime kill lchown link
localtime log mkdir nice open opendir pipe printf rand
read readdir rename rewinddir rmdir sin sleep sprintf sqrt
- srand stat system time times umask unlink utime wait
- waitpid write)],
+ srand stat strptime system time times umask unlink utime
+ wait waitpid write)],
);
plan (tests => 2 * keys %expect);
diff --git a/ext/POSIX/t/time.t b/ext/POSIX/t/time.t
index 90b54caa47..f6954b3695 100644
--- a/ext/POSIX/t/time.t
+++ b/ext/POSIX/t/time.t
@@ -4,7 +4,7 @@ use strict;
use Config;
use POSIX;
-use Test::More tests => 19;
+use Test::More tests => 41;
# go to UTC to avoid DST issues around the world when testing. SUS3 says that
# null should get you UTC, but some environments want the explicit names.
@@ -68,6 +68,86 @@ is(ord strftime($ss, POSIX::localtime(time)),
223, 'Format string has correct character');
unlike($ss, qr/\w/, 'Still not internally UTF-8 encoded');
+my @time = POSIX::strptime("2011-12-18 12:34:56", "%Y-%m-%d %H:%M:%S");
+is_deeply(\@time, [56, 34, 12, 18, 12-1, 2011-1900, 0, 351, 0], 'strptime() all 6 fields');
+
+@time = POSIX::strptime("2011-12-18", "%Y-%m-%d", 1, 23, 4);
+is_deeply(\@time, [1, 23, 4, 18, 12-1, 2011-1900, 0, 351, 0], 'strptime() all date fields with passed time');
+
+@time = POSIX::strptime("2011-12-18", "%Y-%m-%d");
+is_deeply(\@time, [undef, undef, undef, 18, 12-1, 2011-1900, 0, 351, 0], 'strptime() all date fields with no time');
+
+# tm_year == 6 => 1906, which is a negative time_t. Lets use 106 as 2006 instead
+@time = POSIX::strptime("12:34:56", "%H:%M:%S", 1, 2, 3, 4, 5, 106);
+is_deeply(\@time, [56, 34, 12, 4, 5, 106, 0, 154, 1], 'strptime() all time fields with passed date');
+
+@time = POSIX::strptime("July 4", "%b %d");
+is_deeply([@time[3,4]], [4, 7-1], 'strptime() partial yields correct mday/mon');
+
+@time = POSIX::strptime("Foobar", "%H:%M:%S");
+is(scalar @time, 0, 'strptime() invalid input yields empty list');
+
+my $str;
+@time = POSIX::strptime(\($str = "01:02:03"), "%H:%M:%S", -1,-1,-1, 1,0,70);
+is_deeply(\@time, [3, 2, 1, 1, 0, 70, 4, 0, 0], 'strptime() parses SCALAR ref');
+is(pos($str), 8, 'strptime() sets pos() magic on SCALAR ref');
+
+$str = "Text with 2012-12-01 datestamp";
+pos($str) = 10;
+@time = POSIX::strptime(\$str, "%Y-%m-%d", 0, 0, 0);
+is_deeply(\@time, [0, 0, 0, 1, 12-1, 2012-1900, 6, 335, 0], 'strptime() starts SCALAR ref at pos()');
+is(pos($str), 20, 'strptime() updates pos() magic on SCALAR ref');
+
+{
+ # Latin-1 vs. UTF-8 strings
+ my $date = "2012\x{e9}02\x{e9}01";
+ utf8::upgrade my $date_U = $date;
+ my $fmt = "%Y\x{e9}%m\x{e9}%d";
+ utf8::upgrade my $fmt_U = $fmt;
+
+ my @want = (undef, undef, undef, 1, 2-1, 2012-1900, 3, 31, 0);
+
+ is_deeply([POSIX::strptime($date_U, $fmt )], \@want, 'strptime() UTF-8 date, legacy fmt');
+ is_deeply([POSIX::strptime($date, $fmt_U)], \@want, 'strptime() legacy date, UTF-8 fmt');
+ is_deeply([POSIX::strptime($date_U, $fmt_U)], \@want, 'strptime() UTF-8 date, UTF-8 fmt');
+
+ my $str = "\x{ea} $date \x{ea}";
+ pos($str) = 2;
+
+ is_deeply([POSIX::strptime(\$str, $fmt_U)], \@want, 'strptime() legacy data SCALAR ref, UTF-8 fmt');
+ is(pos($str), 12, 'pos() of legacy data SCALAR after strptime() UTF-8 fmt');
+
+ utf8::upgrade my $str_U = $str;
+ pos($str_U) = 2;
+
+ is_deeply([POSIX::strptime(\$str_U, $fmt)], \@want, 'strptime() UTF-8 data SCALAR ref, legacy fmt');
+ is(pos($str_U), 12, 'pos() of UTF-8 data SCALAR after strptime() legacy fmt');
+
+ # High (>U+FF) strings
+ my $date_UU = "2012\x{1234}02\x{1234}01";
+ my $fmt_UU = "%Y\x{1234}%m\x{1234}%d";
+
+ is_deeply([POSIX::strptime($date_UU, $fmt_UU)], \@want, 'strptime() on non-Latin-1 Unicode');
+}
+
+eval { POSIX::strptime({}, "format") };
+like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on HASH ref');
+
+eval { POSIX::strptime(\"boo", "format") };
+like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on const literal ref');
+
+eval { POSIX::strptime(qr/boo!/, "format") };
+like($@, qr/not a reference to a mutable scalar/, 'strptime() dies on Regexp');
+
+$str = bless [], "WithStringOverload";
+{
+ package WithStringOverload;
+ use overload '""' => sub { return "2012-02-01" };
+}
+
+@time = POSIX::strptime($str, "%Y-%m-%d", 0, 0, 0);
+is_deeply(\@time, [0, 0, 0, 1, 2-1, 2012-1900, 3, 31, 0], 'strptime() allows object with string overload');
+
setlocale(LC_TIME, $orig_loc) || die "Cannot setlocale() back to orig: $!";
# clock() seems to have different definitions of what it does between POSIX
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index fb4166b73f..07a684a49c 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -368,6 +368,14 @@ The only change is to fix a formatting error in the Pod.
L<Version::Requirements> has been upgraded from version 0.101021 to version 0.101022.
+=item *
+
+L<POSIX> has been upgraded from version 1.28 to version 1.29..
+
+It now has a wrapper for the C<strptime(3)> function, it's not
+exported on C<use POSIX;> due to backwards compatibility concerns, it
+has to be explicitly requested with C<use POSIX qw(strptime)>.
+
=back
=head2 Removed Modules and Pragmata