diff options
author | Michael Twomey <mick@twomeylee.name> | 2013-10-16 19:12:12 +0100 |
---|---|---|
committer | Michael Twomey <mick@twomeylee.name> | 2013-10-16 19:12:12 +0100 |
commit | e2f5a7a8ad904991b4ce3ff0091e5e1059272536 (patch) | |
tree | d04d05847fc57a690a2e5478114cda8640dc14c6 | |
parent | 2e60ca3636e739fb1b68e803a7698f55df2b42af (diff) | |
download | pyiso8601-e2f5a7a8ad904991b4ce3ff0091e5e1059272536.tar.gz |
Rewrote the regex, large parts of the code and tests
Added more tests to cover more of the spec and fixed many edge cases.
-rw-r--r-- | iso8601/iso8601.py | 112 | ||||
-rw-r--r-- | iso8601/test_iso8601.py | 32 |
2 files changed, 110 insertions, 34 deletions
diff --git a/iso8601/iso8601.py b/iso8601/iso8601.py index eee6741..a121e2e 100644 --- a/iso8601/iso8601.py +++ b/iso8601/iso8601.py @@ -8,12 +8,19 @@ datetime.datetime(2007, 1, 25, 12, 0, tzinfo=<iso8601.iso8601.Utc ...>) """ -from datetime import datetime, timedelta, tzinfo +from datetime import ( + datetime, + timedelta, + tzinfo +) +import logging import sys import re __all__ = ["parse_date", "ParseError"] +LOG = logging.getLogger(__name__) + if sys.version_info >= (3, 0, 0): _basestring = str else: @@ -21,11 +28,32 @@ else: # Adapted from http://delete.me.uk/2005/03/iso8601.html -ISO8601_REGEX = re.compile(r"(?P<year>[0-9]{4})(-(?P<month>[0-9]{1,2})(-(?P<day>[0-9]{1,2})" - r"((?P<separator>.)(?P<hour>[0-9]{2}):(?P<minute>[0-9]{2})(:(?P<second>[0-9]{2})(\.(?P<fraction>[0-9]+))?)?" - r"(?P<timezone>Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?" +ISO8601_REGEX = re.compile(r""" + (?P<year>[0-9]{4}) + (-{0,1}(?P<month>[0-9]{1,2})){1} + (-{0,1}(?P<day>[0-9]{1,2})){1} + ( + (?P<separator>[ T]) + (?P<hour>[0-9]{2}) + (:{0,1}(?P<minute>[0-9]{2})){0,1} + ( + :{0,1}(?P<second>[0-9]{1,2}) + (\.(?P<second_fraction>[0-9]+)){0,1} + ){0,1} + (?P<timezone> + Z + | + ( + (?P<tz_sign>[-+]) + (?P<tz_hour>[0-9]{2}) + :{0,1} + (?P<tz_minute>[0-9]{2}){0,1} + ) + ){0,1} + ){0,1} + """, + re.VERBOSE ) -TIMEZONE_REGEX = re.compile("(?P<prefix>[+-])(?P<hours>[0-9]{2}).(?P<minutes>[0-9]{2})") class ParseError(Exception): """Raised when there is a problem parsing a date string""" @@ -44,6 +72,7 @@ class Utc(tzinfo): def dst(self, dt): return ZERO + UTC = Utc() class FixedOffset(tzinfo): @@ -54,6 +83,17 @@ class FixedOffset(tzinfo): self.__offset = timedelta(hours=offset_hours, minutes=offset_minutes) self.__name = name + def __eq__(self, other): + if isinstance(other, FixedOffset): + return ( + (other.__offset == self.__offset) + and + (other.__name == self.__name) + ) + if isinstance(other, tzinfo): + return other == self + return False + def utcoffset(self, dt): return self.__offset @@ -64,26 +104,42 @@ class FixedOffset(tzinfo): return ZERO def __repr__(self): - return "<FixedOffset %r>" % self.__name + return "<FixedOffset %r %r>" % (self.__name, self.__offset) + +def to_int(d, key, default_to_zero=False, default=None): + """Pull a value from the dict and convert to int + + :param default_to_zero: If the value is None or empty, treat it as zero + :param default: If the value is missing in the dict use this default -def parse_timezone(tzstring, default_timezone=UTC): + """ + value = d.get(key, default) + LOG.debug("Got %r for %r with default %r", value, key, default) + if (value in ["", None]) and default_to_zero: + return 0 + if value is None: + raise ParseError("Unable to read %s from %s" % (key, d)) + return int(value) + +def parse_timezone(matches, default_timezone=UTC): """Parses ISO 8601 time zone specs into tzinfo offsets """ - if tzstring == "Z": + + if matches["timezone"] == "Z": return default_timezone # This isn't strictly correct, but it's common to encounter dates without # timezones so I'll assume the default (which defaults to UTC). # Addresses issue 4. - if tzstring is None: + if matches["timezone"] is None: return default_timezone - m = TIMEZONE_REGEX.match(tzstring) - prefix, hours, minutes = m.groups() - hours, minutes = int(hours), int(minutes) - if prefix == "-": + sign = matches["tz_sign"] + hours = to_int(matches, "tz_hour") + minutes = to_int(matches, "tz_minute", default_to_zero=True) + description = "%s%02d:%02d" % (sign, hours, minutes) + if sign == "-": hours = -hours - minutes = -minutes - return FixedOffset(hours, minutes, tzstring) + return FixedOffset(hours, minutes, description) def parse_date(datestring, default_timezone=UTC): """Parses ISO 8601 dates into datetime objects @@ -99,16 +155,22 @@ def parse_date(datestring, default_timezone=UTC): if not m: raise ParseError("Unable to parse date string %r" % datestring) groups = m.groupdict() - tz = parse_timezone(groups["timezone"], default_timezone=default_timezone) - if groups["fraction"] is None: - groups["fraction"] = 0 - else: - groups["fraction"] = int(float("0.%s" % groups["fraction"]) * 1e6) - if groups["second"] is None: - groups["second"] = 0 + LOG.debug("Parsed %s into %s with default timezone %s", datestring, groups, default_timezone) + + tz = parse_timezone(groups, default_timezone=default_timezone) + + groups["second_fraction"] = int(float("0.%s" % to_int(groups, "second_fraction", default_to_zero=True)) * 1e6) + try: - return datetime(int(groups["year"]), int(groups["month"]), int(groups["day"]), - int(groups["hour"]), int(groups["minute"]), int(groups["second"]), - int(groups["fraction"]), tz) + return datetime( + year=to_int(groups, "year"), + month=to_int(groups, "month"), + day=to_int(groups, "day"), + hour=to_int(groups, "hour"), + minute=to_int(groups, "minute", default_to_zero=True), + second=to_int(groups, "second", default_to_zero=True), + microsecond=groups["second_fraction"], + tzinfo=tz, + ) except Exception as e: raise ParseError(e) diff --git a/iso8601/test_iso8601.py b/iso8601/test_iso8601.py index 5e1aa7f..5b5e760 100644 --- a/iso8601/test_iso8601.py +++ b/iso8601/test_iso8601.py @@ -1,3 +1,4 @@ +# coding=UTF-8 from __future__ import absolute_import import datetime @@ -9,15 +10,10 @@ from iso8601 import iso8601 def test_iso8601_regex(): assert iso8601.ISO8601_REGEX.match("2006-10-11T00:14:33Z") -def test_timezone_regex(): - assert iso8601.TIMEZONE_REGEX.match("+01:00") - assert iso8601.TIMEZONE_REGEX.match("+00:00") - assert iso8601.TIMEZONE_REGEX.match("+01:20") - assert iso8601.TIMEZONE_REGEX.match("-01:00") - def test_parse_no_timezone_different_default(): tz = iso8601.FixedOffset(2, 0, "test offset") d = iso8601.parse_date("2007-01-01T08:00:00", default_timezone=tz) + assert d == datetime.datetime(2007, 1, 1, 8, 0, 0, 0, tz) assert d.tzinfo == tz @pytest.mark.parametrize("invalid_date", [ @@ -26,8 +22,9 @@ def test_parse_no_timezone_different_default(): ("",), (None,), ("23",), + ("131015T142533Z",), + ("131015",), ]) - def test_parse_invalid_date(invalid_date): with pytest.raises(iso8601.ParseError) as exc: iso8601.parse_date(invalid_date) @@ -41,7 +38,24 @@ def test_parse_invalid_date(invalid_date): ("2006-10-20T15:34:56Z", datetime.datetime(2006, 10, 20, 15, 34, 56, 0, iso8601.UTC)), ("2007-5-7T11:43:55.328Z'", datetime.datetime(2007, 5, 7, 11, 43, 55, 328000, iso8601.UTC)), # http://code.google.com/p/pyiso8601/issues/detail?id=6 ("2006-10-20T15:34:56.123Z", datetime.datetime(2006, 10, 20, 15, 34, 56, 123000, iso8601.UTC)), + ("2013-10-15T18:30Z", datetime.datetime(2013, 10, 15, 18, 30, 0, 0, iso8601.UTC)), + ("2013-10-15T22:30+04", datetime.datetime(2013, 10, 15, 22, 30, 0, 0, iso8601.FixedOffset(4, 0, "+04:00"))), # <time>±hh:mm + ("2013-10-15T1130-0700", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(-7, 0, "-07:00"))), # <time>±hhmm + ("2013-10-15T15:00-03:30", datetime.datetime(2013, 10, 15, 15, 0, 0, 0, iso8601.FixedOffset(-3, 30, "-03:30"))), # <time>±hh + ("2013-10-15T183123Z", datetime.datetime(2013, 10, 15, 18, 31, 23, 0, iso8601.UTC)), # hhmmss + ("2013-10-15T1831Z", datetime.datetime(2013, 10, 15, 18, 31, 0, 0, iso8601.UTC)), # hhmm + ("2013-10-15T18Z", datetime.datetime(2013, 10, 15, 18, 0, 0, 0, iso8601.UTC)), # hh + ("20131015T18:30Z", datetime.datetime(2013, 10, 15, 18, 30, 0, 0, iso8601.UTC)), # YYYYMMDD ]) - def test_parse_valid_date(valid_date, expected_datetime): - assert iso8601.parse_date(valid_date) == expected_datetime + parsed = iso8601.parse_date(valid_date) + assert parsed.year == expected_datetime.year + assert parsed.month == expected_datetime.month + assert parsed.day == expected_datetime.day + assert parsed.hour == expected_datetime.hour + assert parsed.minute == expected_datetime.minute + assert parsed.second == expected_datetime.second + assert parsed.microsecond == expected_datetime.microsecond + assert parsed.tzinfo == expected_datetime.tzinfo + assert parsed == expected_datetime + assert parsed.isoformat() == expected_datetime.isoformat() |