diff options
author | Inada Naoki <songofacandy@gmail.com> | 2019-12-11 23:48:16 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-12-11 23:48:16 +0900 |
commit | 2186455d1579affc33253484d9445f7bdf3f7c29 (patch) | |
tree | 4081e93d9815fb0a63a81e9d3a55b1f971b02ac6 /msgpack | |
parent | 5fd611909319d03200774ea3c7a6ae16dbd26c12 (diff) | |
download | msgpack-python-2186455d1579affc33253484d9445f7bdf3f7c29.tar.gz |
Support datetime. (#394)
Diffstat (limited to 'msgpack')
-rw-r--r-- | msgpack/_cmsgpack.pyx | 7 | ||||
-rw-r--r-- | msgpack/_packer.pyx | 19 | ||||
-rw-r--r-- | msgpack/_unpacker.pyx | 23 | ||||
-rw-r--r-- | msgpack/ext.py | 27 | ||||
-rw-r--r-- | msgpack/fallback.py | 48 | ||||
-rw-r--r-- | msgpack/unpack.h | 72 |
6 files changed, 175 insertions, 21 deletions
diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx index 8ebdbf5..1faaac3 100644 --- a/msgpack/_cmsgpack.pyx +++ b/msgpack/_cmsgpack.pyx @@ -1,4 +1,11 @@ # coding: utf-8 #cython: embedsignature=True, c_string_encoding=ascii, language_level=3 +from cpython.datetime cimport import_datetime, datetime_new +import_datetime() + +import datetime +cdef object utc = datetime.timezone.utc +cdef object epoch = datetime_new(1970, 1, 1, 0, 0, 0, 0, tz=utc) + include "_packer.pyx" include "_unpacker.pyx" diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 1426439..b470646 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -2,6 +2,10 @@ from cpython cimport * from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact +from cpython.datetime cimport ( + PyDateTime_CheckExact, PyDelta_CheckExact, + datetime_tzinfo, timedelta_days, timedelta_seconds, timedelta_microseconds, +) cdef ExtType cdef Timestamp @@ -99,8 +103,9 @@ cdef class Packer(object): cdef object _berrors cdef const char *unicode_errors cdef bint strict_types - cdef bool use_float + cdef bint use_float cdef bint autoreset + cdef bint datetime def __cinit__(self): cdef int buf_size = 1024*1024 @@ -110,12 +115,13 @@ cdef class Packer(object): self.pk.buf_size = buf_size self.pk.length = 0 - def __init__(self, *, default=None, unicode_errors=None, + def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False): + bint strict_types=False, bint datetime=False, unicode_errors=None): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset + self.datetime = datetime self.pk.use_bin_type = use_bin_type if default is not None: if not PyCallable_Check(default): @@ -262,6 +268,13 @@ cdef class Packer(object): if ret == 0: ret = msgpack_pack_raw_body(&self.pk, <char*>view.buf, L) PyBuffer_Release(&view); + elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: + delta = o - epoch + if not PyDelta_CheckExact(delta): + raise ValueError("failed to calculate delta") + llval = timedelta_days(delta) * <long long>(24*60*60) + timedelta_seconds(delta) + ulval = timedelta_microseconds(delta) * 1000 + ret = msgpack_pack_timestamp(&self.pk, llval, ulval) elif not default_used and self._default: o = self._default(o) default_used = 1 diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 0ff633b..43c93a2 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,7 +1,6 @@ # coding: utf-8 from cpython cimport * - cdef extern from "Python.h": ctypedef struct PyObject cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 @@ -21,6 +20,8 @@ from .exceptions import ( ) from .ext import ExtType, Timestamp +cdef object giga = 1_000_000_000 + cdef extern from "unpack.h": ctypedef struct msgpack_user: @@ -28,10 +29,13 @@ cdef extern from "unpack.h": bint raw bint has_pairs_hook # call object_hook with k-v pairs bint strict_map_key + int timestamp PyObject* object_hook PyObject* list_hook PyObject* ext_hook PyObject* timestamp_t + PyObject *giga; + PyObject *utc; char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len @@ -57,7 +61,8 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, - bint use_list, bint raw, bint strict_map_key, + bint use_list, bint raw, int timestamp, + bint strict_map_key, const char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, @@ -99,8 +104,14 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("ext_hook must be a callable.") ctx.user.ext_hook = <PyObject*>ext_hook + if timestamp < 0 or 3 < timestamp: + raise ValueError("timestamp must be 0..3") + # Add Timestamp type to the user object so it may be used in unpack.h + ctx.user.timestamp = timestamp ctx.user.timestamp_t = <PyObject*>Timestamp + ctx.user.giga = <PyObject*>giga + ctx.user.utc = <PyObject*>utc ctx.user.unicode_errors = unicode_errors def default_read_extended_type(typecode, data): @@ -131,7 +142,7 @@ cdef inline int get_data_from_buffer(object obj, def unpackb(object packed, *, object object_hook=None, object list_hook=None, - bint use_list=True, bint raw=False, bint strict_map_key=True, + bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=-1, @@ -179,7 +190,7 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, try: init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, raw, strict_map_key, cerr, + use_list, raw, timestamp, strict_map_key, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: @@ -304,7 +315,7 @@ cdef class Unpacker(object): self.buf = NULL def __init__(self, file_like=None, *, Py_ssize_t read_size=0, - bint use_list=True, bint raw=False, bint strict_map_key=True, + bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024, object ext_hook=ExtType, @@ -359,7 +370,7 @@ cdef class Unpacker(object): cerr = unicode_errors init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, raw, strict_map_key, cerr, + ext_hook, use_list, raw, timestamp, strict_map_key, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) diff --git a/msgpack/ext.py b/msgpack/ext.py index c7efff6..09adb34 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -1,12 +1,18 @@ # coding: utf-8 from collections import namedtuple +import datetime import sys import struct PY2 = sys.version_info[0] == 2 + if not PY2: long = int + try: + _utc = datetime.timezone.utc + except AttributeError: + _utc = datetime.timezone(datetime.timedelta(0)) class ExtType(namedtuple("ExtType", "code data")): @@ -131,7 +137,7 @@ class Timestamp(object): data = struct.pack("!Iq", self.nanoseconds, self.seconds) return data - def to_float_s(self): + def to_float(self): """Get the timestamp as a floating-point value. :returns: posix timestamp @@ -139,6 +145,12 @@ class Timestamp(object): """ return self.seconds + self.nanoseconds / 1e9 + @staticmethod + def from_float(unix_float): + seconds = int(unix_float) + nanoseconds = int((unix_float % 1) * 1000000000) + return Timestamp(seconds, nanoseconds) + def to_unix_ns(self): """Get the timestamp as a unixtime in nanoseconds. @@ -146,3 +158,16 @@ class Timestamp(object): :rtype: int """ return int(self.seconds * 1e9 + self.nanoseconds) + + if not PY2: + + def to_datetime(self): + """Get the timestamp as a UTC datetime. + + :rtype: datetime. + """ + return datetime.datetime.fromtimestamp(self.to_float(), _utc) + + @staticmethod + def from_datetime(dt): + return Timestamp.from_float(dt.timestamp()) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index f6ba424..9ba98bf 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,5 +1,6 @@ """Fallback pure Python implementation of msgpack""" +from datetime import datetime as _DateTime import sys import struct @@ -174,6 +175,14 @@ class Unpacker(object): If true, unpack msgpack raw to Python bytes. Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + :param int timestamp: + Control how timestamp type is unpacked: + + 0 - Tiemstamp + 1 - float (Seconds from the EPOCH) + 2 - int (Nanoseconds from the EPOCH) + 3 - datetime.datetime (UTC). Python 2 is not supported. + :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. @@ -248,6 +257,7 @@ class Unpacker(object): read_size=0, use_list=True, raw=False, + timestamp=0, strict_map_key=True, object_hook=None, object_pairs_hook=None, @@ -307,6 +317,9 @@ class Unpacker(object): self._strict_map_key = bool(strict_map_key) self._unicode_errors = unicode_errors self._use_list = use_list + if not (0 <= timestamp <= 3): + raise ValueError("timestamp must be 0..3") + self._timestamp = timestamp self._list_hook = list_hook self._object_hook = object_hook self._object_pairs_hook = object_pairs_hook @@ -672,10 +685,21 @@ class Unpacker(object): else: obj = obj.decode("utf_8", self._unicode_errors) return obj - if typ == TYPE_EXT: - return self._ext_hook(n, bytes(obj)) if typ == TYPE_BIN: return bytes(obj) + if typ == TYPE_EXT: + if n == -1: # timestamp + ts = Timestamp.from_bytes(bytes(obj)) + if self._timestamp == 1: + return ts.to_float() + elif self._timestamp == 2: + return ts.to_unix_ns() + elif self._timestamp == 3: + return ts.to_datetime() + else: + return ts + else: + return self._ext_hook(n, bytes(obj)) assert typ == TYPE_IMMEDIATE return obj @@ -756,6 +780,12 @@ class Packer(object): This is useful when trying to implement accurate serialization for python types. + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unapcker. + (Python 2 is not supported). + :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. @@ -764,18 +794,22 @@ class Packer(object): def __init__( self, default=None, - unicode_errors=None, use_single_float=False, autoreset=True, use_bin_type=True, strict_types=False, + datetime=False, + unicode_errors=None, ): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._unicode_errors = unicode_errors or "strict" self._buffer = StringIO() + if PY2 and datetime: + raise ValueError("datetime is not supported in Python 2") + self._datetime = bool(datetime) + self._unicode_errors = unicode_errors or "strict" if default is not None: if not callable(default): raise TypeError("default must be callable") @@ -891,6 +925,12 @@ class Packer(object): return self._pack_map_pairs( len(obj), dict_iteritems(obj), nest_limit - 1 ) + + if self._datetime and check(obj, _DateTime): + obj = Timestamp.from_datetime(obj) + default_used = 1 + continue + if not default_used and self._default is not None: obj = self._default(obj) default_used = 1 diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 4380ec5..debdf71 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -24,10 +24,13 @@ typedef struct unpack_user { bool raw; bool has_pairs_hook; bool strict_map_key; + int timestamp; PyObject *object_hook; PyObject *list_hook; PyObject *ext_hook; PyObject *timestamp_t; + PyObject *giga; + PyObject *utc; const char *unicode_errors; Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; } unpack_user; @@ -268,7 +271,7 @@ typedef struct msgpack_timestamp { /* * Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h. */ -static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) { +static int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) { switch (buflen) { case 4: ts->tv_nsec = 0; @@ -292,10 +295,11 @@ static inline int unpack_timestamp(const char* buf, unsigned int buflen, msgpack } } -static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, - unsigned int length, msgpack_unpack_object* o) +#include "datetime.h" + +static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int length, msgpack_unpack_object* o) { - PyObject *py; int8_t typecode = (int8_t)*pos++; if (!u->ext_hook) { PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); @@ -305,13 +309,67 @@ static inline int unpack_callback_ext(unpack_user* u, const char* base, const ch PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len); return -1; } + + PyObject *py = NULL; // length also includes the typecode, so the actual data is length-1 if (typecode == -1) { msgpack_timestamp ts; - if (unpack_timestamp(pos, length-1, &ts) == 0) { + if (unpack_timestamp(pos, length-1, &ts) < 0) { + return -1; + } + + if (u->timestamp == 2) { // int + PyObject *a = PyLong_FromLongLong(ts.tv_sec); + if (a == NULL) return -1; + + PyObject *c = PyNumber_Multiply(a, u->giga); + Py_DECREF(a); + if (c == NULL) { + return -1; + } + + PyObject *b = PyLong_FromUnsignedLong(ts.tv_nsec); + if (b == NULL) { + Py_DECREF(c); + return -1; + } + + py = PyNumber_Add(c, b); + Py_DECREF(c); + Py_DECREF(b); + } + else if (u->timestamp == 0) { // Timestamp py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); - } else { - py = NULL; + } + else { // float or datetime + PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec); + if (a == NULL) return -1; + + PyObject *b = PyNumber_TrueDivide(a, u->giga); + Py_DECREF(a); + if (b == NULL) return -1; + + PyObject *c = PyLong_FromLongLong(ts.tv_sec); + if (c == NULL) { + Py_DECREF(b); + return -1; + } + + a = PyNumber_Add(b, c); + Py_DECREF(b); + Py_DECREF(c); + + if (u->timestamp == 3) { // datetime + PyObject *t = PyTuple_Pack(2, a, u->utc); + Py_DECREF(a); + if (t == NULL) { + return -1; + } + py = PyDateTime_FromTimestamp(t); + Py_DECREF(t); + } else { // float + py = a; + } } } else { py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); |