summary | refs | log | tree | commit | diff
path: root/msgpack
diff options
context:
space:
mode:
author: Joel Nothman <joel.nothman@gmail.com> 2012-10-04 11:31:40 +1000
committer: Joel Nothman <joel.nothman@gmail.com> 2012-10-04 11:31:40 +1000
commit: df4f23779d14f2b41e9a5ecca0a06e21385cc603 (patch)
tree: bf03928d8eb40e9f5391eb49676001a32b83f39e /msgpack
parent: e7c51d9089e9270ce197c00a6af1c60e45f36e97 (diff)
parent: d5f99959cc2ec393c13fc9e44714351272bac7fc (diff)
download: msgpack-python-df4f23779d14f2b41e9a5ecca0a06e21385cc603.tar.gz
Merge commit 'd5f9995' into read_bytes
Conflicts: msgpack/_msgpack.pyx
Diffstat (limited to 'msgpack')
-rw-r--r-- msgpack/_msgpack.pyx 205
-rw-r--r-- msgpack/_version.py 2
-rw-r--r-- msgpack/unpack.h 31
-rw-r--r-- msgpack/unpack_template.h 104
4 files changed, 236 insertions, 106 deletions
diff --git a/msgpack/_msgpack.pyx b/msgpack/_msgpack.pyx
index 30fb9fc..728e4f3 100644
--- a/msgpack/_msgpack.pyx
+++ b/msgpack/_msgpack.pyx
@@ -1,12 +1,16 @@
# coding: utf-8
#cython: embedsignature=True
+import warnings
+
from cpython cimport *
cdef extern from "Python.h":
ctypedef char* const_char_ptr "const char*"
ctypedef char* const_void_ptr "const void*"
ctypedef struct PyObject
cdef int PyObject_AsReadBuffer(object o, const_void_ptr* buff, Py_ssize_t* buf_len) except -1
+ char* __FILE__
+ int __LINE__
from libc.stdlib cimport *
from libc.string cimport *
@@ -139,11 +143,19 @@ cdef class Packer(object):
ret = msgpack_pack_raw(&self.pk, len(o))
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
- elif PyDict_Check(o):
+ elif PyDict_CheckExact(o):
d = <dict>o
ret = msgpack_pack_map(&self.pk, len(d))
if ret == 0:
- for k,v in d.iteritems():
+ for k, v in d.iteritems():
+ ret = self._pack(k, nest_limit-1)
+ if ret != 0: break
+ ret = self._pack(v, nest_limit-1)
+ if ret != 0: break
+ elif PyDict_Check(o):
+ ret = msgpack_pack_map(&self.pk, len(o))
+ if ret == 0:
+ for k, v in o.items():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
@@ -170,6 +182,17 @@ cdef class Packer(object):
self.pk.length = 0
return buf
+ cpdef pack_array_header(self, size_t size):
+ msgpack_pack_array(&self.pk, size)
+ buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
+ self.pk.length = 0
+ return buf
+
+ cpdef pack_map_header(self, size_t size):
+ msgpack_pack_map(&self.pk, size)
+ buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
+ self.pk.length = 0
+ return buf
def pack(object o, object stream, default=None, encoding='utf-8', unicode_errors='strict'):
"""
@@ -187,8 +210,9 @@ def packb(object o, default=None, encoding='utf-8', unicode_errors='strict', use
cdef extern from "unpack.h":
ctypedef struct msgpack_user:
- int use_list
+ bint use_list
PyObject* object_hook
+ bint has_pairs_hook # call object_hook with k-v pairs
PyObject* list_hook
char *encoding
char *unicode_errors
@@ -200,71 +224,98 @@ cdef extern from "unpack.h":
unsigned int ct
PyObject* key
- int template_execute(template_context* ctx, const_char_ptr data,
- size_t len, size_t* off) except -1
+ ctypedef int (*execute_fn)(template_context* ctx, const_char_ptr data,
+ size_t len, size_t* off) except -1
+ execute_fn template_construct
+ execute_fn template_skip
+ execute_fn read_array_header
+ execute_fn read_map_header
void template_init(template_context* ctx)
object template_data(template_context* ctx)
+cdef inline init_ctx(template_context *ctx, object object_hook, object object_pairs_hook, object list_hook, bint use_list, encoding, unicode_errors):
+ template_init(ctx)
+ ctx.user.use_list = use_list
+ ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
+
+ if object_hook is not None and object_pairs_hook is not None:
+ raise ValueError("object_pairs_hook and object_hook are mutually exclusive.")
+
+ if object_hook is not None:
+ if not PyCallable_Check(object_hook):
+ raise TypeError("object_hook must be a callable.")
+ ctx.user.object_hook = <PyObject*>object_hook
+
+ if object_pairs_hook is None:
+ ctx.user.has_pairs_hook = False
+ else:
+ if not PyCallable_Check(object_pairs_hook):
+ raise TypeError("object_pairs_hook must be a callable.")
+ ctx.user.object_hook = <PyObject*>object_pairs_hook
+ ctx.user.has_pairs_hook = True
+
+ if list_hook is not None:
+ if not PyCallable_Check(list_hook):
+ raise TypeError("list_hook must be a callable.")
+ ctx.user.list_hook = <PyObject*>list_hook
+
+ if encoding is None:
+ ctx.user.encoding = NULL
+ ctx.user.unicode_errors = NULL
+ else:
+ if isinstance(encoding, unicode):
+ _bencoding = encoding.encode('ascii')
+ else:
+ _bencoding = encoding
+ ctx.user.encoding = PyBytes_AsString(_bencoding)
+ if isinstance(unicode_errors, unicode):
+ _berrors = unicode_errors.encode('ascii')
+ else:
+ _berrors = unicode_errors
+ ctx.user.unicode_errors = PyBytes_AsString(_berrors)
def unpackb(object packed, object object_hook=None, object list_hook=None,
- bint use_list=0, encoding=None, unicode_errors="strict",
+ bint use_list=1, encoding=None, unicode_errors="strict",
+ object_pairs_hook=None,
):
+ """Unpack packed_bytes to object. Returns an unpacked object.
+
+ Raises `ValueError` when `packed` contains extra bytes.
"""
- Unpack packed_bytes to object. Returns an unpacked object."""
cdef template_context ctx
cdef size_t off = 0
cdef int ret
cdef char* buf
cdef Py_ssize_t buf_len
- PyObject_AsReadBuffer(packed, <const_void_ptr*>&buf, &buf_len)
- if encoding is None:
- enc = NULL
- err = NULL
- else:
- if isinstance(encoding, unicode):
- bencoding = encoding.encode('ascii')
- else:
- bencoding = encoding
- if isinstance(unicode_errors, unicode):
- berrors = unicode_errors.encode('ascii')
- else:
- berrors = unicode_errors
- enc = PyBytes_AsString(bencoding)
- err = PyBytes_AsString(berrors)
+ PyObject_AsReadBuffer(packed, <const_void_ptr*>&buf, &buf_len)
- template_init(&ctx)
- ctx.user.use_list = use_list
- ctx.user.object_hook = ctx.user.list_hook = NULL
- ctx.user.encoding = <const_char_ptr>enc
- ctx.user.unicode_errors = <const_char_ptr>err
- if object_hook is not None:
- if not PyCallable_Check(object_hook):
- raise TypeError("object_hook must be a callable.")
- ctx.user.object_hook = <PyObject*>object_hook
- if list_hook is not None:
- if not PyCallable_Check(list_hook):
- raise TypeError("list_hook must be a callable.")
- ctx.user.list_hook = <PyObject*>list_hook
- ret = template_execute(&ctx, buf, buf_len, &off)
+ init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, encoding, unicode_errors)
+ ret = template_construct(&ctx, buf, buf_len, &off)
if ret == 1:
- return template_data(&ctx)
+ obj = template_data(&ctx)
+ if off < buf_len:
+ raise ValueError("Extra data.")
+ return obj
else:
return None
def unpack(object stream, object object_hook=None, object list_hook=None,
- bint use_list=0, encoding=None, unicode_errors="strict",
+ bint use_list=1, encoding=None, unicode_errors="strict",
+ object_pairs_hook=None,
):
- """
- unpack an object from stream.
+ """Unpack an object from `stream`.
+
+ Raises `ValueError` when `stream` has extra bytes.
"""
return unpackb(stream.read(), use_list=use_list,
- object_hook=object_hook, list_hook=list_hook,
+ object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook,
encoding=encoding, unicode_errors=unicode_errors,
)
+
cdef class Unpacker(object):
"""
Streaming unpacker.
@@ -277,10 +328,13 @@ cdef class Unpacker(object):
(default: min(1024**2, max_buffer_size))
If `use_list` is true, msgpack list is deserialized to Python list.
- Otherwise, it is deserialized to Python tuple. (default: False)
+ Otherwise, it is deserialized to Python tuple.
`object_hook` is same to simplejson. If it is not None, it should be callable
- and Unpacker calls it when deserializing key-value.
+ and Unpacker calls it with a dict argument after deserializing a map.
+
+ `object_pairs_hook` is same to simplejson. If it is not None, it should be callable
+ and Unpacker calls it with a list of key-value pairs after deserializing a map.
`encoding` is encoding used for decoding msgpack bytes. If it is None (default),
msgpack bytes is deserialized to Python bytes.
@@ -315,7 +369,6 @@ cdef class Unpacker(object):
cdef object file_like
cdef object file_like_read
cdef Py_ssize_t read_size
- cdef bint use_list
cdef object object_hook
cdef object _bencoding
cdef object _berrors
@@ -330,10 +383,10 @@ cdef class Unpacker(object):
free(self.buf)
self.buf = NULL
- def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=0,
- object object_hook=None, object list_hook=None,
- encoding=None, unicode_errors='strict', int max_buffer_size=0):
- self.use_list = use_list
+ def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
+ object object_hook=None, object object_pairs_hook=None, object list_hook=None,
+ encoding=None, unicode_errors='strict', int max_buffer_size=0,
+ ):
self.file_like = file_like
if file_like:
self.file_like_read = file_like.read
@@ -353,31 +406,7 @@ cdef class Unpacker(object):
self.buf_size = read_size
self.buf_head = 0
self.buf_tail = 0
- template_init(&self.ctx)
- self.ctx.user.use_list = use_list
- self.ctx.user.object_hook = self.ctx.user.list_hook = <PyObject*>NULL
- if object_hook is not None:
- if not PyCallable_Check(object_hook):
- raise TypeError("object_hook must be a callable.")
- self.ctx.user.object_hook = <PyObject*>object_hook
- if list_hook is not None:
- if not PyCallable_Check(list_hook):
- raise TypeError("list_hook must be a callable.")
- self.ctx.user.list_hook = <PyObject*>list_hook
- if encoding is None:
- self.ctx.user.encoding = NULL
- self.ctx.user.unicode_errors = NULL
- else:
- if isinstance(encoding, unicode):
- self._bencoding = encoding.encode('ascii')
- else:
- self._bencoding = encoding
- self.ctx.user.encoding = PyBytes_AsString(self._bencoding)
- if isinstance(unicode_errors, unicode):
- self._berrors = unicode_errors.encode('ascii')
- else:
- self._berrors = unicode_errors
- self.ctx.user.unicode_errors = PyBytes_AsString(self._berrors)
+ init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, encoding, unicode_errors)
def feed(self, object next_bytes):
cdef char* buf
@@ -438,20 +467,20 @@ cdef class Unpacker(object):
else:
self.file_like = None
- cpdef unpack(self):
- """unpack one object"""
+ cdef object _unpack(self, execute_fn execute):
cdef int ret
+ cdef object obj
while 1:
- ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
+ ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
if ret == 1:
- o = template_data(&self.ctx)
+ obj = template_data(&self.ctx)
template_init(&self.ctx)
- return o
+ return obj
elif ret == 0:
if self.file_like is not None:
self.read_from_file()
continue
- raise StopIteration("No more unpack data.")
+ raise StopIteration("No more data to unpack.")
else:
raise ValueError("Unpack failed: error = %d" % (ret,))
@@ -465,11 +494,27 @@ cdef class Unpacker(object):
ret += self.file_like.read(nbytes - len(ret))
return ret
+ def unpack(self):
+ """unpack one object"""
+ return self._unpack(template_construct)
+
+ def skip(self):
+ """read and ignore one object, returning None"""
+ return self._unpack(template_skip)
+
+ def read_array_header(self):
+ """assuming the next object is an array, return its size n, such that the next n unpack() calls will iterate over its contents."""
+ return self._unpack(read_array_header)
+
+ def read_map_header(self):
+ """assuming the next object is a map, return its size n, such that the next n * 2 unpack() calls will iterate over its key-value pairs."""
+ return self._unpack(read_map_header)
+
def __iter__(self):
return self
def __next__(self):
- return self.unpack()
+ return self._unpack(template_construct)
# for debug.
#def _buf(self):
diff --git a/msgpack/_version.py b/msgpack/_version.py
index f343b7a..e750485 100644
--- a/msgpack/_version.py
+++ b/msgpack/_version.py
@@ -1 +1 @@
-version = (0, 2, 2)
+version = (0, 3, 0, 'dev1')
diff --git a/msgpack/unpack.h b/msgpack/unpack.h
index a106f9c..5ec7dbc 100644
--- a/msgpack/unpack.h
+++ b/msgpack/unpack.h
@@ -22,6 +22,7 @@
typedef struct unpack_user {
int use_list;
PyObject *object_hook;
+ bool has_pairs_hook;
PyObject *list_hook;
const char *encoding;
const char *unicode_errors;
@@ -41,6 +42,7 @@ typedef struct unpack_user {
#define msgpack_unpack_user unpack_user
+typedef int (*execute_fn)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off);
struct template_context;
typedef struct template_context template_context;
@@ -160,9 +162,7 @@ static inline int template_callback_array_item(unpack_user* u, unsigned int curr
static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_object* c)
{
if (u->list_hook) {
- PyObject *arglist = Py_BuildValue("(O)", *c);
- PyObject *new_c = PyEval_CallObject(u->list_hook, arglist);
- Py_DECREF(arglist);
+ PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c);
Py_DECREF(*c);
*c = new_c;
}
@@ -171,16 +171,31 @@ static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_obj
static inline int template_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o)
{
- PyObject *p = PyDict_New();
+ PyObject *p;
+ if (u->has_pairs_hook) {
+ p = PyList_New(n); // Or use tuple?
+ }
+ else {
+ p = PyDict_New();
+ }
if (!p)
return -1;
*o = p;
return 0;
}
-static inline int template_callback_map_item(unpack_user* u, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v)
+static inline int template_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v)
{
- if (PyDict_SetItem(*c, k, v) == 0) {
+ if (u->has_pairs_hook) {
+ msgpack_unpack_object item = PyTuple_Pack(2, k, v);
+ if (!item)
+ return -1;
+ Py_DECREF(k);
+ Py_DECREF(v);
+ PyList_SET_ITEM(*c, current, item);
+ return 0;
+ }
+ else if (PyDict_SetItem(*c, k, v) == 0) {
Py_DECREF(k);
Py_DECREF(v);
return 0;
@@ -191,9 +206,7 @@ static inline int template_callback_map_item(unpack_user* u, msgpack_unpack_obje
static inline int template_callback_map_end(unpack_user* u, msgpack_unpack_object* c)
{
if (u->object_hook) {
- PyObject *arglist = Py_BuildValue("(O)", *c);
- PyObject *new_c = PyEval_CallObject(u->object_hook, arglist);
- Py_DECREF(arglist);
+ PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c);
Py_DECREF(*c);
*c = new_c;
}
diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h
index b844a24..7d07601 100644
--- a/msgpack/unpack_template.h
+++ b/msgpack/unpack_template.h
@@ -95,6 +95,7 @@ msgpack_unpack_func(msgpack_unpack_object, _data)(msgpack_unpack_struct(_context
}
+template <bool construct>
msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off)
{
assert(len >= *off);
@@ -117,14 +118,17 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c
int ret;
+#define construct_cb(name) \
+ construct && msgpack_unpack_callback(name)
+
#define push_simple_value(func) \
- if(msgpack_unpack_callback(func)(user, &obj) < 0) { goto _failed; } \
+ if(construct_cb(func)(user, &obj) < 0) { goto _failed; } \
goto _push
#define push_fixed_value(func, arg) \
- if(msgpack_unpack_callback(func)(user, arg, &obj) < 0) { goto _failed; } \
+ if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \
goto _push
#define push_variable_value(func, base, pos, len) \
- if(msgpack_unpack_callback(func)(user, \
+ if(construct_cb(func)(user, \
(const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \
goto _push
@@ -140,9 +144,9 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c
#define start_container(func, count_, ct_) \
if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \
- if(msgpack_unpack_callback(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \
+ if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \
if((count_) == 0) { obj = stack[top].obj; \
- msgpack_unpack_callback(func##_end)(user, &obj); \
+ construct_cb(func##_end)(user, &obj); \
goto _push; } \
stack[top].ct = ct_; \
stack[top].size = count_; \
@@ -340,10 +344,10 @@ _push:
c = &stack[top-1];
switch(c->ct) {
case CT_ARRAY_ITEM:
- if(msgpack_unpack_callback(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; }
+ if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; }
if(++c->count == c->size) {
obj = c->obj;
- msgpack_unpack_callback(_array_end)(user, &obj);
+ construct_cb(_array_end)(user, &obj);
--top;
/*printf("stack pop %d\n", top);*/
goto _push;
@@ -354,10 +358,10 @@ _push:
c->ct = CT_MAP_VALUE;
goto _header_again;
case CT_MAP_VALUE:
- if(msgpack_unpack_callback(_map_item)(user, &c->obj, c->map_key, obj) < 0) { goto _failed; }
+ if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; }
if(++c->count == c->size) {
obj = c->obj;
- msgpack_unpack_callback(_map_end)(user, &obj);
+ construct_cb(_map_end)(user, &obj);
--top;
/*printf("stack pop %d\n", top);*/
goto _push;
@@ -377,6 +381,8 @@ _header_again:
_finish:
+ if (!construct)
+ msgpack_unpack_callback(_nil)(user, &obj);
stack[0].obj = obj;
++p;
ret = 1;
@@ -399,15 +405,13 @@ _end:
*off = p - (const unsigned char*)data;
return ret;
+#undef construct_cb
}
-
-#undef msgpack_unpack_func
-#undef msgpack_unpack_callback
-#undef msgpack_unpack_struct
-#undef msgpack_unpack_object
-#undef msgpack_unpack_user
-
+#undef SWITCH_RANGE_BEGIN
+#undef SWITCH_RANGE
+#undef SWITCH_RANGE_DEFAULT
+#undef SWITCH_RANGE_END
#undef push_simple_value
#undef push_fixed_value
#undef push_variable_value
@@ -415,6 +419,74 @@ _end:
#undef again_fixed_trail_if_zero
#undef start_container
+template <unsigned int fixed_offset, unsigned int var_offset>
+msgpack_unpack_func(int, _container_header)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off)
+{
+ assert(len >= *off);
+ uint32_t size;
+ const unsigned char *const p = (unsigned char*)data + *off;
+
+#define inc_offset(inc) \
+ if (len - *off < inc) \
+ return 0; \
+ *off += inc;
+
+ switch (*p) {
+ case var_offset:
+ inc_offset(3);
+ size = _msgpack_load16(uint16_t, p + 1);
+ break;
+ case var_offset + 1:
+ inc_offset(5);
+ size = _msgpack_load32(uint32_t, p + 1);
+ break;
+#ifdef USE_CASE_RANGE
+ case fixed_offset + 0x0 ... fixed_offset + 0xf:
+#else
+ case fixed_offset + 0x0:
+ case fixed_offset + 0x1:
+ case fixed_offset + 0x2:
+ case fixed_offset + 0x3:
+ case fixed_offset + 0x4:
+ case fixed_offset + 0x5:
+ case fixed_offset + 0x6:
+ case fixed_offset + 0x7:
+ case fixed_offset + 0x8:
+ case fixed_offset + 0x9:
+ case fixed_offset + 0xa:
+ case fixed_offset + 0xb:
+ case fixed_offset + 0xc:
+ case fixed_offset + 0xd:
+ case fixed_offset + 0xe:
+ case fixed_offset + 0xf:
+#endif
+ ++*off;
+ size = ((unsigned int)*p) & 0x0f;
+ break;
+ default:
+ PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream");
+ return -1;
+ }
+ msgpack_unpack_callback(_uint32)(&ctx->user, size, &ctx->stack[0].obj);
+ return 1;
+}
+
+#undef SWITCH_RANGE_BEGIN
+#undef SWITCH_RANGE
+#undef SWITCH_RANGE_DEFAULT
+#undef SWITCH_RANGE_END
+
+static const execute_fn template_construct = &template_execute<true>;
+static const execute_fn template_skip = &template_execute<false>;
+static const execute_fn read_array_header = &template_container_header<0x90, 0xdc>;
+static const execute_fn read_map_header = &template_container_header<0x80, 0xde>;
+
+#undef msgpack_unpack_func
+#undef msgpack_unpack_callback
+#undef msgpack_unpack_struct
+#undef msgpack_unpack_object
+#undef msgpack_unpack_user
+
#undef NEXT_CS
/* vim: set ts=4 sw=4 noexpandtab */