diff options
author | mdroe <mdroe@localhost> | 2009-10-14 15:01:41 +0000 |
---|---|---|
committer | mdroe <mdroe@localhost> | 2009-10-14 15:01:41 +0000 |
commit | fbbf05cfefe98fd284c08c2f3a78c7cf5503821a (patch) | |
tree | 87da915d7d0e3f96b5ba1b072563b44c31e6545a /numpy/core | |
parent | c4db9cf34c643bd422060de6b636f53a57557c4d (diff) | |
download | numpy-fbbf05cfefe98fd284c08c2f3a78c7cf5503821a.tar.gz |

Fix Unicode object -> chararray conversion on narrow Python builds

Diffstat (limited to 'numpy/core')

-rw-r--r-- | numpy/core/defchararray.py | 32 | ||||
-rw-r--r-- | numpy/core/src/multiarray/arraytypes.c.src | 14 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 9 | ||||
-rw-r--r-- | numpy/core/tests/test_defchararray.py | 2 |
4 files changed, 48 insertions, 9 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py index 45b061d69..4ad5c59d3 100644 --- a/numpy/core/defchararray.py +++ b/numpy/core/defchararray.py @@ -1714,6 +1714,7 @@ class chararray(ndarray): self = ndarray.__new__(subtype, shape, (dtype, itemsize), order=order) else: + print shape, dtype, itemsize self = ndarray.__new__(subtype, shape, (dtype, itemsize), buffer=buffer, offset=offset, strides=strides, @@ -2422,9 +2423,40 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None): unicode = True else: unicode = False + if itemsize is None: itemsize = _len(obj) shape = _len(obj) / itemsize + + if unicode: + if sys.maxunicode == 0xffff: + # On a narrow Python build, the buffer for Unicode + # strings is UCS2, which doesn't match the buffer for + # Numpy Unicode types, which is ALWAYS UCS4. + # Therefore, we need to convert the buffer. On Python + # 2.6 and later, we can use the utf_32 codec. Earlier + # versions don't have that codec, so we convert to a + # numerical array that matches the input buffer, and + # then use Numpy to convert it to UCS4. All of this + # should happen in native endianness. + if sys.hexversion >= 0x2060000: + obj = obj.encode('utf_32') + else: + if isinstance(obj, str): + ascii = numpy.frombuffer(obj, 'u1') + ucs4 = numpy.array(ascii, 'u4') + obj = ucs4.data + else: + ucs2 = numpy.frombuffer(obj, 'u2') + ucs4 = numpy.array(ucs2, 'u4') + obj = ucs4.data + else: + obj = unicode(obj) + else: + # Let the default Unicode -> string encoding (if any) take + # precedence. 
+ obj = str(obj) + return chararray(shape, itemsize=itemsize, unicode=unicode, buffer=obj, order=order) diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 9cff6836e..b2b73d7be 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -1816,9 +1816,10 @@ OBJECT_copyswapn (PyObject **dst, intp dstride, PyObject **src, intp sstride, { intp i; if (src != NULL) { - dstride /= sizeof(PyObject **); - sstride /= sizeof(PyObject **); - if (__ALIGNED(dst,sizeof(PyObject **)) && __ALIGNED(src, sizeof(PyObject **))) { + if (__ALIGNED(dst,sizeof(PyObject **)) && __ALIGNED(src, sizeof(PyObject **)) && + __ALIGNED(dstride,sizeof(PyObject **)) && __ALIGNED(sstride,sizeof(PyObject*))) { + dstride /= sizeof(PyObject **); + sstride /= sizeof(PyObject **); for (i=0; i<n; i++) { Py_XINCREF(*src); Py_XDECREF(*dst); @@ -1828,10 +1829,13 @@ OBJECT_copyswapn (PyObject **dst, intp dstride, PyObject **src, intp sstride, } } else { + unsigned char *dstp, *srcp; PyObject **dp, **sp; + dstp = (unsigned char*)dst; + srcp = (unsigned char*)src; for (i=0; i<n; i++) { - dp = dst; - sp = src; + dp = (PyObject **)dstp; + sp = (PyObject **)srcp; Py_XINCREF(*sp); Py_XDECREF(*dp); memcpy(dst, src, sizeof(PyObject *)); diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index b85cf937d..bf69b8953 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -126,24 +126,25 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num) * buffers[1] is the source */ static void -_strided_buffered_cast(char *dptr, intp dstride, int delsize, int dswap, +_strided_buffered_cast(char *dptr, intp dstride, intp delsize, int dswap, PyArray_CopySwapNFunc *dcopyfunc, - char *sptr, intp sstride, int selsize, int sswap, + char *sptr, intp sstride, intp selsize, int sswap, PyArray_CopySwapNFunc *scopyfunc, intp N, 
char **buffers, int bufsize, PyArray_VectorUnaryFunc *castfunc, PyArrayObject *dest, PyArrayObject *src) { int i; + if (N <= bufsize) { /* * 1. copy input to buffer and swap * 2. cast input to output * 3. swap output if necessary and copy from output buffer */ - scopyfunc(buffers[1], selsize, sptr, sstride, N, sswap, src); + scopyfunc((void *)buffers[1], selsize, sptr, sstride, N, sswap, src); castfunc(buffers[1], buffers[0], N, src, dest); - dcopyfunc(dptr, dstride, buffers[0], delsize, N, dswap, dest); + dcopyfunc(dptr, dstride, (void *)buffers[0], delsize, N, dswap, dest); return; } diff --git a/numpy/core/tests/test_defchararray.py b/numpy/core/tests/test_defchararray.py index fa3fe982f..a2e04b632 100644 --- a/numpy/core/tests/test_defchararray.py +++ b/numpy/core/tests/test_defchararray.py @@ -66,6 +66,8 @@ class TestBasic(TestCase): def test_from_unicode(self): A = np.char.array(u'\u03a3') + print A + print repr(A) assert_equal(len(A), 1) assert_equal(len(A[0]), 1) assert_equal(A.itemsize, 4) |