summary | refs | log | tree | commit | diff
path: root/numpy/core
diff options
context:
space:
mode:
author: mdroe <mdroe@localhost> 2009-10-14 15:01:41 +0000
committer: mdroe <mdroe@localhost> 2009-10-14 15:01:41 +0000
commit: fbbf05cfefe98fd284c08c2f3a78c7cf5503821a (patch)
tree: 87da915d7d0e3f96b5ba1b072563b44c31e6545a /numpy/core
parent: c4db9cf34c643bd422060de6b636f53a57557c4d (diff)
download: numpy-fbbf05cfefe98fd284c08c2f3a78c7cf5503821a.tar.gz
Fix Unicode object -> chararray conversion on narrow Python builds
Diffstat (limited to 'numpy/core')
-rw-r--r-- numpy/core/defchararray.py 32
-rw-r--r-- numpy/core/src/multiarray/arraytypes.c.src 14
-rw-r--r-- numpy/core/src/multiarray/convert_datatype.c 9
-rw-r--r-- numpy/core/tests/test_defchararray.py 2
4 files changed, 48 insertions, 9 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 45b061d69..4ad5c59d3 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -1714,6 +1714,7 @@ class chararray(ndarray):
self = ndarray.__new__(subtype, shape, (dtype, itemsize),
order=order)
else:
+ print shape, dtype, itemsize
self = ndarray.__new__(subtype, shape, (dtype, itemsize),
buffer=buffer,
offset=offset, strides=strides,
@@ -2422,9 +2423,40 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
unicode = True
else:
unicode = False
+
if itemsize is None:
itemsize = _len(obj)
shape = _len(obj) / itemsize
+
+ if unicode:
+ if sys.maxunicode == 0xffff:
+ # On a narrow Python build, the buffer for Unicode
+ # strings is UCS2, which doesn't match the buffer for
+ # Numpy Unicode types, which is ALWAYS UCS4.
+ # Therefore, we need to convert the buffer. On Python
+ # 2.6 and later, we can use the utf_32 codec. Earlier
+ # versions don't have that codec, so we convert to a
+ # numerical array that matches the input buffer, and
+ # then use Numpy to convert it to UCS4. All of this
+ # should happen in native endianness.
+ if sys.hexversion >= 0x2060000:
+ obj = obj.encode('utf_32')
+ else:
+ if isinstance(obj, str):
+ ascii = numpy.frombuffer(obj, 'u1')
+ ucs4 = numpy.array(ascii, 'u4')
+ obj = ucs4.data
+ else:
+ ucs2 = numpy.frombuffer(obj, 'u2')
+ ucs4 = numpy.array(ucs2, 'u4')
+ obj = ucs4.data
+ else:
+ obj = unicode(obj)
+ else:
+ # Let the default Unicode -> string encoding (if any) take
+ # precedence.
+ obj = str(obj)
+
return chararray(shape, itemsize=itemsize, unicode=unicode,
buffer=obj, order=order)
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 9cff6836e..b2b73d7be 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -1816,9 +1816,10 @@ OBJECT_copyswapn (PyObject **dst, intp dstride, PyObject **src, intp sstride,
{
intp i;
if (src != NULL) {
- dstride /= sizeof(PyObject **);
- sstride /= sizeof(PyObject **);
- if (__ALIGNED(dst,sizeof(PyObject **)) && __ALIGNED(src, sizeof(PyObject **))) {
+ if (__ALIGNED(dst,sizeof(PyObject **)) && __ALIGNED(src, sizeof(PyObject **)) &&
+ __ALIGNED(dstride,sizeof(PyObject **)) && __ALIGNED(sstride,sizeof(PyObject*))) {
+ dstride /= sizeof(PyObject **);
+ sstride /= sizeof(PyObject **);
for (i=0; i<n; i++) {
Py_XINCREF(*src);
Py_XDECREF(*dst);
@@ -1828,10 +1829,13 @@ OBJECT_copyswapn (PyObject **dst, intp dstride, PyObject **src, intp sstride,
}
}
else {
+ unsigned char *dstp, *srcp;
PyObject **dp, **sp;
+ dstp = (unsigned char*)dst;
+ srcp = (unsigned char*)src;
for (i=0; i<n; i++) {
- dp = dst;
- sp = src;
+ dp = (PyObject **)dstp;
+ sp = (PyObject **)srcp;
Py_XINCREF(*sp);
Py_XDECREF(*dp);
memcpy(dst, src, sizeof(PyObject *));
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index b85cf937d..bf69b8953 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -126,24 +126,25 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num)
* buffers[1] is the source
*/
static void
-_strided_buffered_cast(char *dptr, intp dstride, int delsize, int dswap,
+_strided_buffered_cast(char *dptr, intp dstride, intp delsize, int dswap,
PyArray_CopySwapNFunc *dcopyfunc,
- char *sptr, intp sstride, int selsize, int sswap,
+ char *sptr, intp sstride, intp selsize, int sswap,
PyArray_CopySwapNFunc *scopyfunc,
intp N, char **buffers, int bufsize,
PyArray_VectorUnaryFunc *castfunc,
PyArrayObject *dest, PyArrayObject *src)
{
int i;
+
if (N <= bufsize) {
/*
* 1. copy input to buffer and swap
* 2. cast input to output
* 3. swap output if necessary and copy from output buffer
*/
- scopyfunc(buffers[1], selsize, sptr, sstride, N, sswap, src);
+ scopyfunc((void *)buffers[1], selsize, sptr, sstride, N, sswap, src);
castfunc(buffers[1], buffers[0], N, src, dest);
- dcopyfunc(dptr, dstride, buffers[0], delsize, N, dswap, dest);
+ dcopyfunc(dptr, dstride, (void *)buffers[0], delsize, N, dswap, dest);
return;
}
diff --git a/numpy/core/tests/test_defchararray.py b/numpy/core/tests/test_defchararray.py
index fa3fe982f..a2e04b632 100644
--- a/numpy/core/tests/test_defchararray.py
+++ b/numpy/core/tests/test_defchararray.py
@@ -66,6 +66,8 @@ class TestBasic(TestCase):
def test_from_unicode(self):
A = np.char.array(u'\u03a3')
+ print A
+ print repr(A)
assert_equal(len(A), 1)
assert_equal(len(A[0]), 1)
assert_equal(A.itemsize, 4)