summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Armin Rigo <arigo@tunes.org> 2012-07-09 16:03:25 +0200
committer Armin Rigo <arigo@tunes.org> 2012-07-09 16:03:25 +0200
commit cebed941263313cfcabbdefe634dc6356eddd5e7 (patch)
tree f790584e19954dcdba804e9b9b937eef2f0d4c01
parent 9761db8685de6f8586a007dee522553d5c77dfb3 (diff)
download cffi-cebed941263313cfcabbdefe634dc6356eddd5e7.tar.gz
in-progress
-rw-r--r-- c/_cffi_backend.c 73
-rw-r--r-- c/test_c.py 25
-rw-r--r-- c/wchar_helper.h 47
3 files changed, 112 insertions, 33 deletions
diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
index ca56f84..9635820 100644
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -762,24 +762,46 @@ convert_from_object(char *data, CTypeDescrObject *ct, PyObject *init)
return 0;
}
else if (ctitem->ct_flags & CT_PRIMITIVE_CHAR) {
- char *srcdata;
- Py_ssize_t n;
- if (!PyString_Check(init)) {
- expected = "str or list or tuple";
- goto cannot_convert;
+ if (ctitem->ct_size == sizeof(char)) {
+ char *srcdata;
+ Py_ssize_t n;
+ if (!PyString_Check(init)) {
+ expected = "str or list or tuple";
+ goto cannot_convert;
+ }
+ n = PyString_GET_SIZE(init);
+ if (ct->ct_length >= 0 && n > ct->ct_length) {
+ PyErr_Format(PyExc_IndexError,
+ "initializer string is too long for '%s' "
+ "(got %zd characters)", ct->ct_name, n);
+ return -1;
+ }
+ if (n != ct->ct_length)
+ n++;
+ srcdata = PyString_AS_STRING(init);
+ memcpy(data, srcdata, n);
+ return 0;
}
- n = PyString_GET_SIZE(init);
- if (ct->ct_length >= 0 && n > ct->ct_length) {
- PyErr_Format(PyExc_IndexError,
- "initializer string is too long for '%s' "
- "(got %zd characters)", ct->ct_name, n);
- return -1;
+#ifdef HAVE_WCHAR_H
+ else {
+ Py_ssize_t n;
+ if (!PyUnicode_Check(init)) {
+ expected = "unicode or list or tuple";
+ goto cannot_convert;
+ }
+ n = _my_PyUnicode_SizeAsWideChar(init);
+ if (ct->ct_length >= 0 && n > ct->ct_length) {
+ PyErr_Format(PyExc_IndexError,
+ "initializer unicode is too long for '%s' "
+ "(got %zd characters)", ct->ct_name, n);
+ return -1;
+ }
+ if (n != ct->ct_length)
+ n++;
+ _my_PyUnicode_AsWideChar(init, (wchar_t *)data, n);
+ return 0;
}
- if (n != ct->ct_length)
- n++;
- srcdata = PyString_AS_STRING(init);
- memcpy(data, srcdata, n);
- return 0;
+#endif
}
else {
expected = "list or tuple";
@@ -1153,18 +1175,17 @@ static PyObject *cdata_unicode(CDataObject *cd)
else if (cd->c_type->ct_itemdescr != NULL &&
cd->c_type->ct_itemdescr->ct_flags & CT_PRIMITIVE_CHAR &&
cd->c_type->ct_itemdescr->ct_size > sizeof(char)) {
- abort();
Py_ssize_t length;
if (cd->c_type->ct_flags & CT_ARRAY) {
- const char *start = cd->c_data;
- const char *end;
- length = get_array_length(cd);
- end = (const char *)memchr(start, 0, length);
- if (end != NULL)
- length = end - start;
+ const wchar_t *start = (wchar_t *)cd->c_data;
+ const Py_ssize_t lenmax = get_array_length(cd);
+ length = 0;
+ while (length < lenmax && start[length])
+ length++;
}
else {
+ abort();
if (cd->c_data == NULL) {
PyObject *s = cdata_repr(cd);
if (s != NULL) {
@@ -1178,7 +1199,7 @@ static PyObject *cdata_unicode(CDataObject *cd)
length = strlen(cd->c_data);
}
- return PyString_FromStringAndSize(cd->c_data, length);
+ return _my_PyUnicode_FromWideChar((wchar_t *)cd->c_data, length);
}
else
return cdata_repr(cd);
@@ -1949,6 +1970,10 @@ static PyObject *b_newp(PyObject *self, PyObject *args)
/* from a string, we add the null terminator */
explicitlength = PyString_GET_SIZE(init) + 1;
}
+ else if (PyUnicode_Check(init)) {
+ /* from a unicode, we add the null terminator */
+ explicitlength = PyUnicode_GET_SIZE(init) + 1;
+ }
else {
explicitlength = PyNumber_AsSsize_t(init, PyExc_OverflowError);
if (explicitlength < 0) {
diff --git a/c/test_c.py b/c/test_c.py
index f75b4c1..eae5f35 100644
--- a/c/test_c.py
+++ b/c/test_c.py
@@ -1311,21 +1311,36 @@ def test_wchar():
else:
py.test.raises(ValueError, "s.a1 = u'\U00012345'")
#
- a = new_array_type(BWCharP, u'hello \u1234 world')
+ BWCharArray = new_array_type(BWCharP, None)
+ a = newp(BWCharArray, u'hello \u1234 world')
assert len(a) == 14 # including the final null
assert unicode(a) == u'hello \u1234 world'
- py.test.raises(UnicodeEncodeError, str, a)
+ a[13] = u'!'
+ assert unicode(a) == u'hello \u1234 world!'
+ assert str(a) == repr(a)
assert a[6] == u'\u1234'
a[6] = '-'
assert str(a) == 'hello - world'
#
+ if wchar4:
+ u = u'\U00012345\U00012346\U00012347'
+ a = newp(BWCharArray, u)
+ assert len(a) == 4
+ assert unicode(a) == u
+ assert len(list(a)) == 4
+ expected = [u'\U00012345', u'\U00012346', u'\U00012347', unichr(0)]
+ assert list(a) == expected
+ got = [a[i] for i in range(4)]
+ assert got == expected
+ py.test.raises(IndexError, 'a[4]')
+ #
w = cast(BWChar, 'a')
assert repr(w) == "<cdata 'wchar_t' u'a'>"
assert str(w) == 'a'
assert unicode(w) == u'a'
w = cast(BWChar, 0x1234)
assert repr(w) == "<cdata 'wchar_t' u'\u1234'>"
- py.test.raises(UnicodeEncodeError, str, w)
+ py.test.raises(xxUnicodeEncodeError, str, w)
assert unicode(w) == u'\u1234'
assert int(w) == 0x1234
#
@@ -1333,13 +1348,13 @@ def test_wchar():
assert str(p) == 'hello - world'
assert unicode(p) == u'hello - world'
p[6] = u'\u2345'
- py.test.raises(UnicodeEncodeError, str, p)
+ py.test.raises(xxUnicodeEncodeError, str, p)
assert unicode(p) == u'hello \u2345 world'
#
s = newp(BStructPtr, [u'\u1234', p])
assert s.a1 == u'\u1234'
assert s.a2 == p
- py.test.raises(UnicodeEncodeError, str, s.a2)
+ py.test.raises(xxUnicodeEncodeError, str, s.a2)
assert unicode(s.a2) == u'hello \u2345 world'
#
q = cast(BWCharP, 0)
diff --git a/c/wchar_helper.h b/c/wchar_helper.h
index 9e88e45..f17bef3 100644
--- a/c/wchar_helper.h
+++ b/c/wchar_helper.h
@@ -63,6 +63,11 @@ _my_PyUnicode_FromWideChar(register const wchar_t *w,
#endif
+#define IS_SURROGATE(u) (0xD800 <= (u)[0] && (u)[0] <= 0xDBFF && \
+ 0xDC00 <= (u)[1] && (u)[1] <= 0xDFFF) /* true when u[0] is a high surrogate and u[1] a low surrogate; reads two items from u */
+#define AS_SURROGATE(u) (0x10000 + (((u)[0] - 0xD800) << 10) + \
+ ((u)[1] - 0xDC00)) /* combines the pair u[0],u[1] into a single code point >= 0x10000; only meaningful when IS_SURROGATE(u) holds */
+
static int _my_PyUnicode_AsSingleWideChar(PyObject *unicode, wchar_t *result)
{
Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
@@ -71,12 +76,46 @@ static int _my_PyUnicode_AsSingleWideChar(PyObject *unicode, wchar_t *result)
return 0;
}
#ifdef CONVERT_WCHAR_TO_SURROGATES
- if (PyUnicode_GET_SIZE(unicode) == 2 &&
- 0xD800 <= u[0] && u[0] <= 0xDBFF &&
- 0xDC00 <= u[1] && u[1] <= 0xDFFF) {
- *result = 0x10000 + ((u[0] - 0xD800) << 10) + (u[1] - 0xDC00);
+ if (PyUnicode_GET_SIZE(unicode) == 2 && IS_SURROGATE(u)) {
+ *result = AS_SURROGATE(u);
return 0;
}
#endif
return -1;
}
+
+static Py_ssize_t _my_PyUnicode_SizeAsWideChar(PyObject *unicode) /* number of wchar_t items needed for 'unicode', not counting a terminator */
+{
+ Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
+ Py_ssize_t result = length; /* default: one wchar_t per Py_UNICODE item */
+
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+ Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
+ Py_ssize_t i;
+
+ for (i=0; i<length-1; i++) { /* stops at length-1 because IS_SURROGATE also inspects u[i+1] */
+ if (IS_SURROGATE(u+i))
+ result--; /* each high+low surrogate pair takes only one wchar_t */
+ }
+#endif /* CONVERT_WCHAR_TO_SURROGATES */
+ return result;
+}
+
+static void _my_PyUnicode_AsWideChar(PyObject *unicode, /* source unicode object */
+ wchar_t *result, /* destination buffer; indices 0..resultlen-1 are written */
+ Py_ssize_t resultlen) /* number of wchar_t items to store */
+{
+ Py_UNICODE *u = PyUnicode_AS_UNICODE(unicode);
+ Py_ssize_t i;
+ for (i=0; i<resultlen; i++) { /* no bound check on u: resultlen alone decides how far the source is read */
+ wchar_t ordinal = *u;
+#ifdef CONVERT_WCHAR_TO_SURROGATES
+ if (IS_SURROGATE(u)) {
+ ordinal = AS_SURROGATE(u); /* merge the surrogate pair into one wchar_t value */
+ u++; /* skip the high surrogate here; the low one is skipped by the u++ below */
+ }
+#endif /* CONVERT_WCHAR_TO_SURROGATES */
+ result[i] = ordinal;
+ u++;
+ }
+}