summaryrefslogtreecommitdiff
path: root/tests/run/py_unicode_strings.pyx
blob: ba4df10879a4a28d046ee643e246d88413e73a7d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# mode: run
# tag: py_unicode_strings

import sys

from libc.string cimport memcpy

cdef assert_Py_UNICODE_equal(const Py_UNICODE* u1, const Py_UNICODE* u2):
    # Compare two zero-terminated Py_UNICODE buffers unit by unit and raise
    # AssertionError (reporting the offending position, both code units and
    # both buffers) if their contents differ.
    cdef size_t i = 0
    while True:
        # Stop scanning at the end of either buffer or at the first unit
        # that differs; `i` is then the position the assertion inspects.
        if u1[i] == 0 or u2[i] == 0 or u1[i] != u2[i]:
            break
        i += 1
    # Equal buffers stop with both units being the terminator; a shorter
    # buffer or a differing unit leaves two unequal values at `i`.
    assert u1[i] == u2[i], f"Mismatch at position {i}: {<long>u1[i]} != {<long>u2[i]} ({u1!r} != {u2!r})"


# Typedef alias for a Py_UNICODE pointer, so the tests below can exercise the
# same conversions through a typedef'd name as through the plain pointer type.
ctypedef Py_UNICODE* LPWSTR

# Module-level fixtures shared by the tests below.
# Python-level reference string: 7 ASCII characters plus U+1234 (8 in total).
cdef unicode uobj = u'unicode\u1234'
# NOTE(review): not referenced in the visible part of this file.
cdef unicode uobj1 = u'u'
# The same text as `uobj`, held as a C pointer initialised from a literal.
cdef Py_UNICODE* c_pu_str = u"unicode\u1234"
# Fixed-size C array; filled from `c_pu_str` by the memcpy() call below.
cdef Py_UNICODE[42] c_pu_arr
# The same text again, declared through the LPWSTR typedef.
cdef LPWSTR c_wstr = u"unicode\u1234"
# Pointer to an empty literal (used for the truthiness check in the tests).
cdef Py_UNICODE* c_pu_empty = u""
# NOTE(review): not referenced in the visible part of this file.
cdef char* c_empty = ""
# Two code points above U+FFFF, as a Python object and as a C pointer; the
# tests expect the pointer form to span 2 or 4 units depending on
# sizeof(Py_UNICODE).
cdef unicode uwide_literal = u'\U00020000\U00020001'
cdef Py_UNICODE* c_pu_wide_literal = u'\U00020000\U00020001'

# Copy len(uobj) units plus one more for the zero terminator into the array.
memcpy(c_pu_arr, c_pu_str, sizeof(Py_UNICODE) * (len(uobj) + 1))


def test_c_to_python():
    """
    Check Py_UNICODE pointer/array values used in Python-level expressions.

    Compares the module-level C fixtures (pointer, array and typedef'd
    pointer) against the equivalent Python unicode objects for whole-value
    equality, slicing, indexing, len(), sizeof() and truthiness.

    >>> test_c_to_python()
    """
    # Whole-value comparison of each C fixture against the Python string.
    assert c_pu_arr == uobj
    assert c_pu_str == uobj
    assert c_wstr == uobj

    # Open-ended slices starting after the first unit.
    assert c_pu_arr[1:] == uobj[1:]
    assert c_pu_str[1:] == uobj[1:]
    assert c_wstr[1:] == uobj[1:]

    # Slices bounded only at the end.
    # NOTE(review): the next two assertions are identical; the repetition is
    # harmless and has been left untouched.
    assert c_pu_arr[:1] == uobj[:1]
    assert c_pu_arr[:1] == uobj[:1]
    assert c_pu_str[:1] == uobj[:1]
    assert c_wstr[:1] == uobj[:1]

    # Slices bounded at both ends.
    assert c_pu_arr[1:7] == uobj[1:7]
    assert c_pu_str[1:7] == uobj[1:7]
    assert c_wstr[1:7] == uobj[1:7]

    # Single-unit indexing, compared against a C-typed character.
    cdef Py_UNICODE ch = uobj[1]  # Py_UCS4 is unsigned, Py_UNICODE is usually signed.
    assert c_pu_arr[1] == ch
    assert c_pu_str[1] == ch
    assert c_wstr[1] == ch

    # len() of each fixture is the 8 units of text; for the 42-slot array
    # this means the content length, not the capacity.
    assert len(c_pu_str) == 8
    assert len(c_pu_arr) == 8
    assert len(c_wstr) == 8

    # sizeof() reports storage size: the full array vs. a single pointer.
    assert sizeof(c_pu_arr) == sizeof(Py_UNICODE) * 42
    assert sizeof(c_pu_str) == sizeof(void*)

    # Code points above U+FFFF: the values compare equal either way, but the
    # unit count of the pointer form depends on the width of Py_UNICODE.
    assert c_pu_wide_literal == uwide_literal
    if sizeof(Py_UNICODE) >= 4:
        assert len(c_pu_wide_literal) == 2
    else:
        assert len(c_pu_wide_literal) == 4

    if sys.version_info >= (3, 3):
        # Make sure len(unicode) is not reverted to pre-3.3 behavior
        assert len(uwide_literal) == 2

    # Truthiness: an empty Python string is falsy, whereas the pointers are
    # expected to be truthy even when pointing at an empty string.
    assert u'unicode'
    assert not u''
    assert c_pu_str
    assert c_pu_empty


def test_python_to_c():
    """
    Check Python unicode objects passed where a Py_UNICODE pointer is expected.

    Each call hands a unicode object (or an explicit pointer cast of one) to
    assert_Py_UNICODE_equal(), which compares the two buffers unit by unit.

    >>> test_python_to_c()
    """
    # Holds the sliced/indexed strings below in a typed local.
    # NOTE(review): presumably this keeps each temporary object alive while a
    # pointer into it is in use - confirm.
    cdef unicode u

    # Unicode object passed directly, and through an explicit typedef cast.
    assert_Py_UNICODE_equal(c_pu_arr, uobj)
    assert_Py_UNICODE_equal(c_pu_str, uobj)
    assert_Py_UNICODE_equal(c_pu_str, <LPWSTR>uobj)
    # Tail of the string vs. the C pointers advanced by one unit.
    u = uobj[1:]
    assert_Py_UNICODE_equal(c_pu_str + 1, u)
    assert_Py_UNICODE_equal(c_wstr + 1, u)
    # Prefix, inner slice and single character vs. literals cast to pointers.
    u = uobj[:1]
    assert_Py_UNICODE_equal(<Py_UNICODE*>u"u", u)
    u = uobj[1:7]
    assert_Py_UNICODE_equal(<Py_UNICODE*>u"nicode", u)
    u = uobj[1]
    assert_Py_UNICODE_equal(<Py_UNICODE*>u"n", u)

    # Code points above U+FFFF, in both directions of conversion.
    assert_Py_UNICODE_equal(uwide_literal, <Py_UNICODE*>c_pu_wide_literal)

    # Embedded NUL: the Python object counts all 4 characters, while the
    # pointer form is expected to report only the 3 units before the NUL.
    assert len(u"abc\0") == 4
    assert len(<Py_UNICODE*>u"abc\0") == 3