summaryrefslogtreecommitdiff
path: root/tests/run/unicode_identifiers_normalization.srctree
diff options
context:
space:
mode:
Diffstat (limited to 'tests/run/unicode_identifiers_normalization.srctree')
-rw-r--r--tests/run/unicode_identifiers_normalization.srctree83
1 files changed, 83 insertions, 0 deletions
diff --git a/tests/run/unicode_identifiers_normalization.srctree b/tests/run/unicode_identifiers_normalization.srctree
new file mode 100644
index 000000000..764384455
--- /dev/null
+++ b/tests/run/unicode_identifiers_normalization.srctree
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+# mode: run
+# tag: pure3.0, pep3131
+
+PYTHON build_tests.py
+# show behaviour in Python mode
+PYTHON -m doctest test0.py
+PYTHON -m doctest test1.py
+PYTHON -m doctest test2.py
+
+PYTHON setup.py build_ext --inplace
+# test in Cython mode
+PYTHON -c "import doctest; import test0 as m; exit(doctest.testmod(m)[0])"
+PYTHON -c "import doctest; import test1 as m; exit(doctest.testmod(m)[0])"
+PYTHON -c "import doctest; import test2 as m; exit(doctest.testmod(m)[0])"
+
+########## setup.py #########
+
+from Cython.Build.Dependencies import cythonize
+from distutils.core import setup
+
+setup(
+ ext_modules = cythonize("test*.py"),
+)
+
+######### build_tests.py ########
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import sys
+import unicodedata
+
+# a few pairs of unicode strings that should be equivalent after normalization
+string_pairs = [("fi", "fi"), # ligature and two letters
+ ("a\u0301", '\u00e1'), # a with acute accent with combining character or as 1 character
+ ("α\u0334\u0362", "α\u0362\u0334") # alpha with a pair of combining characters
+ # in a different order. No single character to normalize to
+ ]
+
+# Show that the pairs genuinely aren't equal before normalization
+for sp in string_pairs:
+ assert sp[0] != sp[1]
+ assert unicodedata.normalize('NFKC', sp[0]) == unicodedata.normalize('NFKC', sp[1])
+
+# some code that accesses the identifiers through the two different names
+# contains doctests
+example_code = [
+"""
+class C:
+ '''
+ >>> C().get()
+ True
+ '''
+ def __init__(self):
+ self.{0} = True
+ def get(self):
+ return self.{1}
+""", """
+def pass_through({0}):
+ '''
+ >>> pass_through(True)
+ True
+ '''
+ return {1}
+""", """
+import cython
+{0} = True
+def test():
+ '''
+ >>> test()
+ True
+ '''
+ return {1}
+"""]
+
+from io import open
+
+for idx, (code, strings) in enumerate(zip(example_code, string_pairs)):
+ with open("test{0}.py".format(idx), "w", encoding="utf8") as f:
+ code = code.format(*strings)
+ f.write("# -*- coding: utf-8 -*-\n")
+ # The code isn't Py2 compatible. Only write actual code in Py3+.
+ if sys.version_info[0] > 2:
+ f.write(code)