summaryrefslogtreecommitdiff
path: root/Cython/Compiler/ModuleNode.py
diff options
context:
space:
mode:
author: da-woods <dw-git@d-woods.co.uk> 2019-09-30 19:50:57 +0100
committer: Stefan Behnel <stefan_ml@behnel.de> 2019-09-30 20:50:57 +0200
commit: 074362b47093febfe5273ea7a00f4ba5ded6e73f (patch)
tree: 2418329e26b32f06e7e148ed719de5693cf3b350 /Cython/Compiler/ModuleNode.py
parent: 7e233ab00e117b2e7165c246941ac85a989be262 (diff)
download: cython-074362b47093febfe5273ea7a00f4ba5ded6e73f.tar.gz
unicode imports (#3119)
* Handle normalization of unicode identifiers * Support unicode characters in module names (Only valid under Python 3)
Diffstat (limited to 'Cython/Compiler/ModuleNode.py')
-rw-r--r-- Cython/Compiler/ModuleNode.py 121
1 files changed, 87 insertions, 34 deletions
diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py
index da9fb7d04..35d5e4f6e 100644
--- a/Cython/Compiler/ModuleNode.py
+++ b/Cython/Compiler/ModuleNode.py
@@ -14,6 +14,7 @@ import json
import operator
import os
import re
+import sys
from .PyrexTypes import CPtrType
from . import Future
@@ -30,9 +31,21 @@ from .Errors import error, warning
from .PyrexTypes import py_object_type
from ..Utils import open_new_file, replace_suffix, decode_filename, build_hex_version
from .Code import UtilityCode, IncludeCode
-from .StringEncoding import EncodedString
+from .StringEncoding import EncodedString, encoded_string_or_bytes_literal
from .Pythran import has_np_pythran
+
+def replace_suffix_encoded(path, newsuf):
+ # calls replace_suffix() and returns an EncodedString or BytesLiteral with the encoding set
+ newpath = replace_suffix(path, newsuf)
+ return as_encoded_filename(newpath)
+
+def as_encoded_filename(path):
+ # wraps the path with either EncodedString or BytesLiteral (depending on its input type)
+ # and sets the encoding to the file system encoding
+ return encoded_string_or_bytes_literal(path, sys.getfilesystemencoding())
+
+
def check_c_declarations_pxd(module_node):
module_node.scope.check_c_classes_pxd()
return module_node
@@ -70,6 +83,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
child_attrs = ["body"]
directives = None
+
def merge_in(self, tree, scope, merge_scope=False):
# Merges in the contents of another tree, and possibly scope. With the
# current implementation below, this must be done right prior
@@ -162,18 +176,18 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
h_funcs = h_entries(env.cfunc_entries)
h_extension_types = h_entries(env.c_class_entries)
if h_types or h_vars or h_funcs or h_extension_types:
- result.h_file = replace_suffix(result.c_file, ".h")
+ result.h_file = replace_suffix_encoded(result.c_file, ".h")
h_code = Code.CCodeWriter()
c_code_config = generate_c_code_config(env, options)
Code.GlobalState(h_code, self, c_code_config)
if options.generate_pxi:
- result.i_file = replace_suffix(result.c_file, ".pxi")
+ result.i_file = replace_suffix_encoded(result.c_file, ".pxi")
i_code = Code.PyrexCodeWriter(result.i_file)
else:
i_code = None
h_code.put_generated_by()
- h_guard = Naming.h_guard_prefix + self.api_name(env)
+ h_guard = self.api_name(Naming.h_guard_prefix, env)
h_code.put_h_guard(h_guard)
h_code.putln("")
h_code.putln('#include "Python.h"')
@@ -181,7 +195,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if options.capi_reexport_cincludes:
self.generate_includes(env, [], h_code)
h_code.putln("")
- api_guard = Naming.api_guard_prefix + self.api_name(env)
+ api_guard = self.api_name(Naming.api_guard_prefix, env)
h_code.putln("#ifndef %s" % api_guard)
h_code.putln("")
self.generate_extern_c_macro_definition(h_code)
@@ -208,7 +222,12 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
h_code.putln("/* It now returns a PyModuleDef instance instead of a PyModule instance. */")
h_code.putln("")
h_code.putln("#if PY_MAJOR_VERSION < 3")
- h_code.putln("PyMODINIT_FUNC init%s(void);" % env.module_name)
+ if env.module_name.isascii():
+ py2_mod_name = env.module_name
+ else:
+ py2_mod_name = env.module_name.encode("ascii", errors="ignore").decode("utf-8")
+ h_code.putln('#error "Unicode module names are not supported in Python 2";')
+ h_code.putln("PyMODINIT_FUNC init%s(void);" % py2_mod_name)
h_code.putln("#else")
h_code.putln("PyMODINIT_FUNC %s(void);" % self.mod_init_func_cname('PyInit', env))
h_code.putln("#endif")
@@ -229,8 +248,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
i_code.putln("cdef extern %s" % (
entry.type.declaration_code(entry.cname, pyrex=1)))
- def api_name(self, env):
- return env.qualified_name.replace(".", "__")
+ def api_name(self, prefix, env):
+ api_name = self.punycode_module_name(prefix, env.qualified_name)
+ return api_name.replace(".", "__")
def generate_api_code(self, env, options, result):
def api_entries(entries, pxd=0):
@@ -240,12 +260,12 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
api_funcs = api_entries(env.cfunc_entries)
api_extension_types = api_entries(env.c_class_entries)
if api_vars or api_funcs or api_extension_types:
- result.api_file = replace_suffix(result.c_file, "_api.h")
+ result.api_file = replace_suffix_encoded(result.c_file, "_api.h")
h_code = Code.CCodeWriter()
c_code_config = generate_c_code_config(env, options)
Code.GlobalState(h_code, self, c_code_config)
h_code.put_generated_by()
- api_guard = Naming.api_guard_prefix + self.api_name(env)
+ api_guard = self.api_name(Naming.api_guard_prefix, env)
h_code.put_h_guard(api_guard)
# Work around https://bugs.python.org/issue4709
h_code.putln('#ifdef __MINGW64__')
@@ -254,7 +274,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
h_code.putln('#include "Python.h"')
if result.h_file:
- h_code.putln('#include "%s"' % os.path.basename(result.h_file))
+ h_filename = os.path.basename(result.h_file)
+ h_filename = as_encoded_filename(h_filename)
+ h_code.putln('#include %s' % h_filename.as_c_string_literal())
if api_extension_types:
h_code.putln("")
for entry in api_extension_types:
@@ -285,9 +307,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
h_code.put(UtilityCode.load_as_string("TypeImport", "ImportExport.c")[0])
h_code.put(UtilityCode.load_as_string("TypeImport", "ImportExport.c")[1])
h_code.putln("")
- h_code.putln("static int import_%s(void) {" % self.api_name(env))
+ h_code.putln("static int %s(void) {" % self.api_name("import", env))
h_code.putln("PyObject *module = 0;")
- h_code.putln('module = PyImport_ImportModule("%s");' % env.qualified_name)
+ h_code.putln('module = PyImport_ImportModule(%s);' % env.qualified_name.as_c_string_literal())
h_code.putln("if (!module) goto bad;")
for entry in api_funcs:
cname = env.mangle(Naming.func_prefix_api, entry.name)
@@ -365,12 +387,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
globalstate.use_utility_code(refnanny_utility_code)
code = globalstate['before_global_var']
- code.putln('#define __Pyx_MODULE_NAME "%s"' % self.full_module_name)
- module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))
+ code.putln('#define __Pyx_MODULE_NAME %s' %
+ self.full_module_name.as_c_string_literal())
+ module_is_main = self.is_main_module_flag_cname()
code.putln("extern int %s;" % module_is_main)
code.putln("int %s = 0;" % module_is_main)
code.putln("")
- code.putln("/* Implementation of '%s' */" % env.qualified_name)
+ code.putln("/* Implementation of %s */" % env.qualified_name.as_c_string_literal())
code = globalstate['late_includes']
code.putln("/* Late includes */")
@@ -614,7 +637,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
for module in modules:
defined_here = module is env
modulecode.putln("")
- modulecode.putln("/* Module declarations from '%s' */" % module.qualified_name)
+ modulecode.putln("/* Module declarations from %s */" % module.qualified_name.as_c_string_literal())
self.generate_c_class_declarations(module, modulecode, defined_here)
self.generate_cvariable_declarations(module, modulecode, defined_here)
self.generate_cfunction_declarations(module, modulecode, defined_here)
@@ -672,8 +695,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
self.generate_extern_c_macro_definition(code)
code.putln("")
- code.putln("#define %s" % Naming.h_guard_prefix + self.api_name(env))
- code.putln("#define %s" % Naming.api_guard_prefix + self.api_name(env))
+ code.putln("#define %s" % self.api_name(Naming.h_guard_prefix, env))
+ code.putln("#define %s" % self.api_name(Naming.api_guard_prefix, env))
code.putln("/* Early includes */")
self.generate_includes(env, cimported_modules, code, late=False)
code.putln("")
@@ -773,7 +796,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if isabs(file_path):
file_path = basename(file_path) # never include absolute paths
escaped_filename = file_path.replace("\\", "\\\\").replace('"', r'\"')
- code.putln('"%s",' % escaped_filename)
+ escaped_filename = as_encoded_filename(escaped_filename)
+ code.putln('%s,' % escaped_filename.as_c_string_literal())
else:
# Some C compilers don't like an empty array
code.putln("0")
@@ -2318,15 +2342,25 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.enter_cfunc_scope(self.scope)
code.putln("")
code.putln(UtilityCode.load_as_string("PyModInitFuncType", "ModuleSetupCode.c")[0])
- header2 = "__Pyx_PyMODINIT_FUNC init%s(void)" % env.module_name
+ if env.module_name.isascii():
+ py2_mod_name = env.module_name
+ fail_compilation_in_py2 = False
+ else:
+ fail_compilation_in_py2 = True
+ # at this point py2_mod_name is largely a placeholder and the value doesn't matter
+ py2_mod_name = env.module_name.encode("ascii", errors="ignore").decode("utf8")
+
+ header2 = "__Pyx_PyMODINIT_FUNC init%s(void)" % py2_mod_name
header3 = "__Pyx_PyMODINIT_FUNC %s(void)" % self.mod_init_func_cname('PyInit', env)
header3 = EncodedString(header3)
code.putln("#if PY_MAJOR_VERSION < 3")
# Optimise for small code size as the module init function is only executed once.
code.putln("%s CYTHON_SMALL_CODE; /*proto*/" % header2)
+ if fail_compilation_in_py2:
+ code.putln('#error "Unicode module names are not supported in Python 2";')
if self.scope.is_package:
code.putln("#if !defined(CYTHON_NO_PYINIT_EXPORT) && (defined(WIN32) || defined(MS_WINDOWS))")
- code.putln("__Pyx_PyMODINIT_FUNC init__init__(void) { init%s(); }" % env.module_name)
+ code.putln("__Pyx_PyMODINIT_FUNC init__init__(void) { init%s(); }" % py2_mod_name)
code.putln("#endif")
code.putln(header2)
code.putln("#else")
@@ -2380,7 +2414,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
))
code.putln('PyErr_SetString(PyExc_RuntimeError,'
' "Module \'%s\' has already been imported. Re-initialisation is not supported.");' %
- env.module_name)
+ env.module_name.as_c_string_literal()[1:-1])
code.putln("return -1;")
code.putln("}")
code.putln("#elif PY_MAJOR_VERSION >= 3")
@@ -2439,7 +2473,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.put_error_if_neg(self.pos, "__Pyx_init_sys_getdefaultencoding_params()")
code.putln("#endif")
- code.putln("if (%s%s) {" % (Naming.module_is_main, self.full_module_name.replace('.', '__')))
+ code.putln("if (%s) {" % self.is_main_module_flag_cname())
code.put_error_if_neg(self.pos, 'PyObject_SetAttr(%s, %s, %s)' % (
env.module_cname,
code.intern_identifier(EncodedString("__name__")),
@@ -2526,7 +2560,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln('}')
code.put_decref_clear(env.module_cname, py_object_type, nanny=False, clear_before_decref=True)
code.putln('} else if (!PyErr_Occurred()) {')
- code.putln('PyErr_SetString(PyExc_ImportError, "init %s");' % env.qualified_name)
+ code.putln('PyErr_SetString(PyExc_ImportError, "init %s");' %
+ env.qualified_name.as_c_string_literal()[1:-1])
code.putln('}')
code.put_label(code.return_label)
@@ -2664,14 +2699,15 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
# CPython may not have put us into sys.modules yet, but relative imports and reimports require it
fq_module_name = self.full_module_name
if fq_module_name.endswith('.__init__'):
- fq_module_name = fq_module_name[:-len('.__init__')]
+ fq_module_name = EncodedString(fq_module_name[:-len('.__init__')])
+ fq_module_name_cstring = fq_module_name.as_c_string_literal()
code.putln("#if PY_MAJOR_VERSION >= 3")
code.putln("{")
code.putln("PyObject *modules = PyImport_GetModuleDict(); %s" %
code.error_goto_if_null("modules", self.pos))
- code.putln('if (!PyDict_GetItemString(modules, "%s")) {' % fq_module_name)
- code.putln(code.error_goto_if_neg('PyDict_SetItemString(modules, "%s", %s)' % (
- fq_module_name, env.module_cname), self.pos))
+ code.putln('if (!PyDict_GetItemString(modules, %s)) {' % fq_module_name_cstring)
+ code.putln(code.error_goto_if_neg('PyDict_SetItemString(modules, %s, %s)' % (
+ fq_module_name_cstring, env.module_cname), self.pos))
code.putln("}")
code.putln("}")
code.putln("#endif")
@@ -2746,7 +2782,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.put_decref_clear(cname, py_object_type, nanny=False, clear_before_decref=True)
def generate_main_method(self, env, code):
- module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))
+ module_is_main = self.is_main_module_flag_cname()
if Options.embed == "main":
wmain = "wmain"
else:
@@ -2759,8 +2795,17 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
main_method=Options.embed,
wmain_method=wmain))
+ def punycode_module_name(self, prefix, name):
+ # adapted from PEP 489 (export hook naming for non-ASCII module names)
+ try:
+ name = '_' + name.encode('ascii').decode('ascii')
+ except UnicodeEncodeError:
+ name = 'U_' + name.encode('punycode').replace(b'-', b'_').decode('ascii')
+ return "%s%s" % (prefix, name)
+
def mod_init_func_cname(self, prefix, env):
- return '%s_%s' % (prefix, env.module_name)
+ # from PEP 489
+ return self.punycode_module_name(prefix, env.module_name)
def generate_pymoduledef_struct(self, env, code):
if env.doc:
@@ -2785,12 +2830,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("{Py_mod_exec, (void*)%s}," % exec_func_cname)
code.putln("{0, NULL}")
code.putln("};")
+ if not env.module_name.isascii():
+ code.putln("#else /* CYTHON_PEP489_MULTI_PHASE_INIT */")
+ code.putln('#error "Unicode module names are only supported with multi-phase init'
+ ' as per PEP489"')
code.putln("#endif")
code.putln("")
code.putln("static struct PyModuleDef %s = {" % Naming.pymoduledef_cname)
code.putln(" PyModuleDef_HEAD_INIT,")
- code.putln(' "%s",' % env.module_name)
+ code.putln(' %s,' % env.module_name.as_c_string_literal())
code.putln(" %s, /* m_doc */" % doc)
code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT")
code.putln(" 0, /* m_size */")
@@ -2825,9 +2874,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("#else")
code.putln("#if PY_MAJOR_VERSION < 3")
code.putln(
- '%s = Py_InitModule4("%s", %s, %s, 0, PYTHON_API_VERSION); Py_XINCREF(%s);' % (
+ '%s = Py_InitModule4(%s, %s, %s, 0, PYTHON_API_VERSION); Py_XINCREF(%s);' % (
env.module_cname,
- env.module_name,
+ env.module_name.as_c_string_literal(),
env.method_table_cname,
doc,
env.module_cname))
@@ -3120,6 +3169,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
def generate_type_ready_code(self, entry, code):
Nodes.CClassDefNode.generate_type_ready_code(entry, code)
+ def is_main_module_flag_cname(self):
+ full_module_name = self.full_module_name.replace('.', '__')
+ return self.punycode_module_name(Naming.module_is_main, full_module_name)
+
def generate_exttype_vtable_init_code(self, entry, code):
# Generate code to initialise the C method table of an
# extension type.