diff options
author | da-woods <dw-git@d-woods.co.uk> | 2019-09-30 19:50:57 +0100 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2019-09-30 20:50:57 +0200 |
commit | 074362b47093febfe5273ea7a00f4ba5ded6e73f (patch) | |
tree | 2418329e26b32f06e7e148ed719de5693cf3b350 /Cython/Compiler/ModuleNode.py | |
parent | 7e233ab00e117b2e7165c246941ac85a989be262 (diff) | |
download | cython-074362b47093febfe5273ea7a00f4ba5ded6e73f.tar.gz |
unicode imports (#3119)
* Handle normalization of unicode identifiers
* Support unicode characters in module names
(Only valid under Python 3)
Diffstat (limited to 'Cython/Compiler/ModuleNode.py')
-rw-r--r-- | Cython/Compiler/ModuleNode.py | 121 |
1 files changed, 87 insertions, 34 deletions
diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py index da9fb7d04..35d5e4f6e 100644 --- a/Cython/Compiler/ModuleNode.py +++ b/Cython/Compiler/ModuleNode.py @@ -14,6 +14,7 @@ import json import operator import os import re +import sys from .PyrexTypes import CPtrType from . import Future @@ -30,9 +31,21 @@ from .Errors import error, warning from .PyrexTypes import py_object_type from ..Utils import open_new_file, replace_suffix, decode_filename, build_hex_version from .Code import UtilityCode, IncludeCode -from .StringEncoding import EncodedString +from .StringEncoding import EncodedString, encoded_string_or_bytes_literal from .Pythran import has_np_pythran + +def replace_suffix_encoded(path, newsuf): + # calls replace suffix and returns a EncodedString or BytesLiteral with the encoding set + newpath = replace_suffix(path, newsuf) + return as_encoded_filename(newpath) + +def as_encoded_filename(path): + # wraps the path with either EncodedString or BytesLiteral (depending on its input type) + # and sets the encoding to the file system encoding + return encoded_string_or_bytes_literal(path, sys.getfilesystemencoding()) + + def check_c_declarations_pxd(module_node): module_node.scope.check_c_classes_pxd() return module_node @@ -70,6 +83,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): child_attrs = ["body"] directives = None + def merge_in(self, tree, scope, merge_scope=False): # Merges in the contents of another tree, and possibly scope. 
With the # current implementation below, this must be done right prior @@ -162,18 +176,18 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): h_funcs = h_entries(env.cfunc_entries) h_extension_types = h_entries(env.c_class_entries) if h_types or h_vars or h_funcs or h_extension_types: - result.h_file = replace_suffix(result.c_file, ".h") + result.h_file = replace_suffix_encoded(result.c_file, ".h") h_code = Code.CCodeWriter() c_code_config = generate_c_code_config(env, options) Code.GlobalState(h_code, self, c_code_config) if options.generate_pxi: - result.i_file = replace_suffix(result.c_file, ".pxi") + result.i_file = replace_suffix_encoded(result.c_file, ".pxi") i_code = Code.PyrexCodeWriter(result.i_file) else: i_code = None h_code.put_generated_by() - h_guard = Naming.h_guard_prefix + self.api_name(env) + h_guard = self.api_name(Naming.h_guard_prefix, env) h_code.put_h_guard(h_guard) h_code.putln("") h_code.putln('#include "Python.h"') @@ -181,7 +195,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if options.capi_reexport_cincludes: self.generate_includes(env, [], h_code) h_code.putln("") - api_guard = Naming.api_guard_prefix + self.api_name(env) + api_guard = self.api_name(Naming.api_guard_prefix, env) h_code.putln("#ifndef %s" % api_guard) h_code.putln("") self.generate_extern_c_macro_definition(h_code) @@ -208,7 +222,12 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): h_code.putln("/* It now returns a PyModuleDef instance instead of a PyModule instance. 
*/") h_code.putln("") h_code.putln("#if PY_MAJOR_VERSION < 3") - h_code.putln("PyMODINIT_FUNC init%s(void);" % env.module_name) + if env.module_name.isascii(): + py2_mod_name = env.module_name + else: + py2_mod_name = env.module_name.encode("ascii", errors="ignore").decode("utf-8") + h_code.putln('#error "Unicode module names are not supported in Python 2";') + h_code.putln("PyMODINIT_FUNC init%s(void);" % py2_mod_name) h_code.putln("#else") h_code.putln("PyMODINIT_FUNC %s(void);" % self.mod_init_func_cname('PyInit', env)) h_code.putln("#endif") @@ -229,8 +248,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): i_code.putln("cdef extern %s" % ( entry.type.declaration_code(entry.cname, pyrex=1))) - def api_name(self, env): - return env.qualified_name.replace(".", "__") + def api_name(self, prefix, env): + api_name = self.punycode_module_name(prefix, env.qualified_name) + return api_name.replace(".", "__") def generate_api_code(self, env, options, result): def api_entries(entries, pxd=0): @@ -240,12 +260,12 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): api_funcs = api_entries(env.cfunc_entries) api_extension_types = api_entries(env.c_class_entries) if api_vars or api_funcs or api_extension_types: - result.api_file = replace_suffix(result.c_file, "_api.h") + result.api_file = replace_suffix_encoded(result.c_file, "_api.h") h_code = Code.CCodeWriter() c_code_config = generate_c_code_config(env, options) Code.GlobalState(h_code, self, c_code_config) h_code.put_generated_by() - api_guard = Naming.api_guard_prefix + self.api_name(env) + api_guard = self.api_name(Naming.api_guard_prefix, env) h_code.put_h_guard(api_guard) # Work around https://bugs.python.org/issue4709 h_code.putln('#ifdef __MINGW64__') @@ -254,7 +274,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): h_code.putln('#include "Python.h"') if result.h_file: - h_code.putln('#include "%s"' % os.path.basename(result.h_file)) + h_filename = os.path.basename(result.h_file) + h_filename = 
as_encoded_filename(h_filename) + h_code.putln('#include %s' % h_filename.as_c_string_literal()) if api_extension_types: h_code.putln("") for entry in api_extension_types: @@ -285,9 +307,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): h_code.put(UtilityCode.load_as_string("TypeImport", "ImportExport.c")[0]) h_code.put(UtilityCode.load_as_string("TypeImport", "ImportExport.c")[1]) h_code.putln("") - h_code.putln("static int import_%s(void) {" % self.api_name(env)) + h_code.putln("static int %s(void) {" % self.api_name("import", env)) h_code.putln("PyObject *module = 0;") - h_code.putln('module = PyImport_ImportModule("%s");' % env.qualified_name) + h_code.putln('module = PyImport_ImportModule(%s);' % env.qualified_name.as_c_string_literal()) h_code.putln("if (!module) goto bad;") for entry in api_funcs: cname = env.mangle(Naming.func_prefix_api, entry.name) @@ -365,12 +387,13 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): globalstate.use_utility_code(refnanny_utility_code) code = globalstate['before_global_var'] - code.putln('#define __Pyx_MODULE_NAME "%s"' % self.full_module_name) - module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__')) + code.putln('#define __Pyx_MODULE_NAME %s' % + self.full_module_name.as_c_string_literal()) + module_is_main = self.is_main_module_flag_cname() code.putln("extern int %s;" % module_is_main) code.putln("int %s = 0;" % module_is_main) code.putln("") - code.putln("/* Implementation of '%s' */" % env.qualified_name) + code.putln("/* Implementation of %s */" % env.qualified_name.as_c_string_literal()) code = globalstate['late_includes'] code.putln("/* Late includes */") @@ -614,7 +637,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): for module in modules: defined_here = module is env modulecode.putln("") - modulecode.putln("/* Module declarations from '%s' */" % module.qualified_name) + modulecode.putln("/* Module declarations from %s */" % module.qualified_name.as_c_string_literal()) 
self.generate_c_class_declarations(module, modulecode, defined_here) self.generate_cvariable_declarations(module, modulecode, defined_here) self.generate_cfunction_declarations(module, modulecode, defined_here) @@ -672,8 +695,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): self.generate_extern_c_macro_definition(code) code.putln("") - code.putln("#define %s" % Naming.h_guard_prefix + self.api_name(env)) - code.putln("#define %s" % Naming.api_guard_prefix + self.api_name(env)) + code.putln("#define %s" % self.api_name(Naming.h_guard_prefix, env)) + code.putln("#define %s" % self.api_name(Naming.api_guard_prefix, env)) code.putln("/* Early includes */") self.generate_includes(env, cimported_modules, code, late=False) code.putln("") @@ -773,7 +796,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): if isabs(file_path): file_path = basename(file_path) # never include absolute paths escaped_filename = file_path.replace("\\", "\\\\").replace('"', r'\"') - code.putln('"%s",' % escaped_filename) + escaped_filename = as_encoded_filename(escaped_filename) + code.putln('%s,' % escaped_filename.as_c_string_literal()) else: # Some C compilers don't like an empty array code.putln("0") @@ -2318,15 +2342,25 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.enter_cfunc_scope(self.scope) code.putln("") code.putln(UtilityCode.load_as_string("PyModInitFuncType", "ModuleSetupCode.c")[0]) - header2 = "__Pyx_PyMODINIT_FUNC init%s(void)" % env.module_name + if env.module_name.isascii(): + py2_mod_name = env.module_name + fail_compilation_in_py2 = False + else: + fail_compilation_in_py2 = True + # at this point py2_mod_name is largely a placeholder and the value doesn't matter + py2_mod_name = env.module_name.encode("ascii", errors="ignore").decode("utf8") + + header2 = "__Pyx_PyMODINIT_FUNC init%s(void)" % py2_mod_name header3 = "__Pyx_PyMODINIT_FUNC %s(void)" % self.mod_init_func_cname('PyInit', env) header3 = EncodedString(header3) code.putln("#if PY_MAJOR_VERSION < 3") # 
Optimise for small code size as the module init function is only executed once. code.putln("%s CYTHON_SMALL_CODE; /*proto*/" % header2) + if fail_compilation_in_py2: + code.putln('#error "Unicode module names are not supported in Python 2";') if self.scope.is_package: code.putln("#if !defined(CYTHON_NO_PYINIT_EXPORT) && (defined(WIN32) || defined(MS_WINDOWS))") - code.putln("__Pyx_PyMODINIT_FUNC init__init__(void) { init%s(); }" % env.module_name) + code.putln("__Pyx_PyMODINIT_FUNC init__init__(void) { init%s(); }" % py2_mod_name) code.putln("#endif") code.putln(header2) code.putln("#else") @@ -2380,7 +2414,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): )) code.putln('PyErr_SetString(PyExc_RuntimeError,' ' "Module \'%s\' has already been imported. Re-initialisation is not supported.");' % - env.module_name) + env.module_name.as_c_string_literal()[1:-1]) code.putln("return -1;") code.putln("}") code.putln("#elif PY_MAJOR_VERSION >= 3") @@ -2439,7 +2473,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.put_error_if_neg(self.pos, "__Pyx_init_sys_getdefaultencoding_params()") code.putln("#endif") - code.putln("if (%s%s) {" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))) + code.putln("if (%s) {" % self.is_main_module_flag_cname()) code.put_error_if_neg(self.pos, 'PyObject_SetAttr(%s, %s, %s)' % ( env.module_cname, code.intern_identifier(EncodedString("__name__")), @@ -2526,7 +2560,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln('}') code.put_decref_clear(env.module_cname, py_object_type, nanny=False, clear_before_decref=True) code.putln('} else if (!PyErr_Occurred()) {') - code.putln('PyErr_SetString(PyExc_ImportError, "init %s");' % env.qualified_name) + code.putln('PyErr_SetString(PyExc_ImportError, "init %s");' % + env.qualified_name.as_c_string_literal()[1:-1]) code.putln('}') code.put_label(code.return_label) @@ -2664,14 +2699,15 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): # CPython may not have put us into 
sys.modules yet, but relative imports and reimports require it fq_module_name = self.full_module_name if fq_module_name.endswith('.__init__'): - fq_module_name = fq_module_name[:-len('.__init__')] + fq_module_name = EncodedString(fq_module_name[:-len('.__init__')]) + fq_module_name_cstring = fq_module_name.as_c_string_literal() code.putln("#if PY_MAJOR_VERSION >= 3") code.putln("{") code.putln("PyObject *modules = PyImport_GetModuleDict(); %s" % code.error_goto_if_null("modules", self.pos)) - code.putln('if (!PyDict_GetItemString(modules, "%s")) {' % fq_module_name) - code.putln(code.error_goto_if_neg('PyDict_SetItemString(modules, "%s", %s)' % ( - fq_module_name, env.module_cname), self.pos)) + code.putln('if (!PyDict_GetItemString(modules, %s)) {' % fq_module_name_cstring) + code.putln(code.error_goto_if_neg('PyDict_SetItemString(modules, %s, %s)' % ( + fq_module_name_cstring, env.module_cname), self.pos)) code.putln("}") code.putln("}") code.putln("#endif") @@ -2746,7 +2782,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.put_decref_clear(cname, py_object_type, nanny=False, clear_before_decref=True) def generate_main_method(self, env, code): - module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__')) + module_is_main = self.is_main_module_flag_cname() if Options.embed == "main": wmain = "wmain" else: @@ -2759,8 +2795,17 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): main_method=Options.embed, wmain_method=wmain)) + def punycode_module_name(self, prefix, name): + # adapted from PEP489 + try: + name = '_' + name.encode('ascii').decode('ascii') + except UnicodeEncodeError: + name = 'U_' + name.encode('punycode').replace(b'-', b'_').decode('ascii') + return "%s%s" % (prefix, name) + def mod_init_func_cname(self, prefix, env): - return '%s_%s' % (prefix, env.module_name) + # from PEP489 + return self.punycode_module_name(prefix, env.module_name) def generate_pymoduledef_struct(self, env, code): if env.doc: @@ -2785,12 
+2830,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("{Py_mod_exec, (void*)%s}," % exec_func_cname) code.putln("{0, NULL}") code.putln("};") + if not env.module_name.isascii(): + code.putln("#else /* CYTHON_PEP489_MULTI_PHASE_INIT */") + code.putln('#error "Unicode module names are only supported with multi-phase init' + ' as per PEP489"') code.putln("#endif") code.putln("") code.putln("static struct PyModuleDef %s = {" % Naming.pymoduledef_cname) code.putln(" PyModuleDef_HEAD_INIT,") - code.putln(' "%s",' % env.module_name) + code.putln(' %s,' % env.module_name.as_c_string_literal()) code.putln(" %s, /* m_doc */" % doc) code.putln("#if CYTHON_PEP489_MULTI_PHASE_INIT") code.putln(" 0, /* m_size */") @@ -2825,9 +2874,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("#else") code.putln("#if PY_MAJOR_VERSION < 3") code.putln( - '%s = Py_InitModule4("%s", %s, %s, 0, PYTHON_API_VERSION); Py_XINCREF(%s);' % ( + '%s = Py_InitModule4(%s, %s, %s, 0, PYTHON_API_VERSION); Py_XINCREF(%s);' % ( env.module_cname, - env.module_name, + env.module_name.as_c_string_literal(), env.method_table_cname, doc, env.module_cname)) @@ -3120,6 +3169,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): def generate_type_ready_code(self, entry, code): Nodes.CClassDefNode.generate_type_ready_code(entry, code) + def is_main_module_flag_cname(self): + full_module_name = self.full_module_name.replace('.', '__') + return self.punycode_module_name(Naming.module_is_main, full_module_name) + def generate_exttype_vtable_init_code(self, entry, code): # Generate code to initialise the C method table of an # extension type. |