diff options
author | Emile Anclin <emile.anclin@logilab.fr> | 2010-11-22 15:56:26 +0100 |
---|---|---|
committer | Emile Anclin <emile.anclin@logilab.fr> | 2010-11-22 15:56:26 +0100 |
commit | 0bd84938bb037b70cee901157880df4bf9dd7f87 (patch) | |
tree | afa1fa5ac8116ed3d5996fe5329061cffdc9452c /builder.py | |
parent | 5d3f2a5255224c15c316d0854fd9db2b41a201a0 (diff) | |
download | astroid-git-0bd84938bb037b70cee901157880df4bf9dd7f87.tar.gz |
py3k: handle file encoding and stream definition in builder
In Py3k, we can't do "data = open(path, 'U').read()" anymore, since we need
to know the encoding to read the file (otherwise we may get a UnicodeError);
instead, we first have to guess the encoding by reading the file as a byte stream.
We then define
+ node.file_encoding = encoding
+ node.file_stream = stream
on the built node, to be used by the Pylint checkers.
Diffstat (limited to 'builder.py')
-rw-r--r-- | builder.py | 53 |
1 files changed, 49 insertions, 4 deletions
@@ -47,6 +47,45 @@ from _ast import PyCF_ONLY_AST def parse(string): return compile(string, "<string>", 'exec', PyCF_ONLY_AST) +if sys.version_info >= (3, 0): + from tokenize import detect_encoding + + def open_source_file(filename): + byte_stream = open(filename, 'bU') + encoding = detect_encoding(byte_stream.readline)[0] + stream = open(filename, 'U', encoding=encoding) + try: + data = stream.read() + except UnicodeError, uex: # wrong encodingg + # detect_encoding returns utf-8 if no encoding specified + msg = 'Wrong (%s) or no encoding specified' % encoding + raise ASTNGBuildingException(msg) + return stream, encoding, data + +else: + import re + + _ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)") + + def _guess_encoding(string): + """get encoding from a python file as string or return None if not found + """ + # check for UTF-8 byte-order mark + if string.startswith('\xef\xbb\xbf'): + return 'UTF-8' + for line in string.split('\n', 2)[:2]: + # check for encoding declaration + match = _ENCODING_RGX.match(line) + if match is not None: + return match.group(1) + + def open_source_file(filename): + """get data for parsing a file""" + stream = open(filename, 'U') + data = stream.read() + encoding = _guess_encoding(data) + return stream, encoding, data + # ast NG builder ############################################################## MANAGER = ASTNGManager() @@ -101,10 +140,14 @@ class ASTNGBuilder: path is expected to be a python source file """ try: - data = open(path, 'U').read() - except IOError, ex: - msg = 'Unable to load file %r (%s)' % (path, ex) + stream, encoding, data = open_source_file(path) + except IOError, exc: + msg = 'Unable to load file %r (%s)' % (path, exc) raise ASTNGBuildingException(msg) + except SyntaxError, exc: # py3k encoding specification error + raise ASTNGBuildingException(exc) + except LookupError, exc: # unknown encoding + raise ASTNGBuildingException(exc) # get module name if necessary, *before modifying sys.path* if modname 
is None: try: @@ -113,10 +156,12 @@ class ASTNGBuilder: modname = splitext(basename(path))[0] # build astng representation try: - sys.path.insert(0, dirname(path)) + sys.path.insert(0, dirname(path)) # XXX (syt) iirk node = self.string_build(data, modname, path) finally: sys.path.pop(0) + node.file_encoding = encoding + node.file_stream = stream return node def string_build(self, data, modname='', path=None): |