diff options
author | Emile Anclin <emile.anclin@logilab.fr> | 2010-12-15 10:34:07 +0100 |
---|---|---|
committer | Emile Anclin <emile.anclin@logilab.fr> | 2010-12-15 10:34:07 +0100 |
commit | a9e5aa99d683f8e7daebbc065bb363290cd9580e (patch) | |
tree | 89ded6d31cf1312533b634c4d0fea8ca27a194e5 | |
parent | fb09403ab49b207da3aa9a9b3f738af0085c031e (diff) | |
parent | 93234d5c28024cc72a0c22d80c3d6b65f3d84686 (diff) | |
download | astroid-git-a9e5aa99d683f8e7daebbc065bb363290cd9580e.tar.gz |
merge
-rw-r--r-- | builder.py | 59 | ||||
-rw-r--r-- | manager.py | 14 | ||||
-rw-r--r-- | test/unittest_builder.py | 40 |
3 files changed, 73 insertions, 40 deletions
@@ -42,42 +42,45 @@ from _ast import PyCF_ONLY_AST def parse(string): return compile(string, "<string>", 'exec', PyCF_ONLY_AST) -_ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)") - -def _guess_encoding(string): - """get encoding from a python file as string or return None if not found - """ - # check for UTF-8 byte-order mark - if string.startswith('\xef\xbb\xbf'): - return 'UTF-8' - for line in string.split('\n', 2)[:2]: - # check for encoding declaration - match = _ENCODING_RGX.match(line) - if match is not None: - return match.group(1) - -def get_data(filename): - """get data for parsing a file""" - stream = open(filename, 'U') - data = stream.read() - encoding = _guess_encoding(data) - return stream, encoding, data - if sys.version_info >= (3, 0): from tokenize import detect_encoding - def get_data(filename): + def open_source_file(filename): byte_stream = open(filename, 'bU') encoding = detect_encoding(byte_stream.readline)[0] stream = open(filename, 'U', encoding=encoding) try: data = stream.read() - except UnicodeError: # wrong encoding + except UnicodeError, uex: # wrong encodingg # detect_encoding returns utf-8 if no encoding specified msg = 'Wrong (%s) or no encoding specified' % encoding raise ASTNGBuildingException(msg) return stream, encoding, data +else: + import re + + _ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)") + + def _guess_encoding(string): + """get encoding from a python file as string or return None if not found + """ + # check for UTF-8 byte-order mark + if string.startswith('\xef\xbb\xbf'): + return 'UTF-8' + for line in string.split('\n', 2)[:2]: + # check for encoding declaration + match = _ENCODING_RGX.match(line) + if match is not None: + return match.group(1) + + def open_source_file(filename): + """get data for parsing a file""" + stream = open(filename, 'U') + data = stream.read() + encoding = _guess_encoding(data) + return stream, encoding, data + # ast NG builder ############################################################## MANAGER = ASTNGManager() @@ -111,14 +114,14 @@ class ASTNGBuilder(InspectBuilder): path is expected to be a python source file """ try: - file_stream, encoding, data = get_data(path) + stream, encoding, data = open_source_file(path) except IOError, exc: msg = 'Unable to load file %r (%s)' % (path, exc) raise ASTNGBuildingException(msg) except SyntaxError, exc: # py3k encoding specification error raise ASTNGBuildingException(exc) - except LookupError, l_ex: # unknown encoding - raise ASTNGBuildingException(l_ex) + except LookupError, exc: # unknown encoding + raise ASTNGBuildingException(exc) # get module name if necessary, *before modifying sys.path* if modname is None: try: @@ -127,12 +130,12 @@ class ASTNGBuilder(InspectBuilder): modname = splitext(basename(path))[0] # build astng representation try: - sys.path.insert(0, dirname(path)) + sys.path.insert(0, dirname(path)) # XXX (syt) iirk node = self.string_build(data, modname, path) finally: sys.path.pop(0) node.file_encoding = encoding - node.file_stream = file_stream + node.file_stream = stream return node def string_build(self, data, modname='', path=None): @@ -116,18 +116,8 @@ class ASTNGManager(OptionsProviderMixIn): if modname in self._cache: return self._cache[modname] if source: - try: - from logilab.astng.builder import ASTNGBuilder - return ASTNGBuilder(self).file_build(filepath, modname) - except (SyntaxError, KeyboardInterrupt, SystemExit): - raise - except Exception, ex: - if __debug__: - print 'error while building astng for', filepath - import traceback - traceback.print_exc() - msg = 'Unable to load module %s (%s)' % (modname, ex) - raise ASTNGBuildingException, msg, sys.exc_info()[-1] + from logilab.astng.builder import ASTNGBuilder + return ASTNGBuilder(self).file_build(filepath, modname) elif fallback and modname: return self.astng_from_module_name(modname) raise ASTNGBuildingException('unable to get astng for file %s' % diff --git a/test/unittest_builder.py b/test/unittest_builder.py index 89fba46b..2f086266 100644 --- a/test/unittest_builder.py +++ b/test/unittest_builder.py @@ -43,8 +43,10 @@ from logilab.astng.nodes import Module from logilab.astng.bases import YES, BUILTINS_NAME from logilab.astng.as_string import as_string from logilab.astng.manager import ASTNGManager + MANAGER = ASTNGManager() + from unittest_inference import get_name_node import data @@ -682,5 +684,43 @@ def func(): self.assertEqual(chain.value, 'None') +guess_encoding = builder._guess_encoding + +class TestGuessEncoding(TestCase): + + def testEmacs(self): + e = guess_encoding('# -*- coding: UTF-8 -*-') + self.failUnlessEqual(e, 'UTF-8') + e = guess_encoding('# -*- coding:UTF-8 -*-') + self.failUnlessEqual(e, 'UTF-8') + e = guess_encoding(''' + ### -*- coding: ISO-8859-1 -*- + ''') + self.failUnlessEqual(e, 'ISO-8859-1') + e = guess_encoding(''' + + ### -*- coding: ISO-8859-1 -*- + ''') + self.failUnlessEqual(e, None) + + def testVim(self): + e = guess_encoding('# vim:fileencoding=UTF-8') + self.failUnlessEqual(e, 'UTF-8') + e = guess_encoding(''' + ### vim:fileencoding=ISO-8859-1 + ''') + self.failUnlessEqual(e, 'ISO-8859-1') + e = guess_encoding(''' + + ### vim:fileencoding= ISO-8859-1 + ''') + self.failUnlessEqual(e, None) + + def testUTF8(self): + e = guess_encoding('\xef\xbb\xbf any UTF-8 data') + self.failUnlessEqual(e, 'UTF-8') + e = guess_encoding(' any UTF-8 data \xef\xbb\xbf') + self.failUnlessEqual(e, None) + if __name__ == '__main__': unittest_main() |