merge

author: Emile Anclin <emile.anclin@logilab.fr> 2010-12-15 10:34:07 +0100
committer: Emile Anclin <emile.anclin@logilab.fr> 2010-12-15 10:34:07 +0100
commit: a9e5aa99d683f8e7daebbc065bb363290cd9580e (patch)
tree: 89ded6d31cf1312533b634c4d0fea8ca27a194e5
parent: fb09403ab49b207da3aa9a9b3f738af0085c031e (diff)
parent: 93234d5c28024cc72a0c22d80c3d6b65f3d84686 (diff)
download: astroid-git-a9e5aa99d683f8e7daebbc065bb363290cd9580e.tar.gz
3 files changed, 73 insertions, 40 deletions
diff --git a/builder.py b/builder.py
index 5c6e5710..8f82c65e 100644
--- a/builder.py
+++ b/builder.py
@@ -42,42 +42,45 @@ from _ast import PyCF_ONLY_AST
 def parse(string):
     return compile(string, "<string>", 'exec', PyCF_ONLY_AST)
 
-_ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)")
-
-def _guess_encoding(string):
-    """get encoding from a python file as string or return None if not found
-    """
-    # check for UTF-8 byte-order mark
-    if string.startswith('\xef\xbb\xbf'):
-        return 'UTF-8'
-    for line in string.split('\n', 2)[:2]:
-        # check for encoding declaration
-        match = _ENCODING_RGX.match(line)
-        if match is not None:
-            return match.group(1)
-
-def get_data(filename):
-    """get data for parsing a file"""
-    stream = open(filename, 'U')
-    data = stream.read()
-    encoding = _guess_encoding(data)
-    return stream, encoding, data
-
 if sys.version_info >= (3, 0):
     from tokenize import detect_encoding
 
-    def get_data(filename):
+    def open_source_file(filename):
         byte_stream = open(filename, 'bU')
         encoding = detect_encoding(byte_stream.readline)[0]
         stream = open(filename, 'U', encoding=encoding)
         try:
             data = stream.read()
-        except UnicodeError: # wrong encoding
+        except UnicodeError, uex: # wrong encoding
             # detect_encoding returns utf-8 if no encoding specified
             msg = 'Wrong (%s) or no encoding specified' % encoding
             raise ASTNGBuildingException(msg)
         return stream, encoding, data
 
+else:
+    import re
+
+    _ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)")
+
+    def _guess_encoding(string):
+        """get encoding from a python file as string or return None if not found
+        """
+        # check for UTF-8 byte-order mark
+        if string.startswith('\xef\xbb\xbf'):
+            return 'UTF-8'
+        for line in string.split('\n', 2)[:2]:
+            # check for encoding declaration
+            match = _ENCODING_RGX.match(line)
+            if match is not None:
+                return match.group(1)
+
+    def open_source_file(filename):
+        """get data for parsing a file"""
+        stream = open(filename, 'U')
+        data = stream.read()
+        encoding = _guess_encoding(data)
+        return stream, encoding, data
+
 # ast NG builder ##############################################################
 
 MANAGER = ASTNGManager()
@@ -111,14 +114,14 @@ class ASTNGBuilder(InspectBuilder):
         path is expected to be a python source file
         """
         try:
-            file_stream, encoding, data = get_data(path)
+            stream, encoding, data = open_source_file(path)
         except IOError, exc:
             msg = 'Unable to load file %r (%s)' % (path, exc)
             raise ASTNGBuildingException(msg)
         except SyntaxError, exc: # py3k encoding specification error
             raise ASTNGBuildingException(exc)
-        except LookupError, l_ex: # unknown encoding
-            raise ASTNGBuildingException(l_ex)
+        except LookupError, exc: # unknown encoding
+            raise ASTNGBuildingException(exc)
         # get module name if necessary, *before modifying sys.path*
         if modname is None:
             try:
@@ -127,12 +130,12 @@ class ASTNGBuilder(InspectBuilder):
                 modname = splitext(basename(path))[0]
         # build astng representation
         try:
-            sys.path.insert(0, dirname(path))
+            sys.path.insert(0, dirname(path)) # XXX (syt) iirk
             node = self.string_build(data, modname, path)
         finally:
             sys.path.pop(0)
         node.file_encoding = encoding
-        node.file_stream = file_stream
+        node.file_stream = stream
         return node
 
     def string_build(self, data, modname='', path=None):
diff --git a/manager.py b/manager.py
index 22b5f831..55f55c51 100644
--- a/manager.py
+++ b/manager.py
@@ -116,18 +116,8 @@ class ASTNGManager(OptionsProviderMixIn):
         if modname in self._cache:
             return self._cache[modname]
         if source:
-            try:
-                from logilab.astng.builder import ASTNGBuilder
-                return ASTNGBuilder(self).file_build(filepath, modname)
-            except (SyntaxError, KeyboardInterrupt, SystemExit):
-                raise
-            except Exception, ex:
-                if __debug__:
-                    print 'error while building astng for', filepath
-                    import traceback
-                    traceback.print_exc()
-                msg = 'Unable to load module %s (%s)' % (modname, ex)
-                raise ASTNGBuildingException, msg, sys.exc_info()[-1]
+            from logilab.astng.builder import ASTNGBuilder
+            return ASTNGBuilder(self).file_build(filepath, modname)
         elif fallback and modname:
             return self.astng_from_module_name(modname)
         raise ASTNGBuildingException('unable to get astng for file %s' %
diff --git a/test/unittest_builder.py b/test/unittest_builder.py
index 89fba46b..2f086266 100644
--- a/test/unittest_builder.py
+++ b/test/unittest_builder.py
@@ -43,8 +43,10 @@ from logilab.astng.nodes import Module
 from logilab.astng.bases import YES, BUILTINS_NAME
 from logilab.astng.as_string import as_string
 from logilab.astng.manager import ASTNGManager
+
 MANAGER = ASTNGManager()
 
+
 from unittest_inference import get_name_node
 
 import data
@@ -682,5 +684,43 @@ def func():
         self.assertEqual(chain.value, 'None')
 
 
+guess_encoding = builder._guess_encoding
+
+class TestGuessEncoding(TestCase):
+
+    def testEmacs(self):
+        e = guess_encoding('# -*- coding: UTF-8  -*-')
+        self.failUnlessEqual(e, 'UTF-8')
+        e = guess_encoding('# -*- coding:UTF-8 -*-')
+        self.failUnlessEqual(e, 'UTF-8')
+        e = guess_encoding('''
+        ### -*- coding: ISO-8859-1  -*-
+        ''')
+        self.failUnlessEqual(e, 'ISO-8859-1')
+        e = guess_encoding('''
+
+        ### -*- coding: ISO-8859-1  -*-
+        ''')
+        self.failUnlessEqual(e, None)
+
+    def testVim(self):
+        e = guess_encoding('# vim:fileencoding=UTF-8')
+        self.failUnlessEqual(e, 'UTF-8')
+        e = guess_encoding('''
+        ### vim:fileencoding=ISO-8859-1
+        ''')
+        self.failUnlessEqual(e, 'ISO-8859-1')
+        e = guess_encoding('''
+
+        ### vim:fileencoding= ISO-8859-1
+        ''')
+        self.failUnlessEqual(e, None)
+
+    def testUTF8(self):
+        e = guess_encoding('\xef\xbb\xbf any UTF-8 data')
+        self.failUnlessEqual(e, 'UTF-8')
+        e = guess_encoding(' any UTF-8 data \xef\xbb\xbf')
+        self.failUnlessEqual(e, None)
+
 if __name__ == '__main__':
     unittest_main()
author	Emile Anclin <emile.anclin@logilab.fr>	2010-12-15 10:34:07 +0100
committer	Emile Anclin <emile.anclin@logilab.fr>	2010-12-15 10:34:07 +0100
commit	a9e5aa99d683f8e7daebbc065bb363290cd9580e (patch)
tree	89ded6d31cf1312533b634c4d0fea8ca27a194e5
parent	fb09403ab49b207da3aa9a9b3f738af0085c031e (diff)
parent	93234d5c28024cc72a0c22d80c3d6b65f3d84686 (diff)
download	astroid-git-a9e5aa99d683f8e7daebbc065bb363290cd9580e.tar.gz