summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Emile Anclin <emile.anclin@logilab.fr> 2010-12-15 10:34:07 +0100
committer Emile Anclin <emile.anclin@logilab.fr> 2010-12-15 10:34:07 +0100
commit a9e5aa99d683f8e7daebbc065bb363290cd9580e (patch)
tree 89ded6d31cf1312533b634c4d0fea8ca27a194e5
parent fb09403ab49b207da3aa9a9b3f738af0085c031e (diff)
parent 93234d5c28024cc72a0c22d80c3d6b65f3d84686 (diff)
download astroid-git-a9e5aa99d683f8e7daebbc065bb363290cd9580e.tar.gz
merge
-rw-r--r-- builder.py 59
-rw-r--r-- manager.py 14
-rw-r--r-- test/unittest_builder.py 40
3 files changed, 73 insertions, 40 deletions
diff --git a/builder.py b/builder.py
index 5c6e5710..8f82c65e 100644
--- a/builder.py
+++ b/builder.py
@@ -42,42 +42,45 @@ from _ast import PyCF_ONLY_AST
def parse(string):
return compile(string, "<string>", 'exec', PyCF_ONLY_AST)
-_ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)")
-
-def _guess_encoding(string):
- """get encoding from a python file as string or return None if not found
- """
- # check for UTF-8 byte-order mark
- if string.startswith('\xef\xbb\xbf'):
- return 'UTF-8'
- for line in string.split('\n', 2)[:2]:
- # check for encoding declaration
- match = _ENCODING_RGX.match(line)
- if match is not None:
- return match.group(1)
-
-def get_data(filename):
- """get data for parsing a file"""
- stream = open(filename, 'U')
- data = stream.read()
- encoding = _guess_encoding(data)
- return stream, encoding, data
-
if sys.version_info >= (3, 0):
from tokenize import detect_encoding
- def get_data(filename):
+ def open_source_file(filename):
byte_stream = open(filename, 'bU')
encoding = detect_encoding(byte_stream.readline)[0]
stream = open(filename, 'U', encoding=encoding)
try:
data = stream.read()
- except UnicodeError: # wrong encoding
+ except UnicodeError, uex: # wrong encoding
# detect_encoding returns utf-8 if no encoding specified
msg = 'Wrong (%s) or no encoding specified' % encoding
raise ASTNGBuildingException(msg)
return stream, encoding, data
+else:
+ import re
+
+ _ENCODING_RGX = re.compile("[^#]*#*.*coding[:=]\s*([^\s]+)")
+
+ def _guess_encoding(string):
+ """get encoding from a python file as string or return None if not found
+ """
+ # check for UTF-8 byte-order mark
+ if string.startswith('\xef\xbb\xbf'):
+ return 'UTF-8'
+ for line in string.split('\n', 2)[:2]:
+ # check for encoding declaration
+ match = _ENCODING_RGX.match(line)
+ if match is not None:
+ return match.group(1)
+
+ def open_source_file(filename):
+ """get data for parsing a file"""
+ stream = open(filename, 'U')
+ data = stream.read()
+ encoding = _guess_encoding(data)
+ return stream, encoding, data
+
# ast NG builder ##############################################################
MANAGER = ASTNGManager()
@@ -111,14 +114,14 @@ class ASTNGBuilder(InspectBuilder):
path is expected to be a python source file
"""
try:
- file_stream, encoding, data = get_data(path)
+ stream, encoding, data = open_source_file(path)
except IOError, exc:
msg = 'Unable to load file %r (%s)' % (path, exc)
raise ASTNGBuildingException(msg)
except SyntaxError, exc: # py3k encoding specification error
raise ASTNGBuildingException(exc)
- except LookupError, l_ex: # unknown encoding
- raise ASTNGBuildingException(l_ex)
+ except LookupError, exc: # unknown encoding
+ raise ASTNGBuildingException(exc)
# get module name if necessary, *before modifying sys.path*
if modname is None:
try:
@@ -127,12 +130,12 @@ class ASTNGBuilder(InspectBuilder):
modname = splitext(basename(path))[0]
# build astng representation
try:
- sys.path.insert(0, dirname(path))
+ sys.path.insert(0, dirname(path)) # XXX (syt) iirk
node = self.string_build(data, modname, path)
finally:
sys.path.pop(0)
node.file_encoding = encoding
- node.file_stream = file_stream
+ node.file_stream = stream
return node
def string_build(self, data, modname='', path=None):
diff --git a/manager.py b/manager.py
index 22b5f831..55f55c51 100644
--- a/manager.py
+++ b/manager.py
@@ -116,18 +116,8 @@ class ASTNGManager(OptionsProviderMixIn):
if modname in self._cache:
return self._cache[modname]
if source:
- try:
- from logilab.astng.builder import ASTNGBuilder
- return ASTNGBuilder(self).file_build(filepath, modname)
- except (SyntaxError, KeyboardInterrupt, SystemExit):
- raise
- except Exception, ex:
- if __debug__:
- print 'error while building astng for', filepath
- import traceback
- traceback.print_exc()
- msg = 'Unable to load module %s (%s)' % (modname, ex)
- raise ASTNGBuildingException, msg, sys.exc_info()[-1]
+ from logilab.astng.builder import ASTNGBuilder
+ return ASTNGBuilder(self).file_build(filepath, modname)
elif fallback and modname:
return self.astng_from_module_name(modname)
raise ASTNGBuildingException('unable to get astng for file %s' %
diff --git a/test/unittest_builder.py b/test/unittest_builder.py
index 89fba46b..2f086266 100644
--- a/test/unittest_builder.py
+++ b/test/unittest_builder.py
@@ -43,8 +43,10 @@ from logilab.astng.nodes import Module
from logilab.astng.bases import YES, BUILTINS_NAME
from logilab.astng.as_string import as_string
from logilab.astng.manager import ASTNGManager
+
MANAGER = ASTNGManager()
+
from unittest_inference import get_name_node
import data
@@ -682,5 +684,43 @@ def func():
self.assertEqual(chain.value, 'None')
+guess_encoding = builder._guess_encoding
+
+class TestGuessEncoding(TestCase):
+
+ def testEmacs(self):
+ e = guess_encoding('# -*- coding: UTF-8 -*-')
+ self.failUnlessEqual(e, 'UTF-8')
+ e = guess_encoding('# -*- coding:UTF-8 -*-')
+ self.failUnlessEqual(e, 'UTF-8')
+ e = guess_encoding('''
+ ### -*- coding: ISO-8859-1 -*-
+ ''')
+ self.failUnlessEqual(e, 'ISO-8859-1')
+ e = guess_encoding('''
+
+ ### -*- coding: ISO-8859-1 -*-
+ ''')
+ self.failUnlessEqual(e, None)
+
+ def testVim(self):
+ e = guess_encoding('# vim:fileencoding=UTF-8')
+ self.failUnlessEqual(e, 'UTF-8')
+ e = guess_encoding('''
+ ### vim:fileencoding=ISO-8859-1
+ ''')
+ self.failUnlessEqual(e, 'ISO-8859-1')
+ e = guess_encoding('''
+
+ ### vim:fileencoding= ISO-8859-1
+ ''')
+ self.failUnlessEqual(e, None)
+
+ def testUTF8(self):
+ e = guess_encoding('\xef\xbb\xbf any UTF-8 data')
+ self.failUnlessEqual(e, 'UTF-8')
+ e = guess_encoding(' any UTF-8 data \xef\xbb\xbf')
+ self.failUnlessEqual(e, None)
+
if __name__ == '__main__':
unittest_main()