summaryrefslogtreecommitdiff
path: root/checkers/misc.py
diff options
context:
space:
mode:
authorroot <devnull@localhost>2006-04-26 10:48:09 +0000
committerroot <devnull@localhost>2006-04-26 10:48:09 +0000
commiteea76f1da01a33dec2afc42119e001e4350aaea2 (patch)
tree3bb03a16daa8c780bf60c622dc288eb01cfca145 /checkers/misc.py
downloadpylint-eea76f1da01a33dec2afc42119e001e4350aaea2.tar.gz
forget the past.
forget the past.
Diffstat (limited to 'checkers/misc.py')
-rw-r--r--checkers/misc.py127
1 files changed, 127 insertions, 0 deletions
diff --git a/checkers/misc.py b/checkers/misc.py
new file mode 100644
index 0000000..7b59b95
--- /dev/null
+++ b/checkers/misc.py
@@ -0,0 +1,127 @@
+# pylint: disable-msg=W0511
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+""" Copyright (c) 2000-2003 LOGILAB S.A. (Paris, FRANCE).
+ http://www.logilab.fr/ -- mailto:contact@logilab.fr
+
+Check source code is ascii only or has an encoding declaration (PEP 263)
+"""
+
+__revision__ = '$Id: misc.py,v 1.19 2005-11-02 09:21:47 syt Exp $'
+
+import re
+
+from pylint.interfaces import IRawChecker
+from pylint.checkers import BaseChecker
+
def is_ascii(string):
    """Return True if `string` contains only ASCII characters.

    An empty (or otherwise false) value is considered ASCII.  Both text
    strings and byte strings are accepted: iterating a Python 3 ``bytes``
    object yields integers, which are compared directly instead of being
    passed to ord().
    """
    if not string:
        return True
    for char in string:
        if isinstance(char, int):
            code = char
        else:
            code = ord(char)
        if code >= 128:
            # stop at the first offending character rather than building a
            # list of every code point in the file
            return False
    return True
+
# Regexp matching both emacs and vim encoding declarations.  This is the
# pattern given by PEP 263: the declaration must live in a comment, and the
# encoding name is restricted to [-_.a-zA-Z0-9] so that trailing decoration
# such as "-*-" or a vim modeline's ":" is not captured.
ENCODING_RGX = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")

def guess_encoding(string):
    """Try to guess the encoding of a python file given as a string.

    Return 'UTF-8' if the string starts with the UTF-8 byte-order mark,
    otherwise the encoding named by a declaration comment found on one of
    the first two lines, otherwise None.
    """
    assert isinstance(string, str), type(string)
    # check for UTF-8 byte-order mark
    if string.startswith('\xef\xbb\xbf'):
        return 'UTF-8'
    # only the first two lines may carry the declaration
    for line in string.split('\n', 2)[:2]:
        # check for emacs / vim encoding declaration
        match = ENCODING_RGX.match(line)
        if match is not None:
            return match.group(1)
    return None
+
+
# Message catalogue of the checker: message id -> (message template, help).
# Help texts are built with implicit string concatenation rather than
# backslash continuations inside the literal, so that the indentation of the
# source file does not end up embedded in the text shown to the user.
MSGS = {
    'E0501': ('Non ascii characters found but no encoding specified (PEP 263)',
              'Used when some non ascii characters are detected but no '
              'encoding is specified, as explained in PEP 263.'),
    'E0502': ('Wrong encoding specified (%s)',
              "Used when a known encoding is specified but the file doesn't "
              "seem to be actually in this encoding."),
    'E0503': ('Unknown encoding specified (%s)',
              "Used when an encoding is specified, but it's unknown to Python."
              ),

    'W0511': ('%s',
              'Used when a warning note as FIXME or XXX is detected.'),
    }
+
class EncodingChecker(BaseChecker):
    """checks for:
    * warning notes in the code like FIXME, XXX
    * PEP 263: source code with non ascii character but no encoding declaration
    """
    __implements__ = IRawChecker

    # configuration section name
    name = 'miscellaneous'
    msgs = MSGS

    options = (('notes',
                {'type' : 'csv', 'metavar' : '<comma separated values>',
                 'default' : ('FIXME', 'XXX', 'TODO'),
                 'help' : 'List of note tags to take in consideration, '
                          'separated by a comma. Default to FIXME, XXX, TODO'
                 }),
               )

    def __init__(self, linter=None):
        BaseChecker.__init__(self, linter)

    def process_module(self, stream):
        """Inspect the raw source of a module.

        `stream` is read twice (once entirely, then line by line after a
        seek(0)).  Emits E0501 / E0502 / E0503 on line 1 for encoding
        problems, and W0511 for each line containing one of the configured
        note tags.
        """
        # source encoding: only files with non ascii content need a
        # declaration
        data = stream.read()
        if not is_ascii(data):
            encoding = guess_encoding(data)
            if encoding is None:
                self.add_message('E0501', line=1)
            else:
                try:
                    # decoding is done only to validate the declaration,
                    # the result is discarded
                    unicode(data, encoding)
                except UnicodeError:
                    self.add_message('E0502', args=encoding, line=1)
                except LookupError:
                    self.add_message('E0503', args=encoding, line=1)
        del data
        # warning notes in the code
        stream.seek(0)
        # NOTE(review): tags are compiled as regular expressions without
        # escaping -- presumably intentional, confirm before changing
        notes = [re.compile(note) for note in self.config.notes]
        linenum = 0
        for line in stream.readlines():
            linenum += 1
            for note in notes:
                match = note.search(line)
                if match:
                    # strip only the line terminator: the last line of a
                    # file may have none, in which case blindly dropping the
                    # final character would truncate the note itself
                    text = line[match.start():].rstrip('\r\n')
                    self.add_message('W0511', args=text, line=linenum)
                    # report at most one note per line
                    break
+
+
+
def register(linter):
    """Auto-registration hook: attach an EncodingChecker to `linter`."""
    checker = EncodingChecker(linter)
    linter.register_checker(checker)