Merged in alastairh/pygments (pull request #141: Improve and unify C family language lexers)

author: Georg Brandl <georg@python.org> 2013-01-09 13:15:36 +0100
committer: Georg Brandl <georg@python.org> 2013-01-09 13:15:36 +0100
commit: 1b551e169d6c29d49c90baa62ea8886a7e8be46f (patch)
tree: 4c57f637cf7d23a1a52f058b1dba36d32d1ed73d /pygments/lexer.py
parent: 01ff62d16bcb567d2c7becd241e2b5bfb1bd3a93 (diff)
parent: 73a4b067b9ec07088e7c2e77e3f3c040aedcd50c (diff)
download: pygments-1b551e169d6c29d49c90baa62ea8886a7e8be46f.tar.gz
1 files changed, 18 insertions, 17 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index b8cf69fb..53539df7 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -5,7 +5,7 @@
 
     Base lexer classes.
 
-    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 import re, itertools
@@ -72,13 +72,13 @@ class Lexer(object):
     #: Shortcuts for the lexer
     aliases = []
 
-    #: fn match rules
+    #: File name globs
     filenames = []
 
-    #: fn alias filenames
+    #: Secondary file name globs
     alias_filenames = []
 
-    #: mime types
+    #: MIME types
     mimetypes = []
 
     #: Priority, should multiple lexers match and no content is provided
@@ -166,6 +166,10 @@ class Lexer(object):
                 text = decoded
             else:
                 text = text.decode(self.encoding)
+        else:
+            if text.startswith(u'\ufeff'):
+                text = text[len(u'\ufeff'):]
+
         # text now *is* a unicode string
         text = text.replace('\r\n', '\n')
         text = text.replace('\r', '\n')
@@ -240,6 +244,7 @@ class include(str):
     """
     pass
 
+
 class _inherit(object):
     """
     Indicates the a state should inherit from its superclass.
@@ -249,6 +254,7 @@ class _inherit(object):
 
 inherit = _inherit()
 
+
 class combined(tuple):
     """
     Indicates a state combined from multiple states.
@@ -472,15 +478,15 @@ class RegexLexerMeta(LexerMeta):
 
     def get_tokendefs(cls):
         """
-        Merge tokens from superclasses in MRO order, returning a single
-        tokendef dictionary.
+        Merge tokens from superclasses in MRO order, returning a single tokendef
+        dictionary.
 
         Any state that is not defined by a subclass will be inherited
         automatically.  States that *are* defined by subclasses will, by
-        default, override that state in the superclass.  If a subclass
-        wishes to inherit definitions from a superclass, it can use the
-        special value "inherit", which will cause the superclass' state
-        definition to be included at that point in the state.
+        default, override that state in the superclass.  If a subclass wishes to
+        inherit definitions from a superclass, it can use the special value
+        "inherit", which will cause the superclass' state definition to be
+        included at that point in the state.
         """
         tokens = {}
         inheritable = {}
@@ -491,12 +497,10 @@ class RegexLexerMeta(LexerMeta):
                 curitems = tokens.get(state)
                 if curitems is None:
                     tokens[state] = items
-
                     try:
                         inherit_ndx = items.index(inherit)
                     except ValueError:
                         continue
-
                     inheritable[state] = inherit_ndx
                     continue
 
@@ -506,13 +510,11 @@ class RegexLexerMeta(LexerMeta):
 
                 # Replace the "inherit" value with the items
                 curitems[inherit_ndx:inherit_ndx+1] = items
-
                 try:
                     new_inh_ndx = items.index(inherit)
                 except ValueError:
-                    new_inh_ndx = -1
-
-                if new_inh_ndx != -1:
+                    pass
+                else:
                     inheritable[state] = inherit_ndx + new_inh_ndx
 
         return tokens
@@ -755,4 +757,3 @@ def do_insertions(insertions, tokens):
         except StopIteration:
             insleft = False
             break  # not strictly necessary
-
author	Georg Brandl <georg@python.org>	2013-01-09 13:15:36 +0100
committer	Georg Brandl <georg@python.org>	2013-01-09 13:15:36 +0100
commit	1b551e169d6c29d49c90baa62ea8886a7e8be46f (patch)
tree	4c57f637cf7d23a1a52f058b1dba36d32d1ed73d /pygments/lexer.py
parent	01ff62d16bcb567d2c7becd241e2b5bfb1bd3a93 (diff)
parent	73a4b067b9ec07088e7c2e77e3f3c040aedcd50c (diff)
download	pygments-1b551e169d6c29d49c90baa62ea8886a7e8be46f.tar.gz