diff options
author | Georg Brandl <georg@python.org> | 2013-01-09 13:15:36 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2013-01-09 13:15:36 +0100 |
commit | 1b551e169d6c29d49c90baa62ea8886a7e8be46f (patch) | |
tree | 4c57f637cf7d23a1a52f058b1dba36d32d1ed73d /pygments/lexer.py | |
parent | 01ff62d16bcb567d2c7becd241e2b5bfb1bd3a93 (diff) | |
parent | 73a4b067b9ec07088e7c2e77e3f3c040aedcd50c (diff) | |
download | pygments-1b551e169d6c29d49c90baa62ea8886a7e8be46f.tar.gz |
Merged in alastairh/pygments (pull request #141: Improve and unify C family language lexers)
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r-- | pygments/lexer.py | 35 |
1 files changed, 18 insertions, 17 deletions
```diff
diff --git a/pygments/lexer.py b/pygments/lexer.py
index b8cf69fb..53539df7 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -5,7 +5,7 @@
 
     Base lexer classes.
 
-    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 import re, itertools
@@ -72,13 +72,13 @@ class Lexer(object):
     #: Shortcuts for the lexer
     aliases = []
 
-    #: fn match rules
+    #: File name globs
     filenames = []
 
-    #: fn alias filenames
+    #: Secondary file name globs
     alias_filenames = []
 
-    #: mime types
+    #: MIME types
     mimetypes = []
 
     #: Priority, should multiple lexers match and no content is provided
@@ -166,6 +166,10 @@ class Lexer(object):
                     text = decoded
                 else:
                     text = text.decode(self.encoding)
+        else:
+            if text.startswith(u'\ufeff'):
+                text = text[len(u'\ufeff'):]
+
         # text now *is* a unicode string
         text = text.replace('\r\n', '\n')
         text = text.replace('\r', '\n')
@@ -240,6 +244,7 @@ class include(str):
     """
     pass
 
+
 class _inherit(object):
     """
     Indicates the a state should inherit from its superclass.
@@ -249,6 +254,7 @@ class _inherit(object):
 
 inherit = _inherit()
 
+
 class combined(tuple):
     """
     Indicates a state combined from multiple states.
@@ -472,15 +478,15 @@ class RegexLexerMeta(LexerMeta):
 
     def get_tokendefs(cls):
         """
-        Merge tokens from superclasses in MRO order, returning a single
-        tokendef dictionary.
+        Merge tokens from superclasses in MRO order, returning a single tokendef
+        dictionary.
 
         Any state that is not defined by a subclass will be inherited
         automatically. States that *are* defined by subclasses will, by
-        default, override that state in the superclass. If a subclass
-        wishes to inherit definitions from a superclass, it can use the
-        special value "inherit", which will cause the superclass' state
-        definition to be included at that point in the state.
+        default, override that state in the superclass. If a subclass wishes to
+        inherit definitions from a superclass, it can use the special value
+        "inherit", which will cause the superclass' state definition to be
+        included at that point in the state.
         """
         tokens = {}
         inheritable = {}
@@ -491,12 +497,10 @@ class RegexLexerMeta(LexerMeta):
                 curitems = tokens.get(state)
                 if curitems is None:
                     tokens[state] = items
-
                     try:
                         inherit_ndx = items.index(inherit)
                     except ValueError:
                         continue
-
                     inheritable[state] = inherit_ndx
                     continue
 
@@ -506,13 +510,11 @@ class RegexLexerMeta(LexerMeta):
 
                 # Replace the "inherit" value with the items
                 curitems[inherit_ndx:inherit_ndx+1] = items
-
                 try:
                     new_inh_ndx = items.index(inherit)
                 except ValueError:
-                    new_inh_ndx = -1
-
-                if new_inh_ndx != -1:
+                    pass
+                else:
                     inheritable[state] = inherit_ndx + new_inh_ndx
 
         return tokens
@@ -755,4 +757,3 @@ def do_insertions(insertions, tokens):
         except StopIteration:
             insleft = False
             break # not strictly necessary
-
```