Diffstat (limited to 'pygments/lexer.py')
-rw-r--r--  pygments/lexer.py  41
1 file changed, 31 insertions(+), 10 deletions(-)
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 0ede7927..f3543d41 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -8,17 +8,20 @@
:copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
-import re, itertools
+
+import re
+import itertools
from pygments.filter import apply_filters, Filter
from pygments.filters import get_filter_by_name
from pygments.token import Error, Text, Other, _TokenType
from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
- make_analysator, text_type, add_metaclass, iteritems
-
+ make_analysator, text_type, add_metaclass, iteritems
+from pygments.regexopt import regex_opt
__all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
- 'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this', 'default']
+ 'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this',
+ 'default', 'words']
_encoding_map = [(b'\xef\xbb\xbf', 'utf-8'),
@@ -160,7 +163,7 @@ class Lexer(object):
break
# no BOM found, so use chardet
if decoded is None:
- enc = chardet.detect(text[:1024]) # Guess using first 1KB
+ enc = chardet.detect(text[:1024]) # Guess using first 1KB
decoded = text.decode(enc.get('encoding') or 'utf-8',
'replace')
text = decoded
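The fallback above reduces to a small self-contained routine. A minimal sketch, assuming chardet is importable; the helper name guess_decode is ours, not the module's:

import chardet

def guess_decode(text):
    """Decode bytes to str, guessing the charset from the first kilobyte."""
    enc = chardet.detect(text[:1024])   # e.g. {'encoding': 'utf-8', 'confidence': 0.99}
    # chardet may report no encoding at all; fall back to UTF-8, and use
    # 'replace' so undecodable byte sequences never raise.
    return text.decode(enc.get('encoding') or 'utf-8', 'replace')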
@@ -237,7 +240,7 @@ class DelegatingLexer(Lexer):
self.root_lexer.get_tokens_unprocessed(buffered))
-#-------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------
# RegexLexer and ExtendedRegexLexer
#
@@ -387,12 +390,27 @@ class default:
"""
Indicates a state or state action (e.g. #pop) to apply.
For example default('#pop') is equivalent to ('', Token, '#pop')
- Note that state tuples may be used as well
+ Note that state tuples may be used as well.
+
+ .. versionadded:: 2.0
"""
def __init__(self, state):
self.state = state
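As an illustration of the equivalence the docstring states, here is a hypothetical 'string' state that bails out when no other rule applies; the lexer and its rules are invented for this sketch:

from pygments.lexer import RegexLexer, default
from pygments.token import String, Text

class DemoLexer(RegexLexer):          # hypothetical
    tokens = {
        'root': [
            (r'"', String, 'string'),
            (r'[^"]+', Text),
        ],
        'string': [
            (r'[^"\\]+', String),
            (r'\\.', String.Escape),
            (r'"', String, '#pop'),
            # Same effect as ('', Token, '#pop'): consume no input, just
            # pop back to 'root' (e.g. on an unterminated string).
            default('#pop'),
        ],
    }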
+class words:
+ """
+ Indicates a list of literal words that is transformed into an optimized
+ regex that matches any of the words.
+
+ .. versionadded:: 2.0
+ """
+ def __init__(self, words, prefix='', suffix=''):
+ self.words = words
+ self.prefix = prefix
+ self.suffix = suffix
+
+
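A sketch of how the helper is meant to appear in a token table; the lexer and its keyword list are invented, and RegexLexerMeta below turns the words(...) instance into a single optimized regex via regex_opt:

from pygments.lexer import RegexLexer, words
from pygments.token import Keyword, Name, Text

class KeywordDemoLexer(RegexLexer):   # hypothetical
    tokens = {
        'root': [
            # One rule instead of four hand-written alternations; prefix
            # and suffix wrap the generated pattern, here with word
            # boundaries so 'if' does not match inside 'iffy'.
            (words(('if', 'elif', 'else', 'while'),
                   prefix=r'\b', suffix=r'\b'), Keyword),
            (r'[A-Za-z_]\w*', Name),
            (r'\s+', Text),
        ],
    }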
class RegexLexerMeta(LexerMeta):
"""
Metaclass for RegexLexer, creates the self._tokens attribute from
@@ -401,12 +419,15 @@ class RegexLexerMeta(LexerMeta):
def _process_regex(cls, regex, rflags):
"""Preprocess the regular expression component of a token definition."""
+ if isinstance(regex, words):
+ return regex_opt(regex.words, rflags, prefix=regex.prefix,
+ suffix=regex.suffix).match
return re.compile(regex, rflags).match
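For a plain string rule, the cached matcher is simply the compiled pattern's bound .match; the words branch does the same through regex_opt, which this revision evidently expects to return a compiled pattern (it is handed the flags, and .match is taken straight off its result). A sketch of the plain-string path:

import re

# What _process_regex stores for an ordinary rule: the bound .match of
# the compiled pattern, later called with the input text and a position.
matcher = re.compile(r'\d+', re.MULTILINE).match
m = matcher('42 apples')
print(m.group())          # -> '42'
print(matcher('apples'))  # -> None: .match anchors at the given position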
def _process_token(cls, token):
"""Preprocess the token component of a token definition."""
assert type(token) is _TokenType or callable(token), \
- 'token type must be simple type or callable, not %r' % (token,)
+ 'token type must be simple type or callable, not %r' % (token,)
return token
def _process_new_state(cls, new_state, unprocessed, processed):
@@ -439,7 +460,7 @@ class RegexLexerMeta(LexerMeta):
for istate in new_state:
assert (istate in unprocessed or
istate in ('#pop', '#push')), \
- 'unknown new state ' + istate
+ 'unknown new state ' + istate
return new_state
else:
assert False, 'unknown new state def %r' % new_state
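Besides a plain state name and the special '#pop' and '#push' strings, the loop above accepts a tuple of known states; each entry is pushed onto the stack in order, so the last one becomes the active state. A contrived but working sketch:

from pygments.lexer import RegexLexer
from pygments.token import Name, Punctuation, Text

class TupleStateDemo(RegexLexer):     # hypothetical
    tokens = {
        'root': [
            # Pushes 'first' then 'second'; 'second' ends up on top of
            # the stack, so it is entered first.
            (r'\{', Punctuation, ('first', 'second')),
            (r'\s+', Text),
        ],
        'second': [
            (r'\w+', Name.Label, '#pop'),     # one word, then drop to 'first'
            (r'\s+', Text),
        ],
        'first': [
            (r'\w+', Name.Variable, '#pop'),  # one word, then back to 'root'
            (r'\s+', Text),
        ],
    }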
@@ -645,7 +666,7 @@ class LexerContext(object):
def __init__(self, text, pos, stack=None, end=None):
self.text = text
self.pos = pos
- self.end = end or len(text) # end=0 not supported ;-)
+ self.end = end or len(text) # end=0 not supported ;-)
self.stack = stack or ['root']
def __repr__(self):
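For reference, the context object is what ExtendedRegexLexer callbacks receive and mutate. A quick sketch of the defaulting behaviour noted in the comment above, using the constructor exactly as shown:

from pygments.lexer import LexerContext

ctx = LexerContext('some input text', 0)
assert ctx.end == len('some input text')    # end defaults to len(text)
assert ctx.stack == ['root']                # stack defaults to ['root']

# Because of the `or`, an explicit end=0 also becomes len(text); that is
# the 'end=0 not supported' caveat.
ctx2 = LexerContext('some input text', 5, end=0)
assert ctx2.end == len('some input text')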