diff options
author | Georg Brandl <georg@python.org> | 2014-09-16 14:06:54 +0200 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2014-09-16 14:06:54 +0200 |
commit | 4ebcf72d1a077c29d94a0cefce3f068ce41a37eb (patch) | |
tree | 887c6378b170a1fb00a252d929738cb227f99967 /pygments/lexer.py | |
parent | 5e5586a698e82c7b596ab2e47f035d2aa941b400 (diff) | |
download | pygments-4ebcf72d1a077c29d94a0cefce3f068ce41a37eb.tar.gz |
Add module to optimize regexes that consist of a long |-separated list of literals.
Diffstat (limited to 'pygments/lexer.py')
-rw-r--r-- | pygments/lexer.py | 24 |
1 file changed, 21 insertions, 3 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py index 5214d43e..f3543d41 100644 --- a/pygments/lexer.py +++ b/pygments/lexer.py @@ -17,11 +17,11 @@ from pygments.filters import get_filter_by_name from pygments.token import Error, Text, Other, _TokenType from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \ make_analysator, text_type, add_metaclass, iteritems - +from pygments.regexopt import regex_opt __all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer', 'LexerContext', 'include', 'inherit', 'bygroups', 'using', 'this', - 'default'] + 'default', 'words'] _encoding_map = [(b'\xef\xbb\xbf', 'utf-8'), @@ -390,12 +390,27 @@ class default: """ Indicates a state or state action (e.g. #pop) to apply. For example default('#pop') is equivalent to ('', Token, '#pop') - Note that state tuples may be used as well + Note that state tuples may be used as well. + + .. versionadded:: 2.0 """ def __init__(self, state): self.state = state +class words: + """ + Indicates a list of literal words that is transformed into an optimized + regex that matches any of the words. + + .. versionadded:: 2.0 + """ + def __init__(self, words, prefix='', suffix=''): + self.words = words + self.prefix = prefix + self.suffix = suffix + + class RegexLexerMeta(LexerMeta): """ Metaclass for RegexLexer, creates the self._tokens attribute from @@ -404,6 +419,9 @@ class RegexLexerMeta(LexerMeta): def _process_regex(cls, regex, rflags): """Preprocess the regular expression component of a token definition.""" + if isinstance(regex, words): + return regex_opt(regex.words, rflags, prefix=regex.prefix, + suffix=regex.suffix).match return re.compile(regex, rflags).match def _process_token(cls, token): |