-rw-r--r--   pygments/lexer.py            |  23
-rw-r--r--   pygments/lexers/__init__.py  | 106
-rw-r--r--   pygments/lexers/templates.py |  56
3 files changed, 145 insertions, 40 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 049b2868..55a74e19 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -1,15 +1,20 @@
 # -*- coding: utf-8 -*-
 """
-pygments.lexer
-~~~~~~~~~~~~~~
+    pygments.lexer
+    ~~~~~~~~~~~~~~
 
-Base lexer classes.
+    Base lexer classes.
 
-:copyright: 2006 by Georg Brandl.
-:license: GNU LGPL, see LICENSE for more details.
+    :copyright: 2006 by Georg Brandl.
+    :license: GNU LGPL, see LICENSE for more details.
 """
 import re
 
+try:
+    set
+except NameError:
+    from sets import Set as set
+
 from types import FunctionType
 from pygments.token import Error, Text, Other, _TokenType
 from pygments.util import get_bool_opt, get_int_opt, make_analysator
@@ -31,6 +36,9 @@ class LexerMeta(type):
     def __new__(cls, name, bases, d):
         if 'analyse_text' in d:
             d['analyse_text'] = make_analysator(d['analyse_text'])
+        for key in 'aliases', 'filenames', 'alias_filenames':
+            if key in d:
+                d[key] = set(d[key])
         return type.__new__(cls, name, bases, d)
 
 
@@ -57,6 +65,9 @@ class Lexer(object):
     #: fn match rules
     filenames = []
 
+    #: fn alias filenames
+    alias_filenames = []
+
     __metaclass__ = LexerMeta
 
     def __init__(self, **options):
@@ -203,7 +214,7 @@ def bygroups(*args):
                     yield match.start(i + 1), action, data
             else:
                 if ctx:
-                    ctx.pos = match.start(i+1)
+                    ctx.pos = match.start(i + 1)
                 for item in action(lexer, _PseudoMatch(match.start(i + 1),
                                    match.group(i + 1)), ctx):
                     if item:
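The pygments/lexer.py hunks above make ``LexerMeta`` convert the ``aliases``, ``filenames`` and the new
``alias_filenames`` class attributes into sets when a lexer class is created (with a ``sets.Set`` fallback
for old Pythons), and give ``Lexer`` an empty ``alias_filenames`` default. A minimal sketch of the effect,
assuming this revision is installed; ``DemoLexer`` is made up purely for illustration::

    from pygments.lexer import Lexer

    class DemoLexer(Lexer):
        # hypothetical lexer, only here to show the metaclass at work
        name = 'Demo'
        aliases = ['demo']
        filenames = ['*.demo']
        alias_filenames = ['*.txt']

    # LexerMeta has rewritten the three list attributes as sets, so the
    # lookup functions below can do cheap membership tests:
    print type(DemoLexer.aliases)                # set (or sets.Set on Python < 2.4)
    print '*.txt' in DemoLexer.alias_filenames   # True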
diff --git a/pygments/lexers/__init__.py b/pygments/lexers/__init__.py
index b102076d..2dcab576 100644
--- a/pygments/lexers/__init__.py
+++ b/pygments/lexers/__init__.py
@@ -12,6 +12,11 @@
 import fnmatch
 import types
 from os.path import basename
 
+try:
+    set
+except NameError:
+    from sets import Set as set
+
 from pygments.lexers._mapping import LEXERS
 from pygments.plugin import find_plugin_lexers
@@ -32,68 +37,105 @@ def _load_lexers(module_name):
         _lexer_cache[cls.name] = cls
 
 
-def get_lexer_by_name(alias, **options):
+def _iter_lexers():
+    """
+    Returns a generator for all lexer classes
+    """
+    for module_name, name, _, _ in LEXERS.itervalues():
+        if name not in _lexer_cache:
+            _load_lexers(module_name)
+        yield _lexer_cache[name]
+    for lexer in find_plugin_lexers():
+        yield lexer
+
+
+def get_lexer_by_name(_alias, **options):
     """
     Get a lexer by an alias
     """
     # lookup builtin lexers
     for module_name, name, aliases, _ in LEXERS.itervalues():
-        if alias in aliases:
+        if _alias in aliases:
             if name not in _lexer_cache:
                 _load_lexers(module_name)
             return _lexer_cache[name](**options)
     # continue with lexers from setuptools entrypoints
     for cls in find_plugin_lexers():
-        if alias in cls.aliases:
+        if _alias in cls.aliases:
             return cls(**options)
     raise ValueError('no lexer for alias %r found' % alias)
 
 
-def get_lexer_for_filename(fn, **options):
+def get_lexer_for_filename(_fn, **options):
     """
     Guess a lexer by a filename
     """
-    fn = basename(fn)
-    # lookup builtin lexers
-    for module_name, name, _, filenames in LEXERS.itervalues():
-        for filename in filenames:
+    fn = basename(_fn)
+    for lexer in _iter_lexers():
+        for filename in lexer.filenames:
+            if fnmatch.fnmatch(_fn, filename):
+                return lexer(**options)
+    raise ValueError('no lexer for filename %r found' % fn)
+
+
+def guess_lexer_for_filename(_fn, _text, **options):
+    """
+    Lookup all lexers that handle those filenames primary (``filenames``)
+    or secondary (``alias_filenames``). Then run a text analysis for those
+    lexers and choose the best result.
+
+    usage::
+
+        >>> from pygments.lexers import guess_lexer_for_filename
+        >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
+        <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
+        >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
+        <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
+        >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
+        <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
+    """
+    fn = basename(_fn)
+    primary = None
+    matching_lexers = set()
+    for lexer in _iter_lexers():
+        for filename in lexer.filenames:
             if fnmatch.fnmatch(fn, filename):
-                if name not in _lexer_cache:
-                    _load_lexers(module_name)
-                return _lexer_cache[name](**options)
-    # continue with lexers from setuptools entrypoints
-    for cls in find_plugin_lexers():
-        for filename in cls.filenames:
+                matching_lexers.add(lexer)
+                primary = lexer
+        for filename in lexer.alias_filenames:
             if fnmatch.fnmatch(fn, filename):
-                return cls(**options)
-    raise ValueError('no lexer for filename %r found' % fn)
+                matching_lexers.add(lexer)
+    if not matching_lexers:
+        raise ValueError('no lexer for filename %r found' % fn)
+    if len(matching_lexers) == 1:
+        return iter(matching_lexers).next()
+    result = []
+    for lexer in matching_lexers:
+        rv = lexer.analyse_text(_text)
+        if rv == 1.0:
+            return lexer(**options)
+        result.append((rv, lexer))
+    result.sort()
+    if not result[-1][0] and primary is not None:
+        return primary(**options)
+    return result[-1][1](**options)
 
 
-def guess_lexer(text, **options):
+def guess_lexer(_text, **options):
     """
     Guess a lexer by strong distinctions in the text (eg, shebang).
     """
+    #XXX: i (mitsuhiko) would like to drop this function in favor of the
+    #     better guess_lexer_for_filename function.
     best_lexer = [0.0, None]
-    # builtin lexers
-    for module_name, name, _, _ in LEXERS.itervalues():
-        if name not in _lexer_cache:
-            _load_lexers(module_name)
-        lexer = _lexer_cache[name]
-        rv = lexer.analyse_text(text)
-        if rv == 1.0:
-            return lexer(**options)
-        if rv > best_lexer[0]:
-            best_lexer[:] = (rv, lexer)
-    # plugin lexers
-    for lexer in find_plugin_lexers():
+    for lexer in _iter_lexers():
         rv = lexer.analyse_text(text)
         if rv == 1.0:
             return lexer(**options)
         if rv > best_lexer[0]:
             best_lexer[:] = (rv, lexer)
-    if best_lexer[0] == 0.0 or best_lexer[1] is None:
-        from pygments.lexers.special import TextLexer
-        return TextLexer(**options)
+    if not best_lexer[0] or best_lexer[1] is None:
+        raise ValueError('no lexer matching the text found')
     return best_lexer[1](**options)
 
 
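Besides the new ``_iter_lexers`` helper and ``guess_lexer_for_filename``, the hunk above changes
``guess_lexer``: instead of silently falling back to ``TextLexer`` it now raises ``ValueError`` when no
lexer scores above zero. A caller that still wants the old behaviour could wrap it roughly like this
(a sketch against the new API, not part of the commit)::

    from pygments.lexers import guess_lexer
    from pygments.lexers.special import TextLexer

    def guess_lexer_with_fallback(text, **options):
        # fall back to plain text when no lexer claims the input
        try:
            return guess_lexer(text, **options)
        except ValueError:
            return TextLexer(**options)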
diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py
index ed9ea633..dddc14b4 100644
--- a/pygments/lexers/templates.py
+++ b/pygments/lexers/templates.py
@@ -19,7 +19,8 @@ from pygments.lexers.web import \
     PhpLexer, HtmlLexer, XmlLexer, JavascriptLexer, CssLexer
 from pygments.lexers.agile import PythonLexer
 from pygments.lexer import \
-    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, include, using
+    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, \
+    include, using
 from pygments.token import \
     Text, Comment, Operator, Keyword, Name, String, Number, Other
 from pygments.util import html_doctype_matches, looks_like_xml
@@ -121,11 +122,11 @@ class ErbLexer(Lexer):
 class SmartyLexer(RegexLexer):
     name = 'Smarty'
     aliases = ['smarty']
+    filenames = ['*.tpl']
 
     flags = re.MULTILINE | re.DOTALL
 
     tokens = {
-        # XXX: make smarty delimiters customizable somehow
         'root': [
             (r'[^{]+', Other),
             (r'(\{)(\*.*?\*)(\})',
@@ -314,6 +315,7 @@ class GenshiMarkupLexer(RegexLexer):
 class HtmlGenshiLexer(DelegatingLexer):
     name = 'HTML+Genshi'
     aliases = ['html+genshi', 'html+kid']
+    alias_filenames = ['*.html', '*.htm', '*.xhtml']
 
     def __init__(self, **options):
         super(HtmlGenshiLexer, self).__init__(HtmlLexer, GenshiMarkupLexer,
@@ -332,6 +334,7 @@ class GenshiLexer(DelegatingLexer):
     name = 'Genshi'
     aliases = ['genshi', 'kid', 'xml+genshi', 'xml+kid']
     filenames = ['*.kid']
+    alias_filenames = ['*.xml']
 
     def __init__(self, **options):
         super(GenshiLexer, self).__init__(XmlLexer, GenshiMarkupLexer,
@@ -350,26 +353,35 @@ class JavascriptGenshiLexer(DelegatingLexer):
     name = 'JavaScript+Genshi Text'
     aliases = ['js+genshitext', 'js+genshi', 'javascript+genshitext',
                'javascript+genshi']
+    alias_filenames = ['*.js']
 
     def __init__(self, **options):
         super(JavascriptGenshiLexer, self).__init__(JavascriptLexer,
                                                     GenshiTextLexer,
                                                     **options)
 
+    def analyse_text(text):
+        return GenshiLexer.analyse_text(text) - 0.05
+
 
 class CssGenshiLexer(DelegatingLexer):
     name = 'CSS+Genshi Text'
     aliases = ['css+genshitext', 'css+genshi']
+    alias_filenames = ['*.css']
 
     def __init__(self, **options):
         super(CssGenshiLexer, self).__init__(CssLexer, GenshiTextLexer,
                                              **options)
 
+    def analyse_text(text):
+        return GenshiLexer.analyse_text(text) - 0.05
+
 
 class RhtmlLexer(DelegatingLexer):
     name = 'RHTML'
     aliases = ['rhtml', 'html+erb', 'html+ruby']
     filenames = ['*.rhtml']
+    alias_filenames = ['*.html', '*.htm', '*.xhtml']
 
     def __init__(self, **options):
         super(RhtmlLexer, self).__init__(HtmlLexer, ErbLexer, **options)
@@ -385,6 +397,7 @@ class RhtmlLexer(DelegatingLexer):
 class XmlErbLexer(DelegatingLexer):
     name = 'XML+Ruby'
     aliases = ['xml+erb', 'xml+ruby']
+    alias_filenames = ['*.xml']
 
     def __init__(self, **options):
         super(XmlErbLexer, self).__init__(XmlLexer, ErbLexer, **options)
@@ -399,24 +412,34 @@ class XmlErbLexer(DelegatingLexer):
 class CssErbLexer(DelegatingLexer):
     name = 'CSS+Ruby'
     aliases = ['css+erb', 'css+ruby']
+    alias_filenames = ['*.xml']
 
     def __init__(self, **options):
         super(CssErbLexer, self).__init__(CssLexer, ErbLexer, **options)
 
+    def analyse_text(text):
+        return ErbLexer.analyse_text(text) - 0.05
+
 
 class JavascriptErbLexer(DelegatingLexer):
     name = 'JavaScript+Ruby'
     aliases = ['js+erb', 'javascript+erb', 'js+ruby', 'javascript+ruby']
+    alias_filenames = ['*.js']
 
     def __init__(self, **options):
         super(JavascriptErbLexer, self).__init__(JavascriptLexer, ErbLexer,
                                                  **options)
 
+    def analyse_text(text):
+        return ErbLexer.analyse_text(text) - 0.05
+
 
 class HtmlPhpLexer(DelegatingLexer):
     name = 'HTML+PHP'
     aliases = ['html+php']
     filenames = ['*.phtml']
+    alias_filenames = ['*.php', '*.html', '*.htm', '*.xhtml',
+                       '*.php[345]']
 
     def __init__(self, **options):
         super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)
@@ -431,6 +454,7 @@ class HtmlPhpLexer(DelegatingLexer):
 class XmlPhpLexer(DelegatingLexer):
     name = 'XML+PHP'
     aliases = ['xml+php']
+    alias_filenames = ['*.xml', '*.php', '*.php[345]']
 
     def __init__(self, **options):
         super(XmlPhpLexer, self).__init__(XmlLexer, PhpLexer, **options)
@@ -445,23 +469,32 @@ class XmlPhpLexer(DelegatingLexer):
 class CssPhpLexer(DelegatingLexer):
     name = 'CSS+PHP'
     aliases = ['css+php']
+    alias_filenames = ['*.css']
 
     def __init__(self, **options):
         super(CssPhpLexer, self).__init__(CssLexer, PhpLexer, **options)
 
+    def analyse_text(text):
+        return PhpLexer.analyse_text(text) - 0.05
+
 
 class JavascriptPhpLexer(DelegatingLexer):
     name = 'JavaScript+PHP'
    aliases = ['js+php', 'javascript+php']
+    alias_filenames = ['*.js']
 
     def __init__(self, **options):
         super(JavascriptPhpLexer, self).__init__(JavascriptLexer, PhpLexer,
                                                  **options)
 
+    def analyse_text(text):
+        return PhpLexer.analyse_text(text)
+
 
 class HtmlSmartyLexer(DelegatingLexer):
     name = 'HTML+Smarty'
     aliases = ['html+smarty']
+    alias_filenames = ['*.html', '*.htm', '*.xhtml', '*.tpl']
 
     def __init__(self, **options):
         super(HtmlSmartyLexer, self).__init__(HtmlLexer, SmartyLexer, **options)
@@ -476,6 +509,7 @@ class HtmlSmartyLexer(DelegatingLexer):
 class XmlSmartyLexer(DelegatingLexer):
     name = 'XML+Smarty'
     aliases = ['xml+smarty']
+    alias_filenames = ['*.xml', '*.tpl']
 
     def __init__(self, **options):
         super(XmlSmartyLexer, self).__init__(XmlLexer, SmartyLexer, **options)
@@ -490,23 +524,32 @@ class XmlSmartyLexer(DelegatingLexer):
 class CssSmartyLexer(DelegatingLexer):
     name = 'CSS+Smarty'
     aliases = ['css+smarty']
+    alias_filenames = ['*.css', '*.tpl']
 
     def __init__(self, **options):
         super(CssSmartyLexer, self).__init__(CssLexer, SmartyLexer, **options)
 
+    def analyse_text(text):
+        return SmartyLexer.analyse_text(text) - 0.05
+
 
 class JavascriptSmartyLexer(DelegatingLexer):
     name = 'JavaScript+Smarty'
     aliases = ['js+smarty', 'javascript+smarty']
+    alias_filenames = ['*.js', '*.tpl']
 
     def __init__(self, **options):
         super(JavascriptSmartyLexer, self).__init__(JavascriptLexer,
                                                     SmartyLexer, **options)
 
+    def analyse_text(text):
+        return SmartyLexer.analyse_text(text) - 0.05
+
 
 class HtmlDjangoLexer(DelegatingLexer):
     name = 'HTML+Django/Jinja'
     aliases = ['html+django', 'html+jinja']
+    alias_filenames = ['*.html', '*.htm', '*.xhtml']
 
     def __init__(self, **options):
         super(HtmlDjangoLexer, self).__init__(HtmlLexer, DjangoLexer, **options)
@@ -521,6 +564,7 @@ class HtmlDjangoLexer(DelegatingLexer):
 class XmlDjangoLexer(DelegatingLexer):
     name = 'XML+Django/Jinja'
     aliases = ['xml+django', 'xml+jinja']
+    alias_filenames = ['*.xml']
 
     def __init__(self, **options):
         super(XmlDjangoLexer, self).__init__(XmlLexer, DjangoLexer, **options)
@@ -535,16 +579,24 @@ class XmlDjangoLexer(DelegatingLexer):
 class CssDjangoLexer(DelegatingLexer):
     name = 'CSS+Django/Jinja'
     aliases = ['css+django', 'css+jinja']
+    alias_filenames = ['*.css']
 
     def __init__(self, **options):
         super(CssDjangoLexer, self).__init__(CssLexer, DjangoLexer, **options)
 
+    def analyse_text(text):
+        return DjangoLexer.analyse_text(text) - 0.05
+
 
 class JavascriptDjangoLexer(DelegatingLexer):
     name = 'JavaScript+Django/Jinja'
     aliases = ['js+django', 'javascript+django',
               'js+jinja', 'javascript+jinja']
+    alias_filenames = ['*.js']
 
     def __init__(self, **options):
         super(JavascriptDjangoLexer, self).__init__(JavascriptLexer,
                                                     DjangoLexer, **options)
+
+    def analyse_text(text):
+        return DjangoLexer.analyse_text(text) - 0.05
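All of the templates.py changes follow one pattern: a delegating lexer lists the host language's
extensions in ``alias_filenames`` and forwards ``analyse_text`` to the embedded language's lexer minus a
small penalty, so the lexer that claims the extension via ``filenames`` wins whenever the analysis is
inconclusive. A further combination would be wired up along the same lines; the class below is
hypothetical and only illustrates the convention::

    from pygments.lexer import DelegatingLexer
    from pygments.lexers.special import TextLexer
    from pygments.lexers.templates import ErbLexer

    class TextErbLexer(DelegatingLexer):
        # hypothetical ERB-in-plain-text lexer, for illustration only
        name = 'Text+Ruby'
        aliases = ['text+erb', 'text+ruby']
        alias_filenames = ['*.txt']

        def __init__(self, **options):
            super(TextErbLexer, self).__init__(TextLexer, ErbLexer, **options)

        def analyse_text(text):
            # defer to the embedded language, slightly penalised so the
            # primary '*.txt' lexer wins a tie in guess_lexer_for_filename
            return ErbLexer.analyse_text(text) - 0.05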