-rw-r--r--  pygments/lexer.py            |  23
-rw-r--r--  pygments/lexers/__init__.py  | 106
-rw-r--r--  pygments/lexers/templates.py |  56
3 files changed, 145 insertions, 40 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 049b2868..55a74e19 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -1,15 +1,20 @@
# -*- coding: utf-8 -*-
"""
-pygments.lexer
-~~~~~~~~~~~~~~
+ pygments.lexer
+ ~~~~~~~~~~~~~~
-Base lexer classes.
+ Base lexer classes.
-:copyright: 2006 by Georg Brandl.
-:license: GNU LGPL, see LICENSE for more details.
+ :copyright: 2006 by Georg Brandl.
+ :license: GNU LGPL, see LICENSE for more details.
"""
import re
+try:
+ set
+except NameError:
+ from sets import Set as set
+
from types import FunctionType
from pygments.token import Error, Text, Other, _TokenType
from pygments.util import get_bool_opt, get_int_opt, make_analysator
@@ -31,6 +36,9 @@ class LexerMeta(type):
def __new__(cls, name, bases, d):
if 'analyse_text' in d:
d['analyse_text'] = make_analysator(d['analyse_text'])
+ for key in 'aliases', 'filenames', 'alias_filenames':
+ if key in d:
+ d[key] = set(d[key])
return type.__new__(cls, name, bases, d)
@@ -57,6 +65,9 @@ class Lexer(object):
#: fn match rules
filenames = []
+    #: secondary fn match rules (only used when guessing a lexer)
+ alias_filenames = []
+
__metaclass__ = LexerMeta
def __init__(self, **options):
@@ -203,7 +214,7 @@ def bygroups(*args):
yield match.start(i + 1), action, data
else:
if ctx:
- ctx.pos = match.start(i+1)
+ ctx.pos = match.start(i + 1)
for item in action(lexer, _PseudoMatch(match.start(i + 1),
match.group(i + 1)), ctx):
if item:
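
The lexer.py hunk above does two things: it adds a ``sets.Set`` fallback so the module keeps working on Python 2.3, and it makes LexerMeta convert the ``aliases``, ``filenames`` and new ``alias_filenames`` lists into sets when a lexer class is created. A minimal sketch of that metaclass pattern, with hypothetical DemoMeta/DemoLexer names standing in for the real classes:

    # Hypothetical stand-ins for LexerMeta and a lexer class; same pattern as
    # the hunk above, runnable on its own under Python 2.
    class DemoMeta(type):
        def __new__(cls, name, bases, d):
            for key in ('aliases', 'filenames', 'alias_filenames'):
                if key in d:
                    d[key] = set(d[key])          # lists -> sets at class creation
            return type.__new__(cls, name, bases, d)

    class DemoLexer(object):
        __metaclass__ = DemoMeta                  # Python 2 metaclass syntax
        aliases = ['demo', 'demo-alt']
        filenames = ['*.demo']

    assert isinstance(DemoLexer.aliases, set)     # membership tests are now O(1)
    assert 'demo' in DemoLexer.aliases
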
diff --git a/pygments/lexers/__init__.py b/pygments/lexers/__init__.py
index b102076d..2dcab576 100644
--- a/pygments/lexers/__init__.py
+++ b/pygments/lexers/__init__.py
@@ -12,6 +12,11 @@ import fnmatch
import types
from os.path import basename
+try:
+ set
+except NameError:
+ from sets import Set as set
+
from pygments.lexers._mapping import LEXERS
from pygments.plugin import find_plugin_lexers
@@ -32,68 +37,105 @@ def _load_lexers(module_name):
_lexer_cache[cls.name] = cls
-def get_lexer_by_name(alias, **options):
+def _iter_lexers():
+ """
+ Returns a generator for all lexer classes
+ """
+ for module_name, name, _, _ in LEXERS.itervalues():
+ if name not in _lexer_cache:
+ _load_lexers(module_name)
+ yield _lexer_cache[name]
+ for lexer in find_plugin_lexers():
+ yield lexer
+
+
+def get_lexer_by_name(_alias, **options):
"""
Get a lexer by an alias
"""
# lookup builtin lexers
for module_name, name, aliases, _ in LEXERS.itervalues():
- if alias in aliases:
+ if _alias in aliases:
if name not in _lexer_cache:
_load_lexers(module_name)
return _lexer_cache[name](**options)
# continue with lexers from setuptools entrypoints
for cls in find_plugin_lexers():
- if alias in cls.aliases:
+ if _alias in cls.aliases:
return cls(**options)
-    raise ValueError('no lexer for alias %r found' % alias)
+    raise ValueError('no lexer for alias %r found' % _alias)
-def get_lexer_for_filename(fn, **options):
+def get_lexer_for_filename(_fn, **options):
"""
Guess a lexer by a filename
"""
- fn = basename(fn)
- # lookup builtin lexers
- for module_name, name, _, filenames in LEXERS.itervalues():
- for filename in filenames:
+ fn = basename(_fn)
+ for lexer in _iter_lexers():
+ for filename in lexer.filenames:
+ if fnmatch.fnmatch(_fn, filename):
+ return lexer(**options)
+ raise ValueError('no lexer for filename %r found' % fn)
+
+
+def guess_lexer_for_filename(_fn, _text, **options):
+ """
+    Look up all lexers that handle the given filename either as a primary
+    pattern (``filenames``) or as a secondary one (``alias_filenames``).
+    Then run a text analysis on those lexers and choose the best result.
+
+    Usage::
+
+ >>> from pygments.lexers import guess_lexer_for_filename
+ >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
+ <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
+ >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
+ <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
+ >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
+ <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
+ """
+ fn = basename(_fn)
+ primary = None
+ matching_lexers = set()
+ for lexer in _iter_lexers():
+ for filename in lexer.filenames:
if fnmatch.fnmatch(fn, filename):
- if name not in _lexer_cache:
- _load_lexers(module_name)
- return _lexer_cache[name](**options)
- # continue with lexers from setuptools entrypoints
- for cls in find_plugin_lexers():
- for filename in cls.filenames:
+ matching_lexers.add(lexer)
+ primary = lexer
+ for filename in lexer.alias_filenames:
if fnmatch.fnmatch(fn, filename):
- return cls(**options)
- raise ValueError('no lexer for filename %r found' % fn)
+ matching_lexers.add(lexer)
+ if not matching_lexers:
+ raise ValueError('no lexer for filename %r found' % fn)
+ if len(matching_lexers) == 1:
+ return iter(matching_lexers).next()
+ result = []
+ for lexer in matching_lexers:
+ rv = lexer.analyse_text(_text)
+ if rv == 1.0:
+ return lexer(**options)
+ result.append((rv, lexer))
+ result.sort()
+ if not result[-1][0] and primary is not None:
+ return primary(**options)
+ return result[-1][1](**options)
-def guess_lexer(text, **options):
+def guess_lexer(_text, **options):
"""
Guess a lexer by strong distinctions in the text (eg, shebang).
"""
+    # XXX: I (mitsuhiko) would like to drop this function in favor of the
+    #      better guess_lexer_for_filename function.
best_lexer = [0.0, None]
- # builtin lexers
- for module_name, name, _, _ in LEXERS.itervalues():
- if name not in _lexer_cache:
- _load_lexers(module_name)
- lexer = _lexer_cache[name]
- rv = lexer.analyse_text(text)
- if rv == 1.0:
- return lexer(**options)
- if rv > best_lexer[0]:
- best_lexer[:] = (rv, lexer)
- # plugin lexers
- for lexer in find_plugin_lexers():
+ for lexer in _iter_lexers():
-        rv = lexer.analyse_text(text)
+        rv = lexer.analyse_text(_text)
if rv == 1.0:
return lexer(**options)
if rv > best_lexer[0]:
best_lexer[:] = (rv, lexer)
- if best_lexer[0] == 0.0 or best_lexer[1] is None:
- from pygments.lexers.special import TextLexer
- return TextLexer(**options)
+ if not best_lexer[0] or best_lexer[1] is None:
+ raise ValueError('no lexer matching the text found')
return best_lexer[1](**options)
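
From a caller's point of view the new lookup works like this (a hedged usage sketch; the 'hello.html' case mirrors the docstring above, while the README fallback is purely illustrative):

    from pygments.lexers import guess_lexer_for_filename

    # '*.html' is a primary pattern for the plain HTML lexer and a secondary
    # (alias_filenames) pattern for several template lexers, so analyse_text
    # decides; the ERB markers push the result towards RhtmlLexer.
    lexer = guess_lexer_for_filename('hello.html', '<%= @foo %>')

    # When no filename pattern matches, a ValueError is raised -- and since
    # guess_lexer() no longer falls back to TextLexer automatically, callers
    # that want plain text as a last resort have to handle it themselves:
    try:
        lexer = guess_lexer_for_filename('README', 'just some prose')
    except ValueError:
        from pygments.lexers.special import TextLexer
        lexer = TextLexer()
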
diff --git a/pygments/lexers/templates.py b/pygments/lexers/templates.py
index ed9ea633..dddc14b4 100644
--- a/pygments/lexers/templates.py
+++ b/pygments/lexers/templates.py
@@ -19,7 +19,8 @@ from pygments.lexers.web import \
PhpLexer, HtmlLexer, XmlLexer, JavascriptLexer, CssLexer
from pygments.lexers.agile import PythonLexer
from pygments.lexer import \
- Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, include, using
+ Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, \
+ include, using
from pygments.token import \
Text, Comment, Operator, Keyword, Name, String, Number, Other
from pygments.util import html_doctype_matches, looks_like_xml
@@ -121,11 +122,11 @@ class ErbLexer(Lexer):
class SmartyLexer(RegexLexer):
name = 'Smarty'
aliases = ['smarty']
+ filenames = ['*.tpl']
flags = re.MULTILINE | re.DOTALL
tokens = {
- # XXX: make smarty delimiters customizable somehow
'root': [
(r'[^{]+', Other),
(r'(\{)(\*.*?\*)(\})',
@@ -314,6 +315,7 @@ class GenshiMarkupLexer(RegexLexer):
class HtmlGenshiLexer(DelegatingLexer):
name = 'HTML+Genshi'
aliases = ['html+genshi', 'html+kid']
+ alias_filenames = ['*.html', '*.htm', '*.xhtml']
def __init__(self, **options):
super(HtmlGenshiLexer, self).__init__(HtmlLexer, GenshiMarkupLexer,
@@ -332,6 +334,7 @@ class GenshiLexer(DelegatingLexer):
name = 'Genshi'
aliases = ['genshi', 'kid', 'xml+genshi', 'xml+kid']
filenames = ['*.kid']
+ alias_filenames = ['*.xml']
def __init__(self, **options):
super(GenshiLexer, self).__init__(XmlLexer, GenshiMarkupLexer,
@@ -350,26 +353,35 @@ class JavascriptGenshiLexer(DelegatingLexer):
name = 'JavaScript+Genshi Text'
aliases = ['js+genshitext', 'js+genshi', 'javascript+genshitext',
'javascript+genshi']
+ alias_filenames = ['*.js']
def __init__(self, **options):
super(JavascriptGenshiLexer, self).__init__(JavascriptLexer,
GenshiTextLexer,
**options)
+ def analyse_text(text):
+ return GenshiLexer.analyse_text(text) - 0.05
+
class CssGenshiLexer(DelegatingLexer):
name = 'CSS+Genshi Text'
aliases = ['css+genshitext', 'css+genshi']
+ alias_filenames = ['*.css']
def __init__(self, **options):
super(CssGenshiLexer, self).__init__(CssLexer, GenshiTextLexer,
**options)
+ def analyse_text(text):
+ return GenshiLexer.analyse_text(text) - 0.05
+
class RhtmlLexer(DelegatingLexer):
name = 'RHTML'
aliases = ['rhtml', 'html+erb', 'html+ruby']
filenames = ['*.rhtml']
+ alias_filenames = ['*.html', '*.htm', '*.xhtml']
def __init__(self, **options):
super(RhtmlLexer, self).__init__(HtmlLexer, ErbLexer, **options)
@@ -385,6 +397,7 @@ class RhtmlLexer(DelegatingLexer):
class XmlErbLexer(DelegatingLexer):
name = 'XML+Ruby'
aliases = ['xml+erb', 'xml+ruby']
+ alias_filenames = ['*.xml']
def __init__(self, **options):
super(XmlErbLexer, self).__init__(XmlLexer, ErbLexer, **options)
@@ -399,24 +412,34 @@ class XmlErbLexer(DelegatingLexer):
class CssErbLexer(DelegatingLexer):
name = 'CSS+Ruby'
aliases = ['css+erb', 'css+ruby']
+    alias_filenames = ['*.css']
def __init__(self, **options):
super(CssErbLexer, self).__init__(CssLexer, ErbLexer, **options)
+ def analyse_text(text):
+ return ErbLexer.analyse_text(text) - 0.05
+
class JavascriptErbLexer(DelegatingLexer):
name = 'JavaScript+Ruby'
aliases = ['js+erb', 'javascript+erb', 'js+ruby', 'javascript+ruby']
+ alias_filenames = ['*.js']
def __init__(self, **options):
super(JavascriptErbLexer, self).__init__(JavascriptLexer, ErbLexer,
**options)
+ def analyse_text(text):
+ return ErbLexer.analyse_text(text) - 0.05
+
class HtmlPhpLexer(DelegatingLexer):
name = 'HTML+PHP'
aliases = ['html+php']
filenames = ['*.phtml']
+ alias_filenames = ['*.php', '*.html', '*.htm', '*.xhtml',
+ '*.php[345]']
def __init__(self, **options):
super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)
@@ -431,6 +454,7 @@ class HtmlPhpLexer(DelegatingLexer):
class XmlPhpLexer(DelegatingLexer):
name = 'XML+PHP'
aliases = ['xml+php']
+ alias_filenames = ['*.xml', '*.php', '*.php[345]']
def __init__(self, **options):
super(XmlPhpLexer, self).__init__(XmlLexer, PhpLexer, **options)
@@ -445,23 +469,32 @@ class XmlPhpLexer(DelegatingLexer):
class CssPhpLexer(DelegatingLexer):
name = 'CSS+PHP'
aliases = ['css+php']
+ alias_filenames = ['*.css']
def __init__(self, **options):
super(CssPhpLexer, self).__init__(CssLexer, PhpLexer, **options)
+ def analyse_text(text):
+ return PhpLexer.analyse_text(text) - 0.05
+
class JavascriptPhpLexer(DelegatingLexer):
name = 'JavaScript+PHP'
aliases = ['js+php', 'javascript+php']
+ alias_filenames = ['*.js']
def __init__(self, **options):
super(JavascriptPhpLexer, self).__init__(JavascriptLexer, PhpLexer,
**options)
+ def analyse_text(text):
+ return PhpLexer.analyse_text(text)
+
class HtmlSmartyLexer(DelegatingLexer):
name = 'HTML+Smarty'
aliases = ['html+smarty']
+ alias_filenames = ['*.html', '*.htm', '*.xhtml', '*.tpl']
def __init__(self, **options):
super(HtmlSmartyLexer, self).__init__(HtmlLexer, SmartyLexer, **options)
@@ -476,6 +509,7 @@ class HtmlSmartyLexer(DelegatingLexer):
class XmlSmartyLexer(DelegatingLexer):
name = 'XML+Smarty'
aliases = ['xml+smarty']
+ alias_filenames = ['*.xml', '*.tpl']
def __init__(self, **options):
super(XmlSmartyLexer, self).__init__(XmlLexer, SmartyLexer, **options)
@@ -490,23 +524,32 @@ class XmlSmartyLexer(DelegatingLexer):
class CssSmartyLexer(DelegatingLexer):
name = 'CSS+Smarty'
aliases = ['css+smarty']
+ alias_filenames = ['*.css', '*.tpl']
def __init__(self, **options):
super(CssSmartyLexer, self).__init__(CssLexer, SmartyLexer, **options)
+ def analyse_text(text):
+ return SmartyLexer.analyse_text(text) - 0.05
+
class JavascriptSmartyLexer(DelegatingLexer):
name = 'JavaScript+Smarty'
aliases = ['js+smarty', 'javascript+smarty']
+ alias_filenames = ['*.js', '*.tpl']
def __init__(self, **options):
super(JavascriptSmartyLexer, self).__init__(JavascriptLexer, SmartyLexer,
**options)
+ def analyse_text(text):
+ return SmartyLexer.analyse_text(text) - 0.05
+
class HtmlDjangoLexer(DelegatingLexer):
name = 'HTML+Django/Jinja'
aliases = ['html+django', 'html+jinja']
+ alias_filenames = ['*.html', '*.htm', '*.xhtml']
def __init__(self, **options):
super(HtmlDjangoLexer, self).__init__(HtmlLexer, DjangoLexer, **options)
@@ -521,6 +564,7 @@ class HtmlDjangoLexer(DelegatingLexer):
class XmlDjangoLexer(DelegatingLexer):
name = 'XML+Django/Jinja'
aliases = ['xml+django', 'xml+jinja']
+ alias_filenames = ['*.xml']
def __init__(self, **options):
super(XmlDjangoLexer, self).__init__(XmlLexer, DjangoLexer, **options)
@@ -535,16 +579,24 @@ class XmlDjangoLexer(DelegatingLexer):
class CssDjangoLexer(DelegatingLexer):
name = 'CSS+Django/Jinja'
aliases = ['css+django', 'css+jinja']
+ alias_filenames = ['*.css']
def __init__(self, **options):
super(CssDjangoLexer, self).__init__(CssLexer, DjangoLexer, **options)
+ def analyse_text(text):
+ return DjangoLexer.analyse_text(text) - 0.05
+
class JavascriptDjangoLexer(DelegatingLexer):
name = 'JavaScript+Django/Jinja'
aliases = ['js+django', 'javascript+django',
'js+jinja', 'javascript+jinja']
+ alias_filenames = ['*.js']
def __init__(self, **options):
super(JavascriptDjangoLexer, self).__init__(JavascriptLexer, DjangoLexer,
**options)
+
+ def analyse_text(text):
+ return DjangoLexer.analyse_text(text) - 0.05
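
The templates.py changes all follow one pattern: each combination lexer gains alias_filenames for the host language's extensions, and the secondary combinations get an analyse_text that reuses the embedded language's heuristic, in most cases minus 0.05 so they rank just below the lexer whose score they borrow. A minimal sketch of that convention, with a hypothetical DemoCssPhpLexer mirroring the CSS+PHP class above:

    from pygments.lexer import DelegatingLexer
    from pygments.lexers.web import CssLexer, PhpLexer

    class DemoCssPhpLexer(DelegatingLexer):
        """Hypothetical copy of the CSS+PHP pattern from the diff above."""
        name = 'Demo CSS+PHP'
        aliases = ['demo-css+php']
        alias_filenames = ['*.css']   # secondary match only, never a primary one

        def __init__(self, **options):
            super(DemoCssPhpLexer, self).__init__(CssLexer, PhpLexer, **options)

        def analyse_text(text):
            # LexerMeta turns this into a static analysator; borrowing PHP's
            # heuristic minus 0.05 keeps this combination just below whatever
            # lexer produced the base score when several candidates match.
            return PhpLexer.analyse_text(text) - 0.05

This is what makes the docstring example resolve 'style.css' containing PHP markers to CssPhpLexer: the '*.css' alias match only puts it in the candidate set, and the borrowed PHP text analysis then outranks the other candidates.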