summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorblackbird <devnull@localhost>2006-12-21 22:10:34 +0100
committerblackbird <devnull@localhost>2006-12-21 22:10:34 +0100
commitf86e5908ca4c35bcbe8fe25ba839ff5c9a7a9f34 (patch)
tree98237a777c718b58116b68e7643c79e76acf10f8
parent95377ce6c229ec8cb1df8e358337fc524b8476c8 (diff)
downloadpygments-f86e5908ca4c35bcbe8fe25ba839ff5c9a7a9f34.tar.gz
[svn] implemented filters for pygments (first approach, api might change), it's impossible at the moment to add filters by using pygmentize
-rw-r--r--docs/src/filterdevelopment.txt68
-rw-r--r--docs/src/filters.txt57
-rw-r--r--docs/src/formatterdevelopment.txt (renamed from docs/src/formatterdev.txt)0
-rw-r--r--docs/src/index.txt6
-rw-r--r--docs/src/lexerdevelopment.txt4
-rw-r--r--docs/src/plugins.txt11
-rw-r--r--docs/src/tokens.txt4
-rw-r--r--pygments/__init__.py3
-rw-r--r--pygments/lexer.py40
-rw-r--r--pygments/plugin.py13
-rw-r--r--pygments/styles/autumn.py1
-rw-r--r--pygments/styles/borland.py1
-rw-r--r--pygments/styles/colorful.py1
-rw-r--r--pygments/styles/default.py1
-rw-r--r--pygments/styles/friendly.py1
-rw-r--r--pygments/styles/manni.py1
-rw-r--r--pygments/styles/murphy.py1
-rw-r--r--pygments/styles/native.py1
-rw-r--r--pygments/styles/pastie.py3
-rw-r--r--pygments/styles/perldoc.py1
-rw-r--r--pygments/styles/trac.py5
-rw-r--r--pygments/token.py6
-rw-r--r--pygments/util.py12
23 files changed, 221 insertions, 20 deletions
diff --git a/docs/src/filterdevelopment.txt b/docs/src/filterdevelopment.txt
new file mode 100644
index 00000000..af467831
--- /dev/null
+++ b/docs/src/filterdevelopment.txt
@@ -0,0 +1,68 @@
+.. -*- mode: rst -*-
+
+=====================
+Write your own filter
+=====================
+
+*New in Pygments 0.7*
+
+Writing your own filters is very easy. All you have to do is to subclass
+the `Filter` class and override the `filter` method. Additionally, a
+filter is instantiated with some keyword arguments you can use to
+adjust the behavior of your filter.
+
+
+Subclassing Filters
+===================
+
+As an example we write a filter that converts all `Name.Function` tokens
+to normal `Name` tokens to make the output less colorful.
+
+.. sourcecode:: pycon
+
+ from pygments.util import get_bool_opt
+ from pygments.token import Name
+ from pygments.filter import Filter
+
+ class UncolorFilter(Filter):
+
+ def __init__(self, **options):
+ Filter.__init__(self, **options)
+ self.class_too = get_bool_opt(options, 'classtoo')
+
+ def filter(self, lexer, stream):
+ for ttype, value in stream:
+ if ttype == Name.Function or (self.class_too and
+ ttype == Name.Class):
+ ttype = Name
+ yield ttype, value
+
+Some words on the `lexer` argument: it can be confusing, since it does not
+have to be a lexer instance. If a filter was added by using the `add_filter`
+function of a lexer, that lexer is registered for the filter. In that case
+`lexer` will point to the lexer that has registered the filter. It can
+be used (but does not have to be) to access options passed to a lexer. Because
+it could be `None`, you always have to check for that case if you access it.
+
+
+Using a Decorator
+=================
+
+You can also use the `simplefilter` decorator from the `pygments.filter`
+module:
+
+.. sourcecode:: pycon
+
+ from pygments.util import get_bool_opt
+ from pygments.token import Name
+ from pygments.filter import simplefilter
+
+
+ @simplefilter
+ def uncolor(lexer, stream, options):
+ class_too = get_bool_opt(options, 'classtoo')
+ for ttype, value in stream:
+ if ttype == Name.Function or (class_too and
+ ttype == Name.Class):
+ ttype = Name
+ yield ttype, value
diff --git a/docs/src/filters.txt b/docs/src/filters.txt
new file mode 100644
index 00000000..55b74faf
--- /dev/null
+++ b/docs/src/filters.txt
@@ -0,0 +1,57 @@
+.. -*- mode: rst -*-
+
+=======
+Filters
+=======
+
+Since Pygments 0.7 you can filter token streams to improve the output. For
+example you can highlight special words in comments, convert keywords
+to upper or lowercase to enforce a styleguide etc.
+
+To apply a filter you can use the `add_filter` method of a lexer:
+
+.. sourcecode:: pycon
+
+ >>> from pygments.lexers import PythonLexer
+ >>> l = PythonLexer()
+ >>> # as string
+ >>> l.add_filter("codetagify")
+ >>> l.filters
+ [<pygments.filters.CodeTagFilter object at 0xb785decc>]
+ >>> from pygments.filters import KeywordCaseFilter
+ >>> # or class
+ >>> l.add_filter(KeywordCaseFilter(keywordcase='lower'))
+
+The `add_filter` method also takes keyword arguments which are forwarded
+to the constructor of the filter.
+
+To get a list of all registered filters by name you can use the
+`get_all_filters` function from the `pygments.filters` module that returns
+an iterable for all known filters.
+
+If you want to write your own filter have a look at `Write your own filter`_.
+
+.. _Write your own filter: filterdevelopment.txt
+
+
+Builtin Filters
+===============
+
+`CodeTagFilter`
+
+ Highlights special code tags in comments and docstrings. Per
+ default the list of highlighted tags is ``XXX``, ``TODO``,
+ ``BUG`` and ``NOTE``. You can override this list by specifying
+ a ``codetags`` parameter that takes a list of words.
+
+ :Name: ``codetagify``
+
+`KeywordCaseFilter`
+
+ Converts keywords to ``lower``, ``upper`` or ``capitalize`` which
+ means first letter uppercase, rest lowercase. This can be useful
+ if you highlight pascal code and want to adapt the code to your
+ styleguide. The default is ``lower``, override that by providing
+ the `keywordcase` parameter.
+
+ :Name: ``keywordcase``
diff --git a/docs/src/formatterdev.txt b/docs/src/formatterdevelopment.txt
index 82208aa0..82208aa0 100644
--- a/docs/src/formatterdev.txt
+++ b/docs/src/formatterdevelopment.txt
diff --git a/docs/src/index.txt b/docs/src/index.txt
index 08e77f62..88bf8591 100644
--- a/docs/src/index.txt
+++ b/docs/src/index.txt
@@ -20,6 +20,8 @@ Welcome to the Pygments documentation.
- `Builtin formatters <formatters.txt>`_
+ - `Filters <filters.txt>`_
+
- `Styles <styles.txt>`_
- API and more
@@ -32,7 +34,9 @@ Welcome to the Pygments documentation.
- `Write your own lexer <lexerdevelopment.txt>`_
- - `Write your own formatter <formatterdev.txt>`_
+ - `Write your own formatter <formatterdevelopment.txt>`_
+
+ - `Write your own filter <filterdevelopment.txt>`_
- `Register Plugins <plugins.txt>`_
diff --git a/docs/src/lexerdevelopment.txt b/docs/src/lexerdevelopment.txt
index d7ab3923..a004df5c 100644
--- a/docs/src/lexerdevelopment.txt
+++ b/docs/src/lexerdevelopment.txt
@@ -535,3 +535,7 @@ the ``get_tokens_unprocessed()`` method. The following lexer subclasses the
yield index, token, value
The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions.
+
+**Note** Do not confuse this with the `filter`_ system.
+
+.. _filter: filters.txt
diff --git a/docs/src/plugins.txt b/docs/src/plugins.txt
index 5a302e9e..da8b33b9 100644
--- a/docs/src/plugins.txt
+++ b/docs/src/plugins.txt
@@ -61,6 +61,17 @@ Here a list of setuptools entrypoints pygments understands:
yourstyle = yourmodule:YourStyle
+`pygments.filters`
+
+ Use this entrypoint to register a new filter. The name of the
+ entrypoint is the name of the filter:
+
+ .. sourcecode:: ini
+
+ [pygments.filters]
+ yourfilter = yourmodule:YourFilter
+
+
How To Use Entrypoints
======================
diff --git a/docs/src/tokens.txt b/docs/src/tokens.txt
index daaf1eca..e1c4402b 100644
--- a/docs/src/tokens.txt
+++ b/docs/src/tokens.txt
@@ -245,6 +245,10 @@ Comments
`Comment.Single`
Token type for comments that end at the end of a line (e.g. ``# foo``).
+`Comment.Special`
+ Special data in comments. For example code tags, author and license
+ information etc.
+
Generic Tokens
==============
diff --git a/pygments/__init__.py b/pygments/__init__.py
index 027eafd3..eafa867f 100644
--- a/pygments/__init__.py
+++ b/pygments/__init__.py
@@ -64,7 +64,8 @@ def format(tokens, formatter, outfile=None):
def highlight(code, lexer, formatter, outfile=None):
"""
Lex ``code`` with ``lexer`` and format it with the formatter
- ``formatter``.
+ ``formatter``. If ``filters`` are given they will be applied
+ on the token stream.
If ``outfile`` is given and a valid file object (an object
with a ``write`` method), the result will be written to it, otherwise
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 16a6b8ab..7e141887 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -15,8 +15,11 @@ try:
except NameError:
from sets import Set as set
+from pygments.filter import apply_filters, Filter
+from pygments.filters import find_filter
from pygments.token import Error, Text, Other, _TokenType
-from pygments.util import get_bool_opt, get_int_opt, make_analysator
+from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
+ make_analysator
__all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
@@ -81,6 +84,9 @@ class Lexer(object):
self.stripall = get_bool_opt(options, 'stripall', False)
self.tabsize = get_int_opt(options, 'tabsize', 0)
self.encoding = options.get('encoding', 'latin1')
+ self.filters = []
+ for filter in get_list_opt(options, 'filters', ()):
+ self.add_filter(filter)
def __repr__(self):
if self.options:
@@ -89,6 +95,14 @@ class Lexer(object):
else:
return '<pygments.lexers.%s>' % self.__class__.__name__
+ def add_filter(self, filter, **options):
+ """
+ Add a new stream filter to this lexer.
+ """
+ if not isinstance(filter, Filter):
+ filter = find_filter(filter, **options)
+ self.filters.append(filter)
+
def analyse_text(text):
"""
Has to return a float between ``0`` and ``1`` that indicates
@@ -103,11 +117,14 @@ class Lexer(object):
it's the same as if the return values was ``0.0``.
"""
- def get_tokens(self, text):
+ def get_tokens(self, text, unfiltered=False):
"""
- Return an iterable of (tokentype, value) pairs generated from ``text``.
+ Return an iterable of (tokentype, value) pairs generated from
+ `text`. If `unfiltered` is set to `True` the filtering mechanism
+ is bypassed, even if filters are defined.
- Also preprocess the text, i.e. expand tabs and strip it if wanted.
+ Also preprocess the text, i.e. expand tabs and strip it if
+ wanted, and apply registered filters.
"""
if isinstance(text, unicode):
text = u'\n'.join(text.splitlines())
@@ -122,9 +139,9 @@ class Lexer(object):
try:
import chardet
except ImportError:
- raise ImportError('To enable chardet encoding guessing, please '
- 'install the chardet library from '
- 'http://chardet.feedparser.org/')
+ raise ImportError('To enable chardet encoding guessing, '
+ 'please install the chardet library '
+ 'from http://chardet.feedparser.org/')
enc = chardet.detect(text)
text = text.decode(enc['encoding'])
else:
@@ -138,8 +155,13 @@ class Lexer(object):
if not text.endswith('\n'):
text += '\n'
- for i, t, v in self.get_tokens_unprocessed(text):
- yield t, v
+ def streamer():
+ for i, t, v in self.get_tokens_unprocessed(text):
+ yield t, v
+ stream = streamer()
+ if not unfiltered:
+ stream = apply_filters(stream, self.filters, self)
+ return stream
def get_tokens_unprocessed(self, text):
"""
diff --git a/pygments/plugin.py b/pygments/plugin.py
index 4422cbb5..89300677 100644
--- a/pygments/plugin.py
+++ b/pygments/plugin.py
@@ -26,6 +26,11 @@
[pygments.styles]
yourstyle = yourstyle:YourStyle
+ filter plugin::
+
+ [pygments.filters]
+ yourfilter = yourfilter:YourFilter
+
:copyright: 2006 by Armin Ronacher.
:license: BSD, see LICENSE for more details.
@@ -38,6 +43,7 @@ except ImportError:
LEXER_ENTRY_POINT = 'pygments.lexers'
FORMATTER_ENTRY_POINT = 'pygments.formatters'
STYLE_ENTRY_POINT = 'pygments.styles'
+FILTER_ENTRY_POINT = 'pygments.filters'
def find_plugin_lexers():
@@ -59,3 +65,10 @@ def find_plugin_styles():
return
for entrypoint in pkg_resources.iter_entry_points(STYLE_ENTRY_POINT):
yield entrypoint.name, entrypoint.load()
+
+
+def find_plugin_filters():
+ if pkg_resources is None:
+ return
+ for entrypoint in pkg_resources.iter_entry_points(FILTER_ENTRY_POINT):
+ yield entrypoint.name, entrypoint.load()
diff --git a/pygments/styles/autumn.py b/pygments/styles/autumn.py
index 0e9c64d2..1862c3ad 100644
--- a/pygments/styles/autumn.py
+++ b/pygments/styles/autumn.py
@@ -21,6 +21,7 @@ class AutumnStyle(Style):
styles = {
Comment: "italic #aaaaaa",
Comment.Preproc: "noitalic #4c8317",
+ Comment.Special: "italic #0000aa",
Keyword: "#0000aa",
Keyword.Type: "#00aaaa",
diff --git a/pygments/styles/borland.py b/pygments/styles/borland.py
index e6c14b19..b798ebe0 100644
--- a/pygments/styles/borland.py
+++ b/pygments/styles/borland.py
@@ -21,6 +21,7 @@ class BorlandStyle(Style):
styles = {
Comment: 'italic #008800',
Comment.Preproc: 'noitalic',
+ Comment.Special: 'noitalic bold',
String: '#0000FF',
Number: '#0000FF',
diff --git a/pygments/styles/colorful.py b/pygments/styles/colorful.py
index 0e3bc5e6..f542d3fc 100644
--- a/pygments/styles/colorful.py
+++ b/pygments/styles/colorful.py
@@ -21,6 +21,7 @@ class ColorfulStyle(Style):
styles = {
Comment: "#888",
Comment.Preproc: "#579",
+ Comment.Special: "bold #cc0000",
Keyword: "bold #080",
Keyword.Pseudo: "#038",
diff --git a/pygments/styles/default.py b/pygments/styles/default.py
index 277c323f..a7ba6eb8 100644
--- a/pygments/styles/default.py
+++ b/pygments/styles/default.py
@@ -25,6 +25,7 @@ class DefaultStyle(Style):
styles = {
Comment: "italic #008800",
Comment.Preproc: "noitalic",
+ Comment.Special: "noitalic bold",
Keyword: "bold #AA22FF",
Keyword.Pseudo: "nobold",
diff --git a/pygments/styles/friendly.py b/pygments/styles/friendly.py
index af983909..e6762091 100644
--- a/pygments/styles/friendly.py
+++ b/pygments/styles/friendly.py
@@ -22,6 +22,7 @@ class FriendlyStyle(Style):
styles = {
Comment: "italic #60a0b0",
Comment.Preproc: "noitalic #007020",
+ Comment.Special: "noitalic bg:#fff0f0",
Keyword: "bold #007020",
Keyword.Pseudo: "nobold",
diff --git a/pygments/styles/manni.py b/pygments/styles/manni.py
index 61e041fc..1f99ad1f 100644
--- a/pygments/styles/manni.py
+++ b/pygments/styles/manni.py
@@ -24,6 +24,7 @@ class ManniStyle(Style):
styles = {
Comment: 'italic #0099FF',
Comment.Preproc: 'noitalic #009999',
+ Comment.Special: 'bold',
Keyword: 'bold #006699',
Keyword.Pseudo: 'nobold',
diff --git a/pygments/styles/murphy.py b/pygments/styles/murphy.py
index 425a1683..82ae6d15 100644
--- a/pygments/styles/murphy.py
+++ b/pygments/styles/murphy.py
@@ -21,6 +21,7 @@ class MurphyStyle(Style):
styles = {
Comment: "#666 italic",
Comment.Preproc: "#579 noitalic",
+ Comment.Special: "#c00 bold",
Keyword: "bold #289",
Keyword.Pseudo: "#08f",
diff --git a/pygments/styles/native.py b/pygments/styles/native.py
index 057e66ce..2f1e6576 100644
--- a/pygments/styles/native.py
+++ b/pygments/styles/native.py
@@ -23,6 +23,7 @@ class NativeStyle(Style):
Comment: 'italic #999999',
Comment.Preproc: 'noitalic bold #cd2828',
+ Comment.Special: 'noitalic bold #e50808 bg:#520000',
Keyword: 'bold #6ab825',
Keyword.Pseudo: 'nobold',
diff --git a/pygments/styles/pastie.py b/pygments/styles/pastie.py
index a6b3a444..3cfe781d 100644
--- a/pygments/styles/pastie.py
+++ b/pygments/styles/pastie.py
@@ -23,6 +23,7 @@ class PastieStyle(Style):
styles = {
Comment: '#888888',
Comment.Preproc: 'bold #cc0000',
+ Comment.Special: 'bg:#fff0f0 bold #cc0000',
String: 'bg:#fff0f0 #dd2200',
String.Regex: 'bg:#fff0ff #008800',
@@ -41,7 +42,7 @@ class PastieStyle(Style):
Name.Exception: 'bold #bb0066',
Name.Function: 'bold #0066bb',
Name.Property: 'bold #336699',
- Name.Module: 'bold #bb0066',
+ Name.Namespace: 'bold #bb0066',
Name.Builtin: '#003388',
Name.Variable: '#336699',
Name.Variable.Class: '#336699',
diff --git a/pygments/styles/perldoc.py b/pygments/styles/perldoc.py
index 300d8d25..8cd21a93 100644
--- a/pygments/styles/perldoc.py
+++ b/pygments/styles/perldoc.py
@@ -24,6 +24,7 @@ class PerldocStyle(Style):
styles = {
Comment: '#228B22',
Comment.Preproc: '#1e889b',
+ Comment.Special: '#8B008B bold',
String: '#CD5555',
String.Heredoc: '#1c7e71 italic',
diff --git a/pygments/styles/trac.py b/pygments/styles/trac.py
index 158b1f82..df28045a 100644
--- a/pygments/styles/trac.py
+++ b/pygments/styles/trac.py
@@ -21,6 +21,7 @@ class TracStyle(Style):
styles = {
Comment: 'italic #999988',
Comment.Preproc: 'bold noitalic #999999',
+ Comment.Special: 'bold #999999',
Operator: 'bold',
@@ -37,8 +38,8 @@ class TracStyle(Style):
Name.Class: 'bold #445588',
Name.Exception: 'bold #990000',
Name.Namespace: '#555555',
- Name.Variable: '#ff99ff',
- Name.Constant: '#ff99ff',
+ Name.Variable: '#008080',
+ Name.Constant: '#008080',
Name.Tag: '#000080',
Name.Attribute: '#008080',
Name.Entity: '#800080',
diff --git a/pygments/token.py b/pygments/token.py
index f9ab7442..64c67be4 100644
--- a/pygments/token.py
+++ b/pygments/token.py
@@ -21,6 +21,12 @@ class _TokenType(tuple):
buf.reverse()
return buf
+ def __contains__(self, val):
+ return self is val or (
+ type(val) is self.__class__ and
+ val[:len(self)] == self
+ )
+
def __getattr__(self, val):
if not val or not val[0].isupper():
return tuple.__getattr__(self, val)
diff --git a/pygments/util.py b/pygments/util.py
index d66eabd6..87ee817a 100644
--- a/pygments/util.py
+++ b/pygments/util.py
@@ -37,8 +37,8 @@ def get_bool_opt(options, optname, default=None):
return False
else:
raise OptionError('Invalid value %r for option %s; use '
- '1/0, yes/no, true/false, on/off' %
- string, optname)
+ '1/0, yes/no, true/false, on/off' % (
+ string, optname))
def get_int_opt(options, optname, default=None):
@@ -47,8 +47,8 @@ def get_int_opt(options, optname, default=None):
return int(string)
except ValueError:
raise OptionError('Invalid value %r for option %s; you '
- 'must give an integer value' %
- string, optname)
+ 'must give an integer value' % (
+ string, optname))
def get_list_opt(options, optname, default=None):
@@ -59,8 +59,8 @@ def get_list_opt(options, optname, default=None):
return list(val)
else:
raise OptionError('Invalid value %r for option %s; you '
- 'must give a list value' %
- val, optname)
+ 'must give a list value' % (
+ val, optname))
def make_analysator(f):