Diffstat (limited to 'docs')
-rwxr-xr-x | docs/generate.py | 486
-rw-r--r-- | docs/pygmentize.1 | 94
-rw-r--r-- | docs/src/api.txt | 270
-rw-r--r-- | docs/src/authors.txt | 5
-rw-r--r-- | docs/src/changelog.txt | 5
-rw-r--r-- | docs/src/cmdline.txt | 147
-rw-r--r-- | docs/src/filterdevelopment.txt | 70
-rw-r--r-- | docs/src/filters.txt | 42
-rw-r--r-- | docs/src/formatterdevelopment.txt | 169
-rw-r--r-- | docs/src/formatters.txt | 48
-rw-r--r-- | docs/src/index.txt | 69
-rw-r--r-- | docs/src/installation.txt | 71
-rw-r--r-- | docs/src/integrate.txt | 48
-rw-r--r-- | docs/src/java.txt | 70
-rw-r--r-- | docs/src/lexerdevelopment.txt | 603
-rw-r--r-- | docs/src/lexers.txt | 67
-rw-r--r-- | docs/src/moinmoin.txt | 39
-rw-r--r-- | docs/src/plugins.txt | 93
-rw-r--r-- | docs/src/quickstart.txt | 202
-rw-r--r-- | docs/src/rstdirective.txt | 22
-rw-r--r-- | docs/src/styles.txt | 143
-rw-r--r-- | docs/src/tokens.txt | 349
-rw-r--r-- | docs/src/unicode.txt | 49 |
23 files changed, 0 insertions(+), 3161 deletions(-)
diff --git a/docs/generate.py b/docs/generate.py deleted file mode 100755 index cd9438a8..00000000 --- a/docs/generate.py +++ /dev/null @@ -1,486 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - Generate Pygments Documentation - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Generates a bunch of html files containing the documentation. - - :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. - :license: BSD, see LICENSE for details. -""" - -from __future__ import print_function - -import os -import sys -from datetime import datetime -from cgi import escape - -from docutils import nodes -from docutils.parsers.rst import directives -from docutils.core import publish_parts -from docutils.writers import html4css1 - -from jinja2 import Template - -# try to use the right Pygments to build the docs -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) - -from pygments import highlight, __version__ -from pygments.lexers import get_lexer_by_name -from pygments.formatters import HtmlFormatter - - -LEXERDOC = ''' -`%s` -%s - :Short names: %s - :Filename patterns: %s - :Mimetypes: %s - -''' - -def generate_lexer_docs(): - from pygments.lexers import LEXERS - - out = [] - - modules = {} - moduledocstrings = {} - for classname, data in sorted(LEXERS.items(), key=lambda x: x[0]): - module = data[0] - mod = __import__(module, None, None, [classname]) - cls = getattr(mod, classname) - if not cls.__doc__: - print("Warning: %s does not have a docstring." % classname) - modules.setdefault(module, []).append(( - classname, - cls.__doc__, - ', '.join(data[2]) or 'None', - ', '.join(data[3]).replace('*', '\\*').replace('_', '\\') or 'None', - ', '.join(data[4]) or 'None')) - if module not in moduledocstrings: - moduledocstrings[module] = mod.__doc__ - - for module, lexers in sorted(modules.items(), key=lambda x: x[0]): - heading = moduledocstrings[module].splitlines()[4].strip().rstrip('.') - out.append('\n' + heading + '\n' + '-'*len(heading) + '\n') - for data in lexers: - out.append(LEXERDOC % data) - s = ''.join(out) - if isinstance(s, bytes): - s = s.decode('utf-8') - return s - -def generate_formatter_docs(): - from pygments.formatters import FORMATTERS - - out = [] - for cls, data in sorted(FORMATTERS.items(), - key=lambda x: x[0].__name__): - heading = cls.__name__ - out.append('`' + heading + '`\n' + '-'*(2+len(heading)) + '\n') - out.append(cls.__doc__) - out.append(''' - :Short names: %s - :Filename patterns: %s - - -''' % (', '.join(data[1]) or 'None', ', '.join(data[2]).replace('*', '\\*') or 'None')) - s = ''.join(out) - if isinstance(s, bytes): - s = s.decode('utf-8') - return s - -def generate_filter_docs(): - from pygments.filters import FILTERS - - out = [] - for name, cls in FILTERS.items(): - out.append(''' -`%s` -%s - :Name: %s -''' % (cls.__name__, cls.__doc__, name)) - s = ''.join(out) - if isinstance(s, bytes): - s = s.decode('utf-8') - return s - -def generate_changelog(): - fn = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', - 'CHANGES')) - f = open(fn) - result = [] - in_header = False - header = True - for line in f: - if header: - if not in_header and line.strip(): - in_header = True - elif in_header and not line.strip(): - header = False - else: - result.append(line.rstrip()) - f.close() - s = '\n'.join(result) - if isinstance(s, bytes): - s = s.decode('utf-8') - return s - -def generate_authors(): - fn = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', - 'AUTHORS')) - f = open(fn, 'rb') - r = 
f.read().rstrip().decode('utf-8') - f.close() - return r - -LEXERDOCS = generate_lexer_docs() -FORMATTERDOCS = generate_formatter_docs() -FILTERDOCS = generate_filter_docs() -CHANGELOG = generate_changelog() -AUTHORS = generate_authors() - - -PYGMENTS_FORMATTER = HtmlFormatter(style='pastie', cssclass='syntax') - -USAGE = '''\ -Usage: %s <mode> <destination> [<source.txt> ...] - -Generate either python or html files out of the documentation. - -Mode can either be python or html.\ -''' % sys.argv[0] - -TEMPLATE = '''\ -<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <title>{{ title }} — Pygments</title> - <meta http-equiv="content-type" content="text/html; charset=utf-8"> - <style type="text/css"> - {{ style }} - </style> -</head> -<body> - <div id="content"> - <h1 class="heading">Pygments</h1> - <h2 class="subheading">{{ title }}</h2> - {% if file_id != "index" %} - <a id="backlink" href="index.html">« Back To Index</a> - {% endif %} - {% if toc %} - <div class="toc"> - <h2>Contents</h2> - <ul class="contents"> - {% for key, value in toc %} - <li><a href="{{ key }}">{{ value }}</a></li> - {% endfor %} - </ul> - </div> - {% endif %} - {{ body }} - </div> -</body> -<!-- generated on: {{ generation_date }} - file id: {{ file_id }} --> -</html>\ -''' - -STYLESHEET = '''\ -body { - background-color: #f2f2f2; - margin: 0; - padding: 0; - font-family: 'Georgia', serif; - color: #111; -} - -#content { - background-color: white; - padding: 20px; - margin: 20px auto 20px auto; - max-width: 800px; - border: 4px solid #ddd; -} - -h1 { - font-weight: normal; - font-size: 40px; - color: #09839A; -} - -h2 { - font-weight: normal; - font-size: 30px; - color: #C73F00; -} - -h1.heading { - margin: 0 0 30px 0; -} - -h2.subheading { - margin: -30px 0 0 45px; -} - -h3 { - margin-top: 30px; -} - -table.docutils { - border-collapse: collapse; - border: 2px solid #aaa; - margin: 0.5em 1.5em 0.5em 1.5em; -} - -table.docutils td { - padding: 2px; - border: 1px solid #ddd; -} - -p, li, dd, dt, blockquote { - font-size: 15px; - color: #333; -} - -p { - line-height: 150%; - margin-bottom: 0; - margin-top: 10px; -} - -hr { - border-top: 1px solid #ccc; - border-bottom: 0; - border-right: 0; - border-left: 0; - margin-bottom: 10px; - margin-top: 20px; -} - -dl { - margin-left: 10px; -} - -li, dt { - margin-top: 5px; -} - -dt { - font-weight: bold; -} - -th { - text-align: left; -} - -a { - color: #990000; -} - -a:hover { - color: #c73f00; -} - -pre { - background-color: #f9f9f9; - border-top: 1px solid #ccc; - border-bottom: 1px solid #ccc; - padding: 5px; - font-size: 13px; - font-family: Bitstream Vera Sans Mono,monospace; -} - -tt { - font-size: 13px; - font-family: Bitstream Vera Sans Mono,monospace; - color: black; - padding: 1px 2px 1px 2px; - background-color: #f0f0f0; -} - -cite { - /* abusing <cite>, it's generated by ReST for `x` */ - font-size: 13px; - font-family: Bitstream Vera Sans Mono,monospace; - font-weight: bold; - font-style: normal; -} - -#backlink { - float: right; - font-size: 11px; - color: #888; -} - -div.toc { - margin: 0 0 10px 0; -} - -div.toc h2 { - font-size: 20px; -} -''' #' - - -def pygments_directive(name, arguments, options, content, lineno, - content_offset, block_text, state, state_machine): - try: - lexer = get_lexer_by_name(arguments[0]) - except ValueError: - # no lexer found - lexer = get_lexer_by_name('text') - parsed = highlight(u'\n'.join(content), lexer, PYGMENTS_FORMATTER) - return [nodes.raw('', parsed, 
format="html")] -pygments_directive.arguments = (1, 0, 1) -pygments_directive.content = 1 -directives.register_directive('sourcecode', pygments_directive) - - -def create_translator(link_style): - class Translator(html4css1.HTMLTranslator): - def visit_reference(self, node): - refuri = node.get('refuri') - if refuri is not None and '/' not in refuri and refuri.endswith('.txt'): - node['refuri'] = link_style(refuri[:-4]) - html4css1.HTMLTranslator.visit_reference(self, node) - return Translator - - -class DocumentationWriter(html4css1.Writer): - - def __init__(self, link_style): - html4css1.Writer.__init__(self) - self.translator_class = create_translator(link_style) - - def translate(self): - html4css1.Writer.translate(self) - # generate table of contents - contents = self.build_contents(self.document) - contents_doc = self.document.copy() - contents_doc.children = contents - contents_visitor = self.translator_class(contents_doc) - contents_doc.walkabout(contents_visitor) - self.parts['toc'] = self._generated_toc - - def build_contents(self, node, level=0): - sections = [] - i = len(node) - 1 - while i >= 0 and isinstance(node[i], nodes.section): - sections.append(node[i]) - i -= 1 - sections.reverse() - toc = [] - for section in sections: - try: - reference = nodes.reference('', '', refid=section['ids'][0], *section[0]) - except IndexError: - continue - ref_id = reference['refid'] - text = escape(reference.astext()) - toc.append((ref_id, text)) - - self._generated_toc = [('#%s' % href, caption) for href, caption in toc] - # no further processing - return [] - - -def generate_documentation(data, link_style): - writer = DocumentationWriter(link_style) - data = data.replace('[builtin_lexer_docs]', LEXERDOCS).\ - replace('[builtin_formatter_docs]', FORMATTERDOCS).\ - replace('[builtin_filter_docs]', FILTERDOCS).\ - replace('[changelog]', CHANGELOG).\ - replace('[authors]', AUTHORS) - parts = publish_parts( - data, - writer=writer, - settings_overrides={ - 'initial_header_level': 3, - 'field_name_limit': 50, - } - ) - return { - 'title': parts['title'], - 'body': parts['body'], - 'toc': parts['toc'] - } - - -def handle_python(filename, fp, dst): - now = datetime.now() - title = os.path.basename(filename)[:-4] - content = fp.read() - def urlize(href): - # create links for the pygments webpage - if href == 'index.txt': - return '/docs/' - else: - return '/docs/%s/' % href - parts = generate_documentation(content, urlize) - result = open(os.path.join(dst, title + '.py'), 'w') - result.write('# -*- coding: utf-8 -*-\n') - result.write('"""\n Pygments Documentation - %s\n' % title) - result.write(' %s\n\n' % ('~' * (24 + len(title)))) - result.write(' Generated on: %s\n"""\n\n' % now) - result.write('import datetime\n') - result.write('DATE = %r\n' % now) - result.write('TITLE = %r\n' % parts['title']) - result.write('TOC = %r\n' % parts['toc']) - result.write('BODY = %r\n' % parts['body']) - result.close() - - -def handle_html(filename, fp, dst): - now = datetime.now() - title = os.path.basename(filename)[:-4] - content = fp.read().decode('utf-8') - c = generate_documentation(content, (lambda x: './%s.html' % x)) - result = open(os.path.join(dst, title + '.html'), 'wb') - c['style'] = STYLESHEET + PYGMENTS_FORMATTER.get_style_defs('.syntax') - c['generation_date'] = now - c['file_id'] = title - t = Template(TEMPLATE) - result.write(t.render(c).encode('utf-8')) - result.close() - - -def run(handle_file, dst, sources=()): - path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')) - if 
not sources: - sources = [os.path.join(path, fn) for fn in os.listdir(path)] - if not os.path.isdir(dst): - os.makedirs(dst) - print('Making docs for Pygments %s in %s' % (__version__, dst)) - for fn in sources: - if not os.path.isfile(fn): - continue - print('Processing %s' % fn) - f = open(fn, 'rb') - try: - handle_file(fn, f, dst) - finally: - f.close() - - -def main(mode, dst='build/', *sources): - try: - handler = { - 'html': handle_html, - 'python': handle_python - }[mode] - except KeyError: - print('Error: unknown mode "%s"' % mode) - sys.exit(1) - run(handler, os.path.realpath(dst), sources) - - -if __name__ == '__main__': - if len(sys.argv) == 1: - print(USAGE) - else: - main(*sys.argv[1:]) diff --git a/docs/pygmentize.1 b/docs/pygmentize.1 deleted file mode 100644 index 71bb6f9c..00000000 --- a/docs/pygmentize.1 +++ /dev/null @@ -1,94 +0,0 @@ -.TH PYGMENTIZE 1 "February 15, 2007" - -.SH NAME -pygmentize \- highlights the input file - -.SH SYNOPSIS -.B \fBpygmentize\fP -.RI [-l\ \fI<lexer>\fP]\ [-F\ \fI<filter>\fP[:\fI<options>\fP]]\ [-f\ \fI<formatter>\fP] -.RI [-O\ \fI<options>\fP]\ [-P\ \fI<option=value>\fP]\ [-o\ \fI<outfile>\fP]\ [\fI<infile>\fP] -.br -.B \fBpygmentize\fP -.RI -S\ \fI<style>\fP\ -f\ \fI<formatter>\fP\ [-a\ \fI<arg>\fP]\ [-O\ \fI<options>\fP]\ [-P\ \fI<option=value>\fP] -.br -.B \fBpygmentize\fP -.RI -L\ [\fI<which>\fP\ ...] -.br -.B \fBpygmentize\fP -.RI -H\ \fI<type>\fP\ \fI<name>\fP -.br -.B \fBpygmentize\fP -.RI -h\ |\ -V - -.SH DESCRIPTION -Pygments is a generic syntax highlighter for general use in all kinds -of software such as forum systems, wikis or other applications that need to -prettify source code. -.PP -Its highlights are: - * a wide range of common languages and markup formats is supported - * special attention is paid to details, increasing quality by a fair amount - * support for new languages and formats are added easily - * a number of output formats, presently HTML, LaTeX and ANSI sequences - * it is usable as a command-line tool and as a library - * ... and it highlights even Brainfuck! -.PP -\fBpygmentize\fP is a command that uses Pygments to highlight the input file and -write the result to \fI<outfile>\fP. If no \fI<infile>\fP is given, stdin is used. -.SH OPTIONS -A summary of options is included below. -.TP -.B \-l \fI<lexer>\fP -Set the lexer name. If not given, the lexer is guessed from the extension of the -input file name (this obviously doesn't work if the input is stdin). -.TP -.B \-F \fI<filter>\fP[:\fI<options>\fP] -Add a filter to the token stream. You can give options in the same way as for --O after a colon (note: there must not be spaces around the colon). -This option can be given multiple times. -.TP -.B \-f \fI<formatter>\fP -Set the formatter name. If not given, it will be guessed from the extension of -the output file name. If no output file is given, the terminal formatter will be -used by default. -.TP -.B \-o \fI<outfile>\fP -Set output file. If not given, stdout is used. -.TP -.B \-O \fI<options>\fP -With this option, you can give the lexer and formatter a comma-separated list of -options, e.g. "-O bg=light,python=cool". Which options are valid for which -lexers and formatters can be found in the documentation. -This option can be given multiple times. -.TP -.B \-P \fI<option=value>\fP -This option adds lexer and formatter options like the -O option, but -you can only give one option per -P. That way, the option value may contain -commas and equals signs, which it can't with -O. 
-.TP -.B \-S \fI<style>\fP -Print out style definitions for style \fI<style>\fP and for formatter \fI<formatter>\fP. -The meaning of the argument given by -.B \-a \fI<arg>\fP -is formatter dependent and can be found in the documentation. -.TP -.B \-L [\fI<which>\fP ...] -List lexers, formatters, styles or filters. Set \fI<which>\fP to the thing you want -to list (e.g. "styles"), or omit it to list everything. -.TP -.B \-H \fI<type>\fP \fI<name>\fP -Print detailed help for the object \fI<name>\fP of type \fI<type>\fP, where \fI<type>\fP is one -of "lexer", "formatter" or "filter". -.TP -.B \-h -Show help screen. -.TP -.B \-V -Show version of the Pygments package. -.SH SEE ALSO -/usr/share/doc/python-pygments/index.html -.SH AUTHOR -pygmentize was written by Georg Brandl <g.brandl@gmx.net>. -.PP -This manual page was written by Piotr Ozarowski <ozarow@gmail.com>, -for the Debian project (but may be used by others). diff --git a/docs/src/api.txt b/docs/src/api.txt deleted file mode 100644 index 4276eea2..00000000 --- a/docs/src/api.txt +++ /dev/null @@ -1,270 +0,0 @@ -.. -*- mode: rst -*- - -===================== -The full Pygments API -===================== - -This page describes the Pygments API. - -High-level API -============== - -Functions from the `pygments` module: - -def `lex(code, lexer):` - Lex `code` with the `lexer` (must be a `Lexer` instance) - and return an iterable of tokens. Currently, this only calls - `lexer.get_tokens()`. - -def `format(tokens, formatter, outfile=None):` - Format a token stream (iterable of tokens) `tokens` with the - `formatter` (must be a `Formatter` instance). The result is - written to `outfile`, or if that is ``None``, returned as a - string. - -def `highlight(code, lexer, formatter, outfile=None):` - This is the most high-level highlighting function. - It combines `lex` and `format` in one function. - - -Functions from `pygments.lexers`: - -def `get_lexer_by_name(alias, **options):` - Return an instance of a `Lexer` subclass that has `alias` in its - aliases list. The lexer is given the `options` at its - instantiation. - - Will raise `pygments.util.ClassNotFound` if no lexer with that alias is - found. - -def `get_lexer_for_filename(fn, **options):` - Return a `Lexer` subclass instance that has a filename pattern - matching `fn`. The lexer is given the `options` at its - instantiation. - - Will raise `pygments.util.ClassNotFound` if no lexer for that filename is - found. - -def `get_lexer_for_mimetype(mime, **options):` - Return a `Lexer` subclass instance that has `mime` in its mimetype - list. The lexer is given the `options` at its instantiation. - - Will raise `pygments.util.ClassNotFound` if not lexer for that mimetype is - found. - -def `guess_lexer(text, **options):` - Return a `Lexer` subclass instance that's guessed from the text - in `text`. For that, the `analyse_text()` method of every known - lexer class is called with the text as argument, and the lexer - which returned the highest value will be instantiated and returned. - - `pygments.util.ClassNotFound` is raised if no lexer thinks it can handle the - content. - -def `guess_lexer_for_filename(filename, text, **options):` - As `guess_lexer()`, but only lexers which have a pattern in `filenames` - or `alias_filenames` that matches `filename` are taken into consideration. - - `pygments.util.ClassNotFound` is raised if no lexer thinks it can handle the - content. 
- -def `get_all_lexers():` - Return an iterable over all registered lexers, yielding tuples in the - format:: - - (longname, tuple of aliases, tuple of filename patterns, tuple of mimetypes) - - *New in Pygments 0.6.* - - -Functions from `pygments.formatters`: - -def `get_formatter_by_name(alias, **options):` - Return an instance of a `Formatter` subclass that has `alias` in its - aliases list. The formatter is given the `options` at its - instantiation. - - Will raise `pygments.util.ClassNotFound` if no formatter with that alias is - found. - -def `get_formatter_for_filename(fn, **options):` - Return a `Formatter` subclass instance that has a filename pattern - matching `fn`. The formatter is given the `options` at its - instantiation. - - Will raise `pygments.util.ClassNotFound` if no formatter for that filename - is found. - - -Functions from `pygments.styles`: - -def `get_style_by_name(name):` - Return a style class by its short name. The names of the builtin styles - are listed in `pygments.styles.STYLE_MAP`. - - Will raise `pygments.util.ClassNotFound` if no style of that name is found. - -def `get_all_styles():` - Return an iterable over all registered styles, yielding their names. - - *New in Pygments 0.6.* - - -Lexers -====== - -A lexer (derived from `pygments.lexer.Lexer`) has the following functions: - -def `__init__(self, **options):` - The constructor. Takes a \*\*keywords dictionary of options. - Every subclass must first process its own options and then call - the `Lexer` constructor, since it processes the `stripnl`, - `stripall` and `tabsize` options. - - An example looks like this: - - .. sourcecode:: python - - def __init__(self, **options): - self.compress = options.get('compress', '') - Lexer.__init__(self, **options) - - As these options must all be specifiable as strings (due to the - command line usage), there are various utility functions - available to help with that, see `Option processing`_. - -def `get_tokens(self, text):` - This method is the basic interface of a lexer. It is called by - the `highlight()` function. It must process the text and return an - iterable of ``(tokentype, value)`` pairs from `text`. - - Normally, you don't need to override this method. The default - implementation processes the `stripnl`, `stripall` and `tabsize` - options and then yields all tokens from `get_tokens_unprocessed()`, - with the ``index`` dropped. - -def `get_tokens_unprocessed(self, text):` - This method should process the text and return an iterable of - ``(index, tokentype, value)`` tuples where ``index`` is the starting - position of the token within the input text. - - This method must be overridden by subclasses. - -def `analyse_text(text):` - A static method which is called for lexer guessing. It should analyse - the text and return a float in the range from ``0.0`` to ``1.0``. - If it returns ``0.0``, the lexer will not be selected as the most - probable one, if it returns ``1.0``, it will be selected immediately. - -For a list of known tokens have a look at the `Tokens`_ page. - -A lexer also can have the following attributes (in fact, they are mandatory -except `alias_filenames`) that are used by the builtin lookup mechanism. - -`name` - Full name for the lexer, in human-readable form. - -`aliases` - A list of short, unique identifiers that can be used to lookup - the lexer from a list, e.g. using `get_lexer_by_name()`. - -`filenames` - A list of `fnmatch` patterns that match filenames which contain - content for this lexer. 
The patterns in this list should be unique among - all lexers. - -`alias_filenames` - A list of `fnmatch` patterns that match filenames which may or may not - contain content for this lexer. This list is used by the - `guess_lexer_for_filename()` function, to determine which lexers are - then included in guessing the correct one. That means that e.g. every - lexer for HTML and a template language should include ``\*.html`` in - this list. - -`mimetypes` - A list of MIME types for content that can be lexed with this - lexer. - - -.. _Tokens: tokens.txt - - -Formatters -========== - -A formatter (derived from `pygments.formatter.Formatter`) has the following -functions: - -def `__init__(self, **options):` - As with lexers, this constructor processes options and then must call - the base class `__init__`. - - The `Formatter` class recognizes the options `style`, `full` and - `title`. It is up to the formatter class whether it uses them. - -def `get_style_defs(self, arg=''):` - This method must return statements or declarations suitable to define - the current style for subsequent highlighted text (e.g. CSS classes - in the `HTMLFormatter`). - - The optional argument `arg` can be used to modify the generation and - is formatter dependent (it is standardized because it can be given on - the command line). - - This method is called by the ``-S`` `command-line option`_, the `arg` - is then given by the ``-a`` option. - -def `format(self, tokensource, outfile):` - This method must format the tokens from the `tokensource` iterable and - write the formatted version to the file object `outfile`. - - Formatter options can control how exactly the tokens are converted. - -.. _command-line option: cmdline.txt - -A formatter must have the following attributes that are used by the -builtin lookup mechanism. (*New in Pygments 0.7.*) - -`name` - Full name for the formatter, in human-readable form. - -`aliases` - A list of short, unique identifiers that can be used to lookup - the formatter from a list, e.g. using `get_formatter_by_name()`. - -`filenames` - A list of `fnmatch` patterns that match filenames for which this formatter - can produce output. The patterns in this list should be unique among - all formatters. - - -Option processing -================= - -The `pygments.util` module has some utility functions usable for option -processing: - -class `OptionError` - This exception will be raised by all option processing functions if - the type or value of the argument is not correct. - -def `get_bool_opt(options, optname, default=None):` - Interpret the key `optname` from the dictionary `options` - as a boolean and return it. Return `default` if `optname` - is not in `options`. - - The valid string values for ``True`` are ``1``, ``yes``, - ``true`` and ``on``, the ones for ``False`` are ``0``, - ``no``, ``false`` and ``off`` (matched case-insensitively). - -def `get_int_opt(options, optname, default=None):` - As `get_bool_opt`, but interpret the value as an integer. - -def `get_list_opt(options, optname, default=None):` - If the key `optname` from the dictionary `options` is a string, - split it at whitespace and return it. If it is already a list - or a tuple, it is returned as a list. - -def `get_choice_opt(options, optname, allowed, default=None):` - If the key `optname` from the dictionary is not in the sequence - `allowed`, raise an error, otherwise return it. 
*New in Pygments 0.8.* diff --git a/docs/src/authors.txt b/docs/src/authors.txt deleted file mode 100644 index c8c532aa..00000000 --- a/docs/src/authors.txt +++ /dev/null @@ -1,5 +0,0 @@ -======= -Authors -======= - -[authors] diff --git a/docs/src/changelog.txt b/docs/src/changelog.txt deleted file mode 100644 index 6caf0a32..00000000 --- a/docs/src/changelog.txt +++ /dev/null @@ -1,5 +0,0 @@ -========= -Changelog -========= - -[changelog] diff --git a/docs/src/cmdline.txt b/docs/src/cmdline.txt deleted file mode 100644 index a48a5c27..00000000 --- a/docs/src/cmdline.txt +++ /dev/null @@ -1,147 +0,0 @@ -.. -*- mode: rst -*- - -====================== -Command Line Interface -====================== - -You can use Pygments from the shell, provided you installed the `pygmentize` -script:: - - $ pygmentize test.py - print "Hello World" - -will print the file test.py to standard output, using the Python lexer -(inferred from the file name extension) and the terminal formatter (because -you didn't give an explicit formatter name). - -If you want HTML output:: - - $ pygmentize -f html -l python -o test.html test.py - -As you can see, the -l option explicitly selects a lexer. As seen above, if you -give an input file name and it has an extension that Pygments recognizes, you can -omit this option. - -The ``-o`` option gives an output file name. If it is not given, output is -written to stdout. - -The ``-f`` option selects a formatter (as with ``-l``, it can also be omitted -if an output file name is given and has a supported extension). -If no output file name is given and ``-f`` is omitted, the -`TerminalFormatter` is used. - -The above command could therefore also be given as:: - - $ pygmentize -o test.html test.py - -To create a full HTML document, including line numbers and stylesheet (using the -"emacs" style), highlighting the Python file ``test.py`` to ``test.html``:: - - $ pygmentize -O full,style=emacs -o test.html test.py - - -Options and filters -------------------- - -Lexer and formatter options can be given using the ``-O`` option:: - - $ pygmentize -f html -O style=colorful,linenos=1 -l python test.py - -Be sure to enclose the option string in quotes if it contains any special shell -characters, such as spaces or expansion wildcards like ``*``. If an option -expects a list value, separate the list entries with spaces (you'll have to -quote the option value in this case too, so that the shell doesn't split it). - -Since the ``-O`` option argument is split at commas and expects the split values -to be of the form ``name=value``, you can't give an option value that contains -commas or equals signs. Therefore, an option ``-P`` is provided (as of Pygments -0.9) that works like ``-O`` but can only pass one option per ``-P``. Its value -can then contain all characters:: - - $ pygmentize -P "heading=Pygments, the Python highlighter" ... - -Filters are added to the token stream using the ``-F`` option:: - - $ pygmentize -f html -l pascal -F keywordcase:case=upper main.pas - -As you see, options for the filter are given after a colon. As for ``-O``, the -filter name and options must be one shell word, so there may not be any spaces -around the colon. - - -Generating styles ------------------ - -Formatters normally don't output full style information. For example, the HTML -formatter by default only outputs ``<span>`` tags with ``class`` attributes. -Therefore, there's a special ``-S`` option for generating style definitions. 
-Usage is as follows:: - - $ pygmentize -f html -S colorful -a .syntax - -generates a CSS style sheet (because you selected the HTML formatter) for -the "colorful" style prepending a ".syntax" selector to all style rules. - -For an explanation what ``-a`` means for `a particular formatter`_, look for -the `arg` argument for the formatter's `get_style_defs()` method. - - -Getting lexer names -------------------- - -*New in Pygments 1.0.* - -The ``-N`` option guesses a lexer name for a given filename, so that :: - - $ pygmentize -N setup.py - -will print out ``python``. It won't highlight anything yet. If no specific -lexer is known for that filename, ``text`` is printed. - - -Getting help ------------- - -The ``-L`` option lists lexers, formatters, along with their short -names and supported file name extensions, styles and filters. If you want to see -only one category, give it as an argument:: - - $ pygmentize -L filters - -will list only all installed filters. - -The ``-H`` option will give you detailed information (the same that can be found -in this documentation) about a lexer, formatter or filter. Usage is as follows:: - - $ pygmentize -H formatter html - -will print the help for the HTML formatter, while :: - - $ pygmentize -H lexer python - -will print the help for the Python lexer, etc. - - -A note on encodings -------------------- - -*New in Pygments 0.9.* - -Pygments tries to be smart regarding encodings in the formatting process: - -* If you give an ``encoding`` option, it will be used as the input and - output encoding. - -* If you give an ``outencoding`` option, it will override ``encoding`` - as the output encoding. - -* If you don't give an encoding and have given an output file, the default - encoding for lexer and formatter is ``latin1`` (which will pass through - all non-ASCII characters). - -* If you don't give an encoding and haven't given an output file (that means - output is written to the console), the default encoding for lexer and - formatter is the terminal encoding (`sys.stdout.encoding`). - - -.. _a particular formatter: formatters.txt diff --git a/docs/src/filterdevelopment.txt b/docs/src/filterdevelopment.txt deleted file mode 100644 index c60e1e84..00000000 --- a/docs/src/filterdevelopment.txt +++ /dev/null @@ -1,70 +0,0 @@ -.. -*- mode: rst -*- - -===================== -Write your own filter -===================== - -*New in Pygments 0.7.* - -Writing own filters is very easy. All you have to do is to subclass -the `Filter` class and override the `filter` method. Additionally a -filter is instanciated with some keyword arguments you can use to -adjust the behavior of your filter. - - -Subclassing Filters -=================== - -As an example, we write a filter that converts all `Name.Function` tokens -to normal `Name` tokens to make the output less colorful. - -.. sourcecode:: python - - from pygments.util import get_bool_opt - from pygments.token import Name - from pygments.filter import Filter - - class UncolorFilter(Filter): - - def __init__(self, **options): - Filter.__init__(self, **options) - self.class_too = get_bool_opt(options, 'classtoo') - - def filter(self, lexer, stream): - for ttype, value in stream: - if ttype is Name.Function or (self.class_too and - ttype is Name.Class): - ttype = Name - yield ttype, value - -Some notes on the `lexer` argument: that can be quite confusing since it doesn't -need to be a lexer instance. If a filter was added by using the `add_filter()` -function of lexers, that lexer is registered for the filter. 
In that case -`lexer` will refer to the lexer that has registered the filter. It *can* be used -to access options passed to a lexer. Because it could be `None` you always have -to check for that case if you access it. - - -Using a decorator -================= - -You can also use the `simplefilter` decorator from the `pygments.filter` module: - -.. sourcecode:: python - - from pygments.util import get_bool_opt - from pygments.token import Name - from pygments.filter import simplefilter - - - @simplefilter - def uncolor(lexer, stream, options): - class_too = get_bool_opt(options, 'classtoo') - for ttype, value in stream: - if ttype is Name.Function or (class_too and - ttype is Name.Class): - ttype = Name - yield ttype, value - -The decorator automatically subclasses an internal filter class and uses the -decorated function for filtering. diff --git a/docs/src/filters.txt b/docs/src/filters.txt deleted file mode 100644 index 522f6330..00000000 --- a/docs/src/filters.txt +++ /dev/null @@ -1,42 +0,0 @@ -.. -*- mode: rst -*- - -======= -Filters -======= - -*New in Pygments 0.7.* - -You can filter token streams coming from lexers to improve or annotate the -output. For example, you can highlight special words in comments, convert -keywords to upper or lowercase to enforce a style guide etc. - -To apply a filter, you can use the `add_filter()` method of a lexer: - -.. sourcecode:: pycon - - >>> from pygments.lexers import PythonLexer - >>> l = PythonLexer() - >>> # add a filter given by a string and options - >>> l.add_filter('codetagify', case='lower') - >>> l.filters - [<pygments.filters.CodeTagFilter object at 0xb785decc>] - >>> from pygments.filters import KeywordCaseFilter - >>> # or give an instance - >>> l.add_filter(KeywordCaseFilter(case='lower')) - -The `add_filter()` method takes keyword arguments which are forwarded to -the constructor of the filter. - -To get a list of all registered filters by name, you can use the -`get_all_filters()` function from the `pygments.filters` module that returns an -iterable for all known filters. - -If you want to write your own filter, have a look at `Write your own filter`_. - -.. _Write your own filter: filterdevelopment.txt - - -Builtin Filters -=============== - -[builtin_filter_docs] diff --git a/docs/src/formatterdevelopment.txt b/docs/src/formatterdevelopment.txt deleted file mode 100644 index 83a13b6a..00000000 --- a/docs/src/formatterdevelopment.txt +++ /dev/null @@ -1,169 +0,0 @@ -.. -*- mode: rst -*- - -======================== -Write your own formatter -======================== - -As well as creating `your own lexer <lexerdevelopment.txt>`_, writing a new -formatter for Pygments is easy and straightforward. - -A formatter is a class that is initialized with some keyword arguments (the -formatter options) and that must provides a `format()` method. -Additionally a formatter should provide a `get_style_defs()` method that -returns the style definitions from the style in a form usable for the -formatter's output format. - - -Quickstart -========== - -The most basic formatter shipped with Pygments is the `NullFormatter`. It just -sends the value of a token to the output stream: - -.. sourcecode:: python - - from pygments.formatter import Formatter - - class NullFormatter(Formatter): - def format(self, tokensource, outfile): - for ttype, value in tokensource: - outfile.write(value) - -As you can see, the `format()` method is passed two parameters: `tokensource` -and `outfile`. 
The first is an iterable of ``(token_type, value)`` tuples, -the latter a file like object with a `write()` method. - -Because the formatter is that basic it doesn't overwrite the `get_style_defs()` -method. - - -Styles -====== - -Styles aren't instantiated but their metaclass provides some class functions -so that you can access the style definitions easily. - -Styles are iterable and yield tuples in the form ``(ttype, d)`` where `ttype` -is a token and `d` is a dict with the following keys: - -``'color'`` - Hexadecimal color value (eg: ``'ff0000'`` for red) or `None` if not - defined. - -``'bold'`` - `True` if the value should be bold - -``'italic'`` - `True` if the value should be italic - -``'underline'`` - `True` if the value should be underlined - -``'bgcolor'`` - Hexadecimal color value for the background (eg: ``'eeeeeee'`` for light - gray) or `None` if not defined. - -``'border'`` - Hexadecimal color value for the border (eg: ``'0000aa'`` for a dark - blue) or `None` for no border. - -Additional keys might appear in the future, formatters should ignore all keys -they don't support. - - -HTML 3.2 Formatter -================== - -For an more complex example, let's implement a HTML 3.2 Formatter. We don't -use CSS but inline markup (``<u>``, ``<font>``, etc). Because this isn't good -style this formatter isn't in the standard library ;-) - -.. sourcecode:: python - - from pygments.formatter import Formatter - - class OldHtmlFormatter(Formatter): - - def __init__(self, **options): - Formatter.__init__(self, **options) - - # create a dict of (start, end) tuples that wrap the - # value of a token so that we can use it in the format - # method later - self.styles = {} - - # we iterate over the `_styles` attribute of a style item - # that contains the parsed style values. - for token, style in self.style: - start = end = '' - # a style item is a tuple in the following form: - # colors are readily specified in hex: 'RRGGBB' - if style['color']: - start += '<font color="#%s">' % style['color'] - end = '</font>' + end - if style['bold']: - start += '<b>' - end = '</b>' + end - if style['italic']: - start += '<i>' - end = '</i>' + end - if style['underline']: - start += '<u>' - end = '</u>' + end - self.styles[token] = (start, end) - - def format(self, tokensource, outfile): - # lastval is a string we use for caching - # because it's possible that an lexer yields a number - # of consecutive tokens with the same token type. - # to minimize the size of the generated html markup we - # try to join the values of same-type tokens here - lastval = '' - lasttype = None - - # wrap the whole output with <pre> - outfile.write('<pre>') - - for ttype, value in tokensource: - # if the token type doesn't exist in the stylemap - # we try it with the parent of the token type - # eg: parent of Token.Literal.String.Double is - # Token.Literal.String - while ttype not in self.styles: - ttype = ttype.parent - if ttype == lasttype: - # the current token type is the same of the last - # iteration. cache it - lastval += value - else: - # not the same token as last iteration, but we - # have some data in the buffer. 
wrap it with the - # defined style and write it to the output file - if lastval: - stylebegin, styleend = self.styles[lasttype] - outfile.write(stylebegin + lastval + styleend) - # set lastval/lasttype to current values - lastval = value - lasttype = ttype - - # if something is left in the buffer, write it to the - # output file, then close the opened <pre> tag - if lastval: - stylebegin, styleend = self.styles[lasttype] - outfile.write(stylebegin + lastval + styleend) - outfile.write('</pre>\n') - -The comments should explain it. Again, this formatter doesn't override the -`get_style_defs()` method. If we would have used CSS classes instead of -inline HTML markup, we would need to generate the CSS first. For that -purpose the `get_style_defs()` method exists: - - -Generating Style Definitions -============================ - -Some formatters like the `LatexFormatter` and the `HtmlFormatter` don't -output inline markup but reference either macros or css classes. Because -the definitions of those are not part of the output, the `get_style_defs()` -method exists. It is passed one parameter (if it's used and how it's used -is up to the formatter) and has to return a string or ``None``. diff --git a/docs/src/formatters.txt b/docs/src/formatters.txt deleted file mode 100644 index 7a590648..00000000 --- a/docs/src/formatters.txt +++ /dev/null @@ -1,48 +0,0 @@ -.. -*- mode: rst -*- - -==================== -Available formatters -==================== - -This page lists all builtin formatters. - -Common options -============== - -All formatters support these options: - -`encoding` - *New in Pygments 0.6.* - - If given, must be an encoding name (such as ``"utf-8"``). This will - be used to convert the token strings (which are Unicode strings) - to byte strings in the output (default: ``None``). - It will also be written in an encoding declaration suitable for the - document format if the `full` option is given (e.g. a ``meta - content-type`` directive in HTML or an invocation of the `inputenc` - package in LaTeX). - - If this is ``""`` or ``None``, Unicode strings will be written - to the output file, which most file-like objects do not support. - For example, `pygments.highlight()` will return a Unicode string if - called with no `outfile` argument and a formatter that has `encoding` - set to ``None`` because it uses a `StringIO.StringIO` object that - supports Unicode arguments to `write()`. Using a regular file object - wouldn't work. - -`outencoding` - *New in Pygments 0.7.* - - When using Pygments from the command line, any `encoding` option given is - passed to the lexer and the formatter. This is sometimes not desirable, - for example if you want to set the input encoding to ``"guess"``. - Therefore, `outencoding` has been introduced which overrides `encoding` - for the formatter if given. - - -Formatter classes -================= - -All these classes are importable from `pygments.formatters`. - -[builtin_formatter_docs] diff --git a/docs/src/index.txt b/docs/src/index.txt deleted file mode 100644 index d24785ac..00000000 --- a/docs/src/index.txt +++ /dev/null @@ -1,69 +0,0 @@ -.. -*- mode: rst -*- - -======== -Overview -======== - -Welcome to the Pygments documentation. 
- -- Starting with Pygments - - - `Installation <installation.txt>`_ - - - `Introduction and Quickstart <quickstart.txt>`_ - - - `Command line interface <cmdline.txt>`_ - -- Builtin components - - - `Lexers <lexers.txt>`_ - - - `Filters <filters.txt>`_ - - - `Formatters <formatters.txt>`_ - - - `Styles <styles.txt>`_ - -- Reference - - - `Unicode and encodings <unicode.txt>`_ - - - `Builtin tokens <tokens.txt>`_ - - - `API documentation <api.txt>`_ - -- Hacking for Pygments - - - `Write your own lexer <lexerdevelopment.txt>`_ - - - `Write your own formatter <formatterdevelopment.txt>`_ - - - `Write your own filter <filterdevelopment.txt>`_ - - - `Register plugins <plugins.txt>`_ - -- Hints and Tricks - - - `Using Pygments in ReST documents <rstdirective.txt>`_ - - - `Using Pygments with MoinMoin <moinmoin.txt>`_ - - - `Using Pygments in other contexts <integrate.txt>`_ - -- About Pygments - - - `Changelog <changelog.txt>`_ - - - `Authors <authors.txt>`_ - - --------------- - -If you find bugs or have suggestions for the documentation, please -look `here`_ for info on how to contact the team. - -You can download an offline version of this documentation from the -`download page`_. - -.. _here: http://pygments.org/contribute/ -.. _download page: http://pygments.org/download/ diff --git a/docs/src/installation.txt b/docs/src/installation.txt deleted file mode 100644 index 17a9aad5..00000000 --- a/docs/src/installation.txt +++ /dev/null @@ -1,71 +0,0 @@ -.. -*- mode: rst -*- - -============ -Installation -============ - -Pygments requires at least Python 2.4 to work correctly. Just to clarify: -there *won't* ever be support for Python versions below 2.4. However, there -are no other dependencies. - - -Installing a released version -============================= - -As a Python egg (via easy_install) ----------------------------------- - -You can install the most recent Pygments version using `easy_install`_:: - - sudo easy_install Pygments - -This will install a Pygments egg in your Python installation's site-packages -directory. - - -From the tarball release -------------------------- - -1. Download the most recent tarball from the `download page`_ -2. Unpack the tarball -3. ``sudo python setup.py install`` - -Note that the last command will automatically download and install -`setuptools`_ if you don't already have it installed. This requires a working -internet connection. - -This will install Pygments into your Python installation's site-packages directory. - - -Installing the development version -================================== - -If you want to play around with the code ----------------------------------------- - -1. Install `Mercurial`_ -2. ``hg clone http://bitbucket.org/birkenfeld/pygments-main pygments`` -3. ``cd pygments`` -4. ``ln -s pygments /usr/lib/python2.X/site-packages`` -5. ``ln -s pygmentize /usr/local/bin`` - -As an alternative to steps 4 and 5 you can also do ``python setup.py develop`` -which will install the package via setuptools in development mode. - -.. - If you just want the latest features and use them - ------------------------------------------------- - - :: - - sudo easy_install Pygments==dev - - This will install a Pygments egg containing the latest Subversion trunk code - in your Python installation's site-packages directory. Every time the command - is run, the sources are updated from Subversion. - - -.. _download page: http://pygments.org/download/ -.. _setuptools: http://peak.telecommunity.com/DevCenter/setuptools -.. 
_easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall -.. _Mercurial: http://selenic.com/mercurial/ diff --git a/docs/src/integrate.txt b/docs/src/integrate.txt deleted file mode 100644 index 6f8c1253..00000000 --- a/docs/src/integrate.txt +++ /dev/null @@ -1,48 +0,0 @@ -.. -*- mode: rst -*- - -=================================== -Using Pygments in various scenarios -=================================== - -PyGtk ------ - -Armin has written a piece of sample code that shows how to create a Gtk -`TextBuffer` object containing Pygments-highlighted text. - -See the article here: http://lucumr.pocoo.org/cogitations/2007/05/30/pygments-gtk-rendering/ - -Wordpress ---------- - -He also has a snippet that shows how to use Pygments in WordPress: - -http://lucumr.pocoo.org/cogitations/2007/05/30/pygments-in-wordpress/ - -Markdown --------- - -Since Pygments 0.9, the distribution ships Markdown_ preprocessor sample code -that uses Pygments to render source code in `external/markdown-processor.py`. -You can copy and adapt it to your liking. - -.. _Markdown: http://www.freewisdom.org/projects/python-markdown/ - -TextMate --------- - -Antonio Cangiano has created a Pygments bundle for TextMate that allows to -colorize code via a simple menu option. It can be found here_. - -.. _here: http://antoniocangiano.com/2008/10/28/pygments-textmate-bundle/ - -Bash completion ---------------- - -The source distribution contains a file ``external/pygments.bashcomp`` that -sets up completion for the ``pygmentize`` command in bash. - -Java ----- - -See the `Java quickstart <java.txt>`_ document. diff --git a/docs/src/java.txt b/docs/src/java.txt deleted file mode 100644 index 5eb6196a..00000000 --- a/docs/src/java.txt +++ /dev/null @@ -1,70 +0,0 @@ -===================== -Use Pygments in Java -===================== - -Thanks to `Jython <http://www.jython.org>`__ it is possible to use Pygments in -Java. - -This page is a simple tutorial to get an idea of how this is working. You can -then look at the `Jython documentation <http://www.jython.org/docs/>`__ for more -advanced use. - -Since version 1.5, Pygments is deployed on `Maven Central -<http://repo1.maven.org/maven2/org/pygments/pygments/>`__ as a JAR so is Jython -which makes it a lot easier to create the Java project. - -Here is an example of a `Maven <http://www.maven.org>`__ ``pom.xml`` file for a -project running Pygments: - -.. sourcecode:: xml - - <?xml version="1.0" encoding="UTF-8"?> - - <project xmlns="http://maven.apache.org/POM/4.0.0" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 - http://maven.apache.org/maven-v4_0_0.xsd"> - <modelVersion>4.0.0</modelVersion> - <groupId>example</groupId> - <artifactId>example</artifactId> - <version>1.0-SNAPSHOT</version> - <dependencies> - <dependency> - <groupId>org.python</groupId> - <artifactId>jython-standalone</artifactId> - <version>2.5.3</version> - </dependency> - <dependency> - <groupId>org.pygments</groupId> - <artifactId>pygments</artifactId> - <version>1.5</version> - <scope>runtime</scope> - </dependency> - </dependencies> - </project> - -The following Java example: - -.. 
sourcecode:: java - - PythonInterpreter interpreter = new PythonInterpreter(); - - // Set a variable with the content you want to work with - interpreter.set("code", code); - - // Simple use Pygments as you would in Python - interpreter.exec("from pygments import highlight\n" - + "from pygments.lexers import PythonLexer\n" - + "from pygments.formatters import HtmlFormatter\n" - + "\nresult = highlight(code, PythonLexer(), HtmlFormatter())"); - - // Get the result that has been set in a variable - System.out.println(interpreter.get("result", String.class)); - -will print something like: - -.. sourcecode:: html - - <div class="highlight"> - <pre><span class="k">print</span> <span class="s">"Hello World"</span></pre> - </div> diff --git a/docs/src/lexerdevelopment.txt b/docs/src/lexerdevelopment.txt deleted file mode 100644 index 730a08b2..00000000 --- a/docs/src/lexerdevelopment.txt +++ /dev/null @@ -1,603 +0,0 @@ -.. -*- mode: rst -*- - -==================== -Write your own lexer -==================== - -If a lexer for your favorite language is missing in the Pygments package, you can -easily write your own and extend Pygments. - -All you need can be found inside the `pygments.lexer` module. As you can read in -the `API documentation <api.txt>`_, a lexer is a class that is initialized with -some keyword arguments (the lexer options) and that provides a -`get_tokens_unprocessed()` method which is given a string or unicode object with -the data to parse. - -The `get_tokens_unprocessed()` method must return an iterator or iterable -containing tuples in the form ``(index, token, value)``. Normally you don't need -to do this since there are numerous base lexers you can subclass. - - -RegexLexer -========== - -A very powerful (but quite easy to use) lexer is the `RegexLexer`. This lexer -base class allows you to define lexing rules in terms of *regular expressions* -for different *states*. - -States are groups of regular expressions that are matched against the input -string at the *current position*. If one of these expressions matches, a -corresponding action is performed (normally yielding a token with a specific -type), the current position is set to where the last match ended and the -matching process continues with the first regex of the current state. - -Lexer states are kept in a state stack: each time a new state is entered, the -new state is pushed onto the stack. The most basic lexers (like the -`DiffLexer`) just need one state. - -Each state is defined as a list of tuples in the form (`regex`, `action`, -`new_state`) where the last item is optional. In the most basic form, `action` -is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a -token with the match text and type `tokentype` and push `new_state` on the state -stack. If the new state is ``'#pop'``, the topmost state is popped from the -stack instead. (To pop more than one state, use ``'#pop:2'`` and so on.) -``'#push'`` is a synonym for pushing the current state on the -stack. - -The following example shows the `DiffLexer` from the builtin lexers. Note that -it contains some additional attributes `name`, `aliases` and `filenames` which -aren't required for a lexer. They are used by the builtin lexer lookup -functions. - -.. 
sourcecode:: python - - from pygments.lexer import RegexLexer - from pygments.token import * - - class DiffLexer(RegexLexer): - name = 'Diff' - aliases = ['diff'] - filenames = ['*.diff'] - - tokens = { - 'root': [ - (r' .*\n', Text), - (r'\+.*\n', Generic.Inserted), - (r'-.*\n', Generic.Deleted), - (r'@.*\n', Generic.Subheading), - (r'Index.*\n', Generic.Heading), - (r'=.*\n', Generic.Heading), - (r'.*\n', Text), - ] - } - -As you can see this lexer only uses one state. When the lexer starts scanning -the text, it first checks if the current character is a space. If this is true -it scans everything until newline and returns the parsed data as `Text` token. - -If this rule doesn't match, it checks if the current char is a plus sign. And -so on. - -If no rule matches at the current position, the current char is emitted as an -`Error` token that indicates a parsing error, and the position is increased by -1. - - -Adding and testing a new lexer -============================== - -To make pygments aware of your new lexer, you have to perform the following -steps: - -First, change to the current directory containing the pygments source code: - -.. sourcecode:: console - - $ cd .../pygments-main - -Next, make sure the lexer is known from outside of the module. All modules in -the ``pygments.lexers`` specify ``__all__``. For example, ``other.py`` sets: - -.. sourcecode:: python - - __all__ = ['BrainfuckLexer', 'BefungeLexer', ...] - -Simply add the name of your lexer class to this list. - -Finally the lexer can be made publically known by rebuilding the lexer -mapping: - -.. sourcecode:: console - - $ make mapfiles - -To test the new lexer, store an example file with the proper extension in -``tests/examplefiles``. For example, to test your ``DiffLexer``, add a -``tests/examplefiles/example.diff`` containing a sample diff output. - -Now you can use pygmentize to render your example to HTML: - -.. sourcecode:: console - - $ ./pygmentize -O full -f html -o /tmp/example.html tests/examplefiles/example.diff - -Note that this explicitely calls the ``pygmentize`` in the current directory -by preceding it with ``./``. This ensures your modifications are used. -Otherwise a possibly already installed, unmodified version without your new -lexer would have been called from the system search path (``$PATH``). - -To view the result, open ``/tmp/example.html`` in your browser. - -Once the example renders as expected, you should run the complete test suite: - -.. sourcecode:: console - - $ make test - - -Regex Flags -=========== - -You can either define regex flags in the regex (``r'(?x)foo bar'``) or by adding -a `flags` attribute to your lexer class. If no attribute is defined, it defaults -to `re.MULTILINE`. For more informations about regular expression flags see the -`regular expressions`_ help page in the python documentation. - -.. _regular expressions: http://docs.python.org/lib/re-syntax.html - - -Scanning multiple tokens at once -================================ - -Here is a more complex lexer that highlights INI files. INI files consist of -sections, comments and key = value pairs: - -.. 
sourcecode:: python - - from pygments.lexer import RegexLexer, bygroups - from pygments.token import * - - class IniLexer(RegexLexer): - name = 'INI' - aliases = ['ini', 'cfg'] - filenames = ['*.ini', '*.cfg'] - - tokens = { - 'root': [ - (r'\s+', Text), - (r';.*?$', Comment), - (r'\[.*?\]$', Keyword), - (r'(.*?)(\s*)(=)(\s*)(.*?)$', - bygroups(Name.Attribute, Text, Operator, Text, String)) - ] - } - -The lexer first looks for whitespace, comments and section names. And later it -looks for a line that looks like a key, value pair, separated by an ``'='`` -sign, and optional whitespace. - -The `bygroups` helper makes sure that each group is yielded with a different -token type. First the `Name.Attribute` token, then a `Text` token for the -optional whitespace, after that a `Operator` token for the equals sign. Then a -`Text` token for the whitespace again. The rest of the line is returned as -`String`. - -Note that for this to work, every part of the match must be inside a capturing -group (a ``(...)``), and there must not be any nested capturing groups. If you -nevertheless need a group, use a non-capturing group defined using this syntax: -``r'(?:some|words|here)'`` (note the ``?:`` after the beginning parenthesis). - -If you find yourself needing a capturing group inside the regex which -shouldn't be part of the output but is used in the regular expressions for -backreferencing (eg: ``r'(<(foo|bar)>)(.*?)(</\2>)'``), you can pass `None` -to the bygroups function and it will skip that group will be skipped in the -output. - - -Changing states -=============== - -Many lexers need multiple states to work as expected. For example, some -languages allow multiline comments to be nested. Since this is a recursive -pattern it's impossible to lex just using regular expressions. - -Here is the solution: - -.. sourcecode:: python - - from pygments.lexer import RegexLexer - from pygments.token import * - - class ExampleLexer(RegexLexer): - name = 'Example Lexer with states' - - tokens = { - 'root': [ - (r'[^/]+', Text), - (r'/\*', Comment.Multiline, 'comment'), - (r'//.*?$', Comment.Singleline), - (r'/', Text) - ], - 'comment': [ - (r'[^*/]', Comment.Multiline), - (r'/\*', Comment.Multiline, '#push'), - (r'\*/', Comment.Multiline, '#pop'), - (r'[*/]', Comment.Multiline) - ] - } - -This lexer starts lexing in the ``'root'`` state. It tries to match as much as -possible until it finds a slash (``'/'``). If the next character after the slash -is a star (``'*'``) the `RegexLexer` sends those two characters to the output -stream marked as `Comment.Multiline` and continues parsing with the rules -defined in the ``'comment'`` state. - -If there wasn't a star after the slash, the `RegexLexer` checks if it's a -singleline comment (eg: followed by a second slash). If this also wasn't the -case it must be a single slash (the separate regex for a single slash must also -be given, else the slash would be marked as an error token). - -Inside the ``'comment'`` state, we do the same thing again. Scan until the lexer -finds a star or slash. If it's the opening of a multiline comment, push the -``'comment'`` state on the stack and continue scanning, again in the -``'comment'`` state. Else, check if it's the end of the multiline comment. If -yes, pop one state from the stack. - -Note: If you pop from an empty stack you'll get an `IndexError`. (There is an -easy way to prevent this from happening: don't ``'#pop'`` in the root state). 
- -If the `RegexLexer` encounters a newline that is flagged as an error token, the -stack is emptied and the lexer continues scanning in the ``'root'`` state. This -helps producing error-tolerant highlighting for erroneous input, e.g. when a -single-line string is not closed. - - -Advanced state tricks -===================== - -There are a few more things you can do with states: - -- You can push multiple states onto the stack if you give a tuple instead of a - simple string as the third item in a rule tuple. For example, if you want to - match a comment containing a directive, something like:: - - /* <processing directive> rest of comment */ - - you can use this rule: - - .. sourcecode:: python - - tokens = { - 'root': [ - (r'/\* <', Comment, ('comment', 'directive')), - ... - ], - 'directive': [ - (r'[^>]*', Comment.Directive), - (r'>', Comment, '#pop'), - ], - 'comment': [ - (r'[^*]+', Comment), - (r'\*/', Comment, '#pop'), - (r'\*', Comment), - ] - } - - When this encounters the above sample, first ``'comment'`` and ``'directive'`` - are pushed onto the stack, then the lexer continues in the directive state - until it finds the closing ``>``, then it continues in the comment state until - the closing ``*/``. Then, both states are popped from the stack again and - lexing continues in the root state. - - *New in Pygments 0.9:* The tuple can contain the special ``'#push'`` and - ``'#pop'`` (but not ``'#pop:n'``) directives. - - -- You can include the rules of a state in the definition of another. This is - done by using `include` from `pygments.lexer`: - - .. sourcecode:: python - - from pygments.lexer import RegexLexer, bygroups, include - from pygments.token import * - - class ExampleLexer(RegexLexer): - tokens = { - 'comments': [ - (r'/\*.*?\*/', Comment), - (r'//.*?\n', Comment), - ], - 'root': [ - include('comments'), - (r'(function )(\w+)( {)', - bygroups(Keyword, Name, Keyword), 'function'), - (r'.', Text), - ], - 'function': [ - (r'[^}/]+', Text), - include('comments'), - (r'/', Text), - (r'}', Keyword, '#pop'), - ] - } - - This is a hypothetical lexer for a language that consist of functions and - comments. Because comments can occur at toplevel and in functions, we need - rules for comments in both states. As you can see, the `include` helper saves - repeating rules that occur more than once (in this example, the state - ``'comment'`` will never be entered by the lexer, as it's only there to be - included in ``'root'`` and ``'function'``). - - -- Sometimes, you may want to "combine" a state from existing ones. This is - possible with the `combine` helper from `pygments.lexer`. - - If you, instead of a new state, write ``combined('state1', 'state2')`` as the - third item of a rule tuple, a new anonymous state will be formed from state1 - and state2 and if the rule matches, the lexer will enter this state. - - This is not used very often, but can be helpful in some cases, such as the - `PythonLexer`'s string literal processing. - -- If you want your lexer to start lexing in a different state you can modify - the stack by overloading the `get_tokens_unprocessed()` method: - - .. sourcecode:: python - - from pygments.lexer import RegexLexer - - class MyLexer(RegexLexer): - tokens = {...} - - def get_tokens_unprocessed(self, text): - stack = ['root', 'otherstate'] - for item in RegexLexer.get_tokens_unprocessed(text, stack): - yield item - - Some lexers like the `PhpLexer` use this to make the leading ``<?php`` - preprocessor comments optional. 
  Note that you can crash the lexer easily by putting values into the stack
  that don't exist in the token map. Also, removing ``'root'`` from the stack
  can result in strange errors!

- An empty regex at the end of a state list, combined with ``'#pop'``, can
  act as a return point from a state that doesn't have a clear end marker.


Using multiple lexers
=====================

Using multiple lexers for the same input can be tricky. One of the easiest
combination techniques is shown here: you can replace the token type entry in
a rule tuple (the second item) with a lexer class. The matched text will then
be lexed with that lexer, and the resulting tokens will be yielded.

For example, look at this stripped-down HTML lexer:

.. sourcecode:: python

    import re

    from pygments.lexer import RegexLexer, bygroups, using
    from pygments.token import *
    from pygments.lexers.web import JavascriptLexer

    class HtmlLexer(RegexLexer):
        name = 'HTML'
        aliases = ['html']
        filenames = ['*.html', '*.htm']

        flags = re.IGNORECASE | re.DOTALL
        tokens = {
            'root': [
                ('[^<&]+', Text),
                ('&.*?;', Name.Entity),
                (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')),
                (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'),
                (r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag),
            ],
            'script-content': [
                (r'(.+?)(<\s*/\s*script\s*>)',
                 bygroups(using(JavascriptLexer), Name.Tag),
                 '#pop'),
            ]
        }

Here the content of a ``<script>`` tag is passed to a newly created instance
of a `JavascriptLexer` and not processed by the `HtmlLexer`. This is done
using the `using` helper that takes the other lexer class as its parameter.

Note the combination of `bygroups` and `using`. This makes sure that the
content up to the ``</script>`` end tag is processed by the `JavascriptLexer`,
while the end tag is yielded as a normal token with the `Name.Tag` type.

As an additional goodie, if the lexer class is replaced by `this` (imported
from `pygments.lexer`), the "other" lexer will be the current one (because you
cannot refer to the current class within the code that runs at class
definition time).

Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))``
rule. Here, two states are pushed onto the state stack, ``'script-content'``
and ``'tag'``. That means that first ``'tag'`` is processed, which will parse
attributes and the closing ``>``, then the ``'tag'`` state is popped and the
next state on top of the stack will be ``'script-content'``.

The `using()` helper has a special keyword argument, `state`, which works as
follows: if given, the lexer to use initially is not in the ``"root"`` state,
but in the state given by this argument. This *only* works with a
`RegexLexer`.

Any other keyword arguments passed to `using()` are added to the keyword
arguments used to create the lexer.


Delegating Lexer
================

Another approach for nested lexers is the `DelegatingLexer`, which is for
example used for the template engine lexers. It takes two lexers as arguments
on initialisation: a `root_lexer` and a `language_lexer`.

The input is processed as follows: first, the whole text is lexed with the
`language_lexer`. All tokens yielded with a type of ``Other`` are then
concatenated and given to the `root_lexer`. The language tokens of the
`language_lexer` are then inserted into the `root_lexer`'s token stream at the
appropriate positions.

.. 
sourcecode:: python - - from pygments.lexer import DelegatingLexer - from pygments.lexers.web import HtmlLexer, PhpLexer - - class HtmlPhpLexer(DelegatingLexer): - def __init__(self, **options): - super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options) - -This procedure ensures that e.g. HTML with template tags in it is highlighted -correctly even if the template tags are put into HTML tags or attributes. - -If you want to change the needle token ``Other`` to something else, you can -give the lexer another token type as the third parameter: - -.. sourcecode:: python - - DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options) - - -Callbacks -========= - -Sometimes the grammar of a language is so complex that a lexer would be unable -to parse it just by using regular expressions and stacks. - -For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead -of token types (`bygroups` and `using` are nothing else but preimplemented -callbacks). The callback must be a function taking two arguments: - -* the lexer itself -* the match object for the last matched rule - -The callback must then return an iterable of (or simply yield) ``(index, -tokentype, value)`` tuples, which are then just passed through by -`get_tokens_unprocessed()`. The ``index`` here is the position of the token in -the input string, ``tokentype`` is the normal token type (like `Name.Builtin`), -and ``value`` the associated part of the input string. - -You can see an example here: - -.. sourcecode:: python - - from pygments.lexer import RegexLexer - from pygments.token import Generic - - class HypotheticLexer(RegexLexer): - - def headline_callback(lexer, match): - equal_signs = match.group(1) - text = match.group(2) - yield match.start(), Generic.Headline, equal_signs + text + equal_signs - - tokens = { - 'root': [ - (r'(=+)(.*?)(\1)', headline_callback) - ] - } - -If the regex for the `headline_callback` matches, the function is called with the -match object. Note that after the callback is done, processing continues -normally, that is, after the end of the previous match. The callback has no -possibility to influence the position. - -There are not really any simple examples for lexer callbacks, but you can see -them in action e.g. in the `compiled.py`_ source code in the `CLexer` and -`JavaLexer` classes. - -.. _compiled.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/compiled.py - - -The ExtendedRegexLexer class -============================ - -The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for -the funky syntax rules of some languages that will go unnamed, such as Ruby. - -But fear not; even then you don't have to abandon the regular expression -approach. For Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`. -All features known from RegexLexers are available here too, and the tokens are -specified in exactly the same way, *except* for one detail: - -The `get_tokens_unprocessed()` method holds its internal state data not as local -variables, but in an instance of the `pygments.lexer.LexerContext` class, and -that instance is passed to callbacks as a third argument. This means that you -can modify the lexer state in callbacks. 
- -The `LexerContext` class has the following members: - -* `text` -- the input text -* `pos` -- the current starting position that is used for matching regexes -* `stack` -- a list containing the state stack -* `end` -- the maximum position to which regexes are matched, this defaults to - the length of `text` - -Additionally, the `get_tokens_unprocessed()` method can be given a -`LexerContext` instead of a string and will then process this context instead of -creating a new one for the string argument. - -Note that because you can set the current position to anything in the callback, -it won't be automatically be set by the caller after the callback is finished. -For example, this is how the hypothetical lexer above would be written with the -`ExtendedRegexLexer`: - -.. sourcecode:: python - - from pygments.lexer import ExtendedRegexLexer - from pygments.token import Generic - - class ExHypotheticLexer(ExtendedRegexLexer): - - def headline_callback(lexer, match, ctx): - equal_signs = match.group(1) - text = match.group(2) - yield match.start(), Generic.Headline, equal_signs + text + equal_signs - ctx.pos = match.end() - - tokens = { - 'root': [ - (r'(=+)(.*?)(\1)', headline_callback) - ] - } - -This might sound confusing (and it can really be). But it is needed, and for an -example look at the Ruby lexer in `agile.py`_. - -.. _agile.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/agile.py - - -Filtering Token Streams -======================= - -Some languages ship a lot of builtin functions (for example PHP). The total -amount of those functions differs from system to system because not everybody -has every extension installed. In the case of PHP there are over 3000 builtin -functions. That's an incredible huge amount of functions, much more than you -can put into a regular expression. - -But because only `Name` tokens can be function names it's solvable by overriding -the ``get_tokens_unprocessed()`` method. The following lexer subclasses the -`PythonLexer` so that it highlights some additional names as pseudo keywords: - -.. sourcecode:: python - - from pygments.lexers.agile import PythonLexer - from pygments.token import Name, Keyword - - class MyPythonLexer(PythonLexer): - EXTRA_KEYWORDS = ['foo', 'bar', 'foobar', 'barfoo', 'spam', 'eggs'] - - def get_tokens_unprocessed(self, text): - for index, token, value in PythonLexer.get_tokens_unprocessed(self, text): - if token is Name and value in self.EXTRA_KEYWORDS: - yield index, Keyword.Pseudo, value - else: - yield index, token, value - -The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions. - -**Note** Do not confuse this with the `filter`_ system. - -.. _filter: filters.txt diff --git a/docs/src/lexers.txt b/docs/src/lexers.txt deleted file mode 100644 index 016de6ce..00000000 --- a/docs/src/lexers.txt +++ /dev/null @@ -1,67 +0,0 @@ -.. -*- mode: rst -*- - -================ -Available lexers -================ - -This page lists all available builtin lexers and the options they take. - -Currently, **all lexers** support these options: - -`stripnl` - Strip leading and trailing newlines from the input (default: ``True``) - -`stripall` - Strip all leading and trailing whitespace from the input (default: - ``False``). - -`ensurenl` - Make sure that the input ends with a newline (default: ``True``). This - is required for some lexers that consume input linewise. - *New in Pygments 1.3.* - -`tabsize` - If given and greater than 0, expand tabs in the input (default: ``0``). 
- -`encoding` - *New in Pygments 0.6.* - - If given, must be an encoding name (such as ``"utf-8"``). This encoding - will be used to convert the input string to Unicode (if it is not already - a Unicode string). The default is ``"latin1"``. - - If this option is set to ``"guess"``, a simple UTF-8 vs. Latin-1 - detection is used, if it is set to ``"chardet"``, the - `chardet library <http://chardet.feedparser.org/>`__ is used to - guess the encoding of the input. - - -The "Short Names" field lists the identifiers that can be used with the -`get_lexer_by_name()` function. - -These lexers are builtin and can be imported from `pygments.lexers`: - -[builtin_lexer_docs] - -Iterating over all lexers -------------------------- - -*New in Pygments 0.6.* - -To get all lexers (both the builtin and the plugin ones), you can -use the `get_all_lexers()` function from the `pygments.lexers` -module: - -.. sourcecode:: pycon - - >>> from pygments.lexers import get_all_lexers - >>> i = get_all_lexers() - >>> i.next() - ('Diff', ('diff',), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')) - >>> i.next() - ('Delphi', ('delphi', 'objectpascal', 'pas', 'pascal'), ('*.pas',), ('text/x-pascal',)) - >>> i.next() - ('XML+Ruby', ('xml+erb', 'xml+ruby'), (), ()) - -As you can see, the return value is an iterator which yields tuples -in the form ``(name, aliases, filetypes, mimetypes)``. diff --git a/docs/src/moinmoin.txt b/docs/src/moinmoin.txt deleted file mode 100644 index 8b2216b3..00000000 --- a/docs/src/moinmoin.txt +++ /dev/null @@ -1,39 +0,0 @@ -.. -*- mode: rst -*- - -============================ -Using Pygments with MoinMoin -============================ - -From Pygments 0.7, the source distribution ships a `Moin`_ parser plugin that -can be used to get Pygments highlighting in Moin wiki pages. - -To use it, copy the file `external/moin-parser.py` from the Pygments -distribution to the `data/plugin/parser` subdirectory of your Moin instance. -Edit the options at the top of the file (currently ``ATTACHMENTS`` and -``INLINESTYLES``) and rename the file to the name that the parser directive -should have. For example, if you name the file ``code.py``, you can get a -highlighted Python code sample with this Wiki markup:: - - {{{ - #!code python - [...] - }}} - -where ``python`` is the Pygments name of the lexer to use. - -Additionally, if you set the ``ATTACHMENTS`` option to True, Pygments will also -be called for all attachments for whose filenames there is no other parser -registered. - -You are responsible for including CSS rules that will map the Pygments CSS -classes to colors. You can output a stylesheet file with `pygmentize`, put it -into the `htdocs` directory of your Moin instance and then include it in the -`stylesheets` configuration option in the Moin config, e.g.:: - - stylesheets = [('screen', '/htdocs/pygments.css')] - -If you do not want to do that and are willing to accept larger HTML output, you -can set the ``INLINESTYLES`` option to True. - - -.. _Moin: http://moinmoin.wikiwikiweb.de/ diff --git a/docs/src/plugins.txt b/docs/src/plugins.txt deleted file mode 100644 index a6f8d7b0..00000000 --- a/docs/src/plugins.txt +++ /dev/null @@ -1,93 +0,0 @@ -================ -Register Plugins -================ - -If you want to extend Pygments without hacking the sources, but want to -use the lexer/formatter/style/filter lookup functions (`lexers.get_lexer_by_name` -et al.), you can use `setuptools`_ entrypoints to add new lexers, formatters -or styles as if they were in the Pygments core. - -.. 
_setuptools: http://peak.telecommunity.com/DevCenter/setuptools - -That means you can use your highlighter modules with the `pygmentize` script, -which relies on the mentioned functions. - - -Entrypoints -=========== - -Here is a list of setuptools entrypoints that Pygments understands: - -`pygments.lexers` - - This entrypoint is used for adding new lexers to the Pygments core. - The name of the entrypoint values doesn't really matter, Pygments extracts - required metadata from the class definition: - - .. sourcecode:: ini - - [pygments.lexers] - yourlexer = yourmodule:YourLexer - - Note that you have to define ``name``, ``aliases`` and ``filename`` - attributes so that you can use the highlighter from the command line: - - .. sourcecode:: python - - class YourLexer(...): - name = 'Name Of Your Lexer' - aliases = ['alias'] - filenames = ['*.ext'] - - -`pygments.formatters` - - You can use this entrypoint to add new formatters to Pygments. The - name of an entrypoint item is the name of the formatter. If you - prefix the name with a slash it's used as a filename pattern: - - .. sourcecode:: ini - - [pygments.formatters] - yourformatter = yourmodule:YourFormatter - /.ext = yourmodule:YourFormatter - - -`pygments.styles` - - To add a new style you can use this entrypoint. The name of the entrypoint - is the name of the style: - - .. sourcecode:: ini - - [pygments.styles] - yourstyle = yourmodule:YourStyle - - -`pygments.filters` - - Use this entrypoint to register a new filter. The name of the - entrypoint is the name of the filter: - - .. sourcecode:: ini - - [pygments.filters] - yourfilter = yourmodule:YourFilter - - -How To Use Entrypoints -====================== - -This documentation doesn't explain how to use those entrypoints because this is -covered in the `setuptools documentation`_. That page should cover everything -you need to write a plugin. - -.. _setuptools documentation: http://peak.telecommunity.com/DevCenter/setuptools - - -Extending The Core -================== - -If you have written a Pygments plugin that is open source, please inform us -about that. There is a high chance that we'll add it to the Pygments -distribution. diff --git a/docs/src/quickstart.txt b/docs/src/quickstart.txt deleted file mode 100644 index 40409104..00000000 --- a/docs/src/quickstart.txt +++ /dev/null @@ -1,202 +0,0 @@ -.. -*- mode: rst -*- - -=========================== -Introduction and Quickstart -=========================== - - -Welcome to Pygments! This document explains the basic concepts and terms and -gives a few examples of how to use the library. - - -Architecture -============ - -There are four types of components that work together highlighting a piece of -code: - -* A **lexer** splits the source into tokens, fragments of the source that - have a token type that determines what the text represents semantically - (e.g., keyword, string, or comment). There is a lexer for every language - or markup format that Pygments supports. -* The token stream can be piped through **filters**, which usually modify - the token types or text fragments, e.g. uppercasing all keywords. -* A **formatter** then takes the token stream and writes it to an output - file, in a format such as HTML, LaTeX or RTF. -* While writing the output, a **style** determines how to highlight all the - different token types. It maps them to attributes like "red and bold". - - -Example -======= - -Here is a small example for highlighting Python code: - -.. 
sourcecode:: python - - from pygments import highlight - from pygments.lexers import PythonLexer - from pygments.formatters import HtmlFormatter - - code = 'print "Hello World"' - print highlight(code, PythonLexer(), HtmlFormatter()) - -which prints something like this: - -.. sourcecode:: html - - <div class="highlight"> - <pre><span class="k">print</span> <span class="s">"Hello World"</span></pre> - </div> - -As you can see, Pygments uses CSS classes (by default, but you can change that) -instead of inline styles in order to avoid outputting redundant style information over -and over. A CSS stylesheet that contains all CSS classes possibly used in the output -can be produced by: - -.. sourcecode:: python - - print HtmlFormatter().get_style_defs('.highlight') - -The argument to `get_style_defs` is used as an additional CSS selector: the output -may look like this: - -.. sourcecode:: css - - .highlight .k { color: #AA22FF; font-weight: bold } - .highlight .s { color: #BB4444 } - ... - - -Options -======= - -The `highlight()` function supports a fourth argument called `outfile`, it must be -a file object if given. The formatted output will then be written to this file -instead of being returned as a string. - -Lexers and formatters both support options. They are given to them as keyword -arguments either to the class or to the lookup method: - -.. sourcecode:: python - - from pygments import highlight - from pygments.lexers import get_lexer_by_name - from pygments.formatters import HtmlFormatter - - lexer = get_lexer_by_name("python", stripall=True) - formatter = HtmlFormatter(linenos=True, cssclass="source") - result = highlight(code, lexer, formatter) - -This makes the lexer strip all leading and trailing whitespace from the input -(`stripall` option), lets the formatter output line numbers (`linenos` option), -and sets the wrapping ``<div>``'s class to ``source`` (instead of -``highlight``). - -Important options include: - -`encoding` : for lexers and formatters - Since Pygments uses Unicode strings internally, this determines which - encoding will be used to convert to or from byte strings. -`style` : for formatters - The name of the style to use when writing the output. - - -For an overview of builtin lexers and formatters and their options, visit the -`lexer <lexers.txt>`_ and `formatters <formatters.txt>`_ lists. - -For a documentation on filters, see `this page <filters.txt>`_. - - -Lexer and formatter lookup -========================== - -If you want to lookup a built-in lexer by its alias or a filename, you can use -one of the following methods: - -.. sourcecode:: pycon - - >>> from pygments.lexers import (get_lexer_by_name, - ... get_lexer_for_filename, get_lexer_for_mimetype) - - >>> get_lexer_by_name('python') - <pygments.lexers.PythonLexer> - - >>> get_lexer_for_filename('spam.rb') - <pygments.lexers.RubyLexer> - - >>> get_lexer_for_mimetype('text/x-perl') - <pygments.lexers.PerlLexer> - -All these functions accept keyword arguments; they will be passed to the lexer -as options. - -A similar API is available for formatters: use `get_formatter_by_name()` and -`get_formatter_for_filename()` from the `pygments.formatters` module -for this purpose. - - -Guessing lexers -=============== - -If you don't know the content of the file, or you want to highlight a file -whose extension is ambiguous, such as ``.html`` (which could contain plain HTML -or some template tags), use these functions: - -.. 
sourcecode:: pycon - - >>> from pygments.lexers import guess_lexer, guess_lexer_for_filename - - >>> guess_lexer('#!/usr/bin/python\nprint "Hello World!"') - <pygments.lexers.PythonLexer> - - >>> guess_lexer_for_filename('test.py', 'print "Hello World!"') - <pygments.lexers.PythonLexer> - -`guess_lexer()` passes the given content to the lexer classes' `analyse_text()` -method and returns the one for which it returns the highest number. - -All lexers have two different filename pattern lists: the primary and the -secondary one. The `get_lexer_for_filename()` function only uses the primary -list, whose entries are supposed to be unique among all lexers. -`guess_lexer_for_filename()`, however, will first loop through all lexers and -look at the primary and secondary filename patterns if the filename matches. -If only one lexer matches, it is returned, else the guessing mechanism of -`guess_lexer()` is used with the matching lexers. - -As usual, keyword arguments to these functions are given to the created lexer -as options. - - -Command line usage -================== - -You can use Pygments from the command line, using the `pygmentize` script:: - - $ pygmentize test.py - -will highlight the Python file test.py using ANSI escape sequences -(a.k.a. terminal colors) and print the result to standard output. - -To output HTML, use the ``-f`` option:: - - $ pygmentize -f html -o test.html test.py - -to write an HTML-highlighted version of test.py to the file test.html. -Note that it will only be a snippet of HTML, if you want a full HTML document, -use the "full" option:: - - $ pygmentize -f html -O full -o test.html test.py - -This will produce a full HTML document with included stylesheet. - -A style can be selected with ``-O style=<name>``. - -If you need a stylesheet for an existing HTML file using Pygments CSS classes, -it can be created with:: - - $ pygmentize -S default -f html > style.css - -where ``default`` is the style name. - -More options and tricks and be found in the `command line reference <cmdline.txt>`_. diff --git a/docs/src/rstdirective.txt b/docs/src/rstdirective.txt deleted file mode 100644 index c0d503b3..00000000 --- a/docs/src/rstdirective.txt +++ /dev/null @@ -1,22 +0,0 @@ -.. -*- mode: rst -*- - -================================ -Using Pygments in ReST documents -================================ - -Many Python people use `ReST`_ for documentation their sourcecode, programs, -scripts et cetera. This also means that documentation often includes sourcecode -samples or snippets. - -You can easily enable Pygments support for your ReST texts using a custom -directive -- this is also how this documentation displays source code. - -From Pygments 0.9, the directive is shipped in the distribution as -`external/rst-directive.py`. You can copy and adapt this code to your liking. - -.. removed -- too confusing - *Loosely related note:* The ReST lexer now recognizes ``.. sourcecode::`` and - ``.. code::`` directives and highlights the contents in the specified language - if the `handlecodeblocks` option is true. - -.. _ReST: http://docutils.sf.net/rst.html diff --git a/docs/src/styles.txt b/docs/src/styles.txt deleted file mode 100644 index e3e9cfb3..00000000 --- a/docs/src/styles.txt +++ /dev/null @@ -1,143 +0,0 @@ -.. -*- mode: rst -*- - -====== -Styles -====== - -Pygments comes with some builtin styles that work for both the HTML and -LaTeX formatter. - -The builtin styles can be looked up with the `get_style_by_name` function: - -.. 
sourcecode:: pycon - - >>> from pygments.styles import get_style_by_name - >>> get_style_by_name('colorful') - <class 'pygments.styles.colorful.ColorfulStyle'> - -You can pass a instance of a `Style` class to a formatter as the `style` -option in form of a string: - -.. sourcecode:: pycon - - >>> from pygments.styles import get_style_by_name - >>> HtmlFormatter(style='colorful').style - <class 'pygments.styles.colorful.ColorfulStyle'> - -Or you can also import your own style (which must be a subclass of -`pygments.style.Style`) and pass it to the formatter: - -.. sourcecode:: pycon - - >>> from yourapp.yourmodule import YourStyle - >>> HtmlFormatter(style=YourStyle).style - <class 'yourapp.yourmodule.YourStyle'> - - -Creating Own Styles -=================== - -So, how to create a style? All you have to do is to subclass `Style` and -define some styles: - -.. sourcecode:: python - - from pygments.style import Style - from pygments.token import Keyword, Name, Comment, String, Error, \ - Number, Operator, Generic - - class YourStyle(Style): - default_style = "" - styles = { - Comment: 'italic #888', - Keyword: 'bold #005', - Name: '#f00', - Name.Function: '#0f0', - Name.Class: 'bold #0f0', - String: 'bg:#eee #111' - } - -That's it. There are just a few rules. When you define a style for `Name` -the style automatically also affects `Name.Function` and so on. If you -defined ``'bold'`` and you don't want boldface for a subtoken use ``'nobold'``. - -(Philosophy: the styles aren't written in CSS syntax since this way -they can be used for a variety of formatters.) - -`default_style` is the style inherited by all token types. - -To make the style usable for Pygments, you must - -* either register it as a plugin (see `the plugin docs <plugins.txt>`_) -* or drop it into the `styles` subpackage of your Pygments distribution one style - class per style, where the file name is the style name and the class name is - `StylenameClass`. For example, if your style should be called - ``"mondrian"``, name the class `MondrianStyle`, put it into the file - ``mondrian.py`` and this file into the ``pygments.styles`` subpackage - directory. - - -Style Rules -=========== - -Here a small overview of all allowed styles: - -``bold`` - render text as bold -``nobold`` - don't render text as bold (to prevent subtokens being highlighted bold) -``italic`` - render text italic -``noitalic`` - don't render text as italic -``underline`` - render text underlined -``nounderline`` - don't render text underlined -``bg:`` - transparent background -``bg:#000000`` - background color (black) -``border:`` - no border -``border:#ffffff`` - border color (white) -``#ff0000`` - text color (red) -``noinherit`` - don't inherit styles from supertoken - -Note that there may not be a space between ``bg:`` and the color value -since the style definition string is split at whitespace. -Also, using named colors is not allowed since the supported color names -vary for different formatters. - -Furthermore, not all lexers might support every style. - - -Builtin Styles -============== - -Pygments ships some builtin styles which are maintained by the Pygments team. - -To get a list of known styles you can use this snippet: - -.. 
sourcecode:: pycon - - >>> from pygments.styles import STYLE_MAP - >>> STYLE_MAP.keys() - ['default', 'emacs', 'friendly', 'colorful'] - - -Getting a list of available styles -================================== - -*New in Pygments 0.6.* - -Because it could be that a plugin registered a style, there is -a way to iterate over all styles: - -.. sourcecode:: pycon - - >>> from pygments.styles import get_all_styles - >>> styles = list(get_all_styles()) diff --git a/docs/src/tokens.txt b/docs/src/tokens.txt deleted file mode 100644 index 4900a9ab..00000000 --- a/docs/src/tokens.txt +++ /dev/null @@ -1,349 +0,0 @@ -.. -*- mode: rst -*- - -============== -Builtin Tokens -============== - -Inside the `pygments.token` module, there is a special object called `Token` -that is used to create token types. - -You can create a new token type by accessing an attribute of `Token`: - -.. sourcecode:: pycon - - >>> from pygments.token import Token - >>> Token.String - Token.String - >>> Token.String is Token.String - True - -Note that tokens are singletons so you can use the ``is`` operator for comparing -token types. - -As of Pygments 0.7 you can also use the ``in`` operator to perform set tests: - -.. sourcecode:: pycon - - >>> from pygments.token import Comment - >>> Comment.Single in Comment - True - >>> Comment in Comment.Multi - False - -This can be useful in `filters`_ and if you write lexers on your own without -using the base lexers. - -You can also split a token type into a hierarchy, and get the parent of it: - -.. sourcecode:: pycon - - >>> String.split() - [Token, Token.Literal, Token.Literal.String] - >>> String.parent - Token.Literal - -In principle, you can create an unlimited number of token types but nobody can -guarantee that a style would define style rules for a token type. Because of -that, Pygments proposes some global token types defined in the -`pygments.token.STANDARD_TYPES` dict. - -For some tokens aliases are already defined: - -.. sourcecode:: pycon - - >>> from pygments.token import String - >>> String - Token.Literal.String - -Inside the `pygments.token` module the following aliases are defined: - -============= ============================ ==================================== -`Text` `Token.Text` for any type of text data -`Whitespace` `Token.Text.Whitespace` for specially highlighted whitespace -`Error` `Token.Error` represents lexer errors -`Other` `Token.Other` special token for data not - matched by a parser (e.g. HTML - markup in PHP code) -`Keyword` `Token.Keyword` any kind of keywords -`Name` `Token.Name` variable/function names -`Literal` `Token.Literal` Any literals -`String` `Token.Literal.String` string literals -`Number` `Token.Literal.Number` number literals -`Operator` `Token.Operator` operators (``+``, ``not``...) -`Punctuation` `Token.Punctuation` punctuation (``[``, ``(``...) -`Comment` `Token.Comment` any kind of comments -`Generic` `Token.Generic` generic tokens (have a look at - the explanation below) -============= ============================ ==================================== - -The `Whitespace` token type is new in Pygments 0.8. It is used only by the -`VisibleWhitespaceFilter` currently. - -Normally you just create token types using the already defined aliases. 
For each -of those token aliases, a number of subtypes exists (excluding the special tokens -`Token.Text`, `Token.Error` and `Token.Other`) - -The `is_token_subtype()` function in the `pygments.token` module can be used to -test if a token type is a subtype of another (such as `Name.Tag` and `Name`). -(This is the same as ``Name.Tag in Name``. The overloaded `in` operator was newly -introduced in Pygments 0.7, the function still exists for backwards -compatiblity.) - -With Pygments 0.7, it's also possible to convert strings to token types (for example -if you want to supply a token from the command line): - -.. sourcecode:: pycon - - >>> from pygments.token import String, string_to_tokentype - >>> string_to_tokentype("String") - Token.Literal.String - >>> string_to_tokentype("Token.Literal.String") - Token.Literal.String - >>> string_to_tokentype(String) - Token.Literal.String - - -Keyword Tokens -============== - -`Keyword` - For any kind of keyword (especially if it doesn't match any of the - subtypes of course). - -`Keyword.Constant` - For keywords that are constants (e.g. ``None`` in future Python versions). - -`Keyword.Declaration` - For keywords used for variable declaration (e.g. ``var`` in some programming - languages like JavaScript). - -`Keyword.Namespace` - For keywords used for namespace declarations (e.g. ``import`` in Python and - Java and ``package`` in Java). - -`Keyword.Pseudo` - For keywords that aren't really keywords (e.g. ``None`` in old Python - versions). - -`Keyword.Reserved` - For reserved keywords. - -`Keyword.Type` - For builtin types that can't be used as identifiers (e.g. ``int``, - ``char`` etc. in C). - - -Name Tokens -=========== - -`Name` - For any name (variable names, function names, classes). - -`Name.Attribute` - For all attributes (e.g. in HTML tags). - -`Name.Builtin` - Builtin names; names that are available in the global namespace. - -`Name.Builtin.Pseudo` - Builtin names that are implicit (e.g. ``self`` in Ruby, ``this`` in Java). - -`Name.Class` - Class names. Because no lexer can know if a name is a class or a function - or something else this token is meant for class declarations. - -`Name.Constant` - Token type for constants. In some languages you can recognise a token by the - way it's defined (the value after a ``const`` keyword for example). In - other languages constants are uppercase by definition (Ruby). - -`Name.Decorator` - Token type for decorators. Decorators are synatic elements in the Python - language. Similar syntax elements exist in C# and Java. - -`Name.Entity` - Token type for special entities. (e.g. `` `` in HTML). - -`Name.Exception` - Token type for exception names (e.g. ``RuntimeError`` in Python). Some languages - define exceptions in the function signature (Java). You can highlight - the name of that exception using this token then. - -`Name.Function` - Token type for function names. - -`Name.Label` - Token type for label names (e.g. in languages that support ``goto``). - -`Name.Namespace` - Token type for namespaces. (e.g. import paths in Java/Python), names following - the ``module``/``namespace`` keyword in other languages. - -`Name.Other` - Other names. Normally unused. - -`Name.Tag` - Tag names (in HTML/XML markup or configuration files). - -`Name.Variable` - Token type for variables. Some languages have prefixes for variable names - (PHP, Ruby, Perl). You can highlight them using this token. - -`Name.Variable.Class` - same as `Name.Variable` but for class variables (also static variables). 
- -`Name.Variable.Global` - same as `Name.Variable` but for global variables (used in Ruby, for - example). - -`Name.Variable.Instance` - same as `Name.Variable` but for instance variables. - - -Literals -======== - -`Literal` - For any literal (if not further defined). - -`Literal.Date` - for date literals (e.g. ``42d`` in Boo). - - -`String` - For any string literal. - -`String.Backtick` - Token type for strings enclosed in backticks. - -`String.Char` - Token type for single characters (e.g. Java, C). - -`String.Doc` - Token type for documentation strings (for example Python). - -`String.Double` - Double quoted strings. - -`String.Escape` - Token type for escape sequences in strings. - -`String.Heredoc` - Token type for "heredoc" strings (e.g. in Ruby or Perl). - -`String.Interpol` - Token type for interpolated parts in strings (e.g. ``#{foo}`` in Ruby). - -`String.Other` - Token type for any other strings (for example ``%q{foo}`` string constructs - in Ruby). - -`String.Regex` - Token type for regular expression literals (e.g. ``/foo/`` in JavaScript). - -`String.Single` - Token type for single quoted strings. - -`String.Symbol` - Token type for symbols (e.g. ``:foo`` in LISP or Ruby). - - -`Number` - Token type for any number literal. - -`Number.Float` - Token type for float literals (e.g. ``42.0``). - -`Number.Hex` - Token type for hexadecimal number literals (e.g. ``0xdeadbeef``). - -`Number.Integer` - Token type for integer literals (e.g. ``42``). - -`Number.Integer.Long` - Token type for long integer literals (e.g. ``42L`` in Python). - -`Number.Oct` - Token type for octal literals. - - -Operators -========= - -`Operator` - For any punctuation operator (e.g. ``+``, ``-``). - -`Operator.Word` - For any operator that is a word (e.g. ``not``). - - -Punctuation -=========== - -*New in Pygments 0.7.* - -`Punctuation` - For any punctuation which is not an operator (e.g. ``[``, ``(``...) - - -Comments -======== - -`Comment` - Token type for any comment. - -`Comment.Multiline` - Token type for multiline comments. - -`Comment.Preproc` - Token type for preprocessor comments (also ``<?php``/``<%`` constructs). - -`Comment.Single` - Token type for comments that end at the end of a line (e.g. ``# foo``). - -`Comment.Special` - Special data in comments. For example code tags, author and license - information, etc. - - -Generic Tokens -============== - -Generic tokens are for special lexers like the `DiffLexer` that doesn't really -highlight a programming language but a patch file. - - -`Generic` - A generic, unstyled token. Normally you don't use this token type. - -`Generic.Deleted` - Marks the token value as deleted. - -`Generic.Emph` - Marks the token value as emphasized. - -`Generic.Error` - Marks the token value as an error message. - -`Generic.Heading` - Marks the token value as headline. - -`Generic.Inserted` - Marks the token value as inserted. - -`Generic.Output` - Marks the token value as program output (e.g. for python cli lexer). - -`Generic.Prompt` - Marks the token value as command prompt (e.g. bash lexer). - -`Generic.Strong` - Marks the token value as bold (e.g. for rst lexer). - -`Generic.Subheading` - Marks the token value as subheadline. - -`Generic.Traceback` - Marks the token value as a part of an error traceback. - -.. 
_filters: filters.txt
diff --git a/docs/src/unicode.txt b/docs/src/unicode.txt deleted file mode 100644 index dc6394a9..00000000 --- a/docs/src/unicode.txt +++ /dev/null @@ -1,49 +0,0 @@
=====================
Unicode and Encodings
=====================

Since Pygments 0.6, all lexers use unicode strings internally. Because of that
you might encounter the occasional `UnicodeDecodeError` if you pass strings
with the wrong encoding.

By default, all lexers have their input encoding set to `latin1`.
If you pass a lexer a string object (not unicode), it tries to decode the data
using this encoding.
You can override the encoding using the `encoding` lexer option. If you have
the `chardet`_ library installed and set the encoding to ``chardet``, it will
analyse the text and automatically use the encoding it thinks is the right
one:

.. sourcecode:: python

    from pygments.lexers import PythonLexer
    lexer = PythonLexer(encoding='chardet')

The best way is to pass Pygments unicode objects; in that case you cannot get
unexpected output.

The formatters now send Unicode objects to the stream if you don't set the
output encoding. You can do so by passing the formatters an `encoding` option:

.. sourcecode:: python

    from pygments.formatters import HtmlFormatter
    f = HtmlFormatter(encoding='utf-8')

**You will have to set this option if you have non-ASCII characters in the
source and the output stream does not accept Unicode written to it!**
This is the case for all regular files and for terminals.

Note: The Terminal formatter tries to be smart: if its output stream has an
`encoding` attribute, and you haven't set the option, it will encode any
Unicode string with this encoding before writing it. This is the case for
`sys.stdout`, for example. The other formatters don't have that behavior.

Another note: If you call Pygments via the command line (`pygmentize`),
encoding is handled differently, see `the command line docs <cmdline.txt>`_.

*New in Pygments 0.7*: the formatters now also accept an `outencoding` option
which will override the `encoding` option if given. This makes it possible to
use a single options dict with lexers and formatters and still have different
input and output encodings.

.. _chardet: http://chardet.feedparser.org/
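
As a closing illustration of the two options above, here is a minimal sketch
that decodes Latin-1 input and writes UTF-8 output; the file names and the
input encoding are assumptions made only for this example:

.. sourcecode:: python

    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import HtmlFormatter

    # The lexer's `encoding` option decodes the byte input to Unicode;
    # the formatter's `encoding` option encodes the HTML output as UTF-8
    # before it is written to the output file.
    code = open('example.py', 'rb').read()
    outfile = open('example.html', 'wb')
    highlight(code, PythonLexer(encoding='latin1'),
              HtmlFormatter(encoding='utf-8'), outfile)
    outfile.close()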