commit    09c8a31039366b03c3b7e117ee998eaa43f9c7dc (patch)
author    Jean Abou Samra <jean@abou-samra.fr>  2023-03-30 22:02:41 +0200
committer Jean Abou Samra <jean@abou-samra.fr>  2023-03-30 22:02:41 +0200
tree      5e3048353ed6f630752ec4ee5b3ffa8c37ef7644
parent    c664784df695536d119c1eb13b3578458e1317f0 (diff)
Use autodoc a bit more
-rw-r--r--  doc/docs/api.rst      | 104
-rw-r--r--  pygments/__init__.py  |  20
-rw-r--r--  pygments/lexer.py     | 104
3 files changed, 98 insertions(+), 130 deletions(-)
diff --git a/doc/docs/api.rst b/doc/docs/api.rst
index b7a2a411..b38ca8b0 100644
--- a/doc/docs/api.rst
+++ b/doc/docs/api.rst
@@ -10,23 +10,11 @@ High-level API
Functions from the :mod:`pygments` module:
-.. function:: lex(code, lexer)
+.. autofunction:: lex
- Lex `code` with the `lexer` (must be a `Lexer` instance)
- and return an iterable of tokens. Currently, this only calls
- `lexer.get_tokens()`.
+.. autofunction:: format
-.. function:: format(tokens, formatter, outfile=None)
-
- Format a token stream (iterable of tokens) `tokens` with the
- `formatter` (must be a `Formatter` instance). The result is
- written to `outfile`, or if that is ``None``, returned as a
- string.
-
-.. function:: highlight(code, lexer, formatter, outfile=None)
-
- This is the most high-level highlighting function.
- It combines `lex` and `format` in one function.
+.. autofunction:: highlight
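For orientation, a minimal usage sketch of this high-level API; the lexer and formatter are arbitrary example choices:

.. sourcecode:: python

   from pygments import highlight
   from pygments.lexers import PythonLexer
   from pygments.formatters import HtmlFormatter

   # With outfile=None the result comes back as a string.
   html = highlight('print("hi")', PythonLexer(), HtmlFormatter())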
.. module:: pygments.lexers
@@ -172,94 +160,14 @@ Lexers
The base lexer class from which all lexers are derived is:
-.. class:: Lexer(**options)
-
- The constructor takes a \*\*keywords dictionary of options.
- Every subclass must first process its own options and then call
- the `Lexer` constructor, since it processes the `stripnl`,
- `stripall` and `tabsize` options.
-
- An example looks like this:
-
- .. sourcecode:: python
-
- def __init__(self, **options):
- self.compress = options.get('compress', '')
- Lexer.__init__(self, **options)
-
- As these options must all be specifiable as strings (due to the
- command line usage), there are various utility functions
- available to help with that, see `Option processing`_.
-
- .. method:: get_tokens(text)
-
- This method is the basic interface of a lexer. It is called by
- the `highlight()` function. It must process the text and return an
- iterable of ``(tokentype, value)`` pairs from `text`.
-
- Normally, you don't need to override this method. The default
- implementation processes the `stripnl`, `stripall` and `tabsize`
- options and then yields all tokens from `get_tokens_unprocessed()`,
- with the ``index`` dropped.
-
- .. method:: get_tokens_unprocessed(text)
-
- This method should process the text and return an iterable of
- ``(index, tokentype, value)`` tuples where ``index`` is the starting
- position of the token within the input text.
-
- This method must be overridden by subclasses.
-
- .. staticmethod:: analyse_text(text)
-
- A static method which is called for lexer guessing. It should analyse
- the text and return a float in the range from ``0.0`` to ``1.0``.
- If it returns ``0.0``, the lexer will not be selected as the most
- probable one, if it returns ``1.0``, it will be selected immediately.
-
- .. note:: You don't have to add ``@staticmethod`` to the definition of
- this method, this will be taken care of by the Lexer's metaclass.
-
- For a list of known tokens have a look at the :doc:`tokens` page.
-
- A lexer also can have the following attributes (in fact, they are mandatory
- except `alias_filenames`) that are used by the builtin lookup mechanism.
-
- .. attribute:: name
-
- Full name for the lexer, in human-readable form.
-
- .. attribute:: aliases
-
- A list of short, unique identifiers that can be used to lookup
- the lexer from a list, e.g. using `get_lexer_by_name()`.
-
- .. attribute:: filenames
-
- A list of `fnmatch` patterns that match filenames which contain
- content for this lexer. The patterns in this list should be unique among
- all lexers.
-
- .. attribute:: alias_filenames
-
- A list of `fnmatch` patterns that match filenames which may or may not
- contain content for this lexer. This list is used by the
- :func:`.guess_lexer_for_filename()` function, to determine which lexers
- are then included in guessing the correct one. That means that
- e.g. every lexer for HTML and a template language should include
- ``\*.html`` in this list.
-
- .. attribute:: mimetypes
-
- A list of MIME types for content that can be lexed with this
- lexer.
+.. autoclass:: Lexer
+ :members: __init__, get_tokens, get_tokens_unprocessed, analyse_text
There are several base classes derived from ``Lexer`` that you can use to build your lexer:
-.. autoclass:: pygments.lexer.DelegatingLexer
.. autoclass:: pygments.lexer.RegexLexer
.. autoclass:: pygments.lexer.ExtendedRegexLexer
-
+.. autoclass:: pygments.lexer.DelegatingLexer
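For orientation, a hedged sketch of a small ``RegexLexer`` subclass; the language, class name, and patterns are invented for illustration:

.. sourcecode:: python

   from pygments.lexer import RegexLexer
   from pygments.token import Keyword, Name, Whitespace

   class HypotheticalLexer(RegexLexer):
       """Lexer for a made-up language, for illustration only."""
       name = 'Hypothetical'
       aliases = ['hypo']
       filenames = ['*.hypo']

       tokens = {
           'root': [
               (r'\s+', Whitespace),
               (r'\b(if|else|while)\b', Keyword),
               (r'[A-Za-z_]\w*', Name),
           ],
       }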
.. module:: pygments.formatter
diff --git a/pygments/__init__.py b/pygments/__init__.py
index 7078c45d..afd36682 100644
--- a/pygments/__init__.py
+++ b/pygments/__init__.py
@@ -34,7 +34,9 @@ __all__ = ['lex', 'format', 'highlight']
def lex(code, lexer):
"""
- Lex ``code`` with ``lexer`` and return an iterable of tokens.
+ Lex `code` with the `lexer` (must be a `Lexer` instance)
+ and return an iterable of tokens. Currently, this only calls
+ `lexer.get_tokens()`.
"""
try:
return lexer.get_tokens(code)
@@ -49,11 +51,12 @@ def lex(code, lexer):
def format(tokens, formatter, outfile=None): # pylint: disable=redefined-builtin
"""
- Format a tokenlist ``tokens`` with the formatter ``formatter``.
+ Format ``tokens`` (an iterable of tokens) with the formatter ``formatter``
+ (a `Formatter` instance).
- If ``outfile`` is given and a valid file object (an object
- with a ``write`` method), the result will be written to it, otherwise
- it is returned as a string.
+ If ``outfile`` is given and a valid file object (an object with a
+ ``write`` method), the result will be written to it, otherwise it
+ is returned as a string.
"""
try:
if not outfile:
@@ -73,10 +76,7 @@ def format(tokens, formatter, outfile=None): # pylint: disable=redefined-builti
def highlight(code, lexer, formatter, outfile=None):
"""
- Lex ``code`` with ``lexer`` and format it with the formatter ``formatter``.
-
- If ``outfile`` is given and a valid file object (an object
- with a ``write`` method), the result will be written to it, otherwise
- it is returned as a string.
+ This is the most high-level highlighting function. It combines `lex` and
+ `format` in one function.
"""
return format(lex(code, lexer), formatter, outfile)
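A short sketch of the ``lex``/``format`` split in practice; ``TerminalFormatter`` and ``sys.stdout`` are example choices:

.. sourcecode:: python

   import sys

   from pygments import lex, format
   from pygments.lexers import PythonLexer
   from pygments.formatters import TerminalFormatter

   tokens = lex('x = 1\n', PythonLexer())
   # sys.stdout has a write() method, so the result is written to it
   # instead of being returned as a string.
   format(tokens, TerminalFormatter(), sys.stdout)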
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 0290365d..7c596719 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -50,7 +50,31 @@ class Lexer(metaclass=LexerMeta):
"""
Lexer for a specific language.
- Basic options recognized:
+ See also :doc:`lexerdevelopment`, a high-level guide to writing
+ lexers.
+
+ Lexer classes have attributes used for choosing the most appropriate
+ lexer based on various criteria.
+
+ .. autoattribute:: name
+ :no-value:
+ .. autoattribute:: aliases
+ :no-value:
+ .. autoattribute:: filenames
+ :no-value:
+ .. autoattribute:: alias_filenames
+ .. autoattribute:: mimetypes
+ :no-value:
+ .. autoattribute:: priority
+
+ Lexers included in Pygments should have an additional attribute:
+
+ .. autoattribute:: url
+ :no-value:
+
+ You can pass options to the constructor. The basic options recognized
+ by all lexers and processed by the base `Lexer` class are:
+
``stripnl``
Strip leading and trailing newlines from the input (default: True).
``stripall``
@@ -74,28 +98,55 @@ class Lexer(metaclass=LexerMeta):
Overrides the ``encoding`` if given.
"""
- #: Name of the lexer
+ #: Full name of the lexer, in human-readable form
name = None
- #: URL of the language specification/definition
- url = None
-
- #: Shortcuts for the lexer
+ #: A list of short, unique identifiers that can be used to look
+ #: up the lexer from a list, e.g., using `get_lexer_by_name()`.
aliases = []
- #: File name globs
+ #: A list of `fnmatch` patterns that match filenames which contain
+ #: content for this lexer. The patterns in this list should be unique among
+ #: all lexers.
filenames = []
- #: Secondary file name globs
+ #: A list of `fnmatch` patterns that match filenames which may or may not
+ #: contain content for this lexer. This list is used by the
+ #: :func:`.guess_lexer_for_filename()` function, to determine which lexers
+ #: are then included in guessing the correct one. That means that
+ #: e.g. every lexer for HTML and a template language should include
+ #: ``\*.html`` in this list.
alias_filenames = []
- #: MIME types
+ #: A list of MIME types for content that can be lexed with this lexer.
mimetypes = []
#: Priority, should multiple lexers match and no content is provided
priority = 0
+ #: URL of the language specification/definition. Used in the Pygments
+ #: documentation.
+ url = None
+
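A quick sketch of how these attributes feed the builtin lookup mechanism (both helper functions live in ``pygments.lexers``):

.. sourcecode:: python

   from pygments.lexers import get_lexer_by_name, get_lexer_for_filename

   get_lexer_by_name('python')           # resolved through `aliases`
   get_lexer_for_filename('setup.py')    # resolved through `filenames`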
def __init__(self, **options):
+ """
+ This constructor takes arbitrary options as keyword arguments.
+ Every subclass must first process its own options and then call
+ the `Lexer` constructor, since it processes the basic
+ options like `stripnl`.
+
+ An example looks like this:
+
+ .. sourcecode:: python
+
+ def __init__(self, **options):
+ self.compress = options.get('compress', '')
+ Lexer.__init__(self, **options)
+
+ As these options must all be specifiable as strings (due to the
+ command line usage), there are various utility functions
+ available to help with that, see `Option processing`_.
+ """
self.options = options
self.stripnl = get_bool_opt(options, 'stripnl', True)
self.stripall = get_bool_opt(options, 'stripall', False)
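A hedged sketch of a subclass constructor using one of those option-processing helpers; the ``compress`` option and the class are hypothetical:

.. sourcecode:: python

   from pygments.lexer import Lexer
   from pygments.util import get_bool_opt

   class CompressingLexer(Lexer):
       name = 'Hypothetical compressing lexer'

       def __init__(self, **options):
           # get_bool_opt also accepts strings like 'yes'/'no'/'on'/'off',
           # so the option stays usable from the command line.
           self.compress = get_bool_opt(options, 'compress', False)
           Lexer.__init__(self, **options)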
@@ -124,10 +175,13 @@ class Lexer(metaclass=LexerMeta):
def analyse_text(text):
"""
- Has to return a float between ``0`` and ``1`` that indicates
- if a lexer wants to highlight this text. Used by ``guess_lexer``.
- If this method returns ``0`` it won't highlight it in any case, if
- it returns ``1`` highlighting with this lexer is guaranteed.
+ A static method which is called for lexer guessing.
+
+ It should analyse the text and return a float in the range
+ from ``0.0`` to ``1.0``. If it returns ``0.0``, the lexer
+    will not be selected as the most probable one; if it returns
+ ``1.0``, it will be selected immediately. This is used by
+ `guess_lexer`.
The `LexerMeta` metaclass automatically wraps this function so
that it works like a static method (no ``self`` or ``cls``
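A minimal sketch of what an override can look like; the marker string is invented:

.. sourcecode:: python

   from pygments.lexer import RegexLexer

   class HypotheticalLexer(RegexLexer):
       name = 'Hypothetical'
       tokens = {'root': []}

       # No @staticmethod and no self/cls parameter: the LexerMeta
       # metaclass wraps this so it works like a static method.
       def analyse_text(text):
           # A shebang-style marker makes this lexer a certain match.
           return 1.0 if text.startswith('#!hypo') else 0.0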
@@ -138,12 +192,17 @@ class Lexer(metaclass=LexerMeta):
def get_tokens(self, text, unfiltered=False):
"""
- Return an iterable of (tokentype, value) pairs generated from
- `text`. If `unfiltered` is set to `True`, the filtering mechanism
- is bypassed even if filters are defined.
+ This method is the basic interface of a lexer. It is called by
+ the `highlight()` function. It must process the text and return an
+ iterable of ``(tokentype, value)`` pairs from `text`.
+
+ Normally, you don't need to override this method. The default
+ implementation processes the options recognized by all lexers
+ (`stripnl`, `stripall` and so on), and then yields all tokens
+ from `get_tokens_unprocessed()`, with the ``index`` dropped.
- Also preprocess the text, i.e. expand tabs and strip it if
- wanted and applies registered filters.
+ If `unfiltered` is set to `True`, the filtering mechanism is
+ bypassed even if filters are defined.
"""
if not isinstance(text, str):
if self.encoding == 'guess':
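For illustration, iterating over the pairs returned by ``get_tokens()``; any lexer works, ``PythonLexer`` is an arbitrary choice:

.. sourcecode:: python

   from pygments.lexers import PythonLexer

   for tokentype, value in PythonLexer().get_tokens('x = 1\n'):
       print(tokentype, repr(value))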
@@ -197,11 +256,12 @@ class Lexer(metaclass=LexerMeta):
def get_tokens_unprocessed(self, text):
"""
- Return an iterable of (index, tokentype, value) pairs where "index"
- is the starting position of the token within the input text.
+ This method should process the text and return an iterable of
+ ``(index, tokentype, value)`` tuples where ``index`` is the starting
+ position of the token within the input text.
- In subclasses, implement this method as a generator to
- maximize effectiveness.
+ It must be overridden by subclasses. It is recommended to
+ implement it as a generator to maximize effectiveness.
"""
raise NotImplementedError
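A toy override, kept deliberately minimal, showing the generator style recommended above:

.. sourcecode:: python

   from pygments.lexer import Lexer
   from pygments.token import Text

   class WholeTextLexer(Lexer):
       """Toy lexer: the entire input becomes a single Text token."""
       name = 'Whole text'

       def get_tokens_unprocessed(self, text):
           # Yielding keeps memory use low on large inputs.
           yield 0, Text, text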