author     Jean Abou Samra <jean@abou-samra.fr>     2023-03-30 22:02:41 +0200
committer  Jean Abou Samra <jean@abou-samra.fr>     2023-03-30 22:02:41 +0200
commit     09c8a31039366b03c3b7e117ee998eaa43f9c7dc (patch)
tree       5e3048353ed6f630752ec4ee5b3ffa8c37ef7644
parent     c664784df695536d119c1eb13b3578458e1317f0 (diff)
download   pygments-git-09c8a31039366b03c3b7e117ee998eaa43f9c7dc.tar.gz
Use autodoc a bit more
-rw-r--r--  doc/docs/api.rst      104
-rw-r--r--  pygments/__init__.py   20
-rw-r--r--  pygments/lexer.py     104
3 files changed, 98 insertions, 130 deletions
diff --git a/doc/docs/api.rst b/doc/docs/api.rst
index b7a2a411..b38ca8b0 100644
--- a/doc/docs/api.rst
+++ b/doc/docs/api.rst
@@ -10,23 +10,11 @@ High-level API
 
 Functions from the :mod:`pygments` module:
 
-.. function:: lex(code, lexer)
+.. autofunction:: lex
 
-    Lex `code` with the `lexer` (must be a `Lexer` instance)
-    and return an iterable of tokens. Currently, this only calls
-    `lexer.get_tokens()`.
+.. autofunction:: format
 
-.. function:: format(tokens, formatter, outfile=None)
-
-    Format a token stream (iterable of tokens) `tokens` with the
-    `formatter` (must be a `Formatter` instance). The result is
-    written to `outfile`, or if that is ``None``, returned as a
-    string.
-
-.. function:: highlight(code, lexer, formatter, outfile=None)
-
-    This is the most high-level highlighting function.
-    It combines `lex` and `format` in one function.
+.. autofunction:: highlight
 
 
 .. module:: pygments.lexers
@@ -172,94 +160,14 @@ Lexers
 
 The base lexer class from which all lexers are derived is:
 
-.. class:: Lexer(**options)
-
-    The constructor takes a \*\*keywords dictionary of options.
-    Every subclass must first process its own options and then call
-    the `Lexer` constructor, since it processes the `stripnl`,
-    `stripall` and `tabsize` options.
-
-    An example looks like this:
-
-    .. sourcecode:: python
-
-        def __init__(self, **options):
-            self.compress = options.get('compress', '')
-            Lexer.__init__(self, **options)
-
-    As these options must all be specifiable as strings (due to the
-    command line usage), there are various utility functions
-    available to help with that, see `Option processing`_.
-
-    .. method:: get_tokens(text)
-
-        This method is the basic interface of a lexer. It is called by
-        the `highlight()` function. It must process the text and return an
-        iterable of ``(tokentype, value)`` pairs from `text`.
-
-        Normally, you don't need to override this method. The default
-        implementation processes the `stripnl`, `stripall` and `tabsize`
-        options and then yields all tokens from `get_tokens_unprocessed()`,
-        with the ``index`` dropped.
-
-    .. method:: get_tokens_unprocessed(text)
-
-        This method should process the text and return an iterable of
-        ``(index, tokentype, value)`` tuples where ``index`` is the starting
-        position of the token within the input text.
-
-        This method must be overridden by subclasses.
-
-    .. staticmethod:: analyse_text(text)
-
-        A static method which is called for lexer guessing. It should analyse
-        the text and return a float in the range from ``0.0`` to ``1.0``.
-        If it returns ``0.0``, the lexer will not be selected as the most
-        probable one, if it returns ``1.0``, it will be selected immediately.
-
-        .. note:: You don't have to add ``@staticmethod`` to the definition of
-           this method, this will be taken care of by the Lexer's metaclass.
-
-    For a list of known tokens have a look at the :doc:`tokens` page.
-
-    A lexer also can have the following attributes (in fact, they are mandatory
-    except `alias_filenames`) that are used by the builtin lookup mechanism.
-
-    .. attribute:: name
-
-        Full name for the lexer, in human-readable form.
-
-    .. attribute:: aliases
-
-        A list of short, unique identifiers that can be used to lookup
-        the lexer from a list, e.g. using `get_lexer_by_name()`.
-
-    .. attribute:: filenames
-
-        A list of `fnmatch` patterns that match filenames which contain
-        content for this lexer. The patterns in this list should be unique among
-        all lexers.
-
-    .. attribute:: alias_filenames
-
-        A list of `fnmatch` patterns that match filenames which may or may not
-        contain content for this lexer. This list is used by the
-        :func:`.guess_lexer_for_filename()` function, to determine which lexers
-        are then included in guessing the correct one. That means that
-        e.g. every lexer for HTML and a template language should include
-        ``\*.html`` in this list.
-
-    .. attribute:: mimetypes
-
-        A list of MIME types for content that can be lexed with this
-        lexer.
+.. autoclass:: Lexer
+   :members: __init__, get_tokens, get_tokens_unprocessed, analyse_text
 
 There are several base class derived from ``Lexer`` you can use to
 build your lexer from:
 
-.. autoclass:: pygments.lexer.DelegatingLexer
 .. autoclass:: pygments.lexer.RegexLexer
 .. autoclass:: pygments.lexer.ExtendedRegexLexer
-
+.. autoclass:: pygments.lexer.DelegatingLexer
 
 
 .. module:: pygments.formatter
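For reference, the high-level functions documented above can be exercised in
a few lines. This is a minimal usage sketch (not part of the commit),
assuming the stock `PythonLexer` and `HtmlFormatter` that ship with Pygments:

.. sourcecode:: python

    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import PythonLexer

    code = 'print("Hello, world!")'
    # highlight() combines lex() and format(); with no outfile
    # argument the result comes back as a string.
    html = highlight(code, PythonLexer(), HtmlFormatter())
    print(html)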
diff --git a/pygments/__init__.py b/pygments/__init__.py
index 7078c45d..afd36682 100644
--- a/pygments/__init__.py
+++ b/pygments/__init__.py
@@ -34,7 +34,9 @@ __all__ = ['lex', 'format', 'highlight']
 
 def lex(code, lexer):
     """
-    Lex ``code`` with ``lexer`` and return an iterable of tokens.
+    Lex `code` with the `lexer` (must be a `Lexer` instance)
+    and return an iterable of tokens. Currently, this only calls
+    `lexer.get_tokens()`.
     """
     try:
         return lexer.get_tokens(code)
@@ -49,11 +51,12 @@ def lex(code, lexer):
 
 def format(tokens, formatter, outfile=None):  # pylint: disable=redefined-builtin
     """
-    Format a tokenlist ``tokens`` with the formatter ``formatter``.
+    Format ``tokens`` (an iterable of tokens) with the formatter ``formatter``
+    (a `Formatter` instance).
 
-    If ``outfile`` is given and a valid file object (an object
-    with a ``write`` method), the result will be written to it, otherwise
-    it is returned as a string.
+    If ``outfile`` is given and a valid file object (an object with a
+    ``write`` method), the result will be written to it, otherwise it
+    is returned as a string.
     """
     try:
         if not outfile:
@@ -73,10 +76,7 @@ def format(tokens, formatter, outfile=None):  # pylint: disable=redefined-builti
 
 def highlight(code, lexer, formatter, outfile=None):
     """
-    Lex ``code`` with ``lexer`` and format it with the formatter ``formatter``.
-
-    If ``outfile`` is given and a valid file object (an object
-    with a ``write`` method), the result will be written to it, otherwise
-    it is returned as a string.
+    This is the most high-level highlighting function. It combines `lex` and
+    `format` in one function.
    """
     return format(lex(code, lexer), formatter, outfile)
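The reworded docstrings spell out the ``outfile`` contract for ``format()``.
A small sketch of both paths (again not from the commit; an in-memory
``io.StringIO`` stands in for a real file object):

.. sourcecode:: python

    import io

    from pygments import format, lex
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import PythonLexer

    tokens = lex('x = 42', PythonLexer())  # iterable of (tokentype, value) pairs

    # With a file object, the output is written to it and nothing is returned.
    outfile = io.StringIO()
    format(tokens, HtmlFormatter(), outfile)
    print(outfile.getvalue())

    # Without one, the result is returned as a string, which is exactly
    # what highlight() relies on.
    html = format(lex('x = 42', PythonLexer()), HtmlFormatter())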
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 0290365d..7c596719 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -50,7 +50,31 @@ class Lexer(metaclass=LexerMeta):
     """
     Lexer for a specific language.
 
-    Basic options recognized:
+    See also :doc:`lexerdevelopment`, a high-level guide to writing
+    lexers.
+
+    Lexer classes have attributes used for choosing the most appropriate
+    lexer based on various criteria.
+
+    .. autoattribute:: name
+       :no-value:
+    .. autoattribute:: aliases
+       :no-value:
+    .. autoattribute:: filenames
+       :no-value:
+    .. autoattribute:: alias_filenames
+    .. autoattribute:: mimetypes
+       :no-value:
+    .. autoattribute:: priority
+
+    Lexers included in Pygments should have an additional attribute:
+
+    .. autoattribute:: url
+       :no-value:
+
+    You can pass options to the constructor. The basic options recognized
+    by all lexers and processed by the base `Lexer` class are:
+
     ``stripnl``
         Strip leading and trailing newlines from the input (default: True).
     ``stripall``
@@ -74,28 +98,55 @@ class Lexer(metaclass=LexerMeta):
         Overrides the ``encoding`` if given.
     """
 
-    #: Name of the lexer
+    #: Full name of the lexer, in human-readable form
     name = None
 
-    #: URL of the language specification/definition
-    url = None
-
-    #: Shortcuts for the lexer
+    #: A list of short, unique identifiers that can be used to look
+    #: up the lexer from a list, e.g., using `get_lexer_by_name()`.
     aliases = []
 
-    #: File name globs
+    #: A list of `fnmatch` patterns that match filenames which contain
+    #: content for this lexer. The patterns in this list should be unique among
+    #: all lexers.
     filenames = []
 
-    #: Secondary file name globs
+    #: A list of `fnmatch` patterns that match filenames which may or may not
+    #: contain content for this lexer. This list is used by the
+    #: :func:`.guess_lexer_for_filename()` function, to determine which lexers
+    #: are then included in guessing the correct one. That means that
+    #: e.g. every lexer for HTML and a template language should include
+    #: ``\*.html`` in this list.
     alias_filenames = []
 
-    #: MIME types
+    #: A list of MIME types for content that can be lexed with this lexer.
     mimetypes = []
 
     #: Priority, should multiple lexers match and no content is provided
     priority = 0
 
+    #: URL of the language specification/definition. Used in the Pygments
+    #: documentation.
+    url = None
+
     def __init__(self, **options):
+        """
+        This constructor takes arbitrary options as keyword arguments.
+        Every subclass must first process its own options and then call
+        the `Lexer` constructor, since it processes the basic
+        options like `stripnl`.
+
+        An example looks like this:
+
+        .. sourcecode:: python
+
+            def __init__(self, **options):
+                self.compress = options.get('compress', '')
+                Lexer.__init__(self, **options)
+
+        As these options must all be specifiable as strings (due to the
+        command line usage), there are various utility functions
+        available to help with that, see `Option processing`_.
+        """
         self.options = options
         self.stripnl = get_bool_opt(options, 'stripnl', True)
         self.stripall = get_bool_opt(options, 'stripall', False)
@@ -124,10 +175,13 @@ class Lexer(metaclass=LexerMeta):
 
     def analyse_text(text):
         """
-        Has to return a float between ``0`` and ``1`` that indicates
-        if a lexer wants to highlight this text. Used by ``guess_lexer``.
-        If this method returns ``0`` it won't highlight it in any case, if
-        it returns ``1`` highlighting with this lexer is guaranteed.
+        A static method which is called for lexer guessing.
+
+        It should analyse the text and return a float in the range
+        from ``0.0`` to ``1.0``. If it returns ``0.0``, the lexer
+        will not be selected as the most probable one, if it returns
+        ``1.0``, it will be selected immediately. This is used by
+        `guess_lexer`.
 
         The `LexerMeta` metaclass automatically wraps this function so
         that it works like a static method (no ``self`` or ``cls``
@@ -138,12 +192,17 @@ class Lexer(metaclass=LexerMeta):
 
     def get_tokens(self, text, unfiltered=False):
         """
-        Return an iterable of (tokentype, value) pairs generated from
-        `text`. If `unfiltered` is set to `True`, the filtering mechanism
-        is bypassed even if filters are defined.
+        This method is the basic interface of a lexer. It is called by
+        the `highlight()` function. It must process the text and return an
+        iterable of ``(tokentype, value)`` pairs from `text`.
+
+        Normally, you don't need to override this method. The default
+        implementation processes the options recognized by all lexers
+        (`stripnl`, `stripall` and so on), and then yields all tokens
+        from `get_tokens_unprocessed()`, with the ``index`` dropped.
 
-        Also preprocess the text, i.e. expand tabs and strip it if
-        wanted and applies registered filters.
+        If `unfiltered` is set to `True`, the filtering mechanism is
+        bypassed even if filters are defined.
         """
         if not isinstance(text, str):
             if self.encoding == 'guess':
@@ -197,11 +256,12 @@ class Lexer(metaclass=LexerMeta):
 
     def get_tokens_unprocessed(self, text):
         """
-        Return an iterable of (index, tokentype, value) pairs where "index"
-        is the starting position of the token within the input text.
+        This method should process the text and return an iterable of
+        ``(index, tokentype, value)`` tuples where ``index`` is the starting
+        position of the token within the input text.
 
-        In subclasses, implement this method as a generator to
-        maximize effectiveness.
+        It must be overridden by subclasses. It is recommended to
+        implement it as a generator to maximize effectiveness.
         """
         raise NotImplementedError
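Taken together, the docstrings moved into lexer.py describe the whole
subclassing contract: process your own options in ``__init__`` before
delegating to the base class, implement ``get_tokens_unprocessed()`` as a
generator, and optionally provide ``analyse_text()``. A hypothetical lexer
following that contract (the class name, alias, glob and option name are
made up for illustration, not part of Pygments or this commit):

.. sourcecode:: python

    from pygments.lexer import Lexer
    from pygments.token import Comment, Text
    from pygments.util import get_bool_opt

    class SemicolonCommentLexer(Lexer):
        name = 'Semicolon comments'   # full human-readable name
        aliases = ['semicomment']     # for get_lexer_by_name()
        filenames = ['*.semi']        # hypothetical fnmatch glob

        def __init__(self, **options):
            # Process our own option first, then let the base class
            # handle stripnl, stripall, tabsize, etc.
            self.mark_comments = get_bool_opt(options, 'mark_comments', True)
            Lexer.__init__(self, **options)

        def get_tokens_unprocessed(self, text):
            # A generator yielding (index, tokentype, value) tuples,
            # as the docstring recommends.
            pos = 0
            for line in text.splitlines(keepends=True):
                if line.startswith(';') and self.mark_comments:
                    yield pos, Comment, line
                else:
                    yield pos, Text, line
                pos += len(line)

        def analyse_text(text):
            # No @staticmethod decorator needed; LexerMeta wraps this
            # automatically, as noted above. Called by guess_lexer().
            return 0.1 if text.lstrip().startswith(';') else 0.0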