author     Jean Abou Samra <jean@abou-samra.fr>     2023-03-30 22:02:41 +0200
committer  Jean Abou Samra <jean@abou-samra.fr>     2023-03-30 22:02:41 +0200
commit     09c8a31039366b03c3b7e117ee998eaa43f9c7dc (patch)
tree       5e3048353ed6f630752ec4ee5b3ffa8c37ef7644
parent     c664784df695536d119c1eb13b3578458e1317f0 (diff)
download   pygments-git-09c8a31039366b03c3b7e117ee998eaa43f9c7dc.tar.gz
Use autodoc a bit more
-rw-r--r--  doc/docs/api.rst      104
-rw-r--r--  pygments/__init__.py   20
-rw-r--r--  pygments/lexer.py     104
3 files changed, 98 insertions, 130 deletions
diff --git a/doc/docs/api.rst b/doc/docs/api.rst
index b7a2a411..b38ca8b0 100644
--- a/doc/docs/api.rst
+++ b/doc/docs/api.rst
@@ -10,23 +10,11 @@ High-level API
 
 Functions from the :mod:`pygments` module:
 
-.. function:: lex(code, lexer)
+.. autofunction:: lex
 
-    Lex `code` with the `lexer` (must be a `Lexer` instance)
-    and return an iterable of tokens. Currently, this only calls
-    `lexer.get_tokens()`.
+.. autofunction:: format
 
-.. function:: format(tokens, formatter, outfile=None)
-
-    Format a token stream (iterable of tokens) `tokens` with the
-    `formatter` (must be a `Formatter` instance). The result is
-    written to `outfile`, or if that is ``None``, returned as a
-    string.
-
-.. function:: highlight(code, lexer, formatter, outfile=None)
-
-    This is the most high-level highlighting function.
-    It combines `lex` and `format` in one function.
+.. autofunction:: highlight
 
 
 .. module:: pygments.lexers
@@ -172,94 +160,14 @@ Lexers
 
 The base lexer class from which all lexers are derived is:
 
-.. class:: Lexer(**options)
-
-    The constructor takes a \*\*keywords dictionary of options.
-    Every subclass must first process its own options and then call
-    the `Lexer` constructor, since it processes the `stripnl`,
-    `stripall` and `tabsize` options.
-
-    An example looks like this:
-
-    .. sourcecode:: python
-
-        def __init__(self, **options):
-            self.compress = options.get('compress', '')
-            Lexer.__init__(self, **options)
-
-    As these options must all be specifiable as strings (due to the
-    command line usage), there are various utility functions
-    available to help with that, see `Option processing`_.
-
-    .. method:: get_tokens(text)
-
-        This method is the basic interface of a lexer. It is called by
-        the `highlight()` function. It must process the text and return an
-        iterable of ``(tokentype, value)`` pairs from `text`.
-
-        Normally, you don't need to override this method. The default
-        implementation processes the `stripnl`, `stripall` and `tabsize`
-        options and then yields all tokens from `get_tokens_unprocessed()`,
-        with the ``index`` dropped.
-
-    .. method:: get_tokens_unprocessed(text)
-
-        This method should process the text and return an iterable of
-        ``(index, tokentype, value)`` tuples where ``index`` is the starting
-        position of the token within the input text.
-
-        This method must be overridden by subclasses.
-
-    .. staticmethod:: analyse_text(text)
-
-        A static method which is called for lexer guessing. It should analyse
-        the text and return a float in the range from ``0.0`` to ``1.0``.
-        If it returns ``0.0``, the lexer will not be selected as the most
-        probable one, if it returns ``1.0``, it will be selected immediately.
-
-        .. note:: You don't have to add ``@staticmethod`` to the definition of
-           this method, this will be taken care of by the Lexer's metaclass.
-
-    For a list of known tokens have a look at the :doc:`tokens` page.
-
-    A lexer also can have the following attributes (in fact, they are mandatory
-    except `alias_filenames`) that are used by the builtin lookup mechanism.
-
-    .. attribute:: name
-
-        Full name for the lexer, in human-readable form.
-
-    .. attribute:: aliases
-
-        A list of short, unique identifiers that can be used to lookup
-        the lexer from a list, e.g. using `get_lexer_by_name()`.
-
-    .. attribute:: filenames
-
-        A list of `fnmatch` patterns that match filenames which contain
-        content for this lexer. The patterns in this list should be unique among
-        all lexers.
-
-    .. attribute:: alias_filenames
-
-        A list of `fnmatch` patterns that match filenames which may or may not
-        contain content for this lexer. This list is used by the
-        :func:`.guess_lexer_for_filename()` function, to determine which lexers
-        are then included in guessing the correct one. That means that
-        e.g. every lexer for HTML and a template language should include
-        ``\*.html`` in this list.
-
-    .. attribute:: mimetypes
-
-        A list of MIME types for content that can be lexed with this
-        lexer.
+.. autoclass:: Lexer
+   :members: __init__, get_tokens, get_tokens_unprocessed, analyse_text
 
 There are several base class derived from ``Lexer`` you can use to
 build your lexer from:
 
-.. autoclass:: pygments.lexer.DelegatingLexer
 .. autoclass:: pygments.lexer.RegexLexer
 .. autoclass:: pygments.lexer.ExtendedRegexLexer
-
+.. autoclass:: pygments.lexer.DelegatingLexer
 
 
 .. module:: pygments.formatter
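For reference, the high-level functions documented above can be exercised in
a few lines. This is a minimal usage sketch (not part of the commit),
assuming the stock `PythonLexer` and `HtmlFormatter` that ship with Pygments:

.. sourcecode:: python

    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import PythonLexer

    code = 'print("Hello, world!")'
    # highlight() combines lex() and format(); with no outfile
    # argument the result comes back as a string.
    html = highlight(code, PythonLexer(), HtmlFormatter())
    print(html)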
diff --git a/pygments/__init__.py b/pygments/__init__.py
index 7078c45d..afd36682 100644
--- a/pygments/__init__.py
+++ b/pygments/__init__.py
@@ -34,7 +34,9 @@ __all__ = ['lex', 'format', 'highlight']
 
 def lex(code, lexer):
     """
-    Lex ``code`` with ``lexer`` and return an iterable of tokens.
+    Lex `code` with the `lexer` (must be a `Lexer` instance)
+    and return an iterable of tokens. Currently, this only calls
+    `lexer.get_tokens()`.
     """
     try:
         return lexer.get_tokens(code)
@@ -49,11 +51,12 @@ def lex(code, lexer):
 
 def format(tokens, formatter, outfile=None):  # pylint: disable=redefined-builtin
     """
-    Format a tokenlist ``tokens`` with the formatter ``formatter``.
+    Format ``tokens`` (an iterable of tokens) with the formatter ``formatter``
+    (a `Formatter` instance).
 
-    If ``outfile`` is given and a valid file object (an object
-    with a ``write`` method), the result will be written to it, otherwise
-    it is returned as a string.
+    If ``outfile`` is given and a valid file object (an object with a
+    ``write`` method), the result will be written to it, otherwise it
+    is returned as a string.
     """
     try:
         if not outfile:
@@ -73,10 +76,7 @@ def format(tokens, formatter, outfile=None):  # pylint: disable=redefined-builti
 
 def highlight(code, lexer, formatter, outfile=None):
     """
-    Lex ``code`` with ``lexer`` and format it with the formatter ``formatter``.
-
-    If ``outfile`` is given and a valid file object (an object
-    with a ``write`` method), the result will be written to it, otherwise
-    it is returned as a string.
+    This is the most high-level highlighting function. It combines `lex` and
+    `format` in one function.
    """
     return format(lex(code, lexer), formatter, outfile)
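The reworded docstrings spell out the ``outfile`` contract for ``format()``.
A small sketch of both paths (again not from the commit; an in-memory
``io.StringIO`` stands in for a real file object):

.. sourcecode:: python

    import io

    from pygments import format, lex
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import PythonLexer

    tokens = lex('x = 42', PythonLexer())  # iterable of (tokentype, value) pairs

    # With a file object, the output is written to it and nothing is returned.
    outfile = io.StringIO()
    format(tokens, HtmlFormatter(), outfile)
    print(outfile.getvalue())

    # Without one, the result is returned as a string, which is exactly
    # what highlight() relies on.
    html = format(lex('x = 42', PythonLexer()), HtmlFormatter())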
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 0290365d..7c596719 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -50,7 +50,31 @@ class Lexer(metaclass=LexerMeta):
     """
     Lexer for a specific language.
 
-    Basic options recognized:
+    See also :doc:`lexerdevelopment`, a high-level guide to writing
+    lexers.
+
+    Lexer classes have attributes used for choosing the most appropriate
+    lexer based on various criteria.
+
+    .. autoattribute:: name
+       :no-value:
+    .. autoattribute:: aliases
+       :no-value:
+    .. autoattribute:: filenames
+       :no-value:
+    .. autoattribute:: alias_filenames
+    .. autoattribute:: mimetypes
+       :no-value:
+    .. autoattribute:: priority
+
+    Lexers included in Pygments should have an additional attribute:
+
+    .. autoattribute:: url
+       :no-value:
+
+    You can pass options to the constructor. The basic options recognized
+    by all lexers and processed by the base `Lexer` class are:
+
     ``stripnl``
         Strip leading and trailing newlines from the input (default: True).
     ``stripall``
@@ -74,28 +98,55 @@ class Lexer(metaclass=LexerMeta):
         Overrides the ``encoding`` if given.
     """
 
-    #: Name of the lexer
+    #: Full name of the lexer, in human-readable form
     name = None
 
-    #: URL of the language specification/definition
-    url = None
-
-    #: Shortcuts for the lexer
+    #: A list of short, unique identifiers that can be used to look
+    #: up the lexer from a list, e.g., using `get_lexer_by_name()`.
     aliases = []
 
-    #: File name globs
+    #: A list of `fnmatch` patterns that match filenames which contain
+    #: content for this lexer. The patterns in this list should be unique among
+    #: all lexers.
     filenames = []
 
-    #: Secondary file name globs
+    #: A list of `fnmatch` patterns that match filenames which may or may not
+    #: contain content for this lexer. This list is used by the
+    #: :func:`.guess_lexer_for_filename()` function, to determine which lexers
+    #: are then included in guessing the correct one. That means that
+    #: e.g. every lexer for HTML and a template language should include
+    #: ``\*.html`` in this list.
     alias_filenames = []
 
-    #: MIME types
+    #: A list of MIME types for content that can be lexed with this lexer.
     mimetypes = []
 
     #: Priority, should multiple lexers match and no content is provided
     priority = 0
 
+    #: URL of the language specification/definition. Used in the Pygments
+    #: documentation.
+    url = None
+
     def __init__(self, **options):
+        """
+        This constructor takes arbitrary options as keyword arguments.
+        Every subclass must first process its own options and then call
+        the `Lexer` constructor, since it processes the basic
+        options like `stripnl`.
+
+        An example looks like this:
+
+        .. sourcecode:: python
+
+            def __init__(self, **options):
+                self.compress = options.get('compress', '')
+                Lexer.__init__(self, **options)
+
+        As these options must all be specifiable as strings (due to the
+        command line usage), there are various utility functions
+        available to help with that, see `Option processing`_.
+        """
         self.options = options
         self.stripnl = get_bool_opt(options, 'stripnl', True)
         self.stripall = get_bool_opt(options, 'stripall', False)
@@ -124,10 +175,13 @@ class Lexer(metaclass=LexerMeta):
 
     def analyse_text(text):
         """
-        Has to return a float between ``0`` and ``1`` that indicates
-        if a lexer wants to highlight this text. Used by ``guess_lexer``.
-        If this method returns ``0`` it won't highlight it in any case, if
-        it returns ``1`` highlighting with this lexer is guaranteed.
+        A static method which is called for lexer guessing.
+
+        It should analyse the text and return a float in the range
+        from ``0.0`` to ``1.0``. If it returns ``0.0``, the lexer
+        will not be selected as the most probable one, if it returns
+        ``1.0``, it will be selected immediately. This is used by
+        `guess_lexer`.
 
         The `LexerMeta` metaclass automatically wraps this function so
         that it works like a static method (no ``self`` or ``cls``
@@ -138,12 +192,17 @@ class Lexer(metaclass=LexerMeta):
 
     def get_tokens(self, text, unfiltered=False):
         """
-        Return an iterable of (tokentype, value) pairs generated from
-        `text`. If `unfiltered` is set to `True`, the filtering mechanism
-        is bypassed even if filters are defined.
+        This method is the basic interface of a lexer. It is called by
+        the `highlight()` function. It must process the text and return an
+        iterable of ``(tokentype, value)`` pairs from `text`.
+
+        Normally, you don't need to override this method. The default
+        implementation processes the options recognized by all lexers
+        (`stripnl`, `stripall` and so on), and then yields all tokens
+        from `get_tokens_unprocessed()`, with the ``index`` dropped.
 
-        Also preprocess the text, i.e. expand tabs and strip it if
-        wanted and applies registered filters.
+        If `unfiltered` is set to `True`, the filtering mechanism is
+        bypassed even if filters are defined.
         """
         if not isinstance(text, str):
             if self.encoding == 'guess':
@@ -197,11 +256,12 @@ class Lexer(metaclass=LexerMeta):
 
     def get_tokens_unprocessed(self, text):
         """
-        Return an iterable of (index, tokentype, value) pairs where "index"
-        is the starting position of the token within the input text.
+        This method should process the text and return an iterable of
+        ``(index, tokentype, value)`` tuples where ``index`` is the starting
+        position of the token within the input text.
 
-        In subclasses, implement this method as a generator to
-        maximize effectiveness.
+        It must be overridden by subclasses. It is recommended to
+        implement it as a generator to maximize effectiveness.
         """
         raise NotImplementedError
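Taken together, the docstrings moved into lexer.py describe the whole
subclassing contract: process your own options in ``__init__`` before
delegating to the base class, implement ``get_tokens_unprocessed()`` as a
generator, and optionally provide ``analyse_text()``. A hypothetical lexer
following that contract (the class name, alias, glob and option name are
made up for illustration, not part of Pygments or this commit):

.. sourcecode:: python

    from pygments.lexer import Lexer
    from pygments.token import Comment, Text
    from pygments.util import get_bool_opt

    class SemicolonCommentLexer(Lexer):
        name = 'Semicolon comments'   # full human-readable name
        aliases = ['semicomment']     # for get_lexer_by_name()
        filenames = ['*.semi']        # hypothetical fnmatch glob

        def __init__(self, **options):
            # Process our own option first, then let the base class
            # handle stripnl, stripall, tabsize, etc.
            self.mark_comments = get_bool_opt(options, 'mark_comments', True)
            Lexer.__init__(self, **options)

        def get_tokens_unprocessed(self, text):
            # A generator yielding (index, tokentype, value) tuples,
            # as the docstring recommends.
            pos = 0
            for line in text.splitlines(keepends=True):
                if line.startswith(';') and self.mark_comments:
                    yield pos, Comment, line
                else:
                    yield pos, Text, line
                pos += len(line)

        def analyse_text(text):
            # No @staticmethod decorator needed; LexerMeta wraps this
            # automatically, as noted above. Called by guess_lexer().
            return 0.1 if text.lstrip().startswith(';') else 0.0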