From 09c8a31039366b03c3b7e117ee998eaa43f9c7dc Mon Sep 17 00:00:00 2001 From: Jean Abou Samra Date: Thu, 30 Mar 2023 22:02:41 +0200 Subject: Use autodoc a bit more --- pygments/__init__.py | 20 +++++----- pygments/lexer.py | 104 ++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 92 insertions(+), 32 deletions(-) (limited to 'pygments') diff --git a/pygments/__init__.py b/pygments/__init__.py index 7078c45d..afd36682 100644 --- a/pygments/__init__.py +++ b/pygments/__init__.py @@ -34,7 +34,9 @@ __all__ = ['lex', 'format', 'highlight'] def lex(code, lexer): """ - Lex ``code`` with ``lexer`` and return an iterable of tokens. + Lex `code` with the `lexer` (must be a `Lexer` instance) + and return an iterable of tokens. Currently, this only calls + `lexer.get_tokens()`. """ try: return lexer.get_tokens(code) @@ -49,11 +51,12 @@ def lex(code, lexer): def format(tokens, formatter, outfile=None): # pylint: disable=redefined-builtin """ - Format a tokenlist ``tokens`` with the formatter ``formatter``. + Format ``tokens`` (an iterable of tokens) with the formatter ``formatter`` + (a `Formatter` instance). - If ``outfile`` is given and a valid file object (an object - with a ``write`` method), the result will be written to it, otherwise - it is returned as a string. + If ``outfile`` is given and a valid file object (an object with a + ``write`` method), the result will be written to it, otherwise it + is returned as a string. """ try: if not outfile: @@ -73,10 +76,7 @@ def format(tokens, formatter, outfile=None): # pylint: disable=redefined-builti def highlight(code, lexer, formatter, outfile=None): """ - Lex ``code`` with ``lexer`` and format it with the formatter ``formatter``. - - If ``outfile`` is given and a valid file object (an object - with a ``write`` method), the result will be written to it, otherwise - it is returned as a string. + This is the most high-level highlighting function. It combines `lex` and + `format` in one function. """ return format(lex(code, lexer), formatter, outfile) diff --git a/pygments/lexer.py b/pygments/lexer.py index 0290365d..7c596719 100644 --- a/pygments/lexer.py +++ b/pygments/lexer.py @@ -50,7 +50,31 @@ class Lexer(metaclass=LexerMeta): """ Lexer for a specific language. - Basic options recognized: + See also :doc:`lexerdevelopment`, a high-level guide to writing + lexers. + + Lexer classes have attributes used for choosing the most appropriate + lexer based on various criteria. + + .. autoattribute:: name + :no-value: + .. autoattribute:: aliases + :no-value: + .. autoattribute:: filenames + :no-value: + .. autoattribute:: alias_filenames + .. autoattribute:: mimetypes + :no-value: + .. autoattribute:: priority + + Lexers included in Pygments should have an additional attribute: + + .. autoattribute:: url + :no-value: + + You can pass options to the constructor. The basic options recognized + by all lexers and processed by the base `Lexer` class are: + ``stripnl`` Strip leading and trailing newlines from the input (default: True). ``stripall`` @@ -74,28 +98,55 @@ class Lexer(metaclass=LexerMeta): Overrides the ``encoding`` if given. """ - #: Name of the lexer + #: Full name of the lexer, in human-readable form name = None - #: URL of the language specification/definition - url = None - - #: Shortcuts for the lexer + #: A list of short, unique identifiers that can be used to look + #: up the lexer from a list, e.g., using `get_lexer_by_name()`. aliases = [] - #: File name globs + #: A list of `fnmatch` patterns that match filenames which contain + #: content for this lexer. The patterns in this list should be unique among + #: all lexers. filenames = [] - #: Secondary file name globs + #: A list of `fnmatch` patterns that match filenames which may or may not + #: contain content for this lexer. This list is used by the + #: :func:`.guess_lexer_for_filename()` function, to determine which lexers + #: are then included in guessing the correct one. That means that + #: e.g. every lexer for HTML and a template language should include + #: ``\*.html`` in this list. alias_filenames = [] - #: MIME types + #: A list of MIME types for content that can be lexed with this lexer. mimetypes = [] #: Priority, should multiple lexers match and no content is provided priority = 0 + #: URL of the language specification/definition. Used in the Pygments + #: documentation. + url = None + def __init__(self, **options): + """ + This constructor takes arbitrary options as keyword arguments. + Every subclass must first process its own options and then call + the `Lexer` constructor, since it processes the basic + options like `stripnl`. + + An example looks like this: + + .. sourcecode:: python + + def __init__(self, **options): + self.compress = options.get('compress', '') + Lexer.__init__(self, **options) + + As these options must all be specifiable as strings (due to the + command line usage), there are various utility functions + available to help with that, see `Option processing`_. + """ self.options = options self.stripnl = get_bool_opt(options, 'stripnl', True) self.stripall = get_bool_opt(options, 'stripall', False) @@ -124,10 +175,13 @@ class Lexer(metaclass=LexerMeta): def analyse_text(text): """ - Has to return a float between ``0`` and ``1`` that indicates - if a lexer wants to highlight this text. Used by ``guess_lexer``. - If this method returns ``0`` it won't highlight it in any case, if - it returns ``1`` highlighting with this lexer is guaranteed. + A static method which is called for lexer guessing. + + It should analyse the text and return a float in the range + from ``0.0`` to ``1.0``. If it returns ``0.0``, the lexer + will not be selected as the most probable one, if it returns + ``1.0``, it will be selected immediately. This is used by + `guess_lexer`. The `LexerMeta` metaclass automatically wraps this function so that it works like a static method (no ``self`` or ``cls`` @@ -138,12 +192,17 @@ class Lexer(metaclass=LexerMeta): def get_tokens(self, text, unfiltered=False): """ - Return an iterable of (tokentype, value) pairs generated from - `text`. If `unfiltered` is set to `True`, the filtering mechanism - is bypassed even if filters are defined. + This method is the basic interface of a lexer. It is called by + the `highlight()` function. It must process the text and return an + iterable of ``(tokentype, value)`` pairs from `text`. + + Normally, you don't need to override this method. The default + implementation processes the options recognized by all lexers + (`stripnl`, `stripall` and so on), and then yields all tokens + from `get_tokens_unprocessed()`, with the ``index`` dropped. - Also preprocess the text, i.e. expand tabs and strip it if - wanted and applies registered filters. + If `unfiltered` is set to `True`, the filtering mechanism is + bypassed even if filters are defined. """ if not isinstance(text, str): if self.encoding == 'guess': @@ -197,11 +256,12 @@ class Lexer(metaclass=LexerMeta): def get_tokens_unprocessed(self, text): """ - Return an iterable of (index, tokentype, value) pairs where "index" - is the starting position of the token within the input text. + This method should process the text and return an iterable of + ``(index, tokentype, value)`` tuples where ``index`` is the starting + position of the token within the input text. - In subclasses, implement this method as a generator to - maximize effectiveness. + It must be overridden by subclasses. It is recommended to + implement it as a generator to maximize effectiveness. """ raise NotImplementedError -- cgit v1.2.1