68 files changed, 2409 insertions, 531 deletions
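Among the documentation changes below, lexerdevelopment.rst now notes that a lexer outside the Pygments tree can be used directly through the Python API. A minimal sketch of that route, where ``mylexers`` and ``MyLexer`` are hypothetical stand-ins for your own module and lexer class::

    # Sketch: import and instantiate an out-of-tree lexer yourself, then
    # pass it to highlight(); no entry in the lexer mapping is needed.
    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from mylexers import MyLexer  # hypothetical module holding your lexer

    print(highlight('some source text', MyLexer(), TerminalFormatter()))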
@@ -33,6 +33,7 @@ Other contributors, listed alphabetically, are:
 * Adam Blinkinsop -- Haskell, Redcode lexers
 * Frits van Bommel -- assembler lexers
 * Pierre Bourdon -- bugfixes
+* Matthias Bussonnier -- ANSI style handling for terminal-256 formatter
 * chebee7i -- Python traceback lexer improvements
 * Hiram Chirino -- Scaml and Jade lexers
 * Ian Cooper -- VGL lexer
@@ -88,7 +89,8 @@ Other contributors, listed alphabetically, are:
 * Tim Howard -- BlitzMax lexer
 * Dustin Howett -- Logos lexer
 * Ivan Inozemtsev -- Fantom lexer
-* Hiroaki Itoh -- Shell console rewrite, Lexers for PowerShell session, MSDOS session, BC
+* Hiroaki Itoh -- Shell console rewrite, Lexers for PowerShell session,
+  MSDOS session, BC, WDiff
 * Brian R. Jackson -- Tea lexer
 * Christian Jann -- ShellSession lexer
 * Dennis Kaarsemaker -- sources.list lexer
@@ -110,6 +112,7 @@ Other contributors, listed alphabetically, are:
 * Jon Larimer, Google Inc. -- Smali lexer
 * Olov Lassus -- Dart lexer
 * Matt Layman -- TAP lexer
+* Kristian Lyngstøl -- Varnish lexers
 * Sylvestre Ledru -- Scilab lexer
 * Chee Sing Lee -- Flatline lexer
 * Mark Lee -- Vala lexer
@@ -173,6 +176,7 @@ Other contributors, listed alphabetically, are:
 * Alexander Smishlajev -- Visual FoxPro lexer
 * Steve Spigarelli -- XQuery lexer
 * Jerome St-Louis -- eC lexer
+* Camil Staps -- Clean lexer
 * James Strachan -- Kotlin lexer
 * Tom Stuart -- Treetop lexer
 * Colin Sullivan -- SuperCollider lexer
@@ -15,16 +15,28 @@ Version 2.2
   * AMPL
   * TypoScript (#1173)
+  * Varnish config (PR#554)
+  * Clean (PR#503)
+  * WDiff (PR#513)

-- Added `lexers.find_lexer_class_by_name()` (#1203)
+- Added `lexers.find_lexer_class_by_name()`. (#1203)

 - Added new token types and lexing for magic methods and variables in Python
   and PHP.
+
+- Added a new token type for string affixes and lexing for them in Python, C++
+  and Postgresql lexers.
+
+- Added a new token type for heredoc (and similar) string delimiters and
+  lexing for them in C++, Perl, PHP, Postgresql and Ruby lexers.
+
+- Styles can now define colors with ANSI color names for use in the 256-color
+  terminal formatter. (PR#531)
+
 Version 2.1.1
 -------------

-(in development)
+(released Feb 14, 2016)

 - Fixed Jython compatibility (#1205)
 - Fixed HTML formatter output with leading empty lines (#1111)

diff --git a/doc/docs/lexerdevelopment.rst b/doc/docs/lexerdevelopment.rst
index 2c868440..fd6e76b9 100644
--- a/doc/docs/lexerdevelopment.rst
+++ b/doc/docs/lexerdevelopment.rst
@@ -88,10 +88,16 @@ one.
 Adding and testing a new lexer
 ==============================

-To make Pygments aware of your new lexer, you have to perform the following
-steps:
+Using a lexer that is not part of Pygments can be done via the Python API. You
+can import and instantiate the lexer, and pass it to :func:`pygments.highlight`.

-First, change to the current directory containing the Pygments source code:
+To prepare your new lexer for inclusion in the Pygments distribution, so that it
+will be found when passing filenames or lexer aliases from the command line, you
+have to perform the following steps.
+
+First, change into the directory containing the Pygments source code. You will
+need to have either an unpacked source tarball, or (preferably) a copy cloned
+from BitBucket.

 .. code-block:: console

@@ -101,11 +107,13 @@ Select a matching module under ``pygments/lexers``, or create a new module for
 your lexer class.

 Next, make sure the lexer is known from outside of the module. All modules in
-the ``pygments.lexers`` specify ``__all__``. For example, ``esoteric.py`` sets::
+the ``pygments.lexers`` package specify ``__all__``. For example,
+``esoteric.py`` sets::

     __all__ = ['BrainfuckLexer', 'BefungeLexer', ...]

-Simply add the name of your lexer class to this list.
+Add the name of your lexer class to this list (or create the list if your lexer
+is the only class in the module).

 Finally the lexer can be made publicly known by rebuilding the lexer mapping:

diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst
index d56db0db..1094a270 100644
--- a/doc/docs/styles.rst
+++ b/doc/docs/styles.rst
@@ -143,3 +143,59 @@ a way to iterate over all styles:
     >>> from pygments.styles import get_all_styles
     >>> styles = list(get_all_styles())
+
+
+.. _AnsiTerminalStyle:
+
+Terminal Styles
+===============
+
+.. versionadded:: 2.2
+
+Custom styles used with the 256-color terminal formatter can also map colors
+to the 8 default ANSI colors. To do so, use ``#ansigreen``, ``#ansired`` or
+any other color defined in :attr:`pygments.style.ansicolors`. Foreground ANSI
+colors will be mapped to the corresponding `escape codes 30 to 37
+<https://en.wikipedia.org/wiki/ANSI_escape_code#Colors>`_, thus respecting any
+custom color mapping and themes provided by many terminal emulators. Light
+variants are treated as foreground colors with an added bold flag.
+``bg:#ansi<color>`` will also be respected, except that light background
+variants are rendered in the same shade as their dark counterparts.
+
+See the following example, where the color of the string ``"Hello World"`` is
+governed by the escape sequence ``\x1b[34;41;01m`` (ANSI blue foreground, red
+background, bold) instead of an extended foreground & background color.
+
+.. sourcecode:: pycon
+
+    >>> from pygments import highlight
+    >>> from pygments.style import Style
+    >>> from pygments.token import Token
+    >>> from pygments.lexers import Python3Lexer
+    >>> from pygments.formatters import Terminal256Formatter
+
+    >>> class MyStyle(Style):
+            styles = {
+                Token.String: '#ansiblue bg:#ansired',
+            }
+
+    >>> code = 'print("Hello World")'
+    >>> result = highlight(code, Python3Lexer(), Terminal256Formatter(style=MyStyle))
+    >>> print(result.encode())
+    b'\x1b[34;41;01m"\x1b[39;49;00m\x1b[34;41;01mHello World\x1b[39;49;00m\x1b[34;41;01m"\x1b[39;49;00m'
+
+Colors specified using ``#ansi*`` are converted to a default set of RGB colors
+when used with formatters other than the terminal-256 formatter.
+
+By definition of ANSI, the following colors are considered "light" colors, and
+will be rendered by most terminals as bold:
+
+- "darkgray", "red", "green", "yellow", "blue", "fuchsia", "turquoise", "white"
+
+The following are considered "dark" colors and will be rendered as non-bold:
+
+- "black", "darkred", "darkgreen", "brown", "darkblue", "purple", "teal",
+  "lightgray"
+
+Exact behavior might depend on the terminal emulator you are using, and its
+settings.
diff --git a/doc/docs/tokens.rst b/doc/docs/tokens.rst
index 96a6d003..801fc638 100644
--- a/doc/docs/tokens.rst
+++ b/doc/docs/tokens.rst
@@ -223,12 +223,20 @@ Literals
 `String`
     For any string literal.

+`String.Affix`
+    Token type for affixes that further specify the type of the string they're
+    attached to (e.g. the prefixes ``r`` and ``u8`` in ``r"foo"`` and
+    ``u8"foo"``).
+
 `String.Backtick`
     Token type for strings enclosed in backticks.

 `String.Char`
     Token type for single characters (e.g. Java, C).

+`String.Delimiter`
+    Token type for delimiting identifiers in "heredoc", raw and other similar
+    strings (e.g. the word ``END`` in Perl code ``print <<'END';``).
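To see the new literal token types in action, here is a minimal sketch (assuming a Pygments build that contains the lexer changes from this diff)::

    # Sketch: with the affix support added above, the Python lexer should
    # report the 'r' prefix of a raw string as a separate String.Affix token.
    from pygments.lexers import PythonLexer

    for index, tokentype, value in PythonLexer().get_tokens_unprocessed('r"foo"'):
        print(index, tokentype, value)

    # Expected first line of output: 0 Token.Literal.String.Affix r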
+ `String.Doc` Token type for documentation strings (for example Python). diff --git a/pygments/__init__.py b/pygments/__init__.py index 7bd7557f..ffac59ef 100644 --- a/pygments/__init__.py +++ b/pygments/__init__.py @@ -25,18 +25,16 @@ :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ +import sys + +from pygments.util import StringIO, BytesIO -__version__ = '2.1' +__version__ = '2.2a0' __docformat__ = 'restructuredtext' __all__ = ['lex', 'format', 'highlight'] -import sys - -from pygments.util import StringIO, BytesIO - - def lex(code, lexer): """ Lex ``code`` with ``lexer`` and return an iterable of tokens. @@ -44,9 +42,9 @@ def lex(code, lexer): try: return lexer.get_tokens(code) except TypeError as err: - if isinstance(err.args[0], str) and \ - ('unbound method get_tokens' in err.args[0] or - 'missing 1 required positional argument' in err.args[0]): + if (isinstance(err.args[0], str) and + ('unbound method get_tokens' in err.args[0] or + 'missing 1 required positional argument' in err.args[0])): raise TypeError('lex() argument must be a lexer instance, ' 'not a class') raise @@ -68,9 +66,9 @@ def format(tokens, formatter, outfile=None): # pylint: disable=redefined-builti else: formatter.format(tokens, outfile) except TypeError as err: - if isinstance(err.args[0], str) and \ - ('unbound method format' in err.args[0] or - 'missing 1 required positional argument' in err.args[0]): + if (isinstance(err.args[0], str) and + ('unbound method format' in err.args[0] or + 'missing 1 required positional argument' in err.args[0])): raise TypeError('format() argument must be a formatter instance, ' 'not a class') raise diff --git a/pygments/console.py b/pygments/console.py index 4a2c9acb..4aaf5fcb 100644 --- a/pygments/console.py +++ b/pygments/console.py @@ -12,18 +12,18 @@ esc = "\x1b[" codes = {} -codes[""] = "" -codes["reset"] = esc + "39;49;00m" +codes[""] = "" +codes["reset"] = esc + "39;49;00m" -codes["bold"] = esc + "01m" -codes["faint"] = esc + "02m" -codes["standout"] = esc + "03m" +codes["bold"] = esc + "01m" +codes["faint"] = esc + "02m" +codes["standout"] = esc + "03m" codes["underline"] = esc + "04m" -codes["blink"] = esc + "05m" -codes["overline"] = esc + "06m" +codes["blink"] = esc + "05m" +codes["overline"] = esc + "06m" -dark_colors = ["black", "darkred", "darkgreen", "brown", "darkblue", - "purple", "teal", "lightgray"] +dark_colors = ["black", "darkred", "darkgreen", "brown", "darkblue", + "purple", "teal", "lightgray"] light_colors = ["darkgray", "red", "green", "yellow", "blue", "fuchsia", "turquoise", "white"] @@ -35,10 +35,10 @@ for d, l in zip(dark_colors, light_colors): del d, l, x -codes["darkteal"] = codes["turquoise"] +codes["darkteal"] = codes["turquoise"] codes["darkyellow"] = codes["brown"] -codes["fuscia"] = codes["fuchsia"] -codes["white"] = codes["bold"] +codes["fuscia"] = codes["fuchsia"] +codes["white"] = codes["bold"] def reset_color(): diff --git a/pygments/filter.py b/pygments/filter.py index c8176ed9..f3082037 100644 --- a/pygments/filter.py +++ b/pygments/filter.py @@ -34,10 +34,10 @@ def simplefilter(f): yield ttype, value.lower() """ return type(f.__name__, (FunctionFilter,), { - 'function': f, - '__module__': getattr(f, '__module__'), - '__doc__': f.__doc__ - }) + '__module__': getattr(f, '__module__'), + '__doc__': f.__doc__, + 'function': f, + }) class Filter(object): diff --git a/pygments/formatter.py b/pygments/formatter.py index addd07d7..9f22b3bc 100644 --- a/pygments/formatter.py +++ 
b/pygments/formatter.py @@ -65,7 +65,7 @@ class Formatter(object): def __init__(self, **options): self.style = _lookup_style(options.get('style', 'default')) - self.full = get_bool_opt(options, 'full', False) + self.full = get_bool_opt(options, 'full', False) self.title = options.get('title', '') self.encoding = options.get('encoding', None) or None if self.encoding in ('guess', 'chardet'): diff --git a/pygments/formatters/terminal256.py b/pygments/formatters/terminal256.py index af311955..5110bc9e 100644 --- a/pygments/formatters/terminal256.py +++ b/pygments/formatters/terminal256.py @@ -27,6 +27,8 @@ import sys from pygments.formatter import Formatter +from pygments.console import codes +from pygments.style import ansicolors __all__ = ['Terminal256Formatter', 'TerminalTrueColorFormatter'] @@ -47,9 +49,21 @@ class EscapeSequence: def color_string(self): attrs = [] if self.fg is not None: - attrs.extend(("38", "5", "%i" % self.fg)) + if self.fg in ansicolors: + esc = codes[self.fg[5:]] + if ';01m' in esc: + self.bold = True + # extract fg color code. + attrs.append(esc[2:4]) + else: + attrs.extend(("38", "5", "%i" % self.fg)) if self.bg is not None: - attrs.extend(("48", "5", "%i" % self.bg)) + if self.bg in ansicolors: + esc = codes[self.bg[5:]] + # extract fg color code, add 10 for bg. + attrs.append(str(int(esc[2:4])+10)) + else: + attrs.extend(("48", "5", "%i" % self.bg)) if self.bold: attrs.append("01") if self.underline: @@ -91,6 +105,11 @@ class Terminal256Formatter(Formatter): .. versionadded:: 0.9 + .. versionchanged:: 2.2 + If the used style defines foreground colors in the form ``#ansi*``, then + `Terminal256Formatter` will map these to non extended foreground color. + See :ref:`AnsiTerminalStyle` for more information. + Options accepted: `style` @@ -169,6 +188,10 @@ class Terminal256Formatter(Formatter): def _color_index(self, color): index = self.best_match.get(color, None) + if color in ansicolors: + # strip the `#ansi` part and look up code + index = color + self.best_match[color] = index if index is None: try: rgb = int(str(color), 16) @@ -185,9 +208,14 @@ class Terminal256Formatter(Formatter): def _setup_styles(self): for ttype, ndef in self.style: escape = EscapeSequence() - if ndef['color']: + # get foreground from ansicolor if set + if ndef['ansicolor']: + escape.fg = self._color_index(ndef['ansicolor']) + elif ndef['color']: escape.fg = self._color_index(ndef['color']) - if ndef['bgcolor']: + if ndef['bgansicolor']: + escape.bg = self._color_index(ndef['bgansicolor']) + elif ndef['bgcolor']: escape.bg = self._color_index(ndef['bgcolor']) if self.usebold and ndef['bold']: escape.bold = True diff --git a/pygments/lexer.py b/pygments/lexer.py index dd6c01e4..f16d8106 100644 --- a/pygments/lexer.py +++ b/pygments/lexer.py @@ -319,8 +319,8 @@ def bygroups(*args): if data is not None: if ctx: ctx.pos = match.start(i + 1) - for item in action( - lexer, _PseudoMatch(match.start(i + 1), data), ctx): + for item in action(lexer, + _PseudoMatch(match.start(i + 1), data), ctx): if item: yield item if ctx: diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index debb7de5..8650b7e4 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -79,6 +79,7 @@ LEXERS = { 'CheetahXmlLexer': ('pygments.lexers.templates', 'XML+Cheetah', ('xml+cheetah', 'xml+spitfire'), (), ('application/xml+cheetah', 'application/xml+spitfire')), 'CirruLexer': ('pygments.lexers.webmisc', 'Cirru', ('cirru',), ('*.cirru',), ('text/x-cirru',)), 'ClayLexer': 
('pygments.lexers.c_like', 'Clay', ('clay',), ('*.clay',), ('text/x-clay',)), + 'CleanLexer': ('pygments.lexers.clean', 'Clean', ('clean',), ('*.icl', '*.dcl'), ()), 'ClojureLexer': ('pygments.lexers.jvm', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')), 'ClojureScriptLexer': ('pygments.lexers.jvm', 'ClojureScript', ('clojurescript', 'cljs'), ('*.cljs',), ('text/x-clojurescript', 'application/x-clojurescript')), 'CobolFreeformatLexer': ('pygments.lexers.business', 'COBOLFree', ('cobolfree',), ('*.cbl', '*.CBL'), ()), @@ -397,6 +398,8 @@ LEXERS = { 'TypoScriptHtmlDataLexer': ('pygments.lexers.typoscript', 'TypoScriptHtmlData', ('typoscripthtmldata',), (), ()), 'TypoScriptLexer': ('pygments.lexers.typoscript', 'TypoScript', ('typoscript',), ('*.ts', '*.txt'), ('text/x-typoscript',)), 'UrbiscriptLexer': ('pygments.lexers.urbi', 'UrbiScript', ('urbiscript',), ('*.u',), ('application/x-urbiscript',)), + 'VCLLexer': ('pygments.lexers.varnish', 'VCL', ('vcl',), ('*.vcl',), ('text/x-vclsrc',)), + 'VCLSnippetLexer': ('pygments.lexers.varnish', 'VCLSnippets', ('vclsnippets', 'vclsnippet'), (), ('text/x-vclsnippet',)), 'VCTreeStatusLexer': ('pygments.lexers.console', 'VCTreeStatus', ('vctreestatus',), (), ()), 'VGLLexer': ('pygments.lexers.dsls', 'VGL', ('vgl',), ('*.rpf',), ()), 'ValaLexer': ('pygments.lexers.c_like', 'Vala', ('vala', 'vapi'), ('*.vala', '*.vapi'), ('text/x-vala',)), @@ -408,6 +411,7 @@ LEXERS = { 'VerilogLexer': ('pygments.lexers.hdl', 'verilog', ('verilog', 'v'), ('*.v',), ('text/x-verilog',)), 'VhdlLexer': ('pygments.lexers.hdl', 'vhdl', ('vhdl',), ('*.vhdl', '*.vhd'), ('text/x-vhdl',)), 'VimLexer': ('pygments.lexers.textedit', 'VimL', ('vim',), ('*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'), ('text/x-vim',)), + 'WDiffLexer': ('pygments.lexers.diff', 'WDiff', ('wdiff',), ('*.wdiff',), ()), 'X10Lexer': ('pygments.lexers.x10', 'X10', ('x10', 'xten'), ('*.x10',), ('text/x-x10',)), 'XQueryLexer': ('pygments.lexers.webmisc', 'XQuery', ('xquery', 'xqy', 'xq', 'xql', 'xqm'), ('*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm'), ('text/xquery', 'application/xquery')), 'XmlDjangoLexer': ('pygments.lexers.templates', 'XML+Django/Jinja', ('xml+django', 'xml+jinja'), (), ('application/xml+django', 'application/xml+jinja')), diff --git a/pygments/lexers/algebra.py b/pygments/lexers/algebra.py index fc54c3c3..79460ad4 100644 --- a/pygments/lexers/algebra.py +++ b/pygments/lexers/algebra.py @@ -104,9 +104,9 @@ class MathematicaLexer(RegexLexer): (r'#\d*', Name.Variable), (r'([a-zA-Z]+[a-zA-Z0-9]*)', Name), - (r'-?[0-9]+\.[0-9]*', Number.Float), - (r'-?[0-9]*\.[0-9]+', Number.Float), - (r'-?[0-9]+', Number.Integer), + (r'-?\d+\.\d*', Number.Float), + (r'-?\d*\.\d+', Number.Float), + (r'-?\d+', Number.Integer), (words(operators), Operator), (words(punctuation), Punctuation), diff --git a/pygments/lexers/ampl.py b/pygments/lexers/ampl.py index f57b486f..c3ca80d4 100644 --- a/pygments/lexers/ampl.py +++ b/pygments/lexers/ampl.py @@ -9,13 +9,10 @@ :license: BSD, see LICENSE for details. 
""" -import re - -from pygments.lexer import RegexLexer, bygroups, using, this +from pygments.lexer import RegexLexer, bygroups, using, this, words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation - __all__ = ['AmplLexer'] @@ -30,27 +27,30 @@ class AmplLexer(RegexLexer): filenames = ['*.run'] tokens = { - 'root':[ + 'root': [ (r'\n', Text), (r'\s+', Text.Whitespace), (r'#.*?\n', Comment.Single), (r'/[*](.|\n)*?[*]/', Comment.Multiline), - (r'(call|cd|close|commands|data|delete|display|drop|end|environ|' - r'exit|expand|include|load|model|objective|option|problem|purge|' - r'quit|redeclare|reload|remove|reset|restore|shell|show|solexpand|' - r'solution|solve|update|unload|xref|' - r'coeff|coef|cover|obj|interval|' - r'default|from|to|to_come|net_in|net_out|dimen|dimension|' - r'check|complements|write|end|function|pipe|' - r'format|if|then|else|in|while|repeat|for)\b', Keyword.Reserved), - (r'(integer|binary|symbolic|ordered|circular|reversed|IN|INOUT|OUT|LOCAL)', + (words(( + 'call', 'cd', 'close', 'commands', 'data', 'delete', 'display', + 'drop', 'end', 'environ', 'exit', 'expand', 'include', 'load', + 'model', 'objective', 'option', 'problem', 'purge', 'quit', + 'redeclare', 'reload', 'remove', 'reset', 'restore', 'shell', + 'show', 'solexpand', 'solution', 'solve', 'update', 'unload', + 'xref', 'coeff', 'coef', 'cover', 'obj', 'interval', 'default', + 'from', 'to', 'to_come', 'net_in', 'net_out', 'dimen', + 'dimension', 'check', 'complements', 'write', 'function', + 'pipe', 'format', 'if', 'then', 'else', 'in', 'while', 'repeat', + 'for'), suffix=r'\b'), Keyword.Reserved), + (r'(integer|binary|symbolic|ordered|circular|reversed|INOUT|IN|OUT|LOCAL)', Keyword.Type), (r'\".*?\"', String.Double), (r'\'.*?\'', String.Single), (r'[()\[\]{},;:]+', Punctuation), - (r'\b(\w+)(\.)(astatus|init|init0|lb|lb0|lb1|lb2|lrc|' - r'lslack|rc|relax|slack|sstatus|status|ub|ub0|ub1|' - r'ub2|urc|uslack|val)', + (r'\b(\w+)(\.)(astatus|init0|init|lb0|lb1|lb2|lb|lrc|' + r'lslack|rc|relax|slack|sstatus|status|ub0|ub1|ub2|' + r'ub|urc|uslack|val)', bygroups(Name.Variable, Punctuation, Keyword.Reserved)), (r'(set|param|var|arc|minimize|maximize|subject to|s\.t\.|subj to|' r'node|table|suffix|read table|write table)(\s+)(\w+)', @@ -58,21 +58,26 @@ class AmplLexer(RegexLexer): (r'(param)(\s*)(:)(\s*)(\w+)(\s*)(:)(\s*)((\w|\s)+)', bygroups(Keyword.Declaration, Text, Punctuation, Text, Name.Variable, Text, Punctuation, Text, Name.Variable)), - (r'(let|fix|unfix)(\s*)(\{.*\}|)(\s*)(\w+)', + (r'(let|fix|unfix)(\s*)((?:\{.*\})?)(\s*)(\w+)', bygroups(Keyword.Declaration, Text, using(this), Text, Name.Variable)), - (r'\b(abs|acos|acosh|alias|' - r'asin|asinh|atan|atan2|atanh|ceil|ctime|cos|exp|floor|log|log10|' - r'max|min|precision|round|sin|sinh|sqrt|tan|tanh|time|trunc|Beta|' - r'Cauchy|Exponential|Gamma|Irand224|Normal|Normal01|Poisson|Uniform|Uniform01|' - r'num|num0|ichar|char|length|substr|sprintf|match|sub|gsub|print|printf' - r'next|nextw|prev|prevw|first|last|ord|ord0|card|arity|indexarity)\b', - Name.Builtin), + (words(( + 'abs', 'acos', 'acosh', 'alias', 'asin', 'asinh', 'atan', 'atan2', + 'atanh', 'ceil', 'ctime', 'cos', 'exp', 'floor', 'log', 'log10', + 'max', 'min', 'precision', 'round', 'sin', 'sinh', 'sqrt', 'tan', + 'tanh', 'time', 'trunc', 'Beta', 'Cauchy', 'Exponential', 'Gamma', + 'Irand224', 'Normal', 'Normal01', 'Poisson', 'Uniform', 'Uniform01', + 'num', 'num0', 'ichar', 'char', 'length', 'substr', 'sprintf', + 'match', 'sub', 'gsub', 'print', 
'printf', 'next', 'nextw', 'prev', + 'prevw', 'first', 'last', 'ord', 'ord0', 'card', 'arity', + 'indexarity'), prefix=r'\b', suffix=r'\b'), Name.Builtin), (r'(\+|\-|\*|/|\*\*|=|<=|>=|==|\||\^|<|>|\!|\.\.|:=|\&|\!=|<<|>>)', Operator), - (r'(or|exists|forall|and|in|not|within|union|diff|' - r'difference|symdiff|inter|intersect|intersection|' - r'cross|setof|by|less|sum|prod|product|div|mod)', - Keyword.Reserved), #Operator.Name but not enough emphasized with Operator.Name + (words(( + 'or', 'exists', 'forall', 'and', 'in', 'not', 'within', 'union', + 'diff', 'difference', 'symdiff', 'inter', 'intersect', + 'intersection', 'cross', 'setof', 'by', 'less', 'sum', 'prod', + 'product', 'div', 'mod'), suffix=r'\b'), + Keyword.Reserved), # Operator.Name but not enough emphasized with that (r'(\d+\.(?!\.)\d*|\.(?!.)\d+)([eE][+-]?\d+)?', Number.Float), (r'\d+([eE][+-]?\d+)?', Number.Integer), (r'[+-]?Infinity', Number.Integer), diff --git a/pygments/lexers/business.py b/pygments/lexers/business.py index ea888245..43978690 100644 --- a/pygments/lexers/business.py +++ b/pygments/lexers/business.py @@ -57,9 +57,9 @@ class CobolLexer(RegexLexer): ], 'core': [ # Figurative constants - (r'(^|(?<=[^0-9a-z_\-]))(ALL\s+)?' + (r'(^|(?<=[^\w\-]))(ALL\s+)?' r'((ZEROES)|(HIGH-VALUE|LOW-VALUE|QUOTE|SPACE|ZERO)(S)?)' - r'\s*($|(?=[^0-9a-z_\-]))', + r'\s*($|(?=[^\w\-]))', Name.Constant), # Reserved words STATEMENTS and other bolds @@ -79,8 +79,8 @@ class CobolLexer(RegexLexer): 'RETURN', 'REWRITE', 'SCREEN', 'SD', 'SEARCH', 'SECTION', 'SET', 'SORT', 'START', 'STOP', 'STRING', 'SUBTRACT', 'SUPPRESS', 'TERMINATE', 'THEN', 'UNLOCK', 'UNSTRING', 'USE', 'VALIDATE', - 'WORKING-STORAGE', 'WRITE'), prefix=r'(^|(?<=[^0-9a-z_\-]))', - suffix=r'\s*($|(?=[^0-9a-z_\-]))'), + 'WORKING-STORAGE', 'WRITE'), prefix=r'(^|(?<=[^\w\-]))', + suffix=r'\s*($|(?=[^\w\-]))'), Keyword.Reserved), # Reserved words @@ -89,33 +89,33 @@ class CobolLexer(RegexLexer): 'ALPHABET', 'ALPHABETIC', 'ALPHABETIC-LOWER', 'ALPHABETIC-UPPER', 'ALPHANUMERIC', 'ALPHANUMERIC-EDITED', 'ALSO', 'ALTER', 'ALTERNATE' 'ANY', 'ARE', 'AREA', 'AREAS', 'ARGUMENT-NUMBER', 'ARGUMENT-VALUE', 'AS', - 'ASCENDING', 'ASSIGN', 'AT', 'AUTO', 'AUTO-SKIP', 'AUTOMATIC', 'AUTOTERMINATE', - 'BACKGROUND-COLOR', 'BASED', 'BEEP', 'BEFORE', 'BELL', + 'ASCENDING', 'ASSIGN', 'AT', 'AUTO', 'AUTO-SKIP', 'AUTOMATIC', + 'AUTOTERMINATE', 'BACKGROUND-COLOR', 'BASED', 'BEEP', 'BEFORE', 'BELL', 'BLANK', 'BLINK', 'BLOCK', 'BOTTOM', 'BY', 'BYTE-LENGTH', 'CHAINING', - 'CHARACTER', 'CHARACTERS', 'CLASS', 'CODE', 'CODE-SET', 'COL', 'COLLATING', - 'COLS', 'COLUMN', 'COLUMNS', 'COMMA', 'COMMAND-LINE', 'COMMIT', 'COMMON', - 'CONSTANT', 'CONTAINS', 'CONTENT', 'CONTROL', + 'CHARACTER', 'CHARACTERS', 'CLASS', 'CODE', 'CODE-SET', 'COL', + 'COLLATING', 'COLS', 'COLUMN', 'COLUMNS', 'COMMA', 'COMMAND-LINE', + 'COMMIT', 'COMMON', 'CONSTANT', 'CONTAINS', 'CONTENT', 'CONTROL', 'CONTROLS', 'CONVERTING', 'COPY', 'CORR', 'CORRESPONDING', 'COUNT', 'CRT', - 'CURRENCY', 'CURSOR', 'CYCLE', 'DATE', 'DAY', 'DAY-OF-WEEK', 'DE', 'DEBUGGING', - 'DECIMAL-POINT', 'DECLARATIVES', 'DEFAULT', 'DELIMITED', + 'CURRENCY', 'CURSOR', 'CYCLE', 'DATE', 'DAY', 'DAY-OF-WEEK', 'DE', + 'DEBUGGING', 'DECIMAL-POINT', 'DECLARATIVES', 'DEFAULT', 'DELIMITED', 'DELIMITER', 'DEPENDING', 'DESCENDING', 'DETAIL', 'DISK', 'DOWN', 'DUPLICATES', 'DYNAMIC', 'EBCDIC', 'ENTRY', 'ENVIRONMENT-NAME', 'ENVIRONMENT-VALUE', 'EOL', 'EOP', 'EOS', 'ERASE', 'ERROR', 'ESCAPE', 'EXCEPTION', - 'EXCLUSIVE', 'EXTEND', 'EXTERNAL', - 'FILE-ID', 'FILLER', 'FINAL', 'FIRST', 
'FIXED', 'FLOAT-LONG', 'FLOAT-SHORT', - 'FOOTING', 'FOR', 'FOREGROUND-COLOR', 'FORMAT', 'FROM', 'FULL', 'FUNCTION', - 'FUNCTION-ID', 'GIVING', 'GLOBAL', 'GROUP', + 'EXCLUSIVE', 'EXTEND', 'EXTERNAL', 'FILE-ID', 'FILLER', 'FINAL', + 'FIRST', 'FIXED', 'FLOAT-LONG', 'FLOAT-SHORT', + 'FOOTING', 'FOR', 'FOREGROUND-COLOR', 'FORMAT', 'FROM', 'FULL', + 'FUNCTION', 'FUNCTION-ID', 'GIVING', 'GLOBAL', 'GROUP', 'HEADING', 'HIGHLIGHT', 'I-O', 'ID', 'IGNORE', 'IGNORING', 'IN', 'INDEX', 'INDEXED', 'INDICATE', - 'INITIAL', 'INITIALIZED', 'INPUT', - 'INTO', 'INTRINSIC', 'INVALID', 'IS', 'JUST', 'JUSTIFIED', 'KEY', 'LABEL', + 'INITIAL', 'INITIALIZED', 'INPUT', 'INTO', 'INTRINSIC', 'INVALID', + 'IS', 'JUST', 'JUSTIFIED', 'KEY', 'LABEL', 'LAST', 'LEADING', 'LEFT', 'LENGTH', 'LIMIT', 'LIMITS', 'LINAGE', 'LINAGE-COUNTER', 'LINE', 'LINES', 'LOCALE', 'LOCK', - 'LOWLIGHT', 'MANUAL', 'MEMORY', 'MINUS', 'MODE', - 'MULTIPLE', 'NATIONAL', 'NATIONAL-EDITED', 'NATIVE', - 'NEGATIVE', 'NEXT', 'NO', 'NULL', 'NULLS', 'NUMBER', 'NUMBERS', 'NUMERIC', - 'NUMERIC-EDITED', 'OBJECT-COMPUTER', 'OCCURS', 'OF', 'OFF', 'OMITTED', 'ON', 'ONLY', + 'LOWLIGHT', 'MANUAL', 'MEMORY', 'MINUS', 'MODE', 'MULTIPLE', + 'NATIONAL', 'NATIONAL-EDITED', 'NATIVE', 'NEGATIVE', 'NEXT', 'NO', + 'NULL', 'NULLS', 'NUMBER', 'NUMBERS', 'NUMERIC', 'NUMERIC-EDITED', + 'OBJECT-COMPUTER', 'OCCURS', 'OF', 'OFF', 'OMITTED', 'ON', 'ONLY', 'OPTIONAL', 'ORDER', 'ORGANIZATION', 'OTHER', 'OUTPUT', 'OVERFLOW', 'OVERLINE', 'PACKED-DECIMAL', 'PADDING', 'PAGE', 'PARAGRAPH', 'PLUS', 'POINTER', 'POSITION', 'POSITIVE', 'PRESENT', 'PREVIOUS', @@ -137,40 +137,42 @@ class CobolLexer(RegexLexer): 'UNSIGNED-INT', 'UNSIGNED-LONG', 'UNSIGNED-SHORT', 'UNTIL', 'UP', 'UPDATE', 'UPON', 'USAGE', 'USING', 'VALUE', 'VALUES', 'VARYING', 'WAIT', 'WHEN', 'WITH', 'WORDS', 'YYYYDDD', 'YYYYMMDD'), - prefix=r'(^|(?<=[^0-9a-z_\-]))', suffix=r'\s*($|(?=[^0-9a-z_\-]))'), + prefix=r'(^|(?<=[^\w\-]))', suffix=r'\s*($|(?=[^\w\-]))'), Keyword.Pseudo), # inactive reserved words (words(( - 'ACTIVE-CLASS', 'ALIGNED', 'ANYCASE', 'ARITHMETIC', 'ATTRIBUTE', 'B-AND', - 'B-NOT', 'B-OR', 'B-XOR', 'BIT', 'BOOLEAN', 'CD', 'CENTER', 'CF', 'CH', 'CHAIN', 'CLASS-ID', - 'CLASSIFICATION', 'COMMUNICATION', 'CONDITION', 'DATA-POINTER', - 'DESTINATION', 'DISABLE', 'EC', 'EGI', 'EMI', 'ENABLE', 'END-RECEIVE', - 'ENTRY-CONVENTION', 'EO', 'ESI', 'EXCEPTION-OBJECT', 'EXPANDS', 'FACTORY', - 'FLOAT-BINARY-16', 'FLOAT-BINARY-34', 'FLOAT-BINARY-7', - 'FLOAT-DECIMAL-16', 'FLOAT-DECIMAL-34', 'FLOAT-EXTENDED', 'FORMAT', - 'FUNCTION-POINTER', 'GET', 'GROUP-USAGE', 'IMPLEMENTS', 'INFINITY', - 'INHERITS', 'INTERFACE', 'INTERFACE-ID', 'INVOKE', 'LC_ALL', 'LC_COLLATE', + 'ACTIVE-CLASS', 'ALIGNED', 'ANYCASE', 'ARITHMETIC', 'ATTRIBUTE', + 'B-AND', 'B-NOT', 'B-OR', 'B-XOR', 'BIT', 'BOOLEAN', 'CD', 'CENTER', + 'CF', 'CH', 'CHAIN', 'CLASS-ID', 'CLASSIFICATION', 'COMMUNICATION', + 'CONDITION', 'DATA-POINTER', 'DESTINATION', 'DISABLE', 'EC', 'EGI', + 'EMI', 'ENABLE', 'END-RECEIVE', 'ENTRY-CONVENTION', 'EO', 'ESI', + 'EXCEPTION-OBJECT', 'EXPANDS', 'FACTORY', 'FLOAT-BINARY-16', + 'FLOAT-BINARY-34', 'FLOAT-BINARY-7', 'FLOAT-DECIMAL-16', + 'FLOAT-DECIMAL-34', 'FLOAT-EXTENDED', 'FORMAT', 'FUNCTION-POINTER', + 'GET', 'GROUP-USAGE', 'IMPLEMENTS', 'INFINITY', 'INHERITS', + 'INTERFACE', 'INTERFACE-ID', 'INVOKE', 'LC_ALL', 'LC_COLLATE', 'LC_CTYPE', 'LC_MESSAGES', 'LC_MONETARY', 'LC_NUMERIC', 'LC_TIME', - 'LINE-COUNTER', 'MESSAGE', 'METHOD', 'METHOD-ID', 'NESTED', 'NONE', 'NORMAL', - 'OBJECT', 'OBJECT-REFERENCE', 'OPTIONS', 'OVERRIDE', 'PAGE-COUNTER', 
'PF', 'PH', - 'PROPERTY', 'PROTOTYPE', 'PURGE', 'QUEUE', 'RAISE', 'RAISING', 'RECEIVE', - 'RELATION', 'REPLACE', 'REPRESENTS-NOT-A-NUMBER', 'RESET', 'RESUME', 'RETRY', - 'RF', 'RH', 'SECONDS', 'SEGMENT', 'SELF', 'SEND', 'SOURCES', 'STATEMENT', 'STEP', - 'STRONG', 'SUB-QUEUE-1', 'SUB-QUEUE-2', 'SUB-QUEUE-3', 'SUPER', 'SYMBOL', - 'SYSTEM-DEFAULT', 'TABLE', 'TERMINAL', 'TEXT', 'TYPEDEF', 'UCS-4', 'UNIVERSAL', - 'USER-DEFAULT', 'UTF-16', 'UTF-8', 'VAL-STATUS', 'VALID', 'VALIDATE', - 'VALIDATE-STATUS'), - prefix=r'(^|(?<=[^0-9a-z_\-]))', suffix=r'\s*($|(?=[^0-9a-z_\-]))'), + 'LINE-COUNTER', 'MESSAGE', 'METHOD', 'METHOD-ID', 'NESTED', 'NONE', + 'NORMAL', 'OBJECT', 'OBJECT-REFERENCE', 'OPTIONS', 'OVERRIDE', + 'PAGE-COUNTER', 'PF', 'PH', 'PROPERTY', 'PROTOTYPE', 'PURGE', + 'QUEUE', 'RAISE', 'RAISING', 'RECEIVE', 'RELATION', 'REPLACE', + 'REPRESENTS-NOT-A-NUMBER', 'RESET', 'RESUME', 'RETRY', 'RF', 'RH', + 'SECONDS', 'SEGMENT', 'SELF', 'SEND', 'SOURCES', 'STATEMENT', + 'STEP', 'STRONG', 'SUB-QUEUE-1', 'SUB-QUEUE-2', 'SUB-QUEUE-3', + 'SUPER', 'SYMBOL', 'SYSTEM-DEFAULT', 'TABLE', 'TERMINAL', 'TEXT', + 'TYPEDEF', 'UCS-4', 'UNIVERSAL', 'USER-DEFAULT', 'UTF-16', 'UTF-8', + 'VAL-STATUS', 'VALID', 'VALIDATE', 'VALIDATE-STATUS'), + prefix=r'(^|(?<=[^\w\-]))', suffix=r'\s*($|(?=[^\w\-]))'), Error), # Data Types - (r'(^|(?<=[^0-9a-z_\-]))' + (r'(^|(?<=[^\w\-]))' r'(PIC\s+.+?(?=(\s|\.\s))|PICTURE\s+.+?(?=(\s|\.\s))|' r'(COMPUTATIONAL)(-[1-5X])?|(COMP)(-[1-5X])?|' r'BINARY-C-LONG|' r'BINARY-CHAR|BINARY-DOUBLE|BINARY-LONG|BINARY-SHORT|' - r'BINARY)\s*($|(?=[^0-9a-z_\-]))', Keyword.Type), + r'BINARY)\s*($|(?=[^\w\-]))', Keyword.Type), # Operators (r'(\*\*|\*|\+|-|/|<=|>=|<|>|==|/=|=)', Operator), @@ -180,7 +182,7 @@ class CobolLexer(RegexLexer): (r'([(),;:&%.])', Punctuation), # Intrinsics - (r'(^|(?<=[^0-9a-z_\-]))(ABS|ACOS|ANNUITY|ASIN|ATAN|BYTE-LENGTH|' + (r'(^|(?<=[^\w\-]))(ABS|ACOS|ANNUITY|ASIN|ATAN|BYTE-LENGTH|' r'CHAR|COMBINED-DATETIME|CONCATENATE|COS|CURRENT-DATE|' r'DATE-OF-INTEGER|DATE-TO-YYYYMMDD|DAY-OF-INTEGER|DAY-TO-YYYYDDD|' r'EXCEPTION-(?:FILE|LOCATION|STATEMENT|STATUS)|EXP10|EXP|E|' @@ -192,13 +194,13 @@ class CobolLexer(RegexLexer): r'STANDARD-DEVIATION|STORED-CHAR-LENGTH|SUBSTITUTE(?:-CASE)?|' r'SUM|TAN|TEST-DATE-YYYYMMDD|TEST-DAY-YYYYDDD|TRIM|' r'UPPER-CASE|VARIANCE|WHEN-COMPILED|YEAR-TO-YYYY)\s*' - r'($|(?=[^0-9a-z_\-]))', Name.Function), + r'($|(?=[^\w\-]))', Name.Function), # Booleans - (r'(^|(?<=[^0-9a-z_\-]))(true|false)\s*($|(?=[^0-9a-z_\-]))', Name.Builtin), + (r'(^|(?<=[^\w\-]))(true|false)\s*($|(?=[^\w\-]))', Name.Builtin), # Comparing Operators - (r'(^|(?<=[^0-9a-z_\-]))(equal|equals|ne|lt|le|gt|ge|' - r'greater|less|than|not|and|or)\s*($|(?=[^0-9a-z_\-]))', Operator.Word), + (r'(^|(?<=[^\w\-]))(equal|equals|ne|lt|le|gt|ge|' + r'greater|less|than|not|and|or)\s*($|(?=[^\w\-]))', Operator.Word), ], # \"[^\"\n]*\"|\'[^\'\n]*\' @@ -439,15 +441,15 @@ class OpenEdgeLexer(RegexLexer): filenames = ['*.p', '*.cls'] mimetypes = ['text/x-openedge', 'application/x-openedge'] - types = (r'(?i)(^|(?<=[^0-9a-z_\-]))(CHARACTER|CHAR|CHARA|CHARAC|CHARACT|CHARACTE|' + types = (r'(?i)(^|(?<=[^\w\-]))(CHARACTER|CHAR|CHARA|CHARAC|CHARACT|CHARACTE|' r'COM-HANDLE|DATE|DATETIME|DATETIME-TZ|' r'DECIMAL|DEC|DECI|DECIM|DECIMA|HANDLE|' r'INT64|INTEGER|INT|INTE|INTEG|INTEGE|' - r'LOGICAL|LONGCHAR|MEMPTR|RAW|RECID|ROWID)\s*($|(?=[^0-9a-z_\-]))') + r'LOGICAL|LONGCHAR|MEMPTR|RAW|RECID|ROWID)\s*($|(?=[^\w\-]))') keywords = words(OPENEDGEKEYWORDS, - prefix=r'(?i)(^|(?<=[^0-9a-z_\-]))', - 
suffix=r'\s*($|(?=[^0-9a-z_\-]))') + prefix=r'(?i)(^|(?<=[^\w\-]))', + suffix=r'\s*($|(?=[^\w\-]))') tokens = { 'root': [ diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py index 5c724d03..632871ba 100644 --- a/pygments/lexers/c_cpp.py +++ b/pygments/lexers/c_cpp.py @@ -50,8 +50,9 @@ class CFamilyLexer(RegexLexer): (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), ], 'statements': [ - (r'L?"', String, 'string'), - (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), + (r'(L?)(")', bygroups(String.Affix, String), 'string'), + (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", + bygroups(String.Affix, String.Char, String.Char, String.Char)), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), @@ -123,7 +124,8 @@ class CFamilyLexer(RegexLexer): (r'\\', String), # stray backslash ], 'macro': [ - (r'(include)(' + _ws1 + ')([^\n]+)', bygroups(Comment.Preproc, Text, Comment.PreprocFile)), + (r'(include)(' + _ws1 + r')([^\n]+)', + bygroups(Comment.Preproc, Text, Comment.PreprocFile)), (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), @@ -217,7 +219,11 @@ class CppLexer(CFamilyLexer): (r'char(16_t|32_t)\b', Keyword.Type), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), # C++11 raw strings - (r'R"\(', String, 'rawstring'), + (r'(R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")', + bygroups(String.Affix, String, String.Delimiter, String.Delimiter, + String, String.Delimiter, String)), + # C++11 UTF-8/16/32 strings + (r'(u8|u|U)(")', bygroups(String.Affix, String), 'string'), inherit, ], 'root': [ @@ -234,11 +240,6 @@ class CppLexer(CFamilyLexer): # template specification (r'\s*(?=>)', Text, '#pop'), ], - 'rawstring': [ - (r'\)"', String, '#pop'), - (r'[^)]+', String), - (r'\)', String), - ], } def analyse_text(text): diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py new file mode 100644 index 00000000..a3e81534 --- /dev/null +++ b/pygments/lexers/clean.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.clean + ~~~~~~~~~~~~~~~~~~~~~ + + Lexer for the Clean language. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import ExtendedRegexLexer, LexerContext, \ + bygroups, words, include, default +from pygments.token import Comment, Keyword, Literal, Name, Number, Operator, \ + Punctuation, String, Text, Whitespace + +__all__ = ['CleanLexer'] + + +class CleanLexer(ExtendedRegexLexer): + """ + Lexer for the general purpose, state-of-the-art, pure and lazy functional + programming language Clean (http://clean.cs.ru.nl/Clean). + + .. 
versionadded:: 2.2
+    """
+    name = 'Clean'
+    aliases = ['clean']
+    filenames = ['*.icl', '*.dcl']
+
+    def get_tokens_unprocessed(self, text=None, context=None):
+        ctx = LexerContext(text, 0)
+        ctx.indent = 0
+        return ExtendedRegexLexer.get_tokens_unprocessed(self, text, context=ctx)
+
+    def check_class_not_import(lexer, match, ctx):
+        if match.group(0) == 'import':
+            yield match.start(), Keyword.Namespace, match.group(0)
+            ctx.stack = ctx.stack[:-1] + ['fromimportfunc']
+        else:
+            yield match.start(), Name.Class, match.group(0)
+        ctx.pos = match.end()
+
+    def check_instance_class(lexer, match, ctx):
+        if match.group(0) == 'instance' or match.group(0) == 'class':
+            yield match.start(), Keyword, match.group(0)
+        else:
+            yield match.start(), Name.Function, match.group(0)
+            ctx.stack = ctx.stack + ['fromimportfunctype']
+        ctx.pos = match.end()
+
+    @staticmethod
+    def indent_len(text):
+        # Tabs are four spaces:
+        # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt
+        text = text.replace('\n', '')
+        return len(text.replace('\t', '    ')), len(text)
+
+    def store_indent(lexer, match, ctx):
+        ctx.indent, _ = CleanLexer.indent_len(match.group(0))
+        ctx.pos = match.end()
+        yield match.start(), Text, match.group(0)
+
+    def check_indent1(lexer, match, ctx):
+        indent, reallen = CleanLexer.indent_len(match.group(0))
+        if indent > ctx.indent:
+            yield match.start(), Whitespace, match.group(0)
+            ctx.pos = match.start() + reallen + 1
+        else:
+            ctx.indent = 0
+            ctx.pos = match.start()
+            ctx.stack = ctx.stack[:-1]
+            yield match.start(), Whitespace, match.group(0)[1:]
+
+    def check_indent2(lexer, match, ctx):
+        indent, reallen = CleanLexer.indent_len(match.group(0))
+        if indent > ctx.indent:
+            yield match.start(), Whitespace, match.group(0)
+            ctx.pos = match.start() + reallen + 1
+        else:
+            ctx.indent = 0
+            ctx.pos = match.start()
+            ctx.stack = ctx.stack[:-2]
+
+    def check_indent3(lexer, match, ctx):
+        indent, reallen = CleanLexer.indent_len(match.group(0))
+        if indent > ctx.indent:
+            yield match.start(), Whitespace, match.group(0)
+            ctx.pos = match.start() + reallen + 1
+        else:
+            ctx.indent = 0
+            ctx.pos = match.start()
+            ctx.stack = ctx.stack[:-3]
+            yield match.start(), Whitespace, match.group(0)[1:]
+            if match.group(0) == '\n\n':
+                ctx.pos = ctx.pos + 1
+
+    def skip(lexer, match, ctx):
+        ctx.stack = ctx.stack[:-1]
+        ctx.pos = match.end()
+        yield match.start(), Comment, match.group(0)
+
+    keywords = ('class', 'instance', 'where', 'with', 'let', 'let!',
+                'in', 'case', 'of', 'infix', 'infixr', 'infixl', 'generic',
+                'derive', 'otherwise', 'code', 'inline')
+
+    tokens = {
+        'common': [
+            (r';', Punctuation, '#pop'),
+            (r'//', Comment, 'singlecomment'),
+        ],
+        'root': [
+            # Comments
+            (r'//.*\n', Comment.Single),
+            (r'(?s)/\*\*.*?\*/', Comment.Special),
+            (r'(?s)/\*.*?\*/', Comment.Multiline),
+
+            # Modules, imports, etc.
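+            # (The import/from rules below hand matching off to the
+            # dedicated 'import' and 'fromimport' states defined further
+            # down in this tokens dict.)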
+ (r'\b((?:implementation|definition|system)\s+)?(module)(\s+)([\w`]+)', + bygroups(Keyword.Namespace, Keyword.Namespace, Text, Name.Class)), + (r'(?<=\n)import(?=\s)', Keyword.Namespace, 'import'), + (r'(?<=\n)from(?=\s)', Keyword.Namespace, 'fromimport'), + + # Keywords + # We cannot use (?s)^|(?<=\s) as prefix, so need to repeat this + (words(keywords, prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword), + (words(keywords, prefix=r'^', suffix=r'(?=\s)'), Keyword), + + # Function definitions + (r'(?=\{\|)', Whitespace, 'genericfunction'), + (r'(?<=\n)([ \t]*)([\w`$()=\-<>~*\^|+&%]+)((?:\s+[\w])*)(\s*)(::)', + bygroups(store_indent, Name.Function, Keyword.Type, Whitespace, + Punctuation), + 'functiondefargs'), + + # Type definitions + (r'(?<=\n)([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'), + (r'^([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'), + + # Literals + (r'\'\\?.(?<!\\)\'', String.Char), + (r'\'\\\d+\'', String.Char), + (r'\'\\\\\'', String.Char), # (special case for '\\') + (r'[+\-~]?\s*\d+\.\d+(E[+\-~]?\d+)?\b', Number.Float), + (r'[+\-~]?\s*0[0-7]\b', Number.Oct), + (r'[+\-~]?\s*0x[0-9a-fA-F]\b', Number.Hex), + (r'[+\-~]?\s*\d+\b', Number.Integer), + (r'"', String.Double, 'doubleqstring'), + (words(('True', 'False'), prefix=r'(?<=\s)', suffix=r'(?=\s)'), + Literal), + + # Everything else is some name + (r'([\w`$%]+\.?)*[\w`$%]+', Name), + + # Punctuation + (r'[{}()\[\],:;.#]', Punctuation), + (r'[+\-=!<>|&~*\^/]', Operator), + (r'\\\\', Operator), + + # Lambda expressions + (r'\\.*?(->|\.|=)', Name.Function), + + # Whitespace + (r'\s', Whitespace), + + include('common'), + ], + 'fromimport': [ + include('common'), + (r'([\w`]+)', check_class_not_import), + (r'\n', Whitespace, '#pop'), + (r'\s', Whitespace), + ], + 'fromimportfunc': [ + include('common'), + (r'([\w`$()=\-<>~*\^|+&%]+)', check_instance_class), + (r',', Punctuation), + (r'\n', Whitespace, '#pop'), + (r'\s', Whitespace), + ], + 'fromimportfunctype': [ + include('common'), + (r'[{(\[]', Punctuation, 'combtype'), + (r',', Punctuation, '#pop'), + (r'[:;.#]', Punctuation), + (r'\n', Whitespace, '#pop:2'), + (r'[^\S\n]+', Whitespace), + (r'\S+', Keyword.Type), + ], + 'combtype': [ + include('common'), + (r'[})\]]', Punctuation, '#pop'), + (r'[{(\[]', Punctuation, '#pop'), + (r'[,:;.#]', Punctuation), + (r'\s+', Whitespace), + (r'\S+', Keyword.Type), + ], + 'import': [ + include('common'), + (words(('from', 'import', 'as', 'qualified'), + prefix='(?<=\s)', suffix='(?=\s)'), Keyword.Namespace), + (r'[\w`]+', Name.Class), + (r'\n', Whitespace, '#pop'), + (r',', Punctuation), + (r'[^\S\n]+', Whitespace), + ], + 'singlecomment': [ + (r'(.)(?=\n)', skip), + (r'.+(?!\n)', Comment), + ], + 'doubleqstring': [ + (r'[^\\"]+', String.Double), + (r'"', String.Double, '#pop'), + (r'\\.', String.Double), + ], + 'typedef': [ + include('common'), + (r'[\w`]+', Keyword.Type), + (r'[:=|(),\[\]{}!*]', Punctuation), + (r'->', Punctuation), + (r'\n(?=[^\s|])', Whitespace, '#pop'), + (r'\s', Whitespace), + (r'.', Keyword.Type), + ], + 'genericfunction': [ + include('common'), + (r'\{\|', Punctuation), + (r'\|\}', Punctuation, '#pop'), + (r',', Punctuation), + (r'->', Punctuation), + (r'(\s+of\s+)(\{)', bygroups(Keyword, Punctuation), 'genericftypes'), + (r'\s', Whitespace), + (r'[\w`]+', Keyword.Type), + (r'[*()]', Punctuation), + ], + 'genericftypes': [ + include('common'), + (r'[\w`]+', Keyword.Type), + (r',', Punctuation), + (r'\s', Whitespace), + (r'\}', Punctuation, '#pop'), + ], + 'functiondefargs': [ + 
include('common'), + (r'\n(\s*)', check_indent1), + (r'[!{}()\[\],:;.#]', Punctuation), + (r'->', Punctuation, 'functiondefres'), + (r'^(?=\S)', Whitespace, '#pop'), + (r'\S', Keyword.Type), + (r'\s', Whitespace), + ], + 'functiondefres': [ + include('common'), + (r'\n(\s*)', check_indent2), + (r'^(?=\S)', Whitespace, '#pop:2'), + (r'[!{}()\[\],:;.#]', Punctuation), + (r'\|', Punctuation, 'functiondefclasses'), + (r'\S', Keyword.Type), + (r'\s', Whitespace), + ], + 'functiondefclasses': [ + include('common'), + (r'\n(\s*)', check_indent3), + (r'^(?=\S)', Whitespace, '#pop:3'), + (r'[,&]', Punctuation), + (r'[\w`$()=\-<>~*\^|+&%]', Name.Function, 'functionname'), + (r'\s', Whitespace), + ], + 'functionname': [ + include('common'), + (r'[\w`$()=\-<>~*\^|+&%]+', Name.Function), + (r'(?=\{\|)', Punctuation, 'genericfunction'), + default('#pop'), + ] + } diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index 77c7714d..9cc291e5 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -42,7 +42,7 @@ class IniLexer(RegexLexer): bygroups(Name.Attribute, Text, Operator, Text, String)), # standalone option, supported by some INI parsers (r'(.+?)$', Name.Attribute), - ] + ], } def analyse_text(text): @@ -600,7 +600,7 @@ class TerraformLexer(RegexLexer): (r'(".*")', bygroups(String.Double)), ], 'punctuation': [ - (r'[\[\]\(\),.]', Punctuation), + (r'[\[\](),.]', Punctuation), ], # Keep this seperate from punctuation - we sometimes want to use different # Tokens for { } @@ -631,9 +631,8 @@ class TermcapLexer(RegexLexer): .. versionadded:: 2.1 """ name = 'Termcap' - aliases = ['termcap',] - - filenames = ['termcap', 'termcap.src',] + aliases = ['termcap'] + filenames = ['termcap', 'termcap.src'] mimetypes = [] # NOTE: @@ -644,13 +643,13 @@ class TermcapLexer(RegexLexer): tokens = { 'root': [ (r'^#.*$', Comment), - (r'^[^\s#:\|]+', Name.Tag, 'names'), + (r'^[^\s#:|]+', Name.Tag, 'names'), ], 'names': [ (r'\n', Text, '#pop'), (r':', Punctuation, 'defs'), (r'\|', Punctuation), - (r'[^:\|]+', Name.Attribute), + (r'[^:|]+', Name.Attribute), ], 'defs': [ (r'\\\n[ \t]*', Text), @@ -678,9 +677,8 @@ class TerminfoLexer(RegexLexer): .. versionadded:: 2.1 """ name = 'Terminfo' - aliases = ['terminfo',] - - filenames = ['terminfo', 'terminfo.src',] + aliases = ['terminfo'] + filenames = ['terminfo', 'terminfo.src'] mimetypes = [] # NOTE: @@ -691,13 +689,13 @@ class TerminfoLexer(RegexLexer): tokens = { 'root': [ (r'^#.*$', Comment), - (r'^[^\s#,\|]+', Name.Tag, 'names'), + (r'^[^\s#,|]+', Name.Tag, 'names'), ], 'names': [ (r'\n', Text, '#pop'), (r'(,)([ \t]*)', bygroups(Punctuation, Text), 'defs'), (r'\|', Punctuation), - (r'[^,\|]+', Name.Attribute), + (r'[^,|]+', Name.Attribute), ], 'defs': [ (r'\n[ \t]+', Text), @@ -726,8 +724,8 @@ class PkgConfigLexer(RegexLexer): """ name = 'PkgConfig' - aliases = ['pkgconfig',] - filenames = ['*.pc',] + aliases = ['pkgconfig'] + filenames = ['*.pc'] mimetypes = [] tokens = { @@ -793,8 +791,8 @@ class PacmanConfLexer(RegexLexer): """ name = 'PacmanConf' - aliases = ['pacmanconf',] - filenames = ['pacman.conf',] + aliases = ['pacmanconf'] + filenames = ['pacman.conf'] mimetypes = [] tokens = { @@ -822,7 +820,7 @@ class PacmanConfLexer(RegexLexer): '%u', # url ), suffix=r'\b'), Name.Variable), - + # fallback (r'.', Text), ], diff --git a/pygments/lexers/csound.py b/pygments/lexers/csound.py index 51414073..95ee73d8 100644 --- a/pygments/lexers/csound.py +++ b/pygments/lexers/csound.py @@ -9,7 +9,7 @@ :license: BSD, see LICENSE for details. 
""" -import copy, re +import re from pygments.lexer import RegexLexer, bygroups, default, include, using, words from pygments.token import Comment, Keyword, Name, Number, Operator, Punctuation, \ @@ -21,7 +21,7 @@ from pygments.lexers.scripting import LuaLexer __all__ = ['CsoundScoreLexer', 'CsoundOrchestraLexer', 'CsoundDocumentLexer'] -newline = (r'((?:;|//).*)*(\n)', bygroups(Comment.Single, Text)) +newline = (r'((?:(?:;|//).*)*)(\n)', bygroups(Comment.Single, Text)) class CsoundLexer(RegexLexer): @@ -177,7 +177,7 @@ class CsoundOrchestraLexer(CsoundLexer): (r'0[xX][a-fA-F0-9]+', Number.Hex), (r'\d+', Number.Integer), (r'"', String, 'single-line string'), - (r'{{', String, 'multi-line string'), + (r'\{\{', String, 'multi-line string'), (r'[+\-*/%^!=&|<>#~¬]', Operator), (r'[](),?:[]', Punctuation), (words(( @@ -273,40 +273,40 @@ class CsoundOrchestraLexer(CsoundLexer): (r'[\\"~$%\^\n]', String) ], 'multi-line string': [ - (r'}}', String, '#pop'), - (r'[^\}]+|\}(?!\})', String) + (r'\}\}', String, '#pop'), + (r'[^}]+|\}(?!\})', String) ], 'scoreline opcode': [ include('whitespace or macro call'), - (r'{{', String, 'scoreline'), + (r'\{\{', String, 'scoreline'), default('#pop') ], 'scoreline': [ - (r'}}', String, '#pop'), - (r'([^\}]+)|\}(?!\})', using(CsoundScoreLexer)) + (r'\}\}', String, '#pop'), + (r'([^}]+)|\}(?!\})', using(CsoundScoreLexer)) ], 'python opcode': [ include('whitespace or macro call'), - (r'{{', String, 'python'), + (r'\{\{', String, 'python'), default('#pop') ], 'python': [ - (r'}}', String, '#pop'), - (r'([^\}]+)|\}(?!\})', using(PythonLexer)) + (r'\}\}', String, '#pop'), + (r'([^}]+)|\}(?!\})', using(PythonLexer)) ], 'lua opcode': [ include('whitespace or macro call'), (r'"', String, 'single-line string'), - (r'{{', String, 'lua'), + (r'\{\{', String, 'lua'), (r',', Punctuation), default('#pop') ], 'lua': [ - (r'}}', String, '#pop'), - (r'([^\}]+)|\}(?!\})', using(LuaLexer)) + (r'\}\}', String, '#pop'), + (r'([^}]+)|\}(?!\})', using(LuaLexer)) ] } @@ -315,7 +315,7 @@ class CsoundDocumentLexer(RegexLexer): """ For `Csound <http://csound.github.io>`_ documents. - + .. versionadded:: 2.1 """ name = 'Csound Document' diff --git a/pygments/lexers/css.py b/pygments/lexers/css.py index b40201f4..6c585dfa 100644 --- a/pygments/lexers/css.py +++ b/pygments/lexers/css.py @@ -476,8 +476,8 @@ class ScssLexer(RegexLexer): (r'@[\w-]+', Keyword, 'selector'), (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable, Operator), 'value'), # TODO: broken, and prone to infinite loops. - #(r'(?=[^;{}][;}])', Name.Attribute, 'attr'), - #(r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'), + # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'), + # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'), default('selector'), ], @@ -518,7 +518,7 @@ class LessCssLexer(CssLexer): inherit, ], 'content': [ - (r'{', Punctuation, '#push'), + (r'\{', Punctuation, '#push'), inherit, ], } diff --git a/pygments/lexers/diff.py b/pygments/lexers/diff.py index d3b1589d..726b49ad 100644 --- a/pygments/lexers/diff.py +++ b/pygments/lexers/diff.py @@ -9,11 +9,13 @@ :license: BSD, see LICENSE for details. 
""" +import re + from pygments.lexer import RegexLexer, include, bygroups from pygments.token import Text, Comment, Operator, Keyword, Name, Generic, \ Literal -__all__ = ['DiffLexer', 'DarcsPatchLexer'] +__all__ = ['DiffLexer', 'DarcsPatchLexer', 'WDiffLexer'] class DiffLexer(RegexLexer): @@ -104,3 +106,60 @@ class DarcsPatchLexer(RegexLexer): (r'[^\n\[]+', Generic.Deleted), ], } + + +class WDiffLexer(RegexLexer): + """ + A `wdiff <https://www.gnu.org/software/wdiff/>`_ lexer. + + Note that: + + * only to normal output (without option like -l). + * if target files of wdiff contain "[-", "-]", "{+", "+}", + especially they are unbalanced, this lexer will get confusing. + + .. versionadded:: 2.2 + """ + + name = 'WDiff' + aliases = ['wdiff'] + filenames = ['*.wdiff'] + mimetypes = [] + + flags = re.MULTILINE | re.DOTALL + + # We can only assume "[-" after "[-" before "-]" is `nested`, + # for instance wdiff to wdiff outputs. We have no way to + # distinct these marker is of wdiff output from original text. + + ins_op = r"\{\+" + ins_cl = r"\+\}" + del_op = r"\[\-" + del_cl = r"\-\]" + normal = r'[^{}[\]+-]+' # for performance + tokens = { + 'root': [ + (ins_op, Generic.Inserted, 'inserted'), + (del_op, Generic.Deleted, 'deleted'), + (normal, Text), + (r'.', Text), + ], + 'inserted': [ + (ins_op, Generic.Inserted, '#push'), + (del_op, Generic.Inserted, '#push'), + (del_cl, Generic.Inserted, '#pop'), + + (ins_cl, Generic.Inserted, '#pop'), + (normal, Generic.Inserted), + (r'.', Generic.Inserted), + ], + 'deleted': [ + (del_op, Generic.Deleted, '#push'), + (ins_op, Generic.Deleted, '#push'), + (ins_cl, Generic.Deleted, '#pop'), + + (del_cl, Generic.Deleted, '#pop'), + (normal, Generic.Deleted), + (r'.', Generic.Deleted), + ], + } diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py index eac4b5e5..11b4573e 100644 --- a/pygments/lexers/dotnet.py +++ b/pygments/lexers/dotnet.py @@ -11,7 +11,7 @@ import re from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \ - using, this, default + using, this, default, words from pygments.token import Punctuation, \ Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other from pygments.util import get_choice_opt, iteritems @@ -375,8 +375,8 @@ class VbNetLexer(RegexLexer): filenames = ['*.vb', '*.bas'] mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?) 
- uni_name = '[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + \ - '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', + uni_name = '[_' + uni.combine('Ll', 'Lt', 'Lm', 'Nl') + ']' + \ + '[' + uni.combine('Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*' flags = re.MULTILINE | re.IGNORECASE @@ -394,25 +394,26 @@ class VbNetLexer(RegexLexer): (r'[(){}!#,.:]', Punctuation), (r'Option\s+(Strict|Explicit|Compare)\s+' r'(On|Off|Binary|Text)', Keyword.Declaration), - (r'(?<!\.)(AddHandler|Alias|' - r'ByRef|ByVal|Call|Case|Catch|CBool|CByte|CChar|CDate|' - r'CDec|CDbl|CInt|CLng|CObj|Continue|CSByte|CShort|' - r'CSng|CStr|CType|CUInt|CULng|CUShort|Declare|' - r'Default|Delegate|DirectCast|Do|Each|Else|ElseIf|' - r'EndIf|Erase|Error|Event|Exit|False|Finally|For|' - r'Friend|Get|Global|GoSub|GoTo|Handles|If|' - r'Implements|Inherits|Interface|' - r'Let|Lib|Loop|Me|MustInherit|' - r'MustOverride|MyBase|MyClass|Narrowing|New|Next|' - r'Not|Nothing|NotInheritable|NotOverridable|Of|On|' - r'Operator|Option|Optional|Overloads|Overridable|' - r'Overrides|ParamArray|Partial|Private|Protected|' - r'Public|RaiseEvent|ReadOnly|ReDim|RemoveHandler|Resume|' - r'Return|Select|Set|Shadows|Shared|Single|' - r'Static|Step|Stop|SyncLock|Then|' - r'Throw|To|True|Try|TryCast|Wend|' - r'Using|When|While|Widening|With|WithEvents|' - r'WriteOnly)\b', Keyword), + (words(( + 'AddHandler', 'Alias', 'ByRef', 'ByVal', 'Call', 'Case', + 'Catch', 'CBool', 'CByte', 'CChar', 'CDate', 'CDec', 'CDbl', + 'CInt', 'CLng', 'CObj', 'Continue', 'CSByte', 'CShort', 'CSng', + 'CStr', 'CType', 'CUInt', 'CULng', 'CUShort', 'Declare', + 'Default', 'Delegate', 'DirectCast', 'Do', 'Each', 'Else', + 'ElseIf', 'EndIf', 'Erase', 'Error', 'Event', 'Exit', 'False', + 'Finally', 'For', 'Friend', 'Get', 'Global', 'GoSub', 'GoTo', + 'Handles', 'If', 'Implements', 'Inherits', 'Interface', 'Let', + 'Lib', 'Loop', 'Me', 'MustInherit', 'MustOverride', 'MyBase', + 'MyClass', 'Narrowing', 'New', 'Next', 'Not', 'Nothing', + 'NotInheritable', 'NotOverridable', 'Of', 'On', 'Operator', + 'Option', 'Optional', 'Overloads', 'Overridable', 'Overrides', + 'ParamArray', 'Partial', 'Private', 'Protected', 'Public', + 'RaiseEvent', 'ReadOnly', 'ReDim', 'RemoveHandler', 'Resume', + 'Return', 'Select', 'Set', 'Shadows', 'Shared', 'Single', + 'Static', 'Step', 'Stop', 'SyncLock', 'Then', 'Throw', 'To', + 'True', 'Try', 'TryCast', 'Wend', 'Using', 'When', 'While', + 'Widening', 'With', 'WithEvents', 'WriteOnly'), + prefix='(?<!\.)', suffix=r'\b'), Keyword), (r'(?<!\.)End\b', Keyword, 'end'), (r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'), (r'(?<!\.)(Function|Sub|Property)(\s+)', diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py index 0d55af09..e1362649 100644 --- a/pygments/lexers/dsls.py +++ b/pygments/lexers/dsls.py @@ -111,8 +111,8 @@ class ThriftLexer(RegexLexer): include('keywords'), include('numbers'), (r'[&=]', Operator), - (r'[:;\,\{\}\(\)\<>\[\]]', Punctuation), - (r'[a-zA-Z_](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*', Name), + (r'[:;,{}()<>\[\]]', Punctuation), + (r'[a-zA-Z_](\.\w|\w)*', Name), ], 'whitespace': [ (r'\n', Text.Whitespace), @@ -135,7 +135,7 @@ class ThriftLexer(RegexLexer): (r'[^\\\'\n]+', String.Single), ], 'namespace': [ - (r'[a-z\*](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*', Name.Namespace, '#pop'), + (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'), default('#pop'), ], 'class': [ diff --git a/pygments/lexers/elm.py b/pygments/lexers/elm.py index 7df6346a..cd1fb98e 100644 --- a/pygments/lexers/elm.py +++ b/pygments/lexers/elm.py @@ 
-46,7 +46,7 @@ class ElmLexer(RegexLexer): 'root': [ # Comments - (r'{-', Comment.Multiline, 'comment'), + (r'\{-', Comment.Multiline, 'comment'), (r'--.*', Comment.Single), # Whitespace @@ -86,20 +86,20 @@ class ElmLexer(RegexLexer): (validName, Name.Variable), # Parens - (r'[,\(\)\[\]{}]', Punctuation), + (r'[,()\[\]{}]', Punctuation), ], 'comment': [ - (r'-(?!})', Comment.Multiline), - (r'{-', Comment.Multiline, 'comment'), + (r'-(?!\})', Comment.Multiline), + (r'\{-', Comment.Multiline, 'comment'), (r'[^-}]', Comment.Multiline), - (r'-}', Comment.Multiline, '#pop'), + (r'-\}', Comment.Multiline, '#pop'), ], 'doublequote': [ - (r'\\u[0-9a-fA-F]\{4}', String.Escape), - (r'\\[nrfvb\\\"]', String.Escape), + (r'\\u[0-9a-fA-F]{4}', String.Escape), + (r'\\[nrfvb\\"]', String.Escape), (r'[^"]', String), (r'"', String, '#pop'), ], diff --git a/pygments/lexers/erlang.py b/pygments/lexers/erlang.py index 7838b3c5..93ddd2c2 100644 --- a/pygments/lexers/erlang.py +++ b/pygments/lexers/erlang.py @@ -127,7 +127,7 @@ class ErlangLexer(RegexLexer): 'string': [ (escape_re, String.Escape), (r'"', String, '#pop'), - (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol), + (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol), (r'[^"\\~]+', String), (r'~', String), ], @@ -240,11 +240,11 @@ class ElixirLexer(RegexLexer): KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in') BUILTIN = ( 'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise', - 'quote', 'unquote', 'unquote_splicing', 'throw', 'super' + 'quote', 'unquote', 'unquote_splicing', 'throw', 'super', ) BUILTIN_DECLARATION = ( 'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop', - 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback' + 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback', ) BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias') @@ -263,7 +263,7 @@ class ElixirLexer(RegexLexer): OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&') PUNCTUATION = ( - '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']' + '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']', ) def get_tokens_unprocessed(self, text): diff --git a/pygments/lexers/esoteric.py b/pygments/lexers/esoteric.py index 73ea4a4a..2b17615c 100644 --- a/pygments/lexers/esoteric.py +++ b/pygments/lexers/esoteric.py @@ -90,7 +90,7 @@ class CAmkESLexer(RegexLexer): filenames = ['*.camkes', '*.idl4'] tokens = { - 'root':[ + 'root': [ # C pre-processor directive (r'^\s*#.*\n', Comment.Preproc), @@ -99,7 +99,7 @@ class CAmkESLexer(RegexLexer): (r'/\*(.|\n)*?\*/', Comment), (r'//.*\n', Comment), - (r'[\[\(\){},\.;=\]]', Punctuation), + (r'[\[(){},.;=\]]', Punctuation), (words(('assembly', 'attribute', 'component', 'composition', 'configuration', 'connection', 'connector', 'consumes', diff --git a/pygments/lexers/felix.py b/pygments/lexers/felix.py index b7659769..9631bcc1 100644 --- a/pygments/lexers/felix.py +++ b/pygments/lexers/felix.py @@ -237,7 +237,7 @@ class FelixLexer(RegexLexer): ], 'strings': [ (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' 
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), diff --git a/pygments/lexers/fortran.py b/pygments/lexers/fortran.py index 4c22139d..e2f95b11 100644 --- a/pygments/lexers/fortran.py +++ b/pygments/lexers/fortran.py @@ -11,7 +11,7 @@ import re -from pygments.lexer import RegexLexer, bygroups, include, words, using +from pygments.lexer import RegexLexer, bygroups, include, words, using, default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic @@ -191,16 +191,15 @@ class FortranFixedLexer(RegexLexer): (r'(.{5})', Name.Label, 'cont-char'), (r'.*\n', using(FortranLexer)), ], - 'cont-char': [ (' ', Text, 'code'), ('0', Comment, 'code'), - ('.', Generic.Strong, 'code') + ('.', Generic.Strong, 'code'), ], - 'code': [ (r'(.{66})(.*)(\n)', bygroups(_lex_fortran, Comment, Text), 'root'), (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'), - (r'', Text, 'root')] + default('root'), + ] } diff --git a/pygments/lexers/haskell.py b/pygments/lexers/haskell.py index 95e68a33..ffc3a3a2 100644 --- a/pygments/lexers/haskell.py +++ b/pygments/lexers/haskell.py @@ -321,7 +321,7 @@ class AgdaLexer(RegexLexer): 'module': [ (r'\{-', Comment.Multiline, 'comment'), (r'[a-zA-Z][\w.]*', Name, '#pop'), - (r'[^a-zA-Z]+', Text) + (r'[\W0-9_]+', Text) ], 'comment': HaskellLexer.tokens['comment'], 'character': HaskellLexer.tokens['character'], diff --git a/pygments/lexers/idl.py b/pygments/lexers/idl.py index d745bcfd..a0b39492 100644 --- a/pygments/lexers/idl.py +++ b/pygments/lexers/idl.py @@ -258,12 +258,13 @@ class IDLLexer(RegexLexer): (r'\b(mod|lt|le|eq|ne|ge|gt|not|and|or|xor)\b', Operator), (r'"[^\"]*"', String.Double), (r"'[^\']*'", String.Single), - (r'\b[\+\-]?([0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(D|E)?([\+\-]?[0-9]+)?\b', Number.Float), - (r'\b\'[\+\-]?[0-9A-F]+\'X(U?(S?|L{1,2})|B)\b', Number.Hex), - (r'\b\'[\+\-]?[0-7]+\'O(U?(S?|L{1,2})|B)\b', Number.Oct), - (r'\b[\+\-]?[0-9]+U?L{1,2}\b', Number.Integer.Long), - (r'\b[\+\-]?[0-9]+U?S?\b', Number.Integer), - (r'\b[\+\-]?[0-9]+B\b', Number), + (r'\b[+\-]?([0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(D|E)?([+\-]?[0-9]+)?\b', + Number.Float), + (r'\b\'[+\-]?[0-9A-F]+\'X(U?(S?|L{1,2})|B)\b', Number.Hex), + (r'\b\'[+\-]?[0-7]+\'O(U?(S?|L{1,2})|B)\b', Number.Oct), + (r'\b[+\-]?[0-9]+U?L{1,2}\b', Number.Integer.Long), + (r'\b[+\-]?[0-9]+U?S?\b', Number.Integer), + (r'\b[+\-]?[0-9]+B\b', Number), (r'.', Text), ] } diff --git a/pygments/lexers/j.py b/pygments/lexers/j.py index 278374e5..f15595f8 100644 --- a/pygments/lexers/j.py +++ b/pygments/lexers/j.py @@ -48,7 +48,7 @@ class JLexer(RegexLexer): # Definitions (r'0\s+:\s*0|noun\s+define\s*$', Name.Entity, 'nounDefinition'), - (r'\b(([1-4]|13)\s+:\s*0)|((adverb|conjunction|dyad|monad|verb)\s+define)\b', + (r'(([1-4]|13)\s+:\s*0|(adverb|conjunction|dyad|monad|verb)\s+define)\b', Name.Function, 'explicitDefinition'), # Flow Control diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py index 2a01cd42..5dca6832 100644 --- a/pygments/lexers/javascript.py +++ b/pygments/lexers/javascript.py @@ -97,13 +97,13 @@ class JavascriptLexer(RegexLexer): (r'`', String.Backtick, '#pop'), (r'\\\\', String.Backtick), (r'\\`', String.Backtick), - (r'\${', String.Interpol, 'interp-inside'), + (r'\$\{', String.Interpol, 'interp-inside'), (r'\$', String.Backtick), (r'[^`\\$]+', String.Backtick), ], 'interp-inside': [ # TODO: 
should this include single-line comments and allow nesting strings? - (r'}', String.Interpol, '#pop'), + (r'\}', String.Interpol, '#pop'), include('root'), ], # (\\\\|\\`|[^`])*`', String.Backtick), @@ -1245,32 +1245,32 @@ class EarlGreyLexer(RegexLexer): include('control'), (r'[^\S\n]+', Text), (r';;.*\n', Comment), - (r'[\[\]\{\}\:\(\)\,\;]', Punctuation), + (r'[\[\]{}:(),;]', Punctuation), (r'\\\n', Text), (r'\\', Text), include('errors'), (words(( 'with', 'where', 'when', 'and', 'not', 'or', 'in', 'as', 'of', 'is'), - prefix=r'(?<=\s|\[)', suffix=r'(?![\w\$\-])'), + prefix=r'(?<=\s|\[)', suffix=r'(?![\w$\-])'), Operator.Word), - (r'[\*@]?->', Name.Function), + (r'[*@]?->', Name.Function), (r'[+\-*/~^<>%&|?!@#.]*=', Operator.Word), (r'\.{2,3}', Operator.Word), # Range Operator (r'([+*/~^<>&|?!]+)|([#\-](?=\s))|@@+(?=\s)|=+', Operator), - (r'(?<![\w\$\-])(var|let)(?:[^\w\$])', Keyword.Declaration), + (r'(?<![\w$\-])(var|let)(?:[^\w$])', Keyword.Declaration), include('keywords'), include('builtins'), include('assignment'), (r'''(?x) - (?:()([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)| - (?<=[\s\{\[\(])(\.)([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)) + (?:()([a-zA-Z$_](?:[\w$\-]*[\w$])?)| + (?<=[\s{\[(])(\.)([a-zA-Z$_](?:[\w$\-]*[\w$])?)) (?=.*%)''', bygroups(Punctuation, Name.Tag, Punctuation, Name.Class.Start), 'dbs'), (r'[rR]?`', String.Backtick, 'bt'), (r'[rR]?```', String.Backtick, 'tbt'), - (r'(?<=[\s\[\{\(,;])\.([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)' - r'(?=[\s\]\}\),;])', String.Symbol), + (r'(?<=[\s\[{(,;])\.([a-zA-Z$_](?:[\w$\-]*[\w$])?)' + r'(?=[\s\]}),;])', String.Symbol), include('nested'), (r'(?:[rR]|[rR]\.[gmi]{1,3})?"', String, combined('stringescape', 'dqs')), (r'(?:[rR]|[rR]\.[gmi]{1,3})?\'', String, combined('stringescape', 'sqs')), @@ -1281,9 +1281,9 @@ class EarlGreyLexer(RegexLexer): include('numbers'), ], 'dbs': [ - (r'(\.)([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(?=[\[\.\s])', + (r'(\.)([a-zA-Z$_](?:[\w$\-]*[\w$])?)(?=[.\[\s])', bygroups(Punctuation, Name.Class.DBS)), - (r'(\[)([\^#][a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(\])', + (r'(\[)([\^#][a-zA-Z$_](?:[\w$\-]*[\w$])?)(\])', bygroups(Punctuation, Name.Entity.DBS, Punctuation)), (r'\s+', Text), (r'%', Operator.DBS, '#pop'), @@ -1293,29 +1293,29 @@ class EarlGreyLexer(RegexLexer): bygroups(Text.Whitespace, Text)), ], 'assignment': [ - (r'(\.)?([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)' + (r'(\.)?([a-zA-Z$_](?:[\w$\-]*[\w$])?)' r'(?=\s+[+\-*/~^<>%&|?!@#.]*\=\s)', bygroups(Punctuation, Name.Variable)) ], 'errors': [ (words(('Error', 'TypeError', 'ReferenceError'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-\.])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$.])'), Name.Exception), (r'''(?x) - (?<![\w\$]) - E\.[\w\$](?:[\w\$\-]*[\w\$])? - (?:\.[\w\$](?:[\w\$\-]*[\w\$])?)* - (?=[\(\{\[\?\!\s])''', + (?<![\w$]) + E\.[\w$](?:[\w$\-]*[\w$])? + (?:\.[\w$](?:[\w$\-]*[\w$])?)* + (?=[({\[?!\s])''', Name.Exception), ], 'control': [ (r'''(?x) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?) + ([a-zA-Z$_](?:[\w$-]*[\w$])?) 
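                 # (?x) verbose mode: unescaped whitespace and #-comments like
                 # this one are ignored inside the pattern, which is what lets
                 # a single rule be written across several readable lines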
(?!\n)\s+ (?!and|as|each\*|each|in|is|mod|of|or|when|where|with) - (?=(?:[+\-*/~^<>%&|?!@#.])?[a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)''', + (?=(?:[+\-*/~^<>%&|?!@#.])?[a-zA-Z$_](?:[\w$-]*[\w$])?)''', Keyword.Control), - (r'([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(?!\n)\s+(?=[\'"\d\{\[\(])', + (r'([a-zA-Z$_](?:[\w$-]*[\w$])?)(?!\n)\s+(?=[\'"\d{\[(])', Keyword.Control), (r'''(?x) (?: @@ -1324,28 +1324,28 @@ class EarlGreyLexer(RegexLexer): (?<=with|each|with)| (?<=each\*|where) )(\s+) - ([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(:)''', + ([a-zA-Z$_](?:[\w$-]*[\w$])?)(:)''', bygroups(Text, Keyword.Control, Punctuation)), (r'''(?x) (?<![+\-*/~^<>%&|?!@#.])(\s+) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(:)''', + ([a-zA-Z$_](?:[\w$-]*[\w$])?)(:)''', bygroups(Text, Keyword.Control, Punctuation)), ], 'nested': [ (r'''(?x) - (?<=[a-zA-Z$0-9_\]\}\)])(\.) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?) + (?<=[\w$\]})])(\.) + ([a-zA-Z$_](?:[\w$-]*[\w$])?) (?=\s+with(?:\s|\n))''', bygroups(Punctuation, Name.Function)), (r'''(?x) (?<!\s)(\.) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?) - (?=[\}\]\)\.,;:\s])''', + ([a-zA-Z$_](?:[\w$-]*[\w$])?) + (?=[}\]).,;:\s])''', bygroups(Punctuation, Name.Field)), (r'''(?x) - (?<=[a-zA-Z$0-9_\]\}\)])(\.) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?) - (?=[\[\{\(:])''', + (?<=[\w$\]})])(\.) + ([a-zA-Z$_](?:[\w$-]*[\w$])?) + (?=[\[{(:])''', bygroups(Punctuation, Name.Function)), ], 'keywords': [ @@ -1354,15 +1354,15 @@ class EarlGreyLexer(RegexLexer): 'continue', 'elif', 'expr-value', 'if', 'match', 'return', 'yield', 'pass', 'else', 'require', 'var', 'let', 'async', 'method', 'gen'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-\.])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$.])'), Keyword.Pseudo), (words(('this', 'self', '@'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$])'), Keyword.Constant), (words(( 'Function', 'Object', 'Array', 'String', 'Number', 'Boolean', 'ErrorFactory', 'ENode', 'Promise'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$])'), Keyword.Type), ], 'builtins': [ @@ -1373,20 +1373,20 @@ class EarlGreyLexer(RegexLexer): 'getChecker', 'get-checker', 'getProperty', 'get-property', 'getProjector', 'get-projector', 'consume', 'take', 'promisify', 'spawn', 'constructor'), - prefix=r'(?<![\w\-#\.])', suffix=r'(?![\w\-\.])'), + prefix=r'(?<![\w\-#.])', suffix=r'(?![\w\-.])'), Name.Builtin), (words(( 'true', 'false', 'null', 'undefined'), - prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-\.])'), + prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$.])'), Name.Constant), ], 'name': [ - (r'@([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)', Name.Variable.Instance), - (r'([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(\+\+|\-\-)?', + (r'@([a-zA-Z$_](?:[\w$-]*[\w$])?)', Name.Variable.Instance), + (r'([a-zA-Z$_](?:[\w$-]*[\w$])?)(\+\+|\-\-)?', bygroups(Name.Symbol, Operator.Word)) ], 'tuple': [ - (r'#[a-zA-Z_][a-zA-Z_\-0-9]*(?=[\s\{\(,;\n])', Name.Namespace) + (r'#[a-zA-Z_][\w\-]*(?=[\s{(,;])', Name.Namespace) ], 'interpoling_string': [ (r'\}', String.Interpol, '#pop'), @@ -1426,7 +1426,7 @@ class EarlGreyLexer(RegexLexer): (r'```', String.Backtick, '#pop'), (r'\n', String.Backtick), (r'\^=?', String.Escape), - (r'[^\`]+', String.Backtick), + (r'[^`]+', String.Backtick), ], 'numbers': [ (r'\d+\.(?!\.)\d*([eE][+-]?[0-9]+)?', Number.Float), @@ -1434,7 +1434,7 @@ class EarlGreyLexer(RegexLexer): (r'8r[0-7]+', Number.Oct), (r'2r[01]+', 
Number.Bin), (r'16r[a-fA-F0-9]+', Number.Hex), - (r'([3-79]|[1-2][0-9]|3[0-6])r[a-zA-Z\d]+(\.[a-zA-Z\d]+)?', Number.Radix), + (r'([3-79]|[12][0-9]|3[0-6])r[a-zA-Z\d]+(\.[a-zA-Z\d]+)?', Number.Radix), (r'\d+', Number.Integer) ], } diff --git a/pygments/lexers/julia.py b/pygments/lexers/julia.py index d0aa6d35..9f84b8d9 100644 --- a/pygments/lexers/julia.py +++ b/pygments/lexers/julia.py @@ -11,13 +11,16 @@ import re -from pygments.lexer import Lexer, RegexLexer, bygroups, combined, do_insertions +from pygments.lexer import Lexer, RegexLexer, bygroups, combined, \ + do_insertions, words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic from pygments.util import shebang_matches, unirange __all__ = ['JuliaLexer', 'JuliaConsoleLexer'] +line_re = re.compile('.*?\n') + class JuliaLexer(RegexLexer): """ @@ -32,13 +35,26 @@ class JuliaLexer(RegexLexer): flags = re.MULTILINE | re.UNICODE - builtins = [ + builtins = ( 'exit', 'whos', 'edit', 'load', 'is', 'isa', 'isequal', 'typeof', 'tuple', 'ntuple', 'uid', 'hash', 'finalizer', 'convert', 'promote', 'subtype', 'typemin', 'typemax', 'realmin', 'realmax', 'sizeof', 'eps', 'promote_type', 'method_exists', 'applicable', 'invoke', 'dlopen', 'dlsym', 'system', 'error', 'throw', 'assert', 'new', 'Inf', 'Nan', 'pi', 'im', - ] + ) + + keywords = ( + 'begin', 'while', 'for', 'in', 'return', 'break', 'continue', + 'macro', 'quote', 'let', 'if', 'elseif', 'else', 'try', 'catch', 'end', + 'bitstype', 'ccall', 'do', 'using', 'module', 'import', 'export', + 'importall', 'baremodule', 'immutable', + ) + + types = ( + 'Bool', 'Int', 'Int8', 'Int16', 'Int32', 'Int64', 'Uint', 'Uint8', 'Uint16', + 'Uint32', 'Uint64', 'Float32', 'Float64', 'Complex64', 'Complex128', 'Any', + 'Nothing', 'None', + ) tokens = { 'root': [ @@ -46,34 +62,29 @@ class JuliaLexer(RegexLexer): (r'[^\S\n]+', Text), (r'#=', Comment.Multiline, "blockcomment"), (r'#.*$', Comment), - (r'[]{}:(),;[@]', Punctuation), + (r'[\[\]{}:(),;@]', Punctuation), (r'\\\n', Text), (r'\\', Text), # keywords - (r'(begin|while|for|in|return|break|continue|' - r'macro|quote|let|if|elseif|else|try|catch|end|' - r'bitstype|ccall|do|using|module|import|export|' - r'importall|baremodule|immutable)\b', Keyword), (r'(local|global|const)\b', Keyword.Declaration), - (r'(Bool|Int|Int8|Int16|Int32|Int64|Uint|Uint8|Uint16|Uint32|Uint64' - r'|Float32|Float64|Complex64|Complex128|Any|Nothing|None)\b', - Keyword.Type), + (words(keywords, suffix=r'\b'), Keyword), + (words(types, suffix=r'\b'), Keyword.Type), # functions (r'(function)((?:\s|\\\s)+)', - bygroups(Keyword, Name.Function), 'funcname'), + bygroups(Keyword, Name.Function), 'funcname'), # types (r'(type|typealias|abstract|immutable)((?:\s|\\\s)+)', - bygroups(Keyword, Name.Class), 'typename'), + bygroups(Keyword, Name.Class), 'typename'), # operators (r'==|!=|<=|>=|->|&&|\|\||::|<:|[-~+/*%=<>&^|.?!$]', Operator), (r'\.\*|\.\^|\.\\|\.\/|\\', Operator), # builtins - ('(' + '|'.join(builtins) + r')\b', Name.Builtin), + (words(builtins, suffix=r'\b'), Name.Builtin), # backticks (r'`(?s).*?`', String.Backtick), @@ -116,12 +127,12 @@ class JuliaLexer(RegexLexer): ], 'typename': [ - ('[a-zA-Z_]\w*', Name.Class, '#pop') + ('[a-zA-Z_]\w*', Name.Class, '#pop'), ], 'stringescape': [ (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' - r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) + r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape), ], "blockcomment": [ (r'[^=#]', Comment.Multiline), @@ -138,7 +149,7 
@@ class JuliaLexer(RegexLexer): (r'\$[a-zA-Z_]+', String.Interpol), (r'\$\(', String.Interpol, 'in-intp'), # @printf and @sprintf formats - (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[diouxXeEfFgGcrs%]', + (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^$%"\\]+', String), # unhandled special signs @@ -155,9 +166,6 @@ class JuliaLexer(RegexLexer): return shebang_matches(text, r'julia') -line_re = re.compile('.*?\n') - - class JuliaConsoleLexer(Lexer): """ For Julia console sessions. Modeled after MatlabSessionLexer. diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 41fc0fdb..af7f8105 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -564,14 +564,14 @@ class IokeLexer(RegexLexer): ], 'slashRegexp': [ - (r'(?<!\\)/[oxpniums]*', String.Regex, '#pop'), + (r'(?<!\\)/[im-psux]*', String.Regex, '#pop'), include('interpolatableText'), (r'\\/', String.Regex), (r'[^/]', String.Regex) ], 'squareRegexp': [ - (r'(?<!\\)][oxpniums]*', String.Regex, '#pop'), + (r'(?<!\\)][im-psux]*', String.Regex, '#pop'), include('interpolatableText'), (r'\\]', String.Regex), (r'[^\]]', String.Regex) diff --git a/pygments/lexers/lisp.py b/pygments/lexers/lisp.py index 84720fab..6d591e10 100644 --- a/pygments/lexers/lisp.py +++ b/pygments/lexers/lisp.py @@ -2135,49 +2135,52 @@ class ShenLexer(RegexLexer): filenames = ['*.shen'] mimetypes = ['text/x-shen', 'application/x-shen'] - DECLARATIONS = re.findall(r'\S+', """ - datatype define defmacro defprolog defcc synonyms declare package - type function - """) - - SPECIAL_FORMS = re.findall(r'\S+', """ - lambda get let if cases cond put time freeze value load $ - protect or and not do output prolog? trap-error error - make-string /. set @p @s @v - """) - - BUILTINS = re.findall(r'\S+', """ - == = * + - / < > >= <= <-address <-vector abort absvector - absvector? address-> adjoin append arity assoc bind boolean? - bound? call cd close cn compile concat cons cons? cut destroy - difference element? empty? enable-type-theory error-to-string - eval eval-kl exception explode external fail fail-if file - findall fix fst fwhen gensym get-time hash hd hdstr hdv head - identical implementation in include include-all-but inferences - input input+ integer? intern intersection is kill language - length limit lineread loaded macro macroexpand map mapcan - maxinferences mode n->string nl nth null number? occurrences - occurs-check open os out port porters pos pr preclude - preclude-all-but print profile profile-results ps quit read - read+ read-byte read-file read-file-as-bytelist - read-file-as-string read-from-string release remove return - reverse run save set simple-error snd specialise spy step - stinput stoutput str string->n string->symbol string? subst - symbol? systemf tail tc tc? thaw tl tlstr tlv track tuple? - undefmacro unify unify! union unprofile unspecialise untrack - variable? vector vector-> vector? verified version warn when - write-byte write-to-file y-or-n? - """) - - BUILTINS_ANYWHERE = re.findall(r'\S+', """ - where skip >> _ ! 
<e> <!> - """) + DECLARATIONS = ( + 'datatype', 'define', 'defmacro', 'defprolog', 'defcc', + 'synonyms', 'declare', 'package', 'type', 'function', + ) + + SPECIAL_FORMS = ( + 'lambda', 'get', 'let', 'if', 'cases', 'cond', 'put', 'time', 'freeze', + 'value', 'load', '$', 'protect', 'or', 'and', 'not', 'do', 'output', + 'prolog?', 'trap-error', 'error', 'make-string', '/.', 'set', '@p', + '@s', '@v', + ) + + BUILTINS = ( + '==', '=', '*', '+', '-', '/', '<', '>', '>=', '<=', '<-address', + '<-vector', 'abort', 'absvector', 'absvector?', 'address->', 'adjoin', + 'append', 'arity', 'assoc', 'bind', 'boolean?', 'bound?', 'call', 'cd', + 'close', 'cn', 'compile', 'concat', 'cons', 'cons?', 'cut', 'destroy', + 'difference', 'element?', 'empty?', 'enable-type-theory', + 'error-to-string', 'eval', 'eval-kl', 'exception', 'explode', 'external', + 'fail', 'fail-if', 'file', 'findall', 'fix', 'fst', 'fwhen', 'gensym', + 'get-time', 'hash', 'hd', 'hdstr', 'hdv', 'head', 'identical', + 'implementation', 'in', 'include', 'include-all-but', 'inferences', + 'input', 'input+', 'integer?', 'intern', 'intersection', 'is', 'kill', + 'language', 'length', 'limit', 'lineread', 'loaded', 'macro', 'macroexpand', + 'map', 'mapcan', 'maxinferences', 'mode', 'n->string', 'nl', 'nth', 'null', + 'number?', 'occurrences', 'occurs-check', 'open', 'os', 'out', 'port', + 'porters', 'pos', 'pr', 'preclude', 'preclude-all-but', 'print', 'profile', + 'profile-results', 'ps', 'quit', 'read', 'read+', 'read-byte', 'read-file', + 'read-file-as-bytelist', 'read-file-as-string', 'read-from-string', + 'release', 'remove', 'return', 'reverse', 'run', 'save', 'set', + 'simple-error', 'snd', 'specialise', 'spy', 'step', 'stinput', 'stoutput', + 'str', 'string->n', 'string->symbol', 'string?', 'subst', 'symbol?', + 'systemf', 'tail', 'tc', 'tc?', 'thaw', 'tl', 'tlstr', 'tlv', 'track', + 'tuple?', 'undefmacro', 'unify', 'unify!', 'union', 'unprofile', + 'unspecialise', 'untrack', 'variable?', 'vector', 'vector->', 'vector?', + 'verified', 'version', 'warn', 'when', 'write-byte', 'write-to-file', + 'y-or-n?', + ) + + BUILTINS_ANYWHERE = ('where', 'skip', '>>', '_', '!', '<e>', '<!>') MAPPINGS = dict((s, Keyword) for s in DECLARATIONS) MAPPINGS.update((s, Name.Builtin) for s in BUILTINS) MAPPINGS.update((s, Keyword) for s in SPECIAL_FORMS) - valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:_-]' + valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:-]' valid_name = '%s+' % valid_symbol_chars symbol_name = r'[a-z!$%%*+,<=>?/.\'@&#_-]%s*' % valid_symbol_chars variable = r'[A-Z]%s*' % valid_symbol_chars @@ -2313,7 +2316,7 @@ class CPSALexer(SchemeLexer): # valid names for identifiers # well, names can only not consist fully of numbers # but this should be good enough for now - valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+' + valid_name = r'[\w!$%&*+,/:<=>?@^~|-]+' tokens = { 'root': [ @@ -2334,7 +2337,7 @@ class CPSALexer(SchemeLexer): # strings, symbols and characters (r'"(\\\\|\\"|[^"])*"', String), (r"'" + valid_name, String.Symbol), - (r"#\\([()/'\"._!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char), + (r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char), # constants (r'(#t|#f)', Name.Constant), diff --git a/pygments/lexers/modula2.py b/pygments/lexers/modula2.py index a5fcbf78..01771f55 100644 --- a/pygments/lexers/modula2.py +++ b/pygments/lexers/modula2.py @@ -290,7 +290,7 @@ class Modula2Lexer(RegexLexer): ], 'unigraph_punctuation': [ # Common Punctuation - (r'[\(\)\[\]{},.:;\|]', Punctuation), + (r'[()\[\]{},.:;|]', Punctuation), # Case Label 
Separator Synonym (r'!', Punctuation), # ISO # Blueprint Punctuation diff --git a/pygments/lexers/oberon.py b/pygments/lexers/oberon.py index db18259d..51dfdab6 100644 --- a/pygments/lexers/oberon.py +++ b/pygments/lexers/oberon.py @@ -47,11 +47,11 @@ class ComponentPascalLexer(RegexLexer): (r'\s+', Text), # whitespace ], 'comments': [ - (r'\(\*([^\$].*?)\*\)', Comment.Multiline), + (r'\(\*([^$].*?)\*\)', Comment.Multiline), # TODO: nested comments (* (* ... *) ... (* ... *) *) not supported! ], 'punctuation': [ - (r'[\(\)\[\]\{\},.:;\|]', Punctuation), + (r'[()\[\]{},.:;|]', Punctuation), ], 'numliterals': [ (r'[0-9A-F]+X\b', Number.Hex), # char code @@ -83,7 +83,7 @@ class ComponentPascalLexer(RegexLexer): (r'\$', Operator), ], 'identifiers': [ - (r'([a-zA-Z_\$][\w\$]*)', Name), + (r'([a-zA-Z_$][\w$]*)', Name), ], 'builtins': [ (words(( diff --git a/pygments/lexers/parasail.py b/pygments/lexers/parasail.py index 878f7d26..812e2923 100644 --- a/pygments/lexers/parasail.py +++ b/pygments/lexers/parasail.py @@ -60,7 +60,7 @@ class ParaSailLexer(RegexLexer): (r'[a-zA-Z]\w*', Name), # Operators and Punctuation (r'(<==|==>|<=>|\*\*=|<\|=|<<=|>>=|==|!=|=\?|<=|>=|' - r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\||\|=|/=|\+|-|\*|/|' + r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\|=|\||/=|\+|-|\*|/|' r'\.\.|<\.\.|\.\.<|<\.\.<)', Operator), (r'(<|>|\[|\]|\(|\)|\||:|;|,|.|\{|\}|->)', diff --git a/pygments/lexers/perl.py b/pygments/lexers/perl.py index b78963d0..8df3c810 100644 --- a/pygments/lexers/perl.py +++ b/pygments/lexers/perl.py @@ -109,7 +109,8 @@ class PerlLexer(RegexLexer): 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'), Name.Builtin), (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo), - (r'<<([\'"]?)([a-zA-Z_]\w*)\1;?\n.*?\n\2\n', String), + (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)', + bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)), (r'__END__', Comment.Preproc, 'end-part'), (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global), (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global), diff --git a/pygments/lexers/php.py b/pygments/lexers/php.py index 257dd94f..2421738f 100644 --- a/pygments/lexers/php.py +++ b/pygments/lexers/php.py @@ -138,7 +138,9 @@ class PhpLexer(RegexLexer): ], 'php': [ (r'\?>', Comment.Preproc, '#pop'), - (r'<<<([\'"]?)(' + _ident_inner + r')\1\n.*?\n\s*\2;?\n', String), + (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)', + bygroups(String, String, String.Delimiter, String, String.Delimiter, + Punctuation, Text)), (r'\s+', Text), (r'#.*?\n', Comment.Single), (r'//.*?\n', Comment.Single), diff --git a/pygments/lexers/praat.py b/pygments/lexers/praat.py index 776c38b8..9255216d 100644 --- a/pygments/lexers/praat.py +++ b/pygments/lexers/praat.py @@ -27,21 +27,21 @@ class PraatLexer(RegexLexer): aliases = ['praat'] filenames = ['*.praat', '*.proc', '*.psc'] - keywords = [ + keywords = ( 'if', 'then', 'else', 'elsif', 'elif', 'endif', 'fi', 'for', 'from', 'to', 'endfor', 'endproc', 'while', 'endwhile', 'repeat', 'until', 'select', 'plus', 'minus', 'demo', 'assert', 'stopwatch', 'nocheck', 'nowarn', 'noprogress', 'editor', 'endeditor', 'clearinfo', - ] + ) - functions_string = [ + functions_string = ( 'backslashTrigraphsToUnicode', 'chooseDirectory', 'chooseReadFile', 'chooseWriteFile', 'date', 'demoKey', 'do', 'environment', 'extractLine', 'extractWord', 'fixed', 'info', 'left', 'mid', 'percent', 'readFile', 'replace', 'replace_regex', 'right', 'selected', 'string', 
'unicodeToBackslashTrigraphs', - ] + ) - functions_numeric = [ + functions_numeric = ( 'abs', 'appendFile', 'appendFileLine', 'appendInfo', 'appendInfoLine', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'barkToHertz', 'beginPause', 'beginSendPraat', 'besselI', 'besselK', 'beta', 'beta2', @@ -67,13 +67,13 @@ class PraatLexer(RegexLexer): 'sincpi', 'sinh', 'soundPressureToPhon', 'sqrt', 'startsWith', 'studentP', 'studentQ', 'tan', 'tanh', 'variableExists', 'word', 'writeFile', 'writeFileLine', 'writeInfo', 'writeInfoLine', - ] + ) - functions_array = [ + functions_array = ( 'linear', 'randomGauss', 'randomInteger', 'randomUniform', 'zero', - ] + ) - objects = [ + objects = ( 'Activation', 'AffineTransform', 'AmplitudeTier', 'Art', 'Artword', 'Autosegment', 'BarkFilter', 'BarkSpectrogram', 'CCA', 'Categories', 'Cepstrogram', 'Cepstrum', 'Cepstrumc', 'ChebyshevSeries', 'ClassificationTable', @@ -100,17 +100,17 @@ class PraatLexer(RegexLexer): 'Strings', 'StringsIndex', 'Table', 'TableOfReal', 'TextGrid', 'TextInterval', 'TextPoint', 'TextTier', 'Tier', 'Transition', 'VocalTract', 'VocalTractTier', 'Weight', 'WordList', - ] + ) - variables_numeric = [ + variables_numeric = ( 'macintosh', 'windows', 'unix', 'praatVersion', 'pi', 'e', 'undefined', - ] + ) - variables_string = [ + variables_string = ( 'praatVersion', 'tab', 'shellDirectory', 'homeDirectory', 'preferencesDirectory', 'newline', 'temporaryDirectory', 'defaultDirectory', - ] + ) tokens = { 'root': [ @@ -151,7 +151,7 @@ class PraatLexer(RegexLexer): (r"'(?=.*')", String.Interpol, 'string_interpolated'), (r'\.{3}', Keyword, ('#pop', 'old_arguments')), (r':', Keyword, ('#pop', 'comma_list')), - (r'[\s\n]', Text, '#pop'), + (r'\s', Text, '#pop'), ], 'procedure_call': [ (r'\s+', Text), @@ -230,7 +230,7 @@ class PraatLexer(RegexLexer): bygroups(Name.Builtin, Name.Builtin, String.Interpol), ('object_attributes', 'string_interpolated')), - (r'\.?_?[a-z][a-zA-Z0-9_.]*(\$|#)?', Text), + (r'\.?_?[a-z][\w.]*(\$|#)?', Text), (r'[\[\]]', Punctuation, 'comma_list'), (r"'(?=.*')", String.Interpol, 'string_interpolated'), ], @@ -239,7 +239,7 @@ class PraatLexer(RegexLexer): (r'\b(and|or|not|div|mod)\b', Operator.Word), ], 'string_interpolated': [ - (r'\.?[_a-z][a-zA-Z0-9_.]*[\$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?', + (r'\.?[_a-z][\w.]*[$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?', String.Interpol), (r"'", String.Interpol, '#pop'), ], diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 57af00e2..7601afa8 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -39,7 +39,7 @@ class PythonLexer(RegexLexer): return [ # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' 
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', ttype), (r'[\'"\\]', ttype), @@ -51,8 +51,10 @@ class PythonLexer(RegexLexer): tokens = { 'root': [ (r'\n', Text), - (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), - (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)), + (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', + bygroups(Text, String.Affix, String.Doc)), + (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", + bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), @@ -72,14 +74,22 @@ class PythonLexer(RegexLexer): include('magicfuncs'), include('magicvars'), include('backtick'), - ('(?:[rR]|[uU][rR]|[rR][uU])"""', String.Double, 'tdqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'''", String.Single, 'tsqs'), - ('(?:[rR]|[uU][rR]|[rR][uU])"', String.Double, 'dqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'", String.Single, 'sqs'), - ('[uU]?"""', String.Double, combined('stringescape', 'tdqs')), - ("[uU]?'''", String.Single, combined('stringescape', 'tsqs')), - ('[uU]?"', String.Double, combined('stringescape', 'dqs')), - ("[uU]?'", String.Single, combined('stringescape', 'sqs')), + ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', + bygroups(String.Affix, String.Double), 'tdqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", + bygroups(String.Affix, String.Single), 'tsqs'), + ('([rR]|[uUbB][rR]|[rR][uUbB])(")', + bygroups(String.Affix, String.Double), 'dqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(')", + bygroups(String.Affix, String.Single), 'sqs'), + ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), + combined('stringescape', 'tdqs')), + ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), + combined('stringescape', 'tsqs')), + ('([uUbB]?)(")', bygroups(String.Affix, String.Double), + combined('stringescape', 'dqs')), + ("([uUbB]?)(')", bygroups(String.Affix, String.Single), + combined('stringescape', 'sqs')), include('name'), include('numbers'), ], @@ -252,16 +262,16 @@ class Python3Lexer(RegexLexer): return [ # the old style '%s' % (...) string formatting (still valid in Py3) (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), # the new style '{}'.format(...) string formatting (r'\{' - '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name - '(\![sra])?' # conversion - '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[bcdeEfFgGnosxX%]?)?' + '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name + '(\![sra])?' # conversion + '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' '\}', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time - (r'[^\\\'"%\{\n]+', ttype), + (r'[^\\\'"%{\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%|(\{{1,2})', ttype) @@ -700,7 +710,7 @@ class CythonLexer(RegexLexer): ], 'strings': [ (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' 
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), @@ -771,18 +781,20 @@ class DgLexer(RegexLexer): (words(( 'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'', 'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object', - 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', 'super', - 'tuple', 'tuple\'', 'type'), prefix=r'(?<!\.)', suffix=r'(?![\'\w])'), + 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', + 'super', 'tuple', 'tuple\'', 'type'), + prefix=r'(?<!\.)', suffix=r'(?![\'\w])'), Name.Builtin), (words(( '__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp', 'compile', 'complex', 'delattr', 'dir', 'divmod', 'drop', 'dropwhile', 'enumerate', - 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst', 'getattr', - 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init', 'input', - 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len', 'locals', - 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow', 'print', 'repr', - 'reversed', 'round', 'setattr', 'scanl1?', 'snd', 'sorted', 'sum', 'tail', - 'take', 'takewhile', 'vars', 'zip'), prefix=r'(?<!\.)', suffix=r'(?![\'\w])'), + 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst', + 'getattr', 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init', + 'input', 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len', + 'locals', 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow', + 'print', 'repr', 'reversed', 'round', 'setattr', 'scanl1?', 'snd', + 'sorted', 'sum', 'tail', 'take', 'takewhile', 'vars', 'zip'), + prefix=r'(?<!\.)', suffix=r'(?![\'\w])'), Name.Builtin), (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])", Name.Builtin.Pseudo), @@ -808,7 +820,7 @@ class DgLexer(RegexLexer): ], 'string': [ (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), diff --git a/pygments/lexers/qvt.py b/pygments/lexers/qvt.py index 5bc61310..f30e4887 100644 --- a/pygments/lexers/qvt.py +++ b/pygments/lexers/qvt.py @@ -9,7 +9,8 @@ :license: BSD, see LICENSE for details. 
""" -from pygments.lexer import RegexLexer, bygroups, include, combined +from pygments.lexer import RegexLexer, bygroups, include, combined, default, \ + words from pygments.token import Text, Comment, Operator, Keyword, Punctuation, \ Name, String, Number @@ -50,23 +51,26 @@ class QVToLexer(RegexLexer): bygroups(Comment, Comment, Comment.Preproc, Comment)), # Uncomment the following if you want to distinguish between # '/*' and '/**', à la javadoc - #(r'/[*]{2}(.|\n)*?[*]/', Comment.Multiline), + # (r'/[*]{2}(.|\n)*?[*]/', Comment.Multiline), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'\\\n', Text), (r'(and|not|or|xor|##?)\b', Operator.Word), - (r'([:]{1-2}=|[-+]=)\b', Operator.Word), - (r'(@|<<|>>)\b', Keyword), # stereotypes - (r'!=|<>|=|==|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator), + (r'(:{1,2}=|[-+]=)\b', Operator.Word), + (r'(@|<<|>>)\b', Keyword), # stereotypes + (r'!=|<>|==|=|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator), (r'[]{}:(),;[]', Punctuation), (r'(true|false|unlimited|null)\b', Keyword.Constant), (r'(this|self|result)\b', Name.Builtin.Pseudo), (r'(var)\b', Keyword.Declaration), (r'(from|import)\b', Keyword.Namespace, 'fromimport'), - (r'(metamodel|class|exception|primitive|enum|transformation|library)(\s+)([a-zA-Z0-9_]+)', + (r'(metamodel|class|exception|primitive|enum|transformation|' + r'library)(\s+)(\w+)', bygroups(Keyword.Word, Text, Name.Class)), - (r'(exception)(\s+)([a-zA-Z0-9_]+)', bygroups(Keyword.Word, Text, Name.Exception)), + (r'(exception)(\s+)(\w+)', + bygroups(Keyword.Word, Text, Name.Exception)), (r'(main)\b', Name.Function), - (r'(mapping|helper|query)(\s+)', bygroups(Keyword.Declaration, Text), 'operation'), + (r'(mapping|helper|query)(\s+)', + bygroups(Keyword.Declaration, Text), 'operation'), (r'(assert)(\s+)\b', bygroups(Keyword, Text), 'assert'), (r'(Bag|Collection|Dict|OrderedSet|Sequence|Set|Tuple|List)\b', Keyword.Type), @@ -75,46 +79,45 @@ class QVToLexer(RegexLexer): ("'", String, combined('stringescape', 'sqs')), include('name'), include('numbers'), - # (r'([a-zA-Z_][a-zA-Z0-9_]*)(::)([a-zA-Z_][a-zA-Z0-9_]*)', + # (r'([a-zA-Z_]\w*)(::)([a-zA-Z_]\w*)', # bygroups(Text, Text, Text)), - ], + ], 'fromimport': [ (r'(?:[ \t]|\\\n)+', Text), - (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace), - (r'', Text, '#pop'), - ], + (r'[a-zA-Z_][\w.]*', Name.Namespace), + default('#pop'), + ], 'operation': [ (r'::', Text), - (r'(.*::)([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*(\()', bygroups(Text,Name.Function, Text), '#pop') - ], + (r'(.*::)([a-zA-Z_]\w*)([ \t]*)(\()', + bygroups(Text, Name.Function, Text, Punctuation), '#pop') + ], 'assert': [ (r'(warning|error|fatal)\b', Keyword, '#pop'), - (r'', Text, '#pop') # all else: go back - ], + default('#pop'), # all else: go back + ], 'keywords': [ - (r'(abstract|access|any|assert|' - r'blackbox|break|case|collect|collectNested|' - r'collectOne|collectselect|collectselectOne|composes|' - r'compute|configuration|constructor|continue|datatype|' - r'default|derived|disjuncts|do|elif|else|end|' - r'endif|except|exists|extends|' - r'forAll|forEach|forOne|from|if|' - r'implies|in|inherits|init|inout|' - r'intermediate|invresolve|invresolveIn|invresolveone|' - r'invresolveoneIn|isUnique|iterate|late|let|' - r'literal|log|map|merges|' - r'modeltype|new|object|one|' - r'ordered|out|package|population|' - r'property|raise|readonly|references|refines|' - r'reject|resolve|resolveIn|resolveone|resolveoneIn|' - r'return|select|selectOne|sortedBy|static|switch|' - r'tag|then|try|typedef|' - r'unlimited|uses|when|where|while|with|' - 
r'xcollect|xmap|xselect)\b', Keyword), + (words(( + 'abstract', 'access', 'any', 'assert', 'blackbox', 'break', + 'case', 'collect', 'collectNested', 'collectOne', 'collectselect', + 'collectselectOne', 'composes', 'compute', 'configuration', + 'constructor', 'continue', 'datatype', 'default', 'derived', + 'disjuncts', 'do', 'elif', 'else', 'end', 'endif', 'except', + 'exists', 'extends', 'forAll', 'forEach', 'forOne', 'from', 'if', + 'implies', 'in', 'inherits', 'init', 'inout', 'intermediate', + 'invresolve', 'invresolveIn', 'invresolveone', 'invresolveoneIn', + 'isUnique', 'iterate', 'late', 'let', 'literal', 'log', 'map', + 'merges', 'modeltype', 'new', 'object', 'one', 'ordered', 'out', + 'package', 'population', 'property', 'raise', 'readonly', + 'references', 'refines', 'reject', 'resolve', 'resolveIn', + 'resolveone', 'resolveoneIn', 'return', 'select', 'selectOne', + 'sortedBy', 'static', 'switch', 'tag', 'then', 'try', 'typedef', + 'unlimited', 'uses', 'when', 'where', 'while', 'with', 'xcollect', + 'xmap', 'xselect'), suffix=r'\b'), Keyword), ], # There is no need to distinguish between String.Single and @@ -127,18 +130,18 @@ class QVToLexer(RegexLexer): 'stringescape': [ (r'\\([\\btnfr"\']|u[0-3][0-7]{2}|u[0-7]{1,2})', String.Escape) ], - 'dqs': [ # double-quoted string + 'dqs': [ # double-quoted string (r'"', String, '#pop'), (r'\\\\|\\"', String.Escape), include('strings') ], - 'sqs': [ # single-quoted string + 'sqs': [ # single-quoted string (r"'", String, '#pop'), (r"\\\\|\\'", String.Escape), include('strings') ], 'name': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), ], # numbers: excerpt taken from the python lexer 'numbers': [ @@ -146,5 +149,4 @@ class QVToLexer(RegexLexer): (r'\d+[eE][+-]?[0-9]+', Number.Float), (r'\d+', Number.Integer) ], - } - + } diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py index 103b4ad0..6dd6e8b9 100644 --- a/pygments/lexers/rdf.py +++ b/pygments/lexers/rdf.py @@ -42,8 +42,7 @@ class SparqlLexer(RegexLexer): u'\u2c00-\u2fef' u'\u3001-\ud7ff' u'\uf900-\ufdcf' - u'\ufdf0-\ufffd' - u'\U00010000-\U000effff') + u'\ufdf0-\ufffd') PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_') @@ -56,7 +55,7 @@ class SparqlLexer(RegexLexer): HEX_GRP = '0-9A-Fa-f' - PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&""()*+,;=/?#@%' + PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&"()*+,;=/?#@%' # terminal productions :: @@ -191,7 +190,7 @@ class TurtleLexer(RegexLexer): flags = re.IGNORECASE patterns = { - 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range + 'PNAME_NS': r'((?:[a-z][\w-]*)?\:)', # Simplified character range 'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)' } @@ -258,8 +257,7 @@ class TurtleLexer(RegexLexer): (r'.', String, '#pop'), ], 'end-of-string': [ - - (r'(@)([a-zA-Z]+(:?-[a-zA-Z0-9]+)*)', + (r'(@)([a-z]+(:?-[a-z0-9]+)*)', bygroups(Operator, Generic.Emph), '#pop:2'), (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'), diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py index e81d6ecf..f16416d3 100644 --- a/pygments/lexers/ruby.py +++ b/pygments/lexers/ruby.py @@ -47,9 +47,9 @@ class RubyLexer(ExtendedRegexLexer): start = match.start(1) yield start, Operator, match.group(1) # <<-? 
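        # The three groups that follow are the optional opening quote, the
        # heredoc name and the closing quote; the name is the piece the
        # termination scan below must see again on a line of its own, which
        # is why it deserves a dedicated token type.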
- yield match.start(2), String.Heredoc, match.group(2) # quote ", ', ` - yield match.start(3), Name.Constant, match.group(3) # heredoc name - yield match.start(4), String.Heredoc, match.group(4) # quote again + yield match.start(2), String.Heredoc, match.group(2) # quote ", ', ` + yield match.start(3), String.Delimiter, match.group(3) # heredoc name + yield match.start(4), String.Heredoc, match.group(4) # quote again heredocstack = ctx.__dict__.setdefault('heredocstack', []) outermost = not bool(heredocstack) @@ -74,7 +74,7 @@ class RubyLexer(ExtendedRegexLexer): if check == hdname: for amatch in lines: yield amatch.start(), String.Heredoc, amatch.group() - yield match.start(), Name.Constant, match.group() + yield match.start(), String.Delimiter, match.group() ctx.pos = match.end() break else: diff --git a/pygments/lexers/scripting.py b/pygments/lexers/scripting.py index 4dd9594b..ac0f7533 100644 --- a/pygments/lexers/scripting.py +++ b/pygments/lexers/scripting.py @@ -1020,11 +1020,11 @@ class EasytrieveLexer(RegexLexer): (r"'(''|[^'])*'", String), (r'\s+', Whitespace), # Everything else just belongs to a name - (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name), ], 'after_declaration': [ (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Function), - ('', Whitespace, '#pop') + default('#pop'), ], 'after_macro_argument': [ (r'\*.*\n', Comment.Single, '#pop'), @@ -1032,7 +1032,7 @@ class EasytrieveLexer(RegexLexer): (_OPERATORS_PATTERN, Operator, '#pop'), (r"'(''|[^'])*'", String, '#pop'), # Everything else just belongs to a name - (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name), ], } _COMMENT_LINE_REGEX = re.compile(r'^\s*\*') @@ -1122,7 +1122,8 @@ class EasytrieveLexer(RegexLexer): class JclLexer(RegexLexer): """ - `Job Control Language (JCL) <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_ + `Job Control Language (JCL) + <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_ is a scripting language used on mainframe platforms to instruct the system on how to run a batch job or start a subsystem. It is somewhat comparable to MS DOS batch and Unix shell scripts. 
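The move from Name.Constant to String.Delimiter for heredoc names in the Ruby
callback above (mirrored in the Perl and PHP rules) can be verified through the
public API. A minimal sketch, assuming a checkout with these changes applied:

    from pygments.lexers.ruby import RubyLexer
    from pygments.token import String

    code = 'greeting = <<-EOS\nhello\nEOS\n'
    for pos, token, value in RubyLexer().get_tokens_unprocessed(code):
        if token is String.Delimiter:
            print(pos, repr(value))

    # Both the opening EOS and the closing delimiter line now carry
    # String.Delimiter, so styles can color them apart from the heredoc body.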
@@ -1145,10 +1146,10 @@ class JclLexer(RegexLexer): ], 'statement': [ (r'\s*\n', Whitespace, '#pop'), - (r'([a-z][a-z_0-9]*)(\s+)(exec|job)(\s*)', + (r'([a-z]\w*)(\s+)(exec|job)(\s*)', bygroups(Name.Label, Whitespace, Keyword.Reserved, Whitespace), 'option'), - (r'[a-z][a-z_0-9]*', Name.Variable, 'statement_command'), + (r'[a-z]\w*', Name.Variable, 'statement_command'), (r'\s+', Whitespace, 'statement_command'), ], 'statement_command': [ @@ -1167,10 +1168,10 @@ class JclLexer(RegexLexer): (r'\*', Name.Builtin), (r'[\[\](){}<>;,]', Punctuation), (r'[-+*/=&%]', Operator), - (r'[a-z_][a-z_0-9]*', Name), - (r'[0-9]+\.[0-9]*', Number.Float), - (r'\.[0-9]+', Number.Float), - (r'[0-9]+', Number.Integer), + (r'[a-z_]\w*', Name), + (r'\d+\.\d*', Number.Float), + (r'\.\d+', Number.Float), + (r'\d+', Number.Integer), (r"'", String, 'option_string'), (r'[ \t]+', Whitespace, 'option_comment'), (r'\.', Punctuation), diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index 05503c3a..7c06226b 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -57,11 +57,14 @@ line_re = re.compile('.*?\n') language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE) +do_re = re.compile(r'\bDO\b', re.IGNORECASE) + def language_callback(lexer, match): """Parse the content of a $-string using a lexer - The lexer is chosen looking for a nearby LANGUAGE. + The lexer is chosen looking for a nearby LANGUAGE or assumed as + plpgsql if inside a DO statement and no LANGUAGE has been found. """ l = None m = language_re.match(lexer.text[match.end():match.end()+100]) @@ -72,15 +75,26 @@ def language_callback(lexer, match): lexer.text[max(0, match.start()-100):match.start()])) if m: l = lexer._get_lexer(m[-1].group(1)) - + else: + m = list(do_re.finditer( + lexer.text[max(0, match.start()-25):match.start()])) + if m: + l = lexer._get_lexer('plpgsql') + + # 1 = $, 2 = delimiter, 3 = $ + yield (match.start(1), String, match.group(1)) + yield (match.start(2), String.Delimiter, match.group(2)) + yield (match.start(3), String, match.group(3)) + # 4 = string contents if l: - yield (match.start(1), String, match.group(1)) - for x in l.get_tokens_unprocessed(match.group(2)): + for x in l.get_tokens_unprocessed(match.group(4)): yield x - yield (match.start(3), String, match.group(3)) - else: - yield (match.start(), String, match.group()) + yield (match.start(4), String, match.group(4)) + # 5 = $, 6 = delimiter, 7 = $ + yield (match.start(5), String, match.group(5)) + yield (match.start(6), String.Delimiter, match.group(6)) + yield (match.start(7), String, match.group(7)) class PostgresBase(object): @@ -148,9 +162,10 @@ class PostgresLexer(PostgresBase, RegexLexer): (r'\$\d+', Name.Variable), (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float), (r'[0-9]+', Number.Integer), - (r"(E|U&)?'(''|[^'])*'", String.Single), - (r'(U&)?"(""|[^"])*"', String.Name), # quoted identifier - (r'(?s)(\$[^$]*\$)(.*?)(\1)', language_callback), + (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'), + # quoted identifier + (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'), + (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback), (r'[a-z_]\w*', Name), # psql variable in SQL @@ -164,6 +179,16 @@ class PostgresLexer(PostgresBase, RegexLexer): (r'[^/*]+', Comment.Multiline), (r'[/*]', Comment.Multiline) ], + 'string': [ + (r"[^']+", String.Single), + (r"''", String.Single), + (r"'", String.Single, '#pop'), + ], + 'quoted-ident': [ + (r'[^"]+', String.Name), + (r'""', String.Name), + 
(r'"', String.Name, '#pop'), + ], } diff --git a/pygments/lexers/supercollider.py b/pygments/lexers/supercollider.py index d3e4c460..cef147b8 100644 --- a/pygments/lexers/supercollider.py +++ b/pygments/lexers/supercollider.py @@ -11,7 +11,7 @@ import re -from pygments.lexer import RegexLexer, include, words +from pygments.lexer import RegexLexer, include, words, default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation @@ -43,7 +43,7 @@ class SuperColliderLexer(RegexLexer): (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), - (r'', Text, '#pop') + default('#pop'), ], 'badregex': [ (r'\n', Text, '#pop') @@ -79,8 +79,8 @@ class SuperColliderLexer(RegexLexer): 'thisFunctionDef', 'thisFunction', 'thisMethod', 'thisProcess', 'thisThread', 'this'), suffix=r'\b'), Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), - (r'\\?[$a-zA-Z_][a-zA-Z0-9_]*', String.Symbol), + (r'[$a-zA-Z_]\w*', Name.Other), + (r'\\?[$a-zA-Z_]\w*', String.Symbol), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), diff --git a/pygments/lexers/testing.py b/pygments/lexers/testing.py index 0bdebe74..be8b6f71 100644 --- a/pygments/lexers/testing.py +++ b/pygments/lexers/testing.py @@ -147,7 +147,7 @@ class TAPLexer(RegexLexer): (r'^TAP version \d+\n', Name.Namespace), # Specify a plan with a plan line. - (r'^1..\d+', Keyword.Declaration, 'plan'), + (r'^1\.\.\d+', Keyword.Declaration, 'plan'), # A test failure (r'^(not ok)([^\S\n]*)(\d*)', diff --git a/pygments/lexers/theorem.py b/pygments/lexers/theorem.py index 60a101cc..f8c7d0a9 100644 --- a/pygments/lexers/theorem.py +++ b/pygments/lexers/theorem.py @@ -390,20 +390,23 @@ class LeanLexer(RegexLexer): flags = re.MULTILINE | re.UNICODE - keywords1 = ('import', 'abbreviation', 'opaque_hint', 'tactic_hint', 'definition', 'renaming', - 'inline', 'hiding', 'exposing', 'parameter', 'parameters', 'conjecture', - 'hypothesis', 'lemma', 'corollary', 'variable', 'variables', 'print', 'theorem', - 'axiom', 'inductive', 'structure', 'universe', 'alias', 'help', - 'options', 'precedence', 'postfix', 'prefix', 'calc_trans', 'calc_subst', 'calc_refl', - 'infix', 'infixl', 'infixr', 'notation', 'eval', 'check', 'exit', 'coercion', 'end', - 'private', 'using', 'namespace', 'including', 'instance', 'section', 'context', - 'protected', 'expose', 'export', 'set_option', 'add_rewrite', 'extends', - 'open', 'example', 'constant', 'constants', 'print', 'opaque', 'reducible', 'irreducible' + keywords1 = ( + 'import', 'abbreviation', 'opaque_hint', 'tactic_hint', 'definition', + 'renaming', 'inline', 'hiding', 'exposing', 'parameter', 'parameters', + 'conjecture', 'hypothesis', 'lemma', 'corollary', 'variable', 'variables', + 'print', 'theorem', 'axiom', 'inductive', 'structure', 'universe', 'alias', + 'help', 'options', 'precedence', 'postfix', 'prefix', 'calc_trans', + 'calc_subst', 'calc_refl', 'infix', 'infixl', 'infixr', 'notation', 'eval', + 'check', 'exit', 'coercion', 'end', 'private', 'using', 'namespace', + 'including', 'instance', 'section', 'context', 'protected', 'expose', + 'export', 'set_option', 'add_rewrite', 'extends', 'open', 'example', + 'constant', 'constants', 'print', 'opaque', 'reducible', 'irreducible', ) keywords2 = ( - 'forall', 'fun', 'Pi', 'obtain', 'from', 'have', 'show', 'assume', 'take', - 'let', 'if', 'else', 'then', 'by', 'in', 'with', 'begin', 'proof', 'qed', 'calc', 
'match' + 'forall', 'fun', 'Pi', 'obtain', 'from', 'have', 'show', 'assume', + 'take', 'let', 'if', 'else', 'then', 'by', 'in', 'with', 'begin', + 'proof', 'qed', 'calc', 'match', ) keywords3 = ( @@ -414,10 +417,10 @@ class LeanLexer(RegexLexer): operators = ( '!=', '#', '&', '&&', '*', '+', '-', '/', '@', '!', '`', '-.', '->', '.', '..', '...', '::', ':>', ';', ';;', '<', - '<-', '=', '==', '>', '_', '`', '|', '||', '~', '=>', '<=', '>=', + '<-', '=', '==', '>', '_', '|', '||', '~', '=>', '<=', '>=', '/\\', '\\/', u'∀', u'Π', u'λ', u'↔', u'∧', u'∨', u'≠', u'≤', u'≥', - u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞', u'⌟', u'≡', - u'⟨', u'⟩' + u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞', + u'⌟', u'≡', u'⟨', u'⟩', ) punctuation = ('(', ')', ':', '{', '}', '[', ']', u'⦃', u'⦄', ':=', ',') diff --git a/pygments/lexers/typoscript.py b/pygments/lexers/typoscript.py index 25bfef9c..407847ed 100644 --- a/pygments/lexers/typoscript.py +++ b/pygments/lexers/typoscript.py @@ -44,11 +44,11 @@ class TypoScriptCssDataLexer(RegexLexer): # marker: ###MARK### (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)), # constant: {$some.constant} - (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})', + (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # constant: {register:somevalue} - (r'(.*)(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})(.*)', + (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)', bygroups(String, String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol, String)), # constant # whitespace @@ -58,8 +58,8 @@ class TypoScriptCssDataLexer(RegexLexer): (r'(?<!(#|\'|"))(?:#(?!(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))[^\n#]+|//[^\n]*)', Comment), # other - (r'[<>,:=\.\*%+\|]', String), - (r'[\w"_\-!\/&;\(\)\{\}]+', String), + (r'[<>,:=.*%+|]', String), + (r'[\w"\-!/&;(){}]+', String), ] } @@ -79,22 +79,22 @@ class TypoScriptHtmlDataLexer(RegexLexer): # INCLUDE_TYPOSCRIPT (r'(INCLUDE_TYPOSCRIPT)', Name.Class), # Language label or extension resource FILE:... or LLL:... or EXT:... 
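            # e.g. LLL:EXT:lang/locallang.xlf:someLabel (label name here is
            # only illustrative)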
- (r'(EXT|FILE|LLL):[^\}\n"]*', String), + (r'(EXT|FILE|LLL):[^}\n"]*', String), # marker: ###MARK### (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)), # constant: {$some.constant} - (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})', + (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # constant: {register:somevalue} - (r'(.*)(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})(.*)', + (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)', bygroups(String, String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol, String)), # constant # whitespace (r'\s+', Text), # other - (r'[<>,:=\.\*%+\|]', String), - (r'[\w"_\-!\/&;\(\)\{\}#]+', String), + (r'[<>,:=.*%+|]', String), + (r'[\w"\-!/&;(){}#]+', String), ] } @@ -138,38 +138,38 @@ class TypoScriptLexer(RegexLexer): r'version)([^\]]*)(\])', bygroups(String.Symbol, Name.Constant, Text, String.Symbol)), # Functions - (r'(?=[\w\-_])(HTMLparser|HTMLparser_tags|addParams|cache|encapsLines|' + (r'(?=[\w\-])(HTMLparser|HTMLparser_tags|addParams|cache|encapsLines|' r'filelink|if|imageLinkWrap|imgResource|makelinks|numRows|numberFormat|' r'parseFunc|replacement|round|select|split|stdWrap|strPad|tableStyle|' - r'tags|textStyle|typolink)(?![\w\-_])', Name.Function), + r'tags|textStyle|typolink)(?![\w\-])', Name.Function), # Toplevel objects and _* (r'(?:(=?\s*<?\s+|^\s*))(cObj|field|config|content|constants|FEData|' r'file|frameset|includeLibs|lib|page|plugin|register|resources|sitemap|' - r'sitetitle|styles|temp|tt_[^:\.\n\s]*|types|xmlnews|INCLUDE_TYPOSCRIPT|' - r'_CSS_DEFAULT_STYLE|_DEFAULT_PI_VARS|_LOCAL_LANG)(?![\w\-_])', + r'sitetitle|styles|temp|tt_[^:.\s]*|types|xmlnews|INCLUDE_TYPOSCRIPT|' + r'_CSS_DEFAULT_STYLE|_DEFAULT_PI_VARS|_LOCAL_LANG)(?![\w\-])', bygroups(Operator, Name.Builtin)), # Content objects - (r'(?=[\w\-_])(CASE|CLEARGIF|COA|COA_INT|COBJ_ARRAY|COLUMNS|CONTENT|' + (r'(?=[\w\-])(CASE|CLEARGIF|COA|COA_INT|COBJ_ARRAY|COLUMNS|CONTENT|' r'CTABLE|EDITPANEL|FILE|FILES|FLUIDTEMPLATE|FORM|HMENU|HRULER|HTML|' r'IMAGE|IMGTEXT|IMG_RESOURCE|LOAD_REGISTER|MEDIA|MULTIMEDIA|OTABLE|' r'PAGE|QTOBJECT|RECORDS|RESTORE_REGISTER|SEARCHRESULT|SVG|SWFOBJECT|' - r'TEMPLATE|TEXT|USER|USER_INT)(?![\w\-_])', Name.Class), + r'TEMPLATE|TEXT|USER|USER_INT)(?![\w\-])', Name.Class), # Menu states - (r'(?=[\w\-_])(ACT|ACTIFSUB|ACTIFSUBRO|ACTRO|CUR|CURIFSUB|CURIFSUBRO|' - r'CURRO|IFSUB|IFSUBRO|NO|SPC|USERDEF1|USERDEF1RO|USERDEF2|USERDEF2RO|' - r'USR|USRRO)', Name.Class), + (r'(?=[\w\-])(ACTIFSUBRO|ACTIFSUB|ACTRO|ACT|CURIFSUBRO|CURIFSUB|CURRO|' + r'CUR|IFSUBRO|IFSUB|NO|SPC|USERDEF1RO|USERDEF1|USERDEF2RO|USERDEF2|' + r'USRRO|USR)', Name.Class), # Menu objects - (r'(?=[\w\-_])(GMENU|GMENU_FOLDOUT|GMENU_LAYERS|IMGMENU|IMGMENUITEM|' - r'JSMENU|JSMENUITEM|TMENU|TMENUITEM|TMENU_LAYERS)', Name.Class), + (r'(?=[\w\-])(GMENU_FOLDOUT|GMENU_LAYERS|GMENU|IMGMENUITEM|IMGMENU|' + r'JSMENUITEM|JSMENU|TMENUITEM|TMENU_LAYERS|TMENU)', Name.Class), # PHP objects - (r'(?=[\w\-_])(PHP_SCRIPT(_EXT|_INT)?)', Name.Class), - (r'(?=[\w\-_])(userFunc)(?![\w\-_])', Name.Function), + (r'(?=[\w\-])(PHP_SCRIPT(_EXT|_INT)?)', Name.Class), + (r'(?=[\w\-])(userFunc)(?![\w\-])', Name.Function), ], 'whitespace': [ (r'\s+', Text), ], 'html':[ - (r'<[^\s][^\n>]*>', using(TypoScriptHtmlDataLexer)), + (r'<\S[^\n>]*>', using(TypoScriptHtmlDataLexer)), (r'&[^;\n]*;', String), (r'(_CSS_DEFAULT_STYLE)(\s*)(\()(?s)(.*(?=\n\)))', bygroups(Name.Class, Text, String.Symbol, using(TypoScriptCssDataLexer))), @@ -182,28 +182,28 @@ 
class TypoScriptLexer(RegexLexer): ], 'label': [ # Language label or extension resource FILE:... or LLL:... or EXT:... - (r'(EXT|FILE|LLL):[^\}\n"]*', String), + (r'(EXT|FILE|LLL):[^}\n"]*', String), # Path to a resource - (r'(?![^\w\-_])([\w\-_]+(?:/[\w\-_]+)+/?)([^\s]*\n)', + (r'(?![^\w\-])([\w\-]+(?:/[\w\-]+)+/?)(\S*\n)', bygroups(String, String)), ], 'punctuation': [ - (r'[,\.]', Punctuation), + (r'[,.]', Punctuation), ], 'operator': [ - (r'[<>,:=\.\*%+\|]', Operator), + (r'[<>,:=.*%+|]', Operator), ], 'structure': [ # Brackets and braces - (r'[\{\}\(\)\[\]\\\\]', String.Symbol), + (r'[{}()\[\]\\]', String.Symbol), ], 'constant': [ # Constant: {$some.constant} - (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})', + (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # Constant: {register:somevalue} - (r'(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})', + (r'(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})', bygroups(String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol)), # constant # Hex color: #ff0077 @@ -216,7 +216,7 @@ class TypoScriptLexer(RegexLexer): (r'(\s*#\s*\n)', Comment), ], 'other': [ - (r'[\w"\-_!\/&;]+', Text), + (r'[\w"\-!/&;]+', Text), ], } diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py new file mode 100644 index 00000000..e64a601b --- /dev/null +++ b/pygments/lexers/varnish.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.varnish + ~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for Varnish configuration + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import RegexLexer, include, bygroups, using, this, \ + inherit, words +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Literal + +__all__ = ['VCLLexer', 'VCLSnippetLexer'] + + +class VCLLexer(RegexLexer): + """ + For Varnish Configuration Language (VCL). + + .. versionadded:: 2.2 + """ + name = 'VCL' + aliases = ['vcl'] + filenames = ['*.vcl'] + mimetypes = ['text/x-vclsrc'] + + def analyse_text(text): + # If the very first line is 'vcl 4.0;' it's pretty much guaranteed + # that this is VCL + if text.startswith('vcl 4.0;'): + return 1.0 + # Skip over comments and blank lines + # This is accurate enough that returning 0.9 is reasonable. + # Almost no VCL files start without some comments. 
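+        # The returned score is consumed by pygments.lexers.guess_lexer(),
+        # which essentially picks the lexer whose analyse_text() reports the
+        # highest rating for the given text.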
+ elif '\nvcl 4\.0;' in text[:1000]: + return 0.9 + + tokens = { + 'probe': [ + include('whitespace'), + include('comments'), + (r'(\.\w+)(\s*=\s*)([^;]*)(;)', + bygroups(Name.Attribute, Operator, using(this), Punctuation)), + (r'\}', Punctuation, '#pop'), + ], + 'acl': [ + include('whitespace'), + include('comments'), + (r'[!/]+', Operator), + (r';', Punctuation), + (r'\d+', Number), + (r'\}', Punctuation, '#pop'), + ], + 'backend': [ + include('whitespace'), + (r'(\.probe)(\s*=\s*)(\w+)(;)', + bygroups(Name.Attribute, Operator, Name.Variable.Global, Punctuation)), + (r'(\.probe)(\s*=\s*)(\{)', + bygroups(Name.Attribute, Operator, Punctuation), 'probe'), + (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)', + bygroups(Name.Attribute, Operator, using(this), Punctuation)), + (r'\{', Punctuation, '#push'), + (r'\}', Punctuation, '#pop'), + ], + 'statements': [ + (r'(\d\.)?\d+[sdwhmy]', Literal.Date), + (r'(\d\.)?\d+ms', Literal.Date), + (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|' + r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|' + r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function), + (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|' + r'miss|fetch|restart)\b', Name.Constant), + (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b', Name.Variable), + (words(( + 'obj.status', 'req.hash_always_miss', 'beresp.backend', 'req.esi_level', + 'req.can_gzip', 'beresp.ttl', 'obj.uncacheable', 'req.ttl', 'obj.hits', + 'client.identity', 'req.hash_ignore_busy', 'obj.reason', 'req.xid', + 'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age', 'local.ip', + 'beresp.uncacheable', 'req.method', 'beresp.backend.ip', 'now', + 'obj.grace', 'req.restarts', 'beresp.keep', 'req.proto', 'resp.proto', + 'bereq.xid', 'bereq.between_bytes_timeout', 'req.esi', + 'bereq.first_byte_timeout', 'bereq.method', 'bereq.connect_timeout', + 'beresp.do_gzip', 'resp.status', 'beresp.do_gunzip', + 'beresp.storage_hint', 'resp.is_streaming', 'beresp.do_stream', + 'req_top.method', 'bereq.backend', 'beresp.backend.name', 'beresp.status', + 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason', 'bereq.retries', + 'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip', + 'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint', + 'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304', + 'server.ip', 'bereq.uncacheable', 'now'), suffix=r'\b'), + Name.Variable), + (r'[!%&+*\-,/<.}{>=|~]+', Operator), + (r'[();]', Punctuation), + + (r'[,]+', Punctuation), + (words(('include', 'hash_data', 'regsub', 'regsuball', 'if', 'else', + 'elsif', 'elif', 'synth', 'synthetic', 'ban', 'synth', + 'return', 'set', 'unset', 'import', 'include', 'new', + 'rollback', 'call'), suffix=r'\b'), + Keyword), + (r'storage\.\w+\.\w+\b', Name.Variable), + (words(('true', 'false')), Name.Builtin), + (r'\d+\b', Number), + (r'(backend)(\s+\w+)(\s*\{)', + bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), + (r'(probe\s)(\s*\w+\s)(\{)', + bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'), + (r'(acl\s)(\s*\w+\s)(\{)', + bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'), + (r'(vcl )(4.0)(;)$', + bygroups(Keyword.Reserved, Name.Constant, Punctuation)), + (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)', + bygroups(Keyword, Name.Function, Punctuation)), + (r'([a-zA-Z_]\w*)' + r'(\.)' + r'([a-zA-Z_]\w*)' + r'(\s*\(.*\))', + bygroups(Name.Function, Punctuation, Name.Function, using(this))), + ('[a-zA-Z_]\w*', Name), + ], + 'comment': [ + (r'[^*/]+', 
Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline), + ], + 'comments': [ + (r'#.*$', Comment), + (r'/\*', Comment.Multiline, 'comment'), + (r'//.*$', Comment), + ], + 'string': [ + (r'"', String, '#pop'), + (r'[^"\n]+', String), # all other characters + ], + 'multistring': [ + (r'[^"}]', String), + (r'"\}', String, '#pop'), + (r'["}]', String), + ], + 'whitespace': [ + (r'L?"', String, 'string'), + (r'\{"', String, 'multistring'), + (r'\n', Text), + (r'\s+', Text), + (r'\\\n', Text), # line continuation + ], + 'root': [ + include('whitespace'), + include('comments'), + include('statements'), + (r'\s+', Text), + ], + } + + +class VCLSnippetLexer(VCLLexer): + """ + For Varnish Configuration Language snippets. + + .. versionadded:: 2.2 + """ + name = 'VCLSnippets' + aliases = ['vclsnippets', 'vclsnippet'] + mimetypes = ['text/x-vclsnippet'] + filenames = [] + + def analyse_text(text): + # override method inherited from VCLLexer + return 0 + + tokens = { + 'snippetspre': [ + (r'\.\.\.+', Comment), + (r'(bereq|req|req_top|resp|beresp|obj|client|server|local|remote|' + r'storage)($|\.\*)', Name.Variable), + ], + 'snippetspost': [ + (r'(backend)\b', Keyword.Reserved), + ], + 'root': [ + include('snippetspre'), + inherit, + include('snippetspost'), + ], + } diff --git a/pygments/scanner.py b/pygments/scanner.py index 35dbbadd..3ff11e4a 100644 --- a/pygments/scanner.py +++ b/pygments/scanner.py @@ -66,7 +66,8 @@ class Scanner(object): def test(self, pattern): """Apply a pattern on the current position and check - if it patches. Doesn't touch pos.""" + if it patches. Doesn't touch pos. + """ return self.check(pattern) is not None def scan(self, pattern): diff --git a/pygments/sphinxext.py b/pygments/sphinxext.py index 2dc9810f..de8cd73b 100644 --- a/pygments/sphinxext.py +++ b/pygments/sphinxext.py @@ -57,6 +57,7 @@ FILTERDOC = ''' ''' + class PygmentsDoc(Directive): """ A directive to collect all lexers/formatters/filters and generate diff --git a/pygments/style.py b/pygments/style.py index b2b990ea..68ee3a19 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -12,6 +12,29 @@ from pygments.token import Token, STANDARD_TYPES from pygments.util import add_metaclass +# Default mapping of #ansixxx to RGB colors. 
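+# A style may reference these names directly in a token style string, e.g.
+# ``Keyword: '#ansigreen'`` (a hypothetical style entry); formatters that
+# understand ANSI codes can emit the named color, while the RGB values below
+# act as the fallback for every other formatter.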
+_ansimap = { + # dark + '#ansiblack': '000000', + '#ansidarkred': '7f0000', + '#ansidarkgreen': '007f00', + '#ansibrown': '7f7fe0', + '#ansidarkblue': '00007f', + '#ansipurple': '7f007f', + '#ansiteal': '007f7f', + '#ansilightgray': 'e5e5e5', + # normal + '#ansidarkgray': '555555', + '#ansired': 'ff0000', + '#ansigreen': '00ff00', + '#ansiyellow': 'ffff00', + '#ansiblue': '0000ff', + '#ansifuchsia': 'ff00ff', + '#ansiturquoise': '00ffff', + '#ansiwhite': 'ffffff', +} +ansicolors = set(_ansimap) + class StyleMeta(type): @@ -22,6 +45,8 @@ class StyleMeta(type): obj.styles[token] = '' def colorformat(text): + if text in ansicolors: + return text if text[0:1] == '#': col = text[1:] if len(col) == 6: @@ -79,16 +104,28 @@ class StyleMeta(type): def style_for_token(cls, token): t = cls._styles[token] + ansicolor = bgansicolor = None + color = t[0] + if color.startswith('#ansi'): + ansicolor = color + color = _ansimap[color] + bgcolor = t[4] + if bgcolor.startswith('#ansi'): + bgansicolor = bgcolor + bgcolor = _ansimap[bgcolor] + return { - 'color': t[0] or None, + 'color': color or None, 'bold': bool(t[1]), 'italic': bool(t[2]), 'underline': bool(t[3]), - 'bgcolor': t[4] or None, + 'bgcolor': bgcolor or None, 'border': t[5] or None, 'roman': bool(t[6]) or None, 'sans': bool(t[7]) or None, 'mono': bool(t[8]) or None, + 'ansicolor': ansicolor, + 'bgansicolor': bgansicolor, } def list_styles(cls): diff --git a/pygments/styles/arduino.py b/pygments/styles/arduino.py index 5b31bb84..1bf2103c 100644 --- a/pygments/styles/arduino.py +++ b/pygments/styles/arduino.py @@ -95,4 +95,4 @@ class ArduinoStyle(Style): Generic.Strong: "", # class: 'gs' Generic.Subheading: "", # class: 'gu' Generic.Traceback: "", # class: 'gt' - }
\ No newline at end of file + } diff --git a/pygments/styles/lovelace.py b/pygments/styles/lovelace.py index 712f3e5c..236dde9b 100644 --- a/pygments/styles/lovelace.py +++ b/pygments/styles/lovelace.py @@ -71,7 +71,9 @@ class LovelaceStyle(Style): Name.Variable.Magic: _DOC_ORANGE, String: _STR_RED, + String.Affix: '#444444', String.Char: _OW_PURPLE, + String.Delimiter: _DOC_ORANGE, String.Doc: 'italic '+_DOC_ORANGE, String.Escape: _ESCAPE_LIME, String.Interpol: 'underline', diff --git a/pygments/token.py b/pygments/token.py index 097ff064..fbd5b805 100644 --- a/pygments/token.py +++ b/pygments/token.py @@ -9,6 +9,7 @@ :license: BSD, see LICENSE for details. """ + class _TokenType(tuple): parent = None @@ -52,30 +53,30 @@ class _TokenType(tuple): return self -Token = _TokenType() +Token = _TokenType() # Special token types -Text = Token.Text -Whitespace = Text.Whitespace -Escape = Token.Escape -Error = Token.Error +Text = Token.Text +Whitespace = Text.Whitespace +Escape = Token.Escape +Error = Token.Error # Text that doesn't belong to this lexer (e.g. HTML in PHP) -Other = Token.Other +Other = Token.Other # Common token types for source code -Keyword = Token.Keyword -Name = Token.Name -Literal = Token.Literal -String = Literal.String -Number = Literal.Number +Keyword = Token.Keyword +Name = Token.Name +Literal = Token.Literal +String = Literal.String +Number = Literal.Number Punctuation = Token.Punctuation -Operator = Token.Operator -Comment = Token.Comment +Operator = Token.Operator +Comment = Token.Comment # Generic types for non-source code -Generic = Token.Generic +Generic = Token.Generic -# String and some others are not direct childs of Token. +# String and some others are not direct children of Token. # alias them: Token.Token = Token Token.String = String @@ -163,8 +164,10 @@ STANDARD_TYPES = { Literal.Date: 'ld', String: 's', + String.Affix: 'sa', String.Backtick: 'sb', String.Char: 'sc', + String.Delimiter: 'dl', String.Doc: 'sd', String.Double: 's2', String.Escape: 'se', @@ -54,7 +54,7 @@ else: setup( name = 'Pygments', - version = '2.1', + version = '2.2', url = 'http://pygments.org/', license = 'BSD License', author = 'Georg Brandl', diff --git a/tests/examplefiles/StdGeneric.icl b/tests/examplefiles/StdGeneric.icl new file mode 100644 index 00000000..2e6c3931 --- /dev/null +++ b/tests/examplefiles/StdGeneric.icl @@ -0,0 +1,92 @@ +implementation module StdGeneric + +import StdInt, StdMisc, StdClass, StdFunc + +generic bimap a b :: Bimap .a .b + +bimapId :: Bimap .a .a +bimapId = { map_to = id, map_from = id } + +bimap{|c|} = { map_to = id, map_from = id } + +bimap{|PAIR|} bx by = { map_to= map_to, map_from=map_from } +where + map_to (PAIR x y) = PAIR (bx.map_to x) (by.map_to y) + map_from (PAIR x y) = PAIR (bx.map_from x) (by.map_from y) +bimap{|EITHER|} bl br = { map_to= map_to, map_from=map_from } +where + map_to (LEFT x) = LEFT (bl.map_to x) + map_to (RIGHT x) = RIGHT (br.map_to x) + map_from (LEFT x) = LEFT (bl.map_from x) + map_from (RIGHT x) = RIGHT (br.map_from x) + +bimap{|(->)|} barg bres = { map_to = map_to, map_from = map_from } +where + map_to f = comp3 bres.map_to f barg.map_from + map_from f = comp3 bres.map_from f barg.map_to + +bimap{|CONS|} barg = { map_to= map_to, map_from=map_from } +where + map_to (CONS x) = CONS (barg.map_to x) + map_from (CONS x) = CONS (barg.map_from x) + +bimap{|FIELD|} barg = { map_to= map_to, map_from=map_from } +where + map_to (FIELD x) = FIELD (barg.map_to x) + map_from (FIELD x) = FIELD (barg.map_from x) + +bimap{|OBJECT|} barg = { 
map_to= map_to, map_from=map_from } +where + map_to (OBJECT x) = OBJECT (barg.map_to x) + map_from (OBJECT x) = OBJECT (barg.map_from x) + +bimap{|Bimap|} x y = {map_to = map_to, map_from = map_from} +where + map_to {map_to, map_from} = + { map_to = comp3 y.map_to map_to x.map_from + , map_from = comp3 x.map_to map_from y.map_from + } + map_from {map_to, map_from} = + { map_to = comp3 y.map_from map_to x.map_to + , map_from = comp3 x.map_from map_from y.map_to + } + +comp3 :: !(.a -> .b) u:(.c -> .a) !(.d -> .c) -> u:(.d -> .b) +comp3 f g h + | is_id f + | is_id h + = cast g + = cast (\x -> g (h x)) + | is_id h + = cast (\x -> f (g x)) + = \x -> f (g (h x)) +where + is_id :: !.(.a -> .b) -> Bool + is_id f = code inline + { + eq_desc e_StdFunc_did 0 0 + pop_a 1 + } + + cast :: !u:a -> u:b + cast f = code inline + { + pop_a 0 + } + +getConsPath :: !GenericConsDescriptor -> [ConsPos] +getConsPath {gcd_index, gcd_type_def={gtd_num_conses}} + = doit gcd_index gtd_num_conses +where + doit i n + | n == 0 + = abort "getConsPath: zero conses\n" + | i >= n + = abort "getConsPath: cons index >= number of conses" + | n == 1 + = [] + | i < (n/2) + = [ ConsLeft : doit i (n/2) ] + | otherwise + = [ ConsRight : doit (i - (n/2)) (n - (n/2)) ] +
\ No newline at end of file diff --git a/tests/examplefiles/example2.cpp b/tests/examplefiles/example2.cpp new file mode 100644 index 00000000..ccd99383 --- /dev/null +++ b/tests/examplefiles/example2.cpp @@ -0,0 +1,20 @@ +/* + * A Test file for the different string literals. + */ + +#include <iostream> + +int main() { + char *_str = "a normal string"; + wchar_t *L_str = L"a wide string"; + char *u8_str = u8"utf-8 string"; + char16_t *u_str = u"utf-16 string"; + char32_t *U_str = U"utf-32 string"; + char *R_str = R""""(raw string with +""" +as a delimiter)""""; + + std::cout << R_str << std::endl; + + return 0; +} diff --git a/tests/examplefiles/postgresql_test.txt b/tests/examplefiles/postgresql_test.txt index 190d184f..28db5ee3 100644 --- a/tests/examplefiles/postgresql_test.txt +++ b/tests/examplefiles/postgresql_test.txt @@ -45,3 +45,37 @@ $$; SELECT U&'\0441\043B\043E\043D' FROM U&"\0441\043B\043E\043D"; +-- Escapes +SELECT E'1\n2\n3'; + +-- DO example from postgresql documentation +/* + * PostgreSQL is Copyright © 1996-2016 by the PostgreSQL Global Development Group. + * + * Postgres95 is Copyright © 1994-5 by the Regents of the University of California. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without a written agreement + * is hereby granted, provided that the above copyright notice and this paragraph + * and the following two paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, + * EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS-IS" BASIS, + * AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ +DO $$DECLARE r record; +BEGIN + FOR r IN SELECT table_schema, table_name FROM information_schema.tables + WHERE table_type = 'VIEW' AND table_schema = 'public' + LOOP + EXECUTE 'GRANT ALL ON ' || quote_ident(r.table_schema) || '.' || quote_ident(r.table_name) || ' TO webuser'; + END LOOP; +END$$; diff --git a/tests/examplefiles/test.php b/tests/examplefiles/test.php index 794961c1..e8efdc6a 100644 --- a/tests/examplefiles/test.php +++ b/tests/examplefiles/test.php @@ -535,5 +535,10 @@ $magic->__toString(); EOF; +echo <<<"some_delimiter" +more heredoc testing +continues on this line +some_delimiter; + ?> diff --git a/tests/examplefiles/varnish.vcl b/tests/examplefiles/varnish.vcl new file mode 100644 index 00000000..6258c313 --- /dev/null +++ b/tests/examplefiles/varnish.vcl @@ -0,0 +1,187 @@ +# This is the VCL configuration Varnish will automatically append to your VCL +# file during compilation/loading. See the vcl(7) man page for details on syntax +# and semantics. +# New users is recommended to use the example.vcl file as a starting point. + +vcl 4.0; + +backend foo { .host = "192.168.1.1"; } + +probe blatti { .url = "foo"; } +probe fooy { + .url = "beh"; + +} + +acl foo { + "192.168.1.1"; + "192.168.0.0"/24; + ! 
"192.168.0.1"; +} + +include "foo.vcl"; + +import std; + +sub vcl_init { + new b = director.foo(); +} + +sub vcl_recv { + ban(req.url ~ "foo"); + rollback(); +} +sub vcl_recv { + if (req.method == "PRI") { + /* We do not support SPDY or HTTP/2.0 */ + return (synth(405)); + } + if (req.method != "GET" && + req.method != "HEAD" && + req.method != "PUT" && + req.method != "POST" && + req.method != "TRACE" && + req.method != "OPTIONS" && + req.method != "DELETE") { + /* Non-RFC2616 or CONNECT which is weird. */ + return (pipe); + } + + if (req.method != "GET" && req.method != "HEAD") { + /* We only deal with GET and HEAD by default */ + return (pass); + } + if (req.http.Authorization || req.http.Cookie) { + /* Not cacheable by default */ + return (pass); + } + return (hash); +} + +sub vcl_pipe { + # By default Connection: close is set on all piped requests, to stop + # connection reuse from sending future requests directly to the + # (potentially) wrong backend. If you do want this to happen, you can undo + # it here. + # unset bereq.http.connection; + return (pipe); +} + +sub vcl_pass { + return (fetch); +} + +sub vcl_hash { + hash_data(req.url); + if (req.http.host) { + hash_data(req.http.host); + } else { + hash_data(server.ip); + } + return (lookup); +} + +sub vcl_purge { + return (synth(200, "Purged")); +} + +sub vcl_hit { + if (obj.ttl >= 0s) { + // A pure unadultered hit, deliver it + return (deliver); + } + if (obj.ttl + obj.grace > 0s) { + // Object is in grace, deliver it + // Automatically triggers a background fetch + return (deliver); + } + // fetch & deliver once we get the result + return (miss); +} + +sub vcl_miss { + return (fetch); +} + +sub vcl_deliver { + set resp.http.x-storage = storage.s0.free; + return (deliver); +} + +/* + * We can come here "invisibly" with the following errors: 413, 417 & 503 + */ +sub vcl_synth { + set resp.http.Content-Type = "text/html; charset=utf-8"; + set resp.http.Retry-After = "5"; + synthetic( {"<!DOCTYPE html> +<html> + <head> + <title>"} + resp.status + " " + resp.reason + {"</title> + </head> + <body> + <h1>Error "} + resp.status + " " + resp.reason + {"</h1> + <p>"} + resp.reason + {"</p> + <h3>Guru Meditation:</h3> + <p>XID: "} + req.xid + {"</p> + <hr> + <p>Varnish cache server</p> + </body> +</html> +"} ); + return (deliver); +} + +####################################################################### +# Backend Fetch + +sub vcl_backend_fetch { + return (fetch); +} + +sub vcl_backend_response { + if (beresp.ttl <= 0s || + beresp.http.Set-Cookie || + beresp.http.Surrogate-control ~ "no-store" || + (!beresp.http.Surrogate-Control && + beresp.http.Cache-Control ~ "no-cache|no-store|private") || + beresp.http.Vary == "*") { + /* + * Mark as "Hit-For-Pass" for the next 2 minutes + */ + set beresp.ttl = 120s; + set beresp.uncacheable = true; + } + return (deliver); +} + +sub vcl_backend_error { + set beresp.http.Content-Type = "text/html; charset=utf-8"; + set beresp.http.Retry-After = "5"; + synthetic( {"<!DOCTYPE html> +<html> + <head> + <title>"} + beresp.status + " " + beresp.reason + {"</title> + </head> + <body> + <h1>Error "} + beresp.status + " " + beresp.reason + {"</h1> + <p>"} + beresp.reason + {"</p> + <h3>Guru Meditation:</h3> + <p>XID: "} + bereq.xid + {"</p> + <hr> + <p>Varnish cache server</p> + </body> +</html> +"} ); + return (deliver); +} + +####################################################################### +# Housekeeping + +sub vcl_init { +} + +sub vcl_fini { + return (ok); +} diff --git 
a/tests/examplefiles/wdiff_example1.wdiff b/tests/examplefiles/wdiff_example1.wdiff new file mode 100644 index 00000000..ca760812 --- /dev/null +++ b/tests/examplefiles/wdiff_example1.wdiff @@ -0,0 +1,731 @@ +.. -*- mode: rst -*- + +{+.. highlight:: python+} + +==================== +Write your own lexer +==================== + +If a lexer for your favorite language is missing in the Pygments package, you +can easily write your own and extend Pygments. + +All you need can be found inside the :mod:`pygments.lexer` module. As you can +read in the :doc:`API documentation <api>`, a lexer is a class that is +initialized with some keyword arguments (the lexer options) and that provides a +:meth:`.get_tokens_unprocessed()` method which is given a string or unicode +object with the data to [-parse.-] {+lex.+} + +The :meth:`.get_tokens_unprocessed()` method must return an iterator or iterable +containing tuples in the form ``(index, token, value)``. Normally you don't +need to do this since there are [-numerous-] base lexers {+that do most of the work and that+} +you can subclass. + + +RegexLexer +========== + +[-A very powerful (but quite easy to use)-] + +{+The+} lexer {+base class used by almost all of Pygments' lexers+} is the +:class:`RegexLexer`. This +[-lexer base-] class allows you to define lexing rules in terms of +*regular expressions* for different *states*. + +States are groups of regular expressions that are matched against the input +string at the *current position*. If one of these expressions matches, a +corresponding action is performed [-(normally-] {+(such as+} yielding a token with a specific +[-type),-] +{+type, or changing state),+} the current position is set to where the last match +ended and the matching process continues with the first regex of the current +state. + +Lexer states are kept [-in-] {+on+} a [-state-] stack: each time a new state is entered, the new +state is pushed onto the stack. The most basic lexers (like the `DiffLexer`) +just need one state. + +Each state is defined as a list of tuples in the form (`regex`, `action`, +`new_state`) where the last item is optional. In the most basic form, `action` +is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a +token with the match text and type `tokentype` and push `new_state` on the state +stack. If the new state is ``'#pop'``, the topmost state is popped from the +stack instead. [-(To-] {+To+} pop more than one state, use ``'#pop:2'`` and so [-on.)-] {+on.+} +``'#push'`` is a synonym for pushing the current state on the stack. + +The following example shows the `DiffLexer` from the builtin lexers. Note that +it contains some additional attributes `name`, `aliases` and `filenames` which +aren't required for a lexer. They are used by the builtin lexer lookup +functions. + +[-.. sourcecode:: python-] {+::+} + + from pygments.lexer import RegexLexer + from pygments.token import * + + class DiffLexer(RegexLexer): + name = 'Diff' + aliases = ['diff'] + filenames = ['*.diff'] + + tokens = { + 'root': [ + (r' .*\n', Text), + (r'\+.*\n', Generic.Inserted), + (r'-.*\n', Generic.Deleted), + (r'@.*\n', Generic.Subheading), + (r'Index.*\n', Generic.Heading), + (r'=.*\n', Generic.Heading), + (r'.*\n', Text), + ] + } + +As you can see this lexer only uses one state. When the lexer starts scanning +the text, it first checks if the current character is a space. 
If this is true +it scans everything until newline and returns the [-parsed-] data as {+a+} `Text` [-token.-] {+token (which +is the "no special highlighting" token).+} + +If this rule doesn't match, it checks if the current char is a plus sign. And +so on. + +If no rule matches at the current position, the current char is emitted as an +`Error` token that indicates a [-parsing-] {+lexing+} error, and the position is increased by +[-1.-] +{+one.+} + + +Adding and testing a new lexer +============================== + +To make [-pygments-] {+Pygments+} aware of your new lexer, you have to perform the following +steps: + +First, change to the current directory containing the [-pygments-] {+Pygments+} source code: + +.. [-sourcecode::-] {+code-block::+} console + + $ cd .../pygments-main + +{+Select a matching module under ``pygments/lexers``, or create a new module for +your lexer class.+} + +Next, make sure the lexer is known from outside of the module. All modules in +the ``pygments.lexers`` specify ``__all__``. For example, [-``other.py`` sets: + +.. sourcecode:: python-] {+``esoteric.py`` sets::+} + + __all__ = ['BrainfuckLexer', 'BefungeLexer', ...] + +Simply add the name of your lexer class to this list. + +Finally the lexer can be made [-publically-] {+publicly+} known by rebuilding the lexer mapping: + +.. [-sourcecode::-] {+code-block::+} console + + $ make mapfiles + +To test the new lexer, store an example file with the proper extension in +``tests/examplefiles``. For example, to test your ``DiffLexer``, add a +``tests/examplefiles/example.diff`` containing a sample diff output. + +Now you can use pygmentize to render your example to HTML: + +.. [-sourcecode::-] {+code-block::+} console + + $ ./pygmentize -O full -f html -o /tmp/example.html tests/examplefiles/example.diff + +Note that this [-explicitely-] {+explicitly+} calls the ``pygmentize`` in the current directory +by preceding it with ``./``. This ensures your modifications are used. +Otherwise a possibly already installed, unmodified version without your new +lexer would have been called from the system search path (``$PATH``). + +To view the result, open ``/tmp/example.html`` in your browser. + +Once the example renders as expected, you should run the complete test suite: + +.. [-sourcecode::-] {+code-block::+} console + + $ make test + +{+It also tests that your lexer fulfills the lexer API and certain invariants, +such as that the concatenation of all token text is the same as the input text.+} + + +Regex Flags +=========== + +You can either define regex flags {+locally+} in the regex (``r'(?x)foo bar'``) or +{+globally+} by adding a `flags` attribute to your lexer class. If no attribute is +defined, it defaults to `re.MULTILINE`. For more [-informations-] {+information+} about regular +expression flags see the {+page about+} `regular expressions`_ [-help page-] in the [-python-] {+Python+} +documentation. + +.. _regular expressions: [-http://docs.python.org/lib/re-syntax.html-] {+http://docs.python.org/library/re.html#regular-expression-syntax+} + + +Scanning multiple tokens at once +================================ + +{+So far, the `action` element in the rule tuple of regex, action and state has +been a single token type. Now we look at the first of several other possible +values.+} + +Here is a more complex lexer that highlights INI files. INI files consist of +sections, comments and [-key-] {+``key+} = [-value pairs: + +.. 
sourcecode:: python-] {+value`` pairs::+} + + from pygments.lexer import RegexLexer, bygroups + from pygments.token import * + + class IniLexer(RegexLexer): + name = 'INI' + aliases = ['ini', 'cfg'] + filenames = ['*.ini', '*.cfg'] + + tokens = { + 'root': [ + (r'\s+', Text), + (r';.*?$', Comment), + (r'\[.*?\]$', Keyword), + (r'(.*?)(\s*)(=)(\s*)(.*?)$', + bygroups(Name.Attribute, Text, Operator, Text, String)) + ] + } + +The lexer first looks for whitespace, comments and section names. [-And later-] {+Later+} it +looks for a line that looks like a key, value pair, separated by an ``'='`` +sign, and optional whitespace. + +The `bygroups` helper [-makes sure that-] {+yields+} each {+capturing+} group [-is yielded-] {+in the regex+} with a different +token type. First the `Name.Attribute` token, then a `Text` token for the +optional whitespace, after that a `Operator` token for the equals sign. Then a +`Text` token for the whitespace again. The rest of the line is returned as +`String`. + +Note that for this to work, every part of the match must be inside a capturing +group (a ``(...)``), and there must not be any nested capturing groups. If you +nevertheless need a group, use a non-capturing group defined using this syntax: +[-``r'(?:some|words|here)'``-] +{+``(?:some|words|here)``+} (note the ``?:`` after the beginning parenthesis). + +If you find yourself needing a capturing group inside the regex which shouldn't +be part of the output but is used in the regular expressions for backreferencing +(eg: ``r'(<(foo|bar)>)(.*?)(</\2>)'``), you can pass `None` to the bygroups +function and [-it will skip-] that group will be skipped in the output. + + +Changing states +=============== + +Many lexers need multiple states to work as expected. For example, some +languages allow multiline comments to be nested. Since this is a recursive +pattern it's impossible to lex just using regular expressions. + +Here is [-the solution: + +.. sourcecode:: python-] {+a lexer that recognizes C++ style comments (multi-line with ``/* */`` +and single-line with ``//`` until end of line)::+} + + from pygments.lexer import RegexLexer + from pygments.token import * + + class [-ExampleLexer(RegexLexer):-] {+CppCommentLexer(RegexLexer):+} + name = 'Example Lexer with states' + + tokens = { + 'root': [ + (r'[^/]+', Text), + (r'/\*', Comment.Multiline, 'comment'), + (r'//.*?$', Comment.Singleline), + (r'/', Text) + ], + 'comment': [ + (r'[^*/]', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline) + ] + } + +This lexer starts lexing in the ``'root'`` state. It tries to match as much as +possible until it finds a slash (``'/'``). If the next character after the slash +is [-a star-] {+an asterisk+} (``'*'``) the `RegexLexer` sends those two characters to the +output stream marked as `Comment.Multiline` and continues [-parsing-] {+lexing+} with the rules +defined in the ``'comment'`` state. + +If there wasn't [-a star-] {+an asterisk+} after the slash, the `RegexLexer` checks if it's a +[-singleline-] +{+Singleline+} comment [-(eg:-] {+(i.e.+} followed by a second slash). If this also wasn't the +case it must be a single [-slash-] {+slash, which is not a comment starter+} (the separate +regex for a single slash must also be given, else the slash would be marked as +an error token). + +Inside the ``'comment'`` state, we do the same thing again. Scan until the +lexer finds a star or slash. 
If it's the opening of a multiline comment, push +the ``'comment'`` state on the stack and continue scanning, again in the +``'comment'`` state. Else, check if it's the end of the multiline comment. If +yes, pop one state from the stack. + +Note: If you pop from an empty stack you'll get an `IndexError`. (There is an +easy way to prevent this from happening: don't ``'#pop'`` in the root state). + +If the `RegexLexer` encounters a newline that is flagged as an error token, the +stack is emptied and the lexer continues scanning in the ``'root'`` state. This +[-helps-] +{+can help+} producing error-tolerant highlighting for erroneous input, e.g. when a +single-line string is not closed. + + +Advanced state tricks +===================== + +There are a few more things you can do with states: + +- You can push multiple states onto the stack if you give a tuple instead of a + simple string as the third item in a rule tuple. For example, if you want to + match a comment containing a directive, something [-like::-] {+like: + + .. code-block:: text+} + + /* <processing directive> rest of comment */ + + you can use this [-rule: + + .. sourcecode:: python-] {+rule::+} + + tokens = { + 'root': [ + (r'/\* <', Comment, ('comment', 'directive')), + ... + ], + 'directive': [ + (r'[^>]*', Comment.Directive), + (r'>', Comment, '#pop'), + ], + 'comment': [ + (r'[^*]+', Comment), + (r'\*/', Comment, '#pop'), + (r'\*', Comment), + ] + } + + When this encounters the above sample, first ``'comment'`` and ``'directive'`` + are pushed onto the stack, then the lexer continues in the directive state + until it finds the closing ``>``, then it continues in the comment state until + the closing ``*/``. Then, both states are popped from the stack again and + lexing continues in the root state. + + .. versionadded:: 0.9 + The tuple can contain the special ``'#push'`` and ``'#pop'`` (but not + ``'#pop:n'``) directives. + + +- You can include the rules of a state in the definition of another. This is + done by using `include` from [-`pygments.lexer`: + + .. sourcecode:: python-] {+`pygments.lexer`::+} + + from pygments.lexer import RegexLexer, bygroups, include + from pygments.token import * + + class ExampleLexer(RegexLexer): + tokens = { + 'comments': [ + (r'/\*.*?\*/', Comment), + (r'//.*?\n', Comment), + ], + 'root': [ + include('comments'), + (r'(function )(\w+)( {)', + bygroups(Keyword, Name, Keyword), 'function'), + (r'.', Text), + ], + 'function': [ + (r'[^}/]+', Text), + include('comments'), + (r'/', Text), + [-(r'}',-] + {+(r'\}',+} Keyword, '#pop'), + ] + } + + This is a hypothetical lexer for a language that consist of functions and + comments. Because comments can occur at toplevel and in functions, we need + rules for comments in both states. As you can see, the `include` helper saves + repeating rules that occur more than once (in this example, the state + ``'comment'`` will never be entered by the lexer, as it's only there to be + included in ``'root'`` and ``'function'``). + +- Sometimes, you may want to "combine" a state from existing ones. This is + possible with the [-`combine`-] {+`combined`+} helper from `pygments.lexer`. + + If you, instead of a new state, write ``combined('state1', 'state2')`` as the + third item of a rule tuple, a new anonymous state will be formed from state1 + and state2 and if the rule matches, the lexer will enter this state. + + This is not used very often, but can be helpful in some cases, such as the + `PythonLexer`'s string literal processing. 
+ +- If you want your lexer to start lexing in a different state you can modify the + stack by [-overloading-] {+overriding+} the `get_tokens_unprocessed()` [-method: + + .. sourcecode:: python-] {+method::+} + + from pygments.lexer import RegexLexer + + class [-MyLexer(RegexLexer):-] {+ExampleLexer(RegexLexer):+} + tokens = {...} + + def get_tokens_unprocessed(self, [-text): + stack = ['root', 'otherstate']-] {+text, stack=('root', 'otherstate')):+} + for item in RegexLexer.get_tokens_unprocessed(text, stack): + yield item + + Some lexers like the `PhpLexer` use this to make the leading ``<?php`` + preprocessor comments optional. Note that you can crash the lexer easily by + putting values into the stack that don't exist in the token map. Also + removing ``'root'`` from the stack can result in strange errors! + +- [-An-] {+In some lexers, a state should be popped if anything is encountered that isn't + matched by a rule in the state. You could use an+} empty regex at the end of [-a-] + {+the+} state list, [-combined with ``'#pop'``, can + act as-] {+but Pygments provides+} a [-return point-] {+more obvious way of spelling that: + ``default('#pop')`` is equivalent to ``('', Text, '#pop')``. + + .. versionadded:: 2.0 + + +Subclassing lexers derived+} from {+RegexLexer +========================================== + +.. versionadded:: 1.6 + +Sometimes multiple languages are very similar, but should still be lexed by +different lexer classes. + +When subclassing+} a {+lexer derived from RegexLexer, the ``tokens`` dictionaries +defined in the parent and child class are merged. For example:: + + from pygments.lexer import RegexLexer, inherit + from pygments.token import * + + class BaseLexer(RegexLexer): + tokens = { + 'root': [ + ('[a-z]+', Name), + (r'/\*', Comment, 'comment'), + ('"', String, 'string'), + ('\s+', Text), + ], + 'string': [ + ('[^"]+', String), + ('"', String, '#pop'), + ], + 'comment': [ + ... + ], + } + + class DerivedLexer(BaseLexer): + tokens = { + 'root': [ + ('[0-9]+', Number), + inherit, + ], + 'string': [ + (r'[^"\\]+', String), + (r'\\.', String.Escape), + ('"', String, '#pop'), + ], + } + +The `BaseLexer` defines two states, lexing names and strings. The +`DerivedLexer` defines its own tokens dictionary, which extends the definitions +of the base lexer: + +* The "root"+} state {+has an additional rule and then the special object `inherit`, + which tells Pygments to insert the token definitions of the parent class at+} + that [-doesn't have a clear end marker.-] {+point. + +* The "string" state is replaced entirely, since there is not `inherit` rule. + +* The "comment" state is inherited entirely.+} + + +Using multiple lexers +===================== + +Using multiple lexers for the same input can be tricky. One of the easiest +combination techniques is shown here: You can replace the [-token type-] {+action+} entry in a rule +tuple [-(the second item)-] with a lexer class. The matched text will then be lexed with that lexer, +and the resulting tokens will be yielded. + +For example, look at this stripped-down HTML [-lexer: + +.. 
sourcecode:: python-] {+lexer::+} + + from pygments.lexer import RegexLexer, bygroups, using + from pygments.token import * + from [-pygments.lexers.web-] {+pygments.lexers.javascript+} import JavascriptLexer + + class HtmlLexer(RegexLexer): + name = 'HTML' + aliases = ['html'] + filenames = ['*.html', '*.htm'] + + flags = re.IGNORECASE | re.DOTALL + tokens = { + 'root': [ + ('[^<&]+', Text), + ('&.*?;', Name.Entity), + (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')), + (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'), + (r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag), + ], + 'script-content': [ + (r'(.+?)(<\s*/\s*script\s*>)', + bygroups(using(JavascriptLexer), Name.Tag), + '#pop'), + ] + } + +Here the content of a ``<script>`` tag is passed to a newly created instance of +a `JavascriptLexer` and not processed by the `HtmlLexer`. This is done using +the `using` helper that takes the other lexer class as its parameter. + +Note the combination of `bygroups` and `using`. This makes sure that the +content up to the ``</script>`` end tag is processed by the `JavascriptLexer`, +while the end tag is yielded as a normal token with the `Name.Tag` type. + +[-As an additional goodie, if the lexer class is replaced by `this` (imported from +`pygments.lexer`), the "other" lexer will be the current one (because you cannot +refer to the current class within the code that runs at class definition time).-] + +Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule. +Here, two states are pushed onto the state stack, ``'script-content'`` and +``'tag'``. That means that first ``'tag'`` is processed, which will [-parse-] {+lex+} +attributes and the closing ``>``, then the ``'tag'`` state is popped and the +next state on top of the stack will be ``'script-content'``. + +{+Since you cannot refer to the class currently being defined, use `this` +(imported from `pygments.lexer`) to refer to the current lexer class, i.e. +``using(this)``. This construct may seem unnecessary, but this is often the +most obvious way of lexing arbitrary syntax between fixed delimiters without +introducing deeply nested states.+} + +The `using()` helper has a special keyword argument, `state`, which works as +follows: if given, the lexer to use initially is not in the ``"root"`` state, +but in the state given by this argument. This [-*only* works-] {+does not work+} with [-a `RegexLexer`.-] {+advanced +`RegexLexer` subclasses such as `ExtendedRegexLexer` (see below).+} + +Any other keywords arguments passed to `using()` are added to the keyword +arguments used to create the lexer. + + +Delegating Lexer +================ + +Another approach for nested lexers is the `DelegatingLexer` which is for example +used for the template engine lexers. It takes two lexers as arguments on +initialisation: a `root_lexer` and a `language_lexer`. + +The input is processed as follows: First, the whole text is lexed with the +`language_lexer`. All tokens yielded with [-a-] {+the special+} type of ``Other`` are +then concatenated and given to the `root_lexer`. The language tokens of the +`language_lexer` are then inserted into the `root_lexer`'s token stream at the +appropriate positions. + +[-.. sourcecode:: python-] {+::+} + + from pygments.lexer import DelegatingLexer + from pygments.lexers.web import HtmlLexer, PhpLexer + + class HtmlPhpLexer(DelegatingLexer): + def __init__(self, **options): + super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options) + +This procedure ensures that e.g. 
HTML with template tags in it is highlighted +correctly even if the template tags are put into HTML tags or attributes. + +If you want to change the needle token ``Other`` to something else, you can give +the lexer another token type as the third [-parameter: + +.. sourcecode:: python-] {+parameter::+} + + DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options) + + +Callbacks +========= + +Sometimes the grammar of a language is so complex that a lexer would be unable +to [-parse-] {+process+} it just by using regular expressions and stacks. + +For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead +of token types (`bygroups` and `using` are nothing else but preimplemented +callbacks). The callback must be a function taking two arguments: + +* the lexer itself +* the match object for the last matched rule + +The callback must then return an iterable of (or simply yield) ``(index, +tokentype, value)`` tuples, which are then just passed through by +`get_tokens_unprocessed()`. The ``index`` here is the position of the token in +the input string, ``tokentype`` is the normal token type (like `Name.Builtin`), +and ``value`` the associated part of the input string. + +You can see an example [-here: + +.. sourcecode:: python-] {+here::+} + + from pygments.lexer import RegexLexer + from pygments.token import Generic + + class HypotheticLexer(RegexLexer): + + def headline_callback(lexer, match): + equal_signs = match.group(1) + text = match.group(2) + yield match.start(), Generic.Headline, equal_signs + text + equal_signs + + tokens = { + 'root': [ + (r'(=+)(.*?)(\1)', headline_callback) + ] + } + +If the regex for the `headline_callback` matches, the function is called with +the match object. Note that after the callback is done, processing continues +normally, that is, after the end of the previous match. The callback has no +possibility to influence the position. + +There are not really any simple examples for lexer callbacks, but you can see +them in action e.g. in the [-`compiled.py`_ source code-] {+`SMLLexer` class+} in [-the `CLexer` and +`JavaLexer` classes.-] {+`ml.py`_.+} + +.. [-_compiled.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/compiled.py-] {+_ml.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ml.py+} + + +The ExtendedRegexLexer class +============================ + +The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for +the funky syntax rules of [-some-] languages [-that will go unnamed,-] such as Ruby. + +But fear not; even then you don't have to abandon the regular expression +[-approach. For-] +{+approach:+} Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`. +All features known from RegexLexers are available here too, and the tokens are +specified in exactly the same way, *except* for one detail: + +The `get_tokens_unprocessed()` method holds its internal state data not as local +variables, but in an instance of the `pygments.lexer.LexerContext` class, and +that instance is passed to callbacks as a third argument. This means that you +can modify the lexer state in callbacks. 
+ +The `LexerContext` class has the following members: + +* `text` -- the input text +* `pos` -- the current starting position that is used for matching regexes +* `stack` -- a list containing the state stack +* `end` -- the maximum position to which regexes are matched, this defaults to + the length of `text` + +Additionally, the `get_tokens_unprocessed()` method can be given a +`LexerContext` instead of a string and will then process this context instead of +creating a new one for the string argument. + +Note that because you can set the current position to anything in the callback, +it won't be automatically be set by the caller after the callback is finished. +For example, this is how the hypothetical lexer above would be written with the +[-`ExtendedRegexLexer`: + +.. sourcecode:: python-] +{+`ExtendedRegexLexer`::+} + + from pygments.lexer import ExtendedRegexLexer + from pygments.token import Generic + + class ExHypotheticLexer(ExtendedRegexLexer): + + def headline_callback(lexer, match, ctx): + equal_signs = match.group(1) + text = match.group(2) + yield match.start(), Generic.Headline, equal_signs + text + equal_signs + ctx.pos = match.end() + + tokens = { + 'root': [ + (r'(=+)(.*?)(\1)', headline_callback) + ] + } + +This might sound confusing (and it can really be). But it is needed, and for an +example look at the Ruby lexer in [-`agile.py`_.-] {+`ruby.py`_.+} + +.. [-_agile.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/agile.py + + +Filtering-] {+_ruby.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ruby.py + + +Handling Lists of Keywords +========================== + +For a relatively short list (hundreds) you can construct an optimized regular +expression directly using ``words()`` (longer lists, see next section). This +function handles a few things for you automatically, including escaping +metacharacters and Python's first-match rather than longest-match in +alternations. Feel free to put the lists themselves in +``pygments/lexers/_$lang_builtins.py`` (see examples there), and generated by +code if possible. + +An example of using ``words()`` is something like:: + + from pygments.lexer import RegexLexer, words, Name + + class MyLexer(RegexLexer): + + tokens = { + 'root': [ + (words(('else', 'elseif'), suffix=r'\b'), Name.Builtin), + (r'\w+', Name), + ], + } + +As you can see, you can add ``prefix`` and ``suffix`` parts to the constructed +regex. + + +Modifying+} Token Streams +======================= + +Some languages ship a lot of builtin functions (for example PHP). The total +amount of those functions differs from system to system because not everybody +has every extension installed. In the case of PHP there are over 3000 builtin +functions. That's an [-incredible-] {+incredibly+} huge amount of functions, much more than you +[-can-] +{+want to+} put into a regular expression. + +But because only `Name` tokens can be function names [-it's-] {+this is+} solvable by +overriding the ``get_tokens_unprocessed()`` method. The following lexer +subclasses the `PythonLexer` so that it highlights some additional names as +pseudo [-keywords: + +.. 
sourcecode:: python-] {+keywords::+} + + from [-pygments.lexers.agile-] {+pygments.lexers.python+} import PythonLexer + from pygments.token import Name, Keyword + + class MyPythonLexer(PythonLexer): + EXTRA_KEYWORDS = [-['foo',-] {+set(('foo',+} 'bar', 'foobar', 'barfoo', 'spam', [-'eggs']-] {+'eggs'))+} + + def get_tokens_unprocessed(self, text): + for index, token, value in PythonLexer.get_tokens_unprocessed(self, text): + if token is Name and value in self.EXTRA_KEYWORDS: + yield index, Keyword.Pseudo, value + else: + yield index, token, value + +The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions. + +[-.. note:: Do not confuse this with the :doc:`filter <filters>` system.-] diff --git a/tests/examplefiles/wdiff_example3.wdiff b/tests/examplefiles/wdiff_example3.wdiff new file mode 100644 index 00000000..0bbd6d65 --- /dev/null +++ b/tests/examplefiles/wdiff_example3.wdiff @@ -0,0 +1,10 @@ +This example is unbalanced open-close. +We can't treat these easily. + +{+ added? -] +[- deleted? +} + +suddenly closed -] +suddenly closed +} + +{+ added? [- deleted? diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py index bb667c05..90d05ef8 100644 --- a/tests/test_lexers_other.py +++ b/tests/test_lexers_other.py @@ -13,6 +13,7 @@ import unittest from pygments.lexers import guess_lexer from pygments.lexers.scripting import EasytrieveLexer, JclLexer, RexxLexer + def _exampleFilePath(filename): return os.path.join(os.path.dirname(__file__), 'examplefiles', filename) @@ -28,8 +29,8 @@ class AnalyseTextTest(unittest.TestCase): text = fp.read().decode('utf-8') probability = lexer.analyse_text(text) self.assertTrue(probability > 0, - '%s must recognize %r' % ( - lexer.name, exampleFilePath)) + '%s must recognize %r' % ( + lexer.name, exampleFilePath)) guessedLexer = guess_lexer(text) self.assertEqual(guessedLexer.name, lexer.name) @@ -45,25 +46,24 @@ class AnalyseTextTest(unittest.TestCase): class EasyTrieveLexerTest(unittest.TestCase): def testCanGuessFromText(self): - self.assertLess(0, EasytrieveLexer.analyse_text('MACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text('\nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text(' \nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text(' \n MACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text('*\nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text( + self.assertTrue(EasytrieveLexer.analyse_text('MACRO')) + self.assertTrue(EasytrieveLexer.analyse_text('\nMACRO')) + self.assertTrue(EasytrieveLexer.analyse_text(' \nMACRO')) + self.assertTrue(EasytrieveLexer.analyse_text(' \n MACRO')) + self.assertTrue(EasytrieveLexer.analyse_text('*\nMACRO')) + self.assertTrue(EasytrieveLexer.analyse_text( '*\n *\n\n \n*\n MACRO')) class RexxLexerTest(unittest.TestCase): def testCanGuessFromText(self): - self.assertAlmostEqual(0.01, - RexxLexer.analyse_text('/* */')) + self.assertAlmostEqual(0.01, RexxLexer.analyse_text('/* */')) self.assertAlmostEqual(1.0, - RexxLexer.analyse_text('''/* Rexx */ + RexxLexer.analyse_text('''/* Rexx */ say "hello world"''')) val = RexxLexer.analyse_text('/* */\n' - 'hello:pRoceduRe\n' - ' say "hello world"') + 'hello:pRoceduRe\n' + ' say "hello world"') self.assertTrue(val > 0.5, val) val = RexxLexer.analyse_text('''/* */ if 1 > 0 then do diff --git a/tests/test_terminal_formatter.py b/tests/test_terminal_formatter.py index 07337cd5..cb5c6c44 100644 --- a/tests/test_terminal_formatter.py +++ b/tests/test_terminal_formatter.py @@ -14,7 +14,13 @@ import re from pygments.util 
import StringIO
 
 from pygments.lexers.sql import PlPgsqlLexer
-from pygments.formatters import TerminalFormatter
+from pygments.formatters import TerminalFormatter, Terminal256Formatter, \
+    HtmlFormatter, LatexFormatter
+
+from pygments.style import Style
+from pygments.token import Token
+from pygments.lexers import Python3Lexer
+from pygments import highlight
 
 DEMO_TEXT = '''\
 -- comment
@@ -26,9 +32,11 @@
 DEMO_TOKENS = list(DEMO_LEXER().get_tokens(DEMO_TEXT))
 
 ANSI_RE = re.compile(r'\x1b[\w\W]*?m')
 
+
 def strip_ansi(x):
     return ANSI_RE.sub('', x)
 
+
 class TerminalFormatterTest(unittest.TestCase):
     def test_reasonable_output(self):
         out = StringIO()
@@ -49,3 +57,46 @@ class TerminalFormatterTest(unittest.TestCase):
 
         for a, b in zip(DEMO_TEXT.splitlines(), plain.splitlines()):
             self.assertTrue(a in b)
+
+
+class MyStyle(Style):
+    styles = {
+        Token.Comment: '#ansidarkgray',
+        Token.String: '#ansiblue bg:#ansidarkred',
+        Token.Number: '#ansigreen bg:#ansidarkgreen',
+        Token.Number.Hex: '#ansidarkgreen bg:#ansired',
+    }
+
+
+class Terminal256FormatterTest(unittest.TestCase):
+    code = '''
+# this should be a comment
+print("Hello World")
+async def function(a,b,c, *d, **kwarg:Bool)->Bool:
+    pass
+    return 123, 0xb3e3
+
+'''
+
+    def test_style_html(self):
+        style = HtmlFormatter(style=MyStyle).get_style_defs()
+        self.assertTrue('#555555' in style,
+                        "ansigray for comment not html css style")
+
+    def test_others_work(self):
+        """check other formatters don't crash"""
+        highlight(self.code, Python3Lexer(), LatexFormatter(style=MyStyle))
+        highlight(self.code, Python3Lexer(), HtmlFormatter(style=MyStyle))
+
+    def test_256esc_seq(self):
+        """
+        test that a few escape sequences are actually used when using #ansi<> color codes
+        """
+        def termtest(x):
+            return highlight(x, Python3Lexer(),
+                             Terminal256Formatter(style=MyStyle))
+
+        self.assertTrue('32;41' in termtest('0x123'))
+        self.assertTrue('32;42' in termtest('123'))
+        self.assertTrue('30;01' in termtest('#comment'))
+        self.assertTrue('34;41' in termtest('"String"'))
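
For reference, a minimal usage sketch of the feature this changeset adds, kept outside the patch itself: with Pygments 2.2, a style may reference terminal palette entries via the new ``#ansi...`` color names, which ``Terminal256Formatter`` then emits as plain ANSI escape sequences rather than approximated 256-color codes. The style below is illustrative only (``DemoAnsiStyle`` is not part of the changeset); its token mappings mirror ``MyStyle`` from the test above.

    from pygments import highlight
    from pygments.formatters import Terminal256Formatter
    from pygments.lexers import Python3Lexer
    from pygments.style import Style
    from pygments.token import Comment, String


    class DemoAnsiStyle(Style):
        """Illustrative style: #ansi color names map to the terminal palette."""
        styles = {
            Comment: '#ansidarkgray',             # emitted as SGR 30;01, per the test above
            String: '#ansiblue bg:#ansidarkred',  # blue text on a dark red background
        }


    # Prints the snippet highlighted with raw ANSI escape sequences.
    print(highlight('print("hello")  # demo', Python3Lexer(),
                    Terminal256Formatter(style=DemoAnsiStyle)))

Run in a terminal, the comment appears in the palette's dark gray and the string as blue on dark red, however the user's terminal theme renders those palette slots.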