author     Georg Brandl <georg@python.org>   2016-02-17 09:35:28 +0100
committer  Georg Brandl <georg@python.org>   2016-02-17 09:35:28 +0100
commit     d86bc7cb6ebe5193ec1217b0cfa3a6b3164caa5a (patch)
tree       78b9a0ad7093c970b641040d480e2c0534715fe9
parent     0331bdc908b1b3529c5603b739744e65eaf8a9df (diff)
parent     b52b5fd49d18cd8da1fd5acc0712db848256b1bd (diff)
download   pygments-d86bc7cb6ebe5193ec1217b0cfa3a6b3164caa5a.tar.gz
Merged in cheesinglee/pygments-main (pull request #551)
Add Flatline lexer
-rw-r--r--  AUTHORS                                  |    6
-rw-r--r--  CHANGES                                  |   16
-rw-r--r--  doc/docs/lexerdevelopment.rst            |   18
-rw-r--r--  doc/docs/styles.rst                      |   56
-rw-r--r--  doc/docs/tokens.rst                      |    8
-rw-r--r--  pygments/__init__.py                     |   22
-rw-r--r--  pygments/console.py                      |   24
-rw-r--r--  pygments/filter.py                       |    8
-rw-r--r--  pygments/formatter.py                    |    2
-rw-r--r--  pygments/formatters/terminal256.py       |   36
-rw-r--r--  pygments/lexer.py                        |    4
-rw-r--r--  pygments/lexers/_mapping.py              |    4
-rw-r--r--  pygments/lexers/algebra.py               |    6
-rw-r--r--  pygments/lexers/ampl.py                  |   63
-rw-r--r--  pygments/lexers/business.py              |  106
-rw-r--r--  pygments/lexers/c_cpp.py                 |   19
-rw-r--r--  pygments/lexers/clean.py                 |  275
-rw-r--r--  pygments/lexers/configs.py               |   32
-rw-r--r--  pygments/lexers/csound.py                |   30
-rw-r--r--  pygments/lexers/css.py                   |    6
-rw-r--r--  pygments/lexers/diff.py                  |   61
-rw-r--r--  pygments/lexers/dotnet.py                |   45
-rw-r--r--  pygments/lexers/dsls.py                  |    6
-rw-r--r--  pygments/lexers/elm.py                   |   14
-rw-r--r--  pygments/lexers/erlang.py                |    8
-rw-r--r--  pygments/lexers/esoteric.py              |    4
-rw-r--r--  pygments/lexers/felix.py                 |    2
-rw-r--r--  pygments/lexers/fortran.py               |    9
-rw-r--r--  pygments/lexers/haskell.py               |    2
-rw-r--r--  pygments/lexers/idl.py                   |   13
-rw-r--r--  pygments/lexers/j.py                     |    2
-rw-r--r--  pygments/lexers/javascript.py            |   80
-rw-r--r--  pygments/lexers/julia.py                 |   48
-rw-r--r--  pygments/lexers/jvm.py                   |    4
-rw-r--r--  pygments/lexers/lisp.py                  |   83
-rw-r--r--  pygments/lexers/modula2.py               |    2
-rw-r--r--  pygments/lexers/oberon.py                |    6
-rw-r--r--  pygments/lexers/parasail.py              |    2
-rw-r--r--  pygments/lexers/perl.py                  |    3
-rw-r--r--  pygments/lexers/php.py                   |    4
-rw-r--r--  pygments/lexers/praat.py                 |   34
-rw-r--r--  pygments/lexers/python.py                |   64
-rw-r--r--  pygments/lexers/qvt.py                   |   84
-rw-r--r--  pygments/lexers/rdf.py                   |   10
-rw-r--r--  pygments/lexers/ruby.py                  |    8
-rw-r--r--  pygments/lexers/scripting.py             |   21
-rw-r--r--  pygments/lexers/sql.py                   |   45
-rw-r--r--  pygments/lexers/supercollider.py         |    8
-rw-r--r--  pygments/lexers/testing.py               |    2
-rw-r--r--  pygments/lexers/theorem.py               |   31
-rw-r--r--  pygments/lexers/typoscript.py            |   62
-rw-r--r--  pygments/lexers/varnish.py               |  190
-rw-r--r--  pygments/scanner.py                      |    3
-rw-r--r--  pygments/sphinxext.py                    |    1
-rw-r--r--  pygments/style.py                        |   41
-rw-r--r--  pygments/styles/arduino.py               |    2
-rw-r--r--  pygments/styles/lovelace.py              |    2
-rw-r--r--  pygments/token.py                        |   33
-rwxr-xr-x  setup.py                                 |    2
-rw-r--r--  tests/examplefiles/StdGeneric.icl        |   92
-rw-r--r--  tests/examplefiles/example2.cpp          |   20
-rw-r--r--  tests/examplefiles/postgresql_test.txt   |   34
-rw-r--r--  tests/examplefiles/test.php              |    5
-rw-r--r--  tests/examplefiles/varnish.vcl           |  187
-rw-r--r--  tests/examplefiles/wdiff_example1.wdiff  |  731
-rw-r--r--  tests/examplefiles/wdiff_example3.wdiff  |   10
-rw-r--r--  tests/test_lexers_other.py               |   26
-rw-r--r--  tests/test_terminal_formatter.py         |   53
68 files changed, 2409 insertions, 531 deletions
diff --git a/AUTHORS b/AUTHORS
index aeb63ea1..68004c89 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -33,6 +33,7 @@ Other contributors, listed alphabetically, are:
* Adam Blinkinsop -- Haskell, Redcode lexers
* Frits van Bommel -- assembler lexers
* Pierre Bourdon -- bugfixes
+* Matthias Bussonnier -- ANSI style handling for terminal-256 formatter
* chebee7i -- Python traceback lexer improvements
* Hiram Chirino -- Scaml and Jade lexers
* Ian Cooper -- VGL lexer
@@ -88,7 +89,8 @@ Other contributors, listed alphabetically, are:
* Tim Howard -- BlitzMax lexer
* Dustin Howett -- Logos lexer
* Ivan Inozemtsev -- Fantom lexer
-* Hiroaki Itoh -- Shell console rewrite, Lexers for PowerShell session, MSDOS session, BC
+* Hiroaki Itoh -- Shell console rewrite, Lexers for PowerShell session,
+ MSDOS session, BC, WDiff
* Brian R. Jackson -- Tea lexer
* Christian Jann -- ShellSession lexer
* Dennis Kaarsemaker -- sources.list lexer
@@ -110,6 +112,7 @@ Other contributors, listed alphabetically, are:
* Jon Larimer, Google Inc. -- Smali lexer
* Olov Lassus -- Dart lexer
* Matt Layman -- TAP lexer
+* Kristian Lyngstøl -- Varnish lexers
* Sylvestre Ledru -- Scilab lexer
* Chee Sing Lee -- Flatline lexer
* Mark Lee -- Vala lexer
@@ -173,6 +176,7 @@ Other contributors, listed alphabetically, are:
* Alexander Smishlajev -- Visual FoxPro lexer
* Steve Spigarelli -- XQuery lexer
* Jerome St-Louis -- eC lexer
+* Camil Staps -- Clean lexer
* James Strachan -- Kotlin lexer
* Tom Stuart -- Treetop lexer
* Colin Sullivan -- SuperCollider lexer
diff --git a/CHANGES b/CHANGES
index e88582c8..edf4e0b6 100644
--- a/CHANGES
+++ b/CHANGES
@@ -15,16 +15,28 @@ Version 2.2
* AMPL
* TypoScript (#1173)
+ * Varnish config (PR#554)
+ * Clean (PR#503)
+ * WDiff (PR#513)
-- Added `lexers.find_lexer_class_by_name()` (#1203)
+- Added `lexers.find_lexer_class_by_name()`. (#1203)
- Added new token types and lexing for magic methods and variables in Python
and PHP.
+- Added a new token type for string affixes and lexing for them in Python, C++
+ and Postgresql lexers.
+
+- Added a new token type for heredoc (and similar) string delimiters and
+ lexing for them in C++, Perl, PHP, Postgresql and Ruby lexers.
+
+- Styles can now define colors with ANSI colors for use in the 256-color
+ terminal formatter. (PR#531)
+
Version 2.1.1
-------------
-(in development)
+(released Feb 14, 2016)
- Fixed Jython compatibility (#1205)
- Fixed HTML formatter output with leading empty lines (#1111)
diff --git a/doc/docs/lexerdevelopment.rst b/doc/docs/lexerdevelopment.rst
index 2c868440..fd6e76b9 100644
--- a/doc/docs/lexerdevelopment.rst
+++ b/doc/docs/lexerdevelopment.rst
@@ -88,10 +88,16 @@ one.
Adding and testing a new lexer
==============================
-To make Pygments aware of your new lexer, you have to perform the following
-steps:
+Using a lexer that is not part of Pygments can be done via the Python API. You
+can import and instantiate the lexer, and pass it to :func:`pygments.highlight`.
-First, change to the current directory containing the Pygments source code:
+To prepare your new lexer for inclusion in the Pygments distribution, so that it
+will be found when passing filenames or lexer aliases from the command line, you
+have to perform the following steps.
+
+First, change into the directory containing the Pygments source code. You
+will need to have either an unpacked source tarball, or (preferably) a copy
+cloned from BitBucket.
.. code-block:: console
@@ -101,11 +107,13 @@ Select a matching module under ``pygments/lexers``, or create a new module for
your lexer class.
Next, make sure the lexer is known from outside of the module. All modules in
-the ``pygments.lexers`` specify ``__all__``. For example, ``esoteric.py`` sets::
+the ``pygments.lexers`` package specify ``__all__``. For example,
+``esoteric.py`` sets::
__all__ = ['BrainfuckLexer', 'BefungeLexer', ...]
-Simply add the name of your lexer class to this list.
+Add the name of your lexer class to this list (or create the list if your lexer
+is the only class in the module).
Finally the lexer can be made publicly known by rebuilding the lexer mapping:
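
As a minimal sketch of the API route mentioned above (``mymodule`` and
``MyLexer`` are hypothetical placeholder names, not part of this patch):

.. code-block:: python

    from pygments import highlight
    from pygments.formatters import HtmlFormatter

    # Hypothetical module and class names -- substitute your own,
    # not-yet-registered lexer here.
    from mymodule import MyLexer

    code = 'some source text'
    # Pass a lexer *instance*, not the class (see pygments/__init__.py below).
    print(highlight(code, MyLexer(), HtmlFormatter()))
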
diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst
index d56db0db..1094a270 100644
--- a/doc/docs/styles.rst
+++ b/doc/docs/styles.rst
@@ -143,3 +143,59 @@ a way to iterate over all styles:
>>> from pygments.styles import get_all_styles
>>> styles = list(get_all_styles())
+
+
+.. _AnsiTerminalStyle:
+
+Terminal Styles
+===============
+
+.. versionadded:: 2.2
+
+Custom styles used with the 256-color terminal formatter can also map colors to
+use the 8 default ANSI colors. To do so, use ``#ansigreen``, ``#ansired`` or
+any other colors defined in :attr:`pygments.style.ansicolors`. Foreground ANSI
+colors will be mapped to the corresponding `escape codes 30 to 37
+<https://en.wikipedia.org/wiki/ANSI_escape_code#Colors>`_ thus respecting any
+custom color mapping and themes provided by many terminal emulators. Light
+variants are treated as a foreground color with an added bold flag.
+``bg:#ansi<color>`` will also be respected, except that light variants will be
+the same shade as their dark variants.
+
+See the following example, where the color of the string ``"Hello World"`` is
+governed by the escape sequence ``\x1b[34;41;01m`` (ANSI blue foreground, bold,
+41 being the red background) instead of an extended foreground & background
+color.
+
+.. sourcecode:: pycon
+
+ >>> from pygments import highlight
+ >>> from pygments.style import Style
+ >>> from pygments.token import Token
+ >>> from pygments.lexers import Python3Lexer
+ >>> from pygments.formatters import Terminal256Formatter
+
+ >>> class MyStyle(Style):
+ styles = {
+ Token.String: '#ansiblue bg:#ansired',
+ }
+
+ >>> code = 'print("Hello World")'
+ >>> result = highlight(code, Python3Lexer(), Terminal256Formatter(style=MyStyle))
+ >>> print(result.encode())
+ b'\x1b[34;41;01m"\x1b[39;49;00m\x1b[34;41;01mHello World\x1b[39;49;00m\x1b[34;41;01m"\x1b[39;49;00m'
+
+Colors specified using ``#ansi*`` are converted to a default set of RGB colors
+when used with formatters other than the terminal-256 formatter.
+
+By definition of ANSI, the following colors are considered "light" colors, and
+will be rendered by most terminals as bold:
+
+- "darkgray", "red", "green", "yellow", "blue", "fuchsia", "turquoise", "white"
+
+The following are considered "dark" colors and will be rendered as non-bold:
+
+- "black", "darkred", "darkgreen", "brown", "darkblue", "purple", "teal",
+ "lightgray"
+
+Exact behavior might depend on the terminal emulator you are using, and its
+settings.
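
As a rough illustration of the fallback described above (a sketch, assuming the
2.2 behavior introduced by this patch): the same ``#ansi*`` style produces
classic ANSI escape codes in the 256-color terminal formatter, but a default
RGB color in any other formatter.

.. sourcecode:: pycon

    >>> from pygments import highlight
    >>> from pygments.formatters import HtmlFormatter, Terminal256Formatter
    >>> from pygments.lexers import Python3Lexer
    >>> from pygments.style import Style
    >>> from pygments.token import Token

    >>> class MyStyle(Style):
    ...     styles = {Token.String: '#ansiblue'}

    >>> code = 'print("Hello World")'
    >>> # escape code 34 (blue foreground) shows up in terminal output ...
    >>> '\x1b[34' in highlight(code, Python3Lexer(), Terminal256Formatter(style=MyStyle))
    True
    >>> # ... while HtmlFormatter falls back to a default RGB color.
    >>> html = highlight(code, Python3Lexer(), HtmlFormatter(style=MyStyle))
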
diff --git a/doc/docs/tokens.rst b/doc/docs/tokens.rst
index 96a6d003..801fc638 100644
--- a/doc/docs/tokens.rst
+++ b/doc/docs/tokens.rst
@@ -223,12 +223,20 @@ Literals
`String`
For any string literal.
+`String.Affix`
+ Token type for affixes that further specify the type of the string they're
+ attached to (e.g. the prefixes ``r`` and ``u8`` in ``r"foo"`` and ``u8"foo"``).
+
`String.Backtick`
Token type for strings enclosed in backticks.
`String.Char`
Token type for single characters (e.g. Java, C).
+`String.Delimiter`
+ Token type for delimiting identifiers in "heredoc", raw and other similar
+ strings (e.g. the word ``END`` in Perl code ``print <<'END';``).
+
`String.Doc`
Token type for documentation strings (for example Python).
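
A hedged sketch of how a lexer rule might emit these new token types, modeled
on the C/C++ changes further down in this commit (``MiniStringLexer`` is
illustrative only, not part of the patch):

.. code-block:: python

    from pygments.lexer import RegexLexer, bygroups
    from pygments.token import String, Text

    class MiniStringLexer(RegexLexer):
        """Hypothetical lexer for prefixed strings like u8"foo"."""
        name = 'MiniString'
        tokens = {
            'root': [
                # The prefix becomes String.Affix; quotes and body stay String.
                (r'(u8|u|U|L)?(")([^"]*)(")',
                 bygroups(String.Affix, String, String, String)),
                (r'\s+', Text),
            ],
        }
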
diff --git a/pygments/__init__.py b/pygments/__init__.py
index 7bd7557f..ffac59ef 100644
--- a/pygments/__init__.py
+++ b/pygments/__init__.py
@@ -25,18 +25,16 @@
:copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
+import sys
+
+from pygments.util import StringIO, BytesIO
-__version__ = '2.1'
+__version__ = '2.2a0'
__docformat__ = 'restructuredtext'
__all__ = ['lex', 'format', 'highlight']
-import sys
-
-from pygments.util import StringIO, BytesIO
-
-
def lex(code, lexer):
"""
Lex ``code`` with ``lexer`` and return an iterable of tokens.
@@ -44,9 +42,9 @@ def lex(code, lexer):
try:
return lexer.get_tokens(code)
except TypeError as err:
- if isinstance(err.args[0], str) and \
- ('unbound method get_tokens' in err.args[0] or
- 'missing 1 required positional argument' in err.args[0]):
+ if (isinstance(err.args[0], str) and
+ ('unbound method get_tokens' in err.args[0] or
+ 'missing 1 required positional argument' in err.args[0])):
raise TypeError('lex() argument must be a lexer instance, '
'not a class')
raise
@@ -68,9 +66,9 @@ def format(tokens, formatter, outfile=None): # pylint: disable=redefined-builti
else:
formatter.format(tokens, outfile)
except TypeError as err:
- if isinstance(err.args[0], str) and \
- ('unbound method format' in err.args[0] or
- 'missing 1 required positional argument' in err.args[0]):
+ if (isinstance(err.args[0], str) and
+ ('unbound method format' in err.args[0] or
+ 'missing 1 required positional argument' in err.args[0])):
raise TypeError('format() argument must be a formatter instance, '
'not a class')
raise
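
The condition is only reformatted here, but its purpose is worth a sketch:
both helpers reject being handed a class instead of an instance (output
reprs abbreviated).

.. sourcecode:: pycon

    >>> from pygments import lex
    >>> from pygments.lexers import PythonLexer

    >>> next(iter(lex('42', PythonLexer())))     # instance: fine
    (Token.Literal.Number.Integer, '42')
    >>> lex('42', PythonLexer)                   # class: rejected
    Traceback (most recent call last):
      ...
    TypeError: lex() argument must be a lexer instance, not a class
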
diff --git a/pygments/console.py b/pygments/console.py
index 4a2c9acb..4aaf5fcb 100644
--- a/pygments/console.py
+++ b/pygments/console.py
@@ -12,18 +12,18 @@
esc = "\x1b["
codes = {}
-codes[""] = ""
-codes["reset"] = esc + "39;49;00m"
+codes[""] = ""
+codes["reset"] = esc + "39;49;00m"
-codes["bold"] = esc + "01m"
-codes["faint"] = esc + "02m"
-codes["standout"] = esc + "03m"
+codes["bold"] = esc + "01m"
+codes["faint"] = esc + "02m"
+codes["standout"] = esc + "03m"
codes["underline"] = esc + "04m"
-codes["blink"] = esc + "05m"
-codes["overline"] = esc + "06m"
+codes["blink"] = esc + "05m"
+codes["overline"] = esc + "06m"
-dark_colors = ["black", "darkred", "darkgreen", "brown", "darkblue",
- "purple", "teal", "lightgray"]
+dark_colors = ["black", "darkred", "darkgreen", "brown", "darkblue",
+ "purple", "teal", "lightgray"]
light_colors = ["darkgray", "red", "green", "yellow", "blue",
"fuchsia", "turquoise", "white"]
@@ -35,10 +35,10 @@ for d, l in zip(dark_colors, light_colors):
del d, l, x
-codes["darkteal"] = codes["turquoise"]
+codes["darkteal"] = codes["turquoise"]
codes["darkyellow"] = codes["brown"]
-codes["fuscia"] = codes["fuchsia"]
-codes["white"] = codes["bold"]
+codes["fuscia"] = codes["fuchsia"]
+codes["white"] = codes["bold"]
def reset_color():
diff --git a/pygments/filter.py b/pygments/filter.py
index c8176ed9..f3082037 100644
--- a/pygments/filter.py
+++ b/pygments/filter.py
@@ -34,10 +34,10 @@ def simplefilter(f):
yield ttype, value.lower()
"""
return type(f.__name__, (FunctionFilter,), {
- 'function': f,
- '__module__': getattr(f, '__module__'),
- '__doc__': f.__doc__
- })
+ '__module__': getattr(f, '__module__'),
+ '__doc__': f.__doc__,
+ 'function': f,
+ })
class Filter(object):
diff --git a/pygments/formatter.py b/pygments/formatter.py
index addd07d7..9f22b3bc 100644
--- a/pygments/formatter.py
+++ b/pygments/formatter.py
@@ -65,7 +65,7 @@ class Formatter(object):
def __init__(self, **options):
self.style = _lookup_style(options.get('style', 'default'))
- self.full = get_bool_opt(options, 'full', False)
+ self.full = get_bool_opt(options, 'full', False)
self.title = options.get('title', '')
self.encoding = options.get('encoding', None) or None
if self.encoding in ('guess', 'chardet'):
diff --git a/pygments/formatters/terminal256.py b/pygments/formatters/terminal256.py
index af311955..5110bc9e 100644
--- a/pygments/formatters/terminal256.py
+++ b/pygments/formatters/terminal256.py
@@ -27,6 +27,8 @@
import sys
from pygments.formatter import Formatter
+from pygments.console import codes
+from pygments.style import ansicolors
__all__ = ['Terminal256Formatter', 'TerminalTrueColorFormatter']
@@ -47,9 +49,21 @@ class EscapeSequence:
def color_string(self):
attrs = []
if self.fg is not None:
- attrs.extend(("38", "5", "%i" % self.fg))
+ if self.fg in ansicolors:
+ esc = codes[self.fg[5:]]
+ if ';01m' in esc:
+ self.bold = True
+ # extract fg color code.
+ attrs.append(esc[2:4])
+ else:
+ attrs.extend(("38", "5", "%i" % self.fg))
if self.bg is not None:
- attrs.extend(("48", "5", "%i" % self.bg))
+ if self.bg in ansicolors:
+ esc = codes[self.bg[5:]]
+ # extract fg color code, add 10 for bg.
+ attrs.append(str(int(esc[2:4])+10))
+ else:
+ attrs.extend(("48", "5", "%i" % self.bg))
if self.bold:
attrs.append("01")
if self.underline:
@@ -91,6 +105,11 @@ class Terminal256Formatter(Formatter):
.. versionadded:: 0.9
+ .. versionchanged:: 2.2
+       If the style in use defines foreground colors in the form ``#ansi*``, then
+       `Terminal256Formatter` will map these to non-extended foreground colors.
+ See :ref:`AnsiTerminalStyle` for more information.
+
Options accepted:
`style`
@@ -169,6 +188,10 @@ class Terminal256Formatter(Formatter):
def _color_index(self, color):
index = self.best_match.get(color, None)
+ if color in ansicolors:
+ # strip the `#ansi` part and look up code
+ index = color
+ self.best_match[color] = index
if index is None:
try:
rgb = int(str(color), 16)
@@ -185,9 +208,14 @@ class Terminal256Formatter(Formatter):
def _setup_styles(self):
for ttype, ndef in self.style:
escape = EscapeSequence()
- if ndef['color']:
+ # get foreground from ansicolor if set
+ if ndef['ansicolor']:
+ escape.fg = self._color_index(ndef['ansicolor'])
+ elif ndef['color']:
escape.fg = self._color_index(ndef['color'])
- if ndef['bgcolor']:
+ if ndef['bgansicolor']:
+ escape.bg = self._color_index(ndef['bgansicolor'])
+ elif ndef['bgcolor']:
escape.bg = self._color_index(ndef['bgcolor'])
if self.usebold and ndef['bold']:
escape.bold = True
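
The effect of the new branches can be seen directly on ``EscapeSequence``
(a sketch, assuming the patched module): an ``#ansi*`` name is translated into
the classic 30-37 foreground / 40-47 background codes instead of an extended
``38;5;N`` sequence.

.. sourcecode:: pycon

    >>> from pygments.formatters.terminal256 import EscapeSequence

    >>> # 34 = blue foreground, 41 = red background (31 + 10),
    >>> # 01 = bold because "blue" is one of the light ANSI colors.
    >>> EscapeSequence(fg='#ansiblue', bg='#ansired').color_string()
    '\x1b[34;41;01m'
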
diff --git a/pygments/lexer.py b/pygments/lexer.py
index dd6c01e4..f16d8106 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -319,8 +319,8 @@ def bygroups(*args):
if data is not None:
if ctx:
ctx.pos = match.start(i + 1)
- for item in action(
- lexer, _PseudoMatch(match.start(i + 1), data), ctx):
+ for item in action(lexer,
+ _PseudoMatch(match.start(i + 1), data), ctx):
if item:
yield item
if ctx:
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
index debb7de5..8650b7e4 100644
--- a/pygments/lexers/_mapping.py
+++ b/pygments/lexers/_mapping.py
@@ -79,6 +79,7 @@ LEXERS = {
'CheetahXmlLexer': ('pygments.lexers.templates', 'XML+Cheetah', ('xml+cheetah', 'xml+spitfire'), (), ('application/xml+cheetah', 'application/xml+spitfire')),
'CirruLexer': ('pygments.lexers.webmisc', 'Cirru', ('cirru',), ('*.cirru',), ('text/x-cirru',)),
'ClayLexer': ('pygments.lexers.c_like', 'Clay', ('clay',), ('*.clay',), ('text/x-clay',)),
+ 'CleanLexer': ('pygments.lexers.clean', 'Clean', ('clean',), ('*.icl', '*.dcl'), ()),
'ClojureLexer': ('pygments.lexers.jvm', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')),
'ClojureScriptLexer': ('pygments.lexers.jvm', 'ClojureScript', ('clojurescript', 'cljs'), ('*.cljs',), ('text/x-clojurescript', 'application/x-clojurescript')),
'CobolFreeformatLexer': ('pygments.lexers.business', 'COBOLFree', ('cobolfree',), ('*.cbl', '*.CBL'), ()),
@@ -397,6 +398,8 @@ LEXERS = {
'TypoScriptHtmlDataLexer': ('pygments.lexers.typoscript', 'TypoScriptHtmlData', ('typoscripthtmldata',), (), ()),
'TypoScriptLexer': ('pygments.lexers.typoscript', 'TypoScript', ('typoscript',), ('*.ts', '*.txt'), ('text/x-typoscript',)),
'UrbiscriptLexer': ('pygments.lexers.urbi', 'UrbiScript', ('urbiscript',), ('*.u',), ('application/x-urbiscript',)),
+ 'VCLLexer': ('pygments.lexers.varnish', 'VCL', ('vcl',), ('*.vcl',), ('text/x-vclsrc',)),
+ 'VCLSnippetLexer': ('pygments.lexers.varnish', 'VCLSnippets', ('vclsnippets', 'vclsnippet'), (), ('text/x-vclsnippet',)),
'VCTreeStatusLexer': ('pygments.lexers.console', 'VCTreeStatus', ('vctreestatus',), (), ()),
'VGLLexer': ('pygments.lexers.dsls', 'VGL', ('vgl',), ('*.rpf',), ()),
'ValaLexer': ('pygments.lexers.c_like', 'Vala', ('vala', 'vapi'), ('*.vala', '*.vapi'), ('text/x-vala',)),
@@ -408,6 +411,7 @@ LEXERS = {
'VerilogLexer': ('pygments.lexers.hdl', 'verilog', ('verilog', 'v'), ('*.v',), ('text/x-verilog',)),
'VhdlLexer': ('pygments.lexers.hdl', 'vhdl', ('vhdl',), ('*.vhdl', '*.vhd'), ('text/x-vhdl',)),
'VimLexer': ('pygments.lexers.textedit', 'VimL', ('vim',), ('*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'), ('text/x-vim',)),
+ 'WDiffLexer': ('pygments.lexers.diff', 'WDiff', ('wdiff',), ('*.wdiff',), ()),
'X10Lexer': ('pygments.lexers.x10', 'X10', ('x10', 'xten'), ('*.x10',), ('text/x-x10',)),
'XQueryLexer': ('pygments.lexers.webmisc', 'XQuery', ('xquery', 'xqy', 'xq', 'xql', 'xqm'), ('*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm'), ('text/xquery', 'application/xquery')),
'XmlDjangoLexer': ('pygments.lexers.templates', 'XML+Django/Jinja', ('xml+django', 'xml+jinja'), (), ('application/xml+django', 'application/xml+jinja')),
diff --git a/pygments/lexers/algebra.py b/pygments/lexers/algebra.py
index fc54c3c3..79460ad4 100644
--- a/pygments/lexers/algebra.py
+++ b/pygments/lexers/algebra.py
@@ -104,9 +104,9 @@ class MathematicaLexer(RegexLexer):
(r'#\d*', Name.Variable),
(r'([a-zA-Z]+[a-zA-Z0-9]*)', Name),
- (r'-?[0-9]+\.[0-9]*', Number.Float),
- (r'-?[0-9]*\.[0-9]+', Number.Float),
- (r'-?[0-9]+', Number.Integer),
+ (r'-?\d+\.\d*', Number.Float),
+ (r'-?\d*\.\d+', Number.Float),
+ (r'-?\d+', Number.Integer),
(words(operators), Operator),
(words(punctuation), Punctuation),
diff --git a/pygments/lexers/ampl.py b/pygments/lexers/ampl.py
index f57b486f..c3ca80d4 100644
--- a/pygments/lexers/ampl.py
+++ b/pygments/lexers/ampl.py
@@ -9,13 +9,10 @@
:license: BSD, see LICENSE for details.
"""
-import re
-
-from pygments.lexer import RegexLexer, bygroups, using, this
+from pygments.lexer import RegexLexer, bygroups, using, this, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation
-
__all__ = ['AmplLexer']
@@ -30,27 +27,30 @@ class AmplLexer(RegexLexer):
filenames = ['*.run']
tokens = {
- 'root':[
+ 'root': [
(r'\n', Text),
(r'\s+', Text.Whitespace),
(r'#.*?\n', Comment.Single),
(r'/[*](.|\n)*?[*]/', Comment.Multiline),
- (r'(call|cd|close|commands|data|delete|display|drop|end|environ|'
- r'exit|expand|include|load|model|objective|option|problem|purge|'
- r'quit|redeclare|reload|remove|reset|restore|shell|show|solexpand|'
- r'solution|solve|update|unload|xref|'
- r'coeff|coef|cover|obj|interval|'
- r'default|from|to|to_come|net_in|net_out|dimen|dimension|'
- r'check|complements|write|end|function|pipe|'
- r'format|if|then|else|in|while|repeat|for)\b', Keyword.Reserved),
- (r'(integer|binary|symbolic|ordered|circular|reversed|IN|INOUT|OUT|LOCAL)',
+ (words((
+ 'call', 'cd', 'close', 'commands', 'data', 'delete', 'display',
+ 'drop', 'end', 'environ', 'exit', 'expand', 'include', 'load',
+ 'model', 'objective', 'option', 'problem', 'purge', 'quit',
+ 'redeclare', 'reload', 'remove', 'reset', 'restore', 'shell',
+ 'show', 'solexpand', 'solution', 'solve', 'update', 'unload',
+ 'xref', 'coeff', 'coef', 'cover', 'obj', 'interval', 'default',
+ 'from', 'to', 'to_come', 'net_in', 'net_out', 'dimen',
+ 'dimension', 'check', 'complements', 'write', 'function',
+ 'pipe', 'format', 'if', 'then', 'else', 'in', 'while', 'repeat',
+ 'for'), suffix=r'\b'), Keyword.Reserved),
+ (r'(integer|binary|symbolic|ordered|circular|reversed|INOUT|IN|OUT|LOCAL)',
Keyword.Type),
(r'\".*?\"', String.Double),
(r'\'.*?\'', String.Single),
(r'[()\[\]{},;:]+', Punctuation),
- (r'\b(\w+)(\.)(astatus|init|init0|lb|lb0|lb1|lb2|lrc|'
- r'lslack|rc|relax|slack|sstatus|status|ub|ub0|ub1|'
- r'ub2|urc|uslack|val)',
+ (r'\b(\w+)(\.)(astatus|init0|init|lb0|lb1|lb2|lb|lrc|'
+ r'lslack|rc|relax|slack|sstatus|status|ub0|ub1|ub2|'
+ r'ub|urc|uslack|val)',
bygroups(Name.Variable, Punctuation, Keyword.Reserved)),
(r'(set|param|var|arc|minimize|maximize|subject to|s\.t\.|subj to|'
r'node|table|suffix|read table|write table)(\s+)(\w+)',
@@ -58,21 +58,26 @@ class AmplLexer(RegexLexer):
(r'(param)(\s*)(:)(\s*)(\w+)(\s*)(:)(\s*)((\w|\s)+)',
bygroups(Keyword.Declaration, Text, Punctuation, Text,
Name.Variable, Text, Punctuation, Text, Name.Variable)),
- (r'(let|fix|unfix)(\s*)(\{.*\}|)(\s*)(\w+)',
+ (r'(let|fix|unfix)(\s*)((?:\{.*\})?)(\s*)(\w+)',
bygroups(Keyword.Declaration, Text, using(this), Text, Name.Variable)),
- (r'\b(abs|acos|acosh|alias|'
- r'asin|asinh|atan|atan2|atanh|ceil|ctime|cos|exp|floor|log|log10|'
- r'max|min|precision|round|sin|sinh|sqrt|tan|tanh|time|trunc|Beta|'
- r'Cauchy|Exponential|Gamma|Irand224|Normal|Normal01|Poisson|Uniform|Uniform01|'
- r'num|num0|ichar|char|length|substr|sprintf|match|sub|gsub|print|printf'
- r'next|nextw|prev|prevw|first|last|ord|ord0|card|arity|indexarity)\b',
- Name.Builtin),
+ (words((
+ 'abs', 'acos', 'acosh', 'alias', 'asin', 'asinh', 'atan', 'atan2',
+ 'atanh', 'ceil', 'ctime', 'cos', 'exp', 'floor', 'log', 'log10',
+ 'max', 'min', 'precision', 'round', 'sin', 'sinh', 'sqrt', 'tan',
+ 'tanh', 'time', 'trunc', 'Beta', 'Cauchy', 'Exponential', 'Gamma',
+ 'Irand224', 'Normal', 'Normal01', 'Poisson', 'Uniform', 'Uniform01',
+ 'num', 'num0', 'ichar', 'char', 'length', 'substr', 'sprintf',
+ 'match', 'sub', 'gsub', 'print', 'printf', 'next', 'nextw', 'prev',
+ 'prevw', 'first', 'last', 'ord', 'ord0', 'card', 'arity',
+ 'indexarity'), prefix=r'\b', suffix=r'\b'), Name.Builtin),
(r'(\+|\-|\*|/|\*\*|=|<=|>=|==|\||\^|<|>|\!|\.\.|:=|\&|\!=|<<|>>)',
Operator),
- (r'(or|exists|forall|and|in|not|within|union|diff|'
- r'difference|symdiff|inter|intersect|intersection|'
- r'cross|setof|by|less|sum|prod|product|div|mod)',
- Keyword.Reserved), #Operator.Name but not enough emphasized with Operator.Name
+ (words((
+ 'or', 'exists', 'forall', 'and', 'in', 'not', 'within', 'union',
+ 'diff', 'difference', 'symdiff', 'inter', 'intersect',
+ 'intersection', 'cross', 'setof', 'by', 'less', 'sum', 'prod',
+ 'product', 'div', 'mod'), suffix=r'\b'),
+ Keyword.Reserved), # Operator.Name but not enough emphasized with that
(r'(\d+\.(?!\.)\d*|\.(?!.)\d+)([eE][+-]?\d+)?', Number.Float),
(r'\d+([eE][+-]?\d+)?', Number.Integer),
(r'[+-]?Infinity', Number.Integer),
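
Most of the rewrites in this file swap hand-written alternations for the
``words()`` helper, which builds one optimized regex and avoids ordering
pitfalls such as the ``INOUT``/``IN`` reordering above. A small sketch of the
equivalence (``words()`` is a lazy Future whose pattern is built by ``get()``):

.. sourcecode:: pycon

    >>> import re
    >>> from pygments.lexer import words

    >>> w = words(('in', 'inout'), suffix=r'\b')
    >>> rx = re.compile(w.get())   # roughly equivalent to r'(in(?:out)?)\b'
    >>> rx.match('inout').group()  # the longer keyword wins automatically
    'inout'
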
diff --git a/pygments/lexers/business.py b/pygments/lexers/business.py
index ea888245..43978690 100644
--- a/pygments/lexers/business.py
+++ b/pygments/lexers/business.py
@@ -57,9 +57,9 @@ class CobolLexer(RegexLexer):
],
'core': [
# Figurative constants
- (r'(^|(?<=[^0-9a-z_\-]))(ALL\s+)?'
+ (r'(^|(?<=[^\w\-]))(ALL\s+)?'
r'((ZEROES)|(HIGH-VALUE|LOW-VALUE|QUOTE|SPACE|ZERO)(S)?)'
- r'\s*($|(?=[^0-9a-z_\-]))',
+ r'\s*($|(?=[^\w\-]))',
Name.Constant),
# Reserved words STATEMENTS and other bolds
@@ -79,8 +79,8 @@ class CobolLexer(RegexLexer):
'RETURN', 'REWRITE', 'SCREEN', 'SD', 'SEARCH', 'SECTION', 'SET',
'SORT', 'START', 'STOP', 'STRING', 'SUBTRACT', 'SUPPRESS',
'TERMINATE', 'THEN', 'UNLOCK', 'UNSTRING', 'USE', 'VALIDATE',
- 'WORKING-STORAGE', 'WRITE'), prefix=r'(^|(?<=[^0-9a-z_\-]))',
- suffix=r'\s*($|(?=[^0-9a-z_\-]))'),
+ 'WORKING-STORAGE', 'WRITE'), prefix=r'(^|(?<=[^\w\-]))',
+ suffix=r'\s*($|(?=[^\w\-]))'),
Keyword.Reserved),
# Reserved words
@@ -89,33 +89,33 @@ class CobolLexer(RegexLexer):
'ALPHABET', 'ALPHABETIC', 'ALPHABETIC-LOWER', 'ALPHABETIC-UPPER',
'ALPHANUMERIC', 'ALPHANUMERIC-EDITED', 'ALSO', 'ALTER', 'ALTERNATE'
'ANY', 'ARE', 'AREA', 'AREAS', 'ARGUMENT-NUMBER', 'ARGUMENT-VALUE', 'AS',
- 'ASCENDING', 'ASSIGN', 'AT', 'AUTO', 'AUTO-SKIP', 'AUTOMATIC', 'AUTOTERMINATE',
- 'BACKGROUND-COLOR', 'BASED', 'BEEP', 'BEFORE', 'BELL',
+ 'ASCENDING', 'ASSIGN', 'AT', 'AUTO', 'AUTO-SKIP', 'AUTOMATIC',
+ 'AUTOTERMINATE', 'BACKGROUND-COLOR', 'BASED', 'BEEP', 'BEFORE', 'BELL',
'BLANK', 'BLINK', 'BLOCK', 'BOTTOM', 'BY', 'BYTE-LENGTH', 'CHAINING',
- 'CHARACTER', 'CHARACTERS', 'CLASS', 'CODE', 'CODE-SET', 'COL', 'COLLATING',
- 'COLS', 'COLUMN', 'COLUMNS', 'COMMA', 'COMMAND-LINE', 'COMMIT', 'COMMON',
- 'CONSTANT', 'CONTAINS', 'CONTENT', 'CONTROL',
+ 'CHARACTER', 'CHARACTERS', 'CLASS', 'CODE', 'CODE-SET', 'COL',
+ 'COLLATING', 'COLS', 'COLUMN', 'COLUMNS', 'COMMA', 'COMMAND-LINE',
+ 'COMMIT', 'COMMON', 'CONSTANT', 'CONTAINS', 'CONTENT', 'CONTROL',
'CONTROLS', 'CONVERTING', 'COPY', 'CORR', 'CORRESPONDING', 'COUNT', 'CRT',
- 'CURRENCY', 'CURSOR', 'CYCLE', 'DATE', 'DAY', 'DAY-OF-WEEK', 'DE', 'DEBUGGING',
- 'DECIMAL-POINT', 'DECLARATIVES', 'DEFAULT', 'DELIMITED',
+ 'CURRENCY', 'CURSOR', 'CYCLE', 'DATE', 'DAY', 'DAY-OF-WEEK', 'DE',
+ 'DEBUGGING', 'DECIMAL-POINT', 'DECLARATIVES', 'DEFAULT', 'DELIMITED',
'DELIMITER', 'DEPENDING', 'DESCENDING', 'DETAIL', 'DISK',
'DOWN', 'DUPLICATES', 'DYNAMIC', 'EBCDIC',
'ENTRY', 'ENVIRONMENT-NAME', 'ENVIRONMENT-VALUE', 'EOL', 'EOP',
'EOS', 'ERASE', 'ERROR', 'ESCAPE', 'EXCEPTION',
- 'EXCLUSIVE', 'EXTEND', 'EXTERNAL',
- 'FILE-ID', 'FILLER', 'FINAL', 'FIRST', 'FIXED', 'FLOAT-LONG', 'FLOAT-SHORT',
- 'FOOTING', 'FOR', 'FOREGROUND-COLOR', 'FORMAT', 'FROM', 'FULL', 'FUNCTION',
- 'FUNCTION-ID', 'GIVING', 'GLOBAL', 'GROUP',
+ 'EXCLUSIVE', 'EXTEND', 'EXTERNAL', 'FILE-ID', 'FILLER', 'FINAL',
+ 'FIRST', 'FIXED', 'FLOAT-LONG', 'FLOAT-SHORT',
+ 'FOOTING', 'FOR', 'FOREGROUND-COLOR', 'FORMAT', 'FROM', 'FULL',
+ 'FUNCTION', 'FUNCTION-ID', 'GIVING', 'GLOBAL', 'GROUP',
'HEADING', 'HIGHLIGHT', 'I-O', 'ID',
'IGNORE', 'IGNORING', 'IN', 'INDEX', 'INDEXED', 'INDICATE',
- 'INITIAL', 'INITIALIZED', 'INPUT',
- 'INTO', 'INTRINSIC', 'INVALID', 'IS', 'JUST', 'JUSTIFIED', 'KEY', 'LABEL',
+ 'INITIAL', 'INITIALIZED', 'INPUT', 'INTO', 'INTRINSIC', 'INVALID',
+ 'IS', 'JUST', 'JUSTIFIED', 'KEY', 'LABEL',
'LAST', 'LEADING', 'LEFT', 'LENGTH', 'LIMIT', 'LIMITS', 'LINAGE',
'LINAGE-COUNTER', 'LINE', 'LINES', 'LOCALE', 'LOCK',
- 'LOWLIGHT', 'MANUAL', 'MEMORY', 'MINUS', 'MODE',
- 'MULTIPLE', 'NATIONAL', 'NATIONAL-EDITED', 'NATIVE',
- 'NEGATIVE', 'NEXT', 'NO', 'NULL', 'NULLS', 'NUMBER', 'NUMBERS', 'NUMERIC',
- 'NUMERIC-EDITED', 'OBJECT-COMPUTER', 'OCCURS', 'OF', 'OFF', 'OMITTED', 'ON', 'ONLY',
+ 'LOWLIGHT', 'MANUAL', 'MEMORY', 'MINUS', 'MODE', 'MULTIPLE',
+ 'NATIONAL', 'NATIONAL-EDITED', 'NATIVE', 'NEGATIVE', 'NEXT', 'NO',
+ 'NULL', 'NULLS', 'NUMBER', 'NUMBERS', 'NUMERIC', 'NUMERIC-EDITED',
+ 'OBJECT-COMPUTER', 'OCCURS', 'OF', 'OFF', 'OMITTED', 'ON', 'ONLY',
'OPTIONAL', 'ORDER', 'ORGANIZATION', 'OTHER', 'OUTPUT', 'OVERFLOW',
'OVERLINE', 'PACKED-DECIMAL', 'PADDING', 'PAGE', 'PARAGRAPH',
'PLUS', 'POINTER', 'POSITION', 'POSITIVE', 'PRESENT', 'PREVIOUS',
@@ -137,40 +137,42 @@ class CobolLexer(RegexLexer):
'UNSIGNED-INT', 'UNSIGNED-LONG', 'UNSIGNED-SHORT', 'UNTIL', 'UP',
'UPDATE', 'UPON', 'USAGE', 'USING', 'VALUE', 'VALUES', 'VARYING',
'WAIT', 'WHEN', 'WITH', 'WORDS', 'YYYYDDD', 'YYYYMMDD'),
- prefix=r'(^|(?<=[^0-9a-z_\-]))', suffix=r'\s*($|(?=[^0-9a-z_\-]))'),
+ prefix=r'(^|(?<=[^\w\-]))', suffix=r'\s*($|(?=[^\w\-]))'),
Keyword.Pseudo),
# inactive reserved words
(words((
- 'ACTIVE-CLASS', 'ALIGNED', 'ANYCASE', 'ARITHMETIC', 'ATTRIBUTE', 'B-AND',
- 'B-NOT', 'B-OR', 'B-XOR', 'BIT', 'BOOLEAN', 'CD', 'CENTER', 'CF', 'CH', 'CHAIN', 'CLASS-ID',
- 'CLASSIFICATION', 'COMMUNICATION', 'CONDITION', 'DATA-POINTER',
- 'DESTINATION', 'DISABLE', 'EC', 'EGI', 'EMI', 'ENABLE', 'END-RECEIVE',
- 'ENTRY-CONVENTION', 'EO', 'ESI', 'EXCEPTION-OBJECT', 'EXPANDS', 'FACTORY',
- 'FLOAT-BINARY-16', 'FLOAT-BINARY-34', 'FLOAT-BINARY-7',
- 'FLOAT-DECIMAL-16', 'FLOAT-DECIMAL-34', 'FLOAT-EXTENDED', 'FORMAT',
- 'FUNCTION-POINTER', 'GET', 'GROUP-USAGE', 'IMPLEMENTS', 'INFINITY',
- 'INHERITS', 'INTERFACE', 'INTERFACE-ID', 'INVOKE', 'LC_ALL', 'LC_COLLATE',
+ 'ACTIVE-CLASS', 'ALIGNED', 'ANYCASE', 'ARITHMETIC', 'ATTRIBUTE',
+ 'B-AND', 'B-NOT', 'B-OR', 'B-XOR', 'BIT', 'BOOLEAN', 'CD', 'CENTER',
+ 'CF', 'CH', 'CHAIN', 'CLASS-ID', 'CLASSIFICATION', 'COMMUNICATION',
+ 'CONDITION', 'DATA-POINTER', 'DESTINATION', 'DISABLE', 'EC', 'EGI',
+ 'EMI', 'ENABLE', 'END-RECEIVE', 'ENTRY-CONVENTION', 'EO', 'ESI',
+ 'EXCEPTION-OBJECT', 'EXPANDS', 'FACTORY', 'FLOAT-BINARY-16',
+ 'FLOAT-BINARY-34', 'FLOAT-BINARY-7', 'FLOAT-DECIMAL-16',
+ 'FLOAT-DECIMAL-34', 'FLOAT-EXTENDED', 'FORMAT', 'FUNCTION-POINTER',
+ 'GET', 'GROUP-USAGE', 'IMPLEMENTS', 'INFINITY', 'INHERITS',
+ 'INTERFACE', 'INTERFACE-ID', 'INVOKE', 'LC_ALL', 'LC_COLLATE',
'LC_CTYPE', 'LC_MESSAGES', 'LC_MONETARY', 'LC_NUMERIC', 'LC_TIME',
- 'LINE-COUNTER', 'MESSAGE', 'METHOD', 'METHOD-ID', 'NESTED', 'NONE', 'NORMAL',
- 'OBJECT', 'OBJECT-REFERENCE', 'OPTIONS', 'OVERRIDE', 'PAGE-COUNTER', 'PF', 'PH',
- 'PROPERTY', 'PROTOTYPE', 'PURGE', 'QUEUE', 'RAISE', 'RAISING', 'RECEIVE',
- 'RELATION', 'REPLACE', 'REPRESENTS-NOT-A-NUMBER', 'RESET', 'RESUME', 'RETRY',
- 'RF', 'RH', 'SECONDS', 'SEGMENT', 'SELF', 'SEND', 'SOURCES', 'STATEMENT', 'STEP',
- 'STRONG', 'SUB-QUEUE-1', 'SUB-QUEUE-2', 'SUB-QUEUE-3', 'SUPER', 'SYMBOL',
- 'SYSTEM-DEFAULT', 'TABLE', 'TERMINAL', 'TEXT', 'TYPEDEF', 'UCS-4', 'UNIVERSAL',
- 'USER-DEFAULT', 'UTF-16', 'UTF-8', 'VAL-STATUS', 'VALID', 'VALIDATE',
- 'VALIDATE-STATUS'),
- prefix=r'(^|(?<=[^0-9a-z_\-]))', suffix=r'\s*($|(?=[^0-9a-z_\-]))'),
+ 'LINE-COUNTER', 'MESSAGE', 'METHOD', 'METHOD-ID', 'NESTED', 'NONE',
+ 'NORMAL', 'OBJECT', 'OBJECT-REFERENCE', 'OPTIONS', 'OVERRIDE',
+ 'PAGE-COUNTER', 'PF', 'PH', 'PROPERTY', 'PROTOTYPE', 'PURGE',
+ 'QUEUE', 'RAISE', 'RAISING', 'RECEIVE', 'RELATION', 'REPLACE',
+ 'REPRESENTS-NOT-A-NUMBER', 'RESET', 'RESUME', 'RETRY', 'RF', 'RH',
+ 'SECONDS', 'SEGMENT', 'SELF', 'SEND', 'SOURCES', 'STATEMENT',
+ 'STEP', 'STRONG', 'SUB-QUEUE-1', 'SUB-QUEUE-2', 'SUB-QUEUE-3',
+ 'SUPER', 'SYMBOL', 'SYSTEM-DEFAULT', 'TABLE', 'TERMINAL', 'TEXT',
+ 'TYPEDEF', 'UCS-4', 'UNIVERSAL', 'USER-DEFAULT', 'UTF-16', 'UTF-8',
+ 'VAL-STATUS', 'VALID', 'VALIDATE', 'VALIDATE-STATUS'),
+ prefix=r'(^|(?<=[^\w\-]))', suffix=r'\s*($|(?=[^\w\-]))'),
Error),
# Data Types
- (r'(^|(?<=[^0-9a-z_\-]))'
+ (r'(^|(?<=[^\w\-]))'
r'(PIC\s+.+?(?=(\s|\.\s))|PICTURE\s+.+?(?=(\s|\.\s))|'
r'(COMPUTATIONAL)(-[1-5X])?|(COMP)(-[1-5X])?|'
r'BINARY-C-LONG|'
r'BINARY-CHAR|BINARY-DOUBLE|BINARY-LONG|BINARY-SHORT|'
- r'BINARY)\s*($|(?=[^0-9a-z_\-]))', Keyword.Type),
+ r'BINARY)\s*($|(?=[^\w\-]))', Keyword.Type),
# Operators
(r'(\*\*|\*|\+|-|/|<=|>=|<|>|==|/=|=)', Operator),
@@ -180,7 +182,7 @@ class CobolLexer(RegexLexer):
(r'([(),;:&%.])', Punctuation),
# Intrinsics
- (r'(^|(?<=[^0-9a-z_\-]))(ABS|ACOS|ANNUITY|ASIN|ATAN|BYTE-LENGTH|'
+ (r'(^|(?<=[^\w\-]))(ABS|ACOS|ANNUITY|ASIN|ATAN|BYTE-LENGTH|'
r'CHAR|COMBINED-DATETIME|CONCATENATE|COS|CURRENT-DATE|'
r'DATE-OF-INTEGER|DATE-TO-YYYYMMDD|DAY-OF-INTEGER|DAY-TO-YYYYDDD|'
r'EXCEPTION-(?:FILE|LOCATION|STATEMENT|STATUS)|EXP10|EXP|E|'
@@ -192,13 +194,13 @@ class CobolLexer(RegexLexer):
r'STANDARD-DEVIATION|STORED-CHAR-LENGTH|SUBSTITUTE(?:-CASE)?|'
r'SUM|TAN|TEST-DATE-YYYYMMDD|TEST-DAY-YYYYDDD|TRIM|'
r'UPPER-CASE|VARIANCE|WHEN-COMPILED|YEAR-TO-YYYY)\s*'
- r'($|(?=[^0-9a-z_\-]))', Name.Function),
+ r'($|(?=[^\w\-]))', Name.Function),
# Booleans
- (r'(^|(?<=[^0-9a-z_\-]))(true|false)\s*($|(?=[^0-9a-z_\-]))', Name.Builtin),
+ (r'(^|(?<=[^\w\-]))(true|false)\s*($|(?=[^\w\-]))', Name.Builtin),
# Comparing Operators
- (r'(^|(?<=[^0-9a-z_\-]))(equal|equals|ne|lt|le|gt|ge|'
- r'greater|less|than|not|and|or)\s*($|(?=[^0-9a-z_\-]))', Operator.Word),
+ (r'(^|(?<=[^\w\-]))(equal|equals|ne|lt|le|gt|ge|'
+ r'greater|less|than|not|and|or)\s*($|(?=[^\w\-]))', Operator.Word),
],
# \"[^\"\n]*\"|\'[^\'\n]*\'
@@ -439,15 +441,15 @@ class OpenEdgeLexer(RegexLexer):
filenames = ['*.p', '*.cls']
mimetypes = ['text/x-openedge', 'application/x-openedge']
- types = (r'(?i)(^|(?<=[^0-9a-z_\-]))(CHARACTER|CHAR|CHARA|CHARAC|CHARACT|CHARACTE|'
+ types = (r'(?i)(^|(?<=[^\w\-]))(CHARACTER|CHAR|CHARA|CHARAC|CHARACT|CHARACTE|'
r'COM-HANDLE|DATE|DATETIME|DATETIME-TZ|'
r'DECIMAL|DEC|DECI|DECIM|DECIMA|HANDLE|'
r'INT64|INTEGER|INT|INTE|INTEG|INTEGE|'
- r'LOGICAL|LONGCHAR|MEMPTR|RAW|RECID|ROWID)\s*($|(?=[^0-9a-z_\-]))')
+ r'LOGICAL|LONGCHAR|MEMPTR|RAW|RECID|ROWID)\s*($|(?=[^\w\-]))')
keywords = words(OPENEDGEKEYWORDS,
- prefix=r'(?i)(^|(?<=[^0-9a-z_\-]))',
- suffix=r'\s*($|(?=[^0-9a-z_\-]))')
+ prefix=r'(?i)(^|(?<=[^\w\-]))',
+ suffix=r'\s*($|(?=[^\w\-]))')
tokens = {
'root': [
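
The ``(^|(?<=[^\w\-]))`` / ``($|(?=[^\w\-]))`` pair that this hunk standardizes
on emulates word boundaries for languages whose identifiers may contain
hyphens. A standalone sketch of the idiom, using plain ``re`` independently of
the lexer:

.. sourcecode:: pycon

    >>> import re
    >>> stop = re.compile(r'(^|(?<=[^\w\-]))STOP\s*($|(?=[^\w\-]))', re.I)
    >>> bool(stop.search('STOP RUN.'))
    True
    >>> bool(stop.search('NON-STOP'))    # the hyphen keeps it one word
    False
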
diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py
index 5c724d03..632871ba 100644
--- a/pygments/lexers/c_cpp.py
+++ b/pygments/lexers/c_cpp.py
@@ -50,8 +50,9 @@ class CFamilyLexer(RegexLexer):
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
],
'statements': [
- (r'L?"', String, 'string'),
- (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
+ (r'(L?)(")', bygroups(String.Affix, String), 'string'),
+ (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
+ bygroups(String.Affix, String.Char, String.Char, String.Char)),
(r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
(r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
(r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
@@ -123,7 +124,8 @@ class CFamilyLexer(RegexLexer):
(r'\\', String), # stray backslash
],
'macro': [
- (r'(include)(' + _ws1 + ')([^\n]+)', bygroups(Comment.Preproc, Text, Comment.PreprocFile)),
+ (r'(include)(' + _ws1 + r')([^\n]+)',
+ bygroups(Comment.Preproc, Text, Comment.PreprocFile)),
(r'[^/\n]+', Comment.Preproc),
(r'/[*](.|\n)*?[*]/', Comment.Multiline),
(r'//.*?\n', Comment.Single, '#pop'),
@@ -217,7 +219,11 @@ class CppLexer(CFamilyLexer):
(r'char(16_t|32_t)\b', Keyword.Type),
(r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
# C++11 raw strings
- (r'R"\(', String, 'rawstring'),
+ (r'(R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")',
+ bygroups(String.Affix, String, String.Delimiter, String.Delimiter,
+ String, String.Delimiter, String)),
+ # C++11 UTF-8/16/32 strings
+ (r'(u8|u|U)(")', bygroups(String.Affix, String), 'string'),
inherit,
],
'root': [
@@ -234,11 +240,6 @@ class CppLexer(CFamilyLexer):
# template specification
(r'\s*(?=>)', Text, '#pop'),
],
- 'rawstring': [
- (r'\)"', String, '#pop'),
- (r'[^)]+', String),
- (r'\)', String),
- ],
}
def analyse_text(text):
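
The single-pattern replacement for the old ``rawstring`` state leans on the
backreference ``\3``: the delimiter characters between ``R"`` and ``(`` must
reappear after the closing ``)``. A standalone check of that regex:

.. sourcecode:: pycon

    >>> import re
    >>> raw = re.compile(r'(R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")')
    >>> raw.match('R"xy(body with )" inside)xy"').group(5)
    'body with )" inside'
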
diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py
new file mode 100644
index 00000000..a3e81534
--- /dev/null
+++ b/pygments/lexers/clean.py
@@ -0,0 +1,275 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers.clean
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ Lexer for the Clean language.
+
+ :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+from pygments.lexer import ExtendedRegexLexer, LexerContext, \
+ bygroups, words, include, default
+from pygments.token import Comment, Keyword, Literal, Name, Number, Operator, \
+ Punctuation, String, Text, Whitespace
+
+__all__ = ['CleanLexer']
+
+
+class CleanLexer(ExtendedRegexLexer):
+ """
+ Lexer for the general purpose, state-of-the-art, pure and lazy functional
+ programming language Clean (http://clean.cs.ru.nl/Clean).
+
+    .. versionadded:: 2.2
+ """
+ name = 'Clean'
+ aliases = ['clean']
+ filenames = ['*.icl', '*.dcl']
+
+ def get_tokens_unprocessed(self, text=None, context=None):
+ ctx = LexerContext(text, 0)
+ ctx.indent = 0
+ return ExtendedRegexLexer.get_tokens_unprocessed(self, text, context=ctx)
+
+ def check_class_not_import(lexer, match, ctx):
+ if match.group(0) == 'import':
+ yield match.start(), Keyword.Namespace, match.group(0)
+ ctx.stack = ctx.stack[:-1] + ['fromimportfunc']
+ else:
+ yield match.start(), Name.Class, match.group(0)
+ ctx.pos = match.end()
+
+ def check_instance_class(lexer, match, ctx):
+ if match.group(0) == 'instance' or match.group(0) == 'class':
+ yield match.start(), Keyword, match.group(0)
+ else:
+ yield match.start(), Name.Function, match.group(0)
+ ctx.stack = ctx.stack + ['fromimportfunctype']
+ ctx.pos = match.end()
+
+ @staticmethod
+ def indent_len(text):
+ # Tabs are four spaces:
+ # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt
+ text = text.replace('\n', '')
+ return len(text.replace('\t', ' ')), len(text)
+
+ def store_indent(lexer, match, ctx):
+ ctx.indent, _ = CleanLexer.indent_len(match.group(0))
+ ctx.pos = match.end()
+ yield match.start(), Text, match.group(0)
+
+ def check_indent1(lexer, match, ctx):
+ indent, reallen = CleanLexer.indent_len(match.group(0))
+ if indent > ctx.indent:
+ yield match.start(), Whitespace, match.group(0)
+ ctx.pos = match.start() + reallen + 1
+ else:
+ ctx.indent = 0
+ ctx.pos = match.start()
+ ctx.stack = ctx.stack[:-1]
+ yield match.start(), Whitespace, match.group(0)[1:]
+
+ def check_indent2(lexer, match, ctx):
+ indent, reallen = CleanLexer.indent_len(match.group(0))
+ if indent > ctx.indent:
+ yield match.start(), Whitespace, match.group(0)
+ ctx.pos = match.start() + reallen + 1
+ else:
+ ctx.indent = 0
+ ctx.pos = match.start()
+ ctx.stack = ctx.stack[:-2]
+
+ def check_indent3(lexer, match, ctx):
+ indent, reallen = CleanLexer.indent_len(match.group(0))
+ if indent > ctx.indent:
+ yield match.start(), Whitespace, match.group(0)
+ ctx.pos = match.start() + reallen + 1
+ else:
+ ctx.indent = 0
+ ctx.pos = match.start()
+ ctx.stack = ctx.stack[:-3]
+ yield match.start(), Whitespace, match.group(0)[1:]
+ if match.group(0) == '\n\n':
+ ctx.pos = ctx.pos + 1
+
+ def skip(lexer, match, ctx):
+ ctx.stack = ctx.stack[:-1]
+ ctx.pos = match.end()
+ yield match.start(), Comment, match.group(0)
+
+    keywords = ('class', 'instance', 'where', 'with', 'let', 'let!',
+ 'in', 'case', 'of', 'infix', 'infixr', 'infixl', 'generic',
+ 'derive', 'otherwise', 'code', 'inline')
+
+ tokens = {
+ 'common': [
+ (r';', Punctuation, '#pop'),
+ (r'//', Comment, 'singlecomment'),
+ ],
+ 'root': [
+ # Comments
+ (r'//.*\n', Comment.Single),
+ (r'(?s)/\*\*.*?\*/', Comment.Special),
+ (r'(?s)/\*.*?\*/', Comment.Multi),
+
+ # Modules, imports, etc.
+ (r'\b((?:implementation|definition|system)\s+)?(module)(\s+)([\w`]+)',
+ bygroups(Keyword.Namespace, Keyword.Namespace, Text, Name.Class)),
+ (r'(?<=\n)import(?=\s)', Keyword.Namespace, 'import'),
+ (r'(?<=\n)from(?=\s)', Keyword.Namespace, 'fromimport'),
+
+ # Keywords
+ # We cannot use (?s)^|(?<=\s) as prefix, so need to repeat this
+ (words(keywords, prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword),
+ (words(keywords, prefix=r'^', suffix=r'(?=\s)'), Keyword),
+
+ # Function definitions
+ (r'(?=\{\|)', Whitespace, 'genericfunction'),
+ (r'(?<=\n)([ \t]*)([\w`$()=\-<>~*\^|+&%]+)((?:\s+[\w])*)(\s*)(::)',
+ bygroups(store_indent, Name.Function, Keyword.Type, Whitespace,
+ Punctuation),
+ 'functiondefargs'),
+
+ # Type definitions
+ (r'(?<=\n)([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'),
+ (r'^([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'),
+
+ # Literals
+ (r'\'\\?.(?<!\\)\'', String.Char),
+ (r'\'\\\d+\'', String.Char),
+ (r'\'\\\\\'', String.Char), # (special case for '\\')
+ (r'[+\-~]?\s*\d+\.\d+(E[+\-~]?\d+)?\b', Number.Float),
+ (r'[+\-~]?\s*0[0-7]\b', Number.Oct),
+ (r'[+\-~]?\s*0x[0-9a-fA-F]\b', Number.Hex),
+ (r'[+\-~]?\s*\d+\b', Number.Integer),
+ (r'"', String.Double, 'doubleqstring'),
+ (words(('True', 'False'), prefix=r'(?<=\s)', suffix=r'(?=\s)'),
+ Literal),
+
+ # Everything else is some name
+ (r'([\w`$%]+\.?)*[\w`$%]+', Name),
+
+ # Punctuation
+ (r'[{}()\[\],:;.#]', Punctuation),
+ (r'[+\-=!<>|&~*\^/]', Operator),
+ (r'\\\\', Operator),
+
+ # Lambda expressions
+ (r'\\.*?(->|\.|=)', Name.Function),
+
+ # Whitespace
+ (r'\s', Whitespace),
+
+ include('common'),
+ ],
+ 'fromimport': [
+ include('common'),
+ (r'([\w`]+)', check_class_not_import),
+ (r'\n', Whitespace, '#pop'),
+ (r'\s', Whitespace),
+ ],
+ 'fromimportfunc': [
+ include('common'),
+ (r'([\w`$()=\-<>~*\^|+&%]+)', check_instance_class),
+ (r',', Punctuation),
+ (r'\n', Whitespace, '#pop'),
+ (r'\s', Whitespace),
+ ],
+ 'fromimportfunctype': [
+ include('common'),
+ (r'[{(\[]', Punctuation, 'combtype'),
+ (r',', Punctuation, '#pop'),
+ (r'[:;.#]', Punctuation),
+ (r'\n', Whitespace, '#pop:2'),
+ (r'[^\S\n]+', Whitespace),
+ (r'\S+', Keyword.Type),
+ ],
+ 'combtype': [
+ include('common'),
+ (r'[})\]]', Punctuation, '#pop'),
+ (r'[{(\[]', Punctuation, '#pop'),
+ (r'[,:;.#]', Punctuation),
+ (r'\s+', Whitespace),
+ (r'\S+', Keyword.Type),
+ ],
+ 'import': [
+ include('common'),
+ (words(('from', 'import', 'as', 'qualified'),
+ prefix='(?<=\s)', suffix='(?=\s)'), Keyword.Namespace),
+ (r'[\w`]+', Name.Class),
+ (r'\n', Whitespace, '#pop'),
+ (r',', Punctuation),
+ (r'[^\S\n]+', Whitespace),
+ ],
+ 'singlecomment': [
+ (r'(.)(?=\n)', skip),
+ (r'.+(?!\n)', Comment),
+ ],
+ 'doubleqstring': [
+ (r'[^\\"]+', String.Double),
+ (r'"', String.Double, '#pop'),
+ (r'\\.', String.Double),
+ ],
+ 'typedef': [
+ include('common'),
+ (r'[\w`]+', Keyword.Type),
+ (r'[:=|(),\[\]{}!*]', Punctuation),
+ (r'->', Punctuation),
+ (r'\n(?=[^\s|])', Whitespace, '#pop'),
+ (r'\s', Whitespace),
+ (r'.', Keyword.Type),
+ ],
+ 'genericfunction': [
+ include('common'),
+ (r'\{\|', Punctuation),
+ (r'\|\}', Punctuation, '#pop'),
+ (r',', Punctuation),
+ (r'->', Punctuation),
+ (r'(\s+of\s+)(\{)', bygroups(Keyword, Punctuation), 'genericftypes'),
+ (r'\s', Whitespace),
+ (r'[\w`]+', Keyword.Type),
+ (r'[*()]', Punctuation),
+ ],
+ 'genericftypes': [
+ include('common'),
+ (r'[\w`]+', Keyword.Type),
+ (r',', Punctuation),
+ (r'\s', Whitespace),
+ (r'\}', Punctuation, '#pop'),
+ ],
+ 'functiondefargs': [
+ include('common'),
+ (r'\n(\s*)', check_indent1),
+ (r'[!{}()\[\],:;.#]', Punctuation),
+ (r'->', Punctuation, 'functiondefres'),
+ (r'^(?=\S)', Whitespace, '#pop'),
+ (r'\S', Keyword.Type),
+ (r'\s', Whitespace),
+ ],
+ 'functiondefres': [
+ include('common'),
+ (r'\n(\s*)', check_indent2),
+ (r'^(?=\S)', Whitespace, '#pop:2'),
+ (r'[!{}()\[\],:;.#]', Punctuation),
+ (r'\|', Punctuation, 'functiondefclasses'),
+ (r'\S', Keyword.Type),
+ (r'\s', Whitespace),
+ ],
+ 'functiondefclasses': [
+ include('common'),
+ (r'\n(\s*)', check_indent3),
+ (r'^(?=\S)', Whitespace, '#pop:3'),
+ (r'[,&]', Punctuation),
+ (r'[\w`$()=\-<>~*\^|+&%]', Name.Function, 'functionname'),
+ (r'\s', Whitespace),
+ ],
+ 'functionname': [
+ include('common'),
+ (r'[\w`$()=\-<>~*\^|+&%]+', Name.Function),
+ (r'(?=\{\|)', Punctuation, 'genericfunction'),
+ default('#pop'),
+ ]
+ }
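
Because the Clean lexer drives indentation through a ``LexerContext`` rather
than regex states alone, the quickest sanity check goes through
``get_tokens``; a sketch, assuming the file is importable as added above
(token reprs abbreviated):

.. sourcecode:: pycon

    >>> from pygments.lexers.clean import CleanLexer

    >>> toks = list(CleanLexer().get_tokens('module test\n'))
    >>> toks[0]                   # the module keyword
    (Token.Keyword.Namespace, 'module')
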
diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py
index 77c7714d..9cc291e5 100644
--- a/pygments/lexers/configs.py
+++ b/pygments/lexers/configs.py
@@ -42,7 +42,7 @@ class IniLexer(RegexLexer):
bygroups(Name.Attribute, Text, Operator, Text, String)),
# standalone option, supported by some INI parsers
(r'(.+?)$', Name.Attribute),
- ]
+ ],
}
def analyse_text(text):
@@ -600,7 +600,7 @@ class TerraformLexer(RegexLexer):
(r'(".*")', bygroups(String.Double)),
],
'punctuation': [
- (r'[\[\]\(\),.]', Punctuation),
+ (r'[\[\](),.]', Punctuation),
],
    # Keep this separate from punctuation - we sometimes want to use different
# Tokens for { }
@@ -631,9 +631,8 @@ class TermcapLexer(RegexLexer):
.. versionadded:: 2.1
"""
name = 'Termcap'
- aliases = ['termcap',]
-
- filenames = ['termcap', 'termcap.src',]
+ aliases = ['termcap']
+ filenames = ['termcap', 'termcap.src']
mimetypes = []
# NOTE:
@@ -644,13 +643,13 @@ class TermcapLexer(RegexLexer):
tokens = {
'root': [
(r'^#.*$', Comment),
- (r'^[^\s#:\|]+', Name.Tag, 'names'),
+ (r'^[^\s#:|]+', Name.Tag, 'names'),
],
'names': [
(r'\n', Text, '#pop'),
(r':', Punctuation, 'defs'),
(r'\|', Punctuation),
- (r'[^:\|]+', Name.Attribute),
+ (r'[^:|]+', Name.Attribute),
],
'defs': [
(r'\\\n[ \t]*', Text),
@@ -678,9 +677,8 @@ class TerminfoLexer(RegexLexer):
.. versionadded:: 2.1
"""
name = 'Terminfo'
- aliases = ['terminfo',]
-
- filenames = ['terminfo', 'terminfo.src',]
+ aliases = ['terminfo']
+ filenames = ['terminfo', 'terminfo.src']
mimetypes = []
# NOTE:
@@ -691,13 +689,13 @@ class TerminfoLexer(RegexLexer):
tokens = {
'root': [
(r'^#.*$', Comment),
- (r'^[^\s#,\|]+', Name.Tag, 'names'),
+ (r'^[^\s#,|]+', Name.Tag, 'names'),
],
'names': [
(r'\n', Text, '#pop'),
(r'(,)([ \t]*)', bygroups(Punctuation, Text), 'defs'),
(r'\|', Punctuation),
- (r'[^,\|]+', Name.Attribute),
+ (r'[^,|]+', Name.Attribute),
],
'defs': [
(r'\n[ \t]+', Text),
@@ -726,8 +724,8 @@ class PkgConfigLexer(RegexLexer):
"""
name = 'PkgConfig'
- aliases = ['pkgconfig',]
- filenames = ['*.pc',]
+ aliases = ['pkgconfig']
+ filenames = ['*.pc']
mimetypes = []
tokens = {
@@ -793,8 +791,8 @@ class PacmanConfLexer(RegexLexer):
"""
name = 'PacmanConf'
- aliases = ['pacmanconf',]
- filenames = ['pacman.conf',]
+ aliases = ['pacmanconf']
+ filenames = ['pacman.conf']
mimetypes = []
tokens = {
@@ -822,7 +820,7 @@ class PacmanConfLexer(RegexLexer):
'%u', # url
), suffix=r'\b'),
Name.Variable),
-
+
# fallback
(r'.', Text),
],
diff --git a/pygments/lexers/csound.py b/pygments/lexers/csound.py
index 51414073..95ee73d8 100644
--- a/pygments/lexers/csound.py
+++ b/pygments/lexers/csound.py
@@ -9,7 +9,7 @@
:license: BSD, see LICENSE for details.
"""
-import copy, re
+import re
from pygments.lexer import RegexLexer, bygroups, default, include, using, words
from pygments.token import Comment, Keyword, Name, Number, Operator, Punctuation, \
@@ -21,7 +21,7 @@ from pygments.lexers.scripting import LuaLexer
__all__ = ['CsoundScoreLexer', 'CsoundOrchestraLexer', 'CsoundDocumentLexer']
-newline = (r'((?:;|//).*)*(\n)', bygroups(Comment.Single, Text))
+newline = (r'((?:(?:;|//).*)*)(\n)', bygroups(Comment.Single, Text))
class CsoundLexer(RegexLexer):
@@ -177,7 +177,7 @@ class CsoundOrchestraLexer(CsoundLexer):
(r'0[xX][a-fA-F0-9]+', Number.Hex),
(r'\d+', Number.Integer),
(r'"', String, 'single-line string'),
- (r'{{', String, 'multi-line string'),
+ (r'\{\{', String, 'multi-line string'),
(r'[+\-*/%^!=&|<>#~¬]', Operator),
(r'[](),?:[]', Punctuation),
(words((
@@ -273,40 +273,40 @@ class CsoundOrchestraLexer(CsoundLexer):
(r'[\\"~$%\^\n]', String)
],
'multi-line string': [
- (r'}}', String, '#pop'),
- (r'[^\}]+|\}(?!\})', String)
+ (r'\}\}', String, '#pop'),
+ (r'[^}]+|\}(?!\})', String)
],
'scoreline opcode': [
include('whitespace or macro call'),
- (r'{{', String, 'scoreline'),
+ (r'\{\{', String, 'scoreline'),
default('#pop')
],
'scoreline': [
- (r'}}', String, '#pop'),
- (r'([^\}]+)|\}(?!\})', using(CsoundScoreLexer))
+ (r'\}\}', String, '#pop'),
+ (r'([^}]+)|\}(?!\})', using(CsoundScoreLexer))
],
'python opcode': [
include('whitespace or macro call'),
- (r'{{', String, 'python'),
+ (r'\{\{', String, 'python'),
default('#pop')
],
'python': [
- (r'}}', String, '#pop'),
- (r'([^\}]+)|\}(?!\})', using(PythonLexer))
+ (r'\}\}', String, '#pop'),
+ (r'([^}]+)|\}(?!\})', using(PythonLexer))
],
'lua opcode': [
include('whitespace or macro call'),
(r'"', String, 'single-line string'),
- (r'{{', String, 'lua'),
+ (r'\{\{', String, 'lua'),
(r',', Punctuation),
default('#pop')
],
'lua': [
- (r'}}', String, '#pop'),
- (r'([^\}]+)|\}(?!\})', using(LuaLexer))
+ (r'\}\}', String, '#pop'),
+ (r'([^}]+)|\}(?!\})', using(LuaLexer))
]
}
@@ -315,7 +315,7 @@ class CsoundDocumentLexer(RegexLexer):
"""
For `Csound <http://csound.github.io>`_ documents.
-
+ .. versionadded:: 2.1
"""
name = 'Csound Document'
diff --git a/pygments/lexers/css.py b/pygments/lexers/css.py
index b40201f4..6c585dfa 100644
--- a/pygments/lexers/css.py
+++ b/pygments/lexers/css.py
@@ -476,8 +476,8 @@ class ScssLexer(RegexLexer):
(r'@[\w-]+', Keyword, 'selector'),
(r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable, Operator), 'value'),
# TODO: broken, and prone to infinite loops.
- #(r'(?=[^;{}][;}])', Name.Attribute, 'attr'),
- #(r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'),
+ # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'),
+ # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'),
default('selector'),
],
@@ -518,7 +518,7 @@ class LessCssLexer(CssLexer):
inherit,
],
'content': [
- (r'{', Punctuation, '#push'),
+ (r'\{', Punctuation, '#push'),
inherit,
],
}
diff --git a/pygments/lexers/diff.py b/pygments/lexers/diff.py
index d3b1589d..726b49ad 100644
--- a/pygments/lexers/diff.py
+++ b/pygments/lexers/diff.py
@@ -9,11 +9,13 @@
:license: BSD, see LICENSE for details.
"""
+import re
+
from pygments.lexer import RegexLexer, include, bygroups
from pygments.token import Text, Comment, Operator, Keyword, Name, Generic, \
Literal
-__all__ = ['DiffLexer', 'DarcsPatchLexer']
+__all__ = ['DiffLexer', 'DarcsPatchLexer', 'WDiffLexer']
class DiffLexer(RegexLexer):
@@ -104,3 +106,60 @@ class DarcsPatchLexer(RegexLexer):
(r'[^\n\[]+', Generic.Deleted),
],
}
+
+
+class WDiffLexer(RegexLexer):
+ """
+ A `wdiff <https://www.gnu.org/software/wdiff/>`_ lexer.
+
+ Note that:
+
+    * only normal output is supported (without options like -l).
+    * if the target files of wdiff contain "[-", "-]", "{+", or "+}",
+      especially if they are unbalanced, this lexer will get confused.
+
+ .. versionadded:: 2.2
+ """
+
+ name = 'WDiff'
+ aliases = ['wdiff']
+ filenames = ['*.wdiff']
+ mimetypes = []
+
+ flags = re.MULTILINE | re.DOTALL
+
+    # We can only assume that a "[-" appearing after a "[-" and before a
+    # "-]" is nested, as happens for instance in wdiff-of-wdiff output.
+    # We have no way to tell whether such markers come from the wdiff
+    # output itself or from the original text.
+
+ ins_op = r"\{\+"
+ ins_cl = r"\+\}"
+ del_op = r"\[\-"
+ del_cl = r"\-\]"
+ normal = r'[^{}[\]+-]+' # for performance
+ tokens = {
+ 'root': [
+ (ins_op, Generic.Inserted, 'inserted'),
+ (del_op, Generic.Deleted, 'deleted'),
+ (normal, Text),
+ (r'.', Text),
+ ],
+ 'inserted': [
+ (ins_op, Generic.Inserted, '#push'),
+ (del_op, Generic.Inserted, '#push'),
+ (del_cl, Generic.Inserted, '#pop'),
+
+ (ins_cl, Generic.Inserted, '#pop'),
+ (normal, Generic.Inserted),
+ (r'.', Generic.Inserted),
+ ],
+ 'deleted': [
+ (del_op, Generic.Deleted, '#push'),
+ (ins_op, Generic.Deleted, '#push'),
+ (ins_cl, Generic.Deleted, '#pop'),
+
+ (del_cl, Generic.Deleted, '#pop'),
+ (normal, Generic.Deleted),
+ (r'.', Generic.Deleted),
+ ],
+ }
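
A quick sketch of the new lexer on typical wdiff output: deletions and
insertions keep their markers, but every piece inside them carries the
``Generic.Deleted`` / ``Generic.Inserted`` token type.

.. sourcecode:: pycon

    >>> from pygments.lexers.diff import WDiffLexer
    >>> from pygments.token import Generic

    >>> toks = list(WDiffLexer().get_tokens('keep [-old-] {+new+} words\n'))
    >>> [v for t, v in toks if t is Generic.Deleted]
    ['[-', 'old', '-]']
    >>> [v for t, v in toks if t is Generic.Inserted]
    ['{+', 'new', '+}']
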
diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py
index eac4b5e5..11b4573e 100644
--- a/pygments/lexers/dotnet.py
+++ b/pygments/lexers/dotnet.py
@@ -11,7 +11,7 @@
import re
from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \
- using, this, default
+ using, this, default, words
from pygments.token import Punctuation, \
Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other
from pygments.util import get_choice_opt, iteritems
@@ -375,8 +375,8 @@ class VbNetLexer(RegexLexer):
filenames = ['*.vb', '*.bas']
mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?)
- uni_name = '[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + \
- '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
+ uni_name = '[_' + uni.combine('Ll', 'Lt', 'Lm', 'Nl') + ']' + \
+ '[' + uni.combine('Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc',
'Cf', 'Mn', 'Mc') + ']*'
flags = re.MULTILINE | re.IGNORECASE
@@ -394,25 +394,26 @@ class VbNetLexer(RegexLexer):
(r'[(){}!#,.:]', Punctuation),
(r'Option\s+(Strict|Explicit|Compare)\s+'
r'(On|Off|Binary|Text)', Keyword.Declaration),
- (r'(?<!\.)(AddHandler|Alias|'
- r'ByRef|ByVal|Call|Case|Catch|CBool|CByte|CChar|CDate|'
- r'CDec|CDbl|CInt|CLng|CObj|Continue|CSByte|CShort|'
- r'CSng|CStr|CType|CUInt|CULng|CUShort|Declare|'
- r'Default|Delegate|DirectCast|Do|Each|Else|ElseIf|'
- r'EndIf|Erase|Error|Event|Exit|False|Finally|For|'
- r'Friend|Get|Global|GoSub|GoTo|Handles|If|'
- r'Implements|Inherits|Interface|'
- r'Let|Lib|Loop|Me|MustInherit|'
- r'MustOverride|MyBase|MyClass|Narrowing|New|Next|'
- r'Not|Nothing|NotInheritable|NotOverridable|Of|On|'
- r'Operator|Option|Optional|Overloads|Overridable|'
- r'Overrides|ParamArray|Partial|Private|Protected|'
- r'Public|RaiseEvent|ReadOnly|ReDim|RemoveHandler|Resume|'
- r'Return|Select|Set|Shadows|Shared|Single|'
- r'Static|Step|Stop|SyncLock|Then|'
- r'Throw|To|True|Try|TryCast|Wend|'
- r'Using|When|While|Widening|With|WithEvents|'
- r'WriteOnly)\b', Keyword),
+ (words((
+ 'AddHandler', 'Alias', 'ByRef', 'ByVal', 'Call', 'Case',
+ 'Catch', 'CBool', 'CByte', 'CChar', 'CDate', 'CDec', 'CDbl',
+ 'CInt', 'CLng', 'CObj', 'Continue', 'CSByte', 'CShort', 'CSng',
+ 'CStr', 'CType', 'CUInt', 'CULng', 'CUShort', 'Declare',
+ 'Default', 'Delegate', 'DirectCast', 'Do', 'Each', 'Else',
+ 'ElseIf', 'EndIf', 'Erase', 'Error', 'Event', 'Exit', 'False',
+ 'Finally', 'For', 'Friend', 'Get', 'Global', 'GoSub', 'GoTo',
+ 'Handles', 'If', 'Implements', 'Inherits', 'Interface', 'Let',
+ 'Lib', 'Loop', 'Me', 'MustInherit', 'MustOverride', 'MyBase',
+ 'MyClass', 'Narrowing', 'New', 'Next', 'Not', 'Nothing',
+ 'NotInheritable', 'NotOverridable', 'Of', 'On', 'Operator',
+ 'Option', 'Optional', 'Overloads', 'Overridable', 'Overrides',
+ 'ParamArray', 'Partial', 'Private', 'Protected', 'Public',
+ 'RaiseEvent', 'ReadOnly', 'ReDim', 'RemoveHandler', 'Resume',
+ 'Return', 'Select', 'Set', 'Shadows', 'Shared', 'Single',
+ 'Static', 'Step', 'Stop', 'SyncLock', 'Then', 'Throw', 'To',
+ 'True', 'Try', 'TryCast', 'Wend', 'Using', 'When', 'While',
+ 'Widening', 'With', 'WithEvents', 'WriteOnly'),
+ prefix='(?<!\.)', suffix=r'\b'), Keyword),
(r'(?<!\.)End\b', Keyword, 'end'),
(r'(?<!\.)(Dim|Const)\b', Keyword, 'dim'),
(r'(?<!\.)(Function|Sub|Property)(\s+)',
diff --git a/pygments/lexers/dsls.py b/pygments/lexers/dsls.py
index 0d55af09..e1362649 100644
--- a/pygments/lexers/dsls.py
+++ b/pygments/lexers/dsls.py
@@ -111,8 +111,8 @@ class ThriftLexer(RegexLexer):
include('keywords'),
include('numbers'),
(r'[&=]', Operator),
- (r'[:;\,\{\}\(\)\<>\[\]]', Punctuation),
- (r'[a-zA-Z_](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*', Name),
+ (r'[:;,{}()<>\[\]]', Punctuation),
+ (r'[a-zA-Z_](\.\w|\w)*', Name),
],
'whitespace': [
(r'\n', Text.Whitespace),
@@ -135,7 +135,7 @@ class ThriftLexer(RegexLexer):
(r'[^\\\'\n]+', String.Single),
],
'namespace': [
- (r'[a-z\*](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*', Name.Namespace, '#pop'),
+ (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'),
default('#pop'),
],
'class': [
diff --git a/pygments/lexers/elm.py b/pygments/lexers/elm.py
index 7df6346a..cd1fb98e 100644
--- a/pygments/lexers/elm.py
+++ b/pygments/lexers/elm.py
@@ -46,7 +46,7 @@ class ElmLexer(RegexLexer):
'root': [
# Comments
- (r'{-', Comment.Multiline, 'comment'),
+ (r'\{-', Comment.Multiline, 'comment'),
(r'--.*', Comment.Single),
# Whitespace
@@ -86,20 +86,20 @@ class ElmLexer(RegexLexer):
(validName, Name.Variable),
# Parens
- (r'[,\(\)\[\]{}]', Punctuation),
+ (r'[,()\[\]{}]', Punctuation),
],
'comment': [
- (r'-(?!})', Comment.Multiline),
- (r'{-', Comment.Multiline, 'comment'),
+ (r'-(?!\})', Comment.Multiline),
+ (r'\{-', Comment.Multiline, 'comment'),
(r'[^-}]', Comment.Multiline),
- (r'-}', Comment.Multiline, '#pop'),
+ (r'-\}', Comment.Multiline, '#pop'),
],
'doublequote': [
- (r'\\u[0-9a-fA-F]\{4}', String.Escape),
- (r'\\[nrfvb\\\"]', String.Escape),
+ (r'\\u[0-9a-fA-F]{4}', String.Escape),
+ (r'\\[nrfvb\\"]', String.Escape),
(r'[^"]', String),
(r'"', String, '#pop'),
],
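
Note the quantifier fix in the 'doublequote' state: in a Python regex,
\{4} matches the literal characters '{4}', so the old pattern could never
match a real four-digit Unicode escape. A standalone check:

    import re
    assert re.match(r'\\u[0-9a-fA-F]\{4}', r'\uBEEF') is None  # literal '{4}'
    assert re.match(r'\\u[0-9a-fA-F]{4}', r'\uBEEF')           # real quantifier
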
diff --git a/pygments/lexers/erlang.py b/pygments/lexers/erlang.py
index 7838b3c5..93ddd2c2 100644
--- a/pygments/lexers/erlang.py
+++ b/pygments/lexers/erlang.py
@@ -127,7 +127,7 @@ class ErlangLexer(RegexLexer):
'string': [
(escape_re, String.Escape),
(r'"', String, '#pop'),
- (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol),
+ (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol),
(r'[^"\\~]+', String),
(r'~', String),
],
@@ -240,11 +240,11 @@ class ElixirLexer(RegexLexer):
KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in')
BUILTIN = (
'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise',
- 'quote', 'unquote', 'unquote_splicing', 'throw', 'super'
+ 'quote', 'unquote', 'unquote_splicing', 'throw', 'super',
)
BUILTIN_DECLARATION = (
'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop',
- 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback'
+ 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback',
)
BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias')
@@ -263,7 +263,7 @@ class ElixirLexer(RegexLexer):
OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&')
PUNCTUATION = (
- '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']'
+ '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']',
)
def get_tokens_unprocessed(self, text):
diff --git a/pygments/lexers/esoteric.py b/pygments/lexers/esoteric.py
index 73ea4a4a..2b17615c 100644
--- a/pygments/lexers/esoteric.py
+++ b/pygments/lexers/esoteric.py
@@ -90,7 +90,7 @@ class CAmkESLexer(RegexLexer):
filenames = ['*.camkes', '*.idl4']
tokens = {
- 'root':[
+ 'root': [
# C pre-processor directive
(r'^\s*#.*\n', Comment.Preproc),
@@ -99,7 +99,7 @@ class CAmkESLexer(RegexLexer):
(r'/\*(.|\n)*?\*/', Comment),
(r'//.*\n', Comment),
- (r'[\[\(\){},\.;=\]]', Punctuation),
+ (r'[\[(){},.;=\]]', Punctuation),
(words(('assembly', 'attribute', 'component', 'composition',
'configuration', 'connection', 'connector', 'consumes',
diff --git a/pygments/lexers/felix.py b/pygments/lexers/felix.py
index b7659769..9631bcc1 100644
--- a/pygments/lexers/felix.py
+++ b/pygments/lexers/felix.py
@@ -237,7 +237,7 @@ class FelixLexer(RegexLexer):
],
'strings': [
(r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
+ '[hlL]?[E-GXc-giorsux%]', String.Interpol),
(r'[^\\\'"%\n]+', String),
# quotes, percents and backslashes must be parsed one at a time
(r'[\'"\\]', String),
diff --git a/pygments/lexers/fortran.py b/pygments/lexers/fortran.py
index 4c22139d..e2f95b11 100644
--- a/pygments/lexers/fortran.py
+++ b/pygments/lexers/fortran.py
@@ -11,7 +11,7 @@
import re
-from pygments.lexer import RegexLexer, bygroups, include, words, using
+from pygments.lexer import RegexLexer, bygroups, include, words, using, default
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Generic
@@ -191,16 +191,15 @@ class FortranFixedLexer(RegexLexer):
(r'(.{5})', Name.Label, 'cont-char'),
(r'.*\n', using(FortranLexer)),
],
-
'cont-char': [
(' ', Text, 'code'),
('0', Comment, 'code'),
- ('.', Generic.Strong, 'code')
+ ('.', Generic.Strong, 'code'),
],
-
'code': [
(r'(.{66})(.*)(\n)',
bygroups(_lex_fortran, Comment, Text), 'root'),
(r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'),
- (r'', Text, 'root')]
+ default('root'),
+ ]
}
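
default('root') replaces the old zero-width rule (r'', Text, 'root'): it
performs the same unconditional state transition but without an empty
regex match, which is error-prone in a RegexLexer. A sketch of the idiom
(the states here are hypothetical, not from this patch):

    from pygments.lexer import RegexLexer, default
    from pygments.token import Keyword, Text

    class Sketch(RegexLexer):
        tokens = {
            'root': [(r'begin\b', Keyword, 'body'), (r'.', Text)],
            'body': [
                (r'end\b', Keyword, '#pop'),
                default('#pop'),  # anything else: silently leave the state
            ],
        }
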
diff --git a/pygments/lexers/haskell.py b/pygments/lexers/haskell.py
index 95e68a33..ffc3a3a2 100644
--- a/pygments/lexers/haskell.py
+++ b/pygments/lexers/haskell.py
@@ -321,7 +321,7 @@ class AgdaLexer(RegexLexer):
'module': [
(r'\{-', Comment.Multiline, 'comment'),
(r'[a-zA-Z][\w.]*', Name, '#pop'),
- (r'[^a-zA-Z]+', Text)
+ (r'[\W0-9_]+', Text)
],
'comment': HaskellLexer.tokens['comment'],
'character': HaskellLexer.tokens['character'],
diff --git a/pygments/lexers/idl.py b/pygments/lexers/idl.py
index d745bcfd..a0b39492 100644
--- a/pygments/lexers/idl.py
+++ b/pygments/lexers/idl.py
@@ -258,12 +258,13 @@ class IDLLexer(RegexLexer):
(r'\b(mod|lt|le|eq|ne|ge|gt|not|and|or|xor)\b', Operator),
(r'"[^\"]*"', String.Double),
(r"'[^\']*'", String.Single),
- (r'\b[\+\-]?([0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(D|E)?([\+\-]?[0-9]+)?\b', Number.Float),
- (r'\b\'[\+\-]?[0-9A-F]+\'X(U?(S?|L{1,2})|B)\b', Number.Hex),
- (r'\b\'[\+\-]?[0-7]+\'O(U?(S?|L{1,2})|B)\b', Number.Oct),
- (r'\b[\+\-]?[0-9]+U?L{1,2}\b', Number.Integer.Long),
- (r'\b[\+\-]?[0-9]+U?S?\b', Number.Integer),
- (r'\b[\+\-]?[0-9]+B\b', Number),
+ (r'\b[+\-]?([0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(D|E)?([+\-]?[0-9]+)?\b',
+ Number.Float),
+ (r'\b\'[+\-]?[0-9A-F]+\'X(U?(S?|L{1,2})|B)\b', Number.Hex),
+ (r'\b\'[+\-]?[0-7]+\'O(U?(S?|L{1,2})|B)\b', Number.Oct),
+ (r'\b[+\-]?[0-9]+U?L{1,2}\b', Number.Integer.Long),
+ (r'\b[+\-]?[0-9]+U?S?\b', Number.Integer),
+ (r'\b[+\-]?[0-9]+B\b', Number),
(r'.', Text),
]
}
diff --git a/pygments/lexers/j.py b/pygments/lexers/j.py
index 278374e5..f15595f8 100644
--- a/pygments/lexers/j.py
+++ b/pygments/lexers/j.py
@@ -48,7 +48,7 @@ class JLexer(RegexLexer):
# Definitions
(r'0\s+:\s*0|noun\s+define\s*$', Name.Entity, 'nounDefinition'),
- (r'\b(([1-4]|13)\s+:\s*0)|((adverb|conjunction|dyad|monad|verb)\s+define)\b',
+ (r'(([1-4]|13)\s+:\s*0|(adverb|conjunction|dyad|monad|verb)\s+define)\b',
Name.Function, 'explicitDefinition'),
# Flow Control
diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py
index 2a01cd42..5dca6832 100644
--- a/pygments/lexers/javascript.py
+++ b/pygments/lexers/javascript.py
@@ -97,13 +97,13 @@ class JavascriptLexer(RegexLexer):
(r'`', String.Backtick, '#pop'),
(r'\\\\', String.Backtick),
(r'\\`', String.Backtick),
- (r'\${', String.Interpol, 'interp-inside'),
+ (r'\$\{', String.Interpol, 'interp-inside'),
(r'\$', String.Backtick),
(r'[^`\\$]+', String.Backtick),
],
'interp-inside': [
# TODO: should this include single-line comments and allow nesting strings?
- (r'}', String.Interpol, '#pop'),
+ (r'\}', String.Interpol, '#pop'),
include('root'),
],
# (\\\\|\\`|[^`])*`', String.Backtick),
@@ -1245,32 +1245,32 @@ class EarlGreyLexer(RegexLexer):
include('control'),
(r'[^\S\n]+', Text),
(r';;.*\n', Comment),
- (r'[\[\]\{\}\:\(\)\,\;]', Punctuation),
+ (r'[\[\]{}:(),;]', Punctuation),
(r'\\\n', Text),
(r'\\', Text),
include('errors'),
(words((
'with', 'where', 'when', 'and', 'not', 'or', 'in',
'as', 'of', 'is'),
- prefix=r'(?<=\s|\[)', suffix=r'(?![\w\$\-])'),
+ prefix=r'(?<=\s|\[)', suffix=r'(?![\w$\-])'),
Operator.Word),
- (r'[\*@]?->', Name.Function),
+ (r'[*@]?->', Name.Function),
(r'[+\-*/~^<>%&|?!@#.]*=', Operator.Word),
(r'\.{2,3}', Operator.Word), # Range Operator
(r'([+*/~^<>&|?!]+)|([#\-](?=\s))|@@+(?=\s)|=+', Operator),
- (r'(?<![\w\$\-])(var|let)(?:[^\w\$])', Keyword.Declaration),
+ (r'(?<![\w$\-])(var|let)(?:[^\w$])', Keyword.Declaration),
include('keywords'),
include('builtins'),
include('assignment'),
(r'''(?x)
- (?:()([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)|
- (?<=[\s\{\[\(])(\.)([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?))
+ (?:()([a-zA-Z$_](?:[\w$\-]*[\w$])?)|
+ (?<=[\s{\[(])(\.)([a-zA-Z$_](?:[\w$\-]*[\w$])?))
(?=.*%)''',
bygroups(Punctuation, Name.Tag, Punctuation, Name.Class.Start), 'dbs'),
(r'[rR]?`', String.Backtick, 'bt'),
(r'[rR]?```', String.Backtick, 'tbt'),
- (r'(?<=[\s\[\{\(,;])\.([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)'
- r'(?=[\s\]\}\),;])', String.Symbol),
+ (r'(?<=[\s\[{(,;])\.([a-zA-Z$_](?:[\w$\-]*[\w$])?)'
+ r'(?=[\s\]}),;])', String.Symbol),
include('nested'),
(r'(?:[rR]|[rR]\.[gmi]{1,3})?"', String, combined('stringescape', 'dqs')),
(r'(?:[rR]|[rR]\.[gmi]{1,3})?\'', String, combined('stringescape', 'sqs')),
@@ -1281,9 +1281,9 @@ class EarlGreyLexer(RegexLexer):
include('numbers'),
],
'dbs': [
- (r'(\.)([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(?=[\[\.\s])',
+ (r'(\.)([a-zA-Z$_](?:[\w$\-]*[\w$])?)(?=[.\[\s])',
bygroups(Punctuation, Name.Class.DBS)),
- (r'(\[)([\^#][a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(\])',
+ (r'(\[)([\^#][a-zA-Z$_](?:[\w$\-]*[\w$])?)(\])',
bygroups(Punctuation, Name.Entity.DBS, Punctuation)),
(r'\s+', Text),
(r'%', Operator.DBS, '#pop'),
@@ -1293,29 +1293,29 @@ class EarlGreyLexer(RegexLexer):
bygroups(Text.Whitespace, Text)),
],
'assignment': [
- (r'(\.)?([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)'
+ (r'(\.)?([a-zA-Z$_](?:[\w$\-]*[\w$])?)'
r'(?=\s+[+\-*/~^<>%&|?!@#.]*\=\s)',
bygroups(Punctuation, Name.Variable))
],
'errors': [
(words(('Error', 'TypeError', 'ReferenceError'),
- prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-\.])'),
+ prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$.])'),
Name.Exception),
(r'''(?x)
- (?<![\w\$])
- E\.[\w\$](?:[\w\$\-]*[\w\$])?
- (?:\.[\w\$](?:[\w\$\-]*[\w\$])?)*
- (?=[\(\{\[\?\!\s])''',
+ (?<![\w$])
+ E\.[\w$](?:[\w$\-]*[\w$])?
+ (?:\.[\w$](?:[\w$\-]*[\w$])?)*
+ (?=[({\[?!\s])''',
Name.Exception),
],
'control': [
(r'''(?x)
- ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)
+ ([a-zA-Z$_](?:[\w$-]*[\w$])?)
(?!\n)\s+
(?!and|as|each\*|each|in|is|mod|of|or|when|where|with)
- (?=(?:[+\-*/~^<>%&|?!@#.])?[a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)''',
+ (?=(?:[+\-*/~^<>%&|?!@#.])?[a-zA-Z$_](?:[\w$-]*[\w$])?)''',
Keyword.Control),
- (r'([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(?!\n)\s+(?=[\'"\d\{\[\(])',
+ (r'([a-zA-Z$_](?:[\w$-]*[\w$])?)(?!\n)\s+(?=[\'"\d{\[(])',
Keyword.Control),
(r'''(?x)
(?:
@@ -1324,28 +1324,28 @@ class EarlGreyLexer(RegexLexer):
(?<=with|each|with)|
(?<=each\*|where)
)(\s+)
- ([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(:)''',
+ ([a-zA-Z$_](?:[\w$-]*[\w$])?)(:)''',
bygroups(Text, Keyword.Control, Punctuation)),
(r'''(?x)
(?<![+\-*/~^<>%&|?!@#.])(\s+)
- ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(:)''',
+ ([a-zA-Z$_](?:[\w$-]*[\w$])?)(:)''',
bygroups(Text, Keyword.Control, Punctuation)),
],
'nested': [
(r'''(?x)
- (?<=[a-zA-Z$0-9_\]\}\)])(\.)
- ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)
+ (?<=[\w$\]})])(\.)
+ ([a-zA-Z$_](?:[\w$-]*[\w$])?)
(?=\s+with(?:\s|\n))''',
bygroups(Punctuation, Name.Function)),
(r'''(?x)
(?<!\s)(\.)
- ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)
- (?=[\}\]\)\.,;:\s])''',
+ ([a-zA-Z$_](?:[\w$-]*[\w$])?)
+ (?=[}\]).,;:\s])''',
bygroups(Punctuation, Name.Field)),
(r'''(?x)
- (?<=[a-zA-Z$0-9_\]\}\)])(\.)
- ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)
- (?=[\[\{\(:])''',
+ (?<=[\w$\]})])(\.)
+ ([a-zA-Z$_](?:[\w$-]*[\w$])?)
+ (?=[\[{(:])''',
bygroups(Punctuation, Name.Function)),
],
'keywords': [
@@ -1354,15 +1354,15 @@ class EarlGreyLexer(RegexLexer):
'continue', 'elif', 'expr-value', 'if', 'match',
'return', 'yield', 'pass', 'else', 'require', 'var',
'let', 'async', 'method', 'gen'),
- prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-\.])'),
+ prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$.])'),
Keyword.Pseudo),
(words(('this', 'self', '@'),
- prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-])'),
+ prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$])'),
Keyword.Constant),
(words((
'Function', 'Object', 'Array', 'String', 'Number',
'Boolean', 'ErrorFactory', 'ENode', 'Promise'),
- prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-])'),
+ prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$])'),
Keyword.Type),
],
'builtins': [
@@ -1373,20 +1373,20 @@ class EarlGreyLexer(RegexLexer):
'getChecker', 'get-checker', 'getProperty', 'get-property',
'getProjector', 'get-projector', 'consume', 'take',
'promisify', 'spawn', 'constructor'),
- prefix=r'(?<![\w\-#\.])', suffix=r'(?![\w\-\.])'),
+ prefix=r'(?<![\w\-#.])', suffix=r'(?![\w\-.])'),
Name.Builtin),
(words((
'true', 'false', 'null', 'undefined'),
- prefix=r'(?<![\w\$\-\.])', suffix=r'(?![\w\$\-\.])'),
+ prefix=r'(?<![\w\-$.])', suffix=r'(?![\w\-$.])'),
Name.Constant),
],
'name': [
- (r'@([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)', Name.Variable.Instance),
- (r'([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(\+\+|\-\-)?',
+ (r'@([a-zA-Z$_](?:[\w$-]*[\w$])?)', Name.Variable.Instance),
+ (r'([a-zA-Z$_](?:[\w$-]*[\w$])?)(\+\+|\-\-)?',
bygroups(Name.Symbol, Operator.Word))
],
'tuple': [
- (r'#[a-zA-Z_][a-zA-Z_\-0-9]*(?=[\s\{\(,;\n])', Name.Namespace)
+ (r'#[a-zA-Z_][\w\-]*(?=[\s{(,;])', Name.Namespace)
],
'interpoling_string': [
(r'\}', String.Interpol, '#pop'),
@@ -1426,7 +1426,7 @@ class EarlGreyLexer(RegexLexer):
(r'```', String.Backtick, '#pop'),
(r'\n', String.Backtick),
(r'\^=?', String.Escape),
- (r'[^\`]+', String.Backtick),
+ (r'[^`]+', String.Backtick),
],
'numbers': [
(r'\d+\.(?!\.)\d*([eE][+-]?[0-9]+)?', Number.Float),
@@ -1434,7 +1434,7 @@ class EarlGreyLexer(RegexLexer):
(r'8r[0-7]+', Number.Oct),
(r'2r[01]+', Number.Bin),
(r'16r[a-fA-F0-9]+', Number.Hex),
- (r'([3-79]|[1-2][0-9]|3[0-6])r[a-zA-Z\d]+(\.[a-zA-Z\d]+)?', Number.Radix),
+ (r'([3-79]|[12][0-9]|3[0-6])r[a-zA-Z\d]+(\.[a-zA-Z\d]+)?', Number.Radix),
(r'\d+', Number.Integer)
],
}
diff --git a/pygments/lexers/julia.py b/pygments/lexers/julia.py
index d0aa6d35..9f84b8d9 100644
--- a/pygments/lexers/julia.py
+++ b/pygments/lexers/julia.py
@@ -11,13 +11,16 @@
import re
-from pygments.lexer import Lexer, RegexLexer, bygroups, combined, do_insertions
+from pygments.lexer import Lexer, RegexLexer, bygroups, combined, \
+ do_insertions, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Generic
from pygments.util import shebang_matches, unirange
__all__ = ['JuliaLexer', 'JuliaConsoleLexer']
+line_re = re.compile('.*?\n')
+
class JuliaLexer(RegexLexer):
"""
@@ -32,13 +35,26 @@ class JuliaLexer(RegexLexer):
flags = re.MULTILINE | re.UNICODE
- builtins = [
+ builtins = (
'exit', 'whos', 'edit', 'load', 'is', 'isa', 'isequal', 'typeof', 'tuple',
'ntuple', 'uid', 'hash', 'finalizer', 'convert', 'promote', 'subtype',
'typemin', 'typemax', 'realmin', 'realmax', 'sizeof', 'eps', 'promote_type',
'method_exists', 'applicable', 'invoke', 'dlopen', 'dlsym', 'system',
'error', 'throw', 'assert', 'new', 'Inf', 'Nan', 'pi', 'im',
- ]
+ )
+
+ keywords = (
+ 'begin', 'while', 'for', 'in', 'return', 'break', 'continue',
+ 'macro', 'quote', 'let', 'if', 'elseif', 'else', 'try', 'catch', 'end',
+ 'bitstype', 'ccall', 'do', 'using', 'module', 'import', 'export',
+ 'importall', 'baremodule', 'immutable',
+ )
+
+ types = (
+ 'Bool', 'Int', 'Int8', 'Int16', 'Int32', 'Int64', 'Uint', 'Uint8', 'Uint16',
+ 'Uint32', 'Uint64', 'Float32', 'Float64', 'Complex64', 'Complex128', 'Any',
+ 'Nothing', 'None',
+ )
tokens = {
'root': [
@@ -46,34 +62,29 @@ class JuliaLexer(RegexLexer):
(r'[^\S\n]+', Text),
(r'#=', Comment.Multiline, "blockcomment"),
(r'#.*$', Comment),
- (r'[]{}:(),;[@]', Punctuation),
+ (r'[\[\]{}:(),;@]', Punctuation),
(r'\\\n', Text),
(r'\\', Text),
# keywords
- (r'(begin|while|for|in|return|break|continue|'
- r'macro|quote|let|if|elseif|else|try|catch|end|'
- r'bitstype|ccall|do|using|module|import|export|'
- r'importall|baremodule|immutable)\b', Keyword),
(r'(local|global|const)\b', Keyword.Declaration),
- (r'(Bool|Int|Int8|Int16|Int32|Int64|Uint|Uint8|Uint16|Uint32|Uint64'
- r'|Float32|Float64|Complex64|Complex128|Any|Nothing|None)\b',
- Keyword.Type),
+ (words(keywords, suffix=r'\b'), Keyword),
+ (words(types, suffix=r'\b'), Keyword.Type),
# functions
(r'(function)((?:\s|\\\s)+)',
- bygroups(Keyword, Name.Function), 'funcname'),
+ bygroups(Keyword, Name.Function), 'funcname'),
# types
(r'(type|typealias|abstract|immutable)((?:\s|\\\s)+)',
- bygroups(Keyword, Name.Class), 'typename'),
+ bygroups(Keyword, Name.Class), 'typename'),
# operators
(r'==|!=|<=|>=|->|&&|\|\||::|<:|[-~+/*%=<>&^|.?!$]', Operator),
(r'\.\*|\.\^|\.\\|\.\/|\\', Operator),
# builtins
- ('(' + '|'.join(builtins) + r')\b', Name.Builtin),
+ (words(builtins, suffix=r'\b'), Name.Builtin),
# backticks
(r'`(?s).*?`', String.Backtick),
@@ -116,12 +127,12 @@ class JuliaLexer(RegexLexer):
],
'typename': [
- ('[a-zA-Z_]\w*', Name.Class, '#pop')
+ ('[a-zA-Z_]\w*', Name.Class, '#pop'),
],
'stringescape': [
(r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
- r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
+ r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape),
],
"blockcomment": [
(r'[^=#]', Comment.Multiline),
@@ -138,7 +149,7 @@ class JuliaLexer(RegexLexer):
(r'\$[a-zA-Z_]+', String.Interpol),
(r'\$\(', String.Interpol, 'in-intp'),
# @printf and @sprintf formats
- (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[diouxXeEfFgGcrs%]',
+ (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]',
String.Interpol),
(r'[^$%"\\]+', String),
# unhandled special signs
@@ -155,9 +166,6 @@ class JuliaLexer(RegexLexer):
return shebang_matches(text, r'julia')
-line_re = re.compile('.*?\n')
-
-
class JuliaConsoleLexer(Lexer):
"""
For Julia console sessions. Modeled after MatlabSessionLexer.
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index 41fc0fdb..af7f8105 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -564,14 +564,14 @@ class IokeLexer(RegexLexer):
],
'slashRegexp': [
- (r'(?<!\\)/[oxpniums]*', String.Regex, '#pop'),
+ (r'(?<!\\)/[im-psux]*', String.Regex, '#pop'),
include('interpolatableText'),
(r'\\/', String.Regex),
(r'[^/]', String.Regex)
],
'squareRegexp': [
- (r'(?<!\\)][oxpniums]*', String.Regex, '#pop'),
+ (r'(?<!\\)][im-psux]*', String.Regex, '#pop'),
include('interpolatableText'),
(r'\\]', String.Regex),
(r'[^\]]', String.Regex)
diff --git a/pygments/lexers/lisp.py b/pygments/lexers/lisp.py
index 84720fab..6d591e10 100644
--- a/pygments/lexers/lisp.py
+++ b/pygments/lexers/lisp.py
@@ -2135,49 +2135,52 @@ class ShenLexer(RegexLexer):
filenames = ['*.shen']
mimetypes = ['text/x-shen', 'application/x-shen']
- DECLARATIONS = re.findall(r'\S+', """
- datatype define defmacro defprolog defcc synonyms declare package
- type function
- """)
-
- SPECIAL_FORMS = re.findall(r'\S+', """
- lambda get let if cases cond put time freeze value load $
- protect or and not do output prolog? trap-error error
- make-string /. set @p @s @v
- """)
-
- BUILTINS = re.findall(r'\S+', """
- == = * + - / < > >= <= <-address <-vector abort absvector
- absvector? address-> adjoin append arity assoc bind boolean?
- bound? call cd close cn compile concat cons cons? cut destroy
- difference element? empty? enable-type-theory error-to-string
- eval eval-kl exception explode external fail fail-if file
- findall fix fst fwhen gensym get-time hash hd hdstr hdv head
- identical implementation in include include-all-but inferences
- input input+ integer? intern intersection is kill language
- length limit lineread loaded macro macroexpand map mapcan
- maxinferences mode n->string nl nth null number? occurrences
- occurs-check open os out port porters pos pr preclude
- preclude-all-but print profile profile-results ps quit read
- read+ read-byte read-file read-file-as-bytelist
- read-file-as-string read-from-string release remove return
- reverse run save set simple-error snd specialise spy step
- stinput stoutput str string->n string->symbol string? subst
- symbol? systemf tail tc tc? thaw tl tlstr tlv track tuple?
- undefmacro unify unify! union unprofile unspecialise untrack
- variable? vector vector-> vector? verified version warn when
- write-byte write-to-file y-or-n?
- """)
-
- BUILTINS_ANYWHERE = re.findall(r'\S+', """
- where skip >> _ ! <e> <!>
- """)
+ DECLARATIONS = (
+ 'datatype', 'define', 'defmacro', 'defprolog', 'defcc',
+ 'synonyms', 'declare', 'package', 'type', 'function',
+ )
+
+ SPECIAL_FORMS = (
+ 'lambda', 'get', 'let', 'if', 'cases', 'cond', 'put', 'time', 'freeze',
+ 'value', 'load', '$', 'protect', 'or', 'and', 'not', 'do', 'output',
+ 'prolog?', 'trap-error', 'error', 'make-string', '/.', 'set', '@p',
+ '@s', '@v',
+ )
+
+ BUILTINS = (
+ '==', '=', '*', '+', '-', '/', '<', '>', '>=', '<=', '<-address',
+ '<-vector', 'abort', 'absvector', 'absvector?', 'address->', 'adjoin',
+ 'append', 'arity', 'assoc', 'bind', 'boolean?', 'bound?', 'call', 'cd',
+ 'close', 'cn', 'compile', 'concat', 'cons', 'cons?', 'cut', 'destroy',
+ 'difference', 'element?', 'empty?', 'enable-type-theory',
+ 'error-to-string', 'eval', 'eval-kl', 'exception', 'explode', 'external',
+ 'fail', 'fail-if', 'file', 'findall', 'fix', 'fst', 'fwhen', 'gensym',
+ 'get-time', 'hash', 'hd', 'hdstr', 'hdv', 'head', 'identical',
+ 'implementation', 'in', 'include', 'include-all-but', 'inferences',
+ 'input', 'input+', 'integer?', 'intern', 'intersection', 'is', 'kill',
+ 'language', 'length', 'limit', 'lineread', 'loaded', 'macro', 'macroexpand',
+ 'map', 'mapcan', 'maxinferences', 'mode', 'n->string', 'nl', 'nth', 'null',
+ 'number?', 'occurrences', 'occurs-check', 'open', 'os', 'out', 'port',
+ 'porters', 'pos', 'pr', 'preclude', 'preclude-all-but', 'print', 'profile',
+ 'profile-results', 'ps', 'quit', 'read', 'read+', 'read-byte', 'read-file',
+ 'read-file-as-bytelist', 'read-file-as-string', 'read-from-string',
+ 'release', 'remove', 'return', 'reverse', 'run', 'save', 'set',
+ 'simple-error', 'snd', 'specialise', 'spy', 'step', 'stinput', 'stoutput',
+ 'str', 'string->n', 'string->symbol', 'string?', 'subst', 'symbol?',
+ 'systemf', 'tail', 'tc', 'tc?', 'thaw', 'tl', 'tlstr', 'tlv', 'track',
+ 'tuple?', 'undefmacro', 'unify', 'unify!', 'union', 'unprofile',
+ 'unspecialise', 'untrack', 'variable?', 'vector', 'vector->', 'vector?',
+ 'verified', 'version', 'warn', 'when', 'write-byte', 'write-to-file',
+ 'y-or-n?',
+ )
+
+ BUILTINS_ANYWHERE = ('where', 'skip', '>>', '_', '!', '<e>', '<!>')
MAPPINGS = dict((s, Keyword) for s in DECLARATIONS)
MAPPINGS.update((s, Name.Builtin) for s in BUILTINS)
MAPPINGS.update((s, Keyword) for s in SPECIAL_FORMS)
- valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:_-]'
+ valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:-]'
valid_name = '%s+' % valid_symbol_chars
symbol_name = r'[a-z!$%%*+,<=>?/.\'@&#_-]%s*' % valid_symbol_chars
variable = r'[A-Z]%s*' % valid_symbol_chars
@@ -2313,7 +2316,7 @@ class CPSALexer(SchemeLexer):
# valid names for identifiers
# well, names simply may not consist entirely of numbers,
# but this should be good enough for now
- valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+'
+ valid_name = r'[\w!$%&*+,/:<=>?@^~|-]+'
tokens = {
'root': [
@@ -2334,7 +2337,7 @@ class CPSALexer(SchemeLexer):
# strings, symbols and characters
(r'"(\\\\|\\"|[^"])*"', String),
(r"'" + valid_name, String.Symbol),
- (r"#\\([()/'\"._!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char),
+ (r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char),
# constants
(r'(#t|#f)', Name.Constant),
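
Spelling the Shen word lists as plain tuples drops a needless regex scan
at import time and makes the entries greppable; the two forms are
interchangeable:

    import re
    assert tuple(re.findall(r'\S+', '''
        datatype define defmacro
    ''')) == ('datatype', 'define', 'defmacro')
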
diff --git a/pygments/lexers/modula2.py b/pygments/lexers/modula2.py
index a5fcbf78..01771f55 100644
--- a/pygments/lexers/modula2.py
+++ b/pygments/lexers/modula2.py
@@ -290,7 +290,7 @@ class Modula2Lexer(RegexLexer):
],
'unigraph_punctuation': [
# Common Punctuation
- (r'[\(\)\[\]{},.:;\|]', Punctuation),
+ (r'[()\[\]{},.:;|]', Punctuation),
# Case Label Separator Synonym
(r'!', Punctuation), # ISO
# Blueprint Punctuation
diff --git a/pygments/lexers/oberon.py b/pygments/lexers/oberon.py
index db18259d..51dfdab6 100644
--- a/pygments/lexers/oberon.py
+++ b/pygments/lexers/oberon.py
@@ -47,11 +47,11 @@ class ComponentPascalLexer(RegexLexer):
(r'\s+', Text), # whitespace
],
'comments': [
- (r'\(\*([^\$].*?)\*\)', Comment.Multiline),
+ (r'\(\*([^$].*?)\*\)', Comment.Multiline),
# TODO: nested comments (* (* ... *) ... (* ... *) *) not supported!
],
'punctuation': [
- (r'[\(\)\[\]\{\},.:;\|]', Punctuation),
+ (r'[()\[\]{},.:;|]', Punctuation),
],
'numliterals': [
(r'[0-9A-F]+X\b', Number.Hex), # char code
@@ -83,7 +83,7 @@ class ComponentPascalLexer(RegexLexer):
(r'\$', Operator),
],
'identifiers': [
- (r'([a-zA-Z_\$][\w\$]*)', Name),
+ (r'([a-zA-Z_$][\w$]*)', Name),
],
'builtins': [
(words((
diff --git a/pygments/lexers/parasail.py b/pygments/lexers/parasail.py
index 878f7d26..812e2923 100644
--- a/pygments/lexers/parasail.py
+++ b/pygments/lexers/parasail.py
@@ -60,7 +60,7 @@ class ParaSailLexer(RegexLexer):
(r'[a-zA-Z]\w*', Name),
# Operators and Punctuation
(r'(<==|==>|<=>|\*\*=|<\|=|<<=|>>=|==|!=|=\?|<=|>=|'
- r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\||\|=|/=|\+|-|\*|/|'
+ r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\|=|\||/=|\+|-|\*|/|'
r'\.\.|<\.\.|\.\.<|<\.\.<)',
Operator),
(r'(<|>|\[|\]|\(|\)|\||:|;|,|.|\{|\}|->)',
diff --git a/pygments/lexers/perl.py b/pygments/lexers/perl.py
index b78963d0..8df3c810 100644
--- a/pygments/lexers/perl.py
+++ b/pygments/lexers/perl.py
@@ -109,7 +109,8 @@ class PerlLexer(RegexLexer):
'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
Name.Builtin),
(r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
- (r'<<([\'"]?)([a-zA-Z_]\w*)\1;?\n.*?\n\2\n', String),
+ (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
+ bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)),
(r'__END__', Comment.Preproc, 'end-part'),
(r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
(r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
diff --git a/pygments/lexers/php.py b/pygments/lexers/php.py
index 257dd94f..2421738f 100644
--- a/pygments/lexers/php.py
+++ b/pygments/lexers/php.py
@@ -138,7 +138,9 @@ class PhpLexer(RegexLexer):
],
'php': [
(r'\?>', Comment.Preproc, '#pop'),
- (r'<<<([\'"]?)(' + _ident_inner + r')\1\n.*?\n\s*\2;?\n', String),
+ (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
+ bygroups(String, String, String.Delimiter, String, String.Delimiter,
+ Punctuation, Text)),
(r'\s+', Text),
(r'#.*?\n', Comment.Single),
(r'//.*?\n', Comment.Single),
diff --git a/pygments/lexers/praat.py b/pygments/lexers/praat.py
index 776c38b8..9255216d 100644
--- a/pygments/lexers/praat.py
+++ b/pygments/lexers/praat.py
@@ -27,21 +27,21 @@ class PraatLexer(RegexLexer):
aliases = ['praat']
filenames = ['*.praat', '*.proc', '*.psc']
- keywords = [
+ keywords = (
'if', 'then', 'else', 'elsif', 'elif', 'endif', 'fi', 'for', 'from', 'to',
'endfor', 'endproc', 'while', 'endwhile', 'repeat', 'until', 'select', 'plus',
'minus', 'demo', 'assert', 'stopwatch', 'nocheck', 'nowarn', 'noprogress',
'editor', 'endeditor', 'clearinfo',
- ]
+ )
- functions_string = [
+ functions_string = (
'backslashTrigraphsToUnicode', 'chooseDirectory', 'chooseReadFile',
'chooseWriteFile', 'date', 'demoKey', 'do', 'environment', 'extractLine',
'extractWord', 'fixed', 'info', 'left', 'mid', 'percent', 'readFile', 'replace',
'replace_regex', 'right', 'selected', 'string', 'unicodeToBackslashTrigraphs',
- ]
+ )
- functions_numeric = [
+ functions_numeric = (
'abs', 'appendFile', 'appendFileLine', 'appendInfo', 'appendInfoLine', 'arccos',
'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'barkToHertz',
'beginPause', 'beginSendPraat', 'besselI', 'besselK', 'beta', 'beta2',
@@ -67,13 +67,13 @@ class PraatLexer(RegexLexer):
'sincpi', 'sinh', 'soundPressureToPhon', 'sqrt', 'startsWith', 'studentP',
'studentQ', 'tan', 'tanh', 'variableExists', 'word', 'writeFile', 'writeFileLine',
'writeInfo', 'writeInfoLine',
- ]
+ )
- functions_array = [
+ functions_array = (
'linear', 'randomGauss', 'randomInteger', 'randomUniform', 'zero',
- ]
+ )
- objects = [
+ objects = (
'Activation', 'AffineTransform', 'AmplitudeTier', 'Art', 'Artword',
'Autosegment', 'BarkFilter', 'BarkSpectrogram', 'CCA', 'Categories',
'Cepstrogram', 'Cepstrum', 'Cepstrumc', 'ChebyshevSeries', 'ClassificationTable',
@@ -100,17 +100,17 @@ class PraatLexer(RegexLexer):
'Strings', 'StringsIndex', 'Table', 'TableOfReal', 'TextGrid', 'TextInterval',
'TextPoint', 'TextTier', 'Tier', 'Transition', 'VocalTract', 'VocalTractTier',
'Weight', 'WordList',
- ]
+ )
- variables_numeric = [
+ variables_numeric = (
'macintosh', 'windows', 'unix', 'praatVersion', 'pi', 'e', 'undefined',
- ]
+ )
- variables_string = [
+ variables_string = (
'praatVersion', 'tab', 'shellDirectory', 'homeDirectory',
'preferencesDirectory', 'newline', 'temporaryDirectory',
'defaultDirectory',
- ]
+ )
tokens = {
'root': [
@@ -151,7 +151,7 @@ class PraatLexer(RegexLexer):
(r"'(?=.*')", String.Interpol, 'string_interpolated'),
(r'\.{3}', Keyword, ('#pop', 'old_arguments')),
(r':', Keyword, ('#pop', 'comma_list')),
- (r'[\s\n]', Text, '#pop'),
+ (r'\s', Text, '#pop'),
],
'procedure_call': [
(r'\s+', Text),
@@ -230,7 +230,7 @@ class PraatLexer(RegexLexer):
bygroups(Name.Builtin, Name.Builtin, String.Interpol),
('object_attributes', 'string_interpolated')),
- (r'\.?_?[a-z][a-zA-Z0-9_.]*(\$|#)?', Text),
+ (r'\.?_?[a-z][\w.]*(\$|#)?', Text),
(r'[\[\]]', Punctuation, 'comma_list'),
(r"'(?=.*')", String.Interpol, 'string_interpolated'),
],
@@ -239,7 +239,7 @@ class PraatLexer(RegexLexer):
(r'\b(and|or|not|div|mod)\b', Operator.Word),
],
'string_interpolated': [
- (r'\.?[_a-z][a-zA-Z0-9_.]*[\$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?',
+ (r'\.?[_a-z][\w.]*[$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?',
String.Interpol),
(r"'", String.Interpol, '#pop'),
],
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py
index 57af00e2..7601afa8 100644
--- a/pygments/lexers/python.py
+++ b/pygments/lexers/python.py
@@ -39,7 +39,7 @@ class PythonLexer(RegexLexer):
return [
# the old style '%s' % (...) string formatting
(r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
+ '[hlL]?[E-GXc-giorsux%]', String.Interpol),
# backslashes, quotes and formatting signs must be parsed one at a time
(r'[^\\\'"%\n]+', ttype),
(r'[\'"\\]', ttype),
@@ -51,8 +51,10 @@ class PythonLexer(RegexLexer):
tokens = {
'root': [
(r'\n', Text),
- (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
- (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
+ (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
+ bygroups(Text, String.Affix, String.Doc)),
+ (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
+ bygroups(Text, String.Affix, String.Doc)),
(r'[^\S\n]+', Text),
(r'\A#!.+$', Comment.Hashbang),
(r'#.*$', Comment.Single),
@@ -72,14 +74,22 @@ class PythonLexer(RegexLexer):
include('magicfuncs'),
include('magicvars'),
include('backtick'),
- ('(?:[rR]|[uU][rR]|[rR][uU])"""', String.Double, 'tdqs'),
- ("(?:[rR]|[uU][rR]|[rR][uU])'''", String.Single, 'tsqs'),
- ('(?:[rR]|[uU][rR]|[rR][uU])"', String.Double, 'dqs'),
- ("(?:[rR]|[uU][rR]|[rR][uU])'", String.Single, 'sqs'),
- ('[uU]?"""', String.Double, combined('stringescape', 'tdqs')),
- ("[uU]?'''", String.Single, combined('stringescape', 'tsqs')),
- ('[uU]?"', String.Double, combined('stringescape', 'dqs')),
- ("[uU]?'", String.Single, combined('stringescape', 'sqs')),
+ ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
+ bygroups(String.Affix, String.Double), 'tdqs'),
+ ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
+ bygroups(String.Affix, String.Single), 'tsqs'),
+ ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
+ bygroups(String.Affix, String.Double), 'dqs'),
+ ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
+ bygroups(String.Affix, String.Single), 'sqs'),
+ ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
+ combined('stringescape', 'tdqs')),
+ ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
+ combined('stringescape', 'tsqs')),
+ ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
+ combined('stringescape', 'dqs')),
+ ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
+ combined('stringescape', 'sqs')),
include('name'),
include('numbers'),
],
@@ -252,16 +262,16 @@ class Python3Lexer(RegexLexer):
return [
# the old style '%s' % (...) string formatting (still valid in Py3)
(r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
+ '[hlL]?[E-GXc-giorsux%]', String.Interpol),
# the new style '{}'.format(...) string formatting
(r'\{'
- '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name
- '(\![sra])?' # conversion
- '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[bcdeEfFgGnosxX%]?)?'
+ '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name
+ '(\![sra])?' # conversion
+ '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
'\}', String.Interpol),
# backslashes, quotes and formatting signs must be parsed one at a time
- (r'[^\\\'"%\{\n]+', ttype),
+ (r'[^\\\'"%{\n]+', ttype),
(r'[\'"\\]', ttype),
# unhandled string formatting sign
(r'%|(\{{1,2})', ttype)
@@ -700,7 +710,7 @@ class CythonLexer(RegexLexer):
],
'strings': [
(r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
+ '[hlL]?[E-GXc-giorsux%]', String.Interpol),
(r'[^\\\'"%\n]+', String),
# quotes, percents and backslashes must be parsed one at a time
(r'[\'"\\]', String),
@@ -771,18 +781,20 @@ class DgLexer(RegexLexer):
(words((
'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'',
'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object',
- 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', 'super',
- 'tuple', 'tuple\'', 'type'), prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
+ 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str',
+ 'super', 'tuple', 'tuple\'', 'type'),
+ prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
Name.Builtin),
(words((
'__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp', 'compile',
'complex', 'delattr', 'dir', 'divmod', 'drop', 'dropwhile', 'enumerate',
- 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst', 'getattr',
- 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init', 'input',
- 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len', 'locals',
- 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow', 'print', 'repr',
- 'reversed', 'round', 'setattr', 'scanl1?', 'snd', 'sorted', 'sum', 'tail',
- 'take', 'takewhile', 'vars', 'zip'), prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
+ 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst',
+ 'getattr', 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init',
+ 'input', 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len',
+ 'locals', 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow',
+ 'print', 'repr', 'reversed', 'round', 'setattr', 'scanl1?', 'snd',
+ 'sorted', 'sum', 'tail', 'take', 'takewhile', 'vars', 'zip'),
+ prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
Name.Builtin),
(r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])",
Name.Builtin.Pseudo),
@@ -808,7 +820,7 @@ class DgLexer(RegexLexer):
],
'string': [
(r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
- '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
+ '[hlL]?[E-GXc-giorsux%]', String.Interpol),
(r'[^\\\'"%\n]+', String),
# quotes, percents and backslashes must be parsed one at a time
(r'[\'"\\]', String),
diff --git a/pygments/lexers/qvt.py b/pygments/lexers/qvt.py
index 5bc61310..f30e4887 100644
--- a/pygments/lexers/qvt.py
+++ b/pygments/lexers/qvt.py
@@ -9,7 +9,8 @@
:license: BSD, see LICENSE for details.
"""
-from pygments.lexer import RegexLexer, bygroups, include, combined
+from pygments.lexer import RegexLexer, bygroups, include, combined, default, \
+ words
from pygments.token import Text, Comment, Operator, Keyword, Punctuation, \
Name, String, Number
@@ -50,23 +51,26 @@ class QVToLexer(RegexLexer):
bygroups(Comment, Comment, Comment.Preproc, Comment)),
# Uncomment the following if you want to distinguish between
# '/*' and '/**', à la javadoc
- #(r'/[*]{2}(.|\n)*?[*]/', Comment.Multiline),
+ # (r'/[*]{2}(.|\n)*?[*]/', Comment.Multiline),
(r'/[*](.|\n)*?[*]/', Comment.Multiline),
(r'\\\n', Text),
(r'(and|not|or|xor|##?)\b', Operator.Word),
- (r'([:]{1-2}=|[-+]=)\b', Operator.Word),
- (r'(@|<<|>>)\b', Keyword), # stereotypes
- (r'!=|<>|=|==|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator),
+ (r'(:{1,2}=|[-+]=)\b', Operator.Word),
+ (r'(@|<<|>>)\b', Keyword), # stereotypes
+ (r'!=|<>|==|=|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator),
(r'[]{}:(),;[]', Punctuation),
(r'(true|false|unlimited|null)\b', Keyword.Constant),
(r'(this|self|result)\b', Name.Builtin.Pseudo),
(r'(var)\b', Keyword.Declaration),
(r'(from|import)\b', Keyword.Namespace, 'fromimport'),
- (r'(metamodel|class|exception|primitive|enum|transformation|library)(\s+)([a-zA-Z0-9_]+)',
+ (r'(metamodel|class|exception|primitive|enum|transformation|'
+ r'library)(\s+)(\w+)',
bygroups(Keyword.Word, Text, Name.Class)),
- (r'(exception)(\s+)([a-zA-Z0-9_]+)', bygroups(Keyword.Word, Text, Name.Exception)),
+ (r'(exception)(\s+)(\w+)',
+ bygroups(Keyword.Word, Text, Name.Exception)),
(r'(main)\b', Name.Function),
- (r'(mapping|helper|query)(\s+)', bygroups(Keyword.Declaration, Text), 'operation'),
+ (r'(mapping|helper|query)(\s+)',
+ bygroups(Keyword.Declaration, Text), 'operation'),
(r'(assert)(\s+)\b', bygroups(Keyword, Text), 'assert'),
(r'(Bag|Collection|Dict|OrderedSet|Sequence|Set|Tuple|List)\b',
Keyword.Type),
@@ -75,46 +79,45 @@ class QVToLexer(RegexLexer):
("'", String, combined('stringescape', 'sqs')),
include('name'),
include('numbers'),
- # (r'([a-zA-Z_][a-zA-Z0-9_]*)(::)([a-zA-Z_][a-zA-Z0-9_]*)',
+ # (r'([a-zA-Z_]\w*)(::)([a-zA-Z_]\w*)',
# bygroups(Text, Text, Text)),
- ],
+ ],
'fromimport': [
(r'(?:[ \t]|\\\n)+', Text),
- (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
- (r'', Text, '#pop'),
- ],
+ (r'[a-zA-Z_][\w.]*', Name.Namespace),
+ default('#pop'),
+ ],
'operation': [
(r'::', Text),
- (r'(.*::)([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*(\()', bygroups(Text,Name.Function, Text), '#pop')
- ],
+ (r'(.*::)([a-zA-Z_]\w*)([ \t]*)(\()',
+ bygroups(Text, Name.Function, Text, Punctuation), '#pop')
+ ],
'assert': [
(r'(warning|error|fatal)\b', Keyword, '#pop'),
- (r'', Text, '#pop') # all else: go back
- ],
+ default('#pop'), # all else: go back
+ ],
'keywords': [
- (r'(abstract|access|any|assert|'
- r'blackbox|break|case|collect|collectNested|'
- r'collectOne|collectselect|collectselectOne|composes|'
- r'compute|configuration|constructor|continue|datatype|'
- r'default|derived|disjuncts|do|elif|else|end|'
- r'endif|except|exists|extends|'
- r'forAll|forEach|forOne|from|if|'
- r'implies|in|inherits|init|inout|'
- r'intermediate|invresolve|invresolveIn|invresolveone|'
- r'invresolveoneIn|isUnique|iterate|late|let|'
- r'literal|log|map|merges|'
- r'modeltype|new|object|one|'
- r'ordered|out|package|population|'
- r'property|raise|readonly|references|refines|'
- r'reject|resolve|resolveIn|resolveone|resolveoneIn|'
- r'return|select|selectOne|sortedBy|static|switch|'
- r'tag|then|try|typedef|'
- r'unlimited|uses|when|where|while|with|'
- r'xcollect|xmap|xselect)\b', Keyword),
+ (words((
+ 'abstract', 'access', 'any', 'assert', 'blackbox', 'break',
+ 'case', 'collect', 'collectNested', 'collectOne', 'collectselect',
+ 'collectselectOne', 'composes', 'compute', 'configuration',
+ 'constructor', 'continue', 'datatype', 'default', 'derived',
+ 'disjuncts', 'do', 'elif', 'else', 'end', 'endif', 'except',
+ 'exists', 'extends', 'forAll', 'forEach', 'forOne', 'from', 'if',
+ 'implies', 'in', 'inherits', 'init', 'inout', 'intermediate',
+ 'invresolve', 'invresolveIn', 'invresolveone', 'invresolveoneIn',
+ 'isUnique', 'iterate', 'late', 'let', 'literal', 'log', 'map',
+ 'merges', 'modeltype', 'new', 'object', 'one', 'ordered', 'out',
+ 'package', 'population', 'property', 'raise', 'readonly',
+ 'references', 'refines', 'reject', 'resolve', 'resolveIn',
+ 'resolveone', 'resolveoneIn', 'return', 'select', 'selectOne',
+ 'sortedBy', 'static', 'switch', 'tag', 'then', 'try', 'typedef',
+ 'unlimited', 'uses', 'when', 'where', 'while', 'with', 'xcollect',
+ 'xmap', 'xselect'), suffix=r'\b'), Keyword),
],
# There is no need to distinguish between String.Single and
@@ -127,18 +130,18 @@ class QVToLexer(RegexLexer):
'stringescape': [
(r'\\([\\btnfr"\']|u[0-3][0-7]{2}|u[0-7]{1,2})', String.Escape)
],
- 'dqs': [ # double-quoted string
+ 'dqs': [ # double-quoted string
(r'"', String, '#pop'),
(r'\\\\|\\"', String.Escape),
include('strings')
],
- 'sqs': [ # single-quoted string
+ 'sqs': [ # single-quoted string
(r"'", String, '#pop'),
(r"\\\\|\\'", String.Escape),
include('strings')
],
'name': [
- ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
+ ('[a-zA-Z_]\w*', Name),
],
# numbers: excerpt taken from the python lexer
'numbers': [
@@ -146,5 +149,4 @@ class QVToLexer(RegexLexer):
(r'\d+[eE][+-]?[0-9]+', Number.Float),
(r'\d+', Number.Integer)
],
- }
-
+ }
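
Among these cleanups, note [:]{1-2}= becoming :{1,2}=: {1-2} is not a
valid quantifier in Python regexes and was silently treated as the
literal text '{1-2}', so the old rule could never match ':=' or '::='.
Standalone check:

    import re
    assert re.match(r'[:]{1-2}=', '::=') is None  # '{1-2}' taken literally
    assert re.match(r':{1,2}=', '::=')
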
diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py
index 103b4ad0..6dd6e8b9 100644
--- a/pygments/lexers/rdf.py
+++ b/pygments/lexers/rdf.py
@@ -42,8 +42,7 @@ class SparqlLexer(RegexLexer):
u'\u2c00-\u2fef'
u'\u3001-\ud7ff'
u'\uf900-\ufdcf'
- u'\ufdf0-\ufffd'
- u'\U00010000-\U000effff')
+ u'\ufdf0-\ufffd')
PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')
@@ -56,7 +55,7 @@ class SparqlLexer(RegexLexer):
HEX_GRP = '0-9A-Fa-f'
- PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&""()*+,;=/?#@%'
+ PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&"()*+,;=/?#@%'
# terminal productions ::
@@ -191,7 +190,7 @@ class TurtleLexer(RegexLexer):
flags = re.IGNORECASE
patterns = {
- 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range
+ 'PNAME_NS': r'((?:[a-z][\w-]*)?\:)', # Simplified character range
'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
}
@@ -258,8 +257,7 @@ class TurtleLexer(RegexLexer):
(r'.', String, '#pop'),
],
'end-of-string': [
-
- (r'(@)([a-zA-Z]+(:?-[a-zA-Z0-9]+)*)',
+ (r'(@)([a-z]+(:?-[a-z0-9]+)*)',
bygroups(Operator, Generic.Emph), '#pop:2'),
(r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),
diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py
index e81d6ecf..f16416d3 100644
--- a/pygments/lexers/ruby.py
+++ b/pygments/lexers/ruby.py
@@ -47,9 +47,9 @@ class RubyLexer(ExtendedRegexLexer):
start = match.start(1)
yield start, Operator, match.group(1) # <<-?
- yield match.start(2), String.Heredoc, match.group(2) # quote ", ', `
- yield match.start(3), Name.Constant, match.group(3) # heredoc name
- yield match.start(4), String.Heredoc, match.group(4) # quote again
+ yield match.start(2), String.Heredoc, match.group(2) # quote ", ', `
+ yield match.start(3), String.Delimiter, match.group(3) # heredoc name
+ yield match.start(4), String.Heredoc, match.group(4) # quote again
heredocstack = ctx.__dict__.setdefault('heredocstack', [])
outermost = not bool(heredocstack)
@@ -74,7 +74,7 @@ class RubyLexer(ExtendedRegexLexer):
if check == hdname:
for amatch in lines:
yield amatch.start(), String.Heredoc, amatch.group()
- yield match.start(), Name.Constant, match.group()
+ yield match.start(), String.Delimiter, match.group()
ctx.pos = match.end()
break
else:
diff --git a/pygments/lexers/scripting.py b/pygments/lexers/scripting.py
index 4dd9594b..ac0f7533 100644
--- a/pygments/lexers/scripting.py
+++ b/pygments/lexers/scripting.py
@@ -1020,11 +1020,11 @@ class EasytrieveLexer(RegexLexer):
(r"'(''|[^'])*'", String),
(r'\s+', Whitespace),
# Everything else just belongs to a name
- (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name)
+ (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name),
],
'after_declaration': [
(_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Function),
- ('', Whitespace, '#pop')
+ default('#pop'),
],
'after_macro_argument': [
(r'\*.*\n', Comment.Single, '#pop'),
@@ -1032,7 +1032,7 @@ class EasytrieveLexer(RegexLexer):
(_OPERATORS_PATTERN, Operator, '#pop'),
(r"'(''|[^'])*'", String, '#pop'),
# Everything else just belongs to a name
- (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name)
+ (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name),
],
}
_COMMENT_LINE_REGEX = re.compile(r'^\s*\*')
@@ -1122,7 +1122,8 @@ class EasytrieveLexer(RegexLexer):
class JclLexer(RegexLexer):
"""
- `Job Control Language (JCL) <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_
+ `Job Control Language (JCL)
+ <http://publibz.boulder.ibm.com/cgi-bin/bookmgr_OS390/BOOKS/IEA2B570/CCONTENTS>`_
is a scripting language used on mainframe platforms to instruct the system
on how to run a batch job or start a subsystem. It is somewhat
comparable to MS DOS batch and Unix shell scripts.
@@ -1145,10 +1146,10 @@ class JclLexer(RegexLexer):
],
'statement': [
(r'\s*\n', Whitespace, '#pop'),
- (r'([a-z][a-z_0-9]*)(\s+)(exec|job)(\s*)',
+ (r'([a-z]\w*)(\s+)(exec|job)(\s*)',
bygroups(Name.Label, Whitespace, Keyword.Reserved, Whitespace),
'option'),
- (r'[a-z][a-z_0-9]*', Name.Variable, 'statement_command'),
+ (r'[a-z]\w*', Name.Variable, 'statement_command'),
(r'\s+', Whitespace, 'statement_command'),
],
'statement_command': [
@@ -1167,10 +1168,10 @@ class JclLexer(RegexLexer):
(r'\*', Name.Builtin),
(r'[\[\](){}<>;,]', Punctuation),
(r'[-+*/=&%]', Operator),
- (r'[a-z_][a-z_0-9]*', Name),
- (r'[0-9]+\.[0-9]*', Number.Float),
- (r'\.[0-9]+', Number.Float),
- (r'[0-9]+', Number.Integer),
+ (r'[a-z_]\w*', Name),
+ (r'\d+\.\d*', Number.Float),
+ (r'\.\d+', Number.Float),
+ (r'\d+', Number.Integer),
(r"'", String, 'option_string'),
(r'[ \t]+', Whitespace, 'option_comment'),
(r'\.', Punctuation),
diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py
index 05503c3a..7c06226b 100644
--- a/pygments/lexers/sql.py
+++ b/pygments/lexers/sql.py
@@ -57,11 +57,14 @@ line_re = re.compile('.*?\n')
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)
+do_re = re.compile(r'\bDO\b', re.IGNORECASE)
+
def language_callback(lexer, match):
"""Parse the content of a $-string using a lexer
- The lexer is chosen looking for a nearby LANGUAGE.
+ The lexer is chosen by looking for a nearby LANGUAGE clause; when
+ none is found but the string sits inside a DO statement, plpgsql
+ is assumed.
"""
l = None
m = language_re.match(lexer.text[match.end():match.end()+100])
@@ -72,15 +75,26 @@ def language_callback(lexer, match):
lexer.text[max(0, match.start()-100):match.start()]))
if m:
l = lexer._get_lexer(m[-1].group(1))
-
+ else:
+ m = list(do_re.finditer(
+ lexer.text[max(0, match.start()-25):match.start()]))
+ if m:
+ l = lexer._get_lexer('plpgsql')
+
+ # 1 = $, 2 = delimiter, 3 = $
+ yield (match.start(1), String, match.group(1))
+ yield (match.start(2), String.Delimiter, match.group(2))
+ yield (match.start(3), String, match.group(3))
+ # 4 = string contents
if l:
- yield (match.start(1), String, match.group(1))
- for x in l.get_tokens_unprocessed(match.group(2)):
+ for x in l.get_tokens_unprocessed(match.group(4)):
yield x
- yield (match.start(3), String, match.group(3))
-
else:
- yield (match.start(), String, match.group())
+ yield (match.start(4), String, match.group(4))
+ # 5 = $, 6 = delimiter, 7 = $
+ yield (match.start(5), String, match.group(5))
+ yield (match.start(6), String.Delimiter, match.group(6))
+ yield (match.start(7), String, match.group(7))
class PostgresBase(object):
@@ -148,9 +162,10 @@ class PostgresLexer(PostgresBase, RegexLexer):
(r'\$\d+', Name.Variable),
(r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
(r'[0-9]+', Number.Integer),
- (r"(E|U&)?'(''|[^'])*'", String.Single),
- (r'(U&)?"(""|[^"])*"', String.Name), # quoted identifier
- (r'(?s)(\$[^$]*\$)(.*?)(\1)', language_callback),
+ (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
+ # quoted identifier
+ (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
+ (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
(r'[a-z_]\w*', Name),
# psql variable in SQL
@@ -164,6 +179,16 @@ class PostgresLexer(PostgresBase, RegexLexer):
(r'[^/*]+', Comment.Multiline),
(r'[/*]', Comment.Multiline)
],
+ 'string': [
+ (r"[^']+", String.Single),
+ (r"''", String.Single),
+ (r"'", String.Single, '#pop'),
+ ],
+ 'quoted-ident': [
+ (r'[^"]+', String.Name),
+ (r'""', String.Name),
+ (r'"', String.Name, '#pop'),
+ ],
}
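
With the delimiter groups split out, the body of a dollar-quoted string is
handed to a sub-lexer picked from a nearby LANGUAGE clause, falling back
to plpgsql inside a DO block. A rough usage sketch (assuming a build with
this patch):

    from pygments.lexers.sql import PostgresLexer

    sql = "DO $$ BEGIN PERFORM 1; END $$;"
    for pos, token, value in PostgresLexer().get_tokens_unprocessed(sql):
        print(pos, token, repr(value))
    # the $$ pairs come out as String.Delimiter and the body is
    # highlighted as plpgsql
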
diff --git a/pygments/lexers/supercollider.py b/pygments/lexers/supercollider.py
index d3e4c460..cef147b8 100644
--- a/pygments/lexers/supercollider.py
+++ b/pygments/lexers/supercollider.py
@@ -11,7 +11,7 @@
import re
-from pygments.lexer import RegexLexer, include, words
+from pygments.lexer import RegexLexer, include, words, default
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation
@@ -43,7 +43,7 @@ class SuperColliderLexer(RegexLexer):
(r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
r'([gim]+\b|\B)', String.Regex, '#pop'),
(r'(?=/)', Text, ('#pop', 'badregex')),
- (r'', Text, '#pop')
+ default('#pop'),
],
'badregex': [
(r'\n', Text, '#pop')
@@ -79,8 +79,8 @@ class SuperColliderLexer(RegexLexer):
'thisFunctionDef', 'thisFunction', 'thisMethod', 'thisProcess',
'thisThread', 'this'), suffix=r'\b'),
Name.Builtin),
- (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other),
- (r'\\?[$a-zA-Z_][a-zA-Z0-9_]*', String.Symbol),
+ (r'[$a-zA-Z_]\w*', Name.Other),
+ (r'\\?[$a-zA-Z_]\w*', String.Symbol),
(r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'[0-9]+', Number.Integer),
diff --git a/pygments/lexers/testing.py b/pygments/lexers/testing.py
index 0bdebe74..be8b6f71 100644
--- a/pygments/lexers/testing.py
+++ b/pygments/lexers/testing.py
@@ -147,7 +147,7 @@ class TAPLexer(RegexLexer):
(r'^TAP version \d+\n', Name.Namespace),
# Specify a plan with a plan line.
- (r'^1..\d+', Keyword.Declaration, 'plan'),
+ (r'^1\.\.\d+', Keyword.Declaration, 'plan'),
# A test failure
(r'^(not ok)([^\S\n]*)(\d*)',
diff --git a/pygments/lexers/theorem.py b/pygments/lexers/theorem.py
index 60a101cc..f8c7d0a9 100644
--- a/pygments/lexers/theorem.py
+++ b/pygments/lexers/theorem.py
@@ -390,20 +390,23 @@ class LeanLexer(RegexLexer):
flags = re.MULTILINE | re.UNICODE
- keywords1 = ('import', 'abbreviation', 'opaque_hint', 'tactic_hint', 'definition', 'renaming',
- 'inline', 'hiding', 'exposing', 'parameter', 'parameters', 'conjecture',
- 'hypothesis', 'lemma', 'corollary', 'variable', 'variables', 'print', 'theorem',
- 'axiom', 'inductive', 'structure', 'universe', 'alias', 'help',
- 'options', 'precedence', 'postfix', 'prefix', 'calc_trans', 'calc_subst', 'calc_refl',
- 'infix', 'infixl', 'infixr', 'notation', 'eval', 'check', 'exit', 'coercion', 'end',
- 'private', 'using', 'namespace', 'including', 'instance', 'section', 'context',
- 'protected', 'expose', 'export', 'set_option', 'add_rewrite', 'extends',
- 'open', 'example', 'constant', 'constants', 'print', 'opaque', 'reducible', 'irreducible'
+ keywords1 = (
+ 'import', 'abbreviation', 'opaque_hint', 'tactic_hint', 'definition',
+ 'renaming', 'inline', 'hiding', 'exposing', 'parameter', 'parameters',
+ 'conjecture', 'hypothesis', 'lemma', 'corollary', 'variable', 'variables',
+ 'print', 'theorem', 'axiom', 'inductive', 'structure', 'universe', 'alias',
+ 'help', 'options', 'precedence', 'postfix', 'prefix', 'calc_trans',
+ 'calc_subst', 'calc_refl', 'infix', 'infixl', 'infixr', 'notation', 'eval',
+ 'check', 'exit', 'coercion', 'end', 'private', 'using', 'namespace',
+ 'including', 'instance', 'section', 'context', 'protected', 'expose',
+ 'export', 'set_option', 'add_rewrite', 'extends', 'open', 'example',
+ 'constant', 'constants', 'print', 'opaque', 'reducible', 'irreducible',
)
keywords2 = (
- 'forall', 'fun', 'Pi', 'obtain', 'from', 'have', 'show', 'assume', 'take',
- 'let', 'if', 'else', 'then', 'by', 'in', 'with', 'begin', 'proof', 'qed', 'calc', 'match'
+ 'forall', 'fun', 'Pi', 'obtain', 'from', 'have', 'show', 'assume',
+ 'take', 'let', 'if', 'else', 'then', 'by', 'in', 'with', 'begin',
+ 'proof', 'qed', 'calc', 'match',
)
keywords3 = (
@@ -414,10 +417,10 @@ class LeanLexer(RegexLexer):
operators = (
'!=', '#', '&', '&&', '*', '+', '-', '/', '@', '!', '`',
'-.', '->', '.', '..', '...', '::', ':>', ';', ';;', '<',
- '<-', '=', '==', '>', '_', '`', '|', '||', '~', '=>', '<=', '>=',
+ '<-', '=', '==', '>', '_', '|', '||', '~', '=>', '<=', '>=',
'/\\', '\\/', u'∀', u'Π', u'λ', u'↔', u'∧', u'∨', u'≠', u'≤', u'≥',
- u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞', u'⌟', u'≡',
- u'⟨', u'⟩'
+ u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞',
+ u'⌟', u'≡', u'⟨', u'⟩',
)
punctuation = ('(', ')', ':', '{', '}', '[', ']', u'⦃', u'⦄', ':=', ',')
diff --git a/pygments/lexers/typoscript.py b/pygments/lexers/typoscript.py
index 25bfef9c..407847ed 100644
--- a/pygments/lexers/typoscript.py
+++ b/pygments/lexers/typoscript.py
@@ -44,11 +44,11 @@ class TypoScriptCssDataLexer(RegexLexer):
# marker: ###MARK###
(r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)),
# constant: {$some.constant}
- (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})',
+ (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})',
bygroups(String.Symbol, Operator, Name.Constant,
Name.Constant, String.Symbol)), # constant
# constant: {register:somevalue}
- (r'(.*)(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})(.*)',
+ (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)',
bygroups(String, String.Symbol, Name.Constant, Operator,
Name.Constant, String.Symbol, String)), # constant
# whitespace
@@ -58,8 +58,8 @@ class TypoScriptCssDataLexer(RegexLexer):
(r'(?<!(#|\'|"))(?:#(?!(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3}))[^\n#]+|//[^\n]*)',
Comment),
# other
- (r'[<>,:=\.\*%+\|]', String),
- (r'[\w"_\-!\/&;\(\)\{\}]+', String),
+ (r'[<>,:=.*%+|]', String),
+ (r'[\w"\-!/&;(){}]+', String),
]
}
@@ -79,22 +79,22 @@ class TypoScriptHtmlDataLexer(RegexLexer):
# INCLUDE_TYPOSCRIPT
(r'(INCLUDE_TYPOSCRIPT)', Name.Class),
# Language label or extension resource FILE:... or LLL:... or EXT:...
- (r'(EXT|FILE|LLL):[^\}\n"]*', String),
+ (r'(EXT|FILE|LLL):[^}\n"]*', String),
# marker: ###MARK###
(r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)),
# constant: {$some.constant}
- (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})',
+ (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})',
bygroups(String.Symbol, Operator, Name.Constant,
Name.Constant, String.Symbol)), # constant
# constant: {register:somevalue}
- (r'(.*)(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})(.*)',
+ (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)',
bygroups(String, String.Symbol, Name.Constant, Operator,
Name.Constant, String.Symbol, String)), # constant
# whitespace
(r'\s+', Text),
# other
- (r'[<>,:=\.\*%+\|]', String),
- (r'[\w"_\-!\/&;\(\)\{\}#]+', String),
+ (r'[<>,:=.*%+|]', String),
+ (r'[\w"\-!/&;(){}#]+', String),
]
}
@@ -138,38 +138,38 @@ class TypoScriptLexer(RegexLexer):
r'version)([^\]]*)(\])',
bygroups(String.Symbol, Name.Constant, Text, String.Symbol)),
# Functions
- (r'(?=[\w\-_])(HTMLparser|HTMLparser_tags|addParams|cache|encapsLines|'
+ (r'(?=[\w\-])(HTMLparser|HTMLparser_tags|addParams|cache|encapsLines|'
r'filelink|if|imageLinkWrap|imgResource|makelinks|numRows|numberFormat|'
r'parseFunc|replacement|round|select|split|stdWrap|strPad|tableStyle|'
- r'tags|textStyle|typolink)(?![\w\-_])', Name.Function),
+ r'tags|textStyle|typolink)(?![\w\-])', Name.Function),
# Toplevel objects and _*
(r'(?:(=?\s*<?\s+|^\s*))(cObj|field|config|content|constants|FEData|'
r'file|frameset|includeLibs|lib|page|plugin|register|resources|sitemap|'
- r'sitetitle|styles|temp|tt_[^:\.\n\s]*|types|xmlnews|INCLUDE_TYPOSCRIPT|'
- r'_CSS_DEFAULT_STYLE|_DEFAULT_PI_VARS|_LOCAL_LANG)(?![\w\-_])',
+ r'sitetitle|styles|temp|tt_[^:.\s]*|types|xmlnews|INCLUDE_TYPOSCRIPT|'
+ r'_CSS_DEFAULT_STYLE|_DEFAULT_PI_VARS|_LOCAL_LANG)(?![\w\-])',
bygroups(Operator, Name.Builtin)),
# Content objects
- (r'(?=[\w\-_])(CASE|CLEARGIF|COA|COA_INT|COBJ_ARRAY|COLUMNS|CONTENT|'
+ (r'(?=[\w\-])(CASE|CLEARGIF|COA|COA_INT|COBJ_ARRAY|COLUMNS|CONTENT|'
r'CTABLE|EDITPANEL|FILE|FILES|FLUIDTEMPLATE|FORM|HMENU|HRULER|HTML|'
r'IMAGE|IMGTEXT|IMG_RESOURCE|LOAD_REGISTER|MEDIA|MULTIMEDIA|OTABLE|'
r'PAGE|QTOBJECT|RECORDS|RESTORE_REGISTER|SEARCHRESULT|SVG|SWFOBJECT|'
- r'TEMPLATE|TEXT|USER|USER_INT)(?![\w\-_])', Name.Class),
+ r'TEMPLATE|TEXT|USER|USER_INT)(?![\w\-])', Name.Class),
# Menu states
- (r'(?=[\w\-_])(ACT|ACTIFSUB|ACTIFSUBRO|ACTRO|CUR|CURIFSUB|CURIFSUBRO|'
- r'CURRO|IFSUB|IFSUBRO|NO|SPC|USERDEF1|USERDEF1RO|USERDEF2|USERDEF2RO|'
- r'USR|USRRO)', Name.Class),
+ (r'(?=[\w\-])(ACTIFSUBRO|ACTIFSUB|ACTRO|ACT|CURIFSUBRO|CURIFSUB|CURRO|'
+ r'CUR|IFSUBRO|IFSUB|NO|SPC|USERDEF1RO|USERDEF1|USERDEF2RO|USERDEF2|'
+ r'USRRO|USR)', Name.Class),
# Menu objects
- (r'(?=[\w\-_])(GMENU|GMENU_FOLDOUT|GMENU_LAYERS|IMGMENU|IMGMENUITEM|'
- r'JSMENU|JSMENUITEM|TMENU|TMENUITEM|TMENU_LAYERS)', Name.Class),
+ (r'(?=[\w\-])(GMENU_FOLDOUT|GMENU_LAYERS|GMENU|IMGMENUITEM|IMGMENU|'
+ r'JSMENUITEM|JSMENU|TMENUITEM|TMENU_LAYERS|TMENU)', Name.Class),
# PHP objects
- (r'(?=[\w\-_])(PHP_SCRIPT(_EXT|_INT)?)', Name.Class),
- (r'(?=[\w\-_])(userFunc)(?![\w\-_])', Name.Function),
+ (r'(?=[\w\-])(PHP_SCRIPT(_EXT|_INT)?)', Name.Class),
+ (r'(?=[\w\-])(userFunc)(?![\w\-])', Name.Function),
],
'whitespace': [
(r'\s+', Text),
],
'html':[
- (r'<[^\s][^\n>]*>', using(TypoScriptHtmlDataLexer)),
+ (r'<\S[^\n>]*>', using(TypoScriptHtmlDataLexer)),
(r'&[^;\n]*;', String),
(r'(_CSS_DEFAULT_STYLE)(\s*)(\()(?s)(.*(?=\n\)))',
bygroups(Name.Class, Text, String.Symbol, using(TypoScriptCssDataLexer))),
@@ -182,28 +182,28 @@ class TypoScriptLexer(RegexLexer):
],
'label': [
# Language label or extension resource FILE:... or LLL:... or EXT:...
- (r'(EXT|FILE|LLL):[^\}\n"]*', String),
+ (r'(EXT|FILE|LLL):[^}\n"]*', String),
# Path to a resource
- (r'(?![^\w\-_])([\w\-_]+(?:/[\w\-_]+)+/?)([^\s]*\n)',
+ (r'(?![^\w\-])([\w\-]+(?:/[\w\-]+)+/?)(\S*\n)',
bygroups(String, String)),
],
'punctuation': [
- (r'[,\.]', Punctuation),
+ (r'[,.]', Punctuation),
],
'operator': [
- (r'[<>,:=\.\*%+\|]', Operator),
+ (r'[<>,:=.*%+|]', Operator),
],
'structure': [
# Brackets and braces
- (r'[\{\}\(\)\[\]\\\\]', String.Symbol),
+ (r'[{}()\[\]\\]', String.Symbol),
],
'constant': [
# Constant: {$some.constant}
- (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})',
+ (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})',
bygroups(String.Symbol, Operator, Name.Constant,
Name.Constant, String.Symbol)), # constant
# Constant: {register:somevalue}
- (r'(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})',
+ (r'(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})',
bygroups(String.Symbol, Name.Constant, Operator,
Name.Constant, String.Symbol)), # constant
# Hex color: #ff0077
@@ -216,7 +216,7 @@ class TypoScriptLexer(RegexLexer):
(r'(\s*#\s*\n)', Comment),
],
'other': [
- (r'[\w"\-_!\/&;]+', Text),
+ (r'[\w"\-!/&;]+', Text),
],
}
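Why the trailing `_` could be dropped from these character classes: in Python regexes `\w` already matches the underscore, so `[\w\-]` accepts exactly what `[\w\-_]` did. The keyword reorderings above (e.g. ACTIFSUBRO before ACTIFSUB) matter because Python alternation takes the first match, not the longest. A quick illustrative check (not part of the patch):

    import re

    # \w covers letters, digits and the underscore, so both classes
    # accept exactly the same strings.
    for sample in ('tt_content-item', '_LOCAL_LANG'):
        assert re.fullmatch(r'[\w\-]+', sample)

    # Alternation is first-match: the longer keyword must come first.
    assert re.match(r'ACTIFSUBRO|ACTIFSUB', 'ACTIFSUBRO').group() == 'ACTIFSUBRO'
    assert re.match(r'ACTIFSUB|ACTIFSUBRO', 'ACTIFSUBRO').group() == 'ACTIFSUB'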
diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py
new file mode 100644
index 00000000..e64a601b
--- /dev/null
+++ b/pygments/lexers/varnish.py
@@ -0,0 +1,190 @@
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers.varnish
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for Varnish configuration.
+
+ :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+from pygments.lexer import RegexLexer, include, bygroups, using, this, \
+ inherit, words
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+ Number, Punctuation, Literal
+
+__all__ = ['VCLLexer', 'VCLSnippetLexer']
+
+
+class VCLLexer(RegexLexer):
+ """
+ For Varnish Configuration Language (VCL).
+
+ .. versionadded:: 2.2
+ """
+ name = 'VCL'
+ aliases = ['vcl']
+ filenames = ['*.vcl']
+ mimetypes = ['text/x-vclsrc']
+
+ def analyse_text(text):
+ # If the very first line is 'vcl 4.0;' it's pretty much guaranteed
+ # that this is VCL
+ if text.startswith('vcl 4.0;'):
+ return 1.0
+        # Otherwise look for 'vcl 4.0;' on a later line, i.e. after some
+        # leading comments or blank lines; almost no VCL files start
+        # without comments, so returning 0.9 is reasonable. Note this is
+        # a plain substring test, so the dot must not be regex-escaped.
+        elif '\nvcl 4.0;' in text[:1000]:
+ return 0.9
+
+ tokens = {
+ 'probe': [
+ include('whitespace'),
+ include('comments'),
+ (r'(\.\w+)(\s*=\s*)([^;]*)(;)',
+ bygroups(Name.Attribute, Operator, using(this), Punctuation)),
+ (r'\}', Punctuation, '#pop'),
+ ],
+ 'acl': [
+ include('whitespace'),
+ include('comments'),
+ (r'[!/]+', Operator),
+ (r';', Punctuation),
+ (r'\d+', Number),
+ (r'\}', Punctuation, '#pop'),
+ ],
+ 'backend': [
+ include('whitespace'),
+ (r'(\.probe)(\s*=\s*)(\w+)(;)',
+ bygroups(Name.Attribute, Operator, Name.Variable.Global, Punctuation)),
+ (r'(\.probe)(\s*=\s*)(\{)',
+ bygroups(Name.Attribute, Operator, Punctuation), 'probe'),
+ (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)',
+ bygroups(Name.Attribute, Operator, using(this), Punctuation)),
+ (r'\{', Punctuation, '#push'),
+ (r'\}', Punctuation, '#pop'),
+ ],
+ 'statements': [
+            # Durations: try 'ms' before the single-letter units, and
+            # allow more than one digit before the decimal point.
+            (r'\d+(\.\d+)?(ms|[sdwhmy])', Literal.Date),
+ (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|'
+ r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|'
+ r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function),
+ (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|'
+ r'miss|fetch|restart)\b', Name.Constant),
+ (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b', Name.Variable),
+ (words((
+ 'obj.status', 'req.hash_always_miss', 'beresp.backend', 'req.esi_level',
+ 'req.can_gzip', 'beresp.ttl', 'obj.uncacheable', 'req.ttl', 'obj.hits',
+ 'client.identity', 'req.hash_ignore_busy', 'obj.reason', 'req.xid',
+ 'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age', 'local.ip',
+ 'beresp.uncacheable', 'req.method', 'beresp.backend.ip', 'now',
+ 'obj.grace', 'req.restarts', 'beresp.keep', 'req.proto', 'resp.proto',
+ 'bereq.xid', 'bereq.between_bytes_timeout', 'req.esi',
+ 'bereq.first_byte_timeout', 'bereq.method', 'bereq.connect_timeout',
+ 'beresp.do_gzip', 'resp.status', 'beresp.do_gunzip',
+ 'beresp.storage_hint', 'resp.is_streaming', 'beresp.do_stream',
+ 'req_top.method', 'bereq.backend', 'beresp.backend.name', 'beresp.status',
+ 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason', 'bereq.retries',
+ 'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip',
+ 'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint',
+ 'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304',
+                'server.ip', 'bereq.uncacheable'), suffix=r'\b'),
+ Name.Variable),
+ (r'[!%&+*\-,/<.}{>=|~]+', Operator),
+ (r'[();]', Punctuation),
+
+ (r'[,]+', Punctuation),
+            (words(('include', 'hash_data', 'regsub', 'regsuball', 'if', 'else',
+                'elsif', 'elif', 'synth', 'synthetic', 'ban',
+                'return', 'set', 'unset', 'import', 'new',
+                'rollback', 'call'), suffix=r'\b'),
+ Keyword),
+ (r'storage\.\w+\.\w+\b', Name.Variable),
+ (words(('true', 'false')), Name.Builtin),
+ (r'\d+\b', Number),
+ (r'(backend)(\s+\w+)(\s*\{)',
+ bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'),
+ (r'(probe\s)(\s*\w+\s)(\{)',
+ bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'),
+ (r'(acl\s)(\s*\w+\s)(\{)',
+ bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'),
+            (r'(vcl )(4\.0)(;)$',
+ bygroups(Keyword.Reserved, Name.Constant, Punctuation)),
+ (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)',
+ bygroups(Keyword, Name.Function, Punctuation)),
+ (r'([a-zA-Z_]\w*)'
+ r'(\.)'
+ r'([a-zA-Z_]\w*)'
+ r'(\s*\(.*\))',
+ bygroups(Name.Function, Punctuation, Name.Function, using(this))),
+            (r'[a-zA-Z_]\w*', Name),
+ ],
+ 'comment': [
+ (r'[^*/]+', Comment.Multiline),
+ (r'/\*', Comment.Multiline, '#push'),
+ (r'\*/', Comment.Multiline, '#pop'),
+ (r'[*/]', Comment.Multiline),
+ ],
+ 'comments': [
+ (r'#.*$', Comment),
+ (r'/\*', Comment.Multiline, 'comment'),
+ (r'//.*$', Comment),
+ ],
+ 'string': [
+ (r'"', String, '#pop'),
+ (r'[^"\n]+', String), # all other characters
+ ],
+ 'multistring': [
+ (r'[^"}]', String),
+ (r'"\}', String, '#pop'),
+ (r'["}]', String),
+ ],
+ 'whitespace': [
+ (r'L?"', String, 'string'),
+ (r'\{"', String, 'multistring'),
+ (r'\n', Text),
+ (r'\s+', Text),
+ (r'\\\n', Text), # line continuation
+ ],
+ 'root': [
+ include('whitespace'),
+ include('comments'),
+ include('statements'),
+ (r'\s+', Text),
+ ],
+ }
+
+
+class VCLSnippetLexer(VCLLexer):
+ """
+ For Varnish Configuration Language snippets.
+
+ .. versionadded:: 2.2
+ """
+ name = 'VCLSnippets'
+ aliases = ['vclsnippets', 'vclsnippet']
+ mimetypes = ['text/x-vclsnippet']
+ filenames = []
+
+ def analyse_text(text):
+ # override method inherited from VCLLexer
+ return 0
+
+ tokens = {
+ 'snippetspre': [
+ (r'\.\.\.+', Comment),
+ (r'(bereq|req|req_top|resp|beresp|obj|client|server|local|remote|'
+ r'storage)($|\.\*)', Name.Variable),
+ ],
+ 'snippetspost': [
+ (r'(backend)\b', Keyword.Reserved),
+ ],
+ 'root': [
+ include('snippetspre'),
+ inherit,
+ include('snippetspost'),
+ ],
+ }
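A minimal usage sketch for the two new lexers (not part of the patch; it assumes the rebuilt lexer mapping registers the 'vcl' and 'vclsnippet' aliases declared above):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers import get_lexer_by_name

    code = 'vcl 4.0;\n\nbackend default { .host = "127.0.0.1"; }\n'

    # Whole files go through VCLLexer; VCLSnippetLexer additionally
    # recognizes bare variable placeholders such as req.* in prose.
    print(highlight(code, get_lexer_by_name('vcl'), TerminalFormatter()))
    print(highlight('req.http.Host', get_lexer_by_name('vclsnippet'),
                    TerminalFormatter()))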
diff --git a/pygments/scanner.py b/pygments/scanner.py
index 35dbbadd..3ff11e4a 100644
--- a/pygments/scanner.py
+++ b/pygments/scanner.py
@@ -66,7 +66,8 @@ class Scanner(object):
def test(self, pattern):
"""Apply a pattern on the current position and check
- if it patches. Doesn't touch pos."""
+            if it matches. Doesn't touch pos.
+ """
return self.check(pattern) is not None
def scan(self, pattern):
diff --git a/pygments/sphinxext.py b/pygments/sphinxext.py
index 2dc9810f..de8cd73b 100644
--- a/pygments/sphinxext.py
+++ b/pygments/sphinxext.py
@@ -57,6 +57,7 @@ FILTERDOC = '''
'''
+
class PygmentsDoc(Directive):
"""
A directive to collect all lexers/formatters/filters and generate
diff --git a/pygments/style.py b/pygments/style.py
index b2b990ea..68ee3a19 100644
--- a/pygments/style.py
+++ b/pygments/style.py
@@ -12,6 +12,29 @@
from pygments.token import Token, STANDARD_TYPES
from pygments.util import add_metaclass
+# Default mapping of #ansixxx to RGB colors.
+_ansimap = {
+ # dark
+ '#ansiblack': '000000',
+ '#ansidarkred': '7f0000',
+ '#ansidarkgreen': '007f00',
+    '#ansibrown': '7f7f00',
+ '#ansidarkblue': '00007f',
+ '#ansipurple': '7f007f',
+ '#ansiteal': '007f7f',
+ '#ansilightgray': 'e5e5e5',
+ # normal
+ '#ansidarkgray': '555555',
+ '#ansired': 'ff0000',
+ '#ansigreen': '00ff00',
+ '#ansiyellow': 'ffff00',
+ '#ansiblue': '0000ff',
+ '#ansifuchsia': 'ff00ff',
+ '#ansiturquoise': '00ffff',
+ '#ansiwhite': 'ffffff',
+}
+ansicolors = set(_ansimap)
+
class StyleMeta(type):
@@ -22,6 +45,8 @@ class StyleMeta(type):
obj.styles[token] = ''
def colorformat(text):
+ if text in ansicolors:
+ return text
if text[0:1] == '#':
col = text[1:]
if len(col) == 6:
@@ -79,16 +104,28 @@ class StyleMeta(type):
def style_for_token(cls, token):
t = cls._styles[token]
+ ansicolor = bgansicolor = None
+ color = t[0]
+ if color.startswith('#ansi'):
+ ansicolor = color
+ color = _ansimap[color]
+ bgcolor = t[4]
+ if bgcolor.startswith('#ansi'):
+ bgansicolor = bgcolor
+ bgcolor = _ansimap[bgcolor]
+
return {
- 'color': t[0] or None,
+ 'color': color or None,
'bold': bool(t[1]),
'italic': bool(t[2]),
'underline': bool(t[3]),
- 'bgcolor': t[4] or None,
+ 'bgcolor': bgcolor or None,
'border': t[5] or None,
'roman': bool(t[6]) or None,
'sans': bool(t[7]) or None,
'mono': bool(t[8]) or None,
+ 'ansicolor': ansicolor,
+ 'bgansicolor': bgansicolor,
}
def list_styles(cls):
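In practice the new ANSI support works like this (an illustrative sketch; `AnsiDemoStyle` is a made-up style, not part of the patch): a style may now name `#ansi...` colors, and `style_for_token()` reports the RGB fallback under 'color'/'bgcolor' while the original ANSI names travel in the two new entries for ANSI-capable formatters:

    from pygments.style import Style
    from pygments.token import Comment

    class AnsiDemoStyle(Style):
        # hypothetical style, for demonstration only
        styles = {Comment: '#ansidarkgray bg:#ansiblack'}

    info = AnsiDemoStyle.style_for_token(Comment)
    assert info['color'] == '555555'              # RGB fallback from _ansimap
    assert info['ansicolor'] == '#ansidarkgray'   # picked up by terminal256
    assert info['bgcolor'] == '000000'
    assert info['bgansicolor'] == '#ansiblack'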
diff --git a/pygments/styles/arduino.py b/pygments/styles/arduino.py
index 5b31bb84..1bf2103c 100644
--- a/pygments/styles/arduino.py
+++ b/pygments/styles/arduino.py
@@ -95,4 +95,4 @@ class ArduinoStyle(Style):
Generic.Strong: "", # class: 'gs'
Generic.Subheading: "", # class: 'gu'
Generic.Traceback: "", # class: 'gt'
-    }
\ No newline at end of file
+ }
diff --git a/pygments/styles/lovelace.py b/pygments/styles/lovelace.py
index 712f3e5c..236dde9b 100644
--- a/pygments/styles/lovelace.py
+++ b/pygments/styles/lovelace.py
@@ -71,7 +71,9 @@ class LovelaceStyle(Style):
Name.Variable.Magic: _DOC_ORANGE,
String: _STR_RED,
+ String.Affix: '#444444',
String.Char: _OW_PURPLE,
+ String.Delimiter: _DOC_ORANGE,
String.Doc: 'italic '+_DOC_ORANGE,
String.Escape: _ESCAPE_LIME,
String.Interpol: 'underline',
diff --git a/pygments/token.py b/pygments/token.py
index 097ff064..fbd5b805 100644
--- a/pygments/token.py
+++ b/pygments/token.py
@@ -9,6 +9,7 @@
:license: BSD, see LICENSE for details.
"""
+
class _TokenType(tuple):
parent = None
@@ -52,30 +53,30 @@ class _TokenType(tuple):
return self
-Token = _TokenType()
+Token = _TokenType()
# Special token types
-Text = Token.Text
-Whitespace = Text.Whitespace
-Escape = Token.Escape
-Error = Token.Error
+Text = Token.Text
+Whitespace = Text.Whitespace
+Escape = Token.Escape
+Error = Token.Error
# Text that doesn't belong to this lexer (e.g. HTML in PHP)
-Other = Token.Other
+Other = Token.Other
# Common token types for source code
-Keyword = Token.Keyword
-Name = Token.Name
-Literal = Token.Literal
-String = Literal.String
-Number = Literal.Number
+Keyword = Token.Keyword
+Name = Token.Name
+Literal = Token.Literal
+String = Literal.String
+Number = Literal.Number
Punctuation = Token.Punctuation
-Operator = Token.Operator
-Comment = Token.Comment
+Operator = Token.Operator
+Comment = Token.Comment
# Generic types for non-source code
-Generic = Token.Generic
+Generic = Token.Generic
-# String and some others are not direct childs of Token.
+# String and some others are not direct children of Token.
# alias them:
Token.Token = Token
Token.String = String
@@ -163,8 +164,10 @@ STANDARD_TYPES = {
Literal.Date: 'ld',
String: 's',
+ String.Affix: 'sa',
String.Backtick: 'sb',
String.Char: 'sc',
+ String.Delimiter: 'dl',
String.Doc: 'sd',
String.Double: 's2',
String.Escape: 'se',
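The two new `String` subtypes behave like any other token type; a small sketch (not part of the patch) of the CSS classes and the usual subtype membership:

    from pygments.token import STANDARD_TYPES, String

    # Short CSS classes for the HTML formatter, as added above.
    assert STANDARD_TYPES[String.Affix] == 'sa'
    assert STANDARD_TYPES[String.Delimiter] == 'dl'
    # Subtypes still test as members of their parent type.
    assert String.Affix in String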
diff --git a/setup.py b/setup.py
index a8667fda..a07ac055 100755
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ else:
setup(
name = 'Pygments',
- version = '2.1',
+ version = '2.2',
url = 'http://pygments.org/',
license = 'BSD License',
author = 'Georg Brandl',
diff --git a/tests/examplefiles/StdGeneric.icl b/tests/examplefiles/StdGeneric.icl
new file mode 100644
index 00000000..2e6c3931
--- /dev/null
+++ b/tests/examplefiles/StdGeneric.icl
@@ -0,0 +1,92 @@
+implementation module StdGeneric
+
+import StdInt, StdMisc, StdClass, StdFunc
+
+generic bimap a b :: Bimap .a .b
+
+bimapId :: Bimap .a .a
+bimapId = { map_to = id, map_from = id }
+
+bimap{|c|} = { map_to = id, map_from = id }
+
+bimap{|PAIR|} bx by = { map_to= map_to, map_from=map_from }
+where
+ map_to (PAIR x y) = PAIR (bx.map_to x) (by.map_to y)
+ map_from (PAIR x y) = PAIR (bx.map_from x) (by.map_from y)
+bimap{|EITHER|} bl br = { map_to= map_to, map_from=map_from }
+where
+ map_to (LEFT x) = LEFT (bl.map_to x)
+ map_to (RIGHT x) = RIGHT (br.map_to x)
+ map_from (LEFT x) = LEFT (bl.map_from x)
+ map_from (RIGHT x) = RIGHT (br.map_from x)
+
+bimap{|(->)|} barg bres = { map_to = map_to, map_from = map_from }
+where
+ map_to f = comp3 bres.map_to f barg.map_from
+ map_from f = comp3 bres.map_from f barg.map_to
+
+bimap{|CONS|} barg = { map_to= map_to, map_from=map_from }
+where
+ map_to (CONS x) = CONS (barg.map_to x)
+ map_from (CONS x) = CONS (barg.map_from x)
+
+bimap{|FIELD|} barg = { map_to= map_to, map_from=map_from }
+where
+ map_to (FIELD x) = FIELD (barg.map_to x)
+ map_from (FIELD x) = FIELD (barg.map_from x)
+
+bimap{|OBJECT|} barg = { map_to= map_to, map_from=map_from }
+where
+ map_to (OBJECT x) = OBJECT (barg.map_to x)
+ map_from (OBJECT x) = OBJECT (barg.map_from x)
+
+bimap{|Bimap|} x y = {map_to = map_to, map_from = map_from}
+where
+ map_to {map_to, map_from} =
+ { map_to = comp3 y.map_to map_to x.map_from
+ , map_from = comp3 x.map_to map_from y.map_from
+ }
+ map_from {map_to, map_from} =
+ { map_to = comp3 y.map_from map_to x.map_to
+ , map_from = comp3 x.map_from map_from y.map_to
+ }
+
+comp3 :: !(.a -> .b) u:(.c -> .a) !(.d -> .c) -> u:(.d -> .b)
+comp3 f g h
+ | is_id f
+ | is_id h
+ = cast g
+ = cast (\x -> g (h x))
+ | is_id h
+ = cast (\x -> f (g x))
+ = \x -> f (g (h x))
+where
+ is_id :: !.(.a -> .b) -> Bool
+ is_id f = code inline
+ {
+ eq_desc e_StdFunc_did 0 0
+ pop_a 1
+ }
+
+ cast :: !u:a -> u:b
+ cast f = code inline
+ {
+ pop_a 0
+ }
+
+getConsPath :: !GenericConsDescriptor -> [ConsPos]
+getConsPath {gcd_index, gcd_type_def={gtd_num_conses}}
+ = doit gcd_index gtd_num_conses
+where
+ doit i n
+ | n == 0
+ = abort "getConsPath: zero conses\n"
+ | i >= n
+ = abort "getConsPath: cons index >= number of conses"
+ | n == 1
+ = []
+ | i < (n/2)
+ = [ ConsLeft : doit i (n/2) ]
+ | otherwise
+ = [ ConsRight : doit (i - (n/2)) (n - (n/2)) ]
\ No newline at end of file
diff --git a/tests/examplefiles/example2.cpp b/tests/examplefiles/example2.cpp
new file mode 100644
index 00000000..ccd99383
--- /dev/null
+++ b/tests/examplefiles/example2.cpp
@@ -0,0 +1,20 @@
+/*
+ * A Test file for the different string literals.
+ */
+
+#include <iostream>
+
+int main() {
+ char *_str = "a normal string";
+ wchar_t *L_str = L"a wide string";
+ char *u8_str = u8"utf-8 string";
+ char16_t *u_str = u"utf-16 string";
+ char32_t *U_str = U"utf-32 string";
+ char *R_str = R""""(raw string with
+"""
+as a delimiter)"""";
+
+ std::cout << R_str << std::endl;
+
+ return 0;
+}
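To see what the updated C/C++ lexer makes of these prefixes, one can dump a token stream; a sketch (not part of the patch; the exact token types, presumably `String.Affix` for the prefixes, depend on the c_cpp.py changes elsewhere in this commit):

    from pygments.lexers.c_cpp import CppLexer

    # Dump the token stream for one prefixed literal; the u prefix is
    # the part this fixture is interested in.
    for token, value in CppLexer().get_tokens('char16_t *s = u"utf-16";'):
        print(token, repr(value))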
diff --git a/tests/examplefiles/postgresql_test.txt b/tests/examplefiles/postgresql_test.txt
index 190d184f..28db5ee3 100644
--- a/tests/examplefiles/postgresql_test.txt
+++ b/tests/examplefiles/postgresql_test.txt
@@ -45,3 +45,37 @@ $$;
SELECT U&'\0441\043B\043E\043D'
FROM U&"\0441\043B\043E\043D";
+-- Escapes
+SELECT E'1\n2\n3';
+
+-- DO example from postgresql documentation
+/*
+ * PostgreSQL is Copyright © 1996-2016 by the PostgreSQL Global Development Group.
+ *
+ * Postgres95 is Copyright © 1994-5 by the Regents of the University of California.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose, without fee, and without a written agreement
+ * is hereby granted, provided that the above copyright notice and this paragraph
+ * and the following two paragraphs appear in all copies.
+ *
+ * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+ * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
+ * EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS-IS" BASIS,
+ * AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+DO $$DECLARE r record;
+BEGIN
+ FOR r IN SELECT table_schema, table_name FROM information_schema.tables
+ WHERE table_type = 'VIEW' AND table_schema = 'public'
+ LOOP
+ EXECUTE 'GRANT ALL ON ' || quote_ident(r.table_schema) || '.' || quote_ident(r.table_name) || ' TO webuser';
+ END LOOP;
+END$$;
diff --git a/tests/examplefiles/test.php b/tests/examplefiles/test.php
index 794961c1..e8efdc6a 100644
--- a/tests/examplefiles/test.php
+++ b/tests/examplefiles/test.php
@@ -535,5 +535,10 @@ $magic->__toString();
EOF;
+echo <<<"some_delimiter"
+more heredoc testing
+continues on this line
+some_delimiter;
+
?>
diff --git a/tests/examplefiles/varnish.vcl b/tests/examplefiles/varnish.vcl
new file mode 100644
index 00000000..6258c313
--- /dev/null
+++ b/tests/examplefiles/varnish.vcl
@@ -0,0 +1,187 @@
+# This is the VCL configuration Varnish will automatically append to your VCL
+# file during compilation/loading. See the vcl(7) man page for details on syntax
+# and semantics.
+# New users are recommended to use the example.vcl file as a starting point.
+
+vcl 4.0;
+
+backend foo { .host = "192.168.1.1"; }
+
+probe blatti { .url = "foo"; }
+probe fooy {
+ .url = "beh";
+
+}
+
+acl foo {
+ "192.168.1.1";
+ "192.168.0.0"/24;
+ ! "192.168.0.1";
+}
+
+include "foo.vcl";
+
+import std;
+
+sub vcl_init {
+ new b = director.foo();
+}
+
+sub vcl_recv {
+ ban(req.url ~ "foo");
+ rollback();
+}
+sub vcl_recv {
+ if (req.method == "PRI") {
+ /* We do not support SPDY or HTTP/2.0 */
+ return (synth(405));
+ }
+ if (req.method != "GET" &&
+ req.method != "HEAD" &&
+ req.method != "PUT" &&
+ req.method != "POST" &&
+ req.method != "TRACE" &&
+ req.method != "OPTIONS" &&
+ req.method != "DELETE") {
+ /* Non-RFC2616 or CONNECT which is weird. */
+ return (pipe);
+ }
+
+ if (req.method != "GET" && req.method != "HEAD") {
+ /* We only deal with GET and HEAD by default */
+ return (pass);
+ }
+ if (req.http.Authorization || req.http.Cookie) {
+ /* Not cacheable by default */
+ return (pass);
+ }
+ return (hash);
+}
+
+sub vcl_pipe {
+ # By default Connection: close is set on all piped requests, to stop
+ # connection reuse from sending future requests directly to the
+ # (potentially) wrong backend. If you do want this to happen, you can undo
+ # it here.
+ # unset bereq.http.connection;
+ return (pipe);
+}
+
+sub vcl_pass {
+ return (fetch);
+}
+
+sub vcl_hash {
+ hash_data(req.url);
+ if (req.http.host) {
+ hash_data(req.http.host);
+ } else {
+ hash_data(server.ip);
+ }
+ return (lookup);
+}
+
+sub vcl_purge {
+ return (synth(200, "Purged"));
+}
+
+sub vcl_hit {
+ if (obj.ttl >= 0s) {
+        // A pure unadulterated hit, deliver it
+ return (deliver);
+ }
+ if (obj.ttl + obj.grace > 0s) {
+ // Object is in grace, deliver it
+ // Automatically triggers a background fetch
+ return (deliver);
+ }
+ // fetch & deliver once we get the result
+ return (miss);
+}
+
+sub vcl_miss {
+ return (fetch);
+}
+
+sub vcl_deliver {
+ set resp.http.x-storage = storage.s0.free;
+ return (deliver);
+}
+
+/*
+ * We can come here "invisibly" with the following errors: 413, 417 & 503
+ */
+sub vcl_synth {
+ set resp.http.Content-Type = "text/html; charset=utf-8";
+ set resp.http.Retry-After = "5";
+ synthetic( {"<!DOCTYPE html>
+<html>
+ <head>
+ <title>"} + resp.status + " " + resp.reason + {"</title>
+ </head>
+ <body>
+ <h1>Error "} + resp.status + " " + resp.reason + {"</h1>
+ <p>"} + resp.reason + {"</p>
+ <h3>Guru Meditation:</h3>
+ <p>XID: "} + req.xid + {"</p>
+ <hr>
+ <p>Varnish cache server</p>
+ </body>
+</html>
+"} );
+ return (deliver);
+}
+
+#######################################################################
+# Backend Fetch
+
+sub vcl_backend_fetch {
+ return (fetch);
+}
+
+sub vcl_backend_response {
+ if (beresp.ttl <= 0s ||
+ beresp.http.Set-Cookie ||
+ beresp.http.Surrogate-control ~ "no-store" ||
+ (!beresp.http.Surrogate-Control &&
+ beresp.http.Cache-Control ~ "no-cache|no-store|private") ||
+ beresp.http.Vary == "*") {
+ /*
+ * Mark as "Hit-For-Pass" for the next 2 minutes
+ */
+ set beresp.ttl = 120s;
+ set beresp.uncacheable = true;
+ }
+ return (deliver);
+}
+
+sub vcl_backend_error {
+ set beresp.http.Content-Type = "text/html; charset=utf-8";
+ set beresp.http.Retry-After = "5";
+ synthetic( {"<!DOCTYPE html>
+<html>
+ <head>
+ <title>"} + beresp.status + " " + beresp.reason + {"</title>
+ </head>
+ <body>
+ <h1>Error "} + beresp.status + " " + beresp.reason + {"</h1>
+ <p>"} + beresp.reason + {"</p>
+ <h3>Guru Meditation:</h3>
+ <p>XID: "} + bereq.xid + {"</p>
+ <hr>
+ <p>Varnish cache server</p>
+ </body>
+</html>
+"} );
+ return (deliver);
+}
+
+#######################################################################
+# Housekeeping
+
+sub vcl_init {
+}
+
+sub vcl_fini {
+ return (ok);
+}
diff --git a/tests/examplefiles/wdiff_example1.wdiff b/tests/examplefiles/wdiff_example1.wdiff
new file mode 100644
index 00000000..ca760812
--- /dev/null
+++ b/tests/examplefiles/wdiff_example1.wdiff
@@ -0,0 +1,731 @@
+.. -*- mode: rst -*-
+
+{+.. highlight:: python+}
+
+====================
+Write your own lexer
+====================
+
+If a lexer for your favorite language is missing in the Pygments package, you
+can easily write your own and extend Pygments.
+
+All you need can be found inside the :mod:`pygments.lexer` module. As you can
+read in the :doc:`API documentation <api>`, a lexer is a class that is
+initialized with some keyword arguments (the lexer options) and that provides a
+:meth:`.get_tokens_unprocessed()` method which is given a string or unicode
+object with the data to [-parse.-] {+lex.+}
+
+The :meth:`.get_tokens_unprocessed()` method must return an iterator or iterable
+containing tuples in the form ``(index, token, value)``. Normally you don't
+need to do this since there are [-numerous-] base lexers {+that do most of the work and that+}
+you can subclass.
+
+
+RegexLexer
+==========
+
+[-A very powerful (but quite easy to use)-]
+
+{+The+} lexer {+base class used by almost all of Pygments' lexers+} is the
+:class:`RegexLexer`. This
+[-lexer base-] class allows you to define lexing rules in terms of
+*regular expressions* for different *states*.
+
+States are groups of regular expressions that are matched against the input
+string at the *current position*. If one of these expressions matches, a
+corresponding action is performed [-(normally-] {+(such as+} yielding a token with a specific
+[-type),-]
+{+type, or changing state),+} the current position is set to where the last match
+ended and the matching process continues with the first regex of the current
+state.
+
+Lexer states are kept [-in-] {+on+} a [-state-] stack: each time a new state is entered, the new
+state is pushed onto the stack. The most basic lexers (like the `DiffLexer`)
+just need one state.
+
+Each state is defined as a list of tuples in the form (`regex`, `action`,
+`new_state`) where the last item is optional. In the most basic form, `action`
+is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a
+token with the match text and type `tokentype` and push `new_state` on the state
+stack. If the new state is ``'#pop'``, the topmost state is popped from the
+stack instead. [-(To-] {+To+} pop more than one state, use ``'#pop:2'`` and so [-on.)-] {+on.+}
+``'#push'`` is a synonym for pushing the current state on the stack.
+
+The following example shows the `DiffLexer` from the builtin lexers. Note that
+it contains some additional attributes `name`, `aliases` and `filenames` which
+aren't required for a lexer. They are used by the builtin lexer lookup
+functions.
+
+[-.. sourcecode:: python-] {+::+}
+
+ from pygments.lexer import RegexLexer
+ from pygments.token import *
+
+ class DiffLexer(RegexLexer):
+ name = 'Diff'
+ aliases = ['diff']
+ filenames = ['*.diff']
+
+ tokens = {
+ 'root': [
+ (r' .*\n', Text),
+ (r'\+.*\n', Generic.Inserted),
+ (r'-.*\n', Generic.Deleted),
+ (r'@.*\n', Generic.Subheading),
+ (r'Index.*\n', Generic.Heading),
+ (r'=.*\n', Generic.Heading),
+ (r'.*\n', Text),
+ ]
+ }
+
+As you can see this lexer only uses one state. When the lexer starts scanning
+the text, it first checks if the current character is a space. If this is true
+it scans everything until newline and returns the [-parsed-] data as {+a+} `Text` [-token.-] {+token (which
+is the "no special highlighting" token).+}
+
+If this rule doesn't match, it checks if the current char is a plus sign. And
+so on.
+
+If no rule matches at the current position, the current char is emitted as an
+`Error` token that indicates a [-parsing-] {+lexing+} error, and the position is increased by
+[-1.-]
+{+one.+}
+
+
+Adding and testing a new lexer
+==============================
+
+To make [-pygments-] {+Pygments+} aware of your new lexer, you have to perform the following
+steps:
+
+First, change to the current directory containing the [-pygments-] {+Pygments+} source code:
+
+.. [-sourcecode::-] {+code-block::+} console
+
+ $ cd .../pygments-main
+
+{+Select a matching module under ``pygments/lexers``, or create a new module for
+your lexer class.+}
+
+Next, make sure the lexer is known from outside of the module. All modules in
+the ``pygments.lexers`` specify ``__all__``. For example, [-``other.py`` sets:
+
+.. sourcecode:: python-] {+``esoteric.py`` sets::+}
+
+ __all__ = ['BrainfuckLexer', 'BefungeLexer', ...]
+
+Simply add the name of your lexer class to this list.
+
+Finally the lexer can be made [-publically-] {+publicly+} known by rebuilding the lexer mapping:
+
+.. [-sourcecode::-] {+code-block::+} console
+
+ $ make mapfiles
+
+To test the new lexer, store an example file with the proper extension in
+``tests/examplefiles``. For example, to test your ``DiffLexer``, add a
+``tests/examplefiles/example.diff`` containing a sample diff output.
+
+Now you can use pygmentize to render your example to HTML:
+
+.. [-sourcecode::-] {+code-block::+} console
+
+ $ ./pygmentize -O full -f html -o /tmp/example.html tests/examplefiles/example.diff
+
+Note that this [-explicitely-] {+explicitly+} calls the ``pygmentize`` in the current directory
+by preceding it with ``./``. This ensures your modifications are used.
+Otherwise a possibly already installed, unmodified version without your new
+lexer would have been called from the system search path (``$PATH``).
+
+To view the result, open ``/tmp/example.html`` in your browser.
+
+Once the example renders as expected, you should run the complete test suite:
+
+.. [-sourcecode::-] {+code-block::+} console
+
+ $ make test
+
+{+It also tests that your lexer fulfills the lexer API and certain invariants,
+such as that the concatenation of all token text is the same as the input text.+}
+
+
+Regex Flags
+===========
+
+You can either define regex flags {+locally+} in the regex (``r'(?x)foo bar'``) or
+{+globally+} by adding a `flags` attribute to your lexer class. If no attribute is
+defined, it defaults to `re.MULTILINE`. For more [-informations-] {+information+} about regular
+expression flags see the {+page about+} `regular expressions`_ [-help page-] in the [-python-] {+Python+}
+documentation.
+
+.. _regular expressions: [-http://docs.python.org/lib/re-syntax.html-] {+http://docs.python.org/library/re.html#regular-expression-syntax+}
+
+
+Scanning multiple tokens at once
+================================
+
+{+So far, the `action` element in the rule tuple of regex, action and state has
+been a single token type. Now we look at the first of several other possible
+values.+}
+
+Here is a more complex lexer that highlights INI files. INI files consist of
+sections, comments and [-key-] {+``key+} = [-value pairs:
+
+.. sourcecode:: python-] {+value`` pairs::+}
+
+ from pygments.lexer import RegexLexer, bygroups
+ from pygments.token import *
+
+ class IniLexer(RegexLexer):
+ name = 'INI'
+ aliases = ['ini', 'cfg']
+ filenames = ['*.ini', '*.cfg']
+
+ tokens = {
+ 'root': [
+ (r'\s+', Text),
+ (r';.*?$', Comment),
+ (r'\[.*?\]$', Keyword),
+ (r'(.*?)(\s*)(=)(\s*)(.*?)$',
+ bygroups(Name.Attribute, Text, Operator, Text, String))
+ ]
+ }
+
+The lexer first looks for whitespace, comments and section names. [-And later-] {+Later+} it
+looks for a line that looks like a key, value pair, separated by an ``'='``
+sign, and optional whitespace.
+
+The `bygroups` helper [-makes sure that-] {+yields+} each {+capturing+} group [-is yielded-] {+in the regex+} with a different
+token type. First the `Name.Attribute` token, then a `Text` token for the
+optional whitespace, after that an `Operator` token for the equals sign. Then a
+`Text` token for the whitespace again. The rest of the line is returned as
+`String`.
+
+Note that for this to work, every part of the match must be inside a capturing
+group (a ``(...)``), and there must not be any nested capturing groups. If you
+nevertheless need a group, use a non-capturing group defined using this syntax:
+[-``r'(?:some|words|here)'``-]
+{+``(?:some|words|here)``+} (note the ``?:`` after the beginning parenthesis).
+
+If you find yourself needing a capturing group inside the regex which shouldn't
+be part of the output but is used in the regular expressions for backreferencing
+(eg: ``r'(<(foo|bar)>)(.*?)(</\2>)'``), you can pass `None` to the bygroups
+function and [-it will skip-] that group will be skipped in the output.
+
+
+Changing states
+===============
+
+Many lexers need multiple states to work as expected. For example, some
+languages allow multiline comments to be nested. Since this is a recursive
+pattern it's impossible to lex just using regular expressions.
+
+Here is [-the solution:
+
+.. sourcecode:: python-] {+a lexer that recognizes C++ style comments (multi-line with ``/* */``
+and single-line with ``//`` until end of line)::+}
+
+ from pygments.lexer import RegexLexer
+ from pygments.token import *
+
+ class [-ExampleLexer(RegexLexer):-] {+CppCommentLexer(RegexLexer):+}
+ name = 'Example Lexer with states'
+
+ tokens = {
+ 'root': [
+ (r'[^/]+', Text),
+ (r'/\*', Comment.Multiline, 'comment'),
+ (r'//.*?$', Comment.Singleline),
+ (r'/', Text)
+ ],
+ 'comment': [
+ (r'[^*/]', Comment.Multiline),
+ (r'/\*', Comment.Multiline, '#push'),
+ (r'\*/', Comment.Multiline, '#pop'),
+ (r'[*/]', Comment.Multiline)
+ ]
+ }
+
+This lexer starts lexing in the ``'root'`` state. It tries to match as much as
+possible until it finds a slash (``'/'``). If the next character after the slash
+is [-a star-] {+an asterisk+} (``'*'``) the `RegexLexer` sends those two characters to the
+output stream marked as `Comment.Multiline` and continues [-parsing-] {+lexing+} with the rules
+defined in the ``'comment'`` state.
+
+If there wasn't [-a star-] {+an asterisk+} after the slash, the `RegexLexer` checks if it's a
+[-singleline-]
+{+Singleline+} comment [-(eg:-] {+(i.e.+} followed by a second slash). If this also wasn't the
+case it must be a single [-slash-] {+slash, which is not a comment starter+} (the separate
+regex for a single slash must also be given, else the slash would be marked as
+an error token).
+
+Inside the ``'comment'`` state, we do the same thing again. Scan until the
+lexer finds a star or slash. If it's the opening of a multiline comment, push
+the ``'comment'`` state on the stack and continue scanning, again in the
+``'comment'`` state. Else, check if it's the end of the multiline comment. If
+yes, pop one state from the stack.
+
+Note: If you pop from an empty stack you'll get an `IndexError`. (There is an
+easy way to prevent this from happening: don't ``'#pop'`` in the root state).
+
+If the `RegexLexer` encounters a newline that is flagged as an error token, the
+stack is emptied and the lexer continues scanning in the ``'root'`` state. This
+[-helps-]
+{+can help+} producing error-tolerant highlighting for erroneous input, e.g. when a
+single-line string is not closed.
+
+
+Advanced state tricks
+=====================
+
+There are a few more things you can do with states:
+
+- You can push multiple states onto the stack if you give a tuple instead of a
+ simple string as the third item in a rule tuple. For example, if you want to
+ match a comment containing a directive, something [-like::-] {+like:
+
+ .. code-block:: text+}
+
+ /* <processing directive> rest of comment */
+
+ you can use this [-rule:
+
+ .. sourcecode:: python-] {+rule::+}
+
+ tokens = {
+ 'root': [
+ (r'/\* <', Comment, ('comment', 'directive')),
+ ...
+ ],
+ 'directive': [
+ (r'[^>]*', Comment.Directive),
+ (r'>', Comment, '#pop'),
+ ],
+ 'comment': [
+ (r'[^*]+', Comment),
+ (r'\*/', Comment, '#pop'),
+ (r'\*', Comment),
+ ]
+ }
+
+ When this encounters the above sample, first ``'comment'`` and ``'directive'``
+ are pushed onto the stack, then the lexer continues in the directive state
+ until it finds the closing ``>``, then it continues in the comment state until
+ the closing ``*/``. Then, both states are popped from the stack again and
+ lexing continues in the root state.
+
+ .. versionadded:: 0.9
+ The tuple can contain the special ``'#push'`` and ``'#pop'`` (but not
+ ``'#pop:n'``) directives.
+
+
+- You can include the rules of a state in the definition of another. This is
+ done by using `include` from [-`pygments.lexer`:
+
+ .. sourcecode:: python-] {+`pygments.lexer`::+}
+
+ from pygments.lexer import RegexLexer, bygroups, include
+ from pygments.token import *
+
+ class ExampleLexer(RegexLexer):
+ tokens = {
+ 'comments': [
+ (r'/\*.*?\*/', Comment),
+ (r'//.*?\n', Comment),
+ ],
+ 'root': [
+ include('comments'),
+ (r'(function )(\w+)( {)',
+ bygroups(Keyword, Name, Keyword), 'function'),
+ (r'.', Text),
+ ],
+ 'function': [
+ (r'[^}/]+', Text),
+ include('comments'),
+ (r'/', Text),
+ [-(r'}',-]
+ {+(r'\}',+} Keyword, '#pop'),
+ ]
+ }
+
+  This is a hypothetical lexer for a language that consists of functions and
+ comments. Because comments can occur at toplevel and in functions, we need
+ rules for comments in both states. As you can see, the `include` helper saves
+ repeating rules that occur more than once (in this example, the state
+ ``'comment'`` will never be entered by the lexer, as it's only there to be
+ included in ``'root'`` and ``'function'``).
+
+- Sometimes, you may want to "combine" a state from existing ones. This is
+ possible with the [-`combine`-] {+`combined`+} helper from `pygments.lexer`.
+
+ If you, instead of a new state, write ``combined('state1', 'state2')`` as the
+ third item of a rule tuple, a new anonymous state will be formed from state1
+ and state2 and if the rule matches, the lexer will enter this state.
+
+ This is not used very often, but can be helpful in some cases, such as the
+ `PythonLexer`'s string literal processing.
+
+- If you want your lexer to start lexing in a different state you can modify the
+ stack by [-overloading-] {+overriding+} the `get_tokens_unprocessed()` [-method:
+
+ .. sourcecode:: python-] {+method::+}
+
+ from pygments.lexer import RegexLexer
+
+ class [-MyLexer(RegexLexer):-] {+ExampleLexer(RegexLexer):+}
+ tokens = {...}
+
+ def get_tokens_unprocessed(self, [-text):
+ stack = ['root', 'otherstate']-] {+text, stack=('root', 'otherstate')):+}
+        for item in RegexLexer.get_tokens_unprocessed(self, text, stack):
+ yield item
+
+ Some lexers like the `PhpLexer` use this to make the leading ``<?php``
+ preprocessor comments optional. Note that you can crash the lexer easily by
+ putting values into the stack that don't exist in the token map. Also
+ removing ``'root'`` from the stack can result in strange errors!
+
+- [-An-] {+In some lexers, a state should be popped if anything is encountered that isn't
+ matched by a rule in the state. You could use an+} empty regex at the end of [-a-]
+ {+the+} state list, [-combined with ``'#pop'``, can
+ act as-] {+but Pygments provides+} a [-return point-] {+more obvious way of spelling that:
+ ``default('#pop')`` is equivalent to ``('', Text, '#pop')``.
+
+ .. versionadded:: 2.0
+
+
+Subclassing lexers derived+} from {+RegexLexer
+==========================================
+
+.. versionadded:: 1.6
+
+Sometimes multiple languages are very similar, but should still be lexed by
+different lexer classes.
+
+When subclassing+} a {+lexer derived from RegexLexer, the ``tokens`` dictionaries
+defined in the parent and child class are merged. For example::
+
+ from pygments.lexer import RegexLexer, inherit
+ from pygments.token import *
+
+ class BaseLexer(RegexLexer):
+ tokens = {
+ 'root': [
+ ('[a-z]+', Name),
+ (r'/\*', Comment, 'comment'),
+ ('"', String, 'string'),
+ ('\s+', Text),
+ ],
+ 'string': [
+ ('[^"]+', String),
+ ('"', String, '#pop'),
+ ],
+ 'comment': [
+ ...
+ ],
+ }
+
+ class DerivedLexer(BaseLexer):
+ tokens = {
+ 'root': [
+ ('[0-9]+', Number),
+ inherit,
+ ],
+ 'string': [
+ (r'[^"\\]+', String),
+ (r'\\.', String.Escape),
+ ('"', String, '#pop'),
+ ],
+ }
+
+The `BaseLexer` defines two states, lexing names and strings. The
+`DerivedLexer` defines its own tokens dictionary, which extends the definitions
+of the base lexer:
+
+* The "root"+} state {+has an additional rule and then the special object `inherit`,
+ which tells Pygments to insert the token definitions of the parent class at+}
+ that [-doesn't have a clear end marker.-] {+point.
+
+* The "string" state is replaced entirely, since there is not `inherit` rule.
+
+* The "comment" state is inherited entirely.+}
+
+
+Using multiple lexers
+=====================
+
+Using multiple lexers for the same input can be tricky. One of the easiest
+combination techniques is shown here: You can replace the [-token type-] {+action+} entry in a rule
+tuple [-(the second item)-] with a lexer class. The matched text will then be lexed with that lexer,
+and the resulting tokens will be yielded.
+
+For example, look at this stripped-down HTML [-lexer:
+
+.. sourcecode:: python-] {+lexer::+}
+
+ from pygments.lexer import RegexLexer, bygroups, using
+ from pygments.token import *
+ from [-pygments.lexers.web-] {+pygments.lexers.javascript+} import JavascriptLexer
+
+ class HtmlLexer(RegexLexer):
+ name = 'HTML'
+ aliases = ['html']
+ filenames = ['*.html', '*.htm']
+
+ flags = re.IGNORECASE | re.DOTALL
+ tokens = {
+ 'root': [
+ ('[^<&]+', Text),
+ ('&.*?;', Name.Entity),
+ (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')),
+ (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'),
+ (r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag),
+ ],
+ 'script-content': [
+ (r'(.+?)(<\s*/\s*script\s*>)',
+ bygroups(using(JavascriptLexer), Name.Tag),
+ '#pop'),
+ ]
+ }
+
+Here the content of a ``<script>`` tag is passed to a newly created instance of
+a `JavascriptLexer` and not processed by the `HtmlLexer`. This is done using
+the `using` helper that takes the other lexer class as its parameter.
+
+Note the combination of `bygroups` and `using`. This makes sure that the
+content up to the ``</script>`` end tag is processed by the `JavascriptLexer`,
+while the end tag is yielded as a normal token with the `Name.Tag` type.
+
+[-As an additional goodie, if the lexer class is replaced by `this` (imported from
+`pygments.lexer`), the "other" lexer will be the current one (because you cannot
+refer to the current class within the code that runs at class definition time).-]
+
+Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule.
+Here, two states are pushed onto the state stack, ``'script-content'`` and
+``'tag'``. That means that first ``'tag'`` is processed, which will [-parse-] {+lex+}
+attributes and the closing ``>``, then the ``'tag'`` state is popped and the
+next state on top of the stack will be ``'script-content'``.
+
+{+Since you cannot refer to the class currently being defined, use `this`
+(imported from `pygments.lexer`) to refer to the current lexer class, i.e.
+``using(this)``. This construct may seem unnecessary, but this is often the
+most obvious way of lexing arbitrary syntax between fixed delimiters without
+introducing deeply nested states.+}
+
+The `using()` helper has a special keyword argument, `state`, which works as
+follows: if given, the lexer to use initially is not in the ``"root"`` state,
+but in the state given by this argument. This [-*only* works-] {+does not work+} with [-a `RegexLexer`.-] {+advanced
+`RegexLexer` subclasses such as `ExtendedRegexLexer` (see below).+}
+
+Any other keyword arguments passed to `using()` are added to the keyword
+arguments used to create the lexer.
+
+
+Delegating Lexer
+================
+
+Another approach for nested lexers is the `DelegatingLexer` which is for example
+used for the template engine lexers. It takes two lexers as arguments on
+initialisation: a `root_lexer` and a `language_lexer`.
+
+The input is processed as follows: First, the whole text is lexed with the
+`language_lexer`. All tokens yielded with [-a-] {+the special+} type of ``Other`` are
+then concatenated and given to the `root_lexer`. The language tokens of the
+`language_lexer` are then inserted into the `root_lexer`'s token stream at the
+appropriate positions.
+
+[-.. sourcecode:: python-] {+::+}
+
+ from pygments.lexer import DelegatingLexer
+ from pygments.lexers.web import HtmlLexer, PhpLexer
+
+ class HtmlPhpLexer(DelegatingLexer):
+ def __init__(self, **options):
+ super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)
+
+This procedure ensures that e.g. HTML with template tags in it is highlighted
+correctly even if the template tags are put into HTML tags or attributes.
+
+If you want to change the needle token ``Other`` to something else, you can give
+the lexer another token type as the third [-parameter:
+
+.. sourcecode:: python-] {+parameter::+}
+
+ DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options)
+
+
+Callbacks
+=========
+
+Sometimes the grammar of a language is so complex that a lexer would be unable
+to [-parse-] {+process+} it just by using regular expressions and stacks.
+
+For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead
+of token types (`bygroups` and `using` are nothing else but preimplemented
+callbacks). The callback must be a function taking two arguments:
+
+* the lexer itself
+* the match object for the last matched rule
+
+The callback must then return an iterable of (or simply yield) ``(index,
+tokentype, value)`` tuples, which are then just passed through by
+`get_tokens_unprocessed()`. The ``index`` here is the position of the token in
+the input string, ``tokentype`` is the normal token type (like `Name.Builtin`),
+and ``value`` the associated part of the input string.
+
+You can see an example [-here:
+
+.. sourcecode:: python-] {+here::+}
+
+ from pygments.lexer import RegexLexer
+ from pygments.token import Generic
+
+ class HypotheticLexer(RegexLexer):
+
+ def headline_callback(lexer, match):
+ equal_signs = match.group(1)
+ text = match.group(2)
+ yield match.start(), Generic.Headline, equal_signs + text + equal_signs
+
+ tokens = {
+ 'root': [
+ (r'(=+)(.*?)(\1)', headline_callback)
+ ]
+ }
+
+If the regex for the `headline_callback` matches, the function is called with
+the match object. Note that after the callback is done, processing continues
+normally, that is, after the end of the previous match. The callback has no
+way to influence the position.
+
+There are not really any simple examples for lexer callbacks, but you can see
+them in action e.g. in the [-`compiled.py`_ source code-] {+`SMLLexer` class+} in [-the `CLexer` and
+`JavaLexer` classes.-] {+`ml.py`_.+}
+
+.. [-_compiled.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/compiled.py-] {+_ml.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ml.py+}
+
+
+The ExtendedRegexLexer class
+============================
+
+The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for
+the funky syntax rules of [-some-] languages [-that will go unnamed,-] such as Ruby.
+
+But fear not; even then you don't have to abandon the regular expression
+[-approach. For-]
+{+approach:+} Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`.
+All features known from RegexLexers are available here too, and the tokens are
+specified in exactly the same way, *except* for one detail:
+
+The `get_tokens_unprocessed()` method holds its internal state data not as local
+variables, but in an instance of the `pygments.lexer.LexerContext` class, and
+that instance is passed to callbacks as a third argument. This means that you
+can modify the lexer state in callbacks.
+
+The `LexerContext` class has the following members:
+
+* `text` -- the input text
+* `pos` -- the current starting position that is used for matching regexes
+* `stack` -- a list containing the state stack
+* `end` -- the maximum position to which regexes are matched, this defaults to
+ the length of `text`
+
+Additionally, the `get_tokens_unprocessed()` method can be given a
+`LexerContext` instead of a string and will then process this context instead of
+creating a new one for the string argument.
+
+Note that because you can set the current position to anything in the callback,
+it won't automatically be set by the caller after the callback is finished.
+For example, this is how the hypothetical lexer above would be written with the
+[-`ExtendedRegexLexer`:
+
+.. sourcecode:: python-]
+{+`ExtendedRegexLexer`::+}
+
+ from pygments.lexer import ExtendedRegexLexer
+ from pygments.token import Generic
+
+ class ExHypotheticLexer(ExtendedRegexLexer):
+
+ def headline_callback(lexer, match, ctx):
+ equal_signs = match.group(1)
+ text = match.group(2)
+ yield match.start(), Generic.Headline, equal_signs + text + equal_signs
+ ctx.pos = match.end()
+
+ tokens = {
+ 'root': [
+ (r'(=+)(.*?)(\1)', headline_callback)
+ ]
+ }
+
+This might sound confusing (and it can really be). But it is needed, and for an
+example look at the Ruby lexer in [-`agile.py`_.-] {+`ruby.py`_.+}
+
+.. [-_agile.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/agile.py
+
+
+Filtering-] {+_ruby.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ruby.py
+
+
+Handling Lists of Keywords
+==========================
+
+For a relatively short list (hundreds) you can construct an optimized regular
+expression directly using ``words()`` (longer lists, see next section). This
+function handles a few things for you automatically, including escaping
+metacharacters and Python's first-match rather than longest-match in
+alternations. Feel free to put the lists themselves in
+``pygments/lexers/_$lang_builtins.py`` (see examples there), generated by
+code if possible.
+
+An example of using ``words()`` is something like::
+
+    from pygments.lexer import RegexLexer, words
+    from pygments.token import Name
+
+ class MyLexer(RegexLexer):
+
+ tokens = {
+ 'root': [
+ (words(('else', 'elseif'), suffix=r'\b'), Name.Builtin),
+ (r'\w+', Name),
+ ],
+ }
+
+As you can see, you can add ``prefix`` and ``suffix`` parts to the constructed
+regex.
+
+
+Modifying+} Token Streams
+=======================
+
+Some languages ship a lot of builtin functions (for example PHP). The total
+amount of those functions differs from system to system because not everybody
+has every extension installed. In the case of PHP there are over 3000 builtin
+functions. That's an [-incredible-] {+incredibly+} huge amount of functions, much more than you
+[-can-]
+{+want to+} put into a regular expression.
+
+But because only `Name` tokens can be function names [-it's-] {+this is+} solvable by
+overriding the ``get_tokens_unprocessed()`` method. The following lexer
+subclasses the `PythonLexer` so that it highlights some additional names as
+pseudo [-keywords:
+
+.. sourcecode:: python-] {+keywords::+}
+
+ from [-pygments.lexers.agile-] {+pygments.lexers.python+} import PythonLexer
+ from pygments.token import Name, Keyword
+
+ class MyPythonLexer(PythonLexer):
+ EXTRA_KEYWORDS = [-['foo',-] {+set(('foo',+} 'bar', 'foobar', 'barfoo', 'spam', [-'eggs']-] {+'eggs'))+}
+
+ def get_tokens_unprocessed(self, text):
+ for index, token, value in PythonLexer.get_tokens_unprocessed(self, text):
+ if token is Name and value in self.EXTRA_KEYWORDS:
+ yield index, Keyword.Pseudo, value
+ else:
+ yield index, token, value
+
+The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions.
+
+[-.. note:: Do not confuse this with the :doc:`filter <filters>` system.-]
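These .wdiff fixtures exercise the word-diff lexer added in pygments/lexers/diff.py; a minimal sketch of driving it (assuming it registers a 'wdiff' alias):

    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import get_lexer_by_name

    sample = 'no change [-removed-] {+added+} no change\n'
    print(highlight(sample, get_lexer_by_name('wdiff'), HtmlFormatter()))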
diff --git a/tests/examplefiles/wdiff_example3.wdiff b/tests/examplefiles/wdiff_example3.wdiff
new file mode 100644
index 00000000..0bbd6d65
--- /dev/null
+++ b/tests/examplefiles/wdiff_example3.wdiff
@@ -0,0 +1,10 @@
+This example has unbalanced open-close markers.
+We can't treat these easily.
+
+{+ added? -]
+[- deleted? +}
+
+suddenly closed -]
+suddenly closed +}
+
+{+ added? [- deleted?
diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py
index bb667c05..90d05ef8 100644
--- a/tests/test_lexers_other.py
+++ b/tests/test_lexers_other.py
@@ -13,6 +13,7 @@ import unittest
from pygments.lexers import guess_lexer
from pygments.lexers.scripting import EasytrieveLexer, JclLexer, RexxLexer
+
def _exampleFilePath(filename):
return os.path.join(os.path.dirname(__file__), 'examplefiles', filename)
@@ -28,8 +29,8 @@ class AnalyseTextTest(unittest.TestCase):
text = fp.read().decode('utf-8')
probability = lexer.analyse_text(text)
self.assertTrue(probability > 0,
- '%s must recognize %r' % (
- lexer.name, exampleFilePath))
+ '%s must recognize %r' % (
+ lexer.name, exampleFilePath))
guessedLexer = guess_lexer(text)
self.assertEqual(guessedLexer.name, lexer.name)
@@ -45,25 +46,24 @@ class AnalyseTextTest(unittest.TestCase):
class EasyTrieveLexerTest(unittest.TestCase):
def testCanGuessFromText(self):
- self.assertLess(0, EasytrieveLexer.analyse_text('MACRO'))
- self.assertLess(0, EasytrieveLexer.analyse_text('\nMACRO'))
- self.assertLess(0, EasytrieveLexer.analyse_text(' \nMACRO'))
- self.assertLess(0, EasytrieveLexer.analyse_text(' \n MACRO'))
- self.assertLess(0, EasytrieveLexer.analyse_text('*\nMACRO'))
- self.assertLess(0, EasytrieveLexer.analyse_text(
+ self.assertTrue(EasytrieveLexer.analyse_text('MACRO'))
+ self.assertTrue(EasytrieveLexer.analyse_text('\nMACRO'))
+ self.assertTrue(EasytrieveLexer.analyse_text(' \nMACRO'))
+ self.assertTrue(EasytrieveLexer.analyse_text(' \n MACRO'))
+ self.assertTrue(EasytrieveLexer.analyse_text('*\nMACRO'))
+ self.assertTrue(EasytrieveLexer.analyse_text(
'*\n *\n\n \n*\n MACRO'))
class RexxLexerTest(unittest.TestCase):
def testCanGuessFromText(self):
- self.assertAlmostEqual(0.01,
- RexxLexer.analyse_text('/* */'))
+ self.assertAlmostEqual(0.01, RexxLexer.analyse_text('/* */'))
self.assertAlmostEqual(1.0,
- RexxLexer.analyse_text('''/* Rexx */
+ RexxLexer.analyse_text('''/* Rexx */
say "hello world"'''))
val = RexxLexer.analyse_text('/* */\n'
- 'hello:pRoceduRe\n'
- ' say "hello world"')
+ 'hello:pRoceduRe\n'
+ ' say "hello world"')
self.assertTrue(val > 0.5, val)
val = RexxLexer.analyse_text('''/* */
if 1 > 0 then do
diff --git a/tests/test_terminal_formatter.py b/tests/test_terminal_formatter.py
index 07337cd5..cb5c6c44 100644
--- a/tests/test_terminal_formatter.py
+++ b/tests/test_terminal_formatter.py
@@ -14,7 +14,13 @@ import re
from pygments.util import StringIO
from pygments.lexers.sql import PlPgsqlLexer
-from pygments.formatters import TerminalFormatter
+from pygments.formatters import TerminalFormatter, Terminal256Formatter, \
+ HtmlFormatter, LatexFormatter
+
+from pygments.style import Style
+from pygments.token import Token
+from pygments.lexers import Python3Lexer
+from pygments import highlight
DEMO_TEXT = '''\
-- comment
@@ -26,9 +32,11 @@ DEMO_TOKENS = list(DEMO_LEXER().get_tokens(DEMO_TEXT))
ANSI_RE = re.compile(r'\x1b[\w\W]*?m')
+
def strip_ansi(x):
return ANSI_RE.sub('', x)
+
class TerminalFormatterTest(unittest.TestCase):
def test_reasonable_output(self):
out = StringIO()
@@ -49,3 +57,46 @@ class TerminalFormatterTest(unittest.TestCase):
for a, b in zip(DEMO_TEXT.splitlines(), plain.splitlines()):
self.assertTrue(a in b)
+
+
+class MyStyle(Style):
+ styles = {
+ Token.Comment: '#ansidarkgray',
+ Token.String: '#ansiblue bg:#ansidarkred',
+ Token.Number: '#ansigreen bg:#ansidarkgreen',
+ Token.Number.Hex: '#ansidarkgreen bg:#ansired',
+ }
+
+
+class Terminal256FormatterTest(unittest.TestCase):
+ code = '''
+# this should be a comment
+print("Hello World")
+async def function(a,b,c, *d, **kwarg:Bool)->Bool:
+ pass
+ return 123, 0xb3e3
+
+'''
+
+ def test_style_html(self):
+ style = HtmlFormatter(style=MyStyle).get_style_defs()
+ self.assertTrue('#555555' in style,
+ "ansigray for comment not html css style")
+
+ def test_others_work(self):
+ """check other formatters don't crash"""
+ highlight(self.code, Python3Lexer(), LatexFormatter(style=MyStyle))
+ highlight(self.code, Python3Lexer(), HtmlFormatter(style=MyStyle))
+
+ def test_256esc_seq(self):
+ """
+        test that a few escape sequences are actually used when using #ansi<> color codes
+ """
+ def termtest(x):
+ return highlight(x, Python3Lexer(),
+ Terminal256Formatter(style=MyStyle))
+
+ self.assertTrue('32;41' in termtest('0x123'))
+ self.assertTrue('32;42' in termtest('123'))
+ self.assertTrue('30;01' in termtest('#comment'))
+ self.assertTrue('34;41' in termtest('"String"'))
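The escape sequences asserted above are the classic 16-color SGR codes; a self-contained sketch of the same check (`HexStyle` is a made-up style mirroring the Token.Number.Hex entry of MyStyle):

    from pygments import highlight
    from pygments.formatters import Terminal256Formatter
    from pygments.lexers import Python3Lexer
    from pygments.style import Style
    from pygments.token import Number

    class HexStyle(Style):
        styles = {Number.Hex: '#ansidarkgreen bg:#ansired'}

    out = highlight('0x123', Python3Lexer(), Terminal256Formatter(style=HexStyle))
    assert '32;41' in out   # darkgreen foreground (32) on red background (41)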