From 9131b270427a1810aae6674e4cba4c98c5afec59 Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Sun, 4 Oct 2015 17:27:18 +0200 Subject: Adds a Clean (http://clean.cs.ru.nl/Clean) Lexer and example file --- pygments/lexers/_mapping.py | 1 + pygments/lexers/clean.py | 274 ++++++++++++++++++++++++++++++++++++++ pygments/lexers/functional.py | 1 + tests/examplefiles/StdGeneric.icl | 92 +++++++++++++ 4 files changed, 368 insertions(+) create mode 100644 pygments/lexers/clean.py create mode 100644 tests/examplefiles/StdGeneric.icl diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 2e855570..293b7c41 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -72,6 +72,7 @@ LEXERS = { 'CheetahXmlLexer': ('pygments.lexers.templates', 'XML+Cheetah', ('xml+cheetah', 'xml+spitfire'), (), ('application/xml+cheetah', 'application/xml+spitfire')), 'CirruLexer': ('pygments.lexers.webmisc', 'Cirru', ('cirru',), ('*.cirru',), ('text/x-cirru',)), 'ClayLexer': ('pygments.lexers.c_like', 'Clay', ('clay',), ('*.clay',), ('text/x-clay',)), + 'CleanLexer': ('pygments.lexers.clean', 'CleanLexer', ('Clean', 'clean'), ('*.icl', '*.dcl'), ()), 'ClojureLexer': ('pygments.lexers.jvm', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')), 'ClojureScriptLexer': ('pygments.lexers.jvm', 'ClojureScript', ('clojurescript', 'cljs'), ('*.cljs',), ('text/x-clojurescript', 'application/x-clojurescript')), 'CobolFreeformatLexer': ('pygments.lexers.business', 'COBOLFree', ('cobolfree',), ('*.cbl', '*.CBL'), ()), diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py new file mode 100644 index 00000000..7fb86844 --- /dev/null +++ b/pygments/lexers/clean.py @@ -0,0 +1,274 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.make + ~~~~~~~~~~~~~~~~~~~~ + + Lexers for Makefiles and similar. + + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +from pygments.lexer import ExtendedRegexLexer, bygroups, words, include +from pygments.token import * + +__all__ = ['CleanLexer'] + +class CleanLexer(ExtendedRegexLexer): + """ + Lexer for the general purpose, state-of-the-art, pure and lazy functional + programming language Clean (http://clean.cs.ru.nl/Clean). + + .. 
versionadded: 2.1 + """ + name = 'CleanLexer' + aliases = ['Clean', 'clean'] + filenames = ['*.icl', '*.dcl'] + + def __init__(self, *args, **kwargs): + super(CleanLexer, self).__init__(*args, **kwargs) + global stored_indent + stored_indent = 0 + + def check_class_not_import(lexer, match, ctx): + if match.group(0) == 'import': + yield match.start(), Keyword.Namespace, match.group(0) + ctx.stack = ctx.stack[:-1] + ['fromimportfunc'] + else: + yield match.start(), Name.Class, match.group(0) + ctx.pos = match.end() + + def check_instance_class(lexer, match, ctx): + if match.group(0) == 'instance' or match.group(0) == 'class': + yield match.start(), Keyword, match.group(0) + else: + yield match.start(), Name.Function, match.group(0) + ctx.stack = ctx.stack + ['fromimportfunctype'] + ctx.pos = match.end() + + def store_indent(lexer, match, ctx): + global stored_indent + # Tabs are four spaces: + # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt + stored_indent = len(match.group(0).replace('\t',' ')) + ctx.pos = match.end() + yield match.start(), Text, match.group(0) + + def check_indent1(lexer, match, ctx): + global stored_indent + indent = len(match.group(0)) - 1 + if indent > stored_indent: + yield match.start(), Whitespace, match.group(0) + ctx.pos = match.start() + indent + 1 + else: + stored_indent = 0 + ctx.pos = match.start() + ctx.stack = ctx.stack[:-1] + yield match.start(), Whitespace, match.group(0)[1:] + + def check_indent2(lexer, match, ctx): + global stored_indent + indent = len(match.group(0)) - 1 + if indent > stored_indent: + yield match.start(), Whitespace, match.group(0) + ctx.pos = match.start() + indent + 1 + else: + stored_indent = 0 + ctx.pos = match.start() + ctx.stack = ctx.stack[:-2] + yield match.start(), Whitespace, match.group(0)[1:] + if match.group(0) == '\n\n': + ctx.pos = ctx.pos + 1 + + def check_indent3(lexer, match, ctx): + global stored_indent + indent = len(match.group(0)) - 1 + if indent > stored_indent: + yield match.start(), Whitespace, match.group(0) + ctx.pos = match.start() + indent + 1 + else: + stored_indent = 0 + ctx.pos = match.start() + ctx.stack = ctx.stack[:-3] + yield match.start(), Whitespace, match.group(0)[1:] + if match.group(0) == '\n\n': + ctx.pos = ctx.pos + 1 + + def skip(lexer, match, ctx): + ctx.stack = ctx.stack[:-1] + ctx.pos = match.end() + yield match.start(), Comment, match.group(0) + + tokens = { + 'common': [ + (r';', Punctuation, '#pop'), + (r'//', Comment, 'singlecomment') + ], + 'root': [ + # Comments + (r'//.*\n', Comment.Single), + (r'(?s)/\*\*.*?\*/', Comment.Special), + (r'(?s)/\*.*?\*/', Comment.Multi), + + # Modules, imports, etc. 
+ (r'\b((?:implementation|definition|system)\s+)?(module)(\s+)([\w`]+)', + bygroups(Keyword.Namespace, Keyword.Namespace, Text, Name.Class)), + (r'(?<=\n)import(?=\s)', Keyword.Namespace, 'import'), + (r'(?<=\n)from(?=\s)', Keyword.Namespace, 'fromimport'), + + # Keywords + # We cannot use (?s)^|(?<=\s) as prefix, so need to repeat this + (words(('class','instance','where','with','let','let!','with','in', + 'case','of','infix','infixr','infixl','generic','derive', + 'otherwise', 'code', 'inline'), + prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword), + (words(('class','instance','where','with','let','let!','with','in', + 'case','of','infix','infixr','infixl','generic','derive', + 'otherwise', 'code', 'inline'), + prefix=r'(?s)^', suffix=r'(?=\s)'), Keyword), + + # Function definitions + (r'(?=\{\|)', Whitespace, 'genericfunction'), + (r'(?<=\n)(\s*)([\w`\$\(\)=\-<>~*\^\|\+&%]+)(\s+[\w])*(\s*)(::)', + bygroups(store_indent, Name.Function, Keyword.Type, Whitespace, Punctuation), + 'functiondefargs'), + + # Type definitions + (r'(?<=\n)([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'), + + # Literals + (r'\'\\?.(?\|&~*\^/]', Operator), + (r'\\\\', Operator), + + # Lambda expressions + (r'\\.*?(->|\.|=)', Name.Function), + + # Whitespace + (r'\s', Whitespace), + + include('common') + ], + 'fromimport': [ + include('common'), + (r'([\w`]+)', check_class_not_import), + (r'\n', Whitespace, '#pop'), + (r'\s', Whitespace) + ], + 'fromimportfunc': [ + include('common'), + (r'([\w`\$\(\)=\-<>~*\^\|\+&%]+)', check_instance_class), + (r',', Punctuation), + (r'\n', Whitespace, '#pop'), + (r'\s', Whitespace) + ], + 'fromimportfunctype': [ + include('common'), + (r'[{(\[]', Punctuation, 'combtype'), + (r',', Punctuation, '#pop'), + (r':;\.#]', Punctuation), + (r'\n', Whitespace, '#pop:2'), + (r'\s', Whitespace), + (r'.', Keyword.Type) + ], + 'combtype': [ + include('common'), + (r'[})\]]', Punctuation, '#pop'), + (r'[{(\[]', Punctuation, '#pop'), + (r',:;\.#]', Punctuation), + (r'\s', Whitespace), + (r'.', Keyword.Type) + ], + 'import': [ + include('common'), + (words(('from', 'import', 'as', 'qualified'), + prefix='(?<=\s)', suffix='(?=\s)'), Keyword.Namespace), + (r'[\w`]+', Name.Class), + (r'\n', Whitespace, '#pop'), + (r',', Punctuation), + (r'\s', Whitespace) + ], + 'singlecomment': [ + (r'(.)(?=\n)', skip), + (r'.', Comment) + ], + 'doubleqstring': [ + (r'[^\\\'"]+', String.Double), + (r'"', String.Double, '#pop'), + (r'\\.|\'', String.Double) + ], + 'typedef': [ + include('common'), + (r'[\w`]+', Keyword.Type), + (r'[:=\|\(\),\[\]\{\}\!\*]', Punctuation), + (r'->', Punctuation), + (r'\n(?=[^\s\|])', Whitespace, '#pop'), + (r'\s', Whitespace), + (r'.', Keyword.Type) + ], + 'genericfunction': [ + include('common'), + (r'\{\|', Punctuation), + (r'\|\}', Punctuation, '#pop'), + (r',', Punctuation), + (r'->', Punctuation), + (r'(\s+of\s+)(\{)', bygroups(Keyword, Punctuation), 'genericftypes'), + (r'\s', Whitespace), + (r'[\w`]+', Keyword.Type), + (r'[\*\(\)]', Punctuation) + ], + 'genericftypes': [ + include('common'), + (r'[\w`]+', Keyword.Type), + (r',', Punctuation), + (r'\s', Whitespace), + (r'\}', Punctuation, '#pop') + ], + 'functiondefargs': [ + include('common'), + (r'\n(\s*)', check_indent1), + (r'[!{}()\[\],:;\.#]', Punctuation), + (r'->', Punctuation, 'functiondefres'), + (r'^(?=\S)', Whitespace, '#pop'), + (r'\S', Keyword.Type), + (r'\s', Whitespace) + ], + 'functiondefres': [ + include('common'), + (r'\n(\s*)', check_indent2), + (r'^(?=\S)', Whitespace, '#pop:2'), + 
(r'[!{}()\[\],:;\.#]', Punctuation), + (r'\|', Punctuation, 'functiondefclasses'), + (r'\S', Keyword.Type), + (r'\s', Whitespace) + ], + 'functiondefclasses': [ + include('common'), + (r'\n(\s*)', check_indent3), + (r'^(?=\S)', Whitespace, '#pop:3'), + (r'[,&]', Punctuation), + (r'[\w`\$\(\)=\-<>~*\^\|\+&%]', Name.Function, 'functionname'), + (r'\s', Whitespace) + ], + 'functionname': [ + include('common'), + (r'[\w`\$\(\)=\-<>~*\^\|\+&%]+', Name.Function), + (r'(?=\{\|)', Punctuation, 'genericfunction'), + (r'', Text, '#pop') + ] + } + diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index 180d3fd4..5d4cdf0c 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -17,5 +17,6 @@ from pygments.lexers.theorem import CoqLexer from pygments.lexers.erlang import ErlangLexer, ErlangShellLexer, \ ElixirConsoleLexer, ElixirLexer from pygments.lexers.ml import SMLLexer, OcamlLexer, OpaLexer +from pygments.lexers.clean import CleanLexer __all__ = [] diff --git a/tests/examplefiles/StdGeneric.icl b/tests/examplefiles/StdGeneric.icl new file mode 100644 index 00000000..2e6c3931 --- /dev/null +++ b/tests/examplefiles/StdGeneric.icl @@ -0,0 +1,92 @@ +implementation module StdGeneric + +import StdInt, StdMisc, StdClass, StdFunc + +generic bimap a b :: Bimap .a .b + +bimapId :: Bimap .a .a +bimapId = { map_to = id, map_from = id } + +bimap{|c|} = { map_to = id, map_from = id } + +bimap{|PAIR|} bx by = { map_to= map_to, map_from=map_from } +where + map_to (PAIR x y) = PAIR (bx.map_to x) (by.map_to y) + map_from (PAIR x y) = PAIR (bx.map_from x) (by.map_from y) +bimap{|EITHER|} bl br = { map_to= map_to, map_from=map_from } +where + map_to (LEFT x) = LEFT (bl.map_to x) + map_to (RIGHT x) = RIGHT (br.map_to x) + map_from (LEFT x) = LEFT (bl.map_from x) + map_from (RIGHT x) = RIGHT (br.map_from x) + +bimap{|(->)|} barg bres = { map_to = map_to, map_from = map_from } +where + map_to f = comp3 bres.map_to f barg.map_from + map_from f = comp3 bres.map_from f barg.map_to + +bimap{|CONS|} barg = { map_to= map_to, map_from=map_from } +where + map_to (CONS x) = CONS (barg.map_to x) + map_from (CONS x) = CONS (barg.map_from x) + +bimap{|FIELD|} barg = { map_to= map_to, map_from=map_from } +where + map_to (FIELD x) = FIELD (barg.map_to x) + map_from (FIELD x) = FIELD (barg.map_from x) + +bimap{|OBJECT|} barg = { map_to= map_to, map_from=map_from } +where + map_to (OBJECT x) = OBJECT (barg.map_to x) + map_from (OBJECT x) = OBJECT (barg.map_from x) + +bimap{|Bimap|} x y = {map_to = map_to, map_from = map_from} +where + map_to {map_to, map_from} = + { map_to = comp3 y.map_to map_to x.map_from + , map_from = comp3 x.map_to map_from y.map_from + } + map_from {map_to, map_from} = + { map_to = comp3 y.map_from map_to x.map_to + , map_from = comp3 x.map_from map_from y.map_to + } + +comp3 :: !(.a -> .b) u:(.c -> .a) !(.d -> .c) -> u:(.d -> .b) +comp3 f g h + | is_id f + | is_id h + = cast g + = cast (\x -> g (h x)) + | is_id h + = cast (\x -> f (g x)) + = \x -> f (g (h x)) +where + is_id :: !.(.a -> .b) -> Bool + is_id f = code inline + { + eq_desc e_StdFunc_did 0 0 + pop_a 1 + } + + cast :: !u:a -> u:b + cast f = code inline + { + pop_a 0 + } + +getConsPath :: !GenericConsDescriptor -> [ConsPos] +getConsPath {gcd_index, gcd_type_def={gtd_num_conses}} + = doit gcd_index gtd_num_conses +where + doit i n + | n == 0 + = abort "getConsPath: zero conses\n" + | i >= n + = abort "getConsPath: cons index >= number of conses" + | n == 1 + = [] + | i < (n/2) + = [ ConsLeft : doit i 
(n/2) ] + | otherwise + = [ ConsRight : doit (i - (n/2)) (n - (n/2)) ] + \ No newline at end of file -- cgit v1.2.1 From b9b77fbceb32da2b4973725a4bda356535429424 Mon Sep 17 00:00:00 2001 From: hhsprings Date: Wed, 4 Nov 2015 17:43:59 +0900 Subject: Add the lexer for `wdiff `_ output. (issue `#960 `_) --- pygments/lexers/_mapping.py | 1 + pygments/lexers/diff.py | 57 ++- tests/examplefiles/wdiff_example1.wdiff | 731 ++++++++++++++++++++++++++++++ tests/examplefiles/wdiff_example2.wdiff | 758 ++++++++++++++++++++++++++++++++ tests/examplefiles/wdiff_example3.wdiff | 10 + 5 files changed, 1556 insertions(+), 1 deletion(-) create mode 100644 tests/examplefiles/wdiff_example1.wdiff create mode 100644 tests/examplefiles/wdiff_example2.wdiff create mode 100644 tests/examplefiles/wdiff_example3.wdiff diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index af7eec36..28c02cff 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -396,6 +396,7 @@ LEXERS = { 'VerilogLexer': ('pygments.lexers.hdl', 'verilog', ('verilog', 'v'), ('*.v',), ('text/x-verilog',)), 'VhdlLexer': ('pygments.lexers.hdl', 'vhdl', ('vhdl',), ('*.vhdl', '*.vhd'), ('text/x-vhdl',)), 'VimLexer': ('pygments.lexers.textedit', 'VimL', ('vim',), ('*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'), ('text/x-vim',)), + 'WDiffLexer': ('pygments.lexers.diff', 'WDiff', ('wdiff',), ('*.wdiff',), ()), 'X10Lexer': ('pygments.lexers.x10', 'X10', ('x10', 'xten'), ('*.x10',), ('text/x-x10',)), 'XQueryLexer': ('pygments.lexers.webmisc', 'XQuery', ('xquery', 'xqy', 'xq', 'xql', 'xqm'), ('*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm'), ('text/xquery', 'application/xquery')), 'XmlDjangoLexer': ('pygments.lexers.templates', 'XML+Django/Jinja', ('xml+django', 'xml+jinja'), (), ('application/xml+django', 'application/xml+jinja')), diff --git a/pygments/lexers/diff.py b/pygments/lexers/diff.py index d3b1589d..9efb100b 100644 --- a/pygments/lexers/diff.py +++ b/pygments/lexers/diff.py @@ -9,11 +9,13 @@ :license: BSD, see LICENSE for details. """ +import re + from pygments.lexer import RegexLexer, include, bygroups from pygments.token import Text, Comment, Operator, Keyword, Name, Generic, \ Literal -__all__ = ['DiffLexer', 'DarcsPatchLexer'] +__all__ = ['DiffLexer', 'DarcsPatchLexer', 'WDiffLexer'] class DiffLexer(RegexLexer): @@ -104,3 +106,56 @@ class DarcsPatchLexer(RegexLexer): (r'[^\n\[]+', Generic.Deleted), ], } + + +class WDiffLexer(RegexLexer): + """ + A `wdiff `_ lexer. + + Note that: + + * only to normal output (without option like -l). + * if target files of wdiff contain "[-", "-]", "{+", "+}", + especially they are unbalanced, this lexer will get confusing. + + .. versionadded:: 2.1 + """ + + name = 'WDiff' + aliases = ['wdiff',] + filenames = ['*.wdiff',] + mimetypes = [] + + flags = re.MULTILINE | re.DOTALL + + # We can only assume "[-" after "[-" before "-]" is `nested`, + # for instance wdiff to wdiff outputs. We have no way to + # distinct these marker is of wdiff output from original text. 
+ + ins_op = r"\{\+" + ins_cl = r"\+\}" + del_op = r"\[\-" + del_cl = r"\-\]" + tokens = { + 'root': [ + (ins_op, Generic.Inserted, 'inserted'), + (del_op, Generic.Deleted, 'deleted'), + (r'.', Text), + ], + 'inserted': [ + (ins_op, Generic.Inserted, '#push'), + (del_op, Generic.Inserted, '#push'), + (del_cl, Generic.Inserted, '#pop'), + + (ins_cl, Generic.Inserted, '#pop'), + (r'.', Generic.Inserted), + ], + 'deleted': [ + (del_op, Generic.Deleted, '#push'), + (ins_op, Generic.Deleted, '#push'), + (ins_cl, Generic.Deleted, '#pop'), + + (del_cl, Generic.Deleted, '#pop'), + (r'.', Generic.Deleted), + ], + } diff --git a/tests/examplefiles/wdiff_example1.wdiff b/tests/examplefiles/wdiff_example1.wdiff new file mode 100644 index 00000000..ca760812 --- /dev/null +++ b/tests/examplefiles/wdiff_example1.wdiff @@ -0,0 +1,731 @@ +.. -*- mode: rst -*- + +{+.. highlight:: python+} + +==================== +Write your own lexer +==================== + +If a lexer for your favorite language is missing in the Pygments package, you +can easily write your own and extend Pygments. + +All you need can be found inside the :mod:`pygments.lexer` module. As you can +read in the :doc:`API documentation `, a lexer is a class that is +initialized with some keyword arguments (the lexer options) and that provides a +:meth:`.get_tokens_unprocessed()` method which is given a string or unicode +object with the data to [-parse.-] {+lex.+} + +The :meth:`.get_tokens_unprocessed()` method must return an iterator or iterable +containing tuples in the form ``(index, token, value)``. Normally you don't +need to do this since there are [-numerous-] base lexers {+that do most of the work and that+} +you can subclass. + + +RegexLexer +========== + +[-A very powerful (but quite easy to use)-] + +{+The+} lexer {+base class used by almost all of Pygments' lexers+} is the +:class:`RegexLexer`. This +[-lexer base-] class allows you to define lexing rules in terms of +*regular expressions* for different *states*. + +States are groups of regular expressions that are matched against the input +string at the *current position*. If one of these expressions matches, a +corresponding action is performed [-(normally-] {+(such as+} yielding a token with a specific +[-type),-] +{+type, or changing state),+} the current position is set to where the last match +ended and the matching process continues with the first regex of the current +state. + +Lexer states are kept [-in-] {+on+} a [-state-] stack: each time a new state is entered, the new +state is pushed onto the stack. The most basic lexers (like the `DiffLexer`) +just need one state. + +Each state is defined as a list of tuples in the form (`regex`, `action`, +`new_state`) where the last item is optional. In the most basic form, `action` +is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a +token with the match text and type `tokentype` and push `new_state` on the state +stack. If the new state is ``'#pop'``, the topmost state is popped from the +stack instead. [-(To-] {+To+} pop more than one state, use ``'#pop:2'`` and so [-on.)-] {+on.+} +``'#push'`` is a synonym for pushing the current state on the stack. + +The following example shows the `DiffLexer` from the builtin lexers. Note that +it contains some additional attributes `name`, `aliases` and `filenames` which +aren't required for a lexer. They are used by the builtin lexer lookup +functions. + +[-.. 
sourcecode:: python-] {+::+} + + from pygments.lexer import RegexLexer + from pygments.token import * + + class DiffLexer(RegexLexer): + name = 'Diff' + aliases = ['diff'] + filenames = ['*.diff'] + + tokens = { + 'root': [ + (r' .*\n', Text), + (r'\+.*\n', Generic.Inserted), + (r'-.*\n', Generic.Deleted), + (r'@.*\n', Generic.Subheading), + (r'Index.*\n', Generic.Heading), + (r'=.*\n', Generic.Heading), + (r'.*\n', Text), + ] + } + +As you can see this lexer only uses one state. When the lexer starts scanning +the text, it first checks if the current character is a space. If this is true +it scans everything until newline and returns the [-parsed-] data as {+a+} `Text` [-token.-] {+token (which +is the "no special highlighting" token).+} + +If this rule doesn't match, it checks if the current char is a plus sign. And +so on. + +If no rule matches at the current position, the current char is emitted as an +`Error` token that indicates a [-parsing-] {+lexing+} error, and the position is increased by +[-1.-] +{+one.+} + + +Adding and testing a new lexer +============================== + +To make [-pygments-] {+Pygments+} aware of your new lexer, you have to perform the following +steps: + +First, change to the current directory containing the [-pygments-] {+Pygments+} source code: + +.. [-sourcecode::-] {+code-block::+} console + + $ cd .../pygments-main + +{+Select a matching module under ``pygments/lexers``, or create a new module for +your lexer class.+} + +Next, make sure the lexer is known from outside of the module. All modules in +the ``pygments.lexers`` specify ``__all__``. For example, [-``other.py`` sets: + +.. sourcecode:: python-] {+``esoteric.py`` sets::+} + + __all__ = ['BrainfuckLexer', 'BefungeLexer', ...] + +Simply add the name of your lexer class to this list. + +Finally the lexer can be made [-publically-] {+publicly+} known by rebuilding the lexer mapping: + +.. [-sourcecode::-] {+code-block::+} console + + $ make mapfiles + +To test the new lexer, store an example file with the proper extension in +``tests/examplefiles``. For example, to test your ``DiffLexer``, add a +``tests/examplefiles/example.diff`` containing a sample diff output. + +Now you can use pygmentize to render your example to HTML: + +.. [-sourcecode::-] {+code-block::+} console + + $ ./pygmentize -O full -f html -o /tmp/example.html tests/examplefiles/example.diff + +Note that this [-explicitely-] {+explicitly+} calls the ``pygmentize`` in the current directory +by preceding it with ``./``. This ensures your modifications are used. +Otherwise a possibly already installed, unmodified version without your new +lexer would have been called from the system search path (``$PATH``). + +To view the result, open ``/tmp/example.html`` in your browser. + +Once the example renders as expected, you should run the complete test suite: + +.. [-sourcecode::-] {+code-block::+} console + + $ make test + +{+It also tests that your lexer fulfills the lexer API and certain invariants, +such as that the concatenation of all token text is the same as the input text.+} + + +Regex Flags +=========== + +You can either define regex flags {+locally+} in the regex (``r'(?x)foo bar'``) or +{+globally+} by adding a `flags` attribute to your lexer class. If no attribute is +defined, it defaults to `re.MULTILINE`. For more [-informations-] {+information+} about regular +expression flags see the {+page about+} `regular expressions`_ [-help page-] in the [-python-] {+Python+} +documentation. + +.. 
_regular expressions: [-http://docs.python.org/lib/re-syntax.html-] {+http://docs.python.org/library/re.html#regular-expression-syntax+} + + +Scanning multiple tokens at once +================================ + +{+So far, the `action` element in the rule tuple of regex, action and state has +been a single token type. Now we look at the first of several other possible +values.+} + +Here is a more complex lexer that highlights INI files. INI files consist of +sections, comments and [-key-] {+``key+} = [-value pairs: + +.. sourcecode:: python-] {+value`` pairs::+} + + from pygments.lexer import RegexLexer, bygroups + from pygments.token import * + + class IniLexer(RegexLexer): + name = 'INI' + aliases = ['ini', 'cfg'] + filenames = ['*.ini', '*.cfg'] + + tokens = { + 'root': [ + (r'\s+', Text), + (r';.*?$', Comment), + (r'\[.*?\]$', Keyword), + (r'(.*?)(\s*)(=)(\s*)(.*?)$', + bygroups(Name.Attribute, Text, Operator, Text, String)) + ] + } + +The lexer first looks for whitespace, comments and section names. [-And later-] {+Later+} it +looks for a line that looks like a key, value pair, separated by an ``'='`` +sign, and optional whitespace. + +The `bygroups` helper [-makes sure that-] {+yields+} each {+capturing+} group [-is yielded-] {+in the regex+} with a different +token type. First the `Name.Attribute` token, then a `Text` token for the +optional whitespace, after that a `Operator` token for the equals sign. Then a +`Text` token for the whitespace again. The rest of the line is returned as +`String`. + +Note that for this to work, every part of the match must be inside a capturing +group (a ``(...)``), and there must not be any nested capturing groups. If you +nevertheless need a group, use a non-capturing group defined using this syntax: +[-``r'(?:some|words|here)'``-] +{+``(?:some|words|here)``+} (note the ``?:`` after the beginning parenthesis). + +If you find yourself needing a capturing group inside the regex which shouldn't +be part of the output but is used in the regular expressions for backreferencing +(eg: ``r'(<(foo|bar)>)(.*?)()'``), you can pass `None` to the bygroups +function and [-it will skip-] that group will be skipped in the output. + + +Changing states +=============== + +Many lexers need multiple states to work as expected. For example, some +languages allow multiline comments to be nested. Since this is a recursive +pattern it's impossible to lex just using regular expressions. + +Here is [-the solution: + +.. sourcecode:: python-] {+a lexer that recognizes C++ style comments (multi-line with ``/* */`` +and single-line with ``//`` until end of line)::+} + + from pygments.lexer import RegexLexer + from pygments.token import * + + class [-ExampleLexer(RegexLexer):-] {+CppCommentLexer(RegexLexer):+} + name = 'Example Lexer with states' + + tokens = { + 'root': [ + (r'[^/]+', Text), + (r'/\*', Comment.Multiline, 'comment'), + (r'//.*?$', Comment.Singleline), + (r'/', Text) + ], + 'comment': [ + (r'[^*/]', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline) + ] + } + +This lexer starts lexing in the ``'root'`` state. It tries to match as much as +possible until it finds a slash (``'/'``). If the next character after the slash +is [-a star-] {+an asterisk+} (``'*'``) the `RegexLexer` sends those two characters to the +output stream marked as `Comment.Multiline` and continues [-parsing-] {+lexing+} with the rules +defined in the ``'comment'`` state. 
+ +If there wasn't [-a star-] {+an asterisk+} after the slash, the `RegexLexer` checks if it's a +[-singleline-] +{+Singleline+} comment [-(eg:-] {+(i.e.+} followed by a second slash). If this also wasn't the +case it must be a single [-slash-] {+slash, which is not a comment starter+} (the separate +regex for a single slash must also be given, else the slash would be marked as +an error token). + +Inside the ``'comment'`` state, we do the same thing again. Scan until the +lexer finds a star or slash. If it's the opening of a multiline comment, push +the ``'comment'`` state on the stack and continue scanning, again in the +``'comment'`` state. Else, check if it's the end of the multiline comment. If +yes, pop one state from the stack. + +Note: If you pop from an empty stack you'll get an `IndexError`. (There is an +easy way to prevent this from happening: don't ``'#pop'`` in the root state). + +If the `RegexLexer` encounters a newline that is flagged as an error token, the +stack is emptied and the lexer continues scanning in the ``'root'`` state. This +[-helps-] +{+can help+} producing error-tolerant highlighting for erroneous input, e.g. when a +single-line string is not closed. + + +Advanced state tricks +===================== + +There are a few more things you can do with states: + +- You can push multiple states onto the stack if you give a tuple instead of a + simple string as the third item in a rule tuple. For example, if you want to + match a comment containing a directive, something [-like::-] {+like: + + .. code-block:: text+} + + /* rest of comment */ + + you can use this [-rule: + + .. sourcecode:: python-] {+rule::+} + + tokens = { + 'root': [ + (r'/\* <', Comment, ('comment', 'directive')), + ... + ], + 'directive': [ + (r'[^>]*', Comment.Directive), + (r'>', Comment, '#pop'), + ], + 'comment': [ + (r'[^*]+', Comment), + (r'\*/', Comment, '#pop'), + (r'\*', Comment), + ] + } + + When this encounters the above sample, first ``'comment'`` and ``'directive'`` + are pushed onto the stack, then the lexer continues in the directive state + until it finds the closing ``>``, then it continues in the comment state until + the closing ``*/``. Then, both states are popped from the stack again and + lexing continues in the root state. + + .. versionadded:: 0.9 + The tuple can contain the special ``'#push'`` and ``'#pop'`` (but not + ``'#pop:n'``) directives. + + +- You can include the rules of a state in the definition of another. This is + done by using `include` from [-`pygments.lexer`: + + .. sourcecode:: python-] {+`pygments.lexer`::+} + + from pygments.lexer import RegexLexer, bygroups, include + from pygments.token import * + + class ExampleLexer(RegexLexer): + tokens = { + 'comments': [ + (r'/\*.*?\*/', Comment), + (r'//.*?\n', Comment), + ], + 'root': [ + include('comments'), + (r'(function )(\w+)( {)', + bygroups(Keyword, Name, Keyword), 'function'), + (r'.', Text), + ], + 'function': [ + (r'[^}/]+', Text), + include('comments'), + (r'/', Text), + [-(r'}',-] + {+(r'\}',+} Keyword, '#pop'), + ] + } + + This is a hypothetical lexer for a language that consist of functions and + comments. Because comments can occur at toplevel and in functions, we need + rules for comments in both states. As you can see, the `include` helper saves + repeating rules that occur more than once (in this example, the state + ``'comment'`` will never be entered by the lexer, as it's only there to be + included in ``'root'`` and ``'function'``). 
+ +- Sometimes, you may want to "combine" a state from existing ones. This is + possible with the [-`combine`-] {+`combined`+} helper from `pygments.lexer`. + + If you, instead of a new state, write ``combined('state1', 'state2')`` as the + third item of a rule tuple, a new anonymous state will be formed from state1 + and state2 and if the rule matches, the lexer will enter this state. + + This is not used very often, but can be helpful in some cases, such as the + `PythonLexer`'s string literal processing. + +- If you want your lexer to start lexing in a different state you can modify the + stack by [-overloading-] {+overriding+} the `get_tokens_unprocessed()` [-method: + + .. sourcecode:: python-] {+method::+} + + from pygments.lexer import RegexLexer + + class [-MyLexer(RegexLexer):-] {+ExampleLexer(RegexLexer):+} + tokens = {...} + + def get_tokens_unprocessed(self, [-text): + stack = ['root', 'otherstate']-] {+text, stack=('root', 'otherstate')):+} + for item in RegexLexer.get_tokens_unprocessed(text, stack): + yield item + + Some lexers like the `PhpLexer` use this to make the leading ``', Name.Tag), + ], + 'script-content': [ + (r'(.+?)(<\s*/\s*script\s*>)', + bygroups(using(JavascriptLexer), Name.Tag), + '#pop'), + ] + } + +Here the content of a ```` end tag is processed by the `JavascriptLexer`, +while the end tag is yielded as a normal token with the `Name.Tag` type. + +[-As an additional goodie, if the lexer class is replaced by `this` (imported from +`pygments.lexer`), the "other" lexer will be the current one (because you cannot +refer to the current class within the code that runs at class definition time).-] + +Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule. +Here, two states are pushed onto the state stack, ``'script-content'`` and +``'tag'``. That means that first ``'tag'`` is processed, which will [-parse-] {+lex+} +attributes and the closing ``>``, then the ``'tag'`` state is popped and the +next state on top of the stack will be ``'script-content'``. + +{+Since you cannot refer to the class currently being defined, use `this` +(imported from `pygments.lexer`) to refer to the current lexer class, i.e. +``using(this)``. This construct may seem unnecessary, but this is often the +most obvious way of lexing arbitrary syntax between fixed delimiters without +introducing deeply nested states.+} + +The `using()` helper has a special keyword argument, `state`, which works as +follows: if given, the lexer to use initially is not in the ``"root"`` state, +but in the state given by this argument. This [-*only* works-] {+does not work+} with [-a `RegexLexer`.-] {+advanced +`RegexLexer` subclasses such as `ExtendedRegexLexer` (see below).+} + +Any other keywords arguments passed to `using()` are added to the keyword +arguments used to create the lexer. + + +Delegating Lexer +================ + +Another approach for nested lexers is the `DelegatingLexer` which is for example +used for the template engine lexers. It takes two lexers as arguments on +initialisation: a `root_lexer` and a `language_lexer`. + +The input is processed as follows: First, the whole text is lexed with the +`language_lexer`. All tokens yielded with [-a-] {+the special+} type of ``Other`` are +then concatenated and given to the `root_lexer`. The language tokens of the +`language_lexer` are then inserted into the `root_lexer`'s token stream at the +appropriate positions. + +[-.. 
sourcecode:: python-] {+::+} + + from pygments.lexer import DelegatingLexer + from pygments.lexers.web import HtmlLexer, PhpLexer + + class HtmlPhpLexer(DelegatingLexer): + def __init__(self, **options): + super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options) + +This procedure ensures that e.g. HTML with template tags in it is highlighted +correctly even if the template tags are put into HTML tags or attributes. + +If you want to change the needle token ``Other`` to something else, you can give +the lexer another token type as the third [-parameter: + +.. sourcecode:: python-] {+parameter::+} + + DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options) + + +Callbacks +========= + +Sometimes the grammar of a language is so complex that a lexer would be unable +to [-parse-] {+process+} it just by using regular expressions and stacks. + +For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead +of token types (`bygroups` and `using` are nothing else but preimplemented +callbacks). The callback must be a function taking two arguments: + +* the lexer itself +* the match object for the last matched rule + +The callback must then return an iterable of (or simply yield) ``(index, +tokentype, value)`` tuples, which are then just passed through by +`get_tokens_unprocessed()`. The ``index`` here is the position of the token in +the input string, ``tokentype`` is the normal token type (like `Name.Builtin`), +and ``value`` the associated part of the input string. + +You can see an example [-here: + +.. sourcecode:: python-] {+here::+} + + from pygments.lexer import RegexLexer + from pygments.token import Generic + + class HypotheticLexer(RegexLexer): + + def headline_callback(lexer, match): + equal_signs = match.group(1) + text = match.group(2) + yield match.start(), Generic.Headline, equal_signs + text + equal_signs + + tokens = { + 'root': [ + (r'(=+)(.*?)(\1)', headline_callback) + ] + } + +If the regex for the `headline_callback` matches, the function is called with +the match object. Note that after the callback is done, processing continues +normally, that is, after the end of the previous match. The callback has no +possibility to influence the position. + +There are not really any simple examples for lexer callbacks, but you can see +them in action e.g. in the [-`compiled.py`_ source code-] {+`SMLLexer` class+} in [-the `CLexer` and +`JavaLexer` classes.-] {+`ml.py`_.+} + +.. [-_compiled.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/compiled.py-] {+_ml.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ml.py+} + + +The ExtendedRegexLexer class +============================ + +The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for +the funky syntax rules of [-some-] languages [-that will go unnamed,-] such as Ruby. + +But fear not; even then you don't have to abandon the regular expression +[-approach. For-] +{+approach:+} Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`. +All features known from RegexLexers are available here too, and the tokens are +specified in exactly the same way, *except* for one detail: + +The `get_tokens_unprocessed()` method holds its internal state data not as local +variables, but in an instance of the `pygments.lexer.LexerContext` class, and +that instance is passed to callbacks as a third argument. This means that you +can modify the lexer state in callbacks. 
+ +The `LexerContext` class has the following members: + +* `text` -- the input text +* `pos` -- the current starting position that is used for matching regexes +* `stack` -- a list containing the state stack +* `end` -- the maximum position to which regexes are matched, this defaults to + the length of `text` + +Additionally, the `get_tokens_unprocessed()` method can be given a +`LexerContext` instead of a string and will then process this context instead of +creating a new one for the string argument. + +Note that because you can set the current position to anything in the callback, +it won't be automatically be set by the caller after the callback is finished. +For example, this is how the hypothetical lexer above would be written with the +[-`ExtendedRegexLexer`: + +.. sourcecode:: python-] +{+`ExtendedRegexLexer`::+} + + from pygments.lexer import ExtendedRegexLexer + from pygments.token import Generic + + class ExHypotheticLexer(ExtendedRegexLexer): + + def headline_callback(lexer, match, ctx): + equal_signs = match.group(1) + text = match.group(2) + yield match.start(), Generic.Headline, equal_signs + text + equal_signs + ctx.pos = match.end() + + tokens = { + 'root': [ + (r'(=+)(.*?)(\1)', headline_callback) + ] + } + +This might sound confusing (and it can really be). But it is needed, and for an +example look at the Ruby lexer in [-`agile.py`_.-] {+`ruby.py`_.+} + +.. [-_agile.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/agile.py + + +Filtering-] {+_ruby.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ruby.py + + +Handling Lists of Keywords +========================== + +For a relatively short list (hundreds) you can construct an optimized regular +expression directly using ``words()`` (longer lists, see next section). This +function handles a few things for you automatically, including escaping +metacharacters and Python's first-match rather than longest-match in +alternations. Feel free to put the lists themselves in +``pygments/lexers/_$lang_builtins.py`` (see examples there), and generated by +code if possible. + +An example of using ``words()`` is something like:: + + from pygments.lexer import RegexLexer, words, Name + + class MyLexer(RegexLexer): + + tokens = { + 'root': [ + (words(('else', 'elseif'), suffix=r'\b'), Name.Builtin), + (r'\w+', Name), + ], + } + +As you can see, you can add ``prefix`` and ``suffix`` parts to the constructed +regex. + + +Modifying+} Token Streams +======================= + +Some languages ship a lot of builtin functions (for example PHP). The total +amount of those functions differs from system to system because not everybody +has every extension installed. In the case of PHP there are over 3000 builtin +functions. That's an [-incredible-] {+incredibly+} huge amount of functions, much more than you +[-can-] +{+want to+} put into a regular expression. + +But because only `Name` tokens can be function names [-it's-] {+this is+} solvable by +overriding the ``get_tokens_unprocessed()`` method. The following lexer +subclasses the `PythonLexer` so that it highlights some additional names as +pseudo [-keywords: + +.. 
sourcecode:: python-] {+keywords::+} + + from [-pygments.lexers.agile-] {+pygments.lexers.python+} import PythonLexer + from pygments.token import Name, Keyword + + class MyPythonLexer(PythonLexer): + EXTRA_KEYWORDS = [-['foo',-] {+set(('foo',+} 'bar', 'foobar', 'barfoo', 'spam', [-'eggs']-] {+'eggs'))+} + + def get_tokens_unprocessed(self, text): + for index, token, value in PythonLexer.get_tokens_unprocessed(self, text): + if token is Name and value in self.EXTRA_KEYWORDS: + yield index, Keyword.Pseudo, value + else: + yield index, token, value + +The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions. + +[-.. note:: Do not confuse this with the :doc:`filter ` system.-] diff --git a/tests/examplefiles/wdiff_example2.wdiff b/tests/examplefiles/wdiff_example2.wdiff new file mode 100644 index 00000000..1a746fe5 --- /dev/null +++ b/tests/examplefiles/wdiff_example2.wdiff @@ -0,0 +1,758 @@ +.. -*- mode: rst -*- + +[-..-] + +{+{+..+} highlight:: [-python-] {+python+}+} + +==================== +Write your own lexer +==================== + +If a lexer for your favorite language is missing in the Pygments package, you +can easily write your own and extend Pygments. + +All you need can be found inside the :mod:`pygments.lexer` module. As you can +read in the :doc:`API documentation `, a lexer is a class that is +initialized with some keyword arguments (the lexer options) and that provides a +:meth:`.get_tokens_unprocessed()` method which is given a string or unicode +object with the data to [-lex.-] {+[-parse.-] {+lex.+}+} + +The :meth:`.get_tokens_unprocessed()` method must return an iterator or iterable +containing tuples in the form ``(index, token, value)``. Normally you don't +need to do this since there are {+[-numerous-]+} base lexers [-that-] {+{+that+} do most of the work and [-that-] {+that+}+} +you can subclass. + + +RegexLexer +========== + +[-The-] + +{+[-A very powerful (but quite easy to use)-] + +{+The+}+} lexer [-base-] {+{+base+} class used by almost all of Pygments' [-lexers-] {+lexers+}+} is the +:class:`RegexLexer`. This +{+[-lexer base-]+} class allows you to define lexing rules in terms of +*regular expressions* for different *states*. + +States are groups of regular expressions that are matched against the input +string at the *current position*. If one of these expressions matches, a +corresponding action is performed [-(such as-] {+[-(normally-] {+(such as+}+} yielding a token with a specific +[-type,-] +{+[-type),-] +{+type,+} or changing [-state),-] {+state),+}+} the current position is set to where the last match +ended and the matching process continues with the first regex of the current +state. + +Lexer states are kept [-on-] {+[-in-] {+on+}+} a {+[-state-]+} stack: each time a new state is entered, the new +state is pushed onto the stack. The most basic lexers (like the `DiffLexer`) +just need one state. + +Each state is defined as a list of tuples in the form (`regex`, `action`, +`new_state`) where the last item is optional. In the most basic form, `action` +is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a +token with the match text and type `tokentype` and push `new_state` on the state +stack. If the new state is ``'#pop'``, the topmost state is popped from the +stack instead. [-To-] {+[-(To-] {+To+}+} pop more than one state, use ``'#pop:2'`` and so [-on.-] {+[-on.)-] {+on.+}+} +``'#push'`` is a synonym for pushing the current state on the stack. 
+ +The following example shows the `DiffLexer` from the builtin lexers. Note that +it contains some additional attributes `name`, `aliases` and `filenames` which +aren't required for a lexer. They are used by the builtin lexer lookup +functions. [-::-] + +{+[-.. sourcecode:: python-] {+::+}+} + + from pygments.lexer import RegexLexer + from pygments.token import * + + class DiffLexer(RegexLexer): + name = 'Diff' + aliases = ['diff'] + filenames = ['*.diff'] + + tokens = { + 'root': [ + (r' .*\n', Text), + (r'\+.*\n', Generic.Inserted), + (r'-.*\n', Generic.Deleted), + (r'@.*\n', Generic.Subheading), + (r'Index.*\n', Generic.Heading), + (r'=.*\n', Generic.Heading), + (r'.*\n', Text), + ] + } + +As you can see this lexer only uses one state. When the lexer starts scanning +the text, it first checks if the current character is a space. If this is true +it scans everything until newline and returns the {+[-parsed-]+} data as [-a-] {+{+a+}+} `Text` [-token-] {+[-token.-] {+token+} (which +is the "no special highlighting" [-token).-] {+token).+}+} + +If this rule doesn't match, it checks if the current char is a plus sign. And +so on. + +If no rule matches at the current position, the current char is emitted as an +`Error` token that indicates a [-lexing-] {+[-parsing-] {+lexing+}+} error, and the position is increased by +[-one.-] +{+[-1.-] +{+one.+}+} + + +Adding and testing a new lexer +============================== + +To make [-Pygments-] {+[-pygments-] {+Pygments+}+} aware of your new lexer, you have to perform the following +steps: + +First, change to the current directory containing the [-Pygments-] {+[-pygments-] {+Pygments+}+} source code: + +.. [-code-block::-] {+[-sourcecode::-] {+code-block::+}+} console + + $ cd .../pygments-main + +[-Select-] + +{+{+Select+} a matching module under ``pygments/lexers``, or create a new module for +your lexer [-class.-] {+class.+}+} + +Next, make sure the lexer is known from outside of the module. All modules in +the ``pygments.lexers`` specify ``__all__``. For example, [-``esoteric.py`` sets::-] {+[-``other.py`` sets: + +.. sourcecode:: python-] {+``esoteric.py`` sets::+}+} + + __all__ = ['BrainfuckLexer', 'BefungeLexer', ...] + +Simply add the name of your lexer class to this list. + +Finally the lexer can be made [-publicly-] {+[-publically-] {+publicly+}+} known by rebuilding the lexer mapping: + +.. [-code-block::-] {+[-sourcecode::-] {+code-block::+}+} console + + $ make mapfiles + +To test the new lexer, store an example file with the proper extension in +``tests/examplefiles``. For example, to test your ``DiffLexer``, add a +``tests/examplefiles/example.diff`` containing a sample diff output. + +Now you can use pygmentize to render your example to HTML: + +.. [-code-block::-] {+[-sourcecode::-] {+code-block::+}+} console + + $ ./pygmentize -O full -f html -o /tmp/example.html tests/examplefiles/example.diff + +Note that this [-explicitly-] {+[-explicitely-] {+explicitly+}+} calls the ``pygmentize`` in the current directory +by preceding it with ``./``. This ensures your modifications are used. +Otherwise a possibly already installed, unmodified version without your new +lexer would have been called from the system search path (``$PATH``). + +To view the result, open ``/tmp/example.html`` in your browser. + +Once the example renders as expected, you should run the complete test suite: + +.. 
[-code-block::-] {+[-sourcecode::-] {+code-block::+}+} console + + $ make test + +[-It-] + +{+{+It+} also tests that your lexer fulfills the lexer API and certain invariants, +such as that the concatenation of all token text is the same as the input [-text.-] {+text.+}+} + + +Regex Flags +=========== + +You can either define regex flags [-locally-] {+{+locally+}+} in the regex (``r'(?x)foo bar'``) or +[-globally-] +{+{+globally+}+} by adding a `flags` attribute to your lexer class. If no attribute is +defined, it defaults to `re.MULTILINE`. For more [-information-] {+[-informations-] {+information+}+} about regular +expression flags see the [-page about-] {+{+page about+}+} `regular expressions`_ {+[-help page-]+} in the [-Python-] {+[-python-] {+Python+}+} +documentation. + +.. _regular expressions: [-http://docs.python.org/library/re.html#regular-expression-syntax-] {+[-http://docs.python.org/lib/re-syntax.html-] {+http://docs.python.org/library/re.html#regular-expression-syntax+}+} + + +Scanning multiple tokens at once +================================ + +[-So-] + +{+{+So+} far, the `action` element in the rule tuple of regex, action and state has +been a single token type. Now we look at the first of several other possible +[-values.-] +{+values.+}+} + +Here is a more complex lexer that highlights INI files. INI files consist of +sections, comments and [-``key-] {+[-key-] {+``key+}+} = [-value`` pairs::-] {+[-value pairs: + +.. sourcecode:: python-] {+value`` pairs::+}+} + + from pygments.lexer import RegexLexer, bygroups + from pygments.token import * + + class IniLexer(RegexLexer): + name = 'INI' + aliases = ['ini', 'cfg'] + filenames = ['*.ini', '*.cfg'] + + tokens = { + 'root': [ + (r'\s+', Text), + (r';.*?$', Comment), + (r'\[.*?\]$', Keyword), + (r'(.*?)(\s*)(=)(\s*)(.*?)$', + bygroups(Name.Attribute, Text, Operator, Text, String)) + ] + } + +The lexer first looks for whitespace, comments and section names. [-Later-] {+[-And later-] {+Later+}+} it +looks for a line that looks like a key, value pair, separated by an ``'='`` +sign, and optional whitespace. + +The `bygroups` helper [-yields-] {+[-makes sure that-] {+yields+}+} each [-capturing-] {+{+capturing+}+} group [-in-] {+[-is yielded-] {+in+} the [-regex-] {+regex+}+} with a different +token type. First the `Name.Attribute` token, then a `Text` token for the +optional whitespace, after that a `Operator` token for the equals sign. Then a +`Text` token for the whitespace again. The rest of the line is returned as +`String`. + +Note that for this to work, every part of the match must be inside a capturing +group (a ``(...)``), and there must not be any nested capturing groups. If you +nevertheless need a group, use a non-capturing group defined using this syntax: +[-``(?:some|words|here)``-] +{+[-``r'(?:some|words|here)'``-] +{+``(?:some|words|here)``+}+} (note the ``?:`` after the beginning parenthesis). + +If you find yourself needing a capturing group inside the regex which shouldn't +be part of the output but is used in the regular expressions for backreferencing +(eg: ``r'(<(foo|bar)>)(.*?)()'``), you can pass `None` to the bygroups +function and {+[-it will skip-]+} that group will be skipped in the output. + + +Changing states +=============== + +Many lexers need multiple states to work as expected. For example, some +languages allow multiline comments to be nested. Since this is a recursive +pattern it's impossible to lex just using regular expressions. + +Here is [-a-] {+[-the solution: + +.. 
sourcecode:: python-] {+a+} lexer that recognizes C++ style comments (multi-line with ``/* */`` +and single-line with ``//`` until end of [-line)::-] {+line)::+}+} + + from pygments.lexer import RegexLexer + from pygments.token import * + + class [-CppCommentLexer(RegexLexer):-] {+[-ExampleLexer(RegexLexer):-] {+CppCommentLexer(RegexLexer):+}+} + name = 'Example Lexer with states' + + tokens = { + 'root': [ + (r'[^/]+', Text), + (r'/\*', Comment.Multiline, 'comment'), + (r'//.*?$', Comment.Singleline), + (r'/', Text) + ], + 'comment': [ + (r'[^*/]', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline) + ] + } + +This lexer starts lexing in the ``'root'`` state. It tries to match as much as +possible until it finds a slash (``'/'``). If the next character after the slash +is [-an asterisk-] {+[-a star-] {+an asterisk+}+} (``'*'``) the `RegexLexer` sends those two characters to the +output stream marked as `Comment.Multiline` and continues [-lexing-] {+[-parsing-] {+lexing+}+} with the rules +defined in the ``'comment'`` state. + +If there wasn't [-an asterisk-] {+[-a star-] {+an asterisk+}+} after the slash, the `RegexLexer` checks if it's a +[-Singleline-] +{+[-singleline-] +{+Singleline+}+} comment [-(i.e.-] {+[-(eg:-] {+(i.e.+}+} followed by a second slash). If this also wasn't the +case it must be a single [-slash,-] {+[-slash-] {+slash,+} which is not a comment [-starter-] {+starter+}+} (the separate +regex for a single slash must also be given, else the slash would be marked as +an error token). + +Inside the ``'comment'`` state, we do the same thing again. Scan until the +lexer finds a star or slash. If it's the opening of a multiline comment, push +the ``'comment'`` state on the stack and continue scanning, again in the +``'comment'`` state. Else, check if it's the end of the multiline comment. If +yes, pop one state from the stack. + +Note: If you pop from an empty stack you'll get an `IndexError`. (There is an +easy way to prevent this from happening: don't ``'#pop'`` in the root state). + +If the `RegexLexer` encounters a newline that is flagged as an error token, the +stack is emptied and the lexer continues scanning in the ``'root'`` state. This +[-can help-] +{+[-helps-] +{+can help+}+} producing error-tolerant highlighting for erroneous input, e.g. when a +single-line string is not closed. + + +Advanced state tricks +===================== + +There are a few more things you can do with states: + +- You can push multiple states onto the stack if you give a tuple instead of a + simple string as the third item in a rule tuple. For example, if you want to + match a comment containing a directive, something [-like:-] {+[-like::-] {+like:+} + + .. code-block:: [-text-] {+text+}+} + + /* rest of comment */ + + you can use this [-rule::-] {+[-rule: + + .. sourcecode:: python-] {+rule::+}+} + + tokens = { + 'root': [ + (r'/\* <', Comment, ('comment', 'directive')), + ... + ], + 'directive': [ + (r'[^>]*', Comment.Directive), + (r'>', Comment, '#pop'), + ], + 'comment': [ + (r'[^*]+', Comment), + (r'\*/', Comment, '#pop'), + (r'\*', Comment), + ] + } + + When this encounters the above sample, first ``'comment'`` and ``'directive'`` + are pushed onto the stack, then the lexer continues in the directive state + until it finds the closing ``>``, then it continues in the comment state until + the closing ``*/``. Then, both states are popped from the stack again and + lexing continues in the root state. + + .. 
versionadded:: 0.9 + The tuple can contain the special ``'#push'`` and ``'#pop'`` (but not + ``'#pop:n'``) directives. + + +- You can include the rules of a state in the definition of another. This is + done by using `include` from [-`pygments.lexer`::-] {+[-`pygments.lexer`: + + .. sourcecode:: python-] {+`pygments.lexer`::+}+} + + from pygments.lexer import RegexLexer, bygroups, include + from pygments.token import * + + class ExampleLexer(RegexLexer): + tokens = { + 'comments': [ + (r'/\*.*?\*/', Comment), + (r'//.*?\n', Comment), + ], + 'root': [ + include('comments'), + (r'(function )(\w+)( {)', + bygroups(Keyword, Name, Keyword), 'function'), + (r'.', Text), + ], + 'function': [ + (r'[^}/]+', Text), + include('comments'), + (r'/', Text), + [-(r'\}',-] + {+[-(r'}',-] + {+(r'\}',+}+} Keyword, '#pop'), + ] + } + + This is a hypothetical lexer for a language that consist of functions and + comments. Because comments can occur at toplevel and in functions, we need + rules for comments in both states. As you can see, the `include` helper saves + repeating rules that occur more than once (in this example, the state + ``'comment'`` will never be entered by the lexer, as it's only there to be + included in ``'root'`` and ``'function'``). + +- Sometimes, you may want to "combine" a state from existing ones. This is + possible with the [-`combined`-] {+[-`combine`-] {+`combined`+}+} helper from `pygments.lexer`. + + If you, instead of a new state, write ``combined('state1', 'state2')`` as the + third item of a rule tuple, a new anonymous state will be formed from state1 + and state2 and if the rule matches, the lexer will enter this state. + + This is not used very often, but can be helpful in some cases, such as the + `PythonLexer`'s string literal processing. + +- If you want your lexer to start lexing in a different state you can modify the + stack by [-overriding-] {+[-overloading-] {+overriding+}+} the `get_tokens_unprocessed()` [-method::-] {+[-method: + + .. sourcecode:: python-] {+method::+}+} + + from pygments.lexer import RegexLexer + + class [-ExampleLexer(RegexLexer):-] {+[-MyLexer(RegexLexer):-] {+ExampleLexer(RegexLexer):+}+} + tokens = {...} + + def get_tokens_unprocessed(self, [-text,-] {+[-text): + stack = ['root', 'otherstate']-] {+text,+} stack=('root', [-'otherstate')):-] {+'otherstate')):+}+} + for item in RegexLexer.get_tokens_unprocessed(text, stack): + yield item + + Some lexers like the `PhpLexer` use this to make the leading ``', Name.Tag), + ], + 'script-content': [ + (r'(.+?)(<\s*/\s*script\s*>)', + bygroups(using(JavascriptLexer), Name.Tag), + '#pop'), + ] + } + +Here the content of a ```` end tag is processed by the `JavascriptLexer`, +while the end tag is yielded as a normal token with the `Name.Tag` type. + +{+[-As an additional goodie, if the lexer class is replaced by `this` (imported from +`pygments.lexer`), the "other" lexer will be the current one (because you cannot +refer to the current class within the code that runs at class definition time).-]+} + +Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule. +Here, two states are pushed onto the state stack, ``'script-content'`` and +``'tag'``. That means that first ``'tag'`` is processed, which will [-lex-] {+[-parse-] {+lex+}+} +attributes and the closing ``>``, then the ``'tag'`` state is popped and the +next state on top of the stack will be ``'script-content'``. 
+ +[-Since-] + +{+{+Since+} you cannot refer to the class currently being defined, use `this` +(imported from `pygments.lexer`) to refer to the current lexer class, i.e. +``using(this)``. This construct may seem unnecessary, but this is often the +most obvious way of lexing arbitrary syntax between fixed delimiters without +introducing deeply nested [-states.-] {+states.+}+} + +The `using()` helper has a special keyword argument, `state`, which works as +follows: if given, the lexer to use initially is not in the ``"root"`` state, +but in the state given by this argument. This [-does-] {+[-*only* works-] {+does+} not [-work-] {+work+}+} with [-advanced-] {+[-a `RegexLexer`.-] {+advanced+} +`RegexLexer` subclasses such as `ExtendedRegexLexer` (see [-below).-] {+below).+}+} + +Any other keywords arguments passed to `using()` are added to the keyword +arguments used to create the lexer. + + +Delegating Lexer +================ + +Another approach for nested lexers is the `DelegatingLexer` which is for example +used for the template engine lexers. It takes two lexers as arguments on +initialisation: a `root_lexer` and a `language_lexer`. + +The input is processed as follows: First, the whole text is lexed with the +`language_lexer`. All tokens yielded with [-the special-] {+[-a-] {+the special+}+} type of ``Other`` are +then concatenated and given to the `root_lexer`. The language tokens of the +`language_lexer` are then inserted into the `root_lexer`'s token stream at the +appropriate positions. [-::-] + +{+[-.. sourcecode:: python-] {+::+}+} + + from pygments.lexer import DelegatingLexer + from pygments.lexers.web import HtmlLexer, PhpLexer + + class HtmlPhpLexer(DelegatingLexer): + def __init__(self, **options): + super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options) + +This procedure ensures that e.g. HTML with template tags in it is highlighted +correctly even if the template tags are put into HTML tags or attributes. + +If you want to change the needle token ``Other`` to something else, you can give +the lexer another token type as the third [-parameter::-] {+[-parameter: + +.. sourcecode:: python-] {+parameter::+}+} + + DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options) + + +Callbacks +========= + +Sometimes the grammar of a language is so complex that a lexer would be unable +to [-process-] {+[-parse-] {+process+}+} it just by using regular expressions and stacks. + +For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead +of token types (`bygroups` and `using` are nothing else but preimplemented +callbacks). The callback must be a function taking two arguments: + +* the lexer itself +* the match object for the last matched rule + +The callback must then return an iterable of (or simply yield) ``(index, +tokentype, value)`` tuples, which are then just passed through by +`get_tokens_unprocessed()`. The ``index`` here is the position of the token in +the input string, ``tokentype`` is the normal token type (like `Name.Builtin`), +and ``value`` the associated part of the input string. + +You can see an example [-here::-] {+[-here: + +.. 
sourcecode:: python-] {+here::+}+} + + from pygments.lexer import RegexLexer + from pygments.token import Generic + + class HypotheticLexer(RegexLexer): + + def headline_callback(lexer, match): + equal_signs = match.group(1) + text = match.group(2) + yield match.start(), Generic.Headline, equal_signs + text + equal_signs + + tokens = { + 'root': [ + (r'(=+)(.*?)(\1)', headline_callback) + ] + } + +If the regex for the `headline_callback` matches, the function is called with +the match object. Note that after the callback is done, processing continues +normally, that is, after the end of the previous match. The callback has no +possibility to influence the position. + +There are not really any simple examples for lexer callbacks, but you can see +them in action e.g. in the [-`SMLLexer` class in `ml.py`_.-] {+[-`compiled.py`_ source code-] {+`SMLLexer` class+} in [-the `CLexer` and +`JavaLexer` classes.-] {+`ml.py`_.+}+} + +.. [-_ml.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ml.py-] {+[-_compiled.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/compiled.py-] {+_ml.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ml.py+}+} + + +The ExtendedRegexLexer class +============================ + +The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for +the funky syntax rules of {+[-some-]+} languages {+[-that will go unnamed,-]+} such as Ruby. + +But fear not; even then you don't have to abandon the regular expression +[-approach:-] +{+[-approach. For-] +{+approach:+}+} Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`. +All features known from RegexLexers are available here too, and the tokens are +specified in exactly the same way, *except* for one detail: + +The `get_tokens_unprocessed()` method holds its internal state data not as local +variables, but in an instance of the `pygments.lexer.LexerContext` class, and +that instance is passed to callbacks as a third argument. This means that you +can modify the lexer state in callbacks. + +The `LexerContext` class has the following members: + +* `text` -- the input text +* `pos` -- the current starting position that is used for matching regexes +* `stack` -- a list containing the state stack +* `end` -- the maximum position to which regexes are matched, this defaults to + the length of `text` + +Additionally, the `get_tokens_unprocessed()` method can be given a +`LexerContext` instead of a string and will then process this context instead of +creating a new one for the string argument. + +Note that because you can set the current position to anything in the callback, +it won't be automatically be set by the caller after the callback is finished. +For example, this is how the hypothetical lexer above would be written with the +[-`ExtendedRegexLexer`::-] +{+[-`ExtendedRegexLexer`: + +.. sourcecode:: python-] +{+`ExtendedRegexLexer`::+}+} + + from pygments.lexer import ExtendedRegexLexer + from pygments.token import Generic + + class ExHypotheticLexer(ExtendedRegexLexer): + + def headline_callback(lexer, match, ctx): + equal_signs = match.group(1) + text = match.group(2) + yield match.start(), Generic.Headline, equal_signs + text + equal_signs + ctx.pos = match.end() + + tokens = { + 'root': [ + (r'(=+)(.*?)(\1)', headline_callback) + ] + } + +This might sound confusing (and it can really be). But it is needed, and for an +example look at the Ruby lexer in [-`ruby.py`_.-] {+[-`agile.py`_.-] {+`ruby.py`_.+}+} + +.. 
[-_ruby.py:-] {+[-_agile.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/agile.py + + +Filtering-] {+_ruby.py:+} https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ruby.py + + +Handling Lists of Keywords +========================== + +For a relatively short list (hundreds) you can construct an optimized regular +expression directly using ``words()`` (longer lists, see next section). This +function handles a few things for you automatically, including escaping +metacharacters and Python's first-match rather than longest-match in +alternations. Feel free to put the lists themselves in +``pygments/lexers/_$lang_builtins.py`` (see examples there), and generated by +code if possible. + +An example of using ``words()`` is something like:: + + from pygments.lexer import RegexLexer, words, Name + + class MyLexer(RegexLexer): + + tokens = { + 'root': [ + (words(('else', 'elseif'), suffix=r'\b'), Name.Builtin), + (r'\w+', Name), + ], + } + +As you can see, you can add ``prefix`` and ``suffix`` parts to the constructed +regex. + + +[-Modifying-] + + +{+Modifying+}+} Token Streams +======================= + +Some languages ship a lot of builtin functions (for example PHP). The total +amount of those functions differs from system to system because not everybody +has every extension installed. In the case of PHP there are over 3000 builtin +functions. That's an [-incredibly-] {+[-incredible-] {+incredibly+}+} huge amount of functions, much more than you +[-want to-] +{+[-can-] +{+want to+}+} put into a regular expression. + +But because only `Name` tokens can be function names [-this is-] {+[-it's-] {+this is+}+} solvable by +overriding the ``get_tokens_unprocessed()`` method. The following lexer +subclasses the `PythonLexer` so that it highlights some additional names as +pseudo [-keywords::-] {+[-keywords: + +.. sourcecode:: python-] {+keywords::+}+} + + from [-pygments.lexers.python-] {+[-pygments.lexers.agile-] {+pygments.lexers.python+}+} import PythonLexer + from pygments.token import Name, Keyword + + class MyPythonLexer(PythonLexer): + EXTRA_KEYWORDS = [-set(('foo',-] {+[-['foo',-] {+set(('foo',+}+} 'bar', 'foobar', 'barfoo', 'spam', [-'eggs'))-] {+[-'eggs']-] {+'eggs'))+}+} + + def get_tokens_unprocessed(self, text): + for index, token, value in PythonLexer.get_tokens_unprocessed(self, text): + if token is Name and value in self.EXTRA_KEYWORDS: + yield index, Keyword.Pseudo, value + else: + yield index, token, value + +The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions. + +{+[-.. note:: Do not confuse this with the :doc:`filter ` system.-]+} diff --git a/tests/examplefiles/wdiff_example3.wdiff b/tests/examplefiles/wdiff_example3.wdiff new file mode 100644 index 00000000..89303a75 --- /dev/null +++ b/tests/examplefiles/wdiff_example3.wdiff @@ -0,0 +1,10 @@ +This example is unbalanced open-close. +We can't treat these easily. + +{+ added? -] +[- deleted? +} + +suddenly closed -] +suddenly closed +} + +[+ added? [- deleted? 
-- cgit v1.2.1 From 0121547da1fb4a8719bb2faec1b1501e5066eaf9 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 4 Nov 2015 20:29:24 -0800 Subject: Allow ansi escape sequence as color format --- pygments/formatters/terminal256.py | 11 ++++++++++- pygments/style.py | 7 ++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pygments/formatters/terminal256.py b/pygments/formatters/terminal256.py index af311955..9055b10b 100644 --- a/pygments/formatters/terminal256.py +++ b/pygments/formatters/terminal256.py @@ -27,6 +27,8 @@ import sys from pygments.formatter import Formatter +from pygments.console import codes +from pygments.style import ansilist __all__ = ['Terminal256Formatter', 'TerminalTrueColorFormatter'] @@ -47,7 +49,10 @@ class EscapeSequence: def color_string(self): attrs = [] if self.fg is not None: - attrs.extend(("38", "5", "%i" % self.fg)) + if self.fg in ansilist: + attrs.append(codes[self.fg[5:]][2:-1]) + else : + attrs.extend(("38", "5", "%i" % self.fg)) if self.bg is not None: attrs.extend(("48", "5", "%i" % self.bg)) if self.bold: @@ -169,6 +174,10 @@ class Terminal256Formatter(Formatter): def _color_index(self, color): index = self.best_match.get(color, None) + if color in ansilist: + # strip the `#ansi` part an look up code + index = color + self.best_match[color] = index if index is None: try: rgb = int(str(color), 16) diff --git a/pygments/style.py b/pygments/style.py index b2b990ea..637a9303 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -5,12 +5,15 @@ Basic style object. - :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ from pygments.token import Token, STANDARD_TYPES from pygments.util import add_metaclass +from pygments.console import codes + +ansilist = ['#ansi'+x for x in codes.keys()] class StyleMeta(type): @@ -22,6 +25,8 @@ class StyleMeta(type): obj.styles[token] = '' def colorformat(text): + if text in ansilist: + return text if text[0:1] == '#': col = text[1:] if len(col) == 6: -- cgit v1.2.1 From 4d5ef3c7733a6667cc083f0dbab042be556daab9 Mon Sep 17 00:00:00 2001 From: hhsprings Date: Sat, 7 Nov 2015 21:59:57 +0900 Subject: See `#1164 `_. Before: 28807 bytes, 118.3000 [ms] / 0.004107 [ms/byte] 30964 bytes, 130.4700 [ms] / 0.004214 [ms/byte] 159 bytes, 1.2000 [ms] / 0.007547 [ms/byte] 28807 bytes, 117.6800 [ms] / 0.004085 [ms/byte] 30964 bytes, 124.3500 [ms] / 0.004016 [ms/byte] 159 bytes, 0.9500 [ms] / 0.005975 [ms/byte] 28807 bytes, 123.9600 [ms] / 0.004303 [ms/byte] 30964 bytes, 124.1700 [ms] / 0.004010 [ms/byte] 159 bytes, 1.3200 [ms] / 0.008302 [ms/byte] After: 28807 bytes, 11.3200 [ms] / 0.000393 [ms/byte] 30964 bytes, 21.6200 [ms] / 0.000698 [ms/byte] 159 bytes, 0.3400 [ms] / 0.002138 [ms/byte] 28807 bytes, 15.8100 [ms] / 0.000549 [ms/byte] 30964 bytes, 21.6800 [ms] / 0.000700 [ms/byte] 159 bytes, 0.4100 [ms] / 0.002579 [ms/byte] 28807 bytes, 11.4300 [ms] / 0.000397 [ms/byte] 30964 bytes, 15.3000 [ms] / 0.000494 [ms/byte] 159 bytes, 0.3900 [ms] / 0.002453 [ms/byte] About x10 faster... 
--- pygments/lexers/diff.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pygments/lexers/diff.py b/pygments/lexers/diff.py index 9efb100b..2056fbff 100644 --- a/pygments/lexers/diff.py +++ b/pygments/lexers/diff.py @@ -136,10 +136,12 @@ class WDiffLexer(RegexLexer): ins_cl = r"\+\}" del_op = r"\[\-" del_cl = r"\-\]" + normal = r'[^{}[\]+-]+' # for performance tokens = { 'root': [ (ins_op, Generic.Inserted, 'inserted'), (del_op, Generic.Deleted, 'deleted'), + (normal, Text), (r'.', Text), ], 'inserted': [ @@ -148,6 +150,7 @@ class WDiffLexer(RegexLexer): (del_cl, Generic.Inserted, '#pop'), (ins_cl, Generic.Inserted, '#pop'), + (normal, Generic.Inserted), (r'.', Generic.Inserted), ], 'deleted': [ @@ -156,6 +159,7 @@ class WDiffLexer(RegexLexer): (ins_cl, Generic.Deleted, '#pop'), (del_cl, Generic.Deleted, '#pop'), + (normal, Generic.Deleted), (r'.', Generic.Deleted), ], } -- cgit v1.2.1 From 68d2058669f4d733bb5f5beccf0c50bcabc81097 Mon Sep 17 00:00:00 2001 From: Hiroaki Itoh Date: Sun, 8 Nov 2015 09:12:24 +0000 Subject: fix typo. --- tests/examplefiles/wdiff_example3.wdiff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/examplefiles/wdiff_example3.wdiff b/tests/examplefiles/wdiff_example3.wdiff index 89303a75..0bbd6d65 100644 --- a/tests/examplefiles/wdiff_example3.wdiff +++ b/tests/examplefiles/wdiff_example3.wdiff @@ -7,4 +7,4 @@ We can't treat these easily. suddenly closed -] suddenly closed +} -[+ added? [- deleted? +{+ added? [- deleted? -- cgit v1.2.1 From 1aecd014d3e2e635e1f3c320a51e0609fd77d2cb Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Mon, 21 Dec 2015 22:03:59 +0100 Subject: Revert unintentional change to copyright --- pygments/style.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pygments/style.py b/pygments/style.py index 637a9303..06654cde 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -5,7 +5,7 @@ Basic style object. - :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ -- cgit v1.2.1 From 995aa30dfb51621a017833258654d35ec3fc2c30 Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Sat, 6 Feb 2016 22:01:49 +0200 Subject: Add tokens for string affixes and heredoc delimiters. Add lexing for them in C/C++, Perl, PHP, Python and Ruby. Update my old style Lovelace accordingly. --- CHANGES | 6 ++++++ doc/docs/tokens.rst | 8 ++++++++ pygments/lexers/c_cpp.py | 16 ++++++++-------- pygments/lexers/perl.py | 3 ++- pygments/lexers/php.py | 4 +++- pygments/lexers/python.py | 20 ++++++++++---------- pygments/lexers/ruby.py | 8 ++++---- pygments/styles/lovelace.py | 2 ++ pygments/token.py | 2 ++ tests/examplefiles/example2.cpp | 20 ++++++++++++++++++++ tests/examplefiles/test.php | 5 +++++ 11 files changed, 70 insertions(+), 24 deletions(-) create mode 100644 tests/examplefiles/example2.cpp diff --git a/CHANGES b/CHANGES index e88582c8..ced0ffb0 100644 --- a/CHANGES +++ b/CHANGES @@ -21,6 +21,12 @@ Version 2.2 - Added new token types and lexing for magic methods and variables in Python and PHP. +- Added a new token type for string affixes and lexing for them in Python and + C++ lexers. + +- Added a new token type for heredoc (and similar) string delimiters and + lexing for them in C++, Perl, PHP and Ruby lexers. 
+ Version 2.1.1 ------------- diff --git a/doc/docs/tokens.rst b/doc/docs/tokens.rst index 96a6d003..801fc638 100644 --- a/doc/docs/tokens.rst +++ b/doc/docs/tokens.rst @@ -223,12 +223,20 @@ Literals `String` For any string literal. +`String.Affix` + Token type for affixes that further specify the type of the string they're + attached to (e.g. the prefixes ``r`` and ``u8`` in ``r"foo"`` and ``u8"foo"``). + `String.Backtick` Token type for strings enclosed in backticks. `String.Char` Token type for single characters (e.g. Java, C). +`String.Delimiter` + Token type for delimiting identifiers in "heredoc", raw and other similar + strings (e.g. the word ``END`` in Perl code ``print <<'END';``). + `String.Doc` Token type for documentation strings (for example Python). diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py index 5c724d03..7f061539 100644 --- a/pygments/lexers/c_cpp.py +++ b/pygments/lexers/c_cpp.py @@ -50,8 +50,9 @@ class CFamilyLexer(RegexLexer): (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), ], 'statements': [ - (r'L?"', String, 'string'), - (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), + (r'(L?)(")', bygroups(String.Affix, String), 'string'), + (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", + bygroups(String.Affix, String.Char, String.Char, String.Char)), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), @@ -217,7 +218,11 @@ class CppLexer(CFamilyLexer): (r'char(16_t|32_t)\b', Keyword.Type), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), # C++11 raw strings - (r'R"\(', String, 'rawstring'), + (r'(R)(")([^\\()\s]{,16})(\((?:.|\n)*?\))(\3)(")', + bygroups(String.Affix, String, String.Delimiter, String, + String.Delimiter, String)), + # C++11 UTF-8/16/32 strings + (r'(u8|u|U)(")', bygroups(String.Affix, String), 'string'), inherit, ], 'root': [ @@ -234,11 +239,6 @@ class CppLexer(CFamilyLexer): # template specification (r'\s*(?=>)', Text, '#pop'), ], - 'rawstring': [ - (r'\)"', String, '#pop'), - (r'[^)]+', String), - (r'\)', String), - ], } def analyse_text(text): diff --git a/pygments/lexers/perl.py b/pygments/lexers/perl.py index b78963d0..8df3c810 100644 --- a/pygments/lexers/perl.py +++ b/pygments/lexers/perl.py @@ -109,7 +109,8 @@ class PerlLexer(RegexLexer): 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'), Name.Builtin), (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo), - (r'<<([\'"]?)([a-zA-Z_]\w*)\1;?\n.*?\n\2\n', String), + (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)', + bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)), (r'__END__', Comment.Preproc, 'end-part'), (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global), (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global), diff --git a/pygments/lexers/php.py b/pygments/lexers/php.py index 257dd94f..2421738f 100644 --- a/pygments/lexers/php.py +++ b/pygments/lexers/php.py @@ -138,7 +138,9 @@ class PhpLexer(RegexLexer): ], 'php': [ (r'\?>', Comment.Preproc, '#pop'), - (r'<<<([\'"]?)(' + _ident_inner + r')\1\n.*?\n\s*\2;?\n', String), + (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)', + bygroups(String, String, String.Delimiter, String, String.Delimiter, + Punctuation, Text)), (r'\s+', Text), (r'#.*?\n', Comment.Single), (r'//.*?\n', Comment.Single), diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 
57af00e2..f483071b 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -51,8 +51,8 @@ class PythonLexer(RegexLexer): tokens = { 'root': [ (r'\n', Text), - (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), - (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)), + (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), + (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), @@ -72,14 +72,14 @@ class PythonLexer(RegexLexer): include('magicfuncs'), include('magicvars'), include('backtick'), - ('(?:[rR]|[uU][rR]|[rR][uU])"""', String.Double, 'tdqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'''", String.Single, 'tsqs'), - ('(?:[rR]|[uU][rR]|[rR][uU])"', String.Double, 'dqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'", String.Single, 'sqs'), - ('[uU]?"""', String.Double, combined('stringescape', 'tdqs')), - ("[uU]?'''", String.Single, combined('stringescape', 'tsqs')), - ('[uU]?"', String.Double, combined('stringescape', 'dqs')), - ("[uU]?'", String.Single, combined('stringescape', 'sqs')), + ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', bygroups(String.Affix, String.Double), 'tdqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'), + ('([rR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'), + ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), + ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), + ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), + ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), include('name'), include('numbers'), ], diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py index e81d6ecf..f16416d3 100644 --- a/pygments/lexers/ruby.py +++ b/pygments/lexers/ruby.py @@ -47,9 +47,9 @@ class RubyLexer(ExtendedRegexLexer): start = match.start(1) yield start, Operator, match.group(1) # <<-? 
- yield match.start(2), String.Heredoc, match.group(2) # quote ", ', ` - yield match.start(3), Name.Constant, match.group(3) # heredoc name - yield match.start(4), String.Heredoc, match.group(4) # quote again + yield match.start(2), String.Heredoc, match.group(2) # quote ", ', ` + yield match.start(3), String.Delimiter, match.group(3) # heredoc name + yield match.start(4), String.Heredoc, match.group(4) # quote again heredocstack = ctx.__dict__.setdefault('heredocstack', []) outermost = not bool(heredocstack) @@ -74,7 +74,7 @@ class RubyLexer(ExtendedRegexLexer): if check == hdname: for amatch in lines: yield amatch.start(), String.Heredoc, amatch.group() - yield match.start(), Name.Constant, match.group() + yield match.start(), String.Delimiter, match.group() ctx.pos = match.end() break else: diff --git a/pygments/styles/lovelace.py b/pygments/styles/lovelace.py index 712f3e5c..236dde9b 100644 --- a/pygments/styles/lovelace.py +++ b/pygments/styles/lovelace.py @@ -71,7 +71,9 @@ class LovelaceStyle(Style): Name.Variable.Magic: _DOC_ORANGE, String: _STR_RED, + String.Affix: '#444444', String.Char: _OW_PURPLE, + String.Delimiter: _DOC_ORANGE, String.Doc: 'italic '+_DOC_ORANGE, String.Escape: _ESCAPE_LIME, String.Interpol: 'underline', diff --git a/pygments/token.py b/pygments/token.py index 097ff064..40c3214a 100644 --- a/pygments/token.py +++ b/pygments/token.py @@ -163,8 +163,10 @@ STANDARD_TYPES = { Literal.Date: 'ld', String: 's', + String.Affix: 'sa', String.Backtick: 'sb', String.Char: 'sc', + String.Delimiter: 'dl', String.Doc: 'sd', String.Double: 's2', String.Escape: 'se', diff --git a/tests/examplefiles/example2.cpp b/tests/examplefiles/example2.cpp new file mode 100644 index 00000000..ccd99383 --- /dev/null +++ b/tests/examplefiles/example2.cpp @@ -0,0 +1,20 @@ +/* + * A Test file for the different string literals. + */ + +#include + +int main() { + char *_str = "a normal string"; + wchar_t *L_str = L"a wide string"; + char *u8_str = u8"utf-8 string"; + char16_t *u_str = u"utf-16 string"; + char32_t *U_str = U"utf-32 string"; + char *R_str = R""""(raw string with +""" +as a delimiter)""""; + + std::cout << R_str << std::endl; + + return 0; +} diff --git a/tests/examplefiles/test.php b/tests/examplefiles/test.php index 794961c1..e8efdc6a 100644 --- a/tests/examplefiles/test.php +++ b/tests/examplefiles/test.php @@ -535,5 +535,10 @@ $magic->__toString(); EOF; +echo <<<"some_delimiter" +more heredoc testing +continues on this line +some_delimiter; + ?> -- cgit v1.2.1 From 86939c3764a93a9cd40cff29406d4d4f1ebd9c1c Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sun, 7 Feb 2016 16:32:51 -0800 Subject: Add documentation on non-extended foreground color for Terminal256 styles --- doc/docs/styles.rst | 43 +++++++++++++++++++++++++++++++++++++- pygments/formatters/terminal256.py | 7 +++++++ pygments/style.py | 2 +- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst index d56db0db..13326129 100644 --- a/doc/docs/styles.rst +++ b/doc/docs/styles.rst @@ -90,7 +90,7 @@ Here a small overview of all allowed styles: don't render text as bold (to prevent subtokens being highlighted bold) ``italic`` render text italic -``noitalic`` +``noitalic``\x1b[38;5;124 don't render text as italic ``underline`` render text underlined @@ -143,3 +143,44 @@ a way to iterate over all styles: >>> from pygments.styles import get_all_styles >>> styles = list(get_all_styles()) + + +.. _AnsiTerminalStyle: + +Terminal Styles +=============== + +.. 
versionadded:: 2.2 + +Custom styles used with `Terminal256` formatter can also defines foreground +colors using ansi-color. to do so use the `#ansigreen`, `#ansired` or any other +colors defined in ``pygments.style.ansilist``. Foreground ANSI colors will be +mapped to the corresponding `escape codes 30 to 37 +`_ thus respecting any +custom color mapping and themes provided by many terminal emulators. + +See following example where the color of the string `"hello world"` is governed +by the escape sequence `\x1b34;01m` (Ansi Blue) instead of an extended +foreground color. + +.. sourcecode:: pycon + + >>> from pygments import highlight + >>> from pygments.style import Style + >>> from pygments.token import Token + >>> from pygments.lexers import Python3Lexer + >>> from pygments.formatters import Terminal256Formatter + + >>> class MyStyle(Style): + >>> + >>> styles = { + >>> Token.String: '#ansiblue', + >>> } + + >>> code = 'print("Hello World")' + >>> result = highlight(code, Python3Lexer(), Terminal256Formatter(style=MyStyle)) + >>> print(result.encode()) + b'print(\x1b[34;01m"\x1b[39m\x1b[34;01mHello World\x1b[39m\x1b[34;01m"\x1b[39m)\n' + +Style that use `#ansi*` foreground colors do not currently work with formatters +others than ``Terminal256``. diff --git a/pygments/formatters/terminal256.py b/pygments/formatters/terminal256.py index 9055b10b..913536c4 100644 --- a/pygments/formatters/terminal256.py +++ b/pygments/formatters/terminal256.py @@ -94,6 +94,13 @@ class Terminal256Formatter(Formatter): and converts them to nearest ANSI 256-color escape sequences. Bold and underline attributes from the style are preserved (and displayed). + .. versionadded:: 2.2 + + If the used style defined foreground colors in the form `#ansi*`, then + `Terminal256Formatter` will map these to non extended foreground color. + + See AnsiTerminalStyle_ for more informations. + .. versionadded:: 0.9 Options accepted: diff --git a/pygments/style.py b/pygments/style.py index 06654cde..7a272b53 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -13,7 +13,7 @@ from pygments.token import Token, STANDARD_TYPES from pygments.util import add_metaclass from pygments.console import codes -ansilist = ['#ansi'+x for x in codes.keys()] +ansilist = ['#ansi'+x for x in codes.keys() if x] class StyleMeta(type): -- cgit v1.2.1 From 968f917d857f3c795e4809af6be2ec595e7a6b6b Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 8 Feb 2016 08:34:32 +0100 Subject: Make postgres strings use their own state. 
--- pygments/lexers/sql.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index 05503c3a..a7736f75 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -148,8 +148,8 @@ class PostgresLexer(PostgresBase, RegexLexer): (r'\$\d+', Name.Variable), (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float), (r'[0-9]+', Number.Integer), - (r"(E|U&)?'(''|[^'])*'", String.Single), - (r'(U&)?"(""|[^"])*"', String.Name), # quoted identifier + (r"(E|U&)?'", String.Single, 'string'), + (r'(U&)?"', String.Name, 'quoted-ident'), # quoted identifier (r'(?s)(\$[^$]*\$)(.*?)(\1)', language_callback), (r'[a-z_]\w*', Name), @@ -164,6 +164,16 @@ class PostgresLexer(PostgresBase, RegexLexer): (r'[^/*]+', Comment.Multiline), (r'[/*]', Comment.Multiline) ], + 'string': [ + (r"[^']+", String.Single), + (r"''", String.Single), + (r"'", String.Single, '#pop'), + ], + 'quoted-ident': [ + (r'[^"]+', String.Name), + (r'""', String.Name), + (r'"', String.Name, '#pop'), + ], } -- cgit v1.2.1 From a1e6b33a5cb8f74f005d17f61b6bb1051259e8af Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Mon, 8 Feb 2016 13:01:31 +0100 Subject: Add Varnish lexers, VCL and snippets VCLLexer is meant for pure VCL files that are syntactically correct. VCLSnippet is used for example-code and pseudo-code, where shorthands like req.* is sensible. Think of it like VCLLexer being for .. code:: blocks, while VCLSnippetLexer is for in-line. --- pygments/lexers/_mapping.py | 2 + pygments/lexers/varnish.py | 173 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 pygments/lexers/varnish.py diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index 0a6b4965..b34fcc18 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -396,6 +396,8 @@ LEXERS = { 'TypoScriptHtmlDataLexer': ('pygments.lexers.typoscript', 'TypoScriptHtmlData', ('typoscripthtmldata',), (), ()), 'TypoScriptLexer': ('pygments.lexers.typoscript', 'TypoScript', ('typoscript',), ('*.ts', '*.txt'), ('text/x-typoscript',)), 'UrbiscriptLexer': ('pygments.lexers.urbi', 'UrbiScript', ('urbiscript',), ('*.u',), ('application/x-urbiscript',)), + 'VCLLexer': ('pygments.lexers.varnish', 'VCL', ('vcl',), ('*.vcl',), ('text/x-vclsrc',)), + 'VCLSnippetLexer': ('pygments.lexers.varnish', 'VCLSnippets', ('vclsnippets', 'vclsnippet'), ('*.vcl',), ('text/x-vclsnippet',)), 'VCTreeStatusLexer': ('pygments.lexers.console', 'VCTreeStatus', ('vctreestatus',), (), ()), 'VGLLexer': ('pygments.lexers.dsls', 'VGL', ('vgl',), ('*.rpf',), ()), 'ValaLexer': ('pygments.lexers.c_like', 'Vala', ('vala', 'vapi'), ('*.vala', '*.vapi'), ('text/x-vala',)), diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py new file mode 100644 index 00000000..53f3f41f --- /dev/null +++ b/pygments/lexers/varnish.py @@ -0,0 +1,173 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.varnish + ~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for Varnish configuration + + :copyright: Copyright 2016 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +import re + +from pygments.lexer import RegexLexer, include, bygroups, using, this, inherit, words, \ + default +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation + +from pygments.lexers.c_cpp import CLexer, CppLexer +from pygments.lexers import _mql_builtins + +__all__ = ['VCLLexer', 'VCLSnippetLexer'] + +class VCLLexer(CLexer): + """ + For Varnish Configuration Language (VCL). + + """ + name = 'VCL' + aliases = ['vcl'] + filenames = [ '*.vcl' ] + mimetypes = ['text/x-vclsrc'] + + tokens = { + 'time': [ + (r'\d+(s|d|w|h|m)',Number) + ], + 'probe': [ + (r'(\s*\.\w+)(\s*=\s*)([^;]*)(;)', + bygroups(Name.Attribute, Operator, using(this), Punctuation)), + (r'\s*}', Punctuation, '#pop') + ], + 'backend': [ + include('whitespace'), + (r'(\s*)(\.host|\.port)(\s*=\s*)([^;]*)(\s*;)', + bygroups(Punctuation, Name.Attribute, Operator, using(this), Punctuation)), + (r'(\s*\.probe)(\s*=\s*)(\w+)(;)', + bygroups(Name.Attribute,Operator,Name.Variable.Global,Punctuation)), + (r'(\s*\.probe)(\s*=\s*)({)', + bygroups(Name.Attribute,Operator,Punctuation),'probe'), + (r'{',Punctuation,'#push'), + (r'}',Punctuation,'#pop') + ], + 'statements': [ + include('time'), + (r'[~!%^&*+=|?:<>/-]', Operator), + (r'\s*(hash_data)(\()(.+)(\)\s*;\s*$)', + bygroups(Keyword, Punctuation, using(this), Punctuation)), + (r'(\s*set\s)([^\s]+)(\s*=\s*)(.+)(\s*;\s*)($|#.*$|//.*$|/\*.*$)', + bygroups(Keyword, Name.Variable, Punctuation, using(this), Punctuation, using(this))), + (r'(\s*unset\s)(\s*[^\s]+)(\s*;)', + bygroups(Keyword, Name.Variable, Punctuation)), + (r'(\s*regsub\s*)(\()(.*)(,)(.*)(,)(.*)(\))', + bygroups(Keyword, Punctuation, using(this), Punctuation, + using(this), Punctuation, using(this), Punctuation)), + (r'(\s*regsuball\s*)(\()(.*)(,)(.*)(,)(.*)(\))', + bygroups(Keyword, Punctuation, using(this), Punctuation, + using(this), Punctuation, using(this), Punctuation)), + (r'(import\s)(\w+)(;\s*)$', + bygroups(Keyword, Name.Variable.Global, Punctuation)), + (words(('vcl_recv','vcl_pipe','vcl_pass','vcl_hash','vcl_purge', + 'vcl_hit','vcl_miss','vcl_deliver','vcl_synth','vcl_backend_fetch', + 'vcl_backend_response','vcl_backend_error','vcl_init','vcl_fini'), + suffix=r'\b'),Name.Function), + (words(('if','else','elsif','synth', + 'synthetic'), suffix=r'\b'),Keyword), + (words(('true','false')),Name.Builtin), + (r'(\s*call \s*)([^\s;]+)(;)', + bygroups(Keyword,Name.Variable.Global,Punctuation)), + (r'obj.ttl',Name.Variable), + (r'(req|bereq|obj|resp|beresp)\.http\.[^\s]+',Name.Variable), + (r'(req|bereq)\.(url|method|xid)',Name.Variable), + (r'(resp|beresp|obj)\.(status|reason)',Name.Variable), + (r'(beresp|obj)\.(ttl|grace)',Name.Variable), + (r'(backend )(\w*)(\s*{)', + bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), + (r'(\s*probe )(\s*\w+\s)({)', + bygroups(Keyword,Name.Variable.Global,Punctuation),'probe'), + (r'[();]', Punctuation), + (r'(client|server)\.(ip|identity)',Name.Variable), + (r'^(vcl )(4.0)(;)$', + bygroups(Keyword.Reserved,Name.Constant,Punctuation)), + ], + 'sub': [ + include('whitespace'), + include('comments'), + include('returns'), + include('statements'), + (r'\s*\{\s*',Punctuation,'#push'), + (r'\s*\}\s*',Punctuation,'#pop') + ], + 'comment': [ + (r'[^*/]', Comment.Multiline), + (r'/\*', Comment.Multiline, '#push'), + (r'\*/', Comment.Multiline, '#pop'), + (r'[*/]', Comment.Multiline) + ], + 'comments': [ + (r'#.*$', Comment), + (r'/\*', Comment.Multiline, 'comment'), + (r'//.*$', Comment) + ], + 'string': [ + (r'"', 
String, '#pop'), + (r'[^\\"\n]+', String), # all other characters + + ], + 'multistring': [ + (r'[^"}]', String), + (r'{"', String, '#push'), + (r'"}', String, '#pop'), + (r'["}]', String) + ], + 'whitespace': [ + (r'L?"', String, 'string'), + (r'{"', String, 'multistring'), + (r'\n', Text), + (r'\s+', Text), + (r'\\\n', Text) # line continuation + ], + 'returns': [ + (r'(\s*return )(\()(hash|lookup|ok|deliver|miss|fetch|pass|pipe)(\)\s*;$)', + bygroups(Keyword, Punctuation, Name.Constant, Punctuation)), + (r'(\s*return )(\()(\s*synth\s*)(\()(\s*\d+\s*)(,)([^)]+)(\)\s*\)\s*;)', + bygroups(Keyword, Punctuation, Keyword, Punctuation,Number,Punctuation,using(this),Punctuation)), + (r'(\s*return )(\()(\s*synth\s*)(\()(\s*\d+\s*)(\)\s*\)\s*;)', + bygroups(Keyword, Punctuation, Keyword, Punctuation,Number,Punctuation)) + ], + 'root': [ + include('whitespace'), + include('comments'), + include('returns'), + (r'(sub )(\w+)(\s*{)', + bygroups(Keyword, Name.Function, Punctuation),'sub'), + include('statements'), + (r'\s+', Text) + ], + } + +class VCLSnippetLexer(VCLLexer): + """ + For Varnish Configuration Language snippets. + """ + + name = 'VCLSnippets' + aliases = ['vclsnippets', 'vclsnippet'] + mimetypes = ['text/x-vclsnippet'] + tokens = { + 'snippetspre': [ + (r'\', Name.Variable), + (r'\', Name.Variable) + ], + 'snippetspost': [ + (r'(req|bereq|obj|resp|beresp|client|server)(\.http)?\.\*',Name.Variable), + (r'(req|bereq|obj|resp|beresp|client|server)',Name.Variable), + (r'(backend)', Keyword.Reserved) + ], + 'root': [ + include('snippetspre'), + inherit, + include('snippetspost') + ] + } -- cgit v1.2.1 From c4f20708a1ec553070316f7fcf740793e48cce0d Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Mon, 8 Feb 2016 13:05:43 +0100 Subject: Add Varnish VCL test-file --- tests/examplefiles/varnish.vcl | 164 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tests/examplefiles/varnish.vcl diff --git a/tests/examplefiles/varnish.vcl b/tests/examplefiles/varnish.vcl new file mode 100644 index 00000000..826f13e9 --- /dev/null +++ b/tests/examplefiles/varnish.vcl @@ -0,0 +1,164 @@ +# This is the VCL configuration Varnish will automatically append to your VCL +# file during compilation/loading. See the vcl(7) man page for details on syntax +# and semantics. +# New users is recommended to use the example.vcl file as a starting point. + +vcl 4.0; + +backend foo { .host = "192.168.1.1"; } + +probe blatti { .url = "foo"; } + +sub vcl_recv { + if (req.method == "PRI") { + /* We do not support SPDY or HTTP/2.0 */ + return (synth(405)); + } + if (req.method != "GET" && + req.method != "HEAD" && + req.method != "PUT" && + req.method != "POST" && + req.method != "TRACE" && + req.method != "OPTIONS" && + req.method != "DELETE") { + /* Non-RFC2616 or CONNECT which is weird. */ + return (pipe); + } + + if (req.method != "GET" && req.method != "HEAD") { + /* We only deal with GET and HEAD by default */ + return (pass); + } + if (req.http.Authorization || req.http.Cookie) { + /* Not cacheable by default */ + return (pass); + } + return (hash); +} + +sub vcl_pipe { + # By default Connection: close is set on all piped requests, to stop + # connection reuse from sending future requests directly to the + # (potentially) wrong backend. If you do want this to happen, you can undo + # it here. 
+ # unset bereq.http.connection; + return (pipe); +} + +sub vcl_pass { + return (fetch); +} + +sub vcl_hash { + hash_data(req.url); + if (req.http.host) { + hash_data(req.http.host); + } else { + hash_data(server.ip); + } + return (lookup); +} + +sub vcl_purge { + return (synth(200, "Purged")); +} + +sub vcl_hit { + if (obj.ttl >= 0s) { + // A pure unadultered hit, deliver it + return (deliver); + } + if (obj.ttl + obj.grace > 0s) { + // Object is in grace, deliver it + // Automatically triggers a background fetch + return (deliver); + } + // fetch & deliver once we get the result + return (miss); +} + +sub vcl_miss { + return (fetch); +} + +sub vcl_deliver { + return (deliver); +} + +/* + * We can come here "invisibly" with the following errors: 413, 417 & 503 + */ +sub vcl_synth { + set resp.http.Content-Type = "text/html; charset=utf-8"; + set resp.http.Retry-After = "5"; + synthetic( {" + + + "} + resp.status + " " + resp.reason + {" + + +

+    <h1>Error "} + resp.status + " " + resp.reason + {"</h1>
+    <p>"} + resp.reason + {"</p>
+    <h3>Guru Meditation:</h3>
+    <p>XID: "} + req.xid + {"</p>
+    <hr>
+    <p>Varnish cache server</p>
+ + +"} ); + return (deliver); +} + +####################################################################### +# Backend Fetch + +sub vcl_backend_fetch { + return (fetch); +} + +sub vcl_backend_response { + if (beresp.ttl <= 0s || + beresp.http.Set-Cookie || + beresp.http.Surrogate-control ~ "no-store" || + (!beresp.http.Surrogate-Control && + beresp.http.Cache-Control ~ "no-cache|no-store|private") || + beresp.http.Vary == "*") { + /* + * Mark as "Hit-For-Pass" for the next 2 minutes + */ + set beresp.ttl = 120s; + set beresp.uncacheable = true; + } + return (deliver); +} + +sub vcl_backend_error { + set beresp.http.Content-Type = "text/html; charset=utf-8"; + set beresp.http.Retry-After = "5"; + synthetic( {" + + + "} + beresp.status + " " + beresp.reason + {" + + +

+    <h1>Error "} + beresp.status + " " + beresp.reason + {"</h1>
+    <p>"} + beresp.reason + {"</p>
+    <h3>Guru Meditation:</h3>
+    <p>XID: "} + bereq.xid + {"</p>
+    <hr>
+    <p>Varnish cache server</p>
+ + +"} ); + return (deliver); +} + +####################################################################### +# Housekeeping + +sub vcl_init { +} + +sub vcl_fini { + return (ok); +} -- cgit v1.2.1 From d3dad2c199691d708ea8522f5e20457ed7a1dea5 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Mon, 8 Feb 2016 13:15:08 +0100 Subject: Update AUTHORS with Varnish Lexer author --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 5108c2ab..600f6b22 100644 --- a/AUTHORS +++ b/AUTHORS @@ -110,6 +110,7 @@ Other contributors, listed alphabetically, are: * Jon Larimer, Google Inc. -- Smali lexer * Olov Lassus -- Dart lexer * Matt Layman -- TAP lexer +* Kristian Lyngstøl -- Varnish lexers * Sylvestre Ledru -- Scilab lexer * Mark Lee -- Vala lexer * Valentin Lorentz -- C++ lexer improvements -- cgit v1.2.1 From 7fc62fcc8c248084e0c00d376ef496b3222ae227 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Mon, 8 Feb 2016 13:59:20 +0100 Subject: Varnish lexer: Incorporate review feedback --- pygments/lexers/varnish.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 53f3f41f..74b629ff 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -14,14 +14,11 @@ import re from pygments.lexer import RegexLexer, include, bygroups, using, this, inherit, words, \ default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation - -from pygments.lexers.c_cpp import CLexer, CppLexer -from pygments.lexers import _mql_builtins + Number, Punctuation, Literal __all__ = ['VCLLexer', 'VCLSnippetLexer'] -class VCLLexer(CLexer): +class VCLLexer(RegexLexer): """ For Varnish Configuration Language (VCL). 
@@ -33,7 +30,7 @@ class VCLLexer(CLexer): tokens = { 'time': [ - (r'\d+(s|d|w|h|m)',Number) + (r'\d+(s|d|w|h|m)',Literal.Date) ], 'probe': [ (r'(\s*\.\w+)(\s*=\s*)([^;]*)(;)', @@ -42,11 +39,11 @@ class VCLLexer(CLexer): ], 'backend': [ include('whitespace'), - (r'(\s*)(\.host|\.port)(\s*=\s*)([^;]*)(\s*;)', - bygroups(Punctuation, Name.Attribute, Operator, using(this), Punctuation)), - (r'(\s*\.probe)(\s*=\s*)(\w+)(;)', + (r'(\.host|\.port)(\s*=\s*)([^;]*)(\s*;)', + bygroups(Name.Attribute, Operator, using(this), Punctuation)), + (r'(\.probe)(\s*=\s*)(\w+)(;)', bygroups(Name.Attribute,Operator,Name.Variable.Global,Punctuation)), - (r'(\s*\.probe)(\s*=\s*)({)', + (r'(\.probe)(\s*=\s*)({)', bygroups(Name.Attribute,Operator,Punctuation),'probe'), (r'{',Punctuation,'#push'), (r'}',Punctuation,'#pop') @@ -54,16 +51,16 @@ class VCLLexer(CLexer): 'statements': [ include('time'), (r'[~!%^&*+=|?:<>/-]', Operator), - (r'\s*(hash_data)(\()(.+)(\)\s*;\s*$)', + (r'(hash_data)(\()(.+)(\)\s*;\s*$)', bygroups(Keyword, Punctuation, using(this), Punctuation)), - (r'(\s*set\s)([^\s]+)(\s*=\s*)(.+)(\s*;\s*)($|#.*$|//.*$|/\*.*$)', + (r'(set\s)([^\s]+)(\s*=\s*)(.+)(\s*;\s*)($|#.*$|//.*$|/\*.*$)', bygroups(Keyword, Name.Variable, Punctuation, using(this), Punctuation, using(this))), - (r'(\s*unset\s)(\s*[^\s]+)(\s*;)', + (r'(unset\s)(\s*[^\s]+)(\s*;)', bygroups(Keyword, Name.Variable, Punctuation)), - (r'(\s*regsub\s*)(\()(.*)(,)(.*)(,)(.*)(\))', + (r'(regsub\s*)(\()(.*)(,)(.*)(,)(.*)(\))', bygroups(Keyword, Punctuation, using(this), Punctuation, using(this), Punctuation, using(this), Punctuation)), - (r'(\s*regsuball\s*)(\()(.*)(,)(.*)(,)(.*)(\))', + (r'(regsuball\s*)(\()(.*)(,)(.*)(,)(.*)(\))', bygroups(Keyword, Punctuation, using(this), Punctuation, using(this), Punctuation, using(this), Punctuation)), (r'(import\s)(\w+)(;\s*)$', @@ -75,16 +72,16 @@ class VCLLexer(CLexer): (words(('if','else','elsif','synth', 'synthetic'), suffix=r'\b'),Keyword), (words(('true','false')),Name.Builtin), - (r'(\s*call \s*)([^\s;]+)(;)', + (r'(call \s*)([^\s;]+)(;)', bygroups(Keyword,Name.Variable.Global,Punctuation)), - (r'obj.ttl',Name.Variable), + (r'obj\.ttl',Name.Variable), (r'(req|bereq|obj|resp|beresp)\.http\.[^\s]+',Name.Variable), (r'(req|bereq)\.(url|method|xid)',Name.Variable), (r'(resp|beresp|obj)\.(status|reason)',Name.Variable), (r'(beresp|obj)\.(ttl|grace)',Name.Variable), - (r'(backend )(\w*)(\s*{)', + (r'(backend)(\s+\w+)(\s*{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), - (r'(\s*probe )(\s*\w+\s)({)', + (r'(probe )(\s*\w+\s)({)', bygroups(Keyword,Name.Variable.Global,Punctuation),'probe'), (r'[();]', Punctuation), (r'(client|server)\.(ip|identity)',Name.Variable), @@ -100,7 +97,7 @@ class VCLLexer(CLexer): (r'\s*\}\s*',Punctuation,'#pop') ], 'comment': [ - (r'[^*/]', Comment.Multiline), + (r'[^*/]+', Comment.Multiline), (r'/\*', Comment.Multiline, '#push'), (r'\*/', Comment.Multiline, '#pop'), (r'[*/]', Comment.Multiline) @@ -140,7 +137,7 @@ class VCLLexer(CLexer): include('whitespace'), include('comments'), include('returns'), - (r'(sub )(\w+)(\s*{)', + (r'(sub\s+)([a-zA-Z]\w*)(\s*{)', bygroups(Keyword, Name.Function, Punctuation),'sub'), include('statements'), (r'\s+', Text) -- cgit v1.2.1 From 7322aef63ac2f20339f3747ea90dd24be62c69c2 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Mon, 8 Feb 2016 14:17:46 +0100 Subject: Varnish VCL: Use character class instead for time --- pygments/lexers/varnish.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 74b629ff..431e1494 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -30,7 +30,7 @@ class VCLLexer(RegexLexer): tokens = { 'time': [ - (r'\d+(s|d|w|h|m)',Literal.Date) + (r'\d+[sdwhm]',Literal.Date) ], 'probe': [ (r'(\s*\.\w+)(\s*=\s*)([^;]*)(;)', -- cgit v1.2.1 From 0497d3d825c7f144c50e8f3566dc19e01618bcc9 Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Mon, 8 Feb 2016 15:22:51 +0200 Subject: Parentheses are now lexed as part of the delimiter in C++ raw strings. --- pygments/lexers/c_cpp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py index 7f061539..2f77158b 100644 --- a/pygments/lexers/c_cpp.py +++ b/pygments/lexers/c_cpp.py @@ -218,9 +218,9 @@ class CppLexer(CFamilyLexer): (r'char(16_t|32_t)\b', Keyword.Type), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), # C++11 raw strings - (r'(R)(")([^\\()\s]{,16})(\((?:.|\n)*?\))(\3)(")', - bygroups(String.Affix, String, String.Delimiter, String, - String.Delimiter, String)), + (r'(R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")', + bygroups(String.Affix, String, String.Delimiter, String.Delimiter, + String, String.Delimiter, String)), # C++11 UTF-8/16/32 strings (r'(u8|u|U)(")', bygroups(String.Affix, String), 'string'), inherit, -- cgit v1.2.1 From be1c4414d2f5e91d38696774edd88b1c861c2012 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Mon, 8 Feb 2016 14:31:33 +0100 Subject: Varnish: Tweak strings {" Multi- line string"} "But this doesn't have any \escape" --- pygments/lexers/varnish.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 431e1494..09d93213 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -109,12 +109,11 @@ class VCLLexer(RegexLexer): ], 'string': [ (r'"', String, '#pop'), - (r'[^\\"\n]+', String), # all other characters + (r'[^"\n]+', String), # all other characters ], 'multistring': [ (r'[^"}]', String), - (r'{"', String, '#push'), (r'"}', String, '#pop'), (r'["}]', String) ], -- cgit v1.2.1 From 93270e27e8c4597a33103ad851ec72728c88b20c Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Mon, 8 Feb 2016 18:58:13 +0200 Subject: String prefixes and dollar-quoted string delimiters lexing in Postgresql. Also closes issue #886. --- pygments/lexers/sql.py | 35 ++++++++++++++++++++++++---------- tests/examplefiles/postgresql_test.txt | 34 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 10 deletions(-) diff --git a/pygments/lexers/sql.py b/pygments/lexers/sql.py index 05503c3a..948d876a 100644 --- a/pygments/lexers/sql.py +++ b/pygments/lexers/sql.py @@ -57,11 +57,14 @@ line_re = re.compile('.*?\n') language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE) +do_re = re.compile(r'\bDO\b', re.IGNORECASE) + def language_callback(lexer, match): """Parse the content of a $-string using a lexer - The lexer is chosen looking for a nearby LANGUAGE. + The lexer is chosen looking for a nearby LANGUAGE or assumed as + plpgsql if inside a DO statement and no LANGUAGE has been found. 
""" l = None m = language_re.match(lexer.text[match.end():match.end()+100]) @@ -72,15 +75,26 @@ def language_callback(lexer, match): lexer.text[max(0, match.start()-100):match.start()])) if m: l = lexer._get_lexer(m[-1].group(1)) - + else: + m = list(do_re.finditer( + lexer.text[max(0, match.start()-25):match.start()])) + if m: + l = lexer._get_lexer('plpgsql') + + # 1 = $, 2 = delimiter, 3 = $ + yield (match.start(1), String, match.group(1)) + yield (match.start(2), String.Delimiter, match.group(2)) + yield (match.start(3), String, match.group(3)) + # 4 = string contents if l: - yield (match.start(1), String, match.group(1)) - for x in l.get_tokens_unprocessed(match.group(2)): + for x in l.get_tokens_unprocessed(match.group(4)): yield x - yield (match.start(3), String, match.group(3)) - else: - yield (match.start(), String, match.group()) + yield (match.start(4), String, match.group(4)) + # 5 = $, 6 = delimiter, 7 = $ + yield (match.start(5), String, match.group(5)) + yield (match.start(6), String.Delimiter, match.group(6)) + yield (match.start(7), String, match.group(7)) class PostgresBase(object): @@ -148,9 +162,10 @@ class PostgresLexer(PostgresBase, RegexLexer): (r'\$\d+', Name.Variable), (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float), (r'[0-9]+', Number.Integer), - (r"(E|U&)?'(''|[^'])*'", String.Single), - (r'(U&)?"(""|[^"])*"', String.Name), # quoted identifier - (r'(?s)(\$[^$]*\$)(.*?)(\1)', language_callback), + (r"((?:E|U&)?)('(?:''|[^'])*')", bygroups(String.Affix, String.Single)), + # quoted identifier + (r'((?:U&)?)("(?:""|[^"])*")', bygroups(String.Affix, String.Name)), + (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback), (r'[a-z_]\w*', Name), # psql variable in SQL diff --git a/tests/examplefiles/postgresql_test.txt b/tests/examplefiles/postgresql_test.txt index 190d184f..28db5ee3 100644 --- a/tests/examplefiles/postgresql_test.txt +++ b/tests/examplefiles/postgresql_test.txt @@ -45,3 +45,37 @@ $$; SELECT U&'\0441\043B\043E\043D' FROM U&"\0441\043B\043E\043D"; +-- Escapes +SELECT E'1\n2\n3'; + +-- DO example from postgresql documentation +/* + * PostgreSQL is Copyright © 1996-2016 by the PostgreSQL Global Development Group. + * + * Postgres95 is Copyright © 1994-5 by the Regents of the University of California. + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose, without fee, and without a written agreement + * is hereby granted, provided that the above copyright notice and this paragraph + * and the following two paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, + * EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS-IS" BASIS, + * AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ +DO $$DECLARE r record; +BEGIN + FOR r IN SELECT table_schema, table_name FROM information_schema.tables + WHERE table_type = 'VIEW' AND table_schema = 'public' + LOOP + EXECUTE 'GRANT ALL ON ' || quote_ident(r.table_schema) || '.' 
|| quote_ident(r.table_name) || ' TO webuser'; + END LOOP; +END$$; -- cgit v1.2.1 From b05b2487cb08ff23d71c3a3365a7e3ff8d020212 Mon Sep 17 00:00:00 2001 From: Miikka Salminen Date: Mon, 8 Feb 2016 19:08:36 +0200 Subject: Add the additions to CHANGES. --- CHANGES | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index ced0ffb0..e05e20ac 100644 --- a/CHANGES +++ b/CHANGES @@ -21,11 +21,11 @@ Version 2.2 - Added new token types and lexing for magic methods and variables in Python and PHP. -- Added a new token type for string affixes and lexing for them in Python and - C++ lexers. +- Added a new token type for string affixes and lexing for them in Python, C++ + and Postgresql lexers. - Added a new token type for heredoc (and similar) string delimiters and - lexing for them in C++, Perl, PHP and Ruby lexers. + lexing for them in C++, Perl, PHP, Postgresql and Ruby lexers. Version 2.1.1 -- cgit v1.2.1 From e7647cf59896cea248079f89e53b3dc089247218 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Mon, 8 Feb 2016 19:05:25 +0100 Subject: Varnish Lexer: Improve accuracy --- pygments/lexers/varnish.py | 45 ++++++++++++++++++++++++------------------ tests/examplefiles/varnish.vcl | 14 +++++++++++++ 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 09d93213..e8ee716b 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -29,14 +29,17 @@ class VCLLexer(RegexLexer): mimetypes = ['text/x-vclsrc'] tokens = { - 'time': [ - (r'\d+[sdwhm]',Literal.Date) - ], 'probe': [ (r'(\s*\.\w+)(\s*=\s*)([^;]*)(;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), (r'\s*}', Punctuation, '#pop') ], + 'acl': [ + include('whitespace'), + (r'(\.\w+)(\s*=\s*)([^;]*)(;)', + bygroups(Name.Attribute, Operator, using(this), Punctuation)), + (r'}', Punctuation, '#pop') + ], 'backend': [ include('whitespace'), (r'(\.host|\.port)(\s*=\s*)([^;]*)(\s*;)', @@ -49,7 +52,7 @@ class VCLLexer(RegexLexer): (r'}',Punctuation,'#pop') ], 'statements': [ - include('time'), + (r'\d+[sdwhm]',Literal.Date), (r'[~!%^&*+=|?:<>/-]', Operator), (r'(hash_data)(\()(.+)(\)\s*;\s*$)', bygroups(Keyword, Punctuation, using(this), Punctuation)), @@ -64,7 +67,7 @@ class VCLLexer(RegexLexer): bygroups(Keyword, Punctuation, using(this), Punctuation, using(this), Punctuation, using(this), Punctuation)), (r'(import\s)(\w+)(;\s*)$', - bygroups(Keyword, Name.Variable.Global, Punctuation)), + bygroups(Keyword.Namespace, Name.Variable.Global, Punctuation)), (words(('vcl_recv','vcl_pipe','vcl_pass','vcl_hash','vcl_purge', 'vcl_hit','vcl_miss','vcl_deliver','vcl_synth','vcl_backend_fetch', 'vcl_backend_response','vcl_backend_error','vcl_init','vcl_fini'), @@ -75,26 +78,30 @@ class VCLLexer(RegexLexer): (r'(call \s*)([^\s;]+)(;)', bygroups(Keyword,Name.Variable.Global,Punctuation)), (r'obj\.ttl',Name.Variable), - (r'(req|bereq|obj|resp|beresp)\.http\.[^\s]+',Name.Variable), - (r'(req|bereq)\.(url|method|xid)',Name.Variable), - (r'(resp|beresp|obj)\.(status|reason)',Name.Variable), - (r'(beresp|obj)\.(ttl|grace)',Name.Variable), + (r'(req|bereq|obj|resp|beresp)\.http\.[^\s]+\b',Name.Variable), + (r'(req|bereq)\.(url|method|xid)\b',Name.Variable), + (r'(resp|beresp|obj)\.(status|reason)\b',Name.Variable), + (r'(beresp|obj)\.(ttl|grace)\b',Name.Variable), (r'(backend)(\s+\w+)(\s*{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), - (r'(probe )(\s*\w+\s)({)', + (r'(probe\s)(\s*\w+\s)({)', 
bygroups(Keyword,Name.Variable.Global,Punctuation),'probe'), + (r'(acl\s)(\s*\w+\s)({)', + bygroups(Keyword,Name.Variable.Global,Punctuation),'acl'), (r'[();]', Punctuation), - (r'(client|server)\.(ip|identity)',Name.Variable), + (r'(client|server)\.(ip|identity)\b',Name.Variable), (r'^(vcl )(4.0)(;)$', bygroups(Keyword.Reserved,Name.Constant,Punctuation)), + (r'(include\s+)("[^"]+"\s*)(;)', + bygroups(Keyword,String,Punctuation)) ], 'sub': [ include('whitespace'), include('comments'), include('returns'), include('statements'), - (r'\s*\{\s*',Punctuation,'#push'), - (r'\s*\}\s*',Punctuation,'#pop') + (r'{',Punctuation,'#push'), + (r'}',Punctuation,'#pop') ], 'comment': [ (r'[^*/]+', Comment.Multiline), @@ -125,11 +132,11 @@ class VCLLexer(RegexLexer): (r'\\\n', Text) # line continuation ], 'returns': [ - (r'(\s*return )(\()(hash|lookup|ok|deliver|miss|fetch|pass|pipe)(\)\s*;$)', + (r'(return\s)(\()(hash|lookup|ok|deliver|miss|fetch|pass|pipe)(\)\s*;$)', bygroups(Keyword, Punctuation, Name.Constant, Punctuation)), - (r'(\s*return )(\()(\s*synth\s*)(\()(\s*\d+\s*)(,)([^)]+)(\)\s*\)\s*;)', + (r'(return\s)(\()(\s*synth\s*)(\()(\s*\d+\s*)(,)([^)]+)(\)\s*\)\s*;)', bygroups(Keyword, Punctuation, Keyword, Punctuation,Number,Punctuation,using(this),Punctuation)), - (r'(\s*return )(\()(\s*synth\s*)(\()(\s*\d+\s*)(\)\s*\)\s*;)', + (r'(return\s)(\()(\s*synth\s*)(\()(\s*\d+\s*)(\)\s*\)\s*;)', bygroups(Keyword, Punctuation, Keyword, Punctuation,Number,Punctuation)) ], 'root': [ @@ -157,9 +164,9 @@ class VCLSnippetLexer(VCLLexer): (r'\', Name.Variable) ], 'snippetspost': [ - (r'(req|bereq|obj|resp|beresp|client|server)(\.http)?\.\*',Name.Variable), - (r'(req|bereq|obj|resp|beresp|client|server)',Name.Variable), - (r'(backend)', Keyword.Reserved) + (r'(req|bereq|obj|resp|beresp|client|server)(\.http)?\.\*\b',Name.Variable), + (r'(req|bereq|obj|resp|beresp|client|server)\b',Name.Variable), + (r'(backend\b)', Keyword.Reserved) ], 'root': [ include('snippetspre'), diff --git a/tests/examplefiles/varnish.vcl b/tests/examplefiles/varnish.vcl index 826f13e9..c14e1779 100644 --- a/tests/examplefiles/varnish.vcl +++ b/tests/examplefiles/varnish.vcl @@ -8,6 +8,20 @@ vcl 4.0; backend foo { .host = "192.168.1.1"; } probe blatti { .url = "foo"; } +probe fooy { + .url = "beh"; + +} + +acl foo { + "192.168.1.1"; + "192.168.0.0"/24; + ! "192.168.0.1"; +} + +include "foo.vcl"; + +import std; sub vcl_recv { if (req.method == "PRI") { -- cgit v1.2.1 From 2a0288be69cdeaa747a5651297575ff0f3811484 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Mon, 8 Feb 2016 19:42:36 +0100 Subject: Varnihs Lexer: Add various missing tokens The lexing is a bit imprecise right now, but other than that I think the biggest missing parts right now is the inner workings of ACLs and vmods. It might be missing a few variables too. In general, the lexer is in need of some reorganizing to tidy things up. 
--- pygments/lexers/varnish.py | 23 +++++++++++++++++------ tests/examplefiles/varnish.vcl | 9 +++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index e8ee716b..0162f55f 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -52,8 +52,9 @@ class VCLLexer(RegexLexer): (r'}',Punctuation,'#pop') ], 'statements': [ - (r'\d+[sdwhm]',Literal.Date), - (r'[~!%^&*+=|?:<>/-]', Operator), + (r'\d+[sdwhmy]',Literal.Date), + (r'\d+ms',Literal.Date), + (r'[~!^&*+=|<>/-]', Operator), (r'(hash_data)(\()(.+)(\)\s*;\s*$)', bygroups(Keyword, Punctuation, using(this), Punctuation)), (r'(set\s)([^\s]+)(\s*=\s*)(.+)(\s*;\s*)($|#.*$|//.*$|/\*.*$)', @@ -72,18 +73,27 @@ class VCLLexer(RegexLexer): 'vcl_hit','vcl_miss','vcl_deliver','vcl_synth','vcl_backend_fetch', 'vcl_backend_response','vcl_backend_error','vcl_init','vcl_fini'), suffix=r'\b'),Name.Function), - (words(('if','else','elsif','synth', + (words(('if','else','elsif','elif','synth', 'synthetic'), suffix=r'\b'),Keyword), + (r'(new\s+)(\w+)(\s*=)(.*)(;)', + bygroups(Keyword.Namespace,Name.Variable.Global,Punctuation,Text,Punctuation)), + (r'(rollback\s*)(\(\s*\)\s*;)', + bygroups(Keyword,Punctuation)), + (r'storage\.\w+\.\w+\b', Name.Variable), + (r'(local|remote)\.ip\b', Name.Variable), + (r'now\b', Name.Variable), (words(('true','false')),Name.Builtin), (r'(call \s*)([^\s;]+)(;)', bygroups(Keyword,Name.Variable.Global,Punctuation)), (r'obj\.ttl',Name.Variable), - (r'(req|bereq|obj|resp|beresp)\.http\.[^\s]+\b',Name.Variable), - (r'(req|bereq)\.(url|method|xid)\b',Name.Variable), + (r'(req_top|req|bereq|obj|resp|beresp)\.http\.[^\s]+\b',Name.Variable), + (r'(req_top|req|bereq)\.(url|method|xid)\b',Name.Variable), (r'(resp|beresp|obj)\.(status|reason)\b',Name.Variable), (r'(beresp|obj)\.(ttl|grace)\b',Name.Variable), (r'(backend)(\s+\w+)(\s*{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), + (r'(ban\s*)(\()(.*)(\)\s*;)', + bygroups(Keyword,Punctuation,using(this),Punctuation)), (r'(probe\s)(\s*\w+\s)({)', bygroups(Keyword,Name.Variable.Global,Punctuation),'probe'), (r'(acl\s)(\s*\w+\s)({)', @@ -161,7 +171,8 @@ class VCLSnippetLexer(VCLLexer): tokens = { 'snippetspre': [ (r'\', Name.Variable), - (r'\', Name.Variable) + (r'\', Name.Variable), + (r'\.\.\.+', Comment) ], 'snippetspost': [ (r'(req|bereq|obj|resp|beresp|client|server)(\.http)?\.\*\b',Name.Variable), diff --git a/tests/examplefiles/varnish.vcl b/tests/examplefiles/varnish.vcl index c14e1779..6258c313 100644 --- a/tests/examplefiles/varnish.vcl +++ b/tests/examplefiles/varnish.vcl @@ -23,6 +23,14 @@ include "foo.vcl"; import std; +sub vcl_init { + new b = director.foo(); +} + +sub vcl_recv { + ban(req.url ~ "foo"); + rollback(); +} sub vcl_recv { if (req.method == "PRI") { /* We do not support SPDY or HTTP/2.0 */ @@ -96,6 +104,7 @@ sub vcl_miss { } sub vcl_deliver { + set resp.http.x-storage = storage.s0.free; return (deliver); } -- cgit v1.2.1 From 342fc93725ce00442e835b02218e232b1dd05b1e Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Mon, 8 Feb 2016 13:25:00 -0800 Subject: Handle some `#ansi*` in HtmlFormatter --- doc/docs/styles.rst | 8 ++++++-- pygments/formatters/html.py | 21 +++++++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst index 13326129..baf18113 100644 --- a/doc/docs/styles.rst +++ b/doc/docs/styles.rst @@ -182,5 +182,9 @@ foreground color. 
>>> print(result.encode()) b'print(\x1b[34;01m"\x1b[39m\x1b[34;01mHello World\x1b[39m\x1b[34;01m"\x1b[39m)\n' -Style that use `#ansi*` foreground colors do not currently work with formatters -others than ``Terminal256``. +Style that use `#ansi*` foreground colors might not correctly work with +formatters others than ``Terminal256``. `HtmlFormatter` is capable of handling +some `#ansi*` code and will map to the corresponding HTML/CSS color. That is to +say, `#ansiblue` will be converted to `color:blue` , `#ansired` to `color:red`. +The behavior is undefined for argument like `#ansireset`, `#ansiunderline`, +`#ansibold`... etc. diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py index b03a4bd5..38e49f15 100644 --- a/pygments/formatters/html.py +++ b/pygments/formatters/html.py @@ -20,6 +20,23 @@ from pygments.token import Token, Text, STANDARD_TYPES from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \ StringIO, string_types, iteritems + +_deansify_map = { + '#darkyellow':'#brown', + '#darkteal': '#turquoise', + '#fusia': '#fushia' +} + + + +def _deansify(color): + if color.startswith('#ansi'): + color = color[5:] + else: + color = '#%s'% color + + return _deansify_map.get(color, color) + try: import ctags except ImportError: @@ -444,7 +461,7 @@ class HtmlFormatter(Formatter): name = self._get_css_class(ttype) style = '' if ndef['color']: - style += 'color: #%s; ' % ndef['color'] + style += 'color: %s; ' % _deansify(ndef['color']) if ndef['bold']: style += 'font-weight: bold; ' if ndef['italic']: @@ -452,7 +469,7 @@ class HtmlFormatter(Formatter): if ndef['underline']: style += 'text-decoration: underline; ' if ndef['bgcolor']: - style += 'background-color: #%s; ' % ndef['bgcolor'] + style += 'background-color: %s; ' % _deansify(ndef['bgcolor']) if ndef['border']: style += 'border: 1px solid #%s; ' % ndef['border'] if style: -- cgit v1.2.1 From bbe2d4e697e326ad231af247c235431acf710efc Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Mon, 8 Feb 2016 13:38:09 -0800 Subject: Remove rogue byte sequence in documentation. --- doc/docs/styles.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst index baf18113..766cf866 100644 --- a/doc/docs/styles.rst +++ b/doc/docs/styles.rst @@ -90,7 +90,7 @@ Here a small overview of all allowed styles: don't render text as bold (to prevent subtokens being highlighted bold) ``italic`` render text italic -``noitalic``\x1b[38;5;124 +``noitalic`` don't render text as italic ``underline`` render text underlined -- cgit v1.2.1 From 42b5a94660d635989248de5c2b93fa608ab2b861 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Tue, 9 Feb 2016 13:26:40 +0100 Subject: Varnish Lexer: Add analyse_text() It looks for documents starting with 'vcl 4.0;', optionally with comments preceding it. As all valid VCL as of Varnish 4.0 starts with this (even Varnish 4.1), this is pretty much guaranteed to work. 
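A minimal sketch of how the new heuristic behaves, assuming this patch is applied (the scores follow directly from the analyse_text() added below):

    from pygments.lexers.varnish import VCLLexer

    print(VCLLexer.analyse_text('vcl 4.0;\n'))                 # 1.0: file starts with the marker
    print(VCLLexer.analyse_text('# boilerplate\nvcl 4.0;\n'))  # 0.9: comments before the marker
    print(VCLLexer.analyse_text('sub vcl_recv { }\n'))         # 0.0: no version marker at all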
--- pygments/lexers/varnish.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 0162f55f..d2d81f6d 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -28,6 +28,20 @@ class VCLLexer(RegexLexer): filenames = [ '*.vcl' ] mimetypes = ['text/x-vclsrc'] + def analyse_text(text): + # If the very first line is 'vcl 4.0;' it's pretty much guaranteed + # that this is VCL + if re.search('^vcl 4\.0;\n', text): + return 1.0 + + # Skip over comments and blank lines + # This is accurate enough that returning 0.9 is reasonable. + # Almost no VCL files start without some comments. + if re.search('^((\s+)|(#[^\n]*\n)|(\n)|(\s*//[^\n]*\n)|(/\*[^*/]*\*/))*vcl 4\.0;', text): + return 0.9 + + return 0.0 + tokens = { 'probe': [ (r'(\s*\.\w+)(\s*=\s*)([^;]*)(;)', @@ -100,7 +114,7 @@ class VCLLexer(RegexLexer): bygroups(Keyword,Name.Variable.Global,Punctuation),'acl'), (r'[();]', Punctuation), (r'(client|server)\.(ip|identity)\b',Name.Variable), - (r'^(vcl )(4.0)(;)$', + (r'(vcl )(4.0)(;)$', bygroups(Keyword.Reserved,Name.Constant,Punctuation)), (r'(include\s+)("[^"]+"\s*)(;)', bygroups(Keyword,String,Punctuation)) -- cgit v1.2.1 From 794fc70785edc35d3c52514a58769337cd3e265a Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Tue, 9 Feb 2016 16:50:26 +0100 Subject: Varnish Lexer: Improve precision based on feedback (and also testing) --- pygments/lexers/varnish.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index d2d81f6d..2c24dc2c 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -186,12 +186,12 @@ class VCLSnippetLexer(VCLLexer): 'snippetspre': [ (r'\', Name.Variable), (r'\', Name.Variable), + (r'(req|bereq|obj|resp|beresp|client|server)(\.http)?\.\*(\b|$)',Name.Variable), (r'\.\.\.+', Comment) ], 'snippetspost': [ - (r'(req|bereq|obj|resp|beresp|client|server)(\.http)?\.\*\b',Name.Variable), - (r'(req|bereq|obj|resp|beresp|client|server)\b',Name.Variable), - (r'(backend\b)', Keyword.Reserved) + (r'(req|bereq|obj|resp|beresp|client|server)(\b|$)',Name.Variable), + (r'(backend)(\b|$)', Keyword.Reserved) ], 'root': [ include('snippetspre'), -- cgit v1.2.1 From 89876fe32eefab317b64e500c7143ad9d82a00c5 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Tue, 9 Feb 2016 18:07:04 +0100 Subject: Varnish Lexer: (Presumably) complete language support This has been tested with all the VCL used in the Varnish Book ( https://github.com/varnish/Varnish-Book/tree/master/vcl) which I consider a very good measure of language support. Unfortunately those VCL files are CC-BY-SA-NC so are not suitable as example files here for legal reasons. 
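A sketch of how such a coverage check could be reproduced locally: lex every .vcl file and flag any Error tokens. The directory layout is an assumption based on the linked repository, and this is the editor's reconstruction, not the author's actual test script.

    import glob
    from pygments.lexers.varnish import VCLLexer
    from pygments.token import Error

    lexer = VCLLexer()
    for path in glob.glob('Varnish-Book/vcl/*.vcl'):
        with open(path) as f:
            errors = [v for t, v in lexer.get_tokens(f.read()) if t is Error]
        print(path, 'clean' if not errors else 'error tokens: %r' % errors)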
--- pygments/lexers/varnish.py | 50 ++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 2c24dc2c..ba1a4280 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -44,24 +44,28 @@ class VCLLexer(RegexLexer): tokens = { 'probe': [ - (r'(\s*\.\w+)(\s*=\s*)([^;]*)(;)', + include('whitespace'), + include('comments'), + (r'(\.\w+)(\s*=\s*)([^;]*)(;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), - (r'\s*}', Punctuation, '#pop') + (r'}', Punctuation, '#pop') ], 'acl': [ include('whitespace'), - (r'(\.\w+)(\s*=\s*)([^;]*)(;)', - bygroups(Name.Attribute, Operator, using(this), Punctuation)), + include('comments'), + (r'[!/]+',Operator), + (r';',Punctuation), + (r'\d+',Number), (r'}', Punctuation, '#pop') ], 'backend': [ include('whitespace'), - (r'(\.host|\.port)(\s*=\s*)([^;]*)(\s*;)', - bygroups(Name.Attribute, Operator, using(this), Punctuation)), (r'(\.probe)(\s*=\s*)(\w+)(;)', bygroups(Name.Attribute,Operator,Name.Variable.Global,Punctuation)), (r'(\.probe)(\s*=\s*)({)', bygroups(Name.Attribute,Operator,Punctuation),'probe'), + (r'(\..*\b)(\s*=\s*)([^;]*)(\s*;)', + bygroups(Name.Attribute, Operator, using(this), Punctuation)), (r'{',Punctuation,'#push'), (r'}',Punctuation,'#pop') ], @@ -69,28 +73,21 @@ class VCLLexer(RegexLexer): (r'\d+[sdwhmy]',Literal.Date), (r'\d+ms',Literal.Date), (r'[~!^&*+=|<>/-]', Operator), - (r'(hash_data)(\()(.+)(\)\s*;\s*$)', - bygroups(Keyword, Punctuation, using(this), Punctuation)), + (r'[,]', Punctuation), (r'(set\s)([^\s]+)(\s*=\s*)(.+)(\s*;\s*)($|#.*$|//.*$|/\*.*$)', bygroups(Keyword, Name.Variable, Punctuation, using(this), Punctuation, using(this))), (r'(unset\s)(\s*[^\s]+)(\s*;)', bygroups(Keyword, Name.Variable, Punctuation)), - (r'(regsub\s*)(\()(.*)(,)(.*)(,)(.*)(\))', - bygroups(Keyword, Punctuation, using(this), Punctuation, - using(this), Punctuation, using(this), Punctuation)), - (r'(regsuball\s*)(\()(.*)(,)(.*)(,)(.*)(\))', - bygroups(Keyword, Punctuation, using(this), Punctuation, - using(this), Punctuation, using(this), Punctuation)), - (r'(import\s)(\w+)(;\s*)$', + (r'(import\s)(\w+)(;)', bygroups(Keyword.Namespace, Name.Variable.Global, Punctuation)), (words(('vcl_recv','vcl_pipe','vcl_pass','vcl_hash','vcl_purge', 'vcl_hit','vcl_miss','vcl_deliver','vcl_synth','vcl_backend_fetch', 'vcl_backend_response','vcl_backend_error','vcl_init','vcl_fini'), suffix=r'\b'),Name.Function), - (words(('if','else','elsif','elif','synth', - 'synthetic'), suffix=r'\b'),Keyword), + (words(('hash_data','regsub','regsuball','if','else','elsif','elif','synth', + 'synthetic','ban'), suffix=r'\b'),Keyword), (r'(new\s+)(\w+)(\s*=)(.*)(;)', - bygroups(Keyword.Namespace,Name.Variable.Global,Punctuation,Text,Punctuation)), + bygroups(Keyword.Namespace,Name.Variable.Global,Punctuation,using(this),Punctuation)), (r'(rollback\s*)(\(\s*\)\s*;)', bygroups(Keyword,Punctuation)), (r'storage\.\w+\.\w+\b', Name.Variable), @@ -99,15 +96,14 @@ class VCLLexer(RegexLexer): (words(('true','false')),Name.Builtin), (r'(call \s*)([^\s;]+)(;)', bygroups(Keyword,Name.Variable.Global,Punctuation)), - (r'obj\.ttl',Name.Variable), + (r'obj\.(ttl|hits)',Name.Variable), + (r'\d+\b', Number), (r'(req_top|req|bereq|obj|resp|beresp)\.http\.[^\s]+\b',Name.Variable), - (r'(req_top|req|bereq)\.(url|method|xid)\b',Name.Variable), + (r'(req_top|req|bereq)\.(restarts|backend_hint|url|method|xid)\b',Name.Variable), 
(r'(resp|beresp|obj)\.(status|reason)\b',Name.Variable), (r'(beresp|obj)\.(ttl|grace)\b',Name.Variable), (r'(backend)(\s+\w+)(\s*{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), - (r'(ban\s*)(\()(.*)(\)\s*;)', - bygroups(Keyword,Punctuation,using(this),Punctuation)), (r'(probe\s)(\s*\w+\s)({)', bygroups(Keyword,Name.Variable.Global,Punctuation),'probe'), (r'(acl\s)(\s*\w+\s)({)', @@ -117,7 +113,13 @@ class VCLLexer(RegexLexer): (r'(vcl )(4.0)(;)$', bygroups(Keyword.Reserved,Name.Constant,Punctuation)), (r'(include\s+)("[^"]+"\s*)(;)', - bygroups(Keyword,String,Punctuation)) + bygroups(Keyword,String,Punctuation)), + ( r'([a-zA-Z_]\w*)' + r'(\.)' + r'([a-zA-Z_]\w*)' + r'(\s*\(.*\))', + bygroups(Name.Function,Punctuation,Name.Function,using(this))), + ('[a-zA-Z_]\w*', Name) ], 'sub': [ include('whitespace'), @@ -156,7 +158,7 @@ class VCLLexer(RegexLexer): (r'\\\n', Text) # line continuation ], 'returns': [ - (r'(return\s)(\()(hash|lookup|ok|deliver|miss|fetch|pass|pipe)(\)\s*;$)', + (r'(return\s)(\()(hash|lookup|ok|deliver|miss|fetch|pass|pipe|purge|retry|restart)(\)\s*;$)', bygroups(Keyword, Punctuation, Name.Constant, Punctuation)), (r'(return\s)(\()(\s*synth\s*)(\()(\s*\d+\s*)(,)([^)]+)(\)\s*\)\s*;)', bygroups(Keyword, Punctuation, Keyword, Punctuation,Number,Punctuation,using(this),Punctuation)), -- cgit v1.2.1 From e94330ce649d5bad08735f2d72c2ca59398eac4c Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Tue, 9 Feb 2016 18:27:01 +0100 Subject: Varnish Lexer: Also support fractions of seconds Of course this was staring me right in the face when I declared the lexer feature complete. --- pygments/lexers/varnish.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index ba1a4280..187a6d9f 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -70,8 +70,8 @@ class VCLLexer(RegexLexer): (r'}',Punctuation,'#pop') ], 'statements': [ - (r'\d+[sdwhmy]',Literal.Date), - (r'\d+ms',Literal.Date), + (r'(\d\.)?\d+[sdwhmy]',Literal.Date), + (r'(\d\.)?\d+ms',Literal.Date), (r'[~!^&*+=|<>/-]', Operator), (r'[,]', Punctuation), (r'(set\s)([^\s]+)(\s*=\s*)(.+)(\s*;\s*)($|#.*$|//.*$|/\*.*$)', -- cgit v1.2.1 From f1d2bc688f608c794a7e28bcc858114104e76df9 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Wed, 10 Feb 2016 00:09:30 +0100 Subject: Varnish lexer: Once again, improve precision --- pygments/lexers/varnish.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 187a6d9f..38ac4387 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -64,7 +64,7 @@ class VCLLexer(RegexLexer): bygroups(Name.Attribute,Operator,Name.Variable.Global,Punctuation)), (r'(\.probe)(\s*=\s*)({)', bygroups(Name.Attribute,Operator,Punctuation),'probe'), - (r'(\..*\b)(\s*=\s*)([^;]*)(\s*;)', + (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), (r'{',Punctuation,'#push'), (r'}',Punctuation,'#pop') @@ -73,9 +73,12 @@ class VCLLexer(RegexLexer): (r'(\d\.)?\d+[sdwhmy]',Literal.Date), (r'(\d\.)?\d+ms',Literal.Date), (r'[~!^&*+=|<>/-]', Operator), - (r'[,]', Punctuation), - (r'(set\s)([^\s]+)(\s*=\s*)(.+)(\s*;\s*)($|#.*$|//.*$|/\*.*$)', - bygroups(Keyword, Name.Variable, Punctuation, using(this), Punctuation, using(this))), + (r'[,]+', Punctuation), + (r'(hash|lookup|ok|deliver|miss|fetch|pass|pipe|purge|retry|restart)\b', 
Name.Constant), + (r'(synth)(\(.*\))', bygroups(Keyword,using(this))), + (r'(return\s)(\(.*\)\s*;)', bygroups(Keyword, using(this))), + (r'(set\s)([^\s]+)(\s*=\s*)(.+)(\s*;)', + bygroups(Keyword, Name.Variable, Punctuation, using(this), Punctuation)), (r'(unset\s)(\s*[^\s]+)(\s*;)', bygroups(Keyword, Name.Variable, Punctuation)), (r'(import\s)(\w+)(;)', @@ -92,13 +95,12 @@ class VCLLexer(RegexLexer): bygroups(Keyword,Punctuation)), (r'storage\.\w+\.\w+\b', Name.Variable), (r'(local|remote)\.ip\b', Name.Variable), - (r'now\b', Name.Variable), (words(('true','false')),Name.Builtin), (r'(call \s*)([^\s;]+)(;)', bygroups(Keyword,Name.Variable.Global,Punctuation)), (r'obj\.(ttl|hits)',Name.Variable), (r'\d+\b', Number), - (r'(req_top|req|bereq|obj|resp|beresp)\.http\.[^\s]+\b',Name.Variable), + (r'(req_top|req|bereq|obj|resp|beresp)\.http\.\w+\b',Name.Variable), (r'(req_top|req|bereq)\.(restarts|backend_hint|url|method|xid)\b',Name.Variable), (r'(resp|beresp|obj)\.(status|reason)\b',Name.Variable), (r'(beresp|obj)\.(ttl|grace)\b',Name.Variable), @@ -110,6 +112,7 @@ class VCLLexer(RegexLexer): bygroups(Keyword,Name.Variable.Global,Punctuation),'acl'), (r'[();]', Punctuation), (r'(client|server)\.(ip|identity)\b',Name.Variable), + (r'(now|req|req_top|storage|client|server|remote|local|resp|beresp|bereq|obj)\b', Name.Variable), (r'(vcl )(4.0)(;)$', bygroups(Keyword.Reserved,Name.Constant,Punctuation)), (r'(include\s+)("[^"]+"\s*)(;)', @@ -124,10 +127,9 @@ class VCLLexer(RegexLexer): 'sub': [ include('whitespace'), include('comments'), - include('returns'), include('statements'), (r'{',Punctuation,'#push'), - (r'}',Punctuation,'#pop') + (r'}',Punctuation,'#pop') ], 'comment': [ (r'[^*/]+', Comment.Multiline), @@ -157,18 +159,9 @@ class VCLLexer(RegexLexer): (r'\s+', Text), (r'\\\n', Text) # line continuation ], - 'returns': [ - (r'(return\s)(\()(hash|lookup|ok|deliver|miss|fetch|pass|pipe|purge|retry|restart)(\)\s*;$)', - bygroups(Keyword, Punctuation, Name.Constant, Punctuation)), - (r'(return\s)(\()(\s*synth\s*)(\()(\s*\d+\s*)(,)([^)]+)(\)\s*\)\s*;)', - bygroups(Keyword, Punctuation, Keyword, Punctuation,Number,Punctuation,using(this),Punctuation)), - (r'(return\s)(\()(\s*synth\s*)(\()(\s*\d+\s*)(\)\s*\)\s*;)', - bygroups(Keyword, Punctuation, Keyword, Punctuation,Number,Punctuation)) - ], 'root': [ include('whitespace'), include('comments'), - include('returns'), (r'(sub\s+)([a-zA-Z]\w*)(\s*{)', bygroups(Keyword, Name.Function, Punctuation),'sub'), include('statements'), -- cgit v1.2.1 From 2f524eb51f4be3c2c92e32ab8d3a34975f9beb67 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Thu, 11 Feb 2016 20:25:19 +0100 Subject: Varnish Lexer: Simplify and use machine-generated tokens Reduces complexity, but the list of variables, operators and return statements are largely machine generated from the Varnish source code now, making them somewhat more complete. Still tested on previously mentioned examples and passes with flying colors. 
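For readers wondering what "machine generated" means in practice: the long variable list that lands in the new 'statements' state can be produced by a throwaway script that turns a harvested list of Varnish variable names into a words() rule. The snippet below is an illustrative reconstruction (the hard-coded list stands in for scraping the Varnish sources), not the author's actual generator.

    # 'harvested' would really come from the Varnish sources (VCC variable definitions).
    harvested = ['req.url', 'req.method', 'beresp.ttl', 'obj.hits', 'client.ip', 'now']

    rule = "(words((%s), suffix=r'(\\b|$)'), Name.Variable)," % \
        ', '.join(repr(n) for n in sorted(set(harvested)))
    print(rule)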
--- pygments/lexers/_mapping.py | 2 +- pygments/lexers/varnish.py | 76 +++++++++++++-------------------------------- 2 files changed, 22 insertions(+), 56 deletions(-) diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index b34fcc18..a414a565 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -397,7 +397,7 @@ LEXERS = { 'TypoScriptLexer': ('pygments.lexers.typoscript', 'TypoScript', ('typoscript',), ('*.ts', '*.txt'), ('text/x-typoscript',)), 'UrbiscriptLexer': ('pygments.lexers.urbi', 'UrbiScript', ('urbiscript',), ('*.u',), ('application/x-urbiscript',)), 'VCLLexer': ('pygments.lexers.varnish', 'VCL', ('vcl',), ('*.vcl',), ('text/x-vclsrc',)), - 'VCLSnippetLexer': ('pygments.lexers.varnish', 'VCLSnippets', ('vclsnippets', 'vclsnippet'), ('*.vcl',), ('text/x-vclsnippet',)), + 'VCLSnippetLexer': ('pygments.lexers.varnish', 'VCLSnippets', ('vclsnippets', 'vclsnippet'), (), ('text/x-vclsnippet',)), 'VCTreeStatusLexer': ('pygments.lexers.console', 'VCTreeStatus', ('vctreestatus',), (), ()), 'VGLLexer': ('pygments.lexers.dsls', 'VGL', ('vgl',), ('*.rpf',), ()), 'ValaLexer': ('pygments.lexers.c_like', 'Vala', ('vala', 'vapi'), ('*.vala', '*.vapi'), ('text/x-vala',)), diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 38ac4387..05adf993 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -72,51 +72,24 @@ class VCLLexer(RegexLexer): 'statements': [ (r'(\d\.)?\d+[sdwhmy]',Literal.Date), (r'(\d\.)?\d+ms',Literal.Date), - (r'[~!^&*+=|<>/-]', Operator), + (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function), + (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|miss|fetch|restart)\b', Name.Constant), + (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b', Name.Variable), +(words(( 'obj.status', 'req.hash_always_miss', 'beresp.backend', 'req.esi_level', 'req.can_gzip', 'beresp.ttl', 'obj.uncacheable', 'req.ttl', 'obj.hits', 'client.identity', 'req.hash_ignore_busy', 'obj.reason', 'req.xid', 'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age', 'local.ip', 'beresp.uncacheable', 'req.method', 'beresp.backend.ip', 'now', 'obj.grace', 'req.restarts', 'beresp.keep', 'req.proto', 'resp.proto', 'bereq.xid', 'bereq.between_bytes_timeout', 'req.esi', 'bereq.first_byte_timeout', 'bereq.method', 'bereq.connect_timeout', 'beresp.do_gzip', 'resp.status', 'beresp.do_gunzip', 'beresp.storage_hint', 'resp.is_streaming', 'beresp.do_stream', 'req_top.method', 'bereq.backend', 'beresp.backend.name', 'beresp.status', 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason', 'bereq.retries', 'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint', 'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304', 'server.ip', 'bereq.uncacheable','now'),suffix=r'(\b|$)'), Name.Variable), + (r'[!%&+*-,/<.}{>=|~]+', Operator), + (r'[();]', Punctuation), + (r'[,]+', Punctuation), - (r'(hash|lookup|ok|deliver|miss|fetch|pass|pipe|purge|retry|restart)\b', Name.Constant), - (r'(synth)(\(.*\))', bygroups(Keyword,using(this))), - (r'(return\s)(\(.*\)\s*;)', bygroups(Keyword, using(this))), - (r'(set\s)([^\s]+)(\s*=\s*)(.+)(\s*;)', - bygroups(Keyword, Name.Variable, Punctuation, using(this), Punctuation)), - (r'(unset\s)(\s*[^\s]+)(\s*;)', - bygroups(Keyword, Name.Variable, Punctuation)), - 
(r'(import\s)(\w+)(;)', - bygroups(Keyword.Namespace, Name.Variable.Global, Punctuation)), - (words(('vcl_recv','vcl_pipe','vcl_pass','vcl_hash','vcl_purge', - 'vcl_hit','vcl_miss','vcl_deliver','vcl_synth','vcl_backend_fetch', - 'vcl_backend_response','vcl_backend_error','vcl_init','vcl_fini'), - suffix=r'\b'),Name.Function), - (words(('hash_data','regsub','regsuball','if','else','elsif','elif','synth', - 'synthetic','ban'), suffix=r'\b'),Keyword), - (r'(new\s+)(\w+)(\s*=)(.*)(;)', - bygroups(Keyword.Namespace,Name.Variable.Global,Punctuation,using(this),Punctuation)), - (r'(rollback\s*)(\(\s*\)\s*;)', - bygroups(Keyword,Punctuation)), + (words(('include','hash_data','regsub','regsuball','if','else','elsif','elif','synth', 'synthetic','ban','synth','return','set','unset','import','include','new','rollback','call'), suffix=r'\b'),Keyword), (r'storage\.\w+\.\w+\b', Name.Variable), - (r'(local|remote)\.ip\b', Name.Variable), (words(('true','false')),Name.Builtin), - (r'(call \s*)([^\s;]+)(;)', - bygroups(Keyword,Name.Variable.Global,Punctuation)), - (r'obj\.(ttl|hits)',Name.Variable), (r'\d+\b', Number), - (r'(req_top|req|bereq|obj|resp|beresp)\.http\.\w+\b',Name.Variable), - (r'(req_top|req|bereq)\.(restarts|backend_hint|url|method|xid)\b',Name.Variable), - (r'(resp|beresp|obj)\.(status|reason)\b',Name.Variable), - (r'(beresp|obj)\.(ttl|grace)\b',Name.Variable), - (r'(backend)(\s+\w+)(\s*{)', - bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), - (r'(probe\s)(\s*\w+\s)({)', - bygroups(Keyword,Name.Variable.Global,Punctuation),'probe'), - (r'(acl\s)(\s*\w+\s)({)', - bygroups(Keyword,Name.Variable.Global,Punctuation),'acl'), - (r'[();]', Punctuation), - (r'(client|server)\.(ip|identity)\b',Name.Variable), - (r'(now|req|req_top|storage|client|server|remote|local|resp|beresp|bereq|obj)\b', Name.Variable), - (r'(vcl )(4.0)(;)$', - bygroups(Keyword.Reserved,Name.Constant,Punctuation)), - (r'(include\s+)("[^"]+"\s*)(;)', - bygroups(Keyword,String,Punctuation)), + (r'(backend)(\s+\w+)(\s*{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), + (r'(probe\s)(\s*\w+\s)({)', bygroups(Keyword,Name.Variable.Global,Punctuation),'probe'), + (r'(acl\s)(\s*\w+\s)({)', bygroups(Keyword,Name.Variable.Global,Punctuation),'acl'), + (r'(vcl )(4.0)(;)$', bygroups(Keyword.Reserved,Name.Constant,Punctuation)), + (r'(sub\s+)([a-zA-Z]\w*)(\s*{)', + bygroups(Keyword, Name.Function, Punctuation)), ( r'([a-zA-Z_]\w*)' r'(\.)' r'([a-zA-Z_]\w*)' @@ -124,13 +97,6 @@ class VCLLexer(RegexLexer): bygroups(Name.Function,Punctuation,Name.Function,using(this))), ('[a-zA-Z_]\w*', Name) ], - 'sub': [ - include('whitespace'), - include('comments'), - include('statements'), - (r'{',Punctuation,'#push'), - (r'}',Punctuation,'#pop') - ], 'comment': [ (r'[^*/]+', Comment.Multiline), (r'/\*', Comment.Multiline, '#push'), @@ -162,8 +128,6 @@ class VCLLexer(RegexLexer): 'root': [ include('whitespace'), include('comments'), - (r'(sub\s+)([a-zA-Z]\w*)(\s*{)', - bygroups(Keyword, Name.Function, Punctuation),'sub'), include('statements'), (r'\s+', Text) ], @@ -177,15 +141,17 @@ class VCLSnippetLexer(VCLLexer): name = 'VCLSnippets' aliases = ['vclsnippets', 'vclsnippet'] mimetypes = ['text/x-vclsnippet'] + filenames = [ ] + + def analyse_text(text): + return 0.0 + tokens = { 'snippetspre': [ - (r'\', Name.Variable), - (r'\', Name.Variable), - (r'(req|bereq|obj|resp|beresp|client|server)(\.http)?\.\*(\b|$)',Name.Variable), - (r'\.\.\.+', Comment) + (r'\.\.\.+', Comment), + 
(r'(bereq|req|req_top|resp|beresp|obj|client|server|local|remote|storage)($|\.\*)',Name.Variable) ], 'snippetspost': [ - (r'(req|bereq|obj|resp|beresp|client|server)(\b|$)',Name.Variable), (r'(backend)(\b|$)', Keyword.Reserved) ], 'root': [ -- cgit v1.2.1 From 9834d006ab8c196189a788ead199e0e1542da7b7 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Fri, 12 Feb 2016 14:30:03 -0800 Subject: Update to leave compatibility for other formatters. --- doc/docs/styles.rst | 48 +++++++++++++++++++++++------------- pygments/formatters/html.py | 21 ++-------------- pygments/formatters/terminal256.py | 22 ++++++++++++++--- pygments/style.py | 50 ++++++++++++++++++++++++++++++++++---- 4 files changed, 96 insertions(+), 45 deletions(-) diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst index 766cf866..0076d062 100644 --- a/doc/docs/styles.rst +++ b/doc/docs/styles.rst @@ -152,16 +152,19 @@ Terminal Styles .. versionadded:: 2.2 -Custom styles used with `Terminal256` formatter can also defines foreground -colors using ansi-color. to do so use the `#ansigreen`, `#ansired` or any other -colors defined in ``pygments.style.ansilist``. Foreground ANSI colors will be -mapped to the corresponding `escape codes 30 to 37 +Custom styles used with `Terminal256` formatter can also defines colors using +ansi-color. To do so use the `#ansigreen`, `#ansired` or any other colors +defined in ``pygments.style.ansilist``. Foreground ANSI colors will be mapped +to the corresponding `escape codes 30 to 37 `_ thus respecting any -custom color mapping and themes provided by many terminal emulators. +custom color mapping and themes provided by many terminal emulators. Light +variant are treated for foreground color with and extra bold flag. +`bg:#ansi` will also be respected, except the light variant will be the +same shade as their light variant. See following example where the color of the string `"hello world"` is governed -by the escape sequence `\x1b34;01m` (Ansi Blue) instead of an extended -foreground color. +by the escape sequence `\x1b34;01m` (Ansi Blue, Bold, `41` beeing red background) +instead of an extended foreground & background color. .. sourcecode:: pycon @@ -172,19 +175,30 @@ foreground color. >>> from pygments.formatters import Terminal256Formatter >>> class MyStyle(Style): - >>> - >>> styles = { - >>> Token.String: '#ansiblue', - >>> } + styles = { + Token.String: '#ansiblue bg:#ansired', + } >>> code = 'print("Hello World")' >>> result = highlight(code, Python3Lexer(), Terminal256Formatter(style=MyStyle)) >>> print(result.encode()) - b'print(\x1b[34;01m"\x1b[39m\x1b[34;01mHello World\x1b[39m\x1b[34;01m"\x1b[39m)\n' + b'print(\x1b[34;41;01m"\x1b[39;49;00m\x1b[34;41;01mHello World\x1b[39;49;00m\x1b[34;41;01m"\x1b[39;49;00m)\n' -Style that use `#ansi*` foreground colors might not correctly work with +Style that use `#ansi*` colors might not correctly work with formatters others than ``Terminal256``. `HtmlFormatter` is capable of handling -some `#ansi*` code and will map to the corresponding HTML/CSS color. That is to -say, `#ansiblue` will be converted to `color:blue` , `#ansired` to `color:red`. -The behavior is undefined for argument like `#ansireset`, `#ansiunderline`, -`#ansibold`... etc. +some `#ansi*` code and will map to a fixed HTML/CSS color. For example, +`#ansiblue` will be converted to `color:#0000ff` , `#ansired` to `color:#ff0000`. 
+ +By definition of Ansi color the following color are considered "light" colors, +and will be rendered by most terminal as bold: + + - "darkgray", "red", "green", "yellow", "blue", "fuchsia", "turquoise", + "white" + + +The following are considered "dark" color and will be rendered as non-bold: + + - "black", "darkred", "darkgreen", "brown", "darkblue", "purple", "teal", + "lightgray" + +Exact behavior might depends on the terminal emulator you are using, and its settings. diff --git a/pygments/formatters/html.py b/pygments/formatters/html.py index 38e49f15..b03a4bd5 100644 --- a/pygments/formatters/html.py +++ b/pygments/formatters/html.py @@ -20,23 +20,6 @@ from pygments.token import Token, Text, STANDARD_TYPES from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \ StringIO, string_types, iteritems - -_deansify_map = { - '#darkyellow':'#brown', - '#darkteal': '#turquoise', - '#fusia': '#fushia' -} - - - -def _deansify(color): - if color.startswith('#ansi'): - color = color[5:] - else: - color = '#%s'% color - - return _deansify_map.get(color, color) - try: import ctags except ImportError: @@ -461,7 +444,7 @@ class HtmlFormatter(Formatter): name = self._get_css_class(ttype) style = '' if ndef['color']: - style += 'color: %s; ' % _deansify(ndef['color']) + style += 'color: #%s; ' % ndef['color'] if ndef['bold']: style += 'font-weight: bold; ' if ndef['italic']: @@ -469,7 +452,7 @@ class HtmlFormatter(Formatter): if ndef['underline']: style += 'text-decoration: underline; ' if ndef['bgcolor']: - style += 'background-color: %s; ' % _deansify(ndef['bgcolor']) + style += 'background-color: #%s; ' % ndef['bgcolor'] if ndef['border']: style += 'border: 1px solid #%s; ' % ndef['border'] if style: diff --git a/pygments/formatters/terminal256.py b/pygments/formatters/terminal256.py index 913536c4..1aa19f25 100644 --- a/pygments/formatters/terminal256.py +++ b/pygments/formatters/terminal256.py @@ -50,11 +50,20 @@ class EscapeSequence: attrs = [] if self.fg is not None: if self.fg in ansilist: - attrs.append(codes[self.fg[5:]][2:-1]) + esc = codes[self.fg[5:]] + if ';01m' in esc: + self.bold = True + # extract fg color code. + attrs.append(esc[2:4]) else : attrs.extend(("38", "5", "%i" % self.fg)) if self.bg is not None: - attrs.extend(("48", "5", "%i" % self.bg)) + if self.bg in ansilist: + esc = codes[self.bg[5:]] + # extract fg color code, add 10 for bg. 
+ attrs.append(str(int(esc[2:4])+10)) + else : + attrs.extend(("48", "5", "%i" % self.bg)) if self.bold: attrs.append("01") if self.underline: @@ -201,9 +210,14 @@ class Terminal256Formatter(Formatter): def _setup_styles(self): for ttype, ndef in self.style: escape = EscapeSequence() - if ndef['color']: + # get foreground from ansicolor if set + if ndef['ansicolor']: + escape.fg = self._color_index(ndef['ansicolor']) + elif ndef['color']: escape.fg = self._color_index(ndef['color']) - if ndef['bgcolor']: + if ndef['bgansicolor']: + escape.bg = self._color_index(ndef['bgansicolor']) + elif ndef['bgcolor']: escape.bg = self._color_index(ndef['bgcolor']) if self.usebold and ndef['bold']: escape.bold = True diff --git a/pygments/style.py b/pygments/style.py index 7a272b53..bc318354 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -11,11 +11,31 @@ from pygments.token import Token, STANDARD_TYPES from pygments.util import add_metaclass -from pygments.console import codes -ansilist = ['#ansi'+x for x in codes.keys() if x] +_ansimap = { + ## + '#ansiblack': '000000', + '#ansidarkred': '7f0000', + '#ansidarkgreen': '007f00', + '#ansibrown': '7f7fe0', + '#ansidarkblue': '00007f', + '#ansipurple': '7f007f', + '#ansiteal': '007f7f', + '#ansilightgray': 'e5e5e5', + ### normal + '#ansidarkgray': '555555', + '#ansired': 'ff0000', + '#ansigreen': '00ff00', + '#ansiyellow': 'ffff00', + '#ansiblue': '0000ff', + '#ansifuchsia': 'ff00ff', + '#ansiturquoise': '00ffff', + '#ansiwhite': 'ffffff', + } +ansilist = list(_ansimap.keys()) + class StyleMeta(type): def __new__(mcs, name, bases, dct): @@ -35,7 +55,13 @@ class StyleMeta(type): return col[0]*2 + col[1]*2 + col[2]*2 elif text == '': return '' - assert False, "wrong color format %r" % text + didyoumean = '' + if 'ansi' in text: + import difflib + possibility = difflib.get_close_matches(text, ansilist, 1) + if possibility: + didyoumean = '. Did you mean {} ?'.format(possibility[0]) + assert False, "wrong color format %r%s" % (text, didyoumean) _styles = obj._styles = {} @@ -84,16 +110,30 @@ class StyleMeta(type): def style_for_token(cls, token): t = cls._styles[token] + ansicolor = None + color = t[0] + if color.startswith('#ansi'): + ansicolor = color + color = _ansimap[color] + bgansicolor = None + bgcolor = t[4] + if bgcolor.startswith('#ansi'): + bgansicolor = bgcolor + bgcolor = _ansimap[bgcolor] + return { - 'color': t[0] or None, + 'color': color or None, 'bold': bool(t[1]), 'italic': bool(t[2]), 'underline': bool(t[3]), - 'bgcolor': t[4] or None, + 'bgcolor': bgcolor or None, 'border': t[5] or None, 'roman': bool(t[6]) or None, 'sans': bool(t[7]) or None, 'mono': bool(t[8]) or None, + 'ansicolor': ansicolor, + 'bgansicolor': bgansicolor, + } def list_styles(cls): -- cgit v1.2.1 From 4b70ceead784566eff81456239e3aee99d9b994d Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Fri, 12 Feb 2016 16:06:14 -0800 Subject: Add tests for Ansi functionality. 
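These tests pin down the behaviour introduced above. As a standalone illustration (a sketch assuming the style and Terminal256 changes are applied), a style using `#ansiblue bg:#ansidarkred` should emit an escape containing `34;41`: 34 for the blue foreground, 31 + 10 = 41 for the dark-red background, with `01` appended because blue is one of the "light" colours.

    from pygments import highlight
    from pygments.formatters import Terminal256Formatter
    from pygments.lexers import Python3Lexer
    from pygments.style import Style
    from pygments.token import Token

    class DemoStyle(Style):
        styles = {Token.String: '#ansiblue bg:#ansidarkred'}

    out = highlight('print("Hello World")', Python3Lexer(),
                    Terminal256Formatter(style=DemoStyle))
    print(out.encode())  # the string literal is wrapped in an escape containing '34;41'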
--- tests/test_lexers_other.py | 21 +++++++------- tests/test_terminal_formatter.py | 61 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 11 deletions(-) diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py index bb667c05..d3feaefc 100644 --- a/tests/test_lexers_other.py +++ b/tests/test_lexers_other.py @@ -9,6 +9,7 @@ import glob import os import unittest +import sys from pygments.lexers import guess_lexer from pygments.lexers.scripting import EasytrieveLexer, JclLexer, RexxLexer @@ -42,16 +43,16 @@ class AnalyseTextTest(unittest.TestCase): for lexerToTest in LEXERS_TO_TEST: self._testCanRecognizeAndGuessExampleFiles(lexerToTest) - -class EasyTrieveLexerTest(unittest.TestCase): - def testCanGuessFromText(self): - self.assertLess(0, EasytrieveLexer.analyse_text('MACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text('\nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text(' \nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text(' \n MACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text('*\nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text( - '*\n *\n\n \n*\n MACRO')) +if sys.version_info > (2,7,): + class EasyTrieveLexerTest(unittest.TestCase): + def testCanGuessFromText(self): + self.assertLess(0, EasytrieveLexer.analyse_text('MACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text('\nMACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text(' \nMACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text(' \n MACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text('*\nMACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text( + '*\n *\n\n \n*\n MACRO')) class RexxLexerTest(unittest.TestCase): diff --git a/tests/test_terminal_formatter.py b/tests/test_terminal_formatter.py index 07337cd5..f3836bd1 100644 --- a/tests/test_terminal_formatter.py +++ b/tests/test_terminal_formatter.py @@ -10,11 +10,17 @@ from __future__ import print_function import unittest +import sys import re from pygments.util import StringIO from pygments.lexers.sql import PlPgsqlLexer -from pygments.formatters import TerminalFormatter +from pygments.formatters import TerminalFormatter,Terminal256Formatter, HtmlFormatter, LatexFormatter + +from pygments.style import Style +from pygments.token import Token +from pygments.lexers import Python3Lexer +from pygments import highlight DEMO_TEXT = '''\ -- comment @@ -49,3 +55,56 @@ class TerminalFormatterTest(unittest.TestCase): for a, b in zip(DEMO_TEXT.splitlines(), plain.splitlines()): self.assertTrue(a in b) + + + + + + +class MyStyle(Style): + + styles = { + Token.Comment: '#ansidarkgray', + Token.String: '#ansiblue bg:#ansidarkred', + Token.Number : '#ansigreen bg:#ansidarkgreen', + Token.Number.Hex: '#ansidarkgreen bg:#ansired', + } + + + +code = ''' +# this should be a comment +print("Hello World") +async def function(a,b,c, *d, **kwarg:Bool)->Bool: + pass + return 123, 0xb3e3 + +''' + + +termtest = lambda x: highlight(x, Python3Lexer(), Terminal256Formatter(style=MyStyle)) +if sys.version_info > (2,7): + class Terminal256FormatterTest(unittest.TestCase): + + + def test_style_html(self): + style = HtmlFormatter(style=MyStyle).get_style_defs() + self.assertIn('#555555',style, "ansigray for comment not html css style") + + def test_tex_works(self): + """check tex Formatter don't crash""" + highlight(code, Python3Lexer(), LatexFormatter(style=MyStyle)) + + def test_html_works(self): + highlight(code, Python3Lexer(), HtmlFormatter(style=MyStyle)) + + def test_256esc_seq(self): + """ + test that a 
few escape sequences are actualy used when using #ansi<> color codes + """ + self.assertIn('32;41',termtest('0x123')) + self.assertIn('32;42',termtest('123')) + self.assertIn('30;01',termtest('#comment')) + self.assertIn('34;41',termtest('"String"')) + + -- cgit v1.2.1 From 1b19c19d8563ffd766feefea2e2fdccc81ee929f Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 14 Feb 2016 15:02:14 +0100 Subject: Varnish: style nits, make analyse_text faster --- pygments/lexers/varnish.py | 138 ++++++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 57 deletions(-) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 05adf993..90d8d292 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -1,162 +1,186 @@ # -*- coding: utf-8 -*- """ pygments.lexers.varnish - ~~~~~~~~~~~~~~~~~~~~~~ + ~~~~~~~~~~~~~~~~~~~~~~~ Lexers for Varnish configuration - :copyright: Copyright 2016 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ -import re - -from pygments.lexer import RegexLexer, include, bygroups, using, this, inherit, words, \ - default +from pygments.lexer import RegexLexer, include, bygroups, using, this, \ + inherit, words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Literal __all__ = ['VCLLexer', 'VCLSnippetLexer'] + class VCLLexer(RegexLexer): """ For Varnish Configuration Language (VCL). + .. versionadded:: 2.2 """ name = 'VCL' aliases = ['vcl'] - filenames = [ '*.vcl' ] + filenames = ['*.vcl'] mimetypes = ['text/x-vclsrc'] def analyse_text(text): # If the very first line is 'vcl 4.0;' it's pretty much guaranteed # that this is VCL - if re.search('^vcl 4\.0;\n', text): + if text.startswith('vcl 4.0;'): return 1.0 - # Skip over comments and blank lines # This is accurate enough that returning 0.9 is reasonable. # Almost no VCL files start without some comments. 
- if re.search('^((\s+)|(#[^\n]*\n)|(\n)|(\s*//[^\n]*\n)|(/\*[^*/]*\*/))*vcl 4\.0;', text): + elif '\nvcl 4\.0;' in text[:1000]: return 0.9 - return 0.0 - tokens = { 'probe': [ include('whitespace'), include('comments'), (r'(\.\w+)(\s*=\s*)([^;]*)(;)', - bygroups(Name.Attribute, Operator, using(this), Punctuation)), - (r'}', Punctuation, '#pop') + bygroups(Name.Attribute, Operator, using(this), Punctuation)), + (r'}', Punctuation, '#pop'), ], 'acl': [ include('whitespace'), include('comments'), - (r'[!/]+',Operator), - (r';',Punctuation), - (r'\d+',Number), - (r'}', Punctuation, '#pop') + (r'[!/]+', Operator), + (r';', Punctuation), + (r'\d+', Number), + (r'}', Punctuation, '#pop'), ], 'backend': [ include('whitespace'), (r'(\.probe)(\s*=\s*)(\w+)(;)', - bygroups(Name.Attribute,Operator,Name.Variable.Global,Punctuation)), + bygroups(Name.Attribute, Operator, Name.Variable.Global, Punctuation)), (r'(\.probe)(\s*=\s*)({)', - bygroups(Name.Attribute,Operator,Punctuation),'probe'), + bygroups(Name.Attribute, Operator, Punctuation), 'probe'), (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)', - bygroups(Name.Attribute, Operator, using(this), Punctuation)), - (r'{',Punctuation,'#push'), - (r'}',Punctuation,'#pop') + bygroups(Name.Attribute, Operator, using(this), Punctuation)), + (r'{', Punctuation, '#push'), + (r'}', Punctuation, '#pop'), ], 'statements': [ - (r'(\d\.)?\d+[sdwhmy]',Literal.Date), - (r'(\d\.)?\d+ms',Literal.Date), - (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function), - (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|miss|fetch|restart)\b', Name.Constant), + (r'(\d\.)?\d+[sdwhmy]', Literal.Date), + (r'(\d\.)?\d+ms', Literal.Date), + (r'(vcl_pass|vcl_hash|vcl_hit|vcl_init|vcl_backend_fetch|vcl_pipe|' + r'vcl_backend_response|vcl_synth|vcl_deliver|vcl_backend_error|' + r'vcl_fini|vcl_recv|vcl_purge|vcl_miss)\b', Name.Function), + (r'(pipe|retry|hash|synth|deliver|purge|abandon|lookup|pass|fail|ok|' + r'miss|fetch|restart)\b', Name.Constant), (r'(beresp|obj|resp|req|req_top|bereq)\.http\.[a-zA-Z_-]+\b', Name.Variable), -(words(( 'obj.status', 'req.hash_always_miss', 'beresp.backend', 'req.esi_level', 'req.can_gzip', 'beresp.ttl', 'obj.uncacheable', 'req.ttl', 'obj.hits', 'client.identity', 'req.hash_ignore_busy', 'obj.reason', 'req.xid', 'req_top.proto', 'beresp.age', 'obj.proto', 'obj.age', 'local.ip', 'beresp.uncacheable', 'req.method', 'beresp.backend.ip', 'now', 'obj.grace', 'req.restarts', 'beresp.keep', 'req.proto', 'resp.proto', 'bereq.xid', 'bereq.between_bytes_timeout', 'req.esi', 'bereq.first_byte_timeout', 'bereq.method', 'bereq.connect_timeout', 'beresp.do_gzip', 'resp.status', 'beresp.do_gunzip', 'beresp.storage_hint', 'resp.is_streaming', 'beresp.do_stream', 'req_top.method', 'bereq.backend', 'beresp.backend.name', 'beresp.status', 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason', 'bereq.retries', 'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint', 'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304', 'server.ip', 'bereq.uncacheable','now'),suffix=r'(\b|$)'), Name.Variable), + (words(( + 'obj.status', 'req.hash_always_miss', 'beresp.backend', 'req.esi_level', + 'req.can_gzip', 'beresp.ttl', 'obj.uncacheable', 'req.ttl', 'obj.hits', + 'client.identity', 'req.hash_ignore_busy', 'obj.reason', 'req.xid', + 'req_top.proto', 
'beresp.age', 'obj.proto', 'obj.age', 'local.ip', + 'beresp.uncacheable', 'req.method', 'beresp.backend.ip', 'now', + 'obj.grace', 'req.restarts', 'beresp.keep', 'req.proto', 'resp.proto', + 'bereq.xid', 'bereq.between_bytes_timeout', 'req.esi', + 'bereq.first_byte_timeout', 'bereq.method', 'bereq.connect_timeout', + 'beresp.do_gzip', 'resp.status', 'beresp.do_gunzip', + 'beresp.storage_hint', 'resp.is_streaming', 'beresp.do_stream', + 'req_top.method', 'bereq.backend', 'beresp.backend.name', 'beresp.status', + 'req.url', 'obj.keep', 'obj.ttl', 'beresp.reason', 'bereq.retries', + 'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip', + 'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint', + 'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304', + 'server.ip', 'bereq.uncacheable', 'now'), suffix=r'(\b|$)'), + Name.Variable), (r'[!%&+*-,/<.}{>=|~]+', Operator), (r'[();]', Punctuation), (r'[,]+', Punctuation), - (words(('include','hash_data','regsub','regsuball','if','else','elsif','elif','synth', 'synthetic','ban','synth','return','set','unset','import','include','new','rollback','call'), suffix=r'\b'),Keyword), + (words(('include', 'hash_data', 'regsub', 'regsuball', 'if', 'else', + 'elsif', 'elif', 'synth', 'synthetic', 'ban', 'synth', + 'return', 'set', 'unset', 'import', 'include', 'new', + 'rollback', 'call'), suffix=r'\b'), + Keyword), (r'storage\.\w+\.\w+\b', Name.Variable), - (words(('true','false')),Name.Builtin), + (words(('true', 'false')), Name.Builtin), (r'\d+\b', Number), - (r'(backend)(\s+\w+)(\s*{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), - (r'(probe\s)(\s*\w+\s)({)', bygroups(Keyword,Name.Variable.Global,Punctuation),'probe'), - (r'(acl\s)(\s*\w+\s)({)', bygroups(Keyword,Name.Variable.Global,Punctuation),'acl'), - (r'(vcl )(4.0)(;)$', bygroups(Keyword.Reserved,Name.Constant,Punctuation)), + (r'(backend)(\s+\w+)(\s*{)', + bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), + (r'(probe\s)(\s*\w+\s)({)', + bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'), + (r'(acl\s)(\s*\w+\s)({)', + bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'), + (r'(vcl )(4.0)(;)$', + bygroups(Keyword.Reserved, Name.Constant, Punctuation)), (r'(sub\s+)([a-zA-Z]\w*)(\s*{)', bygroups(Keyword, Name.Function, Punctuation)), - ( r'([a-zA-Z_]\w*)' - r'(\.)' - r'([a-zA-Z_]\w*)' - r'(\s*\(.*\))', - bygroups(Name.Function,Punctuation,Name.Function,using(this))), - ('[a-zA-Z_]\w*', Name) + (r'([a-zA-Z_]\w*)' + r'(\.)' + r'([a-zA-Z_]\w*)' + r'(\s*\(.*\))', + bygroups(Name.Function, Punctuation, Name.Function, using(this))), + ('[a-zA-Z_]\w*', Name), ], 'comment': [ (r'[^*/]+', Comment.Multiline), (r'/\*', Comment.Multiline, '#push'), (r'\*/', Comment.Multiline, '#pop'), - (r'[*/]', Comment.Multiline) + (r'[*/]', Comment.Multiline), ], 'comments': [ (r'#.*$', Comment), (r'/\*', Comment.Multiline, 'comment'), - (r'//.*$', Comment) + (r'//.*$', Comment), ], 'string': [ (r'"', String, '#pop'), (r'[^"\n]+', String), # all other characters - - ], + ], 'multistring': [ (r'[^"}]', String), (r'"}', String, '#pop'), - (r'["}]', String) + (r'["}]', String), ], 'whitespace': [ (r'L?"', String, 'string'), (r'{"', String, 'multistring'), (r'\n', Text), (r'\s+', Text), - (r'\\\n', Text) # line continuation + (r'\\\n', Text), # line continuation ], 'root': [ include('whitespace'), include('comments'), include('statements'), - (r'\s+', Text) + (r'\s+', Text), ], } + class VCLSnippetLexer(VCLLexer): """ For Varnish 
Configuration Language snippets. - """ + .. versionadded:: 2.2 + """ name = 'VCLSnippets' aliases = ['vclsnippets', 'vclsnippet'] mimetypes = ['text/x-vclsnippet'] - filenames = [ ] - - def analyse_text(text): - return 0.0 + filenames = [] tokens = { 'snippetspre': [ (r'\.\.\.+', Comment), - (r'(bereq|req|req_top|resp|beresp|obj|client|server|local|remote|storage)($|\.\*)',Name.Variable) - ], + (r'(bereq|req|req_top|resp|beresp|obj|client|server|local|remote|' + r'storage)($|\.\*)', Name.Variable), + ], 'snippetspost': [ - (r'(backend)(\b|$)', Keyword.Reserved) + (r'(backend)(\b|$)', Keyword.Reserved), ], 'root': [ include('snippetspre'), inherit, - include('snippetspost') - ] + include('snippetspost'), + ], } -- cgit v1.2.1 From 85daa4b5f9c93a7a90841bf2f22dca9b29a55def Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 14 Feb 2016 15:02:40 +0100 Subject: Add changelog entry. --- CHANGES | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES b/CHANGES index e05e20ac..25361c5a 100644 --- a/CHANGES +++ b/CHANGES @@ -15,6 +15,7 @@ Version 2.2 * AMPL * TypoScript (#1173) + * Varnish config (PR#554) - Added `lexers.find_lexer_class_by_name()` (#1203) -- cgit v1.2.1 From 04e2064dc6b26731aade77a7d131b6d68c3b4cba Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 14 Feb 2016 15:04:09 +0100 Subject: Update changelog, set version to 2.1.1 --- CHANGES | 2 +- pygments/__init__.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 395515a2..fbe7bc3b 100644 --- a/CHANGES +++ b/CHANGES @@ -9,7 +9,7 @@ pull request numbers to the requests at Version 2.1.1 ------------- -(in development) +(relased Feb 14, 2016) - Fixed Jython compatibility (#1205) - Fixed HTML formatter output with leading empty lines (#1111) diff --git a/pygments/__init__.py b/pygments/__init__.py index 7bd7557f..0c17500e 100644 --- a/pygments/__init__.py +++ b/pygments/__init__.py @@ -26,7 +26,7 @@ :license: BSD, see LICENSE for details. """ -__version__ = '2.1' +__version__ = '2.1.1' __docformat__ = 'restructuredtext' __all__ = ['lex', 'format', 'highlight'] diff --git a/setup.py b/setup.py index a8667fda..b15e0bc9 100755 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ else: setup( name = 'Pygments', - version = '2.1', + version = '2.1.1', url = 'http://pygments.org/', license = 'BSD License', author = 'Georg Brandl', -- cgit v1.2.1 -- cgit v1.2.1 From 2882c34a141f4f36ded54d1473c6d4f73684ffc7 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 14 Feb 2016 16:47:28 +0100 Subject: some pep8 fixups --- pygments/__init__.py | 20 +++++++++----------- pygments/console.py | 24 ++++++++++++------------ pygments/filter.py | 8 ++++---- pygments/formatter.py | 2 +- pygments/lexer.py | 4 ++-- pygments/scanner.py | 3 ++- pygments/sphinxext.py | 1 + pygments/token.py | 31 ++++++++++++++++--------------- 8 files changed, 47 insertions(+), 46 deletions(-) diff --git a/pygments/__init__.py b/pygments/__init__.py index e825aa39..ffac59ef 100644 --- a/pygments/__init__.py +++ b/pygments/__init__.py @@ -25,6 +25,9 @@ :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ +import sys + +from pygments.util import StringIO, BytesIO __version__ = '2.2a0' __docformat__ = 'restructuredtext' @@ -32,11 +35,6 @@ __docformat__ = 'restructuredtext' __all__ = ['lex', 'format', 'highlight'] -import sys - -from pygments.util import StringIO, BytesIO - - def lex(code, lexer): """ Lex ``code`` with ``lexer`` and return an iterable of tokens. 
@@ -44,9 +42,9 @@ def lex(code, lexer): try: return lexer.get_tokens(code) except TypeError as err: - if isinstance(err.args[0], str) and \ - ('unbound method get_tokens' in err.args[0] or - 'missing 1 required positional argument' in err.args[0]): + if (isinstance(err.args[0], str) and + ('unbound method get_tokens' in err.args[0] or + 'missing 1 required positional argument' in err.args[0])): raise TypeError('lex() argument must be a lexer instance, ' 'not a class') raise @@ -68,9 +66,9 @@ def format(tokens, formatter, outfile=None): # pylint: disable=redefined-builti else: formatter.format(tokens, outfile) except TypeError as err: - if isinstance(err.args[0], str) and \ - ('unbound method format' in err.args[0] or - 'missing 1 required positional argument' in err.args[0]): + if (isinstance(err.args[0], str) and + ('unbound method format' in err.args[0] or + 'missing 1 required positional argument' in err.args[0])): raise TypeError('format() argument must be a formatter instance, ' 'not a class') raise diff --git a/pygments/console.py b/pygments/console.py index 4a2c9acb..4aaf5fcb 100644 --- a/pygments/console.py +++ b/pygments/console.py @@ -12,18 +12,18 @@ esc = "\x1b[" codes = {} -codes[""] = "" -codes["reset"] = esc + "39;49;00m" +codes[""] = "" +codes["reset"] = esc + "39;49;00m" -codes["bold"] = esc + "01m" -codes["faint"] = esc + "02m" -codes["standout"] = esc + "03m" +codes["bold"] = esc + "01m" +codes["faint"] = esc + "02m" +codes["standout"] = esc + "03m" codes["underline"] = esc + "04m" -codes["blink"] = esc + "05m" -codes["overline"] = esc + "06m" +codes["blink"] = esc + "05m" +codes["overline"] = esc + "06m" -dark_colors = ["black", "darkred", "darkgreen", "brown", "darkblue", - "purple", "teal", "lightgray"] +dark_colors = ["black", "darkred", "darkgreen", "brown", "darkblue", + "purple", "teal", "lightgray"] light_colors = ["darkgray", "red", "green", "yellow", "blue", "fuchsia", "turquoise", "white"] @@ -35,10 +35,10 @@ for d, l in zip(dark_colors, light_colors): del d, l, x -codes["darkteal"] = codes["turquoise"] +codes["darkteal"] = codes["turquoise"] codes["darkyellow"] = codes["brown"] -codes["fuscia"] = codes["fuchsia"] -codes["white"] = codes["bold"] +codes["fuscia"] = codes["fuchsia"] +codes["white"] = codes["bold"] def reset_color(): diff --git a/pygments/filter.py b/pygments/filter.py index c8176ed9..f3082037 100644 --- a/pygments/filter.py +++ b/pygments/filter.py @@ -34,10 +34,10 @@ def simplefilter(f): yield ttype, value.lower() """ return type(f.__name__, (FunctionFilter,), { - 'function': f, - '__module__': getattr(f, '__module__'), - '__doc__': f.__doc__ - }) + '__module__': getattr(f, '__module__'), + '__doc__': f.__doc__, + 'function': f, + }) class Filter(object): diff --git a/pygments/formatter.py b/pygments/formatter.py index addd07d7..9f22b3bc 100644 --- a/pygments/formatter.py +++ b/pygments/formatter.py @@ -65,7 +65,7 @@ class Formatter(object): def __init__(self, **options): self.style = _lookup_style(options.get('style', 'default')) - self.full = get_bool_opt(options, 'full', False) + self.full = get_bool_opt(options, 'full', False) self.title = options.get('title', '') self.encoding = options.get('encoding', None) or None if self.encoding in ('guess', 'chardet'): diff --git a/pygments/lexer.py b/pygments/lexer.py index dd6c01e4..f16d8106 100644 --- a/pygments/lexer.py +++ b/pygments/lexer.py @@ -319,8 +319,8 @@ def bygroups(*args): if data is not None: if ctx: ctx.pos = match.start(i + 1) - for item in action( - lexer, _PseudoMatch(match.start(i 
+ 1), data), ctx): + for item in action(lexer, + _PseudoMatch(match.start(i + 1), data), ctx): if item: yield item if ctx: diff --git a/pygments/scanner.py b/pygments/scanner.py index 35dbbadd..3ff11e4a 100644 --- a/pygments/scanner.py +++ b/pygments/scanner.py @@ -66,7 +66,8 @@ class Scanner(object): def test(self, pattern): """Apply a pattern on the current position and check - if it patches. Doesn't touch pos.""" + if it patches. Doesn't touch pos. + """ return self.check(pattern) is not None def scan(self, pattern): diff --git a/pygments/sphinxext.py b/pygments/sphinxext.py index 2dc9810f..de8cd73b 100644 --- a/pygments/sphinxext.py +++ b/pygments/sphinxext.py @@ -57,6 +57,7 @@ FILTERDOC = ''' ''' + class PygmentsDoc(Directive): """ A directive to collect all lexers/formatters/filters and generate diff --git a/pygments/token.py b/pygments/token.py index 40c3214a..fbd5b805 100644 --- a/pygments/token.py +++ b/pygments/token.py @@ -9,6 +9,7 @@ :license: BSD, see LICENSE for details. """ + class _TokenType(tuple): parent = None @@ -52,30 +53,30 @@ class _TokenType(tuple): return self -Token = _TokenType() +Token = _TokenType() # Special token types -Text = Token.Text -Whitespace = Text.Whitespace -Escape = Token.Escape -Error = Token.Error +Text = Token.Text +Whitespace = Text.Whitespace +Escape = Token.Escape +Error = Token.Error # Text that doesn't belong to this lexer (e.g. HTML in PHP) -Other = Token.Other +Other = Token.Other # Common token types for source code -Keyword = Token.Keyword -Name = Token.Name -Literal = Token.Literal -String = Literal.String -Number = Literal.Number +Keyword = Token.Keyword +Name = Token.Name +Literal = Token.Literal +String = Literal.String +Number = Literal.Number Punctuation = Token.Punctuation -Operator = Token.Operator -Comment = Token.Comment +Operator = Token.Operator +Comment = Token.Comment # Generic types for non-source code -Generic = Token.Generic +Generic = Token.Generic -# String and some others are not direct childs of Token. +# String and some others are not direct children of Token. # alias them: Token.Token = Token Token.String = String -- cgit v1.2.1 From fff7caebc1befe71ab088a5d245fbb7708cfc0f2 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 14 Feb 2016 17:10:00 +0100 Subject: Fix most complaints from regexlint. 
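Many of the fixes below simply reorder alternations so that a longer literal is tried before a prefix of it (e.g. `INOUT|IN|OUT` instead of `IN|INOUT|OUT`). Python's regex alternation is first-match-wins, so the original order can silently truncate matches. A quick illustration (editor's example, not part of the commit):

    import re

    print(re.match(r'(IN|INOUT|OUT)', 'INOUT').group())   # 'IN'    -- the short branch wins
    print(re.match(r'(INOUT|IN|OUT)', 'INOUT').group())   # 'INOUT' -- longest-first fixes it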
--- pygments/lexers/algebra.py | 6 +-- pygments/lexers/ampl.py | 63 ++++++++++++----------- pygments/lexers/business.py | 106 ++++++++++++++++++++------------------- pygments/lexers/c_cpp.py | 3 +- pygments/lexers/configs.py | 32 ++++++------ pygments/lexers/csound.py | 30 +++++------ pygments/lexers/css.py | 6 +-- pygments/lexers/dotnet.py | 45 +++++++++-------- pygments/lexers/dsls.py | 6 +-- pygments/lexers/elm.py | 14 +++--- pygments/lexers/erlang.py | 8 +-- pygments/lexers/esoteric.py | 4 +- pygments/lexers/felix.py | 2 +- pygments/lexers/fortran.py | 9 ++-- pygments/lexers/haskell.py | 2 +- pygments/lexers/idl.py | 13 ++--- pygments/lexers/j.py | 2 +- pygments/lexers/javascript.py | 80 ++++++++++++++--------------- pygments/lexers/julia.py | 48 ++++++++++-------- pygments/lexers/jvm.py | 4 +- pygments/lexers/lisp.py | 83 +++++++++++++++--------------- pygments/lexers/markup.py | 4 +- pygments/lexers/modula2.py | 2 +- pygments/lexers/oberon.py | 6 +-- pygments/lexers/parasail.py | 2 +- pygments/lexers/praat.py | 34 ++++++------- pygments/lexers/python.py | 64 +++++++++++++---------- pygments/lexers/qvt.py | 84 ++++++++++++++++--------------- pygments/lexers/rdf.py | 10 ++-- pygments/lexers/scripting.py | 21 ++++---- pygments/lexers/supercollider.py | 8 +-- pygments/lexers/testing.py | 2 +- pygments/lexers/theorem.py | 31 ++++++------ pygments/lexers/typoscript.py | 62 +++++++++++------------ pygments/lexers/varnish.py | 28 +++++------ 35 files changed, 479 insertions(+), 445 deletions(-) diff --git a/pygments/lexers/algebra.py b/pygments/lexers/algebra.py index fc54c3c3..79460ad4 100644 --- a/pygments/lexers/algebra.py +++ b/pygments/lexers/algebra.py @@ -104,9 +104,9 @@ class MathematicaLexer(RegexLexer): (r'#\d*', Name.Variable), (r'([a-zA-Z]+[a-zA-Z0-9]*)', Name), - (r'-?[0-9]+\.[0-9]*', Number.Float), - (r'-?[0-9]*\.[0-9]+', Number.Float), - (r'-?[0-9]+', Number.Integer), + (r'-?\d+\.\d*', Number.Float), + (r'-?\d*\.\d+', Number.Float), + (r'-?\d+', Number.Integer), (words(operators), Operator), (words(punctuation), Punctuation), diff --git a/pygments/lexers/ampl.py b/pygments/lexers/ampl.py index f57b486f..c3ca80d4 100644 --- a/pygments/lexers/ampl.py +++ b/pygments/lexers/ampl.py @@ -9,13 +9,10 @@ :license: BSD, see LICENSE for details. 
""" -import re - -from pygments.lexer import RegexLexer, bygroups, using, this +from pygments.lexer import RegexLexer, bygroups, using, this, words from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation - __all__ = ['AmplLexer'] @@ -30,27 +27,30 @@ class AmplLexer(RegexLexer): filenames = ['*.run'] tokens = { - 'root':[ + 'root': [ (r'\n', Text), (r'\s+', Text.Whitespace), (r'#.*?\n', Comment.Single), (r'/[*](.|\n)*?[*]/', Comment.Multiline), - (r'(call|cd|close|commands|data|delete|display|drop|end|environ|' - r'exit|expand|include|load|model|objective|option|problem|purge|' - r'quit|redeclare|reload|remove|reset|restore|shell|show|solexpand|' - r'solution|solve|update|unload|xref|' - r'coeff|coef|cover|obj|interval|' - r'default|from|to|to_come|net_in|net_out|dimen|dimension|' - r'check|complements|write|end|function|pipe|' - r'format|if|then|else|in|while|repeat|for)\b', Keyword.Reserved), - (r'(integer|binary|symbolic|ordered|circular|reversed|IN|INOUT|OUT|LOCAL)', + (words(( + 'call', 'cd', 'close', 'commands', 'data', 'delete', 'display', + 'drop', 'end', 'environ', 'exit', 'expand', 'include', 'load', + 'model', 'objective', 'option', 'problem', 'purge', 'quit', + 'redeclare', 'reload', 'remove', 'reset', 'restore', 'shell', + 'show', 'solexpand', 'solution', 'solve', 'update', 'unload', + 'xref', 'coeff', 'coef', 'cover', 'obj', 'interval', 'default', + 'from', 'to', 'to_come', 'net_in', 'net_out', 'dimen', + 'dimension', 'check', 'complements', 'write', 'function', + 'pipe', 'format', 'if', 'then', 'else', 'in', 'while', 'repeat', + 'for'), suffix=r'\b'), Keyword.Reserved), + (r'(integer|binary|symbolic|ordered|circular|reversed|INOUT|IN|OUT|LOCAL)', Keyword.Type), (r'\".*?\"', String.Double), (r'\'.*?\'', String.Single), (r'[()\[\]{},;:]+', Punctuation), - (r'\b(\w+)(\.)(astatus|init|init0|lb|lb0|lb1|lb2|lrc|' - r'lslack|rc|relax|slack|sstatus|status|ub|ub0|ub1|' - r'ub2|urc|uslack|val)', + (r'\b(\w+)(\.)(astatus|init0|init|lb0|lb1|lb2|lb|lrc|' + r'lslack|rc|relax|slack|sstatus|status|ub0|ub1|ub2|' + r'ub|urc|uslack|val)', bygroups(Name.Variable, Punctuation, Keyword.Reserved)), (r'(set|param|var|arc|minimize|maximize|subject to|s\.t\.|subj to|' r'node|table|suffix|read table|write table)(\s+)(\w+)', @@ -58,21 +58,26 @@ class AmplLexer(RegexLexer): (r'(param)(\s*)(:)(\s*)(\w+)(\s*)(:)(\s*)((\w|\s)+)', bygroups(Keyword.Declaration, Text, Punctuation, Text, Name.Variable, Text, Punctuation, Text, Name.Variable)), - (r'(let|fix|unfix)(\s*)(\{.*\}|)(\s*)(\w+)', + (r'(let|fix|unfix)(\s*)((?:\{.*\})?)(\s*)(\w+)', bygroups(Keyword.Declaration, Text, using(this), Text, Name.Variable)), - (r'\b(abs|acos|acosh|alias|' - r'asin|asinh|atan|atan2|atanh|ceil|ctime|cos|exp|floor|log|log10|' - r'max|min|precision|round|sin|sinh|sqrt|tan|tanh|time|trunc|Beta|' - r'Cauchy|Exponential|Gamma|Irand224|Normal|Normal01|Poisson|Uniform|Uniform01|' - r'num|num0|ichar|char|length|substr|sprintf|match|sub|gsub|print|printf' - r'next|nextw|prev|prevw|first|last|ord|ord0|card|arity|indexarity)\b', - Name.Builtin), + (words(( + 'abs', 'acos', 'acosh', 'alias', 'asin', 'asinh', 'atan', 'atan2', + 'atanh', 'ceil', 'ctime', 'cos', 'exp', 'floor', 'log', 'log10', + 'max', 'min', 'precision', 'round', 'sin', 'sinh', 'sqrt', 'tan', + 'tanh', 'time', 'trunc', 'Beta', 'Cauchy', 'Exponential', 'Gamma', + 'Irand224', 'Normal', 'Normal01', 'Poisson', 'Uniform', 'Uniform01', + 'num', 'num0', 'ichar', 'char', 'length', 'substr', 'sprintf', + 'match', 'sub', 'gsub', 'print', 
'printf', 'next', 'nextw', 'prev', + 'prevw', 'first', 'last', 'ord', 'ord0', 'card', 'arity', + 'indexarity'), prefix=r'\b', suffix=r'\b'), Name.Builtin), (r'(\+|\-|\*|/|\*\*|=|<=|>=|==|\||\^|<|>|\!|\.\.|:=|\&|\!=|<<|>>)', Operator), - (r'(or|exists|forall|and|in|not|within|union|diff|' - r'difference|symdiff|inter|intersect|intersection|' - r'cross|setof|by|less|sum|prod|product|div|mod)', - Keyword.Reserved), #Operator.Name but not enough emphasized with Operator.Name + (words(( + 'or', 'exists', 'forall', 'and', 'in', 'not', 'within', 'union', + 'diff', 'difference', 'symdiff', 'inter', 'intersect', + 'intersection', 'cross', 'setof', 'by', 'less', 'sum', 'prod', + 'product', 'div', 'mod'), suffix=r'\b'), + Keyword.Reserved), # Operator.Name but not enough emphasized with that (r'(\d+\.(?!\.)\d*|\.(?!.)\d+)([eE][+-]?\d+)?', Number.Float), (r'\d+([eE][+-]?\d+)?', Number.Integer), (r'[+-]?Infinity', Number.Integer), diff --git a/pygments/lexers/business.py b/pygments/lexers/business.py index ea888245..43978690 100644 --- a/pygments/lexers/business.py +++ b/pygments/lexers/business.py @@ -57,9 +57,9 @@ class CobolLexer(RegexLexer): ], 'core': [ # Figurative constants - (r'(^|(?<=[^0-9a-z_\-]))(ALL\s+)?' + (r'(^|(?<=[^\w\-]))(ALL\s+)?' r'((ZEROES)|(HIGH-VALUE|LOW-VALUE|QUOTE|SPACE|ZERO)(S)?)' - r'\s*($|(?=[^0-9a-z_\-]))', + r'\s*($|(?=[^\w\-]))', Name.Constant), # Reserved words STATEMENTS and other bolds @@ -79,8 +79,8 @@ class CobolLexer(RegexLexer): 'RETURN', 'REWRITE', 'SCREEN', 'SD', 'SEARCH', 'SECTION', 'SET', 'SORT', 'START', 'STOP', 'STRING', 'SUBTRACT', 'SUPPRESS', 'TERMINATE', 'THEN', 'UNLOCK', 'UNSTRING', 'USE', 'VALIDATE', - 'WORKING-STORAGE', 'WRITE'), prefix=r'(^|(?<=[^0-9a-z_\-]))', - suffix=r'\s*($|(?=[^0-9a-z_\-]))'), + 'WORKING-STORAGE', 'WRITE'), prefix=r'(^|(?<=[^\w\-]))', + suffix=r'\s*($|(?=[^\w\-]))'), Keyword.Reserved), # Reserved words @@ -89,33 +89,33 @@ class CobolLexer(RegexLexer): 'ALPHABET', 'ALPHABETIC', 'ALPHABETIC-LOWER', 'ALPHABETIC-UPPER', 'ALPHANUMERIC', 'ALPHANUMERIC-EDITED', 'ALSO', 'ALTER', 'ALTERNATE' 'ANY', 'ARE', 'AREA', 'AREAS', 'ARGUMENT-NUMBER', 'ARGUMENT-VALUE', 'AS', - 'ASCENDING', 'ASSIGN', 'AT', 'AUTO', 'AUTO-SKIP', 'AUTOMATIC', 'AUTOTERMINATE', - 'BACKGROUND-COLOR', 'BASED', 'BEEP', 'BEFORE', 'BELL', + 'ASCENDING', 'ASSIGN', 'AT', 'AUTO', 'AUTO-SKIP', 'AUTOMATIC', + 'AUTOTERMINATE', 'BACKGROUND-COLOR', 'BASED', 'BEEP', 'BEFORE', 'BELL', 'BLANK', 'BLINK', 'BLOCK', 'BOTTOM', 'BY', 'BYTE-LENGTH', 'CHAINING', - 'CHARACTER', 'CHARACTERS', 'CLASS', 'CODE', 'CODE-SET', 'COL', 'COLLATING', - 'COLS', 'COLUMN', 'COLUMNS', 'COMMA', 'COMMAND-LINE', 'COMMIT', 'COMMON', - 'CONSTANT', 'CONTAINS', 'CONTENT', 'CONTROL', + 'CHARACTER', 'CHARACTERS', 'CLASS', 'CODE', 'CODE-SET', 'COL', + 'COLLATING', 'COLS', 'COLUMN', 'COLUMNS', 'COMMA', 'COMMAND-LINE', + 'COMMIT', 'COMMON', 'CONSTANT', 'CONTAINS', 'CONTENT', 'CONTROL', 'CONTROLS', 'CONVERTING', 'COPY', 'CORR', 'CORRESPONDING', 'COUNT', 'CRT', - 'CURRENCY', 'CURSOR', 'CYCLE', 'DATE', 'DAY', 'DAY-OF-WEEK', 'DE', 'DEBUGGING', - 'DECIMAL-POINT', 'DECLARATIVES', 'DEFAULT', 'DELIMITED', + 'CURRENCY', 'CURSOR', 'CYCLE', 'DATE', 'DAY', 'DAY-OF-WEEK', 'DE', + 'DEBUGGING', 'DECIMAL-POINT', 'DECLARATIVES', 'DEFAULT', 'DELIMITED', 'DELIMITER', 'DEPENDING', 'DESCENDING', 'DETAIL', 'DISK', 'DOWN', 'DUPLICATES', 'DYNAMIC', 'EBCDIC', 'ENTRY', 'ENVIRONMENT-NAME', 'ENVIRONMENT-VALUE', 'EOL', 'EOP', 'EOS', 'ERASE', 'ERROR', 'ESCAPE', 'EXCEPTION', - 'EXCLUSIVE', 'EXTEND', 'EXTERNAL', - 'FILE-ID', 'FILLER', 'FINAL', 'FIRST', 
'FIXED', 'FLOAT-LONG', 'FLOAT-SHORT', - 'FOOTING', 'FOR', 'FOREGROUND-COLOR', 'FORMAT', 'FROM', 'FULL', 'FUNCTION', - 'FUNCTION-ID', 'GIVING', 'GLOBAL', 'GROUP', + 'EXCLUSIVE', 'EXTEND', 'EXTERNAL', 'FILE-ID', 'FILLER', 'FINAL', + 'FIRST', 'FIXED', 'FLOAT-LONG', 'FLOAT-SHORT', + 'FOOTING', 'FOR', 'FOREGROUND-COLOR', 'FORMAT', 'FROM', 'FULL', + 'FUNCTION', 'FUNCTION-ID', 'GIVING', 'GLOBAL', 'GROUP', 'HEADING', 'HIGHLIGHT', 'I-O', 'ID', 'IGNORE', 'IGNORING', 'IN', 'INDEX', 'INDEXED', 'INDICATE', - 'INITIAL', 'INITIALIZED', 'INPUT', - 'INTO', 'INTRINSIC', 'INVALID', 'IS', 'JUST', 'JUSTIFIED', 'KEY', 'LABEL', + 'INITIAL', 'INITIALIZED', 'INPUT', 'INTO', 'INTRINSIC', 'INVALID', + 'IS', 'JUST', 'JUSTIFIED', 'KEY', 'LABEL', 'LAST', 'LEADING', 'LEFT', 'LENGTH', 'LIMIT', 'LIMITS', 'LINAGE', 'LINAGE-COUNTER', 'LINE', 'LINES', 'LOCALE', 'LOCK', - 'LOWLIGHT', 'MANUAL', 'MEMORY', 'MINUS', 'MODE', - 'MULTIPLE', 'NATIONAL', 'NATIONAL-EDITED', 'NATIVE', - 'NEGATIVE', 'NEXT', 'NO', 'NULL', 'NULLS', 'NUMBER', 'NUMBERS', 'NUMERIC', - 'NUMERIC-EDITED', 'OBJECT-COMPUTER', 'OCCURS', 'OF', 'OFF', 'OMITTED', 'ON', 'ONLY', + 'LOWLIGHT', 'MANUAL', 'MEMORY', 'MINUS', 'MODE', 'MULTIPLE', + 'NATIONAL', 'NATIONAL-EDITED', 'NATIVE', 'NEGATIVE', 'NEXT', 'NO', + 'NULL', 'NULLS', 'NUMBER', 'NUMBERS', 'NUMERIC', 'NUMERIC-EDITED', + 'OBJECT-COMPUTER', 'OCCURS', 'OF', 'OFF', 'OMITTED', 'ON', 'ONLY', 'OPTIONAL', 'ORDER', 'ORGANIZATION', 'OTHER', 'OUTPUT', 'OVERFLOW', 'OVERLINE', 'PACKED-DECIMAL', 'PADDING', 'PAGE', 'PARAGRAPH', 'PLUS', 'POINTER', 'POSITION', 'POSITIVE', 'PRESENT', 'PREVIOUS', @@ -137,40 +137,42 @@ class CobolLexer(RegexLexer): 'UNSIGNED-INT', 'UNSIGNED-LONG', 'UNSIGNED-SHORT', 'UNTIL', 'UP', 'UPDATE', 'UPON', 'USAGE', 'USING', 'VALUE', 'VALUES', 'VARYING', 'WAIT', 'WHEN', 'WITH', 'WORDS', 'YYYYDDD', 'YYYYMMDD'), - prefix=r'(^|(?<=[^0-9a-z_\-]))', suffix=r'\s*($|(?=[^0-9a-z_\-]))'), + prefix=r'(^|(?<=[^\w\-]))', suffix=r'\s*($|(?=[^\w\-]))'), Keyword.Pseudo), # inactive reserved words (words(( - 'ACTIVE-CLASS', 'ALIGNED', 'ANYCASE', 'ARITHMETIC', 'ATTRIBUTE', 'B-AND', - 'B-NOT', 'B-OR', 'B-XOR', 'BIT', 'BOOLEAN', 'CD', 'CENTER', 'CF', 'CH', 'CHAIN', 'CLASS-ID', - 'CLASSIFICATION', 'COMMUNICATION', 'CONDITION', 'DATA-POINTER', - 'DESTINATION', 'DISABLE', 'EC', 'EGI', 'EMI', 'ENABLE', 'END-RECEIVE', - 'ENTRY-CONVENTION', 'EO', 'ESI', 'EXCEPTION-OBJECT', 'EXPANDS', 'FACTORY', - 'FLOAT-BINARY-16', 'FLOAT-BINARY-34', 'FLOAT-BINARY-7', - 'FLOAT-DECIMAL-16', 'FLOAT-DECIMAL-34', 'FLOAT-EXTENDED', 'FORMAT', - 'FUNCTION-POINTER', 'GET', 'GROUP-USAGE', 'IMPLEMENTS', 'INFINITY', - 'INHERITS', 'INTERFACE', 'INTERFACE-ID', 'INVOKE', 'LC_ALL', 'LC_COLLATE', + 'ACTIVE-CLASS', 'ALIGNED', 'ANYCASE', 'ARITHMETIC', 'ATTRIBUTE', + 'B-AND', 'B-NOT', 'B-OR', 'B-XOR', 'BIT', 'BOOLEAN', 'CD', 'CENTER', + 'CF', 'CH', 'CHAIN', 'CLASS-ID', 'CLASSIFICATION', 'COMMUNICATION', + 'CONDITION', 'DATA-POINTER', 'DESTINATION', 'DISABLE', 'EC', 'EGI', + 'EMI', 'ENABLE', 'END-RECEIVE', 'ENTRY-CONVENTION', 'EO', 'ESI', + 'EXCEPTION-OBJECT', 'EXPANDS', 'FACTORY', 'FLOAT-BINARY-16', + 'FLOAT-BINARY-34', 'FLOAT-BINARY-7', 'FLOAT-DECIMAL-16', + 'FLOAT-DECIMAL-34', 'FLOAT-EXTENDED', 'FORMAT', 'FUNCTION-POINTER', + 'GET', 'GROUP-USAGE', 'IMPLEMENTS', 'INFINITY', 'INHERITS', + 'INTERFACE', 'INTERFACE-ID', 'INVOKE', 'LC_ALL', 'LC_COLLATE', 'LC_CTYPE', 'LC_MESSAGES', 'LC_MONETARY', 'LC_NUMERIC', 'LC_TIME', - 'LINE-COUNTER', 'MESSAGE', 'METHOD', 'METHOD-ID', 'NESTED', 'NONE', 'NORMAL', - 'OBJECT', 'OBJECT-REFERENCE', 'OPTIONS', 'OVERRIDE', 'PAGE-COUNTER', 
'PF', 'PH', - 'PROPERTY', 'PROTOTYPE', 'PURGE', 'QUEUE', 'RAISE', 'RAISING', 'RECEIVE', - 'RELATION', 'REPLACE', 'REPRESENTS-NOT-A-NUMBER', 'RESET', 'RESUME', 'RETRY', - 'RF', 'RH', 'SECONDS', 'SEGMENT', 'SELF', 'SEND', 'SOURCES', 'STATEMENT', 'STEP', - 'STRONG', 'SUB-QUEUE-1', 'SUB-QUEUE-2', 'SUB-QUEUE-3', 'SUPER', 'SYMBOL', - 'SYSTEM-DEFAULT', 'TABLE', 'TERMINAL', 'TEXT', 'TYPEDEF', 'UCS-4', 'UNIVERSAL', - 'USER-DEFAULT', 'UTF-16', 'UTF-8', 'VAL-STATUS', 'VALID', 'VALIDATE', - 'VALIDATE-STATUS'), - prefix=r'(^|(?<=[^0-9a-z_\-]))', suffix=r'\s*($|(?=[^0-9a-z_\-]))'), + 'LINE-COUNTER', 'MESSAGE', 'METHOD', 'METHOD-ID', 'NESTED', 'NONE', + 'NORMAL', 'OBJECT', 'OBJECT-REFERENCE', 'OPTIONS', 'OVERRIDE', + 'PAGE-COUNTER', 'PF', 'PH', 'PROPERTY', 'PROTOTYPE', 'PURGE', + 'QUEUE', 'RAISE', 'RAISING', 'RECEIVE', 'RELATION', 'REPLACE', + 'REPRESENTS-NOT-A-NUMBER', 'RESET', 'RESUME', 'RETRY', 'RF', 'RH', + 'SECONDS', 'SEGMENT', 'SELF', 'SEND', 'SOURCES', 'STATEMENT', + 'STEP', 'STRONG', 'SUB-QUEUE-1', 'SUB-QUEUE-2', 'SUB-QUEUE-3', + 'SUPER', 'SYMBOL', 'SYSTEM-DEFAULT', 'TABLE', 'TERMINAL', 'TEXT', + 'TYPEDEF', 'UCS-4', 'UNIVERSAL', 'USER-DEFAULT', 'UTF-16', 'UTF-8', + 'VAL-STATUS', 'VALID', 'VALIDATE', 'VALIDATE-STATUS'), + prefix=r'(^|(?<=[^\w\-]))', suffix=r'\s*($|(?=[^\w\-]))'), Error), # Data Types - (r'(^|(?<=[^0-9a-z_\-]))' + (r'(^|(?<=[^\w\-]))' r'(PIC\s+.+?(?=(\s|\.\s))|PICTURE\s+.+?(?=(\s|\.\s))|' r'(COMPUTATIONAL)(-[1-5X])?|(COMP)(-[1-5X])?|' r'BINARY-C-LONG|' r'BINARY-CHAR|BINARY-DOUBLE|BINARY-LONG|BINARY-SHORT|' - r'BINARY)\s*($|(?=[^0-9a-z_\-]))', Keyword.Type), + r'BINARY)\s*($|(?=[^\w\-]))', Keyword.Type), # Operators (r'(\*\*|\*|\+|-|/|<=|>=|<|>|==|/=|=)', Operator), @@ -180,7 +182,7 @@ class CobolLexer(RegexLexer): (r'([(),;:&%.])', Punctuation), # Intrinsics - (r'(^|(?<=[^0-9a-z_\-]))(ABS|ACOS|ANNUITY|ASIN|ATAN|BYTE-LENGTH|' + (r'(^|(?<=[^\w\-]))(ABS|ACOS|ANNUITY|ASIN|ATAN|BYTE-LENGTH|' r'CHAR|COMBINED-DATETIME|CONCATENATE|COS|CURRENT-DATE|' r'DATE-OF-INTEGER|DATE-TO-YYYYMMDD|DAY-OF-INTEGER|DAY-TO-YYYYDDD|' r'EXCEPTION-(?:FILE|LOCATION|STATEMENT|STATUS)|EXP10|EXP|E|' @@ -192,13 +194,13 @@ class CobolLexer(RegexLexer): r'STANDARD-DEVIATION|STORED-CHAR-LENGTH|SUBSTITUTE(?:-CASE)?|' r'SUM|TAN|TEST-DATE-YYYYMMDD|TEST-DAY-YYYYDDD|TRIM|' r'UPPER-CASE|VARIANCE|WHEN-COMPILED|YEAR-TO-YYYY)\s*' - r'($|(?=[^0-9a-z_\-]))', Name.Function), + r'($|(?=[^\w\-]))', Name.Function), # Booleans - (r'(^|(?<=[^0-9a-z_\-]))(true|false)\s*($|(?=[^0-9a-z_\-]))', Name.Builtin), + (r'(^|(?<=[^\w\-]))(true|false)\s*($|(?=[^\w\-]))', Name.Builtin), # Comparing Operators - (r'(^|(?<=[^0-9a-z_\-]))(equal|equals|ne|lt|le|gt|ge|' - r'greater|less|than|not|and|or)\s*($|(?=[^0-9a-z_\-]))', Operator.Word), + (r'(^|(?<=[^\w\-]))(equal|equals|ne|lt|le|gt|ge|' + r'greater|less|than|not|and|or)\s*($|(?=[^\w\-]))', Operator.Word), ], # \"[^\"\n]*\"|\'[^\'\n]*\' @@ -439,15 +441,15 @@ class OpenEdgeLexer(RegexLexer): filenames = ['*.p', '*.cls'] mimetypes = ['text/x-openedge', 'application/x-openedge'] - types = (r'(?i)(^|(?<=[^0-9a-z_\-]))(CHARACTER|CHAR|CHARA|CHARAC|CHARACT|CHARACTE|' + types = (r'(?i)(^|(?<=[^\w\-]))(CHARACTER|CHAR|CHARA|CHARAC|CHARACT|CHARACTE|' r'COM-HANDLE|DATE|DATETIME|DATETIME-TZ|' r'DECIMAL|DEC|DECI|DECIM|DECIMA|HANDLE|' r'INT64|INTEGER|INT|INTE|INTEG|INTEGE|' - r'LOGICAL|LONGCHAR|MEMPTR|RAW|RECID|ROWID)\s*($|(?=[^0-9a-z_\-]))') + r'LOGICAL|LONGCHAR|MEMPTR|RAW|RECID|ROWID)\s*($|(?=[^\w\-]))') keywords = words(OPENEDGEKEYWORDS, - prefix=r'(?i)(^|(?<=[^0-9a-z_\-]))', - 
suffix=r'\s*($|(?=[^0-9a-z_\-]))') + prefix=r'(?i)(^|(?<=[^\w\-]))', + suffix=r'\s*($|(?=[^\w\-]))') tokens = { 'root': [ diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py index 2f77158b..632871ba 100644 --- a/pygments/lexers/c_cpp.py +++ b/pygments/lexers/c_cpp.py @@ -124,7 +124,8 @@ class CFamilyLexer(RegexLexer): (r'\\', String), # stray backslash ], 'macro': [ - (r'(include)(' + _ws1 + ')([^\n]+)', bygroups(Comment.Preproc, Text, Comment.PreprocFile)), + (r'(include)(' + _ws1 + r')([^\n]+)', + bygroups(Comment.Preproc, Text, Comment.PreprocFile)), (r'[^/\n]+', Comment.Preproc), (r'/[*](.|\n)*?[*]/', Comment.Multiline), (r'//.*?\n', Comment.Single, '#pop'), diff --git a/pygments/lexers/configs.py b/pygments/lexers/configs.py index 77c7714d..9cc291e5 100644 --- a/pygments/lexers/configs.py +++ b/pygments/lexers/configs.py @@ -42,7 +42,7 @@ class IniLexer(RegexLexer): bygroups(Name.Attribute, Text, Operator, Text, String)), # standalone option, supported by some INI parsers (r'(.+?)$', Name.Attribute), - ] + ], } def analyse_text(text): @@ -600,7 +600,7 @@ class TerraformLexer(RegexLexer): (r'(".*")', bygroups(String.Double)), ], 'punctuation': [ - (r'[\[\]\(\),.]', Punctuation), + (r'[\[\](),.]', Punctuation), ], # Keep this seperate from punctuation - we sometimes want to use different # Tokens for { } @@ -631,9 +631,8 @@ class TermcapLexer(RegexLexer): .. versionadded:: 2.1 """ name = 'Termcap' - aliases = ['termcap',] - - filenames = ['termcap', 'termcap.src',] + aliases = ['termcap'] + filenames = ['termcap', 'termcap.src'] mimetypes = [] # NOTE: @@ -644,13 +643,13 @@ class TermcapLexer(RegexLexer): tokens = { 'root': [ (r'^#.*$', Comment), - (r'^[^\s#:\|]+', Name.Tag, 'names'), + (r'^[^\s#:|]+', Name.Tag, 'names'), ], 'names': [ (r'\n', Text, '#pop'), (r':', Punctuation, 'defs'), (r'\|', Punctuation), - (r'[^:\|]+', Name.Attribute), + (r'[^:|]+', Name.Attribute), ], 'defs': [ (r'\\\n[ \t]*', Text), @@ -678,9 +677,8 @@ class TerminfoLexer(RegexLexer): .. versionadded:: 2.1 """ name = 'Terminfo' - aliases = ['terminfo',] - - filenames = ['terminfo', 'terminfo.src',] + aliases = ['terminfo'] + filenames = ['terminfo', 'terminfo.src'] mimetypes = [] # NOTE: @@ -691,13 +689,13 @@ class TerminfoLexer(RegexLexer): tokens = { 'root': [ (r'^#.*$', Comment), - (r'^[^\s#,\|]+', Name.Tag, 'names'), + (r'^[^\s#,|]+', Name.Tag, 'names'), ], 'names': [ (r'\n', Text, '#pop'), (r'(,)([ \t]*)', bygroups(Punctuation, Text), 'defs'), (r'\|', Punctuation), - (r'[^,\|]+', Name.Attribute), + (r'[^,|]+', Name.Attribute), ], 'defs': [ (r'\n[ \t]+', Text), @@ -726,8 +724,8 @@ class PkgConfigLexer(RegexLexer): """ name = 'PkgConfig' - aliases = ['pkgconfig',] - filenames = ['*.pc',] + aliases = ['pkgconfig'] + filenames = ['*.pc'] mimetypes = [] tokens = { @@ -793,8 +791,8 @@ class PacmanConfLexer(RegexLexer): """ name = 'PacmanConf' - aliases = ['pacmanconf',] - filenames = ['pacman.conf',] + aliases = ['pacmanconf'] + filenames = ['pacman.conf'] mimetypes = [] tokens = { @@ -822,7 +820,7 @@ class PacmanConfLexer(RegexLexer): '%u', # url ), suffix=r'\b'), Name.Variable), - + # fallback (r'.', Text), ], diff --git a/pygments/lexers/csound.py b/pygments/lexers/csound.py index 51414073..95ee73d8 100644 --- a/pygments/lexers/csound.py +++ b/pygments/lexers/csound.py @@ -9,7 +9,7 @@ :license: BSD, see LICENSE for details. 
""" -import copy, re +import re from pygments.lexer import RegexLexer, bygroups, default, include, using, words from pygments.token import Comment, Keyword, Name, Number, Operator, Punctuation, \ @@ -21,7 +21,7 @@ from pygments.lexers.scripting import LuaLexer __all__ = ['CsoundScoreLexer', 'CsoundOrchestraLexer', 'CsoundDocumentLexer'] -newline = (r'((?:;|//).*)*(\n)', bygroups(Comment.Single, Text)) +newline = (r'((?:(?:;|//).*)*)(\n)', bygroups(Comment.Single, Text)) class CsoundLexer(RegexLexer): @@ -177,7 +177,7 @@ class CsoundOrchestraLexer(CsoundLexer): (r'0[xX][a-fA-F0-9]+', Number.Hex), (r'\d+', Number.Integer), (r'"', String, 'single-line string'), - (r'{{', String, 'multi-line string'), + (r'\{\{', String, 'multi-line string'), (r'[+\-*/%^!=&|<>#~¬]', Operator), (r'[](),?:[]', Punctuation), (words(( @@ -273,40 +273,40 @@ class CsoundOrchestraLexer(CsoundLexer): (r'[\\"~$%\^\n]', String) ], 'multi-line string': [ - (r'}}', String, '#pop'), - (r'[^\}]+|\}(?!\})', String) + (r'\}\}', String, '#pop'), + (r'[^}]+|\}(?!\})', String) ], 'scoreline opcode': [ include('whitespace or macro call'), - (r'{{', String, 'scoreline'), + (r'\{\{', String, 'scoreline'), default('#pop') ], 'scoreline': [ - (r'}}', String, '#pop'), - (r'([^\}]+)|\}(?!\})', using(CsoundScoreLexer)) + (r'\}\}', String, '#pop'), + (r'([^}]+)|\}(?!\})', using(CsoundScoreLexer)) ], 'python opcode': [ include('whitespace or macro call'), - (r'{{', String, 'python'), + (r'\{\{', String, 'python'), default('#pop') ], 'python': [ - (r'}}', String, '#pop'), - (r'([^\}]+)|\}(?!\})', using(PythonLexer)) + (r'\}\}', String, '#pop'), + (r'([^}]+)|\}(?!\})', using(PythonLexer)) ], 'lua opcode': [ include('whitespace or macro call'), (r'"', String, 'single-line string'), - (r'{{', String, 'lua'), + (r'\{\{', String, 'lua'), (r',', Punctuation), default('#pop') ], 'lua': [ - (r'}}', String, '#pop'), - (r'([^\}]+)|\}(?!\})', using(LuaLexer)) + (r'\}\}', String, '#pop'), + (r'([^}]+)|\}(?!\})', using(LuaLexer)) ] } @@ -315,7 +315,7 @@ class CsoundDocumentLexer(RegexLexer): """ For `Csound `_ documents. - + .. versionadded:: 2.1 """ name = 'Csound Document' diff --git a/pygments/lexers/css.py b/pygments/lexers/css.py index b40201f4..6c585dfa 100644 --- a/pygments/lexers/css.py +++ b/pygments/lexers/css.py @@ -476,8 +476,8 @@ class ScssLexer(RegexLexer): (r'@[\w-]+', Keyword, 'selector'), (r'(\$[\w-]*\w)([ \t]*:)', bygroups(Name.Variable, Operator), 'value'), # TODO: broken, and prone to infinite loops. - #(r'(?=[^;{}][;}])', Name.Attribute, 'attr'), - #(r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'), + # (r'(?=[^;{}][;}])', Name.Attribute, 'attr'), + # (r'(?=[^;{}:]+:[^a-z])', Name.Attribute, 'attr'), default('selector'), ], @@ -518,7 +518,7 @@ class LessCssLexer(CssLexer): inherit, ], 'content': [ - (r'{', Punctuation, '#push'), + (r'\{', Punctuation, '#push'), inherit, ], } diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py index eac4b5e5..11b4573e 100644 --- a/pygments/lexers/dotnet.py +++ b/pygments/lexers/dotnet.py @@ -11,7 +11,7 @@ import re from pygments.lexer import RegexLexer, DelegatingLexer, bygroups, include, \ - using, this, default + using, this, default, words from pygments.token import Punctuation, \ Text, Comment, Operator, Keyword, Name, String, Number, Literal, Other from pygments.util import get_choice_opt, iteritems @@ -375,8 +375,8 @@ class VbNetLexer(RegexLexer): filenames = ['*.vb', '*.bas'] mimetypes = ['text/x-vbnet', 'text/x-vba'] # (?) 
- uni_name = '[_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl') + ']' + \ - '[' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', + uni_name = '[_' + uni.combine('Ll', 'Lt', 'Lm', 'Nl') + ']' + \ + '[' + uni.combine('Ll', 'Lt', 'Lm', 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*' flags = re.MULTILINE | re.IGNORECASE @@ -394,25 +394,26 @@ class VbNetLexer(RegexLexer): (r'[(){}!#,.:]', Punctuation), (r'Option\s+(Strict|Explicit|Compare)\s+' r'(On|Off|Binary|Text)', Keyword.Declaration), - (r'(?\[\]]', Punctuation), - (r'[a-zA-Z_](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*', Name), + (r'[:;,{}()<>\[\]]', Punctuation), + (r'[a-zA-Z_](\.\w|\w)*', Name), ], 'whitespace': [ (r'\n', Text.Whitespace), @@ -135,7 +135,7 @@ class ThriftLexer(RegexLexer): (r'[^\\\'\n]+', String.Single), ], 'namespace': [ - (r'[a-z\*](\.[a-zA-Z_0-9]|[a-zA-Z_0-9])*', Name.Namespace, '#pop'), + (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'), default('#pop'), ], 'class': [ diff --git a/pygments/lexers/elm.py b/pygments/lexers/elm.py index 7df6346a..cd1fb98e 100644 --- a/pygments/lexers/elm.py +++ b/pygments/lexers/elm.py @@ -46,7 +46,7 @@ class ElmLexer(RegexLexer): 'root': [ # Comments - (r'{-', Comment.Multiline, 'comment'), + (r'\{-', Comment.Multiline, 'comment'), (r'--.*', Comment.Single), # Whitespace @@ -86,20 +86,20 @@ class ElmLexer(RegexLexer): (validName, Name.Variable), # Parens - (r'[,\(\)\[\]{}]', Punctuation), + (r'[,()\[\]{}]', Punctuation), ], 'comment': [ - (r'-(?!})', Comment.Multiline), - (r'{-', Comment.Multiline, 'comment'), + (r'-(?!\})', Comment.Multiline), + (r'\{-', Comment.Multiline, 'comment'), (r'[^-}]', Comment.Multiline), - (r'-}', Comment.Multiline, '#pop'), + (r'-\}', Comment.Multiline, '#pop'), ], 'doublequote': [ - (r'\\u[0-9a-fA-F]\{4}', String.Escape), - (r'\\[nrfvb\\\"]', String.Escape), + (r'\\u[0-9a-fA-F]{4}', String.Escape), + (r'\\[nrfvb\\"]', String.Escape), (r'[^"]', String), (r'"', String, '#pop'), ], diff --git a/pygments/lexers/erlang.py b/pygments/lexers/erlang.py index 7838b3c5..93ddd2c2 100644 --- a/pygments/lexers/erlang.py +++ b/pygments/lexers/erlang.py @@ -127,7 +127,7 @@ class ErlangLexer(RegexLexer): 'string': [ (escape_re, String.Escape), (r'"', String, '#pop'), - (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol), + (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol), (r'[^"\\~]+', String), (r'~', String), ], @@ -240,11 +240,11 @@ class ElixirLexer(RegexLexer): KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in') BUILTIN = ( 'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise', - 'quote', 'unquote', 'unquote_splicing', 'throw', 'super' + 'quote', 'unquote', 'unquote_splicing', 'throw', 'super', ) BUILTIN_DECLARATION = ( 'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop', - 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback' + 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback', ) BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias') @@ -263,7 +263,7 @@ class ElixirLexer(RegexLexer): OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&') PUNCTUATION = ( - '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']' + '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']', ) def get_tokens_unprocessed(self, text): diff --git a/pygments/lexers/esoteric.py b/pygments/lexers/esoteric.py index 73ea4a4a..2b17615c 100644 --- a/pygments/lexers/esoteric.py +++ b/pygments/lexers/esoteric.py @@ -90,7 +90,7 @@ class CAmkESLexer(RegexLexer): filenames = ['*.camkes', '*.idl4'] tokens = { - 'root':[ + 'root': [ # C 
pre-processor directive (r'^\s*#.*\n', Comment.Preproc), @@ -99,7 +99,7 @@ class CAmkESLexer(RegexLexer): (r'/\*(.|\n)*?\*/', Comment), (r'//.*\n', Comment), - (r'[\[\(\){},\.;=\]]', Punctuation), + (r'[\[(){},.;=\]]', Punctuation), (words(('assembly', 'attribute', 'component', 'composition', 'configuration', 'connection', 'connector', 'consumes', diff --git a/pygments/lexers/felix.py b/pygments/lexers/felix.py index b7659769..9631bcc1 100644 --- a/pygments/lexers/felix.py +++ b/pygments/lexers/felix.py @@ -237,7 +237,7 @@ class FelixLexer(RegexLexer): ], 'strings': [ (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), diff --git a/pygments/lexers/fortran.py b/pygments/lexers/fortran.py index 4c22139d..e2f95b11 100644 --- a/pygments/lexers/fortran.py +++ b/pygments/lexers/fortran.py @@ -11,7 +11,7 @@ import re -from pygments.lexer import RegexLexer, bygroups, include, words, using +from pygments.lexer import RegexLexer, bygroups, include, words, using, default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic @@ -191,16 +191,15 @@ class FortranFixedLexer(RegexLexer): (r'(.{5})', Name.Label, 'cont-char'), (r'.*\n', using(FortranLexer)), ], - 'cont-char': [ (' ', Text, 'code'), ('0', Comment, 'code'), - ('.', Generic.Strong, 'code') + ('.', Generic.Strong, 'code'), ], - 'code': [ (r'(.{66})(.*)(\n)', bygroups(_lex_fortran, Comment, Text), 'root'), (r'(.*)(\n)', bygroups(_lex_fortran, Text), 'root'), - (r'', Text, 'root')] + default('root'), + ] } diff --git a/pygments/lexers/haskell.py b/pygments/lexers/haskell.py index 95e68a33..ffc3a3a2 100644 --- a/pygments/lexers/haskell.py +++ b/pygments/lexers/haskell.py @@ -321,7 +321,7 @@ class AgdaLexer(RegexLexer): 'module': [ (r'\{-', Comment.Multiline, 'comment'), (r'[a-zA-Z][\w.]*', Name, '#pop'), - (r'[^a-zA-Z]+', Text) + (r'[\W0-9_]+', Text) ], 'comment': HaskellLexer.tokens['comment'], 'character': HaskellLexer.tokens['character'], diff --git a/pygments/lexers/idl.py b/pygments/lexers/idl.py index d745bcfd..a0b39492 100644 --- a/pygments/lexers/idl.py +++ b/pygments/lexers/idl.py @@ -258,12 +258,13 @@ class IDLLexer(RegexLexer): (r'\b(mod|lt|le|eq|ne|ge|gt|not|and|or|xor)\b', Operator), (r'"[^\"]*"', String.Double), (r"'[^\']*'", String.Single), - (r'\b[\+\-]?([0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(D|E)?([\+\-]?[0-9]+)?\b', Number.Float), - (r'\b\'[\+\-]?[0-9A-F]+\'X(U?(S?|L{1,2})|B)\b', Number.Hex), - (r'\b\'[\+\-]?[0-7]+\'O(U?(S?|L{1,2})|B)\b', Number.Oct), - (r'\b[\+\-]?[0-9]+U?L{1,2}\b', Number.Integer.Long), - (r'\b[\+\-]?[0-9]+U?S?\b', Number.Integer), - (r'\b[\+\-]?[0-9]+B\b', Number), + (r'\b[+\-]?([0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(D|E)?([+\-]?[0-9]+)?\b', + Number.Float), + (r'\b\'[+\-]?[0-9A-F]+\'X(U?(S?|L{1,2})|B)\b', Number.Hex), + (r'\b\'[+\-]?[0-7]+\'O(U?(S?|L{1,2})|B)\b', Number.Oct), + (r'\b[+\-]?[0-9]+U?L{1,2}\b', Number.Integer.Long), + (r'\b[+\-]?[0-9]+U?S?\b', Number.Integer), + (r'\b[+\-]?[0-9]+B\b', Number), (r'.', Text), ] } diff --git a/pygments/lexers/j.py b/pygments/lexers/j.py index 278374e5..f15595f8 100644 --- a/pygments/lexers/j.py +++ b/pygments/lexers/j.py @@ -48,7 +48,7 @@ class JLexer(RegexLexer): # Definitions (r'0\s+:\s*0|noun\s+define\s*$', Name.Entity, 'nounDefinition'), - 
(r'\b(([1-4]|13)\s+:\s*0)|((adverb|conjunction|dyad|monad|verb)\s+define)\b', + (r'(([1-4]|13)\s+:\s*0|(adverb|conjunction|dyad|monad|verb)\s+define)\b', Name.Function, 'explicitDefinition'), # Flow Control diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py index 2a01cd42..5dca6832 100644 --- a/pygments/lexers/javascript.py +++ b/pygments/lexers/javascript.py @@ -97,13 +97,13 @@ class JavascriptLexer(RegexLexer): (r'`', String.Backtick, '#pop'), (r'\\\\', String.Backtick), (r'\\`', String.Backtick), - (r'\${', String.Interpol, 'interp-inside'), + (r'\$\{', String.Interpol, 'interp-inside'), (r'\$', String.Backtick), (r'[^`\\$]+', String.Backtick), ], 'interp-inside': [ # TODO: should this include single-line comments and allow nesting strings? - (r'}', String.Interpol, '#pop'), + (r'\}', String.Interpol, '#pop'), include('root'), ], # (\\\\|\\`|[^`])*`', String.Backtick), @@ -1245,32 +1245,32 @@ class EarlGreyLexer(RegexLexer): include('control'), (r'[^\S\n]+', Text), (r';;.*\n', Comment), - (r'[\[\]\{\}\:\(\)\,\;]', Punctuation), + (r'[\[\]{}:(),;]', Punctuation), (r'\\\n', Text), (r'\\', Text), include('errors'), (words(( 'with', 'where', 'when', 'and', 'not', 'or', 'in', 'as', 'of', 'is'), - prefix=r'(?<=\s|\[)', suffix=r'(?![\w\$\-])'), + prefix=r'(?<=\s|\[)', suffix=r'(?![\w$\-])'), Operator.Word), - (r'[\*@]?->', Name.Function), + (r'[*@]?->', Name.Function), (r'[+\-*/~^<>%&|?!@#.]*=', Operator.Word), (r'\.{2,3}', Operator.Word), # Range Operator (r'([+*/~^<>&|?!]+)|([#\-](?=\s))|@@+(?=\s)|=+', Operator), - (r'(?%&|?!@#.]*\=\s)', bygroups(Punctuation, Name.Variable)) ], 'errors': [ (words(('Error', 'TypeError', 'ReferenceError'), - prefix=r'(?%&|?!@#.])?[a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)''', + (?=(?:[+\-*/~^<>%&|?!@#.])?[a-zA-Z$_](?:[\w$-]*[\w$])?)''', Keyword.Control), - (r'([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(?!\n)\s+(?=[\'"\d\{\[\(])', + (r'([a-zA-Z$_](?:[\w$-]*[\w$])?)(?!\n)\s+(?=[\'"\d{\[(])', Keyword.Control), (r'''(?x) (?: @@ -1324,28 +1324,28 @@ class EarlGreyLexer(RegexLexer): (?<=with|each|with)| (?<=each\*|where) )(\s+) - ([a-zA-Z$_](?:[a-zA-Z$0-9_\-]*[a-zA-Z$0-9_])?)(:)''', + ([a-zA-Z$_](?:[\w$-]*[\w$])?)(:)''', bygroups(Text, Keyword.Control, Punctuation)), (r'''(?x) (?%&|?!@#.])(\s+) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?)(:)''', + ([a-zA-Z$_](?:[\w$-]*[\w$])?)(:)''', bygroups(Text, Keyword.Control, Punctuation)), ], 'nested': [ (r'''(?x) - (?<=[a-zA-Z$0-9_\]\}\)])(\.) - ([a-zA-Z$_](?:[a-zA-Z$0-9_-]*[a-zA-Z$0-9_])?) + (?<=[\w$\]})])(\.) + ([a-zA-Z$_](?:[\w$-]*[\w$])?) 
(?=\s+with(?:\s|\n))''', bygroups(Punctuation, Name.Function)), (r'''(?x) (?=|->|&&|\|\||::|<:|[-~+/*%=<>&^|.?!$]', Operator), (r'\.\*|\.\^|\.\\|\.\/|\\', Operator), # builtins - ('(' + '|'.join(builtins) + r')\b', Name.Builtin), + (words(builtins, suffix=r'\b'), Name.Builtin), # backticks (r'`(?s).*?`', String.Backtick), @@ -116,12 +127,12 @@ class JuliaLexer(RegexLexer): ], 'typename': [ - ('[a-zA-Z_]\w*', Name.Class, '#pop') + ('[a-zA-Z_]\w*', Name.Class, '#pop'), ], 'stringescape': [ (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|' - r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) + r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape), ], "blockcomment": [ (r'[^=#]', Comment.Multiline), @@ -138,7 +149,7 @@ class JuliaLexer(RegexLexer): (r'\$[a-zA-Z_]+', String.Interpol), (r'\$\(', String.Interpol, 'in-intp'), # @printf and @sprintf formats - (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[diouxXeEfFgGcrs%]', + (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^$%"\\]+', String), # unhandled special signs @@ -155,9 +166,6 @@ class JuliaLexer(RegexLexer): return shebang_matches(text, r'julia') -line_re = re.compile('.*?\n') - - class JuliaConsoleLexer(Lexer): """ For Julia console sessions. Modeled after MatlabSessionLexer. diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 41fc0fdb..af7f8105 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -564,14 +564,14 @@ class IokeLexer(RegexLexer): ], 'slashRegexp': [ - (r'(? >= <= <-address <-vector abort absvector - absvector? address-> adjoin append arity assoc bind boolean? - bound? call cd close cn compile concat cons cons? cut destroy - difference element? empty? enable-type-theory error-to-string - eval eval-kl exception explode external fail fail-if file - findall fix fst fwhen gensym get-time hash hd hdstr hdv head - identical implementation in include include-all-but inferences - input input+ integer? intern intersection is kill language - length limit lineread loaded macro macroexpand map mapcan - maxinferences mode n->string nl nth null number? occurrences - occurs-check open os out port porters pos pr preclude - preclude-all-but print profile profile-results ps quit read - read+ read-byte read-file read-file-as-bytelist - read-file-as-string read-from-string release remove return - reverse run save set simple-error snd specialise spy step - stinput stoutput str string->n string->symbol string? subst - symbol? systemf tail tc tc? thaw tl tlstr tlv track tuple? - undefmacro unify unify! union unprofile unspecialise untrack - variable? vector vector-> vector? verified version warn when - write-byte write-to-file y-or-n? - """) - - BUILTINS_ANYWHERE = re.findall(r'\S+', """ - where skip >> _ ! 
- """) + DECLARATIONS = ( + 'datatype', 'define', 'defmacro', 'defprolog', 'defcc', + 'synonyms', 'declare', 'package', 'type', 'function', + ) + + SPECIAL_FORMS = ( + 'lambda', 'get', 'let', 'if', 'cases', 'cond', 'put', 'time', 'freeze', + 'value', 'load', '$', 'protect', 'or', 'and', 'not', 'do', 'output', + 'prolog?', 'trap-error', 'error', 'make-string', '/.', 'set', '@p', + '@s', '@v', + ) + + BUILTINS = ( + '==', '=', '*', '+', '-', '/', '<', '>', '>=', '<=', '<-address', + '<-vector', 'abort', 'absvector', 'absvector?', 'address->', 'adjoin', + 'append', 'arity', 'assoc', 'bind', 'boolean?', 'bound?', 'call', 'cd', + 'close', 'cn', 'compile', 'concat', 'cons', 'cons?', 'cut', 'destroy', + 'difference', 'element?', 'empty?', 'enable-type-theory', + 'error-to-string', 'eval', 'eval-kl', 'exception', 'explode', 'external', + 'fail', 'fail-if', 'file', 'findall', 'fix', 'fst', 'fwhen', 'gensym', + 'get-time', 'hash', 'hd', 'hdstr', 'hdv', 'head', 'identical', + 'implementation', 'in', 'include', 'include-all-but', 'inferences', + 'input', 'input+', 'integer?', 'intern', 'intersection', 'is', 'kill', + 'language', 'length', 'limit', 'lineread', 'loaded', 'macro', 'macroexpand', + 'map', 'mapcan', 'maxinferences', 'mode', 'n->string', 'nl', 'nth', 'null', + 'number?', 'occurrences', 'occurs-check', 'open', 'os', 'out', 'port', + 'porters', 'pos', 'pr', 'preclude', 'preclude-all-but', 'print', 'profile', + 'profile-results', 'ps', 'quit', 'read', 'read+', 'read-byte', 'read-file', + 'read-file-as-bytelist', 'read-file-as-string', 'read-from-string', + 'release', 'remove', 'return', 'reverse', 'run', 'save', 'set', + 'simple-error', 'snd', 'specialise', 'spy', 'step', 'stinput', 'stoutput', + 'str', 'string->n', 'string->symbol', 'string?', 'subst', 'symbol?', + 'systemf', 'tail', 'tc', 'tc?', 'thaw', 'tl', 'tlstr', 'tlv', 'track', + 'tuple?', 'undefmacro', 'unify', 'unify!', 'union', 'unprofile', + 'unspecialise', 'untrack', 'variable?', 'vector', 'vector->', 'vector?', + 'verified', 'version', 'warn', 'when', 'write-byte', 'write-to-file', + 'y-or-n?', + ) + + BUILTINS_ANYWHERE = ('where', 'skip', '>>', '_', '!', '', '') MAPPINGS = dict((s, Keyword) for s in DECLARATIONS) MAPPINGS.update((s, Name.Builtin) for s in BUILTINS) MAPPINGS.update((s, Keyword) for s in SPECIAL_FORMS) - valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:_-]' + valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:-]' valid_name = '%s+' % valid_symbol_chars symbol_name = r'[a-z!$%%*+,<=>?/.\'@&#_-]%s*' % valid_symbol_chars variable = r'[A-Z]%s*' % valid_symbol_chars @@ -2313,7 +2316,7 @@ class CPSALexer(SchemeLexer): # valid names for identifiers # well, names can only not consist fully of numbers # but this should be good enough for now - valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+' + valid_name = r'[\w!$%&*+,/:<=>?@^~|-]+' tokens = { 'root': [ @@ -2334,7 +2337,7 @@ class CPSALexer(SchemeLexer): # strings, symbols and characters (r'"(\\\\|\\"|[^"])*"', String), (r"'" + valid_name, String.Symbol), - (r"#\\([()/'\"._!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char), + (r"#\\([()/'\"._!§$%& ?=+-]|[a-zA-Z0-9]+)", String.Char), # constants (r'(#t|#f)', Name.Constant), diff --git a/pygments/lexers/markup.py b/pygments/lexers/markup.py index aac8d27e..02146597 100644 --- a/pygments/lexers/markup.py +++ b/pygments/lexers/markup.py @@ -204,7 +204,7 @@ class RstLexer(RegexLexer): bygroups(Text, Operator, using(this, state='inline'))), # Sourcecode directives (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' - r'(\n[ \t]*\n)([ 
\t]+)(.*)(\n)((?:(?:\8.*|)\n)+)', + r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)', _handle_sourcecode), # A directive (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', @@ -230,7 +230,7 @@ class RstLexer(RegexLexer): (r'^(\S.*(?|<=>|\*\*=|<\|=|<<=|>>=|==|!=|=\?|<=|>=|' - r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\||\|=|/=|\+|-|\*|/|' + r'\*\*|<<|>>|=>|:=|\+=|-=|\*=|\|=|\||/=|\+|-|\*|/|' r'\.\.|<\.\.|\.\.<|<\.\.<)', Operator), (r'(<|>|\[|\]|\(|\)|\||:|;|,|.|\{|\}|->)', diff --git a/pygments/lexers/praat.py b/pygments/lexers/praat.py index 776c38b8..9255216d 100644 --- a/pygments/lexers/praat.py +++ b/pygments/lexers/praat.py @@ -27,21 +27,21 @@ class PraatLexer(RegexLexer): aliases = ['praat'] filenames = ['*.praat', '*.proc', '*.psc'] - keywords = [ + keywords = ( 'if', 'then', 'else', 'elsif', 'elif', 'endif', 'fi', 'for', 'from', 'to', 'endfor', 'endproc', 'while', 'endwhile', 'repeat', 'until', 'select', 'plus', 'minus', 'demo', 'assert', 'stopwatch', 'nocheck', 'nowarn', 'noprogress', 'editor', 'endeditor', 'clearinfo', - ] + ) - functions_string = [ + functions_string = ( 'backslashTrigraphsToUnicode', 'chooseDirectory', 'chooseReadFile', 'chooseWriteFile', 'date', 'demoKey', 'do', 'environment', 'extractLine', 'extractWord', 'fixed', 'info', 'left', 'mid', 'percent', 'readFile', 'replace', 'replace_regex', 'right', 'selected', 'string', 'unicodeToBackslashTrigraphs', - ] + ) - functions_numeric = [ + functions_numeric = ( 'abs', 'appendFile', 'appendFileLine', 'appendInfo', 'appendInfoLine', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'barkToHertz', 'beginPause', 'beginSendPraat', 'besselI', 'besselK', 'beta', 'beta2', @@ -67,13 +67,13 @@ class PraatLexer(RegexLexer): 'sincpi', 'sinh', 'soundPressureToPhon', 'sqrt', 'startsWith', 'studentP', 'studentQ', 'tan', 'tanh', 'variableExists', 'word', 'writeFile', 'writeFileLine', 'writeInfo', 'writeInfoLine', - ] + ) - functions_array = [ + functions_array = ( 'linear', 'randomGauss', 'randomInteger', 'randomUniform', 'zero', - ] + ) - objects = [ + objects = ( 'Activation', 'AffineTransform', 'AmplitudeTier', 'Art', 'Artword', 'Autosegment', 'BarkFilter', 'BarkSpectrogram', 'CCA', 'Categories', 'Cepstrogram', 'Cepstrum', 'Cepstrumc', 'ChebyshevSeries', 'ClassificationTable', @@ -100,17 +100,17 @@ class PraatLexer(RegexLexer): 'Strings', 'StringsIndex', 'Table', 'TableOfReal', 'TextGrid', 'TextInterval', 'TextPoint', 'TextTier', 'Tier', 'Transition', 'VocalTract', 'VocalTractTier', 'Weight', 'WordList', - ] + ) - variables_numeric = [ + variables_numeric = ( 'macintosh', 'windows', 'unix', 'praatVersion', 'pi', 'e', 'undefined', - ] + ) - variables_string = [ + variables_string = ( 'praatVersion', 'tab', 'shellDirectory', 'homeDirectory', 'preferencesDirectory', 'newline', 'temporaryDirectory', 'defaultDirectory', - ] + ) tokens = { 'root': [ @@ -151,7 +151,7 @@ class PraatLexer(RegexLexer): (r"'(?=.*')", String.Interpol, 'string_interpolated'), (r'\.{3}', Keyword, ('#pop', 'old_arguments')), (r':', Keyword, ('#pop', 'comma_list')), - (r'[\s\n]', Text, '#pop'), + (r'\s', Text, '#pop'), ], 'procedure_call': [ (r'\s+', Text), @@ -230,7 +230,7 @@ class PraatLexer(RegexLexer): bygroups(Name.Builtin, Name.Builtin, String.Interpol), ('object_attributes', 'string_interpolated')), - (r'\.?_?[a-z][a-zA-Z0-9_.]*(\$|#)?', Text), + (r'\.?_?[a-z][\w.]*(\$|#)?', Text), (r'[\[\]]', Punctuation, 'comma_list'), (r"'(?=.*')", String.Interpol, 'string_interpolated'), ], @@ -239,7 +239,7 @@ class PraatLexer(RegexLexer): 
(r'\b(and|or|not|div|mod)\b', Operator.Word), ], 'string_interpolated': [ - (r'\.?[_a-z][a-zA-Z0-9_.]*[\$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?', + (r'\.?[_a-z][\w.]*[$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?', String.Interpol), (r"'", String.Interpol, '#pop'), ], diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index f483071b..7601afa8 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -39,7 +39,7 @@ class PythonLexer(RegexLexer): return [ # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', ttype), (r'[\'"\\]', ttype), @@ -51,8 +51,10 @@ class PythonLexer(RegexLexer): tokens = { 'root': [ (r'\n', Text), - (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), - (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), + (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', + bygroups(Text, String.Affix, String.Doc)), + (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", + bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), @@ -72,14 +74,22 @@ class PythonLexer(RegexLexer): include('magicfuncs'), include('magicvars'), include('backtick'), - ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', bygroups(String.Affix, String.Double), 'tdqs'), - ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'), - ('([rR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'), - ("([rR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'), - ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), - ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), - ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), - ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), + ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', + bygroups(String.Affix, String.Double), 'tdqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", + bygroups(String.Affix, String.Single), 'tsqs'), + ('([rR]|[uUbB][rR]|[rR][uUbB])(")', + bygroups(String.Affix, String.Double), 'dqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(')", + bygroups(String.Affix, String.Single), 'sqs'), + ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), + combined('stringescape', 'tdqs')), + ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), + combined('stringescape', 'tsqs')), + ('([uUbB]?)(")', bygroups(String.Affix, String.Double), + combined('stringescape', 'dqs')), + ("([uUbB]?)(')", bygroups(String.Affix, String.Single), + combined('stringescape', 'sqs')), include('name'), include('numbers'), ], @@ -252,16 +262,16 @@ class Python3Lexer(RegexLexer): return [ # the old style '%s' % (...) string formatting (still valid in Py3) (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), # the new style '{}'.format(...) string formatting (r'\{' - '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name - '(\![sra])?' # conversion - '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[bcdeEfFgGnosxX%]?)?' + '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name + '(\![sra])?' # conversion + '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' 
'\}', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time - (r'[^\\\'"%\{\n]+', ttype), + (r'[^\\\'"%{\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%|(\{{1,2})', ttype) @@ -700,7 +710,7 @@ class CythonLexer(RegexLexer): ], 'strings': [ (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' - '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), + '[hlL]?[E-GXc-giorsux%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), @@ -771,18 +781,20 @@ class DgLexer(RegexLexer): (words(( 'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'', 'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object', - 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', 'super', - 'tuple', 'tuple\'', 'type'), prefix=r'(?>)\b', Keyword), # stereotypes - (r'!=|<>|=|==|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator), + (r'(:{1,2}=|[-+]=)\b', Operator.Word), + (r'(@|<<|>>)\b', Keyword), # stereotypes + (r'!=|<>|==|=|!->|->|>=|<=|[.]{3}|[+/*%=<>&|.~]', Operator), (r'[]{}:(),;[]', Punctuation), (r'(true|false|unlimited|null)\b', Keyword.Constant), (r'(this|self|result)\b', Name.Builtin.Pseudo), (r'(var)\b', Keyword.Declaration), (r'(from|import)\b', Keyword.Namespace, 'fromimport'), - (r'(metamodel|class|exception|primitive|enum|transformation|library)(\s+)([a-zA-Z0-9_]+)', + (r'(metamodel|class|exception|primitive|enum|transformation|' + r'library)(\s+)(\w+)', bygroups(Keyword.Word, Text, Name.Class)), - (r'(exception)(\s+)([a-zA-Z0-9_]+)', bygroups(Keyword.Word, Text, Name.Exception)), + (r'(exception)(\s+)(\w+)', + bygroups(Keyword.Word, Text, Name.Exception)), (r'(main)\b', Name.Function), - (r'(mapping|helper|query)(\s+)', bygroups(Keyword.Declaration, Text), 'operation'), + (r'(mapping|helper|query)(\s+)', + bygroups(Keyword.Declaration, Text), 'operation'), (r'(assert)(\s+)\b', bygroups(Keyword, Text), 'assert'), (r'(Bag|Collection|Dict|OrderedSet|Sequence|Set|Tuple|List)\b', Keyword.Type), @@ -75,46 +79,45 @@ class QVToLexer(RegexLexer): ("'", String, combined('stringescape', 'sqs')), include('name'), include('numbers'), - # (r'([a-zA-Z_][a-zA-Z0-9_]*)(::)([a-zA-Z_][a-zA-Z0-9_]*)', + # (r'([a-zA-Z_]\w*)(::)([a-zA-Z_]\w*)', # bygroups(Text, Text, Text)), - ], + ], 'fromimport': [ (r'(?:[ \t]|\\\n)+', Text), - (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace), - (r'', Text, '#pop'), - ], + (r'[a-zA-Z_][\w.]*', Name.Namespace), + default('#pop'), + ], 'operation': [ (r'::', Text), - (r'(.*::)([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*(\()', bygroups(Text,Name.Function, Text), '#pop') - ], + (r'(.*::)([a-zA-Z_]\w*)([ \t]*)(\()', + bygroups(Text, Name.Function, Text, Punctuation), '#pop') + ], 'assert': [ (r'(warning|error|fatal)\b', Keyword, '#pop'), - (r'', Text, '#pop') # all else: go back - ], + default('#pop'), # all else: go back + ], 'keywords': [ - (r'(abstract|access|any|assert|' - r'blackbox|break|case|collect|collectNested|' - r'collectOne|collectselect|collectselectOne|composes|' - r'compute|configuration|constructor|continue|datatype|' - r'default|derived|disjuncts|do|elif|else|end|' - r'endif|except|exists|extends|' - r'forAll|forEach|forOne|from|if|' - r'implies|in|inherits|init|inout|' - r'intermediate|invresolve|invresolveIn|invresolveone|' - r'invresolveoneIn|isUnique|iterate|late|let|' - r'literal|log|map|merges|' - r'modeltype|new|object|one|' - r'ordered|out|package|population|' - r'property|raise|readonly|references|refines|' 
- r'reject|resolve|resolveIn|resolveone|resolveoneIn|' - r'return|select|selectOne|sortedBy|static|switch|' - r'tag|then|try|typedef|' - r'unlimited|uses|when|where|while|with|' - r'xcollect|xmap|xselect)\b', Keyword), + (words(( + 'abstract', 'access', 'any', 'assert', 'blackbox', 'break', + 'case', 'collect', 'collectNested', 'collectOne', 'collectselect', + 'collectselectOne', 'composes', 'compute', 'configuration', + 'constructor', 'continue', 'datatype', 'default', 'derived', + 'disjuncts', 'do', 'elif', 'else', 'end', 'endif', 'except', + 'exists', 'extends', 'forAll', 'forEach', 'forOne', 'from', 'if', + 'implies', 'in', 'inherits', 'init', 'inout', 'intermediate', + 'invresolve', 'invresolveIn', 'invresolveone', 'invresolveoneIn', + 'isUnique', 'iterate', 'late', 'let', 'literal', 'log', 'map', + 'merges', 'modeltype', 'new', 'object', 'one', 'ordered', 'out', + 'package', 'population', 'property', 'raise', 'readonly', + 'references', 'refines', 'reject', 'resolve', 'resolveIn', + 'resolveone', 'resolveoneIn', 'return', 'select', 'selectOne', + 'sortedBy', 'static', 'switch', 'tag', 'then', 'try', 'typedef', + 'unlimited', 'uses', 'when', 'where', 'while', 'with', 'xcollect', + 'xmap', 'xselect'), suffix=r'\b'), Keyword), ], # There is no need to distinguish between String.Single and @@ -127,18 +130,18 @@ class QVToLexer(RegexLexer): 'stringescape': [ (r'\\([\\btnfr"\']|u[0-3][0-7]{2}|u[0-7]{1,2})', String.Escape) ], - 'dqs': [ # double-quoted string + 'dqs': [ # double-quoted string (r'"', String, '#pop'), (r'\\\\|\\"', String.Escape), include('strings') ], - 'sqs': [ # single-quoted string + 'sqs': [ # single-quoted string (r"'", String, '#pop'), (r"\\\\|\\'", String.Escape), include('strings') ], 'name': [ - ('[a-zA-Z_][a-zA-Z0-9_]*', Name), + ('[a-zA-Z_]\w*', Name), ], # numbers: excerpt taken from the python lexer 'numbers': [ @@ -146,5 +149,4 @@ class QVToLexer(RegexLexer): (r'\d+[eE][+-]?[0-9]+', Number.Float), (r'\d+', Number.Integer) ], - } - + } diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py index 103b4ad0..6dd6e8b9 100644 --- a/pygments/lexers/rdf.py +++ b/pygments/lexers/rdf.py @@ -42,8 +42,7 @@ class SparqlLexer(RegexLexer): u'\u2c00-\u2fef' u'\u3001-\ud7ff' u'\uf900-\ufdcf' - u'\ufdf0-\ufffd' - u'\U00010000-\U000effff') + u'\ufdf0-\ufffd') PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_') @@ -56,7 +55,7 @@ class SparqlLexer(RegexLexer): HEX_GRP = '0-9A-Fa-f' - PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&""()*+,;=/?#@%' + PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&"()*+,;=/?#@%' # terminal productions :: @@ -191,7 +190,7 @@ class TurtleLexer(RegexLexer): flags = re.IGNORECASE patterns = { - 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range + 'PNAME_NS': r'((?:[a-z][\w-]*)?\:)', # Simplified character range 'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)' } @@ -258,8 +257,7 @@ class TurtleLexer(RegexLexer): (r'.', String, '#pop'), ], 'end-of-string': [ - - (r'(@)([a-zA-Z]+(:?-[a-zA-Z0-9]+)*)', + (r'(@)([a-z]+(:?-[a-z0-9]+)*)', bygroups(Operator, Generic.Emph), '#pop:2'), (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'), diff --git a/pygments/lexers/scripting.py b/pygments/lexers/scripting.py index 4dd9594b..ac0f7533 100644 --- a/pygments/lexers/scripting.py +++ b/pygments/lexers/scripting.py @@ -1020,11 +1020,11 @@ class EasytrieveLexer(RegexLexer): (r"'(''|[^'])*'", String), (r'\s+', Whitespace), # Everything else just belongs to a name - (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name), 
], 'after_declaration': [ (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name.Function), - ('', Whitespace, '#pop') + default('#pop'), ], 'after_macro_argument': [ (r'\*.*\n', Comment.Single, '#pop'), @@ -1032,7 +1032,7 @@ class EasytrieveLexer(RegexLexer): (_OPERATORS_PATTERN, Operator, '#pop'), (r"'(''|[^'])*'", String, '#pop'), # Everything else just belongs to a name - (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name) + (_NON_DELIMITER_OR_COMMENT_PATTERN + r'+', Name), ], } _COMMENT_LINE_REGEX = re.compile(r'^\s*\*') @@ -1122,7 +1122,8 @@ class EasytrieveLexer(RegexLexer): class JclLexer(RegexLexer): """ - `Job Control Language (JCL) `_ + `Job Control Language (JCL) + `_ is a scripting language used on mainframe platforms to instruct the system on how to run a batch job or start a subsystem. It is somewhat comparable to MS DOS batch and Unix shell scripts. @@ -1145,10 +1146,10 @@ class JclLexer(RegexLexer): ], 'statement': [ (r'\s*\n', Whitespace, '#pop'), - (r'([a-z][a-z_0-9]*)(\s+)(exec|job)(\s*)', + (r'([a-z]\w*)(\s+)(exec|job)(\s*)', bygroups(Name.Label, Whitespace, Keyword.Reserved, Whitespace), 'option'), - (r'[a-z][a-z_0-9]*', Name.Variable, 'statement_command'), + (r'[a-z]\w*', Name.Variable, 'statement_command'), (r'\s+', Whitespace, 'statement_command'), ], 'statement_command': [ @@ -1167,10 +1168,10 @@ class JclLexer(RegexLexer): (r'\*', Name.Builtin), (r'[\[\](){}<>;,]', Punctuation), (r'[-+*/=&%]', Operator), - (r'[a-z_][a-z_0-9]*', Name), - (r'[0-9]+\.[0-9]*', Number.Float), - (r'\.[0-9]+', Number.Float), - (r'[0-9]+', Number.Integer), + (r'[a-z_]\w*', Name), + (r'\d+\.\d*', Number.Float), + (r'\.\d+', Number.Float), + (r'\d+', Number.Integer), (r"'", String, 'option_string'), (r'[ \t]+', Whitespace, 'option_comment'), (r'\.', Punctuation), diff --git a/pygments/lexers/supercollider.py b/pygments/lexers/supercollider.py index d3e4c460..cef147b8 100644 --- a/pygments/lexers/supercollider.py +++ b/pygments/lexers/supercollider.py @@ -11,7 +11,7 @@ import re -from pygments.lexer import RegexLexer, include, words +from pygments.lexer import RegexLexer, include, words, default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation @@ -43,7 +43,7 @@ class SuperColliderLexer(RegexLexer): (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/' r'([gim]+\b|\B)', String.Regex, '#pop'), (r'(?=/)', Text, ('#pop', 'badregex')), - (r'', Text, '#pop') + default('#pop'), ], 'badregex': [ (r'\n', Text, '#pop') @@ -79,8 +79,8 @@ class SuperColliderLexer(RegexLexer): 'thisFunctionDef', 'thisFunction', 'thisMethod', 'thisProcess', 'thisThread', 'this'), suffix=r'\b'), Name.Builtin), - (r'[$a-zA-Z_][a-zA-Z0-9_]*', Name.Other), - (r'\\?[$a-zA-Z_][a-zA-Z0-9_]*', String.Symbol), + (r'[$a-zA-Z_]\w*', Name.Other), + (r'\\?[$a-zA-Z_]\w*', String.Symbol), (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), (r'0x[0-9a-fA-F]+', Number.Hex), (r'[0-9]+', Number.Integer), diff --git a/pygments/lexers/testing.py b/pygments/lexers/testing.py index 0bdebe74..be8b6f71 100644 --- a/pygments/lexers/testing.py +++ b/pygments/lexers/testing.py @@ -147,7 +147,7 @@ class TAPLexer(RegexLexer): (r'^TAP version \d+\n', Name.Namespace), # Specify a plan with a plan line. 
- (r'^1..\d+', Keyword.Declaration, 'plan'), + (r'^1\.\.\d+', Keyword.Declaration, 'plan'), # A test failure (r'^(not ok)([^\S\n]*)(\d*)', diff --git a/pygments/lexers/theorem.py b/pygments/lexers/theorem.py index 60a101cc..f8c7d0a9 100644 --- a/pygments/lexers/theorem.py +++ b/pygments/lexers/theorem.py @@ -390,20 +390,23 @@ class LeanLexer(RegexLexer): flags = re.MULTILINE | re.UNICODE - keywords1 = ('import', 'abbreviation', 'opaque_hint', 'tactic_hint', 'definition', 'renaming', - 'inline', 'hiding', 'exposing', 'parameter', 'parameters', 'conjecture', - 'hypothesis', 'lemma', 'corollary', 'variable', 'variables', 'print', 'theorem', - 'axiom', 'inductive', 'structure', 'universe', 'alias', 'help', - 'options', 'precedence', 'postfix', 'prefix', 'calc_trans', 'calc_subst', 'calc_refl', - 'infix', 'infixl', 'infixr', 'notation', 'eval', 'check', 'exit', 'coercion', 'end', - 'private', 'using', 'namespace', 'including', 'instance', 'section', 'context', - 'protected', 'expose', 'export', 'set_option', 'add_rewrite', 'extends', - 'open', 'example', 'constant', 'constants', 'print', 'opaque', 'reducible', 'irreducible' + keywords1 = ( + 'import', 'abbreviation', 'opaque_hint', 'tactic_hint', 'definition', + 'renaming', 'inline', 'hiding', 'exposing', 'parameter', 'parameters', + 'conjecture', 'hypothesis', 'lemma', 'corollary', 'variable', 'variables', + 'print', 'theorem', 'axiom', 'inductive', 'structure', 'universe', 'alias', + 'help', 'options', 'precedence', 'postfix', 'prefix', 'calc_trans', + 'calc_subst', 'calc_refl', 'infix', 'infixl', 'infixr', 'notation', 'eval', + 'check', 'exit', 'coercion', 'end', 'private', 'using', 'namespace', + 'including', 'instance', 'section', 'context', 'protected', 'expose', + 'export', 'set_option', 'add_rewrite', 'extends', 'open', 'example', + 'constant', 'constants', 'print', 'opaque', 'reducible', 'irreducible', ) keywords2 = ( - 'forall', 'fun', 'Pi', 'obtain', 'from', 'have', 'show', 'assume', 'take', - 'let', 'if', 'else', 'then', 'by', 'in', 'with', 'begin', 'proof', 'qed', 'calc', 'match' + 'forall', 'fun', 'Pi', 'obtain', 'from', 'have', 'show', 'assume', + 'take', 'let', 'if', 'else', 'then', 'by', 'in', 'with', 'begin', + 'proof', 'qed', 'calc', 'match', ) keywords3 = ( @@ -414,10 +417,10 @@ class LeanLexer(RegexLexer): operators = ( '!=', '#', '&', '&&', '*', '+', '-', '/', '@', '!', '`', '-.', '->', '.', '..', '...', '::', ':>', ';', ';;', '<', - '<-', '=', '==', '>', '_', '`', '|', '||', '~', '=>', '<=', '>=', + '<-', '=', '==', '>', '_', '|', '||', '~', '=>', '<=', '>=', '/\\', '\\/', u'∀', u'Π', u'λ', u'↔', u'∧', u'∨', u'≠', u'≤', u'≥', - u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞', u'⌟', u'≡', - u'⟨', u'⟩' + u'¬', u'⁻¹', u'⬝', u'▸', u'→', u'∃', u'ℕ', u'ℤ', u'≈', u'×', u'⌞', + u'⌟', u'≡', u'⟨', u'⟩', ) punctuation = ('(', ')', ':', '{', '}', '[', ']', u'⦃', u'⦄', ':=', ',') diff --git a/pygments/lexers/typoscript.py b/pygments/lexers/typoscript.py index 25bfef9c..407847ed 100644 --- a/pygments/lexers/typoscript.py +++ b/pygments/lexers/typoscript.py @@ -44,11 +44,11 @@ class TypoScriptCssDataLexer(RegexLexer): # marker: ###MARK### (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)), # constant: {$some.constant} - (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})', + (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # constant: {register:somevalue} - (r'(.*)(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})(.*)', + 
(r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)', bygroups(String, String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol, String)), # constant # whitespace @@ -58,8 +58,8 @@ class TypoScriptCssDataLexer(RegexLexer): (r'(?,:=\.\*%+\|]', String), - (r'[\w"_\-!\/&;\(\)\{\}]+', String), + (r'[<>,:=.*%+|]', String), + (r'[\w"\-!/&;(){}]+', String), ] } @@ -79,22 +79,22 @@ class TypoScriptHtmlDataLexer(RegexLexer): # INCLUDE_TYPOSCRIPT (r'(INCLUDE_TYPOSCRIPT)', Name.Class), # Language label or extension resource FILE:... or LLL:... or EXT:... - (r'(EXT|FILE|LLL):[^\}\n"]*', String), + (r'(EXT|FILE|LLL):[^}\n"]*', String), # marker: ###MARK### (r'(.*)(###\w+###)(.*)', bygroups(String, Name.Constant, String)), # constant: {$some.constant} - (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})', + (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # constant: {register:somevalue} - (r'(.*)(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})(.*)', + (r'(.*)(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})(.*)', bygroups(String, String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol, String)), # constant # whitespace (r'\s+', Text), # other - (r'[<>,:=\.\*%+\|]', String), - (r'[\w"_\-!\/&;\(\)\{\}#]+', String), + (r'[<>,:=.*%+|]', String), + (r'[\w"\-!/&;(){}#]+', String), ] } @@ -138,38 +138,38 @@ class TypoScriptLexer(RegexLexer): r'version)([^\]]*)(\])', bygroups(String.Symbol, Name.Constant, Text, String.Symbol)), # Functions - (r'(?=[\w\-_])(HTMLparser|HTMLparser_tags|addParams|cache|encapsLines|' + (r'(?=[\w\-])(HTMLparser|HTMLparser_tags|addParams|cache|encapsLines|' r'filelink|if|imageLinkWrap|imgResource|makelinks|numRows|numberFormat|' r'parseFunc|replacement|round|select|split|stdWrap|strPad|tableStyle|' - r'tags|textStyle|typolink)(?![\w\-_])', Name.Function), + r'tags|textStyle|typolink)(?![\w\-])', Name.Function), # Toplevel objects and _* (r'(?:(=?\s*]*>', using(TypoScriptHtmlDataLexer)), + (r'<\S[^\n>]*>', using(TypoScriptHtmlDataLexer)), (r'&[^;\n]*;', String), (r'(_CSS_DEFAULT_STYLE)(\s*)(\()(?s)(.*(?=\n\)))', bygroups(Name.Class, Text, String.Symbol, using(TypoScriptCssDataLexer))), @@ -182,28 +182,28 @@ class TypoScriptLexer(RegexLexer): ], 'label': [ # Language label or extension resource FILE:... or LLL:... or EXT:... 
- (r'(EXT|FILE|LLL):[^\}\n"]*', String), + (r'(EXT|FILE|LLL):[^}\n"]*', String), # Path to a resource - (r'(?![^\w\-_])([\w\-_]+(?:/[\w\-_]+)+/?)([^\s]*\n)', + (r'(?![^\w\-])([\w\-]+(?:/[\w\-]+)+/?)(\S*\n)', bygroups(String, String)), ], 'punctuation': [ - (r'[,\.]', Punctuation), + (r'[,.]', Punctuation), ], 'operator': [ - (r'[<>,:=\.\*%+\|]', Operator), + (r'[<>,:=.*%+|]', Operator), ], 'structure': [ # Brackets and braces - (r'[\{\}\(\)\[\]\\\\]', String.Symbol), + (r'[{}()\[\]\\]', String.Symbol), ], 'constant': [ # Constant: {$some.constant} - (r'(\{)(\$)((?:[\w\-_]+\.)*)([\w\-_]+)(\})', + (r'(\{)(\$)((?:[\w\-]+\.)*)([\w\-]+)(\})', bygroups(String.Symbol, Operator, Name.Constant, Name.Constant, String.Symbol)), # constant # Constant: {register:somevalue} - (r'(\{)([\w\-_]+)(\s*:\s*)([\w\-_]+)(\})', + (r'(\{)([\w\-]+)(\s*:\s*)([\w\-]+)(\})', bygroups(String.Symbol, Name.Constant, Operator, Name.Constant, String.Symbol)), # constant # Hex color: #ff0077 @@ -216,7 +216,7 @@ class TypoScriptLexer(RegexLexer): (r'(\s*#\s*\n)', Comment), ], 'other': [ - (r'[\w"\-_!\/&;]+', Text), + (r'[\w"\-!/&;]+', Text), ], } diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 90d8d292..437913e9 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -45,7 +45,7 @@ class VCLLexer(RegexLexer): include('comments'), (r'(\.\w+)(\s*=\s*)([^;]*)(;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), - (r'}', Punctuation, '#pop'), + (r'\}', Punctuation, '#pop'), ], 'acl': [ include('whitespace'), @@ -53,18 +53,18 @@ class VCLLexer(RegexLexer): (r'[!/]+', Operator), (r';', Punctuation), (r'\d+', Number), - (r'}', Punctuation, '#pop'), + (r'\}', Punctuation, '#pop'), ], 'backend': [ include('whitespace'), (r'(\.probe)(\s*=\s*)(\w+)(;)', bygroups(Name.Attribute, Operator, Name.Variable.Global, Punctuation)), - (r'(\.probe)(\s*=\s*)({)', + (r'(\.probe)(\s*=\s*)(\{)', bygroups(Name.Attribute, Operator, Punctuation), 'probe'), (r'(\.\w+\b)(\s*=\s*)([^;]*)(\s*;)', bygroups(Name.Attribute, Operator, using(this), Punctuation)), - (r'{', Punctuation, '#push'), - (r'}', Punctuation, '#pop'), + (r'\{', Punctuation, '#push'), + (r'\}', Punctuation, '#pop'), ], 'statements': [ (r'(\d\.)?\d+[sdwhmy]', Literal.Date), @@ -91,9 +91,9 @@ class VCLLexer(RegexLexer): 'resp.reason', 'bereq.url', 'beresp.do_esi', 'beresp.proto', 'client.ip', 'bereq.proto', 'server.hostname', 'remote.ip', 'req.backend_hint', 'server.identity', 'req_top.url', 'beresp.grace', 'beresp.was_304', - 'server.ip', 'bereq.uncacheable', 'now'), suffix=r'(\b|$)'), + 'server.ip', 'bereq.uncacheable', 'now'), suffix=r'\b'), Name.Variable), - (r'[!%&+*-,/<.}{>=|~]+', Operator), + (r'[!%&+*\-,/<.}{>=|~]+', Operator), (r'[();]', Punctuation), (r'[,]+', Punctuation), @@ -105,15 +105,15 @@ class VCLLexer(RegexLexer): (r'storage\.\w+\.\w+\b', Name.Variable), (words(('true', 'false')), Name.Builtin), (r'\d+\b', Number), - (r'(backend)(\s+\w+)(\s*{)', + (r'(backend)(\s+\w+)(\s*\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'backend'), - (r'(probe\s)(\s*\w+\s)({)', + (r'(probe\s)(\s*\w+\s)(\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'probe'), - (r'(acl\s)(\s*\w+\s)({)', + (r'(acl\s)(\s*\w+\s)(\{)', bygroups(Keyword, Name.Variable.Global, Punctuation), 'acl'), (r'(vcl )(4.0)(;)$', bygroups(Keyword.Reserved, Name.Constant, Punctuation)), - (r'(sub\s+)([a-zA-Z]\w*)(\s*{)', + (r'(sub\s+)([a-zA-Z]\w*)(\s*\{)', bygroups(Keyword, Name.Function, Punctuation)), (r'([a-zA-Z_]\w*)' r'(\.)' @@ -139,12 
+139,12 @@ class VCLLexer(RegexLexer): ], 'multistring': [ (r'[^"}]', String), - (r'"}', String, '#pop'), + (r'"\}', String, '#pop'), (r'["}]', String), ], 'whitespace': [ (r'L?"', String, 'string'), - (r'{"', String, 'multistring'), + (r'\{"', String, 'multistring'), (r'\n', Text), (r'\s+', Text), (r'\\\n', Text), # line continuation @@ -176,7 +176,7 @@ class VCLSnippetLexer(VCLLexer): r'storage)($|\.\*)', Name.Variable), ], 'snippetspost': [ - (r'(backend)(\b|$)', Keyword.Reserved), + (r'(backend)\b', Keyword.Reserved), ], 'root': [ include('snippetspre'), -- cgit v1.2.1 From e71840a35ffb2aa453542ecd6f770ffbaa7db439 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 14 Feb 2016 18:23:21 +0100 Subject: Revert rst lexer part of last commit. Apparently the re module of 2.7.3 has a problem while 2.7.11 hasnt. --- pygments/lexers/markup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pygments/lexers/markup.py b/pygments/lexers/markup.py index 02146597..aac8d27e 100644 --- a/pygments/lexers/markup.py +++ b/pygments/lexers/markup.py @@ -204,7 +204,7 @@ class RstLexer(RegexLexer): bygroups(Text, Operator, using(this, state='inline'))), # Sourcecode directives (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' - r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)', + r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)', _handle_sourcecode), # A directive (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', @@ -230,7 +230,7 @@ class RstLexer(RegexLexer): (r'^(\S.*(? Date: Sun, 14 Feb 2016 10:00:16 -0800 Subject: Takes various comments into account. --- pygments/style.py | 48 ++++++++++++++++++---------------------- tests/test_lexers_other.py | 27 +++++++++++++--------- tests/test_terminal_formatter.py | 44 +++++++++++++++++++++--------------- 3 files changed, 64 insertions(+), 55 deletions(-) diff --git a/pygments/style.py b/pygments/style.py index bc318354..8c7de528 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -15,26 +15,26 @@ from pygments.util import add_metaclass _ansimap = { - ## - '#ansiblack': '000000', - '#ansidarkred': '7f0000', - '#ansidarkgreen': '007f00', - '#ansibrown': '7f7fe0', - '#ansidarkblue': '00007f', - '#ansipurple': '7f007f', - '#ansiteal': '007f7f', - '#ansilightgray': 'e5e5e5', - ### normal - '#ansidarkgray': '555555', - '#ansired': 'ff0000', - '#ansigreen': '00ff00', - '#ansiyellow': 'ffff00', - '#ansiblue': '0000ff', - '#ansifuchsia': 'ff00ff', - '#ansiturquoise': '00ffff', - '#ansiwhite': 'ffffff', - } -ansilist = list(_ansimap.keys()) + ## + '#ansiblack': '000000', + '#ansidarkred': '7f0000', + '#ansidarkgreen': '007f00', + '#ansibrown': '7f7fe0', + '#ansidarkblue': '00007f', + '#ansipurple': '7f007f', + '#ansiteal': '007f7f', + '#ansilightgray': 'e5e5e5', + ### normal + '#ansidarkgray': '555555', + '#ansired': 'ff0000', + '#ansigreen': '00ff00', + '#ansiyellow': 'ffff00', + '#ansiblue': '0000ff', + '#ansifuchsia': 'ff00ff', + '#ansiturquoise': '00ffff', + '#ansiwhite': 'ffffff', + } +ansilist = set(_ansimap.keys()) class StyleMeta(type): @@ -55,13 +55,7 @@ class StyleMeta(type): return col[0]*2 + col[1]*2 + col[2]*2 elif text == '': return '' - didyoumean = '' - if 'ansi' in text: - import difflib - possibility = difflib.get_close_matches(text, ansilist, 1) - if possibility: - didyoumean = '. 
Did you mean {} ?'.format(possibility[0]) - assert False, "wrong color format %r%s" % (text, didyoumean) + assert False, "wrong color format %r" % text _styles = obj._styles = {} diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py index d3feaefc..c59ae323 100644 --- a/tests/test_lexers_other.py +++ b/tests/test_lexers_other.py @@ -17,6 +17,14 @@ from pygments.lexers.scripting import EasytrieveLexer, JclLexer, RexxLexer def _exampleFilePath(filename): return os.path.join(os.path.dirname(__file__), 'examplefiles', filename) +class MyTestCase(unittest.TestCase): + ### Assert less is 2.7+ only. + def assertLess(self, a, b, msg=None): + """Just like self.assertTrue(a < b), but with a nicer default message.""" + if not a < b: + standardMsg = '%s not less than %s' % (safe_repr(a), safe_repr(b)) + self.fail(self._formatMessage(msg, standardMsg)) + class AnalyseTextTest(unittest.TestCase): def _testCanRecognizeAndGuessExampleFiles(self, lexer): @@ -43,16 +51,15 @@ class AnalyseTextTest(unittest.TestCase): for lexerToTest in LEXERS_TO_TEST: self._testCanRecognizeAndGuessExampleFiles(lexerToTest) -if sys.version_info > (2,7,): - class EasyTrieveLexerTest(unittest.TestCase): - def testCanGuessFromText(self): - self.assertLess(0, EasytrieveLexer.analyse_text('MACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text('\nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text(' \nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text(' \n MACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text('*\nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text( - '*\n *\n\n \n*\n MACRO')) +class EasyTrieveLexerTest(MyTestCase): + def testCanGuessFromText(self): + self.assertLess(0, EasytrieveLexer.analyse_text('MACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text('\nMACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text(' \nMACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text(' \n MACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text('*\nMACRO')) + self.assertLess(0, EasytrieveLexer.analyse_text( + '*\n *\n\n \n*\n MACRO')) class RexxLexerTest(unittest.TestCase): diff --git a/tests/test_terminal_formatter.py b/tests/test_terminal_formatter.py index f3836bd1..6111fc48 100644 --- a/tests/test_terminal_formatter.py +++ b/tests/test_terminal_formatter.py @@ -81,30 +81,38 @@ async def function(a,b,c, *d, **kwarg:Bool)->Bool: ''' +class MyTest(unittest.TestCase): + + def assertIn(self, member, container, msg=None): + """Just like self.assertTrue(a in b), but with a nicer default message.""" + if member not in container: + standardMsg = '%s not found in %s' % (safe_repr(member), + safe_repr(container)) + self.fail(self._formatMessage(msg, standardMsg)) + termtest = lambda x: highlight(x, Python3Lexer(), Terminal256Formatter(style=MyStyle)) -if sys.version_info > (2,7): - class Terminal256FormatterTest(unittest.TestCase): +class Terminal256FormatterTest(MyTest): - def test_style_html(self): - style = HtmlFormatter(style=MyStyle).get_style_defs() - self.assertIn('#555555',style, "ansigray for comment not html css style") + def test_style_html(self): + style = HtmlFormatter(style=MyStyle).get_style_defs() + self.assertIn('#555555',style, "ansigray for comment not html css style") - def test_tex_works(self): - """check tex Formatter don't crash""" - highlight(code, Python3Lexer(), LatexFormatter(style=MyStyle)) + def test_tex_works(self): + """check tex Formatter don't crash""" + highlight(code, Python3Lexer(), LatexFormatter(style=MyStyle)) - def test_html_works(self): - 
highlight(code, Python3Lexer(), HtmlFormatter(style=MyStyle)) + def test_html_works(self): + highlight(code, Python3Lexer(), HtmlFormatter(style=MyStyle)) - def test_256esc_seq(self): - """ - test that a few escape sequences are actualy used when using #ansi<> color codes - """ - self.assertIn('32;41',termtest('0x123')) - self.assertIn('32;42',termtest('123')) - self.assertIn('30;01',termtest('#comment')) - self.assertIn('34;41',termtest('"String"')) + def test_256esc_seq(self): + """ + test that a few escape sequences are actualy used when using #ansi<> color codes + """ + self.assertIn('32;41',termtest('0x123')) + self.assertIn('32;42',termtest('123')) + self.assertIn('30;01',termtest('#comment')) + self.assertIn('34;41',termtest('"String"')) -- cgit v1.2.1 From 064edec39bc9075dad066450c9a5bab254f4a581 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sun, 14 Feb 2016 10:05:12 -0800 Subject: Finish backporting assertLess and assertIn --- tests/test_lexers_other.py | 13 ++++++++++++- tests/test_terminal_formatter.py | 11 ++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py index c59ae323..4c5132ad 100644 --- a/tests/test_lexers_other.py +++ b/tests/test_lexers_other.py @@ -9,7 +9,6 @@ import glob import os import unittest -import sys from pygments.lexers import guess_lexer from pygments.lexers.scripting import EasytrieveLexer, JclLexer, RexxLexer @@ -17,6 +16,18 @@ from pygments.lexers.scripting import EasytrieveLexer, JclLexer, RexxLexer def _exampleFilePath(filename): return os.path.join(os.path.dirname(__file__), 'examplefiles', filename) +_MAX_LENGTH = 80 + +def safe_repr(obj, short=False): + try: + result = repr(obj) + except Exception: + result = object.__repr__(obj) + if not short or len(result) < _MAX_LENGTH: + return result + return result[:_MAX_LENGTH] + ' [truncated]...' + + class MyTestCase(unittest.TestCase): ### Assert less is 2.7+ only. def assertLess(self, a, b, msg=None): diff --git a/tests/test_terminal_formatter.py b/tests/test_terminal_formatter.py index 6111fc48..84373790 100644 --- a/tests/test_terminal_formatter.py +++ b/tests/test_terminal_formatter.py @@ -10,7 +10,6 @@ from __future__ import print_function import unittest -import sys import re from pygments.util import StringIO @@ -80,6 +79,16 @@ async def function(a,b,c, *d, **kwarg:Bool)->Bool: return 123, 0xb3e3 ''' +_MAX_LENGTH = 80 + +def safe_repr(obj, short=False): + try: + result = repr(obj) + except Exception: + result = object.__repr__(obj) + if not short or len(result) < _MAX_LENGTH: + return result + return result[:_MAX_LENGTH] + ' [truncated]...' class MyTest(unittest.TestCase): -- cgit v1.2.1 From e085fca74e08b166ad7d448c96558547d93b3806 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 14 Feb 2016 19:24:23 +0100 Subject: Add authors entry, changelog for PR#531 and fixup a few style issues. 
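
For context, a minimal sketch of the usage this series enables (assumed, condensed from the documentation and tests touched below; the #ansi* names and the #555555 fallback are taken from pygments/style.py):

    from pygments import highlight
    from pygments.formatters import HtmlFormatter, Terminal256Formatter
    from pygments.lexers import Python3Lexer
    from pygments.style import Style
    from pygments.token import Token

    class AnsiDemoStyle(Style):
        # Token colors given as #ansi* names instead of hex RGB values.
        styles = {
            Token.Comment: '#ansidarkgray',
            Token.String:  '#ansiblue bg:#ansidarkred',
        }

    code = 'print("Hello World")  # a comment'
    # The 256-color terminal formatter maps these names to escape codes 30-37
    # (light variants additionally get the bold flag).
    print(highlight(code, Python3Lexer(), Terminal256Formatter(style=AnsiDemoStyle)))
    # Other formatters fall back to the fixed RGB defaults, e.g. #ansidarkgray
    # becomes #555555 in the generated CSS.
    print('#555555' in HtmlFormatter(style=AnsiDemoStyle).get_style_defs())
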
--- AUTHORS | 1 + CHANGES | 5 ++- doc/docs/styles.rst | 51 +++++++++++++-------------- pygments/formatters/terminal256.py | 20 +++++------ pygments/style.py | 18 +++++----- tests/test_lexers_other.py | 49 ++++++++------------------ tests/test_terminal_formatter.py | 71 ++++++++++++-------------------------- 7 files changed, 84 insertions(+), 131 deletions(-) diff --git a/AUTHORS b/AUTHORS index 600f6b22..9f45d8ea 100644 --- a/AUTHORS +++ b/AUTHORS @@ -33,6 +33,7 @@ Other contributors, listed alphabetically, are: * Adam Blinkinsop -- Haskell, Redcode lexers * Frits van Bommel -- assembler lexers * Pierre Bourdon -- bugfixes +* Matthias Bussonnier -- ANSI style handling for terminal-256 formatter * chebee7i -- Python traceback lexer improvements * Hiram Chirino -- Scaml and Jade lexers * Ian Cooper -- VGL lexer diff --git a/CHANGES b/CHANGES index 0205c912..4eab214c 100644 --- a/CHANGES +++ b/CHANGES @@ -17,7 +17,7 @@ Version 2.2 * TypoScript (#1173) * Varnish config (PR#554) -- Added `lexers.find_lexer_class_by_name()` (#1203) +- Added `lexers.find_lexer_class_by_name()`. (#1203) - Added new token types and lexing for magic methods and variables in Python and PHP. @@ -28,6 +28,9 @@ Version 2.2 - Added a new token type for heredoc (and similar) string delimiters and lexing for them in C++, Perl, PHP, Postgresql and Ruby lexers. +- Styles can now define colors with ANSI colors for use in the 256-color + terminal formatter. (PR#531) + Version 2.1.1 ------------- diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst index 0076d062..394c8ed2 100644 --- a/doc/docs/styles.rst +++ b/doc/docs/styles.rst @@ -152,19 +152,19 @@ Terminal Styles .. versionadded:: 2.2 -Custom styles used with `Terminal256` formatter can also defines colors using -ansi-color. To do so use the `#ansigreen`, `#ansired` or any other colors -defined in ``pygments.style.ansilist``. Foreground ANSI colors will be mapped -to the corresponding `escape codes 30 to 37 -`_ thus respecting any -custom color mapping and themes provided by many terminal emulators. Light -variant are treated for foreground color with and extra bold flag. -`bg:#ansi` will also be respected, except the light variant will be the -same shade as their light variant. - -See following example where the color of the string `"hello world"` is governed -by the escape sequence `\x1b34;01m` (Ansi Blue, Bold, `41` beeing red background) -instead of an extended foreground & background color. +Custom styles used with the 256-color terminal formatter can also map colors to +use the 8 default ANSI colors. To do so, use ``#ansigreen``, ``#ansired`` or +any other colors defined in :attr:`pygments.style.ansilist`. Foreground ANSI +colors will be mapped to the corresponding `escape codes 30 to 37 +`_ thus respecting any +custom color mapping and themes provided by many terminal emulators. Light +variants are treated as foreground color with and an added bold flag. +``bg:#ansi`` will also be respected, except the light variant will be the +same shade as their dark variant. + +See the following example where the color of the string ``"hello world"`` is +governed by the escape sequence ``\x1b[34;01m`` (Ansi Blue, Bold, 41 being red +background) instead of an extended foreground & background color. .. sourcecode:: pycon @@ -182,23 +182,20 @@ instead of an extended foreground & background color. 
>>> code = 'print("Hello World")' >>> result = highlight(code, Python3Lexer(), Terminal256Formatter(style=MyStyle)) >>> print(result.encode()) - b'print(\x1b[34;41;01m"\x1b[39;49;00m\x1b[34;41;01mHello World\x1b[39;49;00m\x1b[34;41;01m"\x1b[39;49;00m)\n' + b'\x1b[34;41;01m"\x1b[39;49;00m\x1b[34;41;01mHello World\x1b[39;49;00m\x1b[34;41;01m"\x1b[39;49;00m' -Style that use `#ansi*` colors might not correctly work with -formatters others than ``Terminal256``. `HtmlFormatter` is capable of handling -some `#ansi*` code and will map to a fixed HTML/CSS color. For example, -`#ansiblue` will be converted to `color:#0000ff` , `#ansired` to `color:#ff0000`. +Colors specified using ``#ansi*`` are converted to a default set of RGB colors +when used with formatters other than the terminal-256 formatter. -By definition of Ansi color the following color are considered "light" colors, -and will be rendered by most terminal as bold: +By definition of ANSI, the following colors are considered "light" colors, and +will be rendered by most terminals as bold: - - "darkgray", "red", "green", "yellow", "blue", "fuchsia", "turquoise", - "white" +- "darkgray", "red", "green", "yellow", "blue", "fuchsia", "turquoise", "white" +The following are considered "dark" colors and will be rendered as non-bold: -The following are considered "dark" color and will be rendered as non-bold: - - - "black", "darkred", "darkgreen", "brown", "darkblue", "purple", "teal", - "lightgray" +- "black", "darkred", "darkgreen", "brown", "darkblue", "purple", "teal", + "lightgray" -Exact behavior might depends on the terminal emulator you are using, and its settings. +Exact behavior might depends on the terminal emulator you are using, and its +settings. diff --git a/pygments/formatters/terminal256.py b/pygments/formatters/terminal256.py index 1aa19f25..03c3a42a 100644 --- a/pygments/formatters/terminal256.py +++ b/pygments/formatters/terminal256.py @@ -55,14 +55,14 @@ class EscapeSequence: self.bold = True # extract fg color code. attrs.append(esc[2:4]) - else : + else: attrs.extend(("38", "5", "%i" % self.fg)) if self.bg is not None: if self.bg in ansilist: esc = codes[self.bg[5:]] # extract fg color code, add 10 for bg. attrs.append(str(int(esc[2:4])+10)) - else : + else: attrs.extend(("48", "5", "%i" % self.bg)) if self.bold: attrs.append("01") @@ -103,15 +103,13 @@ class Terminal256Formatter(Formatter): and converts them to nearest ANSI 256-color escape sequences. Bold and underline attributes from the style are preserved (and displayed). - .. versionadded:: 2.2 - - If the used style defined foreground colors in the form `#ansi*`, then - `Terminal256Formatter` will map these to non extended foreground color. - - See AnsiTerminalStyle_ for more informations. - .. versionadded:: 0.9 + .. versionchanged:: 2.2 + If the used style defines foreground colors in the form ``#ansi*``, then + `Terminal256Formatter` will map these to non extended foreground color. + See :ref:`AnsiTerminalStyle` for more information. 
+ Options accepted: `style` @@ -190,8 +188,8 @@ class Terminal256Formatter(Formatter): def _color_index(self, color): index = self.best_match.get(color, None) - if color in ansilist: - # strip the `#ansi` part an look up code + if color in ansilist: + # strip the `#ansi` part and look up code index = color self.best_match[color] = index if index is None: diff --git a/pygments/style.py b/pygments/style.py index 8c7de528..797a1d34 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -12,10 +12,9 @@ from pygments.token import Token, STANDARD_TYPES from pygments.util import add_metaclass - - +# Default mapping of #ansixxx to RGB colors. _ansimap = { - ## + # dark '#ansiblack': '000000', '#ansidarkred': '7f0000', '#ansidarkgreen': '007f00', @@ -24,7 +23,7 @@ _ansimap = { '#ansipurple': '7f007f', '#ansiteal': '007f7f', '#ansilightgray': 'e5e5e5', - ### normal + # normal '#ansidarkgray': '555555', '#ansired': 'ff0000', '#ansigreen': '00ff00', @@ -33,8 +32,9 @@ _ansimap = { '#ansifuchsia': 'ff00ff', '#ansiturquoise': '00ffff', '#ansiwhite': 'ffffff', - } -ansilist = set(_ansimap.keys()) +} +ansilist = set(_ansimap) + class StyleMeta(type): @@ -104,12 +104,11 @@ class StyleMeta(type): def style_for_token(cls, token): t = cls._styles[token] - ansicolor = None + ansicolor = bgansicolor = None color = t[0] if color.startswith('#ansi'): ansicolor = color color = _ansimap[color] - bgansicolor = None bgcolor = t[4] if bgcolor.startswith('#ansi'): bgansicolor = bgcolor @@ -126,8 +125,7 @@ class StyleMeta(type): 'sans': bool(t[7]) or None, 'mono': bool(t[8]) or None, 'ansicolor': ansicolor, - 'bgansicolor': bgansicolor, - + 'bgansicolor': bgansicolor, } def list_styles(cls): diff --git a/tests/test_lexers_other.py b/tests/test_lexers_other.py index 4c5132ad..90d05ef8 100644 --- a/tests/test_lexers_other.py +++ b/tests/test_lexers_other.py @@ -13,29 +13,10 @@ import unittest from pygments.lexers import guess_lexer from pygments.lexers.scripting import EasytrieveLexer, JclLexer, RexxLexer + def _exampleFilePath(filename): return os.path.join(os.path.dirname(__file__), 'examplefiles', filename) -_MAX_LENGTH = 80 - -def safe_repr(obj, short=False): - try: - result = repr(obj) - except Exception: - result = object.__repr__(obj) - if not short or len(result) < _MAX_LENGTH: - return result - return result[:_MAX_LENGTH] + ' [truncated]...' - - -class MyTestCase(unittest.TestCase): - ### Assert less is 2.7+ only. 
- def assertLess(self, a, b, msg=None): - """Just like self.assertTrue(a < b), but with a nicer default message.""" - if not a < b: - standardMsg = '%s not less than %s' % (safe_repr(a), safe_repr(b)) - self.fail(self._formatMessage(msg, standardMsg)) - class AnalyseTextTest(unittest.TestCase): def _testCanRecognizeAndGuessExampleFiles(self, lexer): @@ -48,8 +29,8 @@ class AnalyseTextTest(unittest.TestCase): text = fp.read().decode('utf-8') probability = lexer.analyse_text(text) self.assertTrue(probability > 0, - '%s must recognize %r' % ( - lexer.name, exampleFilePath)) + '%s must recognize %r' % ( + lexer.name, exampleFilePath)) guessedLexer = guess_lexer(text) self.assertEqual(guessedLexer.name, lexer.name) @@ -62,27 +43,27 @@ class AnalyseTextTest(unittest.TestCase): for lexerToTest in LEXERS_TO_TEST: self._testCanRecognizeAndGuessExampleFiles(lexerToTest) -class EasyTrieveLexerTest(MyTestCase): + +class EasyTrieveLexerTest(unittest.TestCase): def testCanGuessFromText(self): - self.assertLess(0, EasytrieveLexer.analyse_text('MACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text('\nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text(' \nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text(' \n MACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text('*\nMACRO')) - self.assertLess(0, EasytrieveLexer.analyse_text( + self.assertTrue(EasytrieveLexer.analyse_text('MACRO')) + self.assertTrue(EasytrieveLexer.analyse_text('\nMACRO')) + self.assertTrue(EasytrieveLexer.analyse_text(' \nMACRO')) + self.assertTrue(EasytrieveLexer.analyse_text(' \n MACRO')) + self.assertTrue(EasytrieveLexer.analyse_text('*\nMACRO')) + self.assertTrue(EasytrieveLexer.analyse_text( '*\n *\n\n \n*\n MACRO')) class RexxLexerTest(unittest.TestCase): def testCanGuessFromText(self): - self.assertAlmostEqual(0.01, - RexxLexer.analyse_text('/* */')) + self.assertAlmostEqual(0.01, RexxLexer.analyse_text('/* */')) self.assertAlmostEqual(1.0, - RexxLexer.analyse_text('''/* Rexx */ + RexxLexer.analyse_text('''/* Rexx */ say "hello world"''')) val = RexxLexer.analyse_text('/* */\n' - 'hello:pRoceduRe\n' - ' say "hello world"') + 'hello:pRoceduRe\n' + ' say "hello world"') self.assertTrue(val > 0.5, val) val = RexxLexer.analyse_text('''/* */ if 1 > 0 then do diff --git a/tests/test_terminal_formatter.py b/tests/test_terminal_formatter.py index 84373790..cb5c6c44 100644 --- a/tests/test_terminal_formatter.py +++ b/tests/test_terminal_formatter.py @@ -14,7 +14,8 @@ import re from pygments.util import StringIO from pygments.lexers.sql import PlPgsqlLexer -from pygments.formatters import TerminalFormatter,Terminal256Formatter, HtmlFormatter, LatexFormatter +from pygments.formatters import TerminalFormatter, Terminal256Formatter, \ + HtmlFormatter, LatexFormatter from pygments.style import Style from pygments.token import Token @@ -31,9 +32,11 @@ DEMO_TOKENS = list(DEMO_LEXER().get_tokens(DEMO_TEXT)) ANSI_RE = re.compile(r'\x1b[\w\W]*?m') + def strip_ansi(x): return ANSI_RE.sub('', x) + class TerminalFormatterTest(unittest.TestCase): def test_reasonable_output(self): out = StringIO() @@ -56,22 +59,17 @@ class TerminalFormatterTest(unittest.TestCase): self.assertTrue(a in b) - - - - class MyStyle(Style): - styles = { Token.Comment: '#ansidarkgray', - Token.String: '#ansiblue bg:#ansidarkred', - Token.Number : '#ansigreen bg:#ansidarkgreen', - Token.Number.Hex: '#ansidarkgreen bg:#ansired', + Token.String: '#ansiblue bg:#ansidarkred', + Token.Number: '#ansigreen bg:#ansidarkgreen', + Token.Number.Hex: '#ansidarkgreen 
bg:#ansired', } - -code = ''' +class Terminal256FormatterTest(unittest.TestCase): + code = ''' # this should be a comment print("Hello World") async def function(a,b,c, *d, **kwarg:Bool)->Bool: @@ -79,49 +77,26 @@ async def function(a,b,c, *d, **kwarg:Bool)->Bool: return 123, 0xb3e3 ''' -_MAX_LENGTH = 80 - -def safe_repr(obj, short=False): - try: - result = repr(obj) - except Exception: - result = object.__repr__(obj) - if not short or len(result) < _MAX_LENGTH: - return result - return result[:_MAX_LENGTH] + ' [truncated]...' - -class MyTest(unittest.TestCase): - - def assertIn(self, member, container, msg=None): - """Just like self.assertTrue(a in b), but with a nicer default message.""" - if member not in container: - standardMsg = '%s not found in %s' % (safe_repr(member), - safe_repr(container)) - self.fail(self._formatMessage(msg, standardMsg)) - - -termtest = lambda x: highlight(x, Python3Lexer(), Terminal256Formatter(style=MyStyle)) -class Terminal256FormatterTest(MyTest): - def test_style_html(self): style = HtmlFormatter(style=MyStyle).get_style_defs() - self.assertIn('#555555',style, "ansigray for comment not html css style") + self.assertTrue('#555555' in style, + "ansigray for comment not html css style") - def test_tex_works(self): - """check tex Formatter don't crash""" - highlight(code, Python3Lexer(), LatexFormatter(style=MyStyle)) - - def test_html_works(self): - highlight(code, Python3Lexer(), HtmlFormatter(style=MyStyle)) + def test_others_work(self): + """check other formatters don't crash""" + highlight(self.code, Python3Lexer(), LatexFormatter(style=MyStyle)) + highlight(self.code, Python3Lexer(), HtmlFormatter(style=MyStyle)) def test_256esc_seq(self): """ test that a few escape sequences are actualy used when using #ansi<> color codes """ - self.assertIn('32;41',termtest('0x123')) - self.assertIn('32;42',termtest('123')) - self.assertIn('30;01',termtest('#comment')) - self.assertIn('34;41',termtest('"String"')) - - + def termtest(x): + return highlight(x, Python3Lexer(), + Terminal256Formatter(style=MyStyle)) + + self.assertTrue('32;41' in termtest('0x123')) + self.assertTrue('32;42' in termtest('123')) + self.assertTrue('30;01' in termtest('#comment')) + self.assertTrue('34;41' in termtest('"String"')) -- cgit v1.2.1 From 5cc92cf8771627278cdb7b374ba0a101fad72bd2 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 14 Feb 2016 21:46:33 +0100 Subject: Add back VCL snippet lexer analyse_text. 
--- pygments/lexers/varnish.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pygments/lexers/varnish.py b/pygments/lexers/varnish.py index 437913e9..e64a601b 100644 --- a/pygments/lexers/varnish.py +++ b/pygments/lexers/varnish.py @@ -169,6 +169,10 @@ class VCLSnippetLexer(VCLLexer): mimetypes = ['text/x-vclsnippet'] filenames = [] + def analyse_text(text): + # override method inherited from VCLLexer + return 0 + tokens = { 'snippetspre': [ (r'\.\.\.+', Comment), -- cgit v1.2.1 From a34c939edf7b8b3f008daa3c1b7e2c94d34db550 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sun, 14 Feb 2016 15:17:47 -0800 Subject: Rename ansilist that now is a set, slipped though PR 531 review --- doc/docs/styles.rst | 2 +- pygments/formatters/terminal256.py | 8 ++++---- pygments/style.py | 4 ++-- pygments/styles/arduino.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/docs/styles.rst b/doc/docs/styles.rst index 394c8ed2..1094a270 100644 --- a/doc/docs/styles.rst +++ b/doc/docs/styles.rst @@ -154,7 +154,7 @@ Terminal Styles Custom styles used with the 256-color terminal formatter can also map colors to use the 8 default ANSI colors. To do so, use ``#ansigreen``, ``#ansired`` or -any other colors defined in :attr:`pygments.style.ansilist`. Foreground ANSI +any other colors defined in :attr:`pygments.style.ansicolors`. Foreground ANSI colors will be mapped to the corresponding `escape codes 30 to 37 `_ thus respecting any custom color mapping and themes provided by many terminal emulators. Light diff --git a/pygments/formatters/terminal256.py b/pygments/formatters/terminal256.py index 03c3a42a..5110bc9e 100644 --- a/pygments/formatters/terminal256.py +++ b/pygments/formatters/terminal256.py @@ -28,7 +28,7 @@ import sys from pygments.formatter import Formatter from pygments.console import codes -from pygments.style import ansilist +from pygments.style import ansicolors __all__ = ['Terminal256Formatter', 'TerminalTrueColorFormatter'] @@ -49,7 +49,7 @@ class EscapeSequence: def color_string(self): attrs = [] if self.fg is not None: - if self.fg in ansilist: + if self.fg in ansicolors: esc = codes[self.fg[5:]] if ';01m' in esc: self.bold = True @@ -58,7 +58,7 @@ class EscapeSequence: else: attrs.extend(("38", "5", "%i" % self.fg)) if self.bg is not None: - if self.bg in ansilist: + if self.bg in ansicolors: esc = codes[self.bg[5:]] # extract fg color code, add 10 for bg. 
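
A rough sketch of the effect on lexer guessing (the sample input is assumed; analyse_text is called on the class just as the test suite does):

    from pygments.lexers import guess_lexer
    from pygments.lexers.varnish import VCLLexer, VCLSnippetLexer

    vcl_source = 'vcl 4.0;\n\nbackend default {\n    .host = "127.0.0.1";\n}\n'

    # The full VCL lexer keeps its heuristic and should score above zero here,
    # while the snippet lexer now always reports 0 and so never wins guessing.
    print(VCLLexer.analyse_text(vcl_source))
    print(VCLSnippetLexer.analyse_text(vcl_source))
    print(type(guess_lexer(vcl_source)).__name__)  # expected to be the full VCL lexer
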
attrs.append(str(int(esc[2:4])+10)) @@ -188,7 +188,7 @@ class Terminal256Formatter(Formatter): def _color_index(self, color): index = self.best_match.get(color, None) - if color in ansilist: + if color in ansicolors: # strip the `#ansi` part and look up code index = color self.best_match[color] = index diff --git a/pygments/style.py b/pygments/style.py index 797a1d34..68ee3a19 100644 --- a/pygments/style.py +++ b/pygments/style.py @@ -33,7 +33,7 @@ _ansimap = { '#ansiturquoise': '00ffff', '#ansiwhite': 'ffffff', } -ansilist = set(_ansimap) +ansicolors = set(_ansimap) class StyleMeta(type): @@ -45,7 +45,7 @@ class StyleMeta(type): obj.styles[token] = '' def colorformat(text): - if text in ansilist: + if text in ansicolors: return text if text[0:1] == '#': col = text[1:] diff --git a/pygments/styles/arduino.py b/pygments/styles/arduino.py index 5b31bb84..1bf2103c 100644 --- a/pygments/styles/arduino.py +++ b/pygments/styles/arduino.py @@ -95,4 +95,4 @@ class ArduinoStyle(Style): Generic.Strong: "", # class: 'gs' Generic.Subheading: "", # class: 'gu' Generic.Traceback: "", # class: 'gt' - } \ No newline at end of file + } -- cgit v1.2.1 From 3f7d5c58d1f87959b241daa554abdbf5613d7ba5 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 15 Feb 2016 08:39:51 +0100 Subject: Add a few more introductory words to lexerdev. --- doc/docs/lexerdevelopment.rst | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/docs/lexerdevelopment.rst b/doc/docs/lexerdevelopment.rst index 2c868440..02d466a3 100644 --- a/doc/docs/lexerdevelopment.rst +++ b/doc/docs/lexerdevelopment.rst @@ -88,8 +88,12 @@ one. Adding and testing a new lexer ============================== -To make Pygments aware of your new lexer, you have to perform the following -steps: +Using a lexer that is not part of Pygments can be done via the Python API. You +can import and instantiate the lexer, and pass it to :func:`pygments.highlight`. + +To prepare your new lexer for inclusion in the Pygments distribution, so that it +will be found when passing filenames or lexer aliases from the command line, you +have to perform the following steps. First, change to the current directory containing the Pygments source code: @@ -101,11 +105,13 @@ Select a matching module under ``pygments/lexers``, or create a new module for your lexer class. Next, make sure the lexer is known from outside of the module. All modules in -the ``pygments.lexers`` specify ``__all__``. For example, ``esoteric.py`` sets:: +the ``pygments.lexers`` package specify ``__all__``. For example, +``esoteric.py`` sets:: __all__ = ['BrainfuckLexer', 'BefungeLexer', ...] -Simply add the name of your lexer class to this list. +Add the name of your lexer class to this list (or create the list if your lexer +is the only class in the module). Finally the lexer can be made publicly known by rebuilding the lexer mapping: -- cgit v1.2.1 From 96372ec5bd65fcc95c1dbffdba4475d3e8e425f7 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 15 Feb 2016 08:41:08 +0100 Subject: Elaborate on source code dir. 
--- doc/docs/lexerdevelopment.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/docs/lexerdevelopment.rst b/doc/docs/lexerdevelopment.rst index 02d466a3..fd6e76b9 100644 --- a/doc/docs/lexerdevelopment.rst +++ b/doc/docs/lexerdevelopment.rst @@ -95,7 +95,9 @@ To prepare your new lexer for inclusion in the Pygments distribution, so that it will be found when passing filenames or lexer aliases from the command line, you have to perform the following steps. -First, change to the current directory containing the Pygments source code: +First, change to the current directory containing the Pygments source code. You +will need to have either an unpacked source tarball, or (preferably) a copy +cloned from BitBucket. .. code-block:: console -- cgit v1.2.1 From 529617b57c1625527765c6fee6e327f90d71a956 Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Tue, 16 Feb 2016 10:00:09 +0100 Subject: Fix Clean: allow typedef on first line --- pygments/lexers/clean.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py index 7fb86844..755c989c 100644 --- a/pygments/lexers/clean.py +++ b/pygments/lexers/clean.py @@ -135,6 +135,7 @@ class CleanLexer(ExtendedRegexLexer): # Type definitions (r'(?<=\n)([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'), + (r'^([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'), # Literals (r'\'\\?.(? Date: Tue, 16 Feb 2016 18:08:59 +0100 Subject: Store indent in lexer instance --- pygments/lexers/clean.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py index 755c989c..f6f62cff 100644 --- a/pygments/lexers/clean.py +++ b/pygments/lexers/clean.py @@ -27,8 +27,7 @@ class CleanLexer(ExtendedRegexLexer): def __init__(self, *args, **kwargs): super(CleanLexer, self).__init__(*args, **kwargs) - global stored_indent - stored_indent = 0 + self.stored_indent = 0 def check_class_not_import(lexer, match, ctx): if match.group(0) == 'import': @@ -47,33 +46,30 @@ class CleanLexer(ExtendedRegexLexer): ctx.pos = match.end() def store_indent(lexer, match, ctx): - global stored_indent # Tabs are four spaces: # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt - stored_indent = len(match.group(0).replace('\t',' ')) + self.stored_indent = len(match.group(0).replace('\t',' ')) ctx.pos = match.end() yield match.start(), Text, match.group(0) def check_indent1(lexer, match, ctx): - global stored_indent indent = len(match.group(0)) - 1 - if indent > stored_indent: + if indent > self.stored_indent: yield match.start(), Whitespace, match.group(0) ctx.pos = match.start() + indent + 1 else: - stored_indent = 0 + self.stored_indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-1] yield match.start(), Whitespace, match.group(0)[1:] def check_indent2(lexer, match, ctx): - global stored_indent indent = len(match.group(0)) - 1 - if indent > stored_indent: + if indent > self.stored_indent: yield match.start(), Whitespace, match.group(0) ctx.pos = match.start() + indent + 1 else: - stored_indent = 0 + self.stored_indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-2] yield match.start(), Whitespace, match.group(0)[1:] @@ -81,13 +77,12 @@ class CleanLexer(ExtendedRegexLexer): ctx.pos = ctx.pos + 1 def check_indent3(lexer, match, ctx): - global stored_indent indent = len(match.group(0)) - 1 - if indent > stored_indent: + if indent > self.stored_indent: yield match.start(), Whitespace, match.group(0) 
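
As a rough sketch of the Python-API route mentioned in the lexerdevelopment changes above (the module and file names here are hypothetical, not part of the patch):

    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from my_lexers import DiffLexer  # hypothetical module holding your unregistered lexer

    # An unregistered lexer is not found by filename or alias lookup, but it
    # can be instantiated and passed to highlight() directly.
    with open('example.diff') as fp:          # hypothetical input file
        html = highlight(fp.read(), DiffLexer(), HtmlFormatter(full=True))

    with open('example.html', 'w') as fp:
        fp.write(html)
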
ctx.pos = match.start() + indent + 1 else: - stored_indent = 0 + self.stored_indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-3] yield match.start(), Whitespace, match.group(0)[1:] -- cgit v1.2.1 From a943ba5c2fec8f4000b97dd9e1f72a7fd7e58aac Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Tue, 16 Feb 2016 18:17:51 +0100 Subject: Fix previous commit --- pygments/lexers/clean.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py index f6f62cff..a29233c3 100644 --- a/pygments/lexers/clean.py +++ b/pygments/lexers/clean.py @@ -48,28 +48,28 @@ class CleanLexer(ExtendedRegexLexer): def store_indent(lexer, match, ctx): # Tabs are four spaces: # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt - self.stored_indent = len(match.group(0).replace('\t',' ')) + lexer.stored_indent = len(match.group(0).replace('\t',' ')) ctx.pos = match.end() yield match.start(), Text, match.group(0) def check_indent1(lexer, match, ctx): indent = len(match.group(0)) - 1 - if indent > self.stored_indent: + if indent > lexer.stored_indent: yield match.start(), Whitespace, match.group(0) ctx.pos = match.start() + indent + 1 else: - self.stored_indent = 0 + lexer.stored_indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-1] yield match.start(), Whitespace, match.group(0)[1:] def check_indent2(lexer, match, ctx): indent = len(match.group(0)) - 1 - if indent > self.stored_indent: + if indent > lexer.stored_indent: yield match.start(), Whitespace, match.group(0) ctx.pos = match.start() + indent + 1 else: - self.stored_indent = 0 + lexer.stored_indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-2] yield match.start(), Whitespace, match.group(0)[1:] @@ -78,11 +78,11 @@ class CleanLexer(ExtendedRegexLexer): def check_indent3(lexer, match, ctx): indent = len(match.group(0)) - 1 - if indent > self.stored_indent: + if indent > lexer.stored_indent: yield match.start(), Whitespace, match.group(0) ctx.pos = match.start() + indent + 1 else: - self.stored_indent = 0 + lexer.stored_indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-3] yield match.start(), Whitespace, match.group(0)[1:] -- cgit v1.2.1 From 06f7bf67a0ae45d0ee2aeefa572c4c2068ca05ee Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Tue, 16 Feb 2016 21:23:32 +0100 Subject: Fixes & cleanup Clean lexer as per PR discussion --- AUTHORS | 1 + pygments/lexers/clean.py | 40 ++++++++++++++++++++-------------------- pygments/lexers/functional.py | 1 - 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/AUTHORS b/AUTHORS index d4d85bb7..c9e0d3ff 100644 --- a/AUTHORS +++ b/AUTHORS @@ -151,6 +151,7 @@ Other contributors, listed alphabetically, are: * Alexander Smishlajev -- Visual FoxPro lexer * Steve Spigarelli -- XQuery lexer * Jerome St-Louis -- eC lexer +* Camil Staps -- Clean lexer * James Strachan -- Kotlin lexer * Tom Stuart -- Treetop lexer * Tiberius Teng -- default style overhaul diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py index a29233c3..2d3313d1 100644 --- a/pygments/lexers/clean.py +++ b/pygments/lexers/clean.py @@ -1,16 +1,16 @@ # -*- coding: utf-8 -*- """ - pygments.lexers.make + pygments.lexers.clean ~~~~~~~~~~~~~~~~~~~~ - Lexers for Makefiles and similar. + Lexer for the Clean language. - :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. + :copyright: Copyright 2016 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. 
""" -from pygments.lexer import ExtendedRegexLexer, bygroups, words, include -from pygments.token import * +from pygments.lexer import ExtendedRegexLexer, bygroups, words, include, default +from pygments.token import Comment, Keyword, Literal, Name, Number, Operator, Punctuation, String, Text, Whitespace __all__ = ['CleanLexer'] @@ -120,7 +120,7 @@ class CleanLexer(ExtendedRegexLexer): (words(('class','instance','where','with','let','let!','with','in', 'case','of','infix','infixr','infixl','generic','derive', 'otherwise', 'code', 'inline'), - prefix=r'(?s)^', suffix=r'(?=\s)'), Keyword), + prefix=r'^', suffix=r'(?=\s)'), Keyword), # Function definitions (r'(?=\{\|)', Whitespace, 'genericfunction'), @@ -136,10 +136,10 @@ class CleanLexer(ExtendedRegexLexer): (r'\'\\?.(?~*\^\|\+&%]+', Name.Function), (r'(?=\{\|)', Punctuation, 'genericfunction'), - (r'', Text, '#pop') + default('#pop') ] } diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index 5d4cdf0c..180d3fd4 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -17,6 +17,5 @@ from pygments.lexers.theorem import CoqLexer from pygments.lexers.erlang import ErlangLexer, ErlangShellLexer, \ ElixirConsoleLexer, ElixirLexer from pygments.lexers.ml import SMLLexer, OcamlLexer, OpaLexer -from pygments.lexers.clean import CleanLexer __all__ = [] -- cgit v1.2.1 From 9b95964dd7509ac55456f0c1913c2f716723f6b8 Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Tue, 16 Feb 2016 21:37:33 +0100 Subject: Adapted CleanLexer according to PR discussion Most importantly, CleanLexer now stores indent in a LexerContext instead of in the lexer instance. --- pygments/lexers/clean.py | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py index 2d3313d1..b33bf884 100644 --- a/pygments/lexers/clean.py +++ b/pygments/lexers/clean.py @@ -9,8 +9,10 @@ :license: BSD, see LICENSE for details. 
""" -from pygments.lexer import ExtendedRegexLexer, bygroups, words, include, default -from pygments.token import Comment, Keyword, Literal, Name, Number, Operator, Punctuation, String, Text, Whitespace +from pygments.lexer import ExtendedRegexLexer, LexerContext,\ + bygroups, words, include, default +from pygments.token import Comment, Keyword, Literal, Name, Number, Operator,\ + Punctuation, String, Text, Whitespace __all__ = ['CleanLexer'] @@ -27,7 +29,11 @@ class CleanLexer(ExtendedRegexLexer): def __init__(self, *args, **kwargs): super(CleanLexer, self).__init__(*args, **kwargs) - self.stored_indent = 0 + + def get_tokens_unprocessed(self, text=None, context=None): + ctx = LexerContext(text, 0) + ctx.indent = 0 + return ExtendedRegexLexer.get_tokens_unprocessed(self, text, context=ctx) def check_class_not_import(lexer, match, ctx): if match.group(0) == 'import': @@ -48,28 +54,28 @@ class CleanLexer(ExtendedRegexLexer): def store_indent(lexer, match, ctx): # Tabs are four spaces: # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt - lexer.stored_indent = len(match.group(0).replace('\t',' ')) + ctx.indent = len(match.group(0).replace('\t',' ')) ctx.pos = match.end() yield match.start(), Text, match.group(0) def check_indent1(lexer, match, ctx): indent = len(match.group(0)) - 1 - if indent > lexer.stored_indent: + if indent > ctx.indent: yield match.start(), Whitespace, match.group(0) ctx.pos = match.start() + indent + 1 else: - lexer.stored_indent = 0 + ctx.indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-1] yield match.start(), Whitespace, match.group(0)[1:] def check_indent2(lexer, match, ctx): indent = len(match.group(0)) - 1 - if indent > lexer.stored_indent: + if indent > ctx.indent: yield match.start(), Whitespace, match.group(0) ctx.pos = match.start() + indent + 1 else: - lexer.stored_indent = 0 + ctx.indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-2] yield match.start(), Whitespace, match.group(0)[1:] @@ -78,11 +84,11 @@ class CleanLexer(ExtendedRegexLexer): def check_indent3(lexer, match, ctx): indent = len(match.group(0)) - 1 - if indent > lexer.stored_indent: + if indent > ctx.indent: yield match.start(), Whitespace, match.group(0) ctx.pos = match.start() + indent + 1 else: - lexer.stored_indent = 0 + ctx.indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-3] yield match.start(), Whitespace, match.group(0)[1:] @@ -136,10 +142,10 @@ class CleanLexer(ExtendedRegexLexer): (r'\'\\?.(? Date: Tue, 16 Feb 2016 21:59:38 +0100 Subject: Clean lexer: style nits --- CHANGES | 1 + pygments/lexers/_mapping.py | 2 +- pygments/lexers/clean.py | 121 +++++++++++++++++++++----------------------- 3 files changed, 61 insertions(+), 63 deletions(-) diff --git a/CHANGES b/CHANGES index 4eab214c..81e22722 100644 --- a/CHANGES +++ b/CHANGES @@ -16,6 +16,7 @@ Version 2.2 * AMPL * TypoScript (#1173) * Varnish config (PR#554) + * Clean (PR#503) - Added `lexers.find_lexer_class_by_name()`. 
(#1203) diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py index c5728cf2..5337dc55 100644 --- a/pygments/lexers/_mapping.py +++ b/pygments/lexers/_mapping.py @@ -79,7 +79,7 @@ LEXERS = { 'CheetahXmlLexer': ('pygments.lexers.templates', 'XML+Cheetah', ('xml+cheetah', 'xml+spitfire'), (), ('application/xml+cheetah', 'application/xml+spitfire')), 'CirruLexer': ('pygments.lexers.webmisc', 'Cirru', ('cirru',), ('*.cirru',), ('text/x-cirru',)), 'ClayLexer': ('pygments.lexers.c_like', 'Clay', ('clay',), ('*.clay',), ('text/x-clay',)), - 'CleanLexer': ('pygments.lexers.clean', 'CleanLexer', ('Clean', 'clean'), ('*.icl', '*.dcl'), ()), + 'CleanLexer': ('pygments.lexers.clean', 'Clean', ('clean',), ('*.icl', '*.dcl'), ()), 'ClojureLexer': ('pygments.lexers.jvm', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')), 'ClojureScriptLexer': ('pygments.lexers.jvm', 'ClojureScript', ('clojurescript', 'cljs'), ('*.cljs',), ('text/x-clojurescript', 'application/x-clojurescript')), 'CobolFreeformatLexer': ('pygments.lexers.business', 'COBOLFree', ('cobolfree',), ('*.cbl', '*.CBL'), ()), diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py index b33bf884..acff807e 100644 --- a/pygments/lexers/clean.py +++ b/pygments/lexers/clean.py @@ -1,35 +1,33 @@ # -*- coding: utf-8 -*- """ pygments.lexers.clean - ~~~~~~~~~~~~~~~~~~~~ + ~~~~~~~~~~~~~~~~~~~~~ Lexer for the Clean language. - :copyright: Copyright 2016 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ -from pygments.lexer import ExtendedRegexLexer, LexerContext,\ - bygroups, words, include, default -from pygments.token import Comment, Keyword, Literal, Name, Number, Operator,\ - Punctuation, String, Text, Whitespace +from pygments.lexer import ExtendedRegexLexer, LexerContext, \ + bygroups, words, include, default +from pygments.token import Comment, Keyword, Literal, Name, Number, Operator, \ + Punctuation, String, Text, Whitespace __all__ = ['CleanLexer'] + class CleanLexer(ExtendedRegexLexer): """ Lexer for the general purpose, state-of-the-art, pure and lazy functional programming language Clean (http://clean.cs.ru.nl/Clean). - .. versionadded: 2.1 + .. 
versionadded: 2.2 """ - name = 'CleanLexer' - aliases = ['Clean', 'clean'] + name = 'Clean' + aliases = ['clean'] filenames = ['*.icl', '*.dcl'] - def __init__(self, *args, **kwargs): - super(CleanLexer, self).__init__(*args, **kwargs) - def get_tokens_unprocessed(self, text=None, context=None): ctx = LexerContext(text, 0) ctx.indent = 0 @@ -52,9 +50,9 @@ class CleanLexer(ExtendedRegexLexer): ctx.pos = match.end() def store_indent(lexer, match, ctx): - # Tabs are four spaces: + # Tabs are four spaces: # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt - ctx.indent = len(match.group(0).replace('\t',' ')) + ctx.indent = len(match.group(0).replace('\t', ' ')) ctx.pos = match.end() yield match.start(), Text, match.group(0) @@ -100,10 +98,14 @@ class CleanLexer(ExtendedRegexLexer): ctx.pos = match.end() yield match.start(), Comment, match.group(0) + keywords = ('class', 'instance', 'where', 'with', 'let', 'let!', 'with', + 'in', 'case', 'of', 'infix', 'infixr', 'infixl', 'generic', + 'derive', 'otherwise', 'code', 'inline') + tokens = { 'common': [ (r';', Punctuation, '#pop'), - (r'//', Comment, 'singlecomment') + (r'//', Comment, 'singlecomment'), ], 'root': [ # Comments @@ -112,27 +114,22 @@ class CleanLexer(ExtendedRegexLexer): (r'(?s)/\*.*?\*/', Comment.Multi), # Modules, imports, etc. - (r'\b((?:implementation|definition|system)\s+)?(module)(\s+)([\w`]+)', + (r'\b((?:implementation|definition|system)\s+)?(module)(\s+)([\w`]+)', bygroups(Keyword.Namespace, Keyword.Namespace, Text, Name.Class)), (r'(?<=\n)import(?=\s)', Keyword.Namespace, 'import'), (r'(?<=\n)from(?=\s)', Keyword.Namespace, 'fromimport'), # Keywords # We cannot use (?s)^|(?<=\s) as prefix, so need to repeat this - (words(('class','instance','where','with','let','let!','with','in', - 'case','of','infix','infixr','infixl','generic','derive', - 'otherwise', 'code', 'inline'), - prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword), - (words(('class','instance','where','with','let','let!','with','in', - 'case','of','infix','infixr','infixl','generic','derive', - 'otherwise', 'code', 'inline'), - prefix=r'^', suffix=r'(?=\s)'), Keyword), + (words(keywords, prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword), + (words(keywords, prefix=r'^', suffix=r'(?=\s)'), Keyword), # Function definitions (r'(?=\{\|)', Whitespace, 'genericfunction'), - (r'(?<=\n)(\s*)([\w`\$\(\)=\-<>~*\^\|\+&%]+)(\s+[\w])*(\s*)(::)', - bygroups(store_indent, Name.Function, Keyword.Type, Whitespace, Punctuation), - 'functiondefargs'), + (r'(?<=\n)(\s*)([\w`$()=\-<>~*\^|+&%]+)((?:\s+[\w])*)(\s*)(::)', + bygroups(store_indent, Name.Function, Keyword.Type, Whitespace, + Punctuation), + 'functiondefargs'), # Type definitions (r'(?<=\n)([ \t]*)(::)', bygroups(store_indent, Punctuation), 'typedef'), @@ -142,19 +139,20 @@ class CleanLexer(ExtendedRegexLexer): (r'\'\\?.(?\|&~*\^/]', Operator), + (r'[{}()\[\],:;.#]', Punctuation), + (r'[+\-=!<>|&~*\^/]', Operator), (r'\\\\', Operator), # Lambda expressions @@ -163,64 +161,64 @@ class CleanLexer(ExtendedRegexLexer): # Whitespace (r'\s', Whitespace), - include('common') + include('common'), ], 'fromimport': [ include('common'), (r'([\w`]+)', check_class_not_import), (r'\n', Whitespace, '#pop'), - (r'\s', Whitespace) + (r'\s', Whitespace), ], 'fromimportfunc': [ include('common'), - (r'([\w`\$\(\)=\-<>~*\^\|\+&%]+)', check_instance_class), + (r'([\w`$()=\-<>~*\^|+&%]+)', check_instance_class), (r',', Punctuation), (r'\n', Whitespace, '#pop'), - (r'\s', Whitespace) + (r'\s', Whitespace), ], 'fromimportfunctype': [ 
include('common'), (r'[{(\[]', Punctuation, 'combtype'), (r',', Punctuation, '#pop'), - (r'[:;\.#]', Punctuation), + (r'[:;.#]', Punctuation), (r'\n', Whitespace, '#pop:2'), (r'[^\S\n]+', Whitespace), - (r'\S+', Keyword.Type) + (r'\S+', Keyword.Type), ], 'combtype': [ include('common'), (r'[})\]]', Punctuation, '#pop'), (r'[{(\[]', Punctuation, '#pop'), - (r'[,:;\.#]', Punctuation), + (r'[,:;.#]', Punctuation), (r'\s+', Whitespace), - (r'\S+', Keyword.Type) + (r'\S+', Keyword.Type), ], 'import': [ include('common'), - (words(('from', 'import', 'as', 'qualified'), - prefix='(?<=\s)', suffix='(?=\s)'), Keyword.Namespace), + (words(('from', 'import', 'as', 'qualified'), + prefix='(?<=\s)', suffix='(?=\s)'), Keyword.Namespace), (r'[\w`]+', Name.Class), (r'\n', Whitespace, '#pop'), (r',', Punctuation), - (r'[^\S\n]+', Whitespace) + (r'[^\S\n]+', Whitespace), ], 'singlecomment': [ (r'(.)(?=\n)', skip), - (r'.+(?!\n)', Comment) + (r'.+(?!\n)', Comment), ], 'doubleqstring': [ (r'[^\\"]+', String.Double), (r'"', String.Double, '#pop'), - (r'\\.', String.Double) + (r'\\.', String.Double), ], 'typedef': [ include('common'), (r'[\w`]+', Keyword.Type), - (r'[:=\|\(\),\[\]\{\}\!\*]', Punctuation), + (r'[:=|(),\[\]{}!*]', Punctuation), (r'->', Punctuation), - (r'\n(?=[^\s\|])', Whitespace, '#pop'), + (r'\n(?=[^\s|])', Whitespace, '#pop'), (r'\s', Whitespace), - (r'.', Keyword.Type) + (r'.', Keyword.Type), ], 'genericfunction': [ include('common'), @@ -231,46 +229,45 @@ class CleanLexer(ExtendedRegexLexer): (r'(\s+of\s+)(\{)', bygroups(Keyword, Punctuation), 'genericftypes'), (r'\s', Whitespace), (r'[\w`]+', Keyword.Type), - (r'[\*\(\)]', Punctuation) + (r'[*()]', Punctuation), ], 'genericftypes': [ include('common'), (r'[\w`]+', Keyword.Type), (r',', Punctuation), (r'\s', Whitespace), - (r'\}', Punctuation, '#pop') + (r'\}', Punctuation, '#pop'), ], 'functiondefargs': [ include('common'), (r'\n(\s*)', check_indent1), - (r'[!{}()\[\],:;\.#]', Punctuation), + (r'[!{}()\[\],:;.#]', Punctuation), (r'->', Punctuation, 'functiondefres'), (r'^(?=\S)', Whitespace, '#pop'), (r'\S', Keyword.Type), - (r'\s', Whitespace) + (r'\s', Whitespace), ], 'functiondefres': [ include('common'), (r'\n(\s*)', check_indent2), (r'^(?=\S)', Whitespace, '#pop:2'), - (r'[!{}()\[\],:;\.#]', Punctuation), + (r'[!{}()\[\],:;.#]', Punctuation), (r'\|', Punctuation, 'functiondefclasses'), (r'\S', Keyword.Type), - (r'\s', Whitespace) + (r'\s', Whitespace), ], 'functiondefclasses': [ include('common'), (r'\n(\s*)', check_indent3), (r'^(?=\S)', Whitespace, '#pop:3'), (r'[,&]', Punctuation), - (r'[\w`\$\(\)=\-<>~*\^\|\+&%]', Name.Function, 'functionname'), - (r'\s', Whitespace) + (r'[\w`$()=\-<>~*\^|+&%]', Name.Function, 'functionname'), + (r'\s', Whitespace), ], 'functionname': [ include('common'), - (r'[\w`\$\(\)=\-<>~*\^\|\+&%]+', Name.Function), + (r'[\w`$()=\-<>~*\^|+&%]+', Name.Function), (r'(?=\{\|)', Punctuation, 'genericfunction'), - default('#pop') + default('#pop'), ] } - -- cgit v1.2.1 From 32c5e9febcde1847cf6c4a289766c1bb57f234da Mon Sep 17 00:00:00 2001 From: Camil Staps Date: Tue, 16 Feb 2016 23:23:36 +0100 Subject: Fixes Clean indentation issues --- pygments/lexers/clean.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pygments/lexers/clean.py b/pygments/lexers/clean.py index acff807e..a3e81534 100644 --- a/pygments/lexers/clean.py +++ b/pygments/lexers/clean.py @@ -49,18 +49,23 @@ class CleanLexer(ExtendedRegexLexer): ctx.stack = ctx.stack + ['fromimportfunctype'] ctx.pos = 
match.end() - def store_indent(lexer, match, ctx): + @staticmethod + def indent_len(text): # Tabs are four spaces: # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt - ctx.indent = len(match.group(0).replace('\t', ' ')) + text = text.replace('\n', '') + return len(text.replace('\t', ' ')), len(text) + + def store_indent(lexer, match, ctx): + ctx.indent, _ = CleanLexer.indent_len(match.group(0)) ctx.pos = match.end() yield match.start(), Text, match.group(0) def check_indent1(lexer, match, ctx): - indent = len(match.group(0)) - 1 + indent, reallen = CleanLexer.indent_len(match.group(0)) if indent > ctx.indent: yield match.start(), Whitespace, match.group(0) - ctx.pos = match.start() + indent + 1 + ctx.pos = match.start() + reallen + 1 else: ctx.indent = 0 ctx.pos = match.start() @@ -68,23 +73,20 @@ class CleanLexer(ExtendedRegexLexer): yield match.start(), Whitespace, match.group(0)[1:] def check_indent2(lexer, match, ctx): - indent = len(match.group(0)) - 1 + indent, reallen = CleanLexer.indent_len(match.group(0)) if indent > ctx.indent: yield match.start(), Whitespace, match.group(0) - ctx.pos = match.start() + indent + 1 + ctx.pos = match.start() + reallen + 1 else: ctx.indent = 0 ctx.pos = match.start() ctx.stack = ctx.stack[:-2] - yield match.start(), Whitespace, match.group(0)[1:] - if match.group(0) == '\n\n': - ctx.pos = ctx.pos + 1 def check_indent3(lexer, match, ctx): - indent = len(match.group(0)) - 1 + indent, reallen = CleanLexer.indent_len(match.group(0)) if indent > ctx.indent: yield match.start(), Whitespace, match.group(0) - ctx.pos = match.start() + indent + 1 + ctx.pos = match.start() + reallen + 1 else: ctx.indent = 0 ctx.pos = match.start() @@ -126,7 +128,7 @@ class CleanLexer(ExtendedRegexLexer): # Function definitions (r'(?=\{\|)', Whitespace, 'genericfunction'), - (r'(?<=\n)(\s*)([\w`$()=\-<>~*\^|+&%]+)((?:\s+[\w])*)(\s*)(::)', + (r'(?<=\n)([ \t]*)([\w`$()=\-<>~*\^|+&%]+)((?:\s+[\w])*)(\s*)(::)', bygroups(store_indent, Name.Function, Keyword.Type, Whitespace, Punctuation), 'functiondefargs'), -- cgit v1.2.1 From b52b5fd49d18cd8da1fd5acc0712db848256b1bd Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Wed, 17 Feb 2016 09:23:56 +0100 Subject: Add wdiff to authors/changes. --- AUTHORS | 3 +- CHANGES | 1 + pygments/lexers/diff.py | 6 +- tests/examplefiles/wdiff_example2.wdiff | 758 -------------------------------- 4 files changed, 6 insertions(+), 762 deletions(-) delete mode 100644 tests/examplefiles/wdiff_example2.wdiff diff --git a/AUTHORS b/AUTHORS index c5ed18b7..795876c2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -89,7 +89,8 @@ Other contributors, listed alphabetically, are: * Tim Howard -- BlitzMax lexer * Dustin Howett -- Logos lexer * Ivan Inozemtsev -- Fantom lexer -* Hiroaki Itoh -- Shell console rewrite, Lexers for PowerShell session, MSDOS session, BC +* Hiroaki Itoh -- Shell console rewrite, Lexers for PowerShell session, + MSDOS session, BC, WDiff * Brian R. Jackson -- Tea lexer * Christian Jann -- ShellSession lexer * Dennis Kaarsemaker -- sources.list lexer diff --git a/CHANGES b/CHANGES index 81e22722..edf4e0b6 100644 --- a/CHANGES +++ b/CHANGES @@ -17,6 +17,7 @@ Version 2.2 * TypoScript (#1173) * Varnish config (PR#554) * Clean (PR#503) + * WDiff (PR#513) - Added `lexers.find_lexer_class_by_name()`. 
(#1203) diff --git a/pygments/lexers/diff.py b/pygments/lexers/diff.py index 2056fbff..726b49ad 100644 --- a/pygments/lexers/diff.py +++ b/pygments/lexers/diff.py @@ -118,12 +118,12 @@ class WDiffLexer(RegexLexer): * if target files of wdiff contain "[-", "-]", "{+", "+}", especially they are unbalanced, this lexer will get confusing. - .. versionadded:: 2.1 + .. versionadded:: 2.2 """ name = 'WDiff' - aliases = ['wdiff',] - filenames = ['*.wdiff',] + aliases = ['wdiff'] + filenames = ['*.wdiff'] mimetypes = [] flags = re.MULTILINE | re.DOTALL diff --git a/tests/examplefiles/wdiff_example2.wdiff b/tests/examplefiles/wdiff_example2.wdiff deleted file mode 100644 index 1a746fe5..00000000 --- a/tests/examplefiles/wdiff_example2.wdiff +++ /dev/null @@ -1,758 +0,0 @@ -.. -*- mode: rst -*- - -[-..-] - -{+{+..+} highlight:: [-python-] {+python+}+} - -==================== -Write your own lexer -==================== - -If a lexer for your favorite language is missing in the Pygments package, you -can easily write your own and extend Pygments. - -All you need can be found inside the :mod:`pygments.lexer` module. As you can -read in the :doc:`API documentation `, a lexer is a class that is -initialized with some keyword arguments (the lexer options) and that provides a -:meth:`.get_tokens_unprocessed()` method which is given a string or unicode -object with the data to [-lex.-] {+[-parse.-] {+lex.+}+} - -The :meth:`.get_tokens_unprocessed()` method must return an iterator or iterable -containing tuples in the form ``(index, token, value)``. Normally you don't -need to do this since there are {+[-numerous-]+} base lexers [-that-] {+{+that+} do most of the work and [-that-] {+that+}+} -you can subclass. - - -RegexLexer -========== - -[-The-] - -{+[-A very powerful (but quite easy to use)-] - -{+The+}+} lexer [-base-] {+{+base+} class used by almost all of Pygments' [-lexers-] {+lexers+}+} is the -:class:`RegexLexer`. This -{+[-lexer base-]+} class allows you to define lexing rules in terms of -*regular expressions* for different *states*. - -States are groups of regular expressions that are matched against the input -string at the *current position*. If one of these expressions matches, a -corresponding action is performed [-(such as-] {+[-(normally-] {+(such as+}+} yielding a token with a specific -[-type,-] -{+[-type),-] -{+type,+} or changing [-state),-] {+state),+}+} the current position is set to where the last match -ended and the matching process continues with the first regex of the current -state. - -Lexer states are kept [-on-] {+[-in-] {+on+}+} a {+[-state-]+} stack: each time a new state is entered, the new -state is pushed onto the stack. The most basic lexers (like the `DiffLexer`) -just need one state. - -Each state is defined as a list of tuples in the form (`regex`, `action`, -`new_state`) where the last item is optional. In the most basic form, `action` -is a token type (like `Name.Builtin`). That means: When `regex` matches, emit a -token with the match text and type `tokentype` and push `new_state` on the state -stack. If the new state is ``'#pop'``, the topmost state is popped from the -stack instead. [-To-] {+[-(To-] {+To+}+} pop more than one state, use ``'#pop:2'`` and so [-on.-] {+[-on.)-] {+on.+}+} -``'#push'`` is a synonym for pushing the current state on the stack. - -The following example shows the `DiffLexer` from the builtin lexers. Note that -it contains some additional attributes `name`, `aliases` and `filenames` which -aren't required for a lexer. 
They are used by the builtin lexer lookup -functions. [-::-] - -{+[-.. sourcecode:: python-] {+::+}+} - - from pygments.lexer import RegexLexer - from pygments.token import * - - class DiffLexer(RegexLexer): - name = 'Diff' - aliases = ['diff'] - filenames = ['*.diff'] - - tokens = { - 'root': [ - (r' .*\n', Text), - (r'\+.*\n', Generic.Inserted), - (r'-.*\n', Generic.Deleted), - (r'@.*\n', Generic.Subheading), - (r'Index.*\n', Generic.Heading), - (r'=.*\n', Generic.Heading), - (r'.*\n', Text), - ] - } - -As you can see this lexer only uses one state. When the lexer starts scanning -the text, it first checks if the current character is a space. If this is true -it scans everything until newline and returns the {+[-parsed-]+} data as [-a-] {+{+a+}+} `Text` [-token-] {+[-token.-] {+token+} (which -is the "no special highlighting" [-token).-] {+token).+}+} - -If this rule doesn't match, it checks if the current char is a plus sign. And -so on. - -If no rule matches at the current position, the current char is emitted as an -`Error` token that indicates a [-lexing-] {+[-parsing-] {+lexing+}+} error, and the position is increased by -[-one.-] -{+[-1.-] -{+one.+}+} - - -Adding and testing a new lexer -============================== - -To make [-Pygments-] {+[-pygments-] {+Pygments+}+} aware of your new lexer, you have to perform the following -steps: - -First, change to the current directory containing the [-Pygments-] {+[-pygments-] {+Pygments+}+} source code: - -.. [-code-block::-] {+[-sourcecode::-] {+code-block::+}+} console - - $ cd .../pygments-main - -[-Select-] - -{+{+Select+} a matching module under ``pygments/lexers``, or create a new module for -your lexer [-class.-] {+class.+}+} - -Next, make sure the lexer is known from outside of the module. All modules in -the ``pygments.lexers`` specify ``__all__``. For example, [-``esoteric.py`` sets::-] {+[-``other.py`` sets: - -.. sourcecode:: python-] {+``esoteric.py`` sets::+}+} - - __all__ = ['BrainfuckLexer', 'BefungeLexer', ...] - -Simply add the name of your lexer class to this list. - -Finally the lexer can be made [-publicly-] {+[-publically-] {+publicly+}+} known by rebuilding the lexer mapping: - -.. [-code-block::-] {+[-sourcecode::-] {+code-block::+}+} console - - $ make mapfiles - -To test the new lexer, store an example file with the proper extension in -``tests/examplefiles``. For example, to test your ``DiffLexer``, add a -``tests/examplefiles/example.diff`` containing a sample diff output. - -Now you can use pygmentize to render your example to HTML: - -.. [-code-block::-] {+[-sourcecode::-] {+code-block::+}+} console - - $ ./pygmentize -O full -f html -o /tmp/example.html tests/examplefiles/example.diff - -Note that this [-explicitly-] {+[-explicitely-] {+explicitly+}+} calls the ``pygmentize`` in the current directory -by preceding it with ``./``. This ensures your modifications are used. -Otherwise a possibly already installed, unmodified version without your new -lexer would have been called from the system search path (``$PATH``). - -To view the result, open ``/tmp/example.html`` in your browser. - -Once the example renders as expected, you should run the complete test suite: - -.. 
[-code-block::-] {+[-sourcecode::-] {+code-block::+}+} console - - $ make test - -[-It-] - -{+{+It+} also tests that your lexer fulfills the lexer API and certain invariants, -such as that the concatenation of all token text is the same as the input [-text.-] {+text.+}+} - - -Regex Flags -=========== - -You can either define regex flags [-locally-] {+{+locally+}+} in the regex (``r'(?x)foo bar'``) or -[-globally-] -{+{+globally+}+} by adding a `flags` attribute to your lexer class. If no attribute is -defined, it defaults to `re.MULTILINE`. For more [-information-] {+[-informations-] {+information+}+} about regular -expression flags see the [-page about-] {+{+page about+}+} `regular expressions`_ {+[-help page-]+} in the [-Python-] {+[-python-] {+Python+}+} -documentation. - -.. _regular expressions: [-http://docs.python.org/library/re.html#regular-expression-syntax-] {+[-http://docs.python.org/lib/re-syntax.html-] {+http://docs.python.org/library/re.html#regular-expression-syntax+}+} - - -Scanning multiple tokens at once -================================ - -[-So-] - -{+{+So+} far, the `action` element in the rule tuple of regex, action and state has -been a single token type. Now we look at the first of several other possible -[-values.-] -{+values.+}+} - -Here is a more complex lexer that highlights INI files. INI files consist of -sections, comments and [-``key-] {+[-key-] {+``key+}+} = [-value`` pairs::-] {+[-value pairs: - -.. sourcecode:: python-] {+value`` pairs::+}+} - - from pygments.lexer import RegexLexer, bygroups - from pygments.token import * - - class IniLexer(RegexLexer): - name = 'INI' - aliases = ['ini', 'cfg'] - filenames = ['*.ini', '*.cfg'] - - tokens = { - 'root': [ - (r'\s+', Text), - (r';.*?$', Comment), - (r'\[.*?\]$', Keyword), - (r'(.*?)(\s*)(=)(\s*)(.*?)$', - bygroups(Name.Attribute, Text, Operator, Text, String)) - ] - } - -The lexer first looks for whitespace, comments and section names. [-Later-] {+[-And later-] {+Later+}+} it -looks for a line that looks like a key, value pair, separated by an ``'='`` -sign, and optional whitespace. - -The `bygroups` helper [-yields-] {+[-makes sure that-] {+yields+}+} each [-capturing-] {+{+capturing+}+} group [-in-] {+[-is yielded-] {+in+} the [-regex-] {+regex+}+} with a different -token type. First the `Name.Attribute` token, then a `Text` token for the -optional whitespace, after that a `Operator` token for the equals sign. Then a -`Text` token for the whitespace again. The rest of the line is returned as -`String`. - -Note that for this to work, every part of the match must be inside a capturing -group (a ``(...)``), and there must not be any nested capturing groups. If you -nevertheless need a group, use a non-capturing group defined using this syntax: -[-``(?:some|words|here)``-] -{+[-``r'(?:some|words|here)'``-] -{+``(?:some|words|here)``+}+} (note the ``?:`` after the beginning parenthesis). - -If you find yourself needing a capturing group inside the regex which shouldn't -be part of the output but is used in the regular expressions for backreferencing -(eg: ``r'(<(foo|bar)>)(.*?)()'``), you can pass `None` to the bygroups -function and {+[-it will skip-]+} that group will be skipped in the output. - - -Changing states -=============== - -Many lexers need multiple states to work as expected. For example, some -languages allow multiline comments to be nested. Since this is a recursive -pattern it's impossible to lex just using regular expressions. - -Here is [-a-] {+[-the solution: - -.. 
sourcecode:: python-] {+a+} lexer that recognizes C++ style comments (multi-line with ``/* */`` -and single-line with ``//`` until end of [-line)::-] {+line)::+}+} - - from pygments.lexer import RegexLexer - from pygments.token import * - - class [-CppCommentLexer(RegexLexer):-] {+[-ExampleLexer(RegexLexer):-] {+CppCommentLexer(RegexLexer):+}+} - name = 'Example Lexer with states' - - tokens = { - 'root': [ - (r'[^/]+', Text), - (r'/\*', Comment.Multiline, 'comment'), - (r'//.*?$', Comment.Singleline), - (r'/', Text) - ], - 'comment': [ - (r'[^*/]', Comment.Multiline), - (r'/\*', Comment.Multiline, '#push'), - (r'\*/', Comment.Multiline, '#pop'), - (r'[*/]', Comment.Multiline) - ] - } - -This lexer starts lexing in the ``'root'`` state. It tries to match as much as -possible until it finds a slash (``'/'``). If the next character after the slash -is [-an asterisk-] {+[-a star-] {+an asterisk+}+} (``'*'``) the `RegexLexer` sends those two characters to the -output stream marked as `Comment.Multiline` and continues [-lexing-] {+[-parsing-] {+lexing+}+} with the rules -defined in the ``'comment'`` state. - -If there wasn't [-an asterisk-] {+[-a star-] {+an asterisk+}+} after the slash, the `RegexLexer` checks if it's a -[-Singleline-] -{+[-singleline-] -{+Singleline+}+} comment [-(i.e.-] {+[-(eg:-] {+(i.e.+}+} followed by a second slash). If this also wasn't the -case it must be a single [-slash,-] {+[-slash-] {+slash,+} which is not a comment [-starter-] {+starter+}+} (the separate -regex for a single slash must also be given, else the slash would be marked as -an error token). - -Inside the ``'comment'`` state, we do the same thing again. Scan until the -lexer finds a star or slash. If it's the opening of a multiline comment, push -the ``'comment'`` state on the stack and continue scanning, again in the -``'comment'`` state. Else, check if it's the end of the multiline comment. If -yes, pop one state from the stack. - -Note: If you pop from an empty stack you'll get an `IndexError`. (There is an -easy way to prevent this from happening: don't ``'#pop'`` in the root state). - -If the `RegexLexer` encounters a newline that is flagged as an error token, the -stack is emptied and the lexer continues scanning in the ``'root'`` state. This -[-can help-] -{+[-helps-] -{+can help+}+} producing error-tolerant highlighting for erroneous input, e.g. when a -single-line string is not closed. - - -Advanced state tricks -===================== - -There are a few more things you can do with states: - -- You can push multiple states onto the stack if you give a tuple instead of a - simple string as the third item in a rule tuple. For example, if you want to - match a comment containing a directive, something [-like:-] {+[-like::-] {+like:+} - - .. code-block:: [-text-] {+text+}+} - - /* rest of comment */ - - you can use this [-rule::-] {+[-rule: - - .. sourcecode:: python-] {+rule::+}+} - - tokens = { - 'root': [ - (r'/\* <', Comment, ('comment', 'directive')), - ... - ], - 'directive': [ - (r'[^>]*', Comment.Directive), - (r'>', Comment, '#pop'), - ], - 'comment': [ - (r'[^*]+', Comment), - (r'\*/', Comment, '#pop'), - (r'\*', Comment), - ] - } - - When this encounters the above sample, first ``'comment'`` and ``'directive'`` - are pushed onto the stack, then the lexer continues in the directive state - until it finds the closing ``>``, then it continues in the comment state until - the closing ``*/``. Then, both states are popped from the stack again and - lexing continues in the root state. - - .. 
versionadded:: 0.9 - The tuple can contain the special ``'#push'`` and ``'#pop'`` (but not - ``'#pop:n'``) directives. - - -- You can include the rules of a state in the definition of another. This is - done by using `include` from [-`pygments.lexer`::-] {+[-`pygments.lexer`: - - .. sourcecode:: python-] {+`pygments.lexer`::+}+} - - from pygments.lexer import RegexLexer, bygroups, include - from pygments.token import * - - class ExampleLexer(RegexLexer): - tokens = { - 'comments': [ - (r'/\*.*?\*/', Comment), - (r'//.*?\n', Comment), - ], - 'root': [ - include('comments'), - (r'(function )(\w+)( {)', - bygroups(Keyword, Name, Keyword), 'function'), - (r'.', Text), - ], - 'function': [ - (r'[^}/]+', Text), - include('comments'), - (r'/', Text), - [-(r'\}',-] - {+[-(r'}',-] - {+(r'\}',+}+} Keyword, '#pop'), - ] - } - - This is a hypothetical lexer for a language that consist of functions and - comments. Because comments can occur at toplevel and in functions, we need - rules for comments in both states. As you can see, the `include` helper saves - repeating rules that occur more than once (in this example, the state - ``'comment'`` will never be entered by the lexer, as it's only there to be - included in ``'root'`` and ``'function'``). - -- Sometimes, you may want to "combine" a state from existing ones. This is - possible with the [-`combined`-] {+[-`combine`-] {+`combined`+}+} helper from `pygments.lexer`. - - If you, instead of a new state, write ``combined('state1', 'state2')`` as the - third item of a rule tuple, a new anonymous state will be formed from state1 - and state2 and if the rule matches, the lexer will enter this state. - - This is not used very often, but can be helpful in some cases, such as the - `PythonLexer`'s string literal processing. - -- If you want your lexer to start lexing in a different state you can modify the - stack by [-overriding-] {+[-overloading-] {+overriding+}+} the `get_tokens_unprocessed()` [-method::-] {+[-method: - - .. sourcecode:: python-] {+method::+}+} - - from pygments.lexer import RegexLexer - - class [-ExampleLexer(RegexLexer):-] {+[-MyLexer(RegexLexer):-] {+ExampleLexer(RegexLexer):+}+} - tokens = {...} - - def get_tokens_unprocessed(self, [-text,-] {+[-text): - stack = ['root', 'otherstate']-] {+text,+} stack=('root', [-'otherstate')):-] {+'otherstate')):+}+} - for item in RegexLexer.get_tokens_unprocessed(text, stack): - yield item - - Some lexers like the `PhpLexer` use this to make the leading ``', Name.Tag), - ], - 'script-content': [ - (r'(.+?)(<\s*/\s*script\s*>)', - bygroups(using(JavascriptLexer), Name.Tag), - '#pop'), - ] - } - -Here the content of a ```` end tag is processed by the `JavascriptLexer`, -while the end tag is yielded as a normal token with the `Name.Tag` type. - -{+[-As an additional goodie, if the lexer class is replaced by `this` (imported from -`pygments.lexer`), the "other" lexer will be the current one (because you cannot -refer to the current class within the code that runs at class definition time).-]+} - -Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))`` rule. -Here, two states are pushed onto the state stack, ``'script-content'`` and -``'tag'``. That means that first ``'tag'`` is processed, which will [-lex-] {+[-parse-] {+lex+}+} -attributes and the closing ``>``, then the ``'tag'`` state is popped and the -next state on top of the stack will be ``'script-content'``. 
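
The ``combined()`` helper mentioned a few paragraphs above is the one construct
in this part of the walkthrough that has no code sample; a minimal sketch,
loosely modelled on the `PythonLexer` string handling it refers to (the class,
state and token names here are invented purely for illustration), could look
like this::

    from pygments.lexer import RegexLexer, combined
    from pygments.token import String, Text

    class CombinedExampleLexer(RegexLexer):
        # Hypothetical lexer: the opening quote enters an anonymous state
        # built from 'stringescape' and 'dqs', so the escape rules do not
        # have to be repeated inside every string-like state.
        name = 'CombinedExample'

        tokens = {
            'root': [
                (r'"', String, combined('stringescape', 'dqs')),
                (r'[^"]+', Text),
            ],
            'stringescape': [
                (r'\\.', String.Escape),
            ],
            'dqs': [
                (r'"', String, '#pop'),
                (r'[^\\"]+', String),
            ],
        }

    if __name__ == '__main__':
        # Quick demonstration of the anonymous combined state in action.
        for tok, val in CombinedExampleLexer().get_tokens('say "a\\"b" done'):
            print(tok, repr(val))
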
- -[-Since-] - -{+{+Since+} you cannot refer to the class currently being defined, use `this` -(imported from `pygments.lexer`) to refer to the current lexer class, i.e. -``using(this)``. This construct may seem unnecessary, but this is often the -most obvious way of lexing arbitrary syntax between fixed delimiters without -introducing deeply nested [-states.-] {+states.+}+} - -The `using()` helper has a special keyword argument, `state`, which works as -follows: if given, the lexer to use initially is not in the ``"root"`` state, -but in the state given by this argument. This [-does-] {+[-*only* works-] {+does+} not [-work-] {+work+}+} with [-advanced-] {+[-a `RegexLexer`.-] {+advanced+} -`RegexLexer` subclasses such as `ExtendedRegexLexer` (see [-below).-] {+below).+}+} - -Any other keywords arguments passed to `using()` are added to the keyword -arguments used to create the lexer. - - -Delegating Lexer -================ - -Another approach for nested lexers is the `DelegatingLexer` which is for example -used for the template engine lexers. It takes two lexers as arguments on -initialisation: a `root_lexer` and a `language_lexer`. - -The input is processed as follows: First, the whole text is lexed with the -`language_lexer`. All tokens yielded with [-the special-] {+[-a-] {+the special+}+} type of ``Other`` are -then concatenated and given to the `root_lexer`. The language tokens of the -`language_lexer` are then inserted into the `root_lexer`'s token stream at the -appropriate positions. [-::-] - -{+[-.. sourcecode:: python-] {+::+}+} - - from pygments.lexer import DelegatingLexer - from pygments.lexers.web import HtmlLexer, PhpLexer - - class HtmlPhpLexer(DelegatingLexer): - def __init__(self, **options): - super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options) - -This procedure ensures that e.g. HTML with template tags in it is highlighted -correctly even if the template tags are put into HTML tags or attributes. - -If you want to change the needle token ``Other`` to something else, you can give -the lexer another token type as the third [-parameter::-] {+[-parameter: - -.. sourcecode:: python-] {+parameter::+}+} - - DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options) - - -Callbacks -========= - -Sometimes the grammar of a language is so complex that a lexer would be unable -to [-process-] {+[-parse-] {+process+}+} it just by using regular expressions and stacks. - -For this, the `RegexLexer` allows callbacks to be given in rule tuples, instead -of token types (`bygroups` and `using` are nothing else but preimplemented -callbacks). The callback must be a function taking two arguments: - -* the lexer itself -* the match object for the last matched rule - -The callback must then return an iterable of (or simply yield) ``(index, -tokentype, value)`` tuples, which are then just passed through by -`get_tokens_unprocessed()`. The ``index`` here is the position of the token in -the input string, ``tokentype`` is the normal token type (like `Name.Builtin`), -and ``value`` the associated part of the input string. - -You can see an example [-here::-] {+[-here: - -.. 
sourcecode:: python-] {+here::+}+} - - from pygments.lexer import RegexLexer - from pygments.token import Generic - - class HypotheticLexer(RegexLexer): - - def headline_callback(lexer, match): - equal_signs = match.group(1) - text = match.group(2) - yield match.start(), Generic.Headline, equal_signs + text + equal_signs - - tokens = { - 'root': [ - (r'(=+)(.*?)(\1)', headline_callback) - ] - } - -If the regex for the `headline_callback` matches, the function is called with -the match object. Note that after the callback is done, processing continues -normally, that is, after the end of the previous match. The callback has no -possibility to influence the position. - -There are not really any simple examples for lexer callbacks, but you can see -them in action e.g. in the [-`SMLLexer` class in `ml.py`_.-] {+[-`compiled.py`_ source code-] {+`SMLLexer` class+} in [-the `CLexer` and -`JavaLexer` classes.-] {+`ml.py`_.+}+} - -.. [-_ml.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ml.py-] {+[-_compiled.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/compiled.py-] {+_ml.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ml.py+}+} - - -The ExtendedRegexLexer class -============================ - -The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for -the funky syntax rules of {+[-some-]+} languages {+[-that will go unnamed,-]+} such as Ruby. - -But fear not; even then you don't have to abandon the regular expression -[-approach:-] -{+[-approach. For-] -{+approach:+}+} Pygments has a subclass of `RegexLexer`, the `ExtendedRegexLexer`. -All features known from RegexLexers are available here too, and the tokens are -specified in exactly the same way, *except* for one detail: - -The `get_tokens_unprocessed()` method holds its internal state data not as local -variables, but in an instance of the `pygments.lexer.LexerContext` class, and -that instance is passed to callbacks as a third argument. This means that you -can modify the lexer state in callbacks. - -The `LexerContext` class has the following members: - -* `text` -- the input text -* `pos` -- the current starting position that is used for matching regexes -* `stack` -- a list containing the state stack -* `end` -- the maximum position to which regexes are matched, this defaults to - the length of `text` - -Additionally, the `get_tokens_unprocessed()` method can be given a -`LexerContext` instead of a string and will then process this context instead of -creating a new one for the string argument. - -Note that because you can set the current position to anything in the callback, -it won't be automatically be set by the caller after the callback is finished. -For example, this is how the hypothetical lexer above would be written with the -[-`ExtendedRegexLexer`::-] -{+[-`ExtendedRegexLexer`: - -.. sourcecode:: python-] -{+`ExtendedRegexLexer`::+}+} - - from pygments.lexer import ExtendedRegexLexer - from pygments.token import Generic - - class ExHypotheticLexer(ExtendedRegexLexer): - - def headline_callback(lexer, match, ctx): - equal_signs = match.group(1) - text = match.group(2) - yield match.start(), Generic.Headline, equal_signs + text + equal_signs - ctx.pos = match.end() - - tokens = { - 'root': [ - (r'(=+)(.*?)(\1)', headline_callback) - ] - } - -This might sound confusing (and it can really be). But it is needed, and for an -example look at the Ruby lexer in [-`ruby.py`_.-] {+[-`agile.py`_.-] {+`ruby.py`_.+}+} - -.. 
[-_ruby.py:-] {+[-_agile.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/agile.py - - -Filtering-] {+_ruby.py:+} https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/ruby.py - - -Handling Lists of Keywords -========================== - -For a relatively short list (hundreds) you can construct an optimized regular -expression directly using ``words()`` (longer lists, see next section). This -function handles a few things for you automatically, including escaping -metacharacters and Python's first-match rather than longest-match in -alternations. Feel free to put the lists themselves in -``pygments/lexers/_$lang_builtins.py`` (see examples there), and generated by -code if possible. - -An example of using ``words()`` is something like:: - - from pygments.lexer import RegexLexer, words, Name - - class MyLexer(RegexLexer): - - tokens = { - 'root': [ - (words(('else', 'elseif'), suffix=r'\b'), Name.Builtin), - (r'\w+', Name), - ], - } - -As you can see, you can add ``prefix`` and ``suffix`` parts to the constructed -regex. - - -[-Modifying-] - - -{+Modifying+}+} Token Streams -======================= - -Some languages ship a lot of builtin functions (for example PHP). The total -amount of those functions differs from system to system because not everybody -has every extension installed. In the case of PHP there are over 3000 builtin -functions. That's an [-incredibly-] {+[-incredible-] {+incredibly+}+} huge amount of functions, much more than you -[-want to-] -{+[-can-] -{+want to+}+} put into a regular expression. - -But because only `Name` tokens can be function names [-this is-] {+[-it's-] {+this is+}+} solvable by -overriding the ``get_tokens_unprocessed()`` method. The following lexer -subclasses the `PythonLexer` so that it highlights some additional names as -pseudo [-keywords::-] {+[-keywords: - -.. sourcecode:: python-] {+keywords::+}+} - - from [-pygments.lexers.python-] {+[-pygments.lexers.agile-] {+pygments.lexers.python+}+} import PythonLexer - from pygments.token import Name, Keyword - - class MyPythonLexer(PythonLexer): - EXTRA_KEYWORDS = [-set(('foo',-] {+[-['foo',-] {+set(('foo',+}+} 'bar', 'foobar', 'barfoo', 'spam', [-'eggs'))-] {+[-'eggs']-] {+'eggs'))+}+} - - def get_tokens_unprocessed(self, text): - for index, token, value in PythonLexer.get_tokens_unprocessed(self, text): - if token is Name and value in self.EXTRA_KEYWORDS: - yield index, Keyword.Pseudo, value - else: - yield index, token, value - -The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions. - -{+[-.. note:: Do not confuse this with the :doc:`filter ` system.-]+} -- cgit v1.2.1
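
A quick way to exercise the ``WDiff`` lexer touched by the second commit is a
short smoke test; this is only a sketch, assuming a Pygments checkout with the
patch applied, and the sample input below is invented rather than taken from
the deleted example file::

    from pygments import highlight
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters import TerminalFormatter

    # 'wdiff' is the alias declared in pygments/lexers/diff.py above.
    sample = "This is [-old-] {+new+} text.\n"
    print(highlight(sample, get_lexer_by_name('wdiff'), TerminalFormatter()))

Any hunk of the removed ``tests/examplefiles/wdiff_example2.wdiff`` would serve
equally well as input, since that file is exactly the kind of wdiff output the
lexer targets.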