diff options
-rw-r--r-- | CHANGES | 6
-rw-r--r-- | doc/docs/tokens.rst | 8
-rw-r--r-- | pygments/lexers/c_cpp.py | 16
-rw-r--r-- | pygments/lexers/perl.py | 3
-rw-r--r-- | pygments/lexers/php.py | 4
-rw-r--r-- | pygments/lexers/python.py | 20
-rw-r--r-- | pygments/lexers/ruby.py | 8
-rw-r--r-- | pygments/styles/lovelace.py | 2
-rw-r--r-- | pygments/token.py | 2
-rw-r--r-- | tests/examplefiles/example2.cpp | 20
-rw-r--r-- | tests/examplefiles/test.php | 5
11 files changed, 70 insertions, 24 deletions
@@ -21,6 +21,12 @@ Version 2.2 - Added new token types and lexing for magic methods and variables in Python and PHP. +- Added a new token type for string affixes and lexing for them in Python and + C++ lexers. + +- Added a new token type for heredoc (and similar) string delimiters and + lexing for them in C++, Perl, PHP and Ruby lexers. + Version 2.1.1 ------------- diff --git a/doc/docs/tokens.rst b/doc/docs/tokens.rst index 96a6d003..801fc638 100644 --- a/doc/docs/tokens.rst +++ b/doc/docs/tokens.rst @@ -223,12 +223,20 @@ Literals `String` For any string literal. +`String.Affix` + Token type for affixes that further specify the type of the string they're + attached to (e.g. the prefixes ``r`` and ``u8`` in ``r"foo"`` and ``u8"foo"``). + `String.Backtick` Token type for strings enclosed in backticks. `String.Char` Token type for single characters (e.g. Java, C). +`String.Delimiter` + Token type for delimiting identifiers in "heredoc", raw and other similar + strings (e.g. the word ``END`` in Perl code ``print <<'END';``). + `String.Doc` Token type for documentation strings (for example Python). 
diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py index 5c724d03..7f061539 100644 --- a/pygments/lexers/c_cpp.py +++ b/pygments/lexers/c_cpp.py @@ -50,8 +50,9 @@ class CFamilyLexer(RegexLexer): (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), ], 'statements': [ - (r'L?"', String, 'string'), - (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char), + (r'(L?)(")', bygroups(String.Affix, String), 'string'), + (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')", + bygroups(String.Affix, String.Char, String.Char, String.Char)), (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), @@ -217,7 +218,11 @@ class CppLexer(CFamilyLexer): (r'char(16_t|32_t)\b', Keyword.Type), (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'), # C++11 raw strings - (r'R"\(', String, 'rawstring'), + (r'(R)(")([^\\()\s]{,16})(\((?:.|\n)*?\))(\3)(")', + bygroups(String.Affix, String, String.Delimiter, String, + String.Delimiter, String)), + # C++11 UTF-8/16/32 strings + (r'(u8|u|U)(")', bygroups(String.Affix, String), 'string'), inherit, ], 'root': [ @@ -234,11 +239,6 @@ class CppLexer(CFamilyLexer): # template specification (r'\s*(?=>)', Text, '#pop'), ], - 'rawstring': [ - (r'\)"', String, '#pop'), - (r'[^)]+', String), - (r'\)', String), - ], } def analyse_text(text): diff --git a/pygments/lexers/perl.py b/pygments/lexers/perl.py index b78963d0..8df3c810 100644 --- a/pygments/lexers/perl.py +++ b/pygments/lexers/perl.py @@ -109,7 +109,8 @@ class PerlLexer(RegexLexer): 'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'), Name.Builtin), (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo), - (r'<<([\'"]?)([a-zA-Z_]\w*)\1;?\n.*?\n\2\n', String), + (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)', + bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)), (r'__END__', 
Comment.Preproc, 'end-part'), (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global), (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global), diff --git a/pygments/lexers/php.py b/pygments/lexers/php.py index 257dd94f..2421738f 100644 --- a/pygments/lexers/php.py +++ b/pygments/lexers/php.py @@ -138,7 +138,9 @@ class PhpLexer(RegexLexer): ], 'php': [ (r'\?>', Comment.Preproc, '#pop'), - (r'<<<([\'"]?)(' + _ident_inner + r')\1\n.*?\n\s*\2;?\n', String), + (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)', + bygroups(String, String, String.Delimiter, String, String.Delimiter, + Punctuation, Text)), (r'\s+', Text), (r'#.*?\n', Comment.Single), (r'//.*?\n', Comment.Single), diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py index 57af00e2..f483071b 100644 --- a/pygments/lexers/python.py +++ b/pygments/lexers/python.py @@ -51,8 +51,8 @@ class PythonLexer(RegexLexer): tokens = { 'root': [ (r'\n', Text), - (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)), - (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)), + (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), + (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), @@ -72,14 +72,14 @@ class PythonLexer(RegexLexer): include('magicfuncs'), include('magicvars'), include('backtick'), - ('(?:[rR]|[uU][rR]|[rR][uU])"""', String.Double, 'tdqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'''", String.Single, 'tsqs'), - ('(?:[rR]|[uU][rR]|[rR][uU])"', String.Double, 'dqs'), - ("(?:[rR]|[uU][rR]|[rR][uU])'", String.Single, 'sqs'), - ('[uU]?"""', String.Double, combined('stringescape', 'tdqs')), - ("[uU]?'''", String.Single, combined('stringescape', 'tsqs')), - ('[uU]?"', String.Double, combined('stringescape', 'dqs')), - ("[uU]?'", String.Single, combined('stringescape', 'sqs')), + ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', 
bygroups(String.Affix, String.Double), 'tdqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'), + ('([rR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'), + ("([rR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'), + ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), + ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), + ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), + ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), include('name'), include('numbers'), ], diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py index e81d6ecf..f16416d3 100644 --- a/pygments/lexers/ruby.py +++ b/pygments/lexers/ruby.py @@ -47,9 +47,9 @@ class RubyLexer(ExtendedRegexLexer): start = match.start(1) yield start, Operator, match.group(1) # <<-? - yield match.start(2), String.Heredoc, match.group(2) # quote ", ', ` - yield match.start(3), Name.Constant, match.group(3) # heredoc name - yield match.start(4), String.Heredoc, match.group(4) # quote again + yield match.start(2), String.Heredoc, match.group(2) # quote ", ', ` + yield match.start(3), String.Delimiter, match.group(3) # heredoc name + yield match.start(4), String.Heredoc, match.group(4) # quote again heredocstack = ctx.__dict__.setdefault('heredocstack', []) outermost = not bool(heredocstack) @@ -74,7 +74,7 @@ class RubyLexer(ExtendedRegexLexer): if check == hdname: for amatch in lines: yield amatch.start(), String.Heredoc, amatch.group() - yield match.start(), Name.Constant, match.group() + yield match.start(), String.Delimiter, match.group() ctx.pos = match.end() break else: diff --git a/pygments/styles/lovelace.py b/pygments/styles/lovelace.py index 712f3e5c..236dde9b 100644 --- a/pygments/styles/lovelace.py +++ b/pygments/styles/lovelace.py @@ -71,7 +71,9 @@ class 
LovelaceStyle(Style): Name.Variable.Magic: _DOC_ORANGE, String: _STR_RED, + String.Affix: '#444444', String.Char: _OW_PURPLE, + String.Delimiter: _DOC_ORANGE, String.Doc: 'italic '+_DOC_ORANGE, String.Escape: _ESCAPE_LIME, String.Interpol: 'underline', diff --git a/pygments/token.py b/pygments/token.py index 097ff064..40c3214a 100644 --- a/pygments/token.py +++ b/pygments/token.py @@ -163,8 +163,10 @@ STANDARD_TYPES = { Literal.Date: 'ld', String: 's', + String.Affix: 'sa', String.Backtick: 'sb', String.Char: 'sc', + String.Delimiter: 'dl', String.Doc: 'sd', String.Double: 's2', String.Escape: 'se', diff --git a/tests/examplefiles/example2.cpp b/tests/examplefiles/example2.cpp new file mode 100644 index 00000000..ccd99383 --- /dev/null +++ b/tests/examplefiles/example2.cpp @@ -0,0 +1,20 @@ +/* + * A Test file for the different string literals. + */ + +#include <iostream> + +int main() { + char *_str = "a normal string"; + wchar_t *L_str = L"a wide string"; + char *u8_str = u8"utf-8 string"; + char16_t *u_str = u"utf-16 string"; + char32_t *U_str = U"utf-32 string"; + char *R_str = R""""(raw string with +""" +as a delimiter)""""; + + std::cout << R_str << std::endl; + + return 0; +} diff --git a/tests/examplefiles/test.php b/tests/examplefiles/test.php index 794961c1..e8efdc6a 100644 --- a/tests/examplefiles/test.php +++ b/tests/examplefiles/test.php @@ -535,5 +535,10 @@ $magic->__toString(); EOF; +echo <<<"some_delimiter" +more heredoc testing +continues on this line +some_delimiter; + ?> |