summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGES | 6
-rw-r--r-- doc/docs/tokens.rst | 8
-rw-r--r-- pygments/lexers/c_cpp.py | 16
-rw-r--r-- pygments/lexers/perl.py | 3
-rw-r--r-- pygments/lexers/php.py | 4
-rw-r--r-- pygments/lexers/python.py | 20
-rw-r--r-- pygments/lexers/ruby.py | 8
-rw-r--r-- pygments/styles/lovelace.py | 2
-rw-r--r-- pygments/token.py | 2
-rw-r--r-- tests/examplefiles/example2.cpp | 20
-rw-r--r-- tests/examplefiles/test.php | 5
11 files changed, 70 insertions, 24 deletions
diff --git a/CHANGES b/CHANGES
index e88582c8..ced0ffb0 100644
--- a/CHANGES
+++ b/CHANGES
@@ -21,6 +21,12 @@ Version 2.2
- Added new token types and lexing for magic methods and variables in Python
and PHP.
+- Added a new token type for string affixes and lexing for them in Python and
+ C++ lexers.
+
+- Added a new token type for heredoc (and similar) string delimiters and
+ lexing for them in C++, Perl, PHP and Ruby lexers.
+
Version 2.1.1
-------------
diff --git a/doc/docs/tokens.rst b/doc/docs/tokens.rst
index 96a6d003..801fc638 100644
--- a/doc/docs/tokens.rst
+++ b/doc/docs/tokens.rst
@@ -223,12 +223,20 @@ Literals
`String`
For any string literal.
+`String.Affix`
+ Token type for affixes that further specify the type of the string they're
+ attached to (e.g. the prefixes ``r`` and ``u8`` in ``r"foo"`` and ``u8"foo"``).
+
`String.Backtick`
Token type for strings enclosed in backticks.
`String.Char`
Token type for single characters (e.g. Java, C).
+`String.Delimiter`
+ Token type for delimiting identifiers in "heredoc", raw and other similar
+ strings (e.g. the word ``END`` in Perl code ``print <<'END';``).
+
`String.Doc`
Token type for documentation strings (for example Python).
diff --git a/pygments/lexers/c_cpp.py b/pygments/lexers/c_cpp.py
index 5c724d03..7f061539 100644
--- a/pygments/lexers/c_cpp.py
+++ b/pygments/lexers/c_cpp.py
@@ -50,8 +50,9 @@ class CFamilyLexer(RegexLexer):
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
],
'statements': [
- (r'L?"', String, 'string'),
- (r"L?'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])'", String.Char),
+ (r'(L?)(")', bygroups(String.Affix, String), 'string'),
+ (r"(L?)(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
+ bygroups(String.Affix, String.Char, String.Char, String.Char)),
(r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
(r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
(r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
@@ -217,7 +218,11 @@ class CppLexer(CFamilyLexer):
(r'char(16_t|32_t)\b', Keyword.Type),
(r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
# C++11 raw strings
- (r'R"\(', String, 'rawstring'),
+ (r'(R)(")([^\\()\s]{,16})(\((?:.|\n)*?\))(\3)(")',
+ bygroups(String.Affix, String, String.Delimiter, String,
+ String.Delimiter, String)),
+ # C++11 UTF-8/16/32 strings
+ (r'(u8|u|U)(")', bygroups(String.Affix, String), 'string'),
inherit,
],
'root': [
@@ -234,11 +239,6 @@ class CppLexer(CFamilyLexer):
# template specification
(r'\s*(?=>)', Text, '#pop'),
],
- 'rawstring': [
- (r'\)"', String, '#pop'),
- (r'[^)]+', String),
- (r'\)', String),
- ],
}
def analyse_text(text):
diff --git a/pygments/lexers/perl.py b/pygments/lexers/perl.py
index b78963d0..8df3c810 100644
--- a/pygments/lexers/perl.py
+++ b/pygments/lexers/perl.py
@@ -109,7 +109,8 @@ class PerlLexer(RegexLexer):
'utime', 'values', 'vec', 'wait', 'waitpid', 'wantarray', 'warn', 'write'), suffix=r'\b'),
Name.Builtin),
(r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
- (r'<<([\'"]?)([a-zA-Z_]\w*)\1;?\n.*?\n\2\n', String),
+ (r'(<<)([\'"]?)([a-zA-Z_]\w*)(\2;?\n.*?\n)(\3)(\n)',
+ bygroups(String, String, String.Delimiter, String, String.Delimiter, Text)),
(r'__END__', Comment.Preproc, 'end-part'),
(r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
(r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
diff --git a/pygments/lexers/php.py b/pygments/lexers/php.py
index 257dd94f..2421738f 100644
--- a/pygments/lexers/php.py
+++ b/pygments/lexers/php.py
@@ -138,7 +138,9 @@ class PhpLexer(RegexLexer):
],
'php': [
(r'\?>', Comment.Preproc, '#pop'),
- (r'<<<([\'"]?)(' + _ident_inner + r')\1\n.*?\n\s*\2;?\n', String),
+ (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
+ bygroups(String, String, String.Delimiter, String, String.Delimiter,
+ Punctuation, Text)),
(r'\s+', Text),
(r'#.*?\n', Comment.Single),
(r'//.*?\n', Comment.Single),
diff --git a/pygments/lexers/python.py b/pygments/lexers/python.py
index 57af00e2..f483071b 100644
--- a/pygments/lexers/python.py
+++ b/pygments/lexers/python.py
@@ -51,8 +51,8 @@ class PythonLexer(RegexLexer):
tokens = {
'root': [
(r'\n', Text),
- (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
- (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
+ (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)),
+ (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)),
(r'[^\S\n]+', Text),
(r'\A#!.+$', Comment.Hashbang),
(r'#.*$', Comment.Single),
@@ -72,14 +72,14 @@ class PythonLexer(RegexLexer):
include('magicfuncs'),
include('magicvars'),
include('backtick'),
- ('(?:[rR]|[uU][rR]|[rR][uU])"""', String.Double, 'tdqs'),
- ("(?:[rR]|[uU][rR]|[rR][uU])'''", String.Single, 'tsqs'),
- ('(?:[rR]|[uU][rR]|[rR][uU])"', String.Double, 'dqs'),
- ("(?:[rR]|[uU][rR]|[rR][uU])'", String.Single, 'sqs'),
- ('[uU]?"""', String.Double, combined('stringescape', 'tdqs')),
- ("[uU]?'''", String.Single, combined('stringescape', 'tsqs')),
- ('[uU]?"', String.Double, combined('stringescape', 'dqs')),
- ("[uU]?'", String.Single, combined('stringescape', 'sqs')),
+ ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', bygroups(String.Affix, String.Double), 'tdqs'),
+ ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'),
+ ('([rR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'),
+ ("([rR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'),
+ ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')),
+ ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')),
+ ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')),
+ ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')),
include('name'),
include('numbers'),
],
diff --git a/pygments/lexers/ruby.py b/pygments/lexers/ruby.py
index e81d6ecf..f16416d3 100644
--- a/pygments/lexers/ruby.py
+++ b/pygments/lexers/ruby.py
@@ -47,9 +47,9 @@ class RubyLexer(ExtendedRegexLexer):
start = match.start(1)
yield start, Operator, match.group(1) # <<-?
- yield match.start(2), String.Heredoc, match.group(2) # quote ", ', `
- yield match.start(3), Name.Constant, match.group(3) # heredoc name
- yield match.start(4), String.Heredoc, match.group(4) # quote again
+ yield match.start(2), String.Heredoc, match.group(2) # quote ", ', `
+ yield match.start(3), String.Delimiter, match.group(3) # heredoc name
+ yield match.start(4), String.Heredoc, match.group(4) # quote again
heredocstack = ctx.__dict__.setdefault('heredocstack', [])
outermost = not bool(heredocstack)
@@ -74,7 +74,7 @@ class RubyLexer(ExtendedRegexLexer):
if check == hdname:
for amatch in lines:
yield amatch.start(), String.Heredoc, amatch.group()
- yield match.start(), Name.Constant, match.group()
+ yield match.start(), String.Delimiter, match.group()
ctx.pos = match.end()
break
else:
diff --git a/pygments/styles/lovelace.py b/pygments/styles/lovelace.py
index 712f3e5c..236dde9b 100644
--- a/pygments/styles/lovelace.py
+++ b/pygments/styles/lovelace.py
@@ -71,7 +71,9 @@ class LovelaceStyle(Style):
Name.Variable.Magic: _DOC_ORANGE,
String: _STR_RED,
+ String.Affix: '#444444',
String.Char: _OW_PURPLE,
+ String.Delimiter: _DOC_ORANGE,
String.Doc: 'italic '+_DOC_ORANGE,
String.Escape: _ESCAPE_LIME,
String.Interpol: 'underline',
diff --git a/pygments/token.py b/pygments/token.py
index 097ff064..40c3214a 100644
--- a/pygments/token.py
+++ b/pygments/token.py
@@ -163,8 +163,10 @@ STANDARD_TYPES = {
Literal.Date: 'ld',
String: 's',
+ String.Affix: 'sa',
String.Backtick: 'sb',
String.Char: 'sc',
+ String.Delimiter: 'dl',
String.Doc: 'sd',
String.Double: 's2',
String.Escape: 'se',
diff --git a/tests/examplefiles/example2.cpp b/tests/examplefiles/example2.cpp
new file mode 100644
index 00000000..ccd99383
--- /dev/null
+++ b/tests/examplefiles/example2.cpp
@@ -0,0 +1,20 @@
+/*
+ * A Test file for the different string literals.
+ */
+
+#include <iostream>
+
+int main() {
+ char *_str = "a normal string";
+ wchar_t *L_str = L"a wide string";
+ char *u8_str = u8"utf-8 string";
+ char16_t *u_str = u"utf-16 string";
+ char32_t *U_str = U"utf-32 string";
+ char *R_str = R""""(raw string with
+"""
+as a delimiter)"""";
+
+ std::cout << R_str << std::endl;
+
+ return 0;
+}
diff --git a/tests/examplefiles/test.php b/tests/examplefiles/test.php
index 794961c1..e8efdc6a 100644
--- a/tests/examplefiles/test.php
+++ b/tests/examplefiles/test.php
@@ -535,5 +535,10 @@ $magic->__toString();
EOF;
+echo <<<"some_delimiter"
+more heredoc testing
+continues on this line
+some_delimiter;
+
?>