summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jharrod LaFon <jlafon@eyesopen.com> 2014-04-14 18:26:01 -0400
committer Jharrod LaFon <jlafon@eyesopen.com> 2014-04-14 18:26:01 -0400
commit 17357720b7b42f54afe1da831e27b2f2953a6439 (patch)
tree e17830beb8724951990fe3030af125b3ff2f2c3b
parent 1946764d7ecfd7c770c6e3bbadd5f78da67f6676 (diff)
download pygments-17357720b7b42f54afe1da831e27b2f2953a6439.tar.gz
Correctly recognize PHP identifiers with Unicode characters
-rw-r--r-- pygments/lexers/web.py 29
1 files changed, 16 insertions, 13 deletions
diff --git a/pygments/lexers/web.py b/pygments/lexers/web.py
index a12caffe..f388f611 100644
--- a/pygments/lexers/web.py
+++ b/pygments/lexers/web.py
@@ -799,6 +799,13 @@ class PhpLexer(RegexLexer):
filenames = ['*.php', '*.php[345]', '*.inc']
mimetypes = ['text/x-php']
+ # Note that a backslash is included in the following two patterns
+ # because PHP uses a backslash as its namespace separator.
+ _ident_char = r'[\\_a-zA-Z0-9]|[^\x00-\x7f]'
+ _ident_begin = r'(?:[\\_a-zA-Z]|[^\x00-\x7f])'
+ _ident_end = r'(?:' + _ident_char + ')*'
+ _ident_inner = _ident_begin + _ident_end
+
flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
tokens = {
'root': [
@@ -808,7 +815,7 @@ class PhpLexer(RegexLexer):
],
'php': [
(r'\?>', Comment.Preproc, '#pop'),
- (r'<<<(\'?)([a-zA-Z_][a-zA-Z0-9_]*)\1\n.*?\n\2\;?\n', String),
+ (r'<<<(\'?)(' + _ident_inner + r')\1\n.*?\n\2\;?\n', String),
(r'\s+', Text),
(r'#.*?\n', Comment.Single),
(r'//.*?\n', Comment.Single),
@@ -817,7 +824,7 @@ class PhpLexer(RegexLexer):
(r'/\*\*/', Comment.Multiline),
(r'/\*\*.*?\*/', String.Doc),
(r'/\*.*?\*/', Comment.Multiline),
- (r'(->|::)(\s*)([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)',
+ (r'(->|::)(\s*)(' + _ident_inner + ')',
bygroups(Operator, Text, Name.Attribute)),
(r'[~!%^&*+=|:.<>/?@-]+', Operator),
(r'[\[\]{}();,]+', Punctuation),
@@ -825,7 +832,7 @@ class PhpLexer(RegexLexer):
(r'(function)(\s*)(?=\()', bygroups(Keyword, Text)),
(r'(function)(\s+)(&?)(\s*)',
bygroups(Keyword, Text, Operator, Text), 'functionname'),
- (r'(const)(\s+)([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)',
+ (r'(const)(\s+)(' + _ident_inner + ')',
bygroups(Keyword, Text, Name.Constant)),
(r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
@@ -839,9 +846,9 @@ class PhpLexer(RegexLexer):
r'catch|throw|this|use|namespace|trait|yield|'
r'finally)\b', Keyword),
(r'(true|false|null)\b', Keyword.Constant),
- (r'\$\{\$+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*\}', Name.Variable),
- (r'\$+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*', Name.Variable),
- (r'[\\a-zA-Z_\x7f-\xff][\\a-zA-Z0-9_\x7f-\xff]*', Name.Other),
+ (r'\$\{\$+' + _ident_inner + r'\}', Name.Variable),
+ (r'\$+' + _ident_inner, Name.Variable),
+ (_ident_inner, Name.Other),
(r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
(r'\d+[eE][+-]?[0-9]+', Number.Float),
(r'0[0-7]+', Number.Oct),
@@ -853,20 +860,16 @@ class PhpLexer(RegexLexer):
(r'"', String.Double, 'string'),
],
'classname': [
- (r'[a-zA-Z_\x7f-\xff][\\a-zA-Z0-9_\x7f-\xff]*', Name.Class, '#pop')
+ (_ident_inner, Name.Class, '#pop')
],
'functionname': [
- (r'[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*', Name.Function, '#pop')
+ (_ident_inner, Name.Function, '#pop')
],
'string': [
(r'"', String.Double, '#pop'),
(r'[^{$"\\]+', String.Double),
(r'\\([nrt\"$\\]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})', String.Escape),
-<<<<<<< local
- (r'\$[a-zA-Z_][a-zA-Z0-9_]*(\[\S+?\]|->[a-zA-Z_][a-zA-Z0-9_]*)?',
-=======
- (r'\$[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*(\[\S+\]|->[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)?',
->>>>>>> other
+ (r'\$' + _ident_inner + r'(\[\S+?\]|->' + _ident_inner + ')?',
String.Interpol),
(r'(\{\$\{)(.*?)(\}\})',
bygroups(String.Interpol, using(this, _startinline=True),