summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tim Hatch <tim@timhatch.com> 2014-10-07 13:29:08 -0700
committer: Tim Hatch <tim@timhatch.com> 2014-10-07 13:29:08 -0700
commit: 34d2f291d161ac07e090a597660c9fb383b9522b (patch)
tree: d0974a57e084ace3c964bd0b61ba3fa0dc53cd5f
parent: 702bccea86fa7bb11f4d1e6e153fa1394ca7cfcb (diff)
download: pygments-34d2f291d161ac07e090a597660c9fb383b9522b.tar.gz
JavascriptLexer: Support unicode identifiers per standard.
Fixes #999
-rw-r--r-- pygments/lexers/javascript.py 9
-rw-r--r-- tests/examplefiles/unicode.js 5
2 files changed, 13 insertions, 1 deletion
diff --git a/pygments/lexers/javascript.py b/pygments/lexers/javascript.py
index 968436ed..8b5f9c76 100644
--- a/pygments/lexers/javascript.py
+++ b/pygments/lexers/javascript.py
@@ -16,11 +16,18 @@ from pygments.lexer import RegexLexer, include, bygroups, default, \
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Other
from pygments.util import get_bool_opt, iteritems
+import pygments.unistring as uni
__all__ = ['JavascriptLexer', 'KalLexer', 'LiveScriptLexer', 'DartLexer',
'TypeScriptLexer', 'LassoLexer', 'ObjectiveJLexer',
'CoffeeScriptLexer', 'MaskLexer']
+JS_IDENT_START = ('(?:[$_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Lo + uni.Nl
+ + ']|\\\\u[a-fA-F0-9]{4})')
+JS_IDENT_PART = ('(?:[$_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Lo + uni.Nl
+ + uni.Mn + uni.Mc + uni.Nd + uni.Pc
+ + u'\u200c\u200d]|\\\\u[a-fA-F0-9]{4})')
+JS_IDENT = JS_IDENT_START + '(?:' + JS_IDENT_PART + ')*'
class JavascriptLexer(RegexLexer):
"""
@@ -72,7 +79,7 @@ class JavascriptLexer(RegexLexer):
r'decodeURIComponent|encodeURI|encodeURIComponent|'
r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
r'window)\b', Name.Builtin),
- (r'[$a-zA-Z_]\w*', Name.Other),
+ (JS_IDENT, Name.Other),
(r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
(r'0x[0-9a-fA-F]+', Number.Hex),
(r'[0-9]+', Number.Integer),
diff --git a/tests/examplefiles/unicode.js b/tests/examplefiles/unicode.js
new file mode 100644
index 00000000..e77bfb80
--- /dev/null
+++ b/tests/examplefiles/unicode.js
@@ -0,0 +1,5 @@
+var école;
+var sinθ;
+var เมือง;
+var a\u1234b;
+