diff options
author | Tim Hatch <tim@timhatch.com> | 2012-08-27 00:30:35 -0700 |
---|---|---|
committer | Tim Hatch <tim@timhatch.com> | 2012-08-27 00:30:35 -0700 |
commit | 08ad16c5ad5e80dfddd7ae81e368227e6bb6b989 (patch) | |
tree | 241ddb7a63077090db41462691995b459d3bc807 /pygments/lexers/jvm.py | |
parent | 33e67db7ac05825a5b873104d900958b8c55de06 (diff) | |
download | pygments-08ad16c5ad5e80dfddd7ae81e368227e6bb6b989.tar.gz |
Centralize regex metacharacter escaping, since the surrogate support already
breaks the assumption of one parsed character per Unicode codepoint.
Diffstat (limited to 'pygments/lexers/jvm.py')
-rw-r--r-- | pygments/lexers/jvm.py | 11 |
1 file changed, 3 insertions, 8 deletions
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py index 696bb1a1..c8caa2c0 100644 --- a/pygments/lexers/jvm.py +++ b/pygments/lexers/jvm.py @@ -789,20 +789,15 @@ class KotlinLexer(RegexLexer): # for the range of allowed unicode characters in identifiers, # see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf - def _escape(st): - return st.replace(u'\\', ur'\\').replace(u'-', ur'\-').\ - replace(u'[', ur'\[').replace(u']', ur'\]') - levels = { 'none': '@?[_a-zA-Z][a-zA-Z0-9_]*', 'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' + '[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'), 'full': ('@?(?:_|[^' + - _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])' - + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', - 'Nl', 'Nd', 'Pc', 'Cf', 'Mn', - 'Mc')) + ']*'), + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])' + + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', + 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'), } tokens = {} |