summaryrefslogtreecommitdiff
path: root/pygments
diff options
context:
space:
mode:
authorTim Hatch <tim@timhatch.com>2012-08-27 00:30:35 -0700
committerTim Hatch <tim@timhatch.com>2012-08-27 00:30:35 -0700
commit08ad16c5ad5e80dfddd7ae81e368227e6bb6b989 (patch)
tree241ddb7a63077090db41462691995b459d3bc807 /pygments
parent33e67db7ac05825a5b873104d900958b8c55de06 (diff)
downloadpygments-08ad16c5ad5e80dfddd7ae81e368227e6bb6b989.tar.gz
Centralize regex metachar escaping in unistring.py, since the surrogate
support already breaks the one-parsed-char-per-Unicode-codepoint assumption.
Diffstat (limited to 'pygments')
-rw-r--r--pygments/lexers/dotnet.py20
-rw-r--r--pygments/lexers/jvm.py11
-rw-r--r--pygments/unistring.py4
3 files changed, 12 insertions, 23 deletions
diff --git a/pygments/lexers/dotnet.py b/pygments/lexers/dotnet.py
index 0a2770ca..17edddc0 100644
--- a/pygments/lexers/dotnet.py
+++ b/pygments/lexers/dotnet.py
@@ -23,10 +23,6 @@ __all__ = ['CSharpLexer', 'NemerleLexer', 'BooLexer', 'VbNetLexer',
'CSharpAspxLexer', 'VbNetAspxLexer', 'FSharpLexer']
-def _escape(st):
- return st.replace(u'\\', ur'\\').replace(u'-', ur'\-').\
- replace(u'[', ur'\[').replace(u']', ur'\]')
-
class CSharpLexer(RegexLexer):
"""
For `C# <http://msdn2.microsoft.com/en-us/vcsharp/default.aspx>`_
@@ -67,10 +63,9 @@ class CSharpLexer(RegexLexer):
'[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl +
uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'),
'full': ('@?(?:_|[^' +
- _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])'
- + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo',
- 'Nl', 'Nd', 'Pc', 'Cf', 'Mn',
- 'Mc')) + ']*'),
+ uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])'
+ + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl',
+ 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
}
tokens = {}
@@ -179,11 +174,10 @@ class NemerleLexer(RegexLexer):
basic = ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' +
'[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl +
uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'),
- full = ('@?(?:_|[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm',
- 'Lo', 'Nl')) + '])'
- + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo',
- 'Nl', 'Nd', 'Pc', 'Cf', 'Mn',
- 'Mc')) + ']*'),
+ full = ('@?(?:_|[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo',
+ 'Nl') + '])'
+ + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl',
+ 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
)
tokens = {}
diff --git a/pygments/lexers/jvm.py b/pygments/lexers/jvm.py
index 696bb1a1..c8caa2c0 100644
--- a/pygments/lexers/jvm.py
+++ b/pygments/lexers/jvm.py
@@ -789,20 +789,15 @@ class KotlinLexer(RegexLexer):
# for the range of allowed unicode characters in identifiers,
# see http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-334.pdf
- def _escape(st):
- return st.replace(u'\\', ur'\\').replace(u'-', ur'\-').\
- replace(u'[', ur'\[').replace(u']', ur'\]')
-
levels = {
'none': '@?[_a-zA-Z][a-zA-Z0-9_]*',
'basic': ('@?[_' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl + ']' +
'[' + uni.Lu + uni.Ll + uni.Lt + uni.Lm + uni.Nl +
uni.Nd + uni.Pc + uni.Cf + uni.Mn + uni.Mc + ']*'),
'full': ('@?(?:_|[^' +
- _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')) + '])'
- + '[^' + _escape(uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo',
- 'Nl', 'Nd', 'Pc', 'Cf', 'Mn',
- 'Mc')) + ']*'),
+ uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') + '])'
+ + '[^' + uni.allexcept('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl',
+ 'Nd', 'Pc', 'Cf', 'Mn', 'Mc') + ']*'),
}
tokens = {}
diff --git a/pygments/unistring.py b/pygments/unistring.py
index 2b99f1f0..b6f53e89 100644
--- a/pygments/unistring.py
+++ b/pygments/unistring.py
@@ -114,8 +114,8 @@ if __name__ == '__main__':
# Hack to avoid this combining with the preceding high
# surrogate, 0xdbff, when doing a repr.
c = u'\\' + c
- elif ord(c) in (0x2d, 0x5c):
- # Escape backslash itself and dash.
+ elif ord(c) in (0x2d, 0x5b, 0x5c, 0x5d):
+ # Escape regex metachars.
c = u'\\' + c
categories.setdefault(cat, []).append(c)