Back out 10520a0c4913 - that change breaks Jython.

To handle non-BMP characters correctly with Python 2.x, use a Python build with wide Unicode characters (a UCS-4 build). Closes #1205.
author: Georg Brandl <georg@python.org> 2016-02-02 10:30:58 +0100
committer: Georg Brandl <georg@python.org> 2016-02-02 10:30:58 +0100
commit: 28c93d6d5bd08e158ca8529e520560796059b510 (patch)
tree: 3242ecd439e3fa314452df58e899969efa4b3db2 /pygments/lexers/rdf.py
parent: a3134c1f01023eba264bf5eb04764df41890a24e (diff)
download: pygments-28c93d6d5bd08e158ca8529e520560796059b510.tar.gz
1 files changed, 42 insertions, 29 deletions
diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py
index cb634ee0..103b4ad0 100644
--- a/pygments/lexers/rdf.py
+++ b/pygments/lexers/rdf.py
@@ -29,43 +29,56 @@ class SparqlLexer(RegexLexer):
     filenames = ['*.rq', '*.sparql']
     mimetypes = ['application/sparql-query']
 
+    # character group definitions ::
+
+    PN_CHARS_BASE_GRP = (u'a-zA-Z'
+                         u'\u00c0-\u00d6'
+                         u'\u00d8-\u00f6'
+                         u'\u00f8-\u02ff'
+                         u'\u0370-\u037d'
+                         u'\u037f-\u1fff'
+                         u'\u200c-\u200d'
+                         u'\u2070-\u218f'
+                         u'\u2c00-\u2fef'
+                         u'\u3001-\ud7ff'
+                         u'\uf900-\ufdcf'
+                         u'\ufdf0-\ufffd'
+                         u'\U00010000-\U000effff')
+
+    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')
+
+    PN_CHARS_GRP = (PN_CHARS_U_GRP +
+                    r'\-' +
+                    r'0-9' +
+                    u'\u00b7' +
+                    u'\u0300-\u036f' +
+                    u'\u203f-\u2040')
+
+    HEX_GRP = '0-9A-Fa-f'
+
+    PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&""()*+,;=/?#@%'
+
     # terminal productions ::
 
-    PN_CHARS_BASE = (u'(?:[a-zA-Z'
-                     u'\u00c0-\u00d6'
-                     u'\u00d8-\u00f6'
-                     u'\u00f8-\u02ff'
-                     u'\u0370-\u037d'
-                     u'\u037f-\u1fff'
-                     u'\u200c-\u200d'
-                     u'\u2070-\u218f'
-                     u'\u2c00-\u2fef'
-                     u'\u3001-\ud7ff'
-                     u'\uf900-\ufdcf'
-                     u'\ufdf0-\ufffd]|'
-                     u'[^\u0000-\uffff]|'
-                     u'[\ud800-\udbff][\udc00-\udfff])')
+    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'
 
-    PN_CHARS_U = '(?:' + PN_CHARS_BASE + '|_)'
+    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'
 
-    PN_CHARS = ('(?:' + PN_CHARS_U + r'|[\-0-9' +
-                u'\u00b7' +
-                u'\u0300-\u036f' +
-                u'\u203f-\u2040])')
+    PN_CHARS = '[' + PN_CHARS_GRP + ']'
 
-    HEX = '[0-9A-Fa-f]'
+    HEX = '[' + HEX_GRP + ']'
 
-    PN_LOCAL_ESC_CHARS = r'[ _~.\-!$&""()*+,;=/?#@%]'
+    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'
 
     IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>'
 
-    BLANK_NODE_LABEL = '_:(?:' + PN_CHARS_U + '|[0-9])(?:(?:' + PN_CHARS + '|\.)*' + \
-                       PN_CHARS + ')?'
+    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
+                       '.]*' + PN_CHARS + ')?'
 
-    PN_PREFIX = PN_CHARS_BASE + '(?:(?:' + PN_CHARS + '|\.)*' + PN_CHARS + ')?'
+    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'
 
-    VARNAME = '(?:' + PN_CHARS_U + '|[0-9])(?:' + PN_CHARS_U + \
-              u'|[0-9\u00b7\u0300-\u036f\u203f-\u2040])*'
+    VARNAME = u'[0-9' + PN_CHARS_U_GRP + '][' + PN_CHARS_U_GRP + \
+              u'0-9\u00b7\u0300-\u036f\u203f-\u2040]*'
 
     PERCENT = '%' + HEX + HEX
 
@@ -73,9 +86,9 @@ class SparqlLexer(RegexLexer):
 
     PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'
 
-    PN_LOCAL = ('(?:(?:' + PN_CHARS_U + '|[:0-9])|' + PLX + ')' +
-                '(?:(?:(?:' + PN_CHARS + '|[.:])|' + PLX + ')*(?:(?:' +
-                PN_CHARS + '|:)|' + PLX + '))?')
+    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
+                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
+                PN_CHARS_GRP + ':]|' + PLX + '))?')
 
     EXPONENT = r'[eE][+-]?\d+'
author	Georg Brandl <georg@python.org>	2016-02-02 10:30:58 +0100
committer	Georg Brandl <georg@python.org>	2016-02-02 10:30:58 +0100
commit	28c93d6d5bd08e158ca8529e520560796059b510 (patch)
tree	3242ecd439e3fa314452df58e899969efa4b3db2 /pygments/lexers/rdf.py
parent	a3134c1f01023eba264bf5eb04764df41890a24e (diff)
download	pygments-28c93d6d5bd08e158ca8529e520560796059b510.tar.gz