diff options
author | Georg Brandl <georg@python.org> | 2016-02-02 10:30:58 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2016-02-02 10:30:58 +0100 |
commit | 28c93d6d5bd08e158ca8529e520560796059b510 (patch) | |
tree | 3242ecd439e3fa314452df58e899969efa4b3db2 /pygments/lexers/rdf.py | |
parent | a3134c1f01023eba264bf5eb04764df41890a24e (diff) | |
download | pygments-28c93d6d5bd08e158ca8529e520560796059b510.tar.gz |
Backout 10520a0c4913 - this breaks Jython.
If you want to handle non-BMP characters correctly with Python 2.x,
you should use a build with wide unicode characters.
Closes #1205.
Diffstat (limited to 'pygments/lexers/rdf.py')
-rw-r--r-- | pygments/lexers/rdf.py | 71 |
1 files changed, 42 insertions, 29 deletions
diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py index cb634ee0..103b4ad0 100644 --- a/pygments/lexers/rdf.py +++ b/pygments/lexers/rdf.py @@ -29,43 +29,56 @@ class SparqlLexer(RegexLexer): filenames = ['*.rq', '*.sparql'] mimetypes = ['application/sparql-query'] + # character group definitions :: + + PN_CHARS_BASE_GRP = (u'a-zA-Z' + u'\u00c0-\u00d6' + u'\u00d8-\u00f6' + u'\u00f8-\u02ff' + u'\u0370-\u037d' + u'\u037f-\u1fff' + u'\u200c-\u200d' + u'\u2070-\u218f' + u'\u2c00-\u2fef' + u'\u3001-\ud7ff' + u'\uf900-\ufdcf' + u'\ufdf0-\ufffd' + u'\U00010000-\U000effff') + + PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_') + + PN_CHARS_GRP = (PN_CHARS_U_GRP + + r'\-' + + r'0-9' + + u'\u00b7' + + u'\u0300-\u036f' + + u'\u203f-\u2040') + + HEX_GRP = '0-9A-Fa-f' + + PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&""()*+,;=/?#@%' + # terminal productions :: - PN_CHARS_BASE = (u'(?:[a-zA-Z' - u'\u00c0-\u00d6' - u'\u00d8-\u00f6' - u'\u00f8-\u02ff' - u'\u0370-\u037d' - u'\u037f-\u1fff' - u'\u200c-\u200d' - u'\u2070-\u218f' - u'\u2c00-\u2fef' - u'\u3001-\ud7ff' - u'\uf900-\ufdcf' - u'\ufdf0-\ufffd]|' - u'[^\u0000-\uffff]|' - u'[\ud800-\udbff][\udc00-\udfff])') + PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']' - PN_CHARS_U = '(?:' + PN_CHARS_BASE + '|_)' + PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']' - PN_CHARS = ('(?:' + PN_CHARS_U + r'|[\-0-9' + - u'\u00b7' + - u'\u0300-\u036f' + - u'\u203f-\u2040])') + PN_CHARS = '[' + PN_CHARS_GRP + ']' - HEX = '[0-9A-Fa-f]' + HEX = '[' + HEX_GRP + ']' - PN_LOCAL_ESC_CHARS = r'[ _~.\-!$&""()*+,;=/?#@%]' + PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']' IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>' - BLANK_NODE_LABEL = '_:(?:' + PN_CHARS_U + '|[0-9])(?:(?:' + PN_CHARS + '|\.)*' + \ - PN_CHARS + ')?' + BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \ + '.]*' + PN_CHARS + ')?' - PN_PREFIX = PN_CHARS_BASE + '(?:(?:' + PN_CHARS + '|\.)*' + PN_CHARS + ')?' + PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?' - VARNAME = '(?:' + PN_CHARS_U + '|[0-9])(?:' + PN_CHARS_U + \ - u'|[0-9\u00b7\u0300-\u036f\u203f-\u2040])*' + VARNAME = u'[0-9' + PN_CHARS_U_GRP + '][' + PN_CHARS_U_GRP + \ + u'0-9\u00b7\u0300-\u036f\u203f-\u2040]*' PERCENT = '%' + HEX + HEX @@ -73,9 +86,9 @@ class SparqlLexer(RegexLexer): PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')' - PN_LOCAL = ('(?:(?:' + PN_CHARS_U + '|[:0-9])|' + PLX + ')' + - '(?:(?:(?:' + PN_CHARS + '|[.:])|' + PLX + ')*(?:(?:' + - PN_CHARS + '|:)|' + PLX + '))?') + PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' + + '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' + + PN_CHARS_GRP + ':]|' + PLX + '))?') EXPONENT = r'[eE][+-]?\d+' |