all: remove "u" string prefix (#1536)

* all: remove "u" string prefix
* util: remove unirange, since from Python 3.3 on all builds are wide-unicode compatible.
* unistring: remove support for narrow-unicode builds, which stopped being relevant with Python 3.3.
author: Georg Brandl <georg@python.org> 2020-09-08 20:20:19 +0200
committer: GitHub <noreply@github.com> 2020-09-08 20:20:19 +0200
commit: 9f5672672bd61f7149d2a165b49f0617a1a9fe8e (patch)
tree: ad3d62c5c167c6a75edf67a88c20341c77566c7e /pygments/util.py
parent: d9a9e9ee40eb9815ecc3d9ec9d6f5e57499009d2 (diff)
download: pygments-git-9f5672672bd61f7149d2a165b49f0617a1a9fe8e.tar.gz
1 files changed, 4 insertions, 44 deletions
diff --git a/pygments/util.py b/pygments/util.py
index febde921..3a79029f 100644
--- a/pygments/util.py
+++ b/pygments/util.py
@@ -205,55 +205,15 @@ def looks_like_xml(text):
         return rv
 
 
-# Python narrow build compatibility
-
-def _surrogatepair(c):
-    # Given a unicode character code
-    # with length greater than 16 bits,
-    # return the two 16 bit surrogate pair.
+def surrogatepair(c):
+    """Given a unicode character code with length greater than 16 bits,
+    return the two 16 bit surrogate pair.
+    """
     # From example D28 of:
     # http://www.unicode.org/book/ch03.pdf
     return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff)))
 
 
-def unirange(a, b):
-    """Returns a regular expression string to match the given non-BMP range."""
-    if b < a:
-        raise ValueError("Bad character range")
-    if a < 0x10000 or b < 0x10000:
-        raise ValueError("unirange is only defined for non-BMP ranges")
-
-    if sys.maxunicode > 0xffff:
-        # wide build
-        return u'[%s-%s]' % (chr(a), chr(b))
-    else:
-        # narrow build stores surrogates, and the 're' module handles them
-        # (incorrectly) as characters.  Since there is still ordering among
-        # these characters, expand the range to one that it understands.  Some
-        # background in http://bugs.python.org/issue3665 and
-        # http://bugs.python.org/issue12749
-        #
-        # Additionally, the lower constants are using chr rather than
-        # literals because jython [which uses the wide path] can't load this
-        # file if they are literals.
-        ah, al = _surrogatepair(a)
-        bh, bl = _surrogatepair(b)
-        if ah == bh:
-            return u'(?:%s[%s-%s])' % (chr(ah), chr(al), chr(bl))
-        else:
-            buf = []
-            buf.append(u'%s[%s-%s]' % (chr(ah), chr(al),
-                                       ah == bh and chr(bl) or chr(0xdfff)))
-            if ah - bh > 1:
-                buf.append(u'[%s-%s][%s-%s]' %
-                           chr(ah+1), chr(bh-1), chr(0xdc00), chr(0xdfff))
-            if ah != bh:
-                buf.append(u'%s[%s-%s]' %
-                           (chr(bh), chr(0xdc00), chr(bl)))
-
-            return u'(?:' + u'|'.join(buf) + u')'
-
-
 def format_lines(var_name, seq, raw=False, indent_level=0):
     """Formats a sequence of strings for output."""
     lines = []
author	Georg Brandl <georg@python.org>	2020-09-08 20:20:19 +0200
committer	GitHub <noreply@github.com>	2020-09-08 20:20:19 +0200
commit	9f5672672bd61f7149d2a165b49f0617a1a9fe8e (patch)
tree	ad3d62c5c167c6a75edf67a88c20341c77566c7e /pygments/util.py
parent	d9a9e9ee40eb9815ecc3d9ec9d6f5e57499009d2 (diff)
download	pygments-git-9f5672672bd61f7149d2a165b49f0617a1a9fe8e.tar.gz