summary | refs | log | tree | commit | diff
path: root/Cython/Compiler/StringEncoding.py
diff options
context:
space:
mode:
author: Stefan Behnel <stefan_ml@behnel.de> 2013-01-10 22:09:37 +0100
committer: Stefan Behnel <stefan_ml@behnel.de> 2013-01-10 22:09:37 +0100
commit: f989876bd4e3df666f53941cf355cc20cd96d5fc (patch)
tree: cbd5db2e24675e6662870208270cc15ae562cc24 /Cython/Compiler/StringEncoding.py
parent: 86f14e2b4065e24036d5f2578157eba02bc10810 (diff)
download: cython-f989876bd4e3df666f53941cf355cc20cd96d5fc.tar.gz
undo Py3.3 surrogates support fixes - breaks too many special cases with strings
Diffstat (limited to 'Cython/Compiler/StringEncoding.py')
-rw-r--r-- Cython/Compiler/StringEncoding.py 38
1 files changed, 12 insertions, 26 deletions
diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py
index 8fc37fc16..1ca490efc 100644
--- a/Cython/Compiler/StringEncoding.py
+++ b/Cython/Compiler/StringEncoding.py
@@ -12,8 +12,6 @@ else:
_unicode, _str, _bytes = unicode, str, str
IS_PYTHON3 = False
-IS_PYTHON24 = sys.version_info[:2] < (2,5)
-
empty_bytes = _bytes()
empty_unicode = _unicode()
@@ -128,13 +126,6 @@ class EncodedString(_unicode):
assert self.encoding is None
return self.encode("UTF-8")
- def escapeencode(self):
- assert self.encoding is None
- if IS_PYTHON24:
- # work around bug in Py24 encoder
- return self.replace(u'\\', u'\\\\').encode('unicode_escape')
- return self.encode('unicode_escape')
-
def is_unicode(self):
return self.encoding is None
is_unicode = property(is_unicode)
@@ -156,9 +147,6 @@ class BytesLiteral(_bytes):
def utf8encode(self):
assert False, "this is not a unicode string: %r" % self
- def escapeencode(self):
- assert False, "this is not a unicode string: %r" % self
-
def __str__(self):
"""Fake-decode the byte string to unicode to support %
formatting of unicode strings.
@@ -177,8 +165,6 @@ char_from_escape_sequence = {
r'\v' : u'\v',
}.get
-_c_special = ('\\', '??', '"') + tuple(map(chr, range(32)))
-
def _to_escape_sequence(s):
if s in '\n\r\t':
return repr(s)[1:-1]
@@ -190,22 +176,19 @@ def _to_escape_sequence(s):
# within a character sequence, oct passes much better than hex
return ''.join(['\\%03o' % ord(c) for c in s])
-def _build_specials_replacer():
+_c_special = ('\\', '??', '"') + tuple(map(chr, range(32)))
+_c_special_replacements = [(orig.encode('ASCII'),
+ _to_escape_sequence(orig).encode('ASCII'))
+ for orig in _c_special ]
+
+def _build_specials_test():
subexps = []
- replacements = {}
for special in _c_special:
regexp = ''.join(['[%s]' % c.replace('\\', '\\\\') for c in special])
subexps.append(regexp)
- replacements[special.encode('ASCII')] = _to_escape_sequence(special).encode('ASCII')
-
- sub = re.compile(('(%s)' % '|'.join(subexps)).encode('ASCII')).sub
- def replace_specials(m):
- return replacements[m.group(1)]
- def replace(s):
- return sub(replace_specials, s)
- return replace
+ return re.compile('|'.join(subexps).encode('ASCII')).search
-_replace_specials = _build_specials_replacer()
+_has_specials = _build_specials_test()
def escape_char(c):
if IS_PYTHON3:
@@ -227,7 +210,10 @@ def escape_byte_string(s):
encoded as ISO-8859-1, will result in the correct byte sequence
being written.
"""
- s = _replace_specials(s)
+ if _has_specials(s):
+ for special, replacement in _c_special_replacements:
+ if special in s:
+ s = s.replace(special, replacement)
try:
return s.decode("ASCII") # trial decoding: plain ASCII => done
except UnicodeDecodeError: