summary | refs | log | tree | commit | diff
path: root/Cython/Compiler/StringEncoding.py
diff options
context:
space:
mode:
author: Stefan Behnel <scoder@users.berlios.de>, 2010-09-04 20:54:10 +0200
committer: Stefan Behnel <scoder@users.berlios.de>, 2010-09-04 20:54:10 +0200
commit: bf558ecf389a4902a1458eee187934c88757b8da (patch)
tree: 6fe33b6dfd560857a9bd3620644c36d8c6ace731 /Cython/Compiler/StringEncoding.py
parent: 184482af6eb1e68f2a1c063cd5be4ab18aab88d0 (diff)
download: cython-bf558ecf389a4902a1458eee187934c88757b8da.tar.gz
support redundant parsing of string literals as unicode *and* bytes string, fix 'str' literal assignments to char* targets when using Future.unicode_literals
Diffstat (limited to 'Cython/Compiler/StringEncoding.py')
-rw-r--r-- Cython/Compiler/StringEncoding.py 35
1 files changed, 35 insertions, 0 deletions
diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py
index b5bd45f06..8c474f421 100644
--- a/Cython/Compiler/StringEncoding.py
+++ b/Cython/Compiler/StringEncoding.py
@@ -44,9 +44,15 @@ class UnicodeLiteralBuilder(object):
def append_charval(self, char_number):
self.chars.append( unichr(char_number) )
+ def append_uescape(self, char_number, escape_string):
+ self.append_charval(char_number)
+
def getstring(self):
return EncodedString(u''.join(self.chars))
+ def getstrings(self):
+ return (None, self.getstring())
+
class BytesLiteralBuilder(object):
"""Assemble a byte string or char value.
@@ -64,6 +70,9 @@ class BytesLiteralBuilder(object):
def append_charval(self, char_number):
self.chars.append( unichr(char_number).encode('ISO-8859-1') )
+ def append_uescape(self, char_number, escape_string):
+ self.append(escape_string)
+
def getstring(self):
# this *must* return a byte string!
s = BytesLiteral(join_bytes(self.chars))
@@ -74,6 +83,32 @@ class BytesLiteralBuilder(object):
# this *must* return a byte string!
return self.getstring()
+ def getstrings(self):
+ return (self.getstring(), None)
+
+class StrLiteralBuilder(object):
+ """Assemble both a bytes and a unicode representation of a string.
+ """
+ def __init__(self, target_encoding):
+ self._bytes = BytesLiteralBuilder(target_encoding)
+ self._unicode = UnicodeLiteralBuilder()
+
+ def append(self, characters):
+ self._bytes.append(characters)
+ self._unicode.append(characters)
+
+ def append_charval(self, char_number):
+ self._bytes.append_charval(char_number)
+ self._unicode.append_charval(char_number)
+
+ def append_uescape(self, char_number, escape_string):
+ self._bytes.append(escape_string)
+ self._unicode.append_charval(char_number)
+
+ def getstrings(self):
+ return (self._bytes.getstring(), self._unicode.getstring())
+
+
class EncodedString(_unicode):
# unicode string subclass to keep track of the original encoding.
# 'encoding' is None for unicode strings and the source encoding