diff options
author | Stefan Behnel <scoder@users.berlios.de> | 2010-09-04 20:54:10 +0200 |
---|---|---|
committer | Stefan Behnel <scoder@users.berlios.de> | 2010-09-04 20:54:10 +0200 |
commit | bf558ecf389a4902a1458eee187934c88757b8da (patch) | |
tree | 6fe33b6dfd560857a9bd3620644c36d8c6ace731 /Cython/Compiler/StringEncoding.py | |
parent | 184482af6eb1e68f2a1c063cd5be4ab18aab88d0 (diff) | |
download | cython-bf558ecf389a4902a1458eee187934c88757b8da.tar.gz |
support redundant parsing of string literals as unicode *and* bytes string, fix 'str' literal assignments to char* targets when using Future.unicode_literals
Diffstat (limited to 'Cython/Compiler/StringEncoding.py')
-rw-r--r-- | Cython/Compiler/StringEncoding.py | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py
index b5bd45f06..8c474f421 100644
--- a/Cython/Compiler/StringEncoding.py
+++ b/Cython/Compiler/StringEncoding.py
@@ -44,9 +44,15 @@ class UnicodeLiteralBuilder(object):
     def append_charval(self, char_number):
         self.chars.append( unichr(char_number) )
 
+    def append_uescape(self, char_number, escape_string):
+        self.append_charval(char_number)
+
     def getstring(self):
         return EncodedString(u''.join(self.chars))
 
+    def getstrings(self):
+        return (None, self.getstring())
+
 
 class BytesLiteralBuilder(object):
     """Assemble a byte string or char value.
@@ -64,6 +70,9 @@ class BytesLiteralBuilder(object):
     def append_charval(self, char_number):
         self.chars.append( unichr(char_number).encode('ISO-8859-1') )
 
+    def append_uescape(self, char_number, escape_string):
+        self.append(escape_string)
+
     def getstring(self):
         # this *must* return a byte string!
         s = BytesLiteral(join_bytes(self.chars))
@@ -74,6 +83,32 @@ class BytesLiteralBuilder(object):
         # this *must* return a byte string!
         return self.getstring()
 
+    def getstrings(self):
+        return (self.getstring(), None)
+
+class StrLiteralBuilder(object):
+    """Assemble both a bytes and a unicode representation of a string.
+    """
+    def __init__(self, target_encoding):
+        self._bytes = BytesLiteralBuilder(target_encoding)
+        self._unicode = UnicodeLiteralBuilder()
+
+    def append(self, characters):
+        self._bytes.append(characters)
+        self._unicode.append(characters)
+
+    def append_charval(self, char_number):
+        self._bytes.append_charval(char_number)
+        self._unicode.append_charval(char_number)
+
+    def append_uescape(self, char_number, escape_string):
+        self._bytes.append(escape_string)
+        self._unicode.append_charval(char_number)
+
+    def getstrings(self):
+        return (self._bytes.getstring(), self._unicode.getstring())
+
+
 class EncodedString(_unicode):
     # unicode string subclass to keep track of the original encoding.
     # 'encoding' is None for unicode strings and the source encoding