diff options
Diffstat (limited to 'lib/yaml/reader.py')
-rw-r--r-- | lib/yaml/reader.py | 41 |
1 files changed, 3 insertions, 38 deletions
diff --git a/lib/yaml/reader.py b/lib/yaml/reader.py index 1e7a4db..3249e6b 100644 --- a/lib/yaml/reader.py +++ b/lib/yaml/reader.py @@ -21,41 +21,6 @@ from error import YAMLError, Mark import codecs, re -# Unfortunately, codec functions in Python 2.3 does not support the `finish` -# arguments, so we have to write our own wrappers. - -try: - codecs.utf_8_decode('', 'strict', False) - from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode - -except TypeError: - - def utf_16_le_decode(data, errors, finish=False): - if not finish and len(data) % 2 == 1: - data = data[:-1] - return codecs.utf_16_le_decode(data, errors) - - def utf_16_be_decode(data, errors, finish=False): - if not finish and len(data) % 2 == 1: - data = data[:-1] - return codecs.utf_16_be_decode(data, errors) - - def utf_8_decode(data, errors, finish=False): - if not finish: - # We are trying to remove a possible incomplete multibyte character - # from the suffix of the data. - # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd. - # All further bytes are in the range 0x80 to 0xbf. - # UTF-8 encoded UCS characters may be up to six bytes long. - count = 0 - while count < 5 and count < len(data) \ - and '\x80' <= data[-count-1] <= '\xBF': - count -= 1 - if count < 5 and count < len(data) \ - and '\xC0' <= data[-count-1] <= '\xFD': - data = data[:-count-1] - return codecs.utf_8_decode(data, errors) - class ReaderError(YAMLError): def __init__(self, name, position, character, encoding, reason): @@ -159,13 +124,13 @@ class Reader(object): self.update_raw() if not isinstance(self.raw_buffer, unicode): if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): - self.raw_decode = utf_16_le_decode + self.raw_decode = codecs.utf_16_le_decode self.encoding = 'utf-16-le' elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): - self.raw_decode = utf_16_be_decode + self.raw_decode = codecs.utf_16_be_decode self.encoding = 'utf-16-be' else: - self.raw_decode = utf_8_decode + self.raw_decode = codecs.utf_8_decode self.encoding = 'utf-8' self.update(1) |