summaryrefslogtreecommitdiff
path: root/Lib/email/header.py
diff options
context:
space:
mode:
authorR. David Murray <rdmurray@bitdance.com>2011-01-05 01:39:32 +0000
committerR. David Murray <rdmurray@bitdance.com>2011-01-05 01:39:32 +0000
commitd14169bf79169c356451b9d0c64868650f1a4cd5 (patch)
treed38f0d22bee6080375417731f93a14d0f57f4e46 /Lib/email/header.py
parent1baa4a1e114b0649bdf019153066e44482935e58 (diff)
downloadcpython-d14169bf79169c356451b9d0c64868650f1a4cd5.tar.gz
#10790: make append work when output codec is different from input codec
There's still a bug here (the encode call shouldn't use the 'errors' parameter), but I'll fix that later.
Diffstat (limited to 'Lib/email/header.py')
-rw-r--r--Lib/email/header.py26
1 files changed, 10 insertions, 16 deletions
diff --git a/Lib/email/header.py b/Lib/email/header.py
index bb2c5ee765..94eb1a94aa 100644
--- a/Lib/email/header.py
+++ b/Lib/email/header.py
@@ -245,32 +245,26 @@ class Header:
that byte string, and a UnicodeError will be raised if the string
cannot be decoded with that charset. If s is a Unicode string, then
charset is a hint specifying the character set of the characters in
- the string. In this case, when producing an RFC 2822 compliant header
- using RFC 2047 rules, the Unicode string will be encoded using the
- following charsets in order: us-ascii, the charset hint, utf-8. The
- first character set not to provoke a UnicodeError is used.
+ the string. In either case, when producing an RFC 2822 compliant
+ header using RFC 2047 rules, the string will be encoded using the
+ output codec of the charset. If the string cannot be encoded to the
+ output codec, a UnicodeError will be raised.
- Optional `errors' is passed as the third argument to any unicode() or
- ustr.encode() call.
+ Optional `errors' is passed as the errors argument to the decode
+ call if s is a byte string.
"""
if charset is None:
charset = self._charset
elif not isinstance(charset, Charset):
charset = Charset(charset)
- if isinstance(s, str):
- # Convert the string from the input character set to the output
- # character set and store the resulting bytes and the charset for
- # composition later.
+ if not isinstance(s, str):
input_charset = charset.input_codec or 'us-ascii'
- input_bytes = s.encode(input_charset, errors)
- else:
- # We already have the bytes we will store internally.
- input_bytes = s
+ s = s.decode(input_charset, errors)
# Ensure that the bytes we're storing can be decoded to the output
# character set, otherwise an early error is thrown.
output_charset = charset.output_codec or 'us-ascii'
- output_string = input_bytes.decode(output_charset, errors)
- self._chunks.append((output_string, charset))
+ s.encode(output_charset, errors)
+ self._chunks.append((s, charset))
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
"""Encode a message header into an RFC-compliant format.