diff options
author | Barry Warsaw <barry@python.org> | 2004-10-03 03:16:19 +0000 |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2004-10-03 03:16:19 +0000 |
commit | 391f3377087b502a854c13d39b1a0116549e10c7 (patch) | |
tree | b71606f57266fd904ca369711539028a2e0ed00a /Lib/email | |
parent | 07cda4527655e78069a734a0626823d1f5265a4f (diff) | |
download | cpython-391f3377087b502a854c13d39b1a0116549e10c7.tar.gz |
Big email 3.0 API changes, with updated unit tests and documentation.
Briefly (from the NEWS file):
- Updates for the email package:
  + All deprecated APIs that in email 2.x issued warnings have been removed:
    _encoder argument to the MIMEText constructor, Message.add_payload(),
    Utils.dump_address_pair(), Utils.decode(), Utils.encode()
  + New deprecations: Generator.__call__(), Message.get_type(),
    Message.get_main_type(), Message.get_subtype(), the 'strict' argument to
    the Parser constructor. These will be removed in email 3.1.
  + Support for Python earlier than 2.3 has been removed (see PEP 291).
  + All defect classes have been renamed to end in 'Defect'.
  + Some FeedParser fixes; also a MultipartInvariantViolationDefect will be
    added to messages that claim to be multipart but really aren't.
  + Updates to documentation.
Diffstat (limited to 'Lib/email')
-rw-r--r-- | Lib/email/Charset.py | 24 | ||||
-rw-r--r-- | Lib/email/Encoders.py | 35 | ||||
-rw-r--r-- | Lib/email/Errors.py | 18 | ||||
-rw-r--r-- | Lib/email/FeedParser.py | 22 | ||||
-rw-r--r-- | Lib/email/Generator.py | 30 | ||||
-rw-r--r-- | Lib/email/Header.py | 3 | ||||
-rw-r--r-- | Lib/email/Iterators.py | 6 | ||||
-rw-r--r-- | Lib/email/MIMEAudio.py | 7 | ||||
-rw-r--r-- | Lib/email/MIMEBase.py | 8 | ||||
-rw-r--r-- | Lib/email/MIMEImage.py | 10 | ||||
-rw-r--r-- | Lib/email/MIMEMessage.py | 10 | ||||
-rw-r--r-- | Lib/email/MIMEMultipart.py | 6 | ||||
-rw-r--r-- | Lib/email/MIMENonMultipart.py | 8 | ||||
-rw-r--r-- | Lib/email/MIMEText.py | 27 | ||||
-rw-r--r-- | Lib/email/Message.py | 89 | ||||
-rw-r--r-- | Lib/email/Parser.py | 34 | ||||
-rw-r--r-- | Lib/email/Utils.py | 64 | ||||
-rw-r--r-- | Lib/email/__init__.py | 17 | ||||
-rw-r--r-- | Lib/email/_parseaddr.py | 3 | ||||
-rw-r--r-- | Lib/email/base64MIME.py | 5 | ||||
-rw-r--r-- | Lib/email/quopriMIME.py | 7 | ||||
-rw-r--r-- | Lib/email/test/__init__.py | 2 | ||||
-rw-r--r-- | Lib/email/test/data/msg_41.txt | 8 | ||||
-rw-r--r-- | Lib/email/test/test_email.py | 158 |
24 files changed, 230 insertions, 371 deletions
diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py index 3c8f7a4d59..6a3e3ca10f 100644 --- a/Lib/email/Charset.py +++ b/Lib/email/Charset.py @@ -1,18 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield), barry@python.org (Barry Warsaw) - -# XXX The following information needs updating. - -# Python 2.3 doesn't come with any Asian codecs by default. Two packages are -# currently available and supported as of this writing (30-Dec-2003): -# -# CJKCodecs -# http://cjkpython.i18n.org -# This package contains Chinese, Japanese, and Korean codecs - -# JapaneseCodecs -# http://www.asahi-net.or.jp/~rd6t-kjym/python -# Some Japanese users prefer this codec package +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org import email.base64MIME import email.quopriMIME @@ -21,9 +9,9 @@ from email.Encoders import encode_7or8bit # Flags for types of header encodings -QP = 1 # Quoted-Printable -BASE64 = 2 # Base64 -SHORTEST = 3 # the shorter of QP and base64, but only for headers +QP = 1 # Quoted-Printable +BASE64 = 2 # Base64 +SHORTEST = 3 # the shorter of QP and base64, but only for headers # In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7 MISC_LEN = 7 @@ -128,7 +116,7 @@ def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): documentation for more information. 
""" if body_enc == SHORTEST: - raise ValueError, 'SHORTEST not allowed for body_enc' + raise ValueError('SHORTEST not allowed for body_enc') CHARSETS[charset] = (header_enc, body_enc, output_charset) diff --git a/Lib/email/Encoders.py b/Lib/email/Encoders.py index 6851094e9b..baac2a3458 100644 --- a/Lib/email/Encoders.py +++ b/Lib/email/Encoders.py @@ -1,37 +1,16 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """Encodings and related functions.""" import base64 +from quopri import encodestring as _encodestring - - -# Helpers -try: - from quopri import encodestring as _encodestring - - def _qencode(s): - enc = _encodestring(s, quotetabs=1) - # Must encode spaces, which quopri.encodestring() doesn't do - return enc.replace(' ', '=20') -except ImportError: - # Python 2.1 doesn't have quopri.encodestring() - from cStringIO import StringIO - import quopri as _quopri - - def _qencode(s): - if not s: - return s - hasnewline = (s[-1] == '\n') - infp = StringIO(s) - outfp = StringIO() - _quopri.encode(infp, outfp, quotetabs=1) - # Python 2.x's encode() doesn't encode spaces even when quotetabs==1 - value = outfp.getvalue().replace(' ', '=20') - if not hasnewline and value[-1] == '\n': - return value[:-1] - return value +def _qencode(s): + enc = _encodestring(s, quotetabs=True) + # Must encode spaces, which quopri.encodestring() doesn't do + return enc.replace(' ', '=20') def _bencode(s): diff --git a/Lib/email/Errors.py b/Lib/email/Errors.py index e233219225..e13a2c75c8 100644 --- a/Lib/email/Errors.py +++ b/Lib/email/Errors.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """email package exception classes.""" @@ -33,17 +34,20 @@ class MessageDefect: def __init__(self, line=None): self.line = line -class NoBoundaryInMultipart(MessageDefect): 
+class NoBoundaryInMultipartDefect(MessageDefect): """A message claimed to be a multipart but had no boundary parameter.""" -class StartBoundaryNotFound(MessageDefect): +class StartBoundaryNotFoundDefect(MessageDefect): """The claimed start boundary was never found.""" -class FirstHeaderLineIsContinuation(MessageDefect): +class FirstHeaderLineIsContinuationDefect(MessageDefect): """A message had a continuation line as its first header line.""" -class MisplacedEnvelopeHeader(MessageDefect): +class MisplacedEnvelopeHeaderDefect(MessageDefect): """A 'Unix-from' header was found in the middle of a header block.""" -class MalformedHeader(MessageDefect): - """Found a header that was missing a colon, or was otherwise malformed""" +class MalformedHeaderDefect(MessageDefect): + """Found a header that was missing a colon, or was otherwise malformed.""" + +class MultipartInvariantViolationDefect(MessageDefect): + """A message claimed to be a multipart but no subparts were found.""" diff --git a/Lib/email/FeedParser.py b/Lib/email/FeedParser.py index dc3027dbda..de2754e099 100644 --- a/Lib/email/FeedParser.py +++ b/Lib/email/FeedParser.py @@ -1,5 +1,6 @@ # Copyright (C) 2004 Python Software Foundation # Authors: Baxter, Wouters and Warsaw +# Contact: email-sig@python.org """FeedParser - An email feed parser. @@ -15,7 +16,7 @@ This completes the parsing and returns the root message object. The other advantage of this parser is that it will never throw a parsing exception. Instead, when it finds something unexpected, it adds a 'defect' to the current message. Defects are just instances that live on the message -object's .defect attribute. +object's .defects attribute. """ import re @@ -100,7 +101,7 @@ class BufferedSubFile(object): # and the eol character(s). Gather up a list of lines after # re-attaching the newlines. 
lines = [] - for i in range(len(parts) / 2): + for i in range(len(parts) // 2): lines.append(parts[i*2] + parts[i*2+1]) self.pushlines(lines) @@ -156,6 +157,10 @@ class FeedParser: self._call_parse() root = self._pop_message() assert not self._msgstack + # Look for final set of defects + if root.get_content_maintype() == 'multipart' \ + and not root.is_multipart(): + root.defects.append(Errors.MultipartInvariantViolationDefect()) return root def _new_message(self): @@ -166,7 +171,6 @@ class FeedParser: self._msgstack[-1].attach(msg) self._msgstack.append(msg) self._cur = msg - self._cur.defects = [] self._last = msg def _pop_message(self): @@ -259,7 +263,7 @@ class FeedParser: # defined a boundary. That's a problem which we'll handle by # reading everything until the EOF and marking the message as # defective. - self._cur.defects.append(Errors.NoBoundaryInMultipart()) + self._cur.defects.append(Errors.NoBoundaryInMultipartDefect()) lines = [] for line in self._input: if line is NeedMoreData: @@ -305,6 +309,8 @@ class FeedParser: if eolmo: preamble[-1] = lastline[:-len(eolmo.group(0))] self._cur.preamble = EMPTYSTRING.join(preamble) + #import pdb ; pdb.set_trace() + # See SF bug #1030941 capturing_preamble = False self._input.unreadline(line) continue @@ -363,7 +369,7 @@ class FeedParser: # that as a defect and store the captured text as the payload. # Otherwise everything from here to the EOF is epilogue. if capturing_preamble: - self._cur.defects.append(Errors.StartBoundaryNotFound()) + self._cur.defects.append(Errors.StartBoundaryNotFoundDefect()) self._cur.set_payload(EMPTYSTRING.join(preamble)) return # If the end boundary ended in a newline, we'll need to make sure @@ -408,7 +414,7 @@ class FeedParser: # The first line of the headers was a continuation. This # is illegal, so let's note the defect, store the illegal # line, and ignore it for purposes of headers. 
- defect = Errors.FirstHeaderLineIsContinuation(line) + defect = Errors.FirstHeaderLineIsContinuationDefect(line) self._cur.defects.append(defect) continue lastvalue.append(line) @@ -436,13 +442,13 @@ class FeedParser: else: # Weirdly placed unix-from line. Note this as a defect # and ignore it. - defect = Errors.MisplacedEnvelopeHeader(line) + defect = Errors.MisplacedEnvelopeHeaderDefect(line) self._cur.defects.append(defect) continue # Split the line on the colon separating field name from value. i = line.find(':') if i < 0: - defect = Errors.MalformedHeader(line) + defect = Errors.MalformedHeaderDefect(line) self._cur.defects.append(defect) continue lastheader = line[:i] diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py index 7fe634fb8f..9411a9e6ca 100644 --- a/Lib/email/Generator.py +++ b/Lib/email/Generator.py @@ -1,13 +1,14 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Classes to generate plain text from a message object tree. 
-""" +"""Classes to generate plain text from a message object tree.""" import re import sys import time import random +import warnings from cStringIO import StringIO from email.Header import Header @@ -81,7 +82,10 @@ class Generator: self._write(msg) # For backwards compatibility, but this is slower - __call__ = flatten + def __call__(self, msg, unixfrom=False): + warnings.warn('__call__() deprecated; use flatten()', + DeprecationWarning, 2) + self.flatten(msg, unixfrom) def clone(self, fp): """Clone this generator with the exact same options.""" @@ -175,7 +179,7 @@ class Generator: if cset is not None: payload = cset.body_encode(payload) if not isinstance(payload, basestring): - raise TypeError, 'string payload expected: %s' % type(payload) + raise TypeError('string payload expected: %s' % type(payload)) if self._mangle_from_: payload = fcre.sub('>From ', payload) self._fp.write(payload) @@ -271,6 +275,8 @@ class Generator: +_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' + class DecodedGenerator(Generator): """Generator a text representation of a message. 
@@ -301,13 +307,13 @@ class DecodedGenerator(Generator): """ Generator.__init__(self, outfp, mangle_from_, maxheaderlen) if fmt is None: - fmt = ('[Non-text (%(type)s) part of message omitted, ' - 'filename %(filename)s]') - self._fmt = fmt + self._fmt = _FMT + else: + self._fmt = fmt def _dispatch(self, msg): for part in msg.walk(): - maintype = part.get_main_type('text') + maintype = part.get_content_maintype() if maintype == 'text': print >> self, part.get_payload(decode=True) elif maintype == 'multipart': @@ -315,9 +321,9 @@ class DecodedGenerator(Generator): pass else: print >> self, self._fmt % { - 'type' : part.get_type('[no MIME type]'), - 'maintype' : part.get_main_type('[no main MIME type]'), - 'subtype' : part.get_subtype('[no sub-MIME type]'), + 'type' : part.get_content_type(), + 'maintype' : part.get_content_maintype(), + 'subtype' : part.get_content_subtype(), 'filename' : part.get_filename('[no filename]'), 'description': part.get('Content-Description', '[no description]'), diff --git a/Lib/email/Header.py b/Lib/email/Header.py index 21acaf1179..5e24afede0 100644 --- a/Lib/email/Header.py +++ b/Lib/email/Header.py @@ -1,5 +1,6 @@ # Copyright (C) 2002-2004 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield), barry@python.org (Barry Warsaw) +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org """Header encoding and decoding functionality.""" diff --git a/Lib/email/Iterators.py b/Lib/email/Iterators.py index af6095e7c7..74a93c7b93 100644 --- a/Lib/email/Iterators.py +++ b/Lib/email/Iterators.py @@ -1,8 +1,8 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: Barry Warsaw <barry@python.org> +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Various types of useful iterators and generators. 
-""" +"""Various types of useful iterators and generators.""" import sys from cStringIO import StringIO diff --git a/Lib/email/MIMEAudio.py b/Lib/email/MIMEAudio.py index dda7689a4c..266ec4c409 100644 --- a/Lib/email/MIMEAudio.py +++ b/Lib/email/MIMEAudio.py @@ -1,7 +1,8 @@ +# Copyright (C) 2001-2004 Python Software Foundation # Author: Anthony Baxter +# Contact: email-sig@python.org -"""Class representing audio/* type MIME documents. -""" +"""Class representing audio/* type MIME documents.""" import sndhdr from cStringIO import StringIO @@ -65,7 +66,7 @@ class MIMEAudio(MIMENonMultipart): if _subtype is None: _subtype = _whatsnd(_audiodata) if _subtype is None: - raise TypeError, 'Could not find audio MIME subtype' + raise TypeError('Could not find audio MIME subtype') MIMENonMultipart.__init__(self, 'audio', _subtype, **_params) self.set_payload(_audiodata) _encoder(self) diff --git a/Lib/email/MIMEBase.py b/Lib/email/MIMEBase.py index 7485d855c4..88691f802e 100644 --- a/Lib/email/MIMEBase.py +++ b/Lib/email/MIMEBase.py @@ -1,8 +1,8 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Base class for MIME specializations. -""" +"""Base class for MIME specializations.""" from email import Message diff --git a/Lib/email/MIMEImage.py b/Lib/email/MIMEImage.py index 5306e53706..a658067839 100644 --- a/Lib/email/MIMEImage.py +++ b/Lib/email/MIMEImage.py @@ -1,8 +1,8 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Class representing image/* type MIME documents. 
-""" +"""Class representing image/* type MIME documents.""" import imghdr @@ -39,7 +39,7 @@ class MIMEImage(MIMENonMultipart): if _subtype is None: _subtype = imghdr.what(None, _imagedata) if _subtype is None: - raise TypeError, 'Could not guess image MIME subtype' + raise TypeError('Could not guess image MIME subtype') MIMENonMultipart.__init__(self, 'image', _subtype, **_params) self.set_payload(_imagedata) _encoder(self) diff --git a/Lib/email/MIMEMessage.py b/Lib/email/MIMEMessage.py index 2042dd9752..3021934ef9 100644 --- a/Lib/email/MIMEMessage.py +++ b/Lib/email/MIMEMessage.py @@ -1,8 +1,8 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Class representing message/* MIME documents. -""" +"""Class representing message/* MIME documents.""" from email import Message from email.MIMENonMultipart import MIMENonMultipart @@ -24,7 +24,7 @@ class MIMEMessage(MIMENonMultipart): """ MIMENonMultipart.__init__(self, 'message', _subtype) if not isinstance(_msg, Message.Message): - raise TypeError, 'Argument is not an instance of Message' + raise TypeError('Argument is not an instance of Message') # It's convenient to use this base class method. We need to do it # this way or we'll get an exception Message.Message.attach(self, _msg) diff --git a/Lib/email/MIMEMultipart.py b/Lib/email/MIMEMultipart.py index ea6ae0c5ed..9072a6421c 100644 --- a/Lib/email/MIMEMultipart.py +++ b/Lib/email/MIMEMultipart.py @@ -1,8 +1,8 @@ # Copyright (C) 2002-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Base class for MIME multipart/* type messages. 
-""" +"""Base class for MIME multipart/* type messages.""" from email import MIMEBase diff --git a/Lib/email/MIMENonMultipart.py b/Lib/email/MIMENonMultipart.py index 1b3bcfd353..4195d2a668 100644 --- a/Lib/email/MIMENonMultipart.py +++ b/Lib/email/MIMENonMultipart.py @@ -1,8 +1,8 @@ -# Copyright (C) 2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2002-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Base class for MIME type messages that are not multipart. -""" +"""Base class for MIME type messages that are not multipart.""" from email import Errors from email import MIMEBase diff --git a/Lib/email/MIMEText.py b/Lib/email/MIMEText.py index d049ad9fd8..5ef187673a 100644 --- a/Lib/email/MIMEText.py +++ b/Lib/email/MIMEText.py @@ -1,10 +1,9 @@ -# Copyright (C) 2001,2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) +# Copyright (C) 2001-2004 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org -"""Class representing text/* type MIME documents. -""" +"""Class representing text/* type MIME documents.""" -import warnings from email.MIMENonMultipart import MIMENonMultipart from email.Encoders import encode_7or8bit @@ -13,8 +12,7 @@ from email.Encoders import encode_7or8bit class MIMEText(MIMENonMultipart): """Class for generating text/* type MIME documents.""" - def __init__(self, _text, _subtype='plain', _charset='us-ascii', - _encoder=None): + def __init__(self, _text, _subtype='plain', _charset='us-ascii'): """Create a text/* type MIME document. _text is the string for this message object. @@ -24,22 +22,7 @@ class MIMEText(MIMENonMultipart): _charset is the character set parameter added to the Content-Type header. This defaults to "us-ascii". Note that as a side-effect, the Content-Transfer-Encoding header will also be set. - - The use of the _encoder is deprecated. 
The encoding of the payload, - and the setting of the character set parameter now happens implicitly - based on the _charset argument. If _encoder is supplied, then a - DeprecationWarning is used, and the _encoder functionality may - override any header settings indicated by _charset. This is probably - not what you want. """ MIMENonMultipart.__init__(self, 'text', _subtype, **{'charset': _charset}) self.set_payload(_text, _charset) - if _encoder is not None: - warnings.warn('_encoder argument is obsolete.', - DeprecationWarning, 2) - # Because set_payload() with a _charset will set its own - # Content-Transfer-Encoding header, we need to delete the - # existing one or will end up with two of them. :( - del self['content-transfer-encoding'] - _encoder(self) diff --git a/Lib/email/Message.py b/Lib/email/Message.py index 2245f9b834..d23a26f8c8 100644 --- a/Lib/email/Message.py +++ b/Lib/email/Message.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """Basic message object for the email package object model.""" @@ -69,6 +70,10 @@ def _parseparam(s): def _unquotevalue(value): + # This is different than Utils.collapse_rfc2231_value() because it doesn't + # try to convert the value to a unicode. Message.get_param() and + # Message.get_params() are both currently defined to return the tuple in + # the face of RFC 2231 parameters. 
if isinstance(value, tuple): return value[0], value[1], Utils.unquote(value[2]) else: @@ -98,6 +103,7 @@ class Message: self._charset = None # Defaults for multipart messages self.preamble = self.epilogue = None + self.defects = [] # Default content type self._default_type = 'text/plain' @@ -124,9 +130,7 @@ class Message: def is_multipart(self): """Return True if the message consists of multiple parts.""" - if isinstance(self._payload, list): - return True - return False + return isinstance(self._payload, list) # # Unix From_ line @@ -140,26 +144,6 @@ class Message: # # Payload manipulation. # - def add_payload(self, payload): - """Add the given payload to the current payload. - - If the current payload is empty, then the current payload will be made - a scalar, set to the given value. - - Note: This method is deprecated. Use .attach() instead. - """ - warnings.warn('add_payload() is deprecated, use attach() instead.', - DeprecationWarning, 2) - if self._payload is None: - self._payload = payload - elif isinstance(self._payload, list): - self._payload.append(payload) - elif self.get_main_type() not in (None, 'multipart'): - raise Errors.MultipartConversionError( - 'Message main content type must be "multipart" or missing') - else: - self._payload = [self._payload, payload] - def attach(self, payload): """Add the given payload to the current payload. @@ -195,7 +179,7 @@ class Message: if i is None: payload = self._payload elif not isinstance(self._payload, list): - raise TypeError, 'Expected list, got %s' % type(self._payload) + raise TypeError('Expected list, got %s' % type(self._payload)) else: payload = self._payload[i] if decode: @@ -254,7 +238,7 @@ class Message: if isinstance(charset, str): charset = Charset.Charset(charset) if not isinstance(charset, Charset.Charset): - raise TypeError, charset + raise TypeError(charset) # BAW: should we accept strings that can serve as arguments to the # Charset constructor? 
self._charset = charset @@ -267,9 +251,9 @@ class Message: self.set_param('charset', charset.get_output_charset()) if not self.has_key('Content-Transfer-Encoding'): cte = charset.get_body_encoding() - if callable(cte): + try: cte(self) - else: + except TypeError: self.add_header('Content-Transfer-Encoding', cte) def get_charset(self): @@ -290,7 +274,7 @@ class Message: Return None if the header is missing instead of raising an exception. Note that if the header appeared multiple times, exactly which - occurrance gets returned is undefined. Use getall() to get all + occurrance gets returned is undefined. Use get_all() to get all the values matching a header field name. """ return self.get(name) @@ -320,7 +304,7 @@ class Message: def has_key(self, name): """Return true if the message contains the header.""" - missing = [] + missing = object() return self.get(name, missing) is not missing def keys(self): @@ -422,11 +406,10 @@ class Message: self._headers[i] = (k, _value) break else: - raise KeyError, _name + raise KeyError(_name) # - # These methods are silently deprecated in favor of get_content_type() and - # friends (see below). They will be noisily deprecated in email 3.0. + # Deprecated methods. These will be removed in email 3.1. # def get_type(self, failobj=None): @@ -436,7 +419,9 @@ class Message: string of the form `maintype/subtype'. If there was no Content-Type header in the message, failobj is returned (defaults to None). 
""" - missing = [] + warnings.warn('get_type() deprecated; use get_content_type()', + DeprecationWarning, 2) + missing = object() value = self.get('content-type', missing) if value is missing: return failobj @@ -444,7 +429,9 @@ class Message: def get_main_type(self, failobj=None): """Return the message's main content type if present.""" - missing = [] + warnings.warn('get_main_type() deprecated; use get_content_maintype()', + DeprecationWarning, 2) + missing = object() ctype = self.get_type(missing) if ctype is missing: return failobj @@ -454,7 +441,9 @@ class Message: def get_subtype(self, failobj=None): """Return the message's content subtype if present.""" - missing = [] + warnings.warn('get_subtype() deprecated; use get_content_subtype()', + DeprecationWarning, 2) + missing = object() ctype = self.get_type(missing) if ctype is missing: return failobj @@ -479,7 +468,7 @@ class Message: appears inside a multipart/digest container, in which case it would be message/rfc822. """ - missing = [] + missing = object() value = self.get('content-type', missing) if value is missing: # This should have no parameters @@ -529,7 +518,7 @@ class Message: def _get_params_preserve(self, failobj, header): # Like get_params() but preserves the quoting of values. BAW: # should this be part of the public interface? - missing = [] + missing = object() value = self.get(header, missing) if value is missing: return failobj @@ -560,7 +549,7 @@ class Message: header. Optional header is the header to search instead of Content-Type. If unquote is True, the value is unquoted. """ - missing = [] + missing = object() params = self._get_params_preserve(missing, header) if params is missing: return failobj @@ -713,17 +702,11 @@ class Message: The filename is extracted from the Content-Disposition header's `filename' parameter, and it is unquoted. 
""" - missing = [] + missing = object() filename = self.get_param('filename', missing, 'content-disposition') if filename is missing: return failobj - if isinstance(filename, tuple): - # It's an RFC 2231 encoded parameter - newvalue = _unquotevalue(filename) - return unicode(newvalue[2], newvalue[0] or 'us-ascii') - else: - newvalue = _unquotevalue(filename.strip()) - return newvalue + return Utils.collapse_rfc2231_value(filename).strip() def get_boundary(self, failobj=None): """Return the boundary associated with the payload if present. @@ -731,15 +714,11 @@ class Message: The boundary is extracted from the Content-Type header's `boundary' parameter, and it is unquoted. """ - missing = [] + missing = object() boundary = self.get_param('boundary', missing) if boundary is missing: return failobj - if isinstance(boundary, tuple): - # RFC 2231 encoded, so decode. It better end up as ascii - charset = boundary[0] or 'us-ascii' - return unicode(boundary[2], charset).encode('us-ascii') - return _unquotevalue(boundary.strip()) + return Utils.collapse_rfc2231_value(boundary).strip() def set_boundary(self, boundary): """Set the boundary parameter in Content-Type to 'boundary'. @@ -751,7 +730,7 @@ class Message: HeaderParseError is raised if the message has no Content-Type header. """ - missing = [] + missing = object() params = self._get_params_preserve(missing, 'content-type') if params is missing: # There was no Content-Type header, and we don't know what type @@ -793,7 +772,7 @@ class Message: Content-Type header, or if that header has no charset parameter, failobj is returned. 
""" - missing = [] + missing = object() charset = self.get_param('charset', missing) if charset is missing: return failobj diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py index 8c5661db99..0c05224489 100644 --- a/Lib/email/Parser.py +++ b/Lib/email/Parser.py @@ -4,17 +4,15 @@ """A parser of RFC 2822 and MIME email messages.""" -import re +import warnings from cStringIO import StringIO from email.FeedParser import FeedParser from email.Message import Message -NLCRE = re.compile('\r\n|\r|\n') - class Parser: - def __init__(self, _class=Message, strict=False): + def __init__(self, *args, **kws): """Parser of RFC 2822 and MIME email messages. Creates an in-memory object tree representing the email message, which @@ -29,14 +27,28 @@ class Parser: _class is the class to instantiate for new message objects when they must be created. This class must have a constructor that can take zero arguments. Default is Message.Message. - - Optional strict tells the parser to be strictly RFC compliant or to be - more forgiving in parsing of ill-formatted MIME documents. When - non-strict mode is used, the parser will try to make up for missing or - erroneous boundaries and other peculiarities seen in the wild. - Default is non-strict parsing. """ - self._class = _class + if len(args) >= 1: + if '_class' in kws: + raise TypeError("Multiple values for keyword arg '_class'") + kws['_class'] = args[0] + if len(args) == 2: + if 'strict' in kws: + raise TypeError("Multiple values for keyword arg 'strict'") + kws['strict'] = args[1] + if len(args) > 2: + raise TypeError('Too many arguments') + if '_class' in kws: + self._class = kws['_class'] + del kws['_class'] + else: + self._class = Message + if 'strict' in kws: + warnings.warn("'strict' argument is deprecated (and ignored)", + DeprecationWarning, 2) + del kws['strict'] + if kws: + raise TypeError('Unexpected keyword arguments') def parse(self, fp, headersonly=False): """Create a message structure from the data in a file. 
diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py index 3a4bbc83b6..e786d26586 100644 --- a/Lib/email/Utils.py +++ b/Lib/email/Utils.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """Miscellaneous utilities.""" @@ -80,12 +81,6 @@ def formataddr(pair): return '%s%s%s <%s>' % (quotes, name, quotes, address) return address -# For backwards compatibility -def dump_address_pair(pair): - warnings.warn('Use email.Utils.formataddr() instead', - DeprecationWarning, 2) - return formataddr(pair) - def getaddresses(fieldvalues): @@ -107,46 +102,6 @@ ecre = re.compile(r''' ''', re.VERBOSE | re.IGNORECASE) -def decode(s): - """Return a decoded string according to RFC 2047, as a unicode string. - - NOTE: This function is deprecated. Use Header.decode_header() instead. - """ - warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2) - # Intra-package import here to avoid circular import problems. - from email.Header import decode_header - L = decode_header(s) - if not isinstance(L, list): - # s wasn't decoded - return s - - rtn = [] - for atom, charset in L: - if charset is None: - rtn.append(atom) - else: - # Convert the string to Unicode using the given encoding. Leave - # Unicode conversion errors to strict. - rtn.append(unicode(atom, charset)) - # Now that we've decoded everything, we just need to join all the parts - # together into the final string. 
- return UEMPTYSTRING.join(rtn) - - - -def encode(s, charset='iso-8859-1', encoding='q'): - """Encode a string according to RFC 2047.""" - warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2) - encoding = encoding.lower() - if encoding == 'q': - estr = _qencode(s) - elif encoding == 'b': - estr = _bencode(s) - else: - raise ValueError, 'Illegal encoding code: ' + encoding - return '=?%s?%s?%s?=' % (charset.lower(), encoding, estr) - - def formatdate(timeval=None, localtime=False): """Returns a date string as specified by RFC 2822, e.g.: @@ -179,7 +134,7 @@ def formatdate(timeval=None, localtime=False): sign = '-' else: sign = '+' - zone = '%s%02d%02d' % (sign, hours, minutes / 60) + zone = '%s%02d%02d' % (sign, hours, minutes // 60) else: now = time.gmtime(timeval) # Timezone offset is always -0000 @@ -314,3 +269,16 @@ def decode_params(params): new_params.append( (name, (charset, language, '"%s"' % quote(value)))) return new_params + +def collapse_rfc2231_value(value, errors='replace', + fallback_charset='us-ascii'): + if isinstance(value, tuple): + rawval = unquote(value[2]) + charset = value[0] or 'us-ascii' + try: + return unicode(rawval, charset, errors) + except LookupError: + # XXX charset is unknown to Python. + return unicode(rawval, fallback_charset, errors) + else: + return unquote(value) diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py index 74b9b731e1..8a46fec288 100644 --- a/Lib/email/__init__.py +++ b/Lib/email/__init__.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: barry@python.org (Barry Warsaw) +# Author: Barry Warsaw +# Contact: email-sig@python.org """A package for parsing, handling, and generating email messages.""" @@ -33,25 +34,19 @@ __all__ = [ # Some convenience routines. Don't import Parser and Message as side-effects # of importing email since those cascadingly import most of the rest of the # email package. 
-def message_from_string(s, _class=None, strict=False): +def message_from_string(s, *args, **kws): """Parse a string into a Message object model. Optional _class and strict are passed to the Parser constructor. """ from email.Parser import Parser - if _class is None: - from email.Message import Message - _class = Message - return Parser(_class, strict=strict).parsestr(s) + return Parser(*args, **kws).parsestr(s) -def message_from_file(fp, _class=None, strict=False): +def message_from_file(fp, *args, **kws): """Read a file and parse its contents into a Message object model. Optional _class and strict are passed to the Parser constructor. """ from email.Parser import Parser - if _class is None: - from email.Message import Message - _class = Message - return Parser(_class, strict=strict).parse(fp) + return Parser(*args, **kws).parse(fp) diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 4a31affe9f..f6efcd5c1e 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -1,4 +1,5 @@ # Copyright (C) 2002-2004 Python Software Foundation +# Contact: email-sig@python.org """Email address parsing code. @@ -115,7 +116,7 @@ def parsedate_tz(data): tzoffset = -tzoffset else: tzsign = 1 - tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60) + tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60) tuple = (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset) return tuple diff --git a/Lib/email/base64MIME.py b/Lib/email/base64MIME.py index af85949763..6ed1d53584 100644 --- a/Lib/email/base64MIME.py +++ b/Lib/email/base64MIME.py @@ -1,5 +1,6 @@ -# Copyright (C) 2002 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield) +# Copyright (C) 2002-2004 Python Software Foundation +# Author: Ben Gertzfield +# Contact: email-sig@python.org """Base64 content transfer encoding per RFCs 2045-2047. 
diff --git a/Lib/email/quopriMIME.py b/Lib/email/quopriMIME.py index 2802bc2f1c..a9b5d490ae 100644 --- a/Lib/email/quopriMIME.py +++ b/Lib/email/quopriMIME.py @@ -1,5 +1,6 @@ # Copyright (C) 2001-2004 Python Software Foundation -# Author: che@debian.org (Ben Gertzfield) +# Author: Ben Gertzfield +# Contact: email-sig@python.org """Quoted-printable content transfer encoding per RFCs 2045-2047. @@ -43,12 +44,12 @@ bqre = re.compile(r'[^ !-<>-~\t]') # Helpers def header_quopri_check(c): """Return True if the character should be escaped with header quopri.""" - return hqre.match(c) and True + return bool(hqre.match(c)) def body_quopri_check(c): """Return True if the character should be escaped with body quopri.""" - return bqre.match(c) and True + return bool(bqre.match(c)) def header_quopri_len(s): diff --git a/Lib/email/test/__init__.py b/Lib/email/test/__init__.py index b8a7774849..e69de29bb2 100644 --- a/Lib/email/test/__init__.py +++ b/Lib/email/test/__init__.py @@ -1,2 +0,0 @@ -# Copyright (C) 2002 Python Software Foundation -# Author: barry@zope.com (Barry Warsaw) diff --git a/Lib/email/test/data/msg_41.txt b/Lib/email/test/data/msg_41.txt new file mode 100644 index 0000000000..76cdd1cb7f --- /dev/null +++ b/Lib/email/test/data/msg_41.txt @@ -0,0 +1,8 @@ +From: "Allison Dunlap" <xxx@example.com> +To: yyy@example.com +Subject: 64423 +Date: Sun, 11 Jul 2004 16:09:27 -0300 +MIME-Version: 1.0 +Content-Type: multipart/alternative; + +Blah blah blah diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index d079b9e546..a55ef5c935 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -1,4 +1,5 @@ # Copyright (C) 2001-2004 Python Software Foundation +# Contact: email-sig@python.org # email package unit tests import os @@ -51,25 +52,20 @@ def openfile(filename, mode='r'): # Base test class class TestEmailBase(unittest.TestCase): - if hasattr(difflib, 'ndiff'): - # Python 2.2 and beyond - def ndiffAssertEqual(self, first, 
second): - """Like failUnlessEqual except use ndiff for readable output.""" - if first <> second: - sfirst = str(first) - ssecond = str(second) - diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines()) - fp = StringIO() - print >> fp, NL, NL.join(diff) - raise self.failureException, fp.getvalue() - else: - # Python 2.1 - ndiffAssertEqual = unittest.TestCase.assertEqual - - def _msgobj(self, filename, strict=False): + def ndiffAssertEqual(self, first, second): + """Like failUnlessEqual except use ndiff for readable output.""" + if first <> second: + sfirst = str(first) + ssecond = str(second) + diff = difflib.ndiff(sfirst.splitlines(), ssecond.splitlines()) + fp = StringIO() + print >> fp, NL, NL.join(diff) + raise self.failureException, fp.getvalue() + + def _msgobj(self, filename): fp = openfile(findfile(filename)) try: - msg = email.message_from_file(fp, strict=strict) + msg = email.message_from_file(fp) finally: fp.close() return msg @@ -493,44 +489,12 @@ class TestMessageAPI(TestEmailBase): # Test the email.Encoders module class TestEncoders(unittest.TestCase): - def test_encode_noop(self): - eq = self.assertEqual - msg = MIMEText('hello world', _encoder=Encoders.encode_noop) - eq(msg.get_payload(), 'hello world') - - def test_encode_7bit(self): - eq = self.assertEqual - msg = MIMEText('hello world', _encoder=Encoders.encode_7or8bit) - eq(msg.get_payload(), 'hello world') - eq(msg['content-transfer-encoding'], '7bit') - msg = MIMEText('hello \x7f world', _encoder=Encoders.encode_7or8bit) - eq(msg.get_payload(), 'hello \x7f world') - eq(msg['content-transfer-encoding'], '7bit') - - def test_encode_8bit(self): - eq = self.assertEqual - msg = MIMEText('hello \x80 world', _encoder=Encoders.encode_7or8bit) - eq(msg.get_payload(), 'hello \x80 world') - eq(msg['content-transfer-encoding'], '8bit') - def test_encode_empty_payload(self): eq = self.assertEqual msg = Message() msg.set_charset('us-ascii') eq(msg['content-transfer-encoding'], '7bit') - def 
test_encode_base64(self): - eq = self.assertEqual - msg = MIMEText('hello world', _encoder=Encoders.encode_base64) - eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=') - eq(msg['content-transfer-encoding'], 'base64') - - def test_encode_quoted_printable(self): - eq = self.assertEqual - msg = MIMEText('hello world', _encoder=Encoders.encode_quopri) - eq(msg.get_payload(), 'hello=20world') - eq(msg['content-transfer-encoding'], 'quoted-printable') - def test_default_cte(self): eq = self.assertEqual msg = MIMEText('hello world') @@ -932,16 +896,6 @@ class TestMIMEAudio(unittest.TestCase): au = MIMEAudio(self._audiodata, 'fish') self.assertEqual(im.get_type(), 'audio/fish') - def test_custom_encoder(self): - eq = self.assertEqual - def encoder(msg): - orig = msg.get_payload() - msg.set_payload(0) - msg['Content-Transfer-Encoding'] = 'broken64' - au = MIMEAudio(self._audiodata, _encoder=encoder) - eq(au.get_payload(), 0) - eq(au['content-transfer-encoding'], 'broken64') - def test_add_header(self): eq = self.assertEqual unless = self.failUnless @@ -985,16 +939,6 @@ class TestMIMEImage(unittest.TestCase): im = MIMEImage(self._imgdata, 'fish') self.assertEqual(im.get_type(), 'image/fish') - def test_custom_encoder(self): - eq = self.assertEqual - def encoder(msg): - orig = msg.get_payload() - msg.set_payload(0) - msg['Content-Transfer-Encoding'] = 'broken64' - im = MIMEImage(self._imgdata, _encoder=encoder) - eq(im.get_payload(), 0) - eq(im['content-transfer-encoding'], 'broken64') - def test_add_header(self): eq = self.assertEqual unless = self.failUnless @@ -1396,8 +1340,8 @@ class TestNonConformant(TestEmailBase): eq = self.assertEqual msg = self._msgobj('msg_14.txt') eq(msg.get_type(), 'text') - eq(msg.get_main_type(), None) - eq(msg.get_subtype(), None) + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') def test_same_boundary_inner_outer(self): unless = self.failUnless @@ -1406,14 +1350,17 @@ class TestNonConformant(TestEmailBase): inner = 
msg.get_payload(0) unless(hasattr(inner, 'defects')) self.assertEqual(len(inner.defects), 1) - unless(isinstance(inner.defects[0], Errors.StartBoundaryNotFound)) + unless(isinstance(inner.defects[0], + Errors.StartBoundaryNotFoundDefect)) def test_multipart_no_boundary(self): unless = self.failUnless msg = self._msgobj('msg_25.txt') unless(isinstance(msg.get_payload(), str)) - self.assertEqual(len(msg.defects), 1) - unless(isinstance(msg.defects[0], Errors.NoBoundaryInMultipart)) + self.assertEqual(len(msg.defects), 2) + unless(isinstance(msg.defects[0], Errors.NoBoundaryInMultipartDefect)) + unless(isinstance(msg.defects[1], + Errors.MultipartInvariantViolationDefect)) def test_invalid_content_type(self): eq = self.assertEqual @@ -1464,40 +1411,19 @@ Subject: here's something interesting counter to RFC 2822, there's no separating newline here """) + def test_lying_multipart(self): + unless = self.failUnless + msg = self._msgobj('msg_41.txt') + unless(hasattr(msg, 'defects')) + self.assertEqual(len(msg.defects), 2) + unless(isinstance(msg.defects[0], Errors.NoBoundaryInMultipartDefect)) + unless(isinstance(msg.defects[1], + Errors.MultipartInvariantViolationDefect)) + # Test RFC 2047 header encoding and decoding class TestRFC2047(unittest.TestCase): - def test_iso_8859_1(self): - eq = self.assertEqual - s = '=?iso-8859-1?q?this=20is=20some=20text?=' - eq(Utils.decode(s), 'this is some text') - s = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=' - eq(Utils.decode(s), u'Keld J\xf8rn Simonsen') - s = '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=' \ - '=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=' - eq(Utils.decode(s), 'If you can read this you understand the example.') - s = '=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=' - eq(Utils.decode(s), - u'\u05dd\u05d5\u05dc\u05e9 \u05df\u05d1 \u05d9\u05dc\u05d8\u05e4\u05e0') - s = '=?iso-8859-1?q?this=20is?= =?iso-8859-1?q?some=20text?=' - eq(Utils.decode(s), u'this issome text') - s = '=?iso-8859-1?q?this=20is_?= 
=?iso-8859-1?q?some=20text?=' - eq(Utils.decode(s), u'this is some text') - - def test_encode_header(self): - eq = self.assertEqual - s = 'this is some text' - eq(Utils.encode(s), '=?iso-8859-1?q?this=20is=20some=20text?=') - s = 'Keld_J\xf8rn_Simonsen' - eq(Utils.encode(s), '=?iso-8859-1?q?Keld_J=F8rn_Simonsen?=') - s1 = 'If you can read this yo' - s2 = 'u understand the example.' - eq(Utils.encode(s1, encoding='b'), - '=?iso-8859-1?b?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=') - eq(Utils.encode(s2, charset='iso-8859-2', encoding='b'), - '=?iso-8859-2?b?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=') - def test_rfc2047_multiline(self): eq = self.assertEqual s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz @@ -1517,10 +1443,7 @@ class TestRFC2047(unittest.TestCase): s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>' dh = decode_header(s) eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard <pirard@dom.ain>', None)]) - # Python 2.1's unicode() builtin doesn't call the object's - # __unicode__() method. Use the following alternative instead. - #hu = unicode(make_header(dh)).encode('latin-1') - hu = make_header(dh).__unicode__().encode('latin-1') + hu = unicode(make_header(dh)).encode('latin-1') eq(hu, 'Andr\xe9 Pirard <pirard@dom.ain>') def test_whitespace_eater_unicode_2(self): @@ -1870,8 +1793,8 @@ class TestIdempotent(TestEmailBase): eq = self.assertEquals msg, text = self._msgobj('msg_01.txt') eq(msg.get_type(), 'text/plain') - eq(msg.get_main_type(), 'text') - eq(msg.get_subtype(), 'plain') + eq(msg.get_content_maintype(), 'text') + eq(msg.get_content_subtype(), 'plain') eq(msg.get_params()[1], ('charset', 'us-ascii')) eq(msg.get_param('charset'), 'us-ascii') eq(msg.preamble, None) @@ -2712,11 +2635,7 @@ class TestHeader(TestEmailBase): eq(decode_header(enc), [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"), (utf8_head, "utf-8")]) - # Test for conversion to unicode. BAW: Python 2.1 doesn't support the - # __unicode__() protocol, so do things this way for compatibility. 
- ustr = h.__unicode__() - # For Python 2.2 and beyond - #ustr = unicode(h) + ustr = unicode(h) eq(ustr.encode('utf-8'), 'Die Mieter treten hier ein werden mit einem Foerderband ' 'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen ' @@ -2956,6 +2875,15 @@ Content-Type: text/plain; self.assertEqual(msg.get_content_charset(), 'this is even more ***fun*** is it not.pdf') + def test_rfc2231_unknown_encoding(self): + m = """\ +Content-Transfer-Encoding: 8bit +Content-Disposition: inline; filename*0=X-UNKNOWN''myfile.txt + +""" + msg = email.message_from_string(m) + self.assertEqual(msg.get_filename(), 'myfile.txt') + def _testclasses(): |