summaryrefslogtreecommitdiff
path: root/Lib/email/headerregistry.py
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com> 2012-06-24 05:03:27 -0400
committer: R David Murray <rdmurray@bitdance.com> 2012-06-24 05:03:27 -0400
commit: 603e30deb8be28492e5f716e4e4dc0da4e697af6 (patch)
tree: 3bdaa0cf197d0db7341872bf9de6caf39961b937 /Lib/email/headerregistry.py
parent: ef4d53c93c63a5a68dca42b97a9b38719ab0c6c9 (diff)
download: cpython-603e30deb8be28492e5f716e4e4dc0da4e697af6.tar.gz
#15160: Extend the new email parser to handle MIME headers.
This code passes all the same tests that the existing RFC MIME header parser passes, plus a bunch of additional ones. There are a couple of commented out tests where there are issues with the folding. The folding doesn't normally get invoked for headers parsed from source, and the cases are marginal anyway (headers with invalid binary data) so I'm not worried about them, but will fix them after the beta. There are things that can be done to make this API even more convenient, but I think this is a solid foundation worth having. And the parser is a full RFC parser, so it handles cases that the current parser doesn't. (There are also probably cases where it fails when the current parser doesn't, but I haven't found them yet ;) Oh, yeah, and there are some really ugly bits in the parser for handling some 'Postel' cases that are unfortunately common. I hope/plan to eventually refactor a lot of the code in the parser, which should reduce the line count...but there is no escaping the fact that the error recovery is a welter of special cases.
Diffstat (limited to 'Lib/email/headerregistry.py')
-rw-r--r-- Lib/email/headerregistry.py 157
1 files changed, 142 insertions, 15 deletions
diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py
index 658854630f..1fae950820 100644
--- a/Lib/email/headerregistry.py
+++ b/Lib/email/headerregistry.py
@@ -391,24 +391,151 @@ class UniqueSingleAddressHeader(SingleAddressHeader):
max_count = 1
+class MIMEVersionHeader:
+
+ max_count = 1
+
+ value_parser = staticmethod(parser.parse_mime_version)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+ kwds['decoded'] = str(parse_tree)
+ kwds['defects'].extend(parse_tree.all_defects)
+ kwds['major'] = None if parse_tree.minor is None else parse_tree.major
+ kwds['minor'] = parse_tree.minor
+ if parse_tree.minor is not None:
+ kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
+ else:
+ kwds['version'] = None
+
+ def init(self, *args, **kw):
+ self._version = kw.pop('version')
+ self._major = kw.pop('major')
+ self._minor = kw.pop('minor')
+ super().init(*args, **kw)
+
+ @property
+ def major(self):
+ return self._major
+
+ @property
+ def minor(self):
+ return self._minor
+
+ @property
+ def version(self):
+ return self._version
+
+
+class ParameterizedMIMEHeader:
+
+ # Mixin that handles the params dict. Must be subclassed and
+ # a property value_parser for the specific header provided.
+
+ max_count = 1
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+ kwds['decoded'] = str(parse_tree)
+ kwds['defects'].extend(parse_tree.all_defects)
+ if parse_tree.params is None:
+ kwds['params'] = {}
+ else:
+ # The MIME RFCs specify that parameter ordering is arbitrary.
+ kwds['params'] = {utils._sanitize(name).lower():
+ utils._sanitize(value)
+ for name, value in parse_tree.params}
+
+ def init(self, *args, **kw):
+ self._params = kw.pop('params')
+ super().init(*args, **kw)
+
+ @property
+ def params(self):
+ return self._params.copy()
+
+
+class ContentTypeHeader(ParameterizedMIMEHeader):
+
+ value_parser = staticmethod(parser.parse_content_type_header)
+
+ def init(self, *args, **kw):
+ super().init(*args, **kw)
+ self._maintype = utils._sanitize(self._parse_tree.maintype)
+ self._subtype = utils._sanitize(self._parse_tree.subtype)
+
+ @property
+ def maintype(self):
+ return self._maintype
+
+ @property
+ def subtype(self):
+ return self._subtype
+
+ @property
+ def content_type(self):
+ return self.maintype + '/' + self.subtype
+
+
+class ContentDispositionHeader(ParameterizedMIMEHeader):
+
+ value_parser = staticmethod(parser.parse_content_disposition_header)
+
+ def init(self, *args, **kw):
+ super().init(*args, **kw)
+ cd = self._parse_tree.content_disposition
+ self._content_disposition = cd if cd is None else utils._sanitize(cd)
+
+ @property
+ def content_disposition(self):
+ return self._content_disposition
+
+
+class ContentTransferEncodingHeader:
+
+ max_count = 1
+
+ value_parser = staticmethod(parser.parse_content_transfer_encoding_header)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+ kwds['decoded'] = str(parse_tree)
+ kwds['defects'].extend(parse_tree.all_defects)
+
+ def init(self, *args, **kw):
+ super().init(*args, **kw)
+ self._cte = utils._sanitize(self._parse_tree.cte)
+
+ @property
+ def cte(self):
+ return self._cte
+
+
# The header factory #
_default_header_map = {
- 'subject': UniqueUnstructuredHeader,
- 'date': UniqueDateHeader,
- 'resent-date': DateHeader,
- 'orig-date': UniqueDateHeader,
- 'sender': UniqueSingleAddressHeader,
- 'resent-sender': SingleAddressHeader,
- 'to': UniqueAddressHeader,
- 'resent-to': AddressHeader,
- 'cc': UniqueAddressHeader,
- 'resent-cc': AddressHeader,
- 'bcc': UniqueAddressHeader,
- 'resent-bcc': AddressHeader,
- 'from': UniqueAddressHeader,
- 'resent-from': AddressHeader,
- 'reply-to': UniqueAddressHeader,
+ 'subject': UniqueUnstructuredHeader,
+ 'date': UniqueDateHeader,
+ 'resent-date': DateHeader,
+ 'orig-date': UniqueDateHeader,
+ 'sender': UniqueSingleAddressHeader,
+ 'resent-sender': SingleAddressHeader,
+ 'to': UniqueAddressHeader,
+ 'resent-to': AddressHeader,
+ 'cc': UniqueAddressHeader,
+ 'resent-cc': AddressHeader,
+ 'bcc': UniqueAddressHeader,
+ 'resent-bcc': AddressHeader,
+ 'from': UniqueAddressHeader,
+ 'resent-from': AddressHeader,
+ 'reply-to': UniqueAddressHeader,
+ 'mime-version': MIMEVersionHeader,
+ 'content-type': ContentTypeHeader,
+ 'content-disposition': ContentDispositionHeader,
+ 'content-transfer-encoding': ContentTransferEncodingHeader,
}
class HeaderRegistry: