summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorR David Murray <rdmurray@bitdance.com>2015-03-29 21:53:05 -0400
committerR David Murray <rdmurray@bitdance.com>2015-03-29 21:53:05 -0400
commitdee0f186f266252095a91ee649409ff4c0d5b146 (patch)
tree6b53456c9c5a670310bc8791938df6fa9d6df725
parent0325ed4f2c6775433a9b835036b368fb3c7a1077 (diff)
downloadcpython-dee0f186f266252095a91ee649409ff4c0d5b146.tar.gz
#23745: handle duplicate MIME parameter names in new parser.
This mimics get_param's error handling for the most part. It is slightly better in some regards as get_param can produce some really weird results for duplicate *0* parts. It departs from get_param slightly in that if we have a mix of non-extended and extended pieces for the same parameter name, the new parser assumes they were all supposed to be extended and concatenates all the values, whereas get_param always picks the non-extended parameter value. All of this error recovery is pretty much arbitrary decisions...
-rw-r--r--Lib/email/_header_value_parser.py34
-rw-r--r--Lib/test/test_email/test__header_value_parser.py109
-rw-r--r--Misc/NEWS3
3 files changed, 139 insertions, 7 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 1806cac905..a9bdf4458b 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -71,6 +71,7 @@ import re
import urllib # For urllib.parse.unquote
from string import hexdigits
from collections import OrderedDict
+from operator import itemgetter
from email import _encoded_words as _ew
from email import errors
from email import utils
@@ -1098,15 +1099,34 @@ class MimeParameters(TokenList):
params[name] = []
params[name].append((token.section_number, token))
for name, parts in params.items():
- parts = sorted(parts)
- # XXX: there might be more recovery we could do here if, for
- # example, this is really a case of a duplicate attribute name.
+ parts = sorted(parts, key=itemgetter(0))
+ first_param = parts[0][1]
+ charset = first_param.charset
+ # Our arbitrary error recovery is to ignore duplicate parameters,
+ # to use appearance order if there are duplicate rfc 2231 parts,
+ # and to ignore gaps. This mimics the error recovery of get_param.
+ if not first_param.extended and len(parts) > 1:
+ if parts[1][0] == 0:
+ parts[1][1].defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate(s) ignored'))
+ parts = parts[:1]
+ # Else assume the *0* was missing...note that this is different
+ # from get_param, but we registered a defect for this earlier.
value_parts = []
- charset = parts[0][1].charset
- for i, (section_number, param) in enumerate(parts):
+ i = 0
+ for section_number, param in parts:
if section_number != i:
- param.defects.append(errors.InvalidHeaderDefect(
- "inconsistent multipart parameter numbering"))
+ # We could get fancier here and look for a complete
+ # duplicate extended parameter and ignore the second one
+ # seen. But we're not doing that. The old code didn't.
+ if not param.extended:
+ param.defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate ignored'))
+ continue
+ else:
+ param.defects.append(errors.InvalidHeaderDefect(
+ "inconsistent RFC2231 parameter numbering"))
+ i += 1
value = param.param_value
if param.extended:
try:
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 5404d1913f..d028f7440b 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -2456,6 +2456,115 @@ class TestParser(TestParserMixin, TestEmailBase):
";foo", ";foo", ";foo", [errors.InvalidHeaderDefect]*3
)
+
+@parameterize
+class Test_parse_mime_parameters(TestParserMixin, TestEmailBase):
+
+ def mime_parameters_as_value(self,
+ value,
+ tl_str,
+ tl_value,
+ params,
+ defects):
+ mime_parameters = self._test_parse_x(parser.parse_mime_parameters,
+ value, tl_str, tl_value, defects)
+ self.assertEqual(mime_parameters.token_type, 'mime-parameters')
+ self.assertEqual(list(mime_parameters.params), params)
+
+
+ mime_parameters_params = {
+
+ 'simple': (
+ 'filename="abc.py"',
+ ' filename="abc.py"',
+ 'filename=abc.py',
+ [('filename', 'abc.py')],
+ []),
+
+ 'multiple_keys': (
+ 'filename="abc.py"; xyz=abc',
+ ' filename="abc.py"; xyz="abc"',
+ 'filename=abc.py; xyz=abc',
+ [('filename', 'abc.py'), ('xyz', 'abc')],
+ []),
+
+ 'split_value': (
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
+ ' filename="201.tif"',
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
+ [('filename', '201.tif')],
+ []),
+
+ # Note that it is undefined what we should do for error recovery when
+ # there are duplicate parameter names or duplicate parts in a split
+ # part. We choose to ignore all duplicate parameters after the first
+ # and to take duplicate or missing rfc 2231 parts in apperance order.
+ # This is backward compatible with get_param's behavior, but the
+ # decisions are arbitrary.
+
+ 'duplicate_key': (
+ 'filename=abc.gif; filename=def.tiff',
+ ' filename="abc.gif"',
+ "filename=abc.gif; filename=def.tiff",
+ [('filename', 'abc.gif')],
+ [errors.InvalidHeaderDefect]),
+
+ 'duplicate_key_with_split_value': (
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
+ " filename=abc.gif",
+ ' filename="201.tif"',
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
+ " filename=abc.gif",
+ [('filename', '201.tif')],
+ [errors.InvalidHeaderDefect]),
+
+ 'duplicate_key_with_split_value_other_order': (
+ "filename=abc.gif; "
+ " filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
+ ' filename="abc.gif"',
+ "filename=abc.gif;"
+ " filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66",
+ [('filename', 'abc.gif')],
+ [errors.InvalidHeaderDefect]),
+
+ 'duplicate_in_split_value': (
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
+ " filename*1*=abc.gif",
+ ' filename="201.tifabc.gif"',
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*1*=%74%69%66;"
+ " filename*1*=abc.gif",
+ [('filename', '201.tifabc.gif')],
+ [errors.InvalidHeaderDefect]),
+
+ 'missing_split_value': (
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;",
+ ' filename="201.tif"',
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;",
+ [('filename', '201.tif')],
+ [errors.InvalidHeaderDefect]),
+
+ 'duplicate_and_missing_split_value': (
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;"
+ " filename*3*=abc.gif",
+ ' filename="201.tifabc.gif"',
+ "filename*0*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66;"
+ " filename*3*=abc.gif",
+ [('filename', '201.tifabc.gif')],
+ [errors.InvalidHeaderDefect]*2),
+
+ # Here we depart from get_param and assume the *0* was missing.
+ 'duplicate_with_broken_split_value': (
+ "filename=abc.gif; "
+ " filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66",
+ ' filename="abc.gif201.tif"',
+ "filename=abc.gif;"
+ " filename*2*=iso-8859-1''%32%30%31%2E; filename*3*=%74%69%66",
+ [('filename', 'abc.gif201.tif')],
+ # Defects are apparent missing *0*, and two 'out of sequence'.
+ [errors.InvalidHeaderDefect]*3),
+
+ }
+
@parameterize
class Test_parse_mime_version(TestParserMixin, TestEmailBase):
diff --git a/Misc/NEWS b/Misc/NEWS
index b80cbe25f9..ebac8d52f6 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -21,6 +21,9 @@ Core and Builtins
Library
-------
+- Issue #23745: The new email header parser now handles duplicate MIME
+ parameter names without error, similar to how get_param behaves.
+
- Issue #23792: Ignore KeyboardInterrupt when the pydoc pager is active.
This mimics the behavior of the standard unix pagers, and prevents
pipepager from shutting down while the pager itself is still running.