From 0b2647d3d59d29427130ee6cc42ea3f47b473b10 Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Wed, 13 Apr 2011 16:46:05 -0400
Subject: #11684: Complete parser bytes interface by adding BytesHeaderParser

Patch by Steffen Daode Nurpmeso.
---
 Lib/email/generator.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Lib/email/generator.py')

diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index f0e7a95477..fdd34e4ace 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -297,10 +297,12 @@ class Generator:
         # message/rfc822.  Such messages are generated by, for example,
         # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
         # in that case we just emit the string body.
-        payload = msg.get_payload()
+        payload = msg._payload
         if isinstance(payload, list):
             g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
             payload = s.getvalue()
+        else:
+            payload = self._encode(payload)
         self._fp.write(payload)
 
     # This used to be a module level function; we use a classmethod for this
-- 
cgit v1.2.1


From 25096720d023b59fead5325c3a2003871004484c Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Mon, 18 Apr 2011 13:59:37 -0400
Subject: #11731: simplify/enhance parser/generator API by introducing policy
 objects.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This new interface will also allow for future planned enhancements
in control over the parser/generator without requiring any additional
complexity in the parser/generator API.

Patch reviewed by Éric Araujo and Barry Warsaw.
---
 Lib/email/generator.py | 62 ++++++++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 25 deletions(-)

(limited to 'Lib/email/generator.py')

diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index fdd34e4ace..d8b8fa960b 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -13,8 +13,10 @@ import random
 import warnings
 
 from io import StringIO, BytesIO
+from email import policy
 from email.header import Header
 from email.message import _has_surrogates
+import email.charset as _charset
 
 UNDERSCORE = '_'
 NL = '\n'  # XXX: no longer used by the code below.
@@ -33,7 +35,8 @@ class Generator:
     # Public interface
     #
 
-    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
+    def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *,
+                 policy=policy.default):
         """Create the generator for message flattening.
 
         outfp is the output file-like object for writing the message to.  It
@@ -49,16 +52,23 @@ class Generator:
         defined in the Header class.  Set maxheaderlen to zero to disable
         header wrapping.  The default is 78, as recommended (but not required)
         by RFC 2822.
+
+        The policy keyword specifies a policy object that controls a number of
+        aspects of the generator's operation.  The default policy maintains
+        backward compatibility.
+
         """
         self._fp = outfp
         self._mangle_from_ = mangle_from_
-        self._maxheaderlen = maxheaderlen
+        self._maxheaderlen = (maxheaderlen if maxheaderlen is not None else
+                                 policy.max_line_length)
+        self.policy = policy
 
     def write(self, s):
         # Just delegate to the file object
         self._fp.write(s)
 
-    def flatten(self, msg, unixfrom=False, linesep='\n'):
+    def flatten(self, msg, unixfrom=False, linesep=None):
         r"""Print the message object tree rooted at msg to the output file
         specified when the Generator instance was created.
 
@@ -70,17 +80,15 @@ class Generator:
         Note that for subobjects, no From_ line is printed.
 
         linesep specifies the characters used to indicate a new line in
-        the output.  The default value is the most useful for typical
-        Python applications, but it can be set to \r\n to produce RFC-compliant
-        line separators when needed.
+        the output.  The default value is determined by the policy.
 
         """
         # We use the _XXX constants for operating on data that comes directly
         # from the msg, and _encoded_XXX constants for operating on data that
         # has already been converted (to bytes in the BytesGenerator) and
         # inserted into a temporary buffer.
-        self._NL = linesep
-        self._encoded_NL = self._encode(linesep)
+        self._NL = linesep if linesep is not None else self.policy.linesep
+        self._encoded_NL = self._encode(self._NL)
         self._EMPTY = ''
         self._encoded_EMTPY = self._encode('')
         if unixfrom:
@@ -338,7 +346,10 @@ class BytesGenerator(Generator):
 
     Functionally identical to the base Generator except that the output is
     bytes and not string.  When surrogates were used in the input to encode
-    bytes, these are decoded back to bytes for output.
+    bytes, these are decoded back to bytes for output.  If the policy has
+    must_be_7bit set true, then the message is transformed such that the
+    non-ASCII bytes are properly content transfer encoded, using the
+    charset unknown-8bit.
 
     The outfp object must accept bytes in its write method.
     """
@@ -361,21 +372,22 @@ class BytesGenerator(Generator):
         # strings with 8bit bytes.
         for h, v in msg._headers:
             self.write('%s: ' % h)
-            if isinstance(v, Header):
-                self.write(v.encode(maxlinelen=self._maxheaderlen)+NL)
-            elif _has_surrogates(v):
-                # If we have raw 8bit data in a byte string, we have no idea
-                # what the encoding is.  There is no safe way to split this
-                # string.  If it's ascii-subset, then we could do a normal
-                # ascii split, but if it's multibyte then we could break the
-                # string.  There's no way to know so the least harm seems to
-                # be to not split the string and risk it being too long.
-                self.write(v+NL)
-            else:
-                # Header's got lots of smarts and this string is safe...
-                header = Header(v, maxlinelen=self._maxheaderlen,
-                                header_name=h)
-                self.write(header.encode(linesep=self._NL)+self._NL)
+            if isinstance(v, str):
+                if _has_surrogates(v):
+                    if not self.policy.must_be_7bit:
+                        # If we have raw 8bit data in a byte string, we have no idea
+                        # what the encoding is.  There is no safe way to split this
+                        # string.  If it's ascii-subset, then we could do a normal
+                        # ascii split, but if it's multibyte then we could break the
+                        # string.  There's no way to know so the least harm seems to
+                        # be to not split the string and risk it being too long.
+                        self.write(v+NL)
+                        continue
+                    h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h)
+                else:
+                    h = Header(v, header_name=h)
+            self.write(h.encode(linesep=self._NL,
+                                maxlinelen=self._maxheaderlen)+self._NL)
         # A blank line always separates headers from body
         self.write(self._NL)
 
@@ -384,7 +396,7 @@ class BytesGenerator(Generator):
         # just write it back out.
         if msg._payload is None:
             return
-        if _has_surrogates(msg._payload):
+        if _has_surrogates(msg._payload) and not self.policy.must_be_7bit:
             self.write(msg._payload)
         else:
             super(BytesGenerator,self)._handle_text(msg)
-- 
cgit v1.2.1


From 25a003a10f523a12abae1f81ab0119f1ae170777 Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Fri, 25 May 2012 15:01:48 -0400
Subject: #14731: refactor email policy framework.

This patch primarily does two things: (1) it adds some internal-interface
methods to Policy that allow for Policy to control the parsing and folding of
headers in such a way that we can construct a backward compatibility policy
that is 100% compatible with the 3.2 API, while allowing a new policy to
implement the email6 API.  (2) it adds that backward compatibility policy and
refactors the test suite so that the only differences between the 3.2
test_email.py file and the 3.3 test_email.py file are some small changes in
test framework and the addition of tests for bugs fixed that apply to the 3.2
API.

There are some additional tweaks, such as moving just the code needed for the
compatibility policy into _policybase, so that the library code can import
only _policybase.  That way the new code that will be added for email6
will only get imported when a non-compatibility policy is imported.
---
 Lib/email/generator.py | 87 +++++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 51 deletions(-)

(limited to 'Lib/email/generator.py')

diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index dcfea4cd8a..bfa288bea4 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -13,9 +13,9 @@ import random
 import warnings
 
 from io import StringIO, BytesIO
-from email import policy
+from email._policybase import compat32
 from email.header import Header
-from email.message import _has_surrogates
+from email.utils import _has_surrogates
 import email.charset as _charset
 
 UNDERSCORE = '_'
@@ -36,7 +36,7 @@ class Generator:
     #
 
     def __init__(self, outfp, mangle_from_=True, maxheaderlen=None, *,
-                 policy=policy.default):
+                 policy=None):
         """Create the generator for message flattening.
 
         outfp is the output file-like object for writing the message to.  It
@@ -60,8 +60,7 @@ class Generator:
         """
         self._fp = outfp
         self._mangle_from_ = mangle_from_
-        self._maxheaderlen = (maxheaderlen if maxheaderlen is not None else
-                                 policy.max_line_length)
+        self.maxheaderlen = maxheaderlen
         self.policy = policy
 
     def write(self, s):
@@ -87,20 +86,33 @@ class Generator:
         # from the msg, and _encoded_XXX constants for operating on data that
         # has already been converted (to bytes in the BytesGenerator) and
         # inserted into a temporary buffer.
-        self._NL = linesep if linesep is not None else self.policy.linesep
+        policy = msg.policy if self.policy is None else self.policy
+        if linesep is not None:
+            policy = policy.clone(linesep=linesep)
+        if self.maxheaderlen is not None:
+            policy = policy.clone(max_line_length=self.maxheaderlen)
+        self._NL = policy.linesep
         self._encoded_NL = self._encode(self._NL)
         self._EMPTY = ''
         self._encoded_EMTPY = self._encode('')
-        if unixfrom:
-            ufrom = msg.get_unixfrom()
-            if not ufrom:
-                ufrom = 'From nobody ' + time.ctime(time.time())
-            self.write(ufrom + self._NL)
-        self._write(msg)
+        p = self.policy
+        try:
+            self.policy = policy
+            if unixfrom:
+                ufrom = msg.get_unixfrom()
+                if not ufrom:
+                    ufrom = 'From nobody ' + time.ctime(time.time())
+                self.write(ufrom + self._NL)
+            self._write(msg)
+        finally:
+            self.policy = p
 
     def clone(self, fp):
         """Clone this generator with the exact same options."""
-        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)
+        return self.__class__(fp,
+                              self._mangle_from_,
+                              None, # Use policy setting, which we've adjusted
+                              policy=self.policy)
 
     #
     # Protected interface - undocumented ;/
@@ -175,16 +187,8 @@ class Generator:
     #
 
     def _write_headers(self, msg):
-        for h, v in msg.items():
-            self.write('%s: ' % h)
-            if isinstance(v, Header):
-                self.write(v.encode(
-                    maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
-            else:
-                # Header's got lots of smarts, so use it.
-                header = Header(v, maxlinelen=self._maxheaderlen,
-                                header_name=h)
-                self.write(header.encode(linesep=self._NL)+self._NL)
+        for h, v in msg.raw_items():
+            self.write(self.policy.fold(h, v))
         # A blank line always separates headers from body
         self.write(self._NL)
 
@@ -265,12 +269,12 @@ class Generator:
         # The contents of signed parts has to stay unmodified in order to keep
         # the signature intact per RFC1847 2.1, so we disable header wrapping.
         # RDM: This isn't enough to completely preserve the part, but it helps.
-        old_maxheaderlen = self._maxheaderlen
+        p = self.policy
+        self.policy = p.clone(max_line_length=0)
         try:
-            self._maxheaderlen = 0
             self._handle_multipart(msg)
         finally:
-            self._maxheaderlen = old_maxheaderlen
+            self.policy = p
 
     def _handle_message_delivery_status(self, msg):
         # We can't just write the headers directly to self's file object
@@ -347,9 +351,9 @@ class BytesGenerator(Generator):
     Functionally identical to the base Generator except that the output is
     bytes and not string.  When surrogates were used in the input to encode
     bytes, these are decoded back to bytes for output.  If the policy has
-    must_be_7bit set true, then the message is transformed such that the
-    non-ASCII bytes are properly content transfer encoded, using the
-    charset unknown-8bit.
+    cte_type set to 7bit, then the message is transformed such that the
+    non-ASCII bytes are properly content transfer encoded, using the charset
+    unknown-8bit.
 
     The outfp object must accept bytes in its write method.
     """
@@ -370,27 +374,8 @@ class BytesGenerator(Generator):
     def _write_headers(self, msg):
         # This is almost the same as the string version, except for handling
         # strings with 8bit bytes.
-        for h, v in msg._headers:
-            self.write('%s: ' % h)
-            if isinstance(v, str):
-                if _has_surrogates(v):
-                    if not self.policy.must_be_7bit:
-                        # If we have raw 8bit data in a byte string, we have no idea
-                        # what the encoding is.  There is no safe way to split this
-                        # string.  If it's ascii-subset, then we could do a normal
-                        # ascii split, but if it's multibyte then we could break the
-                        # string.  There's no way to know so the least harm seems to
-                        # be to not split the string and risk it being too long.
-                        self.write(v+NL)
-                        continue
-                    h = Header(v, charset=_charset.UNKNOWN8BIT, header_name=h)
-                else:
-                    h = Header(v, header_name=h)
-            else:
-                # Assume it is a Header-like object.
-                h = v
-            self.write(h.encode(linesep=self._NL,
-                                maxlinelen=self._maxheaderlen)+self._NL)
+        for h, v in msg.raw_items():
+            self._fp.write(self.policy.fold_binary(h, v))
         # A blank line always separates headers from body
         self.write(self._NL)
 
@@ -399,7 +384,7 @@ class BytesGenerator(Generator):
         # just write it back out.
         if msg._payload is None:
             return
-        if _has_surrogates(msg._payload) and not self.policy.must_be_7bit:
+        if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
             self.write(msg._payload)
         else:
             super(BytesGenerator,self)._handle_text(msg)
-- 
cgit v1.2.1


From 453836e4750201b27e3397f092f342e758b7943e Mon Sep 17 00:00:00 2001
From: R David Murray <rdmurray@bitdance.com>
Date: Fri, 25 May 2012 18:42:14 -0400
Subject: #12586: add provisional email policy with new header parsing and
 folding.

When the new policies are used (and only when the new policies are explicitly
used) headers turn into objects that have attributes based on their parsed
values, and can be set using objects that encapsulate the values, as well as
set directly from unicode strings.  The folding algorithm then takes care of
encoding unicode where needed, and folding according to the highest level
syntactic objects.

With this patch only date and time headers are parsed as anything other than
unstructured, but that is all the helper methods in the existing API handle.
I do plan to add more parsers, and complete the set specified in the RFC
before the package becomes stable.
---
 Lib/email/generator.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'Lib/email/generator.py')

diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index bfa288bea4..fcecf939a8 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -95,9 +95,15 @@ class Generator:
         self._encoded_NL = self._encode(self._NL)
         self._EMPTY = ''
         self._encoded_EMTPY = self._encode('')
-        p = self.policy
+        # Because we use clone (below) when we recursively process message
+        # subparts, and because clone uses the computed policy (not None),
+        # submessages will automatically get set to the computed policy when
+        # they are processed by this code.
+        old_gen_policy = self.policy
+        old_msg_policy = msg.policy
         try:
             self.policy = policy
+            msg.policy = policy
             if unixfrom:
                 ufrom = msg.get_unixfrom()
                 if not ufrom:
@@ -105,7 +111,8 @@ class Generator:
                 self.write(ufrom + self._NL)
             self._write(msg)
         finally:
-            self.policy = p
+            self.policy = old_gen_policy
+            msg.policy = old_msg_policy
 
     def clone(self, fp):
         """Clone this generator with the exact same options."""
-- 
cgit v1.2.1