Remove accidental stripping of non-printable characters

Continuation/follow-up for: https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4 which showcased the initial problem with the way that waitress was dealing with whitespace in headers. Additional testing by ZeddYu Lu also led to other potential issues with non-printable ascii characters that are stripped by default by Python's string.strip() function
author: Bert JW Regeer <bertjw@regeer.org> 2020-01-02 12:16:00 -0800
committer: Bert JW Regeer <bertjw@regeer.org> 2020-01-02 14:49:36 -0800
commit: ddb65b489d01d696afa1695b75fdd5df3e4ffdf8 (patch)
tree: 863813242f4bd49b19c542b0c03481d16bd70e12
parent: cd8359864400592ba0db5d74e650a8fac4763d0d (diff)
download: waitress-ddb65b489d01d696afa1695b75fdd5df3e4ffdf8.tar.gz
2 files changed, 43 insertions, 3 deletions
diff --git a/waitress/parser.py b/waitress/parser.py
index 8b07dd6..fef8a3d 100644
--- a/waitress/parser.py
+++ b/waitress/parser.py
@@ -213,13 +213,15 @@ class HTTPRequestParser(object):
             if not header:
                 raise ParsingError("Invalid header")
 
-            key, value = header.group('name', 'value')
+            key, value = header.group("name", "value")
 
             if b"_" in key:
                 # TODO(xistence): Should we drop this request instead?
                 continue
 
-            value = value.strip()
+            # Only strip off whitespace that is considered valid whitespace by
+            # RFC7230, don't strip the rest
+            value = value.strip(b" \t")
             key1 = tostr(key.upper().replace(b"-", b"_"))
             # If a header already exists, we append subsequent values
             # seperated by a comma. Applications already need to handle
@@ -257,7 +259,16 @@ class HTTPRequestParser(object):
             # here
             te = headers.pop("TRANSFER_ENCODING", "")
 
-            encodings = [encoding.strip().lower() for encoding in te.split(",") if encoding]
+            # NB: We can not just call bare strip() here because it will also
+            # remove other non-printable characters that we explicitly do not
+            # want removed so that if someone attempts to smuggle a request
+            # with these characters we don't fall prey to it.
+            #
+            # For example \x85 is stripped by default, but it is not considered
+            # valid whitespace to be stripped by RFC7230.
+            encodings = [
+                encoding.strip(" \t").lower() for encoding in te.split(",") if encoding
+            ]
 
             for encoding in encodings:
                 # Out of the transfer-codings listed in
diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py
index 8d42600..5373fd5 100644
--- a/waitress/tests/test_parser.py
+++ b/waitress/tests/test_parser.py
@@ -236,6 +236,35 @@ class TestHTTPRequestParser(unittest.TestCase):
         else:  # pragma: nocover
             self.assertTrue(False)
 
+    def test_parse_header_transfer_encoding_invalid_whitespace(self):
+        from waitress.parser import TransferEncodingNotImplemented
+
+        data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding:\x85chunked\r\n"
+
+        try:
+            self.parser.parse_header(data)
+        except TransferEncodingNotImplemented as e:
+            self.assertIn("Transfer-Encoding requested is not supported.", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
+    def test_parse_header_transfer_encoding_invalid_unicode(self):
+        from waitress.parser import TransferEncodingNotImplemented
+
+        # This is the binary encoding for the UTF-8 character
+        # https://www.compart.com/en/unicode/U+212A "unicode character "K""
+        # which if waitress were to accidentally do the wrong thing get
+        # lowercased to just the ascii "k" due to unicode collisions during
+        # transformation
+        data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding: chun\xe2\x84\xaaed\r\n"
+
+        try:
+            self.parser.parse_header(data)
+        except TransferEncodingNotImplemented as e:
+            self.assertIn("Transfer-Encoding requested is not supported.", e.args[0])
+        else:  # pragma: nocover
+            self.assertTrue(False)
+
     def test_parse_header_11_expect_continue(self):
         data = b"GET /foobar HTTP/1.1\r\nexpect: 100-continue\r\n"
         self.parser.parse_header(data)
author	Bert JW Regeer <bertjw@regeer.org>	2020-01-02 12:16:00 -0800
committer	Bert JW Regeer <bertjw@regeer.org>	2020-01-02 14:49:36 -0800
commit	ddb65b489d01d696afa1695b75fdd5df3e4ffdf8 (patch)
tree	863813242f4bd49b19c542b0c03481d16bd70e12
parent	cd8359864400592ba0db5d74e650a8fac4763d0d (diff)
download	waitress-ddb65b489d01d696afa1695b75fdd5df3e4ffdf8.tar.gz