diff options
author | Bert JW Regeer <bertjw@regeer.org> | 2020-01-02 12:16:00 -0800 |
---|---|---|
committer | Bert JW Regeer <bertjw@regeer.org> | 2020-01-02 14:49:36 -0800 |
commit | ddb65b489d01d696afa1695b75fdd5df3e4ffdf8 (patch) | |
tree | 863813242f4bd49b19c542b0c03481d16bd70e12 | |
parent | cd8359864400592ba0db5d74e650a8fac4763d0d (diff) | |
download | waitress-ddb65b489d01d696afa1695b75fdd5df3e4ffdf8.tar.gz |
Remove accidental stripping of non-printable characters
Continuation/follow-up for:
https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4
which showcased the initial problem with the way that waitress was
dealing with whitespace in headers.
Additional testing by ZeddYu Lu also led to other potential issues with
non-printable ascii characters that are stripped by default by Python's
string.strip() function
-rw-r--r-- | waitress/parser.py | 17 | ||||
-rw-r--r-- | waitress/tests/test_parser.py | 29 |
2 files changed, 43 insertions, 3 deletions
diff --git a/waitress/parser.py b/waitress/parser.py index 8b07dd6..fef8a3d 100644 --- a/waitress/parser.py +++ b/waitress/parser.py @@ -213,13 +213,15 @@ class HTTPRequestParser(object): if not header: raise ParsingError("Invalid header") - key, value = header.group('name', 'value') + key, value = header.group("name", "value") if b"_" in key: # TODO(xistence): Should we drop this request instead? continue - value = value.strip() + # Only strip off whitespace that is considered valid whitespace by + # RFC7230, don't strip the rest + value = value.strip(b" \t") key1 = tostr(key.upper().replace(b"-", b"_")) # If a header already exists, we append subsequent values # seperated by a comma. Applications already need to handle @@ -257,7 +259,16 @@ class HTTPRequestParser(object): # here te = headers.pop("TRANSFER_ENCODING", "") - encodings = [encoding.strip().lower() for encoding in te.split(",") if encoding] + # NB: We can not just call bare strip() here because it will also + # remove other non-printable characters that we explicitly do not + # want removed so that if someone attempts to smuggle a request + # with these characters we don't fall prey to it. + # + # For example \x85 is stripped by default, but it is not considered + # valid whitespace to be stripped by RFC7230. + encodings = [ + encoding.strip(" \t").lower() for encoding in te.split(",") if encoding + ] for encoding in encodings: # Out of the transfer-codings listed in diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py index 8d42600..5373fd5 100644 --- a/waitress/tests/test_parser.py +++ b/waitress/tests/test_parser.py @@ -236,6 +236,35 @@ class TestHTTPRequestParser(unittest.TestCase): else: # pragma: nocover self.assertTrue(False) + def test_parse_header_transfer_encoding_invalid_whitespace(self): + from waitress.parser import TransferEncodingNotImplemented + + data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding:\x85chunked\r\n" + + try: + self.parser.parse_header(data) + except TransferEncodingNotImplemented as e: + self.assertIn("Transfer-Encoding requested is not supported.", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + + def test_parse_header_transfer_encoding_invalid_unicode(self): + from waitress.parser import TransferEncodingNotImplemented + + # This is the binary encoding for the UTF-8 character + # https://www.compart.com/en/unicode/U+212A "unicode character "K"" + # which if waitress were to accidentally do the wrong thing get + # lowercased to just the ascii "k" due to unicode collisions during + # transformation + data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding: chun\xe2\x84\xaaed\r\n" + + try: + self.parser.parse_header(data) + except TransferEncodingNotImplemented as e: + self.assertIn("Transfer-Encoding requested is not supported.", e.args[0]) + else: # pragma: nocover + self.assertTrue(False) + def test_parse_header_11_expect_continue(self): data = b"GET /foobar HTTP/1.1\r\nexpect: 100-continue\r\n" self.parser.parse_header(data) |