summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBert JW Regeer <bertjw@regeer.org>2020-01-02 12:16:00 -0800
committerBert JW Regeer <bertjw@regeer.org>2020-01-02 14:49:36 -0800
commitddb65b489d01d696afa1695b75fdd5df3e4ffdf8 (patch)
tree863813242f4bd49b19c542b0c03481d16bd70e12
parentcd8359864400592ba0db5d74e650a8fac4763d0d (diff)
downloadwaitress-ddb65b489d01d696afa1695b75fdd5df3e4ffdf8.tar.gz
Remove accidental stripping of non-printable characters
Continuation/follow-up for: https://github.com/Pylons/waitress/security/advisories/GHSA-m5ff-3wj3-8ph4 which showcased the initial problem with the way that waitress was dealing with whitespace in headers. Additional testing by ZeddYu Lu also led to other potential issues with non-printable ascii characters that are stripped by default by Python's string.strip() function
-rw-r--r--waitress/parser.py17
-rw-r--r--waitress/tests/test_parser.py29
2 files changed, 43 insertions, 3 deletions
diff --git a/waitress/parser.py b/waitress/parser.py
index 8b07dd6..fef8a3d 100644
--- a/waitress/parser.py
+++ b/waitress/parser.py
@@ -213,13 +213,15 @@ class HTTPRequestParser(object):
if not header:
raise ParsingError("Invalid header")
- key, value = header.group('name', 'value')
+ key, value = header.group("name", "value")
if b"_" in key:
# TODO(xistence): Should we drop this request instead?
continue
- value = value.strip()
+ # Only strip off whitespace that is considered valid whitespace by
+ # RFC7230, don't strip the rest
+ value = value.strip(b" \t")
key1 = tostr(key.upper().replace(b"-", b"_"))
# If a header already exists, we append subsequent values
# seperated by a comma. Applications already need to handle
@@ -257,7 +259,16 @@ class HTTPRequestParser(object):
# here
te = headers.pop("TRANSFER_ENCODING", "")
- encodings = [encoding.strip().lower() for encoding in te.split(",") if encoding]
+ # NB: We can not just call bare strip() here because it will also
+ # remove other non-printable characters that we explicitly do not
+ # want removed so that if someone attempts to smuggle a request
+ # with these characters we don't fall prey to it.
+ #
+ # For example \x85 is stripped by default, but it is not considered
+ # valid whitespace to be stripped by RFC7230.
+ encodings = [
+ encoding.strip(" \t").lower() for encoding in te.split(",") if encoding
+ ]
for encoding in encodings:
# Out of the transfer-codings listed in
diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py
index 8d42600..5373fd5 100644
--- a/waitress/tests/test_parser.py
+++ b/waitress/tests/test_parser.py
@@ -236,6 +236,35 @@ class TestHTTPRequestParser(unittest.TestCase):
else: # pragma: nocover
self.assertTrue(False)
+ def test_parse_header_transfer_encoding_invalid_whitespace(self):
+ from waitress.parser import TransferEncodingNotImplemented
+
+ data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding:\x85chunked\r\n"
+
+ try:
+ self.parser.parse_header(data)
+ except TransferEncodingNotImplemented as e:
+ self.assertIn("Transfer-Encoding requested is not supported.", e.args[0])
+ else: # pragma: nocover
+ self.assertTrue(False)
+
+ def test_parse_header_transfer_encoding_invalid_unicode(self):
+ from waitress.parser import TransferEncodingNotImplemented
+
+ # This is the binary encoding for the UTF-8 character
+ # https://www.compart.com/en/unicode/U+212A "unicode character "K""
+ # which if waitress were to accidentally do the wrong thing get
+ # lowercased to just the ascii "k" due to unicode collisions during
+ # transformation
+ data = b"GET /foobar HTTP/1.1\r\nTransfer-Encoding: chun\xe2\x84\xaaed\r\n"
+
+ try:
+ self.parser.parse_header(data)
+ except TransferEncodingNotImplemented as e:
+ self.assertIn("Transfer-Encoding requested is not supported.", e.args[0])
+ else: # pragma: nocover
+ self.assertTrue(False)
+
def test_parse_header_11_expect_continue(self):
data = b"GET /foobar HTTP/1.1\r\nexpect: 100-continue\r\n"
self.parser.parse_header(data)