summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBert JW Regeer <xistence@0x58.com>2020-02-02 21:45:15 -0800
committerGitHub <noreply@github.com>2020-02-02 21:45:15 -0800
commit6e46f9e3f014d64dd7d1e258eaf626e39870ee1f (patch)
treee9dce7345561f3b90c5b64fab3aad14a7217a3d0
parent8af9adb8e9078bfdbc3adf9153b04e2d4471222d (diff)
parent51b9bd429a97ac1cadfe4e37938ff7fac84ee642 (diff)
downloadwaitress-6e46f9e3f014d64dd7d1e258eaf626e39870ee1f.tar.gz
Merge pull request from GHSA-73m2-3pwg-5fgc
Fix catastrophic backtracking in regular expression
-rw-r--r--waitress/rfc7230.py8
-rw-r--r--waitress/tests/test_parser.py26
2 files changed, 28 insertions, 6 deletions
diff --git a/waitress/rfc7230.py b/waitress/rfc7230.py
index 97a90a4..cd33c90 100644
--- a/waitress/rfc7230.py
+++ b/waitress/rfc7230.py
@@ -40,13 +40,13 @@ VCHAR = r"\x21-\x7e"
# field-vchar ]
FIELD_VCHAR = "[" + VCHAR + OBS_TEXT + "]"
-FIELD_CONTENT = FIELD_VCHAR + "([ \t" + VCHAR + OBS_TEXT + "]+" + FIELD_VCHAR + "){,1}"
-FIELD_VALUE = "(" + FIELD_CONTENT + "){0,}"
+# Field content is more greedy than the ABNF, in that it will match the whole value
+FIELD_CONTENT = FIELD_VCHAR + "+(?:[ \t]+" + FIELD_VCHAR + "+)*"
+# Which allows the field value here to just see if there is even a value in the first place
+FIELD_VALUE = "(?:" + FIELD_CONTENT + ")?"
HEADER_FIELD = re.compile(
tobytes(
"^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
)
)
-
-OWS_STRIP = re.compile(OWS + "(?P<value>.*?)" + OWS)
diff --git a/waitress/tests/test_parser.py b/waitress/tests/test_parser.py
index 71703e2..91837c7 100644
--- a/waitress/tests/test_parser.py
+++ b/waitress/tests/test_parser.py
@@ -55,6 +55,7 @@ class TestHTTPRequestParser(unittest.TestCase):
def test_received_bad_transfer_encoding(self):
from waitress.utilities import ServerNotImplemented
+
data = (
b"GET /foobar HTTP/1.1\r\n"
b"Transfer-Encoding: foo\r\n"
@@ -211,7 +212,6 @@ class TestHTTPRequestParser(unittest.TestCase):
self.parser.parse_header(data)
self.assertEqual(self.parser.body_rcv.__class__.__name__, "ChunkedReceiver")
-
def test_parse_header_transfer_encoding_invalid(self):
from waitress.parser import TransferEncodingNotImplemented
@@ -377,7 +377,7 @@ class TestHTTPRequestParser(unittest.TestCase):
def test_parse_header_invalid_chars(self):
from waitress.parser import ParsingError
- data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\n\foo: \x0bbaz\r\n"
+ data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nfoo: \x0bbaz\r\n"
try:
self.parser.parse_header(data)
except ParsingError as e:
@@ -433,6 +433,28 @@ class TestHTTPRequestParser(unittest.TestCase):
self.assertIn("FOO", self.parser.headers)
self.assertEqual(self.parser.headers["FOO"], "abrowser/0.001 (C O M M E N T)")
+ def test_parse_header_invalid_backtrack_bad(self):
+ from waitress.parser import ParsingError
+
+ data = b"GET /foobar HTTP/1.1\r\nfoo: bar\r\nfoo: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\x10\r\n"
+ try:
+ self.parser.parse_header(data)
+ except ParsingError as e:
+ self.assertIn("Invalid header", e.args[0])
+ else: # pragma: nocover
+ self.assertTrue(False)
+
+ def test_parse_header_short_values(self):
+ from waitress.parser import ParsingError
+
+ data = b"GET /foobar HTTP/1.1\r\none: 1\r\ntwo: 22\r\n"
+ self.parser.parse_header(data)
+
+ self.assertIn("ONE", self.parser.headers)
+ self.assertIn("TWO", self.parser.headers)
+ self.assertEqual(self.parser.headers["ONE"], "1")
+ self.assertEqual(self.parser.headers["TWO"], "22")
+
class Test_split_uri(unittest.TestCase):
def _callFUT(self, uri):