summaryrefslogtreecommitdiff
path: root/src/waitress/parser.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/waitress/parser.py')
-rw-r--r--src/waitress/parser.py52
1 files changed, 38 insertions, 14 deletions
diff --git a/src/waitress/parser.py b/src/waitress/parser.py
index 4530b23..3b99921 100644
--- a/src/waitress/parser.py
+++ b/src/waitress/parser.py
@@ -18,9 +18,10 @@ processing but threads to do work.
"""
from io import BytesIO
import re
+from urllib import parse
+from urllib.parse import unquote_to_bytes
from waitress.buffers import OverflowableBuffer
-from waitress.compat import tostr, unquote_bytes_to_wsgi, urlparse
from waitress.receiver import ChunkedReceiver, FixedStreamReceiver
from waitress.utilities import (
BadRequest,
@@ -33,6 +34,10 @@ from waitress.utilities import (
from .rfc7230 import HEADER_FIELD
+def unquote_bytes_to_wsgi(bytestring):
+ return unquote_to_bytes(bytestring).decode("latin-1")
+
+
class ParsingError(Exception):
pass
@@ -80,11 +85,13 @@ class HTTPRequestParser:
bytes consumed. Sets the completed flag once both the header and the
body have been received.
"""
+
if self.completed:
return 0 # Can't consume any more.
datalen = len(data)
br = self.body_rcv
+
if br is None:
# In header.
max_header = self.adj.max_request_header_size
@@ -106,12 +113,14 @@ class HTTPRequestParser:
# If the first line + headers is over the max length, we return a
# RequestHeaderFieldsTooLarge error rather than continuing to
# attempt to parse the headers.
+
if self.header_bytes_received >= max_header:
self.parse_header(b"GET / HTTP/1.0\r\n")
self.error = RequestHeaderFieldsTooLarge(
"exceeds max_header of %s" % max_header
)
self.completed = True
+
return consumed
if index >= 0:
@@ -195,6 +204,7 @@ class HTTPRequestParser:
first line of the request).
"""
index = header_plus.find(b"\r\n")
+
if index >= 0:
first_line = header_plus[:index].rstrip()
header = header_plus[index + 2 :]
@@ -209,6 +219,7 @@ class HTTPRequestParser:
lines = get_header_lines(header)
headers = self.headers
+
for line in lines:
header = HEADER_FIELD.match(line)
@@ -219,25 +230,26 @@ class HTTPRequestParser:
if b"_" in key:
# TODO(xistence): Should we drop this request instead?
+
continue
# Only strip off whitespace that is considered valid whitespace by
# RFC7230, don't strip the rest
value = value.strip(b" \t")
- key1 = tostr(key.upper().replace(b"-", b"_"))
+ key1 = key.upper().replace(b"-", b"_").decode("latin-1")
# If a header already exists, we append subsequent values
# separated by a comma. Applications already need to handle
# the comma separated values, as HTTP front ends might do
# the concatenation for you (behavior specified in RFC2616).
try:
- headers[key1] += tostr(b", " + value)
+ headers[key1] += (b", " + value).decode("latin-1")
except KeyError:
- headers[key1] = tostr(value)
+ headers[key1] = value.decode("latin-1")
# command, uri, version will be bytes
command, uri, version = crack_first_line(first_line)
- version = tostr(version)
- command = tostr(command)
+ version = version.decode("latin-1")
+ command = command.decode("latin-1")
self.command = command
self.version = version
(
@@ -280,6 +292,7 @@ class HTTPRequestParser:
# Note: the identity transfer-coding was removed in RFC7230:
# https://tools.ietf.org/html/rfc7230#appendix-A.2 and is thus
# not supported
+
if encoding not in {"chunked"}:
raise TransferEncodingNotImplemented(
"Transfer-Encoding requested is not supported."
@@ -296,6 +309,7 @@ class HTTPRequestParser:
expect = headers.get("EXPECT", "").lower()
self.expect_continue = expect == "100-continue"
+
if connection.lower() == "close":
self.connection_close = True
@@ -306,12 +320,14 @@ class HTTPRequestParser:
raise ParsingError("Content-Length is invalid")
self.content_length = cl
+
if cl > 0:
buf = OverflowableBuffer(self.adj.inbuf_overflow)
self.body_rcv = FixedStreamReceiver(cl, buf)
def get_body_stream(self):
body_rcv = self.body_rcv
+
if body_rcv is not None:
return body_rcv.getfile()
else:
@@ -319,6 +335,7 @@ class HTTPRequestParser:
def close(self):
body_rcv = self.body_rcv
+
if body_rcv is not None:
body_rcv.getbuf().close()
@@ -346,16 +363,16 @@ def split_uri(uri):
path, query = path.split(b"?", 1)
else:
try:
- scheme, netloc, path, query, fragment = urlparse.urlsplit(uri)
+ scheme, netloc, path, query, fragment = parse.urlsplit(uri)
except UnicodeError:
raise ParsingError("Bad URI")
return (
- tostr(scheme),
- tostr(netloc),
+ scheme.decode("latin-1"),
+ netloc.decode("latin-1"),
unquote_bytes_to_wsgi(path),
- tostr(query),
- tostr(fragment),
+ query.decode("latin-1"),
+ fragment.decode("latin-1"),
)
@@ -365,20 +382,24 @@ def get_header_lines(header):
"""
r = []
lines = header.split(b"\r\n")
+
for line in lines:
if not line:
continue
if b"\r" in line or b"\n" in line:
- raise ParsingError('Bare CR or LF found in header line "%s"' % tostr(line))
+ raise ParsingError(
+ 'Bare CR or LF found in header line "%s"' % str(line, "latin-1")
+ )
if line.startswith((b" ", b"\t")):
if not r:
# https://corte.si/posts/code/pathod/pythonservers/index.html
- raise ParsingError('Malformed header line "%s"' % tostr(line))
+ raise ParsingError('Malformed header line "%s"' % str(line, "latin-1"))
r[-1] += line
else:
r.append(line)
+
return r
@@ -391,6 +412,7 @@ first_line_re = re.compile(
def crack_first_line(line):
m = first_line_re.match(line)
+
if m is not None and m.end() == len(line):
if m.group(3):
version = m.group(5)
@@ -407,9 +429,11 @@ def crack_first_line(line):
# unsuspecting souls from sending lowercase HTTP methods to waitress
# and having the request complete, while servers like nginx drop the
# request onto the floor.
+
if method != method.upper():
- raise ParsingError('Malformed HTTP method "%s"' % tostr(method))
+ raise ParsingError('Malformed HTTP method "%s"' % str(method, "latin-1"))
uri = m.group(2)
+
return method, uri, version
else:
return b"", b"", b""