1 files changed, 38 insertions, 14 deletions
diff --git a/src/waitress/parser.py b/src/waitress/parser.py
index 4530b23..3b99921 100644
--- a/src/waitress/parser.py
+++ b/src/waitress/parser.py
@@ -18,9 +18,10 @@ processing but threads to do work.
 """
 from io import BytesIO
 import re
+from urllib import parse
+from urllib.parse import unquote_to_bytes
 
 from waitress.buffers import OverflowableBuffer
-from waitress.compat import tostr, unquote_bytes_to_wsgi, urlparse
 from waitress.receiver import ChunkedReceiver, FixedStreamReceiver
 from waitress.utilities import (
     BadRequest,
@@ -33,6 +34,10 @@ from waitress.utilities import (
 from .rfc7230 import HEADER_FIELD
 
 
+def unquote_bytes_to_wsgi(bytestring):  # percent-decoded bytes -> latin-1 str
+    return unquote_to_bytes(bytestring).decode("latin-1")
+
+
 class ParsingError(Exception):
     pass
 
@@ -80,11 +85,13 @@ class HTTPRequestParser:
         bytes consumed.  Sets the completed flag once both the header and the
         body have been received.
         """
+
         if self.completed:
             return 0  # Can't consume any more.
 
         datalen = len(data)
         br = self.body_rcv
+
         if br is None:
             # In header.
             max_header = self.adj.max_request_header_size
@@ -106,12 +113,14 @@ class HTTPRequestParser:
             # If the first line + headers is over the max length, we return a
             # RequestHeaderFieldsTooLarge error rather than continuing to
             # attempt to parse the headers.
+
             if self.header_bytes_received >= max_header:
                 self.parse_header(b"GET / HTTP/1.0\r\n")
                 self.error = RequestHeaderFieldsTooLarge(
                     "exceeds max_header of %s" % max_header
                 )
                 self.completed = True
+
                 return consumed
 
             if index >= 0:
@@ -195,6 +204,7 @@ class HTTPRequestParser:
         first line of the request).
         """
         index = header_plus.find(b"\r\n")
+
         if index >= 0:
             first_line = header_plus[:index].rstrip()
             header = header_plus[index + 2 :]
@@ -209,6 +219,7 @@ class HTTPRequestParser:
         lines = get_header_lines(header)
 
         headers = self.headers
+
         for line in lines:
             header = HEADER_FIELD.match(line)
 
@@ -219,25 +230,26 @@ class HTTPRequestParser:
 
             if b"_" in key:
                 # TODO(xistence): Should we drop this request instead?
+
                 continue
 
             # Only strip off whitespace that is considered valid whitespace by
             # RFC7230, don't strip the rest
             value = value.strip(b" \t")
-            key1 = tostr(key.upper().replace(b"-", b"_"))
+            key1 = key.upper().replace(b"-", b"_").decode("latin-1")
             # If a header already exists, we append subsequent values
             # separated by a comma. Applications already need to handle
             # the comma separated values, as HTTP front ends might do
             # the concatenation for you (behavior specified in RFC2616).
             try:
-                headers[key1] += tostr(b", " + value)
+                headers[key1] += (b", " + value).decode("latin-1")
             except KeyError:
-                headers[key1] = tostr(value)
+                headers[key1] = value.decode("latin-1")
 
         # command, uri, version will be bytes
         command, uri, version = crack_first_line(first_line)
-        version = tostr(version)
-        command = tostr(command)
+        version = version.decode("latin-1")
+        command = command.decode("latin-1")
         self.command = command
         self.version = version
         (
@@ -280,6 +292,7 @@ class HTTPRequestParser:
                 # Note: the identity transfer-coding was removed in RFC7230:
                 # https://tools.ietf.org/html/rfc7230#appendix-A.2 and is thus
                 # not supported
+
                 if encoding not in {"chunked"}:
                     raise TransferEncodingNotImplemented(
                         "Transfer-Encoding requested is not supported."
@@ -296,6 +309,7 @@ class HTTPRequestParser:
 
             expect = headers.get("EXPECT", "").lower()
             self.expect_continue = expect == "100-continue"
+
             if connection.lower() == "close":
                 self.connection_close = True
 
@@ -306,12 +320,14 @@ class HTTPRequestParser:
                 raise ParsingError("Content-Length is invalid")
 
             self.content_length = cl
+
             if cl > 0:
                 buf = OverflowableBuffer(self.adj.inbuf_overflow)
                 self.body_rcv = FixedStreamReceiver(cl, buf)
 
     def get_body_stream(self):
         body_rcv = self.body_rcv
+
         if body_rcv is not None:
             return body_rcv.getfile()
         else:
@@ -319,6 +335,7 @@ class HTTPRequestParser:
 
     def close(self):
         body_rcv = self.body_rcv
+
         if body_rcv is not None:
             body_rcv.getbuf().close()
 
@@ -346,16 +363,16 @@ def split_uri(uri):
             path, query = path.split(b"?", 1)
     else:
         try:
-            scheme, netloc, path, query, fragment = urlparse.urlsplit(uri)
+            scheme, netloc, path, query, fragment = parse.urlsplit(uri)
         except UnicodeError:
             raise ParsingError("Bad URI")
 
     return (
-        tostr(scheme),
-        tostr(netloc),
+        scheme.decode("latin-1"),
+        netloc.decode("latin-1"),
         unquote_bytes_to_wsgi(path),
-        tostr(query),
-        tostr(fragment),
+        query.decode("latin-1"),
+        fragment.decode("latin-1"),
     )
 
 
@@ -365,20 +382,24 @@ def get_header_lines(header):
     """
     r = []
     lines = header.split(b"\r\n")
+
     for line in lines:
         if not line:
             continue
 
         if b"\r" in line or b"\n" in line:
-            raise ParsingError('Bare CR or LF found in header line "%s"' % tostr(line))
+            raise ParsingError(
+                'Bare CR or LF found in header line "%s"' % str(line, "latin-1")
+            )
 
         if line.startswith((b" ", b"\t")):
             if not r:
                 # https://corte.si/posts/code/pathod/pythonservers/index.html
-                raise ParsingError('Malformed header line "%s"' % tostr(line))
+                raise ParsingError('Malformed header line "%s"' % str(line, "latin-1"))
             r[-1] += line
         else:
             r.append(line)
+
     return r
 
 
@@ -391,6 +412,7 @@ first_line_re = re.compile(
 
 def crack_first_line(line):
     m = first_line_re.match(line)
+
     if m is not None and m.end() == len(line):
         if m.group(3):
             version = m.group(5)
@@ -407,9 +429,11 @@ def crack_first_line(line):
         # unsuspecting souls from sending lowercase HTTP methods to waitress
         # and having the request complete, while servers like nginx drop the
         # request onto the floor.
+
         if method != method.upper():
-            raise ParsingError('Malformed HTTP method "%s"' % tostr(method))
+            raise ParsingError('Malformed HTTP method "%s"' % str(method, "latin-1"))
         uri = m.group(2)
+
         return method, uri, version
     else:
         return b"", b"", b""