summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBert JW Regeer <bertjw@regeer.org>2022-03-12 18:30:30 -0700
committerBert JW Regeer <bertjw@regeer.org>2022-03-12 19:48:25 -0700
commite75b0d9afbea8a933f8f5f11d279e661cbfd676b (patch)
tree73ce3d4ee1dbaba235d228022d704a359d3e6df4
parent22c03947e3bcd7631120aae40d3d844d4f35e49f (diff)
downloadwaitress-e75b0d9afbea8a933f8f5f11d279e661cbfd676b.tar.gz
Add new regular expressions for Chunked Encoding
This also moves some regular expressions for QUOTED_PAIR/QUOTED_STRING into this module from utilities so that they may be reused.
-rw-r--r--src/waitress/rfc7230.py27
-rw-r--r--src/waitress/utilities.py28
2 files changed, 29 insertions, 26 deletions
diff --git a/src/waitress/rfc7230.py b/src/waitress/rfc7230.py
index 9b25fbd..26e6426 100644
--- a/src/waitress/rfc7230.py
+++ b/src/waitress/rfc7230.py
@@ -5,6 +5,9 @@ needed to properly parse HTTP messages.
import re
+HEXDIG = "[0-9a-fA-F]"
+DIGIT = "[0-9]"
+
WS = "[ \t]"
OWS = WS + "{0,}?"
RWS = WS + "{1,}?"
@@ -25,6 +28,12 @@ TOKEN = TCHAR + "{1,}"
# ; visible (printing) characters
VCHAR = r"\x21-\x7e"
+# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
+QDTEXT = "[\t \x21\x23-\x5b\\\x5d-\x7e" + OBS_TEXT + "]"
+
+QUOTED_PAIR = r"\\" + "([\t " + VCHAR + OBS_TEXT + "])"
+QUOTED_STRING = '"(?:(?:' + QDTEXT + ")|(?:" + QUOTED_PAIR + '))*"'
+
# header-field = field-name ":" OWS field-value OWS
# field-name = token
# field-value = *( field-content / obs-fold )
@@ -43,8 +52,24 @@ FIELD_CONTENT = FIELD_VCHAR + "+(?:[ \t]+" + FIELD_VCHAR + "+)*"
# Which allows the field value here to just see if there is even a value in the first place
FIELD_VALUE = "(?:" + FIELD_CONTENT + ")?"
-HEADER_FIELD = re.compile(
+# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+# chunk-ext-name = token
+# chunk-ext-val = token / quoted-string
+
+CHUNK_EXT_NAME = TOKEN
+CHUNK_EXT_VAL = "(?:" + TOKEN + ")|(?:" + QUOTED_STRING + ")"
+CHUNK_EXT = (
+ "(?:;(?P<extension>" + CHUNK_EXT_NAME + ")(?:=(?P<value>" + CHUNK_EXT_VAL + "))?)*"
+)
+
+# Pre-compiled regular expressions for use elsewhere
+ONLY_HEXDIG_RE = re.compile(("^" + HEXDIG + "+$").encode("latin-1"))
+ONLY_DIGIT_RE = re.compile(("^" + DIGIT + "+$").encode("latin-1"))
+HEADER_FIELD_RE = re.compile(
(
"^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
).encode("latin-1")
)
+QUOTED_PAIR_RE = re.compile(QUOTED_PAIR)
+QUOTED_STRING_RE = re.compile(QUOTED_STRING)
+CHUNK_EXT_RE = re.compile(("^" + CHUNK_EXT + "$").encode("latin-1"))
diff --git a/src/waitress/utilities.py b/src/waitress/utilities.py
index 3caaa33..6ae4742 100644
--- a/src/waitress/utilities.py
+++ b/src/waitress/utilities.py
@@ -22,7 +22,7 @@ import re
import stat
import time
-from .rfc7230 import OBS_TEXT, VCHAR
+from .rfc7230 import QUOTED_PAIR_RE, QUOTED_STRING_RE
logger = logging.getLogger("waitress")
queue_logger = logging.getLogger("waitress.queue")
@@ -216,32 +216,10 @@ def parse_http_date(d):
return retval
-# RFC 5234 Appendix B.1 "Core Rules":
-# VCHAR = %x21-7E
-# ; visible (printing) characters
-vchar_re = VCHAR
-
-# RFC 7230 Section 3.2.6 "Field Value Components":
-# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
-# qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
-# obs-text = %x80-FF
-# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
-obs_text_re = OBS_TEXT
-
-# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
-qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"
-
-quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"
-quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"'
-
-quoted_string = re.compile(quoted_string_re)
-quoted_pair = re.compile(quoted_pair_re)
-
-
def undquote(value):
if value.startswith('"') and value.endswith('"'):
# So it claims to be DQUOTE'ed, let's validate that
- matches = quoted_string.match(value)
+ matches = QUOTED_STRING_RE.match(value)
if matches and matches.end() == len(value):
# Remove the DQUOTE's from the value
@@ -249,7 +227,7 @@ def undquote(value):
# Remove all backslashes that are followed by a valid vchar or
# obs-text
- value = quoted_pair.sub(r"\1", value)
+ value = QUOTED_PAIR_RE.sub(r"\1", value)
return value
elif not value.startswith('"') and not value.endswith('"'):