Skip to content

Commit

Permalink
Add new regular expressions for Chunked Encoding
Browse files Browse the repository at this point in the history
This also moves some regular expressions for QUOTED_PAIR/QUOTED_STRING
into this module from utilities so that they may be reused.
  • Loading branch information
digitalresistor committed Mar 13, 2022
1 parent 22c0394 commit e75b0d9
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 26 deletions.
27 changes: 26 additions & 1 deletion src/waitress/rfc7230.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

import re

# Core ABNF rules (RFC 5234 Appendix B.1) expressed as regex character classes
HEXDIG = "[0-9a-fA-F]"
DIGIT = "[0-9]"

# SP / HTAB, followed by the optional (OWS) and required (RWS) whitespace
# runs built on top of it; both runs use lazy quantifiers
WS = "[ \t]"
OWS = f"{WS}{{0,}}?"
RWS = f"{WS}{{1,}}?"
Expand All @@ -25,6 +28,12 @@
# ; visible (printing) characters
VCHAR = r"\x21-\x7e"

# Building blocks for quoted-string:
#   quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
#   qdtext        = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
#   quoted-pair   = "\" ( HTAB / SP / VCHAR / obs-text )
#
# The literal backslash emitted between \x5b and \x5d escapes the ']' so that
# it does not close the character class early
QDTEXT = f"[\t \x21\x23-\x5b\\\x5d-\x7e{OBS_TEXT}]"

# The single capturing group holds the character that follows the backslash
QUOTED_PAIR = f"\\\\([\t {VCHAR}{OBS_TEXT}])"
QUOTED_STRING = f'"(?:(?:{QDTEXT})|(?:{QUOTED_PAIR}))*"'

# header-field = field-name ":" OWS field-value OWS
# field-name = token
# field-value = *( field-content / obs-fold )
Expand All @@ -43,8 +52,24 @@
# Which allows the field value here to just see if there is even a value in the first place
FIELD_VALUE = f"(?:{FIELD_CONTENT})?"

HEADER_FIELD = re.compile(
# chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
# chunk-ext-name = token
# chunk-ext-val  = token / quoted-string

CHUNK_EXT_NAME = TOKEN
# The alternation is wrapped in its own non-capturing group so this fragment
# can be concatenated with other pattern text (anchors, quantifiers, further
# fragments) without the "|" splitting the surrounding expression.
CHUNK_EXT_VAL = "(?:(?:" + TOKEN + ")|(?:" + QUOTED_STRING + "))"
# Zero or more ';name[=value]' pairs. The named groups sit inside a repeated
# group, so a successful match only reports the last extension name/value.
CHUNK_EXT = (
    "(?:;(?P<extension>" + CHUNK_EXT_NAME + ")(?:=(?P<value>" + CHUNK_EXT_VAL + "))?)*"
)

# Pre-compiled regular expressions for use elsewhere
#
# The anchored patterns end in \Z rather than $. In Python, $ also matches
# just before a trailing newline, which would let input such as b"1f\n" pass
# a check that is meant to accept hex digits only.
ONLY_HEXDIG_RE = re.compile(("^" + HEXDIG + r"+\Z").encode("latin-1"))
ONLY_DIGIT_RE = re.compile(("^" + DIGIT + r"+\Z").encode("latin-1"))
HEADER_FIELD_RE = re.compile(
    (
        "^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + r"\Z"
    ).encode("latin-1")
)
# These two are compiled from str patterns, unlike the latin-1 encoded bytes
# patterns in the rest of this group
QUOTED_PAIR_RE = re.compile(QUOTED_PAIR)
QUOTED_STRING_RE = re.compile(QUOTED_STRING)
CHUNK_EXT_RE = re.compile(("^" + CHUNK_EXT + r"\Z").encode("latin-1"))
28 changes: 3 additions & 25 deletions src/waitress/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import stat
import time

from .rfc7230 import OBS_TEXT, VCHAR
from .rfc7230 import QUOTED_PAIR_RE, QUOTED_STRING_RE

# Module-level loggers: the package logger and the "waitress.queue" logger
logger, queue_logger = map(logging.getLogger, ("waitress", "waitress.queue"))
Expand Down Expand Up @@ -216,40 +216,18 @@ def parse_http_date(d):
return retval


# NOTE(review): the definitions below mirror QDTEXT / QUOTED_PAIR /
# QUOTED_STRING in .rfc7230, which also provides the compiled QUOTED_PAIR_RE
# and QUOTED_STRING_RE -- presumably this local copy is redundant; confirm
# before relying on it.
#
# RFC 5234 Appendix B.1 "Core Rules":
# VCHAR = %x21-7E
# ; visible (printing) characters
vchar_re = VCHAR

# RFC 7230 Section 3.2.6 "Field Value Components":
# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
# qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
# obs-text = %x80-FF
# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
obs_text_re = OBS_TEXT

# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"

# The capturing group holds the character following the backslash
quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"
quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"'

# Compiled from str patterns
quoted_string = re.compile(quoted_string_re)
quoted_pair = re.compile(quoted_pair_re)


def undquote(value):
if value.startswith('"') and value.endswith('"'):
# So it claims to be DQUOTE'ed, let's validate that
matches = quoted_string.match(value)
matches = QUOTED_STRING_RE.match(value)

if matches and matches.end() == len(value):
# Remove the DQUOTE's from the value
value = value[1:-1]

# Remove all backslashes that are followed by a valid vchar or
# obs-text
value = quoted_pair.sub(r"\1", value)
value = QUOTED_PAIR_RE.sub(r"\1", value)

return value
elif not value.startswith('"') and not value.endswith('"'):
Expand Down

0 comments on commit e75b0d9

Please sign in to comment.