Skip to content

Commit

Permalink
Merge pull request from GHSA-4f7p-27jc-3c36
Browse files Browse the repository at this point in the history
Fix for HTTP request smuggling due to incorrect validation
  • Loading branch information
digitalresistor authored Mar 16, 2022
2 parents 22c0394 + b28c9e8 commit 9e0b8c8
Show file tree
Hide file tree
Showing 9 changed files with 202 additions and 44 deletions.
26 changes: 26 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,29 @@
2.1.1
-----

Security Bugfix
~~~~~~~~~~~~~~~

- Waitress now validates that chunked encoding extensions are valid, and don't
contain invalid characters that are not allowed. They are still skipped/not
processed, but if they contain invalid data we no longer continue in and
return a 400 Bad Request. This stops potential HTTP desync/HTTP request
smuggling. Thanks to Zhang Zeyu for reporting this issue. See
https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36

- Waitress now validates that the chunk length is only valid hex digits when
parsing chunked encoding, and values such as ``0x01`` and ``+01`` are no
longer supported. This stops potential HTTP desync/HTTP request smuggling.
Thanks to Zhang Zeyu for reporting this issue. See
https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36

- Waitress now validates that the Content-Length sent by a remote contains only
digits in accordance with RFC7230 and will return a 400 Bad Request when the
Content-Length header contains invalid data, such as ``+10`` which would
previously get parsed as ``10`` and accepted. This stops potential HTTP
desync/HTTP request smuggling Thanks to Zhang Zeyu for reporting this issue. See
https://github.com/Pylons/waitress/security/advisories/GHSA-4f7p-27jc-3c36

2.1.0
-----

Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = waitress
version = 2.1.0
version = 2.1.1
description = Waitress WSGI server
long_description = file: README.rst, CHANGES.txt
long_description_content_type = text/x-rst
Expand Down
12 changes: 6 additions & 6 deletions src/waitress/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from waitress.buffers import OverflowableBuffer
from waitress.receiver import ChunkedReceiver, FixedStreamReceiver
from waitress.rfc7230 import HEADER_FIELD_RE, ONLY_DIGIT_RE
from waitress.utilities import (
BadRequest,
RequestEntityTooLarge,
Expand All @@ -31,8 +32,6 @@
find_double_newline,
)

from .rfc7230 import HEADER_FIELD


def unquote_bytes_to_wsgi(bytestring):
return unquote_to_bytes(bytestring).decode("latin-1")
Expand Down Expand Up @@ -221,7 +220,7 @@ def parse_header(self, header_plus):
headers = self.headers

for line in lines:
header = HEADER_FIELD.match(line)
header = HEADER_FIELD_RE.match(line)

if not header:
raise ParsingError("Invalid header")
Expand Down Expand Up @@ -317,11 +316,12 @@ def parse_header(self, header_plus):
self.connection_close = True

if not self.chunked:
try:
cl = int(headers.get("CONTENT_LENGTH", 0))
except ValueError:
cl = headers.get("CONTENT_LENGTH", "0")

if not ONLY_DIGIT_RE.match(cl.encode("latin-1")):
raise ParsingError("Content-Length is invalid")

cl = int(cl)
self.content_length = cl

if cl > 0:
Expand Down
28 changes: 21 additions & 7 deletions src/waitress/receiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Data Chunk Receiver
"""

from waitress.rfc7230 import CHUNK_EXT_RE, ONLY_HEXDIG_RE
from waitress.utilities import BadRequest, find_double_newline


Expand Down Expand Up @@ -110,6 +111,7 @@ def received(self, s):
s = b""
else:
self.chunk_end = b""

if pos == 0:
# Chop off the terminating CR LF from the chunk
s = s[2:]
Expand All @@ -133,20 +135,32 @@ def received(self, s):
line = s[:pos]
s = s[pos + 2 :]
self.control_line = b""
line = line.strip()

if line:
# Begin a new chunk.
semi = line.find(b";")

if semi >= 0:
# discard extension info.
extinfo = line[semi:]
valid_ext_info = CHUNK_EXT_RE.match(extinfo)

if not valid_ext_info:
self.error = BadRequest("Invalid chunk extension")
self.all_chunks_received = True

break

line = line[:semi]
try:
sz = int(line.strip(), 16) # hexadecimal
except ValueError: # garbage in input
self.error = BadRequest("garbage in chunked encoding input")
sz = 0

if not ONLY_HEXDIG_RE.match(line):
self.error = BadRequest("Invalid chunk size")
self.all_chunks_received = True

break

# Can not fail due to matching against the regular
# expression above
sz = int(line, 16) # hexadecimal

if sz > 0:
# Start a new chunk.
Expand Down
27 changes: 26 additions & 1 deletion src/waitress/rfc7230.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

import re

HEXDIG = "[0-9a-fA-F]"
DIGIT = "[0-9]"

WS = "[ \t]"
OWS = WS + "{0,}?"
RWS = WS + "{1,}?"
Expand All @@ -25,6 +28,12 @@
# ; visible (printing) characters
VCHAR = r"\x21-\x7e"

# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
QDTEXT = "[\t \x21\x23-\x5b\\\x5d-\x7e" + OBS_TEXT + "]"

QUOTED_PAIR = r"\\" + "([\t " + VCHAR + OBS_TEXT + "])"
QUOTED_STRING = '"(?:(?:' + QDTEXT + ")|(?:" + QUOTED_PAIR + '))*"'

# header-field = field-name ":" OWS field-value OWS
# field-name = token
# field-value = *( field-content / obs-fold )
Expand All @@ -43,8 +52,24 @@
# Which allows the field value here to just see if there is even a value in the first place
FIELD_VALUE = "(?:" + FIELD_CONTENT + ")?"

HEADER_FIELD = re.compile(
# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
# chunk-ext-name = token
# chunk-ext-val = token / quoted-string

CHUNK_EXT_NAME = TOKEN
CHUNK_EXT_VAL = "(?:" + TOKEN + ")|(?:" + QUOTED_STRING + ")"
CHUNK_EXT = (
"(?:;(?P<extension>" + CHUNK_EXT_NAME + ")(?:=(?P<value>" + CHUNK_EXT_VAL + "))?)*"
)

# Pre-compiled regular expressions for use elsewhere
ONLY_HEXDIG_RE = re.compile(("^" + HEXDIG + "+$").encode("latin-1"))
ONLY_DIGIT_RE = re.compile(("^" + DIGIT + "+$").encode("latin-1"))
HEADER_FIELD_RE = re.compile(
(
"^(?P<name>" + TOKEN + "):" + OWS + "(?P<value>" + FIELD_VALUE + ")" + OWS + "$"
).encode("latin-1")
)
QUOTED_PAIR_RE = re.compile(QUOTED_PAIR)
QUOTED_STRING_RE = re.compile(QUOTED_STRING)
CHUNK_EXT_RE = re.compile(("^" + CHUNK_EXT + "$").encode("latin-1"))
28 changes: 3 additions & 25 deletions src/waitress/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import stat
import time

from .rfc7230 import OBS_TEXT, VCHAR
from .rfc7230 import QUOTED_PAIR_RE, QUOTED_STRING_RE

logger = logging.getLogger("waitress")
queue_logger = logging.getLogger("waitress.queue")
Expand Down Expand Up @@ -216,40 +216,18 @@ def parse_http_date(d):
return retval


# RFC 5234 Appendix B.1 "Core Rules":
# VCHAR = %x21-7E
# ; visible (printing) characters
vchar_re = VCHAR

# RFC 7230 Section 3.2.6 "Field Value Components":
# quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
# qdtext = HTAB / SP /%x21 / %x23-5B / %x5D-7E / obs-text
# obs-text = %x80-FF
# quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
obs_text_re = OBS_TEXT

# The '\\' between \x5b and \x5d is needed to escape \x5d (']')
qdtext_re = "[\t \x21\x23-\x5b\\\x5d-\x7e" + obs_text_re + "]"

quoted_pair_re = r"\\" + "([\t " + vchar_re + obs_text_re + "])"
quoted_string_re = '"(?:(?:' + qdtext_re + ")|(?:" + quoted_pair_re + '))*"'

quoted_string = re.compile(quoted_string_re)
quoted_pair = re.compile(quoted_pair_re)


def undquote(value):
if value.startswith('"') and value.endswith('"'):
# So it claims to be DQUOTE'ed, let's validate that
matches = quoted_string.match(value)
matches = QUOTED_STRING_RE.match(value)

if matches and matches.end() == len(value):
# Remove the DQUOTE's from the value
value = value[1:-1]

# Remove all backslashes that are followed by a valid vchar or
# obs-text
value = quoted_pair.sub(r"\1", value)
value = QUOTED_PAIR_RE.sub(r"\1", value)

return value
elif not value.startswith('"') and not value.endswith('"'):
Expand Down
50 changes: 47 additions & 3 deletions tests/test_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def test_chunking_request_without_content(self):
self.assertFalse("transfer-encoding" in headers)

def test_chunking_request_with_content(self):
control_line = b"20;\r\n" # 20 hex = 32 dec
control_line = b"20\r\n" # 20 hex = 32 dec
s = b"This string has 32 characters.\r\n"
expected = s * 12
header = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
Expand All @@ -341,7 +341,7 @@ def test_chunking_request_with_content(self):
self.assertFalse("transfer-encoding" in headers)

def test_broken_chunked_encoding(self):
control_line = b"20;\r\n" # 20 hex = 32 dec
control_line = b"20\r\n" # 20 hex = 32 dec
s = b"This string has 32 characters.\r\n"
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
to_send += control_line + s + b"\r\n"
Expand All @@ -364,8 +364,52 @@ def test_broken_chunked_encoding(self):
self.send_check_error(to_send)
self.assertRaises(ConnectionClosed, read_http, fp)

def test_broken_chunked_encoding_invalid_hex(self):
control_line = b"0x20\r\n" # 20 hex = 32 dec
s = b"This string has 32 characters.\r\n"
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
to_send += control_line + s + b"\r\n"
self.connect()
self.sock.send(to_send)
with self.sock.makefile("rb", 0) as fp:
line, headers, response_body = read_http(fp)
self.assertline(line, "400", "Bad Request", "HTTP/1.1")
cl = int(headers["content-length"])
self.assertEqual(cl, len(response_body))
self.assertIn(b"Invalid chunk size", response_body)
self.assertEqual(
sorted(headers.keys()),
["connection", "content-length", "content-type", "date", "server"],
)
self.assertEqual(headers["content-type"], "text/plain")
# connection has been closed
self.send_check_error(to_send)
self.assertRaises(ConnectionClosed, read_http, fp)

def test_broken_chunked_encoding_invalid_extension(self):
control_line = b"20;invalid=\r\n" # 20 hex = 32 dec
s = b"This string has 32 characters.\r\n"
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
to_send += control_line + s + b"\r\n"
self.connect()
self.sock.send(to_send)
with self.sock.makefile("rb", 0) as fp:
line, headers, response_body = read_http(fp)
self.assertline(line, "400", "Bad Request", "HTTP/1.1")
cl = int(headers["content-length"])
self.assertEqual(cl, len(response_body))
self.assertIn(b"Invalid chunk extension", response_body)
self.assertEqual(
sorted(headers.keys()),
["connection", "content-length", "content-type", "date", "server"],
)
self.assertEqual(headers["content-type"], "text/plain")
# connection has been closed
self.send_check_error(to_send)
self.assertRaises(ConnectionClosed, read_http, fp)

def test_broken_chunked_encoding_missing_chunk_end(self):
control_line = b"20;\r\n" # 20 hex = 32 dec
control_line = b"20\r\n" # 20 hex = 32 dec
s = b"This string has 32 characters.\r\n"
to_send = b"GET / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n"
to_send += control_line + s
Expand Down
22 changes: 21 additions & 1 deletion tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def test_received_chunked_completed_sets_content_length(self):
b"Transfer-Encoding: chunked\r\n"
b"X-Foo: 1\r\n"
b"\r\n"
b"1d;\r\n"
b"1d\r\n"
b"This string has 29 characters\r\n"
b"0\r\n\r\n"
)
Expand Down Expand Up @@ -193,6 +193,26 @@ def test_parse_header_bad_content_length(self):
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_bad_content_length_plus(self):
data = b"GET /foobar HTTP/8.4\r\ncontent-length: +10\r\n"

try:
self.parser.parse_header(data)
except ParsingError as e:
self.assertIn("Content-Length is invalid", e.args[0])
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_bad_content_length_minus(self):
data = b"GET /foobar HTTP/8.4\r\ncontent-length: -10\r\n"

try:
self.parser.parse_header(data)
except ParsingError as e:
self.assertIn("Content-Length is invalid", e.args[0])
else: # pragma: nocover
self.assertTrue(False)

def test_parse_header_multiple_content_length(self):
data = b"GET /foobar HTTP/8.4\r\ncontent-length: 10\r\ncontent-length: 20\r\n"

Expand Down
Loading

0 comments on commit 9e0b8c8

Please sign in to comment.